"""Fetch block s-expression dumps with wget, retrying until each one is clean.

Usage: grabber.py <start> <end>

For every block number in ``range(start, end)`` the script makes sure a file
named ``blk<N>`` (relative to the current working directory) exists and
passes the checks in :func:`clean_file_exists`, downloading it again
otherwise.

The code sticks to syntax that is valid on both Python 2 and Python 3
(single-argument ``print(...)`` calls, no f-strings, no annotations).
"""
from subprocess import call
import os
import re
import sys

# Any line containing this text marks the file as dirty (presumably an HTML
# error page was saved instead of the s-expression -- confirm with the server).
html_pattern = re.compile('html')
# Closing-paren sequence that the last line of a complete dump must contain.
# Raw string: "\)" in a plain literal is an invalid escape sequence.
proper_end_pattern = re.compile(r"\) \)\)\)")


# TODO Optimize: the whole file is still read into memory just to inspect it.
def clean_file_exists(file_name):
    """Return True when *file_name* exists and looks like a complete dump.

    A file is considered clean when both of these hold:

    * no line matches ``html_pattern``;
    * its last line matches ``proper_end_pattern``.

    A short diagnostic is printed whenever the file is missing or dirty.

    Args:
        file_name: Path of the file to inspect.

    Returns:
        True if the file is clean, False if it is missing, empty or dirty.
    """
    if not os.path.exists(file_name):
        print('{} does not exist'.format(file_name))
        return False

    # The context manager closes the handle; the original leaked it.
    with open(file_name) as handle:
        lines = handle.readlines()

    has_html = any(html_pattern.search(line) for line in lines)
    # Only the final line is checked for the terminator, exactly as the
    # original reversed()-then-break loop did; an empty file has no last line.
    ends_properly = bool(lines) and bool(proper_end_pattern.search(lines[-1]))

    if has_html or not ends_properly:
        print('{} is dirty'.format(file_name))
        return False
    return True


def get_until_clean_sexpr(blk_num):
    """Download block *blk_num* into ``blk<blk_num>`` until the file is clean.

    ``wget`` is invoked repeatedly, overwriting the file each time, until
    :func:`clean_file_exists` accepts it. There is no retry limit and no
    delay between attempts, so this call does not return while the download
    keeps failing.

    Args:
        blk_num: Block number; used for both the URL and the file name.
    """
    file_name = 'blk{}'.format(blk_num)
    while not clean_file_exists(file_name):
        print('grabbing {} from Ben'.format(file_name))
        base_url = 'http://mimisbrunnr.cascadianhacker.com/blocks/'
        url = '{}{}&raw=true'.format(base_url, blk_num)
        # Argument list (no shell), so blk_num is never shell-interpreted.
        call(['wget', '-O', file_name, url])
    print('{} is clean'.format(file_name))


def main(argv):
    """Run the grabber for the block range given on the command line.

    Args:
        argv: Full argument vector; ``argv[1]`` is the first block number and
            ``argv[2]`` the exclusive upper bound.

    Returns:
        0 on success, 2 when the two integer arguments are missing or invalid.
    """
    try:
        start = int(argv[1])
        end = int(argv[2])
    except (IndexError, ValueError):
        # The original printed the usage text and then carried on, crashing
        # with NameError on the unbound start/end; stop here instead.
        print('Use: grabber.py <start> <end>')
        return 2

    for blk_num in range(start, end):
        get_until_clean_sexpr(blk_num)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))