from subprocess import call
import os
import re

# Matches the literal text 'html'.  clean_file_exists treats a first line
# containing it as a sign the file is dirty (presumably a web page was saved
# instead of block data -- confirm against the server's behaviour).
html_pattern = re.compile(r'html')
# Matches the literal closing sequence ') )))' that clean_file_exists requires
# on the last line of a clean file.  Written as a raw string so the '\)'
# escapes reach the regex engine untouched and do not trigger Python's
# invalid-escape-sequence warnings.
proper_end_pattern = re.compile(r"\) \)\)\)")


def clean_file_exists(file_name):
    """Return True if `file_name` exists and looks like a clean download.

    A file is considered clean when its first line does not match
    `html_pattern` and its last line matches `proper_end_pattern`.
    A missing file, an empty file, or a file failing either check yields
    False, and a short diagnostic is printed.

    The file is streamed line by line so only the current line is held in
    memory, and the handle is closed deterministically by the `with` block.

    :param file_name: path of the file to inspect.
    :returns: True when the file is clean, False otherwise.
    """
    # Single-argument print(...) produces the same output whether it is
    # parsed as a Python 2 print statement or a Python 3 function call.
    if not os.path.exists(file_name):
        print('{} does not exist'.format(file_name))
        return False

    last_line = None
    with open(file_name) as block_file:
        for line_no, line in enumerate(block_file):
            # Only the very first line is checked for the html marker;
            # bail out early so the rest of the file is never read.
            if line_no == 0 and html_pattern.search(line):
                print('{} is dirty'.format(file_name))
                return False
            last_line = line

    # last_line stays None for an empty file, which is reported as dirty.
    if last_line is not None and proper_end_pattern.search(last_line):
        return True
    print('{} is dirty'.format(file_name))
    return False


def get_until_clean_sexpr(blk_num, max_attempts=None):
    """Download block `blk_num` with wget until the local copy is clean.

    The block is stored as 'blk<blk_num>' relative to the current working
    directory.  While `clean_file_exists` rejects that file, it is
    re-downloaded with `wget -O`, overwriting the previous copy.

    :param blk_num: block number; used in both the file name and the URL.
    :param max_attempts: optional cap on the number of downloads.  The
        default, None, retries indefinitely (the original behaviour).
    :returns: True once the file is clean, False if `max_attempts`
        downloads were made without producing a clean file.
    """
    file_name = 'blk{}'.format(blk_num)
    # The URL does not change between retries, so build it once.
    base_url = 'http://mimisbrunnr.cascadianhacker.com/blocks/'
    url = '{}{}&raw=true'.format(base_url, blk_num)

    attempts = 0
    while not clean_file_exists(file_name):
        if max_attempts is not None and attempts >= max_attempts:
            print('giving up on {} after {} attempts'.format(file_name,
                                                             attempts))
            return False
        print('grabbing {} from Ben'.format(file_name))
        # wget's exit status is deliberately not inspected: the next
        # clean_file_exists() call decides whether the download was usable.
        call(['wget', '-O', file_name, url])
        attempts += 1
    print('{} is clean'.format(file_name))
    return True


if __name__ == '__main__':
    import sys

    # Two integer command-line arguments are required: <start> and <end>.
    try:
        start = int(sys.argv[1])
        end = int(sys.argv[2])
    except (IndexError, ValueError):
        # Missing or non-numeric argument: show the usage line and stop,
        # instead of falling through to the loop with start/end undefined.
        print('Use: grabber.py <start> <end>')
        sys.exit(1)
    # range() excludes `end`, so blocks start .. end - 1 are fetched.
    for blk_num in range(start, end):
        get_until_clean_sexpr(blk_num)