import subprocess
import sys
import urllib2
import xml.dom.minidom

# Bugzilla advanced-search URL for bugs.winehq.org, requesting CSV output.
# It selects bugs in any of the listed open statuses; f1/o1/v1 together ask
# for bugs whose cf_regression_sha1sum field matches the regexp '.'
# (presumably: any non-empty value).
# NOTE(review): list_id looks like a leftover from a saved search session --
# confirm whether it is still needed.
query_url = 'https://bugs.winehq.org/buglist.cgi?' + '&'.join((
    'bug_status=UNCONFIRMED',
    'bug_status=NEW',
    'bug_status=ASSIGNED',
    'bug_status=STAGED',
    'bug_status=REOPENED',
    'bug_status=NEEDINFO',
    'f1=cf_regression_sha1sum',
    'list_id=241131',
    'o1=regexp',
    'query_format=advanced',
    'v1=.',
    'ctype=csv',
))

def _fetch(url):
    """Return the body of *url*, closing the HTTP response in every case."""
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


def _regression_sha(bug_xml):
    """Return the cf_regression_sha1sum text found in *bug_xml*, or None.

    The value is stripped of surrounding whitespace.  None is returned when
    the element is missing or empty, or when the value starts with '-':
    the field is free text supplied by Bugzilla users and is later handed
    to git as a command-line argument, where a leading dash would be parsed
    as an option instead of a revision.
    """
    dom = xml.dom.minidom.parseString(bug_xml)
    elements = dom.getElementsByTagName('cf_regression_sha1sum')
    if not elements or not elements[0].childNodes:
        return None
    sha = elements[0].childNodes[0].data.strip()
    if not sha or sha.startswith('-'):
        return None
    return sha


buglist_csv = _fetch(query_url)

# First path component of every file touched by a regression commit.
toplevel_files = set()
# Parent path -> set of names of its direct children.
path_subfiles = {}
# Path (file or directory) -> list of (bugnum, commit_desc) tuples; the
# empty path '' collects every commit.
commits_by_file = {'': []}

for line in buglist_csv.splitlines():
    # The first CSV column is the bug id; non-numeric values (such as the
    # header row) are skipped.
    bugnum = line.split(',', 1)[0]
    if not bugnum.isdigit():
        continue

    bug_xml = _fetch('https://bugs.winehq.org/show_bug.cgi?ctype=xml&id=%s' % bugnum)

    regression_sha = _regression_sha(bug_xml)
    if regression_sha is None:
        sys.stderr.write('bug %s: no usable regression SHA-1, skipping\n' % bugnum)
        continue

    try:
        # -s suppresses the patch: only the formatted header line is needed.
        commit_desc = subprocess.check_output(
            ['git', 'show', '-s', '--format=format:%h %aN: %s', regression_sha]
        ).splitlines()[0].strip()
        files = subprocess.check_output(
            ['git', 'diff', '--name-only', regression_sha + '^', regression_sha]
        ).splitlines()
    except subprocess.CalledProcessError as exc:
        # One commit git cannot inspect must not abort the whole report.
        sys.stderr.write('bug %s: git failed for %r (exit status %d), skipping\n'
                         % (bugnum, regression_sha, exc.returncode))
        continue

    # Every changed file plus each of its ancestor directories.
    files_changed = set()

    for filename in files:
        parts = filename.split('/')
        for depth in range(1, len(parts) + 1):
            files_changed.add('/'.join(parts[:depth]))
            if depth == 1:
                toplevel_files.add(parts[0])
            else:
                parent = '/'.join(parts[:depth - 1])
                path_subfiles.setdefault(parent, set()).add(parts[depth - 1])

    entry = (bugnum, commit_desc)
    for filename in files_changed:
        commits_by_file.setdefault(filename, []).append(entry)

    commits_by_file[''].append(entry)

def is_trivial_subtree(path, subfiles):
    """Tell whether *path* should be reported without descending into it.

    Returns True when *path* has no known children, or when at least one of
    its descendants has exactly as many recorded commits as *path* itself.
    Returns False otherwise.

    path -- '/'-joined path, or '' for the root.
    subfiles -- not consulted; children are looked up in the module-level
                path_subfiles / toplevel_files tables instead.
    """
    target = len(commits_by_file.get(path, ()))

    # The root has no path_subfiles entry; its children are the top-level names.
    if not path:
        pending = list(toplevel_files)
    else:
        pending = ['%s/%s' % (path, child)
                   for child in path_subfiles.get(path, ())]

    if not pending:
        return True

    # Walk all descendants with an explicit stack.
    while pending:
        candidate = pending.pop()
        if len(commits_by_file.get(candidate, ())) == target:
            return True
        for child in path_subfiles.get(candidate, ()):
            pending.append('%s/%s' % (candidate, child))

    return False

def output_tree(path, subfiles, indent='', max_inline=3):
    """Print the report for *path*, recursing into its children when needed.

    If *path* has at most *max_inline* commits, or is_trivial_subtree()
    says so, each commit is printed as "<bugnum> <commit_desc>".
    Otherwise every name in *subfiles* is printed (sorted) together with
    its commit count, followed by that child's own report indented by two
    more spaces.

    path -- '/'-joined path, or '' for the root.
    subfiles -- iterable of the names directly below *path*.
    indent -- prefix written before every line printed at this level.
    max_inline -- largest number of commits that is always listed directly
                  instead of being broken down by child (default 3).
    """
    commits = commits_by_file.get(path, ())

    if len(commits) <= max_inline or is_trivial_subtree(path, subfiles):
        for bugnum, commit_desc in commits:
            # A single parenthesised argument prints the same text whether
            # print is the Python 2 statement or the Python 3 function.
            print('%s%s %s' % (indent, bugnum, commit_desc))
        return

    for subfile in sorted(subfiles):
        # The root is the empty path, so its children take no '/' prefix.
        subpath = '%s/%s' % (path, subfile) if path else subfile
        subcommits = commits_by_file.get(subpath, ())
        print('%s%s\t%s' % (indent, subfile, len(subcommits)))
        output_tree(subpath, path_subfiles.get(subpath, ()), indent + '  ',
                    max_inline)

# Print the full report, starting at the root pseudo-path ''.
output_tree(path='', subfiles=toplevel_files)

