#!/usr/bin/env python3
"""Map depnest ``exports.gives`` / ``exports.needs`` declarations of JS files.

The script searches the current directory with ripgrep (``rg``) for
``exports.gives`` and ``exports.needs`` declarations, expands each one into
dotted method names with node and the ``depnest`` package, and then derives:

* ``providers``    - method name -> files that give it
* ``dependencies`` - file -> ``(provider_file, method)`` pairs it needs
* ``dependents``   - file -> ``(needing_file, method)`` pairs that rely on it

Everything is pretty-printed and also pickled to ``results.pickle``.
"""
import json
import pickle
import re
import subprocess as sp
from collections import defaultdict
from pprint import pprint

try:
    # Only referenced by the disabled dot export at the end of main(), so a
    # missing graphviz package must not stop the analysis from running.
    from graphviz import Digraph
except ImportError:
    Digraph = None

# Raw strings: ``\)`` is not a valid escape sequence in a normal literal.
# Each command is split on whitespace before being executed (no shell).
NEEDS_CALL = r"rg --json -t js --multiline exports.needs[^\)]*\)"
GIVES_CALL = r"rg --json -t js --multiline exports.gives[^\)]*\)"
PARENTHESIS_EXTRACTION_RE = re.compile(r'\(([^)]+)')

# Program handed to ``node -e``; ``{obj_str}`` is replaced with the argument
# text of a declaration and the nested result is printed as JSON.
JS_TEMPLATE = '''
const nest = require('depnest')
console.log(JSON.stringify(nest({obj_str})))
'''


def run_ripgrep(command):
    """Run a ripgrep command line and return its stdout decoded as text.

    ripgrep exits with status 1 when it simply found no match; that case
    yields whatever (possibly empty) output was produced instead of an error.

    Args:
        command: full command line; it is split on whitespace, not run
            through a shell.

    Raises:
        subprocess.CalledProcessError: for any exit status other than 0 or 1.
    """
    completed = sp.run(command.split(), stdout=sp.PIPE)
    if completed.returncode not in (0, 1):
        raise sp.CalledProcessError(
            completed.returncode, completed.args, output=completed.stdout)
    return completed.stdout.decode()


def parse_ripgrep(lines):
    """Yield ``(file_name, matched_text)`` for each match in ``rg --json`` output.

    Args:
        lines: iterable of output lines, one JSON message per line. Blank
            lines and messages whose ``type`` is not ``'match'`` are skipped.

    Raises:
        AssertionError: if a match message does not contain exactly one
            submatch; the message is the pretty-printed JSON.
    """
    for raw_line in lines:
        if not raw_line.strip():
            continue
        message = json.loads(raw_line)
        if message['type'] != 'match':
            continue
        data = message['data']
        file_name = data['path']['text']
        matched_text = data['lines']['text']
        assert len(data['submatches']) == 1, json.dumps(message, indent=2)
        yield file_name, matched_text


def flatten(d):
    """Yield the dotted key path of every leaf in a nested dict.

    A leaf is a ``str`` value or a ``True`` value; any other value is treated
    as a nested dict and its paths are prefixed with the current key.

    Raises:
        AssertionError: if a boolean leaf is ``False``.
    """
    for key, value in d.items():
        if isinstance(value, bool):
            assert value, d
            yield key
        elif isinstance(value, str):
            yield key
        else:
            for postfix in flatten(value):
                yield f'{key}.{postfix}'


def edges(rg_matches):
    """Return ``{file_name: [dotted method names]}`` for raw ``rg --json`` output.

    The argument text of each matched declaration is expanded by running
    ``depnest`` in a node subprocess (one process per match). When a file
    matches more than once, the last match replaces the earlier ones.

    NOTE(security): the matched source text is interpolated into a JavaScript
    program and executed by node, so only run this on repositories you trust.

    Args:
        rg_matches: complete stdout of an ``rg --json`` call as one string.
    """
    result = {}
    for file_name, text in parse_ripgrep(rg_matches.split('\n')):
        if 'nest(' in text:
            # ``nest(...)`` call: take what is between the first '(' and ')'.
            obj_str = text[text.find('(') + 1:text.find(')')]
        else:
            # Object literal: take the outermost ``{...}`` of the match.
            obj_str = text[text.find('{'):text.rfind('}') + 1]
        output = sp.check_output(
            ['node', '-e', JS_TEMPLATE.format(obj_str=obj_str)])
        result[file_name] = list(flatten(json.loads(output.decode())))
    return result


def build_providers(gives):
    """Invert ``{file: [methods]}`` into ``{method: [files giving it]}``."""
    providers = defaultdict(list)
    for filename, methods in gives.items():
        for method in methods:
            providers[method].append(filename)
    return dict(providers)


def link_dependencies(needs, providers):
    """Connect every needed method to the files that provide it.

    A method without any provider is attributed to the placeholder file
    ``'UNKNOWN'``. Files that nothing depends on do not appear in
    ``dependents``.

    Args:
        needs: ``{file: [needed methods]}``.
        providers: ``{method: [providing files]}``.

    Returns:
        ``(dependencies, dependents)``: ``dependencies`` maps a file to the
        ``(provider_file, method)`` pairs it needs, ``dependents`` maps a
        file to the ``(needing_file, method)`` pairs that rely on it.
    """
    dependencies = defaultdict(list)
    dependents = defaultdict(list)
    for filename, methods in needs.items():
        for method in methods:
            method_providers = providers.get(method, ['UNKNOWN'])
            dependencies[filename] += [(mp, method) for mp in method_providers]
            for mp in method_providers:
                dependents[mp].append((filename, method))
    return dict(dependencies), dict(dependents)


def column_widths(counts):
    """Return ``(count_width, name_width)`` for printing the counts table.

    ``count_width`` is the number of digits of the largest count and
    ``name_width`` the length of the longest file name. The iterables are
    passed to ``max`` as a whole (not unpacked) so that a single entry is not
    mistaken for the iterable itself, and an empty mapping gives ``(1, 0)``.
    """
    count_width = len(str(max(counts.values(), default=0)))
    name_width = max((len(name) for name in counts), default=0)
    return count_width, name_width


def main():
    """Collect gives/needs, print the derived maps and pickle the results."""
    gives = edges(run_ripgrep(GIVES_CALL))
    needs = edges(run_ripgrep(NEEDS_CALL))

    # for every method: which files provide this method?
    providers = build_providers(gives)
    pprint(providers)

    # for every file: which other files does this depend on (dependencies),
    # and which other files depend on this (dependents), and for which method?
    dependencies, dependents = link_dependencies(needs, providers)

    print('=' * 80)
    pprint(dependencies)
    print('=' * 80)
    pprint(dependents)
    print('=' * 80)

    # Least depended-upon files first.
    sorted_providers = sorted(dependents, key=lambda k: len(dependents[k]))
    counts = {k: len(dependents[k]) for k in sorted_providers}
    max_count, max_len = column_widths(counts)
    print(max_count, max_len)
    print('-' * 80)
    for k in sorted_providers:
        print(f'{counts[k]:{max_count}}\t{k:{max_len}}')

    with open('results.pickle', 'wb') as f:
        pickle.dump({
            'gives': gives,
            'needs': needs,
            'counts': counts,
            'providers': providers,
            'dependents': dependents,
            'dependencies': dependencies,
        }, f)

    # Disabled graphviz export (requires the optional graphviz package):
    #
    # dot = Digraph(comment='Dependencies')
    #
    # for filename in dependencies.keys():
    #     dot.node(filename)
    #
    # for filename, deps in dependencies.items():
    #     for dep, api in deps:
    #         dot.edge(filename, dep, label=api)
    #
    # print(dot.source)
    #
    # with open('dependencies.dot', 'w') as f:
    #     f.write(dot.source)


if __name__ == '__main__':
    main()