
pytact.graph_sanity_check
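
Command-line tool that runs sanity checks over every file of a dataset and prints aggregate statistics. It can be invoked directly as a module; the flags correspond to the argparse options defined in main2 below, and /path/to/dataset is a placeholder for a dataset directory or SquashFS image:

    python -m pytact.graph_sanity_check --jobs 4 /path/to/dataset

Use --quick to skip the more expensive global-context checks, and --verbose 1 to additionally print the collected tactic texts.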

import argparse
from multiprocessing import Pool
from pathlib import Path
from functools import partial
from collections import Counter
import math
from pytact.data_reader import GlobalContextSets, lowlevel_data_reader, lowlevel_to_highlevel, Definition, Node, Original, Substituted, Discharged, node_dependencies, definition_dependencies
import pytact.graph_api_capnp_cython as apic
import capnp  # installs pycapnp's import hook, needed to import graph_api_capnp
import pytact.graph_api_capnp as graph_api_capnp

def open_dataset(dataset_path: Path):
    # Pool worker initializer: each worker process opens its own view of the
    # dataset and stores it in module-level globals for process1 to use.
    global ctx
    ctx = lowlevel_data_reader(dataset_path)
    global data
    data = ctx.__enter__()
    global node_counts
    node_counts = [len(data[i].graph.nodes) for i in range(len(data.graph_files))]
    global hdata
    hdata = lowlevel_to_highlevel(data)
    global global_contexts
    global_contexts = GlobalContextSets.new_context().__enter__()
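
open_dataset is used below as the initializer of the multiprocessing.Pool in main2: every worker opens the dataset for itself and keeps the readers in module-level globals, which avoids having to send the (not readily picklable) Cap'n Proto readers between processes. The contexts entered here are intentionally never exited; they live for the lifetime of the worker process.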

# Superseded by the node_dependencies and definition_dependencies
# implementations imported from pytact.data_reader above:
# def node_dependencies(n: Node, deps: set[Definition] | None = None) -> set[Definition]:
#     if deps is None:
#         deps = set()
#     seen: set[Node] = set()
#     stack = [n]
#     while stack:
#         node = stack.pop()
#         if node in seen:
#             continue
#         seen.add(node)
#         if d := node.definition:
#             deps.add(d)
#             continue
#         for _, child in node.children:
#             stack.append(child)
#     return deps

# def definition_dependencies(d: Definition):
#     deps = set()
#     for _, c in d.node.children:
#         node_dependencies(c, deps)
#     return deps

def process1(args, fname: Path):
    # Runs all sanity checks for a single graph file and returns its
    # statistics as a tuple; executed inside a Pool worker.
    file_errors = []
    file_definitions = 0
    file_original_definitions = 0
    file_base_tactics_text = set()
    file_base_tactics_intermtext = set()
    file_tactics = Counter()
    file_original_tactics = Counter()
    file_tactic_arguments = {}
    file_tactic_ident_to_base = {}
    proof_steps = 0
    original_proof_steps = 0
    original_proof_steps_faithful = 0
    outcomes = 0
    original_outcomes = 0
    proofs = 0
    original_proofs = 0
    original_proofs_faithful = 0
    unresolvable = 0
    file_disappearing_proof_states = 0
    graphid = data.graphid_by_filename[fname]
    freader = data[graphid]
    graph = freader.graph
    nodes_count = node_counts[graphid]
    edges_count = len(graph.edges)
    dependency_count = len(freader.dependencies)
    print(f"Checking file {fname}: {nodes_count} nodes and {edges_count} edges")

    # Check correctness of Graph struct
    for n in graph.nodes:
        if not (n.children_index + n.children_count <= edges_count):
            file_errors.append(f"{fname}: Children of node {n} are not valid indices of Graph.edges")
        if n.label.is_definition:
            file_definitions += 1

    for t in graph.edges:
        target = t.target
        dep_index = target.dep_index
        if dep_index >= dependency_count:
            file_errors.append(f"{fname}: target.depIndex {dep_index} is out of range: "
                  f"len(g.dependencies) is {dependency_count} "
                  f"and g.dependencies = {freader.dependencies}")
        if target.node_index >= node_counts[data.local_to_global(graphid, dep_index)]:
            file_errors.append(f"{fname}: Reference for target {t} is out of bounds")

    # Check correctness of Dataset struct
    if file_definitions != len(freader.definitions):
        file_errors.append(f"{fname}: Counted {file_definitions} definitions in the file, "
                        f"but Dataset.definitions has size {len(freader.definitions)}")

    hreader = hdata[fname]
    with global_contexts.sub_context(lambda d: d.is_file_representative) as sub_global_contexts:
        for d in hreader.definitions():
            if not d.node.definition:
                file_errors.append(f"{fname}: Node {d.node} should be a definition but is not")

            # Check correctness of Definition struct
            if isinstance(d.status, Original):
                file_original_definitions += 1
            elif isinstance(d.status, Discharged):
                if not d.status.original.node.definition:
                    file_errors.append(f"{fname}: Discharged node of definition {d.name} "
                                       f"is not a definition")
            elif isinstance(d.status, Substituted):
                if not d.status.original.node.definition:
                    file_errors.append(f"{fname}: Substituted node of definition {d.name} "
                                       f"is not a definition")

            def check_in_global_context(s):
                global_context = sub_global_contexts.global_context_set(d)
                for dd in s:
                    if dd not in global_context:
                        file_errors.append(f"{fname}: Definition {d.name} has dependency {dd.name} "
                                        f"which is not part of its global context")

            if not args.quick:
                check_in_global_context(definition_dependencies(d))

            crepr = d.cluster_representative
            if d not in d.cluster:
                file_errors.append(f"{fname}: Cluster represented by {crepr.name} "
                                f"does not contain {d.name}")
            for cd in d.cluster:
                if crepr != cd.cluster_representative:
                    file_errors.append(f"{fname}: Cluster represented by {crepr.name} contains unrelated "
                                    f"definition {cd.name}")
            if prev := d.previous:
                if not prev.node.definition:
                    file_errors.append(f"{fname}: Previous node of definition {d.name} is not a definition")
            for ep in d.external_previous:
                if not ep.node.definition:
                    file_errors.append(f"{fname}: External dependency of definition {d.name} is "
                                    f"not a definition")
            if ps := d.proof:
                proofs += 1
                faithful = True
                # Collect the ids of all 'before' states so we can detect 'after'
                # states that never occur as the 'before' state of another step.
                before_states = set()
                for p in ps:
                    for outcome in p.outcomes:
                        before_states.add(outcome.before.id)
                for p in ps:
                    proof_steps += 1
                    if t := p.tactic:
                        ident = t.ident
                        file_base_tactics_text.add(t.base_text)
                        file_base_tactics_intermtext.add(t.interm_text)
                    else:
                        ident = 0
                    file_tactics[ident] += 1
                    if isinstance(d.status, Original):
                        file_original_tactics[ident] += 1
                        original_proof_steps += 1
                        if (t := p.tactic) and t.text == t.interm_text:
                            original_proof_steps_faithful += 1
                        else:
                            faithful = False
                    for outcome in p.outcomes:
                        for after in outcome.after:
                            if after.id not in before_states:
                                file_disappearing_proof_states += 1

                        if not args.quick:
                            check_in_global_context(node_dependencies(outcome.term))
                        for state in [outcome.before] + list(outcome.after):
                            root_label = state.root.label
                            if not root_label.is_proof_state:
                                file_errors.append(f"{fname}: root of proof state {state} "
                                                f"has label {root_label}, not a proof state")

                            if not args.quick:
                                check_in_global_context(node_dependencies(state.root))
                            if len(state.context) != len(state.context_names):
                                file_errors.append(f"{fname}: Length of context is different from length of "
                                                f"context_names in proof state {state}")
                            if len(state.context) != len(state.context_text):
                                file_errors.append(f"{fname}: Length of context is different from length of "
                                                f"context_text in proof state {state}")
                            root_children = [child for _, child in state.root.children]
                            for c in state.context:
                                if c not in root_children:
                                    file_errors.append(
                                        f"{fname}: hyp {c} of state {state} in def {d.name} is not "
                                        f"reachable from the root")
                                if not (c.label.is_context_def or c.label.is_context_assum):
                                    file_errors.append(
                                        f"{fname}: ctx {state.context} of a state {state} "
                                        f"has the wrong node classification {c.label}")

                        # Every occurrence of a tactic must take the same number of arguments.
                        file_tactic_arguments.setdefault(ident, len(outcome.tactic_arguments))
                        if t := p.tactic:
                            file_tactic_ident_to_base.setdefault(ident, t.base_text)
                        if file_tactic_arguments[ident] != len(outcome.tactic_arguments):
                            file_errors.append(
                                f"{fname}: Tactic with two different argument lengths detected. "
                                f"Original: {file_tactic_arguments[ident]} : "
                                f"{file_tactic_ident_to_base.get(ident)}. "
                                f"New: {len(outcome.tactic_arguments)} : "
                                f"{t.text if t else None} : {t.base_text if t else None}")
                        outcomes += 1
                        if isinstance(d.status, Original):
                            original_outcomes += 1
                        for a in outcome.tactic_arguments:
                            if not a:
                                unresolvable += 1
                            else:
                                if not args.quick:
                                    check_in_global_context(node_dependencies(a))
                if isinstance(d.status, Original):
                    original_proofs += 1
                    if faithful:
                        original_proofs_faithful += 1
    # A representative index equal to len(graph.nodes) signals that the file
    # has no representative definition.
    if freader.representative != len(graph.nodes):
        if not graph.nodes[freader.representative].label.is_definition:
            file_errors.append(f"{fname}: Representative {freader.representative} is not a definition")
    for dep in freader.dependencies:
        if Path(dep) not in data.graphid_by_filename:
            file_errors.append(f"{fname}: Dependency {dep} could not be found")

    return (fname, nodes_count, edges_count,
            file_definitions, file_original_definitions, file_base_tactics_text,
            file_base_tactics_intermtext, file_tactics, file_original_tactics, file_tactic_arguments,
            proof_steps, original_proof_steps, original_proof_steps_faithful,
            outcomes, original_outcomes, proofs, original_proofs,
            original_proofs_faithful, unresolvable, file_errors, file_disappearing_proof_states)

def entropy(d):
    # Shannon entropy, in bits, of a Counter of occurrence counts.
    n = sum(d.values())
    return -sum((c / n) * math.log(c / n, 2) for c in d.values())
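
As a quick check of the formula: a distribution in which two tactics occur equally often carries exactly one bit per proof step (the tactic names below are made up purely for illustration):

    >>> entropy(Counter({'induction': 4, 'reflexivity': 4}))
    1.0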

def main2():
    parser = argparse.ArgumentParser(
        description='Run sanity checks on a dataset and print some statistics.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument('dataset',
                        type=str,
                        help=('The location of the dataset to check. ' +
                              'Either a dataset directory, or a SquashFS image, ' +
                              'which will be automatically mounted.'))
    parser.add_argument('--jobs',
                        type=int,
                        default=None,
                        help='number of parallel multiprocessing jobs to run')
    parser.add_argument('--quick',
                        action='store_true',
                        help='skip the more expensive checks')
    parser.add_argument('--verbose',
                        type=int,
                        default=0,
                        help='verbosity level (0: summary only, 1: also print tactic texts)')

    args = parser.parse_args()

    import sys
    sys.setrecursionlimit(10000)

    dataset_path = Path(args.dataset).resolve()

    errors = []
    tactics = Counter()
    original_tactics = Counter()
    tactic_arguments = {}
    base_tactics_text = set()
    base_tactics_intermtext = set()
    definitions = 0
    original_definitions = 0
    nodes_total = 0
    edges_total = 0
    proof_steps_total = 0
    original_proof_steps_total = 0
    original_proof_steps_faithful_total = 0
    outcomes_total = 0
    original_outcomes_total = 0
    proofs_total = 0
    original_proofs_total = 0
    original_proofs_faithful_total = 0
    unresolvable_total = 0
    disappearing_proof_states = 0

    # Open the dataset briefly to obtain the list of files to check.
    with lowlevel_data_reader(dataset_path) as data:
        file_list = data.graph_files

    # Each pool worker opens the dataset itself via the open_dataset initializer.
    process1_partial = partial(process1, args)
    with Pool(args.jobs, open_dataset, [dataset_path]) as pool:
        results = pool.map(process1_partial, file_list, chunksize=1)
    # Sequential variant, useful for debugging:
    # file_list = file_list[280:360]
    # print(file_list)
    # open_dataset(dataset_path)
    # results = [process1(args, f) for f in file_list]

    for res in results:
        (fname, nodes_count, edges_count,
         file_definitions, file_original_definitions, file_base_tactics_text,
         file_base_tactics_intermtext, file_tactics, file_original_tactics, file_tactic_arguments,
         proof_steps, original_proof_steps, original_proof_steps_faithful,
         outcomes, original_outcomes, proofs, original_proofs,
         original_proofs_faithful, unresolvable, file_errors, file_disappearing_proof_states) = res
        nodes_total += nodes_count
        edges_total += edges_count
        proof_steps_total += proof_steps
        original_proof_steps_total += original_proof_steps
        original_proof_steps_faithful_total += original_proof_steps_faithful
        outcomes_total += outcomes
        original_outcomes_total += original_outcomes
        proofs_total += proofs
        original_proofs_total += original_proofs
        original_proofs_faithful_total += original_proofs_faithful
        definitions += file_definitions
        original_definitions += file_original_definitions
        base_tactics_text.update(file_base_tactics_text)
        base_tactics_intermtext.update(file_base_tactics_intermtext)
        tactics += file_tactics
        original_tactics += file_original_tactics
        disappearing_proof_states += file_disappearing_proof_states
        errors += file_errors
        # Tactic argument counts must also be consistent across files.
        for tac, length in file_tactic_arguments.items():
            tactic_arguments.setdefault(tac, length)
            if tactic_arguments[tac] != length:
                errors.append(f'{fname}: Tactic with two different argument lengths detected: {tac}')
        unresolvable_total += unresolvable

    for error in errors:
        print(error)

    print(f"Errors encountered {len(errors)}")
    print(f"Nodes total {nodes_total}")
    print(f"Edges total {edges_total}")
    print(f"Tactics total {len(tactics)}")
    print(f"Original tactics total {len(original_tactics)}")
    print(f"Tactics entropy (bits) {entropy(tactics)}")
    print(f"Original tactics entropy (bits) {entropy(original_tactics)}")
    print(f"Tactics base text total {len(base_tactics_text)}")
    print(f"Tactics base intermtext total {len(base_tactics_intermtext)}")
    print(f"Definitions total {definitions}")
    print(f"Original definitions total {original_definitions}")
    print(f"Proof steps total {proof_steps_total}")
    print(f"Original proof steps total {original_proof_steps_total}")
    print(f"Faithfully represented original proof steps total {original_proof_steps_faithful_total}")
    print(f"Outcomes total {outcomes_total}")
    print(f"Original outcomes total {original_outcomes_total}")
    print(f"Proofs total {proofs_total}")
    print(f"Original proofs total {original_proofs_total}")
    print(f"Faithful original proofs total {original_proofs_faithful_total}")
    print(f"Unresolvable tactic arguments {unresolvable_total}")
    print(f"Disappearing proof states (spooky action at a distance) {disappearing_proof_states}")

    if args.verbose >= 1:
        print("Tactics base text:")
        for t in base_tactics_text:
            print(t)
        print("Tactics intermtext text:")
        for t in base_tactics_intermtext:
            print(t)

    if len(errors) > 0:
        raise Exception("Errors occurred")

def main():
    # import pstats, cProfile
    # cProfile.runctx("main2()", globals(), locals(), "Profile.prof")
    # s = pstats.Stats("Profile.prof")
    # s.strip_dirs().sort_stats("time").print_stats()
    main2()

if __name__ == '__main__':
    main()