diff --git a/scripts/verapdf.py b/scripts/verapdf.py index 1c72b9664..28937d5c8 100755 --- a/scripts/verapdf.py +++ b/scripts/verapdf.py @@ -9,6 +9,7 @@ # USAGE: ./verapdf.py [$pdf_filepath|--process-all-test-pdf-files|--print-aggregated-report] +import json import sys from subprocess import run, DEVNULL, PIPE @@ -22,28 +23,30 @@ def analyze_pdf_file(pdf_filepath): command = [ "verapdf/verapdf" + BAT_EXT, "--format", - "text", - "-v", + "json", pdf_filepath, ] # print(" ".join(command)) - output = run( - command, stdout=PIPE, stderr=DEVNULL if HIDE_STDERR else None - ).stdout.decode() - # print(output) - return pdf_filepath, parse_output(output) + output = run(command, stdout=PIPE, stderr=DEVNULL if HIDE_STDERR else None) + + if output.returncode != 0: + return pdf_filepath, {"failure": output.stdout.decode()} + + return parse_output(output.stdout.decode()) def parse_output(output): "Parse VeraPDF CLI output into a dict." - lines = output.splitlines() - try: - grave_line = next(line for line in lines if line.startswith("GRAVE:")) - return {"failure": grave_line} - except StopIteration: - # Skipping the first line - errors = [line[len(" FAIL ") :] for line in lines[1:]] - return {"errors": errors} + output_dict = json.loads(output) + assert len(output_dict["report"]["jobs"]) == 1 + output_job = output_dict["report"]["jobs"][0] + file_path = output_job["itemDetails"]["name"] + rule_summaries = output_job["validationResult"]["details"]["ruleSummaries"] + errors = [ + f"{rule_summary['clause']}-{rule_summary['testNumber']}" + for rule_summary in rule_summaries + ] + return file_path, {"errors": errors} if __name__ == "__main__":