Skip to content

Commit

Permalink
Use JSON output from verapdf
Browse files Browse the repository at this point in the history
  • Loading branch information
Tolker-KU committed Jul 26, 2023
1 parent 892d58a commit aa6062f
Showing 1 changed file with 18 additions and 15 deletions.
33 changes: 18 additions & 15 deletions scripts/verapdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

# USAGE: ./verapdf.py [$pdf_filepath|--process-all-test-pdf-files|--print-aggregated-report]

import json
import sys
from subprocess import run, DEVNULL, PIPE

Expand All @@ -22,28 +23,30 @@ def analyze_pdf_file(pdf_filepath):
command = [
"verapdf/verapdf" + BAT_EXT,
"--format",
"text",
"-v",
"json",
pdf_filepath,
]
# print(" ".join(command))
output = run(
command, stdout=PIPE, stderr=DEVNULL if HIDE_STDERR else None
).stdout.decode()
# print(output)
return pdf_filepath, parse_output(output)
output = run(command, stdout=PIPE, stderr=DEVNULL if HIDE_STDERR else None)

if output.returncode != 0:
return pdf_filepath, {"failure": output.stdout.decode()}

return parse_output(output.stdout.decode())


def parse_output(output):
"Parse VeraPDF CLI output into a dict."
# NOTE(review): this span is a flattened diff view -- the +/- markers and the
# indentation were lost. Judging by the commit title ("Use JSON output from
# verapdf") and the 18-additions / 15-deletions count, the first body below is
# the removed text-format parser and the second body is the added JSON parser.
# Confirm against the repository before treating either as the live code.
#
# --- Apparently removed: parser for the plain-text report -------------------
# Returns a bare dict: {"failure": <line>} when a line starts with "GRAVE:",
# otherwise {"errors": [...]} built from every line after the first.
lines = output.splitlines()
try:
grave_line = next(line for line in lines if line.startswith("GRAVE:"))
return {"failure": grave_line}
except StopIteration:
# Skipping the first line
# Each remaining line has its first len(" FAIL ") characters sliced off;
# the slice is unconditional, the prefix itself is not checked.
errors = [line[len(" FAIL ") :] for line in lines[1:]]
return {"errors": errors}
# --- Apparently added: parser for the JSON report ---------------------------
# Returns a tuple (file_path, {"errors": [...]}) instead of a bare dict, so
# the return shape differs from the text parser above.
output_dict = json.loads(output)
# Exactly one job is expected in the report.
# NOTE(review): `assert` is stripped under `python -O`, so this check
# disappears in optimized runs.
assert len(output_dict["report"]["jobs"]) == 1
output_job = output_dict["report"]["jobs"][0]
# The file path is read back from the report's own "itemDetails" -> "name".
file_path = output_job["itemDetails"]["name"]
rule_summaries = output_job["validationResult"]["details"]["ruleSummaries"]
# One "<clause>-<testNumber>" string per rule summary.
# NOTE(review): presumably only failed rules are listed here -- confirm
# against the veraPDF report format.
errors = [
f"{rule_summary['clause']}-{rule_summary['testNumber']}"
for rule_summary in rule_summaries
]
return file_path, {"errors": errors}


if __name__ == "__main__":
Expand Down

0 comments on commit aa6062f

Please sign in to comment.