diff --git a/CHANGELOG.md b/CHANGELOG.md index 36a7b57..2541ed1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,11 @@ and this project adheres to [Python PEP 440 Versioning](https://www.python.org/d ## [Unreleased] ### Added +- SHACL Rules Expander Mode + - A new alternative Run Mode for PySHACL + - PySHACL will not validate the DataGraph against Shapes and Constraints, instead it will simply run all SHACL-AF Rules to expand the DataGraph. + - By default it will output a new graph containing the existing DataGraph Triples plus the expanded triples + - Run with inplace mode to expand the new triples directly into the input DataGraph - Focus Node Filtering - You can now pass in a list of focus nodes to the validator, and it will only validate those focus nodes. - Note, you still need to pass in a SHACL Shapes Graph, and the shapes still need to target the focus nodes. @@ -19,6 +24,9 @@ and this project adheres to [Python PEP 440 Versioning](https://www.python.org/d - If you give the validator a list of Shapes to use, and a list of focus nodes, the validator will operate in a highly-targeted mode, it feeds those focus nodes directly into those given Shapes for validation. - In this mode, the selected SHACL Shape does not need to specify any focus-targeting mechanisms of its own. +- Combined Rules Expander Mode with Shape Selection + - The combination of SHACL Rules Expander Mode and Shape Selection will allow specialised workflows. + - For example, you can run specific expansion rules from a SHACL Shapes File, based on the new triples required. ### Changed - Don't make a clone of the DataGraph if the input data graph is ephemeral. diff --git a/Dockerfile b/Dockerfile index 0c74b19..246d826 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,7 +14,7 @@ COPY . . 
RUN chown -R pyshacl:pyshacl /home/pyshacl /app && chmod -R 775 /home/pyshacl /app USER pyshacl ENV PATH="/home/pyshacl/.local/bin:$PATH" -RUN pip3 install "poetry>=1.5.0,<2.0" +RUN pip3 install "poetry>=1.8.3,<2.0" RUN poetry install --no-dev --extras "js http" USER root RUN apk del build-dependencies diff --git a/pyproject.toml b/pyproject.toml index daa0be8..29176a8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -109,6 +109,7 @@ dev-coverage = ["pytest-cov", "coverage", "platformdirs"] [tool.poetry.scripts] pyshacl = "pyshacl.cli:main" +pyshacl_rules = "pyshacl.cli_rules:main" pyshacl_validate = "pyshacl.cli:main" pyshacl_server = "pyshacl.http:cli" diff --git a/pyshacl/__init__.py b/pyshacl/__init__.py index 7db1108..78aa013 100644 --- a/pyshacl/__init__.py +++ b/pyshacl/__init__.py @@ -1,11 +1,13 @@ # -*- coding: latin-1 -*- # +from .entrypoints import shacl_rules, validate +from .rule_expand_runner import RuleExpandRunner from .shape import Shape from .shapes_graph import ShapesGraph -from .validate import Validator, validate +from .validator import Validator # version compliant with https://www.python.org/dev/peps/pep-0440/ __version__ = '0.26.0' # Don't forget to change the version number in pyproject.toml, Dockerfile, and CITATION.cff along with this one -__all__ = ['validate', 'Validator', '__version__', 'Shape', 'ShapesGraph'] +__all__ = ['validate', 'shacl_rules', 'Validator', 'RuleExpandRunner', '__version__', 'Shape', 'ShapesGraph'] diff --git a/pyshacl/__main__.py b/pyshacl/__main__.py index 1dd0dd9..0c53f60 100644 --- a/pyshacl/__main__.py +++ b/pyshacl/__main__.py @@ -3,7 +3,8 @@ import os import sys -from pyshacl.cli import main +from pyshacl.cli import main as validate_main +from pyshacl.cli_rules import main as rules_main def str_is_true(s_var: str): @@ -16,11 +17,15 @@ def str_is_true(s_var: str): do_server = os.getenv("PYSHACL_HTTP", "") do_server = os.getenv("PYSHACL_SERVER", do_server) -if (len(sys.argv) > 1 and str(sys.argv[1]).lower() 
in ('serve', 'server', '--server')) or ( +first_arg = None if len(sys.argv) < 2 else sys.argv[1] + +if first_arg is not None and str(first_arg).lower() in ('rules', '--rules'): + rules_main(prog="python3 -m pyshacl") +elif (first_arg is not None and str(first_arg).lower() in ('serve', 'server', '--server')) or ( do_server and str_is_true(do_server) ): from pyshacl.sh_http import main as http_main http_main() - -main(prog="python3 -m pyshacl") +else: + validate_main(prog="python3 -m pyshacl") diff --git a/pyshacl/cli.py b/pyshacl/cli.py index c82dc55..c02f07d 100644 --- a/pyshacl/cli.py +++ b/pyshacl/cli.py @@ -216,6 +216,13 @@ def str_is_true(s_var: str): help='Send output to a file (defaults to stdout).', default=sys.stdout, ) +parser.add_argument( + '--rules', + help='Ignore validation options, run PySHACL in Rules Expansion mode. Same as `pyshacl_rules`.', + action='store_true', + dest='do_rules', + default=False, +) parser.add_argument( '--server', help='Ignore all the rest of the options, start the HTTP Server. Same as `pyshacl_server`.', @@ -240,6 +247,11 @@ def main(prog: Union[str, None] = None) -> None: # http_main calls sys.exit(0) and never returns http_main() + if args.do_rules: + from pyshacl.cli_rules import main as rules_main + + # rules_main calls sys.exit(0) and never returns + rules_main() if not args.data: # No datafile give, and not starting in server mode. sys.stderr.write('Input Error. 
No DataGraph file or endpoint supplied.\n') diff --git a/pyshacl/cli_rules.py b/pyshacl/cli_rules.py new file mode 100644 index 0000000..37d380a --- /dev/null +++ b/pyshacl/cli_rules.py @@ -0,0 +1,316 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import argparse +import sys +from io import BufferedReader +from typing import Union, cast + +from pyshacl import __version__, shacl_rules +from pyshacl.cli import ShowVersion +from pyshacl.errors import ( + ConstraintLoadError, + ReportableRuntimeError, + RuleLoadError, + ShapeLoadError, + ValidationFailure, +) + +parser = argparse.ArgumentParser( + description='PySHACL {} SHACL Rules Expander command line tool.'.format(str(__version__)) +) +parser.add_argument( + 'data', + metavar='DataGraph', + help='The file or endpoint containing the Target Data Graph.', + default=None, + nargs='?', +) +parser.add_argument( + '-s', + '--shapes', + '--shacl', + dest='shacl', + action='store', + nargs='?', + help='A file containing the SHACL Shapes Graph.', +) +parser.add_argument( + '-e', + '--ont-graph', + dest='ont', + action='store', + nargs='?', + help='A file path or URL to a document containing extra ontological information. 
' + 'RDFS and OWL definitions from this are used to inoculate the DataGraph.', +) +parser.add_argument( + '-i', + '--inference', + dest='inference', + action='store', + default='none', + choices=('none', 'rdfs', 'owlrl', 'both'), + help='Choose a type of inferencing to run against the Data Graph before validating.', +) +parser.add_argument( + '-m', + '--metashacl', + dest='metashacl', + action='store_true', + default=False, + help='Validate the SHACL Shapes graph against the shacl-shacl Shapes Graph before validating the Data Graph.', +) +parser.add_argument( + '-im', + '--imports', + dest='imports', + action='store_true', + default=False, + help='Allow import of sub-graphs defined in statements with owl:imports.', +) +parser.add_argument( + '-a', + '--advanced', + dest='advanced', + action='store_true', + default=False, + help='Enable features from the SHACL Advanced Features specification.', +) +parser.add_argument( + '-j', + '--js', + dest='js', + action='store_true', + default=False, + help='Enable features from the SHACL-JS Specification.', +) +parser.add_argument( + '-it', + '--iterate-rules', + dest='iterate_rules', + action='store_true', + default=False, + help="Run Shape's SHACL Rules iteratively until the data_graph reaches a steady state.", +) +parser.add_argument('--abort', dest='abort', action='store_true', default=False, help='Abort on first invalid data.') +parser.add_argument( + '--allow-info', + '--allow-infos', + dest='allow_infos', + action='store_true', + default=False, + help='Shapes marked with severity of Info will not cause result to be invalid.', +) +parser.add_argument( + '-w', + '--allow-warning', + '--allow-warnings', + dest='allow_warnings', + action='store_true', + default=False, + help='Shapes marked with severity of Warning or Info will not cause result to be invalid.', +) +parser.add_argument( + '--max-depth', + dest='max_depth', + action='store', + nargs='?', + type=int, + help="The maximum number of SHACL shapes \"deep\" that the 
validator can go before reaching an \"endpoint\" constraint.", +) +parser.add_argument( + '-d', + '--debug', + dest='debug', + action='store_true', + default=False, + help='Output additional verbose runtime messages.', +) +parser.add_argument( + '--focus', + dest='focus', + action='store', + help='Optional IRIs of focus nodes from the DataGraph, the shapes will validate only these node. Comma-separated list.', + nargs="?", + default=None, +) +parser.add_argument( + '--shape', + dest='shape', + action='store', + help='Optional IRIs of a NodeShape or PropertyShape from the SHACL ShapesGraph, only these shapes will be used to validate the DataGraph. Comma-separated list.', + nargs="?", + default=None, +) +parser.add_argument( + '-f', + '--format', + dest='format', + action='store', + help='Choose an output format. Default is \"trig\" for Datasets and \"turtle\" for Graphs.', + default='auto', + choices=('auto', 'turtle', 'xml', 'trig', 'json-ld', 'nt', 'n3', 'nquads'), +) +parser.add_argument( + '-df', + '--data-file-format', + dest='data_file_format', + action='store', + help='Explicitly state the RDF File format of the input DataGraph file. Default=\"auto\".', + default='auto', + choices=('auto', 'turtle', 'xml', 'trig', 'json-ld', 'nt', 'n3', 'nquads'), +) +parser.add_argument( + '-sf', + '--shacl-file-format', + dest='shacl_file_format', + action='store', + help='Explicitly state the RDF File format of the input SHACL file. Default=\"auto\".', + default='auto', + choices=('auto', 'turtle', 'xml', 'trig', 'json-ld', 'nt', 'n3', 'nquads'), +) +parser.add_argument( + '-ef', + '--ont-file-format', + dest='ont_file_format', + action='store', + help='Explicitly state the RDF File format of the extra ontology file. 
def main(prog: Union[str, None] = None) -> None:
    """Entry point for the ``pyshacl_rules`` command line tool.

    Parses the module-level ``parser`` arguments, opens the DataGraph file,
    assembles keyword arguments for :func:`pyshacl.shacl_rules`, runs the
    SHACL-AF rules expansion, and writes the serialized expanded graph (or an
    error message) to the chosen output stream.

    Always terminates the process via ``sys.exit``:
    exit code 0 on success, 1 on bad input or a ValidationFailure result,
    2 on shape/constraint/rule load errors or runtime errors, 3 when an
    unimplemented feature is requested.

    :param prog: Optional program name shown in argparse usage text
        (e.g. ``"python3 -m pyshacl"`` when run as a module).
    """
    if prog is not None and len(prog) > 0:
        parser.prog = prog

    args = parser.parse_args()
    if not args.data:
        # No data file given; there is nothing to expand.
        sys.stderr.write('Input Error. No DataGraph file or endpoint supplied.\n')
        parser.print_usage(sys.stderr)
        sys.exit(1)
    # Base kwargs passed through to shacl_rules(); serialization is always
    # requested so the result is text suitable for writing to args.output.
    runner_kwargs = {
        'debug': args.debug,
        'serialize_expanded_graph': True,
    }
    data_file = None
    data_graph: Union[BufferedReader, str]

    try:
        data_file = open(args.data, 'rb')
    except FileNotFoundError:
        sys.stderr.write('Input Error. DataGraph file not found.\n')
        sys.exit(1)
    except PermissionError:
        sys.stderr.write('Input Error. DataGraph file not readable.\n')
        sys.exit(1)
    else:
        # NOTE: This cast is not necessary in Python >= 3.10.
        data_graph = cast(BufferedReader, data_file)
    # Translate optional CLI flags into shacl_rules() keyword arguments;
    # omitted flags fall back to shacl_rules' own defaults.
    if args.shacl is not None:
        runner_kwargs['shacl_graph'] = args.shacl
    if args.ont is not None:
        runner_kwargs['ont_graph'] = args.ont
    if args.inference != 'none':
        runner_kwargs['inference'] = args.inference
    if args.imports:
        runner_kwargs['do_owl_imports'] = True
    if args.js:
        runner_kwargs['js'] = True
    if args.focus:
        # Comma-separated IRI list on the CLI -> list of stripped strings.
        runner_kwargs['focus_nodes'] = [_f.strip() for _f in args.focus.split(',')]
    if args.shape:
        runner_kwargs['use_shapes'] = [_s.strip() for _s in args.shape.split(',')]
    if args.iterate_rules:
        runner_kwargs['iterate_rules'] = True
    # "auto" means let the loader guess the RDF serialization from the file.
    if args.shacl_file_format:
        _f: str = args.shacl_file_format
        if _f != "auto":
            runner_kwargs['shacl_graph_format'] = _f
    if args.ont_file_format:
        _f = args.ont_file_format
        if _f != "auto":
            runner_kwargs['ont_graph_format'] = _f
    if args.data_file_format:
        _f = args.data_file_format
        if _f != "auto":
            runner_kwargs['data_graph_format'] = _f
    if args.format != "auto":
        runner_kwargs['serialize_expanded_graph_format'] = args.format
    exit_code: Union[int, None] = None
    try:
        output_txt = shacl_rules(data_graph, **runner_kwargs)
        if isinstance(output_txt, BaseException):
            raise output_txt
    except ValidationFailure as vf:
        # A failure result is still written to the normal output stream.
        args.output.write("Rules Runner generated a Validation Failure result:\n")
        args.output.write(str(vf.message))
        args.output.write("\n")
        exit_code = 1
    except ShapeLoadError as sle:
        sys.stderr.write("Rules Runner encountered a Shape Load Error:\n")
        sys.stderr.write(str(sle))
        exit_code = 2
    except ConstraintLoadError as cle:
        sys.stderr.write("Rules Runner encountered a Constraint Load Error:\n")
        sys.stderr.write(str(cle))
        exit_code = 2
    except RuleLoadError as rle:
        sys.stderr.write("Rules Runner encountered a Rule Load Error:\n")
        sys.stderr.write(str(rle))
        exit_code = 2
    except ReportableRuntimeError as rre:
        sys.stderr.write("Rules Runner encountered a Runtime Error:\n")
        sys.stderr.write(str(rre.message))
        sys.stderr.write("\nIf you believe this is a bug in pyshacl, open an Issue on the pyshacl github page.\n")
        exit_code = 2
    except NotImplementedError as nie:
        sys.stderr.write("Rules Runner feature is not implemented:\n")
        if len(nie.args) > 0:
            sys.stderr.write(str(nie.args[0]))
        else:
            sys.stderr.write("No message provided.")
        sys.stderr.write("\nIf your use-case requires this feature, open an Issue on the pyshacl github page.\n")
        exit_code = 3
    except RuntimeError as re:
        import traceback

        traceback.print_tb(re.__traceback__)
        sys.stderr.write(
            "\n\nRules Runner encountered a Runtime Error. Please report this to the PySHACL issue tracker.\n"
        )
        exit_code = 2
    finally:
        # Close the input file regardless of outcome; if an error was
        # recorded above, sys.exit fires here and the success path below
        # never runs.
        if data_file is not None:
            try:
                data_file.close()
            except Exception as e:
                sys.stderr.write("Error closing data file:\n")
                sys.stderr.write(str(e))
        if exit_code is not None:
            sys.exit(exit_code)

    # Success path: shacl_rules returned serialized graph text (str or bytes).
    if isinstance(output_txt, bytes):
        output_unicode = output_txt.decode('utf-8')
    else:
        output_unicode = output_txt
    args.output.write(output_unicode)
    args.output.close()
    sys.exit(0)


if __name__ == "__main__":
    main()
def validate(
    data_graph: Union[GraphLike, BufferedIOBase, TextIOBase, str, bytes],
    *args,
    shacl_graph: Optional[Union[GraphLike, BufferedIOBase, TextIOBase, str, bytes]] = None,
    ont_graph: Optional[Union[GraphLike, BufferedIOBase, TextIOBase, str, bytes]] = None,
    advanced: Optional[bool] = False,
    inference: Optional[str] = None,
    inplace: Optional[bool] = False,
    abort_on_first: Optional[bool] = False,
    allow_infos: Optional[bool] = False,
    allow_warnings: Optional[bool] = False,
    max_validation_depth: Optional[int] = None,
    sparql_mode: Optional[bool] = False,
    focus_nodes: Optional[List[Union[str, URIRef]]] = None,
    use_shapes: Optional[List[Union[str, URIRef]]] = None,
    **kwargs,
):
    """
    Validate a data graph against a SHACL shapes graph.

    :param data_graph: rdflib.Graph or file path or web url of the data to validate
    :type data_graph: rdflib.Graph | str | bytes
    :param args:
    :type args: list
    :param shacl_graph: rdflib.Graph or file path or web url of the SHACL Shapes graph to use to
        validate the data graph
    :type shacl_graph: rdflib.Graph | str | bytes
    :param ont_graph: rdflib.Graph or file path or web url of an extra ontology document to mix into the data graph
    :type ont_graph: rdflib.Graph | str | bytes
    :param advanced: Enable advanced SHACL features, default=False
    :type advanced: bool | None
    :param inference: One of "rdfs", "owlrl", "both", "none", or None
    :type inference: str | None
    :param inplace: If this is enabled, do not clone the datagraph, manipulate it in-place
    :type inplace: bool
    :param abort_on_first: Stop evaluating constraints after first violation is found
    :type abort_on_first: bool | None
    :param allow_infos: Shapes marked with severity of sh:Info will not cause result to be invalid.
    :type allow_infos: bool | None
    :param allow_warnings: Shapes marked with severity of sh:Warning or sh:Info will not cause result to be invalid.
    :type allow_warnings: bool | None
    :param max_validation_depth: The maximum number of SHACL shapes "deep" that the validator can go before reaching an "endpoint" constraint.
    :type max_validation_depth: int | None
    :param sparql_mode: Treat the DataGraph as a SPARQL endpoint, validate the graph at the SPARQL endpoint.
    :type sparql_mode: bool | None
    :param focus_nodes: A list of IRIs to validate only those nodes.
    :type focus_nodes: list | None
    :param use_shapes: A list of IRIs to use only those shapes from the SHACL ShapesGraph.
    :type use_shapes: list | None
    :param kwargs: Additional options, e.g. ``debug``, ``meta_shacl``,
        ``check_dash_result``, ``do_owl_imports``, ``js``, ``iterate_rules``,
        graph format hints, and ``serialize_report_graph``.
    :return: A 3-tuple ``(conforms, report_graph, report_text)``; the report
        graph is a serialized ``bytes`` value when ``serialize_report_graph``
        is set, or a ValidationFailure instance when validation failed hard.
    """

    do_debug = kwargs.get('debug', False)
    log = make_default_logger(name="pyshacl-validate", debug=do_debug)
    apply_patches()
    assign_baked_in()
    do_check_dash_result = kwargs.pop('check_dash_result', False)  # type: bool
    if kwargs.get('meta_shacl', False):
        # MetaSHACL: first validate the shapes graph itself against the
        # SHACL-SHACL shapes; refuse to continue if it does not conform.
        to_meta_val = shacl_graph or data_graph
        conforms, v_r, v_t = meta_validate(to_meta_val, inference=inference, **kwargs)
        if not conforms:
            msg = f"SHACL File does not validate against the SHACL Shapes SHACL (MetaSHACL) file.\n{v_t}"
            log.error(msg)
            raise ReportableRuntimeError(msg)
    do_owl_imports = kwargs.pop('do_owl_imports', False)
    data_graph_format = kwargs.pop('data_graph_format', None)

    if isinstance(data_graph, (str, bytes, BufferedIOBase, TextIOBase)):
        # DataGraph is passed in as Text. It is not an rdflib.Graph
        # That means we load it into an ephemeral graph at runtime
        # that means we don't need to make a copy to prevent polluting it.
        ephemeral = True
    else:
        ephemeral = False
    use_js = kwargs.pop('js', None)
    if sparql_mode:
        # SPARQL Remote Graph Mode is incompatible with JS, inplace cloning
        # semantics, and ontology inoculation.
        if use_js:
            raise ReportableRuntimeError("Cannot use SHACL-JS in SPARQL Remote Graph Mode.")
        if inplace:
            raise ReportableRuntimeError("Cannot use inplace mode in SPARQL Remote Graph Mode.")
        if ont_graph is not None:
            raise ReportableRuntimeError("Cannot use SPARQL Remote Graph Mode with extra Ontology Graph inoculation.")
        if isinstance(data_graph, bytes):
            data_graph = data_graph.decode('utf-8')
        else:
            # NOTE(review): no-op self-assignment kept for symmetry; the
            # non-bytes data_graph is used unchanged.
            data_graph = data_graph
        # The remote endpoint cannot be cloned, so force inplace operation.
        ephemeral = False
        inplace = True
    if (
        sparql_mode
        and isinstance(data_graph, str)
        and (data_graph.lower().startswith("http:") or data_graph.lower().startswith("https:"))
    ):
        # Wrap the remote SPARQL endpoint in an rdflib Dataset backed by a
        # read-only SPARQLStore; credentials/method come from env vars.
        from rdflib.plugins.stores.sparqlstore import SPARQLStore

        query_endpoint: str = data_graph
        username = os.getenv("PYSHACL_SPARQL_USERNAME", "")
        method = os.getenv("PYSHACL_SPARQL_METHOD", "GET")
        auth: Optional[Tuple[str, str]]
        if username:
            password: str = os.getenv("PYSHACL_SPARQL_PASSWORD", "")
            auth = (username, password)
        else:
            auth = None
        store = SPARQLStore(query_endpoint=query_endpoint, auth=auth, method=method)
        loaded_dg = Dataset(store=store, default_union=True)
    else:
        # force no owl imports on data_graph
        loaded_dg = load_from_source(
            data_graph, rdf_format=data_graph_format, multigraph=True, do_owl_imports=False, logger=log
        )
    ont_graph_format = kwargs.pop('ont_graph_format', None)
    if ont_graph is not None:
        loaded_og = load_from_source(
            ont_graph, rdf_format=ont_graph_format, multigraph=True, do_owl_imports=do_owl_imports, logger=log
        )
    else:
        loaded_og = None
    shacl_graph_format = kwargs.pop('shacl_graph_format', None)
    if shacl_graph is not None:
        # The bool patch must wrap the SHACL graph load only; unpatch
        # immediately afterwards.
        rdflib_bool_patch()
        loaded_sg = load_from_source(
            shacl_graph, rdf_format=shacl_graph_format, multigraph=True, do_owl_imports=do_owl_imports, logger=log
        )
        rdflib_bool_unpatch()
    else:
        loaded_sg = None
    iterate_rules = kwargs.pop('iterate_rules', False)
    if "abort_on_error" in kwargs:
        # Backwards-compat shim for the old option name.
        log.warning("Usage of abort_on_error is deprecated. Use abort_on_first instead.")
        ae = kwargs.pop("abort_on_error")
        abort_on_first = bool(abort_on_first) or bool(ae)
    validator_options_dict = {
        'debug': do_debug or False,
        'inference': inference,
        # An ephemeral graph is already a private copy, so treat it as inplace.
        'inplace': inplace or ephemeral,
        'abort_on_first': abort_on_first,
        'allow_infos': allow_infos,
        'allow_warnings': allow_warnings,
        'advanced': advanced,
        'iterate_rules': iterate_rules,
        'use_js': use_js,
        'sparql_mode': sparql_mode,
        'logger': log,
        'focus_nodes': focus_nodes,
        'use_shapes': use_shapes,
    }
    if max_validation_depth is not None:
        validator_options_dict['max_validation_depth'] = max_validation_depth
    validator = None
    try:
        validator = Validator(
            loaded_dg,
            shacl_graph=loaded_sg,
            ont_graph=loaded_og,
            options=validator_options_dict,
        )
        conforms, report_graph, report_text = validator.run()
    except ValidationFailure as e:
        # A hard failure is reported as non-conformance with the exception
        # itself standing in for the report graph.
        conforms = False
        report_graph = e
        report_text = "Validation Failure - {}".format(e.message)
    if do_check_dash_result and validator is not None:
        passes = check_dash_result(validator, report_graph, loaded_sg or loaded_dg)
        return passes, report_graph, report_text
    do_serialize_report_graph = kwargs.pop('serialize_report_graph', False)
    if do_serialize_report_graph and isinstance(report_graph, Graph):
        # A truthy non-string value means "serialize with the default format".
        if not (isinstance(do_serialize_report_graph, str)):
            do_serialize_report_graph = 'turtle'
        report_graph = report_graph.serialize(None, encoding='utf-8', format=do_serialize_report_graph)
    return conforms, report_graph, report_text
def make_default_logger(name: Union[str, None] = None, debug: bool = False) -> logging.Logger:
    """Return a logger configured to write to stderr.

    Fixes two defects in the previous implementation:

    * The *name* parameter was accepted but ignored — every call fetched and
      reconfigured the **root** logger, globally clobbering any handlers the
      host application had installed. Now ``name`` selects the logger
      (``logging.getLogger(None)`` still returns the root logger, preserving
      the old behavior for callers that pass no name).
    * Handlers were removed while iterating ``log.handlers`` directly, which
      skips entries when the list shrinks mid-iteration; we iterate a copy.

    :param name: Logger name, or None for the root logger.
    :param debug: When True, set level to DEBUG instead of INFO.
    :return: The configured :class:`logging.Logger`.
    """
    level = logging.DEBUG if debug else logging.INFO
    log_handler = logging.StreamHandler(stderr)
    log_handler.setLevel(level)
    log = logging.getLogger(name)
    # Iterate over a snapshot: removing from the live list while looping
    # over it would skip handlers.
    for h in list(log.handlers):
        log.removeHandler(h)  # pragma:no cover
    log.addHandler(log_handler)
    log.setLevel(level)
    if name is not None:
        # A named logger has its own stderr handler; stop propagation so
        # records are not duplicated through the root logger's handlers.
        log.propagate = False
    return log
:param args: + :type args: list + :param shacl_graph: rdflib.Graph or file path or web url of the SHACL Shapes graph to use to + validate the data graph + :type shacl_graph: rdflib.Graph | str | bytes + :param ont_graph: rdflib.Graph or file path or web url of an extra ontology document to mix into the data graph + :type ont_graph: rdflib.Graph | str | bytes + :param inference: One of "rdfs", "owlrl", "both", "none", or None + :type inference: str | None + :param inplace: If this is enabled, do not clone the datagraph, manipulate it in-place + :type inplace: bool + :param focus_nodes: A list of IRIs to validate only those nodes. + :type focus_nodes: list | None + :param use_shapes: A list of IRIs to use only those shapes from the SHACL ShapesGraph. + :type use_shapes: list | None + :param kwargs: + :return: + """ + + do_debug = kwargs.get('debug', False) + log = make_default_logger(name="pyshacl-rules", debug=do_debug) + apply_patches() + assign_baked_in() + do_owl_imports = kwargs.pop('do_owl_imports', False) + data_graph_format = kwargs.pop('data_graph_format', None) + if kwargs.get('sparql_mode', None): + raise ReportableRuntimeError("The SHACL Rules expander cannot be used in SPARQL Remote Graph Mode.") + if isinstance(data_graph, (str, bytes, BufferedIOBase, TextIOBase)): + # DataGraph is passed in as Text. It is not a rdflib.Graph + # That means we load it into an ephemeral graph at runtime + # that means we don't need to make a copy to prevent polluting it. 
+ ephemeral = True + else: + ephemeral = False + use_js = kwargs.pop('js', None) + # force no owl imports on data_graph + loaded_dg = load_from_source( + data_graph, rdf_format=data_graph_format, multigraph=True, do_owl_imports=False, logger=log + ) + ont_graph_format = kwargs.pop('ont_graph_format', None) + if ont_graph is not None: + loaded_og = load_from_source( + ont_graph, rdf_format=ont_graph_format, multigraph=True, do_owl_imports=do_owl_imports, logger=log + ) + else: + loaded_og = None + shacl_graph_format = kwargs.pop('shacl_graph_format', None) + if shacl_graph is not None: + rdflib_bool_patch() + loaded_sg = load_from_source( + shacl_graph, rdf_format=shacl_graph_format, multigraph=True, do_owl_imports=do_owl_imports, logger=log + ) + rdflib_bool_unpatch() + else: + loaded_sg = None + iterate_rules = kwargs.pop('iterate_rules', False) + runner_options_dict = { + 'debug': do_debug or False, + 'inference': inference, + 'inplace': inplace or ephemeral, + 'iterate_rules': iterate_rules, + 'use_js': use_js, + 'logger': log, + 'focus_nodes': focus_nodes, + 'use_shapes': use_shapes, + } + serialize_expanded_graph = kwargs.get('serialize_expanded_graph', None) + try: + runner = RuleExpandRunner( + loaded_dg, + shacl_graph=loaded_sg, + ont_graph=loaded_og, + options=runner_options_dict, + ) + expanded_graph = runner.run() + except ValidationFailure as e: + error = "SHACL Rules Expansion Failure - {}".format(e.message) + if serialize_expanded_graph: + return error + else: + g = Graph() + g.add((URIRef(""), RDF_type, SH.ValidationFailure)) + g.add((URIRef(""), SH.message, Literal(error))) + return g + if serialize_expanded_graph: + guess_format = "trig" if isinstance(expanded_graph, (Dataset, ConjunctiveGraph)) else "turtle" + serialize_format = kwargs.get('serialize_expanded_graph_format', guess_format) + return expanded_graph.serialize(format=serialize_format) + return expanded_graph diff --git a/pyshacl/extras/js/rules.py b/pyshacl/extras/js/rules.py index 
5d2548c..bd16557 100644 --- a/pyshacl/extras/js/rules.py +++ b/pyshacl/extras/js/rules.py @@ -1,6 +1,9 @@ # # import typing +from typing import List, Sequence, Union + +import rdflib from pyshacl.consts import SH from pyshacl.errors import ReportableRuntimeError @@ -9,7 +12,8 @@ from .js_executable import JSExecutable if typing.TYPE_CHECKING: - from pyshacl.pytypes import GraphLike, SHACLExecutor + + from pyshacl.pytypes import GraphLike, RDFNode, SHACLExecutor from pyshacl.shape import Shape from pyshacl.shapes_graph import ShapesGraph @@ -24,8 +28,25 @@ def __init__(self, executor: 'SHACLExecutor', shape: 'Shape', rule_node, **kwarg shapes_graph: 'ShapesGraph' = shape.sg self.js_exe = JSExecutable(shapes_graph, rule_node) - def apply(self, data_graph: 'GraphLike') -> int: - focus_nodes = self.shape.focus_nodes(data_graph) # uses target nodes to find focus nodes + def apply( + self, + data_graph: 'GraphLike', + focus_nodes: Union[Sequence['RDFNode'], None] = None, + ) -> int: + focus_list: Sequence['RDFNode'] + if focus_nodes is not None: + focus_list = list(focus_nodes) + else: + focus_list = list(self.shape.focus_nodes(data_graph)) + if self.executor.focus_nodes is not None and len(self.executor.focus_nodes) > 0: + filtered_focus_nodes: List[Union[rdflib.URIRef]] = [] + for _fo in focus_list: # type: RDFNode + if isinstance(_fo, rdflib.URIRef) and _fo in self.executor.focus_nodes: + filtered_focus_nodes.append(_fo) + len_filtered_focus = len(filtered_focus_nodes) + if len_filtered_focus < 1: + return 0 + focus_list = filtered_focus_nodes all_added = 0 iterate_limit = 100 while True: @@ -33,7 +54,7 @@ def apply(self, data_graph: 'GraphLike') -> int: raise ReportableRuntimeError("Local rule iteration exceeded iteration limit of 100.") iterate_limit -= 1 added = 0 - applicable_nodes = self.filter_conditions(focus_nodes, data_graph) + applicable_nodes = self.filter_conditions(focus_list, data_graph) sets_to_add = [] for a in applicable_nodes: args_map = {"this": a} 
class RuleExpandRunner(PySHACLRunType):
    """Run mode that expands a DataGraph by executing SHACL-AF Rules only,
    without performing any validation of Shapes or Constraints."""

    def __init__(
        self,
        data_graph: GraphLike,
        *args,
        shacl_graph: Optional[GraphLike] = None,
        ont_graph: Optional[GraphLike] = None,
        options: Optional[Dict[str, Any]] = None,
        **kwargs,
    ):
        """
        :param data_graph: An rdflib Graph-like object holding the data to expand.
        :param shacl_graph: Optional graph holding the SHACL Shapes (and Rules);
            when None, the data graph itself is cloned and used as the shapes
            graph.
        :param ont_graph: Optional extra ontology graph to mix in.
        :param options: Runner options; missing keys are filled in by
            :meth:`_load_default_options`.
        :raises RuntimeError: if ``data_graph`` is not an rdflib Graph.
        """
        options = options or {}
        self._load_default_options(options)
        self.options = options  # type: dict
        self.logger = options['logger']  # type: logging.Logger
        self.debug = options['debug']
        self.pre_inferenced = kwargs.pop('pre_inferenced', False)
        self.inplace = options['inplace']
        if not isinstance(data_graph, rdflib.Graph):
            raise RuntimeError("data_graph must be a rdflib Graph-like object")
        self.data_graph = data_graph  # type: GraphLike
        # Populated lazily; exposed via the target_graph property.
        self._target_graph = None
        self.ont_graph = ont_graph  # type: Optional[GraphLike]
        self.data_graph_is_multigraph = isinstance(self.data_graph, (rdflib.Dataset, rdflib.ConjunctiveGraph))
        if self.ont_graph is not None and isinstance(self.ont_graph, (rdflib.Dataset, rdflib.ConjunctiveGraph)):
            # Query the union of all named graphs in the ontology dataset.
            self.ont_graph.default_union = True
        if shacl_graph is None:
            # No explicit shapes graph: clone the data graph and treat it as
            # the shapes graph (shapes embedded alongside the data).
            shacl_graph = clone_graph(data_graph, identifier='shacl')
        assert isinstance(shacl_graph, rdflib.Graph), "shacl_graph must be a rdflib Graph object"
        self.shacl_graph = ShapesGraph(shacl_graph, self.debug, self.logger)  # type: ShapesGraph

        if options['use_js']:
            # SHACL-JS support is optional; only enable if the extra is installed.
            is_js_installed = check_extra_installed('js')
            if is_js_installed:
                self.shacl_graph.enable_js()

    @classmethod
    def _load_default_options(cls, options_dict: dict):
        """Fill ``options_dict`` in place with defaults for any missing keys,
        including a module-level fallback logger."""
        options_dict.setdefault('debug', False)
        options_dict.setdefault('inference', 'none')
        options_dict.setdefault('inplace', False)
        options_dict.setdefault('use_js', False)
        options_dict.setdefault('iterate_rules', False)
        options_dict.setdefault('focus_nodes', None)
        options_dict.setdefault('use_shapes', None)
        if 'logger' not in options_dict:
            options_dict['logger'] = logging.getLogger(__name__)
        if options_dict['debug']:
            options_dict['logger'].setLevel(logging.DEBUG)
+ :param target_graph: + :type target_graph: rdflib.Graph|rdflib.ConjunctiveGraph|rdflib.Dataset + :param inference_option: + :type inference_option: str + :return: + :rtype: NoneType + """ + # Lazy import owlrl + import owlrl + + from .inference import CustomRDFSOWLRLSemantics, CustomRDFSSemantics + + if logger is None: + logger = logging.getLogger(__name__) + try: + if inference_option == 'rdfs': + inferencer = owlrl.DeductiveClosure(CustomRDFSSemantics) + elif inference_option == 'owlrl': + inferencer = owlrl.DeductiveClosure(owlrl.OWLRL_Semantics) + elif inference_option == 'both' or inference_option == 'all' or inference_option == 'rdfsowlrl': + inferencer = owlrl.DeductiveClosure(CustomRDFSOWLRLSemantics) + else: + raise ReportableRuntimeError("Don't know how to do '{}' type inferencing.".format(inference_option)) + except Exception as e: # pragma: no cover + logger.error("Error during creation of OWL-RL Deductive Closure") + if isinstance(e, ReportableRuntimeError): + raise e + raise ReportableRuntimeError( + "Error during creation of OWL-RL Deductive Closure\n{}".format(str(e.args[0])) + ) + if isinstance(target_graph, (rdflib.Dataset, rdflib.ConjunctiveGraph)): + named_graphs = [] + for i in target_graph.store.contexts(None): + if isinstance(i, rdflib.Graph): + named_graphs.append(i) + else: + named_graphs.append( + rdflib.Graph(target_graph.store, i, namespace_manager=target_graph.namespace_manager) + ) + else: + named_graphs = [target_graph] + try: + for g in named_graphs: + inferencer.expand(g) + except Exception as e: # pragma: no cover + logger.error("Error while running OWL-RL Deductive Closure") + raise ReportableRuntimeError("Error while running OWL-RL Deductive Closure\n{}".format(str(e.args[0]))) + + @property + def target_graph(self): + return self._target_graph + + def mix_in_ontology(self): + if USE_FULL_MIXIN: + if not self.data_graph_is_multigraph: + return mix_graphs(self.data_graph, self.ont_graph, "inplace" if self.inplace else None) + 
return mix_datasets(self.data_graph, self.ont_graph, "inplace" if self.inplace else None) + if not self.data_graph_is_multigraph: + if self.inplace: + to_graph = self.data_graph + else: + to_graph = clone_graph(self.data_graph, identifier=self.data_graph.identifier) + return inoculate(to_graph, self.ont_graph) + return inoculate_dataset(self.data_graph, self.ont_graph, self.data_graph if self.inplace else None) + + def make_executor(self) -> SHACLExecutor: + return SHACLExecutor( + validator=self, + advanced_mode=True, + abort_on_first=False, + allow_infos=False, + allow_warnings=False, + iterate_rules=bool(self.options.get("iterate_rules", False)), + sparql_mode=False, + max_validation_depth=999, + focus_nodes=self.options.get("focus_nodes", None), + debug=self.debug, + ) + + def run(self) -> GraphLike: + if self.target_graph is not None: + # Target graph is already set up with pre-inferenced and pre-cloned data_graph + the_target_graph = self.target_graph + else: + has_cloned = False + if self.ont_graph is not None: + if self.inplace: + self.logger.debug("Adding ontology definitions to DataGraph") + else: + self.logger.debug("Cloning DataGraph to temporary memory graph, to add ontology definitions.") + # creates a copy of self.data_graph, doesn't modify it + the_target_graph = self.mix_in_ontology() + has_cloned = True + else: + the_target_graph = self.data_graph + inference_option = self.options.get('inference', 'none') + if self.inplace and self.debug: + self.logger.debug("Skipping DataGraph clone because PySHACL is operating in inplace mode.") + if inference_option and not self.pre_inferenced and str(inference_option) != "none": + if not has_cloned and not self.inplace: + self.logger.debug("Cloning DataGraph to temporary memory graph before pre-inferencing.") + the_target_graph = clone_graph(the_target_graph) + has_cloned = True + self.logger.debug(f"Running pre-inferencing with option='{inference_option}'.") + self._run_pre_inference(the_target_graph, 
inference_option, logger=self.logger) + self.pre_inferenced = True + if not has_cloned and not self.inplace: + # We still need to clone in advanced mode, because of triple rules + self.logger.debug( + "Forcing clone of DataGraph because expanding rules cannot modify the input datagraph." + ) + the_target_graph = clone_graph(the_target_graph) + has_cloned = True + self._target_graph = the_target_graph + + if self.options.get("use_shapes", None) is not None and len(self.options["use_shapes"]) > 0: + using_manually_specified_shapes = True + expanded_use_shapes = [] + for s in self.options["use_shapes"]: + s_lower = s.lower() + if ( + s_lower.startswith("http:") + or s_lower.startswith("https:") + or s_lower.startswith("urn:") + or s_lower.startswith("file:") + ): + expanded_use_shapes.append(URIRef(s)) + else: + try: + expanded_use_shape = self.shacl_graph.graph.namespace_manager.expand_curie(s) + except ValueError: + expanded_use_shape = URIRef(s) + expanded_use_shapes.append(expanded_use_shape) + shapes = self.shacl_graph.shapes_from_uris(expanded_use_shapes) + else: + using_manually_specified_shapes = False + shapes = self.shacl_graph.shapes # This property getter triggers shapes harvest. 
+ option_focus_nodes = self.options.get("focus_nodes", None) + if option_focus_nodes is not None and len(option_focus_nodes) > 0: + # Expand any CURIEs in the focus_nodes list + expanded_focus_nodes: List[URIRef] = [] + for f in option_focus_nodes: + f_lower = f.lower() + if ( + f_lower.startswith("http:") + or f_lower.startswith("https:") + or f_lower.startswith("urn:") + or f_lower.startswith("file:") + ): + expanded_focus_nodes.append(URIRef(f)) + else: + try: + expanded_focus_node = self.target_graph.namespace_manager.expand_curie(f) + except ValueError: + expanded_focus_node = URIRef(f) + expanded_focus_nodes.append(expanded_focus_node) + self.options["focus_nodes"] = expanded_focus_nodes + specified_focus_nodes: Union[None, Sequence[URIRef]] = expanded_focus_nodes + else: + specified_focus_nodes = None + executor = self.make_executor() + # Special hack, if we are using manually specified shapes, and have + # manually specified focus nodes, then we need to disable the + # focus_nodes in the executor, because we apply the specified focus + # nodes directly to the specified shapes. 
+ if using_manually_specified_shapes and specified_focus_nodes is not None: + executor.focus_nodes = None + self.logger.debug("Activating SHACL-AF Features.") + target_types = gather_target_types(self.shacl_graph) + gather_from_shapes = None if not using_manually_specified_shapes else [s.node for s in shapes] + gathered_functions = gather_functions(executor, self.shacl_graph) + gathered_rules = gather_rules(executor, self.shacl_graph, from_shapes=gather_from_shapes) + + for s in shapes: + s.set_advanced(True) + apply_target_types(target_types) + if isinstance(the_target_graph, (rdflib.Dataset, rdflib.ConjunctiveGraph)): + named_graphs = [ + ( + rdflib.Graph(the_target_graph.store, i, namespace_manager=the_target_graph.namespace_manager) # type: ignore[arg-type] + if not isinstance(i, rdflib.Graph) + else i + ) + for i in the_target_graph.store.contexts(None) + ] + else: + named_graphs = [the_target_graph] + if specified_focus_nodes is not None and using_manually_specified_shapes: + on_focus_nodes: Union[Sequence[URIRef], None] = specified_focus_nodes + else: + on_focus_nodes = None + if self.debug: + self.logger.debug(f"Will run SHACL Rules expansion on {len(named_graphs)} named graph/s.") + for g in named_graphs: + if self.debug: + self.logger.debug(f"Running SHACL Rules on DataGraph named {g.identifier}") + if gathered_functions: + apply_functions(executor, gathered_functions, g) + try: + if gathered_rules: + apply_rules(executor, gathered_rules, g, focus_nodes=on_focus_nodes) + finally: + if gathered_functions: + unapply_functions(gathered_functions, g) + + return the_target_graph diff --git a/pyshacl/rules/__init__.py b/pyshacl/rules/__init__.py index f25fd3e..3eae4cc 100644 --- a/pyshacl/rules/__init__.py +++ b/pyshacl/rules/__init__.py @@ -1,10 +1,12 @@ # -*- coding: utf-8 -*- from collections import defaultdict -from typing import TYPE_CHECKING, Any, Dict, List, Tuple, Type, Union +from typing import TYPE_CHECKING, Any, Dict, List, Sequence, Tuple, Type, 
Union + +from rdflib import BNode, URIRef from pyshacl.consts import RDF_type, SH_rule, SH_SPARQLRule, SH_TripleRule from pyshacl.errors import ReportableRuntimeError, RuleLoadError -from pyshacl.pytypes import GraphLike, SHACLExecutor +from pyshacl.pytypes import GraphLike, RDFNode, SHACLExecutor from pyshacl.rules.sparql import SPARQLRule from pyshacl.rules.triple import TripleRule @@ -15,7 +17,11 @@ from .shacl_rule import SHACLRule -def gather_rules(executor: SHACLExecutor, shacl_graph: 'ShapesGraph') -> Dict['Shape', List['SHACLRule']]: +def gather_rules( + executor: SHACLExecutor, + shacl_graph: 'ShapesGraph', + from_shapes: Union[Sequence[Union[URIRef, BNode]], None] = None, +) -> Dict['Shape', List['SHACLRule']]: """ :param executor: :type executor: SHACLExecutor @@ -55,6 +61,9 @@ def gather_rules(executor: SHACLExecutor, shacl_graph: 'ShapesGraph') -> Dict['S used_rules = shacl_graph.subject_objects(SH_rule) ret_rules = defaultdict(list) for sub, obj in used_rules: + if from_shapes is not None and sub not in from_shapes: + # Skipping rule that is not in the whitelist of Shapes to use + continue try: shape: Shape = shacl_graph.lookup_shape_from_node(sub) except (AttributeError, KeyError): @@ -77,7 +86,12 @@ def gather_rules(executor: SHACLExecutor, shacl_graph: 'ShapesGraph') -> Dict['S return ret_rules -def apply_rules(executor: SHACLExecutor, shapes_rules: Dict, data_graph: GraphLike) -> int: +def apply_rules( + executor: SHACLExecutor, + shapes_rules: Dict, + data_graph: GraphLike, + focus_nodes: Union[Sequence[RDFNode], None] = None, +) -> int: # short the shapes dict by shapes sh:order before execution sorted_shapes_rules: List[Tuple[Any, Any]] = sorted(shapes_rules.items(), key=lambda x: x[0].order) total_modified = 0 @@ -93,7 +107,7 @@ def apply_rules(executor: SHACLExecutor, shapes_rules: Dict, data_graph: GraphLi for r in rules: if r.deactivated: continue - n_modified = r.apply(data_graph) + n_modified = r.apply(data_graph, focus_nodes=focus_nodes) 
this_modified += n_modified if this_modified > 0: total_modified += this_modified diff --git a/pyshacl/rules/shacl_rule.py b/pyshacl/rules/shacl_rule.py index 2d8c69c..0014823 100644 --- a/pyshacl/rules/shacl_rule.py +++ b/pyshacl/rules/shacl_rule.py @@ -1,11 +1,12 @@ # -*- coding: utf-8 -*- from decimal import Decimal +from typing import Sequence, Union from rdflib import RDF, Literal from pyshacl.consts import SH_condition, SH_deactivated, SH_order from pyshacl.errors import RuleLoadError -from pyshacl.pytypes import SHACLExecutor +from pyshacl.pytypes import RDFNode, SHACLExecutor RDF_first = RDF.first @@ -96,7 +97,7 @@ def get_conditions(self): conditions.append(condition) return conditions - def filter_conditions(self, focus_nodes, data_graph): + def filter_conditions(self, focus_nodes: Sequence[RDFNode], data_graph): conditions = self.get_conditions() applicable_focus_nodes = [] for f in focus_nodes: @@ -108,5 +109,9 @@ def filter_conditions(self, focus_nodes, data_graph): applicable_focus_nodes.append(f) return applicable_focus_nodes - def apply(self, data_graph): + def apply( + self, + data_graph, + focus_nodes: Union[Sequence[RDFNode], None] = None, + ): raise NotImplementedError() diff --git a/pyshacl/rules/sparql/__init__.py b/pyshacl/rules/sparql/__init__.py index 0d9f23c..9942a9a 100644 --- a/pyshacl/rules/sparql/__init__.py +++ b/pyshacl/rules/sparql/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, List, Sequence, Union import rdflib from rdflib import Literal @@ -13,7 +13,7 @@ from ..shacl_rule import SHACLRule if TYPE_CHECKING: - from pyshacl.pytypes import GraphLike, SHACLExecutor + from pyshacl.pytypes import GraphLike, RDFNode, SHACLExecutor from pyshacl.shape import Shape XSD_string = XSD.string @@ -49,8 +49,25 @@ def __init__(self, executor: 'SHACLExecutor', shape: 'Shape', rule_node: 'rdflib query_helper.collect_prefixes() self._qh = query_helper - def apply(self, 
data_graph: 'GraphLike') -> int: - focus_nodes = self.shape.focus_nodes(data_graph) # uses target nodes to find focus nodes + def apply( + self, + data_graph: 'GraphLike', + focus_nodes: Union[Sequence['RDFNode'], None] = None, + ) -> int: + focus_list: Sequence['RDFNode'] + if focus_nodes is not None: + focus_list = list(focus_nodes) + else: + focus_list = list(self.shape.focus_nodes(data_graph)) + if self.executor.focus_nodes is not None and len(self.executor.focus_nodes) > 0: + filtered_focus_nodes: List[Union[rdflib.URIRef]] = [] + for _fo in focus_list: # type: RDFNode + if isinstance(_fo, rdflib.URIRef) and _fo in self.executor.focus_nodes: + filtered_focus_nodes.append(_fo) + len_filtered_focus = len(filtered_focus_nodes) + if len_filtered_focus < 1: + return 0 + focus_list = filtered_focus_nodes all_added = 0 SPARQLQueryHelper = get_query_helper_cls() iterate_limit = 100 @@ -59,7 +76,7 @@ def apply(self, data_graph: 'GraphLike') -> int: raise ReportableRuntimeError("Local SPARQLRule iteration exceeded iteration limit of 100.") iterate_limit -= 1 added = 0 - applicable_nodes = self.filter_conditions(focus_nodes, data_graph) + applicable_nodes = self.filter_conditions(focus_list, data_graph) construct_graphs = set() for a in applicable_nodes: for c in self._constructs: diff --git a/pyshacl/rules/triple/__init__.py b/pyshacl/rules/triple/__init__.py index 88979e8..1dd25f2 100644 --- a/pyshacl/rules/triple/__init__.py +++ b/pyshacl/rules/triple/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- import itertools -from typing import TYPE_CHECKING, Tuple, cast +from typing import TYPE_CHECKING, List, Sequence, Tuple, Union, cast import rdflib @@ -10,9 +10,8 @@ from pyshacl.rules.shacl_rule import SHACLRule if TYPE_CHECKING: - from rdflib.term import Node - from pyshacl.pytypes import GraphLike, SHACLExecutor + from pyshacl.pytypes import GraphLike, RDFNode, SHACLExecutor from pyshacl.shape import Shape @@ -50,9 +49,27 @@ def __init__(self, executor: 
'SHACLExecutor', shape: 'Shape', rule_node: 'rdflib raise RuntimeError("Too many sh:object") self.o = next(iter(my_object_nodes)) - def apply(self, data_graph: 'GraphLike') -> int: - focus_nodes = self.shape.focus_nodes(data_graph) # uses target nodes to find focus nodes - applicable_nodes = self.filter_conditions(focus_nodes, data_graph) + def apply( + self, + data_graph: 'GraphLike', + focus_nodes: Union[Sequence['RDFNode'], None] = None, + ) -> int: + focus_list: Sequence['RDFNode'] + if focus_nodes is not None: + focus_list = list(focus_nodes) + else: + focus_list = list(self.shape.focus_nodes(data_graph)) + if self.executor.focus_nodes is not None and len(self.executor.focus_nodes) > 0: + filtered_focus_nodes: List[Union[rdflib.URIRef]] = [] + for _fo in focus_list: # type: RDFNode + if isinstance(_fo, rdflib.URIRef) and _fo in self.executor.focus_nodes: + filtered_focus_nodes.append(_fo) + len_filtered_focus = len(filtered_focus_nodes) + if len_filtered_focus < 1: + return 0 + focus_list = filtered_focus_nodes + # uses target nodes to find focus nodes + applicable_nodes = self.filter_conditions(focus_list, data_graph) all_added = 0 iterate_limit = 100 while True: @@ -75,7 +92,7 @@ def apply(self, data_graph: 'GraphLike') -> int: added += 1 if added > 0: for i in to_add: - data_graph.add(cast(Tuple['Node', 'Node', 'Node'], i)) + data_graph.add(cast(Tuple['RDFNode', 'RDFNode', 'RDFNode'], i)) all_added += added if self.iterate: continue # Jump up to iterate diff --git a/pyshacl/run_type.py b/pyshacl/run_type.py new file mode 100644 index 0000000..89b4b7b --- /dev/null +++ b/pyshacl/run_type.py @@ -0,0 +1,9 @@ +from abc import ABCMeta, abstractmethod + + +class PySHACLRunType(metaclass=ABCMeta): + __slots__ = () + + @abstractmethod + def run(self): + raise NotImplementedError() # pragma: no cover diff --git a/pyshacl/shape.py b/pyshacl/shape.py index 8c885b5..b689005 100644 --- a/pyshacl/shape.py +++ b/pyshacl/shape.py @@ -632,7 +632,7 @@ def validate( if 
executor.debug: self.logger.debug(f"Skipping shape because it is deactivated: {str(self)}") return True, [] - focus_list: Sequence[RDFNode] = [] + focus_list: Sequence[RDFNode] if focus is not None: lh_shape = False rh_shape = True diff --git a/pyshacl/validate.py b/pyshacl/validator.py similarity index 63% rename from pyshacl/validate.py rename to pyshacl/validator.py index 20c4793..762bb22 100644 --- a/pyshacl/validate.py +++ b/pyshacl/validator.py @@ -1,13 +1,9 @@ # -*- coding: utf-8 -*- # import logging -import os import sys -from functools import wraps -from io import BufferedIOBase, TextIOBase from os import getenv, path -from sys import stderr -from typing import Dict, List, Optional, Tuple, Union +from typing import Any, Dict, List, Optional, Sequence, Tuple, Union import rdflib from rdflib import BNode, Literal, URIRef @@ -19,10 +15,9 @@ SH_ValidationReport, env_truths, ) -from .errors import ReportableRuntimeError, ValidationFailure +from .errors import ReportableRuntimeError from .extras import check_extra_installed from .functions import apply_functions, gather_functions, unapply_functions -from .monkey import apply_patches, rdflib_bool_patch, rdflib_bool_unpatch from .pytypes import GraphLike, SHACLExecutor from .rdfutil import ( add_baked_in, @@ -30,27 +25,60 @@ clone_graph, inoculate, inoculate_dataset, - load_from_source, mix_datasets, mix_graphs, ) from .rules import apply_rules, gather_rules +from .run_type import PySHACLRunType from .shapes_graph import ShapesGraph from .target import apply_target_types, gather_target_types -from .validator_conformance import check_dash_result USE_FULL_MIXIN = getenv("PYSHACL_USE_FULL_MIXIN") in env_truths -log_handler = logging.StreamHandler(stderr) -log = logging.getLogger(__name__) -for h in log.handlers: - log.removeHandler(h) # pragma:no cover -log.addHandler(log_handler) -log.setLevel(logging.INFO) -log_handler.setLevel(logging.INFO) +class Validator(PySHACLRunType): + def __init__( + self, + data_graph: 
GraphLike, + *args, + shacl_graph: Optional[GraphLike] = None, + ont_graph: Optional[GraphLike] = None, + options: Optional[Dict[str, Any]] = None, + **kwargs, + ): + options = options or {} + self._load_default_options(options) + self.options = options # type: dict + self.logger = options['logger'] # type: logging.Logger + self.debug = options['debug'] + self.pre_inferenced = kwargs.pop('pre_inferenced', False) + self.inplace = options['inplace'] + if not isinstance(data_graph, rdflib.Graph): + raise RuntimeError("data_graph must be a rdflib Graph object") + self.data_graph = data_graph # type: GraphLike + self._target_graph = None + self.ont_graph = ont_graph # type: Optional[GraphLike] + self.data_graph_is_multigraph = isinstance(self.data_graph, (rdflib.Dataset, rdflib.ConjunctiveGraph)) + if self.ont_graph is not None and isinstance(self.ont_graph, (rdflib.Dataset, rdflib.ConjunctiveGraph)): + self.ont_graph.default_union = True + if self.ont_graph is not None and options['sparql_mode']: + raise ReportableRuntimeError("Cannot use SPARQL Remote Graph Mode with extra Ontology Graph inoculation.") + if shacl_graph is None: + if options['sparql_mode']: + raise ReportableRuntimeError( + "SHACL Shapes Graph must be a separate local graph or file when in SPARQL Remote Graph Mode." 
+ ) + shacl_graph = clone_graph(data_graph, identifier='shacl') + assert isinstance(shacl_graph, rdflib.Graph), "shacl_graph must be a rdflib Graph object" + self.shacl_graph = ShapesGraph(shacl_graph, self.debug, self.logger) # type: ShapesGraph + + if options['use_js']: + if options['sparql_mode']: + raise ReportableRuntimeError("Cannot use SHACL-JS in SPARQL Remote Graph Mode.") + is_js_installed = check_extra_installed('js') + if is_js_installed: + self.shacl_graph.enable_js() -class Validator(object): @classmethod def _load_default_options(cls, options_dict: dict): options_dict.setdefault('debug', False) @@ -163,48 +191,6 @@ def create_validation_report(cls, sg, conforms: bool, results: List[Tuple]): vg.add((s, p, o)) return vg, v_text - def __init__( - self, - data_graph: GraphLike, - *args, - shacl_graph: Optional[GraphLike] = None, - ont_graph: Optional[GraphLike] = None, - options: Optional[dict] = None, - **kwargs, - ): - options = options or {} - self._load_default_options(options) - self.options = options # type: dict - self.logger = options['logger'] # type: logging.Logger - self.debug = options['debug'] - self.pre_inferenced = kwargs.pop('pre_inferenced', False) - self.inplace = options['inplace'] - if not isinstance(data_graph, rdflib.Graph): - raise RuntimeError("data_graph must be a rdflib Graph object") - self.data_graph = data_graph # type: GraphLike - self._target_graph = None - self.ont_graph = ont_graph # type: Optional[GraphLike] - self.data_graph_is_multigraph = isinstance(self.data_graph, (rdflib.Dataset, rdflib.ConjunctiveGraph)) - if self.ont_graph is not None and isinstance(self.ont_graph, (rdflib.Dataset, rdflib.ConjunctiveGraph)): - self.ont_graph.default_union = True - if self.ont_graph is not None and options['sparql_mode']: - raise ReportableRuntimeError("Cannot use SPARQL Remote Graph Mode with extra Ontology Graph inoculation.") - if shacl_graph is None: - if options['sparql_mode']: - raise ReportableRuntimeError( - "SHACL Shapes 
Graph must be a separate local graph or file when in SPARQL Remote Graph Mode." - ) - shacl_graph = clone_graph(data_graph, identifier='shacl') - assert isinstance(shacl_graph, rdflib.Graph), "shacl_graph must be a rdflib Graph object" - self.shacl_graph = ShapesGraph(shacl_graph, self.debug, self.logger) # type: ShapesGraph - - if options['use_js']: - if options['sparql_mode']: - raise ReportableRuntimeError("Cannot use SHACL-JS in SPARQL Remote Graph Mode.") - is_js_installed = check_extra_installed('js') - if is_js_installed: - self.shacl_graph.enable_js() - @property def target_graph(self): return self._target_graph @@ -318,7 +304,7 @@ def run(self): expanded_focus_node = URIRef(f) expanded_focus_nodes.append(expanded_focus_node) self.options["focus_nodes"] = expanded_focus_nodes - specified_focus_nodes: Union[None, List[URIRef]] = expanded_focus_nodes + specified_focus_nodes: Union[None, Sequence[URIRef]] = expanded_focus_nodes else: specified_focus_nodes = None executor = self.make_executor() @@ -333,19 +319,21 @@ def run(self): if executor.advanced_mode: self.logger.debug("Activating SHACL-AF Features.") target_types = gather_target_types(self.shacl_graph) + gather_from_shapes = None if not using_manually_specified_shapes else [s.node for s in shapes] advanced = { 'functions': gather_functions(executor, self.shacl_graph), - 'rules': gather_rules(executor, self.shacl_graph), + 'rules': gather_rules(executor, self.shacl_graph, from_shapes=gather_from_shapes), } for s in shapes: s.set_advanced(True) apply_target_types(target_types) else: advanced = {} + if isinstance(the_target_graph, (rdflib.Dataset, rdflib.ConjunctiveGraph)): named_graphs = [ ( - rdflib.Graph(the_target_graph.store, i, namespace_manager=the_target_graph.namespace_manager) + rdflib.Graph(the_target_graph.store, i, namespace_manager=the_target_graph.namespace_manager) # type: ignore[arg-type] if not isinstance(i, rdflib.Graph) else i ) @@ -353,8 +341,11 @@ def run(self): ] else: named_graphs = 
[the_target_graph] + if specified_focus_nodes is not None and using_manually_specified_shapes: + on_focus_nodes: Union[Sequence[URIRef], None] = specified_focus_nodes + else: + on_focus_nodes = None reports = [] - non_conformant = False aborted = False if executor.abort_on_first and self.debug: @@ -373,13 +364,10 @@ def run(self): if executor.sparql_mode: self.logger.warning("Skipping SHACL Rules because operating in SPARQL Remote Graph Mode.") else: - apply_rules(executor, advanced['rules'], g) + apply_rules(executor, advanced['rules'], g, focus_nodes=on_focus_nodes) try: for s in shapes: - if using_manually_specified_shapes and specified_focus_nodes is not None: - _is_conform, _reports = s.validate(executor, g, focus=specified_focus_nodes) - else: - _is_conform, _reports = s.validate(executor, g) + _is_conform, _reports = s.validate(executor, g, focus=on_focus_nodes) non_conformant = non_conformant or (not _is_conform) reports.extend(_reports) if executor.abort_on_first and non_conformant: @@ -388,7 +376,7 @@ def run(self): if aborted: break finally: - if advanced: + if advanced and advanced['functions']: unapply_functions(advanced['functions'], g) v_report, v_text = self.create_validation_report(self.shacl_graph, not non_conformant, reports) return (not non_conformant), v_report, v_text @@ -412,211 +400,3 @@ def assign_baked_in(): add_baked_in("http://datashapes.org/schema", schema_file) add_baked_in("https://datashapes.org/schema", schema_file) add_baked_in("http://datashapes.org/schema.ttl", schema_file) - - -def with_metashacl_shacl_graph_cache(f): - # noinspection PyPep8Naming - EMPTY = object() - - @wraps(f) - def wrapped(*args, **kwargs): - graph_cache = getattr(wrapped, "graph_cache", None) - assert graph_cache is not None - if graph_cache is EMPTY: - import pickle - - if getattr(sys, 'frozen', False): - # runs in a pyinstaller bundle - here_dir = sys._MEIPASS - else: - here_dir = path.dirname(__file__) - pickle_file = path.join(here_dir, "assets", 
"shacl-shacl.pickle") - with open(pickle_file, 'rb') as shacl_pickle: - u = pickle.Unpickler(shacl_pickle, fix_imports=False) - shacl_shacl_store, identifier = u.load() - shacl_shacl_graph = rdflib.Graph(store=shacl_shacl_store, identifier=identifier) - setattr(wrapped, "graph_cache", shacl_shacl_graph) - return f(*args, **kwargs) - - setattr(wrapped, "graph_cache", EMPTY) - return wrapped - - -@with_metashacl_shacl_graph_cache -def meta_validate(shacl_graph: Union[GraphLike, str], inference: Optional[str] = 'rdfs', **kwargs): - shacl_shacl_graph = meta_validate.graph_cache - shacl_graph = load_from_source(shacl_graph, rdf_format=kwargs.pop('shacl_graph_format', None), multigraph=True) - _ = kwargs.pop('meta_shacl', None) - return validate(shacl_graph, shacl_graph=shacl_shacl_graph, inference=inference, **kwargs) - - -def validate( - data_graph: Union[GraphLike, BufferedIOBase, TextIOBase, str, bytes], - *args, - shacl_graph: Optional[Union[GraphLike, BufferedIOBase, TextIOBase, str, bytes]] = None, - ont_graph: Optional[Union[GraphLike, BufferedIOBase, TextIOBase, str, bytes]] = None, - advanced: Optional[bool] = False, - inference: Optional[str] = None, - inplace: Optional[bool] = False, - abort_on_first: Optional[bool] = False, - allow_infos: Optional[bool] = False, - allow_warnings: Optional[bool] = False, - max_validation_depth: Optional[int] = None, - sparql_mode: Optional[bool] = False, - focus_nodes: Optional[List[Union[str, URIRef]]] = None, - use_shapes: Optional[List[Union[str, URIRef]]] = None, - **kwargs, -): - """ - :param data_graph: rdflib.Graph or file path or web url of the data to validate - :type data_graph: rdflib.Graph | str | bytes - :param args: - :type args: list - :param shacl_graph: rdflib.Graph or file path or web url of the SHACL Shapes graph to use to - validate the data graph - :type shacl_graph: rdflib.Graph | str | bytes - :param ont_graph: rdflib.Graph or file path or web url of an extra ontology document to mix into the data graph 
- :type ont_graph: rdflib.Graph | str | bytes - :param advanced: Enable advanced SHACL features, default=False - :type advanced: bool | None - :param inference: One of "rdfs", "owlrl", "both", "none", or None - :type inference: str | None - :param inplace: If this is enabled, do not clone the datagraph, manipulate it in-place - :type inplace: bool - :param abort_on_first: Stop evaluating constraints after first violation is found - :type abort_on_first: bool | None - :param allow_infos: Shapes marked with severity of sh:Info will not cause result to be invalid. - :type allow_infos: bool | None - :param allow_warnings: Shapes marked with severity of sh:Warning or sh:Info will not cause result to be invalid. - :type allow_warnings: bool | None - :param max_validation_depth: The maximum number of SHACL shapes "deep" that the validator can go before reaching an "endpoint" constraint. - :type max_validation_depth: int | None - :param sparql_mode: Treat the DataGraph as a SPARQL endpoint, validate the graph at the SPARQL endpoint. - :type sparql_mode: bool | None - :param focus_nodes: A list of IRIs to validate only those nodes. 
- :type focus_nodes: list | None - :param kwargs: - :return: - """ - - do_debug = kwargs.get('debug', False) - if do_debug: - log_handler.setLevel(logging.DEBUG) - log.setLevel(logging.DEBUG) - apply_patches() - assign_baked_in() - do_check_dash_result = kwargs.pop('check_dash_result', False) # type: bool - if kwargs.get('meta_shacl', False): - to_meta_val = shacl_graph or data_graph - conforms, v_r, v_t = meta_validate(to_meta_val, inference=inference, **kwargs) - if not conforms: - msg = f"SHACL File does not validate against the SHACL Shapes SHACL (MetaSHACL) file.\n{v_t}" - log.error(msg) - raise ReportableRuntimeError(msg) - do_owl_imports = kwargs.pop('do_owl_imports', False) - data_graph_format = kwargs.pop('data_graph_format', None) - - if isinstance(data_graph, (str, bytes, BufferedIOBase, TextIOBase)): - # DataGraph is passed in as Text. It is not an rdflib.Graph - # That means we load it into an ephemeral graph at runtime - # that means we don't need to make a copy to prevent polluting it. 
- ephemeral = True - else: - ephemeral = False - use_js = kwargs.pop('js', None) - if sparql_mode: - if use_js: - raise ReportableRuntimeError("Cannot use SHACL-JS in SPARQL Remote Graph Mode.") - if inplace: - raise ReportableRuntimeError("Cannot use inplace mode in SPARQL Remote Graph Mode.") - if ont_graph is not None: - raise ReportableRuntimeError("Cannot use SPARQL Remote Graph Mode with extra Ontology Graph inoculation.") - if isinstance(data_graph, bytes): - data_graph = data_graph.decode('utf-8') - else: - data_graph = data_graph - ephemeral = False - inplace = True - if ( - sparql_mode - and isinstance(data_graph, str) - and (data_graph.lower().startswith("http:") or data_graph.lower().startswith("https:")) - ): - from rdflib.plugins.stores.sparqlstore import SPARQLStore - - query_endpoint: str = data_graph - username = os.getenv("PYSHACL_SPARQL_USERNAME", "") - method = os.getenv("PYSHACL_SPARQL_METHOD", "GET") - auth: Optional[Tuple[str, str]] - if username: - password: str = os.getenv("PYSHACL_SPARQL_PASSWORD", "") - auth = (username, password) - else: - auth = None - store = SPARQLStore(query_endpoint=query_endpoint, auth=auth, method=method) - loaded_dg = rdflib.Dataset(store=store, default_union=True) - else: - # force no owl imports on data_graph - loaded_dg = load_from_source( - data_graph, rdf_format=data_graph_format, multigraph=True, do_owl_imports=False, logger=log - ) - ont_graph_format = kwargs.pop('ont_graph_format', None) - if ont_graph is not None: - loaded_og = load_from_source( - ont_graph, rdf_format=ont_graph_format, multigraph=True, do_owl_imports=do_owl_imports, logger=log - ) - else: - loaded_og = None - shacl_graph_format = kwargs.pop('shacl_graph_format', None) - if shacl_graph is not None: - rdflib_bool_patch() - loaded_sg = load_from_source( - shacl_graph, rdf_format=shacl_graph_format, multigraph=True, do_owl_imports=do_owl_imports, logger=log - ) - rdflib_bool_unpatch() - else: - loaded_sg = None - iterate_rules = 
kwargs.pop('iterate_rules', False) - if "abort_on_error" in kwargs: - log.warning("Usage of abort_on_error is deprecated. Use abort_on_first instead.") - ae = kwargs.pop("abort_on_error") - abort_on_first = bool(abort_on_first) or bool(ae) - validator_options_dict = { - 'debug': do_debug or False, - 'inference': inference, - 'inplace': inplace or ephemeral, - 'abort_on_first': abort_on_first, - 'allow_infos': allow_infos, - 'allow_warnings': allow_warnings, - 'advanced': advanced, - 'iterate_rules': iterate_rules, - 'use_js': use_js, - 'sparql_mode': sparql_mode, - 'logger': log, - 'focus_nodes': focus_nodes, - 'use_shapes': use_shapes, - } - if max_validation_depth is not None: - validator_options_dict['max_validation_depth'] = max_validation_depth - validator = None - try: - validator = Validator( - loaded_dg, - shacl_graph=loaded_sg, - ont_graph=loaded_og, - options=validator_options_dict, - ) - conforms, report_graph, report_text = validator.run() - except ValidationFailure as e: - conforms = False - report_graph = e - report_text = "Validation Failure - {}".format(e.message) - if do_check_dash_result and validator is not None: - passes = check_dash_result(validator, report_graph, loaded_sg or loaded_dg) - return passes, report_graph, report_text - do_serialize_report_graph = kwargs.pop('serialize_report_graph', False) - if do_serialize_report_graph and isinstance(report_graph, rdflib.Graph): - if not (isinstance(do_serialize_report_graph, str)): - do_serialize_report_graph = 'turtle' - report_graph = report_graph.serialize(None, encoding='utf-8', format=do_serialize_report_graph) - return conforms, report_graph, report_text diff --git a/pyshacl/validator_conformance.py b/pyshacl/validator_conformance.py index d788b20..6d230e2 100644 --- a/pyshacl/validator_conformance.py +++ b/pyshacl/validator_conformance.py @@ -26,7 +26,7 @@ from pyshacl.rdfutil import compare_blank_node, compare_node, order_graph_literal, stringify_node if TYPE_CHECKING: - from 
pyshacl.validate import Validator + from pyshacl.validator import Validator def clean_validation_reports(actual_graph, actual_report, expected_graph, expected_report): diff --git a/test/resources/cmdline_tests/rules_d.ttl b/test/resources/cmdline_tests/rules_d.ttl new file mode 100644 index 0000000..d125f97 --- /dev/null +++ b/test/resources/cmdline_tests/rules_d.ttl @@ -0,0 +1,20 @@ +# prefix: ex + +@prefix ex: . +@prefix exOnt: . +@prefix owl: . +@prefix rdf: . +@prefix rdfs: . +@prefix xsd: . + +ex:Kate + rdf:type exOnt:Person ; + exOnt:firstName "Kate" ; + exOnt:lastName "Jones" ; +. + +ex:Jenny + rdf:type exOnt:Administrator ; + exOnt:firstName "Jennifer" ; + exOnt:lastName "Wolfeschlegelsteinhausenbergerdorff" ; +. diff --git a/test/resources/cmdline_tests/rules_s.ttl b/test/resources/cmdline_tests/rules_s.ttl new file mode 100644 index 0000000..acfc1a4 --- /dev/null +++ b/test/resources/cmdline_tests/rules_s.ttl @@ -0,0 +1,96 @@ +# prefix: ex + +@prefix ex: . +@prefix exOnt: . +@prefix exData: . +@prefix owl: . +@prefix rdf: . +@prefix rdfs: . +@prefix sh: . +@prefix xsd: . + + + rdf:type owl:Ontology ; + rdfs:label "Test of SHACL Rules expander mode" ; +. + +ex:concat + a sh:SPARQLFunction ; + rdfs:comment "Concatenates strings $op1 and $op2." ; + sh:parameter [ + sh:path ex:op1 ; + sh:datatype xsd:string ; + sh:description "The first string" ; + ] ; + sh:parameter [ + sh:path ex:op2 ; + sh:datatype xsd:string ; + sh:description "The second string" ; + ] ; + sh:returnType xsd:string ; + sh:select """ + SELECT ?result + WHERE { + BIND(CONCAT(STR(?op1),STR(?op2)) AS ?result) . + } + """ . + +ex:strlen + a sh:SPARQLFunction ; + rdfs:comment "Returns length of the given string." ; + sh:parameter [ + sh:path ex:op1 ; + sh:datatype xsd:string ; + sh:description "The string" ; + ] ; + sh:returnType xsd:integer ; + sh:select """ + SELECT ?result + WHERE { + BIND(STRLEN(?op1) AS ?result) . + } + """ . 
+ +ex:lessThan + a sh:SPARQLFunction ; + rdfs:comment "Returns True if op1 < op2." ; + sh:parameter [ + sh:path ex:op1 ; + sh:datatype xsd:integer ; + sh:description "The first int" ; + ] ; + sh:parameter [ + sh:path ex:op2 ; + sh:datatype xsd:integer ; + sh:description "The second int" ; + ] ; + sh:returnType xsd:boolean ; + sh:select """ + SELECT ?result + WHERE { + BIND(IF(?op1 < ?op2, true, false) AS ?result) . + } + """ . + +ex:PersonExpressionShape + a sh:NodeShape ; + sh:targetClass exOnt:Person ; + sh:expression [ + sh:message "Person's firstName and lastName together should be less than 35 chars long." ; + ex:lessThan ( + [ ex:strlen ( + [ ex:concat ( [ sh:path exOnt:firstName] [ sh:path exOnt:lastName ] ) ] ) + ] + 35 ); + ] . + +ex:PersonRuleShape + a sh:NodeShape ; + sh:targetClass exOnt:Administrator ; + sh:message "An administrator is a person too." ; + sh:rule [ + a sh:TripleRule ; + sh:subject sh:this ; + sh:predicate rdf:type ; + sh:object exOnt:Person ; + ] . diff --git a/test/test_cmdline.py b/test/test_cmdline.py index 69d05d3..d228c05 100644 --- a/test/test_cmdline.py +++ b/test/test_cmdline.py @@ -4,12 +4,9 @@ import platform import subprocess import sys - from os import getenv, path from sys import stderr - -print(os.environ, file=stderr) PATH = getenv("PATH", "") PP = getenv('PYTHONPATH', "") here_dir = path.abspath(path.dirname(__file__)) @@ -37,7 +34,6 @@ ENV_VARS["PYTHONPATH"] = ':'.join((lib_dir, PP)) it = ENV_VARS["PYTHONPATH"].split(":") -print(it, file=stderr, flush=True) scr_dir = "scripts-{}.{}".format(sys.version_info[0], sys.version_info[1]) if in_test_dir: scr_dir = path.join('..', scr_dir) @@ -131,9 +127,11 @@ def test_cmdline_table(): args = [graph_file, '-s', shacl_file, '-f', 'table'] res = subprocess.run(pyshacl_command + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=ENV_VARS) output_table = res.stdout.decode('utf-8') - assert "+-----+-----------+---------------------------+---------------------------+" \ - 
"---------------------------+--------------------------+---------------------------+" \ - "---------------------------+" in output_table + assert ( + "+-----+-----------+---------------------------+---------------------------+" + "---------------------------+--------------------------+---------------------------+" + "---------------------------+" in output_table + ) assert "| 1 | Violation | http://example.com/ex#Hum | http://example.com/exOnt# " in output_table diff --git a/test/test_cmdline_rules.py b/test/test_cmdline_rules.py new file mode 100644 index 0000000..d3d3128 --- /dev/null +++ b/test/test_cmdline_rules.py @@ -0,0 +1,109 @@ +# -*- coding: utf-8 -*- +# +import os +import platform +import subprocess +import sys +from os import getenv, path +from sys import stderr + +from rdflib import RDF, Graph, URIRef + +PATH = getenv("PATH", "") +PP = getenv('PYTHONPATH', "") +here_dir = path.abspath(path.dirname(__file__)) +ENV_VARS = {"PATH": PATH, "PYTHONPATH": ':'.join((here_dir, PP))} +PH = getenv('PYTHONHOME', "") +if PH: + ENV_VARS['PYTHONHOME'] = PH +VE = getenv('VIRTUAL_ENV', "") +if VE: + ENV_VARS['VIRTUAL_ENV'] = VE + virtual_bin = path.join(VE, "bin") + ENV_VARS['PATH'] = ':'.join((virtual_bin, PATH)) +abs_resources_dir = path.join(here_dir, 'resources') +cmdline_files_dir = path.join(abs_resources_dir, 'cmdline_tests') + +check_resources = path.join(path.abspath(os.getcwd()), 'resources') +in_test_dir = False +if path.exists(check_resources) and path.isdir(check_resources): + in_test_dir = True +else: + in_test_dir = False + +if in_test_dir: + lib_dir = os.path.abspath(os.path.join(here_dir, os.pardir)) + ENV_VARS["PYTHONPATH"] = ':'.join((lib_dir, PP)) + +it = ENV_VARS["PYTHONPATH"].split(":") +scr_dir = "scripts-{}.{}".format(sys.version_info[0], sys.version_info[1]) +if in_test_dir: + scr_dir = path.join('..', scr_dir) +check_scrdir = path.join(path.abspath(os.getcwd()), scr_dir) +if path.exists(check_scrdir) and path.isdir(check_scrdir): + 
has_scripts_dir = True +else: + has_scripts_dir = False + +bin_dir = "bin" +if in_test_dir: + bin_dir = path.join('..', bin_dir) +check_bindir = path.join(path.abspath(os.getcwd()), bin_dir) +if path.exists(check_bindir) and path.isdir(check_bindir): + has_bin_dir = True +else: + has_bin_dir = False + +cli_rules_script = "pyshacl/cli_rules.py" +if in_test_dir: + cli_rules_script = path.join('..', cli_rules_script) +check_cli_script = path.join(path.abspath(os.getcwd()), cli_rules_script) +if path.exists(check_cli_script) and path.isfile(check_cli_script): + has_cli_script = True +else: + has_cli_script = False + +if has_scripts_dir: + pyshacl_rules_command = ["{}/pyshacl_rules".format(scr_dir)] +elif has_bin_dir: + pyshacl_rules_command = ["{}/pyshacl_rules".format(bin_dir)] +elif has_cli_script: + pyshacl_rules_command = ["python3", cli_rules_script] +else: + pyshacl_rules_command = ["pyshacl_rules"] + + +def test_cmdline_rules(): + if not hasattr(subprocess, 'run'): + print("Subprocess.run() not available, skip this test") + assert True + return True + if platform.system() == "Windows": + print("Commandline tests cannot run on Windows.") + assert True + return True + if os.environ.get("PYBUILD_NAME", None) is not None: + print("We don't have access to scripts dir during pybuild process.") + assert True + return True + graph_file = path.join(cmdline_files_dir, 'rules_d.ttl') + shacl_file = path.join(cmdline_files_dir, 'rules_s.ttl') + cmd = pyshacl_rules_command + args = [graph_file, '-s', shacl_file, '-i', 'rdfs'] + res = subprocess.run(cmd + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=ENV_VARS) + print("result = {}".format(res.returncode)) + output_unicode = res.stdout.decode('utf-8') + print(res.stderr.decode('utf-8')) + assert res.returncode == 0 + output_g = Graph().parse(data=output_unicode, format='trig') + person_classes = set( + output_g.objects( + URIRef("http://datashapes.org/shasf/tests/expression/rules.test.data#Jenny"), 
predicate=RDF.type + ) + ) + assert URIRef("http://datashapes.org/shasf/tests/expression/rules.test.ont#Administrator") in person_classes + assert URIRef("http://datashapes.org/shasf/tests/expression/rules.test.ont#Person") in person_classes + + +if __name__ == "__main__": + test_cmdline_rules() diff --git a/test/test_shacl_rules_runner.py b/test/test_shacl_rules_runner.py new file mode 100644 index 0000000..f7c0d47 --- /dev/null +++ b/test/test_shacl_rules_runner.py @@ -0,0 +1,147 @@ +"""\ +A Test for the SHACL Rules Runner mode. +""" + +from rdflib import RDF, Graph, URIRef + +from pyshacl import shacl_rules + +shacl_file = '''\ +# prefix: ex + +@prefix ex: . +@prefix exOnt: . +@prefix exData: . +@prefix owl: . +@prefix rdf: . +@prefix rdfs: . +@prefix sh: . +@prefix xsd: . + + + rdf:type owl:Ontology ; + rdfs:label "Test of SHACL Rules expander mode" ; +. + +ex:concat + a sh:SPARQLFunction ; + rdfs:comment "Concatenates strings $op1 and $op2." ; + sh:parameter [ + sh:path ex:op1 ; + sh:datatype xsd:string ; + sh:description "The first string" ; + ] ; + sh:parameter [ + sh:path ex:op2 ; + sh:datatype xsd:string ; + sh:description "The second string" ; + ] ; + sh:returnType xsd:string ; + sh:select """ + SELECT ?result + WHERE { + BIND(CONCAT(STR(?op1),STR(?op2)) AS ?result) . + } + """ . + +ex:strlen + a sh:SPARQLFunction ; + rdfs:comment "Returns length of the given string." ; + sh:parameter [ + sh:path ex:op1 ; + sh:datatype xsd:string ; + sh:description "The string" ; + ] ; + sh:returnType xsd:integer ; + sh:select """ + SELECT ?result + WHERE { + BIND(STRLEN(?op1) AS ?result) . + } + """ . + +ex:lessThan + a sh:SPARQLFunction ; + rdfs:comment "Returns True if op1 < op2." 
; + sh:parameter [ + sh:path ex:op1 ; + sh:datatype xsd:integer ; + sh:description "The first int" ; + ] ; + sh:parameter [ + sh:path ex:op2 ; + sh:datatype xsd:integer ; + sh:description "The second int" ; + ] ; + sh:returnType xsd:boolean ; + sh:select """ + SELECT ?result + WHERE { + BIND(IF(?op1 < ?op2, true, false) AS ?result) . + } + """ . + +ex:PersonExpressionShape + a sh:NodeShape ; + sh:targetClass exOnt:Person ; + sh:expression [ + sh:message "Person's firstName and lastName together should be less than 35 chars long." ; + ex:lessThan ( + [ ex:strlen ( + [ ex:concat ( [ sh:path exOnt:firstName] [ sh:path exOnt:lastName ] ) ] ) + ] + 35 ); + ] . + +ex:PersonRuleShape + a sh:NodeShape ; + sh:targetClass exOnt:Administrator ; + sh:message "An administrator is a person too." ; + sh:rule [ + a sh:TripleRule ; + sh:subject sh:this ; + sh:predicate rdf:type ; + sh:object exOnt:Person ; + ] . +''' + +data_graph = ''' +# prefix: ex + +@prefix ex: . +@prefix exOnt: . +@prefix owl: . +@prefix rdf: . +@prefix rdfs: . +@prefix xsd: . + +ex:Kate + rdf:type exOnt:Person ; + exOnt:firstName "Kate" ; + exOnt:lastName "Jones" ; +. + +ex:Jenny + rdf:type exOnt:Administrator ; + exOnt:firstName "Jennifer" ; + exOnt:lastName "Wolfeschlegelsteinhausenbergerdorff" ; +. +''' + + +def test_rules_runner(): + d = Graph().parse(data=data_graph, format="turtle") + s = Graph().parse(data=shacl_file, format="turtle") + output_g = shacl_rules(d, shacl_graph=s, advanced=True, debug=False) + person_classes = set( + output_g.objects( + URIRef("http://datashapes.org/shasf/tests/expression/rules.test.data#Jenny"), predicate=RDF.type + ) + ) + assert URIRef("http://datashapes.org/shasf/tests/expression/rules.test.ont#Administrator") in person_classes + assert URIRef("http://datashapes.org/shasf/tests/expression/rules.test.ont#Person") in person_classes + print(output_g.serialize(format="turtle")) + + +if __name__ == "__main__": + exit(test_rules_runner())