# Source code for linkml.utils.sparqlvalidator

import logging
import sys
from dataclasses import dataclass
from typing import Union

import click
from SPARQLWrapper import SPARQLWrapper, N3, SPARQLWrapper2, RDFXML, TURTLE, RDF, JSON


from linkml_runtime.linkml_model import SchemaDefinition
from linkml_runtime.utils.schemaview import SchemaView
from rdflib import Graph
from linkml.generators.sparqlgen import SparqlGenerator
from linkml.generators.yamlgen import YAMLGenerator
from linkml.reporting import CheckResult, Report
from linkml.utils.datautils import get_dumper, _get_format, get_loader, dumpers_loaders


def sparqljson2dict(row: dict) -> dict:
    """Flatten one SPARQL JSON results binding row to a plain value dict.

    A SPARQL JSON binding maps each variable name to a structure like
    ``{"type": ..., "value": ...}``; keep only the ``value`` entry.

    :param row: one binding row from ``results["results"]["bindings"]``
    :return: mapping of variable name to its bound value
    """
    flattened = {}
    for var_name, binding in row.items():
        flattened[var_name] = binding['value']
    return flattened
def _make_result(row):
    """Convert a flattened SPARQL result row into a CheckResult.

    Any key absent from *row* yields ``None`` for that field (``dict.get``).

    :param row: flattened binding row (see :func:`sparqljson2dict`)
    :return: a populated CheckResult
    """
    check_type = row.get('check')
    graph = row.get('graph')
    subject = row.get('subject')
    predicate = row.get('predicate')
    return CheckResult(
        type=check_type,
        source=graph,
        subject=subject,
        predicate=predicate,
    )
@dataclass
class SparqlValidator:
    """Validates RDF data against a LinkML schema using generated SPARQL queries.

    Validation queries are produced lazily by :class:`SparqlGenerator` on first
    use and can be run either against an in-memory rdflib graph
    (:meth:`validate_graph` / :meth:`validate_file`) or a remote SPARQL
    endpoint (:meth:`validate_endpoint`).
    """

    # Schema the validation queries are generated from.
    schema: SchemaDefinition = None
    # Mapping of query name -> SPARQL text; built on first use.
    queries: Union[dict, None] = None

    def validate_file(self, input: str, format: str = 'turtle', **kwargs):
        """Parse an RDF file into a graph and validate it.

        :param input: path or URL of the RDF document
        :param format: rdflib parse format (default: turtle)
        :param kwargs: forwarded to :meth:`validate_graph`
        :return: list of offending bindings (see :meth:`validate_graph`)
        """
        g = Graph()
        g.parse(input, format=format)
        return self.validate_graph(g, **kwargs)

    def validate_graph(self, g: Graph, **kwargs):
        """Run every generated validation query against *g*.

        :param g: rdflib Graph to check
        :param kwargs: forwarded to SparqlGenerator when queries are first built
        :return: flat list of bindings from all rows matched by any check query
        """
        if self.queries is None:
            self.queries = SparqlGenerator(self.schema, **kwargs).queries
        invalid = []
        for qn, q in self.queries.items():
            print(f'QUERY: {qn}')
            q: str
            print(q)
            # Execute inside the try so a single malformed query is logged and
            # skipped instead of aborting the whole run.  (BUG FIX: the query
            # was previously also executed once *outside* the try, so errors
            # escaped the handler and the query ran twice when it succeeded.)
            try:
                qres = g.query(q)
                for row in qres:
                    invalid += row
            except Exception:
                logging.error(f'FAILED: {qn}')
        return invalid

    def validate_endpoint(self, url: str, **kwargs):
        """Run every generated validation query against a remote SPARQL endpoint.

        :param url: SPARQL endpoint URL
        :param kwargs: forwarded to SparqlGenerator when queries are first built
        :return: Report whose results hold one CheckResult per offending row
        """
        if self.queries is None:
            self.queries = SparqlGenerator(self.schema, **kwargs).queries
        report = Report()
        for qn, q in self.queries.items():
            # NOTE(review): the result cap is hard-coded; a 'limit' kwarg from
            # the CLI only reaches SparqlGenerator, not this clause -- confirm
            # intended behavior.
            q += " LIMIT 20"
            print(f'QUERY: {qn}')
            print(f'{q}')
            sw = SPARQLWrapper(url)
            sw.setQuery(q)
            sw.setReturnFormat(JSON)
            results = sw.query().convert()
            for row in results["results"]["bindings"]:
                row = sparqljson2dict(row)
                report.results.append(_make_result(row))
        return report

    def load_schema(self, schema: Union[str, SchemaDefinition]):
        """Load a schema (path, URL, or SchemaDefinition) and cache it.

        Sets both ``self.schemaview`` and ``self.schema``.

        :param schema: schema source
        :return: the loaded SchemaDefinition
        """
        self.schemaview = SchemaView(schema)
        self.schema = self.schemaview.schema
        return self.schema
@click.command()
@click.option("--named-graph", "-G", multiple=True, help="Constrain query to a named graph")
@click.option("--input", "-i", help="Input file to validate")
@click.option("--endpoint-url", "-U", help="URL of sparql endpoint")
@click.option("--limit", "-L", help="Max results per query")
@click.option("--output", "-o", help="Path to report file")
@click.option("--input-format", "-f",
              type=click.Choice(list(dumpers_loaders.keys())),
              help="Input format. Inferred from input suffix if not specified")
@click.option("--output-format", "-t",
              type=click.Choice(list(dumpers_loaders.keys())),
              help="Output format. Inferred from output suffix if not specified")
@click.option("--schema", "-s", help="Path to schema specified as LinkML yaml")
def cli(input, output=None, input_format=None, output_format=None,
        endpoint_url=None, limit=None, named_graph=None, schema=None) -> None:
    """
    Validates sparql

    Example:
        linkml-sparql-validate -U http://sparql.hegroup.org/sparql -s tests/test_validation/input/omo.yaml
    """
    if schema is not None:
        # NOTE(review): the SchemaView is otherwise unused, but constructing it
        # eagerly parses the schema, failing fast on a bad path -- confirm
        # before removing.
        sv = SchemaView(schema)
    validator = SparqlValidator(schema)
    if endpoint_url is not None:
        results = validator.validate_endpoint(endpoint_url, limit=limit,
                                              named_graphs=named_graph)
    else:
        if input is None:
            # Plain string: the original used an f-string with no placeholders.
            raise Exception('Must pass one of --endpoint-url OR --input')
        input_format = _get_format(input, input_format)
        results = validator.validate_file(input, format=input_format)
    output_format = _get_format(output, output_format, default='json')
    dumper = get_dumper(output_format)
    if output is not None:
        dumper.dump(results, output)
    else:
        print(dumper.dumps(results))


if __name__ == '__main__':
    cli(sys.argv[1:])