Source code for linkml.generators.namespacegen

import os

import click

from linkml.generators import PYTHON_GEN_VERSION
from linkml.generators.pythongen import PythonGenerator
from linkml_runtime.utils.formatutils import split_line, be
from linkml.utils.generator import shared_arguments


class NamespaceGenerator(PythonGenerator):
    """Generator that emits a Python "namespace manager" module for a schema.

    The emitted module declares one ``CurieNamespace`` per prefix known to the
    schema, wrapped in a ``BiolinkNameSpace`` class, together with a few
    CURIE/URI parsing helpers (``object_id``, ``fix_curies``, ``curie``).
    """

    # Name reported in the header of generated files (this file's basename).
    generatorname = os.path.basename(__file__)
    # Version string reported in the header of generated files.
    generatorversion = PYTHON_GEN_VERSION
    # The only output format this generator declares.
    valid_formats = ['py']
    # NOTE(review): presumably tells the base generator not to visit every
    # class slot -- confirm against the Generator base class.
    visit_all_class_slots = False

    def gen_namespaces(self) -> str:
        """Render the ``CurieNamespace(...)`` entries of the emitted list.

        One entry is produced for every prefix in ``self.emit_prefixes`` (in
        sorted order) that also has an entry in ``self.namespaces``.  Dots in
        a prefix are replaced by underscores.

        :return: the entries joined by a newline followed by two tabs, ready
            to be dropped inside the ``_namespaces = [...]`` literal of the
            template in :meth:`gen_schema`
        """
        return '\n\t\t'.join(
            f"CurieNamespace('{pfx.replace('.', '_')}', '{self.namespaces[pfx]}'),"
            for pfx in sorted(self.emit_prefixes)
            if pfx in self.namespaces
        )

    def gen_schema(self) -> str:
        """Render the complete text of the namespace-manager module.

        The result starts with a comment header.  The "Auto generated" part of
        that header is only present when the schema carries a generation date.
        The header is followed by the id, description and license of the
        schema, and then by a fixed module body into which the output of
        :meth:`gen_namespaces` is spliced.

        :return: Python source code of the generated module
        """
        # Continuation lines of a long description stay inside the comment
        # header.  NOTE(review): split_line() presumably wraps at split_len
        # characters and be() presumably maps None to '' -- confirm in
        # linkml_runtime.utils.formatutils.
        split_description = '\n# '.join(split_line(be(self.schema.description), split_len=100))

        if self.schema.generation_date:
            head = f'''# Auto generated from {self.schema.source_file} by {self.generatorname} version: {self.generatorversion}
# Generation date: {self.schema.generation_date}
# Schema: {self.schema.name}
#'''
        else:
            head = ''

        # Everything below is emitted verbatim, so the template has to stay at
        # column 0.  '{{}}' is an escaped literal '{}' (the empty dict of the
        # generated class); every other brace pair is an interpolation.
        return f'''{head}
# id: {self.schema.id}
# description: {split_description}
# license: {be(self.schema.license)}

from collections import defaultdict
from typing import Iterable, Dict, Tuple

from linkml_runtime.utils.curienamespace import CurieNamespace

GENE = 'gene'
DISEASE = 'disease'
CHEMICAL_SUBSTANCE = 'chemical substance'

SYMBOL = 'Approved_Symbol'


class IdentifierResolverException(RuntimeError):
    pass


class BiolinkNameSpace:
    """
    Map of BioLink Model registered URI Namespaces
    """

    _namespaces = [
        {self.gen_namespaces()}
    ]

    # class level dictionaries

    _prefix_map: Dict[str, CurieNamespace] = {{}}

    @classmethod
    def _get_prefix_map(cls):
        if not cls._prefix_map:
            for ns in cls._namespaces:
                # index by upper case for uniformity of search
                cls._prefix_map[ns.prefix.upper()] = ns
        return cls._prefix_map

    @classmethod
    def parse_curie(cls, curie: str) -> Tuple[CurieNamespace, str]:
        """
        Parse a candidate CURIE
        :param curie: candidate curie string
        :return: CURIE namespace and object_id
        """
        found = CurieNamespace("", ""), curie  # default value if not a CURIE or unknown XMLNS prefix
        if ':' in curie:
            part = curie.split(":")
            # Normalize retrieval with upper case of prefix for lookup
            prefix = part[0].upper()
            if prefix in cls._get_prefix_map():
                found = cls._prefix_map[prefix], part[1]
        return found

    @classmethod
    def parse_uri(cls, uri: str) -> Tuple[CurieNamespace, str]:
        """
        Parse a candidate URI
        :param uri: candidate URI string
        :return: namespace and object_id
        """
        found = CurieNamespace("", ""), uri  # default value returned if unknown URI namespace

        # TODO: is there a more efficient lookup scheme here than a linear search of namespaces?
        for ns in cls._namespaces:
            base_uri = str(ns)
            if uri.startswith(base_uri):
                # simple minded deletion of base_uri to give the object_id
                object_id = uri.replace(base_uri, "")
                found = ns, object_id
                break
        return found

    @classmethod
    def parse_identifier(cls, identifier: str) -> Tuple[CurieNamespace, str]:

        # trivial case of a null identifier?
        if not identifier:
            return CurieNamespace("", ""), ""

        # check if this is a candidate URI...
        if identifier.lower().startswith("http"):
            # guess that perhaps it is, so try to parse it
            return cls.parse_uri(identifier)

        else:
            # attempt to parse as a CURIE
            return cls.parse_curie(identifier)


def object_id(identifier, keep_version=False) -> str:
    """
    Returns the core object_id of a CURIE, with or without the version suffix.
    Note: not designed to be used with a URI (will give an invalid outcome)
    :param identifier: candidate CURIE identifier for processing
    :param keep_version: True if the version string suffix is to be retained in the identifier
    :return:
    """
    # trivial case: null input value?
    if not identifier:
        return identifier

    if ':' in identifier:
        identifier = identifier.split(":")[1]

    if not keep_version and '.' in identifier:
        identifier = identifier.split(".")[0]

    return identifier


def fix_curies(identifiers, prefix=''):
    """
    Applies the specified XMLNS prefix to (an) identifier(s) known
    to be "raw" IDs as keys in a dictionary or elements in a list (or a simple string)
    :param identifiers:
    :param prefix:
    :return:
    """
    if not prefix:
        # return identifiers without modification
        # Caller may already consider them in curie format
        return identifiers

    if isinstance(identifiers, dict):
        curie_dict = defaultdict(dict)
        for key in identifiers.keys():
            curie_dict[prefix + ':' + object_id(key, keep_version=True)] = identifiers[key]
        return curie_dict

    # identifiers assumed to be just a single object identifier
    elif isinstance(identifiers, str):
        # single string to convert
        return prefix + ':' + object_id(identifiers, keep_version=True)

    elif isinstance(identifiers, Iterable):
        return [prefix + ':' + object_id(x, keep_version=True) for x in identifiers]

    else:
        raise RuntimeError(
            "fix_curies() is not sure how to fix an instance of data type '" + str(type(identifiers)) + "'"
        )


def curie(identifier) -> str:
    # Ignore enpty strings
    if not identifier:
        return ""
    else:
        namespace: CurieNamespace
        identifier_object_id: str
        namespace, identifier_object_id = BiolinkNameSpace.parse_identifier(identifier)
        return namespace.curie(identifier_object_id)
'''
@shared_arguments(NamespaceGenerator)
@click.command()
def cli(yamlfile, **args):
    """ Generate a namespace manager for all of the prefixes represented in a LinkML model """
    # The docstring above is kept word for word: click uses a command's
    # docstring as its help text, so it is user-facing output.
    #
    # The same keyword arguments are deliberately handed to both the
    # constructor and serialize().
    generator = NamespaceGenerator(yamlfile, **args)
    rendered = generator.serialize(**args)
    print(rendered)


if __name__ == '__main__':
    cli()