import os
import click
from linkml.generators import PYTHON_GEN_VERSION
from linkml.generators.pythongen import PythonGenerator
from linkml_runtime.utils.formatutils import split_line, be
from linkml.utils.generator import shared_arguments
class NamespaceGenerator(PythonGenerator):
    """Emit a Python module that manages the CURIE namespaces of a LinkML schema.

    The generated module declares a ``BiolinkNameSpace`` registry holding one
    ``CurieNamespace`` per emitted prefix, plus the helper functions
    ``object_id``, ``fix_curies`` and ``curie``.

    NOTE(review): ``self.schema``, ``self.namespaces`` and ``self.emit_prefixes``
    are not defined here; they are presumably provided by the generator base
    classes -- confirm against ``PythonGenerator``.
    """

    # Name reported in the generated file header: this file's base name.
    generatorname = os.path.basename(__file__)
    # Version reported in the generated file header.
    generatorversion = PYTHON_GEN_VERSION
    # Output formats this generator declares.
    valid_formats = ['py']
    visit_all_class_slots = False

    def gen_namespaces(self) -> str:
        """Render the ``CurieNamespace(...)`` entries of the generated registry.

        One entry is produced for every prefix in ``self.emit_prefixes`` that is
        also present in ``self.namespaces``, in sorted prefix order.  Dots in a
        prefix are replaced by underscores.

        :return: the entries joined by a newline followed by two tabs, ready to
            be spliced between the brackets of the generated ``_namespaces`` list
        """
        entries = [
            f"CurieNamespace('{pfx.replace('.', '_')}', '{self.namespaces[pfx]}'),"
            for pfx in sorted(self.emit_prefixes)
            if pfx in self.namespaces
        ]
        # Continuation lines sit inside a list literal in the generated code,
        # so the tab indentation is purely cosmetic there.
        return '\n\t\t'.join(entries)

    def gen_schema(self) -> str:
        """Render the complete source text of the generated namespace module.

        The output starts with an optional provenance header (emitted only when
        the schema carries a generation date), followed by the schema id,
        description and license as comments, and then the static module body
        with the namespace entries from :meth:`gen_namespaces` spliced in.

        :return: Python source code of the generated module
        """
        # Long descriptions are wrapped; every continuation line stays a comment.
        split_description = '\n# '.join(split_line(be(self.schema.description), split_len=100))
        head = f'''# Auto generated from {self.schema.source_file} by {self.generatorname} version: {self.generatorversion}
# Generation date: {self.schema.generation_date}
# Schema: {self.schema.name}
#''' if self.schema.generation_date else ''

        # The template below is emitted verbatim, so it must start at column 0
        # and carry its own indentation.  Literal braces are doubled ({{ }})
        # because this is an f-string.
        return f'''{head}
# id: {self.schema.id}
# description: {split_description}
# license: {be(self.schema.license)}

from collections import defaultdict
from typing import Iterable, Dict, Tuple

from linkml_runtime.utils.curienamespace import CurieNamespace

GENE = 'gene'
DISEASE = 'disease'
CHEMICAL_SUBSTANCE = 'chemical substance'

SYMBOL = 'Approved_Symbol'


class IdentifierResolverException(RuntimeError):
    pass


class BiolinkNameSpace:
    """
    Map of BioLink Model registered URI Namespaces
    """

    _namespaces = [
        {self.gen_namespaces()}
    ]

    # class level dictionaries

    _prefix_map: Dict[str, CurieNamespace] = {{}}

    @classmethod
    def _get_prefix_map(cls):
        if not cls._prefix_map:
            for ns in cls._namespaces:
                # index by upper case for uniformity of search
                cls._prefix_map[ns.prefix.upper()] = ns
        return cls._prefix_map

    @classmethod
    def parse_curie(cls, curie: str) -> Tuple[CurieNamespace, str]:
        """
        Parse a candidate CURIE
        :param curie: candidate curie string
        :return: CURIE namespace and object_id
        """
        found = CurieNamespace("", ""), curie  # default value if not a CURIE or unknown XMLNS prefix
        if ':' in curie:
            # split on the first colon only, so an object_id containing colons is kept intact
            part = curie.split(":", 1)
            # Normalize retrieval with upper case of prefix for lookup
            prefix = part[0].upper()
            if prefix in cls._get_prefix_map():
                found = cls._prefix_map[prefix], part[1]
        return found

    @classmethod
    def parse_uri(cls, uri: str) -> Tuple[CurieNamespace, str]:
        """
        Parse a candidate URI
        :param uri: candidate URI string
        :return: namespace and object_id
        """
        found = CurieNamespace("", ""), uri  # default value returned if unknown URI namespace

        # TODO: is there a more efficient lookup scheme here than a linear search of namespaces?
        for ns in cls._namespaces:
            base_uri = str(ns)
            if uri.startswith(base_uri):
                # strip only the leading base_uri to give the object_id
                object_id = uri[len(base_uri):]
                found = ns, object_id
                break
        return found

    @classmethod
    def parse_identifier(cls, identifier: str) -> Tuple[CurieNamespace, str]:

        # trivial case of a null identifier?
        if not identifier:
            return CurieNamespace("", ""), ""

        # check if this is a candidate URI...
        if identifier.lower().startswith("http"):
            # guess that perhaps it is, so try to parse it
            return cls.parse_uri(identifier)

        else:  # attempt to parse as a CURIE
            return cls.parse_curie(identifier)


def object_id(identifier, keep_version=False) -> str:
    """
    Returns the core object_id of a CURIE, with or without the version suffix.
    Note:  not designed to be used with a URI (will give an invalid outcome)
    :param identifier: candidate CURIE identifier for processing
    :param keep_version: True if the version string suffix is to be retained in the identifier
    :return:
    """
    # trivial case: null input value?
    if not identifier:
        return identifier

    if ':' in identifier:
        # drop only the prefix; keep any further colons inside the object_id
        identifier = identifier.split(":", 1)[1]

    if not keep_version and '.' in identifier:
        identifier = identifier.split(".")[0]

    return identifier


def fix_curies(identifiers, prefix=''):
    """
    Applies the specified XMLNS prefix to (an) identifier(s) known
    to be "raw" IDs as keys in a dictionary or elements in a list (or a simple string)
    :param identifiers:
    :param prefix:
    :return:
    """
    if not prefix:
        # return identifiers without modification
        # Caller may already consider them in curie format
        return identifiers

    if isinstance(identifiers, dict):
        curie_dict = defaultdict(dict)
        for key in identifiers.keys():
            curie_dict[prefix + ':' + object_id(key, keep_version=True)] = identifiers[key]
        return curie_dict

    # identifiers assumed to be just a single object identifier
    elif isinstance(identifiers, str):
        # single string to convert
        return prefix + ':' + object_id(identifiers, keep_version=True)

    elif isinstance(identifiers, Iterable):
        return [prefix + ':' + object_id(x, keep_version=True) for x in identifiers]

    else:
        raise RuntimeError(
            f"fix_curies() is not sure how to fix an instance of data type '{{type(identifiers)}}'"
        )


def curie(identifier) -> str:
    # Ignore enpty strings
    if not identifier:
        return ""
    else:
        namespace: CurieNamespace
        identifier_object_id: str
        namespace, identifier_object_id = BiolinkNameSpace.parse_identifier(identifier)
        return namespace.curie(identifier_object_id)
'''
@shared_arguments(NamespaceGenerator)
@click.command()
def cli(yamlfile, **args):
    """ Generate a namespace manager for all of the prefixes represented in a LinkML model """
    # The same keyword options are handed to both the generator constructor
    # and its serialize() call.
    generator = NamespaceGenerator(yamlfile, **args)
    rendered = generator.serialize(**args)
    print(rendered)
# Run the command-line interface only when this file is executed as a script.
if __name__ == "__main__":
    cli()