Source code for linkml.generators.pythongen

import keyword
import os
import re
from typing import Optional, Tuple, List, Union, TextIO, Callable, Dict, Iterator, Set
import logging

import click
from linkml_runtime.linkml_model import linkml_files
from rdflib import URIRef

import linkml
from linkml.generators import PYTHON_GEN_VERSION
from linkml_runtime.linkml_model.meta import SchemaDefinition, SlotDefinition, ClassDefinition, ClassDefinitionName, \
    SlotDefinitionName, DefinitionName, Element, TypeDefinition, Definition, EnumDefinition, PermissibleValue
from linkml_runtime.utils.formatutils import camelcase, underscore, be, wrapped_annotation, split_line, sfx
from linkml.utils.generator import Generator, shared_arguments
from linkml.utils.ifabsent_functions import ifabsent_value_declaration, ifabsent_postinit_declaration, \
    default_curie_or_uri
from linkml_runtime.utils.metamodelcore import builtinnames


class PythonGenerator(Generator):
    """Generator that renders a LinkML schema as Python source text."""
    generatorname = os.path.basename(__file__)
    generatorversion = PYTHON_GEN_VERSION
    valid_formats = ['py']
    visit_all_class_slots = False

    def __init__(self, schema: Union[str, TextIO, SchemaDefinition], format: str = valid_formats[0],
                 genmeta: bool=False, gen_classvars: bool=True, gen_slots: bool=True, **kwargs) -> None:
        """
        Record the generation options and initialise the base generator.

        :param schema: schema source handed on to the base ``Generator``
        :param format: output format; ``None`` falls back to the first entry of ``valid_formats``
        :param genmeta: True when the metamodel itself is being generated
        :param gen_classvars: emit the ClassVar metadata block on each generated class
        :param gen_slots: emit the trailing ``slots`` section
        :param kwargs: forwarded unchanged to the base ``Generator``
        """
        self.sourcefile = schema
        self.emit_prefixes: Set[str] = set()
        self.genmeta = genmeta
        self.gen_classvars = gen_classvars
        self.gen_slots = gen_slots
        chosen_format = self.valid_formats[0] if format is None else format
        super().__init__(schema, chosen_format, **kwargs)

        generating_linkml = self.schema.default_prefix == 'linkml'
        if generating_linkml and not self.genmeta:
            logging.error('Generating metamodel without --genmeta is highly inadvised!')

        # Only a single-line string can be a file name; text containing newlines is left alone
        source_is_path = isinstance(self.sourcefile, str) and '\n' not in self.sourcefile
        if not self.schema.source_file and source_is_path:
            self.schema.source_file = os.path.basename(self.sourcefile)
def visit_schema(self, **kwargs) -> None:
    """
    Collect the prefixes named by the schema into ``self.emit_prefixes``.

    :param kwargs: accepted but not used here
    """
    schema = self.schema
    # Explicitly declared prefixes
    declared = [entry.prefix_prefix for entry in schema.prefixes.values()]
    self.emit_prefixes.update(declared)
    # Everything listed under emit_prefixes
    self.emit_prefixes.update(schema.emit_prefixes)
    # The default prefix, when there is one
    if schema.default_prefix:
        self.emit_prefixes.add(self.namespaces.prefix_for(schema.default_prefix))
def visit_class(self, cls: ClassDefinition) -> bool:
    """
    Register the prefix of a locally defined class URI and add its mappings.

    :param cls: class being visited
    :return: always False
    """
    if cls.imported_from:
        return False
    prefix = self.namespaces.prefix_for(cls.class_uri)
    if prefix:
        self.emit_prefixes.add(prefix)
    self.add_mappings(cls)
    return False
def visit_slot(self, aliased_slot_name: str, slot: SlotDefinition) -> None:
    """
    Register the prefix of a locally defined slot URI and add its mappings.

    :param aliased_slot_name: name under which the slot is visited (not used here)
    :param slot: slot being visited
    """
    if slot.imported_from:
        return
    prefix = self.namespaces.prefix_for(slot.slot_uri)
    if prefix:
        self.emit_prefixes.add(prefix)
    self.add_mappings(slot)
def visit_type(self, typ: TypeDefinition) -> None:
    """
    Register the prefix of a locally defined type URI.

    :param typ: type being visited
    """
    if typ.imported_from:
        return
    prefix = self.namespaces.prefix_for(typ.uri)
    if prefix:
        self.emit_prefixes.add(prefix)
def gen_schema(self) -> str:
    """
    Assemble the complete text of the generated python module.

    The result is, in order: an optional metadata heading, the schema id / description / license
    comments, the fixed import block, the schema-specific imports, and the namespace, type,
    class-reference, class, enumeration and slot sections.

    :return: generated module text
    """
    # The metamodel uses Enumerations to define itself, so don't import if we are generating the metamodel
    enumimports = '' if self.genmeta else \
        'from linkml_runtime.linkml_model.meta import EnumDefinition, PermissibleValue, PvFormulaOptions\n'
    handlerimport = 'from linkml_runtime.utils.enumerations import EnumDefinitionImpl'
    split_descripton = '\n# '.join(split_line(be(self.schema.description), split_len=100))
    # The heading is only emitted when metadata is requested and a generation date is present
    head = f'''# Auto generated from {self.schema.source_file} by {self.generatorname} version: {self.generatorversion}
# Generation date: {self.schema.generation_date}
# Schema: {self.schema.name}
#''' if self.emit_metadata and self.schema.generation_date else ''

    # Every comment, import and section must sit on its own line for the output to be valid python.
    # NOTE(review): the number of blank lines between sections is a reconstruction -- confirm
    # against the expected generated output.
    return f'''{head}
# id: {self.schema.id}
# description: {split_descripton}
# license: {be(self.schema.license)}

import dataclasses
import sys
import re
from jsonasobj2 import JsonObj, as_dict
from typing import Optional, List, Union, Dict, ClassVar, Any
from dataclasses import dataclass
{enumimports}
from linkml_runtime.utils.slot import Slot
from linkml_runtime.utils.metamodelcore import empty_list, empty_dict, bnode
from linkml_runtime.utils.yamlutils import YAMLRoot, extended_str, extended_float, extended_int
from linkml_runtime.utils.dataclass_extensions_376 import dataclasses_init_fn_with_kwargs
from linkml_runtime.utils.formatutils import camelcase, underscore, sfx
{handlerimport}
from rdflib import Namespace, URIRef
from linkml_runtime.utils.curienamespace import CurieNamespace
{self.gen_imports()}

metamodel_version = "{self.schema.metamodel_version}"

# Overwrite dataclasses _init_fn to add **kwargs in __init__
dataclasses._init_fn = dataclasses_init_fn_with_kwargs

# Namespaces
{self.gen_namespaces()}


# Types
{self.gen_typedefs()}
# Class references
{self.gen_references()}

{self.gen_classdefs()}

# Enumerations
{self.gen_enumerations()}

# Slots
{self.gen_slotdefs()}'''
def end_schema(self, **_):
    """
    Print the generated module to standard output.

    Tabs in the generated text are expanded to four spaces, spaces immediately before a newline
    are removed, and leading/trailing spaces of the whole text are stripped. Nothing is returned.

    :param _: ignored keyword arguments
    """
    # Four columns per tab so tab-built nesting lines up with space-indented template text
    # NOTE(review): tab width chosen as 4 -- confirm against the expected generated output
    text = self.gen_schema().replace('\t', '    ')
    text = re.sub(r' +\n', '\n', text)
    print(text.strip(' '), end='')
def gen_imports(self) -> str:
    """
    Render the import list as ``from <module> import <names>`` statements, one per line.

    :return: newline separated import statements (empty string when there is nothing to import)
    """
    statements = []
    for module, names in self.gen_import_list().items():
        statements.append(f"from {module} import {', '.join(names)}")
    return '\n'.join(statements)
def gen_import_list(self) -> Dict[str, List[str]]:
    """
    Generate a list of types to import

    :return: source file followed by elements to import
    """
    class ImportList:
        """ Accumulates module path -> set of names to import """
        def __init__(self, schema_location: str):
            self.schema_location = schema_location
            self.v: Dict[str, Set[str]] = {}

        def add_element(self, e: Element) -> None:
            # Only elements that come from another schema need an import
            if e.imported_from:
                self.add_entry(e.imported_from, camelcase(e.name))

        def add_entry(innerself, path: Union[str, URIRef], name: str) -> None:
            # 'self' below is the enclosing generator, 'innerself' the ImportList
            path = str(self.namespaces.uri_for(path) if ':' in path else path)
            if path.startswith(linkml_files.LINKML_NAMESPACE):
                model_base = '.' if self.genmeta else 'linkml_runtime.linkml_model.'
                innerself.v.setdefault(model_base + path[len(linkml_files.LINKML_NAMESPACE):], set()).add(name)
            elif path == linkml.BIOLINK_MODEL_URI:
                innerself.v.setdefault(linkml.BIOLINK_MODEL_PYTHON_LOC, set()).add(name)
            elif '://' in path:
                raise ValueError(f"Cannot map {path} into a python import statement")
            elif '/' in path:
                innerself.v.setdefault(path.replace('./', '.').replace('/', '.'), set()).add(name)
            elif '.' in path:
                innerself.v.setdefault(path, set()).add(name)
            else:
                innerself.v.setdefault('. ' + path, set()).add(name)

        def values(self) -> Dict[str, List[str]]:
            # Sorted modules and sorted names
            return {k: sorted(self.v[k]) for k in sorted(self.v.keys())}

    def add_type_ref(typ: TypeDefinition) -> None:
        if not typ.typeof and typ.base and typ.base not in builtinnames:
            if '.' in typ.base:
                # Split on the LAST dot only: add_entry takes exactly (module path, name), so an
                # unbounded split would fail on bases with more than one dot ("a.b.Name")
                rval.add_entry(*typ.base.rsplit('.', 1))
            else:
                rval.add_entry('linkml_runtime.utils.metamodelcore', typ.base)
        if typ.typeof:
            add_type_ref(self.schema.types[typ.typeof])
        rval.add_element(typ)

    def add_enum_ref(e: EnumDefinition) -> None:
        rval.add_element(e)

    def add_slot_range(slot: SlotDefinition) -> None:
        if slot.range:
            if slot.range in self.schema.types:
                add_type_ref(self.schema.types[slot.range])
            elif slot.range in self.schema.enums:
                add_enum_ref(self.schema.enums[slot.range])
            else:
                cls = self.schema.classes[slot.range]
                if cls.imported_from:
                    if self.class_identifier(cls):
                        identifier_range = self.class_identifier_path(cls, False)[-1]
                        if identifier_range in self.schema.types:
                            add_type_ref(TypeDefinition(identifier_range))
                        else:
                            rval.add_entry(cls.imported_from, identifier_range)
                    if slot.inlined:
                        rval.add_element(cls)

    rval = ImportList(self.schema_location)
    for typ in self.schema.types.values():
        if not typ.imported_from:
            add_type_ref(typ)
    for slot in self.schema.slots.values():
        if not slot.imported_from:
            if slot.is_a:
                parent = self.schema.slots[slot.is_a]
                if (parent.key or parent.identifier) and parent.imported_from:
                    rval.add_element(self.schema.slots[slot.is_a])
            if slot.domain:
                domain = self.schema.classes[slot.domain]
                if domain.imported_from:
                    rval.add_element(self.schema.classes[slot.domain])
            add_slot_range(slot)
    for cls in self.schema.classes.values():
        if not cls.imported_from:
            if cls.is_a:
                parent = self.schema.classes[cls.is_a]
                if parent.imported_from:
                    rval.add_element(self.schema.classes[cls.is_a])
                    if self.class_identifier(parent):
                        rval.add_entry(parent.imported_from, self.class_identifier_path(parent, False)[-1])
            for slotname in cls.slots:
                add_slot_range(self.schema.slots[slotname])

    return rval.values()
def gen_namespaces(self) -> str:
    """
    Render one ``CurieNamespace`` assignment per emitted prefix plus the ``DEFAULT_`` assignment.

    Prefixes are emitted in sorted order; a prefix absent from ``self.namespaces`` is skipped.

    :return: newline separated namespace declarations
    """
    default_ref = default_curie_or_uri(self)
    if ':/' in default_ref:
        default_decl = f"CurieNamespace('', '{sfx(default_ref)}')"
    else:
        default_decl = default_ref.upper()

    declarations = []
    for pfx in sorted(self.emit_prefixes):
        if pfx not in self.namespaces:
            continue
        # '.' and '-' cannot appear in a python variable name
        variable = pfx.upper().replace('.', '_').replace('-', '_')
        declarations.append(
            f"{variable} = CurieNamespace('{pfx.replace('.', '_')}', '{self.namespaces[pfx]}')")
    declarations.append(f"DEFAULT_ = {default_decl}")
    return '\n'.join(declarations)
def gen_references(self) -> str:
    """
    Generate python type declarations for all identifiers (primary keys)

    :return: one ``class <Class><Key>(<parent>)`` stub per local class that has a primary key
    """
    declarations = []
    for cls in self._sort_classes(self.schema.classes.values()):
        if cls.imported_from:
            continue
        pkeys = self.primary_keys_for(cls)
        if not pkeys:
            continue
        pk = pkeys[0]           # We only do the first primary key
        classname = camelcase(cls.name) + camelcase(self.aliased_slot_name(pk))
        # If we've got a parent slot and the range of the parent is the range of the child, the
        # child slot is a subclass of the parent. Otherwise, the child range has been overridden,
        # so the inheritance chain has been broken
        parent_pk = self.class_identifier(cls.is_a) if cls.is_a else None
        parent_pk_slot = self.schema.slots[parent_pk] if parent_pk else None
        pk_slot = self.schema.slots[pk]
        inherits_key = parent_pk_slot and \
            (parent_pk_slot.name == pk or pk_slot.range == parent_pk_slot.range)
        if inherits_key:
            parents = self.class_identifier_path(cls.is_a, False)
        else:
            parents = self.slot_range_path(pk_slot)
        nearest = parents[-1]
        # The three builtin scalars are replaced by their extended_ counterparts
        parent_cls = 'extended_' + nearest if nearest in ['str', 'float', 'int'] else nearest
        declarations.append(f'class {classname}({parent_cls}):\n\tpass')
    return '\n\n\n'.join(declarations)
def gen_typedefs(self) -> str:
    """
    Generate python type declarations for all defined types

    :return: class declarations for every type that is not imported from another schema
    """
    declarations = []
    for typ in self.schema.types.values():
        if typ.imported_from:
            continue
        typname = camelcase(typ.name)
        desc = f'\n\t""" {typ.description} """' if typ.description else ''
        # A derived type subclasses its parent type; a root type subclasses the last
        # dotted component of its base
        if typ.typeof:
            parent = camelcase(typ.typeof)
        else:
            parent = typ.base.rsplit('.')[-1]
        declarations.append(f'class {typname}({parent}):{desc}\n\t{self.gen_type_meta(typ)}\n\n')
    return '\n'.join(declarations)
def gen_classdefs(self) -> str:
    """
    Create class definitions for all non-mixin classes in the model
    Note that apply_to classes are transformed to mixins

    :return: definitions of all locally defined classes, parents before children
    """
    ordered = self._sort_classes(self.schema.classes.values())
    definitions = []
    for cls in ordered:
        if not cls.imported_from:
            definitions.append(self.gen_classdef(cls))
    return '\n'.join(definitions)
def gen_classdef(self, cls: ClassDefinition) -> str:
    """
    Generate python definition for class cls

    :param cls: class to render
    :return: class source, decorated with ``@dataclass`` only when the class has slot variables
    """
    parent = self.formatted_element_name(cls.is_a, True) if cls.is_a else "YAMLRoot"
    slotdefs = self.gen_class_variables(cls)
    postinits = self.gen_postinits(cls)
    if be(cls.description):
        wrapped_description = f'\n\t"""\n\t{wrapped_annotation(be(cls.description))}\n\t"""'
    else:
        wrapped_description = ''

    pieces = [
        '\n@dataclass' if slotdefs else '',
        f'\nclass {self.class_or_type_name(cls.name)}({parent}):{wrapped_description}',
        f'{self.gen_inherited_slots(cls)}',
        f'{self.gen_class_meta(cls)}',
        f'\n\t{slotdefs}' if slotdefs else '',
        f'\n{postinits}' if postinits else '',
    ]
    return ''.join(pieces)
def gen_inherited_slots(self, cls: ClassDefinition) -> str:
    """
    Render the ``_inherited_slots`` ClassVar listing the slots of cls flagged as inherited.

    :param cls: class being rendered
    :return: the declaration line, or '' when class variables are switched off
    """
    if not self.gen_classvars:
        return ''
    quoted = []
    for slotname in cls.slots:
        slot = self.schema.slots[slotname]
        if slot.inherited:
            # The alias, when present, is the name the slot is known by
            shown = slot.alias if slot.alias else slotname
            quoted.append(f'"{underscore(shown)}"')
    inherited_slots_str = ", ".join(quoted)
    return f"\n\t_inherited_slots: ClassVar[List[str]] = [{inherited_slots_str}]\n"
def gen_class_meta(self, cls: ClassDefinition) -> str:
    """
    Render the ClassVar metadata block (class uri, curie, name and model uri) for cls.

    :param cls: class being rendered
    :return: the metadata lines, or '' when class variables are switched off
    """
    if not self.gen_classvars:
        return ''

    resolved_uri = self.namespaces.uri_for(cls.class_uri)
    python_uri = None
    curie = None
    if resolved_uri:
        python_uri = self.namespaces.curie_for(resolved_uri, default_ok=False, pythonform=True)
        curie = self.namespaces.curie_for(resolved_uri, default_ok=False, pythonform=False)
    if curie:
        curie = f'"{curie}"'
    # Prefer the python namespace expression; fall back to an explicit URIRef
    uri_text = python_uri if python_uri else f'URIRef("{resolved_uri}")'

    model_uri = self.namespaces.uri_or_curie_for(self.schema.default_prefix or "DEFAULT_", camelcase(cls.name))
    if ':/' in model_uri:
        model_uri = f'URIRef("{model_uri}")'
    else:
        ns, ln = model_uri.split(':', 1)
        model_uri = f"{ns.upper()}.{ln}"

    declarations = [
        f'class_class_uri: ClassVar[URIRef] = {uri_text}',
        f'class_class_curie: ClassVar[str] = {curie}',
        f'class_name: ClassVar[str] = "{cls.name}"',
        f'class_model_uri: ClassVar[URIRef] = {model_uri}',
    ]
    return "\n\t" + "\n\t".join(declarations) + "\n"
def gen_type_meta(self, typ: TypeDefinition) -> str:
    """
    Render the metadata assignments (type uri, curie, name and model uri) for a type class.

    :param typ: type being rendered
    :return: four assignment lines joined with a newline and tab
    """
    resolved_uri = self.namespaces.uri_for(typ.uri)
    python_uri = None
    curie = None
    if resolved_uri:
        python_uri = self.namespaces.curie_for(resolved_uri, default_ok=False, pythonform=True)
        curie = self.namespaces.curie_for(resolved_uri, default_ok=False, pythonform=False)
    if curie:
        curie = f'"{curie}"'
    # Prefer the python namespace expression; fall back to an explicit URIRef
    uri_text = python_uri if python_uri else f'URIRef("{resolved_uri}")'

    model_uri = self.namespaces.uri_or_curie_for(self.schema.default_prefix, camelcase(typ.name))
    if ':/' in model_uri:
        model_uri = f'URIRef("{model_uri}")'
    else:
        ns, ln = model_uri.split(':', 1)
        # Attribute access only works for identifiers; anything else is looked up by subscript
        accessor = f".{ln}" if ln.isidentifier() else f'["{ln}"]'
        model_uri = f"{ns.upper()}{accessor}"

    declarations = [
        f'type_class_uri = {uri_text}',
        f'type_class_curie = {curie}',
        f'type_name = "{typ.name}"',
        f'type_model_uri = {model_uri}',
    ]
    return "\n\t".join(declarations)
def gen_class_variables(self, cls: ClassDefinition) -> str:
    """
    Generate the variable declarations for a dataclass.

    :param cls: class containing variables to be rendered in inheritance hierarchy
    :return: variable declarations for target class and its ancestors
    """
    has_parent = bool(cls.is_a)
    domain_slots = self.domain_slots(cls)
    declarations = []

    # Root keys and identifiers go first. Note that even if a key or identifier is overridden it still
    # appears at the top of the list, as we need to keep the position
    for slot in self._slot_iter(cls, lambda s: (s.identifier or s.key) and not s.ifabsent,
                                first_hit_only=True):
        declarations.append(self.gen_class_variable(cls, slot, has_parent))

    # Required slots
    for slot in self._slot_iter(cls, lambda s: s.required and not s.identifier and not s.key
                                and not s.ifabsent):
        declarations.append(self.gen_class_variable(cls, slot, has_parent))

    # Required or key slots with default values
    for slot in self._slot_iter(cls, lambda s: s.ifabsent and s.required):
        declarations.append(self.gen_class_variable(cls, slot, False))

    # Followed by everything else
    for slot in self._slot_iter(cls, lambda s: not s.required and s in domain_slots):
        declarations.append(self.gen_class_variable(cls, slot, False))

    return '\n\t'.join(declarations)
def gen_class_variable(self, cls: ClassDefinition, slot: SlotDefinition, can_be_positional: bool) -> str:
    """
    Generate a class variable declaration for the supplied slot.  Note: the can_be_positional attribute works,
    but it makes tag/value lists unduly complex, as you can't load them with tag=..., value=... -- you HAVE
    to load positionally. We currently ignore this parameter, meaning that we have a tag/value option for
    any LinkML element

    :param cls: Owning class
    :param slot: slot definition
    :param can_be_positional: True means that positional parameters are allowed.
    :return: Initializer string
    """
    can_be_positional = False       # Force everything to be tag values
    slotname = self.slot_name(slot.name)
    slot_range, default_val = self.range_cardinality(slot, cls, can_be_positional)

    ifabsent_text = None
    if slot.ifabsent is not None:
        ifabsent_text = ifabsent_value_declaration(slot.ifabsent, self, cls, slot)

    # An ifabsent declaration wins over the cardinality default
    if ifabsent_text:
        default = f'= {ifabsent_text}'
    elif default_val:
        default = f'= {default_val}'
    else:
        default = ''
    return f'''{slotname}: {slot_range} {default}'''
def range_cardinality(self, slot: SlotDefinition, cls: Optional[ClassDefinition], positional_allowed: bool) \
        -> Tuple[str, Optional[str]]:
    """
    Return the range type including initializers, etc.
    Generate a class variable declaration for the supplied slot.  Note: the positional_allowed attribute works,
    but it makes tag/value lists unduly complex, as you can't load them with tag=..., value=... -- you HAVE
    to load positionally. We currently ignore this parameter, meaning that we have a tag/value option for
    any LinkML element

    :param slot: slot to generate type for
    :param cls: containing class -- used to render key slots correctly.  If absent, slot is an add-in
    :param positional_allowed: True Means that we are in the positional space and defaults are not supplied
    :return: python property name and initializer (if any)
    """
    positional_allowed = False      # Force everything to be tag values

    range_type, _parent_type, _ = self.class_reference_type(slot, cls)
    pkey = self.class_identifier(slot.range)
    required_default = None if positional_allowed else 'None'

    # Special case, inlined, identified range
    if pkey and slot.inlined and slot.multivalued:
        base_key = self.gen_class_reference(self.class_identifier_path(slot.range, False))
        num_elements = len(self.schema.classes[slot.range].slots)
        dflt = None if slot.required and positional_allowed else 'empty_dict()'
        if num_elements == 1:
            # A range holding nothing but its key is offered as a list of keys first
            union = f'Union[List[{base_key}], Dict[{base_key}, {range_type}]]'
        else:
            union = f'Union[Dict[{base_key}, {range_type}], List[{range_type}]]'
        return (union if slot.required else f'Optional[{union}]'), dflt

    # All other cases
    if slot.multivalued:
        union = f'Union[{range_type}, List[{range_type}]]'
        if slot.required:
            return union, required_default
        return f'Optional[{union}]', 'empty_list()'
    if slot.required:
        return range_type, required_default
    return f'Optional[{range_type}]', 'None'
def class_reference_type(self, slot: SlotDefinition, cls: Optional[ClassDefinition]) \
        -> Tuple[str, str, str]:
    """
    Return the type of a slot referencing a class

    :param slot: slot to be typed
    :param cls: owning class.  Used for generating key references
    :return: Python class reference type, most proximal type, most proximal type name
    """
    if slot.key or slot.identifier:
        rangelist = self.class_identifier_path(cls, False)
    else:
        rangelist = self.slot_range_path(slot)
    prox_type = self.slot_range_path(slot)[-1].rsplit('.')[-1]
    # Captured before any quoting below, so the name itself is never quoted
    prox_type_name = rangelist[-1]

    # Quote forward references - note that enums always gen at the end
    is_enum = slot.range in self.schema.enums
    if is_enum or (cls and slot.inlined and slot.range in self.schema.classes and
                   self.forward_reference(slot.range, cls.name)):
        rangelist[-1] = f'"{rangelist[-1]}"'
    return str(self.gen_class_reference(rangelist)), prox_type, prox_type_name
[docs] @staticmethod def gen_class_reference(rangelist: List[str]) -> str: """ Return a basic or a union type depending on the number of elements in range list :param rangelist: List of types from distal to proximal :return: """ base = rangelist[0].rsplit('.')[-1] return f"Union[{base}, {rangelist[-1]}]" if len(rangelist) > 1 else base
def gen_postinits(self, cls: ClassDefinition) -> str:
    """
    Generate all the typing and existence checks post initialize

    The rendered ``__post_init__`` holds, in order: ifabsent defaults, primary key checks
    (skipped for mixin and abstract classes), required slot checks, optional slot checks and
    finally the call to the parent ``__post_init__``.

    :param cls: class being rendered
    :return: source of the ``__post_init__`` method, or '' when there is nothing to check
    """
    post_inits_pre_super = []
    for slot in self.domain_slots(cls):
        if slot.ifabsent:
            dflt = ifabsent_postinit_declaration(slot.ifabsent, self, cls, slot)
            if dflt and dflt != "None":
                post_inits_pre_super.append(f'if self.{self.slot_name(slot.name)} is None:')
                post_inits_pre_super.append(f'\tself.{self.slot_name(slot.name)} = {dflt}')

    # TODO: slots with an ifabsent value are deliberately NOT skipped below; revisit whenever
    #       default_range gets fixed
    post_inits = []
    if not (cls.mixin or cls.abstract):
        pkeys = self.primary_keys_for(cls)
        for pkey in pkeys:
            post_inits.append(self.gen_postinit(cls, self.schema.slots[pkey]))
    else:
        pkeys = []
    # Required slots first, then the optional ones; primary keys were handled above
    for slot in self.domain_slots(cls):
        if slot.required and slot.name not in pkeys:
            post_inits.append(self.gen_postinit(cls, slot))
    for slot in self.domain_slots(cls):
        if not slot.required and slot.name not in pkeys:
            post_inits.append(self.gen_postinit(cls, slot))

    post_inits_pre_super_line = '\n\t\t'.join([p for p in post_inits_pre_super if p]) + \
        ('\n\t\t' if post_inits_pre_super else '')
    post_inits_line = '\n\t\t'.join([p for p in post_inits if p])
    if not (post_inits_line or post_inits_pre_super_line):
        return ''
    # Explicit newline / tab escapes: one tab per nesting level, like the joined fragments above
    return ('\n\tdef __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):'
            f'\n\t\t{post_inits_pre_super_line}{post_inits_line}'
            '\n\t\tsuper().__post_init__(**kwargs)' + '\n')
# sort classes such that if C is a child of P then C appears after P in the list def _sort_classes(self, clist: List[ClassDefinition]) -> List[ClassDefinition]: clist = list(clist) slist = [] # sorted while len(clist) > 0: for i in range(len(clist)): candidate = clist[i] can_add = False if candidate.is_a is None: can_add = True else: if candidate.is_a in [p.name for p in slist]: can_add = True if can_add: slist = slist + [candidate] del clist[i] break if not can_add: raise (f'could not find suitable element in {clist} that does not ref {slist}') return slist
def is_key_value_class(self, range_name: DefinitionName) -> bool:
    """
    Return True if range_name references a class with exactly one key and one value

    :param range_name: class definition (name)
    :return: True if meets the special case
    """
    rng = self.schema.classes.get(range_name)
    if not rng:
        return False
    pkeys = self.primary_keys_for(rng)
    if not pkeys:
        return False
    # Exactly one slot is left over once the key slots are taken out
    return len(rng.slots) - len(pkeys) == 1
def gen_postinit(self, cls: ClassDefinition, slot: SlotDefinition) -> Optional[str]:
    """
    Generate python post init rules for slot in class

    :param cls: class that owns the slot
    :param slot: slot to generate the existence check and type coercion for
    :return: generated statements joined for placement inside ``__post_init__``
    """
    lines: List[str] = []
    py_name = self.slot_name(slot.name)         # Mangled name by which the slot is known in python
    _range_type, _base_type, base_type_name = self.class_reference_type(slot, cls)
    slot_identifier = self.class_identifier(slot.range)
    target = f'self.{py_name}'

    # Generate existence check for required slots.  Note that inherited classes have to do post init checks
    # because you can't have required elements after optional elements in the parent class
    if slot.required:
        lines.append(f'if self._is_empty({target}):')
        lines.append(f'\tself.MissingRequiredField("{py_name}")')

    # Generate the type coercion for the various types.
    if not slot.multivalued:
        if slot.required:
            lines.append(f'if not isinstance({target}, {base_type_name}):')
        else:
            lines.append(f'if {target} is not None and not isinstance({target}, {base_type_name}):')
        range_classes = self.schema.classes
        if slot.range in range_classes and not range_classes[slot.range].slots:
            # A really weird case -- a class that has no properties
            lines.append(f'\t{target} = {base_type_name}()')
        elif (slot_identifier and not slot.inlined) or \
                slot.range in self.schema.types or \
                slot.range in self.schema.enums:
            # References, types and enums are built from the single value
            lines.append(f'\t{target} = {base_type_name}({target})')
        else:
            lines.append(f'\t{target} = {base_type_name}(**as_dict({target}))')
    elif slot.inlined:
        slot_range_cls = self.schema.classes[slot.range]
        identifier = self.class_identifier(slot_range_cls)
        if identifier:
            # Place for future expansion
            keyed = True
        else:
            # If we don't have an identifier, we will switch to the first required field in the target class
            keyed = False
            for range_slot_name in slot_range_cls.slots:
                range_slot = self.schema.slots[range_slot_name]
                if range_slot.required:
                    identifier = range_slot.name
                    break
        if identifier:
            normalizer = '_normalize_inlined_as_list' if slot.inlined_as_list else '_normalize_inlined_as_dict'
            lines.append(f'self.{normalizer}(slot_name="{py_name}", '
                         f'slot_type={base_type_name}, '
                         f'key_name="{self.aliased_slot_name(identifier)}", '
                         f'keyed={keyed})')
        else:
            # Multivalued, inlined and no identifier
            # TODO: JsonObj([...]) will not be treated correctly here.
            lines.append(f'if not isinstance({target}, list):')
            lines.append(f'\t{target} = [{target}] if {target} is not None else []')
            lines.append(f'{target} = [v if isinstance(v, {base_type_name}) '
                         f'else {base_type_name}(**as_dict(v)) for v in {target}]')
    else:
        # Multivalued and not inlined
        # TODO: JsonObj([...]) will fail here as well
        lines.append(f'if not isinstance({target}, list):')
        lines.append(f'\t{target} = [{target}] if {target} is not None else []')
        lines.append(f'{target} = [v if isinstance(v, {base_type_name}) '
                     f'else {base_type_name}(v) for v in {target}]')

    # A trailing empty entry makes the joined text end with the separator
    if lines:
        lines.append('')
    return '\n\t\t'.join(lines)
def _slot_iter(self, cls: ClassDefinition, test: Callable[[SlotDefinition], bool], first_hit_only: bool = False) \ -> Iterator[SlotDefinition]: """ Return the representation for the set of own slots in cls that pass test :param cls: Class containing a set of slots :param test: Slot test function :param first_hit_only: True means stop on first match. False means generate all :return: Set of slots that match """ for slot in self.all_slots(cls): if test(slot): yield slot if first_hit_only: break
def primary_keys_for(self, cls: ClassDefinition) -> List[SlotDefinitionName]:
    """
    Return the names of the slots of cls that are flagged as key or identifier.

    Note: the original documentation says at most one entry is expected at the moment and that
    composite keys may be supported at some point.

    @param cls: class to get keys for
    @return: List of primary keys or identifiers
    """
    keys = []
    for slot_name in cls.slots:
        slot = self.schema.slots[slot_name]
        if slot.key or slot.identifier:
            keys.append(slot_name)
    return keys
def key_name_for(self, class_name: ClassDefinitionName) -> Optional[str]:
    """
    Return the formatted class name followed by the camel-cased name of its first primary key.

    :param class_name: name of the class to look up in the schema
    :return: combined name, or None when the class has no primary key
    """
    pkeys = self.primary_keys_for(self.schema.classes[class_name])
    if not pkeys:
        return None
    return self.formatted_element_name(class_name, True) + camelcase(pkeys[0])
def range_type_name(self, slot: SlotDefinition) -> str:
    """
    Generate the type name for the slot

    :param slot: slot whose range path is rendered
    :return: the single path entry, or a Union of the first and last entries of a longer path
    """
    cidpath = self.slot_range_path(slot)
    if len(cidpath) >= 2:
        return f"Union[{cidpath[0]}, {cidpath[-1]}]"
    return cidpath[0]
def forward_reference(self, slot_range: str, owning_class: str) -> bool:
    """
    Determine whether slot_range is a forward reference

    :param slot_range: name of the class or enum being referenced
    :param owning_class: name of the class that holds the reference
    :return: False for imported ranges and for classes listed before owning_class, True otherwise
    """
    classes = self.schema.classes
    enums = self.schema.enums

    imported_class = slot_range in classes and classes[slot_range].imported_from
    imported_enum = slot_range in enums and enums[slot_range].imported_from
    if imported_class or imported_enum:
        return False
    if slot_range in enums:
        return True
    # Whichever of the two names is met first in schema order decides the answer
    for cname in classes:
        if cname == owning_class:
            return True         # Occurs on or after
        if cname == slot_range:
            return False        # Occurs before
    return True
def python_uri_for(self, uriorcurie: Union[str, URIRef]) -> Tuple[str, Optional[str]]:
    """
    Return the python form of uriorcurie

    :param uriorcurie: URI or CURIE to render
    :return: URI and CURIE form. When no namespace matches, the URI form is the value itself as
             a double quoted literal and the CURIE form is None
    """
    ns, ln = self.namespaces.prefix_suffix(uriorcurie)
    if ns == '':
        ns = 'DEFAULT_'
    if ns is None:
        # Interpolate the actual value; this previously returned the fixed text "str(uriorcurie)"
        return f'"{uriorcurie}"', None
    ns_var = ns.upper()
    # Attribute access only works for identifiers; anything else is looked up by subscript
    uri_form = ns_var + (f".{ln}" if ln.isidentifier() else f"['{ln}']")
    return uri_form, ns_var + f".curie('{ln}')"
def gen_slotdefs(self) -> str:
    """
    Render the ``slots`` holder class followed by one definition per locally defined slot.

    :return: slot section text, or '' when slot generation is switched off
    """
    if not self.gen_slots:
        return ''
    definitions = []
    for slot in self.schema.slots.values():
        if not slot.imported_from:
            definitions.append(self.gen_slot(slot))
    return "class slots:\n\tpass\n\n" + '\n\n'.join(definitions)
def gen_slot(self, slot: SlotDefinition) -> str:
    """
    Render the ``slots.<name> = Slot(...)`` assignment for a slot.

    :param slot: slot to render
    :return: assignment statement text
    """
    python_slot_name = underscore(slot.name)
    slot_uri, slot_curie = self.python_uri_for(slot.slot_uri)
    slot_model_uri, slot_model_curie = \
        self.python_uri_for(self.namespaces.uri_or_curie_for(self.schema.default_prefix, python_slot_name))

    # A mixin domain is rendered as None
    if slot.domain and not self.schema.classes[slot.domain].mixin:
        domain = camelcase(slot.domain)
    else:
        domain = "None"

    # Going to omit the range on keys where the domain isn't specified (for now)
    if slot.domain is None and (slot.key or slot.identifier):
        rnge = "URIRef"
    else:
        owner = self.schema.classes[slot.domain] if slot.domain else None
        rnge, _ = self.range_cardinality(slot, owner, True)

    map_texts = []
    if slot.mappings:
        for mapping in slot.mappings:
            # The slot's own uri is not repeated as a mapping
            if mapping != slot.slot_uri:
                map_texts.append(self.namespaces.curie_for(self.namespaces.uri_for(mapping),
                                                           default_ok=True, pythonform=True))
    mappings = ', mappings = [' + ', '.join(map_texts) + ']' if map_texts else ''
    pattern = f",\n pattern=re.compile(r'{slot.pattern}')" if slot.pattern else ""

    return (f'slots.{python_slot_name} = Slot(uri={slot_uri}, name="{slot.name}", curie={slot_curie}, '
            f'model_uri={slot_model_uri}, domain={domain}, range={rnge}{mappings}{pattern})')
def gen_enumerations(self) -> str:
    """
    Render every locally defined enumeration.

    :return: enum class sources separated by one blank line
    """
    rendered = []
    for enum in self.schema.enums.values():
        if not enum.imported_from:
            rendered.append(self.gen_enum(enum))
    return '\n\n'.join(rendered)
def gen_enum(self, enum: EnumDefinition) -> str:
    """
    Render the python class for an enumeration.

    :param enum: enumeration to render
    :return: class source -- header line, then the comment and the description body, each
             on its own tab-indented line
    """
    enum_name = camelcase(enum.name)
    # The class header must be on its own line with the body one tab level below it.
    # NOTE(review): line layout is a reconstruction -- confirm against the expected output
    return (f'\nclass {enum_name}(EnumDefinitionImpl):'
            f'\n\t{self.gen_enum_comment(enum)}'
            f'\n\t{self.gen_enum_description(enum, enum_name)}\n').strip()
def gen_enum_comment(self, enum: EnumDefinition) -> str:
    """
    Render the description of an enumeration as a docstring.

    :param enum: enumeration being rendered
    :return: docstring text, or '' when the enumeration has no description
    """
    if not be(enum.description):
        return ''
    return f'"""\n\t{wrapped_annotation(be(enum.description))}\n\t"""'
def gen_enum_description(self, enum: EnumDefinition, enum_name: str) -> str:
    """
    Render the body of an enumeration class: the permissible value class variables, the
    ``_defn`` declaration and the ``_addvals`` method (when one is needed).

    :param enum: enumeration being rendered
    :param enum_name: python name of the enumeration class
    :return: class body with surrounding whitespace stripped
    """
    # Each member must start on its own line, one tab level inside the class.
    # NOTE(review): blank-line layout is a reconstruction -- confirm against the expected output
    return (f'\n\t{self.gen_pvs(enum)}'
            f'\n\n\t{self.gen_enum_definition(enum, enum_name)}'
            f'\n{self.gen_pvs2(enum)}\n').strip()
def gen_pvs(self, enum: EnumDefinition) -> str:
    """
    Generate the python compliant permissible value initializers as a set of class variables

    @param enum: enumeration whose permissible values are rendered
    @return: one assignment per value whose text is an identifier and not a python keyword
    """
    assignments = []
    for pv in enum.permissible_values.values():
        if not str.isidentifier(pv.text) or keyword.iskeyword(pv.text):
            continue
        lead = f'{pv.text} = '
        # Constructor lines after the first are pushed right by the width of the lead
        continuation = '\n' + len(lead) * ' '
        assignments.append(lead + continuation.join(self.gen_pv_constructor(pv, len(lead))))
    return '\n\t'.join(assignments).strip()
def gen_enum_definition(self, enum: EnumDefinition, enum_name: str) -> str:
    """
    Render the ``_defn = EnumDefinition(...)`` declaration of an enumeration class.

    :param enum: enumeration being rendered
    :param enum_name: python name of the enumeration class
    :return: declaration text; optional arguments appear only when they are set on enum
    """
    arguments = [f'\t\tname="{enum_name}",\n']
    if enum.description:
        # Escape double quotes and backslash-n sequences so the text fits a quoted literal
        enum_desc = enum.description.replace('"', '\\"').replace(r'\n', r'\\n')
        arguments.append(f'\t\tdescription="{enum_desc}",\n')
    if enum.code_set:
        code_set = self.namespaces.curie_for(self.namespaces.uri_for(enum.code_set),
                                             default_ok=False, pythonform=True)
        arguments.append(f'\t\tcode_set={code_set},\n')
    if enum.code_set_tag:
        arguments.append(f'\t\tcode_set_tag="{enum.code_set_tag}",\n')
    if enum.code_set_version:
        arguments.append(f'\t\tcode_set_version="{enum.code_set_version}",\n')
    if enum.pv_formula:
        arguments.append(f'\t\tpv_formula=PvFormulaOptions.{enum.pv_formula.code.text},\n')
    return '_defn = EnumDefinition(\n' + ''.join(arguments) + '\t)'
def gen_pvs2(self, enum: EnumDefinition) -> str:
    """
    Generate the non-python compliant permissible value initializers as a set of setattr instructions

    @param enum: enumeration whose permissible values are rendered
    @return: source of an ``_addvals`` classmethod, or '' when every value text is a usable
             python identifier
    """
    needs_setattr = any(not str.isidentifier(pv.text) or keyword.iskeyword(pv.text)
                        for pv in enum.permissible_values.values())
    if not needs_setattr:
        return ''
    # Decorator, def and body each need their own line: the method is one tab level inside
    # the class and its body two.
    # NOTE(review): line layout is a reconstruction -- confirm against the expected output
    return ('\n\t@classmethod'
            '\n\tdef _addvals(cls):'
            f'\n\t\t{self.gen_pvs2_initializers(enum)}')
def gen_pvs2_initializers(self, enum: EnumDefinition) -> str:
    """
    Render one ``setattr(cls, "<text>", PermissibleValue(...))`` statement for every permissible
    value whose text is not an identifier or is a python keyword.

    :param enum: enumeration whose permissible values are rendered
    :return: statements separated by newlines; the indent of the first statement is stripped
             (the caller supplies it), later statements carry two tabs
    """
    # Statements sit two levels deep (class -> method). Tabs keep them level with the first
    # statement, whose indent comes from the caller's template
    lead = '\t\tsetattr('
    # Continuation lines are aligned for a four column tab
    continuation = len(lead.expandtabs(4)) * ' '
    statements = []
    for pv in enum.permissible_values.values():
        if not str.isidentifier(pv.text) or keyword.iskeyword(pv.text):
            pv_cons = '\n'.join(self.gen_pv_constructor(pv, len(continuation)))
            # Escape double quotes and backslash-n sequences so the text fits a quoted literal
            pv_text = pv.text.replace('"', '\\"').replace(r'\n', r'\\n')
            statements.append(f'{lead}cls, "{pv_text}",\n{continuation}{pv_cons} )')
    return '\n'.join(statements).strip()
def gen_pv_constructor(self, pv: PermissibleValue, indent: int) -> List[str]:
    """
    Generate a permissible value constructor

    The text and description are escaped (double quotes and backslash-n sequences) before being
    placed in their double quoted literals, matching the escaping used for enum descriptions
    and setattr names elsewhere in this generator.

    @param pv: Value to be constructed
    @param indent: number of additional spaces to add on successive lines
    @return: Permissible value constructor
    """
    def quoted(text: str) -> str:
        # Keep the generated literal well formed when the value contains a double quote
        return text.replace('"', '\\"').replace(r'\n', r'\\n')

    #  PermissibleValue(text="CODE",
    #                   description="...",
    #                   meaning="...")
    constructor = 'PermissibleValue('
    pad = (len(constructor) + indent) * ' '
    # Whichever argument comes last closes the parenthesis
    text_end = ',' if pv.description or pv.meaning else ')'
    rval = [f'{constructor}text="{quoted(pv.text)}"{text_end}']
    if pv.description:
        description_end = ',' if pv.meaning else ')'
        rval.append(f'{pad}description="{quoted(pv.description)}"{description_end}')
    if pv.meaning:
        pv_meaning = self.namespaces.curie_for(self.namespaces.uri_for(pv.meaning), default_ok=False,
                                               pythonform=True)
        rval.append(f'{pad}meaning={pv_meaning})')
    return rval
@shared_arguments(PythonGenerator)
@click.command()
@click.option("--head/--no-head", default=True, help="Emit metadata heading")
@click.option("--genmeta/--no-genmeta", default=False,
              help="Generating metamodel. Only use this for generating meta.py")
@click.option("--classvars/--no-classvars", default=True, help="Generate CLASSVAR info")
@click.option("--slots/--no-slots", default=True, help="Generate Slot information")
def cli(yamlfile, head=True, genmeta=False, classvars=True, slots=True, **args):
    """Generate python classes to represent a LinkML model"""
    # The docstring above is the help text of the command, so it is left exactly as it was
    generator = PythonGenerator(yamlfile, emit_metadata=head, genmeta=genmeta, gen_classvars=classvars,
                                gen_slots=slots, **args)
    print(generator.serialize(emit_metadata=head, **args))


if __name__ == '__main__':
    cli()