# Source code for linkml.utils.rawloader

import copy
from datetime import datetime
from typing import Union, TextIO, Optional
from urllib.parse import urlparse

import yaml
from hbreader import FileInfo, detect_type, HBType
from linkml_runtime.linkml_model.meta import SchemaDefinition, metamodel_version, SlotDefinition, ClassDefinition
from linkml_runtime.loaders import yaml_loader
from linkml_runtime.utils.yamlutils import YAMLMark, YAMLRoot

from linkml.utils.mergeutils import set_from_schema

yaml.error.Mark = YAMLMark


# Override the default linkml missing value tests
[docs]def mrf(self, field_name: str) -> None: if isinstance(self, SchemaDefinition) and field_name == "name" and self.id: id_parts = self.id.replace('#', '/').rsplit('/') self.name = id_parts[-1] else: YAMLRoot.MissingRequiredField(self, f"{type(self).__name__}.{field_name}")
SchemaDefinition.MissingRequiredField = mrf
[docs]def load_raw_schema(data: Union[str, dict, TextIO], source_file: Optional[str] = None, source_file_date: Optional[str] = None, source_file_size: Optional[int] = None, base_dir: Optional[str] = None, merge_modules: Optional[bool] = True, emit_metadata: Optional[bool] = True) -> SchemaDefinition: """ Load and flatten SchemaDefinition from a file name, a URL or a block of text @param data: URL, file name or block of text YAML Object or open file handle @param source_file: Source file name for the schema if data is type TextIO @param source_file_date: timestamp of source file if data is type TextIO @param source_file_size: size of source file if data is type TextIO @param base_dir: Working directory or base URL of sources @param merge_modules: True means combine modules into one source, false means keep separate @param emit_metadata: True means add source file info to the output @return: Un-processed Schema Definition object """ def _name_from_url(url) -> str: return urlparse(url).path.rsplit('/', 1)[-1].rsplit('.', 1)[0] # Passing a URL or file name if detect_type(data, base_dir) not in (HBType.STRING, HBType.STRINGABLE): assert source_file is None, "source_file parameter not allowed if data is a file or URL" assert source_file_date is None, "source_file_date parameter not allowed if data is a file or URL" assert source_file_size is None, "source_file_size parameter not allowed if data is a file or URL" # Convert the input into a valid SchemaDefinition if isinstance(data, (str, dict, TextIO)): # TODO: Build a generic loader that detects type from suffix or content and invokes the appropriate loader schema_metadata = FileInfo() schema_metadata.source_file = source_file schema_metadata.source_file_date = source_file_date schema_metadata.source_file_size = source_file_size schema_metadata.base_path = base_dir schema = yaml_loader.load(copy.deepcopy(data) if isinstance(data, dict) else data, SchemaDefinition, base_dir=base_dir, metadata=schema_metadata) elif 
isinstance(data, SchemaDefinition): schema = copy.deepcopy(data) else: raise ValueError("Unrecognized input to raw loader") if schema is None: raise ValueError("Empty schema - cannot process") if schema.name is None: if schema.id is None: raise ValueError("Unable to determine schema name") else: schema.name = _name_from_url(schema.id) elif schema.id is None: # TODO: figure out how to generate this from the default_prefix and namespace map raise ValueError("Schema identifier must be supplied") if emit_metadata: schema.source_file = schema_metadata.source_file schema.source_file_date = schema_metadata.source_file_date schema.source_file_size = schema_metadata.source_file_size schema.generation_date = datetime.now().strftime("%Y-%m-%d %H:%M") schema.metamodel_version = metamodel_version set_from_schema(schema) return schema