Source code for h1st.schema.schema_validator

from h1st.schema.schema_inferrer import SchemaInferrer
from h1st.schema.validators.type_helper import type_name, is_list_type, get_list_type, validate_python_type
from h1st.schema.validators import UnionValidator, ListValidator, PyArrowSchemaValidator, NumpySchemaValidator, FieldValidator
from h1st.schema.schema_validation_result import SchemaValidationResult

__all__ = ['SchemaValidator']

[docs]class SchemaValidator: _validators = [ PyArrowSchemaValidator, NumpySchemaValidator, UnionValidator, ListValidator, FieldValidator, ]
[docs] def validate(self, data, schema) -> SchemaValidationResult: """ Validate the given data with a schema.:: data = [1, 2, 3, 4, 5] # list of integer schema = {'type': list, 'item': str} result = SchemaValidator().validate(data, schema) print(result.errors) The current implementation infers the schema from the data, and then compare two schema together. In the future, we will use data directly to compare the schema. :param data: data to be validated, it can be anything. :param schema: target schema :return: validation result """ inferrer = SchemaInferrer() return self.validate_downstream_schema(inferrer.infer_schema(data), schema)
[docs] def validate_downstream_schema(self, source, target) -> SchemaValidationResult: """ Compare two schema and return the differences. :param source: source schema :param target: target schema :return: validation result """ return SchemaValidationResult(self._validate(source, target))
def _validate(self, upstream, downstream) -> list: # when there is one schema missing, ignore the validation if upstream is None or downstream is None: return [] # normalize schema to dict type upstream = self._normalize_type(upstream) downstream = self._normalize_type(downstream) result = [] if not validate_python_type(upstream.get('type'), downstream.get('type')): result.append(f'Expects {type_name(downstream)}, receives {type_name(upstream)}') return result for klass in self._validators: validator = klass() validator.validate = self._validate if validator.is_applicable(downstream): result += validator.validate_type(upstream, downstream) break return result def _normalize_type(self, t): """ Normalize type to dict type format """ if isinstance(t, dict): return t if is_list_type(t): return { 'type': list, 'item': get_list_type(t) } return {'type': t}