Source code for h1st.schema.validators.pyarrow_validator

import pyarrow as pa
from h1st.schema.validators.base import BaseValidator


class PyArrowSchemaValidator(BaseValidator):
    """Validate schemas whose ``'type'`` entry is a pyarrow schema."""

    def is_applicable(self, schema):
        """Tell whether this validator handles ``schema``.

        Args:
            schema: Mapping-like object; only ``schema.get('type')`` is read.

        Returns:
            ``True`` when ``schema.get('type')`` is a ``pa.Schema`` instance,
            ``False`` otherwise.
        """
        return isinstance(schema.get('type'), pa.Schema)

    def validate_type(self, upstream, downstream):
        """Check that every field required downstream is provided upstream.

        For each field name in ``downstream['type']`` the matching field is
        looked up in ``upstream['type']``. A missing field is reported;
        otherwise the two field types are handed to ``self.validate`` (not
        defined in this class -- presumably inherited from ``BaseValidator``)
        and every error it produces is reported with the field name prefixed.

        Fields that exist only upstream are ignored.

        Args:
            upstream: Schema definition whose ``'type'`` entry is the
                producing schema.
            downstream: Schema definition whose ``'type'`` entry is the
                consuming schema.

        Returns:
            A list of human-readable error strings, in downstream field
            order; empty when no problem was found.
        """
        result = []
        upstream = upstream['type']
        downstream = downstream['type']

        # Build the lookup set once instead of re-reading ``upstream.names``
        # and scanning it linearly for every downstream field.
        upstream_names = set(upstream.names)

        for name in downstream.names:
            # check if field is available
            if name not in upstream_names:
                result.append(f'Field "{name}" is missing')
                continue

            field = downstream.field(name)
            upstream_field = upstream.field(name)

            # check if type is compatible
            # TODO: check if we can upcast
            # TODO: pyarrow dict, struct
            result.extend(
                f'Field "{name}": {err}'
                for err in self.validate(upstream_field.type, field.type)
            )

        return result