# Source code for pycognaize.document.field.section_field

import logging
from typing import List, Optional, Dict, Type

import bson

from pycognaize.common.enums import (
    IqDocumentKeysEnum,
    ID, START, END,
    IqFieldKeyEnum, IqDataTypesEnum,
)
from pycognaize.document.html_info import HTML
from pycognaize.document.page import Page
from pycognaize.document.field import Field
from pycognaize.document.tag.section_tag import SectionTag


class SectionField(Field):
    """Base class for all pycognaize Section fields.

    The first entry of ``self.tags`` is exposed as :attr:`start`; when the
    field holds exactly two tags, the second one is exposed as :attr:`end`.
    """

    tag_class: Type[SectionTag] = SectionTag

    def __init__(self,
                 name: str,
                 value: str = '',
                 ocr_value: str = '',
                 tags: Optional[List[SectionTag]] = None,
                 field_id: Optional[str] = None,
                 group_key: Optional[str] = None,
                 confidence: Optional[float] = -1.0,
                 group_name: Optional[str] = None,
                 mapping: Optional[List[Dict[str, str]]] = None
                 ):
        """Create a SectionField object.

        :param name: Name of the field
        :param value: Value of the field
            (left empty if values provided through tags)
        :param ocr_value: OCR value of the field, stored as given
        :param tags: List of tag objects
        :param field_id: The id of the field
        :param group_key: Passed through to ``Field.__init__``
        :param confidence: Passed through to ``Field.__init__``
        :param group_name: Passed through to ``Field.__init__``
        :param mapping: Passed through to ``Field.__init__``
        """
        super().__init__(name=name, tags=tags, group_key=group_key,
                         confidence=confidence, group_name=group_name,
                         mapping=mapping)
        # Assigned after the base initializer so these values take precedence
        # over anything the base class may have set on the same attributes.
        self._field_id = field_id
        self._value = value
        self._ocr_value = ocr_value

    @property
    def start(self) -> Optional[SectionTag]:
        """Return the first tag of the field, or None if there are no tags."""
        return self.tags[0] if len(self.tags) > 0 else None

    @property
    def end(self) -> Optional[SectionTag]:
        """Return the second tag when the field has exactly two tags.

        Returns None for any other number of tags.
        """
        # NOTE(review): with more than two tags this yields None, which also
        # makes ``to_dict`` emit no tag data - confirm that this is intended.
        return self.tags[1] if len(self.tags) == 2 else None

    @classmethod
    def construct_from_raw(cls, raw: dict, pages: Dict[int, Page],
                           html: Optional[HTML] = None,
                           labels=None) -> 'SectionField':
        """Create SectionField object from dictionary.

        :param raw: Raw field dictionary; must contain the tags, name and id
            keys, while group key, group name and mapping are optional
        :param pages: Pages handed to ``tag_class.construct_from_raw``
        :param html: Accepted for interface compatibility; not used here
        :param labels: Accepted for interface compatibility; not used here
        :return: The constructed SectionField
        :raises KeyError: If a required key is missing from ``raw`` or a tag
            entry has no section key
        """
        section_dicts: List[dict] = raw[IqDocumentKeysEnum.tags.value]
        tags = []
        for section_dict in section_dicts:
            section = section_dict[IqDocumentKeysEnum.section.value]
            for tag_type, tag_data in section.items():
                try:
                    tags.append(cls.tag_class.construct_from_raw(
                        raw=tag_data, pages=pages, tag_type=tag_type))
                except Exception as e:
                    # Best effort: a tag that cannot be built is skipped.
                    # ``raw.get`` keeps the handler itself from raising.
                    logging.debug("Failed creating tag for field %s: %s",
                                  raw.get(ID), e)
        # Value and OCR value are read from the first tag entry only; fall
        # back to '' (the constructor defaults) when the entry or key is
        # absent.
        first_section = section_dicts[0] if section_dicts else {}
        return cls(
            name=raw[IqDocumentKeysEnum.name.value],
            value=first_section.get(IqFieldKeyEnum.value.value, ''),
            ocr_value=first_section.get(IqFieldKeyEnum.ocr_value.value, ''),
            tags=tags,
            field_id=str(raw[ID]),
            group_key=raw.get(IqFieldKeyEnum.group_key.value, ''),
            group_name=raw.get(IqFieldKeyEnum.group.value, ''),
            mapping=raw.get(IqFieldKeyEnum.mapping.value, []),
        )

    def to_dict(self) -> dict:
        """Convert SectionField object to dictionary.

        The tags list holds a single entry with the start and end tags when
        both are available and is empty otherwise.

        :return: Dictionary representation of the field
        """
        field_dict = {
            IqFieldKeyEnum.name.value: self.name,
            ID: self.field_id,
            IqFieldKeyEnum.group_key.value: self._group_key,
            IqFieldKeyEnum.group.value: self._group_name,
            IqFieldKeyEnum.mapping.value: self._mapping,
            # The field-level value is always empty; the actual value is
            # stored on the tag entry below.
            IqFieldKeyEnum.value.value: '',
            IqFieldKeyEnum.data_type.value: IqDataTypesEnum.section.value,
            IqFieldKeyEnum.tags.value: [],
        }
        start, end = self.start, self.end
        if start is not None and end is not None:
            tag_data = {
                # A fresh id is generated on every call.
                ID: str(bson.ObjectId()),
                IqFieldKeyEnum.value.value: self._value,
                IqFieldKeyEnum.ocr_value.value: self._ocr_value,
                IqFieldKeyEnum.section.value: {
                    START: start.to_dict(),
                    END: end.to_dict(),
                },
            }
            field_dict[IqFieldKeyEnum.tags.value].append(tag_data)
        return field_dict

    def __repr__(self):
        """Return ``<ClassName: field name>``."""
        return f"<{self.__class__.__name__}: {self.name}>"

    def __str__(self):
        """Return the same text as ``__repr__``."""
        return f"<{self.__class__.__name__}: {self.name}>"