import logging
import math
from typing import List, Optional, Dict, Type
from pycognaize.common.enums import (
IqDocumentKeysEnum,
ID,
IqFieldKeyEnum,
IqDataTypesEnum
)
from pycognaize.document.html_info import HTML
from pycognaize.document.page import Page
from pycognaize.document.field import Field
from pycognaize.document.tag import ExtractionTag
from pycognaize.document.tag.html_tag import HTMLTag
# (Sphinx "[docs]" source-link marker removed: extraction residue, not code.)
class NumericField(Field):
    """Base class for all pycognaize number fields.

    Every textual input (the field value, the calculated value and the raw
    value of each tag) is converted with :meth:`convert_to_numeric`, so the
    numeric properties hold ``float`` values and use ``NaN`` to signal
    "missing or not convertible".
    """

    # Tag classes used by ``construct_from_raw``: ``tag_class`` when pages
    # are supplied, ``html_tag_class`` otherwise.
    tag_class: Type[ExtractionTag] = ExtractionTag
    html_tag_class: Type[HTMLTag] = HTMLTag

    def __init__(self,
                 name: str,
                 value: str = '',
                 calculated_value: str = '',
                 tags: Optional[List[ExtractionTag]] = None,
                 field_id: Optional[str] = None,
                 group_key: Optional[str] = None,
                 confidence: Optional[float] = -1.0,
                 group_name: Optional[str] = None,
                 scale: Optional[int] = None,
                 mapping: Optional[List[Dict[str, str]]] = None,
                 is_calculated: Optional[bool] = None
                 ):
        """Create a numeric field.

        ``name``, ``tags``, ``value``, ``group_key``, ``confidence``,
        ``group_name`` and ``mapping`` are forwarded unchanged to
        ``Field.__init__``.

        :param name: Name of the field
        :param value: Raw field value; kept as-is in ``raw_field_value``
            and converted to a float for ``field_value``
        :param calculated_value: Raw calculated value, converted to a float
        :param tags: Tags of the field; when non-empty, ``value`` and
            ``tag_value`` become the sum of the tags' converted raw values
        :param field_id: Identifier written by ``to_dict`` under ``ID``
        :param group_key: Forwarded to ``Field.__init__``
        :param confidence: Forwarded to ``Field.__init__``
        :param group_name: Forwarded to ``Field.__init__``
        :param scale: Stored as-is and written back by ``to_dict``
        :param mapping: Forwarded to ``Field.__init__``
        :param is_calculated: Stored as-is, exposed via ``is_calculated``
        """
        super().__init__(name=name, tags=tags, value=value,
                         group_key=group_key, confidence=confidence,
                         group_name=group_name, mapping=mapping)
        self.scale = scale
        self._field_id = field_id
        self._raw_field_value = value
        self._calculated_value = self.convert_to_numeric(calculated_value)

        # Default value: the converted field value, falling back to the
        # calculated value when the field value is not a number.
        self._value = self.convert_to_numeric(value)
        if math.isnan(self._value):
            self._value = self.calculated_value
        self._field_value = self.convert_to_numeric(value)

        # Tags take precedence over both values above: when present, the
        # field value is the sum of the tags' converted raw values (NaN if
        # any single tag is not convertible).
        self._tag_value = None
        if self.tags:
            self._value = sum(self.convert_to_numeric(tag.raw_value)
                              for tag in self.tags)
            self._tag_value = self._value
        self._is_calculated = is_calculated

    @property
    def name(self):
        """Name of the field."""
        return self._name

    @property
    def value(self):
        """Effective value: tag sum if tags exist, else field value,
        else calculated value."""
        return self._value

    @property
    def calculated_value(self):
        """Calculated value converted to float (NaN if not convertible)."""
        return self._calculated_value

    @property
    def field_value(self):
        """Field value converted to float (NaN if not convertible)."""
        return self._field_value

    @property
    def tag_value(self):
        """Sum of the converted tag values, or None when there are no tags."""
        return self._tag_value

    @property
    def raw_field_value(self):
        """The ``value`` argument exactly as passed to the constructor."""
        return self._raw_field_value

    @property
    def is_calculated(self):
        """The ``is_calculated`` flag as passed to the constructor."""
        return self._is_calculated

    @classmethod
    def construct_from_raw(cls, raw: dict, pages: Dict[int, Page],
                           html: Optional[HTML] = None,
                           labels=None) -> 'NumericField':
        """Create NumericField object from dictionary.

        :param raw: Dictionary describing the field; the tags, value, name
            and ``ID`` keys are required, the remaining keys are optional
        :param pages: Mapping from page number to page; when truthy, tags
            are built with ``tag_class`` using ``pages[tag['page']]``
        :param html: Used to build tags with ``html_tag_class`` when
            ``pages`` is falsy
        :param labels: Accepted but not used by this method
        :return: The constructed field
        """
        tag_dicts: List[dict] = raw[IqDocumentKeysEnum.tags.value]
        tags = []
        for tag_dict in tag_dicts:
            # Best effort: a tag that cannot be built is logged and skipped
            # so that one bad tag does not prevent creating the field.
            try:
                if pages:
                    tags.append(cls.tag_class.construct_from_raw(
                        raw=tag_dict, page=pages[tag_dict['page']]))
                else:
                    tags.append(cls.html_tag_class.construct_from_raw(
                        raw=tag_dict, html=html))
            except Exception as e:
                logging.debug(f"Failed creating tag for numeric field"
                              f" {raw[ID]}: {e}")
        calculated_value = raw.get(IqFieldKeyEnum.calculated_value.value, '')
        field_value = raw[IqFieldKeyEnum.value.value]
        # When html has a truthy path and at least one tag was built, the
        # first tag's raw value replaces the stored field value. ``html``
        # defaults to None, so it must be checked before reading ``path``.
        if html is not None and html.path and tags:
            field_value = tags[0].raw_value
        is_calculated = raw.get(
            IqFieldKeyEnum.field_type.value, ''
        ) == "extraction and computation"
        return cls(name=raw[IqDocumentKeysEnum.name.value],
                   value=field_value,
                   is_calculated=is_calculated,
                   calculated_value=calculated_value,
                   tags=tags,
                   field_id=str(raw[ID]),
                   group_key=raw.get(IqFieldKeyEnum.group_key.value, ''),
                   group_name=raw.get(IqFieldKeyEnum.group.value, ''),
                   scale=raw.get(IqFieldKeyEnum.scale.value, ''),
                   mapping=raw.get(IqFieldKeyEnum.mapping.value, [])
                   )

    @staticmethod
    def convert_to_numeric(value):
        """Convert ``value`` to float, returning NaN if that fails.

        :param value: Any object accepted by ``float()``
        :return: ``float(value)``, or ``float('nan')`` when the conversion
            raises
        """
        # Deliberately broad: any conversion failure maps to NaN.
        # noinspection PyBroadException
        try:
            value = float(value)
        except Exception:
            value = float('nan')
        return value

    def to_dict(self) -> dict:
        """Converts NumericField object to dictionary.

        Starts from ``Field.to_dict()`` and adds the field id, value, data
        type and scale. The calculated value is included only when it is a
        real number (neither None nor NaN).

        :return: Dictionary representation of the field
        """
        field_dict = super().to_dict()
        field_dict[ID] = self._field_id
        field_dict[IqFieldKeyEnum.value.value] = self.value
        field_dict[
            IqFieldKeyEnum.data_type.value] = IqDataTypesEnum.number.value
        if (self.calculated_value is not None
                and not math.isnan(self.calculated_value)):
            field_dict[
                IqFieldKeyEnum.calculated_value.value] = self.calculated_value
        # Address the scale key directly on the enum class, the same way
        # construct_from_raw reads it, instead of via another member.
        field_dict[IqFieldKeyEnum.scale.value] = self.scale
        return field_dict

    def __repr__(self):
        """Return ``<ClassName: name: raw values joined by '|'>``."""
        joined = '|'.join(tag.raw_value for tag in (self.tags or ()))
        return f"<{self.__class__.__name__}: {self.name}: {joined}>"

    def __str__(self):
        """Return the raw values of the tags joined by ``'|'``."""
        return '|'.join(tag.raw_value for tag in (self.tags or ()))