import enum
# Plain string constants; presumably used as dictionary keys -- confirm
# against callers.
ID = "_id"
START = "start"
END = "end"

# File name of the hash file; the ``.md5`` suffix is part of the name.
HASH_FILE = "document_summary_hash.md5"

# File extensions, written without a leading dot.
IMG_EXTENSION = "jpeg"
OCR_DATA_EXTENSION = "json"
SNAPSHOT_EXTENSION = "pickle"
class FieldDataTypeEnum(enum.Enum):
    """Enumeration of field data type labels.

    Each value is a lowercase string label. Note that the value of
    ``extraction_and_computation`` contains spaces, unlike its member name.
    """

    extraction = "extraction"
    computation = "computation"
    classification = "classification"
    extraction_and_computation = "extraction and computation"
class ApiConfigEnum(enum.Enum):
    """API configuration constants: URL path fragments and a default timeout.

    The endpoint values are built from ``API_PREFIX`` while the class body
    executes. At that point the name is still bound to the plain prefix
    string (members are created only after the body finishes), so every
    endpoint member's value is a complete path string.
    """

    API_PREFIX = "/api/v1/integration/"
    CREATE_TASK_ENDPOINT = f"{API_PREFIX}document/modelinput"
    RUN_MODEL_ENDPOINT = f"{API_PREFIX}data"
    # NOTE(review): the unit is not stated here -- presumably seconds; confirm.
    DEFAULT_TIMEOUT = 600
class StorageEnum(enum.Enum):
    """Folder and file names used by the storage layout.

    ``ocr_folder`` and ``image_folder`` hold folder names; the ``*_file``
    members hold file names including their extension.
    """

    ocr_folder = "data"
    image_folder = "images"
    doc_file = "document.json"
    pdf_file = "original.pdf"
    html_file = "source.html"
    # Disabled member, kept from the original source for reference:
    # snap_file = f"snap.{SNAPSHOT_EXTENSION}"
class EnvConfigEnum(enum.Enum):
    """Configuration variable names.

    Presumably these are environment variable names (per the class name) --
    confirm against callers. Most values repeat the member name; ``HOST`` and
    ``X_AUTH`` are the exceptions (``"API_HOST"`` and ``"X_AUTH_TOKEN"``).
    """

    IQ_DB_URL = "IQ_DB_URL"
    IQ_SNAP_STORAGE_PATH = "IQ_SNAP_STORAGE_PATH"
    IQ_DOCUMENTS_STORAGE_PATH = "IQ_DOCUMENTS_STORAGE_PATH"  # Images and OCR
    # IQ_DATASNAP_URL = "IQ_DATASNAP_URL"
    SNAPSHOT_ID = "SNAPSHOT_ID"
    SNAPSHOT_PATH = "SNAPSHOT_PATH"  # Images, OCR and snap.json
    HOST = "API_HOST"
    X_AUTH = "X_AUTH_TOKEN"
class IqCollectionEnum(enum.Enum):
    """Collection names.

    Values match the member names except ``recipes``, whose value is
    ``"datarecipes"``.
    """

    documents = "documents"
    collections = "collections"
    templates = "templates"
    recipes = "datarecipes"
    datasets = "datasets"
class IqDatasetKeyEnum(enum.Enum):
    """Key names of a dataset record.

    Member names are snake_case; the values are the stored key spellings
    (for example ``recipe_id`` -> ``"dataRecipeId"``).
    """

    collections = "collections"
    dataset_type = "type"
    recipe_id = "dataRecipeId"
class IqDatasetTypeEnum(enum.Enum):
    """Dataset type labels: ``snapshot`` or ``pipeline``."""

    snapshot = "snapshot"
    pipeline = "pipeline"
class IqDocumentKeysEnum(enum.Enum):
    """Key names of a document record.

    Member names are snake_case; the values are the stored key spellings.
    ``page_src`` has the same value as ``src``, so ``enum`` treats it as an
    alias: ``IqDocumentKeysEnum.page_src is IqDocumentKeysEnum.src`` and
    iteration yields ``src`` only.
    """

    classes = "classes"
    collection_id = "collectionId"
    data_type = "dataType"
    table = "table"
    templates = "templates"
    template = "template"
    fields = "fields"
    tags = "tags"
    pages = "pages"
    name = "name"
    section = "section"
    field_type = "fieldType"
    field_category = "fieldCategories"
    field_id = "_id"
    src_field_id = "srcFieldId"
    page = "page"
    src = "src"
    page_src = "src"  # alias of ``src`` (identical value)
    text_span = "text-span"
    # TODO: move Document enum values to corresponding enum (field/tag etc)
class IqFieldKeyEnum(enum.Enum):
    """Key names of a field record.

    Member names are snake_case; the values are the stored key spellings
    (mostly camelCase). The members called ``name`` and ``value`` are
    ordinary members; subscript access (``IqFieldKeyEnum["value"]``) is the
    unambiguous way to reach them.
    """

    data_type = "dataType"
    field_type = "fieldType"
    group = "group"
    group_key = "groupKey"
    has_valid_value = "hasValidValue"
    multiple = "multiple"
    name = "name"
    order = "order"
    repeatable = "repeatable"
    required = "required"
    src_field_id = "srcFieldId"
    tags = "tags"
    template_id = "templateId"
    value = "value"
    calculated_value = "calculatedValue"
    ocr_value = "ocrValue"
    repeat_parent = "repeatParent"
    section = "section"
    scale = "scale"
    mapping = "mapping"
class IqTagKeyEnum(enum.Enum):
    """Key names of a tag record.

    Covers the value keys, the ``top``/``left``/``width``/``height``/``page``
    keys and the ``isTable``/``confidence`` keys. Member names are
    snake_case; the values are the stored key spellings.
    """

    value = "value"
    has_valid_value = "hasValidValue"
    top = "top"
    left = "left"
    width = "width"
    height = "height"
    page = "page"
    ocr_value = "ocrValue"
    has_valid_ocr_value = "hasValidOcrValue"
    is_table = "isTable"
    confidence = "confidence"
class IqPageEnum(enum.Enum):
    """Key names of a page record: ``page`` and ``src``."""

    page = "page"
    src = "src"
class IqDataTypesEnum(enum.Enum):
    """Data type labels.

    Values match the member names except ``span``, whose value is
    ``"text-span"``.
    """

    table = "table"
    text = "text"
    section = "section"
    date = "date"
    number = "number"
    area = "area"
    link = "link"
    span = "text-span"
class IqRecipeEnum(enum.Enum):
    """Key names of a recipe record.

    The values mix spellings: camelCase (``"fieldId"``, ``"pythonName"``),
    snake_case (``"template_id"``, ``"document_name"``) and
    underscore-prefixed (``"_id"``, ``"__v"``). They are reproduced exactly
    as found.
    """

    id = "_id"
    field_id = "fieldId"
    name = "name"
    templates = "templates"
    template = "template"
    template_id = "template_id"
    fields = "fields"
    python_name = "pythonName"
    document_name = "document_name"
    user = "user"
    raw_field_type = "rawFieldType"
    version = "__v"
class IqTableTagEnum(enum.Enum):
    """Key names of a table tag record.

    Every value is identical to its member name.
    """

    table = "table"
    cells = "cells"
    page = "page"
    left = "left"
    top = "top"
    height = "height"
    width = "width"
    confidence = "confidence"
class IqCellKeyEnum(enum.Enum):
    """Key names of a table cell record.

    Three values differ from their member names: ``col_span`` ->
    ``"colspan"``, ``row_span`` -> ``"rowspan"`` and ``text`` -> ``"value"``.
    """

    left_col_top_row = "left_col_top_row"
    col_span = "colspan"
    row_span = "rowspan"
    text = "value"
    left = "left"
    top = "top"
    width = "width"
    height = "height"
class PythonShellEnum(enum.Enum):
    """Identifiers for the kind of Python shell.

    NOTE(review): the values look like shell class names -- presumably they
    are compared with the running shell's class name; confirm against callers.
    """

    zmq_interactive_shell = "ZMQInteractiveShell"
    terminal_interactive_shell = "TerminalInteractiveShell"
    other_type_shell = "OtherTypeShell"
    standard_python_interpreter = "StandardPythonInterpreter"
class FieldTypeEnum(enum.Enum):
    """Enumeration of field types: input, output, or both."""

    INPUT_FIELD = "input"
    OUTPUT_FIELD = "output"
    BOTH = "both"
class XBRLTagEnum(enum.Enum):
    """Enumeration for XBRL document fields.

    Member names are snake_case; the values are the stored key spellings.
    ``td_id`` (``"_id"``) and ``id`` (``"id"``) are distinct members.
    """

    source = "source"
    td_id = "_id"
    xpath = "xpath"
    anchor_id = "anchorId"
    id = "id"
    is_bold = "isBold"
    left_indentation = "leftIndentation"
    ids = "ids"
    row_index = "rowIndex"
    col_index = "colIndex"
    tag_id = "tagId"
    ocr_value = "ocrValue"
    value = "value"
    is_table = "isTable"
    html = "html"
    parent_id = "parentId"
class XBRLCellEnum(enum.Enum):
    """Key names of an XBRL table cell record.

    ``html_id`` has the same value as ``id``, so ``enum`` treats it as an
    alias: ``XBRLCellEnum.html_id is XBRLCellEnum.id`` and iteration yields
    ``id`` only.
    """

    id = "id"
    source = "source"
    ids = "ids"
    html_id = "id"  # alias of ``id`` (identical value)
    xpath = "xpath"
    row_index = "rowIndex"
    col_index = "colIndex"
    col_span = "colspan"
    row_span = "rowspan"
    raw_value = "value"
    is_bold = "isBold"
    left_indentation = "leftIndentation"
class XBRLTableTagEnum(enum.Enum):
    """Key names of an XBRL table tag record.

    ``_id`` is a regular member: a single leading underscore without a
    trailing one is not a reserved ``_sunder_`` name in ``enum``.
    """

    source = "source"
    ocr_value = "ocrValue"
    value = "value"
    is_table = "isTable"
    is_bold = "isBold"
    left_indentation = "leftIndentation"
    anchor_id = "anchorId"
    xpath = "xpath"
    table = "table"
    cells = "cells"
    title = "title"
    _id = "_id"
class PageLayoutEnum(enum.Enum):
    """Enum of page layout blocks, including tables.

    Every value carries the ``page_layout__`` prefix except ``TABLE``, whose
    value is ``"tables__table"``. The two commented-out labels are not
    members.
    """

    TEXT = "page_layout__text"
    # 'page_layout__picture'
    CAPTION = "page_layout__caption"
    SECTION_HEADER = "page_layout__section_header"
    FOOTNOTE = "page_layout__footnote"
    FORMULA = "page_layout__formula"
    # 'page_layout__table'
    LIST_ITEM = "page_layout__list_item"
    PAGE_HEADER = "page_layout__page_header"
    PAGE_FOOTER = "page_layout__page_footer"
    TITLE = "page_layout__title"
    TABLE = "tables__table"