# Source code for pycognaize.index

import abc
from typing import Any
import requests

from pycognaize.document import Document


class Index(metaclass=abc.ABCMeta):
    """Abstract base class for task specific document indexes.

    Subclasses implement :meth:`build` and :meth:`match`, which allow
    comparison and matching between pycognaize documents. The base class
    reads and writes the index stored at ``url`` through a
    ``requests.Session`` that sends ``token`` in the ``x-auth`` header.
    """

    # Forwarded as ``verify`` to every request made by this class.
    # SECURITY: the default ``False`` disables TLS certificate verification
    # while the auth token is being sent. It is kept for backward
    # compatibility; set it to ``True`` (or a CA bundle path) on a subclass
    # or an instance whenever the endpoint has a verifiable certificate.
    verify = False

    def __init__(self, token: str, url: str):
        """Create a session bound to the index endpoint.

        :param token: Value sent in the ``x-auth`` header of every request
        :param url: URL of the index; its last path segment is the
            index id (see :attr:`id`)
        """
        self.url = url
        self.session = requests.Session()
        # Update rather than replace: assigning a plain dict would discard
        # the session's default headers and its case-insensitive mapping.
        self.session.headers.update({'x-auth': token})

    @property
    def id(self) -> str:
        """Last path segment of ``url``, ignoring surrounding slashes."""
        return self.url.strip('/').rsplit('/', 1)[-1]

    def build_and_store(self, document: Document) -> requests.Response:
        """Build the encoding of a document and store it in the index.

        :param document: The document to encode and store
        :return: The response of the PUT request that stored the encoding
        """
        encoding = self.build(document)
        return self._store(doc_id=document.id, encoding=encoding)

    def _store(self, doc_id: str, encoding) -> requests.Response:
        """PUT a single ``{doc_id: encoding}`` entry to the index URL.

        :param doc_id: Id of the document the encoding belongs to
        :param encoding: The encoding to store; it is sent as JSON
        :return: The response of the PUT request
        """
        return self.session.put(url=self.url,
                                json={'data': {doc_id: encoding}},
                                verify=self.verify)

    @staticmethod
    def response_to_dict(response):
        """Convert a decoded index response into an id-to-encoding dict.

        :param response: Mapping with a ``'findexes'`` list whose items
            carry ``'documentId'`` and ``'data'`` keys
        :return: Dictionary where keys are document ids and values are
            the encoded documents
        :raises KeyError: If ``'findexes'`` or an item key is missing
        """
        return {entry['documentId']: entry['data']
                for entry in response['findexes']}

    def match_and_get(self, document: Document) -> Document:
        """Match a given document with an existing document in the index.

        The full index is fetched first. If the document is not part of
        it yet, its encoding is built, stored and added to the fetched
        index before :meth:`match` is called.

        :param document: The document to be matched
        :return: Whatever :meth:`match` returns for the document
        """
        # NOTE(review): the HTTP status of the GET (and of the PUT below)
        # is never checked, so an error response surfaces as a JSON
        # decoding error or a KeyError - consider raise_for_status().
        get_response: requests.Response = self.session.get(
            self.url, verify=self.verify)
        full_index: dict = self.response_to_dict(get_response.json())
        if document.id not in full_index:
            encoding = self.build(document=document)
            self._store(doc_id=document.id, encoding=encoding)
            full_index[document.id] = encoding
        return self.match(document=document, full_index=full_index)

    @abc.abstractmethod
    def build(self, document: Document) -> Any:
        """Build an encoding for a document, which can then be used for
        comparison and matching

        :param document: The document object to be built
        :return: The final encoding. The type of the encoding is task
            specific, therefore allows any type
        """
        raise NotImplementedError

    @abc.abstractmethod
    def match(self, document: Document, full_index: dict) -> Document:
        """Match a given document with an existing document in the index

        :param document: The document to be matched with an existing one
        :param full_index: Dictionary mapping document ids to their
            encodings, as produced by :meth:`response_to_dict`
        :return: The matched document.
            NOTE(review): this docstring previously described the result
            as a tuple of the matched document and a confidence value
            from 0 to 100, which contradicts the ``Document`` annotation
            - confirm against the concrete subclasses.
        """
        raise NotImplementedError