Source code for alternat.generation.rules.ocr_handler

from alternat.generation.base.action_data_handler import ActionDataHandler
from alternat.generation.config import Config as GenerationConfig
from .ocr_cluster import ClusterTree


[docs]class OCRDataHandler(ActionDataHandler): """Rule for processing OCR data from driver. :param ActionDataHandler: Base class for rule. :type ActionDataHandler: [type] """ def __init__(self, input_data, confidence_threshold: float = None, ocr_filter_threshold: float = None): """Initialize the handler with input data and confidence threshold (if available) :param input_data: Data from driver. :type input_data: [type] :param confidence_threshold: Confidence threshold to filter OCR with low threshold, defaults to None (Driver config default) :type confidence_threshold: float, optional :param ocr_filter_threshold: Confidence threshold to filter OCR data based on line height ratio to image height. :type ocr_filter_threshold: float, optional """ super(OCRDataHandler, self).__init__(input_data) # CONFIDENCE THRESHOLD FOR OCR if confidence_threshold is None: self.CONFIDENCE_THRESHOLD = 0.75 else: self.CONFIDENCE_THRESHOLD = confidence_threshold print("OCR Threshold : ", self.CONFIDENCE_THRESHOLD) self.OCR_HEIGHT_RATIO_TO_IMAGE_THRESHOLD = ocr_filter_threshold # AREA OCCUPIED THRESHOLD self.AREA_OCCUPIED_BY_TEXT_THRESHOLD = 0.30 # Numbers of ocr data to show self.NUM_OF_OCR = 3 self.PREFIX_TEXT = "Appears to contain text: " self.ocr_cluster_cls = ClusterTree()
[docs] def has_data(self) -> bool: """Checks whethere OCR data is avalible in the input data. :return: [description] :rtype: bool """ if self.actions.OCR in self.input_data.keys(): ocr_data = self.input_data[self.actions.OCR] if len(ocr_data) > 0: return True else: return False else: return False
[docs] def process_ocr(self) -> dict: """Process the OCR data from the driver and filter it on the basis of line confidence threshold value and the ratio of line height to image height. Based on the configuration also invokes alternat clustering implementation (default to True) :return: [description] :rtype: dict """ ocr_data = self.input_data[self.actions.OCR] lines_data = ocr_data["lines"] image_metadata = self.input_data[self.actions.get_metadata_key()] image_height = image_metadata["height"] image_width = image_metadata["width"] filtered_lines = list(filter(lambda l: l["confidence"] >= self.CONFIDENCE_THRESHOLD, lines_data)) filtered_text = "" # only for debug debug_line_data = [] total_line_area = 0 final_lines_data = [] for line in filtered_lines: line_width = abs(line["boundingBox"][1]["x"] - line["boundingBox"][0]["x"]) line_height = abs(line["boundingBox"][3]["y"] - line["boundingBox"][0]["y"]) line_area = line_width * line_height # print(" line height, image height ", line_height, image_height) if ((line_height / image_height) > self.OCR_HEIGHT_RATIO_TO_IMAGE_THRESHOLD): filtered_text += line["text"] + "\n" total_line_area += line_area final_lines_data.append(line) #debug array data has everything debug_line_data.append({ "confidence": line["confidence"], "line_area": line_area, "text": line["text"], "height_ratio": round(line_height / image_height, 5), "height":line_height }) image_area = image_height * image_width occupied_area_by_lines = total_line_area / image_area # print("Occupied Area :", occupied_area_by_lines) # use the default final text final_text = filtered_text.replace("\n", " ") if GenerationConfig.ENABLE_OCR_CLUSTERING: ocr_cluster_text = self.ocr_cluster_cls.generate_data(final_lines_data) if len(ocr_cluster_text) > 0: final_text = ocr_cluster_text if GenerationConfig.DEBUG: return { "text": final_text, "is_area_above_threshold": occupied_area_by_lines > self.AREA_OCCUPIED_BY_TEXT_THRESHOLD, "data": debug_line_data } else: return { "text": final_text, "is_area_above_threshold": occupied_area_by_lines > self.AREA_OCCUPIED_BY_TEXT_THRESHOLD }
[docs] def apply(self, interim_result: dict) -> dict: """Process intermin result from previous rules in the chain and run OCR rule. :param interim_result: Intermediate results from previous rules in the chain. :type interim_result: dict :return: [description] :rtype: dict """ if self.has_data(): processed_ocr_data = self.process_ocr() if processed_ocr_data["is_area_above_threshold"]: alt_recommendation = self.PREFIX_TEXT + processed_ocr_data["text"] + ". " + \ interim_result[self.alt_recommendation_key] interim_result[self.alt_recommendation_key] = alt_recommendation if GenerationConfig.DEBUG: interim_result[self.actions.OCR] = processed_ocr_data["data"] else: interim_result[self.actions.OCR] = self.PREFIX_TEXT + processed_ocr_data["text"] else: if len(processed_ocr_data["text"]) > 0: alt_recommendation = interim_result[self.alt_recommendation_key] + self.PREFIX_TEXT + \ processed_ocr_data["text"] + ". " if GenerationConfig.DEBUG: interim_result[self.actions.OCR] = processed_ocr_data["data"] else: interim_result[self.actions.OCR] = self.PREFIX_TEXT + processed_ocr_data["text"] else: alt_recommendation = interim_result[self.alt_recommendation_key] interim_result[self.actions.OCR] = "" interim_result[self.alt_recommendation_key] = alt_recommendation else: interim_result[self.actions.OCR] = "" return interim_result