Source code for alternat.generation.opensource.analyze

from .config import Config
from .pytorchcaption import PytorchCaption
from alternat.generation.base.analyzer import AnalyzeImageBase
import os, json, time
import requests, functools
from PIL import Image as PIL_Image
import easyocr
from alternat.generation.exceptions import InputImageNotAvailable
import numpy as np
import cv2



class AnalyzeImage(AnalyzeImageBase):
    """Open-source driver: OCR through EasyOCR, captioning through ``PytorchCaption``.

    Every action writes its result into ``self.data`` under the matching
    ``self.actions`` key. ``self.data``, ``self.actions``,
    ``extract_metadata`` and ``pil_to_image_content`` are not defined in this
    class; they are expected to be provided by :class:`AnalyzeImageBase`.
    """

    def __init__(self):
        """Create the EasyOCR reader and the caption worker used by the actions."""
        super().__init__()
        # The Config class itself (not an instance) is stored.
        self.config = Config
        # Reader restricted to English text.
        self.reader = easyocr.Reader(['en'])
        self.captionworker = PytorchCaption()

    def modifyBoundingBoxData(self, bounding_box: list):
        """Group a flat list of eight coordinates into four corner points.

        ``ocr_analysis`` flattens the four ``[x, y]`` corners reported by
        EasyOCR into ``[x0, y0, x1, y1, x2, y2, x3, y3]``; this method turns
        that list into
        ``[{"x": x0, "y": y0}, {"x": x1, "y": y1}, {"x": x2, "y": y2}, {"x": x3, "y": y3}]``.

        :param bounding_box: Flat list of at least eight coordinates.
        :type bounding_box: list
        :return: Four ``{"x": ..., "y": ...}`` dicts in input order.
        :rtype: list
        :raises IndexError: If fewer than eight coordinates are supplied.
        """
        return [
            {"x": bounding_box[i], "y": bounding_box[i + 1]}
            for i in range(0, 8, 2)
        ]

    def ocr_analysis(self, image: PIL_Image.Image):
        """Run EasyOCR on the image and store the result under ``actions.OCR``.

        The stored value is ``{"text": <all lines, each followed by a
        newline>, "lines": [{"confidence", "text", "boundingBox"}, ...]}``.

        :param image: PIL Image object.
        :type image: PIL.Image.Image
        """
        detections = self.reader.readtext(self.pil_to_image_content(image))

        lines_data = []
        line_texts = []
        # Each detection is (four [x, y] corner points, text, confidence).
        for corners, line_text, confidence in detections:
            # Flatten the four corners into eight plain Python floats.
            flat_box = [
                float(coordinate)
                for point in corners[:4]
                for coordinate in point[:2]
            ]
            lines_data.append({
                "confidence": float(round(confidence, 2)),
                "text": line_text,
                "boundingBox": self.modifyBoundingBoxData(flat_box),
            })
            line_texts.append(line_text)

        self.data[self.actions.OCR] = {
            # Every line, including the last one, is terminated by "\n".
            "text": "".join(line_text + "\n" for line_text in line_texts),
            "lines": lines_data,
        }

    def describe_image(self, image: PIL_Image.Image):
        """Caption the image and store the result under ``actions.DESCRIBE``.

        The image is handed to ``self.captionworker.getCaptions`` as a BGR
        array; the stored value is ``{"text": caption, "confidence": confidence}``.

        :param image: PIL Image object.
        :type image: PIL.Image.Image
        """
        # np.array() of a grayscale, palette, 1-bit or CMYK image is not the
        # 3-channel RGB array COLOR_RGB2BGR expects, so normalise the mode first.
        if image.mode != "RGB":
            image = image.convert("RGB")
        bgr_image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

        caption, confidence = self.captionworker.getCaptions(bgr_image)
        self.data[self.actions.DESCRIBE] = {
            "text": caption,
            "confidence": confidence,
        }

    # TODO: Add open source implementation for image labelling
    def extract_labels(self, image: PIL_Image.Image):
        """Placeholder for label extraction; stores an empty dict under ``actions.LABELS``.

        :param image: PIL Image object (currently unused).
        :type image: PIL.Image.Image
        """
        self.data[self.actions.LABELS] = {}

    def handle(self, image_path: str = None, base64_image: str = None, actions: list = None) -> dict:
        """Entry point for the driver: load the image and run the requested actions.

        :param image_path: Path to image on disk, defaults to None
        :type image_path: str, optional
        :param base64_image: Base64 image string, defaults to None
        :type base64_image: str, optional
        :param actions: Actions to run, defaults to None (all actions execute).
            Entries not present in ``self.actions.get_all()`` are ignored.
        :type actions: list, optional
        :return: ``self.data``. If the input image is not available the error
            is printed and ``self.data`` is returned without running any action.
        :rtype: dict
        """
        try:
            im = self.extract_metadata(base64_image, image_path)
        except InputImageNotAvailable as e:
            print("ERROR: %s" % e)
            return self.data

        # Look the supported actions up once instead of on every iteration.
        supported_actions = self.actions.get_all()
        if actions is None:
            actions = supported_actions

        for action in actions:
            if action not in supported_actions:
                continue
            if action == self.actions.OCR:
                self.ocr_analysis(im)
            elif action == self.actions.LABELS:
                self.extract_labels(im)
            elif action == self.actions.DESCRIBE:
                self.describe_image(im)

        return self.data