Source code for alternat.generation.microsoft.analyze

from .config import Config
from alternat.generation.base.analyzer import AnalyzeImageBase
import os, json, time
import time
import requests, functools
from PIL import Image as PIL_Image
import json
from alternat.generation.exceptions import InputImageNotAvailable


[docs]class AnalyzeImage(AnalyzeImageBase): """Azure / Microsoft Analyzer driver class. :param AnalyzeImageBase: Driver base class. :type AnalyzeImageBase: [type] """ def __init__(self): super(AnalyzeImage, self).__init__() self.config = Config self.params = self.config.params() self.describe_endpoint = self.params["endpoint"] + "vision/v3.1/analyze" self.ocr_endpoint = self.params["endpoint"] + "vision/v3.1/read/analyze"
[docs] def modifyBoundingBoxData(self, bounding_box: list): """Transform bounding box data as per the convention. Azure API return bounding box info in the format [left, top, right, top, right, bottom, left, bottom] which is transformed to format [{x: left, y: top}, {x: right, y: top}, {x: right, y: bottom}, {x: left, y: bototm}]. :param bounding_box: Bounding box data form Azure API. :type bounding_box: list :return: [description] :rtype: [type] """ arr = bounding_box return [ {"x": arr[0], "y": arr[1]}, {"x": arr[2], "y": arr[3]}, {"x": arr[4], "y": arr[5]}, {"x": arr[6], "y": arr[7]} ]
[docs] def ocr_analysis(self, image: PIL_Image): """Does OCR Analysis using Azure Vision API. :param image: PIL Image object. :type image: PIL_Image """ headers = {'Ocp-Apim-Subscription-Key': self.params["subscription"], 'Content-Type': 'application/octet-stream'} params = {'language': 'en', 'detectOrientation': 'true'} try: response = requests.post( self.ocr_endpoint, headers=headers, params=params, data=self.pil_to_image_content(image)) response.raise_for_status() except requests.exceptions.HTTPError: exception = json.loads(response.content) if exception["error"]["code"] == "429": if Config.AZURE_RATE_LIMIT_ON: print("\n\nRate limited by Azure. The next call will execute after %d sec \n\n" % Config.AZURE_RATE_LIMIT_TIME_IN_SEC) time.sleep(Config.AZURE_RATE_LIMIT_TIME_IN_SEC) self.ocr_analysis(image) return else: print("\n\nRate limited by Azure. You can enable rate limiting by " "setting AZURE_RATE_LIMIT_ON in config.py\n\n") return else: print("Error in sending request: ", response.content) return # Holds the URI used to retrieve the recognized text. operation_url = response.headers["Operation-Location"] # The recognized text isn't immediately available, so poll to wait for completion. analysis = {} poll = True while poll: response_final = requests.get( response.headers["Operation-Location"], headers=headers) analysis = response_final.json() # print(json.dumps(analysis, indent=4)) time.sleep(1) if "analyzeResult" in analysis: poll = False if "status" in analysis and analysis['status'] == 'failed': poll = False lines_data = [] text = "" if "analyzeResult" in analysis: # Extract the recognized text, with bounding boxes. for line in analysis["analyzeResult"]["readResults"][0]["lines"]: words = line["words"] total_words_confidence = functools.reduce(lambda a, b: a + b, [word["confidence"] for word in words]) average_confidence = round(total_words_confidence / len(words), 2) lines_data.append({ "text": line["text"], "confidence": average_confidence, "boundingBox": self.modifyBoundingBoxData(line["boundingBox"]) }) text += line["text"] + "\n" final_ocr_data = { "text": text, "lines": lines_data } self.data[self.actions.OCR] = final_ocr_data
[docs] def describe_image(self, image: PIL_Image): """Describe image using Azure Vision API. :param image: PIL Image object :type image: PIL_Image """ headers = {'Ocp-Apim-Subscription-Key': self.params["subscription"], 'Content-Type': 'application/octet-stream'} params = {'visualFeatures': 'Categories,Description'} try: response = requests.post( self.describe_endpoint, headers=headers, params=params, data=self.pil_to_image_content(image)) response.raise_for_status() except requests.exceptions.HTTPError: exception = json.loads(response.content) if exception["error"]["code"] == "429": if Config.AZURE_RATE_LIMIT_ON: print("\n\nRate limited by Azure. The next call will execute after %d sec \n\n" % Config.AZURE_RATE_LIMIT_TIME_IN_SEC) time.sleep(Config.AZURE_RATE_LIMIT_TIME_IN_SEC) self.describe_image(image) return else: print("\n\nRate limited by Azure. You can enable rate limiting by " "setting AZURE_RATE_LIMIT_ON in config.py\n\n") return else: print("Error in sending request: ", response.content) return analysis = response.json() # print(" DESCRIBE : ", analysis) caption = analysis["description"]["captions"][0]["text"] confidence = analysis["description"]["captions"][0]["confidence"] self.data[self.actions.DESCRIBE] = {"text": caption, "confidence": confidence}
[docs] def extract_labels(self, image: PIL_Image): """Extract labels of image using Azure Vision API. :param image: PIL Image object. :type image: PIL_Image """ headers = {'Ocp-Apim-Subscription-Key': self.params["subscription"], 'Content-Type': 'application/octet-stream'} params = {'visualFeatures': 'Tags'} try: response = requests.post( self.describe_endpoint, headers=headers, params=params, data=self.pil_to_image_content(image)) response.raise_for_status() except requests.exceptions.HTTPError: exception = json.loads(response.content) if exception["error"]["code"] == "429": if Config.AZURE_RATE_LIMIT_ON: print("\n\nRate limited by Azure. The next call will execute after %d sec \n\n" % Config.AZURE_RATE_LIMIT_TIME_IN_SEC) time.sleep(Config.AZURE_RATE_LIMIT_TIME_IN_SEC) self.extract_labels(image) return else: print("\n\nRate limited by Azure. You can enable rate limiting by " "setting AZURE_RATE_LIMIT_ON in config.py\n\n") return else: print("Error in sending request: ", response.content) return analysis = response.json() labels = analysis["tags"] labels_data = [] for label in labels: labels_data.append({ "description": label["name"], "confidence": label["confidence"] }) self.data[self.actions.LABELS] = labels_data
[docs] def resize_image(self, image: PIL_Image): """Resize image (maintaining aspect ratio) if width / height > 5000 pixels (API constrain from Azure) :param image: [description] :type image: PIL_Image """ size = Config._MAX_IMAGE_SIZE_IN_PIXEL, Config._MAX_IMAGE_SIZE_IN_PIXEL image.thumbnail(size, PIL_Image.ANTIALIAS)
# im.save(abs_image_path)
[docs] def is_clean(self, image: PIL_Image) -> bool: """Check if the image has proper resolution, and is clean. :param image:PIL Image object. :type image: PIL_Image :return: [description] :rtype: bool """ width, height = image.size if width < Config._MIN_IMAGE_SIZE_IN_PIXEL or height < Config._MIN_IMAGE_SIZE_IN_PIXEL: return False if width > Config._MAX_IMAGE_SIZE_IN_PIXEL or height > Config._MAX_IMAGE_SIZE_IN_PIXEL: self.resize_image(image) return True
[docs] def handle(self, image_path: str = None, base64_image: str = None, actions: list = None) -> dict: """Entry point for the driver. Implements all the action and generates data for rule engine. :param image_path: Path to image on disk, defaults to None :type image_path: str, optional :param base64_image: Base64 image string, defaults to None :type base64_image: str, optional :param actions: list of actions to run, defaults to None (all actions execute) :type actions: list, optional :return: [description] :rtype: dict """ try: im = self.extract_metadata(base64_image, image_path) except InputImageNotAvailable as e: print("ERROR: %s" % e) return self.data if self.is_clean(im): if actions is None: actions = self.actions.get_all() for action in actions: # if feature is supported if action in self.actions.get_all(): if action == self.actions.OCR: self.ocr_analysis(im) if action == self.actions.LABELS: self.extract_labels(im) if action == self.actions.DESCRIBE: self.describe_image(im) else: print("Image dimensions outside of bound Min: %d X %d | Max : %d X %d" % (Config._MIN_IMAGE_SIZE_IN_PIXEL, Config._MIN_IMAGE_SIZE_IN_PIXEL, Config._MAX_IMAGE_SIZE_IN_PIXEL, Config._MAX_IMAGE_SIZE_IN_PIXEL)) print("Skipping this image") return self.data