from alternat.generation.base.analyzer import AnalyzeImageBase
from google.cloud import vision
import io, os
from .config import Config
from PIL import Image as PIL_IMAGE
from alternat.generation.exceptions import InputImageNotAvailable
class AnalyzeImage(AnalyzeImageBase):
    """Google Cloud Vision analyzer driver.

    Implements the alternat analyzer contract (OCR, labels, describe) on top
    of the Google Cloud Vision API.

    :param AnalyzeImageBase: Driver base class.
    :type AnalyzeImageBase: class
    """

    def __init__(self):
        super(AnalyzeImage, self).__init__()
        # Config is used as a class-level namespace; params() returns the
        # driver settings (including the service-account credentials path).
        self.config = Config
        self.params = self.config.params()
        self.set_environment_variables()

    def set_environment_variables(self):
        """Set GOOGLE_APPLICATION_CREDENTIALS from config so the
        google-cloud client library can authenticate.
        """
        os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = self.params["credentials"]

    def describe_image(self, image: PIL_IMAGE):
        """Describe image (used for captioning) - not available in the Google
        Computer Vision API, so an empty caption is recorded instead.

        :param image: PIL Image object (unused by this driver).
        :type image: PIL_IMAGE
        """
        self.data[self.actions.DESCRIBE] = {"text": '', "confidence": 1}

    def ocr_analysis(self, image: PIL_IMAGE):
        """Run OCR analysis using the Google Computer Vision API and store the
        result under ``self.data[self.actions.OCR]``.

        The result dict has the full extracted ``text`` plus per-line entries
        with ``confidence``, ``text`` and ``boundingBox`` vertices, matching
        the shape the alternat rule engine expects.

        :param image: PIL Image object.
        :type image: PIL_IMAGE
        """
        client = vision.ImageAnnotatorClient()
        image = vision.types.Image(content=self.pil_to_image_content(image))
        response = client.document_text_detection(image=image)
        full_ocr_data = response.full_text_annotation

        final_ocr_data = {
            "text": full_ocr_data.text,
            "lines": []
        }

        lines_data = []
        for page in full_ocr_data.pages:
            # Google does not give line-level information, so the blocks
            # here become the lines.
            for block in page.blocks:
                block_text = ""
                for paragraph in block.paragraphs:
                    for word in paragraph.words:
                        for character in word.symbols:
                            block_text += character.text
                        # add a space between words
                        block_text += " "
                    # a paragraph of words has finished; terminate it
                    block_text += "."
                lines_data.append({
                    "confidence": round(block.confidence, 2),
                    "text": block_text,
                    "boundingBox": [
                        {"x": coord.x, "y": coord.y}
                        for coord in block.bounding_box.vertices
                    ]
                })

        final_ocr_data["lines"] = lines_data
        self.data[self.actions.OCR] = final_ocr_data

    def handle(self, image_path: str = None, base64_image: str = None, actions: list = None) -> dict:
        """Entry point for the driver. Runs the requested actions and
        populates data for the rule engine.

        :param image_path: Path to image on disk, defaults to None
        :type image_path: str, optional
        :param base64_image: Base64 image string, defaults to None
        :type base64_image: str, optional
        :param actions: list of actions to run, defaults to None (all actions execute)
        :type actions: list, optional
        :return: Analysis data keyed by action name.
        :rtype: dict
        """
        try:
            im = self.extract_metadata(base64_image, image_path)
        except InputImageNotAvailable as e:
            # Best-effort: report the problem and return whatever data exists.
            print("ERROR: %s" % e)
            return self.data

        if actions is None:
            actions = self.actions.get_all()

        for action in actions:
            # skip silently if the requested feature is not supported
            if action in self.actions.get_all():
                if action == self.actions.OCR:
                    self.ocr_analysis(im)
                if action == self.actions.LABELS:
                    self.extract_labels(im)
                if action == self.actions.DESCRIBE:
                    self.describe_image(im)

        return self.data