from alternat.generation.base.analyzer import AnalyzeImageBase
from google.cloud import vision
import io, os
from .config import Config
from PIL import Image as PIL_IMAGE
from alternat.generation.exceptions import InputImageNotAvailable
class AnalyzeImage(AnalyzeImageBase):
    """Google Cloud Vision analyzer driver.

    Implements the alternat analyzer contract (OCR, labels, describe) on top
    of the Google Cloud Vision API.

    :param AnalyzeImageBase: Driver base class.
    :type AnalyzeImageBase: class
    """

    def __init__(self):
        super(AnalyzeImage, self).__init__()
        # Config is used as a class-level namespace; params() returns the
        # driver settings (including the service-account credentials path).
        self.config = Config
        self.params = self.config.params()
        self.set_environment_variables()

    def set_environment_variables(self):
        """Set GOOGLE_APPLICATION_CREDENTIALS from config so the
        google-cloud client library can authenticate.
        """
        os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = self.params["credentials"]

    def describe_image(self, image: PIL_IMAGE):
        """Describe image (used for captioning) - not available in the Google
        Computer Vision API, so an empty caption is recorded instead.

        :param image: PIL Image object (unused by this driver).
        :type image: PIL_IMAGE
        """
        self.data[self.actions.DESCRIBE] = {"text": '', "confidence": 1}

    def ocr_analysis(self, image: PIL_IMAGE):
        """Run OCR analysis using the Google Computer Vision API and store the
        result under ``self.data[self.actions.OCR]``.

        The result dict has the full extracted ``text`` plus per-line entries
        with ``confidence``, ``text`` and ``boundingBox`` vertices, matching
        the shape the alternat rule engine expects.

        :param image: PIL Image object.
        :type image: PIL_IMAGE
        """
        client = vision.ImageAnnotatorClient()
        image = vision.types.Image(content=self.pil_to_image_content(image))
        response = client.document_text_detection(image=image)
        full_ocr_data = response.full_text_annotation

        final_ocr_data = {
            "text": full_ocr_data.text,
            "lines": []
        }

        lines_data = []
        for page in full_ocr_data.pages:
            # Google does not give line-level information, so the blocks
            # here become the lines.
            for block in page.blocks:
                block_text = ""
                for paragraph in block.paragraphs:
                    for word in paragraph.words:
                        for character in word.symbols:
                            block_text += character.text
                        # add a space between words
                        block_text += " "
                    # a paragraph of words has finished; terminate it
                    block_text += "."
                lines_data.append({
                    "confidence": round(block.confidence, 2),
                    "text": block_text,
                    "boundingBox": [
                        {"x": coord.x, "y": coord.y}
                        for coord in block.bounding_box.vertices
                    ]
                })

        final_ocr_data["lines"] = lines_data
        self.data[self.actions.OCR] = final_ocr_data

    def handle(self, image_path: str = None, base64_image: str = None, actions: list = None) -> dict:
        """Entry point for the driver. Runs the requested actions and
        populates data for the rule engine.

        :param image_path: Path to image on disk, defaults to None
        :type image_path: str, optional
        :param base64_image: Base64 image string, defaults to None
        :type base64_image: str, optional
        :param actions: list of actions to run, defaults to None (all actions execute)
        :type actions: list, optional
        :return: Analysis data keyed by action name.
        :rtype: dict
        """
        try:
            im = self.extract_metadata(base64_image, image_path)
        except InputImageNotAvailable as e:
            # Best-effort: report the problem and return whatever data exists.
            print("ERROR: %s" % e)
            return self.data

        if actions is None:
            actions = self.actions.get_all()

        for action in actions:
            # skip silently if the requested feature is not supported
            if action in self.actions.get_all():
                if action == self.actions.OCR:
                    self.ocr_analysis(im)
                if action == self.actions.LABELS:
                    self.extract_labels(im)
                if action == self.actions.DESCRIBE:
                    self.describe_image(im)

        return self.data