Azure Computer Vision includes Optical Character Recognition (OCR) capabilities. You can use the Read API to extract printed and handwritten text from images and documents. It uses deep learning based models and works with text on a variety of surfaces and backgrounds. These include business documents, invoices, receipts, posters, business cards, letters, and whiteboards. The OCR APIs support extracting printed text in several languages. Follow a quickstart to get started.
Azure configuration
- Register for an account on the Microsoft Azure cloud platform. The Azure free account includes access to several Azure products that are free for 12 months.
- Create a Cognitive Services resource.
- Make a note of the API key and the endpoint, which are located under "Keys and Endpoint".
Demo
Code Breakdown
Importing required libraries
import os
import json
import time
from requests import get,post
Setting Global Variables
# Directory the script is launched from; used further down to build the
# image path and the output path.
# NOTE: this name shadows the built-in dir(); it is kept because the
# integration code at the end of the file refers to it by this name.
dir = os.getcwd()

# Azure subscription key sent in the 'Ocp-Apim-Subscription-Key' header.
# Prefer supplying it through the AZURE_VISION_API_KEY environment variable
# so the secret never has to be committed; the placeholder is the fallback.
API_KEY = os.environ.get('AZURE_VISION_API_KEY', '<< YOURAPIKEY >>')
Image Handler
def handler(pathToImage):
    """Upload an image to the Read API and return the analysis outcome.

    The image bytes are POSTed to the analyze endpoint; the service answers
    with an ``Operation-Location`` header that is handed to
    ``get_endpoint`` for polling.

    Args:
        pathToImage: Path of the image file to upload.

    Returns:
        Whatever ``get_endpoint`` returns for the operation URL.

    Raises:
        OSError: If the image file cannot be opened or read.
        KeyError: If the response has no ``Operation-Location`` header
            (for example because the request was rejected).
    """
    ENDPOINT = 'https://iit-ocr.cognitiveservices.azure.com/vision/v3.1/read/analyze'
    print('Processing: ' + pathToImage)
    headers = {
        'Ocp-Apim-Subscription-Key': API_KEY,
        'Content-Type': 'application/octet-stream',
    }
    # The language hint travels in the query string. It used to be passed as
    # json=..., which requests ignores whenever data=... is also given, so
    # the hint was never actually sent.
    params = {
        'language': 'en',
    }
    # Context manager so the file handle is released even if read() fails.
    with open(pathToImage, 'rb') as image_file:
        payload = image_file.read()
    response = post(ENDPOINT, headers=headers, params=params, data=payload)
    try:
        get_url = response.headers['Operation-Location']
    except KeyError:
        # Same exception type as before, but with enough context to debug.
        raise KeyError(
            'Operation-Location header missing (HTTP %s): %s'
            % (response.status_code, response.text)
        ) from None
    return get_endpoint(get_url)
Getting the Asynchronous Endpoint
def get_endpoint(end_url):
    """Poll the operation URL until the analysis finishes.

    Makes up to 15 GET requests, sleeping between attempts with a delay that
    starts at 5 seconds and doubles each time, capped at 60 seconds.

    Args:
        end_url: Operation URL to poll.

    Returns:
        * ``str(resp_json)`` (the ``repr`` of the decoded JSON body) when the
          reported status is ``"succeeded"``;
        * a message starting with ``"Analysis failed:"`` when the status is
          ``"failed"``;
        * a message starting with ``"GET analyze results failed:"`` when the
          HTTP status is not 200 or any exception occurs while polling;
        * ``None`` if the analysis is still running after the last attempt.
    """
    n_tries = 15
    wait_sec = 5
    max_wait_sec = 60
    for n_try in range(n_tries):
        try:
            print(n_try, wait_sec)
            resp = get(url=end_url, headers={"Ocp-Apim-Subscription-Key": API_KEY})
            resp_json = resp.json()
            if resp.status_code != 200:
                # Stop here: an error body has no usable "status" field, and
                # falling through used to replace this message with a
                # misleading KeyError text.
                msg = "GET analyze results failed:\n%s" % json.dumps(resp_json)
                print(msg)
                return msg
            status = resp_json["status"]
            if status == "succeeded":
                print("Analysis succeeded:\n")
                return str(resp_json)
            if status == "failed":
                return "Analysis failed:\n%s" % json.dumps(resp_json)
            # Analysis still running. Wait and retry.
            time.sleep(wait_sec)
            wait_sec = min(2 * wait_sec, max_wait_sec)
        except Exception as e:
            # Best-effort boundary: report the problem instead of raising.
            return "GET analyze results failed:\n%s" % str(e)
    # Every attempt was used up while the analysis was still running.
    return None
Extracting Text From Response
def parse_text(results):
    """Flatten a Read API result into plain text.

    Every word is followed by a single space and every recognised line is
    terminated by a newline, so a line reads ``"word1 word2 \\n"``.

    Args:
        results: Decoded response dict; must provide
            ``results['analyzeResult']['readResults']``, a list of pages
            that each have a ``'lines'`` list whose items have a ``'words'``
            list of dicts with a ``'text'`` entry.

    Returns:
        The concatenated text, or ``''`` when there are no lines.

    Raises:
        KeyError: If any of the expected keys is missing.
    """
    pages = results['analyzeResult']['readResults']
    # Collect the pieces and join once instead of growing a string with +=.
    rendered_lines = []
    for page in pages:
        for line in page['lines']:
            words = ''.join(word['text'] + ' ' for word in line['words'])
            rendered_lines.append(words + '\n')
    return ''.join(rendered_lines)
Visualisation Code
def plot_rectangle(orig, x1, y1, confidence):
    """Draw a rectangle whose colour reflects the recognition confidence.

    Requires OpenCV (``cv2``) to be available at module level.

    Args:
        orig: Image passed to ``cv2.rectangle``.
        x1: First corner of the rectangle as an ``(x, y)`` tuple.
        y1: Opposite corner of the rectangle as an ``(x, y)`` tuple.
        confidence: Confidence score used to pick the colour.

    Returns:
        The value returned by ``cv2.rectangle``.
    """
    # Colour tuples are in OpenCV's BGR channel order.
    if confidence > 0.9:
        col = (0, 255, 0)  # green: high confidence
    elif confidence > 0.7:
        # Previously the same green as the branch above, which made the
        # middle tier indistinguishable; yellow marks medium confidence.
        col = (0, 255, 255)
    else:
        col = (0, 0, 255)  # red: low confidence
    return cv2.rectangle(orig, x1, y1, col, 5)
def plot(json_file, img_file='sign.jpg'):
    """Show the image with a translucent box drawn around every word.

    Requires OpenCV (``cv2``) to be available at module level. Only the
    first entry of ``readResults`` is visualised. Blocks until a key is
    pressed in the image window.

    Args:
        json_file: Decoded response dict; must provide
            ``json_file['analyzeResult']['readResults'][0]['lines']`` where
            each word has ``'boundingBox'`` and ``'confidence'`` entries.
        img_file: Path of the image to draw on. Defaults to ``'sign.jpg'``,
            the value that used to be hard-coded.

    Raises:
        FileNotFoundError: If ``img_file`` cannot be loaded.
    """
    alpha = 0.4
    orig = cv2.imread(img_file)
    if orig is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise FileNotFoundError('Could not read image: %s' % img_file)
    copy = orig.copy()
    lines = json_file['analyzeResult']['readResults'][0]['lines']
    for line in lines:
        for box in line['words']:
            arr = box['boundingBox']
            confidence = box['confidence']
            # Two opposite corners picked out of the flat coordinate list.
            first_corner = (arr[2], arr[1])
            second_corner = (arr[6], arr[5])
            orig = plot_rectangle(orig, first_corner, second_corner, confidence)
            print(first_corner, second_corner, confidence)
    # Blend the annotated image with the untouched copy so that the boxes
    # appear semi-transparent.
    mode = cv2.addWeighted(orig, alpha, copy, 1 - alpha, 0)
    cv2.imshow('image', cv2.resize(mode, (1360, 768)))
    cv2.waitKey(0)
Write to file
def write_file(get_data, filename):
    """Write text to a file located under the ``docs`` directory.

    ``filename`` is joined onto ``'docs'``. Because ``os.path.join`` drops
    earlier components when a later one is absolute, an absolute
    ``filename`` is written exactly where it points.

    Args:
        get_data: Text to write.
        filename: Target file name or path.
    """
    filename = os.path.join('docs', filename)
    # Create the target directory on demand so a missing 'docs' folder does
    # not make the write fail.
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    # Context manager closes the handle even if write() raises; an explicit
    # UTF-8 encoding keeps non-ASCII OCR text from failing on platforms
    # whose default encoding cannot represent it.
    with open(filename, "w", encoding="utf-8") as f:
        f.write(get_data)
Integrating all the components
import ast

# Image to analyse, resolved against the working directory. 'sign.jpg' is
# also the image plot() draws on by default.
path = 'sign.jpg'

# Keep the result: it was previously discarded, leaving get_data undefined.
get_data = handler(os.path.join(dir, path))

# On success handler() yields the response as str(dict), i.e. a Python
# literal, so it is turned back into a dict for plot(). Anything that is not
# a literal (an error message, or None after a timeout) is reported instead.
try:
    results = ast.literal_eval(get_data)
except (ValueError, SyntaxError, TypeError):
    print(get_data)
else:
    plot(results)
    write_file(get_data, os.path.join(dir, 'jsondic.py'))