
Figure 1. OCR and annotation of mock form to extract specific data
import json
import docExtraction
import os
from flask import Flask, request
from werkzeug.utils import secure_filename
# Directory where uploaded files (and the annotated OCR output) are stored.
UPLOAD_FOLDER = 'uploadFolder/'
# File extensions accepted by allowed_file(); anything else is rejected.
ALLOWED_EXTENSIONS = {'txt', 'pdf', 'png', 'jpg', 'jpeg', 'gif'}
# Flask application; the upload folder is exposed through app.config so
# request handlers read it the same way regardless of deployment config.
app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
def allowed_file(filename):
    """Return True if *filename* has an extension listed in ALLOWED_EXTENSIONS.

    The comparison is case-insensitive and only the text after the last dot
    is considered, so 'form.backup.PDF' is treated as a 'pdf' file.
    """
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1].lower()
    return extension in ALLOWED_EXTENSIONS
@app.route('/upload/', methods=['GET', 'POST'])
def upload_file():
    """Accept a file upload and return the OCR extraction as JSON.

    GET returns a minimal HTML test form so the route can be exercised
    without the frontend. POST expects a multipart/form-data body with a
    'file' part; the file is saved under UPLOAD_FOLDER, handed to
    docExtraction.process() for OCR, and the extracted [name, value] pairs
    are returned as a JSON string. On error a plain-text message is
    returned (the AppGyver frontend shows it as an alert).
    """
    if request.method == 'POST':
        # check if the post request has the file part
        print('File type is ' + str(request.files), flush=True)
        if 'file' not in request.files:
            print('No file part', flush=True)
            # message for appgyver alert
            return 'No file part'
        file = request.files['file']
        # If the user does not select a file, the browser submits an
        # empty file without a filename.
        if file.filename == '':
            print('No selected file', flush=True)
            # message for appgyver alert
            return 'No selected file'
        if file and allowed_file(file.filename):
            # create a secure filename (strips path components, so a crafted
            # filename cannot escape the upload directory)
            filename = secure_filename(file.filename)
            # fix: file.save() raises FileNotFoundError when the target
            # directory does not exist yet, so create it on first use
            os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
            filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
            file.save(filepath)
            print('PDF saved to directory: ' + str(UPLOAD_FOLDER), flush=True)
            # call OCR function in docExtraction.py
            rois = docExtraction.process(filepath)
            print('OCR complete and saved to directory: ' + str(UPLOAD_FOLDER), flush=True)
            # json response
            return json.dumps(rois, sort_keys=True, indent=4)
        # message for appgyver alert
        print('The file name was not allowed.', flush=True)
        return 'The file name was not allowed.'
    # GET: bare-bones upload form for manual testing
    return '''
<!doctype html>
<title>Upload new File</title>
<h1>Upload File (Test View) </h1>
<form method=post enctype=multipart/form-data>
<input type=file name=file>
<input type=submit value=Upload>
</form>
<p>This is a test page to test a file upload without using frontend</p>
'''
if __name__ == '__main__':
    # Bind to all interfaces so the containerized service is reachable.
    # fix: pass the port as an int keyword argument — the original passed
    # the string '5000' positionally, which relies on Flask coercing it.
    app.run(host='0.0.0.0', port=5000)
import pdf2image
import cv2
import numpy
import datetime
import pytesseract
def process(filepath):
    """OCR the fixed form fields on page 1 of the PDF at *filepath*.

    Returns a list of [field_name, extracted_text] pairs, one per region of
    interest. As a side effect the page is annotated (border, timestamp and
    ROI rectangles) by draw_border()/getOCR().
    """
    # regions of interest: [name, x, y, w, h, text] — text is blank until
    # getOCR() fills it in
    rois = [['CompanyName', 1450, 2380, 2250, 88, ''],
            ['ACNARBN', 1450, 2475, 2250, 88, ''],
            ['Address1', 1450, 2570, 2250, 88, ''],
            ['TownCity', 1450, 2750, 1250, 88, ''],
            ['State', 1450, 2848, 650, 84, ''],
            ['Postcode', 2815, 2848, 600, 84, ''],
            ['Country', 1450, 2942, 2250, 85, ''],
            ['Phone', 1450, 3046, 650, 85, ''],
            ['Email', 1450, 3155, 2250, 88, ''],
            ['BlockList', 1650, 1945, 2050, 88, ''],
            ['Date', 1650, 5120, 500, 90, '']]
    # render every PDF page and convert from PIL RGB to OpenCV BGR
    images = [cv2.cvtColor(numpy.asarray(page), code=cv2.COLOR_RGB2BGR)
              for page in pdf2image.convert_from_path(filepath, dpi=500)]
    # the mock form is a single page, so only images[0] is processed;
    # a multi-page PDF would need a loop over images here
    images[0] = draw_border(images[0])
    # extract text in regions of interest and add to our ROIs
    images[0], rois = getOCR(images[0], rois)
    # keep just the field name and its extracted text for the JSON response
    return [[roi[0], roi[5]] for roi in rois]
def getOCR(image, rois):
    """Run Tesseract over each region of interest and annotate the image.

    Parameters:
        image: the page as a BGR image (numpy array).
        rois:  list of [name, x, y, w, h, text] entries; slot 5 is filled
               in place with the Tesseract output for that region.

    Returns the annotated image and the updated rois list. Also writes the
    annotated page to uploadFolder/Output.png.
    """
    for roi in rois:
        # region-of-interest rectangle
        x, y, w, h = roi[1], roi[2], roi[3], roi[4]
        # crop the region and convert to grayscale before OCR
        gray = cv2.cvtColor(image[y:y + h, x:x + w], cv2.COLOR_BGR2GRAY)
        roi[5] = pytesseract.image_to_string(gray)
        # outline the region on the original image
        cv2.rectangle(image, (x, y), (x + w, y + h), (241, 196, 15), 2)
    # write the annotated page once, after all regions are drawn
    # (fix: hoisted out of the loop so the file is not rewritten per region)
    cv2.imwrite("uploadFolder/Output.png", image)
    return image, rois
def draw_border(image):
    """Draw an orange frame inset 100 px from each page edge plus a
    'Processed - <timestamp>' banner, and return the annotated image.
    The image is modified in place.
    """
    height, width, _ = image.shape
    colour = (34, 126, 230)  # BGR orange
    # top, right, bottom and left border lines
    cv2.line(image, (0, 100), (int(width), 100), colour, 5)
    cv2.line(image, (int(width) - 100, 0), (int(width) - 100, int(height)), colour, 5)
    cv2.line(image, (int(width), int(height) - 100), (0, int(height) - 100), colour, 5)
    cv2.line(image, (100, int(height)), (100, 0), colour, 5)
    # current date and time, formatted dd/mm/YY H:M:S
    stamp = datetime.datetime.now().strftime("%d/%m/%Y %H:%M:%S")
    cv2.putText(image, "Processed - " + stamp, (140, 80),
                cv2.FONT_HERSHEY_PLAIN, 4, colour, 4)
    return image
Figure 2. Extracted OCR information from uploaded file returned in JSON format.
FROM ubuntu:18.04
ENV DEBIAN_FRONTEND=noninteractive
WORKDIR /program
# System dependencies in a single layer (fix: the original used two RUNs,
# mixed `apt` with `apt-get`, and left the apt lists of the second RUN in
# the image): tesseract-ocr for OCR, poppler-utils for pdf2image, and
# ffmpeg/libsm6/libxext6 for opencv-python.
RUN apt-get update \
    && apt-get install -y tesseract-ocr \
        python3 python3-distutils python3-pip \
        ffmpeg libsm6 libxext6 poppler-utils \
    && cd /usr/local/bin \
    && ln -s /usr/bin/python3 python \
    && pip3 --no-cache-dir install --upgrade pip \
    && rm -rf /var/lib/apt/lists/*
# Install pinned Python dependencies before copying the sources so this
# layer is cached across code-only rebuilds (fix: the original copied the
# whole context first, and pip-installed unpinned pytesseract/opencv/pillow
# even though requirements.txt pins them).
COPY requirements.txt .
RUN pip3 install --no-cache-dir -r requirements.txt pillow
COPY . .
EXPOSE 5000
CMD ["python3", "./app.py"]
# Python dependencies for the document-processing service.
# System packages needed at runtime (tesseract-ocr for pytesseract,
# poppler-utils for pdf2image) are installed in the Dockerfile.
Flask~=2.0.3
Werkzeug~=2.0.3
pdf2image~=1.16.0
opencv-python~=4.6.0.66
numpy~=1.19.5
pytesseract~=0.3.8
---
# Deployment: a single replica of the Flask OCR service.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: documentprocessing
spec:
  selector:
    matchLabels:
      app: documentprocessing
  replicas: 1
  template:
    metadata:
      labels:
        app: documentprocessing
    spec:
      containers:
        - env:
            - name: PORT
              value: "5000"
          image: /documentprocessing # replace with your Docker Hub account name
          name: documentprocessing
          ports:
            - containerPort: 5000
          resources:
            limits:
              ephemeral-storage: 2048M
              memory: 2048M
            requests:
              cpu: 100m
              ephemeral-storage: 2048M
              memory: 2048M
---
# Service: exposes the deployment inside the cluster on port 5000.
apiVersion: v1
kind: Service
metadata:
  name: documentprocessing-service
  labels:
    app: documentprocessing
spec:
  ports:
    - name: http
      port: 5000
  selector:
    app: documentprocessing
---
# APIRule (Kyma): publishes the service through the Kyma gateway,
# allowing unauthenticated GET/POST on every path.
apiVersion: gateway.kyma-project.io/v1alpha1
kind: APIRule
metadata:
  name: documentprocessing-api
  labels:
    app: documentprocessing
spec:
  gateway: kyma-gateway.kyma-system.svc.cluster.local
  rules:
    - accessStrategies:
        - handler: allow
      methods:
        - GET
        - POST
      path: /.*
  service:
    host: documentprocessing-subd-node..kyma.ondemand.com # replace with the values of your account
    name: documentprocessing-service
    port: 5000
Figure 3. SAP Build Apps flow for upload and page variable structure
// Goal is take the output of the 'Pick Files' flow function and submit to Flask route using multipart/form-data encoding and populate Page Variable with output as parsed JSON response.
// Declare 2 inputs.
// - First is the endpoint URL where we want to upload our file. We've hard coded this value.
// - Second is the file with it's 6 object properties from the output of 'Pick Files'
let { url, file } = inputs
// Get the path of the file selected in 'Pick Files'. Note - Only allowed single file upload so hard-coded to first i.e. file[0]
let path = await fetch(file[0].path)
console.log('Path:' + path)
// Transform path into blob
let blob = await path.blob()
// Declare the form we'll submit with the payload
const formData = new FormData()
// Append upload details into the formData
formData.append('file', blob, file[0].name)
try {
// POST the formData and parse the text/html (utf-8) response into JSON format
const response = await fetch(url, { method: 'POST', body: formData })
const parsed = await response.json();
return [0, {
CompanyName: parsed[0][1],
ACNARBN: parsed[1][1],
Address1: parsed[2][1],
TownCity: parsed[3][1],
State: parsed[4][1],
Postcode: parsed[5][1],
Country: parsed[6][1],
Phone: parsed[7][1],
Email: parsed[8][1],
BlockList: parsed[9][1],
Date: parsed[10][1]
} ]
} catch {
return [0, { result: "File Error" } ]
}
Figure 4. Binding configuration between JS Upload File and Page Variable
Figure 5. End-to-end test of file upload with OCR and response in frontend
You must be a registered user to add a comment. If you've already registered, sign in. Otherwise, register and sign in.
User | Count |
---|---|
16 | |
14 | |
13 | |
11 | |
11 | |
11 | |
10 | |
8 | |
7 | |
6 |