[init] initial commit

This commit is contained in:
Klesh Wong 2021-07-12 18:30:35 +08:00
commit b775423b1d
4 changed files with 204 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
.venv

17
Dockerfile Normal file
View File

@ -0,0 +1,17 @@
# Container image for the verification-code solver service (main.py).
FROM alpine:latest
# MAINTAINER is deprecated; a "maintainer" label carries the same information.
LABEL maintainer="Klesh Wong <klesh@kleshwong.com>"
LABEL description="Simple verification-code solver"
WORKDIR /data
# Fetch apk packages from the Aliyun mirror instead of the default CDN.
RUN sed -i 's|dl-cdn.alpinelinux.org|mirrors.aliyun.com|g' /etc/apk/repositories
# --no-cache downloads a fresh index and keeps nothing under /var/cache/apk,
# so a separate "apk update" and manual cache cleanup are unnecessary.
RUN apk add --no-cache tesseract-ocr py3-pip py3-numpy py3-pillow
# COPY is the recommended instruction for plain local files (ADD also unpacks
# archives and fetches URLs, neither of which is wanted here).
COPY requirements.txt /data/requirements.txt
# NOTE(review): recent Alpine releases may reject system-wide pip installs
# (PEP 668, "externally-managed-environment") - confirm this step still builds
# on the alpine tag in use.
RUN pip3 install --no-cache-dir -r /data/requirements.txt --index-url=https://mirrors.aliyun.com/pypi/simple
COPY main.py /data/main.py
# Exec form: python3 runs as PID 1 and receives stop signals directly instead
# of being wrapped in "/bin/sh -c".
CMD ["python3", "/data/main.py"]

180
main.py Normal file
View File

@ -0,0 +1,180 @@
import base64
import hmac
from io import BytesIO
from os import environ
from tempfile import NamedTemporaryFile
from typing import Tuple

import numpy as np
import pytesseract
from flask import Flask, request
from flasgger import Swagger
from PIL import Image
# Flask application object; the route decorators in this module register on it.
app = Flask(__name__)
# Title shown by the flasgger-generated API documentation.
app.config.update(SWAGGER={'title': 'Simple verification-code solver'})
# Attach flasgger to the application.
Swagger(app)
def remove_noise(img_array: np.ndarray, dominant_color: Tuple[int, int, int], threshold=10):
    """Return a cleaned copy of *img_array* with colour and speckle noise erased.

    Two passes are applied to a copy of the input (the caller's array is never
    modified):

    1. Every pixel whose first three channels are not all equal (i.e. that is
       not a shade of grey) is overwritten with ``dominant_color``.
    2. Every 8-connected group of non-dominant pixels containing at most
       ``threshold`` pixels is overwritten with ``dominant_color``.

    Args:
        img_array: Image data of shape ``(height, width, channels)`` with at
            least three channels.
        dominant_color: Background colour, one value per channel.
        threshold: Largest component size (in pixels) still treated as noise.

    Returns:
        The cleaned array, same shape and dtype as ``img_array``.

    Raises:
        ValueError: If ``img_array`` is not three-dimensional.
    """
    img_array = img_array.copy()
    h, w, _ = img_array.shape
    dominant = np.asarray(dominant_color)
    neighbours = (
        (-1, -1), (-1, 0), (-1, +1),
        (0, -1), (0, +1),
        (+1, -1), (+1, 0), (+1, +1),
    )

    # Pass 1: paint every non-grey pixel with the background colour.
    colored = (
        (img_array[..., 0] != img_array[..., 1])
        | (img_array[..., 1] != img_array[..., 2])
    )
    if colored.any():
        img_array[colored] = dominant

    # A pixel is background only when it matches the dominant colour on every
    # channel; a colour with a different channel count never matches.
    if dominant.shape == img_array.shape[2:]:
        background = (img_array == dominant).all(axis=-1)
    else:
        background = np.zeros((h, w), dtype=bool)

    # Background pixels never start or extend a component, so they can be
    # treated as visited from the outset. Plain nested lists are used because
    # per-element access is much cheaper than indexing a NumPy array.
    visited = background.tolist()

    # Pass 2: flood-fill each component with an explicit stack. A recursive
    # fill would exceed Python's recursion limit on components of more than
    # roughly a thousand pixels.
    for start_y in range(h):
        for start_x in range(w):
            if visited[start_y][start_x]:
                continue
            visited[start_y][start_x] = True
            component = [(start_y, start_x)]
            stack = [(start_y, start_x)]
            while stack:
                cy, cx = stack.pop()
                for dy, dx in neighbours:
                    ny, nx = cy + dy, cx + dx
                    if 0 <= ny < h and 0 <= nx < w and not visited[ny][nx]:
                        visited[ny][nx] = True
                        component.append((ny, nx))
                        stack.append((ny, nx))
            if len(component) > threshold:
                continue
            for py, px in component:
                img_array[py, px] = dominant
    return img_array
def sanitize(pil_img: Image.Image) -> np.ndarray:
    """Convert a PIL image into a denoised array ready for OCR.

    The outermost one-pixel border is cropped away, the most frequent colour
    of the image is taken as the background, and the result is passed through
    ``remove_noise``.

    Args:
        pil_img: Source image. Images with fewer than three bands (greyscale,
            palette, bilevel, ...) are converted to RGB first, because the
            noise removal compares the first three channels of every pixel.

    Returns:
        The array returned by ``remove_noise`` for the cropped image.
    """
    if len(pil_img.getbands()) < 3:
        pil_img = pil_img.convert('RGB')
    data = np.asarray(pil_img)
    clip = data[1:-1, 1:-1]
    # getcolors() returns None when the image holds more colours than
    # ``maxcolors`` (256 by default); the pixel count is an upper bound on the
    # number of distinct colours, so the result is always a list.
    count_colors = sorted(
        pil_img.getcolors(maxcolors=pil_img.width * pil_img.height),
        key=lambda cc: cc[0],
    )
    dominant_color = np.asarray(count_colors[-1][1])
    return remove_noise(clip, dominant_color)
@app.route('/verification-code', methods=['POST'])
def solve_verification_code():
    """
    Solve verification code
    ---
    tags:
      - Verification Code
    parameters:
      - in: formData
        name: image
        type: file
        required: true
        description: Image file
      - in: formData
        name: timeout
        type: integer
        required: false
        description: Timeout in seconds (default 300)
      - in: header
        name: X-Key
        type: string
        required: true
    responses:
      500:
        description: Error message
      400:
        description: Missing or unreadable image, or invalid timeout
      401:
        description: Missing or wrong X-Key header
      200:
        description: text
    """
    # Fail closed: when X_KEY is not configured nobody is authorized. A plain
    # `!=` comparison would accept a request without the header in that case
    # (None == None). compare_digest keeps the comparison constant-time.
    expected_key = environ.get('X_KEY')
    x_key = request.headers.get('X-Key')
    if not expected_key or x_key is None or not hmac.compare_digest(
            x_key.encode('utf-8'), expected_key.encode('utf-8')):
        return "Unauthorized", 401

    if 'image' not in request.files:
        return "No file uploaded", 400
    image_file = request.files['image']
    if image_file.filename == '':
        return "No file name", 400

    # An absent, empty or non-positive timeout falls back to the default;
    # a non-numeric one is a client error rather than a server crash.
    timeout = 300
    raw_timeout = request.form.get('timeout')
    if raw_timeout:
        try:
            requested = int(raw_timeout)
        except ValueError:
            return "Invalid timeout", 400
        if requested > 0:
            timeout = requested

    try:
        img = Image.open(BytesIO(image_file.read()))
        # Image.open is lazy; load() forces decoding so that corrupt or
        # truncated uploads are rejected here.
        img.load()
    except OSError:
        return "Invalid image", 400

    ary = sanitize(img)
    txt = pytesseract.image_to_string(ary, timeout=timeout)
    return txt
@app.route('/verification-code/base64', methods=['POST'])
def sove_verification_code_base64():
    """
    Solve verification code from base64-encoded image
    ---
    tags:
      - Verification Code
    parameters:
      - in: formData
        name: image
        type: string
        required: true
        description: Base64-encoded image
      - in: formData
        name: timeout
        type: integer
        required: false
        description: Timeout in seconds (default 300)
      - in: header
        name: X-Key
        type: string
        required: true
    responses:
      500:
        description: Error message
      400:
        description: Missing or undecodable image, or invalid timeout
      401:
        description: Missing or wrong X-Key header
      200:
        description: text
    """
    # NOTE: the function name (sic) doubles as the Flask endpoint name, so it
    # is kept unchanged.

    # Fail closed: when X_KEY is not configured nobody is authorized. A plain
    # `!=` comparison would accept a request without the header in that case
    # (None == None). compare_digest keeps the comparison constant-time.
    expected_key = environ.get('X_KEY')
    x_key = request.headers.get('X-Key')
    if not expected_key or x_key is None or not hmac.compare_digest(
            x_key.encode('utf-8'), expected_key.encode('utf-8')):
        return "Unauthorized", 401

    image = request.form.get('image')
    if not image:
        return 'No image data', 400

    # An absent, empty or non-positive timeout falls back to the default;
    # a non-numeric one is a client error rather than a server crash.
    timeout = 300
    raw_timeout = request.form.get('timeout')
    if raw_timeout:
        try:
            requested = int(raw_timeout)
        except ValueError:
            return "Invalid timeout", 400
        if requested > 0:
            timeout = requested

    try:
        # b64decode raises binascii.Error (a ValueError) on malformed input;
        # Image.open / load raise OSError on data that is not a valid image.
        img = Image.open(BytesIO(base64.b64decode(image)))
        img.load()
    except (ValueError, OSError):
        return "Invalid image", 400

    ary = sanitize(img)
    txt = pytesseract.image_to_string(ary, timeout=timeout)
    return txt
if __name__ == "__main__":
    # Debug mode is enabled only when FLASK_ENV is set to "development".
    debug_mode = environ.get("FLASK_ENV") == 'development'
    # Listen on all interfaces, port 8000.
    app.run(host='0.0.0.0', port=8000, debug=debug_mode)

6
requirements.txt Normal file
View File

@ -0,0 +1,6 @@
pillow
pytesseract
flask
flasgger
numpy
waitress