[init] initial commit
This commit is contained in:
commit
b775423b1d
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
.venv
|
17
Dockerfile
Normal file
17
Dockerfile
Normal file
|
@ -0,0 +1,17 @@
|
|||
# Image for the verification-code solver service (runs /data/main.py).
# NOTE(review): `latest` is a moving tag; pin an Alpine release for reproducible builds.
FROM alpine:latest

# MAINTAINER is deprecated; the maintainer is recorded as a label instead.
LABEL maintainer="Klesh Wong <klesh@kleshwong.com>"
LABEL description="Simple verification-code solver"

WORKDIR /data

# Point apk at the Aliyun mirror.
RUN sed -i 's|dl-cdn.alpinelinux.org|mirrors.aliyun.com|g' /etc/apk/repositories

# --no-cache fetches a fresh index and leaves nothing behind in /var/cache/apk,
# so neither a separate `apk update` nor a manual cache cleanup is needed.
RUN apk add --no-cache tesseract-ocr py3-pip py3-numpy py3-pillow

# Copy requirements first so the dependency layer is rebuilt only when they change.
# COPY is preferred over ADD for plain local files (no archive/URL handling wanted).
COPY requirements.txt /data/requirements.txt
RUN pip3 install --no-cache-dir -r /data/requirements.txt --index-url=https://mirrors.aliyun.com/pypi/simple

COPY main.py /data/main.py

# Exec form: python3 runs as PID 1 and receives stop signals directly
# instead of being wrapped in `/bin/sh -c`.
CMD ["python3", "/data/main.py"]
|
180
main.py
Normal file
180
main.py
Normal file
|
@ -0,0 +1,180 @@
|
|||
import base64
import hmac
from io import BytesIO
from os import environ
from tempfile import NamedTemporaryFile
from typing import Tuple

import numpy as np
import pytesseract
from flasgger import Swagger
from flask import Flask, request
from PIL import Image
|
||||
|
||||
|
||||
# Module-level Flask application; the route decorators below register on it.
app = Flask(__name__)

# The SWAGGER config entry carries the API title; Swagger(app) attaches
# flasgger to the application.
app.config.update(SWAGGER={'title': 'Simple verification-code solver'})

Swagger(app)
|
||||
|
||||
|
||||
def remove_noise(img_array: np.ndarray, dominant_color: Tuple[int, int, int],
                 threshold: int = 10) -> np.ndarray:
    """Return a copy of *img_array* with coloured pixels and small specks erased.

    Two passes are applied to the copy:

    1. Every pixel whose first three channels are not all equal (i.e. it is
       not a shade of gray) is overwritten with ``dominant_color``.
    2. Remaining non-dominant pixels are grouped into 8-connected components;
       every component of at most ``threshold`` pixels is overwritten with
       ``dominant_color``.

    Args:
        img_array: Image data of shape ``(height, width, channels)`` with at
            least three channels. It is not modified.
        dominant_color: Background colour, one value per channel.
        threshold: Largest component size (in pixels) that is still erased.

    Returns:
        The cleaned copy, same shape and dtype as ``img_array``.

    Raises:
        ValueError: If ``img_array`` is not 3-D with at least three channels.
    """
    if img_array.ndim != 3 or img_array.shape[2] < 3:
        raise ValueError(
            "img_array must have shape (height, width, channels>=3), "
            f"got {img_array.shape}"
        )

    img_array = img_array.copy()
    h, w = img_array.shape[:2]
    dominant = np.asarray(dominant_color)

    # Pass 1: paint every non-gray pixel with the background colour.
    colored = (img_array[..., 0] != img_array[..., 1]) | (
        img_array[..., 1] != img_array[..., 2]
    )
    img_array[colored] = dominant

    # pending[y][x] is True for foreground pixels not yet assigned to a
    # component. Plain nested lists keep the per-pixel lookups cheap.
    pending = np.any(img_array != dominant, axis=-1).tolist()

    neighbours = (
        (-1, -1), (-1, 0), (-1, +1),
        (0, -1), (0, +1),
        (+1, -1), (+1, 0), (+1, +1),
    )

    # Pass 2: iterative flood fill. An explicit stack is used instead of
    # recursion so that large connected regions (more pixels than the
    # interpreter recursion limit) cannot raise RecursionError.
    for y0 in range(h):
        for x0 in range(w):
            if not pending[y0][x0]:
                continue
            pending[y0][x0] = False
            stack = [(y0, x0)]
            component = []
            while stack:
                y, x = stack.pop()
                component.append((y, x))
                for dy, dx in neighbours:
                    ny, nx = y + dy, x + dx
                    if 0 <= ny < h and 0 <= nx < w and pending[ny][nx]:
                        pending[ny][nx] = False
                        stack.append((ny, nx))

            if len(component) > threshold:
                continue
            for y, x in component:
                img_array[y, x] = dominant

    return img_array
|
||||
|
||||
|
||||
def sanitize(pil_img: Image.Image) -> np.ndarray:
    """Prepare an image for OCR by dropping its border and removing noise.

    The image is converted to RGB, its outermost one-pixel frame is cut off,
    and the most frequent colour of the whole image is used as the background
    colour passed to ``remove_noise``.

    Args:
        pil_img: The decoded image, in any PIL mode.

    Returns:
        The array returned by ``remove_noise`` for the clipped RGB data.
    """
    # Palette and grayscale images would otherwise yield a 2-D array, which
    # remove_noise cannot process; RGB guarantees (height, width, 3).
    rgb_img = pil_img.convert('RGB')
    data = np.asarray(rgb_img)
    clip = data[1:-1, 1:-1]

    # getcolors() returns None when the image has more distinct colours than
    # maxcolors (default 256), so allow one colour per pixel.
    width, height = rgb_img.size
    color_counts = rgb_img.getcolors(maxcolors=max(1, width * height))

    # Entries are (count, colour); after an ascending sort the last one is
    # the most frequent colour.
    count_colors = sorted(color_counts, key=lambda cc: cc[0])
    dominant_color = np.asarray(count_colors[-1][1])
    return remove_noise(clip, dominant_color)
|
||||
|
||||
|
||||
# POST /verification-code
# Reads the uploaded "image" file, cleans it with sanitize() and returns the
# text recognised by tesseract. Requests must carry an X-Key header equal to
# the X_KEY environment variable.
@app.route('/verification-code', methods=['POST'])
def solve_verification_code():
    """
    Solve verification code
    ---
    tags:
      - Verification Code
    parameters:
      - in: formData
        name: image
        type: file
        required: true
        description: Image file
      - in: formData
        name: timeout
        type: integer
        required: false
        description: Timeout
      - in: header
        name: X-Key
        type: string
        required: true
    responses:
      500:
        description: Error message
      401:
        description: Missing or wrong X-Key
      400:
        description: Missing or invalid image, or invalid timeout
      200:
        description: text
    """

    # Fail closed: with X_KEY unset, a request without the header would
    # otherwise compare None to None and be let through.
    expected_key = environ.get('X_KEY')
    x_key = request.headers.get('X-Key')
    if not expected_key or x_key is None:
        return "Unauthorized", 401
    # Constant-time comparison; bytes are used because compare_digest
    # rejects non-ASCII str arguments.
    if not hmac.compare_digest(x_key.encode('utf-8'), expected_key.encode('utf-8')):
        return "Unauthorized", 401

    if 'image' not in request.files:
        return "No file uploaded", 400

    image_file = request.files['image']

    if image_file.filename == '':
        return "No file name", 400

    # Default timeout; a client value replaces it only when it is a positive
    # integer. A non-numeric value is a client error, not a server crash.
    timeout = 300
    if "timeout" in request.form:
        try:
            requested_timeout = int(request.form['timeout'])
        except ValueError:
            return "Invalid timeout", 400
        if requested_timeout > 0:
            timeout = requested_timeout

    # PIL raises OSError (including UnidentifiedImageError) for data it cannot
    # decode; pixel data is loaded lazily, so sanitize() is covered as well.
    try:
        img = Image.open(BytesIO(image_file.read()))
        ary = sanitize(img)
    except OSError:
        return "Invalid image", 400

    txt = pytesseract.image_to_string(ary, timeout=timeout)

    return txt
|
||||
|
||||
|
||||
# POST /verification-code/base64
# Same as /verification-code, but the image arrives as a base64 string in the
# "image" form field. The function name (including its spelling) is kept
# because Flask derives the endpoint name from it.
@app.route('/verification-code/base64', methods=['POST'])
def sove_verification_code_base64():
    """
    Solve verification code from base64-encoded image
    ---
    tags:
      - Verification Code
    parameters:
      - in: formData
        name: image
        type: string
        required: true
        description: Base64-encoded image
      - in: formData
        name: timeout
        type: integer
        required: false
        description: Timeout
      - in: header
        name: X-Key
        type: string
        required: true
    responses:
      500:
        description: Error message
      401:
        description: Missing or wrong X-Key
      400:
        description: Missing or invalid image data, or invalid timeout
      200:
        description: text
    """

    # Fail closed: with X_KEY unset, a request without the header would
    # otherwise compare None to None and be let through.
    expected_key = environ.get('X_KEY')
    x_key = request.headers.get('X-Key')
    if not expected_key or x_key is None:
        return "Unauthorized", 401
    # Constant-time comparison; bytes are used because compare_digest
    # rejects non-ASCII str arguments.
    if not hmac.compare_digest(x_key.encode('utf-8'), expected_key.encode('utf-8')):
        return "Unauthorized", 401

    image = request.form.get('image')
    if not image:
        return 'No image data', 400

    # Default timeout; a client value replaces it only when it is a positive
    # integer. A non-numeric value is a client error, not a server crash.
    timeout = 300
    if "timeout" in request.form:
        try:
            requested_timeout = int(request.form['timeout'])
        except ValueError:
            return 'Invalid timeout', 400
        if requested_timeout > 0:
            timeout = requested_timeout

    # binascii.Error, raised for malformed base64, is a ValueError subclass.
    try:
        raw = base64.b64decode(image)
    except ValueError:
        return 'Invalid base64 data', 400

    # PIL raises OSError (including UnidentifiedImageError) for data it cannot
    # decode; pixel data is loaded lazily, so sanitize() is covered as well.
    try:
        img = Image.open(BytesIO(raw))
        ary = sanitize(img)
    except OSError:
        return 'Invalid image', 400

    txt = pytesseract.image_to_string(ary, timeout=timeout)

    return txt
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Debug mode is switched on only when FLASK_ENV is exactly "development".
    is_development = environ.get("FLASK_ENV") == 'development'
    # Listen on all interfaces, port 8000.
    app.run(host='0.0.0.0', port=8000, debug=is_development)
|
6
requirements.txt
Normal file
6
requirements.txt
Normal file
|
@ -0,0 +1,6 @@
|
|||
pillow
|
||||
pytesseract
|
||||
flask
|
||||
flasgger
|
||||
numpy
|
||||
waitress
|
Loading…
Reference in New Issue
Block a user