[init] initial commit
This commit is contained in:
commit
b775423b1d
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
|
@ -0,0 +1 @@
|
||||||
|
.venv
|
17
Dockerfile
Normal file
17
Dockerfile
Normal file
|
@ -0,0 +1,17 @@
|
||||||
|
# Container image for the verification-code solver (runs main.py).
FROM alpine:latest

# MAINTAINER is deprecated; the same information is carried as a label.
LABEL maintainer="Klesh Wong <klesh@kleshwong.com>"
LABEL description="Simple verification-code solver"

WORKDIR /data

# Point apk at the Aliyun mirror instead of the default CDN.
RUN sed -i 's|dl-cdn.alpinelinux.org|mirrors.aliyun.com|g' /etc/apk/repositories

# --no-cache fetches a fresh index and leaves nothing behind in /var/cache/apk,
# so a separate "apk update" and manual cache removal are unnecessary.
RUN apk add --no-cache tesseract-ocr py3-pip py3-numpy py3-pillow

# Copy the requirements before the code so the dependency layer stays cached
# when only main.py changes. COPY is preferred over ADD for plain local files.
COPY requirements.txt /data/requirements.txt
RUN pip3 install --no-cache-dir -r /data/requirements.txt --index-url=https://mirrors.aliyun.com/pypi/simple

COPY main.py /data/main.py

# Exec form: python3 runs as PID 1 and receives stop signals directly,
# instead of being wrapped in "/bin/sh -c".
CMD ["python3", "/data/main.py"]
|
180
main.py
Normal file
180
main.py
Normal file
|
@ -0,0 +1,180 @@
|
||||||
|
import pytesseract
|
||||||
|
import base64
|
||||||
|
import numpy as np
|
||||||
|
from flask import Flask, request
|
||||||
|
from flasgger import Swagger
|
||||||
|
from io import BytesIO
|
||||||
|
from PIL import Image
|
||||||
|
from tempfile import NamedTemporaryFile
|
||||||
|
from typing import Tuple
|
||||||
|
from os import environ
|
||||||
|
|
||||||
|
|
||||||
|
# Flask application object that the route decorators below attach to.
app = Flask(__name__)

# Title shown by the generated Swagger documentation.
app.config.update(SWAGGER={'title': 'Simple verification-code solver'})

# Wire flasgger into the app; the route docstrings carry the Swagger specs.
Swagger(app)
|
||||||
|
|
||||||
|
|
||||||
|
def remove_noise(img_array: np.ndarray, dominant_color: Tuple[int, int, int], threshold=10):
    """Return a copy of *img_array* with coloured pixels and small specks erased.

    Two passes are applied to the copy:

    1. Every pixel whose first three channels are not all equal (i.e. it is
       not a shade of grey) is overwritten with *dominant_color*.
    2. Every 8-connected group of pixels that differ from *dominant_color*
       and contains at most *threshold* pixels is overwritten with
       *dominant_color*.

    Args:
        img_array: Image data of shape (height, width, channels) with at
            least three channels.
        dominant_color: Background colour, one value per channel.
        threshold: Largest component size (in pixels) that is still treated
            as noise and removed.

    Returns:
        The cleaned copy; the input array is never modified.
    """
    img_array = img_array.copy()
    h, w, _ = img_array.shape

    # Pass 1: anything that is not grey becomes background.
    not_grey = (
        (img_array[..., 0] != img_array[..., 1])
        | (img_array[..., 1] != img_array[..., 2])
    )
    img_array[not_grey] = dominant_color

    # Background pixels never belong to a component, so they start out as
    # "seen". Plain nested lists are much cheaper to index in a Python loop
    # than a NumPy array.
    is_background = np.all(img_array == np.asarray(dominant_color), axis=-1)
    seen = is_background.tolist()

    neighbours = (
        (-1, -1), (-1, 0), (-1, +1),
        (0, -1), (0, +1),
        (+1, -1), (+1, 0), (+1, +1),
    )

    def find_connected(start_y: int, start_x: int):
        """Collect the unseen 8-connected component containing the start pixel."""
        # An explicit stack replaces recursion: one Python frame per pixel
        # overflows the interpreter's recursion limit on large components.
        seen[start_y][start_x] = True
        component = [(start_y, start_x)]
        stack = [(start_y, start_x)]
        while stack:
            cy, cx = stack.pop()
            for dy, dx in neighbours:
                ny, nx = cy + dy, cx + dx
                if 0 <= ny < h and 0 <= nx < w and not seen[ny][nx]:
                    seen[ny][nx] = True
                    component.append((ny, nx))
                    stack.append((ny, nx))
        return component

    # Pass 2: erase every component that is small enough to be noise.
    for y in range(h):
        row_seen = seen[y]
        for x in range(w):
            if row_seen[x]:
                continue
            component = find_connected(y, x)
            if len(component) > threshold:
                continue
            for py, px in component:
                img_array[py, px] = dominant_color

    return img_array
|
||||||
|
|
||||||
|
|
||||||
|
def sanitize(pil_img: Image.Image) -> np.ndarray:
    """Prepare a PIL image for OCR and return it as a de-noised array.

    The most frequent colour of the whole image is taken as the background,
    a one-pixel border is trimmed from every side, and the remainder is
    passed through remove_noise().

    Args:
        pil_img: The decoded image.

    Returns:
        The cleaned pixel array (border removed).

    Raises:
        ValueError: If the image contains no pixels.
    """
    # remove_noise() unpacks a (height, width, channels) shape, so palette,
    # greyscale and bilevel images are expanded to RGB first. RGB and RGBA
    # images are passed through unchanged.
    if pil_img.mode not in ('RGB', 'RGBA'):
        pil_img = pil_img.convert('RGB')

    data = np.asarray(pil_img)
    clip = data[1:-1, 1:-1]

    # getcolors() returns None once the image has more distinct colours than
    # maxcolors (default 256); the pixel count is an upper bound that can
    # never be exceeded.
    width, height = pil_img.size
    color_counts = pil_img.getcolors(maxcolors=max(width * height, 1))
    if not color_counts:
        raise ValueError("image contains no pixels")

    # Entries are (count, colour); the last one after sorting is the most common.
    count_colors = sorted(color_counts, key=lambda cc: cc[0])
    dominant_color = np.asarray(count_colors[-1][1])
    return remove_noise(clip, dominant_color)
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/verification-code', methods=['POST'])
def solve_verification_code():
    """
    Solve verification code
    ---
    tags:
      - Verification Code
    parameters:
      - in: formData
        name: image
        type: file
        required: true
        description: Image file
      - in: formData
        name: timeout
        type: integer
        required: false
        description: OCR timeout in seconds, defaults to 300 when missing or not positive
      - in: header
        name: X-Key
        type: string
        required: true
    responses:
      200:
        description: text
      400:
        description: Missing or invalid upload, or invalid timeout
      401:
        description: Missing or wrong X-Key
      500:
        description: Error message
    """
    import hmac

    # Fail closed: with X_KEY unset, a plain "!=" comparison would accept any
    # request that simply omits the header (None != None is False).
    expected_key = environ.get('X_KEY')
    x_key = request.headers.get('X-Key')
    if not expected_key or x_key is None:
        return "Unauthorized", 401
    # Constant-time comparison; bytes are used because compare_digest only
    # accepts ASCII-only str.
    if not hmac.compare_digest(x_key.encode('utf-8'), expected_key.encode('utf-8')):
        return "Unauthorized", 401

    if 'image' not in request.files:
        return "No file uploaded", 400

    image_file = request.files['image']

    if image_file.filename == '':
        return "No file name", 400

    # An absent, empty or non-positive timeout falls back to the default;
    # a value that is not an integer is a client error rather than a crash.
    timeout = 300
    raw_timeout = request.form.get('timeout')
    if raw_timeout:
        try:
            requested_timeout = int(raw_timeout)
        except ValueError:
            return "Invalid timeout", 400
        if requested_timeout > 0:
            timeout = requested_timeout

    try:
        img = Image.open(BytesIO(image_file.read()))
        # Image.open() is lazy; load() forces decoding so truncated or
        # corrupt data is rejected here instead of failing later.
        img.load()
    except OSError:
        return "Invalid image", 400

    ary = sanitize(img)
    txt = pytesseract.image_to_string(ary, timeout=timeout)

    return txt
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/verification-code/base64', methods=['POST'])
def sove_verification_code_base64():
    """
    Solve verification code from base64-encoded image
    ---
    tags:
      - Verification Code
    parameters:
      - in: formData
        name: image
        type: string
        required: true
        description: Base64-encoded image
      - in: formData
        name: timeout
        type: integer
        required: false
        description: OCR timeout in seconds, defaults to 300 when missing or not positive
      - in: header
        name: X-Key
        type: string
        required: true
    responses:
      200:
        description: text
      400:
        description: Missing or invalid image data, or invalid timeout
      401:
        description: Missing or wrong X-Key
      500:
        description: Error message
    """
    # NOTE: the function name (typo included) is kept because Flask uses it
    # as the endpoint name.
    import hmac

    # Fail closed: with X_KEY unset, a plain "!=" comparison would accept any
    # request that simply omits the header (None != None is False).
    expected_key = environ.get('X_KEY')
    x_key = request.headers.get('X-Key')
    if not expected_key or x_key is None:
        return "Unauthorized", 401
    # Constant-time comparison; bytes are used because compare_digest only
    # accepts ASCII-only str.
    if not hmac.compare_digest(x_key.encode('utf-8'), expected_key.encode('utf-8')):
        return "Unauthorized", 401

    image = request.form.get('image')
    if not image:
        return 'No image data', 400

    # An absent, empty or non-positive timeout falls back to the default;
    # a value that is not an integer is a client error rather than a crash.
    timeout = 300
    raw_timeout = request.form.get('timeout')
    if raw_timeout:
        try:
            requested_timeout = int(raw_timeout)
        except ValueError:
            return 'Invalid timeout', 400
        if requested_timeout > 0:
            timeout = requested_timeout

    try:
        # binascii.Error (bad padding) is a ValueError subclass, as is the
        # error raised for non-ASCII input.
        raw_image = base64.b64decode(image)
    except ValueError:
        return 'Invalid base64 data', 400

    try:
        img = Image.open(BytesIO(raw_image))
        # Image.open() is lazy; load() forces decoding so truncated or
        # corrupt data is rejected here instead of failing later.
        img.load()
    except OSError:
        return 'Invalid image', 400

    ary = sanitize(img)
    txt = pytesseract.image_to_string(ary, timeout=timeout)

    return txt
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Debug mode is enabled only when FLASK_ENV is exactly 'development'.
    debug_mode = environ.get("FLASK_ENV") == 'development'
    # Listen on all interfaces, port 8000.
    app.run(host='0.0.0.0', port=8000, debug=debug_mode)
|
6
requirements.txt
Normal file
6
requirements.txt
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
pillow
|
||||||
|
pytesseract
|
||||||
|
flask
|
||||||
|
flasgger
|
||||||
|
numpy
|
||||||
|
waitress
|
Loading…
Reference in New Issue
Block a user