verification-code-solver/main.py
2021-07-12 18:30:35 +08:00

181 lines
4.3 KiB
Python

import base64
import hmac
from io import BytesIO
from os import environ
from tempfile import NamedTemporaryFile
from typing import Tuple

import numpy as np
import pytesseract
from flasgger import Swagger
from flask import Flask, request
from PIL import Image

# Flask application object; the route decorators below register on it.
app = Flask(__name__)
# Title shown by the flasgger-generated Swagger documentation.
app.config.update(SWAGGER={'title': 'Simple verification-code solver'})
# Attach flasgger to the application.
Swagger(app)
def remove_noise(img_array: np.ndarray, dominant_color: Tuple[int, int, int], threshold=10):
    """Blank out coloured pixels and small specks, returning a cleaned copy.

    Two passes are applied to a copy of ``img_array`` (the input itself is
    never modified):

    1. Every pixel whose first three channels are not all equal (i.e. it is
       not a shade of grey) is overwritten with ``dominant_color``.
    2. The remaining pixels that differ from ``dominant_color`` are grouped
       into 8-connected components; every component of at most ``threshold``
       pixels is overwritten with ``dominant_color``.

    Args:
        img_array: Image data of shape ``(height, width, channels)`` with at
            least three channels.
        dominant_color: Background colour, one value per channel.
        threshold: Largest component size (in pixels) still treated as noise.

    Returns:
        A new array with the same shape and dtype as ``img_array``.
    """
    img_array = img_array.copy()
    h, w, _ = img_array.shape

    # Offsets of the eight neighbours of a pixel (8-connectivity).
    neighbours = (
        (-1, -1), (-1, 0), (-1, +1),
        (0, -1), (0, +1),
        (+1, -1), (+1, 0), (+1, +1),
    )

    # Pass 1: anything that is not grey becomes background.
    off_grey = (
        (img_array[..., 0] != img_array[..., 1])
        | (img_array[..., 1] != img_array[..., 2])
    )
    img_array[off_grey] = dominant_color

    # Pass 2: ``pending`` marks foreground pixels not yet assigned to a
    # component. Only pixels already removed from ``pending`` are ever
    # repainted, so the mask can be computed once up front.
    pending = np.any(img_array != np.asarray(dominant_color), axis=-1)
    seed_rows, seed_cols = np.nonzero(pending)
    for seed_y, seed_x in zip(seed_rows.tolist(), seed_cols.tolist()):
        if not pending[seed_y, seed_x]:
            continue  # already swallowed by an earlier component

        # Explicit stack instead of recursion: a component can easily hold
        # more pixels than the interpreter's recursion limit allows frames.
        pending[seed_y, seed_x] = False
        stack = [(seed_y, seed_x)]
        component = []
        while stack:
            cy, cx = stack.pop()
            component.append((cy, cx))
            for dy, dx in neighbours:
                ny, nx = cy + dy, cx + dx
                if 0 <= ny < h and 0 <= nx < w and pending[ny, nx]:
                    pending[ny, nx] = False
                    stack.append((ny, nx))

        if len(component) > threshold:
            continue  # large enough to be real content, keep it
        for cy, cx in component:
            img_array[cy, cx] = dominant_color

    return img_array
def sanitize(pil_img: "Image.Image") -> np.ndarray:
    """Convert a PIL image into a de-noised pixel array for OCR.

    The one-pixel outer border is cropped away, the most frequent colour of
    the whole (uncropped) image is taken as the background colour, and the
    cropped data is passed through ``remove_noise`` with that colour.

    Args:
        pil_img: Image to clean. Images with fewer than three bands (for
            example greyscale or palette images) are converted to RGB first,
            because ``remove_noise`` needs at least three channels.

    Returns:
        The cleaned array of shape ``(height - 2, width - 2, channels)``.
    """
    if len(pil_img.getbands()) < 3:
        pil_img = pil_img.convert("RGB")

    data = np.asarray(pil_img)
    # Drop the outermost pixel on every side.
    clip = data[1:-1, 1:-1]

    # getcolors() returns None once the image holds more than ``maxcolors``
    # distinct colours (default 256); allowing one colour per pixel makes
    # that impossible.
    width, height = pil_img.size
    color_counts = pil_img.getcolors(maxcolors=max(1, width * height))
    count_colors = sorted(color_counts, key=lambda cc: cc[0])
    # Entries are (count, colour); the last one after sorting is the most common.
    dominant_color = np.asarray(count_colors[-1][1])
    return remove_noise(clip, dominant_color)
@app.route('/verification-code', methods=['POST'])
def solve_verification_code():
    """
    Solve verification code
    ---
    tags:
      - Verification Code
    parameters:
      - in: formData
        name: image
        type: file
        required: true
        description: Image file
      - in: formData
        name: timeout
        type: integer
        required: false
        description: Timeout
      - in: header
        name: X-Key
        type: string
        required: true
    responses:
      200:
        description: text
      400:
        description: Missing or invalid image, or invalid timeout
      401:
        description: Missing or wrong X-Key
      500:
        description: Error message
    """
    # Fail closed: with X_KEY unset, a request that simply omits the header
    # must not be accepted. compare_digest keeps the comparison constant-time;
    # both sides are encoded because it rejects non-ASCII str arguments.
    expected_key = environ.get('X_KEY')
    provided_key = request.headers.get('X-Key')
    if (
        not expected_key
        or provided_key is None
        or not hmac.compare_digest(
            provided_key.encode('utf-8'), expected_key.encode('utf-8')
        )
    ):
        return "Unauthorized", 401

    if 'image' not in request.files:
        return "No file uploaded", 400
    image_file = request.files['image']
    if image_file.filename == '':
        return "No file name", 400

    # Default OCR timeout; only a positive client-supplied value overrides it.
    timeout = 300
    raw_timeout = request.form.get('timeout')
    if raw_timeout is not None and raw_timeout.strip():
        try:
            requested_timeout = int(raw_timeout)
        except ValueError:
            return "Invalid timeout", 400
        if requested_timeout > 0:
            timeout = requested_timeout

    try:
        img = Image.open(BytesIO(image_file.read()))
        # Image.open is lazy; force decoding here so corrupt or truncated
        # uploads are reported as a client error.
        img.load()
    except OSError:
        return "Invalid image", 400

    ary = sanitize(img)
    txt = pytesseract.image_to_string(ary, timeout=timeout)
    return txt
@app.route('/verification-code/base64', methods=['POST'])
def sove_verification_code_base64():
    """
    Solve verification code from base64-encoded image
    ---
    tags:
      - Verification Code
    parameters:
      - in: formData
        name: image
        type: string
        required: true
        description: Base64-encoded image
      - in: formData
        name: timeout
        type: integer
        required: false
        description: Timeout
      - in: header
        name: X-Key
        type: string
        required: true
    responses:
      200:
        description: text
      400:
        description: Missing or invalid image data, or invalid timeout
      401:
        description: Missing or wrong X-Key
      500:
        description: Error message
    """
    # NOTE: the function name (missing "l") doubles as the Flask endpoint
    # name, so it is kept as-is for compatibility.

    # Fail closed: with X_KEY unset, a request that simply omits the header
    # must not be accepted. compare_digest keeps the comparison constant-time;
    # both sides are encoded because it rejects non-ASCII str arguments.
    expected_key = environ.get('X_KEY')
    provided_key = request.headers.get('X-Key')
    if (
        not expected_key
        or provided_key is None
        or not hmac.compare_digest(
            provided_key.encode('utf-8'), expected_key.encode('utf-8')
        )
    ):
        return "Unauthorized", 401

    image = request.form.get('image')
    if not image:
        return 'No image data', 400

    # Default OCR timeout; only a positive client-supplied value overrides it.
    timeout = 300
    raw_timeout = request.form.get('timeout')
    if raw_timeout is not None and raw_timeout.strip():
        try:
            requested_timeout = int(raw_timeout)
        except ValueError:
            return 'Invalid timeout', 400
        if requested_timeout > 0:
            timeout = requested_timeout

    try:
        # binascii.Error (bad padding etc.) is a ValueError subclass.
        raw_image = base64.b64decode(image)
    except ValueError:
        return 'Invalid base64 data', 400

    try:
        img = Image.open(BytesIO(raw_image))
        # Image.open is lazy; force decoding here so corrupt or truncated
        # payloads are reported as a client error.
        img.load()
    except OSError:
        return 'Invalid image', 400

    ary = sanitize(img)
    txt = pytesseract.image_to_string(ary, timeout=timeout)
    return txt
if __name__ == "__main__":
    # Debug mode is enabled only when FLASK_ENV is exactly "development".
    debug_enabled = environ.get("FLASK_ENV") == 'development'
    # Listen on all interfaces, port 8000.
    app.run(host='0.0.0.0', port=8000, debug=debug_enabled)