import base64
import hmac
from io import BytesIO
from os import environ
from tempfile import NamedTemporaryFile
from typing import Tuple

import numpy as np
import pytesseract
from flasgger import Swagger
from flask import Flask, request
from PIL import Image

app = Flask(__name__)
app.config['SWAGGER'] = {
    'title': 'Simple verification-code solver',
}
Swagger(app)

# OCR timeout (seconds) used when the client does not supply a positive one.
DEFAULT_TIMEOUT = 300

# Offsets of the eight neighbours used for connectivity in remove_noise().
_NEIGHBOURS = (
    (-1, -1), (-1, 0), (-1, +1),
    (0, -1), (0, +1),
    (+1, -1), (+1, 0), (+1, +1),
)


def remove_noise(img_array: np.ndarray,
                 dominant_color: Tuple[int, int, int],
                 threshold=10):
    """Return a copy of *img_array* with coloured pixels and small specks erased.

    Two passes are applied to the copy:

    1. Every pixel whose first three channels are not all equal (i.e. that
       is not a shade of grey) is overwritten with *dominant_color*.
    2. Every 8-connected group of pixels that differ from *dominant_color*
       and contains at most *threshold* pixels is overwritten with
       *dominant_color*.

    Args:
        img_array: Array of shape ``(height, width, channels)`` with at
            least three channels. It is not modified.
        dominant_color: Background colour, one value per channel.
        threshold: Largest group size (in pixels) that is still erased.

    Returns:
        The cleaned copy of *img_array*.

    Raises:
        ValueError: If *img_array* is not three-dimensional.
    """
    img = img_array.copy()
    h, w, _ = img.shape
    dominant = np.asarray(dominant_color)

    # Pass 1: blank out every non-grey pixel in one vectorised step.
    coloured = (img[..., 0] != img[..., 1]) | (img[..., 1] != img[..., 2])
    img[coloured] = dominant

    # Pass 2: flood-fill each foreground group. An explicit stack is used
    # instead of recursion so large groups cannot exhaust the recursion limit.
    background = np.all(img == dominant, axis=-1)
    seen = background.copy()  # background pixels never need visiting
    for y, x in zip(*np.nonzero(~background)):
        if seen[y, x]:
            continue
        seen[y, x] = True
        component = [(y, x)]
        stack = [(y, x)]
        while stack:
            cy, cx = stack.pop()
            for dy, dx in _NEIGHBOURS:
                ny, nx = cy + dy, cx + dx
                if 0 <= ny < h and 0 <= nx < w and not seen[ny, nx]:
                    seen[ny, nx] = True
                    component.append((ny, nx))
                    stack.append((ny, nx))
        if len(component) <= threshold:
            for py, px in component:
                img[py, px] = dominant
    return img


def sanitize(pil_img: Image.Image) -> np.ndarray:
    """Convert *pil_img* to a denoised array ready to be passed to the OCR.

    The most frequent colour of the whole image is taken as the background,
    the outermost one-pixel frame is cropped away, and the remainder is
    cleaned with :func:`remove_noise`.

    Args:
        pil_img: Image to clean. Modes other than RGB/RGBA are converted to
            RGB first so the pixel data always has at least three channels.

    Returns:
        The cleaned pixel array, one pixel smaller on every side than the
        input image.
    """
    if pil_img.mode not in ('RGB', 'RGBA'):
        pil_img = pil_img.convert('RGB')
    data = np.asarray(pil_img)
    clip = data[1:-1, 1:-1]
    # getcolors() returns None when the image has more than ``maxcolors``
    # distinct colours (default 256); allowing one colour per pixel
    # guarantees a list is returned.
    colour_counts = pil_img.getcolors(maxcolors=pil_img.width * pil_img.height)
    count_colors = sorted(colour_counts, key=lambda cc: cc[0])
    dominant_color = np.asarray(count_colors[-1][1])
    return remove_noise(clip, dominant_color)


def _is_authorized() -> bool:
    """Return True when the request's X-Key header matches the X_KEY env var."""
    expected = environ.get('X_KEY')
    provided = request.headers.get('X-Key')
    if expected is None or provided is None:
        # NOTE: when no key is configured and none is sent the request is
        # accepted; this keeps the service's existing behaviour.
        return expected is provided
    # Constant-time comparison avoids leaking the key through timing.
    return hmac.compare_digest(provided.encode('utf-8'),
                               expected.encode('utf-8'))


def _requested_timeout() -> int:
    """Return the OCR timeout from the form, or DEFAULT_TIMEOUT.

    A missing, blank or non-positive value selects the default.

    Raises:
        ValueError: If the supplied value is not an integer.
    """
    raw = request.form.get('timeout')
    if raw is None or not raw.strip():
        return DEFAULT_TIMEOUT
    value = int(raw)
    return value if value > 0 else DEFAULT_TIMEOUT


def _solve(raw_image: bytes):
    """Decode *raw_image*, clean it and return the recognised text.

    Returns a ``(message, 400)`` tuple when the timeout or the image data
    supplied by the client is invalid.
    """
    try:
        timeout = _requested_timeout()
    except ValueError:
        return "Invalid timeout", 400
    try:
        img = Image.open(BytesIO(raw_image))
        img.load()  # force decoding now so truncated files are caught here
    except OSError:
        return "Invalid image", 400
    ary = sanitize(img)
    return pytesseract.image_to_string(ary, timeout=timeout)


@app.route('/verification-code', methods=['POST'])
def solve_verification_code():
    """
    Solve verification code
    ---
    tags:
      - Verification Code
    consumes:
      - multipart/form-data
    parameters:
      - in: formData
        name: image
        type: file
        required: true
        description: Image file
      - in: formData
        name: timeout
        type: integer
        required: false
        description: Timeout
      - in: header
        name: X-Key
        type: string
        required: true
    responses:
      500:
        description: Error message
      401:
        description: Unauthorized
      400:
        description: Invalid request
      200:
        description: text
    """
    if not _is_authorized():
        return "Unauthorized", 401
    if 'image' not in request.files:
        return "No file uploaded", 400
    image_file = request.files['image']
    if image_file.filename == '':
        return "No file name", 400
    return _solve(image_file.read())


# NOTE: the misspelt function name is also the Flask endpoint name, so it is
# kept as-is to avoid breaking anything that refers to the endpoint.
@app.route('/verification-code/base64', methods=['POST'])
def sove_verification_code_base64():
    """
    Solve verification code from base64-encoded image
    ---
    tags:
      - Verification Code
    parameters:
      - in: formData
        name: image
        type: string
        required: true
        description: Base64-encoded image
      - in: formData
        name: timeout
        type: integer
        required: false
        description: Timeout
      - in: header
        name: X-Key
        type: string
        required: true
    responses:
      500:
        description: Error message
      401:
        description: Unauthorized
      400:
        description: Invalid request
      200:
        description: text
    """
    if not _is_authorized():
        return "Unauthorized", 401
    image = request.form.get('image')
    if not image:
        return 'No image data', 400
    try:
        raw_image = base64.b64decode(image)
    except ValueError:  # binascii.Error is a ValueError subclass
        return "Invalid base64 data", 400
    return _solve(raw_image)


if __name__ == "__main__":
    app.run(debug=environ.get("FLASK_ENV") == 'development',
            host='0.0.0.0', port=8000)