notebooks/fsm-regex.ipynb

405 lines
11 KiB
Plaintext
Raw Permalink Normal View History

2020-09-07 06:54:31 +00:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Regex - ab*c\n",
"\n",
"Here we implement FSM, that matches regular expression `ab*c`, using Python Coroutines"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"image/svg+xml": [
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
"<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
" \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
"<!-- Generated by graphviz version 2.42.3 (20191010.1750)\n",
" -->\n",
"<!-- Title: G Pages: 1 -->\n",
"<svg width=\"294pt\" height=\"188pt\"\n",
" viewBox=\"0.00 0.00 293.76 188.01\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
"<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 184.01)\">\n",
"<title>G</title>\n",
"<polygon fill=\"white\" stroke=\"transparent\" points=\"-4,4 -4,-184.01 289.76,-184.01 289.76,4 -4,4\"/>\n",
"<!-- q0 -->\n",
"<g id=\"node1\" class=\"node\">\n",
"<title>q0</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"22.01\" cy=\"-22.01\" rx=\"22.02\" ry=\"22.02\"/>\n",
"<text text-anchor=\"middle\" x=\"22.01\" y=\"-18.71\" font-family=\"monospace\" font-size=\"11.00\">start</text>\n",
"</g>\n",
"<!-- q1 -->\n",
"<g id=\"node2\" class=\"node\">\n",
"<title>q1</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"104.14\" cy=\"-22.01\" rx=\"18\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"104.14\" y=\"-18.71\" font-family=\"monospace\" font-size=\"11.00\">q1</text>\n",
"</g>\n",
"<!-- q0&#45;&gt;q1 -->\n",
"<g id=\"edge1\" class=\"edge\">\n",
"<title>q0&#45;&gt;q1</title>\n",
"<path fill=\"none\" stroke=\"grey\" d=\"M44.07,-22.01C53.77,-22.01 65.41,-22.01 75.79,-22.01\"/>\n",
"<polygon fill=\"grey\" stroke=\"grey\" points=\"76,-25.51 86,-22.01 76,-18.51 76,-25.51\"/>\n",
"<text text-anchor=\"middle\" x=\"65.08\" y=\"-24.21\" font-family=\"monospace\" font-size=\"11.00\">a</text>\n",
"</g>\n",
"<!-- q2 -->\n",
"<g id=\"node3\" class=\"node\">\n",
"<title>q2</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"182.26\" cy=\"-79.01\" rx=\"18\" ry=\"18\"/>\n",
"<text text-anchor=\"middle\" x=\"182.26\" y=\"-75.71\" font-family=\"monospace\" font-size=\"11.00\">q2</text>\n",
"</g>\n",
"<!-- q1&#45;&gt;q2 -->\n",
"<g id=\"edge4\" class=\"edge\">\n",
"<title>q1&#45;&gt;q2</title>\n",
"<path fill=\"none\" stroke=\"grey\" d=\"M119.1,-32.47C130.26,-40.82 146.12,-52.7 159.02,-62.36\"/>\n",
"<polygon fill=\"grey\" stroke=\"grey\" points=\"157.28,-65.43 167.39,-68.62 161.48,-59.83 157.28,-65.43\"/>\n",
"<text text-anchor=\"middle\" x=\"143.2\" y=\"-54.21\" font-family=\"monospace\" font-size=\"11.00\">b</text>\n",
"</g>\n",
"<!-- q3 -->\n",
"<g id=\"node4\" class=\"node\">\n",
"<title>q3</title>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"263.76\" cy=\"-22.01\" rx=\"18\" ry=\"18\"/>\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"263.76\" cy=\"-22.01\" rx=\"22\" ry=\"22\"/>\n",
"<text text-anchor=\"middle\" x=\"263.76\" y=\"-18.71\" font-family=\"monospace\" font-size=\"11.00\">q3</text>\n",
"</g>\n",
"<!-- q1&#45;&gt;q3 -->\n",
"<g id=\"edge2\" class=\"edge\">\n",
"<title>q1&#45;&gt;q3</title>\n",
"<path fill=\"none\" stroke=\"grey\" d=\"M122.15,-19.09C133.93,-17.22 150,-14.97 164.26,-14.01 180.22,-12.94 184.29,-12.99 200.26,-14.01 210.63,-14.67 221.91,-15.95 232,-17.28\"/>\n",
"<polygon fill=\"grey\" stroke=\"grey\" points=\"231.6,-20.76 241.99,-18.68 232.57,-13.83 231.6,-20.76\"/>\n",
"<text text-anchor=\"middle\" x=\"182.26\" y=\"-16.21\" font-family=\"monospace\" font-size=\"11.00\">c</text>\n",
"</g>\n",
"<!-- q2&#45;&gt;q2 -->\n",
"<g id=\"edge5\" class=\"edge\">\n",
"<title>q2&#45;&gt;q2</title>\n",
"<path fill=\"none\" stroke=\"grey\" d=\"M178.2,-96.68C173.04,-127.86 174.39,-169.01 182.26,-169.01 189.3,-169.01 191.12,-136.09 187.73,-106.82\"/>\n",
"<polygon fill=\"grey\" stroke=\"grey\" points=\"191.17,-106.1 186.32,-96.68 184.23,-107.07 191.17,-106.1\"/>\n",
"<text text-anchor=\"middle\" x=\"182.26\" y=\"-171.21\" font-family=\"monospace\" font-size=\"11.00\">b</text>\n",
"</g>\n",
"<!-- q2&#45;&gt;q3 -->\n",
"<g id=\"edge3\" class=\"edge\">\n",
"<title>q2&#45;&gt;q3</title>\n",
"<path fill=\"none\" stroke=\"grey\" d=\"M197.47,-68.82C208.46,-60.94 223.97,-49.82 237.12,-40.4\"/>\n",
"<polygon fill=\"grey\" stroke=\"grey\" points=\"239.25,-43.17 245.34,-34.5 235.17,-37.48 239.25,-43.17\"/>\n",
"<text text-anchor=\"middle\" x=\"221.01\" y=\"-54.21\" font-family=\"monospace\" font-size=\"11.00\">c</text>\n",
"</g>\n",
"</g>\n",
"</svg>\n"
],
"text/plain": [
"<graphviz.files.Source at 0x10867ef90>"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from graphviz import Source\n",
"with open(\"./regex-1.dot\", \"r\") as f:\n",
" gr = Source(f.read())\n",
"gr"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"def prime(fn):\n",
" def wrapper(*args, **kwargs):\n",
" v = fn(*args, **kwargs)\n",
" v.send(None)\n",
" return v\n",
" return wrapper"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"class RegexFSM:\n",
" def __init__(self):\n",
" self.start = self._create_start()\n",
" self.q1 = self._create_q1()\n",
" self.q2 = self._create_q2()\n",
" self.q3 = self._create_q3()\n",
" \n",
" self.current_state = self.start\n",
" self.stopped = False\n",
" \n",
" def send(self, char):\n",
" try:\n",
" self.current_state.send(char)\n",
" except StopIteration:\n",
" self.stopped = True\n",
" \n",
" def does_match(self):\n",
" if self.stopped:\n",
" return False\n",
" return self.current_state == self.q3\n",
"\n",
" @prime\n",
" def _create_start(self):\n",
" while True:\n",
" char = yield\n",
" if char == 'a':\n",
" self.current_state = self.q1\n",
" else:\n",
" break\n",
" \n",
" @prime\n",
" def _create_q1(self):\n",
" while True:\n",
" char = yield\n",
" if char == 'b':\n",
" self.current_state = self.q2\n",
" elif char == 'c':\n",
" self.current_state = self.q3\n",
" else:\n",
" break\n",
"\n",
" @prime\n",
" def _create_q2(self):\n",
" while True:\n",
" char = yield\n",
" if char == 'b':\n",
" self.current_state = self.q2\n",
" elif char == 'c':\n",
" self.current_state = self.q3\n",
" else:\n",
" break\n",
"\n",
" @prime\n",
" def _create_q3(self):\n",
" while True:\n",
" char = yield\n",
" break"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"def grep_regex(text):\n",
" evaluator = RegexFSM()\n",
" for ch in text:\n",
" evaluator.send(ch)\n",
" return evaluator.does_match()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"grep_regex(\"a\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"grep_regex(\"ab\")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"grep_regex(\"ac\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"grep_regex(\"abc\")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"grep_regex(\"aba\")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"grep_regex(\"abbbbbbbc\")"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"grep_regex(\"abcc\")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"grep_regex(\"abcd\")"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"grep_regex(\"bcbc\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.4"
}
},
"nbformat": 4,
"nbformat_minor": 4
}