dotfiles/bin/utf8_nf

282 lines
8.6 KiB
Plaintext
Raw Normal View History

2020-11-29 18:04:21 +00:00
#!/bin/env python3
import gzip
import argparse
import re
import sys
import io
import os
from abc import ABC, abstractclassmethod, abstractmethod, abstractstaticmethod
2020-11-29 18:04:21 +00:00
from typing import List, TextIO
def extract_nerdfont_ranges(font_patcher: TextIO):
    """Yield ``(start, end, name)`` tuples parsed from font-patcher source lines.

    A line is considered only when it contains exactly four
    ``'SrcStart'`` / ``'SrcEnd'`` / ``'SymStart'`` / ``'SymEnd'`` entries.
    For each bound the ``Src*`` value is used when it is truthy, otherwise
    the ``Sym*`` value.  Lines whose resulting range is ``(0, 0)`` are
    skipped.

    Raises:
        Exception: if a qualifying line carries no ``'Name': "..."`` entry.
    """
    bound_pattern = re.compile(r"'((Src|Sym)(Start|End))':\s*([xXA-Fa-f0-9]+|None)")
    label_pattern = re.compile(r"'Name':\s+\"(.*?)\"")

    for raw in font_patcher:
        hits = bound_pattern.findall(raw)
        if len(hits) != 4:
            continue

        # Map each key ('SrcStart', ...) to its integer value, or None.
        bounds = {}
        for key, _, _, value in hits:
            bounds[key] = None if value == "None" else int(value, base=16)

        first = bounds["SrcStart"] or bounds["SymStart"]
        last = bounds["SrcEnd"] or bounds["SymEnd"]
        if (first, last) == (0, 0):
            continue

        label = label_pattern.search(raw)
        if not label:
            raise Exception("unable to find name: %s" % raw)
        yield first, last, label[1]
class Scope(ABC):
    """Merge a list of injected code-point ranges into one charmap section.

    ``ranges`` holds ``(start, end, comment)`` tuples.  The merge walks them
    with a single forward cursor, so it assumes they are sorted by start and
    do not overlap each other, and that section lines are fed to
    ``read_line`` in ascending order.

    Subclasses supply the regular expression that parses a section line
    (``pattern``) and the textual form of an output line (``format``).
    """

    # When set by a subclass, replaces the per-range comment on injected lines.
    comment_override = None

    def __init__(self, ranges: List[tuple], output: TextIO):
        self.ranges = ranges
        self.length = len(ranges)
        self.output = output
        self.cursor = 0
        # Not assigned anywhere in this class; expected to be set from outside.
        self.escape_char = None
        self.re = re.compile(self.pattern())

    @property
    def empty(self) -> bool:
        """True once every injected range has been written."""
        return self.cursor >= self.length

    @property
    def current(self) -> tuple:
        """The next injected range that has not been written yet."""
        return self.ranges[self.cursor]

    def pop(self) -> bool:
        """Write the current injected range and advance the cursor.

        Returns:
            ``False`` when no range is left, ``True`` after writing one.
        """
        if self.empty:
            return False
        ss, se, comment = self.current
        self.write_line(ss, se, self.comment_override or comment)
        self.cursor += 1
        return True

    def read_line(self, line: str) -> bool:
        """Merge one existing section line with the pending injected ranges.

        Injected ranges that end before the line are written first.  If the
        line overlaps injected ranges, the parts of the line outside those
        ranges are re-emitted (with the line's own trailing field) around
        the injected ranges.

        Returns:
            ``True`` when the caller should write ``line`` unchanged,
            ``False`` when the line was fully handled (or suppressed) here.

        Raises:
            Exception: if ``line`` does not match ``pattern()``.
        """
        m = self.re.match(line)
        if not m:
            raise Exception("unkown line: " + line)
        ts = int(m[1], base=16)
        te = int(m[3], base=16) if m[3] else ts

        # Flush injected ranges that lie entirely before this line.
        while not self.empty and self.current[1] < ts:
            self.pop()
        if self.empty:
            return True

        ss, se, _ = self.current
        # No intersection: the line is passed through untouched.
        if te < ss:
            return True
        # The line is covered by the injected range: drop it.  The range
        # itself is written later, once a line beyond it (or END) is seen.
        if ts >= ss and te <= se:
            return False

        # The line overlaps one or more injected ranges.  Walk every range
        # that starts inside the line so the re-emitted pieces of the line
        # never overlap a range that is injected later.
        pos = ts
        while not self.empty and self.current[0] <= te:
            ss, se, _ = self.current
            self.write_line(pos, ss - 1, m[5])  # piece of the line before the range
            if se > te:
                # Range continues past this line; following lines clip against it.
                pos = te + 1
                break
            self.pop()
            pos = se + 1
        self.write_line(pos, te, m[5])  # piece of the line after the last range
        return False

    def write_line(self, start: int = None, end: int = None, comment: str = None) -> None:
        """Write one formatted line; empty spans (``end < start``) are skipped."""
        if end < start:
            return
        self.output.write(self.format(start, end, comment) + '\n')

    @abstractmethod
    def pattern(self) -> str:
        """Return the regex for a section line.

        ``read_line`` uses group 1 as the start code point, group 3 as the
        optional end code point and group 5 as the trailing field.
        """
        pass

    @abstractmethod
    def format(self, start: int, end: int, arg: str) -> str:
        """Return the text of one output line covering ``start``..``end``."""
        pass
class CharmapScope(Scope):
    """Scope for the CHARMAP section.

    Lines have the form ``<Uxxxx>[..<Uyyyy>] <encoding> <comment>``.
    """

    def pattern(self) -> str:
        return r'<U([A-Z0-9]+)>(..<U([A-Z0-9]+)>)?\s+(\S+)\s+(.*?)$'

    def format(self, start: int, end: int, comment: str) -> str:
        """Format a CHARMAP line; the encoding column is the UTF-8 bytes of ``start``."""
        # POSIX charmaps default the escape character to backslash when no
        # <escape_char> declaration was seen; without this fallback the
        # literal text "None" would be written in front of every byte.
        escape = self.escape_char or "\\"
        encoded = "".join(
            "%sx%02x" % (escape, byte) for byte in chr(start).encode('utf-8')
        )
        return "%s%s %s %s" % (
            "<U%04X>" % start,
            "..<U%04X>" % end if end > start else "",
            encoded,
            comment,
        )
class WidthScope(Scope):
    """Scope for the WIDTH section.

    Lines have the form ``<Uxxxx>[...<Uyyyy>]<whitespace><width>``.
    """

    # Injected ranges are always written with this value in the width column.
    comment_override = '2'

    def pattern(self) -> str:
        return r'<U([A-Z0-9]+)>(...<U([A-Z0-9]+)>)?(\s+)(\d+)$'

    def format(self, start: int, end: int, comment: str) -> str:
        """Format a WIDTH line: code point (or range), a tab, then the width."""
        span = "<U%04X>" % start
        if end > start:
            span += "...<U%04X>" % end
        return "\t".join((span, str(comment)))
def inject_nerdfont_ranges(ranges: List[tuple], textin: TextIO, textout: TextIO):
    """Copy a charmap from ``textin`` to ``textout`` with ``ranges`` merged in.

    Each range is injected into both the CHARMAP and the WIDTH section;
    each section keeps its own cursor into ``ranges``.  Lines outside those
    sections are copied through (stripped of surrounding whitespace), and
    ``<comment_char>`` / ``<escape_char>`` declarations are picked up on
    the way.

    Args:
        ranges: ``(start, end, comment)`` tuples, sorted by start.
        textin: iterable of charmap source lines.
        textout: writable text stream receiving the patched charmap.
    """
    comment_char = None
    keyword_re = re.compile(r'<(\w+)>\s+(\S+)\s*')
    charmap_scope = CharmapScope(ranges, textout)
    width_scope = WidthScope(ranges, textout)
    scope = None

    for line in textin:
        line = line.strip()
        # Compare the whole first token so that e.g. WIDTH_DEFAULT is not
        # mistaken for the start of the WIDTH section.
        keyword = line.split(None, 1)[0] if line else ""
        if keyword == "CHARMAP":
            scope = charmap_scope
        elif keyword == "WIDTH":
            scope = width_scope
        elif line.startswith("END "):
            # Flush ranges that lie beyond the last line of the section.
            # An END without a matching section start has nothing to flush.
            if scope is not None:
                while not scope.empty:
                    scope.pop()
            scope = None
        elif comment_char and line.startswith(comment_char):
            pass
        elif not line:
            # Blank lines are copied through, even inside a section, instead
            # of being handed to read_line (which would reject them).
            pass
        elif scope and not scope.empty:
            # read_line returns False when it already wrote (or dropped)
            # this line, so it must not be echoed again below.
            if scope.read_line(line) is False:
                continue
        else:
            m = keyword_re.match(line)
            if m:
                if m[1] == "comment_char":
                    comment_char = m[2]
                elif m[1] == 'escape_char':
                    charmap_scope.escape_char = m[2]
        textout.write(line + "\n")
def test():
    """Self-check: run the injection on a small charmap and compare the output.

    Prints ``pass`` on success.  On mismatch prints the input, the injected
    ranges, the expected text and a line-by-line coloured view of the actual
    result (green where it equals the expected line, red otherwise).
    """
    # Local import: only this diagnostic path needs it.
    from itertools import zip_longest

    TARGET = (
        "<comment_char> %\n"
        "<escape_char> /\n\n"
        "CHARMAP\n"
        "<U0000> /x00 no\n"
        "<U0001>..<U0005> /x01 left\n"
        "<U0006> /x06 subset\n"
        "<U0007> /x07 subset\n"
        "<U0008>..<U0009> /x08 right\n"
        "<U4E00>..<U4E99> /xe4/xb8/x80 superset\n"
        "<U5E00> /x00 gap\n"
        "END CHARMAP\n"
        "WIDTH\n"
        "<U0000>...<U0004>\t0\n"
        "<U6F00>...<U7FFF>\t1\n"
        "END WIDTH\n"
    )
    ranges = [
        (4, 8, "test"),
        (0x4e03, 0x4e0a, "test2"),
        (0x4F00, 0x4F00, "test3"),
        (0x6F00, 0x6F00, "tail"),
    ]
    EXPECT = (
        "<comment_char> %\n"
        "<escape_char> /\n\n"
        "CHARMAP\n"
        "<U0000> /x00 no\n"
        "<U0001>..<U0003> /x01 left\n"
        "<U0004>..<U0008> /x04 test\n"
        "<U0009> /x09 right\n"
        "<U4E00>..<U4E02> /xe4/xb8/x80 superset\n"
        "<U4E03>..<U4E0A> /xe4/xb8/x83 test2\n"
        "<U4E0B>..<U4E99> /xe4/xb8/x8b superset\n"
        "<U4F00> /xe4/xbc/x80 test3\n"
        "<U5E00> /x00 gap\n"
        "<U6F00> /xe6/xbc/x80 tail\n"
        "END CHARMAP\n"
        "WIDTH\n"
        "<U0000>...<U0003>\t0\n"
        "<U0004>...<U0008>\t2\n"
        "<U4E03>...<U4E0A>\t2\n"
        "<U4F00>\t2\n"
        "<U6F00>\t2\n"
        "<U6F01>...<U7FFF>\t1\n"
        "END WIDTH\n"
    )

    target = io.StringIO(TARGET)
    result = io.StringIO()
    inject_nerdfont_ranges(ranges, target, result)

    if result.getvalue() != EXPECT:
        print("\033[42m origin\033[0m")
        print(TARGET)
        print()
        print("\033[42m inject\033[0m")
        for r in ranges:
            print("%04X %04X %s" % r)
        print()
        print("\033[42m expect \033[0m", len(EXPECT))
        print(EXPECT)
        print()
        print("\033[42m result \033[0m", len(result.getvalue()))
        a = EXPECT.split('\n')
        b = result.getvalue().split('\n')
        # zip_longest pads the shorter side with None, so a result with more
        # or fewer lines than expected is reported instead of raising
        # IndexError.
        for want, got in zip_longest(a, b):
            colour = 32 if want == got else 31
            print("\033[%dm%s\033[0m" % (colour, "" if got is None else got))
    else:
        print("pass")
if __name__ == "__main__":
    # DEBUGGING=1 runs the built-in self-check instead of patching a charmap.
    if os.environ.get('DEBUGGING') == '1':
        test()
    else:
        parser = argparse.ArgumentParser(
            description="patch charmap to make NerdFont icons double width"
        )
        parser.add_argument(
            "-i", "--in-charmap",
            dest="in_charmap",
            default="/usr/share/i18n/charmaps/UTF-8.gz",
            help="input charmap file path")
        parser.add_argument(
            "-o", "--out",
            dest="out_charmap",
            default="/usr/share/i18n/charmaps/UTF-8NF.gz",
            help="output charmap file path")
        parser.add_argument(
            "-f", "--font-patcher",
            dest="font_patcher",
            required=True,
            help="file path of font_patcher from NerdFont")
        parser.add_argument(
            "--plainout",
            dest="plainout",
            action="store_true",
            help="write to stdout in plain-text")
        args = parser.parse_args()

        # Parse the font-patcher first, while its file is open: the extractor
        # is a lazy generator, and doing this before the output is opened
        # means a parse error no longer leaves a truncated output file.
        with open(args.font_patcher, 'r') as font_patcher:
            ranges = sorted(
                filter(
                    lambda x: x[2] in ['Seti-UI + Custom', 'Devicons'],
                    extract_nerdfont_ranges(font_patcher),
                ),
                key=lambda x: x[0]
            )

        # With --plainout the charmap itself goes to stdout, so the range
        # listing is moved to stderr to keep that output clean.
        report = sys.stderr if args.plainout else sys.stdout
        for r in ranges:
            print("%04X-%04X %s" % r, file=report)

        # Context managers guarantee the (possibly gzip) output is flushed
        # and closed; sys.stdout is deliberately left open.
        with gzip.open(args.in_charmap, 'rt', encoding="ascii") as in_charmap:
            if args.plainout:
                inject_nerdfont_ranges(ranges, in_charmap, sys.stdout)
            else:
                opener = gzip.open if args.out_charmap.endswith('.gz') else open
                with opener(args.out_charmap, 'wt') as out_charmap:
                    inject_nerdfont_ranges(ranges, in_charmap, out_charmap)
2020-11-29 18:04:21 +00:00
# After generating the patched charmap:
#   1. add `en_US.UTF-8NF UTF-8NF` to `/etc/locale.gen`
#   2. run `locale-gen`
#   3. update `/etc/locale.conf` to `LANG=en_US.UTF-8NF`
#   4. restart