Skip to content

Commit 7222195

Browse files
committed
caracteres faltando
1 parent d3f24b8 commit 7222195

File tree

1 file changed

+82
-11
lines changed

1 file changed

+82
-11
lines changed

ferramentas/list_symbols.py

Lines changed: 82 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,102 @@
11
#!/usr/bin/env python3
22

3+
"""
4+
This CLI script reads stdin or a list of filenames and
5+
prints to stdout an asciidoc document to
6+
visually find missing glyphs after rendering to
7+
HTML or PDF.
8+
9+
Example::
10+
11+
$ ./list_symbols.py ../online/index.html > symbols.adoc
12+
"""
13+
314
import fileinput
415
from collections import Counter, namedtuple
516
import unicodedata
617
from operator import attrgetter
18+
from time import strftime
719

820
# Record for one non-ASCII character in the report: the character itself,
# its Unicode name, its Unicode general category, and its occurrence count.
UniChar = namedtuple('UniChar', 'char name categ count')
921

10-
def main():
11-
non_ascii = Counter()
12-
for line in fileinput.input():
22+
def count_non_ascii(lines, counter=None) -> Counter[str]:
    """Count the occurrences of each non-ASCII character in `lines`.

    `lines` is an iterable of strings. If `counter` is given, the counts
    are accumulated into it and the same object is returned; otherwise a
    new `Counter` is created.

    >>> count_non_ascii(['abc', 'café é'])
    Counter({'é': 2})
    """
    non_ascii = Counter() if counter is None else counter
    for line in lines:
        # Cheap whole-line check: most lines are pure ASCII and can be
        # skipped without inspecting each character.
        if line.isascii():
            continue
        non_ascii.update(char for char in line if not char.isascii())
    return non_ascii
34+
35+
36+
def arrange_sample(sample, row_len, filler=None):
    """
    Break `sample` iterable into rows of `row_len` items,
    filling the empty places in the last row with `filler` if needed.

    Returns a list of lists; an empty `sample` produces an empty list.
    Raises `ValueError` if `row_len` is less than 1.

    >>> arrange_sample(range(6), 3)
    [[0, 1, 2], [3, 4, 5]]
    >>> arrange_sample(range(5), 3)
    [[0, 1, 2], [3, 4, None]]
    """
    if row_len < 1:
        # A zero step would fail inside range() with an unrelated message,
        # and a negative one would silently drop every item.
        raise ValueError(f'row_len must be >= 1, got {row_len!r}')
    source = list(sample)
    rows = []
    for start in range(0, len(source), row_len):
        row = source[start:start + row_len]
        # Only the last row can be short; pad it to the full width.
        row.extend([filler] * (row_len - len(row)))
        rows.append(row)
    return rows
53+
54+
55+
def compact_display(characters, row_width):
    """Print `characters` as table rows of `row_width` cells each.

    Every cell is written as ``|`` followed by the character, one row per
    output line; the last row is padded with single spaces. Nothing is
    printed when `characters` is empty.
    """
    for row in arrange_sample(characters, row_width, ' '):
        print(''.join('|' + cell for cell in row))
61+
1862

19-
chars = []
20-
for char, count in non_ascii.items():
63+
def main():
    """Print an asciidoc report of the non-ASCII characters in the input.

    Lines are read through `fileinput.input()`. After a timestamp line,
    the report has three sections:

    * "Latin 1": compact table of the characters found below U+0100;
    * "CP1252": compact table of the characters found that CP1252 encodes
      in bytes 0x80-0x9F;
    * "Other": one table row per character at or above U+0100, with code
      point, character, name, category and count, sorted by category and
      then by count.
    """
    print('Generated', strftime('%H:%M:%S'))
    non_ascii = count_non_ascii(fileinput.input())
    num_cols = 32

    print('\n## Latin 1\n')
    latin1 = [c for c in non_ascii if ord(c) < 256]
    print('|====')
    compact_display(latin1, num_cols)
    print('|====')

    print('\n## CP1252\n')
    # Start at 0x80 so the euro sign is included. Undefined CP1252 bytes
    # are dropped rather than replaced, so U+FFFD is not listed repeatedly.
    octets = bytes(range(128, 160))
    cp1252 = octets.decode('cp1252', errors='ignore')
    used = (char for char in cp1252 if char in non_ascii)
    print('|====')
    compact_display(used, num_cols)
    print('|====')

    print('\n## Other')

    uchars: list[UniChar] = []

    for char, count in non_ascii.items():
        if ord(char) < 256:
            continue
        # Some code points (e.g. private use) have no name; use a
        # placeholder instead of letting unicodedata.name raise ValueError.
        name = unicodedata.name(char, '(no name)')
        categ = unicodedata.category(char)
        uchars.append(UniChar(char, name, categ, count))

    uchars.sort(key=attrgetter('categ', 'count'))

    print('[cols=">3,^1,11,1,>1"]')
    print('|====')
    for char, name, categ, count in uchars:
        print(f'|`U+{ord(char):04x}`|{char}|{name}|{categ}|{count}')
    print('|====')
29100

30101

31102
if __name__ == '__main__':

0 commit comments

Comments
 (0)