Skip to content

Commit 072cc8d

Browse files
committed
Added get_unicode_blocks and get_unicode_scripts methods.
1 parent 2b89a9c commit 072cc8d

File tree

4 files changed

+2314
-211
lines changed

4 files changed

+2314
-211
lines changed

fontbro/font.py

Lines changed: 85 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,18 @@
44

55
from fontbro.features import FEATURES as _FEATURES_LIST
66
from fontbro.flags import get_flag, set_flag
7+
from fontbro.uni import (
8+
UNICODE_BLOCKS as _UNICODE_BLOCKS,
9+
UNICODE_SCRIPTS as _UNICODE_SCRIPTS,
10+
)
711

812
from fontTools import unicodedata
913
from fontTools.subset import parse_unicodes, Subsetter
1014
from fontTools.ttLib import TTFont, TTLibError
1115
from fontTools.varLib import instancer
1216
from fontTools.varLib.instancer import OverlapMode
1317

18+
import copy
1419
import fsutil
1520
import itertools
1621
import math
@@ -239,11 +244,17 @@ def get_characters(self):
239244
name = unicodedata.name(char)
240245
except ValueError:
241246
pass
247+
block_name = unicodedata.block(code)
248+
script_tag = unicodedata.script(code)
249+
script_name = unicodedata.script_name(script_tag)
242250
yield {
243251
"character": char,
244252
"character_name": char_name,
245253
"code": code,
246254
"name": name,
255+
"block_name": block_name,
256+
"script_name": script_name,
257+
"script_tag": script_tag,
247258
}
248259

249260
def get_characters_count(self):
@@ -357,73 +368,6 @@ def get_names(self):
357368
}
358369
return names
359370

360-
@classmethod
def get_script_by_character(cls, char):
    """
    Resolves the script of a single character (even if not included in the font).

    The character is converted to its unicode code point and the lookup
    is delegated to get_script_by_code.

    :param char: The character
    :type char: str

    :returns: The script of the character, as returned by get_script_by_code.
    :rtype: dict
    """
    code_point = ord(char)
    return cls.get_script_by_code(code_point)
372-
373-
@classmethod
def get_script_by_code(cls, code):
    """
    Resolves the script of a unicode code point (even if not included in the font).

    :param code: The unicode code point
    :type code: int

    :returns: A dict with the script "tag", the script "name"
        and the "block" the code point belongs to.
    :rtype: dict
    """
    tag = unicodedata.script(code)
    name = unicodedata.script_name(tag)
    block = unicodedata.block(code)
    return {"tag": tag, "name": name, "block": block}
390-
391-
@classmethod
def get_scripts_by_characters(cls, chars):
    """
    Collects the scripts of the given characters (even if not included in the font).

    :param chars: The characters, each one a dict exposing its code point under the "code" key.
    :type chars: iterable of dict

    :returns: One dict per script ("tag", "name" and the sorted "blocks"), ordered by script tag.
    :rtype: list of dict
    """
    # group the block names found for every script tag
    blocks_by_tag = {}
    for char in chars:
        info = cls.get_script_by_code(char["code"])
        blocks_by_tag.setdefault(info["tag"], set()).add(info["block"])

    scripts = []
    for tag in sorted(blocks_by_tag):
        scripts.append(
            {
                "tag": tag,
                "name": unicodedata.script_name(tag),
                "blocks": sorted(blocks_by_tag[tag]),
            }
        )
    return scripts
417-
418-
def get_scripts(self):
    """
    Gets the scripts of the characters returned by get_characters.

    :returns: The scripts, as computed by get_scripts_by_characters.
    :rtype: list of dict
    """
    font_chars = self.get_characters()
    return self.get_scripts_by_characters(chars=font_chars)
426-
427371
def get_style_flag(self, key):
428372
"""
429373
Gets the style flag reading OS/2 and macStyle tables.
@@ -470,6 +414,80 @@ def get_ttfont(self):
470414
"""
471415
return self._ttfont
472416

417+
@classmethod
def _populate_unicode_items_set(cls, items, items_cache, item):
    """
    Registers one more character for the given unicode item.

    The first time an item name is seen, a copy of the item is stored in both
    ``items`` (keeps the insertion order) and ``items_cache`` (indexed by name)
    with its "characters_count" set to 0; the counter of the stored copy is then
    incremented. The ``item`` passed in is never modified.

    :param items: The list collecting the distinct items, updated in place.
    :type items: list of dict
    :param items_cache: The items indexed by name, updated in place.
    :type items_cache: dict
    :param item: The item to count, it must have a "name" key.
    :type item: dict
    """
    name = item["name"]
    entry = items_cache.get(name)
    if entry is None:
        # the same dict object is shared by the list and the cache,
        # so incrementing it below updates both views
        entry = item.copy()
        entry["characters_count"] = 0
        items_cache[name] = entry
        items.append(entry)
    entry["characters_count"] += 1
427+
428+
@staticmethod
def _get_unicode_items_set_with_coverage(all_items, items, coverage_threshold=0.0):
    """
    Merges the counted items into the full list of reference items
    and computes the coverage of each reference item.

    Every returned item is a copy of a reference item extended with
    "characters_count" (0 when the item has not been counted) and
    "coverage" (characters_count / characters_total, 0.0 when the item has
    not been counted or when its characters_total is 0).

    :param all_items: The reference items, each one with a "name" key
        and (when counted) a "characters_total" key. It is not modified.
    :type all_items: list of dict
    :param items: The counted items, each one with "name" and "characters_count" keys.
    :type items: list of dict
    :param coverage_threshold: The minimum coverage required for an item to be returned.
    :type coverage_threshold: float

    :returns: The reference items with coverage >= coverage_threshold, in the order of all_items.
    :rtype: list of dict
    """
    # work on a deep copy so that the reference data is never mutated
    all_items = copy.deepcopy(all_items)
    counts_by_name = {item["name"]: item["characters_count"] for item in items}
    for item in all_items:
        name = item["name"]
        if name in counts_by_name:
            count = counts_by_name[name]
            total = item["characters_total"]
            item["characters_count"] = count
            # an item without any character cannot be covered: avoid ZeroDivisionError
            item["coverage"] = (count / total) if total else 0.0
        else:
            item["characters_count"] = 0
            item["coverage"] = 0.0
    return [item for item in all_items if item["coverage"] >= coverage_threshold]
443+
444+
def get_unicode_blocks(self, coverage_threshold=0.00001):
    """
    Gets the unicode blocks and the coverage the font has of each one.
    Only blocks with coverage >= coverage_threshold (0.0 <= coverage_threshold <= 1.0) will be returned.

    :param coverage_threshold: The minimum required coverage for a block to be returned.
    :type coverage_threshold: float

    :returns: The list of unicode blocks.
    :rtype: list of dicts
    """
    found = []
    found_by_name = {}
    # count the characters of the font block by block
    for char in self.get_characters():
        self._populate_unicode_items_set(
            found, found_by_name, {"name": char["block_name"]}
        )
    return self._get_unicode_items_set_with_coverage(
        _UNICODE_BLOCKS, found, coverage_threshold=coverage_threshold
    )
466+
467+
def get_unicode_scripts(self, coverage_threshold=0.00001):
    """
    Gets the unicode scripts and the coverage the font has of each one.
    Only scripts with coverage >= coverage_threshold (0.0 <= coverage_threshold <= 1.0) will be returned.

    :param coverage_threshold: The minimum required coverage for a script to be returned.
    :type coverage_threshold: float

    :returns: The list of unicode scripts.
    :rtype: list of dicts
    """
    found = []
    found_by_name = {}
    # count the characters of the font script by script
    for char in self.get_characters():
        script = {"name": char["script_name"], "tag": char["script_tag"]}
        self._populate_unicode_items_set(found, found_by_name, script)
    return self._get_unicode_items_set_with_coverage(
        _UNICODE_SCRIPTS, found, coverage_threshold=coverage_threshold
    )
490+
473491
def get_variable_axes(self):
474492
"""
475493
Gets the font variable axes.

0 commit comments

Comments
 (0)