|
4 | 4 |
|
5 | 5 | from fontbro.features import FEATURES as _FEATURES_LIST |
6 | 6 | from fontbro.flags import get_flag, set_flag |
| 7 | +from fontbro.uni import ( |
| 8 | + UNICODE_BLOCKS as _UNICODE_BLOCKS, |
| 9 | + UNICODE_SCRIPTS as _UNICODE_SCRIPTS, |
| 10 | +) |
7 | 11 |
|
8 | 12 | from fontTools import unicodedata |
9 | 13 | from fontTools.subset import parse_unicodes, Subsetter |
10 | 14 | from fontTools.ttLib import TTFont, TTLibError |
11 | 15 | from fontTools.varLib import instancer |
12 | 16 | from fontTools.varLib.instancer import OverlapMode |
13 | 17 |
|
| 18 | +import copy |
14 | 19 | import fsutil |
15 | 20 | import itertools |
16 | 21 | import math |
@@ -239,11 +244,17 @@ def get_characters(self): |
239 | 244 | name = unicodedata.name(char) |
240 | 245 | except ValueError: |
241 | 246 | pass |
| 247 | + block_name = unicodedata.block(code) |
| 248 | + script_tag = unicodedata.script(code) |
| 249 | + script_name = unicodedata.script_name(script_tag) |
242 | 250 | yield { |
243 | 251 | "character": char, |
244 | 252 | "character_name": char_name, |
245 | 253 | "code": code, |
246 | 254 | "name": name, |
| 255 | + "block_name": block_name, |
| 256 | + "script_name": script_name, |
| 257 | + "script_tag": script_tag, |
247 | 258 | } |
248 | 259 |
|
249 | 260 | def get_characters_count(self): |
@@ -357,73 +368,6 @@ def get_names(self): |
357 | 368 | } |
358 | 369 | return names |
359 | 370 |
|
@classmethod
def get_script_by_character(cls, char):
    """
    Gets the script of the given character (even if the font doesn't include it).

    :param char: The character
    :type char: str

    :returns: The script of the character, as returned by get_script_by_code.
    :rtype: dict
    """
    # the lookup is done by code point, so convert the character first
    code = ord(char)
    return cls.get_script_by_code(code)
372 | | - |
@classmethod
def get_script_by_code(cls, code):
    """
    Gets the script of the given unicode code point (even if the font doesn't include it).

    :param code: The code
    :type code: int

    :returns: A dict with the script "tag", the script "name" and the "block" of the code point.
    :rtype: dict
    """
    tag = unicodedata.script(code)
    name = unicodedata.script_name(tag)
    block = unicodedata.block(code)
    return {"tag": tag, "name": name, "block": block}
390 | | - |
@classmethod
def get_scripts_by_characters(cls, chars):
    """
    Gets the scripts of the given characters (even if the font doesn't include them).

    :param chars: The characters, each one providing its code point under the "code" key.
    :type chars: iterable of dict

    :returns: The scripts sorted by tag, each one with its "tag", "name" and sorted "blocks".
    :rtype: list of dict
    """
    # group the blocks found for each script tag
    blocks_by_tag = {}
    for char in chars:
        info = cls.get_script_by_code(char["code"])
        blocks_by_tag.setdefault(info["tag"], set()).add(info["block"])
    return [
        {
            "tag": tag,
            "name": unicodedata.script_name(tag),
            "blocks": sorted(blocks_by_tag[tag]),
        }
        for tag in sorted(blocks_by_tag)
    ]
417 | | - |
def get_scripts(self):
    """
    Gets the scripts of the characters returned by get_characters.

    :returns: The scripts.
    :rtype: list of dict
    """
    characters = self.get_characters()
    return self.get_scripts_by_characters(chars=characters)
426 | | - |
427 | 371 | def get_style_flag(self, key): |
428 | 372 | """ |
429 | 373 | Gets the style flag reading OS/2 and macStyle tables. |
@@ -470,6 +414,80 @@ def get_ttfont(self): |
470 | 414 | """ |
471 | 415 | return self._ttfont |
472 | 416 |
|
@classmethod
def _populate_unicode_items_set(cls, items, items_cache, item):
    """
    Counts one more character for the item having the same "name" as the given one.

    The first time a name is seen, a copy of the item (with "characters_count"
    set to 0) is registered in both items and items_cache, then its counter
    is incremented. The given item itself is never modified.

    :param items: The list collecting the distinct items (updated in place).
    :type items: list of dict
    :param items_cache: The distinct items indexed by name (updated in place).
    :type items_cache: dict
    :param item: The item of the current character, it must have a "name" key.
    :type item: dict
    """
    name = item["name"]
    if name in items_cache:
        entry = items_cache[name]
    else:
        # work on a copy so the caller's dict is left untouched
        entry = item.copy()
        entry["characters_count"] = 0
        items_cache[name] = entry
        items.append(entry)
    entry["characters_count"] += 1
@staticmethod
def _get_unicode_items_set_with_coverage(all_items, items, coverage_threshold=0.0):
    """
    Builds the coverage report for a reference set of unicode items.

    Every entry of all_items is deep-copied and extended with:
    - "characters_count": the count of the entry of items with the same "name" (0 if none);
    - "coverage": characters_count / characters_total (0.0 if there is no match
      or if characters_total is 0).
    The input collections are not modified.

    :param all_items: The reference items, with "name" and "characters_total" keys.
    :type all_items: list of dict
    :param items: The counted items, with "name" and "characters_count" keys.
    :type items: list of dict
    :param coverage_threshold: The minimum coverage required for an item to be returned.
    :type coverage_threshold: float

    :returns: The reference items with coverage >= coverage_threshold, in the order of all_items.
    :rtype: list of dict
    """
    counted_by_name = {item["name"]: item for item in items}
    items_with_coverage = []
    for item in copy.deepcopy(all_items):
        name = item["name"]
        if name in counted_by_name:
            count = counted_by_name[name]["characters_count"]
            total = item["characters_total"]
            # an item without any character cannot be covered:
            # report 0.0 instead of raising ZeroDivisionError.
            coverage = count / total if total else 0.0
        else:
            count = 0
            coverage = 0.0
        item["characters_count"] = count
        item["coverage"] = coverage
        if coverage >= coverage_threshold:
            items_with_coverage.append(item)
    return items_with_coverage
| 443 | + |
def get_unicode_blocks(self, coverage_threshold=0.00001):
    """
    Gets the unicode blocks and their coverage.
    A block is returned only if its coverage is >= coverage_threshold
    (0.0 <= coverage_threshold <= 1.0).

    :param coverage_threshold: The minimum required coverage for a block to be returned.
    :type coverage_threshold: float

    :returns: The list of unicode blocks.
    :rtype: list of dicts
    """
    found_blocks = []
    found_blocks_by_name = {}
    # count the characters yielded by get_characters per block name
    for char in self.get_characters():
        block = {"name": char["block_name"]}
        self._populate_unicode_items_set(found_blocks, found_blocks_by_name, block)
    return self._get_unicode_items_set_with_coverage(
        _UNICODE_BLOCKS,
        found_blocks,
        coverage_threshold=coverage_threshold,
    )
| 466 | + |
def get_unicode_scripts(self, coverage_threshold=0.00001):
    """
    Gets the unicode scripts and their coverage.
    A script is returned only if its coverage is >= coverage_threshold
    (0.0 <= coverage_threshold <= 1.0).

    :param coverage_threshold: The minimum required coverage for a script to be returned.
    :type coverage_threshold: float

    :returns: The list of unicode scripts.
    :rtype: list of dicts
    """
    found_scripts = []
    found_scripts_by_name = {}
    # count the characters yielded by get_characters per script name
    for char in self.get_characters():
        script = {"name": char["script_name"], "tag": char["script_tag"]}
        self._populate_unicode_items_set(found_scripts, found_scripts_by_name, script)
    return self._get_unicode_items_set_with_coverage(
        _UNICODE_SCRIPTS,
        found_scripts,
        coverage_threshold=coverage_threshold,
    )
| 490 | + |
473 | 491 | def get_variable_axes(self): |
474 | 492 | """ |
475 | 493 | Gets the font variable axes. |
|
0 commit comments