Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

How to determine if a Glyph can be displayed?

I have a large list of Unicode icons that I want to display. However, I would like to hide/skip any icon that I cannot display (because I don't have the correct font installed). Is there a programmatic way to determine this?

like image 317
Espresso Avatar asked Sep 19 '25 16:09

Espresso


1 Answer

There's nothing built into Python for this. However, you can use the fontTools package, e.g. as follows (used on Windows 10):

# ToDo: find fallback font
# ToDo: reverse algorithm (font => characters) instead of (character => fonts)
# ToDo: check/print merely basic font (omit variants like Bold, Light, Condensed, …)

import unicodedata
import sys
import os
from fontTools.ttLib import TTFont, TTCollection

def scanfontdirs(directories):
    """Walk *directories* recursively and collect the font files found.

    Returns a tuple ``(single_fonts, collections)``: the first list holds
    the paths of ``.ttf``/``.otf`` files, the second the paths of ``.ttc``
    font collections. Extensions are compared case-insensitively so that
    files such as ``ARIAL.TTF`` are not skipped. Directories that do not
    exist simply contribute nothing (``os.walk`` yields no entries).
    """
    single_fonts = []
    collections = []
    for directory in directories:
        for root, _subdirs, filenames in os.walk(directory):
            for filename in filenames:
                lowered = filename.lower()
                if lowered.endswith(".ttc"):
                    collections.append(os.path.join(root, filename))
                elif lowered.endswith((".ttf", ".otf")):
                    single_fonts.append(os.path.join(root, filename))
    return single_fonts, collections


# Directories that are searched for font files.
fontsdirs = []
_system_root = os.getenv('SystemRoot')      # unset outside Windows
if _system_root:
    fontsdirs.append(os.path.join(_system_root, 'Fonts'))  # r"c:\Windows\Fonts"
fontsdirs.append(r"D:\Downloads\MathJax-TeX-fonts-otf")
# Per-user fonts could be added as well:
# os.path.join( os.getenv('LOCALAPPDATA'), r'Microsoft\Windows\Fonts')

print(fontsdirs, file=sys.stderr)

# fontsPaths: single-font files (.ttf/.otf); fontcPaths: collections (.ttc)
fontsPaths, fontcPaths = scanfontdirs(fontsdirs)

def char_in_font(unicode_char, font):
    """Look up the glyph name *font* maps *unicode_char* to.

    Only cmap subtables that report themselves as Unicode (or whose
    encoding is 'utf_16_be') are consulted, in table order.

    Returns the glyph name from the first such subtable containing the
    code point, '<nil>' when that name is the empty string, and '' when
    no consulted subtable contains the code point.
    """
    for subtable in font['cmap'].tables:
        if not (subtable.isUnicode() or subtable.getEncoding() == 'utf_16_be'):
            continue
        codepoint = ord(unicode_char)
        if codepoint not in subtable.cmap:
            continue
        glyph_name = subtable.cmap[codepoint]
        if glyph_name == '':
            return '<nil>'
        return glyph_name
    return ''

def checkfont(char,font,fontdict,fontpath):
    """Record in *fontdict* whether *font* has a glyph for *char*.

    char     -- the single character to look up
    font     -- an opened font exposing 'name' and 'cmap' tables
    fontdict -- dict updated in place; maps a font family name to one
                formatted report line
    fontpath -- path of the file *font* was loaded from; only its file
                name appears in the report line

    A family already present in *fontdict* is skipped, so every family
    is reported at most once. Nothing is returned.
    """
    filename = os.path.basename(fontpath)   # portable, unlike split('\\')

    # An Introduction to TrueType Fonts: A look inside the TTF format
    # https://scripts.sil.org/cms/scripts/page.php?site_id=nrsi&id=IWS-Chapter08
    # 1 = Font Family name, 2 = Font SubFamily name, 4 = Full font name
    names = font['name'].names
    record = next((rec for rec in names if rec.nameID == 1), None)
    if record is None and len(names) > 1:
        record = names[1]     # fallback: works generally (not always)
    # A name table with no usable record would otherwise raise IndexError;
    # identify such a font by its file name instead.
    fontname = record.toStr() if record is not None else filename

    if fontname in fontdict:
        return
    glyph = char_in_font(char, font)
    if glyph != '':
        fontdict[fontname] = "{} (0x{:04x}) [{}] '{}' \t in {}".format(
            char, ord(char), glyph, fontname, filename)

def testfont(char):
    """Check every discovered font for a glyph for *char*.

    Goes through the single-font files in ``fontsPaths`` and then every
    member font of the collections in ``fontcPaths``, letting
    ``checkfont`` fill a dict keyed by font family name.

    Returns the dict's values: one formatted report line per font family
    that contains the character.
    """
    fontdict = {}
    for fontpath in fontsPaths:
        font = TTFont(fontpath)   # specify the path to the font
        try:
            checkfont(char,font,fontdict,fontpath)
        finally:
            font.close()          # do not keep every font open
    for fontpath in fontcPaths:   # specify the path to the font collection
        collection = TTCollection(fontpath)
        try:
            # The collection already holds its member fonts, so they are
            # used directly instead of re-opening the file per font index.
            for font in collection.fonts:
                checkfont(char,font,fontdict,fontpath)
        finally:
            collection.close()    # closes all member fonts
    return fontdict.values()

def testprint(char):
    """Print a header for *char* and one line per font containing it.

    The header shows the character, its code point in hexadecimal and its
    Unicode name ('???' when the character has no name); the following
    lines are the report lines produced by ``testfont``.
    """
    print('') # empty line for better readability
    hex_code = ' 0x%04x' % ord(char)
    print(char, hex_code, unicodedata.name(char, '???'))
    for report_line in testfont(char):
        print(report_line)

def parsechars(arg):
    """Translate one command-line argument into the characters to test.

    Arguments of fewer than two characters are taken literally, e.g. a
    single Cyrillic letter. Longer arguments are tried, in this order, as

    1. a decimal code point            (1071)
    2. a hexadecimal code point        (042F or 0x042F)
    3. text with Python-style escapes  (\\U00010A30\\u042F\\xFE, possibly
       mixed with literal characters such as emoji)

    If the escapes are malformed (for instance a trailing backslash) the
    argument is used unchanged instead of aborting the script.
    """
    if len(arg) < 2:
        return arg
    for base in (10, 16):
        try:
            return chr(int(arg, base))
        except (ValueError, OverflowError):
            # not a number in this base, or not a valid code point
            continue
    try:
        return arg.encode('raw_unicode_escape').decode('unicode_escape')
    except UnicodeDecodeError:
        return arg


def main(argv=None):
    """Report the fonts for each character named on the command line.

    *argv* defaults to ``sys.argv[1:]``. Without arguments a sample
    character is reported.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        # sample output; written as an escape so the source file's
        # encoding cannot corrupt the character
        testprint("\u0905")      # 0x0905 Devanagari Letter A
        return
    for arg in args:
        for char in parsechars(arg):
            testprint(char)


if __name__ == "__main__":
    main()

Sample output (if called without arguments): .\FontGlyphs.py

['C:\\WINDOWS\\Fonts', 'D:\\Downloads\\MathJax-TeX-fonts-otf']

अ  0x0905 DEVANAGARI LETTER A
अ (0x0905) [uni0905] 'Nirmala UI'        in Nirmala.ttf
अ (0x0905) [uni0905] 'Nirmala UI Semilight'      in NirmalaS.ttf
अ (0x0905) [uni0905] 'Unifont'   in unifont-8.0.01.ttf
अ (0x0905) [uni0905] 'Unifont CSUR'      in unifont_csur-8.0.01.ttf

Another example: .\FontGlyphs.py 🐈

['C:\\WINDOWS\\Fonts', 'D:\\Downloads\\MathJax-TeX-fonts-otf']

🐈  0x1f408 CAT
🐈 (0x1f408) [u1F408] 'EmojiOne Color'   in EmojiOneColor-SVGinOT.ttf
🐈 (0x1f408) [u1F408] 'Segoe UI Emoji'   in seguiemj.ttf
🐈 (0x1f408) [u1F408] 'Segoe UI Symbol'          in seguisym.ttf

FYI, I have written a similar script that shows the output (glyphs) rendered using appropriate fonts (using the default browser…

Limitation: the script does not recognize emoji sequences; each code point of a sequence is reported separately. For instance:

.\FontGlyphs.py 👍🏽

['C:\\WINDOWS\\Fonts', 'D:\\Downloads\\MathJax-TeX-fonts-otf']

πŸ‘  0x1f44d THUMBS UP SIGN
πŸ‘ (0x1f44d) [u1F44D] 'EmojiOne Color'   in EmojiOneColor-SVGinOT.ttf
πŸ‘ (0x1f44d) [u1F44D] 'Segoe UI Emoji'   in seguiemj.ttf
πŸ‘ (0x1f44d) [u1F44D] 'Segoe UI Symbol'          in seguisym.ttf

🏽  0x1f3fd EMOJI MODIFIER FITZPATRICK TYPE-4
🏽 (0x1f3fd) [u1F3FD] 'EmojiOne Color'   in EmojiOneColor-SVGinOT.ttf
🏽 (0x1f3fd) [u1F3FD] 'Segoe UI Emoji'   in seguiemj.ttf
🏽 (0x1f3fd) [u1F3FD] 'Segoe UI Symbol'          in seguisym.ttf
like image 186
JosefZ Avatar answered Sep 21 '25 05:09

JosefZ