I was able to find the following method of determining if a Unicode-16
character is supported by a font. Unfortunately that doesn't work for surrogate pair Unicode characters, since WCRANGE
struct supported by GetFontUnicodeRanges
function returns only WCHAR
(16-bit) parameters as output.
Here's an example of what I'm trying to do:
LRESULT CALLBACK WndProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam)
{
static HFONT hFont = NULL;
switch (message)
{
case WM_CREATE:
{
LOGFONT lf = {0};
lf.lfHeight = -64;
::StringCchCopy(lf.lfFaceName, _countof(lf.lfFaceName), L"Arial");
hFont = ::CreateFontIndirect(&lf);
}
break;
case WM_PAINT:
{
PAINTSTRUCT ps;
HDC hdc = BeginPaint(hWnd, &ps);
RECT rcClient = {0};
::GetClientRect(hWnd, &rcClient);
HGDIOBJ hOldFont = ::SelectObject(hdc, hFont);
LPCTSTR pStr = L">\U0001F609<";
int nLn = wcslen(pStr);
RECT rc = {20, 20, rcClient.right, rcClient.bottom};
::DrawText(hdc, pStr, nLn, &rc, DT_NOPREFIX | DT_SINGLELINE);
::SelectObject(hdc, hOldFont);
EndPaint(hWnd, &ps);
}
break;
//....
If I run it on Windows 10, I get this:
But this is what I'm getting on Windows 7:
So what is the way to tell if that middle character is not going to be rendered?
PS. I also attempted to use badly documented Uniscribe, and a modified version of this tutorial as an example. But no matter what I did, it failed to produce a discernible outcome between Win10 and Win7. If it helps to answer this question, here's the code I experimented with:
//Call from WM_PAINT handler
std::wstring str;
test02(hdc, pStr, str);
RECT rc0 = {0, 200, rcClient.right, rcClient.bottom};
::DrawText(hdc, str.c_str(), str.size(), &rc0, DT_NOPREFIX | DT_SINGLELINE);
and then:
void test02(HDC hDc, LPCTSTR pStr, std::wstring& str)
{
//'str' = receives debugging outcome (needs to be printed on the screen)
//SOURCE:
// https://maxradi.us/documents/uniscribe/
HRESULT hr;
SCRIPT_STRING_ANALYSIS ssa = {0};
int nLn = wcslen(pStr);
hr = ::ScriptStringAnalyse(hDc,
pStr,
nLn,
1024,
-1,
SSA_GLYPHS,
0, NULL, NULL, NULL, NULL, NULL, &ssa);
if(SUCCEEDED(hr))
{
const SCRIPT_PROPERTIES **g_ppScriptProperties;
int g_iMaxScript;
hr = ::ScriptGetProperties(&g_ppScriptProperties, &g_iMaxScript);
if(SUCCEEDED(hr))
{
const int cMaxItems = 20;
SCRIPT_ITEM si[cMaxItems + 1];
SCRIPT_ITEM *pItems = si;
int cItems; //Receives number of glyphs
SCRIPT_CONTROL scrCtrl = {0};
SCRIPT_STATE scrState = {0};
hr = ::ScriptItemize(pStr, nLn, cMaxItems, &scrCtrl, &scrState, pItems, &cItems);
if(SUCCEEDED(hr))
{
FormatAdd2(str, L"cItems=%d: ", cItems);
int nCntGlyphs = nLn * 4;
WORD* pGlyphs = new WORD[nCntGlyphs];
WORD* pLogClust = new WORD[nLn];
SCRIPT_VISATTR* pSVs = new SCRIPT_VISATTR[nCntGlyphs];
//Go through each run
for(int i = 0; i < cItems; i++)
{
FormatAdd2(str, L"[%d]:", i);
SCRIPT_CACHE sc = NULL;
int nCntGlyphsWrtn = 0;
int iPos = pItems[i].iCharPos;
const WCHAR* pP = &pStr[iPos];
int cChars = i + 1 < cItems ? pItems[i + 1].iCharPos - iPos : nLn - iPos;
hr = ::ScriptShape(hDc, &sc, pP, cChars,
nCntGlyphs, &pItems[i].a, pGlyphs, pLogClust, pSVs, &nCntGlyphsWrtn);
if(SUCCEEDED(hr))
{
std::wstring strGlyphs;
for(int g = 0; g < nCntGlyphsWrtn; g++)
{
FormatAdd2(strGlyphs, L"%02X,", pGlyphs[g]);
}
std::wstring strLogClust;
for(int w = 0; w < cChars; w++)
{
FormatAdd2(strLogClust, L"%02X,", pLogClust[w]);
}
std::wstring strSVs;
for(int g = 0; g < nCntGlyphsWrtn; g++)
{
FormatAdd2(strSVs, L"%02X,", pSVs[g]);
}
FormatAdd2(str, L"c=%d {G:%s LC:%s SV:%s} ", nCntGlyphsWrtn, strGlyphs.c_str(), strLogClust.c_str(), strSVs.c_str());
int* pAdvances = new int[nCntGlyphsWrtn];
GOFFSET* pOffsets = new GOFFSET[nCntGlyphsWrtn];
ABC abc = {0};
hr = ::ScriptPlace(hDc, &sc, pGlyphs, nCntGlyphsWrtn, pSVs, &pItems[i].a, pAdvances, pOffsets, &abc);
if(SUCCEEDED(hr))
{
std::wstring strAdvs;
for(int g = 0; g < nCntGlyphsWrtn; g++)
{
FormatAdd2(strAdvs, L"%02X,", pAdvances[g]);
}
std::wstring strOffs;
for(int g = 0; g < nCntGlyphsWrtn; g++)
{
FormatAdd2(strOffs, L"u=%02X v=%02X,", pOffsets[g].du, pOffsets[g].dv);
}
FormatAdd2(str, L"{a=%d,b=%d,c=%d} {A:%s OF:%s}", abc.abcA, abc.abcB, abc.abcC, strAdvs.c_str(), strOffs.c_str());
}
delete[] pAdvances;
delete[] pOffsets;
}
//Clear cache
hr = ::ScriptFreeCache(&sc);
assert(SUCCEEDED(hr));
}
delete[] pSVs;
delete[] pGlyphs;
delete[] pLogClust;
}
}
hr = ::ScriptStringFree(&ssa);
assert(SUCCEEDED(hr));
}
}
std::wstring& FormatAdd2(std::wstring& str, LPCTSTR pszFormat, ...)
{
va_list argList;
va_start(argList, pszFormat);
int nSz = _vsctprintf(pszFormat, argList) + 1;
TCHAR* pBuff = new TCHAR[nSz]; //One char for last null
pBuff[0] = 0;
_vstprintf_s(pBuff, nSz, pszFormat, argList);
pBuff[nSz - 1] = 0;
str.append(pBuff);
delete[] pBuff;
va_end(argList);
return str;
}
EDIT: I was able to create a demo GUI app that demonstrates the solution suggested by Barmak Shemirani below.
With surrogate pairs, a Unicode code point from range U+D800 to U+DBFF (called "high surrogate") gets combined with another Unicode code point from range U+DC00 to U+DFFF (called "low surrogate") to generate a whole new character, allowing the encoding of over one million additional characters.
Right-click your font and select Properties . Select the tab "CharSet/Unicode". If the Font Encoding Type is not Symbol and the Supported Unicode Ranges list anything besides or in addition to Basic Latin and Latin-1 Supplement, your font is a Unicode font or is compatible with Unicode.
These characters have some special values; they are made up of two Unicode characters in two specific ranges such that the first Unicode character is in one range (for example 0xD800-0xD8FF) and the second Unicode character is in the second range (for example 0xDC00-0xDCFF). This is called a surrogate pair.
😉
character is actually not supported in Windows 10 Arial font. Windows 10 uses "Segoe UI Emoji"
as fallback font for that particular code point.
So first we have to figure out if fallback font is used. Then check the glyph index to see if it is tofu character (usually shown as square sign â–¯
)
We can use meta file to find if font substitution is used. Select that font in to HDC
.
Use ScriptGetFontProperties
to find the values for unsupported glyphs.
Use GetCharacterPlacement
to find the glyph indices for the string. If the glyph index matches unsupported glyphs, then the code point is being printed as tofu â–¯
.
If you try to print Chinese character etc. then you have to choose the appropriate font (SimSun for Chinese)
This part is done by IMLangFontLink
. It's a different type of font substitution.
The example below will test for single code point (it can be expanded to handle a string).
If Segoe UI
font is selected, then for the Chinese character 请
, it will switch Segoe UI
to SimSun
.
For emojis, it will switch Segoe UI
to Segoe UI Emoji
See also this article in oldnewthing. Note the article in OldNewThing does not handle Emojis, it just lets TextOut
handle it (which is handled correctly in Windows 10 so the result appears okay)
#define _CRT_SECURE_NO_WARNINGS
#include <iostream>
#include <windows.h>
#include <usp10.h>
#include <AtlBase.h>
#include <AtlCom.h>
#include <mlang.h>
#pragma comment(lib, "Usp10.lib")
int CALLBACK metafileproc(HDC, HANDLETABLE*, const ENHMETARECORD *record,
int, LPARAM logfont)
{
if(record->iType == EMR_EXTCREATEFONTINDIRECTW)
{
auto ptr = (const EMREXTCREATEFONTINDIRECTW*)record;
*(LOGFONT*)logfont = ptr->elfw.elfLogFont;
}
return 1;
}
HFONT GetFallbackFont(const wchar_t *str, HFONT hfont_test)
{
//use metafile to find the fallback font
auto metafile_hdc = CreateEnhMetaFile(NULL, NULL, NULL, NULL);
auto metafile_oldfont = SelectObject(metafile_hdc, hfont_test);
SCRIPT_STRING_ANALYSIS ssa;
ScriptStringAnalyse(metafile_hdc, str, wcslen(str), 0, -1,
SSA_METAFILE | SSA_FALLBACK | SSA_GLYPHS | SSA_LINK,
0, NULL, NULL, NULL, NULL, NULL, &ssa);
ScriptStringOut(ssa, 0, 0, 0, NULL, 0, 0, FALSE);
ScriptStringFree(&ssa);
SelectObject(metafile_hdc, metafile_oldfont);
auto hmetafile = CloseEnhMetaFile(metafile_hdc);
LOGFONT logfont = { 0 };
EnumEnhMetaFile(0, hmetafile, metafileproc, &logfont, NULL);
wprintf(L"Selecting fallback font: %s\n", logfont.lfFaceName);
HFONT hfont = CreateFontIndirect(&logfont);
DeleteEnhMetaFile(hmetafile);
return hfont;
}
//IsTofu is for testing emojis
//It accepts a Unicode string
bool IsTofuError(HDC hdc, HFONT hfont_test, const wchar_t *str)
{
if(wcsstr(str, L" "))
{
wprintf(L"*** cannot test strings containing blank space\n");
}
auto hfont = GetFallbackFont(str, hfont_test);
auto oldfont = SelectObject(hdc, hfont);
//find the characters not supported in this font
//note, blank space is blank, unsupported fonts can be blank also
SCRIPT_CACHE sc = NULL;
SCRIPT_FONTPROPERTIES fp = { sizeof(fp) };
ScriptGetFontProperties(hdc, &sc, &fp);
ScriptFreeCache(&sc);
wprintf(L"SCRIPT_FONTPROPERTIES:\n");
wprintf(L" Blank: %d, Default: %d, Invalid: %d\n",
fp.wgBlank, fp.wgDefault, fp.wgInvalid);
// Get glyph indices for the string
GCP_RESULTS gcp_results = { sizeof(GCP_RESULTS) };
gcp_results.nGlyphs = wcslen(str);
auto wstr_memory = (wchar_t*)calloc(wcslen(str) + 1, sizeof(wchar_t));
gcp_results.lpGlyphs = wstr_memory;
GetCharacterPlacement(hdc, str, wcslen(str), 0, &gcp_results, GCP_GLYPHSHAPE);
//check the characters against wgBlank...
bool istofu = false;
wprintf(L"Glyphs:");
for(UINT i = 0; i < gcp_results.nGlyphs; i++)
{
wchar_t n = gcp_results.lpGlyphs[i];
wprintf(L"%d,", (int)n);
if(n == fp.wgBlank || n == fp.wgInvalid || n == fp.wgDefault)
istofu = true;
}
wprintf(L"\n");
free(wstr_memory);
SelectObject(hdc, oldfont);
DeleteObject(hfont);
if (istofu)
wprintf(L"Tofu error\n\n");
return istofu;
}
//get_font_link checks if there is font substitution,
//this usually applies to Asian fonts
//Note, this function doesn't accept a unicode string
//it only takes a single code point. You can imrpove it to accept strings
bool get_font_link(const wchar_t *single_codepoint,
HDC hdc,
HFONT &hfont_src,
HFONT &hfont_dst,
CComPtr<IMLangFontLink> &ifont,
CComPtr<IMLangCodePages> &icodepages)
{
DWORD codepages_dst[100] = { 0 };
LONG codepages_count = 100;
DWORD codepages = 0;
if(FAILED(icodepages->GetStrCodePages(single_codepoint, wcslen(single_codepoint),
0, codepages_dst, &codepages_count)))
return false;
codepages = codepages_dst[0];
if(FAILED(ifont->MapFont(hdc, codepages_dst[0], hfont_src, &hfont_dst)))
return false;
SelectObject(hdc, hfont_dst);
wchar_t buf[100];
GetTextFace(hdc, _countof(buf), buf);
wprintf(L"get_font_link:\nSelecting a different font: %s\n", buf);
return true;
}
int main()
{
CoInitialize(NULL);
{
CComPtr<IMultiLanguage> imultilang;
CComPtr<IMLangFontLink> ifont;
CComPtr<IMLangCodePages> icodepages;
if(FAILED(imultilang.CoCreateInstance(CLSID_CMultiLanguage))) return 0;
if(FAILED(imultilang->QueryInterface(&ifont))) return 0;
if(FAILED(imultilang->QueryInterface(&icodepages))) return 0;
//const wchar_t *single_codepoint = L"a";
//const wchar_t *single_codepoint = L"请";
const wchar_t *single_codepoint = L"😉";
auto hdc = GetDC(0);
auto memdc = CreateCompatibleDC(hdc);
auto hbitmap = CreateCompatibleBitmap(hdc, 1, 1);
auto oldbmp = SelectObject(memdc, hbitmap);
auto hfont_src = CreateFont(10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, L"Segoe UI");
auto oldfont = SelectObject(hdc, hfont_src);
HFONT hfont_dst = NULL;
if(IsTofuError(hdc, hfont_src, single_codepoint))
{
if(!get_font_link(
single_codepoint, memdc, hfont_src, hfont_dst, ifont, icodepages))
wprintf(L"Can't find a substitution!\n");
}
SelectObject(memdc, oldbmp);
SelectObject(memdc, oldfont);
DeleteObject(hbitmap);
DeleteDC(memdc);
ReleaseDC(0, hdc);
DeleteObject(hfont_src);
if(ifont && hfont_dst)
ifont->ReleaseFont(hfont_dst);
}
CoUninitialize();
return 0;
}
Output:
IsTofu
is false
for Windows 10.
It will be true
for some older Windows versions. But this is not tested in WinXP
Using GetUniscribeFallbackFont
from this link
Note, Windows documentation describes GetCharacterPlacement
as obsolete, it recommends using Uniscribe functions. But I don't know what replacement to use for it here.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With