diff --git a/core/string/char_range.inc b/core/string/char_range.inc
index b7d6bbdb611..2b081b96dec 100644
--- a/core/string/char_range.inc
+++ b/core/string/char_range.inc
@@ -2761,4 +2761,667 @@ inline constexpr CharRange lowercase_letter[] = {
{ 0x1e922, 0x1e943 },
};
+inline constexpr CharRange unicode_letter[] = {
+ { 0x41, 0x5a },
+ { 0x61, 0x7a },
+ { 0xaa, 0xaa },
+ { 0xb5, 0xb5 },
+ { 0xba, 0xba },
+ { 0xc0, 0xd6 },
+ { 0xd8, 0xf6 },
+ { 0xf8, 0x2c1 },
+ { 0x2c6, 0x2d1 },
+ { 0x2e0, 0x2e4 },
+ { 0x2ec, 0x2ec },
+ { 0x2ee, 0x2ee },
+ { 0x370, 0x374 },
+ { 0x376, 0x377 },
+ { 0x37a, 0x37d },
+ { 0x37f, 0x37f },
+ { 0x386, 0x386 },
+ { 0x388, 0x38a },
+ { 0x38c, 0x38c },
+ { 0x38e, 0x3a1 },
+ { 0x3a3, 0x3f5 },
+ { 0x3f7, 0x481 },
+ { 0x48a, 0x52f },
+ { 0x531, 0x556 },
+ { 0x559, 0x559 },
+ { 0x560, 0x588 },
+ { 0x5d0, 0x5ea },
+ { 0x5ef, 0x5f2 },
+ { 0x620, 0x64a },
+ { 0x66e, 0x66f },
+ { 0x671, 0x6d3 },
+ { 0x6d5, 0x6d5 },
+ { 0x6e5, 0x6e6 },
+ { 0x6ee, 0x6ef },
+ { 0x6fa, 0x6fc },
+ { 0x6ff, 0x6ff },
+ { 0x710, 0x710 },
+ { 0x712, 0x72f },
+ { 0x74d, 0x7a5 },
+ { 0x7b1, 0x7b1 },
+ { 0x7ca, 0x7ea },
+ { 0x7f4, 0x7f5 },
+ { 0x7fa, 0x7fa },
+ { 0x800, 0x815 },
+ { 0x81a, 0x81a },
+ { 0x824, 0x824 },
+ { 0x828, 0x828 },
+ { 0x840, 0x858 },
+ { 0x860, 0x86a },
+ { 0x870, 0x887 },
+ { 0x889, 0x88e },
+ { 0x8a0, 0x8c9 },
+ { 0x904, 0x939 },
+ { 0x93d, 0x93d },
+ { 0x950, 0x950 },
+ { 0x958, 0x961 },
+ { 0x971, 0x980 },
+ { 0x985, 0x98c },
+ { 0x98f, 0x990 },
+ { 0x993, 0x9a8 },
+ { 0x9aa, 0x9b0 },
+ { 0x9b2, 0x9b2 },
+ { 0x9b6, 0x9b9 },
+ { 0x9bd, 0x9bd },
+ { 0x9ce, 0x9ce },
+ { 0x9dc, 0x9dd },
+ { 0x9df, 0x9e1 },
+ { 0x9f0, 0x9f1 },
+ { 0x9fc, 0x9fc },
+ { 0xa05, 0xa0a },
+ { 0xa0f, 0xa10 },
+ { 0xa13, 0xa28 },
+ { 0xa2a, 0xa30 },
+ { 0xa32, 0xa33 },
+ { 0xa35, 0xa36 },
+ { 0xa38, 0xa39 },
+ { 0xa59, 0xa5c },
+ { 0xa5e, 0xa5e },
+ { 0xa72, 0xa74 },
+ { 0xa85, 0xa8d },
+ { 0xa8f, 0xa91 },
+ { 0xa93, 0xaa8 },
+ { 0xaaa, 0xab0 },
+ { 0xab2, 0xab3 },
+ { 0xab5, 0xab9 },
+ { 0xabd, 0xabd },
+ { 0xad0, 0xad0 },
+ { 0xae0, 0xae1 },
+ { 0xaf9, 0xaf9 },
+ { 0xb05, 0xb0c },
+ { 0xb0f, 0xb10 },
+ { 0xb13, 0xb28 },
+ { 0xb2a, 0xb30 },
+ { 0xb32, 0xb33 },
+ { 0xb35, 0xb39 },
+ { 0xb3d, 0xb3d },
+ { 0xb5c, 0xb5d },
+ { 0xb5f, 0xb61 },
+ { 0xb71, 0xb71 },
+ { 0xb83, 0xb83 },
+ { 0xb85, 0xb8a },
+ { 0xb8e, 0xb90 },
+ { 0xb92, 0xb95 },
+ { 0xb99, 0xb9a },
+ { 0xb9c, 0xb9c },
+ { 0xb9e, 0xb9f },
+ { 0xba3, 0xba4 },
+ { 0xba8, 0xbaa },
+ { 0xbae, 0xbb9 },
+ { 0xbd0, 0xbd0 },
+ { 0xc05, 0xc0c },
+ { 0xc0e, 0xc10 },
+ { 0xc12, 0xc28 },
+ { 0xc2a, 0xc39 },
+ { 0xc3d, 0xc3d },
+ { 0xc58, 0xc5a },
+ { 0xc5d, 0xc5d },
+ { 0xc60, 0xc61 },
+ { 0xc80, 0xc80 },
+ { 0xc85, 0xc8c },
+ { 0xc8e, 0xc90 },
+ { 0xc92, 0xca8 },
+ { 0xcaa, 0xcb3 },
+ { 0xcb5, 0xcb9 },
+ { 0xcbd, 0xcbd },
+ { 0xcdd, 0xcde },
+ { 0xce0, 0xce1 },
+ { 0xcf1, 0xcf2 },
+ { 0xd04, 0xd0c },
+ { 0xd0e, 0xd10 },
+ { 0xd12, 0xd3a },
+ { 0xd3d, 0xd3d },
+ { 0xd4e, 0xd4e },
+ { 0xd54, 0xd56 },
+ { 0xd5f, 0xd61 },
+ { 0xd7a, 0xd7f },
+ { 0xd85, 0xd96 },
+ { 0xd9a, 0xdb1 },
+ { 0xdb3, 0xdbb },
+ { 0xdbd, 0xdbd },
+ { 0xdc0, 0xdc6 },
+ { 0xe01, 0xe30 },
+ { 0xe32, 0xe33 },
+ { 0xe40, 0xe46 },
+ { 0xe81, 0xe82 },
+ { 0xe84, 0xe84 },
+ { 0xe86, 0xe8a },
+ { 0xe8c, 0xea3 },
+ { 0xea5, 0xea5 },
+ { 0xea7, 0xeb0 },
+ { 0xeb2, 0xeb3 },
+ { 0xebd, 0xebd },
+ { 0xec0, 0xec4 },
+ { 0xec6, 0xec6 },
+ { 0xedc, 0xedf },
+ { 0xf00, 0xf00 },
+ { 0xf40, 0xf47 },
+ { 0xf49, 0xf6c },
+ { 0xf88, 0xf8c },
+ { 0x1000, 0x102a },
+ { 0x103f, 0x103f },
+ { 0x1050, 0x1055 },
+ { 0x105a, 0x105d },
+ { 0x1061, 0x1061 },
+ { 0x1065, 0x1066 },
+ { 0x106e, 0x1070 },
+ { 0x1075, 0x1081 },
+ { 0x108e, 0x108e },
+ { 0x10a0, 0x10c5 },
+ { 0x10c7, 0x10c7 },
+ { 0x10cd, 0x10cd },
+ { 0x10d0, 0x10fa },
+ { 0x10fc, 0x1248 },
+ { 0x124a, 0x124d },
+ { 0x1250, 0x1256 },
+ { 0x1258, 0x1258 },
+ { 0x125a, 0x125d },
+ { 0x1260, 0x1288 },
+ { 0x128a, 0x128d },
+ { 0x1290, 0x12b0 },
+ { 0x12b2, 0x12b5 },
+ { 0x12b8, 0x12be },
+ { 0x12c0, 0x12c0 },
+ { 0x12c2, 0x12c5 },
+ { 0x12c8, 0x12d6 },
+ { 0x12d8, 0x1310 },
+ { 0x1312, 0x1315 },
+ { 0x1318, 0x135a },
+ { 0x1380, 0x138f },
+ { 0x13a0, 0x13f5 },
+ { 0x13f8, 0x13fd },
+ { 0x1401, 0x166c },
+ { 0x166f, 0x167f },
+ { 0x1681, 0x169a },
+ { 0x16a0, 0x16ea },
+ { 0x16f1, 0x16f8 },
+ { 0x1700, 0x1711 },
+ { 0x171f, 0x1731 },
+ { 0x1740, 0x1751 },
+ { 0x1760, 0x176c },
+ { 0x176e, 0x1770 },
+ { 0x1780, 0x17b3 },
+ { 0x17d7, 0x17d7 },
+ { 0x17dc, 0x17dc },
+ { 0x1820, 0x1878 },
+ { 0x1880, 0x1884 },
+ { 0x1887, 0x18a8 },
+ { 0x18aa, 0x18aa },
+ { 0x18b0, 0x18f5 },
+ { 0x1900, 0x191e },
+ { 0x1950, 0x196d },
+ { 0x1970, 0x1974 },
+ { 0x1980, 0x19ab },
+ { 0x19b0, 0x19c9 },
+ { 0x1a00, 0x1a16 },
+ { 0x1a20, 0x1a54 },
+ { 0x1aa7, 0x1aa7 },
+ { 0x1b05, 0x1b33 },
+ { 0x1b45, 0x1b4c },
+ { 0x1b83, 0x1ba0 },
+ { 0x1bae, 0x1baf },
+ { 0x1bba, 0x1be5 },
+ { 0x1c00, 0x1c23 },
+ { 0x1c4d, 0x1c4f },
+ { 0x1c5a, 0x1c7d },
+ { 0x1c80, 0x1c88 },
+ { 0x1c90, 0x1cba },
+ { 0x1cbd, 0x1cbf },
+ { 0x1ce9, 0x1cec },
+ { 0x1cee, 0x1cf3 },
+ { 0x1cf5, 0x1cf6 },
+ { 0x1cfa, 0x1cfa },
+ { 0x1d00, 0x1dbf },
+ { 0x1e00, 0x1f15 },
+ { 0x1f18, 0x1f1d },
+ { 0x1f20, 0x1f45 },
+ { 0x1f48, 0x1f4d },
+ { 0x1f50, 0x1f57 },
+ { 0x1f59, 0x1f59 },
+ { 0x1f5b, 0x1f5b },
+ { 0x1f5d, 0x1f5d },
+ { 0x1f5f, 0x1f7d },
+ { 0x1f80, 0x1fb4 },
+ { 0x1fb6, 0x1fbc },
+ { 0x1fbe, 0x1fbe },
+ { 0x1fc2, 0x1fc4 },
+ { 0x1fc6, 0x1fcc },
+ { 0x1fd0, 0x1fd3 },
+ { 0x1fd6, 0x1fdb },
+ { 0x1fe0, 0x1fec },
+ { 0x1ff2, 0x1ff4 },
+ { 0x1ff6, 0x1ffc },
+ { 0x2071, 0x2071 },
+ { 0x207f, 0x207f },
+ { 0x2090, 0x209c },
+ { 0x2102, 0x2102 },
+ { 0x2107, 0x2107 },
+ { 0x210a, 0x2113 },
+ { 0x2115, 0x2115 },
+ { 0x2119, 0x211d },
+ { 0x2124, 0x2124 },
+ { 0x2126, 0x2126 },
+ { 0x2128, 0x2128 },
+ { 0x212a, 0x212d },
+ { 0x212f, 0x2139 },
+ { 0x213c, 0x213f },
+ { 0x2145, 0x2149 },
+ { 0x214e, 0x214e },
+ { 0x2183, 0x2184 },
+ { 0x2c00, 0x2ce4 },
+ { 0x2ceb, 0x2cee },
+ { 0x2cf2, 0x2cf3 },
+ { 0x2d00, 0x2d25 },
+ { 0x2d27, 0x2d27 },
+ { 0x2d2d, 0x2d2d },
+ { 0x2d30, 0x2d67 },
+ { 0x2d6f, 0x2d6f },
+ { 0x2d80, 0x2d96 },
+ { 0x2da0, 0x2da6 },
+ { 0x2da8, 0x2dae },
+ { 0x2db0, 0x2db6 },
+ { 0x2db8, 0x2dbe },
+ { 0x2dc0, 0x2dc6 },
+ { 0x2dc8, 0x2dce },
+ { 0x2dd0, 0x2dd6 },
+ { 0x2dd8, 0x2dde },
+ { 0x2e2f, 0x2e2f },
+ { 0x3005, 0x3006 },
+ { 0x3031, 0x3035 },
+ { 0x303b, 0x303c },
+ { 0x3041, 0x3096 },
+ { 0x309d, 0x309f },
+ { 0x30a1, 0x30fa },
+ { 0x30fc, 0x30ff },
+ { 0x3105, 0x312f },
+ { 0x3131, 0x318e },
+ { 0x31a0, 0x31bf },
+ { 0x31f0, 0x31ff },
+ { 0x3400, 0x4dbf },
+ { 0x4e00, 0xa48c },
+ { 0xa4d0, 0xa4fd },
+ { 0xa500, 0xa60c },
+ { 0xa610, 0xa61f },
+ { 0xa62a, 0xa62b },
+ { 0xa640, 0xa66e },
+ { 0xa67f, 0xa69d },
+ { 0xa6a0, 0xa6e5 },
+ { 0xa717, 0xa71f },
+ { 0xa722, 0xa788 },
+ { 0xa78b, 0xa7ca },
+ { 0xa7d0, 0xa7d1 },
+ { 0xa7d3, 0xa7d3 },
+ { 0xa7d5, 0xa7d9 },
+ { 0xa7f2, 0xa801 },
+ { 0xa803, 0xa805 },
+ { 0xa807, 0xa80a },
+ { 0xa80c, 0xa822 },
+ { 0xa840, 0xa873 },
+ { 0xa882, 0xa8b3 },
+ { 0xa8f2, 0xa8f7 },
+ { 0xa8fb, 0xa8fb },
+ { 0xa8fd, 0xa8fe },
+ { 0xa90a, 0xa925 },
+ { 0xa930, 0xa946 },
+ { 0xa960, 0xa97c },
+ { 0xa984, 0xa9b2 },
+ { 0xa9cf, 0xa9cf },
+ { 0xa9e0, 0xa9e4 },
+ { 0xa9e6, 0xa9ef },
+ { 0xa9fa, 0xa9fe },
+ { 0xaa00, 0xaa28 },
+ { 0xaa40, 0xaa42 },
+ { 0xaa44, 0xaa4b },
+ { 0xaa60, 0xaa76 },
+ { 0xaa7a, 0xaa7a },
+ { 0xaa7e, 0xaaaf },
+ { 0xaab1, 0xaab1 },
+ { 0xaab5, 0xaab6 },
+ { 0xaab9, 0xaabd },
+ { 0xaac0, 0xaac0 },
+ { 0xaac2, 0xaac2 },
+ { 0xaadb, 0xaadd },
+ { 0xaae0, 0xaaea },
+ { 0xaaf2, 0xaaf4 },
+ { 0xab01, 0xab06 },
+ { 0xab09, 0xab0e },
+ { 0xab11, 0xab16 },
+ { 0xab20, 0xab26 },
+ { 0xab28, 0xab2e },
+ { 0xab30, 0xab5a },
+ { 0xab5c, 0xab69 },
+ { 0xab70, 0xabe2 },
+ { 0xac00, 0xd7a3 },
+ { 0xd7b0, 0xd7c6 },
+ { 0xd7cb, 0xd7fb },
+ { 0xf900, 0xfa6d },
+ { 0xfa70, 0xfad9 },
+ { 0xfb00, 0xfb06 },
+ { 0xfb13, 0xfb17 },
+ { 0xfb1d, 0xfb1d },
+ { 0xfb1f, 0xfb28 },
+ { 0xfb2a, 0xfb36 },
+ { 0xfb38, 0xfb3c },
+ { 0xfb3e, 0xfb3e },
+ { 0xfb40, 0xfb41 },
+ { 0xfb43, 0xfb44 },
+ { 0xfb46, 0xfbb1 },
+ { 0xfbd3, 0xfd3d },
+ { 0xfd50, 0xfd8f },
+ { 0xfd92, 0xfdc7 },
+ { 0xfdf0, 0xfdfb },
+ { 0xfe70, 0xfe74 },
+ { 0xfe76, 0xfefc },
+ { 0xff21, 0xff3a },
+ { 0xff41, 0xff5a },
+ { 0xff66, 0xffbe },
+ { 0xffc2, 0xffc7 },
+ { 0xffca, 0xffcf },
+ { 0xffd2, 0xffd7 },
+ { 0xffda, 0xffdc },
+ { 0x10000, 0x1000b },
+ { 0x1000d, 0x10026 },
+ { 0x10028, 0x1003a },
+ { 0x1003c, 0x1003d },
+ { 0x1003f, 0x1004d },
+ { 0x10050, 0x1005d },
+ { 0x10080, 0x100fa },
+ { 0x10280, 0x1029c },
+ { 0x102a0, 0x102d0 },
+ { 0x10300, 0x1031f },
+ { 0x1032d, 0x10340 },
+ { 0x10342, 0x10349 },
+ { 0x10350, 0x10375 },
+ { 0x10380, 0x1039d },
+ { 0x103a0, 0x103c3 },
+ { 0x103c8, 0x103cf },
+ { 0x10400, 0x1049d },
+ { 0x104b0, 0x104d3 },
+ { 0x104d8, 0x104fb },
+ { 0x10500, 0x10527 },
+ { 0x10530, 0x10563 },
+ { 0x10570, 0x1057a },
+ { 0x1057c, 0x1058a },
+ { 0x1058c, 0x10592 },
+ { 0x10594, 0x10595 },
+ { 0x10597, 0x105a1 },
+ { 0x105a3, 0x105b1 },
+ { 0x105b3, 0x105b9 },
+ { 0x105bb, 0x105bc },
+ { 0x10600, 0x10736 },
+ { 0x10740, 0x10755 },
+ { 0x10760, 0x10767 },
+ { 0x10780, 0x10785 },
+ { 0x10787, 0x107b0 },
+ { 0x107b2, 0x107ba },
+ { 0x10800, 0x10805 },
+ { 0x10808, 0x10808 },
+ { 0x1080a, 0x10835 },
+ { 0x10837, 0x10838 },
+ { 0x1083c, 0x1083c },
+ { 0x1083f, 0x10855 },
+ { 0x10860, 0x10876 },
+ { 0x10880, 0x1089e },
+ { 0x108e0, 0x108f2 },
+ { 0x108f4, 0x108f5 },
+ { 0x10900, 0x10915 },
+ { 0x10920, 0x10939 },
+ { 0x10980, 0x109b7 },
+ { 0x109be, 0x109bf },
+ { 0x10a00, 0x10a00 },
+ { 0x10a10, 0x10a13 },
+ { 0x10a15, 0x10a17 },
+ { 0x10a19, 0x10a35 },
+ { 0x10a60, 0x10a7c },
+ { 0x10a80, 0x10a9c },
+ { 0x10ac0, 0x10ac7 },
+ { 0x10ac9, 0x10ae4 },
+ { 0x10b00, 0x10b35 },
+ { 0x10b40, 0x10b55 },
+ { 0x10b60, 0x10b72 },
+ { 0x10b80, 0x10b91 },
+ { 0x10c00, 0x10c48 },
+ { 0x10c80, 0x10cb2 },
+ { 0x10cc0, 0x10cf2 },
+ { 0x10d00, 0x10d23 },
+ { 0x10e80, 0x10ea9 },
+ { 0x10eb0, 0x10eb1 },
+ { 0x10f00, 0x10f1c },
+ { 0x10f27, 0x10f27 },
+ { 0x10f30, 0x10f45 },
+ { 0x10f70, 0x10f81 },
+ { 0x10fb0, 0x10fc4 },
+ { 0x10fe0, 0x10ff6 },
+ { 0x11003, 0x11037 },
+ { 0x11071, 0x11072 },
+ { 0x11075, 0x11075 },
+ { 0x11083, 0x110af },
+ { 0x110d0, 0x110e8 },
+ { 0x11103, 0x11126 },
+ { 0x11144, 0x11144 },
+ { 0x11147, 0x11147 },
+ { 0x11150, 0x11172 },
+ { 0x11176, 0x11176 },
+ { 0x11183, 0x111b2 },
+ { 0x111c1, 0x111c4 },
+ { 0x111da, 0x111da },
+ { 0x111dc, 0x111dc },
+ { 0x11200, 0x11211 },
+ { 0x11213, 0x1122b },
+ { 0x1123f, 0x11240 },
+ { 0x11280, 0x11286 },
+ { 0x11288, 0x11288 },
+ { 0x1128a, 0x1128d },
+ { 0x1128f, 0x1129d },
+ { 0x1129f, 0x112a8 },
+ { 0x112b0, 0x112de },
+ { 0x11305, 0x1130c },
+ { 0x1130f, 0x11310 },
+ { 0x11313, 0x11328 },
+ { 0x1132a, 0x11330 },
+ { 0x11332, 0x11333 },
+ { 0x11335, 0x11339 },
+ { 0x1133d, 0x1133d },
+ { 0x11350, 0x11350 },
+ { 0x1135d, 0x11361 },
+ { 0x11400, 0x11434 },
+ { 0x11447, 0x1144a },
+ { 0x1145f, 0x11461 },
+ { 0x11480, 0x114af },
+ { 0x114c4, 0x114c5 },
+ { 0x114c7, 0x114c7 },
+ { 0x11580, 0x115ae },
+ { 0x115d8, 0x115db },
+ { 0x11600, 0x1162f },
+ { 0x11644, 0x11644 },
+ { 0x11680, 0x116aa },
+ { 0x116b8, 0x116b8 },
+ { 0x11700, 0x1171a },
+ { 0x11740, 0x11746 },
+ { 0x11800, 0x1182b },
+ { 0x118a0, 0x118df },
+ { 0x118ff, 0x11906 },
+ { 0x11909, 0x11909 },
+ { 0x1190c, 0x11913 },
+ { 0x11915, 0x11916 },
+ { 0x11918, 0x1192f },
+ { 0x1193f, 0x1193f },
+ { 0x11941, 0x11941 },
+ { 0x119a0, 0x119a7 },
+ { 0x119aa, 0x119d0 },
+ { 0x119e1, 0x119e1 },
+ { 0x119e3, 0x119e3 },
+ { 0x11a00, 0x11a00 },
+ { 0x11a0b, 0x11a32 },
+ { 0x11a3a, 0x11a3a },
+ { 0x11a50, 0x11a50 },
+ { 0x11a5c, 0x11a89 },
+ { 0x11a9d, 0x11a9d },
+ { 0x11ab0, 0x11af8 },
+ { 0x11c00, 0x11c08 },
+ { 0x11c0a, 0x11c2e },
+ { 0x11c40, 0x11c40 },
+ { 0x11c72, 0x11c8f },
+ { 0x11d00, 0x11d06 },
+ { 0x11d08, 0x11d09 },
+ { 0x11d0b, 0x11d30 },
+ { 0x11d46, 0x11d46 },
+ { 0x11d60, 0x11d65 },
+ { 0x11d67, 0x11d68 },
+ { 0x11d6a, 0x11d89 },
+ { 0x11d98, 0x11d98 },
+ { 0x11ee0, 0x11ef2 },
+ { 0x11f02, 0x11f02 },
+ { 0x11f04, 0x11f10 },
+ { 0x11f12, 0x11f33 },
+ { 0x11fb0, 0x11fb0 },
+ { 0x12000, 0x12399 },
+ { 0x12480, 0x12543 },
+ { 0x12f90, 0x12ff0 },
+ { 0x13000, 0x1342f },
+ { 0x13441, 0x13446 },
+ { 0x14400, 0x14646 },
+ { 0x16800, 0x16a38 },
+ { 0x16a40, 0x16a5e },
+ { 0x16a70, 0x16abe },
+ { 0x16ad0, 0x16aed },
+ { 0x16b00, 0x16b2f },
+ { 0x16b40, 0x16b43 },
+ { 0x16b63, 0x16b77 },
+ { 0x16b7d, 0x16b8f },
+ { 0x16e40, 0x16e7f },
+ { 0x16f00, 0x16f4a },
+ { 0x16f50, 0x16f50 },
+ { 0x16f93, 0x16f9f },
+ { 0x16fe0, 0x16fe1 },
+ { 0x16fe3, 0x16fe3 },
+ { 0x17000, 0x187f7 },
+ { 0x18800, 0x18cd5 },
+ { 0x18d00, 0x18d08 },
+ { 0x1aff0, 0x1aff3 },
+ { 0x1aff5, 0x1affb },
+ { 0x1affd, 0x1affe },
+ { 0x1b000, 0x1b122 },
+ { 0x1b132, 0x1b132 },
+ { 0x1b150, 0x1b152 },
+ { 0x1b155, 0x1b155 },
+ { 0x1b164, 0x1b167 },
+ { 0x1b170, 0x1b2fb },
+ { 0x1bc00, 0x1bc6a },
+ { 0x1bc70, 0x1bc7c },
+ { 0x1bc80, 0x1bc88 },
+ { 0x1bc90, 0x1bc99 },
+ { 0x1d400, 0x1d454 },
+ { 0x1d456, 0x1d49c },
+ { 0x1d49e, 0x1d49f },
+ { 0x1d4a2, 0x1d4a2 },
+ { 0x1d4a5, 0x1d4a6 },
+ { 0x1d4a9, 0x1d4ac },
+ { 0x1d4ae, 0x1d4b9 },
+ { 0x1d4bb, 0x1d4bb },
+ { 0x1d4bd, 0x1d4c3 },
+ { 0x1d4c5, 0x1d505 },
+ { 0x1d507, 0x1d50a },
+ { 0x1d50d, 0x1d514 },
+ { 0x1d516, 0x1d51c },
+ { 0x1d51e, 0x1d539 },
+ { 0x1d53b, 0x1d53e },
+ { 0x1d540, 0x1d544 },
+ { 0x1d546, 0x1d546 },
+ { 0x1d54a, 0x1d550 },
+ { 0x1d552, 0x1d6a5 },
+ { 0x1d6a8, 0x1d6c0 },
+ { 0x1d6c2, 0x1d6da },
+ { 0x1d6dc, 0x1d6fa },
+ { 0x1d6fc, 0x1d714 },
+ { 0x1d716, 0x1d734 },
+ { 0x1d736, 0x1d74e },
+ { 0x1d750, 0x1d76e },
+ { 0x1d770, 0x1d788 },
+ { 0x1d78a, 0x1d7a8 },
+ { 0x1d7aa, 0x1d7c2 },
+ { 0x1d7c4, 0x1d7cb },
+ { 0x1df00, 0x1df1e },
+ { 0x1df25, 0x1df2a },
+ { 0x1e030, 0x1e06d },
+ { 0x1e100, 0x1e12c },
+ { 0x1e137, 0x1e13d },
+ { 0x1e14e, 0x1e14e },
+ { 0x1e290, 0x1e2ad },
+ { 0x1e2c0, 0x1e2eb },
+ { 0x1e4d0, 0x1e4eb },
+ { 0x1e7e0, 0x1e7e6 },
+ { 0x1e7e8, 0x1e7eb },
+ { 0x1e7ed, 0x1e7ee },
+ { 0x1e7f0, 0x1e7fe },
+ { 0x1e800, 0x1e8c4 },
+ { 0x1e900, 0x1e943 },
+ { 0x1e94b, 0x1e94b },
+ { 0x1ee00, 0x1ee03 },
+ { 0x1ee05, 0x1ee1f },
+ { 0x1ee21, 0x1ee22 },
+ { 0x1ee24, 0x1ee24 },
+ { 0x1ee27, 0x1ee27 },
+ { 0x1ee29, 0x1ee32 },
+ { 0x1ee34, 0x1ee37 },
+ { 0x1ee39, 0x1ee39 },
+ { 0x1ee3b, 0x1ee3b },
+ { 0x1ee42, 0x1ee42 },
+ { 0x1ee47, 0x1ee47 },
+ { 0x1ee49, 0x1ee49 },
+ { 0x1ee4b, 0x1ee4b },
+ { 0x1ee4d, 0x1ee4f },
+ { 0x1ee51, 0x1ee52 },
+ { 0x1ee54, 0x1ee54 },
+ { 0x1ee57, 0x1ee57 },
+ { 0x1ee59, 0x1ee59 },
+ { 0x1ee5b, 0x1ee5b },
+ { 0x1ee5d, 0x1ee5d },
+ { 0x1ee5f, 0x1ee5f },
+ { 0x1ee61, 0x1ee62 },
+ { 0x1ee64, 0x1ee64 },
+ { 0x1ee67, 0x1ee6a },
+ { 0x1ee6c, 0x1ee72 },
+ { 0x1ee74, 0x1ee77 },
+ { 0x1ee79, 0x1ee7c },
+ { 0x1ee7e, 0x1ee7e },
+ { 0x1ee80, 0x1ee89 },
+ { 0x1ee8b, 0x1ee9b },
+ { 0x1eea1, 0x1eea3 },
+ { 0x1eea5, 0x1eea9 },
+ { 0x1eeab, 0x1eebb },
+ { 0x20000, 0x2a6df },
+ { 0x2a700, 0x2b739 },
+ { 0x2b740, 0x2b81d },
+ { 0x2b820, 0x2cea1 },
+ { 0x2ceb0, 0x2ebe0 },
+ { 0x2ebf0, 0x2ee5d },
+ { 0x2f800, 0x2fa1d },
+ { 0x30000, 0x3134a },
+ { 0x31350, 0x323af },
+};
+
#endif // CHAR_RANGE_INC
diff --git a/core/string/char_utils.h b/core/string/char_utils.h
index aa9bc198ca7..e1343438366 100644
--- a/core/string/char_utils.h
+++ b/core/string/char_utils.h
@@ -70,6 +70,10 @@ static _FORCE_INLINE_ bool is_unicode_lower_case(char32_t c) {
BSEARCH_CHAR_RANGE(lowercase_letter);
}
+static _FORCE_INLINE_ bool is_unicode_letter(char32_t c) {
+ BSEARCH_CHAR_RANGE(unicode_letter);
+}
+
#undef BSEARCH_CHAR_RANGE
static _FORCE_INLINE_ bool is_ascii_upper_case(char32_t c) {
diff --git a/doc/classes/TextServer.xml b/doc/classes/TextServer.xml
index d4b341b7000..5d5177c771c 100644
--- a/doc/classes/TextServer.xml
+++ b/doc/classes/TextServer.xml
@@ -1088,6 +1088,13 @@
- May contain Unicode characters of class XID_Continue in the other positions.
+
+
+
+
+ Returns [code]true[/code] if the given code point is a valid letter, i.e. it belongs to the Unicode category "L".
+
+
diff --git a/doc/classes/TextServerExtension.xml b/doc/classes/TextServerExtension.xml
index 5beb5797dbd..84f73348859 100644
--- a/doc/classes/TextServerExtension.xml
+++ b/doc/classes/TextServerExtension.xml
@@ -937,6 +937,12 @@
+
+
+
+
+
+
diff --git a/modules/text_server_adv/text_server_adv.cpp b/modules/text_server_adv/text_server_adv.cpp
index ea88278a177..3ff947e3810 100644
--- a/modules/text_server_adv/text_server_adv.cpp
+++ b/modules/text_server_adv/text_server_adv.cpp
@@ -7229,6 +7229,16 @@ bool TextServerAdvanced::_is_valid_identifier(const String &p_string) const {
return true;
}
+bool TextServerAdvanced::_is_valid_letter(char32_t p_unicode) const {
+#ifndef ICU_STATIC_DATA
+ if (!icu_data_loaded) {
+ return TextServer::is_valid_letter(p_unicode);
+ }
+#endif
+
+ return u_isalpha(p_unicode);
+}
+
TextServerAdvanced::TextServerAdvanced() {
_insert_num_systems_lang();
_insert_feature_sets();
diff --git a/modules/text_server_adv/text_server_adv.h b/modules/text_server_adv/text_server_adv.h
index 3f04c17d9cb..92f376480e7 100644
--- a/modules/text_server_adv/text_server_adv.h
+++ b/modules/text_server_adv/text_server_adv.h
@@ -979,6 +979,7 @@ public:
MODBIND1RC(String, strip_diacritics, const String &);
MODBIND1RC(bool, is_valid_identifier, const String &);
+ MODBIND1RC(bool, is_valid_letter, char32_t);
MODBIND2RC(String, string_to_upper, const String &, const String &);
MODBIND2RC(String, string_to_lower, const String &, const String &);
diff --git a/servers/text/text_server_extension.cpp b/servers/text/text_server_extension.cpp
index d5080e586d1..ab80d5c39f4 100644
--- a/servers/text/text_server_extension.cpp
+++ b/servers/text/text_server_extension.cpp
@@ -327,6 +327,7 @@ void TextServerExtension::_bind_methods() {
GDVIRTUAL_BIND(_strip_diacritics, "string");
GDVIRTUAL_BIND(_is_valid_identifier, "string");
+ GDVIRTUAL_BIND(_is_valid_letter, "unicode");
GDVIRTUAL_BIND(_string_get_word_breaks, "string", "language", "chars_per_line");
GDVIRTUAL_BIND(_string_get_character_breaks, "string", "language");
@@ -1460,6 +1461,14 @@ bool TextServerExtension::is_valid_identifier(const String &p_string) const {
return TextServer::is_valid_identifier(p_string);
}
+bool TextServerExtension::is_valid_letter(char32_t p_unicode) const {
+ bool ret;
+ if (GDVIRTUAL_CALL(_is_valid_letter, p_unicode, ret)) {
+ return ret;
+ }
+ return TextServer::is_valid_letter(p_unicode);
+}
+
String TextServerExtension::strip_diacritics(const String &p_string) const {
String ret;
if (GDVIRTUAL_CALL(_strip_diacritics, p_string, ret)) {
diff --git a/servers/text/text_server_extension.h b/servers/text/text_server_extension.h
index 4f9ea55d33d..fbc0ee9e59f 100644
--- a/servers/text/text_server_extension.h
+++ b/servers/text/text_server_extension.h
@@ -554,6 +554,8 @@ public:
virtual bool is_valid_identifier(const String &p_string) const override;
GDVIRTUAL1RC(bool, _is_valid_identifier, const String &);
+ virtual bool is_valid_letter(char32_t p_unicode) const override;
+ GDVIRTUAL1RC(bool, _is_valid_letter, char32_t);
virtual String string_to_upper(const String &p_string, const String &p_language = "") const override;
virtual String string_to_lower(const String &p_string, const String &p_language = "") const override;
diff --git a/servers/text_server.cpp b/servers/text_server.cpp
index b67a698615f..34daa4b4141 100644
--- a/servers/text_server.cpp
+++ b/servers/text_server.cpp
@@ -483,6 +483,7 @@ void TextServer::_bind_methods() {
ClassDB::bind_method(D_METHOD("strip_diacritics", "string"), &TextServer::strip_diacritics);
ClassDB::bind_method(D_METHOD("is_valid_identifier", "string"), &TextServer::is_valid_identifier);
+ ClassDB::bind_method(D_METHOD("is_valid_letter", "unicode"), &TextServer::is_valid_letter);
ClassDB::bind_method(D_METHOD("string_to_upper", "string", "language"), &TextServer::string_to_upper, DEFVAL(""));
ClassDB::bind_method(D_METHOD("string_to_lower", "string", "language"), &TextServer::string_to_lower, DEFVAL(""));
@@ -2091,6 +2092,10 @@ bool TextServer::is_valid_identifier(const String &p_string) const {
return true;
}
+bool TextServer::is_valid_letter(char32_t p_unicode) const {
+ return is_unicode_letter(p_unicode);
+}
+
TextServer::TextServer() {
_init_diacritics_map();
}
diff --git a/servers/text_server.h b/servers/text_server.h
index c2cc4446466..b5fe24caf43 100644
--- a/servers/text_server.h
+++ b/servers/text_server.h
@@ -534,6 +534,7 @@ public:
virtual String strip_diacritics(const String &p_string) const;
virtual bool is_valid_identifier(const String &p_string) const;
+ virtual bool is_valid_letter(char32_t p_unicode) const;
// Other string operations.
virtual String string_to_upper(const String &p_string, const String &p_language = "") const = 0;
diff --git a/tests/servers/test_text_server.h b/tests/servers/test_text_server.h
index 334c642d26d..d982102a038 100644
--- a/tests/servers/test_text_server.h
+++ b/tests/servers/test_text_server.h
@@ -637,6 +637,97 @@ TEST_SUITE("[TextServer]") {
}
}
+ SUBCASE("[TextServer] Unicode letters") {
+ for (int i = 0; i < TextServerManager::get_singleton()->get_interface_count(); i++) {
+ Ref ts = TextServerManager::get_singleton()->get_interface(i);
+ CHECK_FALSE_MESSAGE(ts.is_null(), "Invalid TS interface.");
+
+ struct ul_testcase {
+ int fail_index = -1; // Expecting failure at given index.
+ char32_t text[10]; // Using 0 as the terminator.
+ };
+ ul_testcase cases[14] = {
+ {
+ 0,
+ { 0x2D, 0x33, 0x30, 0, 0, 0, 0, 0, 0, 0 }, // "-30"
+ },
+ {
+ 1,
+ { 0x61, 0x2E, 0x31, 0, 0, 0, 0, 0, 0, 0 }, // "a.1"
+ },
+ {
+ 1,
+ { 0x61, 0x2C, 0x31, 0, 0, 0, 0, 0, 0, 0 }, // "a,1"
+ },
+ {
+ 0,
+ { 0x31, 0x65, 0x2D, 0x32, 0, 0, 0, 0, 0, 0 }, // "1e-2"
+ },
+ {
+ 0,
+ { 0xAB, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // "Left-Pointing Double Angle Quotation Mark"
+ },
+ {
+ -1,
+ { 0x41, 0x42, 0, 0, 0, 0, 0, 0, 0, 0 }, // "AB"
+ },
+ {
+ 4,
+ { 0x54, 0x65, 0x73, 0x74, 0x31, 0, 0, 0, 0, 0 }, // "Test1"
+ },
+ {
+ 2,
+ { 0x54, 0x65, 0x2A, 0x73, 0x74, 0, 0, 0, 0, 0 }, // "Te*st"
+ },
+ {
+ 4,
+ { 0x74, 0x65, 0x73, 0x74, 0x5F, 0x74, 0x65, 0x73, 0x74, 0x65 }, // "test_teste"
+ },
+ {
+ 4,
+ { 0x74, 0x65, 0x73, 0x74, 0x20, 0x74, 0x65, 0x73, 0x74, 0 }, // "test test"
+ },
+ {
+ -1,
+ { 0x643, 0x402, 0x716, 0xB05, 0, 0, 0, 0, 0, 0 }, // "كЂܖଅ" (arabic letters),
+ },
+ {
+ -1,
+ { 0x643, 0x402, 0x716, 0xB05, 0x54, 0x65, 0x73, 0x74, 0x30AA, 0x4E21 }, // 0-3 arabic letters, 4-7 latin letters, 8-9 CJK letters
+ },
+ {
+ -1,
+ { 0x4D2, 0x4D6, 0x4DA, 0x4DC, 0, 0, 0, 0, 0, 0 }, // "ӒӖӚӜ" cyrillic letters
+ },
+ {
+ -1,
+ { 0xC2, 0xC3, 0xC4, 0xC5, 0x100, 0x102, 0x104, 0xC7, 0x106, 0x108 }, // "ÂÃÄÅĀĂĄÇĆĈ" rarer latin letters
+ },
+ };
+
+ for (int j = 0; j < 14; j++) {
+ ul_testcase test = cases[j];
+ int failed_on_index = -1;
+ for (int k = 0; k < 10; k++) {
+ char32_t character = test.text[k];
+ if (character == 0) {
+ break;
+ }
+ if (!ts->is_valid_letter(character)) {
+ failed_on_index = k;
+ break;
+ }
+ }
+
+ if (test.fail_index == -1) {
+ CHECK_MESSAGE(test.fail_index == failed_on_index, "In interface ", ts->get_name() + ": In test case ", j, ", the character at index ", failed_on_index, " should have been a letter.");
+ } else {
+ CHECK_MESSAGE(test.fail_index == failed_on_index, "In interface ", ts->get_name() + ": In test case ", j, ", expected first non-letter at index ", test.fail_index, ", but found at index ", failed_on_index);
+ }
+ }
+ }
+ }
+
SUBCASE("[TextServer] Strip Diacritics") {
for (int i = 0; i < TextServerManager::get_singleton()->get_interface_count(); i++) {
Ref ts = TextServerManager::get_singleton()->get_interface(i);