# Generates Rust tables that define Unicode "script classes" for the purposes
# of autohinting.
#
# For performance, we want to link various pieces of data by index. For ease of
# modification and to avoid errors, we want to define those links symbolically
# by name. This script therefore converts symbolic references to indices when
# generating code.
#
# The bottom of this file contains the Rust generation code.

# Based on FreeType autofit coverage:
# https://gitlab.freedesktop.org/freetype/freetype/-/blob/57617782464411201ce7bbc93b086c1b4d7d84a5/src/autofit/afcover.h
|
||
# Extended feature set used to generate additional styles for the scripts
# listed in SCRIPTS_WITH_FEATURES. Each entry pairs a human-readable name
# with its four-character feature tag. The generator advances its style
# index by len(STYLE_FEATURES) for every script that has features, so the
# number of entries here is significant.
STYLE_FEATURES = [
    {
        "name": "petite capitals from capitals",
        "tag": "c2cp",
    },
    {
        "name": "small capitals from capitals",
        "tag": "c2sc",
    },
    {
        "name": "ordinals",
        "tag": "ordn",
    },
    {
        "name": "petite capitals",
        "tag": "pcap",
    },
    {
        "name": "ruby",
        "tag": "ruby",
    },
    {
        "name": "scientific inferiors",
        "tag": "sinf",
    },
    {
        "name": "small capitals",
        "tag": "smcp",
    },
    {
        "name": "subscript",
        "tag": "subs",
    },
    {
        "name": "superscript",
        "tag": "sups",
    },
    {
        "name": "titling",
        "tag": "titl",
    },
]
|
||
|
||
# Scripts that generate styles with the extended feature set above.
# FreeType refers to these as "meta latin".
SCRIPTS_WITH_FEATURES = ["CYRL", "GREK", "LATN"]
|
||
|
||
# In relation to FreeType, this combines the AF_ScriptClass,
# AF_Script_UniRangeRec and AF_BlueStringset.
# Script definitions: https://gitlab.freedesktop.org/freetype/freetype/-/blob/57617782464411201ce7bbc93b086c1b4d7d84a5/src/autofit/afscript.h
# Unicode ranges: https://gitlab.freedesktop.org/freetype/freetype/-/blob/57617782464411201ce7bbc93b086c1b4d7d84a5/src/autofit/afranges.c
# Blues: https://gitlab.freedesktop.org/freetype/freetype/-/blob/57617782464411201ce7bbc93b086c1b4d7d84a5/src/autofit/afblue.h
|
||
SCRIPT_CLASSES = [
|
||
{
|
||
"name": "Adlam",
|
||
"tag": "ADLM",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "𞤌 𞤮", # 𞤌 𞤮
|
||
"base_ranges": [
|
||
(0x1E900, 0x1E95F), # Adlam
|
||
],
|
||
"non_base_ranges": [
|
||
(0x1D944, 0x1E94A),
|
||
],
|
||
"blues": [
|
||
("𞤌 𞤅 𞤈 𞤏 𞤔 𞤚", "TOP"),
|
||
("𞤂 𞤖", "0"),
|
||
("𞤬 𞤮 𞤻 𞤼 𞤾", "TOP | X_HEIGHT"),
|
||
("𞤤 𞤨 𞤩 𞤭 𞤴 𞤸 𞤺 𞥀", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Arabic",
|
||
"tag": "ARAB",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "ل ح ـ", # ل ح ـ
|
||
"base_ranges": [
|
||
(0x0600, 0x06FF), # Arabic
|
||
(0x0750, 0x07FF), # Arabic Supplement
|
||
(0x08A0, 0x08FF), # Arabic Extended-A
|
||
(0xFB50, 0xFDFF), # Arabic Presentation Forms-A
|
||
(0xFE70, 0xFEFF), # Arabic Presentation Forms-B
|
||
(0x1EE00, 0x1EEFF), # Arabic Mathematical Alphabetic Symbols
|
||
],
|
||
"non_base_ranges": [
|
||
(0x0600, 0x0605),
|
||
(0x0610, 0x061A),
|
||
(0x064B, 0x065F),
|
||
(0x0670, 0x0670),
|
||
(0x06D6, 0x06DC),
|
||
(0x06DF, 0x06E4),
|
||
(0x06E7, 0x06E8),
|
||
(0x06EA, 0x06ED),
|
||
(0x08D4, 0x08E1),
|
||
(0x08D3, 0x08FF),
|
||
(0xFBB2, 0xFBC1),
|
||
(0xFE70, 0xFE70),
|
||
(0xFE72, 0xFE72),
|
||
(0xFE74, 0xFE74),
|
||
(0xFE76, 0xFE76),
|
||
(0xFE78, 0xFE78),
|
||
(0xFE7A, 0xFE7A),
|
||
(0xFE7C, 0xFE7C),
|
||
(0xFE7E, 0xFE7E),
|
||
],
|
||
"blues": [
|
||
("ا إ ل ك ط ظ", "TOP"),
|
||
("ت ث ط ظ ك", "0"),
|
||
("ـ", "NEUTRAL"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Armenian",
|
||
"tag": "ARMN",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "ս Ս", # ս Ս
|
||
"base_ranges": [
|
||
(0x0530, 0x058F), # Armenian
|
||
(0xFB13, 0xFB17), # Alphab. Present. Forms (Armenian)
|
||
],
|
||
"non_base_ranges": [
|
||
(0x0559, 0x055F),
|
||
],
|
||
"blues": [
|
||
("Ա Մ Ւ Ս Բ Գ Դ Օ", "TOP"),
|
||
("Ւ Ո Դ Ճ Շ Ս Տ Օ", "0"),
|
||
("ե է ի մ վ ֆ ճ", "TOP"),
|
||
("ա յ ւ ս գ շ ր օ", "TOP | X_HEIGHT"),
|
||
("հ ո ճ ա ե ծ ս օ", "0"),
|
||
("բ ը ի լ ղ պ փ ց", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Avestan",
|
||
"tag": "AVST",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "𐬚", # 𐬚
|
||
"base_ranges": [
|
||
(0x10B00, 0x10B3F), # Avestan
|
||
],
|
||
"non_base_ranges": [
|
||
(0x10B39, 0x10B3F),
|
||
],
|
||
"blues": [
|
||
("𐬀 𐬁 𐬐 𐬛", "TOP"),
|
||
("𐬀 𐬁", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Bamum",
|
||
"tag": "BAMU",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "ꛁ ꛯ", # ꛁ ꛯ
|
||
"base_ranges": [
|
||
(0xA6A0, 0xA6FF), # Bamum
|
||
# This is commented out in FreeType
|
||
# (0x16800, 0x16A3F), # Bamum Supplement
|
||
],
|
||
"non_base_ranges": [
|
||
(0xA6F0, 0xA6F1),
|
||
],
|
||
"blues": [
|
||
("ꚧ ꚨ ꛛ ꛉ ꛁ ꛈ ꛫ ꛯ", "TOP"),
|
||
("ꚭ ꚳ ꚶ ꛬ ꚢ ꚽ ꛯ ꛲", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Bengali",
|
||
"tag": "BENG",
|
||
"hint_top_to_bottom": True,
|
||
"std_chars": "০ ৪", # ০ ৪
|
||
"base_ranges": [
|
||
(0x0980, 0x09FF), # Bengali
|
||
],
|
||
"non_base_ranges": [
|
||
(0x0981, 0x0981),
|
||
(0x09BC, 0x09BC),
|
||
(0x09C1, 0x09C4),
|
||
(0x09CD, 0x09CD),
|
||
(0x09E2, 0x09E3),
|
||
(0x09FE, 0x09FE),
|
||
],
|
||
"blues": [
|
||
("ই ট ঠ ি ী ৈ ৗ", "TOP"),
|
||
("ও এ ড ত ন ব ল ক", "TOP"),
|
||
("অ ড ত ন ব ভ ল ক", "TOP | NEUTRAL | X_HEIGHT"),
|
||
("অ ড ত ন ব ভ ল ক", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Buhid",
|
||
"tag": "BUHD",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "ᝋ ᝏ", # ᝋ ᝏ
|
||
"base_ranges": [
|
||
(0x1740, 0x175F), # Buhid
|
||
],
|
||
"non_base_ranges": [
|
||
(0x1752, 0x1753),
|
||
],
|
||
"blues": [
|
||
("ᝐ ᝈ", "TOP"),
|
||
("ᝅ ᝊ ᝎ", "TOP"),
|
||
("ᝂ ᝃ ᝉ ᝌ", "TOP | X_HEIGHT"),
|
||
("ᝀ ᝃ ᝆ ᝉ ᝋ ᝏ ᝑ", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Chakma",
|
||
"tag": "CAKM",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "𑄤 𑄉 𑄛", # 𑄤 𑄉 𑄛
|
||
"base_ranges": [
|
||
(0x11100, 0x1114F), # Chakma
|
||
],
|
||
"non_base_ranges": [
|
||
(0x11100, 0x11102),
|
||
(0x11127, 0x11134),
|
||
(0x11146, 0x11146),
|
||
],
|
||
"blues": [
|
||
("𑄃 𑄅 𑄉 𑄙 𑄗", "TOP"),
|
||
("𑄅 𑄛 𑄝 𑄗 𑄓", "0"),
|
||
("𑄖𑄳𑄢 𑄘𑄳𑄢 𑄙𑄳𑄢 𑄤𑄳𑄢 𑄥𑄳𑄢", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Canadian Syllabics",
|
||
"tag": "CANS",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "ᑌ ᓚ", # ᑌ ᓚ
|
||
"base_ranges": [
|
||
(0x1400, 0x167F), # Unified Canadian Aboriginal Syllabics
|
||
(0x18B0, 0x18FF), # Unified Canadian Aboriginal Syllabics Extended
|
||
],
|
||
"non_base_ranges": [
|
||
],
|
||
"blues": [
|
||
("ᗜ ᖴ ᐁ ᒣ ᑫ ᑎ ᔑ ᗰ", "TOP"),
|
||
("ᗶ ᖵ ᒧ ᐃ ᑌ ᒍ ᔑ ᗢ", "0"),
|
||
("ᓓ ᓕ ᓀ ᓂ ᓄ ᕄ ᕆ ᘣ", "TOP | X_HEIGHT"),
|
||
("ᕃ ᓂ ᓀ ᕂ ᓗ ᓚ ᕆ ᘣ", "0"),
|
||
("ᐪ ᙆ ᣘ ᐢ ᒾ ᣗ ᔆ", "TOP"),
|
||
("ᙆ ᗮ ᒻ ᐞ ᔆ ᒡ ᒢ ᓑ", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Carian",
|
||
"tag": "CARI",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "𐊫 𐋉", # 𐊫 𐋉
|
||
"base_ranges": [
|
||
(0x102A0, 0x102DF), # Carian
|
||
],
|
||
"non_base_ranges": [
|
||
],
|
||
"blues": [
|
||
("𐊧 𐊫 𐊬 𐊭 𐊱 𐊺 𐊼 𐊿", "TOP"),
|
||
("𐊣 𐊧 𐊷 𐋀 𐊫 𐊸 𐋉", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Cherokee",
|
||
"tag": "CHER",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "Ꭴ Ꮕ ꮕ", # Ꭴ Ꮕ ꮕ
|
||
"base_ranges": [
|
||
(0x13A0, 0x13FF), # Cherokee
|
||
(0xAB70, 0xABBF), # Cherokee Supplement
|
||
],
|
||
"non_base_ranges": [
|
||
],
|
||
"blues": [
|
||
("Ꮖ Ꮋ Ꭼ Ꮓ Ꭴ Ꮳ Ꭶ Ꮥ", "TOP"),
|
||
("Ꮖ Ꮋ Ꭼ Ꮓ Ꭴ Ꮳ Ꭶ Ꮥ", "0"),
|
||
("ꮒ ꮤ ꮶ ꭴ ꭾ ꮗ ꮝ ꮿ", "TOP"),
|
||
("ꮖ ꭼ ꮓ ꮠ ꮳ ꭶ ꮥ ꮻ", "TOP | X_HEIGHT"),
|
||
("ꮖ ꭼ ꮓ ꮠ ꮳ ꭶ ꮥ ꮻ", "0"),
|
||
("ᏸ ꮐ ꭹ ꭻ", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Coptic",
|
||
"tag": "COPT",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "Ⲟ ⲟ", # Ⲟ ⲟ
|
||
"base_ranges": [
|
||
(0x2C80, 0x2CFF), # Coptic
|
||
],
|
||
"non_base_ranges": [
|
||
(0x2CEF, 0x2CF1),
|
||
],
|
||
"blues": [
|
||
("Ⲍ Ⲏ Ⲡ Ⳟ Ⲟ Ⲑ Ⲥ Ⳋ", "TOP"),
|
||
("Ⳑ Ⳙ Ⳟ Ⲏ Ⲟ Ⲑ Ⳝ Ⲱ", "0"),
|
||
("ⲍ ⲏ ⲡ ⳟ ⲟ ⲑ ⲥ ⳋ", "TOP | X_HEIGHT"),
|
||
("ⳑ ⳙ ⳟ ⲏ ⲟ ⲑ ⳝ Ⳓ", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Cypriot",
|
||
"tag": "CPRT",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "𐠅 𐠣", # 𐠅 𐠣
|
||
"base_ranges": [
|
||
(0x10800, 0x1083F), # Cypriot
|
||
],
|
||
"non_base_ranges": [
|
||
],
|
||
"blues": [
|
||
("𐠍 𐠙 𐠳 𐠱 𐠅 𐠓 𐠣 𐠦", "TOP"),
|
||
("𐠃 𐠊 𐠛 𐠣 𐠳 𐠵 𐠐", "0"),
|
||
("𐠈 𐠏 𐠖", "TOP"),
|
||
("𐠈 𐠏 𐠖", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Cyrillic",
|
||
"tag": "CYRL",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "о О", # о О
|
||
"base_ranges": [
|
||
(0x0400, 0x04FF), # Cyrillic
|
||
(0x0500, 0x052F), # Cyrillic Supplement
|
||
(0x2DE0, 0x2DFF), # Cyrillic Extended-A
|
||
(0xA640, 0xA69F), # Cyrillic Extended-B
|
||
(0x1C80, 0x1C8F), # Cyrillic Extended-C
|
||
],
|
||
"non_base_ranges": [
|
||
(0x0483, 0x0489),
|
||
(0x2DE0, 0x2DFF),
|
||
(0xA66F, 0xA67F),
|
||
(0xA69E, 0xA69F),
|
||
],
|
||
"blues": [
|
||
("Б В Е П З О С Э", "TOP"),
|
||
("Б В Е Ш З О С Э", "0"),
|
||
("х п н ш е з о с", "TOP | X_HEIGHT"),
|
||
("х п н ш е з о с", "0"),
|
||
("р у ф", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Devanagari",
|
||
"tag": "DEVA",
|
||
"hint_top_to_bottom": True,
|
||
"std_chars": "ठ व ट", # ठ व ट
|
||
"base_ranges": [
|
||
(0x0900, 0x093B), # Devanagari
|
||
(0x093D, 0x0950), # ... continued
|
||
(0x0953, 0x0963), # ... continued
|
||
(0x0966, 0x097F), # ... continued
|
||
(0x20B9, 0x20B9), # (new) Rupee sign
|
||
(0xA8E0, 0xA8FF), # Devanagari Extended
|
||
],
|
||
"non_base_ranges": [
|
||
(0x0900, 0x0902),
|
||
(0x093A, 0x093A),
|
||
(0x0941, 0x0948),
|
||
(0x094D, 0x094D),
|
||
(0x0953, 0x0957),
|
||
(0x0962, 0x0963),
|
||
(0xA8E0, 0xA8F1),
|
||
(0xA8FF, 0xA8FF),
|
||
],
|
||
"blues": [
|
||
("ई ऐ ओ औ ि ी ो ौ", "TOP"),
|
||
("क म अ आ थ ध भ श", "TOP"),
|
||
("क न म उ छ ट ठ ड", "TOP | NEUTRAL | X_HEIGHT"),
|
||
("क न म उ छ ट ठ ड", "0"),
|
||
("ु ृ", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Deseret",
|
||
"tag": "DSRT",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "𐐄 𐐬", # 𐐄 𐐬
|
||
"base_ranges": [
|
||
(0x10400, 0x1044F), # Deseret
|
||
],
|
||
"non_base_ranges": [
|
||
],
|
||
"blues": [
|
||
("𐐂 𐐄 𐐋 𐐗 𐐑", "TOP"),
|
||
("𐐀 𐐂 𐐄 𐐗 𐐛", "0"),
|
||
("𐐪 𐐬 𐐳 𐐿 𐐹", "TOP | X_HEIGHT"),
|
||
("𐐨 𐐪 𐐬 𐐿 𐑃", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Ethiopic",
|
||
"tag": "ETHI",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "ዐ", # ዐ
|
||
"base_ranges": [
|
||
(0x1200, 0x137F), # Ethiopic
|
||
(0x1380, 0x139F), # Ethiopic Supplement
|
||
(0x2D80, 0x2DDF), # Ethiopic Extended
|
||
(0xAB00, 0xAB2F), # Ethiopic Extended-A
|
||
],
|
||
"non_base_ranges": [
|
||
(0x135D, 0x135F),
|
||
],
|
||
"blues": [
|
||
("ሀ ሃ ዘ ፐ ማ በ ዋ ዐ", "TOP"),
|
||
("ለ ሐ በ ዘ ሀ ሪ ዐ ጨ", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Georgian (Mkhedruli)",
|
||
"tag": "GEOR",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "ი ე ა Ჿ", # ი ე ა Ი
|
||
"base_ranges": [
|
||
(0x10D0, 0x10FF), # Georgian (Mkhedruli)
|
||
(0x1C90, 0x1CBF), # Georgian Extended (Mtavruli)
|
||
],
|
||
"non_base_ranges": [
|
||
],
|
||
"blues": [
|
||
("გ დ ე ვ თ ი ო ღ", "TOP | X_HEIGHT"),
|
||
("ა ზ მ ს შ ძ ხ პ", "0"),
|
||
("ს ხ ქ ზ მ შ ჩ წ", "TOP"),
|
||
("ე ვ ჟ ტ უ ფ ქ ყ", "0"),
|
||
("Ნ Ჟ Ჳ Ჸ Გ Ე Ო Ჴ", "TOP"),
|
||
("Ი Ჲ Ო Ჩ Მ Შ Ჯ Ჽ", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Georgian (Khutsuri)",
|
||
"tag": "GEOK",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "Ⴖ Ⴑ ⴙ", # Ⴖ Ⴑ ⴙ
|
||
"base_ranges": [
|
||
(0x10A0, 0x10CD), # Georgian (Asomtavruli)
|
||
(0x2D00, 0x2D2D), # Georgian Supplement (Nuskhuri)
|
||
],
|
||
"non_base_ranges": [
|
||
],
|
||
"blues": [
|
||
("Ⴑ Ⴇ Ⴙ Ⴜ Ⴄ Ⴅ Ⴓ Ⴚ", "TOP"),
|
||
("Ⴄ Ⴅ Ⴇ Ⴈ Ⴆ Ⴑ Ⴊ Ⴋ", "0"),
|
||
("ⴁ ⴗ ⴂ ⴄ ⴅ ⴇ ⴔ ⴖ", "TOP | X_HEIGHT"),
|
||
("ⴈ ⴌ ⴖ ⴎ ⴃ ⴆ ⴋ ⴢ", "0"),
|
||
("ⴐ ⴑ ⴓ ⴕ ⴙ ⴛ ⴡ ⴣ", "TOP"),
|
||
("ⴄ ⴅ ⴔ ⴕ ⴁ ⴂ ⴘ ⴝ", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Glagolitic",
|
||
"tag": "GLAG",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "Ⱅ ⱅ", # Ⱅ ⱅ
|
||
"base_ranges": [
|
||
(0x2C00, 0x2C5F), # Glagolitic
|
||
(0x1E000, 0x1E02F), # Glagolitic Supplement
|
||
],
|
||
"non_base_ranges": [
|
||
(0x1E000, 0x1E02F),
|
||
],
|
||
"blues": [
|
||
("Ⰵ Ⱄ Ⱚ Ⰴ Ⰲ Ⰺ Ⱛ Ⰻ", "TOP"),
|
||
("Ⰵ Ⰴ Ⰲ Ⱚ Ⱎ Ⱑ Ⰺ Ⱄ", "0"),
|
||
("ⰵ ⱄ ⱚ ⰴ ⰲ ⰺ ⱛ ⰻ", "TOP | X_HEIGHT"),
|
||
("ⰵ ⰴ ⰲ ⱚ ⱎ ⱑ ⰺ ⱄ", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Gothic",
|
||
"tag": "GOTH",
|
||
"hint_top_to_bottom": True,
|
||
"std_chars": "𐌴 𐌾 𐍃", # 𐌴 𐌾 𐍃
|
||
"base_ranges": [
|
||
(0x10330, 0x1034F), # Gothic
|
||
],
|
||
"non_base_ranges": [
|
||
],
|
||
"blues": [
|
||
("𐌲 𐌶 𐍀 𐍄 𐌴 𐍃 𐍈 𐌾", "TOP"),
|
||
("𐌶 𐌴 𐍃 𐍈", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Greek",
|
||
"tag": "GREK",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "ο Ο", # ο Ο
|
||
"base_ranges": [
|
||
(0x0370, 0x03FF), # Greek and Coptic
|
||
(0x1F00, 0x1FFF), # Greek Extended
|
||
],
|
||
"non_base_ranges": [
|
||
(0x037A, 0x037A),
|
||
(0x0384, 0x0385),
|
||
(0x1FBD, 0x1FC1),
|
||
(0x1FCD, 0x1FCF),
|
||
(0x1FDD, 0x1FDF),
|
||
(0x1FED, 0x1FEF),
|
||
(0x1FFD, 0x1FFE),
|
||
],
|
||
"blues": [
|
||
("Γ Β Ε Ζ Θ Ο Ω", "TOP"),
|
||
("Β Δ Ζ Ξ Θ Ο", "0"),
|
||
("β θ δ ζ λ ξ", "TOP"),
|
||
("α ε ι ο π σ τ ω", "TOP | X_HEIGHT"),
|
||
("α ε ι ο π σ τ ω", "0"),
|
||
("β γ η μ ρ φ χ ψ", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Gujarati",
|
||
"tag": "GUJR",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "ટ ૦", # ટ ૦
|
||
"base_ranges": [
|
||
(0x0A80, 0x0AFF), # Gujarati
|
||
],
|
||
"non_base_ranges": [
|
||
(0x0A81, 0x0A82),
|
||
(0x0ABC, 0x0ABC),
|
||
(0x0AC1, 0x0AC8),
|
||
(0x0ACD, 0x0ACD),
|
||
(0x0AE2, 0x0AE3),
|
||
(0x0AFA, 0x0AFF),
|
||
],
|
||
"blues": [
|
||
("ત ન ઋ ઌ છ ટ ર ૦", "TOP | X_HEIGHT"),
|
||
("ખ ગ ઘ ઞ ઇ ઈ ઠ જ", "0"),
|
||
("ઈ ઊ િ ી લી શ્ચિ જિ સી", "TOP"),
|
||
("ુ ૃ ૄ ખુ છૃ છૄ", "0"),
|
||
("૦ ૧ ૨ ૩ ૭", "TOP"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Gurmukhi",
|
||
"tag": "GURU",
|
||
"hint_top_to_bottom": True,
|
||
"std_chars": "ਠ ਰ ੦", # ਠ ਰ ੦
|
||
"base_ranges": [
|
||
(0x0A00, 0x0A7F), # Gurmukhi
|
||
],
|
||
"non_base_ranges": [
|
||
(0x0A01, 0x0A02),
|
||
(0x0A3C, 0x0A3C),
|
||
(0x0A41, 0x0A51),
|
||
(0x0A70, 0x0A71),
|
||
(0x0A75, 0x0A75),
|
||
],
|
||
"blues": [
|
||
("ਇ ਈ ਉ ਏ ਓ ੳ ਿ ੀ", "TOP"),
|
||
("ਕ ਗ ਙ ਚ ਜ ਤ ਧ ਸ", "TOP"),
|
||
("ਕ ਗ ਙ ਚ ਜ ਤ ਧ ਸ", "TOP | NEUTRAL | X_HEIGHT"),
|
||
("ਅ ਏ ਓ ਗ ਜ ਠ ਰ ਸ", "0"),
|
||
("੦ ੧ ੨ ੩ ੭", "TOP"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Hebrew",
|
||
"tag": "HEBR",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "ם", # ם
|
||
"base_ranges": [
|
||
(0x0590, 0x05FF), # Hebrew
|
||
(0xFB1D, 0xFB4F), # Alphab. Present. Forms (Hebrew)
|
||
],
|
||
"non_base_ranges": [
|
||
(0x0591, 0x05BF),
|
||
(0x05C1, 0x05C2),
|
||
(0x05C4, 0x05C5),
|
||
(0x05C7, 0x05C7),
|
||
(0xFB1E, 0xFB1E),
|
||
],
|
||
"blues": [
|
||
("ב ד ה ח ך כ ם ס", "TOP | LONG"),
|
||
("ב ט כ ם ס צ", "0"),
|
||
("ק ך ן ף ץ", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Kayah Li",
|
||
"tag": "KALI",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "ꤍ ꤀", # ꤍ ꤀
|
||
"base_ranges": [
|
||
(0xA900, 0xA92F), # Kayah Li
|
||
],
|
||
"non_base_ranges": [
|
||
(0xA926, 0xA92D),
|
||
],
|
||
"blues": [
|
||
("꤅ ꤏ ꤁ ꤋ ꤀ ꤍ", "TOP | X_HEIGHT"),
|
||
("꤈ ꤘ ꤀ ꤍ ꤢ", "0"),
|
||
("ꤖ ꤡ", "TOP"),
|
||
("ꤑ ꤜ ꤞ", "0"),
|
||
("ꤑ꤬ ꤜ꤭ ꤔ꤬", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Khmer",
|
||
"tag": "KHMR",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "០", # ០
|
||
"base_ranges": [
|
||
(0x1780, 0x17FF), # Khmer
|
||
],
|
||
"non_base_ranges": [
|
||
(0x17B7, 0x17BD),
|
||
(0x17C6, 0x17C6),
|
||
(0x17C9, 0x17D3),
|
||
(0x17DD, 0x17DD),
|
||
],
|
||
"blues": [
|
||
("ខ ទ ន ឧ ឩ ា", "TOP | X_HEIGHT"),
|
||
("ក្ក ក្ខ ក្គ ក្ថ", "SUB_TOP"),
|
||
("ខ ឃ ច ឋ ប ម យ ឲ", "0"),
|
||
("ត្រ រៀ ឲ្យ អឿ", "0"),
|
||
("ន្ត្រៃ ង្ខ្យ ក្បៀ ច្រៀ ន្តឿ ល្បឿ", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Khmer Symbols",
|
||
"tag": "KHMS",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "᧡ ᧪", # ᧡ ᧪
|
||
"base_ranges": [
|
||
(0x19E0, 0x19FF), # Khmer Symbols
|
||
],
|
||
"non_base_ranges": [
|
||
],
|
||
"blues": [
|
||
("᧠ ᧡", "TOP | X_HEIGHT"),
|
||
("᧶ ᧹", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Kannada",
|
||
"tag": "KNDA",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "೦ ಬ", # ೦ ಬ
|
||
"base_ranges": [
|
||
(0x0C80, 0x0CFF), # Kannada
|
||
],
|
||
"non_base_ranges": [
|
||
(0x0C81, 0x0C81),
|
||
(0x0CBC, 0x0CBC),
|
||
(0x0CBF, 0x0CBF),
|
||
(0x0CC6, 0x0CC6),
|
||
(0x0CCC, 0x0CCD),
|
||
(0x0CE2, 0x0CE3),
|
||
],
|
||
"blues": [
|
||
("ಇ ಊ ಐ ಣ ಸಾ ನಾ ದಾ ರಾ", "TOP"),
|
||
("ಅ ಉ ಎ ಲ ೦ ೨ ೬ ೭", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Lao",
|
||
"tag": "LAOO",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "໐", # ໐
|
||
"base_ranges": [
|
||
(0x0E80, 0x0EFF), # Lao
|
||
],
|
||
"non_base_ranges": [
|
||
(0x0EB1, 0x0EB1),
|
||
(0x0EB4, 0x0EBC),
|
||
(0x0EC8, 0x0ECD),
|
||
],
|
||
"blues": [
|
||
("າ ດ ອ ມ ລ ວ ຣ ງ", "TOP | X_HEIGHT"),
|
||
("າ ອ ບ ຍ ຣ ຮ ວ ຢ", "0"),
|
||
("ປ ຢ ຟ ຝ", "TOP"),
|
||
("ໂ ໄ ໃ", "TOP"),
|
||
("ງ ຊ ຖ ຽ ໆ ຯ", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Latin",
|
||
"tag": "LATN",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "o O 0",
|
||
"base_ranges": [
|
||
(0x0020, 0x007F), # Basic Latin (no control chars)
|
||
(0x00A0, 0x00A9), # Latin-1 Supplement (no control chars)
|
||
(0x00AB, 0x00B1), # ... continued
|
||
(0x00B4, 0x00B8), # ... continued
|
||
(0x00BB, 0x00FF), # ... continued
|
||
(0x0100, 0x017F), # Latin Extended-A
|
||
(0x0180, 0x024F), # Latin Extended-B
|
||
(0x0250, 0x02AF), # IPA Extensions
|
||
(0x02B9, 0x02DF), # Spacing Modifier Letters
|
||
(0x02E5, 0x02FF), # ... continued
|
||
(0x0300, 0x036F), # Combining Diacritical Marks
|
||
(0x1AB0, 0x1ABE), # Combining Diacritical Marks Extended
|
||
(0x1D00, 0x1D2B), # Phonetic Extensions
|
||
(0x1D6B, 0x1D77), # ... continued
|
||
(0x1D79, 0x1D7F), # ... continued
|
||
(0x1D80, 0x1D9A), # Phonetic Extensions Supplement
|
||
(0x1DC0, 0x1DFF), # Combining Diacritical Marks Supplement
|
||
(0x1E00, 0x1EFF), # Latin Extended Additional
|
||
(0x2000, 0x206F), # General Punctuation
|
||
(0x20A0, 0x20B8), # Currency Symbols ...
|
||
(0x20BA, 0x20CF), # ... except new Rupee sign
|
||
(0x2150, 0x218F), # Number Forms
|
||
(0x2C60, 0x2C7B), # Latin Extended-C
|
||
(0x2C7E, 0x2C7F), # ... continued
|
||
(0x2E00, 0x2E7F), # Supplemental Punctuation
|
||
(0xA720, 0xA76F), # Latin Extended-D
|
||
(0xA771, 0xA7F7), # ... continued
|
||
(0xA7FA, 0xA7FF), # ... continued
|
||
(0xAB30, 0xAB5B), # Latin Extended-E
|
||
(0xAB60, 0xAB6F), # ... continued
|
||
(0xFB00, 0xFB06), # Alphab. Present. Forms (Latin Ligs)
|
||
(0x1D400, 0x1D7FF), # Mathematical Alphanumeric Symbols
|
||
],
|
||
"non_base_ranges": [
|
||
(0x005E, 0x0060),
|
||
(0x007E, 0x007E),
|
||
(0x00A8, 0x00A9),
|
||
(0x00AE, 0x00B0),
|
||
(0x00B4, 0x00B4),
|
||
(0x00B8, 0x00B8),
|
||
(0x00BC, 0x00BE),
|
||
(0x02B9, 0x02DF),
|
||
(0x02E5, 0x02FF),
|
||
(0x0300, 0x036F),
|
||
(0x1AB0, 0x1ABE),
|
||
(0x1DC0, 0x1DFF),
|
||
(0x2017, 0x2017),
|
||
(0x203E, 0x203E),
|
||
(0xA788, 0xA788),
|
||
(0xA7F8, 0xA7FA),
|
||
],
|
||
"blues": [
|
||
("T H E Z O C Q S", "TOP"),
|
||
("H E Z L O C U S", "0"),
|
||
("f i j k d b h", "TOP"),
|
||
("u v x z o e s c", "TOP | X_HEIGHT"),
|
||
("n r x z o e s c", "0"),
|
||
("p q g j y", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Latin Subscript Fallback",
|
||
"tag": "LATB",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "ₒ ₀", # ₒ ₀
|
||
"base_ranges": [
|
||
(0x1D62, 0x1D6A), # some small subscript letters
|
||
(0x2080, 0x209C), # subscript digits and letters
|
||
(0x2C7C, 0x2C7C), # latin subscript small letter j
|
||
],
|
||
"non_base_ranges": [
|
||
],
|
||
"blues": [
|
||
("₀ ₃ ₅ ₇ ₈", "TOP"),
|
||
("₀ ₁ ₂ ₃ ₈", "0"),
|
||
("ᵢ ⱼ ₕ ₖ ₗ", "TOP"),
|
||
("ₐ ₑ ₒ ₓ ₙ ₛ ᵥ ᵤ ᵣ", "TOP | X_HEIGHT"),
|
||
("ₐ ₑ ₒ ₓ ₙ ₛ ᵥ ᵤ ᵣ", "0"),
|
||
("ᵦ ᵧ ᵨ ᵩ ₚ", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Latin Superscript Fallback",
|
||
"tag": "LATP",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "ᵒ ᴼ ⁰", # ᵒ ᴼ ⁰
|
||
"base_ranges": [
|
||
(0x00AA, 0x00AA), # feminine ordinal indicator
|
||
(0x00B2, 0x00B3), # superscript two and three
|
||
(0x00B9, 0x00BA), # superscript one, masc. ord. indic.
|
||
(0x02B0, 0x02B8), # some latin superscript mod. letters
|
||
(0x02E0, 0x02E4), # some IPA modifier letters
|
||
(0x1D2C, 0x1D61), # latin superscript modifier letters
|
||
(0x1D78, 0x1D78), # modifier letter cyrillic en
|
||
(0x1D9B, 0x1DBF), # more modifier letters
|
||
(0x2070, 0x207F), # superscript digits and letters
|
||
(0x2C7D, 0x2C7D), # modifier letter capital v
|
||
(0xA770, 0xA770), # modifier letter us
|
||
(0xA7F8, 0xA7F9), # more modifier letters
|
||
(0xAB5C, 0xAB5F), # more modifier letters
|
||
],
|
||
"non_base_ranges": [
|
||
],
|
||
"blues": [
|
||
("⁰ ³ ⁵ ⁷ ᵀ ᴴ ᴱ ᴼ", "TOP"),
|
||
("⁰ ¹ ² ³ ᴱ ᴸ ᴼ ᵁ", "0"),
|
||
("ᵇ ᵈ ᵏ ʰ ʲ ᶠ ⁱ", "TOP"),
|
||
("ᵉ ᵒ ʳ ˢ ˣ ᶜ ᶻ", "TOP | X_HEIGHT"),
|
||
("ᵉ ᵒ ʳ ˢ ˣ ᶜ ᶻ", "0"),
|
||
("ᵖ ʸ ᵍ", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Lisu",
|
||
"tag": "LISU",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "ꓳ", # ꓳ
|
||
"base_ranges": [
|
||
(0xA4D0, 0xA4FF), # Lisu
|
||
],
|
||
"non_base_ranges": [
|
||
],
|
||
"blues": [
|
||
("ꓡ ꓧ ꓱ ꓶ ꓩ ꓚ ꓵ ꓳ", "TOP"),
|
||
("ꓕ ꓜ ꓞ ꓡ ꓛ ꓢ ꓳ ꓴ", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Malayalam",
|
||
"tag": "MLYM",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "ഠ റ", # ഠ റ
|
||
"base_ranges": [
|
||
(0x0D00, 0x0D7F), # Malayalam
|
||
],
|
||
"non_base_ranges": [
|
||
(0x0D00, 0x0D01),
|
||
(0x0D3B, 0x0D3C),
|
||
(0x0D4D, 0x0D4E),
|
||
(0x0D62, 0x0D63),
|
||
],
|
||
"blues": [
|
||
("ഒ ട ഠ റ ച പ ച്ച പ്പ", "TOP"),
|
||
("ട ഠ ധ ശ ഘ ച ഥ ല", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Medefaidrin",
|
||
"tag": "MEDF",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "𖹡 𖹛 𖹯", # 𖹡 𖹛 𖹯
|
||
"base_ranges": [
|
||
(0x16E40, 0x16E9F), # Medefaidrin
|
||
],
|
||
"non_base_ranges": [
|
||
],
|
||
"blues": [
|
||
("𖹀 𖹁 𖹂 𖹃 𖹏 𖹚 𖹟", "TOP"),
|
||
("𖹀 𖹁 𖹂 𖹃 𖹏 𖹚 𖹒 𖹓", "0"),
|
||
("𖹤 𖹬 𖹧 𖹴 𖹶 𖹾", "TOP"),
|
||
("𖹠 𖹡 𖹢 𖹹 𖹳 𖹮", "TOP | X_HEIGHT"),
|
||
("𖹠 𖹡 𖹢 𖹳 𖹭 𖹽", "0"),
|
||
("𖹥 𖹨 𖹩", "0"),
|
||
("𖺀 𖺅 𖺈 𖺄 𖺍", "TOP"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Mongolian",
|
||
"tag": "MONG",
|
||
"hint_top_to_bottom": True,
|
||
"std_chars": "ᡂ ᠪ", # ᡂ ᠪ
|
||
"base_ranges": [
|
||
(0x1800, 0x18AF), # Mongolian
|
||
(0x11660, 0x1167F), # Mongolian Supplement
|
||
],
|
||
"non_base_ranges": [
|
||
(0x1885, 0x1886),
|
||
(0x18A9, 0x18A9),
|
||
],
|
||
"blues": [
|
||
("ᠳ ᠴ ᠶ ᠽ ᡂ ᡊ ᡡ ᡳ", "TOP"),
|
||
("ᡃ", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Myanmar",
|
||
"tag": "MYMR",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "ဝ င ဂ", # ဝ င ဂ
|
||
"base_ranges": [
|
||
(0x1000, 0x109F), # Myanmar
|
||
(0xA9E0, 0xA9FF), # Myanmar Extended-B
|
||
(0xAA60, 0xAA7F), # Myanmar Extended-A
|
||
],
|
||
"non_base_ranges": [
|
||
(0x102D, 0x1030),
|
||
(0x1032, 0x1037),
|
||
(0x103A, 0x103A),
|
||
(0x103D, 0x103E),
|
||
(0x1058, 0x1059),
|
||
(0x105E, 0x1060),
|
||
(0x1071, 0x1074),
|
||
(0x1082, 0x1082),
|
||
(0x1085, 0x1086),
|
||
(0x108D, 0x108D),
|
||
(0xA9E5, 0xA9E5),
|
||
(0xAA7C, 0xAA7C),
|
||
],
|
||
"blues": [
|
||
("ခ ဂ င ဒ ဝ ၥ ၊ ။", "TOP | X_HEIGHT"),
|
||
("င ဎ ဒ ပ ဗ ဝ ၊ ။", "0"),
|
||
("ဩ ြ ၍ ၏ ၆ ါ ိ", "TOP"),
|
||
("ဉ ည ဥ ဩ ဨ ၂ ၅ ၉", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "N'Ko",
|
||
"tag": "NKOO",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "ߋ ߀", # ߋ ߀
|
||
"base_ranges": [
|
||
(0x07C0, 0x07FF), # N'Ko
|
||
],
|
||
"non_base_ranges": [
|
||
(0x07EB, 0x07F5),
|
||
(0x07FD, 0x07FD),
|
||
],
|
||
"blues": [
|
||
("ߐ ߉ ߒ ߟ ߖ ߜ ߠ ߥ", "TOP"),
|
||
("߀ ߘ ߡ ߠ ߥ", "0"),
|
||
("ߏ ߛ ߋ", "TOP | X_HEIGHT"),
|
||
("ߎ ߏ ߛ ߋ", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "no script",
|
||
"tag": "NONE",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "",
|
||
"base_ranges": [
|
||
],
|
||
"non_base_ranges": [
|
||
],
|
||
"blues": [
|
||
],
|
||
},
|
||
{
|
||
"name": "Ol Chiki",
|
||
"tag": "OLCK",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "ᱛ", # ᱛ
|
||
"base_ranges": [
|
||
(0x1C50, 0x1C7F), # Ol Chiki
|
||
],
|
||
"non_base_ranges": [
|
||
],
|
||
"blues": [
|
||
("ᱛ ᱜ ᱝ ᱡ ᱢ ᱥ", "TOP"),
|
||
("ᱛ ᱜ ᱝ ᱡ ᱢ ᱥ", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Old Turkic",
|
||
"tag": "ORKH",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "𐰗", # 𐰗
|
||
"base_ranges": [
|
||
(0x10C00, 0x10C4F), # Old Turkic
|
||
],
|
||
"non_base_ranges": [
|
||
],
|
||
"blues": [
|
||
("𐰗 𐰘 𐰧", "TOP"),
|
||
("𐰉 𐰗 𐰦 𐰧", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Osage",
|
||
"tag": "OSGE",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "𐓂 𐓪", # 𐓂 𐓪
|
||
"base_ranges": [
|
||
(0x104B0, 0x104FF), # Osage
|
||
],
|
||
"non_base_ranges": [
|
||
],
|
||
"blues": [
|
||
("𐒾 𐓍 𐓒 𐓓 𐒻 𐓂 𐒵 𐓆", "TOP"),
|
||
("𐒰 𐓍 𐓂 𐒿 𐓎 𐒹", "0"),
|
||
("𐒼 𐒽 𐒾", "0"),
|
||
("𐓵 𐓶 𐓺 𐓻 𐓝 𐓣 𐓪 𐓮", "TOP | X_HEIGHT"),
|
||
("𐓘 𐓚 𐓣 𐓵 𐓡 𐓧 𐓪 𐓶", "0"),
|
||
("𐓤 𐓦 𐓸 𐓹 𐓛", "TOP"),
|
||
("𐓤 𐓥 𐓦", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Osmanya",
|
||
"tag": "OSMA",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "𐒆 𐒠", # 𐒆 𐒠
|
||
"base_ranges": [
|
||
(0x10480, 0x104AF), # Osmanya
|
||
],
|
||
"non_base_ranges": [
|
||
],
|
||
"blues": [
|
||
("𐒆 𐒉 𐒐 𐒒 𐒘 𐒛 𐒠 𐒣", "TOP"),
|
||
("𐒀 𐒂 𐒆 𐒈 𐒊 𐒒 𐒠 𐒩", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Hanifi Rohingya",
|
||
"tag": "ROHG",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "𐴰", # 𐴰
|
||
"base_ranges": [
|
||
(0x10D00, 0x10D3F), # Hanifi Rohingya
|
||
],
|
||
"non_base_ranges": [
|
||
],
|
||
"blues": [
|
||
("𐴃 𐴀 𐴆 𐴖 𐴕", "TOP"),
|
||
("𐴔 𐴖 𐴕 𐴑 𐴐", "0"),
|
||
("ـ", "NEUTRAL"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Saurashtra",
|
||
"tag": "SAUR",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "ꢝ ꣐", # ꢝ ꣐
|
||
"base_ranges": [
|
||
(0xA880, 0xA8DF), # Saurashtra
|
||
],
|
||
"non_base_ranges": [
|
||
(0xA880, 0xA881),
|
||
(0xA8B4, 0xA8C5),
|
||
],
|
||
"blues": [
|
||
("ꢜ ꢞ ꢳ ꢂ ꢖ ꢒ ꢝ ꢛ", "TOP"),
|
||
("ꢂ ꢨ ꢺ ꢤ ꢎ", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Shavian",
|
||
"tag": "SHAW",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "𐑴", # 𐑴
|
||
"base_ranges": [
|
||
(0x10450, 0x1047F), # Shavian
|
||
],
|
||
"non_base_ranges": [
|
||
],
|
||
"blues": [
|
||
("𐑕 𐑙", "TOP"),
|
||
("𐑔 𐑖 𐑗 𐑹 𐑻", "0"),
|
||
("𐑟 𐑣", "0"),
|
||
("𐑱 𐑲 𐑳 𐑴 𐑸 𐑺 𐑼", "TOP | X_HEIGHT"),
|
||
("𐑴 𐑻 𐑹", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Sinhala",
|
||
"tag": "SINH",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "ට", # ට
|
||
"base_ranges": [
|
||
(0x0D80, 0x0DFF), # Sinhala
|
||
],
|
||
"non_base_ranges": [
|
||
(0x0DCA, 0x0DCA),
|
||
(0x0DD2, 0x0DD6),
|
||
],
|
||
"blues": [
|
||
("ඉ ක ඝ ඳ ප ය ල ෆ", "TOP"),
|
||
("එ ඔ ඝ ජ ට ථ ධ ර", "0"),
|
||
("ද ඳ උ ල තූ තු බු දු", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Sundanese",
|
||
"tag": "SUND",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "᮰", # ᮰
|
||
"base_ranges": [
|
||
(0x1B80, 0x1BBF), # Sundanese
|
||
(0x1CC0, 0x1CCF), # Sundanese Supplement
|
||
],
|
||
"non_base_ranges": [
|
||
(0x1B80, 0x1B82),
|
||
(0x1BA1, 0x1BAD),
|
||
],
|
||
"blues": [
|
||
("ᮋ ᮞ ᮮ ᮽ ᮰ ᮈ", "TOP"),
|
||
("ᮄ ᮔ ᮕ ᮗ ᮰ ᮆ ᮈ ᮉ", "0"),
|
||
("ᮼ ᳄", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Tamil",
|
||
"tag": "TAML",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "௦", # ௦
|
||
"base_ranges": [
|
||
(0x0B80, 0x0BFF), # Tamil
|
||
],
|
||
"non_base_ranges": [
|
||
(0x0B82, 0x0B82),
|
||
(0x0BC0, 0x0BC2),
|
||
(0x0BCD, 0x0BCD),
|
||
],
|
||
"blues": [
|
||
("உ ஒ ஓ ற ஈ க ங ச", "TOP"),
|
||
("க ச ல ஶ உ ங ட ப", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Tai Viet",
|
||
"tag": "TAVT",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "ꪒ ꪫ", # ꪒ ꪫ
|
||
"base_ranges": [
|
||
(0xAA80, 0xAADF), # Tai Viet
|
||
],
|
||
"non_base_ranges": [
|
||
(0xAAB0, 0xAAB0),
|
||
(0xAAB2, 0xAAB4),
|
||
(0xAAB7, 0xAAB8),
|
||
(0xAABE, 0xAABF),
|
||
(0xAAC1, 0xAAC1),
|
||
],
|
||
"blues": [
|
||
("ꪆ ꪔ ꪒ ꪖ ꪫ", "TOP"),
|
||
("ꪉ ꪫ ꪮ", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Telugu",
|
||
"tag": "TELU",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "౦ ౧", # ౦ ౧
|
||
"base_ranges": [
|
||
(0x0C00, 0x0C7F), # Telugu
|
||
],
|
||
"non_base_ranges": [
|
||
(0x0C00, 0x0C00),
|
||
(0x0C04, 0x0C04),
|
||
(0x0C3E, 0x0C40),
|
||
(0x0C46, 0x0C56),
|
||
(0x0C62, 0x0C63),
|
||
],
|
||
"blues": [
|
||
("ఇ ఌ ఙ ఞ ణ ఱ ౯", "TOP"),
|
||
("అ క చ ర ఽ ౨ ౬", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Tifinagh",
|
||
"tag": "TFNG",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "ⵔ", # ⵔ
|
||
"base_ranges": [
|
||
(0x2D30, 0x2D7F), # Tifinagh
|
||
],
|
||
"non_base_ranges": [
|
||
],
|
||
"blues": [
|
||
("ⵔ ⵙ ⵛ ⵞ ⴵ ⴼ ⴹ ⵎ", "TOP"),
|
||
("ⵔ ⵙ ⵛ ⵞ ⴵ ⴼ ⴹ ⵎ", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Thai",
|
||
"tag": "THAI",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "า ๅ ๐", # า ๅ ๐
|
||
"base_ranges": [
|
||
(0x0E00, 0x0E7F), # Thai
|
||
],
|
||
"non_base_ranges": [
|
||
(0x0E31, 0x0E31),
|
||
(0x0E34, 0x0E3A),
|
||
(0x0E47, 0x0E4E),
|
||
],
|
||
"blues": [
|
||
("บ เ แ อ ก า", "TOP | X_HEIGHT"),
|
||
("บ ป ษ ฯ อ ย ฮ", "0"),
|
||
("ป ฝ ฟ", "TOP"),
|
||
("โ ใ ไ", "TOP"),
|
||
("ฎ ฏ ฤ ฦ", "0"),
|
||
("ญ ฐ", "0"),
|
||
("๐ ๑ ๓", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Vai",
|
||
"tag": "VAII",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "ꘓ ꖜ ꖴ", # ꘓ ꖜ ꖴ
|
||
"base_ranges": [
|
||
(0xA500, 0xA63F), # Vai
|
||
],
|
||
"non_base_ranges": [
|
||
],
|
||
"blues": [
|
||
("ꗍ ꘖ ꘙ ꘜ ꖜ ꖝ ꔅ ꕢ", "TOP"),
|
||
("ꗍ ꘖ ꘙ ꗞ ꔅ ꕢ ꖜ ꔆ", "0"),
|
||
],
|
||
},
|
||
{
|
||
"name": "Limbu",
|
||
"tag": "LIMB",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "o", # XXX
|
||
"base_ranges": [
|
||
(0x1900, 0x194F), # Limbu
|
||
],
|
||
"non_base_ranges": [
|
||
(0x1920, 0x1922),
|
||
(0x1927, 0x1934),
|
||
(0x1937, 0x193B),
|
||
],
|
||
"blues": [],
|
||
},
|
||
{
|
||
"name": "Oriya",
|
||
"tag": "ORYA",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "o", # XXX
|
||
"base_ranges": [
|
||
(0x0B00, 0x0B7F), # Oriya
|
||
],
|
||
"non_base_ranges": [
|
||
(0x0B01, 0x0B02),
|
||
(0x0B3C, 0x0B3C),
|
||
(0x0B3F, 0x0B3F),
|
||
(0x0B41, 0x0B44),
|
||
(0x0B4D, 0x0B56),
|
||
(0x0B62, 0x0B63),
|
||
],
|
||
"blues": [],
|
||
},
|
||
{
|
||
"name": "Syloti Nagri",
|
||
"tag": "SYLO",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "o", # XXX
|
||
"base_ranges": [
|
||
(0xA800, 0xA82F), # Syloti Nagri
|
||
],
|
||
"non_base_ranges": [
|
||
(0xA802, 0xA802),
|
||
(0xA806, 0xA806),
|
||
(0xA80B, 0xA80B),
|
||
(0xA825, 0xA826),
|
||
],
|
||
"blues": [],
|
||
},
|
||
{
|
||
"name": "Tibetan",
|
||
"tag": "TIBT",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "o", # XXX
|
||
"base_ranges": [
|
||
(0x0F00, 0x0FFF), # Tibetan
|
||
],
|
||
"non_base_ranges": [
|
||
(0x0F18, 0x0F19),
|
||
(0x0F35, 0x0F35),
|
||
(0x0F37, 0x0F37),
|
||
(0x0F39, 0x0F39),
|
||
(0x0F3E, 0x0F3F),
|
||
(0x0F71, 0x0F7E),
|
||
(0x0F80, 0x0F84),
|
||
(0x0F86, 0x0F87),
|
||
(0x0F8D, 0x0FBC),
|
||
],
|
||
"blues": [],
|
||
},
|
||
{
|
||
"name": "CJKV ideographs",
|
||
"tag": "HANI",
|
||
"hint_top_to_bottom": False,
|
||
"std_chars": "田 囗", # 田 囗
|
||
"base_ranges": [
|
||
(0x1100, 0x11FF), # Hangul Jamo
|
||
(0x2E80, 0x2EFF), # CJK Radicals Supplement
|
||
(0x2F00, 0x2FDF), # Kangxi Radicals
|
||
(0x2FF0, 0x2FFF), # Ideographic Description Characters
|
||
(0x3000, 0x303F), # CJK Symbols and Punctuation
|
||
(0x3040, 0x309F), # Hiragana
|
||
(0x30A0, 0x30FF), # Katakana
|
||
(0x3100, 0x312F), # Bopomofo
|
||
(0x3130, 0x318F), # Hangul Compatibility Jamo
|
||
(0x3190, 0x319F), # Kanbun
|
||
(0x31A0, 0x31BF), # Bopomofo Extended
|
||
(0x31C0, 0x31EF), # CJK Strokes
|
||
(0x31F0, 0x31FF), # Katakana Phonetic Extensions
|
||
(0x3300, 0x33FF), # CJK Compatibility
|
||
(0x3400, 0x4DBF), # CJK Unified Ideographs Extension A
|
||
(0x4DC0, 0x4DFF), # Yijing Hexagram Symbols
|
||
(0x4E00, 0x9FFF), # CJK Unified Ideographs
|
||
(0xA960, 0xA97F), # Hangul Jamo Extended-A
|
||
(0xAC00, 0xD7AF), # Hangul Syllables
|
||
(0xD7B0, 0xD7FF), # Hangul Jamo Extended-B
|
||
(0xF900, 0xFAFF), # CJK Compatibility Ideographs
|
||
(0xFE10, 0xFE1F), # Vertical forms
|
||
(0xFE30, 0xFE4F), # CJK Compatibility Forms
|
||
(0xFF00, 0xFFEF), # Halfwidth and Fullwidth Forms
|
||
(0x1B000, 0x1B0FF), # Kana Supplement
|
||
(0x1B100, 0x1B12F), # Kana Extended-A
|
||
(0x1D300, 0x1D35F), # Tai Xuan Hing Symbols
|
||
(0x20000, 0x2A6DF), # CJK Unified Ideographs Extension B
|
||
(0x2A700, 0x2B73F), # CJK Unified Ideographs Extension C
|
||
(0x2B740, 0x2B81F), # CJK Unified Ideographs Extension D
|
||
(0x2B820, 0x2CEAF), # CJK Unified Ideographs Extension E
|
||
(0x2CEB0, 0x2EBEF), # CJK Unified Ideographs Extension F
|
||
(0x2F800, 0x2FA1F), # CJK Compatibility Ideographs Supplement
|
||
],
|
||
"non_base_ranges": [
|
||
(0x302A, 0x302F),
|
||
(0x3190, 0x319F),
|
||
],
|
||
"blues": [
|
||
("他 们 你 來 們 到 和 地 对 對 就 席 我 时 時 會 来 為 能 舰 說 说 这 這 齊 | 军 同 已 愿 既 星 是 景 民 照 现 現 理 用 置 要 軍 那 配 里 開 雷 露 面 顾", "TOP"),
|
||
("个 为 人 他 以 们 你 來 個 們 到 和 大 对 對 就 我 时 時 有 来 為 要 說 说 | 主 些 因 它 想 意 理 生 當 看 着 置 者 自 著 裡 过 还 进 進 過 道 還 里 面", "0"),
|
||
(" 些 们 你 來 們 到 和 地 她 将 將 就 年 得 情 最 样 樣 理 能 說 说 这 這 通 | 即 吗 吧 听 呢 品 响 嗎 师 師 收 断 斷 明 眼 間 间 际 陈 限 除 陳 随 際 隨", "HORIZONTAL"),
|
||
("事 前 學 将 將 情 想 或 政 斯 新 样 樣 民 沒 没 然 特 现 現 球 第 經 谁 起 | 例 別 别 制 动 動 吗 嗎 增 指 明 朝 期 构 物 确 种 調 调 費 费 那 都 間 间", "HORIZONTAL | RIGHT"),
|
||
],
|
||
},
|
||
]
|
||
|
||
# Script tags that generate() emits with `ScriptGroup::Cjk`.
CJK_GROUP = ["HANI"]
# Script tags that generate() emits with `ScriptGroup::Indic`; any tag found
# in neither list is emitted with `ScriptGroup::Default`.
INDIC_GROUP = ["LIMB", "ORYA", "SYLO", "TIBT"]
|
||
|
||
def _blue_zones_expr(zone_spec: str) -> str:
    """Return the Rust `BlueZones` expression for one blue zone spec.

    `zone_spec` is either "0" (no flags) or one or more flag names separated
    by " | ". Multiple flags are combined with chained `.union(...)` calls.
    """
    if zone_spec == "0":
        return "BlueZones::NONE"
    first, *rest = ["BlueZones::" + zone for zone in zone_spec.split(" | ")]
    return first + "".join(".union(" + flag + ")" for flag in rest)


def _merge_char_ranges(char_map: dict) -> list:
    """Collapse a char -> props map into sorted `(first, last, props)` runs.

    Two neighbouring characters end up in the same run only when their code
    points are consecutive and their props compare equal.
    """
    merged = []
    for ch in sorted(char_map):
        props = char_map[ch]
        if merged:
            run_first, run_last, run_props = merged[-1]
            if ch == run_last + 1 and run_props == props:
                # Extend the current run by one character.
                merged[-1] = (run_first, ch, props)
                continue
        merged.append((ch, ch, props))
    return merged


def generate() -> str:
    """Build the Rust source text for the autohinter script/style tables.

    Reads the module-level tables SCRIPT_CLASSES, STYLE_FEATURES,
    SCRIPTS_WITH_FEATURES, CJK_GROUP and INDIC_GROUP and emits, in order:

    * `SCRIPT_CLASSES`: one `ScriptClass` per script.
    * `impl ScriptClass`: a `usize` constant per script tag holding its index.
    * `STYLE_CLASSES`: for scripts listed in SCRIPTS_WITH_FEATURES, one
      `StyleClass` per feature followed by the plain style; for all other
      scripts just the plain style.
    * `impl StyleClass`: a `usize` constant per style holding its index.
    * `STYLE_RANGES`: merged character ranges mapping to the plain style
      index of the owning script, tagged as base or non-base.

    Returns:
        The complete generated file contents as a single string.
    """
    # Fragments are collected in a list and joined once at the end.
    out = [
        "// THIS FILE IS AUTOGENERATED.\n",
        "// Any changes to this file will be overwritten.\n",
        "// Use ../scripts/gen_autohint_scripts.py to regenerate.\n\n",
    ]

    # char -> (style_index, is_non_base)
    char_map = {}

    out.append("#[rustfmt::skip]\n")
    out.append("pub(super) const SCRIPT_CLASSES: &[ScriptClass] = &[\n")
    # Some scripts generate multiple styles so keep track of the style index.
    style_index = 0
    for script in SCRIPT_CLASSES:
        tag = script["tag"]
        blues = script["blues"]
        if tag in CJK_GROUP:
            group = "Cjk"
        elif tag in INDIC_GROUP:
            group = "Indic"
        else:
            group = "Default"

        out.append(" ScriptClass {\n")
        out.append(" name: \"{}\",\n".format(script["name"]))
        out.append(" group: ScriptGroup::{},\n".format(group))
        # The emitted tag is title-cased: "LATN" -> "Latn".
        out.append(" tag: Tag::new(b\"{}\"),\n".format(tag.lower().capitalize()))
        # Python's True/False become the Rust literals true/false.
        out.append(" hint_top_to_bottom: {},\n".format(str(script["hint_top_to_bottom"]).lower()))
        # standard characters
        out.append(" std_chars: \"{}\",\n".format(script["std_chars"]))
        # blue characters
        out.append(" blues: &[")
        if blues:
            out.append("\n")
            for blue in blues:
                out.append(" (\"" + blue[0] + "\"")
                out.append(", {}),\n".format(_blue_zones_expr(blue[1])))
            out.append(" ],\n")
        else:
            out.append("],\n")
        out.append(" },\n")

        # Feature styles are emitted before the plain style, so skip past
        # them: characters always map to the script's plain style.
        if tag in SCRIPTS_WITH_FEATURES:
            style_index += len(STYLE_FEATURES)

        # Characters this script actually claimed as base characters.
        bases = set()
        for char_range in script["base_ranges"]:
            # inclusive range
            for ch in range(char_range[0], char_range[1] + 1):
                # Note: FT has overlapping ranges but we choose to keep
                # the first one to match behavior
                if ch not in char_map:
                    char_map[ch] = (style_index, False)
                    bases.add(ch)
        for char_range in script["non_base_ranges"]:
            # inclusive range
            for ch in range(char_range[0], char_range[1] + 1):
                # Only characters owned by this script can be flagged.
                if ch in bases:
                    char_map[ch] = (style_index, True)  # True for non-base character
        style_index += 1
    out.append("];\n\n")

    # Add some symbolic indices for each script so they can be
    # referenced by ScriptClass::LATN for example
    out.append("#[allow(unused)]\n")
    out.append("impl ScriptClass {\n")
    for script_ix, script in enumerate(SCRIPT_CLASSES):
        out.append(" pub const {}: usize = {};\n".format(script["tag"], script_ix))
    out.append("}\n\n")

    # Now run through scripts again and generate style classes
    out.append("#[rustfmt::skip]\n")
    out.append("pub(super) const STYLE_CLASSES: &[StyleClass] = &[\n")
    # One entry per emitted style, in order; its length is the next index.
    style_class_tags = []
    for script_ix, script in enumerate(SCRIPT_CLASSES):
        tag = script["tag"]
        if tag in SCRIPTS_WITH_FEATURES:
            for feature in STYLE_FEATURES:
                feature_tag = feature["tag"]
                out.append(
                    " StyleClass {{ name: \"{}\", index: {}, script: &SCRIPT_CLASSES[{}], feature: Some(Tag::new(b\"{}\")) }},\n".format(
                        script["name"] + " " + feature["name"],
                        len(style_class_tags),
                        script_ix,
                        feature_tag,
                    )
                )
                style_class_tags.append(tag + "_" + feature_tag.upper())
        out.append(
            " StyleClass {{ name: \"{}\", index: {}, script: &SCRIPT_CLASSES[{}], feature: None }},\n".format(
                script["name"], len(style_class_tags), script_ix
            )
        )
        style_class_tags.append(tag)
    out.append("];\n\n")

    # Symbolic indices for style classes
    out.append("#[allow(unused)]\n")
    out.append("impl StyleClass {\n")
    for style_ix, style_tag in enumerate(style_class_tags):
        out.append(" pub const {}: usize = {};\n".format(style_tag, style_ix))
    out.append("}\n\n")

    # Sort the map by character, merge into ranges and output them.
    out.append("#[rustfmt::skip]\n")
    out.append("pub(super) const STYLE_RANGES: &[StyleRange] = &[\n")
    for first, last, props in _merge_char_ranges(char_map):
        kind = "non_base_range" if props[1] else "base_range"
        out.append(" {}({}, {}, {}),\n".format(kind, first, last, props[0]))
    out.append("];\n\n")
    return "".join(out)
|
||
|
||
if __name__ == "__main__":
    # Build the whole file in memory first so that a failure in generate()
    # cannot leave a truncated output file behind.
    rust_source = generate()
    # The destination is resolved relative to the current working directory.
    output_path = "../generated/generated_autohint_styles.rs"
    with open(output_path, "w", encoding="utf-8") as out_file:
        out_file.write(rust_source)
|