Files
another-boids-in-rust/vendor/skrifa/scripts/gen_autohint_styles.py

1477 lines
47 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# Generates Rust tables that define Unicode "script classes" for the purposes
# of autohinting.
#
# For performance, we want to link various pieces of data by index. For ease of
# modification and to avoid errors, we want to define those links symbolically
# by name. Thus, this script exists which converts symbolic references to
# indices when generating code.
#
# The bottom of this file contains the Rust generation code.
# OpenType features for which generate() emits an additional style class for
# every script listed in SCRIPTS_WITH_FEATURES. Each entry pairs a
# human-readable name with the four-character feature tag.
# Based on FreeType autofit coverage:
# https://gitlab.freedesktop.org/freetype/freetype/-/blob/57617782464411201ce7bbc93b086c1b4d7d84a5/src/autofit/afcover.h
STYLE_FEATURES = [
    {"name": feature_name, "tag": feature_tag}
    for feature_name, feature_tag in (
        ("petite capitals from capitals", "c2cp"),
        ("small capitals from capitals", "c2sc"),
        ("ordinals", "ordn"),
        ("petite capitals", "pcap"),
        ("ruby", "ruby"),
        ("scientific inferiors", "sinf"),
        ("small capitals", "smcp"),
        ("subscript", "subs"),
        ("superscript", "sups"),
        ("titling", "titl"),
    )
]
# Tags of the scripts that generate styles with the extended feature set
# above: in addition to its plain style, each of these scripts gets one style
# class per STYLE_FEATURES entry.
# FreeType refers to these as "meta latin".
SCRIPTS_WITH_FEATURES = ["CYRL", "GREK", "LATN"]
# Relative to FreeType, each entry below combines the data of AF_ScriptClass,
# AF_Script_UniRangeRec and AF_BlueStringset.
# Script definitions: https://gitlab.freedesktop.org/freetype/freetype/-/blob/57617782464411201ce7bbc93b086c1b4d7d84a5/src/autofit/afscript.h
# Unicode ranges: https://gitlab.freedesktop.org/freetype/freetype/-/blob/57617782464411201ce7bbc93b086c1b4d7d84a5/src/autofit/afranges.c
# Blues: https://gitlab.freedesktop.org/freetype/freetype/-/blob/57617782464411201ce7bbc93b086c1b4d7d84a5/src/autofit/afblue.h
SCRIPT_CLASSES = [
{
"name": "Adlam",
"tag": "ADLM",
"hint_top_to_bottom": False,
"std_chars": "𞤌 𞤮", # 𞤌 𞤮
"base_ranges": [
(0x1E900, 0x1E95F), # Adlam
],
"non_base_ranges": [
(0x1D944, 0x1E94A),
],
"blues": [
("𞤌 𞤅 𞤈 𞤏 𞤔 𞤚", "TOP"),
("𞤂 𞤖", "0"),
("𞤬 𞤮 𞤻 𞤼 𞤾", "TOP | X_HEIGHT"),
("𞤤 𞤨 𞤩 𞤭 𞤴 𞤸 𞤺 𞥀", "0"),
],
},
{
"name": "Arabic",
"tag": "ARAB",
"hint_top_to_bottom": False,
"std_chars": "ل ح ـ", # ل ح ـ
"base_ranges": [
(0x0600, 0x06FF), # Arabic
(0x0750, 0x07FF), # Arabic Supplement
(0x08A0, 0x08FF), # Arabic Extended-A
(0xFB50, 0xFDFF), # Arabic Presentation Forms-A
(0xFE70, 0xFEFF), # Arabic Presentation Forms-B
(0x1EE00, 0x1EEFF), # Arabic Mathematical Alphabetic Symbols
],
"non_base_ranges": [
(0x0600, 0x0605),
(0x0610, 0x061A),
(0x064B, 0x065F),
(0x0670, 0x0670),
(0x06D6, 0x06DC),
(0x06DF, 0x06E4),
(0x06E7, 0x06E8),
(0x06EA, 0x06ED),
(0x08D4, 0x08E1),
(0x08D3, 0x08FF),
(0xFBB2, 0xFBC1),
(0xFE70, 0xFE70),
(0xFE72, 0xFE72),
(0xFE74, 0xFE74),
(0xFE76, 0xFE76),
(0xFE78, 0xFE78),
(0xFE7A, 0xFE7A),
(0xFE7C, 0xFE7C),
(0xFE7E, 0xFE7E),
],
"blues": [
("ا إ ل ك ط ظ", "TOP"),
("ت ث ط ظ ك", "0"),
("ـ", "NEUTRAL"),
],
},
{
"name": "Armenian",
"tag": "ARMN",
"hint_top_to_bottom": False,
"std_chars": "ս Ս", # ս Ս
"base_ranges": [
(0x0530, 0x058F), # Armenian
(0xFB13, 0xFB17), # Alphab. Present. Forms (Armenian)
],
"non_base_ranges": [
(0x0559, 0x055F),
],
"blues": [
("Ա Մ Ւ Ս Բ Գ Դ Օ", "TOP"),
("Ւ Ո Դ Ճ Շ Ս Տ Օ", "0"),
("ե է ի մ վ ֆ ճ", "TOP"),
("ա յ ւ ս գ շ ր օ", "TOP | X_HEIGHT"),
("հ ո ճ ա ե ծ ս օ", "0"),
("բ ը ի լ ղ պ փ ց", "0"),
],
},
{
"name": "Avestan",
"tag": "AVST",
"hint_top_to_bottom": False,
"std_chars": "𐬚", # 𐬚
"base_ranges": [
(0x10B00, 0x10B3F), # Avestan
],
"non_base_ranges": [
(0x10B39, 0x10B3F),
],
"blues": [
("𐬀 𐬁 𐬐 𐬛", "TOP"),
("𐬀 𐬁", "0"),
],
},
{
"name": "Bamum",
"tag": "BAMU",
"hint_top_to_bottom": False,
"std_chars": "", # ꛁ
"base_ranges": [
(0xA6A0, 0xA6FF), # Bamum
# This is commented out in FreeType
# (0x16800, 0x16A3F), # Bamum Supplement
],
"non_base_ranges": [
(0xA6F0, 0xA6F1),
],
"blues": [
("ꚧ ꚨ ꛛ ꛉ ꛁ ꛈ ", "TOP"),
("ꚭ ꚳ ꚶ ꛬ ꚢ ꚽ ", "0"),
],
},
{
"name": "Bengali",
"tag": "BENG",
"hint_top_to_bottom": True,
"std_chars": " ", #
"base_ranges": [
(0x0980, 0x09FF), # Bengali
],
"non_base_ranges": [
(0x0981, 0x0981),
(0x09BC, 0x09BC),
(0x09C1, 0x09C4),
(0x09CD, 0x09CD),
(0x09E2, 0x09E3),
(0x09FE, 0x09FE),
],
"blues": [
("ই ট ঠ ি ী ৈ ৗ", "TOP"),
("ও এ ড ত ন ব ল ক", "TOP"),
("অ ড ত ন ব ভ ল ক", "TOP | NEUTRAL | X_HEIGHT"),
("অ ড ত ন ব ভ ল ক", "0"),
],
},
{
"name": "Buhid",
"tag": "BUHD",
"hint_top_to_bottom": False,
"std_chars": "ᝋ ᝏ", # ᝋ ᝏ
"base_ranges": [
(0x1740, 0x175F), # Buhid
],
"non_base_ranges": [
(0x1752, 0x1753),
],
"blues": [
("ᝐ ᝈ", "TOP"),
("ᝅ ᝊ ᝎ", "TOP"),
("ᝂ ᝃ ᝉ ᝌ", "TOP | X_HEIGHT"),
("ᝀ ᝃ ᝆ ᝉ ᝋ ᝏ ᝑ", "0"),
],
},
{
"name": "Chakma",
"tag": "CAKM",
"hint_top_to_bottom": False,
"std_chars": "𑄤 𑄉 𑄛", # 𑄤 𑄉 𑄛
"base_ranges": [
(0x11100, 0x1114F), # Chakma
],
"non_base_ranges": [
(0x11100, 0x11102),
(0x11127, 0x11134),
(0x11146, 0x11146),
],
"blues": [
("𑄃 𑄅 𑄉 𑄙 𑄗", "TOP"),
("𑄅 𑄛 𑄝 𑄗 𑄓", "0"),
("𑄖𑄳𑄢 𑄘𑄳𑄢 𑄙𑄳𑄢 𑄤𑄳𑄢 𑄥𑄳𑄢", "0"),
],
},
{
"name": "Canadian Syllabics",
"tag": "CANS",
"hint_top_to_bottom": False,
"std_chars": "", #
"base_ranges": [
(0x1400, 0x167F), # Unified Canadian Aboriginal Syllabics
(0x18B0, 0x18FF), # Unified Canadian Aboriginal Syllabics Extended
],
"non_base_ranges": [
],
"blues": [
(" ᐁ ᒣ ᑫ ᑎ ᔑ ", "TOP"),
("ᗶ ᖵ ᒧ ᐃ ᔑ ᗢ", "0"),
("ᓓ ᓕ ᓀ ᓂ ᓄ ᕄ ᕆ ᘣ", "TOP | X_HEIGHT"),
("ᕃ ᓂ ᓀ ᕂ ᓗ ᓚ ᕆ ᘣ", "0"),
("ᐪ ᙆ ᣘ ᐢ ᒾ ᣗ ᔆ", "TOP"),
("ᙆ ᗮ ᒻ ᐞ ᔆ ᒡ ᒢ ᓑ", "0"),
],
},
{
"name": "Carian",
"tag": "CARI",
"hint_top_to_bottom": False,
"std_chars": "𐊫 𐋉", # 𐊫 𐋉
"base_ranges": [
(0x102A0, 0x102DF), # Carian
],
"non_base_ranges": [
],
"blues": [
("𐊧 𐊫 𐊬 𐊭 𐊱 𐊺 𐊼 𐊿", "TOP"),
("𐊣 𐊧 𐊷 𐋀 𐊫 𐊸 𐋉", "0"),
],
},
{
"name": "Cherokee",
"tag": "CHER",
"hint_top_to_bottom": False,
"std_chars": "Ꭴ Ꮕ ꮕ", # Ꭴ Ꮕ ꮕ
"base_ranges": [
(0x13A0, 0x13FF), # Cherokee
(0xAB70, 0xABBF), # Cherokee Supplement
],
"non_base_ranges": [
],
"blues": [
(" Ꭴ Ꮳ Ꭶ ", "TOP"),
(" Ꭴ Ꮳ Ꭶ ", "0"),
("ꮒ ꮤ ꮶ ꭴ ꭾ ꮗ ꮝ ꮿ", "TOP"),
("ꮖ ꭼ ꮠ ꮳ ꭶ ꮥ ꮻ", "TOP | X_HEIGHT"),
("ꮖ ꭼ ꮠ ꮳ ꭶ ꮥ ꮻ", "0"),
("ᏸ ꮐ ꭹ ꭻ", "0"),
],
},
{
"name": "Coptic",
"tag": "COPT",
"hint_top_to_bottom": False,
"std_chars": " ", #
"base_ranges": [
(0x2C80, 0x2CFF), # Coptic
],
"non_base_ranges": [
(0x2CEF, 0x2CF1),
],
"blues": [
(" Ⲡ Ⳟ ", "TOP"),
(" Ⳙ Ⳟ Ⲑ Ⳝ Ⲱ", "0"),
("ⲍ ⲏ ⲡ ⳟ ", "TOP | X_HEIGHT"),
("ⳑ ⳙ ⳟ ⲏ ⲑ ⳝ ", "0"),
],
},
{
"name": "Cypriot",
"tag": "CPRT",
"hint_top_to_bottom": False,
"std_chars": "𐠅 𐠣", # 𐠅 𐠣
"base_ranges": [
(0x10800, 0x1083F), # Cypriot
],
"non_base_ranges": [
],
"blues": [
("𐠍 𐠙 𐠳 𐠱 𐠅 𐠓 𐠣 𐠦", "TOP"),
("𐠃 𐠊 𐠛 𐠣 𐠳 𐠵 𐠐", "0"),
("𐠈 𐠏 𐠖", "TOP"),
("𐠈 𐠏 𐠖", "0"),
],
},
{
"name": "Cyrillic",
"tag": "CYRL",
"hint_top_to_bottom": False,
"std_chars": "о О", # о О
"base_ranges": [
(0x0400, 0x04FF), # Cyrillic
(0x0500, 0x052F), # Cyrillic Supplement
(0x2DE0, 0x2DFF), # Cyrillic Extended-A
(0xA640, 0xA69F), # Cyrillic Extended-B
(0x1C80, 0x1C8F), # Cyrillic Extended-C
],
"non_base_ranges": [
(0x0483, 0x0489),
(0x2DE0, 0x2DFF),
(0xA66F, 0xA67F),
(0xA69E, 0xA69F),
],
"blues": [
("Б В Е П З О С Э", "TOP"),
("Б В Е Ш З О С Э", "0"),
("х п н ш е з о с", "TOP | X_HEIGHT"),
("х п н ш е з о с", "0"),
("р у ф", "0"),
],
},
{
"name": "Devanagari",
"tag": "DEVA",
"hint_top_to_bottom": True,
"std_chars": "ठ व ट", # ठ व ट
"base_ranges": [
(0x0900, 0x093B), # Devanagari
(0x093D, 0x0950), # ... continued
(0x0953, 0x0963), # ... continued
(0x0966, 0x097F), # ... continued
(0x20B9, 0x20B9), # (new) Rupee sign
(0xA8E0, 0xA8FF), # Devanagari Extended
],
"non_base_ranges": [
(0x0900, 0x0902),
(0x093A, 0x093A),
(0x0941, 0x0948),
(0x094D, 0x094D),
(0x0953, 0x0957),
(0x0962, 0x0963),
(0xA8E0, 0xA8F1),
(0xA8FF, 0xA8FF),
],
"blues": [
("ई ऐ ओ औ ि ी ो ौ", "TOP"),
("क म अ आ थ ध भ श", "TOP"),
("क न म उ छ ट ठ ड", "TOP | NEUTRAL | X_HEIGHT"),
("क न म उ छ ट ठ ड", "0"),
("ु ृ", "0"),
],
},
{
"name": "Deseret",
"tag": "DSRT",
"hint_top_to_bottom": False,
"std_chars": "𐐄 𐐬", # 𐐄 𐐬
"base_ranges": [
(0x10400, 0x1044F), # Deseret
],
"non_base_ranges": [
],
"blues": [
("𐐂 𐐄 𐐋 𐐗 𐐑", "TOP"),
("𐐀 𐐂 𐐄 𐐗 𐐛", "0"),
("𐐪 𐐬 𐐳 𐐿 𐐹", "TOP | X_HEIGHT"),
("𐐨 𐐪 𐐬 𐐿 𐑃", "0"),
],
},
{
"name": "Ethiopic",
"tag": "ETHI",
"hint_top_to_bottom": False,
"std_chars": "", #
"base_ranges": [
(0x1200, 0x137F), # Ethiopic
(0x1380, 0x139F), # Ethiopic Supplement
(0x2D80, 0x2DDF), # Ethiopic Extended
(0xAB00, 0xAB2F), # Ethiopic Extended-A
],
"non_base_ranges": [
(0x135D, 0x135F),
],
"blues": [
(" ሃ ዘ ፐ ማ በ ዋ ", "TOP"),
("ለ ሐ በ ዘ ", "0"),
],
},
{
"name": "Georgian (Mkhedruli)",
"tag": "GEOR",
"hint_top_to_bottom": False,
"std_chars": "ი ე ა Ჿ", # ი ე ა Ი
"base_ranges": [
(0x10D0, 0x10FF), # Georgian (Mkhedruli)
(0x1C90, 0x1CBF), # Georgian Extended (Mtavruli)
],
"non_base_ranges": [
],
"blues": [
("გ დ ე ვ თ ი ო ღ", "TOP | X_HEIGHT"),
("ა ზ მ ს შ ძ ხ პ", "0"),
("ს ხ ქ ზ მ შ ჩ წ", "TOP"),
("ე ვ ჟ ტ უ ფ ქ ", "0"),
("Ნ Ჟ Ჳ Ჸ Გ Ე Ო Ჴ", "TOP"),
("Ი Ჲ Ო Ჩ Მ Შ Ჯ Ჽ", "0"),
],
},
{
"name": "Georgian (Khutsuri)",
"tag": "GEOK",
"hint_top_to_bottom": False,
"std_chars": "Ⴖ Ⴑ ⴙ", # Ⴖ Ⴑ ⴙ
"base_ranges": [
(0x10A0, 0x10CD), # Georgian (Asomtavruli)
(0x2D00, 0x2D2D), # Georgian Supplement (Nuskhuri)
],
"non_base_ranges": [
],
"blues": [
("Ⴑ Ⴇ Ⴙ Ⴜ Ⴄ Ⴅ Ⴓ Ⴚ", "TOP"),
("Ⴄ Ⴅ Ⴇ Ⴈ Ⴆ Ⴑ Ⴊ Ⴋ", "0"),
("ⴁ ⴗ ⴂ ⴄ ⴅ ⴇ ⴔ ⴖ", "TOP | X_HEIGHT"),
("ⴈ ⴌ ⴖ ⴎ ⴃ ⴆ ⴋ ⴢ", "0"),
("ⴐ ⴑ ⴓ ⴕ ⴙ ⴛ ⴡ ⴣ", "TOP"),
("ⴄ ⴅ ⴔ ⴕ ⴁ ⴂ ⴘ ⴝ", "0"),
],
},
{
"name": "Glagolitic",
"tag": "GLAG",
"hint_top_to_bottom": False,
"std_chars": "Ⱅ ⱅ", # Ⱅ ⱅ
"base_ranges": [
(0x2C00, 0x2C5F), # Glagolitic
(0x1E000, 0x1E02F), # Glagolitic Supplement
],
"non_base_ranges": [
(0x1E000, 0x1E02F),
],
"blues": [
("Ⰵ Ⱄ Ⱚ Ⰴ Ⰲ Ⰺ Ⱛ Ⰻ", "TOP"),
("Ⰵ Ⰴ Ⰲ Ⱚ Ⱎ Ⱑ Ⰺ Ⱄ", "0"),
("ⰵ ⱄ ⱚ ⰴ ⰲ ⰺ ⱛ ⰻ", "TOP | X_HEIGHT"),
("ⰵ ⰴ ⰲ ⱚ ⱎ ⱑ ⰺ ⱄ", "0"),
],
},
{
"name": "Gothic",
"tag": "GOTH",
"hint_top_to_bottom": True,
"std_chars": "𐌴 𐌾 𐍃", # 𐌴 𐌾 𐍃
"base_ranges": [
(0x10330, 0x1034F), # Gothic
],
"non_base_ranges": [
],
"blues": [
("𐌲 𐌶 𐍀 𐍄 𐌴 𐍃 𐍈 𐌾", "TOP"),
("𐌶 𐌴 𐍃 𐍈", "0"),
],
},
{
"name": "Greek",
"tag": "GREK",
"hint_top_to_bottom": False,
"std_chars": "ο Ο", # ο Ο
"base_ranges": [
(0x0370, 0x03FF), # Greek and Coptic
(0x1F00, 0x1FFF), # Greek Extended
],
"non_base_ranges": [
(0x037A, 0x037A),
(0x0384, 0x0385),
(0x1FBD, 0x1FC1),
(0x1FCD, 0x1FCF),
(0x1FDD, 0x1FDF),
(0x1FED, 0x1FEF),
(0x1FFD, 0x1FFE),
],
"blues": [
("Γ Β Ε Ζ Θ Ο Ω", "TOP"),
("Β Δ Ζ Ξ Θ Ο", "0"),
("β θ δ ζ λ ξ", "TOP"),
("α ε ι ο π σ τ ω", "TOP | X_HEIGHT"),
("α ε ι ο π σ τ ω", "0"),
("β γ η μ ρ φ χ ψ", "0"),
],
},
{
"name": "Gujarati",
"tag": "GUJR",
"hint_top_to_bottom": False,
"std_chars": "", # ટ
"base_ranges": [
(0x0A80, 0x0AFF), # Gujarati
],
"non_base_ranges": [
(0x0A81, 0x0A82),
(0x0ABC, 0x0ABC),
(0x0AC1, 0x0AC8),
(0x0ACD, 0x0ACD),
(0x0AE2, 0x0AE3),
(0x0AFA, 0x0AFF),
],
"blues": [
("ત ન ઋ ઌ છ ટ ર ", "TOP | X_HEIGHT"),
("ખ ગ ઘ ઞ ઇ ઈ ઠ જ", "0"),
("ઈ ઊ િ ી લી શ્ચિ જિ સી", "TOP"),
("ુ ૃ ૄ ખુ છૃ છૄ", "0"),
(" ૧ ૨ ૩ ૭", "TOP"),
],
},
{
"name": "Gurmukhi",
"tag": "GURU",
"hint_top_to_bottom": True,
"std_chars": "ਠ ਰ ", # ਠ ਰ
"base_ranges": [
(0x0A00, 0x0A7F), # Gurmukhi
],
"non_base_ranges": [
(0x0A01, 0x0A02),
(0x0A3C, 0x0A3C),
(0x0A41, 0x0A51),
(0x0A70, 0x0A71),
(0x0A75, 0x0A75),
],
"blues": [
("ਇ ਈ ਉ ਏ ਓ ੳ ਿ ੀ", "TOP"),
("ਕ ਗ ਙ ਚ ਜ ਤ ਧ ਸ", "TOP"),
("ਕ ਗ ਙ ਚ ਜ ਤ ਧ ਸ", "TOP | NEUTRAL | X_HEIGHT"),
("ਅ ਏ ਓ ਗ ਜ ਠ ਰ ਸ", "0"),
(" ੨ ੩ ੭", "TOP"),
],
},
{
"name": "Hebrew",
"tag": "HEBR",
"hint_top_to_bottom": False,
"std_chars": "ם", # ם
"base_ranges": [
(0x0590, 0x05FF), # Hebrew
(0xFB1D, 0xFB4F), # Alphab. Present. Forms (Hebrew)
],
"non_base_ranges": [
(0x0591, 0x05BF),
(0x05C1, 0x05C2),
(0x05C4, 0x05C5),
(0x05C7, 0x05C7),
(0xFB1E, 0xFB1E),
],
"blues": [
("ב ד ה ח ך כ ם ס", "TOP | LONG"),
("ב ט כ ם ס צ", "0"),
("ק ך ן ף ץ", "0"),
],
},
{
"name": "Kayah Li",
"tag": "KALI",
"hint_top_to_bottom": False,
"std_chars": "ꤍ ꤀", # ꤍ ꤀
"base_ranges": [
(0xA900, 0xA92F), # Kayah Li
],
"non_base_ranges": [
(0xA926, 0xA92D),
],
"blues": [
("꤅ ꤏ ꤁ ꤋ ꤀ ꤍ", "TOP | X_HEIGHT"),
("꤈ ꤘ ꤀ ꤍ ꤢ", "0"),
("ꤖ ꤡ", "TOP"),
("ꤑ ꤜ ꤞ", "0"),
("ꤑ꤬ ꤜ꤭ ꤔ꤬", "0"),
],
},
{
"name": "Khmer",
"tag": "KHMR",
"hint_top_to_bottom": False,
"std_chars": "", # ០
"base_ranges": [
(0x1780, 0x17FF), # Khmer
],
"non_base_ranges": [
(0x17B7, 0x17BD),
(0x17C6, 0x17C6),
(0x17C9, 0x17D3),
(0x17DD, 0x17DD),
],
"blues": [
("ខ ទ ន ឧ ឩ ា", "TOP | X_HEIGHT"),
("ក្ក ក្ខ ក្គ ក្ថ", "SUB_TOP"),
("ខ ឃ ច ឋ ប ម យ ឲ", "0"),
("ត្រ រៀ ឲ្យ អឿ", "0"),
("ន្ត្រៃ ង្ខ្យ ក្បៀ ច្រៀ ន្តឿ ល្បឿ", "0"),
],
},
{
"name": "Khmer Symbols",
"tag": "KHMS",
"hint_top_to_bottom": False,
"std_chars": "᧡ ᧪", # ᧡ ᧪
"base_ranges": [
(0x19E0, 0x19FF), # Khmer Symbols
],
"non_base_ranges": [
],
"blues": [
("᧠ ᧡", "TOP | X_HEIGHT"),
("᧶ ᧹", "0"),
],
},
{
"name": "Kannada",
"tag": "KNDA",
"hint_top_to_bottom": False,
"std_chars": "", #
"base_ranges": [
(0x0C80, 0x0CFF), # Kannada
],
"non_base_ranges": [
(0x0C81, 0x0C81),
(0x0CBC, 0x0CBC),
(0x0CBF, 0x0CBF),
(0x0CC6, 0x0CC6),
(0x0CCC, 0x0CCD),
(0x0CE2, 0x0CE3),
],
"blues": [
("ಇ ಊ ಐ ಣ ಸಾ ನಾ ದಾ ರಾ", "TOP"),
("ಅ ಉ ಎ ಲ ೨ ೬ ೭", "0"),
],
},
{
"name": "Lao",
"tag": "LAOO",
"hint_top_to_bottom": False,
"std_chars": "", #
"base_ranges": [
(0x0E80, 0x0EFF), # Lao
],
"non_base_ranges": [
(0x0EB1, 0x0EB1),
(0x0EB4, 0x0EBC),
(0x0EC8, 0x0ECD),
],
"blues": [
("າ ດ ອ ມ ລ ວ ຣ ງ", "TOP | X_HEIGHT"),
("າ ອ ບ ຍ ຣ ຮ ວ ຢ", "0"),
("ປ ຢ ຟ ຝ", "TOP"),
("ໂ ໄ ໃ", "TOP"),
("ງ ຊ ຖ ຽ ໆ ຯ", "0"),
],
},
{
"name": "Latin",
"tag": "LATN",
"hint_top_to_bottom": False,
"std_chars": "o O 0",
"base_ranges": [
(0x0020, 0x007F), # Basic Latin (no control chars)
(0x00A0, 0x00A9), # Latin-1 Supplement (no control chars)
(0x00AB, 0x00B1), # ... continued
(0x00B4, 0x00B8), # ... continued
(0x00BB, 0x00FF), # ... continued
(0x0100, 0x017F), # Latin Extended-A
(0x0180, 0x024F), # Latin Extended-B
(0x0250, 0x02AF), # IPA Extensions
(0x02B9, 0x02DF), # Spacing Modifier Letters
(0x02E5, 0x02FF), # ... continued
(0x0300, 0x036F), # Combining Diacritical Marks
(0x1AB0, 0x1ABE), # Combining Diacritical Marks Extended
(0x1D00, 0x1D2B), # Phonetic Extensions
(0x1D6B, 0x1D77), # ... continued
(0x1D79, 0x1D7F), # ... continued
(0x1D80, 0x1D9A), # Phonetic Extensions Supplement
(0x1DC0, 0x1DFF), # Combining Diacritical Marks Supplement
(0x1E00, 0x1EFF), # Latin Extended Additional
(0x2000, 0x206F), # General Punctuation
(0x20A0, 0x20B8), # Currency Symbols ...
(0x20BA, 0x20CF), # ... except new Rupee sign
(0x2150, 0x218F), # Number Forms
(0x2C60, 0x2C7B), # Latin Extended-C
(0x2C7E, 0x2C7F), # ... continued
(0x2E00, 0x2E7F), # Supplemental Punctuation
(0xA720, 0xA76F), # Latin Extended-D
(0xA771, 0xA7F7), # ... continued
(0xA7FA, 0xA7FF), # ... continued
(0xAB30, 0xAB5B), # Latin Extended-E
(0xAB60, 0xAB6F), # ... continued
(0xFB00, 0xFB06), # Alphab. Present. Forms (Latin Ligs)
(0x1D400, 0x1D7FF), # Mathematical Alphanumeric Symbols
],
"non_base_ranges": [
(0x005E, 0x0060),
(0x007E, 0x007E),
(0x00A8, 0x00A9),
(0x00AE, 0x00B0),
(0x00B4, 0x00B4),
(0x00B8, 0x00B8),
(0x00BC, 0x00BE),
(0x02B9, 0x02DF),
(0x02E5, 0x02FF),
(0x0300, 0x036F),
(0x1AB0, 0x1ABE),
(0x1DC0, 0x1DFF),
(0x2017, 0x2017),
(0x203E, 0x203E),
(0xA788, 0xA788),
(0xA7F8, 0xA7FA),
],
"blues": [
("T H E Z O C Q S", "TOP"),
("H E Z L O C U S", "0"),
("f i j k d b h", "TOP"),
("u v x z o e s c", "TOP | X_HEIGHT"),
("n r x z o e s c", "0"),
("p q g j y", "0"),
],
},
{
"name": "Latin Subscript Fallback",
"tag": "LATB",
"hint_top_to_bottom": False,
"std_chars": "ₒ ₀", # ₒ ₀
"base_ranges": [
(0x1D62, 0x1D6A), # some small subscript letters
(0x2080, 0x209C), # subscript digits and letters
(0x2C7C, 0x2C7C), # latin subscript small letter j
],
"non_base_ranges": [
],
"blues": [
("₀ ₃ ₅ ₇ ₈", "TOP"),
("₀ ₁ ₂ ₃ ₈", "0"),
("ᵢ ⱼ ₕ ₖ ₗ", "TOP"),
("ₐ ₑ ₒ ₓ ₙ ₛ ᵥ ᵤ ᵣ", "TOP | X_HEIGHT"),
("ₐ ₑ ₒ ₓ ₙ ₛ ᵥ ᵤ ᵣ", "0"),
("ᵦ ᵧ ᵨ ᵩ ₚ", "0"),
],
},
{
"name": "Latin Superscript Fallback",
"tag": "LATP",
"hint_top_to_bottom": False,
"std_chars": "ᵒ ᴼ ⁰", # ᵒ ᴼ ⁰
"base_ranges": [
(0x00AA, 0x00AA), # feminine ordinal indicator
(0x00B2, 0x00B3), # superscript two and three
(0x00B9, 0x00BA), # superscript one, masc. ord. indic.
(0x02B0, 0x02B8), # some latin superscript mod. letters
(0x02E0, 0x02E4), # some IPA modifier letters
(0x1D2C, 0x1D61), # latin superscript modifier letters
(0x1D78, 0x1D78), # modifier letter cyrillic en
(0x1D9B, 0x1DBF), # more modifier letters
(0x2070, 0x207F), # superscript digits and letters
(0x2C7D, 0x2C7D), # modifier letter capital v
(0xA770, 0xA770), # modifier letter us
(0xA7F8, 0xA7F9), # more modifier letters
(0xAB5C, 0xAB5F), # more modifier letters
],
"non_base_ranges": [
],
"blues": [
("⁰ ³ ⁵ ⁷ ᵀ ᴴ ᴱ ᴼ", "TOP"),
("⁰ ¹ ² ³ ᴱ ᴸ ᴼ ᵁ", "0"),
("ᵇ ᵈ ᵏ ʰ ʲ ᶠ ⁱ", "TOP"),
("ᵉ ᵒ ʳ ˢ ˣ ᶜ ᶻ", "TOP | X_HEIGHT"),
("ᵉ ᵒ ʳ ˢ ˣ ᶜ ᶻ", "0"),
("ᵖ ʸ ᵍ", "0"),
],
},
{
"name": "Lisu",
"tag": "LISU",
"hint_top_to_bottom": False,
"std_chars": "", #
"base_ranges": [
(0xA4D0, 0xA4FF), # Lisu
],
"non_base_ranges": [
],
"blues": [
(" ꓱ ꓶ ꓩ ", "TOP"),
(" ", "0"),
],
},
{
"name": "Malayalam",
"tag": "MLYM",
"hint_top_to_bottom": False,
"std_chars": "", #
"base_ranges": [
(0x0D00, 0x0D7F), # Malayalam
],
"non_base_ranges": [
(0x0D00, 0x0D01),
(0x0D3B, 0x0D3C),
(0x0D4D, 0x0D4E),
(0x0D62, 0x0D63),
],
"blues": [
("ഒ ട റ ച പ ച്ച പ്പ", "TOP"),
(" ധ ശ ഘ ച ഥ ല", "0"),
],
},
{
"name": "Medefaidrin",
"tag": "MEDF",
"hint_top_to_bottom": False,
"std_chars": "𖹡 𖹛 𖹯", # 𖹡 𖹛 𖹯
"base_ranges": [
(0x16E40, 0x16E9F), # Medefaidrin
],
"non_base_ranges": [
],
"blues": [
("𖹀 𖹁 𖹂 𖹃 𖹏 𖹚 𖹟", "TOP"),
("𖹀 𖹁 𖹂 𖹃 𖹏 𖹚 𖹒 𖹓", "0"),
("𖹤 𖹬 𖹧 𖹴 𖹶 𖹾", "TOP"),
("𖹠 𖹡 𖹢 𖹹 𖹳 𖹮", "TOP | X_HEIGHT"),
("𖹠 𖹡 𖹢 𖹳 𖹭 𖹽", "0"),
("𖹥 𖹨 𖹩", "0"),
("𖺀 𖺅 𖺈 𖺄 𖺍", "TOP"),
],
},
{
"name": "Mongolian",
"tag": "MONG",
"hint_top_to_bottom": True,
"std_chars": "ᡂ ᠪ", # ᡂ ᠪ
"base_ranges": [
(0x1800, 0x18AF), # Mongolian
(0x11660, 0x1167F), # Mongolian Supplement
],
"non_base_ranges": [
(0x1885, 0x1886),
(0x18A9, 0x18A9),
],
"blues": [
("ᠳ ᠴ ᠶ ᠽ ᡂ ᡊ ‍ᡡ‍ ‍ᡳ‍", "TOP"),
("", "0"),
],
},
{
"name": "Myanmar",
"tag": "MYMR",
"hint_top_to_bottom": False,
"std_chars": " င ဂ", # င ဂ
"base_ranges": [
(0x1000, 0x109F), # Myanmar
(0xA9E0, 0xA9FF), # Myanmar Extended-B
(0xAA60, 0xAA7F), # Myanmar Extended-A
],
"non_base_ranges": [
(0x102D, 0x1030),
(0x1032, 0x1037),
(0x103A, 0x103A),
(0x103D, 0x103E),
(0x1058, 0x1059),
(0x105E, 0x1060),
(0x1071, 0x1074),
(0x1082, 0x1082),
(0x1085, 0x1086),
(0x108D, 0x108D),
(0xA9E5, 0xA9E5),
(0xAA7C, 0xAA7C),
],
"blues": [
("ခ ဂ င ဒ ၥ ၊ ။", "TOP | X_HEIGHT"),
("င ဎ ဒ ပ ဗ ၊ ။", "0"),
("ဩ ြ ၍ ၏ ၆ ါ ိ", "TOP"),
("ဉ ည ဥ ဩ ဨ ၂ ၅ ၉", "0"),
],
},
{
"name": "N'Ko",
"tag": "NKOO",
"hint_top_to_bottom": False,
"std_chars": "ߋ ߀", # ߋ ߀
"base_ranges": [
(0x07C0, 0x07FF), # N'Ko
],
"non_base_ranges": [
(0x07EB, 0x07F5),
(0x07FD, 0x07FD),
],
"blues": [
("ߐ ߉ ߒ ߟ ߖ ߜ ߠ ߥ", "TOP"),
("߀ ߘ ߡ ߠ ߥ", "0"),
("ߏ ߛ ߋ", "TOP | X_HEIGHT"),
("ߎ ߏ ߛ ߋ", "0"),
],
},
{
"name": "no script",
"tag": "NONE",
"hint_top_to_bottom": False,
"std_chars": "",
"base_ranges": [
],
"non_base_ranges": [
],
"blues": [
],
},
{
"name": "Ol Chiki",
"tag": "OLCK",
"hint_top_to_bottom": False,
"std_chars": "", # ᱛ
"base_ranges": [
(0x1C50, 0x1C7F), # Ol Chiki
],
"non_base_ranges": [
],
"blues": [
("ᱛ ᱜ ᱝ ᱡ ᱢ ᱥ", "TOP"),
("ᱛ ᱜ ᱝ ᱡ ᱢ ᱥ", "0"),
],
},
{
"name": "Old Turkic",
"tag": "ORKH",
"hint_top_to_bottom": False,
"std_chars": "𐰗", # 𐰗
"base_ranges": [
(0x10C00, 0x10C4F), # Old Turkic
],
"non_base_ranges": [
],
"blues": [
("𐰗 𐰘 𐰧", "TOP"),
("𐰉 𐰗 𐰦 𐰧", "0"),
],
},
{
"name": "Osage",
"tag": "OSGE",
"hint_top_to_bottom": False,
"std_chars": "𐓂 𐓪", # 𐓂 𐓪
"base_ranges": [
(0x104B0, 0x104FF), # Osage
],
"non_base_ranges": [
],
"blues": [
("𐒾 𐓍 𐓒 𐓓 𐒻 𐓂 𐒵 𐓆", "TOP"),
("𐒰 𐓍 𐓂 𐒿 𐓎 𐒹", "0"),
("𐒼 𐒽 𐒾", "0"),
("𐓵 𐓶 𐓺 𐓻 𐓝 𐓣 𐓪 𐓮", "TOP | X_HEIGHT"),
("𐓘 𐓚 𐓣 𐓵 𐓡 𐓧 𐓪 𐓶", "0"),
("𐓤 𐓦 𐓸 𐓹 𐓛", "TOP"),
("𐓤 𐓥 𐓦", "0"),
],
},
{
"name": "Osmanya",
"tag": "OSMA",
"hint_top_to_bottom": False,
"std_chars": "𐒆 𐒠", # 𐒆 𐒠
"base_ranges": [
(0x10480, 0x104AF), # Osmanya
],
"non_base_ranges": [
],
"blues": [
("𐒆 𐒉 𐒐 𐒒 𐒘 𐒛 𐒠 𐒣", "TOP"),
("𐒀 𐒂 𐒆 𐒈 𐒊 𐒒 𐒠 𐒩", "0"),
],
},
{
"name": "Hanifi Rohingya",
"tag": "ROHG",
"hint_top_to_bottom": False,
"std_chars": "𐴰", # 𐴰
"base_ranges": [
(0x10D00, 0x10D3F), # Hanifi Rohingya
],
"non_base_ranges": [
],
"blues": [
("𐴃 𐴀 𐴆 𐴖 𐴕", "TOP"),
("𐴔 𐴖 𐴕 𐴑 𐴐", "0"),
("ـ", "NEUTRAL"),
],
},
{
"name": "Saurashtra",
"tag": "SAUR",
"hint_top_to_bottom": False,
"std_chars": "ꢝ ꣐", # ꢝ ꣐
"base_ranges": [
(0xA880, 0xA8DF), # Saurashtra
],
"non_base_ranges": [
(0xA880, 0xA881),
(0xA8B4, 0xA8C5),
],
"blues": [
("ꢜ ꢞ ꢳ ꢂ ꢖ ꢒ ꢝ ꢛ", "TOP"),
("ꢂ ꢨ ꢺ ꢤ ꢎ", "0"),
],
},
{
"name": "Shavian",
"tag": "SHAW",
"hint_top_to_bottom": False,
"std_chars": "𐑴", # 𐑴
"base_ranges": [
(0x10450, 0x1047F), # Shavian
],
"non_base_ranges": [
],
"blues": [
("𐑕 𐑙", "TOP"),
("𐑔 𐑖 𐑗 𐑹 𐑻", "0"),
("𐑟 𐑣", "0"),
("𐑱 𐑲 𐑳 𐑴 𐑸 𐑺 𐑼", "TOP | X_HEIGHT"),
("𐑴 𐑻 𐑹", "0"),
],
},
{
"name": "Sinhala",
"tag": "SINH",
"hint_top_to_bottom": False,
"std_chars": "", # ට
"base_ranges": [
(0x0D80, 0x0DFF), # Sinhala
],
"non_base_ranges": [
(0x0DCA, 0x0DCA),
(0x0DD2, 0x0DD6),
],
"blues": [
("ඉ ක ඝ ඳ ප ය ල ෆ", "TOP"),
("එ ඔ ඝ ජ ට ථ ධ ර", "0"),
("ද ඳ උ ල තූ තු බු දු", "0"),
],
},
{
"name": "Sundanese",
"tag": "SUND",
"hint_top_to_bottom": False,
"std_chars": "", # ᮰
"base_ranges": [
(0x1B80, 0x1BBF), # Sundanese
(0x1CC0, 0x1CCF), # Sundanese Supplement
],
"non_base_ranges": [
(0x1B80, 0x1B82),
(0x1BA1, 0x1BAD),
],
"blues": [
("ᮋ ᮞ ᮮ ᮽ ᮰ ᮈ", "TOP"),
("ᮄ ᮔ ᮕ ᮗ ᮰ ᮆ ᮈ ᮉ", "0"),
("ᮼ ᳄", "0"),
],
},
{
"name": "Tamil",
"tag": "TAML",
"hint_top_to_bottom": False,
"std_chars": "", #
"base_ranges": [
(0x0B80, 0x0BFF), # Tamil
],
"non_base_ranges": [
(0x0B82, 0x0B82),
(0x0BC0, 0x0BC2),
(0x0BCD, 0x0BCD),
],
"blues": [
("உ ஒ ஓ ற ஈ க ங ச", "TOP"),
("க ச ல ஶ உ ங ட ப", "0"),
],
},
{
"name": "Tai Viet",
"tag": "TAVT",
"hint_top_to_bottom": False,
"std_chars": "ꪒ ꪫ", # ꪒ ꪫ
"base_ranges": [
(0xAA80, 0xAADF), # Tai Viet
],
"non_base_ranges": [
(0xAAB0, 0xAAB0),
(0xAAB2, 0xAAB4),
(0xAAB7, 0xAAB8),
(0xAABE, 0xAABF),
(0xAAC1, 0xAAC1),
],
"blues": [
("ꪆ ꪔ ꪒ ꪖ ꪫ", "TOP"),
("ꪉ ꪫ ꪮ", "0"),
],
},
{
"name": "Telugu",
"tag": "TELU",
"hint_top_to_bottom": False,
"std_chars": "", #
"base_ranges": [
(0x0C00, 0x0C7F), # Telugu
],
"non_base_ranges": [
(0x0C00, 0x0C00),
(0x0C04, 0x0C04),
(0x0C3E, 0x0C40),
(0x0C46, 0x0C56),
(0x0C62, 0x0C63),
],
"blues": [
("ఇ ఌ ఙ ఞ ణ ఱ ౯", "TOP"),
("అ క చ ర ఽ ౨ ౬", "0"),
],
},
{
"name": "Tifinagh",
"tag": "TFNG",
"hint_top_to_bottom": False,
"std_chars": "", #
"base_ranges": [
(0x2D30, 0x2D7F), # Tifinagh
],
"non_base_ranges": [
],
"blues": [
(" ⵙ ⵛ ⵞ ⴵ ⴼ ", "TOP"),
(" ⵙ ⵛ ⵞ ⴵ ⴼ ", "0"),
],
},
{
"name": "Thai",
"tag": "THAI",
"hint_top_to_bottom": False,
"std_chars": "า ๅ ", # า ๅ
"base_ranges": [
(0x0E00, 0x0E7F), # Thai
],
"non_base_ranges": [
(0x0E31, 0x0E31),
(0x0E34, 0x0E3A),
(0x0E47, 0x0E4E),
],
"blues": [
("บ เ แ อ ก า", "TOP | X_HEIGHT"),
("บ ป ษ ฯ อ ย ฮ", "0"),
("ป ฝ ฟ", "TOP"),
("โ ใ ไ", "TOP"),
("ฎ ฏ ฤ ฦ", "0"),
("ญ ฐ", "0"),
(" ๑ ๓", "0"),
],
},
{
"name": "Vai",
"tag": "VAII",
"hint_top_to_bottom": False,
"std_chars": "ꘓ ꖜ ꖴ", # ꘓ ꖜ ꖴ
"base_ranges": [
(0xA500, 0xA63F), # Vai
],
"non_base_ranges": [
],
"blues": [
("ꗍ ꘖ ꘙ ꘜ ꖜ ꖝ ꔅ ꕢ", "TOP"),
("ꗍ ꘖ ꘙ ꗞ ꔅ ꕢ ꖜ ꔆ", "0"),
],
},
{
"name": "Limbu",
"tag": "LIMB",
"hint_top_to_bottom": False,
"std_chars": "o", # XXX
"base_ranges": [
(0x1900, 0x194F), # Limbu
],
"non_base_ranges": [
(0x1920, 0x1922),
(0x1927, 0x1934),
(0x1937, 0x193B),
],
"blues": [],
},
{
"name": "Oriya",
"tag": "ORYA",
"hint_top_to_bottom": False,
"std_chars": "o", # XXX
"base_ranges": [
(0x0B00, 0x0B7F), # Oriya
],
"non_base_ranges": [
(0x0B01, 0x0B02),
(0x0B3C, 0x0B3C),
(0x0B3F, 0x0B3F),
(0x0B41, 0x0B44),
(0x0B4D, 0x0B56),
(0x0B62, 0x0B63),
],
"blues": [],
},
{
"name": "Syloti Nagri",
"tag": "SYLO",
"hint_top_to_bottom": False,
"std_chars": "o", # XXX
"base_ranges": [
(0xA800, 0xA82F), # Syloti Nagri
],
"non_base_ranges": [
(0xA802, 0xA802),
(0xA806, 0xA806),
(0xA80B, 0xA80B),
(0xA825, 0xA826),
],
"blues": [],
},
{
"name": "Tibetan",
"tag": "TIBT",
"hint_top_to_bottom": False,
"std_chars": "o", # XXX
"base_ranges": [
(0x0F00, 0x0FFF), # Tibetan
],
"non_base_ranges": [
(0x0F18, 0x0F19),
(0x0F35, 0x0F35),
(0x0F37, 0x0F37),
(0x0F39, 0x0F39),
(0x0F3E, 0x0F3F),
(0x0F71, 0x0F7E),
(0x0F80, 0x0F84),
(0x0F86, 0x0F87),
(0x0F8D, 0x0FBC),
],
"blues": [],
},
{
"name": "CJKV ideographs",
"tag": "HANI",
"hint_top_to_bottom": False,
"std_chars": "田 囗", # 田 囗
"base_ranges": [
(0x1100, 0x11FF), # Hangul Jamo
(0x2E80, 0x2EFF), # CJK Radicals Supplement
(0x2F00, 0x2FDF), # Kangxi Radicals
(0x2FF0, 0x2FFF), # Ideographic Description Characters
(0x3000, 0x303F), # CJK Symbols and Punctuation
(0x3040, 0x309F), # Hiragana
(0x30A0, 0x30FF), # Katakana
(0x3100, 0x312F), # Bopomofo
(0x3130, 0x318F), # Hangul Compatibility Jamo
(0x3190, 0x319F), # Kanbun
(0x31A0, 0x31BF), # Bopomofo Extended
(0x31C0, 0x31EF), # CJK Strokes
(0x31F0, 0x31FF), # Katakana Phonetic Extensions
(0x3300, 0x33FF), # CJK Compatibility
(0x3400, 0x4DBF), # CJK Unified Ideographs Extension A
(0x4DC0, 0x4DFF), # Yijing Hexagram Symbols
(0x4E00, 0x9FFF), # CJK Unified Ideographs
(0xA960, 0xA97F), # Hangul Jamo Extended-A
(0xAC00, 0xD7AF), # Hangul Syllables
(0xD7B0, 0xD7FF), # Hangul Jamo Extended-B
(0xF900, 0xFAFF), # CJK Compatibility Ideographs
(0xFE10, 0xFE1F), # Vertical forms
(0xFE30, 0xFE4F), # CJK Compatibility Forms
(0xFF00, 0xFFEF), # Halfwidth and Fullwidth Forms
(0x1B000, 0x1B0FF), # Kana Supplement
(0x1B100, 0x1B12F), # Kana Extended-A
(0x1D300, 0x1D35F), # Tai Xuan Hing Symbols
(0x20000, 0x2A6DF), # CJK Unified Ideographs Extension B
(0x2A700, 0x2B73F), # CJK Unified Ideographs Extension C
(0x2B740, 0x2B81F), # CJK Unified Ideographs Extension D
(0x2B820, 0x2CEAF), # CJK Unified Ideographs Extension E
(0x2CEB0, 0x2EBEF), # CJK Unified Ideographs Extension F
(0x2F800, 0x2FA1F), # CJK Compatibility Ideographs Supplement
],
"non_base_ranges": [
(0x302A, 0x302F),
(0x3190, 0x319F),
],
"blues": [
("他 们 你 來 們 到 和 地 对 對 就 席 我 时 時 會 来 為 能 舰 說 说 这 這 齊 | 军 同 已 愿 既 星 是 景 民 照 现 現 理 用 置 要 軍 那 配 里 開 雷 露 面 顾", "TOP"),
("个 为 人 他 以 们 你 來 個 們 到 和 大 对 對 就 我 时 時 有 来 為 要 說 说 | 主 些 因 它 想 意 理 生 當 看 着 置 者 自 著 裡 过 还 进 進 過 道 還 里 面", "0"),
(" 些 们 你 來 們 到 和 地 她 将 將 就 年 得 情 最 样 樣 理 能 說 说 这 這 通 | 即 吗 吧 听 呢 品 响 嗎 师 師 收 断 斷 明 眼 間 间 际 陈 限 除 陳 随 際 隨", "HORIZONTAL"),
("事 前 學 将 將 情 想 或 政 斯 新 样 樣 民 沒 没 然 特 现 現 球 第 經 谁 起 | 例 別 别 制 动 動 吗 嗎 增 指 明 朝 期 构 物 确 种 調 调 費 费 那 都 間 间", "HORIZONTAL | RIGHT"),
],
},
]
# Tags of the scripts that generate() emits with ScriptGroup::Cjk.
CJK_GROUP = ["HANI"]
# Tags of the scripts that generate() emits with ScriptGroup::Indic. Scripts
# listed in neither group are emitted with ScriptGroup::Default.
INDIC_GROUP = ["LIMB", "ORYA", "SYLO", "TIBT"]
def _blue_zones_expr(flags: str) -> str:
    """Return the Rust ``BlueZones`` expression for a blue flag string.

    ``"0"`` maps to ``BlueZones::NONE``; otherwise the ``" | "`` separated
    flag names are chained with ``.union(...)``.
    """
    if flags == "0":
        return "BlueZones::NONE"
    zones = ["BlueZones::" + flag for flag in flags.split(" | ")]
    return zones[0] + "".join(".union(" + zone + ")" for zone in zones[1:])


def _merge_ranges(char_map: dict) -> list:
    """Collapse a ``char -> props`` map into sorted ``(first, last, props)`` runs.

    Consecutive code points are merged into one run only when they carry
    identical props.
    """
    ranges = []
    for ch in sorted(char_map):
        props = char_map[ch]
        if ranges:
            first, last, last_props = ranges[-1]
            # Merge only if this character extends the previous run by exactly
            # one code point and has the same properties.
            if ch == last + 1 and last_props == props:
                ranges[-1] = (first, ch, props)
                continue
        ranges.append((ch, ch, props))
    return ranges


def generate() -> str:
    """Generate the Rust source for the autohint script and style tables.

    Reads the module-level SCRIPT_CLASSES, STYLE_FEATURES,
    SCRIPTS_WITH_FEATURES, CJK_GROUP and INDIC_GROUP tables and emits, in
    order:

    * ``SCRIPT_CLASSES``: one ``ScriptClass`` per script.
    * ``impl ScriptClass``: one symbolic index constant per script tag.
    * ``STYLE_CLASSES``: for each script in SCRIPTS_WITH_FEATURES, one
      ``StyleClass`` per feature followed by the plain style; for every other
      script just the plain style.
    * ``impl StyleClass``: one symbolic index constant per style.
    * ``STYLE_RANGES``: sorted, merged character ranges, each mapped to the
      index of the plain style of the script that owns it.

    Returns:
        The complete generated Rust source as a single string.
    """
    # Collect fragments and join once at the end instead of growing a string.
    out = [
        "// THIS FILE IS AUTOGENERATED.\n",
        "// Any changes to this file will be overwritten.\n",
        "// Use ../scripts/gen_autohint_styles.py to regenerate.\n\n",
    ]

    # char -> (style_index, is_non_base) for every character in any range
    char_map = {}

    out.append("#[rustfmt::skip]\n")
    out.append("pub(super) const SCRIPT_CLASSES: &[ScriptClass] = &[\n")
    # Some scripts generate multiple styles so keep track of the style index.
    style_index = 0
    for script in SCRIPT_CLASSES:
        tag = script["tag"]
        blues = script["blues"]
        if tag in CJK_GROUP:
            group = "Cjk"
        elif tag in INDIC_GROUP:
            group = "Indic"
        else:
            group = "Default"

        out.append(" ScriptClass {\n")
        out.append(" name: \"{}\",\n".format(script["name"]))
        out.append(" group: ScriptGroup::{},\n".format(group))
        # The emitted tag is capitalized, e.g. "LATN" -> "Latn".
        out.append(" tag: Tag::new(b\"{}\"),\n".format(tag.capitalize()))
        out.append(" hint_top_to_bottom: {},\n".format(str(script["hint_top_to_bottom"]).lower()))
        # standard characters
        out.append(" std_chars: \"{}\",\n".format(script["std_chars"]))
        # blue characters
        out.append(" blues: &[")
        if blues:
            out.append("\n")
            for blue_chars, blue_flags in blues:
                out.append(" (\"" + blue_chars + "\"")
                out.append(", {}),\n".format(_blue_zones_expr(blue_flags)))
            out.append(" ],\n")
        else:
            out.append("],\n")
        out.append(" },\n")

        # Feature styles precede the plain style in STYLE_CLASSES, so skip
        # over them: characters always map to the script's plain style.
        if tag in SCRIPTS_WITH_FEATURES:
            style_index += len(STYLE_FEATURES)

        # Characters that this script actually claimed.
        bases = set()
        for first, last in script["base_ranges"]:
            # inclusive range
            for ch in range(first, last + 1):
                # Note: FT has overlapping ranges but we choose to keep
                # the first one to match behavior
                if ch not in char_map:
                    char_map[ch] = (style_index, False)
                    bases.add(ch)
        for first, last in script["non_base_ranges"]:
            # inclusive range
            for ch in range(first, last + 1):
                # Only characters owned by this script can be flagged.
                if ch in bases:
                    char_map[ch] = (style_index, True)  # True for non-base character
        style_index += 1
    out.append("];\n\n")

    # Add some symbolic indices for each script so they can be
    # referenced by ScriptClass::LATN for example
    out.append("#[allow(unused)]\n")
    out.append("impl ScriptClass {\n")
    for script_index, script in enumerate(SCRIPT_CLASSES):
        out.append(" pub const {}: usize = {};\n".format(script["tag"], script_index))
    out.append("}\n\n")

    # Now run through scripts again and generate style classes
    out.append("#[rustfmt::skip]\n")
    out.append("pub(super) const STYLE_CLASSES: &[StyleClass] = &[\n")
    # One constant name per emitted style; its length doubles as the index of
    # the next style, which keeps the indices and the constants in lockstep.
    style_class_tags = []
    for script_index, script in enumerate(SCRIPT_CLASSES):
        tag = script["tag"]
        name = script["name"]
        if tag in SCRIPTS_WITH_FEATURES:
            for feature in STYLE_FEATURES:
                feature_tag = feature["tag"]
                out.append(
                    " StyleClass {{ name: \"{}\", index: {}, script: &SCRIPT_CLASSES[{}], feature: Some(Tag::new(b\"{}\")) }},\n".format(
                        name + " " + feature["name"],
                        len(style_class_tags),
                        script_index,
                        feature_tag,
                    )
                )
                style_class_tags.append(tag + "_" + feature_tag.upper())
        out.append(
            " StyleClass {{ name: \"{}\", index: {}, script: &SCRIPT_CLASSES[{}], feature: None }},\n".format(
                name, len(style_class_tags), script_index
            )
        )
        style_class_tags.append(tag)
    out.append("];\n\n")

    # Symbolic indices for style classes
    out.append("#[allow(unused)]\n")
    out.append("impl StyleClass {\n")
    for style_pos, style_tag in enumerate(style_class_tags):
        out.append(" pub const {}: usize = {};\n".format(style_tag, style_pos))
    out.append("}\n\n")

    # and finally output the sorted, merged ranges
    out.append("#[rustfmt::skip]\n")
    out.append("pub(super) const STYLE_RANGES: &[StyleRange] = &[\n")
    for first, last, (range_style, is_non_base) in _merge_ranges(char_map):
        kind = "non_base_range" if is_non_base else "base_range"
        out.append(" {}({}, {}, {}),\n".format(kind, first, last, range_style))
    out.append("];\n\n")
    return "".join(out)
if __name__ == "__main__":
    import os

    # Resolve the output path relative to this script rather than the current
    # working directory, so the generator can be run from any directory.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    output_path = os.path.join(
        script_dir, "..", "generated", "generated_autohint_styles.rs"
    )
    data = generate()
    # newline="\n" disables newline translation so the generated file is
    # identical on every platform.
    with open(output_path, "w", encoding="utf-8", newline="\n") as f:
        f.write(data)