Vendor dependencies for 0.3.0 release

This commit is contained in:
2025-09-27 10:29:08 -05:00
parent 0c8d39d483
commit 82ab7f317b
26803 changed files with 16134934 additions and 0 deletions

8
vendor/rustybuzz/scripts/README.md vendored Normal file
View File

@@ -0,0 +1,8 @@
## Usage
```sh
python3 gen-universal-table.py > ../src/hb/ot_shape_complex_use_table.rs
python3 ./gen-vowel-constraints.py > ../src/complex/vowel_constraints.rs
rustfmt ../src/complex/vowel_constraints.rs
```

View File

@@ -0,0 +1,179 @@
#!/usr/bin/env python3
# Based on harfbuzz/src/gen-arabic-table.py
#
# Generates the Rust Arabic joining-type table for rustybuzz from the
# Unicode 14.0.0 character database files.
import os
import urllib.request

# UCD files this generator needs; downloaded once and cached in the CWD.
DEPENDENCIES = [
    "ArabicShaping.txt",
    "UnicodeData.txt",
    "Blocks.txt",
]
for dep in DEPENDENCIES:
    if not os.path.exists(dep):
        urllib.request.urlretrieve("https://unicode.org/Public/14.0.0/ucd/" + dep, dep)

files = [open(x, encoding="utf-8") for x in DEPENDENCIES]

# First two header lines of ArabicShaping.txt and Blocks.txt, kept for reference;
# UnicodeData.txt has no header line.
headers = [
    [files[0].readline(), files[0].readline()],
    [files[2].readline(), files[2].readline()],
    ["UnicodeData.txt does not have a header."],
]

# Skip the rest of ArabicShaping.txt's preamble, which ends with a rule of '#'s.
while files[0].readline().find("##################") < 0:
    pass

# Codepoint -> Unicode block name; filled in by read_blocks().
blocks = {}
def read_blocks(f):
    """Populate the global `blocks` map (codepoint -> block name) from Blocks.txt."""
    global blocks
    for raw in f:
        # Drop a trailing '#' comment, if present.
        hash_pos = raw.find("#")
        text = raw[:hash_pos] if hash_pos >= 0 else raw
        parts = [p.strip() for p in text.split(";")]
        if len(parts) < 2:
            # Blank or comment-only line.
            continue
        span = parts[0].split("..")
        lo = int(span[0], 16)
        hi = int(span[1], 16) if len(span) > 1 else lo
        name = parts[1]
        for cp in range(lo, hi + 1):
            blocks[cp] = name
def print_joining_table(f):
    """Emit the Rust joining-type table and lookup fn from ArabicShaping.txt.

    Reads the open ArabicShaping.txt handle `f`, then prints (to stdout):
    `JOINING_TABLE`, one `JOINING_OFFSET_0X....` const per packed range, and
    the generated `joining_type()` Rust function. Relies on the module-level
    `blocks` map being populated first (see read_blocks).
    """
    # Codepoint -> "JOINING_TYPE_x" / "JOINING_GROUP_x" value name.
    values = {}
    for line in f:
        if line[0] == "#":
            continue
        fields = [x.strip() for x in line.split(";")]
        if len(fields) == 1:
            continue
        u = int(fields[0], 16)
        # ALAPH and DALATH RISH are the only joining groups the shaper needs.
        if fields[3] in ["ALAPH", "DALATH RISH"]:
            value = "JOINING_GROUP_" + fields[3].replace(" ", "_")
        else:
            value = "JOINING_TYPE_" + fields[2]
        values[u] = value

    # Build unique one/two-letter aliases from the trailing name components.
    short_value = {}
    for value in sorted(set([v for v in values.values()] + ["JOINING_TYPE_X"])):
        short = "".join(x[0] for x in value.split("_")[2:])
        assert short not in short_value.values()
        short_value[value] = short

    uu = sorted(values.keys())
    num = len(values)
    all_blocks = set([blocks[u] for u in uu])

    # Pack codepoints into ranges, tolerating gaps up to 1 + 16*5 entries.
    last = -100000
    ranges = []
    for u in uu:
        if u - last <= 1 + 16 * 5:
            ranges[-1][-1] = u
        else:
            ranges.append([u, u])
        last = u

    print("#[rustfmt::skip]")
    print("pub const JOINING_TABLE: &[hb_arabic_joining_type_t] = &[")
    last_block = None
    offset = 0
    join_offsets = []
    for start, end in ranges:
        # Each range's starting index inside the flat table.
        join_offsets.append(
            "const JOINING_OFFSET_0X%04X: usize = %d;" % (start, offset)
        )
        for u in range(start, end + 1):
            block = blocks.get(u, last_block)
            value = values.get(u, "JOINING_TYPE_X")
            if block != last_block or u == start:
                if u != start:
                    print()
                if block in all_blocks:
                    print("\n /* %s */" % block)
                else:
                    # Gap entries carried inside a packed range.
                    print("\n /* FILLER */")
                last_block = block
                if u % 32 != 0:
                    print()
                    print(" /* %04X */" % (u // 32 * 32), " " * (u % 32), end="")
            if u % 32 == 0:
                print()
                print(" /* %04X */ " % u, end="")
            val = short_value[value]
            # NOTE(review): 'C' is folded into 'D' (JOIN_CAUSING treated as
            # DUAL_JOINING) — presumably intentional, mirroring harfbuzz.
            if val == "C":
                val = "D"
            print("%s," % val, end="")
        print()
        offset += end - start + 1
    print("];")
    print()
    for offset in join_offsets:
        print(offset)

    # Emit the page-dispatched lookup function.
    page_bits = 12
    print()
    print("pub fn joining_type(u: char) -> hb_arabic_joining_type_t {")
    print(" let u = u as u32;")
    print(" match u >> %d {" % page_bits)
    pages = set(
        [u >> page_bits for u in [s for s, e in ranges] + [e for s, e in ranges]]
    )
    for p in sorted(pages):
        print(" 0x%0X => {" % p)
        for start, end in ranges:
            if p not in [start >> page_bits, end >> page_bits]:
                continue
            offset = "JOINING_OFFSET_0X%04X" % start
            print(" if (0x%04X..=0x%04X).contains(&u) {" % (start, end))
            print(
                " return JOINING_TABLE[u as usize - 0x%04X + %s]"
                % (start, offset)
            )
            print(" }")
        print(" }")
    print(" _ => {}")
    print(" }")
    print()
    print(" X")
    print("}")
print()
print("// WARNING: this file was generated by scripts/gen-arabic-table.py")
print()
# Import the category aliases used by the generated table.
print(
    "use super::ot_shape_complex_arabic::hb_arabic_joining_type_t::{\n"
    " self, GroupAlaph as A, GroupDalathRish as DR, D, L, R, T, U, X,\n"
    "};"
)
print()
# Populate the block map from Blocks.txt, then emit the table from
# ArabicShaping.txt.
read_blocks(files[2])
print_joining_table(files[0])

251
vendor/rustybuzz/scripts/gen-indic-table.py vendored Executable file
View File

@@ -0,0 +1,251 @@
#!/usr/bin/env python3
# Based on harfbuzz/src/gen-indic-table.py
import io
import os
import urllib.request
DEPENDENCIES = [
'IndicSyllabicCategory.txt',
'IndicPositionalCategory.txt',
'Blocks.txt',
]
for dep in DEPENDENCIES:
if not os.path.exists(dep):
urllib.request.urlretrieve('https://unicode.org/Public/14.0.0/ucd/' + dep, dep)
ALLOWED_SINGLES = [0x00A0, 0x25CC]
ALLOWED_BLOCKS = [
'Basic Latin',
'Latin-1 Supplement',
'Devanagari',
'Bengali',
'Gurmukhi',
'Gujarati',
'Oriya',
'Tamil',
'Telugu',
'Kannada',
'Malayalam',
'Sinhala',
'Myanmar',
'Khmer',
'Vedic Extensions',
'General Punctuation',
'Superscripts and Subscripts',
'Devanagari Extended',
'Myanmar Extended-B',
'Myanmar Extended-A',
]
files = [io.open(x, encoding='utf-8') for x in DEPENDENCIES]
headers = [[f.readline() for i in range(2)] for f in files]
data = [{} for f in files]
values = [{} for f in files]
for i, f in enumerate(files):
for line in f:
j = line.find('#')
if j >= 0:
line = line[:j]
fields = [x.strip() for x in line.split(';')]
if len(fields) == 1:
continue
uu = fields[0].split('..')
start = int(uu[0], 16)
if len(uu) == 1:
end = start
else:
end = int(uu[1], 16)
t = fields[1]
for u in range(start, end + 1):
data[i][u] = t
values[i][t] = values[i].get(t, 0) + end - start + 1
# Merge data into one dict:
defaults = ('Other', 'Not_Applicable', 'No_Block')
for i, v in enumerate(defaults):
values[i][v] = values[i].get(v, 0) + 1
combined = {}
for i, d in enumerate(data):
for u, v in d.items():
if i == 2 and u not in combined:
continue
if u not in combined:
combined[u] = list(defaults)
combined[u][i] = v
combined = {k: v for k, v in combined.items() if k in ALLOWED_SINGLES or v[2] in ALLOWED_BLOCKS}
data = combined
del combined
num = len(data)
# Move the outliers NO-BREAK SPACE and DOTTED CIRCLE out
singles = {}
for u in ALLOWED_SINGLES:
singles[u] = data[u]
del data[u]
print('// WARNING: this file was generated by scripts/gen-indic-table.py')
print()
print('#![allow(non_camel_case_types)]')
print('#![allow(unused_imports)]')
print()
print('use super::ot_shape_complex_indic::{MatraCategory, SyllabicCategory};')
# Shorten values
short = [{
'Bindu': 'Bi',
'Cantillation_Mark': 'Ca',
'Joiner': 'ZWJ',
'Non_Joiner': 'ZWNJ',
'Number': 'Nd',
'Visarga': 'Vs',
'Vowel': 'Vo',
'Vowel_Dependent': 'M',
'Consonant_Prefixed': 'CPrf',
'Other': 'x',
}, {
'Not_Applicable': 'x',
}]
all_shorts = [{}, {}]
# Add some of the values, to make them more readable, and to avoid duplicates
for i in range(2):
for v, s in short[i].items():
all_shorts[i][s] = v
what = ['SyllabicCategory', 'MatraCategory']
what_short = ['ISC', 'IMC']
cat_defs = []
for i in range(2):
vv = sorted(values[i].keys())
for v in vv:
v_no_and = v.replace('_And_', '_')
if v in short[i]:
s = short[i][v]
else:
s = ''.join([c for c in v_no_and if ord('A') <= ord(c) <= ord('Z')])
if s in all_shorts[i]:
raise Exception('Duplicate short value alias', v, all_shorts[i][s])
all_shorts[i][s] = v
short[i][v] = s
cat_defs.append((what_short[i] + '_' + s, what[i] + '::' + v.replace('_', ''), str(values[i][v]), v))
maxlen_s = max([len(c[0]) for c in cat_defs])
maxlen_l = max([len(c[1]) for c in cat_defs])
maxlen_n = max([len(c[2]) for c in cat_defs])
for s in what_short:
print()
for c in [c for c in cat_defs if s in c[0]]:
print('use %s as %s;' % (c[1].ljust(maxlen_l), c[0]))
print()
print()
total = 0
used = 0
last_block = None
def print_block(block, start, end, data):
    """Emit one Unicode block's rows of (ISC, IMC) pairs, 8 codepoints per row.

    Updates the module-level `total`/`used` occupancy counters and
    `last_block` (so repeated calls for the same block print one header).
    `start`/`end` must be aligned to 8-codepoint rows.
    """
    global total, used, last_block
    if block and block != last_block:
        print()
        print()
        print(' /* %s */' % block)
    num = 0
    assert start % 8 == 0
    assert (end + 1) % 8 == 0
    for u in range(start, end + 1):
        if u % 8 == 0:
            print()
            print(' /* %04X */' % u, end='')
        if u in data:
            num += 1
        # Missing codepoints fall back to the module-level `defaults` tuple.
        d = data.get(u, defaults)
        print('%16s' % ('(ISC_%s,IMC_%s),' % (short[0][d[0]], short[1][d[1]])), end='')
    total += end - start + 1
    used += num
    if block:
        last_block = block
uu = sorted(data.keys())
last = -100000
num = 0
offset = 0
starts = []
ends = []
print('#[rustfmt::skip]')
print('const TABLE: &[(SyllabicCategory, MatraCategory)] = &[')
offsets = []
for u in uu:
if u <= last:
continue
block = data[u][2]
start = u // 8 * 8
end = start + 1
while end in uu and block == data[end][2]:
end += 1
end = (end - 1) // 8 * 8 + 7
if start != last + 1:
if start - last <= 1 + 16 * 3:
print_block(None, last + 1, start - 1, data)
last = start - 1
else:
if last >= 0:
ends.append(last + 1)
offset += ends[-1] - starts[-1]
# print()
# print()
offsets.append('const OFFSET_0X%04X: usize = %d;' % (start, offset))
starts.append(start)
print_block(block, start, end, data)
last = end
ends.append(last + 1)
offset += ends[-1] - starts[-1]
print()
print()
occupancy = used * 100. / total
page_bits = 12
print('];')
print()
for o in offsets:
print(o)
print()
print('#[rustfmt::skip]')
print('pub fn get_categories(u: u32) -> (SyllabicCategory, MatraCategory) {')
print(' match u >> %d {' % page_bits)
pages = set([u >> page_bits for u in starts + ends + list(singles.keys())])
for p in sorted(pages):
print(' 0x%0X => {' % p)
for u, d in singles.items():
if p != u >> page_bits: continue
print(' if u == 0x%04X { return (ISC_%s, IMC_%s); }' % (u, short[0][d[0]], short[1][d[1]]))
for (start, end) in zip(starts, ends):
if p not in [start >> page_bits, end >> page_bits]: continue
offset = 'OFFSET_0X%04X' % start
print(' if (0x%04X..=0x%04X).contains(&u) { return TABLE[u as usize - 0x%04X + %s]; }' % (start, end - 1, start, offset))
print(' }')
print(' _ => {}')
print(' }')
print()
print(' (ISC_x, IMC_x)')
print('}')
# Maintain at least 30% occupancy in the table */
if occupancy < 30:
raise Exception('Table too sparse, please investigate: ', occupancy)

204
vendor/rustybuzz/scripts/gen-shaping-tests.py vendored Executable file
View File

@@ -0,0 +1,204 @@
#!/usr/bin/env python3
import os
import shutil
import sys
import subprocess
from pathlib import Path
# There is no sane way to test them.
IGNORE_TESTS = [
'macos.tests',
'coretext.tests',
'directwrite.tests',
'uniscribe.tests',
]
IGNORE_TEST_CASES = [
# aots tests
# in-house tests
# --shaper=fallback is not supported.
'simple_002',
# Not possible to implement without shaping.
'arabic_fallback_shaping_001',
# `dfont` is not supported.
'collections_001',
'collections_002',
'collections_003',
# Face index out of bounds. ttf-parser doesn't permit this.
'collections_006',
# no `hhea` table.
'indic_decompose_001',
# ttf-parser doesn't support phantom points
'variations_003',
# Resource exhaustion tests with large outputs
'morx_34_001',
'morx_36_001',
# ttf-parser uses different rounding, not a bug
'fallback_positioning_001',
]
def update_relative_path(tests_name, fontfile):
    """Rewrite harfbuzz's `../fonts/...` font path as a repo-root-relative one."""
    stripped = fontfile.replace('../fonts/', '')
    return 'tests/fonts/%s/%s' % (tests_name, stripped)
# Converts `U+0041,U+0078` or `0041,0078` into `\u{0041}\u{0078}`
def convert_unicodes(unicodes):
    """Turn `U+0041,U+0078` (or bare hex `0041,0078`) into Rust escapes
    `\\u{0041}\\u{0078}`, inserting a line continuation every 10 codepoints."""
    pieces = []
    for i, code in enumerate(unicodes.split(',')):
        if i > 0 and i % 10 == 0:
            pieces.append('\\\n ')
        if code.startswith("U+"):
            code = code[2:]
        pieces.append('\\u{%s}' % code)
    return ''.join(pieces)
def convert_test(hb_dir, hb_shape_exe, tests_name, file_name, idx, data, fonts):
    """Convert one harfbuzz `.tests` line into a Rust `#[test]` function.

    Runs the provided `hb-shape` binary to obtain the expected glyph string,
    records the font used into `fonts`, and returns the Rust test source
    (or '' for skipped files/cases).
    """
    if file_name == 'emoji-clusters.tests':
        return ''  # There are a lot of these; let's skip them

    fontfile, options, unicodes, glyphs_expected = data.split(';')
    fontfile_rs = update_relative_path(tests_name, fontfile)
    unicodes_rs = convert_unicodes(unicodes)

    # e.g. `arabic-fallback.tests`, idx 3 -> `arabic_fallback_003`.
    test_name = file_name.replace(
        '.tests', '').replace('-', '_') + f'_{idx:03d}'
    test_name = test_name.lower()

    # Strip options rustybuzz doesn't support / always implies.
    options = options.replace('--shaper=ot', '')
    options = options.replace(
        ' --font-funcs=ft', '').replace('--font-funcs=ft', '')
    options = options.replace(
        ' --font-funcs=ot', '').replace('--font-funcs=ot', '')
    # we don't support font scaling
    options = options.replace('--font-size=1000', '')
    options = options.strip()

    # We have to actually run hb-shape instead of using predefined results,
    # because hb sometimes stores results for freetype and not for embedded OpenType
    # engine, which we are using.
    # Right now, it only affects 'text-rendering-tests'.
    if len(options) != 0:
        options_list = options.split(' ')
    else:
        options_list = []
    options_list.insert(0, str(hb_shape_exe))
    # Force OT functions, since this is the only one we support in rustybuzz.
    options_list.append('--font-funcs=ot')
    abs_font_path = hb_dir.joinpath('test/shape/data') \
        .joinpath(tests_name) \
        .joinpath('tests') \
        .joinpath(fontfile)
    options_list.append(str(abs_font_path))
    options_list.append(f'--unicodes={unicodes}')  # no need to escape it

    glyphs_expected = subprocess.run(options_list, check=True, stdout=subprocess.PIPE) \
        .stdout.decode()
    glyphs_expected = glyphs_expected.strip()[1:-1]  # remove leading and trailing whitespaces and `[..]`
    glyphs_expected = glyphs_expected.replace('|', '|\\\n ')

    # Escape quotes so the options survive inside a Rust string literal.
    options = options.replace('"', '\\"')
    options = options.replace(' --single-par', '')

    fonts.add(os.path.split(fontfile_rs)[1])

    if test_name in IGNORE_TEST_CASES:
        return ''

    return (f'#[test]\n'
            f'fn {test_name}() {{\n'
            f' assert_eq!(\n'
            f' shape(\n'
            f' "{fontfile_rs}",\n'
            f' "{unicodes_rs}",\n'
            f' "{options}",\n'
            f' ),\n'
            f' "{glyphs_expected}"\n'
            f' );\n'
            f'}}\n'
            '\n')
def convert(hb_dir, hb_shape_exe, tests_dir, tests_name):
    """Convert every `.tests` file in `tests_dir` into one Rust test module.

    Writes `../tests/shaping/<tests_name>.rs` and returns the set of font
    file names the generated tests reference.
    """
    files = sorted(os.listdir(tests_dir))
    files = [f for f in files if f.endswith('.tests')]

    fonts = set()
    rust_code = ('// WARNING: this file was generated by ../scripts/gen-shaping-tests.py\n'
                 '\n'
                 'use crate::shape;\n'
                 '\n')
    for file in files:
        if file in IGNORE_TESTS:
            continue
        with open(tests_dir / file, 'r') as f:
            for idx, test in enumerate(f.read().splitlines()):
                # skip comments and empty lines
                if test.startswith('#') or len(test) == 0:
                    continue
                rust_code += convert_test(hb_dir, hb_shape_exe, tests_name,
                                          file, idx + 1, test, fonts)

    tests_name_snake_case = tests_name.replace('-', '_')
    with open(f'../tests/shaping/{tests_name_snake_case}.rs', 'w') as f:
        f.write(rust_code)
    return fonts
# --- Script entry point -------------------------------------------------
if len(sys.argv) != 2:
    print('Usage: gen-shaping-tests.py /path/to/harfbuzz-src')
    exit(1)

hb_dir = Path(sys.argv[1])
assert hb_dir.exists()

# Check that harfbuzz was built.
hb_shape_exe = hb_dir.joinpath('builddir/util/hb-shape')
if not hb_shape_exe.exists():
    print('Build harfbuzz first using:')
    print(' meson builddir')
    print(' ninja -Cbuilddir')
    exit(1)

used_fonts = []
font_files = []
test_dir_names = ['aots', 'in-house', 'text-rendering-tests']
for test_dir_name in test_dir_names:
    tests_dir = hb_dir / f'test/shape/data/{test_dir_name}/tests'
    dir_used_fonts = convert(hb_dir, hb_shape_exe, tests_dir, test_dir_name)
    for filename in dir_used_fonts:
        # BUG FIX: copy the font named by the loop variable. The previous code
        # used the literal path `.../fonts/(unknown)`, which ignored `filename`
        # entirely and could never resolve to an existing file.
        shutil.copy(
            hb_dir / f'test/shape/data/{test_dir_name}/fonts/{filename}',
            f'../tests/fonts/{test_dir_name}')
    used_fonts += dir_used_fonts
    font_files += os.listdir(hb_dir /
                             f'test/shape/data/{test_dir_name}/fonts')

# Check for unused fonts. Just for debugging.
# unused_fonts = sorted(list(set(font_files).difference(used_fonts)))
# if len(unused_fonts) != 0:
#     print('Unused fonts:')
#     for font in unused_fonts:
#         print(font)

1093
vendor/rustybuzz/scripts/gen-tag-table.py vendored Executable file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,51 @@
#!/usr/bin/env python3
# Generates Rust `match` arms for the Extended_Pictographic property from
# the Unicode emoji-data.txt file (Emoji 12.1).
import urllib.request
import os

URL = 'https://www.unicode.org/Public/emoji/12.1/emoji-data.txt'
FILE_NAME = 'emoji-data.txt'

if not os.path.exists(FILE_NAME):
    urllib.request.urlretrieve(URL, FILE_NAME)

is_ext_pict_section = False
ranges = []  # list of [start_hex, end_hex] string pairs
with open(FILE_NAME) as f:
    for line in f:
        line = line.strip()
        # Skip everything up to and including the Extended_Pictographic marker.
        if not is_ext_pict_section:
            if line == '# All omitted code points have Extended_Pictographic=No':
                is_ext_pict_section = True
            continue
        if not line:
            continue
        if line.startswith('#'):
            continue
        # FIX: renamed `range` -> `span`; the original shadowed the builtin.
        span, _ = line.split(' ; ')
        span = span.strip()
        if '..' in span:
            start, end = span.split('..')
            ranges.append([start, end])
        else:
            ranges.append([span, span])

# Merge adjacent ranges.
idx = 0
while idx < len(ranges) - 1:
    if int(ranges[idx][1], 16) + 1 == int(ranges[idx + 1][0], 16):
        ranges[idx][1] = ranges[idx + 1][1]
        del ranges[idx + 1]
    else:
        idx += 1  # FIX: dropped stray trailing semicolon

for span in ranges:
    if span[0] == span[1]:
        # FIX: the single-codepoint arm passed a dead second argument to format().
        print('0x{} => true,'.format(span[0]))
    else:
        print('0x{}..=0x{} => true,'.format(span[0], span[1]))

View File

@@ -0,0 +1,81 @@
#!/usr/bin/env python3
import urllib.request
import os
URL = 'https://www.unicode.org/Public/14.0.0/ucd/UnicodeData.txt'
FILE_NAME = 'UnicodeData.txt'
def hex_to_char_rs(c):
    """Format a hex codepoint string as a Rust char literal: 0041 -> '\\u{0041}'."""
    return "'\\u{" + c + "}'"
if not os.path.exists(FILE_NAME):
    urllib.request.urlretrieve(URL, FILE_NAME)

print('// WARNING: this file was generated by ../scripts/gen-unicode-norm-table.py')
print()
print('//! This module provides Unicode tables for canonical (de)composition.')
print('//!')
print('//! The current implementation is not the fastest one. Just good enough.')
print()
print('#[allow(dead_code)]')
print('pub const UNICODE_VERSION: (u8, u8, u8) = (14, 0, 0);')
print()
print('// Rust support `Option<char>` layout optimization, so it will take only 4 bytes.')
print('pub const DECOMPOSITION_TABLE: &[(char, char, Option<char>)] = &[')

# Pairs [first, second, composed] collected while printing decompositions.
compose_data = []
with open(FILE_NAME) as f:
    for line in f:
        parts = line.split(';')
        if len(parts[5]) == 0:
            continue
        # Skip codepoints with compatibility formatting tags
        # since we care only about canonical mapping.
        if parts[5][0] == '<':
            continue
        # Print the decomposition table as is, since `UnicodeData` is already sorted.
        c = parts[0]
        mapping = parts[5].split(' ')
        if len(mapping) == 2:
            print(f" ({hex_to_char_rs(c)}, {hex_to_char_rs(mapping[0])}, Some({hex_to_char_rs(mapping[1])})),")
            # Remember only codepoints that should be decomposed into two codepoints.
            compose_data.append([mapping[0], mapping[1], c])
        elif len(mapping) == 1:
            print(f' ({hex_to_char_rs(c)}, {hex_to_char_rs(mapping[0])}, None),')
        else:
            # BUG FIX: `raise 'invalid unicode data'` raised a plain string,
            # which is a TypeError in Python 3; raise a real exception.
            raise ValueError('invalid unicode data')
print('];')
print()
print('// The first value is `a << 32 | b`.')
print('// Sorted by the first value.')
print('pub const COMPOSITION_TABLE: &[(u64, char)] = &[')

pairs = []
for mapping in compose_data:
    # Pack the decomposition pair into one u64 search key.
    needle = int(mapping[0], 16) << 32 | int(mapping[1], 16)
    pairs.append((needle, mapping[2]))
pairs.sort(key=lambda x: x[0])

# Make sure that needles are unique.
needles = set()
for pair in pairs:
    needles.add(pair[0])
assert len(pairs) == len(needles)

for pair in pairs:
    print(f' ({pair[0]}, {hex_to_char_rs(pair[1])}),')
print('];')

View File

@@ -0,0 +1,584 @@
#!/usr/bin/env python3
# Based on harfbuzz/src/gen-use-table.py
import io
import os
import urllib.request
DISABLED_SCRIPTS = {
'Arabic',
'Lao',
'Samaritan',
'Syriac',
'Thai',
}
files = ['IndicSyllabicCategory.txt', 'IndicPositionalCategory.txt', 'ArabicShaping.txt',
'DerivedCoreProperties.txt', 'UnicodeData.txt', 'Blocks.txt', 'Scripts.txt',
'ms-use/IndicSyllabicCategory-Additional.txt', 'ms-use/IndicPositionalCategory-Additional.txt']
for f in files:
if not os.path.exists(f):
urllib.request.urlretrieve(
'https://unicode.org/Public/14.0.0/ucd/' + f, f)
files = [io.open(x, encoding='utf-8') for x in files]
headers = [[f.readline() for i in range(2)]
for j, f in enumerate(files) if j != 2]
for j in range(7, 9):
for line in files[j]:
line = line.rstrip()
if not line:
break
headers[j - 1].append(line)
headers.append(["UnicodeData.txt does not have a header."])
unicode_data = [{} for _ in files]
values = [{} for _ in files]
for i, f in enumerate(files):
for line in f:
j = line.find('#')
if j >= 0:
line = line[:j]
fields = [x.strip() for x in line.split(';')]
if len(fields) == 1:
continue
uu = fields[0].split('..')
start = int(uu[0], 16)
if len(uu) == 1:
end = start
else:
end = int(uu[1], 16)
t = fields[1 if i not in [2, 4] else 2]
if i == 2:
t = 'jt_' + t
elif i == 3 and t != 'Default_Ignorable_Code_Point':
continue
elif i == 7 and t == 'Consonant_Final_Modifier':
# TODO: https://github.com/MicrosoftDocs/typography-issues/issues/336
t = 'Syllable_Modifier'
elif i == 8 and t == 'NA':
t = 'Not_Applicable'
i0 = i if i < 7 else i - 7
for u in range(start, end + 1):
unicode_data[i0][u] = t
values[i0][t] = values[i0].get(t, 0) + end - start + 1
defaults = ('Other', 'Not_Applicable', 'jt_X', '', 'Cn', 'No_Block', 'Unknown')
# Merge data into one dict:
for i,v in enumerate (defaults):
values[i][v] = values[i].get (v, 0) + 1
combined = {}
for i,d in enumerate (unicode_data):
for u,v in d.items ():
if not u in combined:
if i >= 4:
continue
combined[u] = list (defaults)
combined[u][i] = v
combined = {k: v for k, v in combined.items() if v[6] not in DISABLED_SCRIPTS}
property_names = [
# General_Category
'Cc', 'Cf', 'Cn', 'Co', 'Cs', 'Ll', 'Lm', 'Lo', 'Lt', 'Lu', 'Mc',
'Me', 'Mn', 'Nd', 'Nl', 'No', 'Pc', 'Pd', 'Pe', 'Pf', 'Pi', 'Po',
'Ps', 'Sc', 'Sk', 'Sm', 'So', 'Zl', 'Zp', 'Zs',
# Indic_Syllabic_Category
'Other',
'Bindu',
'Visarga',
'Avagraha',
'Nukta',
'Virama',
'Pure_Killer',
'Invisible_Stacker',
'Vowel_Independent',
'Vowel_Dependent',
'Vowel',
'Consonant_Placeholder',
'Consonant',
'Consonant_Dead',
'Consonant_With_Stacker',
'Consonant_Prefixed',
'Consonant_Preceding_Repha',
'Consonant_Succeeding_Repha',
'Consonant_Subjoined',
'Consonant_Medial',
'Consonant_Final',
'Consonant_Head_Letter',
'Consonant_Initial_Postfixed',
'Modifying_Letter',
'Tone_Letter',
'Tone_Mark',
'Gemination_Mark',
'Cantillation_Mark',
'Register_Shifter',
'Syllable_Modifier',
'Consonant_Killer',
'Non_Joiner',
'Joiner',
'Number_Joiner',
'Number',
'Brahmi_Joining_Number',
'Hieroglyph',
'Hieroglyph_Joiner',
'Hieroglyph_Segment_Begin',
'Hieroglyph_Segment_End',
# Indic_Positional_Category
'Not_Applicable',
'Right',
'Left',
'Visual_Order_Left',
'Left_And_Right',
'Top',
'Bottom',
'Top_And_Bottom',
'Top_And_Bottom_And_Left',
'Top_And_Right',
'Top_And_Left',
'Top_And_Left_And_Right',
'Bottom_And_Left',
'Bottom_And_Right',
'Top_And_Bottom_And_Right',
'Overstruck',
# Joining_Type
'jt_C',
'jt_D',
'jt_L',
'jt_R',
'jt_T',
'jt_U',
'jt_X',
]
class PropertyValue(object):
    """Symbolic name for a Unicode property value.

    Compares equal to plain strings with the same name as well as to other
    PropertyValue instances, and hashes like its name, so instances and
    strings are interchangeable as set members and dict keys.
    """

    def __init__(self, name_):
        self.name = name_

    def __str__(self):
        return self.name

    def __eq__(self, other):
        if isinstance(other, str):
            return self.name == other
        return self.name == other.name

    def __ne__(self, other):
        return not (self == other)

    def __hash__(self):
        return hash(str(self))
property_values = {}
for name in property_names:
value = PropertyValue(name)
assert value not in property_values
assert value not in globals()
property_values[name] = value
globals().update(property_values)
def is_BASE(U, UISC, UDI, UGC, AJT):
return (UISC in [Number, Consonant, Consonant_Head_Letter,
Tone_Letter,
Vowel_Independent,
] or
# TODO: https://github.com/MicrosoftDocs/typography-issues/issues/484
AJT in [jt_C, jt_D, jt_L, jt_R] and UISC != Joiner or
(UGC == Lo and UISC in [Avagraha, Bindu, Consonant_Final, Consonant_Medial,
Consonant_Subjoined, Vowel, Vowel_Dependent]))
def is_BASE_NUM(U, UISC, UDI, UGC, AJT):
return UISC == Brahmi_Joining_Number
def is_BASE_OTHER(U, UISC, UDI, UGC, AJT):
if UISC == Consonant_Placeholder:
return True
return U in [0x2015, 0x2022, 0x25FB, 0x25FC, 0x25FD, 0x25FE]
def is_CGJ(U, UISC, UDI, UGC, AJT):
# Also includes VARIATION_SELECTOR and ZWJ
return UISC == Joiner or UDI and UGC in [Mc, Me, Mn]
def is_CONS_FINAL(U, UISC, UDI, UGC, AJT):
return ((UISC == Consonant_Final and UGC != Lo) or
UISC == Consonant_Succeeding_Repha)
def is_CONS_FINAL_MOD(U, UISC, UDI, UGC, AJT):
return UISC == Syllable_Modifier
def is_CONS_MED(U, UISC, UDI, UGC, AJT):
# Consonant_Initial_Postfixed is new in Unicode 11; not in the spec.
return (UISC == Consonant_Medial and UGC != Lo or
UISC == Consonant_Initial_Postfixed)
def is_CONS_MOD(U, UISC, UDI, UGC, AJT):
return (UISC in [Nukta, Gemination_Mark, Consonant_Killer] and
not is_SYM_MOD(U, UISC, UDI, UGC, AJT))
def is_CONS_SUB(U, UISC, UDI, UGC, AJT):
return UISC == Consonant_Subjoined and UGC != Lo
def is_CONS_WITH_STACKER(U, UISC, UDI, UGC, AJT):
return UISC == Consonant_With_Stacker
def is_HALANT(U, UISC, UDI, UGC, AJT):
return UISC == Virama
def is_HALANT_NUM(U, UISC, UDI, UGC, AJT):
return UISC == Number_Joiner
def is_HIEROGLYPH(U, UISC, UDI, UGC, AJT):
return UISC == Hieroglyph
def is_HIEROGLYPH_JOINER(U, UISC, UDI, UGC, AJT):
return UISC == Hieroglyph_Joiner
def is_HIEROGLYPH_SEGMENT_BEGIN(U, UISC, UDI, UGC, AJT):
return UISC == Hieroglyph_Segment_Begin
def is_HIEROGLYPH_SEGMENT_END(U, UISC, UDI, UGC, AJT):
return UISC == Hieroglyph_Segment_End
def is_INVISIBLE_STACKER(U, UISC, UDI, UGC, AJT):
# Split off of HALANT
return (UISC == Invisible_Stacker
and not is_SAKOT(U, UISC, UDI, UGC, AJT)
)
def is_ZWNJ(U, UISC, UDI, UGC, AJT):
return UISC == Non_Joiner
def is_OTHER(U, UISC, UDI, UGC, AJT):
# Also includes BASE_IND, and SYM
return ((UGC == Po or UISC in [Consonant_Dead, Joiner, Modifying_Letter, Other])
and not is_BASE(U, UISC, UDI, UGC, AJT)
and not is_BASE_OTHER(U, UISC, UDI, UGC, AJT)
and not is_CGJ(U, UISC, UDI, UGC, AJT)
and not is_SYM_MOD(U, UISC, UDI, UGC, AJT)
and not is_Word_Joiner(U, UISC, UDI, UGC, AJT)
)
def is_REPHA(U, UISC, UDI, UGC, AJT):
return UISC in [Consonant_Preceding_Repha, Consonant_Prefixed]
def is_SAKOT(U, UISC, UDI, UGC, AJT):
# Split off of HALANT
return U == 0x1A60
def is_SYM_MOD(U, UISC, UDI, UGC, AJT):
return U in [0x1B6B, 0x1B6C, 0x1B6D, 0x1B6E, 0x1B6F, 0x1B70, 0x1B71, 0x1B72, 0x1B73]
def is_VOWEL(U, UISC, UDI, UGC, AJT):
return (UISC == Pure_Killer or
UGC != Lo and UISC in [Vowel, Vowel_Dependent])
def is_VOWEL_MOD(U, UISC, UDI, UGC, AJT):
return (UISC in [Tone_Mark, Cantillation_Mark, Register_Shifter, Visarga] or
UGC != Lo and UISC == Bindu)
def is_Word_Joiner(U, UISC, UDI, UGC, AJT):
# Also includes Rsv
return (UDI and U not in [0x115F, 0x1160, 0x3164, 0xFFA0, 0x1BCA0, 0x1BCA1, 0x1BCA2, 0x1BCA3]
and UISC == Other
and not is_CGJ(U, UISC, UDI, UGC, AJT)
) or UGC == Cn
use_mapping = {
'B': is_BASE,
'N': is_BASE_NUM,
'GB': is_BASE_OTHER,
'CGJ': is_CGJ,
'F': is_CONS_FINAL,
'FM': is_CONS_FINAL_MOD,
'M': is_CONS_MED,
'CM': is_CONS_MOD,
'SUB': is_CONS_SUB,
'CS': is_CONS_WITH_STACKER,
'H': is_HALANT,
'HN': is_HALANT_NUM,
'IS': is_INVISIBLE_STACKER,
'G': is_HIEROGLYPH,
'J': is_HIEROGLYPH_JOINER,
'SB': is_HIEROGLYPH_SEGMENT_BEGIN,
'SE': is_HIEROGLYPH_SEGMENT_END,
'ZWNJ': is_ZWNJ,
'O': is_OTHER,
'R': is_REPHA,
'SK': is_SAKOT,
'SM': is_SYM_MOD,
'V': is_VOWEL,
'VM': is_VOWEL_MOD,
'WJ': is_Word_Joiner,
}
use_positions = {
'F': {
'ABV': [Top],
'BLW': [Bottom],
'PST': [Right],
},
'M': {
'ABV': [Top],
'BLW': [Bottom, Bottom_And_Left, Bottom_And_Right],
'PST': [Right],
'PRE': [Left, Top_And_Bottom_And_Left],
},
'CM': {
'ABV': [Top],
'BLW': [Bottom, Overstruck],
},
'V': {
'ABV': [Top, Top_And_Bottom, Top_And_Bottom_And_Right, Top_And_Right],
'BLW': [Bottom, Overstruck, Bottom_And_Right],
'PST': [Right],
'PRE': [Left, Top_And_Left, Top_And_Left_And_Right, Left_And_Right],
},
'VM': {
'ABV': [Top],
'BLW': [Bottom, Overstruck],
'PST': [Right],
'PRE': [Left],
},
'SM': {
'ABV': [Top],
'BLW': [Bottom],
},
'H': None,
'IS': None,
'B': None,
'FM': {
'ABV': [Top],
'BLW': [Bottom],
'PST': [Not_Applicable],
},
'R': None,
'SUB': None,
}
def map_to_use(data):
    """Assign every codepoint its USE category (plus position suffix) and block.

    `data` maps codepoint -> (UISC, UIPC, AJT, UDI, UGC, UBlock, script)
    as built from the merged UCD files; returns codepoint -> (USE, UBlock).
    Raises AssertionError if a codepoint matches zero or several categories.
    """
    out = {}
    items = use_mapping.items()
    for U, (UISC, UIPC, AJT, UDI, UGC, UBlock, _) in data.items():
        # Resolve Indic_Syllabic_Category

        # TODO: These don't have UISC assigned in Unicode 13.0.0, but have UIPC
        if 0x1CE2 <= U <= 0x1CE8:
            UISC = Cantillation_Mark

        # Tibetan:
        # TODO: These don't have UISC assigned in Unicode 13.0.0, but have UIPC
        if 0x0F18 <= U <= 0x0F19 or 0x0F3E <= U <= 0x0F3F:
            UISC = Vowel_Dependent

        # TODO: https://github.com/harfbuzz/harfbuzz/pull/627
        if 0x1BF2 <= U <= 0x1BF3:
            UISC = Nukta
            UIPC = Bottom

        # TODO: U+1CED should only be allowed after some of
        # the nasalization marks, maybe only for U+1CE9..U+1CF1.
        if U == 0x1CED:
            UISC = Tone_Mark

        # Exactly one is_* predicate from use_mapping must accept the codepoint.
        values = [k for k, v in items if v(U, UISC, UDI, UGC, AJT)]
        assert len(values) == 1, "%s %s %s %s %s %s" % (
            hex(U), UISC, UDI, UGC, AJT, values)
        USE = values[0]

        # Resolve Indic_Positional_Category

        # TODO: These should die, but have UIPC in Unicode 13.0.0
        if U in [0x953, 0x954]:
            UIPC = Not_Applicable
        # TODO: These are not in USE's override list that we have, nor are they in Unicode 13.0.0
        if 0xA926 <= U <= 0xA92A:
            UIPC = Top
        # TODO: https://github.com/harfbuzz/harfbuzz/pull/1037
        # and https://github.com/harfbuzz/harfbuzz/issues/1631
        if U in [0x11302, 0x11303, 0x114C1]:
            UIPC = Top
        if 0x1CF8 <= U <= 0x1CF9:
            UIPC = Top
        # TODO: https://github.com/harfbuzz/harfbuzz/issues/3550
        if U == 0x10A38: UIPC = Bottom
        # TODO: https://github.com/harfbuzz/harfbuzz/pull/982
        # also https://github.com/harfbuzz/harfbuzz/issues/1012
        if 0x1112A <= U <= 0x1112B:
            UIPC = Top
        if 0x11131 <= U <= 0x11132:
            UIPC = Top

        assert (UIPC in [Not_Applicable, Visual_Order_Left] or U == 0x0F7F or
                USE in use_positions), "%s %s %s %s %s %s %s" % (hex(U), UIPC, USE, UISC, UDI, UGC, AJT)

        # Append the positional suffix (ABV/BLW/PST/PRE) where the category
        # distinguishes positions.
        pos_mapping = use_positions.get(USE, None)
        if pos_mapping:
            values = [k for k, v in pos_mapping.items() if v and UIPC in v]
            assert len(values) == 1, "%s %s %s %s %s %s %s %s" % (
                hex(U), UIPC, USE, UISC, UDI, UGC, AJT, values)
            USE = USE + values[0]

        out[U] = (USE, UBlock)
    return out
use_data = map_to_use(combined)
print('// WARNING: this file was generated by scripts/gen-universal-table.py')
print()
print('use super::hb_glyph_info_t;')
print('use super::ot_shape_complex_use::{category::*, Category};')
print('use unicode_properties::GeneralCategory;')
total = 0
used = 0
last_block = None
def print_block(block, start, end, use_data):
    """Emit one Unicode block's rows of USE categories, 16 codepoints per row.

    Updates the module-level `total`/`used` occupancy counters and
    `last_block`. `start`/`end` must be aligned to 8-codepoint rows.
    """
    global total, used, last_block
    if block and block != last_block:
        print()
        print()
        print(' /* %s */' % block)
        # Pad a partial first row so category columns stay aligned.
        if start % 16:
            print(' ' * (20 + (start % 16 * 6)), end='')
    num = 0
    assert start % 8 == 0
    assert (end + 1) % 8 == 0
    for u in range(start, end + 1):
        if u % 16 == 0:
            print()
            print(' /* %04X */' % u, end='')
        if u in use_data:
            num += 1
        d = use_data.get(u)
        if d is not None:
            d = d[0]
        elif u in unicode_data[4]:
            # Assigned in UnicodeData.txt but not mapped -> Other.
            d = 'O'
        else:
            # Unassigned codepoints -> Word_Joiner-ish placeholder.
            d = 'WJ'
        print("%6s," % d, end='')
    total += end - start + 1
    used += num
    if block:
        last_block = block
uu = sorted(use_data.keys())
last = -100000
num = 0
offset = 0
starts = []
ends = []
print()
print('#[rustfmt::skip]')
print('const USE_TABLE: &[Category] = &[')
offsets = []
for u in uu:
if u <= last:
continue
if use_data[u][0] == 'O':
continue
block = use_data[u][1]
start = u // 8 * 8
end = start + 1
while end in uu and block == use_data[end][1]:
end += 1
end = (end - 1) // 8 * 8 + 7
if start != last + 1:
if start - last <= 1 + 16 * 3:
print_block(None, last + 1, start - 1, use_data)
last = start - 1
else:
if last >= 0:
ends.append(last + 1)
offset += ends[-1] - starts[-1]
offsets.append('const USE_OFFSET_0X%04X: usize = %d;' %
(start, offset))
starts.append(start)
print_block(block, start, end, use_data)
last = end
ends.append(last + 1)
offset += ends[-1] - starts[-1]
print()
print()
occupancy = used * 100. / total
page_bits = 12
print('];')
print()
for o in offsets:
print(o)
print()
print('#[rustfmt::skip]')
print('pub fn get_category(info: &hb_glyph_info_t) -> Category {')
print(' let u = info.glyph_id;')
print(' match u >> %d {' % page_bits)
pages = set([u >> page_bits for u in starts + ends])
for p in sorted(pages):
print(' 0x%0X => {' % p)
for (start, end) in zip(starts, ends):
if p not in [start >> page_bits, end >> page_bits]:
continue
offset = 'USE_OFFSET_0X%04X' % start
print(' if (0x%04X..=0x%04X).contains(&u) { return USE_TABLE[u as usize - 0x%04X + %s]; }' % (
start, end - 1, start, offset))
print(' }')
print(' _ => {}')
print(' }')
print()
print(' if crate::hb::ot_layout::_hb_glyph_info_get_general_category(info) == GeneralCategory::Unassigned {')
print(' return WJ;')
print(' }')
print()
print(' O')
print('}')
# Maintain at least 50% occupancy in the table */
if occupancy < 50:
raise Exception('Table too sparse, please investigate: ', occupancy)

View File

@@ -0,0 +1,192 @@
#!/usr/bin/env python3
"""
Generator of the function to prohibit certain vowel sequences.
It creates ``preprocess_text_vowel_constraints``, which inserts dotted
circles into sequences prohibited by the USE script development spec.
Based on harfbuzz/src/gen-vowel-constraints.py
"""
import collections
import io
import os
import urllib.request
# Fetch Scripts.txt (Unicode 14.0.0) on first run; later runs reuse the
# cached local copy.
if not os.path.exists('Scripts.txt'):
    urllib.request.urlretrieve('https://unicode.org/Public/14.0.0/ucd/Scripts.txt', 'Scripts.txt')
with io.open('Scripts.txt', encoding='utf-8') as f:
    scripts_header = [f.readline() for i in range(2)]
    # scripts: code point -> script name.
    # script_order: script name -> start of the first range it was seen in;
    # used later to give the generated match arms a stable order.
    scripts = {}
    script_order = {}
    for line in f:
        # Strip the trailing comment, if any; blank/comment-only lines then
        # split into a single empty field and are skipped.
        j = line.find('#')
        if j >= 0:
            line = line[:j]
        fields = [x.strip() for x in line.split(';')]
        if len(fields) == 1:
            continue
        # First field is either a single code point or a XXXX..YYYY range.
        uu = fields[0].split('..')
        start = int(uu[0], 16)
        if len(uu) == 1:
            end = start
        else:
            end = int(uu[1], 16)
        script = fields[1]
        for u in range(start, end + 1):
            scripts[u] = script
        if script not in script_order:
            script_order[script] = start
class ConstraintSet:
    """A set of prohibited code point sequences.

    Stores one or more prohibited sequences and renders them as the body
    of generated Rust matching code via ``__str__``.

    Args:
        constraint (List[int]): A prohibited code point sequence.
    """

    def __init__(self, constraint):
        # Either a list or a dictionary. As a list of code points, it
        # represents a prohibited code point sequence. As a dictionary,
        # it represents a set of prohibited sequences, where each item
        # represents the set of prohibited sequences starting with the
        # key (a code point) concatenated with any of the values
        # (ConstraintSets).
        self._c = constraint

    def add(self, constraint):
        """Add a constraint to this set."""
        if not constraint:
            return
        first = constraint[0]
        rest = constraint[1:]
        if isinstance(self._c, list):
            if constraint == self._c[:len(constraint)]:
                # The new sequence is a prefix of the stored one: the
                # shorter sequence subsumes the longer.
                self._c = constraint
            elif self._c != constraint[:len(self._c)]:
                # The sequences diverge: switch to the dict (trie) form so
                # both can be kept.
                self._c = {self._c[0]: ConstraintSet(self._c[1:])}
        if isinstance(self._c, dict):
            if first in self._c:
                self._c[first].add(rest)
            else:
                self._c[first] = ConstraintSet(rest)

    def __str__(self, index=0, depth=4):
        """Render this set as generated Rust matching code.

        Args:
            index (int): Buffer offset of the code point tested at this
                level of the trie.
            depth (int): Nominal indentation depth; only threaded through
                recursive calls here (the output is re-indented by rustfmt).
        """
        s = []
        if isinstance(self._c, list):
            if len(self._c) == 0:
                # Empty tail: the full sequence already matched upstream.
                assert index == 2, 'Cannot use `matched` for this constraint; the general case has not been implemented'
                s.append('matched = true;\n')
            elif len(self._c) == 1:
                assert index == 1, 'Cannot use `matched` for this constraint; the general case has not been implemented'
                s.append('matched = 0x{:04X} == buffer.cur({}).glyph_id;\n'.format(next(
                    iter(self._c)), index))
            else:
                # General case: emit an `if` chain testing every remaining
                # code point, then skip the matched glyphs and insert the
                # dotted circle.
                s.append('if 0x{:04X} == buffer.cur({}).glyph_id &&\n'.format(self._c[0], index))
                if index:
                    s.append('buffer.idx + {} < buffer.len &&\n'.format(index + 1))
                for i, cp in enumerate(self._c[1:], start=1):
                    s.append('0x{:04X} == buffer.cur({}).glyph_id{}\n'.format(
                        cp, index + i, '' if i == len(self._c) - 1 else ' &&'))
                s.append('{\n')
                for _ in range(index + 1):
                    s.append('buffer.next_glyph();\n')
                s.append('output_dotted_circle(buffer);\n')
                s.append('}\n')
        else:
            s.append('match buffer.cur({}).glyph_id {{\n'.format(index))
            # Group code points whose continuations render identically so
            # they can share a single match arm.
            cases = collections.defaultdict(set)
            for first, rest in sorted(self._c.items()):
                cases[rest.__str__(index + 1, depth + 2)].add(first)
            for ii, (body, labels) in enumerate(sorted(cases.items(), key=lambda b_ls: sorted(b_ls[1])[0])):
                for i, cp in enumerate(sorted(labels)):
                    if i == len(labels) - 1:
                        s.append(' 0x{:04X} => {{ {}'.format(cp, '\n' if i % 4 == 3 else ''))
                    else:
                        s.append(' 0x{:04X} | {}'.format(cp, '\n' if i % 4 == 3 else ''))
                s.append(body)
                s.append('}')
                if ii == len(cases) - 1:
                    s.append('_ => {}')
            s.append('}\n')
        return ''.join(s)
# Build one ConstraintSet per script from the prohibited sequences listed
# in ms-use/IndicShapingInvalidCluster.txt; each sequence is filed under
# the script of its first code point.
constraints = {}
with io.open('ms-use/IndicShapingInvalidCluster.txt', encoding='utf-8') as f:
    constraints_header = []
    while True:
        line = f.readline().strip()
        if line == '#':
            # A bare '#' line terminates the file header.
            break
        constraints_header.append(line)
    for line in f:
        # Strip trailing comments; the data is space-separated hex code
        # points before the first ';'.
        j = line.find('#')
        if j >= 0:
            line = line[:j]
        constraint = [int(cp, 16) for cp in line.split(';')[0].split()]
        if not constraint:
            continue
        assert 2 <= len(constraint), 'Prohibited sequence is too short: {}'.format(constraint)
        script = scripts[constraint[0]]
        if script in constraints:
            constraints[script].add(constraint)
        else:
            constraints[script] = ConstraintSet(constraint)
    assert constraints, 'No constraints found'
# Emit the generated Rust source: helper functions, then one match arm per
# script that walks the buffer and inserts U+25CC DOTTED CIRCLE into
# prohibited vowel sequences.  The exact text printed here IS the output
# file, so every string below must stay verbatim.
print('// WARNING: this file was generated by scripts/gen-vowel-constraints.py')
print()
print('use super::buffer::hb_buffer_t;')
print('use super::ot_layout::*;')
print('use super::script;')
print('use crate::BufferFlags;')
print()
print('fn output_dotted_circle(buffer: &mut hb_buffer_t) {')
print('    buffer.output_glyph(0x25CC);')
print('    {')
print('        let out_idx = buffer.out_len - 1;')
print('        _hb_glyph_info_reset_continuation(&mut buffer.out_info_mut()[out_idx]);')
print('    }')
print('}')
print()
print('fn output_with_dotted_circle(buffer: &mut hb_buffer_t) {')
print('    output_dotted_circle(buffer);')
print('    buffer.next_glyph();')
print('}')
print()
print('pub fn preprocess_text_vowel_constraints(buffer: &mut hb_buffer_t) {')
print('    if buffer.flags.contains(BufferFlags::DO_NOT_INSERT_DOTTED_CIRCLE) {')
print('        return;')
print('    }')
print()
print('    // UGLY UGLY UGLY business of adding dotted-circle in the middle of')
print('    // vowel-sequences that look like another vowel. Data for each script')
print('    // collected from the USE script development spec.')
print('    //')
print('    // https://github.com/harfbuzz/harfbuzz/issues/1019')
print('    buffer.clear_output();')
print('    match buffer.script {')
# Arms are ordered by each script's first code point (script_order).
# NOTE: the loop target rebinds the module-level `constraints` name to each
# script's ConstraintSet; harmless here since this is its last use.
for script, constraints in sorted(constraints.items(), key=lambda s_c: script_order[s_c[0]]):
    print('        Some(script::{}) => {{'.format(script.upper()))
    print('            buffer.idx = 0;')
    print('            while buffer.idx + 1 < buffer.len {')
    print('                #[allow(unused_mut)]')
    print('                let mut matched = false;')
    print(str(constraints), end='')
    print('                buffer.next_glyph();')
    print('                if matched { output_with_dotted_circle(buffer); }')
    print('            }')
    print('        }')
    print()
print('        _ => {}')
print('    }')
print('    buffer.sync();')
print('}')
print()

21
vendor/rustybuzz/scripts/ms-use/COPYING vendored Normal file
View File

@@ -0,0 +1,21 @@
MIT License
Copyright (c) Microsoft Corporation.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@@ -0,0 +1,109 @@
# Override values For Indic_Positional_Category
# Not derivable
# Initial version based on Unicode 7.0 by Andrew Glass 2014-03-17
# Updated for Unicode 10.0 by Andrew Glass 2017-07-25
# Amended for Unicode 10.0 by Andrew Glass 2018-09-21
# Updated for L2/19-083 by Andrew Glass 2019-05-06
# Updated for Unicode 12.1 by Andrew Glass 2019-05-30
# Updated for Unicode 13.0 by Andrew Glass 2020-07-28
# Updated for Unicode 14.0 by Andrew Glass 2021-09-28
# ================================================
# ================================================
# OVERRIDES TO ASSIGNED VALUES
# ================================================
# ================================================
# Indic_Positional_Category=Bottom
0F72 ; Bottom # Mn TIBETAN VOWEL SIGN I # Not really below, but need to override to fit into Universal model
0F7A..0F7D ; Bottom # Mn [4] TIBETAN VOWEL SIGN E..TIBETAN VOWEL SIGN OO # Not really below, but need to override to fit into Universal model
0F80 ; Bottom # Mn TIBETAN VOWEL SIGN REVERSED I # Not really below, but need to override to fit into Universal model
A9BF ; Bottom # Mc JAVANESE CONSONANT SIGN CAKRA
11127..11129 ; Bottom # Mn [3] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN II
1112D ; Bottom # Mn CHAKMA VOWEL SIGN AI
11130 ; Bottom # Mn CHAKMA VOWEL SIGN OI
# ================================================
# Indic_Positional_Category=Left
1C29 ; Left # Mc LEPCHA VOWEL SIGN OO # Reduced from Top_And_Left
# ================================================
# Indic_Positional_Category=Right
A9BE ; Right # Mc JAVANESE CONSONANT SIGN PENGKAL # Reduced from Bottom_And_Right
10A0C ; Right # Mn KHAROSHTHI VOWEL LENGTH MARK # Follows vowels and precedes vowel modifiers
11942 ; Right # Mc DIVES AKURU MEDIAL RA # Reduced from Bottom_And_Right
# ================================================
# Indic_Positional_Category=Top
0F74 ; Top # Mn TIBETAN VOWEL SIGN U # Not really above, but need to override to fit into Universal model
1A18 ; Top # Mn BUGINESE VOWEL SIGN U # Workaround to allow below to occur before above by treating all below marks as above
AA35   ; Top # Mn       CHAM CONSONANT SIGN
# ================================================
# Indic_Positional_Category=Top_And_Right
0E33 ; Top_And_Right # Lo THAI CHARACTER SARA AM # IMC has Right, which seems to be a mistake.
0EB3 ; Top_And_Right # Lo LAO VOWEL SIGN AM # IMC has Right, which seems to be a mistake.
# ================================================
# ================================================
# VALUES NOT ASSIGNED IN Indic_Positional_Category
# ================================================
# ================================================
# Indic_Positional_Category=Bottom
0859..085B ; Bottom # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
18A9 ; Bottom # Mn MONGOLIAN LETTER ALI GALI DAGALGA
10AE5 ; Bottom # Mn MANICHAEAN ABBREVIATION MARK ABOVE # Overriden, ccc controls order
10AE6 ; Bottom # Mn MANICHAEAN ABBREVIATION MARK BELOW
10F46..10F47 ; Bottom # Mn [2] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING TWO DOTS BELOW
10F48..10F4A ; Bottom # Mn [3] SOGDIAN COMBINING DOT ABOVE..SOGDIAN COMBINING CURVE ABOVE # Overriden, ccc controls order
10F4B ; Bottom # Mn SOGDIAN COMBINING CURVE BELOW
10F4C ; Bottom # Mn SOGDIAN COMBINING HOOK ABOVE # Overriden, ccc controls order
10F4D..10F50 ; Bottom # Mn [4] SOGDIAN COMBINING HOOK BELOW..SOGDIAN COMBINING STROKE BELOW
10F82 ; Bottom # Mn OLD UYGHUR COMBINING DOT ABOVE # Overriden, ccc controls order
10F83 ; Bottom # Mn OLD UYGHUR COMBINING DOT BELOW
10F84 ; Bottom # Mn OLD UYGHUR COMBINING TWO DOTS ABOVE # Overriden, ccc controls order
10F85 ; Bottom # Mn OLD UYGHUR COMBINING TWO DOTS BELOW
16F4F ; Bottom # Mn MIAO SIGN CONSONANT MODIFIER BAR
16F51..16F87 ; Bottom # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI
16F8F..16F92 ; Bottom # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW
# ================================================
# Indic_Positional_Category=Left
103C ; Left # Mc MYANMAR CONSONANT SIGN MEDIAL RA
# ================================================
# Indic_Positional_Category=Top
07EB..07F3 ; Top # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
07FD ; Top # Mn NKO DANTAYALAN # Not really top, but assigned here to allow ccc to control mark order
1885..1886 ; Top # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA
10D24..10D27 ; Top # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI
10EAB..10EAC ; Top # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
16B30..16B36 ; Top # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM
1E130..1E136 ; Top # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D
1E2AE ; Top # Mn TOTO SIGN RISING TONE
1E2EC..1E2EF ; Top # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI
1E944..1E94A ; Top # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA
# ================================================
# Indic_Positional_Category=Overstruck
1BC9D..1BC9E ; Overstruck # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK
# ================================================
# ================================================
# Deliberately suppressed
# ================================================
# ================================================
# Indic_Positional_Category=NA
180B..180D ; NA # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
180F ; NA # Mn MONGOLIAN FREE VARIATION SELECTOR FOUR
2D7F ; NA # Mn TIFINAGH CONSONANT JOINER

View File

@@ -0,0 +1,105 @@
# IndicShapingInvalidCluster.txt
# Date: 2015-03-12, 21:17:00 GMT [AG]
# Date: 2019-11-08, 23:22:00 GMT [AG]
#
# This file defines the following property:
#
# Indic_Shaping_Invalid_Cluster
#
# Scope: This file enumerates sequences of characters that should be treated as invalid clusters
0905 0946 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN SHORT E
0905 093E ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN AA
0930 094D 0907 ; # DEVANAGARI LETTER RA, DEVANAGARI SIGN VIRAMA, DEVANAGARI LETTER I
0909 0941 ; # DEVANAGARI LETTER U, DEVANAGARI VOWEL SIGN U
090F 0945 ; # DEVANAGARI LETTER E, DEVANAGARI VOWEL SIGN CANDRA E
090F 0946 ; # DEVANAGARI LETTER E, DEVANAGARI VOWEL SIGN SHORT E
090F 0947 ; # DEVANAGARI LETTER E, DEVANAGARI VOWEL SIGN E
0905 0949 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN CANDRA O
0906 0945 ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN CANDRA E
0905 094A ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN SHORT O
0906 0946 ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN SHORT E
0905 094B ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN O
0906 0947 ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN E
0905 094C ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN AU
0906 0948 ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN AI
0905 0945 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN CANDRA E
0905 093A ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN OE
0905 093B ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN OOE
0906 093A ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN OE
0905 094F ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN AW
0905 0956 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN UE
0905 0957 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN UUE
0985 09BE ; # BENGALI LETTER A, BENGALI VOWEL SIGN AA
098B 09C3 ; # BENGALI LETTER VOCALIC R, BENGALI VOWEL SIGN VOCALIC R
098C 09E2 ; # BENGALI LETTER VOCALIC L, BENGALI VOWEL SIGN VOCALIC L
0A05 0A3E ; # GURMUKHI LETTER A, GURMUKHI VOWEL SIGN AA
0A72 0A3F ; # GURMUKHI IRI, GURMUKHI VOWEL SIGN I
0A72 0A40 ; # GURMUKHI IRI, GURMUKHI VOWEL SIGN II
0A73 0A41 ; # GURMUKHI URA, GURMUKHI VOWEL SIGN U
0A73 0A42 ; # GURMUKHI URA, GURMUKHI VOWEL SIGN UU
0A72 0A47 ; # GURMUKHI IRI, GURMUKHI VOWEL SIGN EE
0A05 0A48 ; # GURMUKHI LETTER A, GURMUKHI VOWEL SIGN AI
0A73 0A4B ; # GURMUKHI URA, GURMUKHI VOWEL SIGN OO
0A05 0A4C ; # GURMUKHI LETTER A, GURMUKHI VOWEL SIGN AU
0A85 0ABE ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AA
0A85 0AC5 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN CANDRA E
0A85 0AC7 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN E
0A85 0AC8 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AI
0A85 0AC9 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN CANDRA O
0A85 0ACB ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN O
0A85 0ABE 0AC5 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AA, GUJARATI VOWEL SIGN CANDRA E
0A85 0ACC ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AU
0A85 0ABE 0AC8 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AA, GUJARATI VOWEL SIGN AI
0AC5 0ABE ; # GUJARATI VOWEL SIGN CANDRA E, GUJARATI VOWEL SIGN AA
0B05 0B3E ; # ORIYA LETTER A, ORIYA VOWEL SIGN AA
0B0F 0B57 ; # ORIYA LETTER E, ORIYA AU LENGTH MARK
0B13 0B57 ; # ORIYA LETTER O, ORIYA AU LENGTH MARK
0B85 0BC2 ; # TAMIL LETTER A, TAMIL VOWEL SIGN UU
0C12 0C55 ; # TELUGU LETTER O, TELUGU LENGTH MARK
0C12 0C4C ; # TELUGU LETTER O, TELUGU VOWEL SIGN AU
0C3F 0C55 ; # TELUGU VOWEL SIGN I, TELUGU LENGTH MARK
0C46 0C55 ; # TELUGU VOWEL SIGN E, TELUGU LENGTH MARK
0C4A 0C55 ; # TELUGU VOWEL SIGN O, TELUGU LENGTH MARK
0C89 0CBE ; # KANNADA LETTER U, KANNADA VOWEL SIGN AA
0C92 0CCC ; # KANNADA LETTER O, KANNADA VOWEL SIGN AU
0C8B 0CBE ; # KANNADA LETTER VOCALIC R, KANNADA VOWEL SIGN AA
0D07 0D57 ; # MALAYALAM LETTER I, MALAYALAM AU LENGTH MARK
0D09 0D57 ; # MALAYALAM LETTER U, MALAYALAM AU LENGTH MARK
0D0E 0D46 ; # MALAYALAM LETTER E, MALAYALAM VOWEL SIGN E
0D12 0D3E ; # MALAYALAM LETTER O, MALAYALAM VOWEL SIGN AA
0D12 0D57 ; # MALAYALAM LETTER O, MALAYALAM AU LENGTH MARK
0D85 0DCF ; # SINHALA LETTER AYANNA, SINHALA VOWEL SIGN AELA-PILLA
0D85 0DD0 ; # SINHALA LETTER AYANNA, SINHALA VOWEL SIGN KETTI AEDA-PILLA
0D85 0DD1 ; # SINHALA LETTER AYANNA, SINHALA VOWEL SIGN DIGA AEDA-PILLA
0D8B 0DDF ; # SINHALA LETTER UYANNA, SINHALA VOWEL SIGN GAYANUKITTA
0D8D 0DD8 ; # SINHALA LETTER IRUYANNA, SINHALA VOWEL SIGN GAETTA-PILLA
0D8F 0DDF ; # SINHALA LETTER ILUYANNA, SINHALA VOWEL SIGN GAYANUKITTA
0D91 0DCA ; # SINHALA LETTER EYANNA, SINHALA SIGN AL-LAKUNA
0D91 0DD9 ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA
0D91 0DDA ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN DIGA KOMBUVA
0D91 0DDC ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA HAA AELA-PILLA
0D91 0DDD ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA HAA DIGA AELA-PILLA
0D91 0DDE ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA HAA GAYANUKITTA
0D94 0DDF ; # SINHALA LETTER OYANNA, SINHALA VOWEL SIGN GAYANUKITTA
11005 11038 ; # BRAHMI LETTER A, BRAHMI VOWEL SIGN AA
1100B 1103E ; # BRAHMI LETTER VOCALIC R, BRAHMI VOWEL SIGN VOCALIC R
1100F 11042 ; # BRAHMI LETTER E, BRAHMI VOWEL SIGN E
11680 116AD ; # TAKRI LETTER A, TAKRI VOWEL SIGN AA
11686 116B2 ; # TAKRI LETTER E, TAKRI VOWEL SIGN E
11680 116B4 ; # TAKRI LETTER A, TAKRI VOWEL SIGN O
11680 116B5 ; # TAKRI LETTER A, TAKRI VOWEL SIGN AU
112B0 112E0 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN AA
112B0 112E5 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN E
112B0 112E6 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN AI
112B0 112E7 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN O
112B0 112E8 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN AU
11481 114B0 ; # TIRHUTA LETTER A, TIRHUTA VOWEL SIGN AA
114AA 114B5 ; # TIRHUTA LETTER LA, TIRHUTA VOWEL SIGN VOCALIC R
114AA 114B6 ; # TIRHUTA LETTER LA, TIRHUTA VOWEL SIGN VOCALIC RR
1148B 114BA ; # TIRHUTA LETTER E, TIRHUTA VOWEL SIGN SHORT E
1148D 114BA ; # TIRHUTA LETTER O, TIRHUTA VOWEL SIGN SHORT E
11600 11639 ; # MODI LETTER A, MODI VOWEL SIGN E
11600 1163A ; # MODI LETTER A, MODI VOWEL SIGN AI
11601 11639 ; # MODI LETTER AA, MODI VOWEL SIGN E
11601 1163A ; # MODI LETTER AA, MODI VOWEL SIGN AI

View File

@@ -0,0 +1,221 @@
# Override values For Indic_Syllabic_Category
# Not derivable
# Initial version based on Unicode 7.0 by Andrew Glass 2014-03-17
# Updated for Unicode 10.0 by Andrew Glass 2017-07-25
# Updated for Unicode 12.1 by Andrew Glass 2019-05-24
# Updated for Unicode 13.0 by Andrew Glass 2020-07-28
# Updated for Unicode 14.0 by Andrew Glass 2021-09-25
# ================================================
# OVERRIDES TO ASSIGNED VALUES
# ================================================
# Indic_Syllabic_Category=Bindu
193A ; Bindu # Mn LIMBU SIGN KEMPHRENG
AA29 ; Bindu # Mn  CHAM VOWEL SIGN AA
10A0D ; Bindu # Mn KHAROSHTHI SIGN DOUBLE RING BELOW
# ================================================
# Indic_Syllabic_Category=Consonant
0840..0858 ; Consonant # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN
0F00..0F01 ; Consonant # Lo [2] TIBETAN SYLLABLE OM..TIBETAN MARK GTER YIG MGO TRUNCATED
0F04..0F06 ; Consonant # Po TIBETAN MARK INITIAL YIG MGO MDUN MA..TIBETAN MARK CARET YIG MGO PHUR SHAD MA
19C1..19C7 ; Consonant # Lo [7] NEW TAI LUE LETTER FINAL V..NEW TAI LUE LETTER FINAL B # Reassigned to avoid clustering with a base consonant
25CC ; Consonant # So DOTTED CIRCLE
# ================================================
# Indic_Syllabic_Category=Consonant_Dead
0F7F ; Consonant_Dead # Mc TIBETAN SIGN RNAM BCAD # reassigned so that visarga will form an independent cluster
# ================================================
# Indic_Syllabic_Category=Consonant_Final
0F35 ; Consonant_Final # Mn TIBETAN MARK NGAS BZUNG NYI ZLA
0F37 ; Consonant_Final # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS
0FC6 ; Consonant_Final # Mn TIBETAN SYMBOL PADMA GDAN
# ================================================
# Indic_Syllabic_Category=Consonant_Final_Modifier
1C36 ; Consonant_Final_Modifier # Mn LEPCHA SIGN RAN
# ================================================
# Indic_Syllabic_Category=Gemination_Mark
11134 ; Gemination_Mark # Mc CHAKMA MAAYYAA
# ================================================
# Indic_Syllabic_Category=Nukta
0F71 ; Nukta # Mn TIBETAN VOWEL SIGN AA # Reassigned to get this before an above vowel
10A38..10A3A ; Nukta # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW
# ================================================
# Indic_Syllabic_Category=Tone_Mark
1A7B..1A7C ; Tone_Mark # Mn [2] TAI THAM SIGN MAI SAM..TAI THAM SIGN KHUEN-LUE KARAN
1A7F ; Tone_Mark # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT
# ================================================
# Indic_Syllabic_Category=Vowel_Independent
AAB1 ; Vowel_Independent # Lo TAI VIET VOWEL AA
AABA ; Vowel_Independent # Lo TAI VIET VOWEL UA
AABD ; Vowel_Independent # Lo TAI VIET VOWEL AN
# ================================================
# ================================================
# VALUES NOT ASSIGNED IN Indic_Syllabic_Category
# ================================================
# ================================================
# Indic_Syllabic_Category=Consonant
0800..0815 ; Consonant # Lo [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF
1800 ; Consonant # Po MONGOLIAN BIRGA # Reassigned so that legacy Birga + MFVS sequences still work
1807 ; Consonant # Po MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER
180A ; Consonant # Po MONGOLIAN NIRUGU
1820..1878 ; Consonant # Lo [88] MONGOLIAN LETTER A..MONGOLIAN LETTER CHA WITH TWO DOTS
1843 ; Consonant # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN
2D30..2D67 ; Consonant # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO
2D6F ; Consonant # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK
10570..1057A ; Consonant # Lo [11] VITHKUQI CAPITAL LETTER A..VITHKUQI CAPITAL LETTER GA
1057C..1058A ; Consonant # Lo [15] VITHKUQI CAPITAL LETTER HA..VITHKUQI CAPITAL LETTER RE
1058C..10592 ; Consonant # Lo [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE
10594..10595 ; Consonant # Lo [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE
10597..105A1 ; Consonant # Lo [11] VITHKUQI SMALL LETTER A..VITHKUQI SMALL LETTER GA
105A3..105B1 ; Consonant # Lo [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE
105B3..105B9 ; Consonant # Lo [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE
105BB..105BC ; Consonant # Lo [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE
10AC0..10AC7 ; Consonant # Lo [8] MANICHAEAN LETTER ALEPH..MANICHAEAN LETTER WAW
10AC9..10AE4 ; Consonant # Lo [28] MANICHAEAN LETTER ZAYIN..MANICHAEAN LETTER TAW
10D00..10D23 ; Consonant # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA
10E80..10EA9 ; Consonant # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET
10EB0..10EB1 ; Consonant # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
10F30..10F45 ; Consonant # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN
111DA ; Consonant # Lo SHARADA EKAM
#HIEROGLYPHS to be moved to new category
13000..1342E ; Consonant # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032
#For the Begin and End segment to be handled fully correctly, the cluster model needs to be modified.
13437..13438 ; Consonant # Lo [2] EGYPTIAN HIEROGLYPH BEGIN SEGMENT..EGYPTIAN HIEROGLYPH END SEGMENT
16B00..16B2F ; Consonant # Lo [48] PAHAWH HMONG VOWEL KEEB..PAHAWH HMONG CONSONANT CAU
16F00..16F4A ; Consonant # Lo [75] MIAO LETTER PA..MIAO LETTER RTE
16FE4 ; Consonant # Mn KHITAN SMALL SCRIPT FILLER # Avoids Mn pushing this into VOWEL class
18B00..18CD5 ; Consonant # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5
1BC00..1BC6A ; Consonant # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M
1BC70..1BC7C ; Consonant # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK
1BC80..1BC88 ; Consonant # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL
1BC90..1BC99 ; Consonant # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW
1E100..1E12C ; Consonant # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W
1E137..1E13D ; Consonant # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER
1E14E ; Consonant # Lo NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ
1E14F ; Consonant # So NYIAKENG PUACHUE HMONG CIRCLED CA
1E290..1E2AD ; Consonant # Lo [30] TOTO LETTER PA..TOTO LETTER A
1E2C0..1E2EB ; Consonant # Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH
1E900..1E921 ; Consonant # Lu [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA
1E922..1E943 ; Consonant # Ll [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA
1E94B ; Consonant # Lm ADLAM NASALIZATION MARK
# ================================================
# Indic_Syllabic_Category=Consonant_Placeholder
1880..1884 ; Consonant_Placeholder # Lo [5] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER ALI GALI INVERTED UBADAMA
# ================================================
# Indic_Syllabic_Category=Gemination_Mark
10D27 ; Gemination_Mark # Mn HANIFI ROHINGYA SIGN TASSI
# ================================================
# Indic_Syllabic_Category=Modifying_Letter
FE00..FE0F ; Modifying_Letter # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16# Need to treat them as isolated bases so they don't merge with a cluster in invalid scenarios
16F50 ; Modifying_Letter # Lo MIAO LETTER NASALIZATION
# ================================================
# Indic_Syllabic_Category=Nukta
0859..085B ; Nukta # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
0F39 ; Nukta # Mn TIBETAN MARK TSA -PHRU # NOW IN UNICODE 10.0
1885..1886 ; Nukta # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA
18A9 ; Nukta # Mn MONGOLIAN LETTER ALI GALI DAGALGA
1B6B..1B73 ; Nukta # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG
10AE5..10AE6 ; Nukta # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW
16F4F ; Nukta # Mn MIAO SIGN CONSONANT MODIFIER BAR
1BC9D..1BC9E ; Nukta # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK
1E944..1E94A ; Nukta # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA
10F82..10F85 ; Nukta # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW
# ================================================
# Indic_Syllabic_Category=Number
10D30..10D39 ; Number # Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE
10F51..10F54 ; Number # No [4] SOGDIAN NUMBER ONE..SOGDIAN NUMBER ONE HUNDRED
16AC0..16AC9 ; Number # Nd [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE
1E140..1E149 ; Number # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE
1E2F0..1E2F9 ; Number # Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE
1E950..1E959 ; Number # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE
# ================================================
# Indic_Syllabic_Category=Tone_Mark
07EB..07F3 ; Tone_Mark # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
07FD ; Tone_Mark # Mn NKO DANTAYALAN
0F86..0F87 ; Tone_Mark # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS
17CF ; Tone_Mark # Mn KHMER SIGN AHSDA
10D24..10D26 ; Tone_Mark # Mn [3] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TANA
10F46..10F50 ; Tone_Mark # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW
16B30..16B36 ; Tone_Mark # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM
16F8F..16F92 ; Tone_Mark # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW
1E130..1E136 ; Tone_Mark # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D
1E2AE ; Tone_Mark # Mn TOTO SIGN RISING TONE
1E2EC..1E2EF ; Tone_Mark # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI
# ================================================
# Indic_Syllabic_Category=Virama
2D7F ; Virama # Mn TIFINAGH CONSONANT JOINER
13430..13436 ; Virama # Cf [7] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH OVERLAY MIDDLE
# ================================================
# Indic_Syllabic_Category=Vowel_Independent
AAB1 ; Vowel_Independent # Lo TAI VIET VOWEL AA
AABA ; Vowel_Independent # Lo TAI VIET VOWEL UA
AABD ; Vowel_Independent # Lo TAI VIET VOWEL AN
# ================================================
# Indic_Syllabic_Category=Vowel_Dependent
0B55 ; Vowel_Dependent # Mn ORIYA SIGN OVERLINE
10EAB..10EAC ; Vowel_Dependent # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
16F51..16F87 ; Vowel_Dependent # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI
# ================================================
# ================================================
# PROPERTIES NOT ASSIGNED IN Indic_Syllabic_Category
# ================================================
# ================================================
# USE_Syllabic_Category=Hieroglyph
# 13000..1342E ; Hieroglyph # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032
# ================================================
# USE_Syllabic_Category=Hieroglyph_Joiner
# 13430..13436 ; Hieroglyph_Joiner # Cf EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH OVERLAY MIDDLE
# ================================================
# USE_Syllabic_Category= Hieroglyph_Segment_Begin
# 13437 ; Hieroglyph_Segment_Begin # Cf EGYPTIAN HIEROGLYPH BEGIN SEGMENT
# ================================================
# USE_Syllabic_Category= Hieroglyph_Segment_End
# 13438 ; Hieroglyph_Segment_End # Cf EGYPTIAN HIEROGLYPH END SEGMENT
# ================================================
# eof