Vendor dependencies for 0.3.0 release

This commit is contained in:
2025-09-27 10:29:08 -05:00
parent 0c8d39d483
commit 82ab7f317b
26803 changed files with 16134934 additions and 0 deletions

View File

@@ -0,0 +1 @@
{"files":{"AUTHORS":"1ff3a7c8519b29544bb28ba9b1e7502df0cb764051fb9a1172e60006aa2b8dcc","COPYRIGHT":"edb20b474f6cbd4f4db066b54a9e0f687d0009d309412a63431189b59b8e2a07","Cargo.lock":"4fd8f0417be04865b494adce0cf3f7d7a16cc9752ff48c51cdbdee24417f5002","Cargo.toml":"bcb0d64219014de87ea032f13929f6b532e85628ea774ba87ea262f63efab811","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"7b63ecd5f1902af1b63729947373683c32745c16a10e8e6292e2e2dcd7e90ae0","README.md":"20fa9fe109d08172c0d544b4cb1bbe391afd55070d18b3581833d6b711c4b599","src/char_data/mod.rs":"8cbdcaacddb3dd9b70d615693fa73d0e7dca6332102a95f0d3ce447df7645284","src/char_data/tables.rs":"8cbe4b6a4fc690e76107d4654047d3f0d8988b870fc3234f76dc265ad2e2a9ca","src/data_source.rs":"36fa0785e51c549c1f72f09040cfe515b848d1b23fb30d469770a6b4b17b49df","src/deprecated.rs":"f94c0e75dec7e70cb9802e26b7f82fe618dcdd50e9973927bacd4eccc6899c62","src/explicit.rs":"86c3c55bf2cc90aab1411aac6cf05de505ca74e44a76fe829572dd7dc4dd2aa3","src/format_chars.rs":"678399fec3f4bfaf4093f38cfdb8956288313386dc3511dab9fb58164e8dc01b","src/implicit.rs":"8d5b003464aee3f333785c6170a884945251f39601e4ea658e669a2ad575d588","src/level.rs":"ce1eaa9940f1b90bc59aba296488b8cd128aefeb4b6b2e3ecc34da26c569150b","src/lib.rs":"8a611ccb9d56c176f8428edc14b1e6fc1fbe8ed22bc72118fb987e6a29638197","src/prepare.rs":"aeb8b88cfb2d2e6b74473f5903205dd3683d57abcc8801de7b9fdea6a432a0fe","src/utf16.rs":"12ee177127a0b5b0350a1fcc1edf7387c26b51ec5654f724629aab723881c313"},"package":"5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5"}

4
vendor/unicode-bidi/AUTHORS vendored Normal file
View File

@@ -0,0 +1,4 @@
This software was written by the following people:
Matt Brubeck <mbrubeck@limpet.net>
Behnam Esfahbod <behnam@zwnj.org>

8
vendor/unicode-bidi/COPYRIGHT vendored Normal file
View File

@@ -0,0 +1,8 @@
This project is copyright 2015, The Servo Project Developers (given in the
file AUTHORS).
Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
http://www.apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
http://opensource.org/licenses/MIT>, at your option. All files in the project
carrying such notice may not be copied, modified, or distributed except
according to those terms.

182
vendor/unicode-bidi/Cargo.lock generated vendored Normal file
View File

@@ -0,0 +1,182 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "flame"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fc2706461e1ee94f55cab2ed2e3d34ae9536cfa830358ef80acff1a3dacab30"
dependencies = [
"lazy_static",
"serde",
"serde_derive",
"serde_json",
"thread-id",
]
[[package]]
name = "flamer"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "36b732da54fd4ea34452f2431cf464ac7be94ca4b339c9cd3d3d12eb06fe7aab"
dependencies = [
"flame",
"quote",
"syn",
]
[[package]]
name = "itoa"
version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38"
[[package]]
name = "lazy_static"
version = "0.2.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "76f033c7ad61445c5b347c7382dd1237847eb1bce590fe50365dcb33d546be73"
[[package]]
name = "libc"
version = "0.2.149"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a08173bc88b7955d1b3145aa561539096c421ac8debde8cbc3612ec635fee29b"
[[package]]
name = "proc-macro2"
version = "1.0.65"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92de25114670a878b1261c79c9f8f729fb97e95bac93f6312f583c60dd6a1dfe"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5907a1b7c277254a8b15170f6e7c97cfa60ee7872a3217663bb81151e48184bb"
dependencies = [
"proc-macro2",
]
[[package]]
name = "redox_syscall"
version = "0.1.57"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce"
[[package]]
name = "ryu"
version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741"
[[package]]
name = "serde"
version = "1.0.156"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "314b5b092c0ade17c00142951e50ced110ec27cea304b1037c6969246c2469a4"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.156"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7e29c4601e36bcec74a223228dce795f4cd3616341a4af93520ca1a837c087d"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "serde_json"
version = "1.0.99"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "46266871c240a00b8f503b877622fe33430b3c7d963bdc0f2adc511e54a1eae3"
dependencies = [
"itoa",
"ryu",
"serde",
]
[[package]]
name = "serde_test"
version = "1.0.175"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "29baf0f77ca9ad9c6ed46e1b408b5e0f30b5184bcd66884e7f6d36bd7a65a8a4"
dependencies = [
"serde",
]
[[package]]
name = "smallvec"
version = "1.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7"
[[package]]
name = "syn"
version = "1.0.109"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "thread-id"
version = "3.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c7fbf4c9d56b320106cd64fd024dadfa0be7cb4706725fc44a7d7ce952d820c1"
dependencies = [
"libc",
"redox_syscall",
"winapi",
]
[[package]]
name = "unicode-bidi"
version = "0.3.18"
dependencies = [
"flame",
"flamer",
"serde",
"serde_test",
"smallvec",
]
[[package]]
name = "unicode-ident"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

90
vendor/unicode-bidi/Cargo.toml vendored Normal file
View File

@@ -0,0 +1,90 @@
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies.
#
# If you are reading this file be aware that the original Cargo.toml
# will likely look very different (and much more reasonable).
# See Cargo.toml.orig for the original contents.
[package]
edition = "2018"
rust-version = "1.47.0"
name = "unicode-bidi"
version = "0.3.18"
authors = ["The Servo Project Developers"]
build = false
exclude = [
"benches/**",
"data/**",
"examples/**",
"tests/**",
"tools/**",
]
autobins = false
autoexamples = false
autotests = false
autobenches = false
description = "Implementation of the Unicode Bidirectional Algorithm"
documentation = "https://docs.rs/unicode-bidi/"
readme = "README.md"
keywords = [
"rtl",
"unicode",
"text",
"layout",
"bidi",
]
categories = [
"no-std",
"encoding",
"text-processing",
]
license = "MIT OR Apache-2.0"
repository = "https://github.com/servo/unicode-bidi"
[lib]
name = "unicode_bidi"
path = "src/lib.rs"
[dependencies.flame]
version = "0.2"
optional = true
[dependencies.flamer]
version = "0.4"
optional = true
[dependencies.serde]
version = ">=0.8, <2.0"
features = ["derive"]
optional = true
default-features = false
[dependencies.smallvec]
version = ">=1.13"
features = ["union"]
optional = true
[dev-dependencies.serde_test]
version = ">=0.8, <2.0"
[features]
bench_it = []
default = [
"std",
"hardcoded-data",
]
flame_it = [
"flame",
"flamer",
]
hardcoded-data = []
std = []
unstable = []
with_serde = ["serde"]
[badges.appveyor]
repository = "servo/unicode-bidi"

201
vendor/unicode-bidi/LICENSE-APACHE vendored Normal file
View File

@@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

25
vendor/unicode-bidi/LICENSE-MIT vendored Normal file
View File

@@ -0,0 +1,25 @@
Copyright (c) 2015 The Rust Project Developers
Permission is hereby granted, free of charge, to any
person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the
Software without restriction, including without
limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software
is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice
shall be included in all copies or substantial portions
of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

12
vendor/unicode-bidi/README.md vendored Normal file
View File

@@ -0,0 +1,12 @@
# unicode-bidi
This crate implements the [Unicode Bidirectional Algorithm][tr9] for display of
mixed right-to-left and left-to-right text. It is written in safe Rust,
compatible with the current stable release.
[Documentation](https://docs.rs/unicode-bidi)
[![CI](https://github.com/servo/unicode-bidi/actions/workflows/main.yml/badge.svg)](https://github.com/servo/unicode-bidi/actions)
[![AppVeyor](https://img.shields.io/appveyor/ci/servo/unicode-bidi/main.svg)](https://ci.appveyor.com/project/servo/unicode-bidi)
[tr9]: https://www.unicode.org/reports/tr9/

170
vendor/unicode-bidi/src/char_data/mod.rs vendored Normal file
View File

@@ -0,0 +1,170 @@
// Copyright 2015 The Servo Project Developers. See the
// COPYRIGHT file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//! Accessor for `Bidi_Class` property from Unicode Character Database (UCD)
mod tables;
pub use self::tables::{BidiClass, UNICODE_VERSION};
#[cfg(feature = "hardcoded-data")]
use core::char;
#[cfg(feature = "hardcoded-data")]
use core::cmp::Ordering::{Equal, Greater, Less};
#[cfg(feature = "hardcoded-data")]
use self::tables::bidi_class_table;
use crate::data_source::BidiMatchedOpeningBracket;
use crate::BidiClass::*;
#[cfg(feature = "hardcoded-data")]
use crate::BidiDataSource;
/// Hardcoded Bidi data that ships with the unicode-bidi crate.
///
/// This can be enabled with the default `hardcoded-data` Cargo feature.
///
/// The type carries no state: its `BidiDataSource` implementation answers
/// every query from the generated `bidi_class_table` in the `tables` module.
#[cfg(feature = "hardcoded-data")]
pub struct HardcodedBidiData;
#[cfg(feature = "hardcoded-data")]
impl BidiDataSource for HardcodedBidiData {
    /// Returns the `BidiClass` of `c` according to the table compiled into
    /// this crate.
    ///
    /// This is the same lookup as the module-level [`bidi_class`] function,
    /// so it simply forwards to it.
    fn bidi_class(&self, c: char) -> BidiClass {
        // A bare `bidi_class` path names the free function of this module,
        // not this trait method, so this call is not recursive.
        bidi_class(c)
    }
}
/// Find the `BidiClass` of a single char.
///
/// The character is looked up in the hardcoded `bidi_class_table`; a
/// character that falls in none of the table's ranges is reported as `L`.
///
/// Only available when the `hardcoded-data` Cargo feature is enabled.
#[cfg(feature = "hardcoded-data")]
pub fn bidi_class(c: char) -> BidiClass {
bsearch_range_value_table(c, bidi_class_table)
}
/// Looks `c` up in the bracket-pair table (derived from BidiBrackets.txt).
///
/// Returns `None` when `c` is neither the opening nor the closing member of
/// any listed pair. Otherwise returns, for the first pair that contains `c`:
///
/// * `opening` - the *normalized* opening bracket of the pair: the pair's
///   third field when it is present, else the pair's own opening bracket;
/// * `is_open` - whether `c` itself is the opening bracket of that pair.
pub(crate) fn bidi_matched_opening_bracket(c: char) -> Option<BidiMatchedOpeningBracket> {
    self::tables::bidi_pairs_table
        .iter()
        // Linear scan; the first pair mentioning `c` on either side wins.
        .find(|entry| entry.0 == c || entry.1 == c)
        .map(|entry| BidiMatchedOpeningBracket {
            opening: entry.2.unwrap_or(entry.0),
            is_open: entry.0 == c,
        })
}
pub fn is_rtl(bidi_class: BidiClass) -> bool {
matches!(bidi_class, RLE | RLO | RLI)
}
/// Binary-searches a table of inclusive `(first, last, class)` ranges for the
/// range that contains `c` and returns that range's class.
///
/// The search relies on `r` being sorted by code point with non-overlapping
/// ranges. When no range contains `c`, the result is `L`.
#[cfg(feature = "hardcoded-data")]
fn bsearch_range_value_table(c: char, r: &'static [(char, char, BidiClass)]) -> BidiClass {
    // The comparator reports where a table range sits relative to `c`.
    let lookup = r.binary_search_by(|&(first, last, _)| {
        if last < c {
            // Range ends before `c`: keep searching to the right.
            Less
        } else if c < first {
            // Range starts after `c`: keep searching to the left.
            Greater
        } else {
            Equal
        }
    });

    match lookup {
        Ok(pos) => r[pos].2,
        // UCD/extracted/DerivedBidiClass.txt: "All code points not explicitly listed
        // for Bidi_Class have the value Left_To_Right (L)."
        Err(_) => L,
    }
}
/// Spot checks of `bidi_class` against the hardcoded table, grouped by the
/// region of the code space being probed.
#[cfg(all(test, feature = "hardcoded-data"))]
mod tests {
    use super::*;

    /// Controls, punctuation and letters in the ASCII range.
    #[test]
    fn test_ascii() {
        assert_eq!(bidi_class('\u{0000}'), BN);
        assert_eq!(bidi_class('\u{0040}'), ON);
        assert_eq!(bidi_class('\u{0041}'), L);
        assert_eq!(bidi_class('\u{0062}'), L);
        assert_eq!(bidi_class('\u{007F}'), BN);
    }

    /// Right-to-left blocks and special ranges in the Basic Multilingual Plane.
    #[test]
    fn test_bmp() {
        // Hebrew
        assert_eq!(bidi_class('\u{0590}'), R);
        assert_eq!(bidi_class('\u{05D0}'), R);
        assert_eq!(bidi_class('\u{05D1}'), R);
        assert_eq!(bidi_class('\u{05FF}'), R);

        // Arabic
        assert_eq!(bidi_class('\u{0600}'), AN);
        assert_eq!(bidi_class('\u{0627}'), AL);
        assert_eq!(bidi_class('\u{07BF}'), AL);

        // Default R + Arabic Extras
        assert_eq!(bidi_class('\u{07C0}'), R);
        assert_eq!(bidi_class('\u{085F}'), R);
        assert_eq!(bidi_class('\u{0860}'), AL);
        assert_eq!(bidi_class('\u{0870}'), AL);
        assert_eq!(bidi_class('\u{089F}'), NSM);
        assert_eq!(bidi_class('\u{08A0}'), AL);
        // U+089F was asserted a second time here; the verbatim duplicate
        // added no coverage and has been dropped.
        assert_eq!(bidi_class('\u{08FF}'), NSM);

        // Default ET
        assert_eq!(bidi_class('\u{20A0}'), ET);
        assert_eq!(bidi_class('\u{20CF}'), ET);

        // Arabic Presentation Forms
        assert_eq!(bidi_class('\u{FB1D}'), R);
        assert_eq!(bidi_class('\u{FB4F}'), R);
        assert_eq!(bidi_class('\u{FB50}'), AL);
        assert_eq!(bidi_class('\u{FDCF}'), ON);
        assert_eq!(bidi_class('\u{FDF0}'), AL);
        assert_eq!(bidi_class('\u{FDFF}'), ON);
        assert_eq!(bidi_class('\u{FE70}'), AL);
        assert_eq!(bidi_class('\u{FEFE}'), AL);
        assert_eq!(bidi_class('\u{FEFF}'), BN);

        // noncharacters
        assert_eq!(bidi_class('\u{FDD0}'), L);
        assert_eq!(bidi_class('\u{FDD1}'), L);
        assert_eq!(bidi_class('\u{FDEE}'), L);
        assert_eq!(bidi_class('\u{FDEF}'), L);
        assert_eq!(bidi_class('\u{FFFE}'), L);
        assert_eq!(bidi_class('\u{FFFF}'), L);
    }

    /// Right-to-left default ranges in the Supplementary Multilingual Plane.
    #[test]
    fn test_smp() {
        // Default AL + R
        assert_eq!(bidi_class('\u{10800}'), R);
        assert_eq!(bidi_class('\u{10FFF}'), R);
        assert_eq!(bidi_class('\u{1E800}'), R);
        assert_eq!(bidi_class('\u{1EDFF}'), R);
        assert_eq!(bidi_class('\u{1EE00}'), AL);
        assert_eq!(bidi_class('\u{1EEFF}'), AL);
        assert_eq!(bidi_class('\u{1EF00}'), R);
        assert_eq!(bidi_class('\u{1EFFF}'), R);
    }

    /// Code points in planes with no table entries fall back to `L`.
    #[test]
    fn test_unassigned_planes() {
        assert_eq!(bidi_class('\u{30000}'), L);
        assert_eq!(bidi_class('\u{40000}'), L);
        assert_eq!(bidi_class('\u{50000}'), L);
        assert_eq!(bidi_class('\u{60000}'), L);
        assert_eq!(bidi_class('\u{70000}'), L);
        assert_eq!(bidi_class('\u{80000}'), L);
        assert_eq!(bidi_class('\u{90000}'), L);
        assert_eq!(bidi_class('\u{a0000}'), L);
    }
}

View File

@@ -0,0 +1,563 @@
// NOTE:
// The following code was generated by "tools/generate.py". Do not edit directly.
#![allow(missing_docs, non_upper_case_globals, non_snake_case)]
#![cfg_attr(rustfmt, rustfmt_skip)]
/// The [Unicode version](http://www.unicode.org/versions/) of the character
/// data these generated tables were built from, as a
/// `(major, minor, update)` triple.
pub const UNICODE_VERSION: (u64, u64, u64) = (16, 0, 0);
#[allow(non_camel_case_types)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
/// Represents values of the Unicode character property
/// [`Bidi_Class`](http://www.unicode.org/reports/tr44/#Bidi_Class), also
/// known as the *bidirectional character type*.
///
/// * <http://www.unicode.org/reports/tr9/#Bidirectional_Character_Types>
/// * <http://www.unicode.org/reports/tr44/#Bidi_Class_Values>
///
/// Variant names are the short property-value aliases; each variant's comment
/// gives the corresponding long name from the `Bidi_Class_Values` table.
pub enum BidiClass {
/// Arabic_Letter
AL,
/// Arabic_Number
AN,
/// Paragraph_Separator
B,
/// Boundary_Neutral
BN,
/// Common_Separator
CS,
/// European_Number
EN,
/// European_Separator
ES,
/// European_Terminator
ET,
/// First_Strong_Isolate
FSI,
/// Left_To_Right
L,
/// Left_To_Right_Embedding
LRE,
/// Left_To_Right_Isolate
LRI,
/// Left_To_Right_Override
LRO,
/// Nonspacing_Mark
NSM,
/// Other_Neutral
ON,
/// Pop_Directional_Format
PDF,
/// Pop_Directional_Isolate
PDI,
/// Right_To_Left
R,
/// Right_To_Left_Embedding
RLE,
/// Right_To_Left_Isolate
RLI,
/// Right_To_Left_Override
RLO,
/// Segment_Separator
S,
/// White_Space
WS,
}
#[cfg(feature = "hardcoded-data")]
use self::BidiClass::*;
#[cfg(feature = "hardcoded-data")]
pub const bidi_class_table: &'static [(char, char, BidiClass)] = &[
('\u{0}', '\u{8}', BN), ('\u{9}', '\u{9}', S), ('\u{a}', '\u{a}', B), ('\u{b}', '\u{b}', S),
('\u{c}', '\u{c}', WS), ('\u{d}', '\u{d}', B), ('\u{e}', '\u{1b}', BN), ('\u{1c}', '\u{1e}', B),
('\u{1f}', '\u{1f}', S), ('\u{20}', '\u{20}', WS), ('\u{21}', '\u{22}', ON), ('\u{23}',
'\u{25}', ET), ('\u{26}', '\u{2a}', ON), ('\u{2b}', '\u{2b}', ES), ('\u{2c}', '\u{2c}', CS),
('\u{2d}', '\u{2d}', ES), ('\u{2e}', '\u{2f}', CS), ('\u{30}', '\u{39}', EN), ('\u{3a}',
'\u{3a}', CS), ('\u{3b}', '\u{40}', ON), ('\u{41}', '\u{5a}', L), ('\u{5b}', '\u{60}', ON),
('\u{61}', '\u{7a}', L), ('\u{7b}', '\u{7e}', ON), ('\u{7f}', '\u{84}', BN), ('\u{85}',
'\u{85}', B), ('\u{86}', '\u{9f}', BN), ('\u{a0}', '\u{a0}', CS), ('\u{a1}', '\u{a1}', ON),
('\u{a2}', '\u{a5}', ET), ('\u{a6}', '\u{a9}', ON), ('\u{aa}', '\u{aa}', L), ('\u{ab}',
'\u{ac}', ON), ('\u{ad}', '\u{ad}', BN), ('\u{ae}', '\u{af}', ON), ('\u{b0}', '\u{b1}', ET),
('\u{b2}', '\u{b3}', EN), ('\u{b4}', '\u{b4}', ON), ('\u{b5}', '\u{b5}', L), ('\u{b6}',
'\u{b8}', ON), ('\u{b9}', '\u{b9}', EN), ('\u{ba}', '\u{ba}', L), ('\u{bb}', '\u{bf}', ON),
('\u{c0}', '\u{d6}', L), ('\u{d7}', '\u{d7}', ON), ('\u{d8}', '\u{f6}', L), ('\u{f7}', '\u{f7}',
ON), ('\u{f8}', '\u{2b8}', L), ('\u{2b9}', '\u{2ba}', ON), ('\u{2bb}', '\u{2c1}', L),
('\u{2c2}', '\u{2cf}', ON), ('\u{2d0}', '\u{2d1}', L), ('\u{2d2}', '\u{2df}', ON), ('\u{2e0}',
'\u{2e4}', L), ('\u{2e5}', '\u{2ed}', ON), ('\u{2ee}', '\u{2ee}', L), ('\u{2ef}', '\u{2ff}',
ON), ('\u{300}', '\u{36f}', NSM), ('\u{370}', '\u{373}', L), ('\u{374}', '\u{375}', ON),
('\u{376}', '\u{377}', L), ('\u{37a}', '\u{37d}', L), ('\u{37e}', '\u{37e}', ON), ('\u{37f}',
'\u{37f}', L), ('\u{384}', '\u{385}', ON), ('\u{386}', '\u{386}', L), ('\u{387}', '\u{387}',
ON), ('\u{388}', '\u{38a}', L), ('\u{38c}', '\u{38c}', L), ('\u{38e}', '\u{3a1}', L),
('\u{3a3}', '\u{3f5}', L), ('\u{3f6}', '\u{3f6}', ON), ('\u{3f7}', '\u{482}', L), ('\u{483}',
'\u{489}', NSM), ('\u{48a}', '\u{52f}', L), ('\u{531}', '\u{556}', L), ('\u{559}', '\u{589}',
L), ('\u{58a}', '\u{58a}', ON), ('\u{58d}', '\u{58e}', ON), ('\u{58f}', '\u{58f}', ET),
('\u{590}', '\u{590}', R), ('\u{591}', '\u{5bd}', NSM), ('\u{5be}', '\u{5be}', R), ('\u{5bf}',
'\u{5bf}', NSM), ('\u{5c0}', '\u{5c0}', R), ('\u{5c1}', '\u{5c2}', NSM), ('\u{5c3}', '\u{5c3}',
R), ('\u{5c4}', '\u{5c5}', NSM), ('\u{5c6}', '\u{5c6}', R), ('\u{5c7}', '\u{5c7}', NSM),
('\u{5c8}', '\u{5ff}', R), ('\u{600}', '\u{605}', AN), ('\u{606}', '\u{607}', ON), ('\u{608}',
'\u{608}', AL), ('\u{609}', '\u{60a}', ET), ('\u{60b}', '\u{60b}', AL), ('\u{60c}', '\u{60c}',
CS), ('\u{60d}', '\u{60d}', AL), ('\u{60e}', '\u{60f}', ON), ('\u{610}', '\u{61a}', NSM),
('\u{61b}', '\u{64a}', AL), ('\u{64b}', '\u{65f}', NSM), ('\u{660}', '\u{669}', AN), ('\u{66a}',
'\u{66a}', ET), ('\u{66b}', '\u{66c}', AN), ('\u{66d}', '\u{66f}', AL), ('\u{670}', '\u{670}',
NSM), ('\u{671}', '\u{6d5}', AL), ('\u{6d6}', '\u{6dc}', NSM), ('\u{6dd}', '\u{6dd}', AN),
('\u{6de}', '\u{6de}', ON), ('\u{6df}', '\u{6e4}', NSM), ('\u{6e5}', '\u{6e6}', AL), ('\u{6e7}',
'\u{6e8}', NSM), ('\u{6e9}', '\u{6e9}', ON), ('\u{6ea}', '\u{6ed}', NSM), ('\u{6ee}', '\u{6ef}',
AL), ('\u{6f0}', '\u{6f9}', EN), ('\u{6fa}', '\u{710}', AL), ('\u{711}', '\u{711}', NSM),
('\u{712}', '\u{72f}', AL), ('\u{730}', '\u{74a}', NSM), ('\u{74b}', '\u{7a5}', AL), ('\u{7a6}',
'\u{7b0}', NSM), ('\u{7b1}', '\u{7bf}', AL), ('\u{7c0}', '\u{7ea}', R), ('\u{7eb}', '\u{7f3}',
NSM), ('\u{7f4}', '\u{7f5}', R), ('\u{7f6}', '\u{7f9}', ON), ('\u{7fa}', '\u{7fc}', R),
('\u{7fd}', '\u{7fd}', NSM), ('\u{7fe}', '\u{815}', R), ('\u{816}', '\u{819}', NSM), ('\u{81a}',
'\u{81a}', R), ('\u{81b}', '\u{823}', NSM), ('\u{824}', '\u{824}', R), ('\u{825}', '\u{827}',
NSM), ('\u{828}', '\u{828}', R), ('\u{829}', '\u{82d}', NSM), ('\u{82e}', '\u{858}', R),
('\u{859}', '\u{85b}', NSM), ('\u{85c}', '\u{85f}', R), ('\u{860}', '\u{86a}', AL), ('\u{86b}',
'\u{86f}', R), ('\u{870}', '\u{88e}', AL), ('\u{88f}', '\u{88f}', R), ('\u{890}', '\u{891}',
AN), ('\u{892}', '\u{896}', R), ('\u{897}', '\u{89f}', NSM), ('\u{8a0}', '\u{8c9}', AL),
('\u{8ca}', '\u{8e1}', NSM), ('\u{8e2}', '\u{8e2}', AN), ('\u{8e3}', '\u{902}', NSM),
('\u{903}', '\u{939}', L), ('\u{93a}', '\u{93a}', NSM), ('\u{93b}', '\u{93b}', L), ('\u{93c}',
'\u{93c}', NSM), ('\u{93d}', '\u{940}', L), ('\u{941}', '\u{948}', NSM), ('\u{949}', '\u{94c}',
L), ('\u{94d}', '\u{94d}', NSM), ('\u{94e}', '\u{950}', L), ('\u{951}', '\u{957}', NSM),
('\u{958}', '\u{961}', L), ('\u{962}', '\u{963}', NSM), ('\u{964}', '\u{980}', L), ('\u{981}',
'\u{981}', NSM), ('\u{982}', '\u{983}', L), ('\u{985}', '\u{98c}', L), ('\u{98f}', '\u{990}',
L), ('\u{993}', '\u{9a8}', L), ('\u{9aa}', '\u{9b0}', L), ('\u{9b2}', '\u{9b2}', L), ('\u{9b6}',
'\u{9b9}', L), ('\u{9bc}', '\u{9bc}', NSM), ('\u{9bd}', '\u{9c0}', L), ('\u{9c1}', '\u{9c4}',
NSM), ('\u{9c7}', '\u{9c8}', L), ('\u{9cb}', '\u{9cc}', L), ('\u{9cd}', '\u{9cd}', NSM),
('\u{9ce}', '\u{9ce}', L), ('\u{9d7}', '\u{9d7}', L), ('\u{9dc}', '\u{9dd}', L), ('\u{9df}',
'\u{9e1}', L), ('\u{9e2}', '\u{9e3}', NSM), ('\u{9e6}', '\u{9f1}', L), ('\u{9f2}', '\u{9f3}',
ET), ('\u{9f4}', '\u{9fa}', L), ('\u{9fb}', '\u{9fb}', ET), ('\u{9fc}', '\u{9fd}', L),
('\u{9fe}', '\u{9fe}', NSM), ('\u{a01}', '\u{a02}', NSM), ('\u{a03}', '\u{a03}', L), ('\u{a05}',
'\u{a0a}', L), ('\u{a0f}', '\u{a10}', L), ('\u{a13}', '\u{a28}', L), ('\u{a2a}', '\u{a30}', L),
('\u{a32}', '\u{a33}', L), ('\u{a35}', '\u{a36}', L), ('\u{a38}', '\u{a39}', L), ('\u{a3c}',
'\u{a3c}', NSM), ('\u{a3e}', '\u{a40}', L), ('\u{a41}', '\u{a42}', NSM), ('\u{a47}', '\u{a48}',
NSM), ('\u{a4b}', '\u{a4d}', NSM), ('\u{a51}', '\u{a51}', NSM), ('\u{a59}', '\u{a5c}', L),
('\u{a5e}', '\u{a5e}', L), ('\u{a66}', '\u{a6f}', L), ('\u{a70}', '\u{a71}', NSM), ('\u{a72}',
'\u{a74}', L), ('\u{a75}', '\u{a75}', NSM), ('\u{a76}', '\u{a76}', L), ('\u{a81}', '\u{a82}',
NSM), ('\u{a83}', '\u{a83}', L), ('\u{a85}', '\u{a8d}', L), ('\u{a8f}', '\u{a91}', L),
('\u{a93}', '\u{aa8}', L), ('\u{aaa}', '\u{ab0}', L), ('\u{ab2}', '\u{ab3}', L), ('\u{ab5}',
'\u{ab9}', L), ('\u{abc}', '\u{abc}', NSM), ('\u{abd}', '\u{ac0}', L), ('\u{ac1}', '\u{ac5}',
NSM), ('\u{ac7}', '\u{ac8}', NSM), ('\u{ac9}', '\u{ac9}', L), ('\u{acb}', '\u{acc}', L),
('\u{acd}', '\u{acd}', NSM), ('\u{ad0}', '\u{ad0}', L), ('\u{ae0}', '\u{ae1}', L), ('\u{ae2}',
'\u{ae3}', NSM), ('\u{ae6}', '\u{af0}', L), ('\u{af1}', '\u{af1}', ET), ('\u{af9}', '\u{af9}',
L), ('\u{afa}', '\u{aff}', NSM), ('\u{b01}', '\u{b01}', NSM), ('\u{b02}', '\u{b03}', L),
('\u{b05}', '\u{b0c}', L), ('\u{b0f}', '\u{b10}', L), ('\u{b13}', '\u{b28}', L), ('\u{b2a}',
'\u{b30}', L), ('\u{b32}', '\u{b33}', L), ('\u{b35}', '\u{b39}', L), ('\u{b3c}', '\u{b3c}',
NSM), ('\u{b3d}', '\u{b3e}', L), ('\u{b3f}', '\u{b3f}', NSM), ('\u{b40}', '\u{b40}', L),
('\u{b41}', '\u{b44}', NSM), ('\u{b47}', '\u{b48}', L), ('\u{b4b}', '\u{b4c}', L), ('\u{b4d}',
'\u{b4d}', NSM), ('\u{b55}', '\u{b56}', NSM), ('\u{b57}', '\u{b57}', L), ('\u{b5c}', '\u{b5d}',
L), ('\u{b5f}', '\u{b61}', L), ('\u{b62}', '\u{b63}', NSM), ('\u{b66}', '\u{b77}', L),
('\u{b82}', '\u{b82}', NSM), ('\u{b83}', '\u{b83}', L), ('\u{b85}', '\u{b8a}', L), ('\u{b8e}',
'\u{b90}', L), ('\u{b92}', '\u{b95}', L), ('\u{b99}', '\u{b9a}', L), ('\u{b9c}', '\u{b9c}', L),
('\u{b9e}', '\u{b9f}', L), ('\u{ba3}', '\u{ba4}', L), ('\u{ba8}', '\u{baa}', L), ('\u{bae}',
'\u{bb9}', L), ('\u{bbe}', '\u{bbf}', L), ('\u{bc0}', '\u{bc0}', NSM), ('\u{bc1}', '\u{bc2}',
L), ('\u{bc6}', '\u{bc8}', L), ('\u{bca}', '\u{bcc}', L), ('\u{bcd}', '\u{bcd}', NSM),
('\u{bd0}', '\u{bd0}', L), ('\u{bd7}', '\u{bd7}', L), ('\u{be6}', '\u{bf2}', L), ('\u{bf3}',
'\u{bf8}', ON), ('\u{bf9}', '\u{bf9}', ET), ('\u{bfa}', '\u{bfa}', ON), ('\u{c00}', '\u{c00}',
NSM), ('\u{c01}', '\u{c03}', L), ('\u{c04}', '\u{c04}', NSM), ('\u{c05}', '\u{c0c}', L),
('\u{c0e}', '\u{c10}', L), ('\u{c12}', '\u{c28}', L), ('\u{c2a}', '\u{c39}', L), ('\u{c3c}',
'\u{c3c}', NSM), ('\u{c3d}', '\u{c3d}', L), ('\u{c3e}', '\u{c40}', NSM), ('\u{c41}', '\u{c44}',
L), ('\u{c46}', '\u{c48}', NSM), ('\u{c4a}', '\u{c4d}', NSM), ('\u{c55}', '\u{c56}', NSM),
('\u{c58}', '\u{c5a}', L), ('\u{c5d}', '\u{c5d}', L), ('\u{c60}', '\u{c61}', L), ('\u{c62}',
'\u{c63}', NSM), ('\u{c66}', '\u{c6f}', L), ('\u{c77}', '\u{c77}', L), ('\u{c78}', '\u{c7e}',
ON), ('\u{c7f}', '\u{c80}', L), ('\u{c81}', '\u{c81}', NSM), ('\u{c82}', '\u{c8c}', L),
('\u{c8e}', '\u{c90}', L), ('\u{c92}', '\u{ca8}', L), ('\u{caa}', '\u{cb3}', L), ('\u{cb5}',
'\u{cb9}', L), ('\u{cbc}', '\u{cbc}', NSM), ('\u{cbd}', '\u{cc4}', L), ('\u{cc6}', '\u{cc8}',
L), ('\u{cca}', '\u{ccb}', L), ('\u{ccc}', '\u{ccd}', NSM), ('\u{cd5}', '\u{cd6}', L),
('\u{cdd}', '\u{cde}', L), ('\u{ce0}', '\u{ce1}', L), ('\u{ce2}', '\u{ce3}', NSM), ('\u{ce6}',
'\u{cef}', L), ('\u{cf1}', '\u{cf3}', L), ('\u{d00}', '\u{d01}', NSM), ('\u{d02}', '\u{d0c}',
L), ('\u{d0e}', '\u{d10}', L), ('\u{d12}', '\u{d3a}', L), ('\u{d3b}', '\u{d3c}', NSM),
('\u{d3d}', '\u{d40}', L), ('\u{d41}', '\u{d44}', NSM), ('\u{d46}', '\u{d48}', L), ('\u{d4a}',
'\u{d4c}', L), ('\u{d4d}', '\u{d4d}', NSM), ('\u{d4e}', '\u{d4f}', L), ('\u{d54}', '\u{d61}',
L), ('\u{d62}', '\u{d63}', NSM), ('\u{d66}', '\u{d7f}', L), ('\u{d81}', '\u{d81}', NSM),
('\u{d82}', '\u{d83}', L), ('\u{d85}', '\u{d96}', L), ('\u{d9a}', '\u{db1}', L), ('\u{db3}',
'\u{dbb}', L), ('\u{dbd}', '\u{dbd}', L), ('\u{dc0}', '\u{dc6}', L), ('\u{dca}', '\u{dca}',
NSM), ('\u{dcf}', '\u{dd1}', L), ('\u{dd2}', '\u{dd4}', NSM), ('\u{dd6}', '\u{dd6}', NSM),
('\u{dd8}', '\u{ddf}', L), ('\u{de6}', '\u{def}', L), ('\u{df2}', '\u{df4}', L), ('\u{e01}',
'\u{e30}', L), ('\u{e31}', '\u{e31}', NSM), ('\u{e32}', '\u{e33}', L), ('\u{e34}', '\u{e3a}',
NSM), ('\u{e3f}', '\u{e3f}', ET), ('\u{e40}', '\u{e46}', L), ('\u{e47}', '\u{e4e}', NSM),
('\u{e4f}', '\u{e5b}', L), ('\u{e81}', '\u{e82}', L), ('\u{e84}', '\u{e84}', L), ('\u{e86}',
'\u{e8a}', L), ('\u{e8c}', '\u{ea3}', L), ('\u{ea5}', '\u{ea5}', L), ('\u{ea7}', '\u{eb0}', L),
('\u{eb1}', '\u{eb1}', NSM), ('\u{eb2}', '\u{eb3}', L), ('\u{eb4}', '\u{ebc}', NSM), ('\u{ebd}',
'\u{ebd}', L), ('\u{ec0}', '\u{ec4}', L), ('\u{ec6}', '\u{ec6}', L), ('\u{ec8}', '\u{ece}',
NSM), ('\u{ed0}', '\u{ed9}', L), ('\u{edc}', '\u{edf}', L), ('\u{f00}', '\u{f17}', L),
('\u{f18}', '\u{f19}', NSM), ('\u{f1a}', '\u{f34}', L), ('\u{f35}', '\u{f35}', NSM), ('\u{f36}',
'\u{f36}', L), ('\u{f37}', '\u{f37}', NSM), ('\u{f38}', '\u{f38}', L), ('\u{f39}', '\u{f39}',
NSM), ('\u{f3a}', '\u{f3d}', ON), ('\u{f3e}', '\u{f47}', L), ('\u{f49}', '\u{f6c}', L),
('\u{f71}', '\u{f7e}', NSM), ('\u{f7f}', '\u{f7f}', L), ('\u{f80}', '\u{f84}', NSM), ('\u{f85}',
'\u{f85}', L), ('\u{f86}', '\u{f87}', NSM), ('\u{f88}', '\u{f8c}', L), ('\u{f8d}', '\u{f97}',
NSM), ('\u{f99}', '\u{fbc}', NSM), ('\u{fbe}', '\u{fc5}', L), ('\u{fc6}', '\u{fc6}', NSM),
('\u{fc7}', '\u{fcc}', L), ('\u{fce}', '\u{fda}', L), ('\u{1000}', '\u{102c}', L), ('\u{102d}',
'\u{1030}', NSM), ('\u{1031}', '\u{1031}', L), ('\u{1032}', '\u{1037}', NSM), ('\u{1038}',
'\u{1038}', L), ('\u{1039}', '\u{103a}', NSM), ('\u{103b}', '\u{103c}', L), ('\u{103d}',
'\u{103e}', NSM), ('\u{103f}', '\u{1057}', L), ('\u{1058}', '\u{1059}', NSM), ('\u{105a}',
'\u{105d}', L), ('\u{105e}', '\u{1060}', NSM), ('\u{1061}', '\u{1070}', L), ('\u{1071}',
'\u{1074}', NSM), ('\u{1075}', '\u{1081}', L), ('\u{1082}', '\u{1082}', NSM), ('\u{1083}',
'\u{1084}', L), ('\u{1085}', '\u{1086}', NSM), ('\u{1087}', '\u{108c}', L), ('\u{108d}',
'\u{108d}', NSM), ('\u{108e}', '\u{109c}', L), ('\u{109d}', '\u{109d}', NSM), ('\u{109e}',
'\u{10c5}', L), ('\u{10c7}', '\u{10c7}', L), ('\u{10cd}', '\u{10cd}', L), ('\u{10d0}',
'\u{1248}', L), ('\u{124a}', '\u{124d}', L), ('\u{1250}', '\u{1256}', L), ('\u{1258}',
'\u{1258}', L), ('\u{125a}', '\u{125d}', L), ('\u{1260}', '\u{1288}', L), ('\u{128a}',
'\u{128d}', L), ('\u{1290}', '\u{12b0}', L), ('\u{12b2}', '\u{12b5}', L), ('\u{12b8}',
'\u{12be}', L), ('\u{12c0}', '\u{12c0}', L), ('\u{12c2}', '\u{12c5}', L), ('\u{12c8}',
'\u{12d6}', L), ('\u{12d8}', '\u{1310}', L), ('\u{1312}', '\u{1315}', L), ('\u{1318}',
'\u{135a}', L), ('\u{135d}', '\u{135f}', NSM), ('\u{1360}', '\u{137c}', L), ('\u{1380}',
'\u{138f}', L), ('\u{1390}', '\u{1399}', ON), ('\u{13a0}', '\u{13f5}', L), ('\u{13f8}',
'\u{13fd}', L), ('\u{1400}', '\u{1400}', ON), ('\u{1401}', '\u{167f}', L), ('\u{1680}',
'\u{1680}', WS), ('\u{1681}', '\u{169a}', L), ('\u{169b}', '\u{169c}', ON), ('\u{16a0}',
'\u{16f8}', L), ('\u{1700}', '\u{1711}', L), ('\u{1712}', '\u{1714}', NSM), ('\u{1715}',
'\u{1715}', L), ('\u{171f}', '\u{1731}', L), ('\u{1732}', '\u{1733}', NSM), ('\u{1734}',
'\u{1736}', L), ('\u{1740}', '\u{1751}', L), ('\u{1752}', '\u{1753}', NSM), ('\u{1760}',
'\u{176c}', L), ('\u{176e}', '\u{1770}', L), ('\u{1772}', '\u{1773}', NSM), ('\u{1780}',
'\u{17b3}', L), ('\u{17b4}', '\u{17b5}', NSM), ('\u{17b6}', '\u{17b6}', L), ('\u{17b7}',
'\u{17bd}', NSM), ('\u{17be}', '\u{17c5}', L), ('\u{17c6}', '\u{17c6}', NSM), ('\u{17c7}',
'\u{17c8}', L), ('\u{17c9}', '\u{17d3}', NSM), ('\u{17d4}', '\u{17da}', L), ('\u{17db}',
'\u{17db}', ET), ('\u{17dc}', '\u{17dc}', L), ('\u{17dd}', '\u{17dd}', NSM), ('\u{17e0}',
'\u{17e9}', L), ('\u{17f0}', '\u{17f9}', ON), ('\u{1800}', '\u{180a}', ON), ('\u{180b}',
'\u{180d}', NSM), ('\u{180e}', '\u{180e}', BN), ('\u{180f}', '\u{180f}', NSM), ('\u{1810}',
'\u{1819}', L), ('\u{1820}', '\u{1878}', L), ('\u{1880}', '\u{1884}', L), ('\u{1885}',
'\u{1886}', NSM), ('\u{1887}', '\u{18a8}', L), ('\u{18a9}', '\u{18a9}', NSM), ('\u{18aa}',
'\u{18aa}', L), ('\u{18b0}', '\u{18f5}', L), ('\u{1900}', '\u{191e}', L), ('\u{1920}',
'\u{1922}', NSM), ('\u{1923}', '\u{1926}', L), ('\u{1927}', '\u{1928}', NSM), ('\u{1929}',
'\u{192b}', L), ('\u{1930}', '\u{1931}', L), ('\u{1932}', '\u{1932}', NSM), ('\u{1933}',
'\u{1938}', L), ('\u{1939}', '\u{193b}', NSM), ('\u{1940}', '\u{1940}', ON), ('\u{1944}',
'\u{1945}', ON), ('\u{1946}', '\u{196d}', L), ('\u{1970}', '\u{1974}', L), ('\u{1980}',
'\u{19ab}', L), ('\u{19b0}', '\u{19c9}', L), ('\u{19d0}', '\u{19da}', L), ('\u{19de}',
'\u{19ff}', ON), ('\u{1a00}', '\u{1a16}', L), ('\u{1a17}', '\u{1a18}', NSM), ('\u{1a19}',
'\u{1a1a}', L), ('\u{1a1b}', '\u{1a1b}', NSM), ('\u{1a1e}', '\u{1a55}', L), ('\u{1a56}',
'\u{1a56}', NSM), ('\u{1a57}', '\u{1a57}', L), ('\u{1a58}', '\u{1a5e}', NSM), ('\u{1a60}',
'\u{1a60}', NSM), ('\u{1a61}', '\u{1a61}', L), ('\u{1a62}', '\u{1a62}', NSM), ('\u{1a63}',
'\u{1a64}', L), ('\u{1a65}', '\u{1a6c}', NSM), ('\u{1a6d}', '\u{1a72}', L), ('\u{1a73}',
'\u{1a7c}', NSM), ('\u{1a7f}', '\u{1a7f}', NSM), ('\u{1a80}', '\u{1a89}', L), ('\u{1a90}',
'\u{1a99}', L), ('\u{1aa0}', '\u{1aad}', L), ('\u{1ab0}', '\u{1ace}', NSM), ('\u{1b00}',
'\u{1b03}', NSM), ('\u{1b04}', '\u{1b33}', L), ('\u{1b34}', '\u{1b34}', NSM), ('\u{1b35}',
'\u{1b35}', L), ('\u{1b36}', '\u{1b3a}', NSM), ('\u{1b3b}', '\u{1b3b}', L), ('\u{1b3c}',
'\u{1b3c}', NSM), ('\u{1b3d}', '\u{1b41}', L), ('\u{1b42}', '\u{1b42}', NSM), ('\u{1b43}',
'\u{1b4c}', L), ('\u{1b4e}', '\u{1b6a}', L), ('\u{1b6b}', '\u{1b73}', NSM), ('\u{1b74}',
'\u{1b7f}', L), ('\u{1b80}', '\u{1b81}', NSM), ('\u{1b82}', '\u{1ba1}', L), ('\u{1ba2}',
'\u{1ba5}', NSM), ('\u{1ba6}', '\u{1ba7}', L), ('\u{1ba8}', '\u{1ba9}', NSM), ('\u{1baa}',
'\u{1baa}', L), ('\u{1bab}', '\u{1bad}', NSM), ('\u{1bae}', '\u{1be5}', L), ('\u{1be6}',
'\u{1be6}', NSM), ('\u{1be7}', '\u{1be7}', L), ('\u{1be8}', '\u{1be9}', NSM), ('\u{1bea}',
'\u{1bec}', L), ('\u{1bed}', '\u{1bed}', NSM), ('\u{1bee}', '\u{1bee}', L), ('\u{1bef}',
'\u{1bf1}', NSM), ('\u{1bf2}', '\u{1bf3}', L), ('\u{1bfc}', '\u{1c2b}', L), ('\u{1c2c}',
'\u{1c33}', NSM), ('\u{1c34}', '\u{1c35}', L), ('\u{1c36}', '\u{1c37}', NSM), ('\u{1c3b}',
'\u{1c49}', L), ('\u{1c4d}', '\u{1c8a}', L), ('\u{1c90}', '\u{1cba}', L), ('\u{1cbd}',
'\u{1cc7}', L), ('\u{1cd0}', '\u{1cd2}', NSM), ('\u{1cd3}', '\u{1cd3}', L), ('\u{1cd4}',
'\u{1ce0}', NSM), ('\u{1ce1}', '\u{1ce1}', L), ('\u{1ce2}', '\u{1ce8}', NSM), ('\u{1ce9}',
'\u{1cec}', L), ('\u{1ced}', '\u{1ced}', NSM), ('\u{1cee}', '\u{1cf3}', L), ('\u{1cf4}',
'\u{1cf4}', NSM), ('\u{1cf5}', '\u{1cf7}', L), ('\u{1cf8}', '\u{1cf9}', NSM), ('\u{1cfa}',
'\u{1cfa}', L), ('\u{1d00}', '\u{1dbf}', L), ('\u{1dc0}', '\u{1dff}', NSM), ('\u{1e00}',
'\u{1f15}', L), ('\u{1f18}', '\u{1f1d}', L), ('\u{1f20}', '\u{1f45}', L), ('\u{1f48}',
'\u{1f4d}', L), ('\u{1f50}', '\u{1f57}', L), ('\u{1f59}', '\u{1f59}', L), ('\u{1f5b}',
'\u{1f5b}', L), ('\u{1f5d}', '\u{1f5d}', L), ('\u{1f5f}', '\u{1f7d}', L), ('\u{1f80}',
'\u{1fb4}', L), ('\u{1fb6}', '\u{1fbc}', L), ('\u{1fbd}', '\u{1fbd}', ON), ('\u{1fbe}',
'\u{1fbe}', L), ('\u{1fbf}', '\u{1fc1}', ON), ('\u{1fc2}', '\u{1fc4}', L), ('\u{1fc6}',
'\u{1fcc}', L), ('\u{1fcd}', '\u{1fcf}', ON), ('\u{1fd0}', '\u{1fd3}', L), ('\u{1fd6}',
'\u{1fdb}', L), ('\u{1fdd}', '\u{1fdf}', ON), ('\u{1fe0}', '\u{1fec}', L), ('\u{1fed}',
'\u{1fef}', ON), ('\u{1ff2}', '\u{1ff4}', L), ('\u{1ff6}', '\u{1ffc}', L), ('\u{1ffd}',
'\u{1ffe}', ON), ('\u{2000}', '\u{200a}', WS), ('\u{200b}', '\u{200d}', BN), ('\u{200e}',
'\u{200e}', L), ('\u{200f}', '\u{200f}', R), ('\u{2010}', '\u{2027}', ON), ('\u{2028}',
'\u{2028}', WS), ('\u{2029}', '\u{2029}', B), ('\u{202a}', '\u{202a}', LRE), ('\u{202b}',
'\u{202b}', RLE), ('\u{202c}', '\u{202c}', PDF), ('\u{202d}', '\u{202d}', LRO), ('\u{202e}',
'\u{202e}', RLO), ('\u{202f}', '\u{202f}', CS), ('\u{2030}', '\u{2034}', ET), ('\u{2035}',
'\u{2043}', ON), ('\u{2044}', '\u{2044}', CS), ('\u{2045}', '\u{205e}', ON), ('\u{205f}',
'\u{205f}', WS), ('\u{2060}', '\u{2064}', BN), ('\u{2066}', '\u{2066}', LRI), ('\u{2067}',
'\u{2067}', RLI), ('\u{2068}', '\u{2068}', FSI), ('\u{2069}', '\u{2069}', PDI), ('\u{206a}',
'\u{206f}', BN), ('\u{2070}', '\u{2070}', EN), ('\u{2071}', '\u{2071}', L), ('\u{2074}',
'\u{2079}', EN), ('\u{207a}', '\u{207b}', ES), ('\u{207c}', '\u{207e}', ON), ('\u{207f}',
'\u{207f}', L), ('\u{2080}', '\u{2089}', EN), ('\u{208a}', '\u{208b}', ES), ('\u{208c}',
'\u{208e}', ON), ('\u{2090}', '\u{209c}', L), ('\u{20a0}', '\u{20cf}', ET), ('\u{20d0}',
'\u{20f0}', NSM), ('\u{2100}', '\u{2101}', ON), ('\u{2102}', '\u{2102}', L), ('\u{2103}',
'\u{2106}', ON), ('\u{2107}', '\u{2107}', L), ('\u{2108}', '\u{2109}', ON), ('\u{210a}',
'\u{2113}', L), ('\u{2114}', '\u{2114}', ON), ('\u{2115}', '\u{2115}', L), ('\u{2116}',
'\u{2118}', ON), ('\u{2119}', '\u{211d}', L), ('\u{211e}', '\u{2123}', ON), ('\u{2124}',
'\u{2124}', L), ('\u{2125}', '\u{2125}', ON), ('\u{2126}', '\u{2126}', L), ('\u{2127}',
'\u{2127}', ON), ('\u{2128}', '\u{2128}', L), ('\u{2129}', '\u{2129}', ON), ('\u{212a}',
'\u{212d}', L), ('\u{212e}', '\u{212e}', ET), ('\u{212f}', '\u{2139}', L), ('\u{213a}',
'\u{213b}', ON), ('\u{213c}', '\u{213f}', L), ('\u{2140}', '\u{2144}', ON), ('\u{2145}',
'\u{2149}', L), ('\u{214a}', '\u{214d}', ON), ('\u{214e}', '\u{214f}', L), ('\u{2150}',
'\u{215f}', ON), ('\u{2160}', '\u{2188}', L), ('\u{2189}', '\u{218b}', ON), ('\u{2190}',
'\u{2211}', ON), ('\u{2212}', '\u{2212}', ES), ('\u{2213}', '\u{2213}', ET), ('\u{2214}',
'\u{2335}', ON), ('\u{2336}', '\u{237a}', L), ('\u{237b}', '\u{2394}', ON), ('\u{2395}',
'\u{2395}', L), ('\u{2396}', '\u{2429}', ON), ('\u{2440}', '\u{244a}', ON), ('\u{2460}',
'\u{2487}', ON), ('\u{2488}', '\u{249b}', EN), ('\u{249c}', '\u{24e9}', L), ('\u{24ea}',
'\u{26ab}', ON), ('\u{26ac}', '\u{26ac}', L), ('\u{26ad}', '\u{27ff}', ON), ('\u{2800}',
'\u{28ff}', L), ('\u{2900}', '\u{2b73}', ON), ('\u{2b76}', '\u{2b95}', ON), ('\u{2b97}',
'\u{2bff}', ON), ('\u{2c00}', '\u{2ce4}', L), ('\u{2ce5}', '\u{2cea}', ON), ('\u{2ceb}',
'\u{2cee}', L), ('\u{2cef}', '\u{2cf1}', NSM), ('\u{2cf2}', '\u{2cf3}', L), ('\u{2cf9}',
'\u{2cff}', ON), ('\u{2d00}', '\u{2d25}', L), ('\u{2d27}', '\u{2d27}', L), ('\u{2d2d}',
'\u{2d2d}', L), ('\u{2d30}', '\u{2d67}', L), ('\u{2d6f}', '\u{2d70}', L), ('\u{2d7f}',
'\u{2d7f}', NSM), ('\u{2d80}', '\u{2d96}', L), ('\u{2da0}', '\u{2da6}', L), ('\u{2da8}',
'\u{2dae}', L), ('\u{2db0}', '\u{2db6}', L), ('\u{2db8}', '\u{2dbe}', L), ('\u{2dc0}',
'\u{2dc6}', L), ('\u{2dc8}', '\u{2dce}', L), ('\u{2dd0}', '\u{2dd6}', L), ('\u{2dd8}',
'\u{2dde}', L), ('\u{2de0}', '\u{2dff}', NSM), ('\u{2e00}', '\u{2e5d}', ON), ('\u{2e80}',
'\u{2e99}', ON), ('\u{2e9b}', '\u{2ef3}', ON), ('\u{2f00}', '\u{2fd5}', ON), ('\u{2ff0}',
'\u{2fff}', ON), ('\u{3000}', '\u{3000}', WS), ('\u{3001}', '\u{3004}', ON), ('\u{3005}',
'\u{3007}', L), ('\u{3008}', '\u{3020}', ON), ('\u{3021}', '\u{3029}', L), ('\u{302a}',
'\u{302d}', NSM), ('\u{302e}', '\u{302f}', L), ('\u{3030}', '\u{3030}', ON), ('\u{3031}',
'\u{3035}', L), ('\u{3036}', '\u{3037}', ON), ('\u{3038}', '\u{303c}', L), ('\u{303d}',
'\u{303f}', ON), ('\u{3041}', '\u{3096}', L), ('\u{3099}', '\u{309a}', NSM), ('\u{309b}',
'\u{309c}', ON), ('\u{309d}', '\u{309f}', L), ('\u{30a0}', '\u{30a0}', ON), ('\u{30a1}',
'\u{30fa}', L), ('\u{30fb}', '\u{30fb}', ON), ('\u{30fc}', '\u{30ff}', L), ('\u{3105}',
'\u{312f}', L), ('\u{3131}', '\u{318e}', L), ('\u{3190}', '\u{31bf}', L), ('\u{31c0}',
'\u{31e5}', ON), ('\u{31ef}', '\u{31ef}', ON), ('\u{31f0}', '\u{321c}', L), ('\u{321d}',
'\u{321e}', ON), ('\u{3220}', '\u{324f}', L), ('\u{3250}', '\u{325f}', ON), ('\u{3260}',
'\u{327b}', L), ('\u{327c}', '\u{327e}', ON), ('\u{327f}', '\u{32b0}', L), ('\u{32b1}',
'\u{32bf}', ON), ('\u{32c0}', '\u{32cb}', L), ('\u{32cc}', '\u{32cf}', ON), ('\u{32d0}',
'\u{3376}', L), ('\u{3377}', '\u{337a}', ON), ('\u{337b}', '\u{33dd}', L), ('\u{33de}',
'\u{33df}', ON), ('\u{33e0}', '\u{33fe}', L), ('\u{33ff}', '\u{33ff}', ON), ('\u{3400}',
'\u{4dbf}', L), ('\u{4dc0}', '\u{4dff}', ON), ('\u{4e00}', '\u{a48c}', L), ('\u{a490}',
'\u{a4c6}', ON), ('\u{a4d0}', '\u{a60c}', L), ('\u{a60d}', '\u{a60f}', ON), ('\u{a610}',
'\u{a62b}', L), ('\u{a640}', '\u{a66e}', L), ('\u{a66f}', '\u{a672}', NSM), ('\u{a673}',
'\u{a673}', ON), ('\u{a674}', '\u{a67d}', NSM), ('\u{a67e}', '\u{a67f}', ON), ('\u{a680}',
'\u{a69d}', L), ('\u{a69e}', '\u{a69f}', NSM), ('\u{a6a0}', '\u{a6ef}', L), ('\u{a6f0}',
'\u{a6f1}', NSM), ('\u{a6f2}', '\u{a6f7}', L), ('\u{a700}', '\u{a721}', ON), ('\u{a722}',
'\u{a787}', L), ('\u{a788}', '\u{a788}', ON), ('\u{a789}', '\u{a7cd}', L), ('\u{a7d0}',
'\u{a7d1}', L), ('\u{a7d3}', '\u{a7d3}', L), ('\u{a7d5}', '\u{a7dc}', L), ('\u{a7f2}',
'\u{a801}', L), ('\u{a802}', '\u{a802}', NSM), ('\u{a803}', '\u{a805}', L), ('\u{a806}',
'\u{a806}', NSM), ('\u{a807}', '\u{a80a}', L), ('\u{a80b}', '\u{a80b}', NSM), ('\u{a80c}',
'\u{a824}', L), ('\u{a825}', '\u{a826}', NSM), ('\u{a827}', '\u{a827}', L), ('\u{a828}',
'\u{a82b}', ON), ('\u{a82c}', '\u{a82c}', NSM), ('\u{a830}', '\u{a837}', L), ('\u{a838}',
'\u{a839}', ET), ('\u{a840}', '\u{a873}', L), ('\u{a874}', '\u{a877}', ON), ('\u{a880}',
'\u{a8c3}', L), ('\u{a8c4}', '\u{a8c5}', NSM), ('\u{a8ce}', '\u{a8d9}', L), ('\u{a8e0}',
'\u{a8f1}', NSM), ('\u{a8f2}', '\u{a8fe}', L), ('\u{a8ff}', '\u{a8ff}', NSM), ('\u{a900}',
'\u{a925}', L), ('\u{a926}', '\u{a92d}', NSM), ('\u{a92e}', '\u{a946}', L), ('\u{a947}',
'\u{a951}', NSM), ('\u{a952}', '\u{a953}', L), ('\u{a95f}', '\u{a97c}', L), ('\u{a980}',
'\u{a982}', NSM), ('\u{a983}', '\u{a9b2}', L), ('\u{a9b3}', '\u{a9b3}', NSM), ('\u{a9b4}',
'\u{a9b5}', L), ('\u{a9b6}', '\u{a9b9}', NSM), ('\u{a9ba}', '\u{a9bb}', L), ('\u{a9bc}',
'\u{a9bd}', NSM), ('\u{a9be}', '\u{a9cd}', L), ('\u{a9cf}', '\u{a9d9}', L), ('\u{a9de}',
'\u{a9e4}', L), ('\u{a9e5}', '\u{a9e5}', NSM), ('\u{a9e6}', '\u{a9fe}', L), ('\u{aa00}',
'\u{aa28}', L), ('\u{aa29}', '\u{aa2e}', NSM), ('\u{aa2f}', '\u{aa30}', L), ('\u{aa31}',
'\u{aa32}', NSM), ('\u{aa33}', '\u{aa34}', L), ('\u{aa35}', '\u{aa36}', NSM), ('\u{aa40}',
'\u{aa42}', L), ('\u{aa43}', '\u{aa43}', NSM), ('\u{aa44}', '\u{aa4b}', L), ('\u{aa4c}',
'\u{aa4c}', NSM), ('\u{aa4d}', '\u{aa4d}', L), ('\u{aa50}', '\u{aa59}', L), ('\u{aa5c}',
'\u{aa7b}', L), ('\u{aa7c}', '\u{aa7c}', NSM), ('\u{aa7d}', '\u{aaaf}', L), ('\u{aab0}',
'\u{aab0}', NSM), ('\u{aab1}', '\u{aab1}', L), ('\u{aab2}', '\u{aab4}', NSM), ('\u{aab5}',
'\u{aab6}', L), ('\u{aab7}', '\u{aab8}', NSM), ('\u{aab9}', '\u{aabd}', L), ('\u{aabe}',
'\u{aabf}', NSM), ('\u{aac0}', '\u{aac0}', L), ('\u{aac1}', '\u{aac1}', NSM), ('\u{aac2}',
'\u{aac2}', L), ('\u{aadb}', '\u{aaeb}', L), ('\u{aaec}', '\u{aaed}', NSM), ('\u{aaee}',
'\u{aaf5}', L), ('\u{aaf6}', '\u{aaf6}', NSM), ('\u{ab01}', '\u{ab06}', L), ('\u{ab09}',
'\u{ab0e}', L), ('\u{ab11}', '\u{ab16}', L), ('\u{ab20}', '\u{ab26}', L), ('\u{ab28}',
'\u{ab2e}', L), ('\u{ab30}', '\u{ab69}', L), ('\u{ab6a}', '\u{ab6b}', ON), ('\u{ab70}',
'\u{abe4}', L), ('\u{abe5}', '\u{abe5}', NSM), ('\u{abe6}', '\u{abe7}', L), ('\u{abe8}',
'\u{abe8}', NSM), ('\u{abe9}', '\u{abec}', L), ('\u{abed}', '\u{abed}', NSM), ('\u{abf0}',
'\u{abf9}', L), ('\u{ac00}', '\u{d7a3}', L), ('\u{d7b0}', '\u{d7c6}', L), ('\u{d7cb}',
'\u{d7fb}', L), ('\u{e000}', '\u{fa6d}', L), ('\u{fa70}', '\u{fad9}', L), ('\u{fb00}',
'\u{fb06}', L), ('\u{fb13}', '\u{fb17}', L), ('\u{fb1d}', '\u{fb1d}', R), ('\u{fb1e}',
'\u{fb1e}', NSM), ('\u{fb1f}', '\u{fb28}', R), ('\u{fb29}', '\u{fb29}', ES), ('\u{fb2a}',
'\u{fb4f}', R), ('\u{fb50}', '\u{fd3d}', AL), ('\u{fd3e}', '\u{fd4f}', ON), ('\u{fd50}',
'\u{fdce}', AL), ('\u{fdcf}', '\u{fdcf}', ON), ('\u{fdf0}', '\u{fdfc}', AL), ('\u{fdfd}',
'\u{fdff}', ON), ('\u{fe00}', '\u{fe0f}', NSM), ('\u{fe10}', '\u{fe19}', ON), ('\u{fe20}',
'\u{fe2f}', NSM), ('\u{fe30}', '\u{fe4f}', ON), ('\u{fe50}', '\u{fe50}', CS), ('\u{fe51}',
'\u{fe51}', ON), ('\u{fe52}', '\u{fe52}', CS), ('\u{fe54}', '\u{fe54}', ON), ('\u{fe55}',
'\u{fe55}', CS), ('\u{fe56}', '\u{fe5e}', ON), ('\u{fe5f}', '\u{fe5f}', ET), ('\u{fe60}',
'\u{fe61}', ON), ('\u{fe62}', '\u{fe63}', ES), ('\u{fe64}', '\u{fe66}', ON), ('\u{fe68}',
'\u{fe68}', ON), ('\u{fe69}', '\u{fe6a}', ET), ('\u{fe6b}', '\u{fe6b}', ON), ('\u{fe70}',
'\u{fefe}', AL), ('\u{feff}', '\u{feff}', BN), ('\u{ff01}', '\u{ff02}', ON), ('\u{ff03}',
'\u{ff05}', ET), ('\u{ff06}', '\u{ff0a}', ON), ('\u{ff0b}', '\u{ff0b}', ES), ('\u{ff0c}',
'\u{ff0c}', CS), ('\u{ff0d}', '\u{ff0d}', ES), ('\u{ff0e}', '\u{ff0f}', CS), ('\u{ff10}',
'\u{ff19}', EN), ('\u{ff1a}', '\u{ff1a}', CS), ('\u{ff1b}', '\u{ff20}', ON), ('\u{ff21}',
'\u{ff3a}', L), ('\u{ff3b}', '\u{ff40}', ON), ('\u{ff41}', '\u{ff5a}', L), ('\u{ff5b}',
'\u{ff65}', ON), ('\u{ff66}', '\u{ffbe}', L), ('\u{ffc2}', '\u{ffc7}', L), ('\u{ffca}',
'\u{ffcf}', L), ('\u{ffd2}', '\u{ffd7}', L), ('\u{ffda}', '\u{ffdc}', L), ('\u{ffe0}',
'\u{ffe1}', ET), ('\u{ffe2}', '\u{ffe4}', ON), ('\u{ffe5}', '\u{ffe6}', ET), ('\u{ffe8}',
'\u{ffee}', ON), ('\u{fff9}', '\u{fffd}', ON), ('\u{10000}', '\u{1000b}', L), ('\u{1000d}',
'\u{10026}', L), ('\u{10028}', '\u{1003a}', L), ('\u{1003c}', '\u{1003d}', L), ('\u{1003f}',
'\u{1004d}', L), ('\u{10050}', '\u{1005d}', L), ('\u{10080}', '\u{100fa}', L), ('\u{10100}',
'\u{10100}', L), ('\u{10101}', '\u{10101}', ON), ('\u{10102}', '\u{10102}', L), ('\u{10107}',
'\u{10133}', L), ('\u{10137}', '\u{1013f}', L), ('\u{10140}', '\u{1018c}', ON), ('\u{1018d}',
'\u{1018e}', L), ('\u{10190}', '\u{1019c}', ON), ('\u{101a0}', '\u{101a0}', ON), ('\u{101d0}',
'\u{101fc}', L), ('\u{101fd}', '\u{101fd}', NSM), ('\u{10280}', '\u{1029c}', L), ('\u{102a0}',
'\u{102d0}', L), ('\u{102e0}', '\u{102e0}', NSM), ('\u{102e1}', '\u{102fb}', EN), ('\u{10300}',
'\u{10323}', L), ('\u{1032d}', '\u{1034a}', L), ('\u{10350}', '\u{10375}', L), ('\u{10376}',
'\u{1037a}', NSM), ('\u{10380}', '\u{1039d}', L), ('\u{1039f}', '\u{103c3}', L), ('\u{103c8}',
'\u{103d5}', L), ('\u{10400}', '\u{1049d}', L), ('\u{104a0}', '\u{104a9}', L), ('\u{104b0}',
'\u{104d3}', L), ('\u{104d8}', '\u{104fb}', L), ('\u{10500}', '\u{10527}', L), ('\u{10530}',
'\u{10563}', L), ('\u{1056f}', '\u{1057a}', L), ('\u{1057c}', '\u{1058a}', L), ('\u{1058c}',
'\u{10592}', L), ('\u{10594}', '\u{10595}', L), ('\u{10597}', '\u{105a1}', L), ('\u{105a3}',
'\u{105b1}', L), ('\u{105b3}', '\u{105b9}', L), ('\u{105bb}', '\u{105bc}', L), ('\u{105c0}',
'\u{105f3}', L), ('\u{10600}', '\u{10736}', L), ('\u{10740}', '\u{10755}', L), ('\u{10760}',
'\u{10767}', L), ('\u{10780}', '\u{10785}', L), ('\u{10787}', '\u{107b0}', L), ('\u{107b2}',
'\u{107ba}', L), ('\u{10800}', '\u{1091e}', R), ('\u{1091f}', '\u{1091f}', ON), ('\u{10920}',
'\u{10a00}', R), ('\u{10a01}', '\u{10a03}', NSM), ('\u{10a04}', '\u{10a04}', R), ('\u{10a05}',
'\u{10a06}', NSM), ('\u{10a07}', '\u{10a0b}', R), ('\u{10a0c}', '\u{10a0f}', NSM), ('\u{10a10}',
'\u{10a37}', R), ('\u{10a38}', '\u{10a3a}', NSM), ('\u{10a3b}', '\u{10a3e}', R), ('\u{10a3f}',
'\u{10a3f}', NSM), ('\u{10a40}', '\u{10ae4}', R), ('\u{10ae5}', '\u{10ae6}', NSM), ('\u{10ae7}',
'\u{10b38}', R), ('\u{10b39}', '\u{10b3f}', ON), ('\u{10b40}', '\u{10cff}', R), ('\u{10d00}',
'\u{10d23}', AL), ('\u{10d24}', '\u{10d27}', NSM), ('\u{10d28}', '\u{10d2f}', R), ('\u{10d30}',
'\u{10d39}', AN), ('\u{10d3a}', '\u{10d3f}', R), ('\u{10d40}', '\u{10d49}', AN), ('\u{10d4a}',
'\u{10d68}', R), ('\u{10d69}', '\u{10d6d}', NSM), ('\u{10d6e}', '\u{10d6e}', ON), ('\u{10d6f}',
'\u{10e5f}', R), ('\u{10e60}', '\u{10e7e}', AN), ('\u{10e7f}', '\u{10eaa}', R), ('\u{10eab}',
'\u{10eac}', NSM), ('\u{10ead}', '\u{10ec1}', R), ('\u{10ec2}', '\u{10ec4}', AL), ('\u{10ec5}',
'\u{10efb}', R), ('\u{10efc}', '\u{10eff}', NSM), ('\u{10f00}', '\u{10f2f}', R), ('\u{10f30}',
'\u{10f45}', AL), ('\u{10f46}', '\u{10f50}', NSM), ('\u{10f51}', '\u{10f59}', AL), ('\u{10f5a}',
'\u{10f81}', R), ('\u{10f82}', '\u{10f85}', NSM), ('\u{10f86}', '\u{10fff}', R), ('\u{11000}',
'\u{11000}', L), ('\u{11001}', '\u{11001}', NSM), ('\u{11002}', '\u{11037}', L), ('\u{11038}',
'\u{11046}', NSM), ('\u{11047}', '\u{1104d}', L), ('\u{11052}', '\u{11065}', ON), ('\u{11066}',
'\u{1106f}', L), ('\u{11070}', '\u{11070}', NSM), ('\u{11071}', '\u{11072}', L), ('\u{11073}',
'\u{11074}', NSM), ('\u{11075}', '\u{11075}', L), ('\u{1107f}', '\u{11081}', NSM), ('\u{11082}',
'\u{110b2}', L), ('\u{110b3}', '\u{110b6}', NSM), ('\u{110b7}', '\u{110b8}', L), ('\u{110b9}',
'\u{110ba}', NSM), ('\u{110bb}', '\u{110c1}', L), ('\u{110c2}', '\u{110c2}', NSM), ('\u{110cd}',
'\u{110cd}', L), ('\u{110d0}', '\u{110e8}', L), ('\u{110f0}', '\u{110f9}', L), ('\u{11100}',
'\u{11102}', NSM), ('\u{11103}', '\u{11126}', L), ('\u{11127}', '\u{1112b}', NSM), ('\u{1112c}',
'\u{1112c}', L), ('\u{1112d}', '\u{11134}', NSM), ('\u{11136}', '\u{11147}', L), ('\u{11150}',
'\u{11172}', L), ('\u{11173}', '\u{11173}', NSM), ('\u{11174}', '\u{11176}', L), ('\u{11180}',
'\u{11181}', NSM), ('\u{11182}', '\u{111b5}', L), ('\u{111b6}', '\u{111be}', NSM), ('\u{111bf}',
'\u{111c8}', L), ('\u{111c9}', '\u{111cc}', NSM), ('\u{111cd}', '\u{111ce}', L), ('\u{111cf}',
'\u{111cf}', NSM), ('\u{111d0}', '\u{111df}', L), ('\u{111e1}', '\u{111f4}', L), ('\u{11200}',
'\u{11211}', L), ('\u{11213}', '\u{1122e}', L), ('\u{1122f}', '\u{11231}', NSM), ('\u{11232}',
'\u{11233}', L), ('\u{11234}', '\u{11234}', NSM), ('\u{11235}', '\u{11235}', L), ('\u{11236}',
'\u{11237}', NSM), ('\u{11238}', '\u{1123d}', L), ('\u{1123e}', '\u{1123e}', NSM), ('\u{1123f}',
'\u{11240}', L), ('\u{11241}', '\u{11241}', NSM), ('\u{11280}', '\u{11286}', L), ('\u{11288}',
'\u{11288}', L), ('\u{1128a}', '\u{1128d}', L), ('\u{1128f}', '\u{1129d}', L), ('\u{1129f}',
'\u{112a9}', L), ('\u{112b0}', '\u{112de}', L), ('\u{112df}', '\u{112df}', NSM), ('\u{112e0}',
'\u{112e2}', L), ('\u{112e3}', '\u{112ea}', NSM), ('\u{112f0}', '\u{112f9}', L), ('\u{11300}',
'\u{11301}', NSM), ('\u{11302}', '\u{11303}', L), ('\u{11305}', '\u{1130c}', L), ('\u{1130f}',
'\u{11310}', L), ('\u{11313}', '\u{11328}', L), ('\u{1132a}', '\u{11330}', L), ('\u{11332}',
'\u{11333}', L), ('\u{11335}', '\u{11339}', L), ('\u{1133b}', '\u{1133c}', NSM), ('\u{1133d}',
'\u{1133f}', L), ('\u{11340}', '\u{11340}', NSM), ('\u{11341}', '\u{11344}', L), ('\u{11347}',
'\u{11348}', L), ('\u{1134b}', '\u{1134d}', L), ('\u{11350}', '\u{11350}', L), ('\u{11357}',
'\u{11357}', L), ('\u{1135d}', '\u{11363}', L), ('\u{11366}', '\u{1136c}', NSM), ('\u{11370}',
'\u{11374}', NSM), ('\u{11380}', '\u{11389}', L), ('\u{1138b}', '\u{1138b}', L), ('\u{1138e}',
'\u{1138e}', L), ('\u{11390}', '\u{113b5}', L), ('\u{113b7}', '\u{113ba}', L), ('\u{113bb}',
'\u{113c0}', NSM), ('\u{113c2}', '\u{113c2}', L), ('\u{113c5}', '\u{113c5}', L), ('\u{113c7}',
'\u{113ca}', L), ('\u{113cc}', '\u{113cd}', L), ('\u{113ce}', '\u{113ce}', NSM), ('\u{113cf}',
'\u{113cf}', L), ('\u{113d0}', '\u{113d0}', NSM), ('\u{113d1}', '\u{113d1}', L), ('\u{113d2}',
'\u{113d2}', NSM), ('\u{113d3}', '\u{113d5}', L), ('\u{113d7}', '\u{113d8}', L), ('\u{113e1}',
'\u{113e2}', NSM), ('\u{11400}', '\u{11437}', L), ('\u{11438}', '\u{1143f}', NSM), ('\u{11440}',
'\u{11441}', L), ('\u{11442}', '\u{11444}', NSM), ('\u{11445}', '\u{11445}', L), ('\u{11446}',
'\u{11446}', NSM), ('\u{11447}', '\u{1145b}', L), ('\u{1145d}', '\u{1145d}', L), ('\u{1145e}',
'\u{1145e}', NSM), ('\u{1145f}', '\u{11461}', L), ('\u{11480}', '\u{114b2}', L), ('\u{114b3}',
'\u{114b8}', NSM), ('\u{114b9}', '\u{114b9}', L), ('\u{114ba}', '\u{114ba}', NSM), ('\u{114bb}',
'\u{114be}', L), ('\u{114bf}', '\u{114c0}', NSM), ('\u{114c1}', '\u{114c1}', L), ('\u{114c2}',
'\u{114c3}', NSM), ('\u{114c4}', '\u{114c7}', L), ('\u{114d0}', '\u{114d9}', L), ('\u{11580}',
'\u{115b1}', L), ('\u{115b2}', '\u{115b5}', NSM), ('\u{115b8}', '\u{115bb}', L), ('\u{115bc}',
'\u{115bd}', NSM), ('\u{115be}', '\u{115be}', L), ('\u{115bf}', '\u{115c0}', NSM), ('\u{115c1}',
'\u{115db}', L), ('\u{115dc}', '\u{115dd}', NSM), ('\u{11600}', '\u{11632}', L), ('\u{11633}',
'\u{1163a}', NSM), ('\u{1163b}', '\u{1163c}', L), ('\u{1163d}', '\u{1163d}', NSM), ('\u{1163e}',
'\u{1163e}', L), ('\u{1163f}', '\u{11640}', NSM), ('\u{11641}', '\u{11644}', L), ('\u{11650}',
'\u{11659}', L), ('\u{11660}', '\u{1166c}', ON), ('\u{11680}', '\u{116aa}', L), ('\u{116ab}',
'\u{116ab}', NSM), ('\u{116ac}', '\u{116ac}', L), ('\u{116ad}', '\u{116ad}', NSM), ('\u{116ae}',
'\u{116af}', L), ('\u{116b0}', '\u{116b5}', NSM), ('\u{116b6}', '\u{116b6}', L), ('\u{116b7}',
'\u{116b7}', NSM), ('\u{116b8}', '\u{116b9}', L), ('\u{116c0}', '\u{116c9}', L), ('\u{116d0}',
'\u{116e3}', L), ('\u{11700}', '\u{1171a}', L), ('\u{1171d}', '\u{1171d}', NSM), ('\u{1171e}',
'\u{1171e}', L), ('\u{1171f}', '\u{1171f}', NSM), ('\u{11720}', '\u{11721}', L), ('\u{11722}',
'\u{11725}', NSM), ('\u{11726}', '\u{11726}', L), ('\u{11727}', '\u{1172b}', NSM), ('\u{11730}',
'\u{11746}', L), ('\u{11800}', '\u{1182e}', L), ('\u{1182f}', '\u{11837}', NSM), ('\u{11838}',
'\u{11838}', L), ('\u{11839}', '\u{1183a}', NSM), ('\u{1183b}', '\u{1183b}', L), ('\u{118a0}',
'\u{118f2}', L), ('\u{118ff}', '\u{11906}', L), ('\u{11909}', '\u{11909}', L), ('\u{1190c}',
'\u{11913}', L), ('\u{11915}', '\u{11916}', L), ('\u{11918}', '\u{11935}', L), ('\u{11937}',
'\u{11938}', L), ('\u{1193b}', '\u{1193c}', NSM), ('\u{1193d}', '\u{1193d}', L), ('\u{1193e}',
'\u{1193e}', NSM), ('\u{1193f}', '\u{11942}', L), ('\u{11943}', '\u{11943}', NSM), ('\u{11944}',
'\u{11946}', L), ('\u{11950}', '\u{11959}', L), ('\u{119a0}', '\u{119a7}', L), ('\u{119aa}',
'\u{119d3}', L), ('\u{119d4}', '\u{119d7}', NSM), ('\u{119da}', '\u{119db}', NSM), ('\u{119dc}',
'\u{119df}', L), ('\u{119e0}', '\u{119e0}', NSM), ('\u{119e1}', '\u{119e4}', L), ('\u{11a00}',
'\u{11a00}', L), ('\u{11a01}', '\u{11a06}', NSM), ('\u{11a07}', '\u{11a08}', L), ('\u{11a09}',
'\u{11a0a}', NSM), ('\u{11a0b}', '\u{11a32}', L), ('\u{11a33}', '\u{11a38}', NSM), ('\u{11a39}',
'\u{11a3a}', L), ('\u{11a3b}', '\u{11a3e}', NSM), ('\u{11a3f}', '\u{11a46}', L), ('\u{11a47}',
'\u{11a47}', NSM), ('\u{11a50}', '\u{11a50}', L), ('\u{11a51}', '\u{11a56}', NSM), ('\u{11a57}',
'\u{11a58}', L), ('\u{11a59}', '\u{11a5b}', NSM), ('\u{11a5c}', '\u{11a89}', L), ('\u{11a8a}',
'\u{11a96}', NSM), ('\u{11a97}', '\u{11a97}', L), ('\u{11a98}', '\u{11a99}', NSM), ('\u{11a9a}',
'\u{11aa2}', L), ('\u{11ab0}', '\u{11af8}', L), ('\u{11b00}', '\u{11b09}', L), ('\u{11bc0}',
'\u{11be1}', L), ('\u{11bf0}', '\u{11bf9}', L), ('\u{11c00}', '\u{11c08}', L), ('\u{11c0a}',
'\u{11c2f}', L), ('\u{11c30}', '\u{11c36}', NSM), ('\u{11c38}', '\u{11c3d}', NSM), ('\u{11c3e}',
'\u{11c45}', L), ('\u{11c50}', '\u{11c6c}', L), ('\u{11c70}', '\u{11c8f}', L), ('\u{11c92}',
'\u{11ca7}', NSM), ('\u{11ca9}', '\u{11ca9}', L), ('\u{11caa}', '\u{11cb0}', NSM), ('\u{11cb1}',
'\u{11cb1}', L), ('\u{11cb2}', '\u{11cb3}', NSM), ('\u{11cb4}', '\u{11cb4}', L), ('\u{11cb5}',
'\u{11cb6}', NSM), ('\u{11d00}', '\u{11d06}', L), ('\u{11d08}', '\u{11d09}', L), ('\u{11d0b}',
'\u{11d30}', L), ('\u{11d31}', '\u{11d36}', NSM), ('\u{11d3a}', '\u{11d3a}', NSM), ('\u{11d3c}',
'\u{11d3d}', NSM), ('\u{11d3f}', '\u{11d45}', NSM), ('\u{11d46}', '\u{11d46}', L), ('\u{11d47}',
'\u{11d47}', NSM), ('\u{11d50}', '\u{11d59}', L), ('\u{11d60}', '\u{11d65}', L), ('\u{11d67}',
'\u{11d68}', L), ('\u{11d6a}', '\u{11d8e}', L), ('\u{11d90}', '\u{11d91}', NSM), ('\u{11d93}',
'\u{11d94}', L), ('\u{11d95}', '\u{11d95}', NSM), ('\u{11d96}', '\u{11d96}', L), ('\u{11d97}',
'\u{11d97}', NSM), ('\u{11d98}', '\u{11d98}', L), ('\u{11da0}', '\u{11da9}', L), ('\u{11ee0}',
'\u{11ef2}', L), ('\u{11ef3}', '\u{11ef4}', NSM), ('\u{11ef5}', '\u{11ef8}', L), ('\u{11f00}',
'\u{11f01}', NSM), ('\u{11f02}', '\u{11f10}', L), ('\u{11f12}', '\u{11f35}', L), ('\u{11f36}',
'\u{11f3a}', NSM), ('\u{11f3e}', '\u{11f3f}', L), ('\u{11f40}', '\u{11f40}', NSM), ('\u{11f41}',
'\u{11f41}', L), ('\u{11f42}', '\u{11f42}', NSM), ('\u{11f43}', '\u{11f59}', L), ('\u{11f5a}',
'\u{11f5a}', NSM), ('\u{11fb0}', '\u{11fb0}', L), ('\u{11fc0}', '\u{11fd4}', L), ('\u{11fd5}',
'\u{11fdc}', ON), ('\u{11fdd}', '\u{11fe0}', ET), ('\u{11fe1}', '\u{11ff1}', ON), ('\u{11fff}',
'\u{12399}', L), ('\u{12400}', '\u{1246e}', L), ('\u{12470}', '\u{12474}', L), ('\u{12480}',
'\u{12543}', L), ('\u{12f90}', '\u{12ff2}', L), ('\u{13000}', '\u{1343f}', L), ('\u{13440}',
'\u{13440}', NSM), ('\u{13441}', '\u{13446}', L), ('\u{13447}', '\u{13455}', NSM), ('\u{13460}',
'\u{143fa}', L), ('\u{14400}', '\u{14646}', L), ('\u{16100}', '\u{1611d}', L), ('\u{1611e}',
'\u{16129}', NSM), ('\u{1612a}', '\u{1612c}', L), ('\u{1612d}', '\u{1612f}', NSM), ('\u{16130}',
'\u{16139}', L), ('\u{16800}', '\u{16a38}', L), ('\u{16a40}', '\u{16a5e}', L), ('\u{16a60}',
'\u{16a69}', L), ('\u{16a6e}', '\u{16abe}', L), ('\u{16ac0}', '\u{16ac9}', L), ('\u{16ad0}',
'\u{16aed}', L), ('\u{16af0}', '\u{16af4}', NSM), ('\u{16af5}', '\u{16af5}', L), ('\u{16b00}',
'\u{16b2f}', L), ('\u{16b30}', '\u{16b36}', NSM), ('\u{16b37}', '\u{16b45}', L), ('\u{16b50}',
'\u{16b59}', L), ('\u{16b5b}', '\u{16b61}', L), ('\u{16b63}', '\u{16b77}', L), ('\u{16b7d}',
'\u{16b8f}', L), ('\u{16d40}', '\u{16d79}', L), ('\u{16e40}', '\u{16e9a}', L), ('\u{16f00}',
'\u{16f4a}', L), ('\u{16f4f}', '\u{16f4f}', NSM), ('\u{16f50}', '\u{16f87}', L), ('\u{16f8f}',
'\u{16f92}', NSM), ('\u{16f93}', '\u{16f9f}', L), ('\u{16fe0}', '\u{16fe1}', L), ('\u{16fe2}',
'\u{16fe2}', ON), ('\u{16fe3}', '\u{16fe3}', L), ('\u{16fe4}', '\u{16fe4}', NSM), ('\u{16ff0}',
'\u{16ff1}', L), ('\u{17000}', '\u{187f7}', L), ('\u{18800}', '\u{18cd5}', L), ('\u{18cff}',
'\u{18d08}', L), ('\u{1aff0}', '\u{1aff3}', L), ('\u{1aff5}', '\u{1affb}', L), ('\u{1affd}',
'\u{1affe}', L), ('\u{1b000}', '\u{1b122}', L), ('\u{1b132}', '\u{1b132}', L), ('\u{1b150}',
'\u{1b152}', L), ('\u{1b155}', '\u{1b155}', L), ('\u{1b164}', '\u{1b167}', L), ('\u{1b170}',
'\u{1b2fb}', L), ('\u{1bc00}', '\u{1bc6a}', L), ('\u{1bc70}', '\u{1bc7c}', L), ('\u{1bc80}',
'\u{1bc88}', L), ('\u{1bc90}', '\u{1bc99}', L), ('\u{1bc9c}', '\u{1bc9c}', L), ('\u{1bc9d}',
'\u{1bc9e}', NSM), ('\u{1bc9f}', '\u{1bc9f}', L), ('\u{1bca0}', '\u{1bca3}', BN), ('\u{1cc00}',
'\u{1ccd5}', ON), ('\u{1ccd6}', '\u{1ccef}', L), ('\u{1ccf0}', '\u{1ccf9}', EN), ('\u{1cd00}',
'\u{1ceb3}', ON), ('\u{1cf00}', '\u{1cf2d}', NSM), ('\u{1cf30}', '\u{1cf46}', NSM),
('\u{1cf50}', '\u{1cfc3}', L), ('\u{1d000}', '\u{1d0f5}', L), ('\u{1d100}', '\u{1d126}', L),
('\u{1d129}', '\u{1d166}', L), ('\u{1d167}', '\u{1d169}', NSM), ('\u{1d16a}', '\u{1d172}', L),
('\u{1d173}', '\u{1d17a}', BN), ('\u{1d17b}', '\u{1d182}', NSM), ('\u{1d183}', '\u{1d184}', L),
('\u{1d185}', '\u{1d18b}', NSM), ('\u{1d18c}', '\u{1d1a9}', L), ('\u{1d1aa}', '\u{1d1ad}', NSM),
('\u{1d1ae}', '\u{1d1e8}', L), ('\u{1d1e9}', '\u{1d1ea}', ON), ('\u{1d200}', '\u{1d241}', ON),
('\u{1d242}', '\u{1d244}', NSM), ('\u{1d245}', '\u{1d245}', ON), ('\u{1d2c0}', '\u{1d2d3}', L),
('\u{1d2e0}', '\u{1d2f3}', L), ('\u{1d300}', '\u{1d356}', ON), ('\u{1d360}', '\u{1d378}', L),
('\u{1d400}', '\u{1d454}', L), ('\u{1d456}', '\u{1d49c}', L), ('\u{1d49e}', '\u{1d49f}', L),
('\u{1d4a2}', '\u{1d4a2}', L), ('\u{1d4a5}', '\u{1d4a6}', L), ('\u{1d4a9}', '\u{1d4ac}', L),
('\u{1d4ae}', '\u{1d4b9}', L), ('\u{1d4bb}', '\u{1d4bb}', L), ('\u{1d4bd}', '\u{1d4c3}', L),
('\u{1d4c5}', '\u{1d505}', L), ('\u{1d507}', '\u{1d50a}', L), ('\u{1d50d}', '\u{1d514}', L),
('\u{1d516}', '\u{1d51c}', L), ('\u{1d51e}', '\u{1d539}', L), ('\u{1d53b}', '\u{1d53e}', L),
('\u{1d540}', '\u{1d544}', L), ('\u{1d546}', '\u{1d546}', L), ('\u{1d54a}', '\u{1d550}', L),
('\u{1d552}', '\u{1d6a5}', L), ('\u{1d6a8}', '\u{1d6c0}', L), ('\u{1d6c1}', '\u{1d6c1}', ON),
('\u{1d6c2}', '\u{1d6da}', L), ('\u{1d6db}', '\u{1d6db}', ON), ('\u{1d6dc}', '\u{1d6fa}', L),
('\u{1d6fb}', '\u{1d6fb}', ON), ('\u{1d6fc}', '\u{1d714}', L), ('\u{1d715}', '\u{1d715}', ON),
('\u{1d716}', '\u{1d734}', L), ('\u{1d735}', '\u{1d735}', ON), ('\u{1d736}', '\u{1d74e}', L),
('\u{1d74f}', '\u{1d74f}', ON), ('\u{1d750}', '\u{1d76e}', L), ('\u{1d76f}', '\u{1d76f}', ON),
('\u{1d770}', '\u{1d788}', L), ('\u{1d789}', '\u{1d789}', ON), ('\u{1d78a}', '\u{1d7a8}', L),
('\u{1d7a9}', '\u{1d7a9}', ON), ('\u{1d7aa}', '\u{1d7c2}', L), ('\u{1d7c3}', '\u{1d7c3}', ON),
('\u{1d7c4}', '\u{1d7cb}', L), ('\u{1d7ce}', '\u{1d7ff}', EN), ('\u{1d800}', '\u{1d9ff}', L),
('\u{1da00}', '\u{1da36}', NSM), ('\u{1da37}', '\u{1da3a}', L), ('\u{1da3b}', '\u{1da6c}', NSM),
('\u{1da6d}', '\u{1da74}', L), ('\u{1da75}', '\u{1da75}', NSM), ('\u{1da76}', '\u{1da83}', L),
('\u{1da84}', '\u{1da84}', NSM), ('\u{1da85}', '\u{1da8b}', L), ('\u{1da9b}', '\u{1da9f}', NSM),
('\u{1daa1}', '\u{1daaf}', NSM), ('\u{1df00}', '\u{1df1e}', L), ('\u{1df25}', '\u{1df2a}', L),
('\u{1e000}', '\u{1e006}', NSM), ('\u{1e008}', '\u{1e018}', NSM), ('\u{1e01b}', '\u{1e021}',
NSM), ('\u{1e023}', '\u{1e024}', NSM), ('\u{1e026}', '\u{1e02a}', NSM), ('\u{1e030}',
'\u{1e06d}', L), ('\u{1e08f}', '\u{1e08f}', NSM), ('\u{1e100}', '\u{1e12c}', L), ('\u{1e130}',
'\u{1e136}', NSM), ('\u{1e137}', '\u{1e13d}', L), ('\u{1e140}', '\u{1e149}', L), ('\u{1e14e}',
'\u{1e14f}', L), ('\u{1e290}', '\u{1e2ad}', L), ('\u{1e2ae}', '\u{1e2ae}', NSM), ('\u{1e2c0}',
'\u{1e2eb}', L), ('\u{1e2ec}', '\u{1e2ef}', NSM), ('\u{1e2f0}', '\u{1e2f9}', L), ('\u{1e2ff}',
'\u{1e2ff}', ET), ('\u{1e4d0}', '\u{1e4eb}', L), ('\u{1e4ec}', '\u{1e4ef}', NSM), ('\u{1e4f0}',
'\u{1e4f9}', L), ('\u{1e5d0}', '\u{1e5ed}', L), ('\u{1e5ee}', '\u{1e5ef}', NSM), ('\u{1e5f0}',
'\u{1e5fa}', L), ('\u{1e5ff}', '\u{1e5ff}', L), ('\u{1e7e0}', '\u{1e7e6}', L), ('\u{1e7e8}',
'\u{1e7eb}', L), ('\u{1e7ed}', '\u{1e7ee}', L), ('\u{1e7f0}', '\u{1e7fe}', L), ('\u{1e800}',
'\u{1e8cf}', R), ('\u{1e8d0}', '\u{1e8d6}', NSM), ('\u{1e8d7}', '\u{1e943}', R), ('\u{1e944}',
'\u{1e94a}', NSM), ('\u{1e94b}', '\u{1ec70}', R), ('\u{1ec71}', '\u{1ecb4}', AL), ('\u{1ecb5}',
'\u{1ed00}', R), ('\u{1ed01}', '\u{1ed3d}', AL), ('\u{1ed3e}', '\u{1edff}', R), ('\u{1ee00}',
'\u{1eeef}', AL), ('\u{1eef0}', '\u{1eef1}', ON), ('\u{1eef2}', '\u{1eeff}', AL), ('\u{1ef00}',
'\u{1efff}', R), ('\u{1f000}', '\u{1f02b}', ON), ('\u{1f030}', '\u{1f093}', ON), ('\u{1f0a0}',
'\u{1f0ae}', ON), ('\u{1f0b1}', '\u{1f0bf}', ON), ('\u{1f0c1}', '\u{1f0cf}', ON), ('\u{1f0d1}',
'\u{1f0f5}', ON), ('\u{1f100}', '\u{1f10a}', EN), ('\u{1f10b}', '\u{1f10f}', ON), ('\u{1f110}',
'\u{1f12e}', L), ('\u{1f12f}', '\u{1f12f}', ON), ('\u{1f130}', '\u{1f169}', L), ('\u{1f16a}',
'\u{1f16f}', ON), ('\u{1f170}', '\u{1f1ac}', L), ('\u{1f1ad}', '\u{1f1ad}', ON), ('\u{1f1e6}',
'\u{1f202}', L), ('\u{1f210}', '\u{1f23b}', L), ('\u{1f240}', '\u{1f248}', L), ('\u{1f250}',
'\u{1f251}', L), ('\u{1f260}', '\u{1f265}', ON), ('\u{1f300}', '\u{1f6d7}', ON), ('\u{1f6dc}',
'\u{1f6ec}', ON), ('\u{1f6f0}', '\u{1f6fc}', ON), ('\u{1f700}', '\u{1f776}', ON), ('\u{1f77b}',
'\u{1f7d9}', ON), ('\u{1f7e0}', '\u{1f7eb}', ON), ('\u{1f7f0}', '\u{1f7f0}', ON), ('\u{1f800}',
'\u{1f80b}', ON), ('\u{1f810}', '\u{1f847}', ON), ('\u{1f850}', '\u{1f859}', ON), ('\u{1f860}',
'\u{1f887}', ON), ('\u{1f890}', '\u{1f8ad}', ON), ('\u{1f8b0}', '\u{1f8bb}', ON), ('\u{1f8c0}',
'\u{1f8c1}', ON), ('\u{1f900}', '\u{1fa53}', ON), ('\u{1fa60}', '\u{1fa6d}', ON), ('\u{1fa70}',
'\u{1fa7c}', ON), ('\u{1fa80}', '\u{1fa89}', ON), ('\u{1fa8f}', '\u{1fac6}', ON), ('\u{1face}',
'\u{1fadc}', ON), ('\u{1fadf}', '\u{1fae9}', ON), ('\u{1faf0}', '\u{1faf8}', ON), ('\u{1fb00}',
'\u{1fb92}', ON), ('\u{1fb94}', '\u{1fbef}', ON), ('\u{1fbf0}', '\u{1fbf9}', EN), ('\u{20000}',
'\u{2a6df}', L), ('\u{2a700}', '\u{2b739}', L), ('\u{2b740}', '\u{2b81d}', L), ('\u{2b820}',
'\u{2cea1}', L), ('\u{2ceb0}', '\u{2ebe0}', L), ('\u{2ebf0}', '\u{2ee5d}', L), ('\u{2f800}',
'\u{2fa1d}', L), ('\u{30000}', '\u{3134a}', L), ('\u{31350}', '\u{323af}', L), ('\u{e0001}',
'\u{e0001}', BN), ('\u{e0020}', '\u{e007f}', BN), ('\u{e0100}', '\u{e01ef}', NSM), ('\u{f0000}',
'\u{ffffd}', L), ('\u{100000}', '\u{10fffd}', L)
];
/// Table of paired brackets, one tuple per bracket pair.
///
/// The first two elements of each tuple are the two brackets forming a pair
/// (for example `(` / `)`, `[` / `]`, `{` / `}`).
///
/// NOTE(review): the third element is presumably the normalized form of the opening bracket
/// for pairs that are not already normalized (only the U+2329 / U+232A entry carries
/// `Some('\u{3008}')` here; every other entry is `None`) — confirm against the table generator.
pub const bidi_pairs_table: &'static [(char, char, Option<char>)] = &[
('\u{28}', '\u{29}', None), ('\u{5b}', '\u{5d}', None), ('\u{7b}', '\u{7d}', None), ('\u{f3a}',
'\u{f3b}', None), ('\u{f3c}', '\u{f3d}', None), ('\u{169b}', '\u{169c}', None), ('\u{2045}',
'\u{2046}', None), ('\u{207d}', '\u{207e}', None), ('\u{208d}', '\u{208e}', None), ('\u{2308}',
'\u{2309}', None), ('\u{230a}', '\u{230b}', None), ('\u{2329}', '\u{232a}', Some('\u{3008}')),
('\u{2768}', '\u{2769}', None), ('\u{276a}', '\u{276b}', None), ('\u{276c}', '\u{276d}', None),
('\u{276e}', '\u{276f}', None), ('\u{2770}', '\u{2771}', None), ('\u{2772}', '\u{2773}', None),
('\u{2774}', '\u{2775}', None), ('\u{27c5}', '\u{27c6}', None), ('\u{27e6}', '\u{27e7}', None),
('\u{27e8}', '\u{27e9}', None), ('\u{27ea}', '\u{27eb}', None), ('\u{27ec}', '\u{27ed}', None),
('\u{27ee}', '\u{27ef}', None), ('\u{2983}', '\u{2984}', None), ('\u{2985}', '\u{2986}', None),
('\u{2987}', '\u{2988}', None), ('\u{2989}', '\u{298a}', None), ('\u{298b}', '\u{298c}', None),
('\u{298d}', '\u{2990}', None), ('\u{298f}', '\u{298e}', None), ('\u{2991}', '\u{2992}', None),
('\u{2993}', '\u{2994}', None), ('\u{2995}', '\u{2996}', None), ('\u{2997}', '\u{2998}', None),
('\u{29d8}', '\u{29d9}', None), ('\u{29da}', '\u{29db}', None), ('\u{29fc}', '\u{29fd}', None),
('\u{2e22}', '\u{2e23}', None), ('\u{2e24}', '\u{2e25}', None), ('\u{2e26}', '\u{2e27}', None),
('\u{2e28}', '\u{2e29}', None), ('\u{2e55}', '\u{2e56}', None), ('\u{2e57}', '\u{2e58}', None),
('\u{2e59}', '\u{2e5a}', None), ('\u{2e5b}', '\u{2e5c}', None), ('\u{3008}', '\u{3009}', None),
('\u{300a}', '\u{300b}', None), ('\u{300c}', '\u{300d}', None), ('\u{300e}', '\u{300f}', None),
('\u{3010}', '\u{3011}', None), ('\u{3014}', '\u{3015}', None), ('\u{3016}', '\u{3017}', None),
('\u{3018}', '\u{3019}', None), ('\u{301a}', '\u{301b}', None), ('\u{fe59}', '\u{fe5a}', None),
('\u{fe5b}', '\u{fe5c}', None), ('\u{fe5d}', '\u{fe5e}', None), ('\u{ff08}', '\u{ff09}', None),
('\u{ff3b}', '\u{ff3d}', None), ('\u{ff5b}', '\u{ff5d}', None), ('\u{ff5f}', '\u{ff60}', None),
('\u{ff62}', '\u{ff63}', None)
];

46
vendor/unicode-bidi/src/data_source.rs vendored Normal file
View File

@@ -0,0 +1,46 @@
// Copyright 2015 The Servo Project Developers. See the
// COPYRIGHT file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use crate::BidiClass;
/// This is the return value of [`BidiDataSource::bidi_matched_opening_bracket()`].
///
/// It represents the matching *normalized* opening bracket for a given bracket in a bracket pair,
/// and whether or not that bracket is opening.
#[derive(Debug, Copy, Clone)]
pub struct BidiMatchedOpeningBracket {
/// The corresponding opening bracket in this bracket pair, normalized.
///
/// In case of opening brackets, this will be the bracket itself, except for when the bracket
/// is not normalized, in which case it will be the normalized form.
pub opening: char,
/// Whether or not the requested bracket was an opening bracket: `true` if the character
/// that was looked up is itself an opening bracket.
pub is_open: bool,
}
/// This trait abstracts over a data source that is able to produce the Unicode Bidi class for a given
/// character.
pub trait BidiDataSource {
/// Return the [`BidiClass`] of the character `c`.
///
/// This is the only method an implementor is required to provide.
fn bidi_class(&self, c: char) -> BidiClass;
/// If this character is a bracket according to BidiBrackets.txt,
/// return the corresponding *normalized* *opening bracket* of the pair,
/// and whether or not it itself is an opening bracket.
///
/// This effectively buckets brackets into equivalence classes keyed on the
/// normalized opening bracket.
///
/// The default implementation will pull in a small amount of hardcoded data,
/// regardless of the `hardcoded-data` feature. This is in part for convenience
/// (since this data is small and changes less often), and in part so that this method can be
/// added without needing a breaking version bump.
/// Override this method in your custom data source to prevent the use of hardcoded data.
fn bidi_matched_opening_bracket(&self, c: char) -> Option<BidiMatchedOpeningBracket> {
// Delegate to the crate's built-in bracket lookup.
crate::char_data::bidi_matched_opening_bracket(c)
}
}

89
vendor/unicode-bidi/src/deprecated.rs vendored Normal file
View File

@@ -0,0 +1,89 @@
// Copyright 2015 The Servo Project Developers. See the
// COPYRIGHT file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//! This module holds deprecated assets only.
use super::*;
/// Split a line into level runs and return those runs in visual order.
///
/// NOTE: This implementation is incomplete. The algorithm needs information about the text,
/// including the original `BidiClass` property of each character, to be able to perform
/// correctly. Please see [`BidiInfo::visual_runs()`](../struct.BidiInfo.html#method.visual_runs)
/// for the improved implementation.
///
/// `line` is a range of byte indices within `levels`.
///
/// # Panics
///
/// Panics if `line.start` or `line.end` is greater than `levels.len()`. It also panics when
/// `line.start == levels.len()`, because the level at `line.start` is read unconditionally.
///
/// <http://www.unicode.org/reports/tr9/#Reordering_Resolved_Levels>
#[deprecated(
    since = "0.3.0",
    note = "please use `BidiInfo::visual_runs()` instead."
)]
pub fn visual_runs(line: Range<usize>, levels: &[Level]) -> Vec<LevelRun> {
    assert!(line.start <= levels.len());
    assert!(line.end <= levels.len());

    // Pass 1: cut the line into maximal runs of equal level, tracking the lowest and highest
    // level seen along the way.
    let mut runs = Vec::new();
    let mut run_start = line.start;
    let mut current_level = levels[run_start];
    let mut lowest = current_level;
    let mut highest = current_level;
    for idx in (line.start + 1)..line.end {
        let level = levels[idx];
        if level == current_level {
            continue;
        }
        // The level changed: close the previous run and open a new one at `idx`.
        runs.push(run_start..idx);
        run_start = idx;
        current_level = level;
        lowest = cmp::min(level, lowest);
        highest = cmp::max(level, highest);
    }
    runs.push(run_start..line.end);

    // Pass 2: re-order the runs.
    // <http://www.unicode.org/reports/tr9/#L2>
    // Work downwards from the highest level and stop at the lowest *odd* level.
    let run_count = runs.len();
    let lowest_odd = lowest.new_lowest_ge_rtl().expect("Level error");
    while highest >= lowest_odd {
        let mut cursor = 0;
        while cursor < run_count {
            // Length of the sequence of consecutive runs, starting at `cursor`, whose level is
            // `highest` or above.
            let seq_len = runs[cursor..]
                .iter()
                .take_while(|run| levels[run.start] >= highest)
                .count();
            match seq_len {
                // The run at `cursor` is below the current level; move on.
                0 => cursor += 1,
                // Reverse the runs within the sequence and resume right after it.
                n => {
                    runs[cursor..cursor + n].reverse();
                    cursor += n;
                }
            }
        }
        highest
            .lower(1)
            .expect("Lowering embedding level below zero");
    }
    runs
}

229
vendor/unicode-bidi/src/explicit.rs vendored Normal file
View File

@@ -0,0 +1,229 @@
// Copyright 2015 The Servo Project Developers. See the
// COPYRIGHT file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//! 3.3.2 Explicit Levels and Directions
//!
//! <http://www.unicode.org/reports/tr9/#Explicit_Levels_and_Directions>
#[cfg(feature = "smallvec")]
use smallvec::{smallvec, SmallVec};
use super::char_data::{
is_rtl,
BidiClass::{self, *},
};
use super::level::Level;
use super::prepare::removed_by_x9;
use super::LevelRunVec;
use super::TextSource;
/// Compute explicit embedding levels for one paragraph of text (X1-X8), and identify
/// level runs (BD7) for use when determining Isolating Run Sequences (X10).
///
/// `original_classes[i]` is read as the `BidiClass` of the char at byte index `i`; it must have
/// exactly as many entries as `text` has bytes.
///
/// `levels[i]` receives the embedding level of the char starting at byte index `i` (and is
/// copied to the remaining bytes of a multi-byte char). Chars of class `B` are not assigned a
/// level here and keep whatever value was passed in.
///
/// `processing_classes[i]` must contain the `BidiClass` of the char at byte index `i`,
/// for each char in `text`. It is updated in place: explicit embedding/override characters and
/// PDF become `BN`, and characters inside an override are changed to `L` or `R`.
///
/// `runs` returns the list of level runs (BD7) of the text. Runs are appended to it.
///
/// # Panics
///
/// Panics if `text.len() != original_classes.len()`. `levels` and `processing_classes` are
/// indexed by the byte indices of `text`, so they are assumed to be at least that long.
#[cfg_attr(feature = "flame_it", flamer::flame)]
pub fn compute<'a, T: TextSource<'a> + ?Sized>(
text: &'a T,
para_level: Level,
original_classes: &[BidiClass],
levels: &mut [Level],
processing_classes: &mut [BidiClass],
runs: &mut LevelRunVec,
) {
assert_eq!(text.len(), original_classes.len());
// <http://www.unicode.org/reports/tr9/#X1>
// The directional status stack starts with a single entry for the paragraph level.
#[cfg(feature = "smallvec")]
let mut stack: SmallVec<[Status; 8]> = smallvec![Status {
level: para_level,
status: OverrideStatus::Neutral,
}];
#[cfg(not(feature = "smallvec"))]
let mut stack = vec![Status {
level: para_level,
status: OverrideStatus::Neutral,
}];
let mut overflow_isolate_count = 0u32;
let mut overflow_embedding_count = 0u32;
let mut valid_isolate_count = 0u32;
// Placeholder value; replaced by the level of the first char when `i == 0` below.
let mut current_run_level = Level::ltr();
let mut current_run_start = 0;
// `i` is the byte index of each char and `len` the number of bytes it occupies.
for (i, len) in text.indices_lengths() {
let last = stack.last().unwrap();
match original_classes[i] {
// Rules X2-X5c
RLE | LRE | RLO | LRO | RLI | LRI | FSI => {
// <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
levels[i] = last.level;
// X5a-X5c: Isolate initiators get the level of the last entry on the stack.
let is_isolate = matches!(original_classes[i], RLI | LRI | FSI);
if is_isolate {
// Redundant due to "Retaining explicit formatting characters" step.
// levels[i] = last.level;
match last.status {
OverrideStatus::RTL => processing_classes[i] = R,
OverrideStatus::LTR => processing_classes[i] = L,
_ => {}
}
}
// Next higher odd level for right-to-left classes, next higher even level otherwise.
let new_level = if is_rtl(original_classes[i]) {
last.level.new_explicit_next_rtl()
} else {
last.level.new_explicit_next_ltr()
};
// Push a new entry only if the new level is valid and no overflow is pending.
if new_level.is_ok() && overflow_isolate_count == 0 && overflow_embedding_count == 0
{
let new_level = new_level.unwrap();
stack.push(Status {
level: new_level,
status: match original_classes[i] {
RLO => OverrideStatus::RTL,
LRO => OverrideStatus::LTR,
RLI | LRI | FSI => OverrideStatus::Isolate,
_ => OverrideStatus::Neutral,
},
});
if is_isolate {
valid_isolate_count += 1;
} else {
// The spec doesn't explicitly mention this step, but it is necessary.
// See the reference implementations for comparison.
levels[i] = new_level;
}
} else if is_isolate {
overflow_isolate_count += 1;
} else if overflow_isolate_count == 0 {
overflow_embedding_count += 1;
}
if !is_isolate {
// X9 +
// <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
// (PDF handled below)
processing_classes[i] = BN;
}
}
// <http://www.unicode.org/reports/tr9/#X6a>
PDI => {
if overflow_isolate_count > 0 {
overflow_isolate_count -= 1;
} else if valid_isolate_count > 0 {
overflow_embedding_count = 0;
// Pop entries until an isolate entry has been popped (or the stack is empty).
while !matches!(
stack.pop(),
None | Some(Status {
status: OverrideStatus::Isolate,
..
})
) {}
valid_isolate_count -= 1;
}
// Re-read the top of the stack: it may have changed because of the pops above.
let last = stack.last().unwrap();
levels[i] = last.level;
match last.status {
OverrideStatus::RTL => processing_classes[i] = R,
OverrideStatus::LTR => processing_classes[i] = L,
_ => {}
}
}
// <http://www.unicode.org/reports/tr9/#X7>
PDF => {
if overflow_isolate_count > 0 {
// do nothing
} else if overflow_embedding_count > 0 {
overflow_embedding_count -= 1;
} else if last.status != OverrideStatus::Isolate && stack.len() >= 2 {
// Never pop an isolate entry or the bottom (paragraph level) entry.
stack.pop();
}
// <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
levels[i] = stack.last().unwrap().level;
// X9 part of retaining explicit formatting characters.
processing_classes[i] = BN;
}
// Nothing.
// BN case moved down to X6, see <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
B => {}
// <http://www.unicode.org/reports/tr9/#X6>
_ => {
levels[i] = last.level;
// This condition is not in the spec, but I am pretty sure that is a spec bug.
// https://www.unicode.org/L2/L2023/23014-amd-to-uax9.pdf
if original_classes[i] != BN {
match last.status {
OverrideStatus::RTL => processing_classes[i] = R,
OverrideStatus::LTR => processing_classes[i] = L,
_ => {}
}
}
}
}
// Handle multi-byte characters: every byte of a char gets the values of its first byte.
for j in 1..len {
levels[i + j] = levels[i];
processing_classes[i + j] = processing_classes[i];
}
// Identify level runs to be passed to prepare::isolating_run_sequences().
if i == 0 {
// Initialize for the first (or only) run.
current_run_level = levels[i];
} else {
// Check if we need to start a new level run.
// <https://www.unicode.org/reports/tr9/#BD7>
// Characters removed by X9 never start a new run.
if !removed_by_x9(original_classes[i]) && levels[i] != current_run_level {
// End the last run and start a new one.
runs.push(current_run_start..i);
current_run_level = levels[i];
current_run_start = i;
}
}
}
// Append the trailing level run, if non-empty.
if levels.len() > current_run_start {
runs.push(current_run_start..levels.len());
}
}
/// Entries in the directional status stack:
struct Status {
/// Embedding level associated with this stack entry.
level: Level,
/// Override/isolate state that was recorded when this entry was pushed.
status: OverrideStatus,
}
/// Directional override / isolate state stored in each [`Status`] stack entry.
#[derive(PartialEq)]
enum OverrideStatus {
/// No override: used for the paragraph-level entry and for entries pushed by embeddings
/// (`RLE` / `LRE`).
Neutral,
/// Right-to-left override, pushed by `RLO`; affected characters are processed as `R`.
RTL,
/// Left-to-right override, pushed by `LRO`; affected characters are processed as `L`.
LTR,
/// Entry pushed by an isolate initiator (`RLI` / `LRI` / `FSI`).
Isolate,
}

42
vendor/unicode-bidi/src/format_chars.rs vendored Normal file
View File

@@ -0,0 +1,42 @@
// Copyright 2017 The Servo Project Developers. See the
// COPYRIGHT file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//! Directional Formatting Characters
//!
//! <http://www.unicode.org/reports/tr9/#Directional_Formatting_Characters>
// == Implicit ==
/// ARABIC LETTER MARK
pub const ALM: char = '\u{061C}';
/// LEFT-TO-RIGHT MARK
pub const LRM: char = '\u{200E}';
/// RIGHT-TO-LEFT MARK
pub const RLM: char = '\u{200F}';
// == Explicit Isolates ==
/// LEFT-TO-RIGHT ISOLATE
pub const LRI: char = '\u{2066}';
/// RIGHT-TO-LEFT ISOLATE
pub const RLI: char = '\u{2067}';
/// FIRST STRONG ISOLATE
pub const FSI: char = '\u{2068}';
/// POP DIRECTIONAL ISOLATE
pub const PDI: char = '\u{2069}';
// == Explicit Embeddings and Overrides ==
/// LEFT-TO-RIGHT EMBEDDING
pub const LRE: char = '\u{202A}';
/// RIGHT-TO-LEFT EMBEDDING
pub const RLE: char = '\u{202B}';
/// POP DIRECTIONAL FORMATTING
pub const PDF: char = '\u{202C}';
/// LEFT-TO-RIGHT OVERRIDE
pub const LRO: char = '\u{202D}';
/// RIGHT-TO-LEFT OVERRIDE
pub const RLO: char = '\u{202E}';

606
vendor/unicode-bidi/src/implicit.rs vendored Normal file
View File

@@ -0,0 +1,606 @@
// Copyright 2015 The Servo Project Developers. See the
// COPYRIGHT file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//! 3.3.4 - 3.3.6. Resolve implicit levels and types.
#[cfg(not(feature = "smallvec"))]
use alloc::vec::Vec;
use core::cmp::max;
#[cfg(feature = "smallvec")]
use smallvec::SmallVec;
use super::char_data::BidiClass::{self, *};
use super::level::Level;
use super::prepare::{not_removed_by_x9, IsolatingRunSequence};
use super::{BidiDataSource, TextSource};
/// 3.3.4 Resolving Weak Types
///
/// <http://www.unicode.org/reports/tr9/#Resolving_Weak_Types>
#[cfg_attr(feature = "flame_it", flamer::flame)]
pub fn resolve_weak<'a, T: TextSource<'a> + ?Sized>(
text: &'a T,
sequence: &IsolatingRunSequence,
processing_classes: &mut [BidiClass],
) {
// Note: The spec treats these steps as individual passes that are applied one after the other
// on the entire IsolatingRunSequence at once. We instead collapse it into a single iteration,
// which is straightforward for rules that are based on the state of the current character, but not
// for rules that care about surrounding characters. To deal with them, we retain additional state
// about previous character classes that may have since been changed by later rules.
// The previous class for the purposes of rule W4/W6, not tracking changes made after or during W4.
let mut prev_class_before_w4 = sequence.sos;
// The previous class for the purposes of rule W5.
let mut prev_class_before_w5 = sequence.sos;
// The previous class for the purposes of rule W1, not tracking changes from any other rules.
let mut prev_class_before_w1 = sequence.sos;
let mut last_strong_is_al = false;
#[cfg(feature = "smallvec")]
let mut et_run_indices = SmallVec::<[usize; 8]>::new(); // for W5
#[cfg(not(feature = "smallvec"))]
let mut et_run_indices = Vec::new(); // for W5
#[cfg(feature = "smallvec")]
let mut bn_run_indices = SmallVec::<[usize; 8]>::new(); // for W5 + <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
#[cfg(not(feature = "smallvec"))]
let mut bn_run_indices = Vec::new(); // for W5 + <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
for (run_index, level_run) in sequence.runs.iter().enumerate() {
for i in &mut level_run.clone() {
if processing_classes[i] == BN {
// <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
// Keeps track of bn runs for W5 in case we see an ET.
bn_run_indices.push(i);
// BNs aren't real, skip over them.
continue;
}
// Store the processing class of all rules before W2/W1.
// Used to keep track of the last strong character for W2. W3 is able to insert new strong
// characters, so we don't want to be misled by it.
let mut w2_processing_class = processing_classes[i];
// <http://www.unicode.org/reports/tr9/#W1>
//
if processing_classes[i] == NSM {
processing_classes[i] = match prev_class_before_w1 {
RLI | LRI | FSI | PDI => ON,
_ => prev_class_before_w1,
};
// W1 occurs before W2, update this.
w2_processing_class = processing_classes[i];
}
prev_class_before_w1 = processing_classes[i];
// <http://www.unicode.org/reports/tr9/#W2>
// <http://www.unicode.org/reports/tr9/#W3>
//
match processing_classes[i] {
EN => {
if last_strong_is_al {
// W2. If previous strong char was AL, change EN to AN.
processing_classes[i] = AN;
}
}
// W3.
AL => processing_classes[i] = R,
_ => {}
}
// update last_strong_is_al.
match w2_processing_class {
L | R => {
last_strong_is_al = false;
}
AL => {
last_strong_is_al = true;
}
_ => {}
}
let class_before_w456 = processing_classes[i];
// <http://www.unicode.org/reports/tr9/#W4>
// <http://www.unicode.org/reports/tr9/#W5>
// <http://www.unicode.org/reports/tr9/#W6> (separators only)
// (see below for W6 terminator code)
//
match processing_classes[i] {
// <http://www.unicode.org/reports/tr9/#W6>
EN => {
// W5. If a run of ETs is adjacent to an EN, change the ETs to EN.
for j in &et_run_indices {
processing_classes[*j] = EN;
}
et_run_indices.clear();
}
// <http://www.unicode.org/reports/tr9/#W4>
// <http://www.unicode.org/reports/tr9/#W6>
ES | CS => {
// See https://github.com/servo/unicode-bidi/issues/86 for improving this.
// We want to make sure we check the correct next character by skipping past the rest
// of this one.
if let Some((_, char_len)) = text.char_at(i) {
let mut next_class = sequence
.iter_forwards_from(i + char_len, run_index)
.map(|j| processing_classes[j])
// <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
.find(not_removed_by_x9)
.unwrap_or(sequence.eos);
if next_class == EN && last_strong_is_al {
// Apply W2 to next_class. We know that last_strong_is_al
// has no chance of changing on this character so we can still assume its value
// will be the same by the time we get to it.
next_class = AN;
}
processing_classes[i] =
match (prev_class_before_w4, processing_classes[i], next_class) {
// W4
(EN, ES, EN) | (EN, CS, EN) => EN,
// W4
(AN, CS, AN) => AN,
// W6 (separators only)
(_, _, _) => ON,
};
// W6 + <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
// We have to do this before W5 gets its grubby hands on these characters and thinks
// they're part of an ET run.
// We check for ON to ensure that we had hit the W6 branch above, since this `ES | CS` match
// arm handles both W4 and W6.
if processing_classes[i] == ON {
for idx in sequence.iter_backwards_from(i, run_index) {
let class = &mut processing_classes[idx];
if *class != BN {
break;
}
*class = ON;
}
for idx in sequence.iter_forwards_from(i + char_len, run_index) {
let class = &mut processing_classes[idx];
if *class != BN {
break;
}
*class = ON;
}
}
} else {
// We're in the middle of a character, copy over work done for previous bytes
// since it's going to be the same answer.
processing_classes[i] = processing_classes[i - 1];
}
}
// <http://www.unicode.org/reports/tr9/#W5>
ET => {
match prev_class_before_w5 {
EN => processing_classes[i] = EN,
_ => {
// <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
// If there was a BN run before this, that's now a part of this ET run.
et_run_indices.extend(bn_run_indices.clone());
// In case this is followed by an EN.
et_run_indices.push(i);
}
}
}
_ => {}
}
// Common loop iteration code
//
// <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
// BN runs would have already continued the loop, clear them before we get to the next one.
bn_run_indices.clear();
// W6 above only deals with separators, so it doesn't change anything W5 cares about,
// so we still can update this after running that part of W6.
prev_class_before_w5 = processing_classes[i];
// <http://www.unicode.org/reports/tr9/#W6> (terminators only)
// (see above for W6 separator code)
//
if prev_class_before_w5 != ET {
// W6. If we didn't find an adjacent EN, turn any ETs into ON instead.
for j in &et_run_indices {
processing_classes[*j] = ON;
}
et_run_indices.clear();
}
// We stashed this before W4/5/6 could get their grubby hands on it, and it's not
// used in the W6 terminator code below so we can update it now.
prev_class_before_w4 = class_before_w456;
}
}
// Rerun this check in case we ended with a sequence of BNs (i.e., we'd never
// hit the end of the for loop above).
// W6. If we didn't find an adjacent EN, turn any ETs into ON instead.
for j in &et_run_indices {
processing_classes[*j] = ON;
}
et_run_indices.clear();
// W7. If the previous strong char was L, change EN to L.
let mut last_strong_is_l = sequence.sos == L;
for i in sequence.runs.iter().cloned().flatten() {
match processing_classes[i] {
EN if last_strong_is_l => {
processing_classes[i] = L;
}
L => {
last_strong_is_l = true;
}
R | AL => {
last_strong_is_l = false;
}
// <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
// Already scanning past BN here.
_ => {}
}
}
}
/// Container for the bracket pairs collected for rule N0.
///
/// With the `smallvec` feature this is a `SmallVec` with an inline capacity of 8 pairs.
#[cfg(feature = "smallvec")]
type BracketPairVec = SmallVec<[BracketPair; 8]>;
/// Container for the bracket pairs collected for rule N0.
///
/// Without the `smallvec` feature this is a plain `Vec`.
#[cfg(not(feature = "smallvec"))]
type BracketPairVec = Vec<BracketPair>;
/// 3.3.5 Resolving Neutral Types
///
/// Resolves the neutral and isolate-formatting characters of one isolating run sequence by
/// applying rule N0 (paired brackets) followed by rules N1 and N2, rewriting
/// `processing_classes` in place.
///
/// * `text`: the text the indices refer to; used to measure bracket characters and to locate
///   bracket pairs.
/// * `data_source`: passed through to `identify_bracket_pairs`.
/// * `sequence`: the isolating run sequence being resolved.
/// * `levels`: per-position levels; the level at the start of the first run determines the
///   embedding direction `e`.
/// * `original_classes`: consulted to find characters whose original class was NSM.
/// * `processing_classes`: the working classes that are read and updated.
///
/// # Panics
///
/// Panics if `sequence.runs` is empty, since the first run is indexed unconditionally.
///
/// <http://www.unicode.org/reports/tr9/#Resolving_Neutral_Types>
#[cfg_attr(feature = "flame_it", flamer::flame)]
pub fn resolve_neutral<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
text: &'a T,
data_source: &D,
sequence: &IsolatingRunSequence,
levels: &[Level],
original_classes: &[BidiClass],
processing_classes: &mut [BidiClass],
) {
// e = embedding direction
let e: BidiClass = levels[sequence.runs[0].start].bidi_class();
// not_e = the strong direction opposite to the embedding direction
let not_e = if e == BidiClass::L {
BidiClass::R
} else {
BidiClass::L
};
// N0. Process bracket pairs.
// > Identify the bracket pairs in the current isolating run sequence according to BD16.
// We use processing_classes, not original_classes, due to BD14/BD15
let mut bracket_pairs = BracketPairVec::new();
identify_bracket_pairs(
text,
data_source,
sequence,
processing_classes,
&mut bracket_pairs,
);
// > For each bracket-pair element in the list of pairs of text positions
//
// Note: Rust ranges are interpreted as [start..end), be careful using `pair` directly
// for indexing as it will include the opening bracket pair but not the closing one.
for pair in bracket_pairs {
#[cfg(feature = "std")]
debug_assert!(
pair.start < processing_classes.len(),
"identify_bracket_pairs returned a range that is out of bounds!"
);
#[cfg(feature = "std")]
debug_assert!(
pair.end < processing_classes.len(),
"identify_bracket_pairs returned a range that is out of bounds!"
);
let mut found_e = false;
let mut found_not_e = false;
let mut class_to_set = None;
// Length of the opening bracket character, so that the scan below can start just past it.
let start_char_len =
T::char_len(text.subrange(pair.start..pair.end).chars().next().unwrap());
// > Inspect the bidirectional types of the characters enclosed within the bracket pair.
//
// The scan starts just past the opening bracket and stops on reaching `pair.end`, so
// neither bracket of the pair is inspected here.
//
for enclosed_i in sequence.iter_forwards_from(pair.start + start_char_len, pair.start_run) {
if enclosed_i >= pair.end {
#[cfg(feature = "std")]
debug_assert!(
enclosed_i == pair.end,
"If we skipped past this, the iterator is broken"
);
break;
}
let class = processing_classes[enclosed_i];
if class == e {
found_e = true;
} else if class == not_e {
found_not_e = true;
} else if matches!(class, BidiClass::EN | BidiClass::AN) {
// > Within this scope, bidirectional types EN and AN are treated as R.
if e == BidiClass::L {
found_not_e = true;
} else {
found_e = true;
}
}
// If we have found a character with the class of the embedding direction
// we can bail early.
if found_e {
break;
}
}
// > If any strong type (either L or R) matching the embedding direction is found
if found_e {
// > .. set the type for both brackets in the pair to match the embedding direction
class_to_set = Some(e);
// > Otherwise, if there is a strong type it must be opposite the embedding direction
} else if found_not_e {
// > Therefore, test for an established context with a preceding strong type by
// > checking backwards before the opening paired bracket
// > until the first strong type (L, R, or sos) is found.
// (see note above about processing_classes and character boundaries)
let mut previous_strong = sequence
.iter_backwards_from(pair.start, pair.start_run)
.map(|i| processing_classes[i])
.find(|class| {
matches!(
class,
BidiClass::L | BidiClass::R | BidiClass::EN | BidiClass::AN
)
})
.unwrap_or(sequence.sos);
// > Within this scope, bidirectional types EN and AN are treated as R.
if matches!(previous_strong, BidiClass::EN | BidiClass::AN) {
previous_strong = BidiClass::R;
}
// > If the preceding strong type is also opposite the embedding direction,
// > context is established,
// > so set the type for both brackets in the pair to that direction.
// AND
// > Otherwise set the type for both brackets in the pair to the embedding direction.
// > Either way it gets set to previous_strong
//
// Both branches amount to setting the type to the strong type.
class_to_set = Some(previous_strong);
}
if let Some(class_to_set) = class_to_set {
// Update all processing classes corresponding to the start and end elements, as requested.
// We should include all bytes of the character, not just the first one.
let end_char_len =
T::char_len(text.subrange(pair.end..text.len()).chars().next().unwrap());
for class in &mut processing_classes[pair.start..pair.start + start_char_len] {
*class = class_to_set;
}
for class in &mut processing_classes[pair.end..pair.end + end_char_len] {
*class = class_to_set;
}
// <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
// BNs directly preceding the opening bracket take the bracket's new class.
for idx in sequence.iter_backwards_from(pair.start, pair.start_run) {
let class = &mut processing_classes[idx];
if *class != BN {
break;
}
*class = class_to_set;
}
// > Any number of characters that had original bidirectional character type NSM prior to the application of
// > W1 that immediately follow a paired bracket which changed to L or R under N0 should change to match the type of their preceding bracket.
// This rule deals with sequences of NSMs, so we can just update them all at once, we don't need to worry
// about character boundaries. We do need to be careful to skip the full set of bytes for the parentheses characters.
let nsm_start = pair.start + start_char_len;
for idx in sequence.iter_forwards_from(nsm_start, pair.start_run) {
let class = original_classes[idx];
if class == BidiClass::NSM || processing_classes[idx] == BN {
processing_classes[idx] = class_to_set;
} else {
break;
}
}
let nsm_end = pair.end + end_char_len;
for idx in sequence.iter_forwards_from(nsm_end, pair.end_run) {
let class = original_classes[idx];
if class == BidiClass::NSM || processing_classes[idx] == BN {
processing_classes[idx] = class_to_set;
} else {
break;
}
}
}
// > Otherwise, there are no strong types within the bracket pair
// > Therefore, do not set the type for that bracket pair
}
// N1 and N2.
// Indices of every byte in this isolating run sequence
let mut indices = sequence.runs.iter().flat_map(Clone::clone);
// Class of the position just before the current one; starts as the sequence's `sos`.
let mut prev_class = sequence.sos;
// `while let` rather than `for`: the inner loop below pulls from the same iterator.
while let Some(mut i) = indices.next() {
// Process sequences of NI characters.
#[cfg(feature = "smallvec")]
let mut ni_run = SmallVec::<[usize; 8]>::new();
#[cfg(not(feature = "smallvec"))]
let mut ni_run = Vec::new();
// The BN is for <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
if is_NI(processing_classes[i]) || processing_classes[i] == BN {
// Consume a run of consecutive NI characters.
ni_run.push(i);
let mut next_class;
loop {
match indices.next() {
Some(j) => {
i = j;
next_class = processing_classes[j];
// The BN is for <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
if is_NI(next_class) || next_class == BN {
ni_run.push(i);
} else {
break;
}
}
None => {
// The run reaches the end of the sequence, so `eos` acts as the following class.
next_class = sequence.eos;
break;
}
};
}
// N1-N2.
//
// <http://www.unicode.org/reports/tr9/#N1>
// <http://www.unicode.org/reports/tr9/#N2>
let new_class = match (prev_class, next_class) {
(L, L) => L,
(R, R)
| (R, AN)
| (R, EN)
| (AN, R)
| (AN, AN)
| (AN, EN)
| (EN, R)
| (EN, AN)
| (EN, EN) => R,
(_, _) => e,
};
for j in &ni_run {
processing_classes[*j] = new_class;
}
ni_run.clear();
}
prev_class = processing_classes[i];
}
}
/// One matched pair of brackets, as recorded by `identify_bracket_pairs`.
///
/// Both the text positions of the two brackets and the indices of the level runs that contain
/// them are stored.
struct BracketPair {
/// The text-relative index of the opening bracket.
start: usize,
/// The text-relative index of the closing bracket.
end: usize,
/// The index of the run (in the run sequence) that the opening bracket is in.
start_run: usize,
/// The index of the run (in the run sequence) that the closing bracket is in.
end_run: usize,
}
/// 3.1.3 Identifying Bracket Pairs
///
/// Walks every level run of `run_sequence` in order and appends each bracket pair that is found
/// to `bracket_pairs`. On return the list is sorted by the text position of the opening bracket.
///
/// * `text`: the text the level runs index into.
/// * `data_source`: answers whether a character is an opening or closing paired bracket.
/// * `run_sequence`: the isolating run sequence to scan.
/// * `original_classes`: classes indexed by text position; only characters whose class here is
///   `ON` are considered as brackets.
/// * `bracket_pairs`: output list; pairs are appended to whatever it already contains.
///
/// If an opening bracket is met while 63 openers are already pending, scanning stops for the
/// whole isolating run sequence (not only for the current level run), as BD16 requires. Pairs
/// found before that point are kept.
///
/// <https://www.unicode.org/reports/tr9/#BD16>
fn identify_bracket_pairs<'a, T: TextSource<'a> + ?Sized, D: BidiDataSource>(
    text: &'a T,
    data_source: &D,
    run_sequence: &IsolatingRunSequence,
    original_classes: &[BidiClass],
    bracket_pairs: &mut BracketPairVec,
) {
    /// Maximum number of pending opening brackets that BD16 allows on the stack.
    const MAX_PENDING_OPENERS: usize = 63;

    // Each entry is (opening bracket character, text position, index of the containing run).
    #[cfg(feature = "smallvec")]
    let mut stack = SmallVec::<[(char, usize, usize); 8]>::new();
    #[cfg(not(feature = "smallvec"))]
    let mut stack = Vec::new();

    'sequence: for (run_index, level_run) in run_sequence.runs.iter().enumerate() {
        for (i, ch) in text.subrange(level_run.clone()).char_indices() {
            let actual_index = level_run.start + i;

            // All paren characters are ON.
            // From BidiBrackets.txt:
            // > The Unicode property value stability policy guarantees that characters
            // > which have bpt=o or bpt=c also have bc=ON and Bidi_M=Y
            if original_classes[actual_index] != BidiClass::ON {
                continue;
            }

            if let Some(matched) = data_source.bidi_matched_opening_bracket(ch) {
                if matched.is_open {
                    // > If an opening paired bracket is found ...

                    // > ... and there is no room in the stack,
                    // > stop processing BD16 for the remainder of the isolating run sequence.
                    //
                    // The labelled break matters: a plain `break` would only leave the
                    // current level run and later runs would still be scanned.
                    if stack.len() >= MAX_PENDING_OPENERS {
                        break 'sequence;
                    }
                    // > ... push its Bidi_Paired_Bracket property value and its text position onto the stack
                    stack.push((matched.opening, actual_index, run_index));
                } else {
                    // > If a closing paired bracket is found, do the following

                    // > Declare a variable that holds a reference to the current stack element
                    // > and initialize it with the top element of the stack.
                    // AND
                    // > Else, if the current stack element is not at the bottom of the stack
                    for (stack_index, element) in stack.iter().enumerate().rev() {
                        // > Compare the closing paired bracket being inspected or its canonical
                        // > equivalent to the bracket in the current stack element.
                        if element.0 == matched.opening {
                            // > If the values match, meaning the two characters form a bracket pair, then

                            // > Append the text position in the current stack element together with the
                            // > text position of the closing paired bracket to the list.
                            let pair = BracketPair {
                                start: element.1,
                                end: actual_index,
                                start_run: element.2,
                                end_run: run_index,
                            };
                            bracket_pairs.push(pair);

                            // > Pop the stack through the current stack element inclusively.
                            stack.truncate(stack_index);
                            break;
                        }
                    }
                }
            }
        }
    }

    // > Sort the list of pairs of text positions in ascending order based on
    // > the text position of the opening paired bracket.
    bracket_pairs.sort_by_key(|r| r.start);
}
/// 3.3.6 Resolving Implicit Levels
///
/// Raises each entry of `levels` according to the resolved class at the same position and
/// returns the highest level seen afterwards (never lower than the base LTR level).
///
/// # Panics
///
/// Panics if the two slices differ in length, or if raising a level fails.
///
/// <http://www.unicode.org/reports/tr9/#Resolving_Implicit_Levels>
#[cfg_attr(feature = "flame_it", flamer::flame)]
pub fn resolve_levels(processing_classes: &[BidiClass], levels: &mut [Level]) -> Level {
    assert_eq!(processing_classes.len(), levels.len());

    let mut highest = Level::ltr();
    for (level, &class) in levels.iter_mut().zip(processing_classes) {
        // How far this position has to be raised; 0 means "leave untouched".
        let bump = match (level.is_rtl(), class) {
            (false, AN) | (false, EN) => 2,
            (false, R) | (true, L) | (true, EN) | (true, AN) => 1,
            // <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters> handled here
            (_, _) => 0,
        };
        if bump != 0 {
            level.raise(bump).expect("Level number error");
        }
        highest = max(highest, *level);
    }
    highest
}
/// Returns `true` when `class` is an NI: a neutral type (B, S, WS, ON) or an isolate
/// formatting character (LRI, RLI, FSI, PDI).
///
/// <http://www.unicode.org/reports/tr9/#NI>
#[allow(non_snake_case)]
fn is_NI(class: BidiClass) -> bool {
    matches!(class, ON | WS | S | B | LRI | RLI | FSI | PDI)
}

401
vendor/unicode-bidi/src/level.rs vendored Normal file
View File

@@ -0,0 +1,401 @@
// Copyright 2017 The Servo Project Developers. See the
// COPYRIGHT file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//! Bidi Embedding Level
//!
//! See [`Level`](struct.Level.html) for more details.
//!
//! <http://www.unicode.org/reports/tr9/#BD2>
use alloc::{
string::{String, ToString},
vec::Vec,
};
use core::slice;
use super::char_data::BidiClass;
/// Embedding Level
///
/// Embedding Levels are numbers between 0 and 126 (inclusive), where even values denote a
/// left-to-right (LTR) direction and odd values a right-to-left (RTL) direction.
///
/// The checked constructors and mutators of this struct keep level numbers *valid*: they return
/// an `Error` when the resulting number would drop below `0` or exceed the applicable maximum,
/// which is 125 for the `*_explicit` operations and 126 for the others.
///
/// <http://www.unicode.org/reports/tr9/#BD2>
#[derive(Copy, Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[repr(transparent)]
pub struct Level(u8);
/// The lowest left-to-right level (number 0).
pub const LTR_LEVEL: Level = Level(0);
/// The lowest right-to-left level (number 1).
pub const RTL_LEVEL: Level = Level(1);
/// The value called `max_depth` in the documentation of this module.
const MAX_DEPTH: u8 = 125;
/// During explicit level resolution, embedding level can go as high as `max_depth`.
pub const MAX_EXPLICIT_DEPTH: u8 = MAX_DEPTH;
/// During implicit level resolution, embedding level can go as high as `max_depth + 1`.
pub const MAX_IMPLICIT_DEPTH: u8 = MAX_DEPTH + 1;
/// Errors that can occur on Level creation or mutation
#[derive(Debug, PartialEq)]
pub enum Error {
/// Out-of-range (invalid) embedding level number: the requested number is above the
/// permitted maximum, or a mutation would overflow or underflow the underlying `u8`.
OutOfRangeNumber,
}
impl Level {
/// New LTR level with smallest number value (0).
#[inline]
pub fn ltr() -> Level {
LTR_LEVEL
}
/// New RTL level with smallest number value (1).
#[inline]
pub fn rtl() -> Level {
RTL_LEVEL
}
/// Maximum depth of the directional status stack during implicit resolutions.
pub fn max_implicit_depth() -> u8 {
MAX_IMPLICIT_DEPTH
}
/// Maximum depth of the directional status stack during explicit resolutions.
pub fn max_explicit_depth() -> u8 {
MAX_EXPLICIT_DEPTH
}
// == Inquiries ==
/// Create new level, fail if number is larger than `max_depth + 1`.
#[inline]
pub fn new(number: u8) -> Result<Level, Error> {
if number <= MAX_IMPLICIT_DEPTH {
Ok(Level(number))
} else {
Err(Error::OutOfRangeNumber)
}
}
/// Create new level, fail if number is larger than `max_depth`.
#[inline]
pub fn new_explicit(number: u8) -> Result<Level, Error> {
if number <= MAX_EXPLICIT_DEPTH {
Ok(Level(number))
} else {
Err(Error::OutOfRangeNumber)
}
}
// == Inquiries ==
/// The level number.
#[inline]
pub fn number(&self) -> u8 {
self.0
}
/// If this level is left-to-right.
#[inline]
pub fn is_ltr(&self) -> bool {
self.0 % 2 == 0
}
/// If this level is right-to-left.
#[inline]
pub fn is_rtl(&self) -> bool {
self.0 % 2 == 1
}
// == Mutators ==
/// Raise level by `amount`, fail if number is larger than `max_depth + 1`.
#[inline]
pub fn raise(&mut self, amount: u8) -> Result<(), Error> {
match self.0.checked_add(amount) {
Some(number) => {
if number <= MAX_IMPLICIT_DEPTH {
self.0 = number;
Ok(())
} else {
Err(Error::OutOfRangeNumber)
}
}
None => Err(Error::OutOfRangeNumber),
}
}
/// Raise level by `amount`, fail if number is larger than `max_depth`.
#[inline]
pub fn raise_explicit(&mut self, amount: u8) -> Result<(), Error> {
match self.0.checked_add(amount) {
Some(number) => {
if number <= MAX_EXPLICIT_DEPTH {
self.0 = number;
Ok(())
} else {
Err(Error::OutOfRangeNumber)
}
}
None => Err(Error::OutOfRangeNumber),
}
}
/// Lower level by `amount`, fail if number goes below zero.
#[inline]
pub fn lower(&mut self, amount: u8) -> Result<(), Error> {
match self.0.checked_sub(amount) {
Some(number) => {
self.0 = number;
Ok(())
}
None => Err(Error::OutOfRangeNumber),
}
}
// == Helpers ==
/// The next LTR (even) level greater than this, or fail if number is larger than `max_depth`.
#[inline]
pub fn new_explicit_next_ltr(&self) -> Result<Level, Error> {
Level::new_explicit((self.0 + 2) & !1)
}
/// The next RTL (odd) level greater than this, or fail if number is larger than `max_depth`.
#[inline]
pub fn new_explicit_next_rtl(&self) -> Result<Level, Error> {
Level::new_explicit((self.0 + 1) | 1)
}
/// The lowest RTL (odd) level greater than or equal to this, or fail if number is larger than
/// `max_depth + 1`.
#[inline]
pub fn new_lowest_ge_rtl(&self) -> Result<Level, Error> {
Level::new(self.0 | 1)
}
/// Generate a character type based on a level (as specified in steps X10 and N2).
#[inline]
pub fn bidi_class(&self) -> BidiClass {
if self.is_rtl() {
BidiClass::R
} else {
BidiClass::L
}
}
pub fn vec(v: &[u8]) -> Vec<Level> {
v.iter().map(|&x| x.into()).collect()
}
/// Converts a byte slice to a slice of Levels
///
/// Does _not_ check if each level is within bounds (`<=` [`MAX_IMPLICIT_DEPTH`]),
/// which is not a requirement for safety but is a requirement for correctness of the algorithm.
pub fn from_slice_unchecked(v: &[u8]) -> &[Level] {
debug_assert_eq!(core::mem::size_of::<u8>(), core::mem::size_of::<Level>());
unsafe {
// Safety: The two arrays are the same size and layout-compatible since
// Level is `repr(transparent)` over `u8`
slice::from_raw_parts(v as *const [u8] as *const u8 as *const Level, v.len())
}
}
}
/// If levels has any RTL (odd) level
///
/// This information is usually used to skip re-ordering of text when no RTL level is present
#[inline]
pub fn has_rtl(levels: &[Level]) -> bool {
levels.iter().any(|&lvl| lvl.is_rtl())
}
impl From<Level> for u8 {
    /// Extracts the level number from a `Level`.
    #[inline]
    fn from(val: Level) -> u8 {
        val.0
    }
}
impl From<u8> for Level {
/// Create level by number
#[inline]
fn from(number: u8) -> Level {
Level::new(number).expect("Level number error")
}
}
/// Compares a level with its textual form, as used by the conformance tests.
///
/// The string `"x"` matches every level; any other string must equal the level number
/// written in decimal.
impl<'a> PartialEq<&'a str> for Level {
    #[inline]
    fn eq(&self, s: &&'a str) -> bool {
        match *s {
            "x" => true,
            expected => self.0.to_string() == expected,
        }
    }
}
/// Used for matching levels in conformance tests
impl PartialEq<String> for Level {
#[inline]
fn eq(&self, s: &String) -> bool {
self == &s.as_str()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `Level::new` accepts 0..=126 and rejects everything above.
    #[test]
    fn test_new() {
        for &number in &[0u8, 1, 10, 125, 126] {
            assert_eq!(Level::new(number), Ok(Level(number)));
        }
        for &number in &[127u8, 255] {
            assert_eq!(Level::new(number), Err(Error::OutOfRangeNumber));
        }
    }

    /// `Level::new_explicit` accepts 0..=125 and rejects everything above.
    #[test]
    fn test_new_explicit() {
        for &number in &[0u8, 1, 10, 125] {
            assert_eq!(Level::new_explicit(number), Ok(Level(number)));
        }
        for &number in &[126u8, 255] {
            assert_eq!(Level::new_explicit(number), Err(Error::OutOfRangeNumber));
        }
    }

    /// Even numbers are LTR.
    #[test]
    fn test_is_ltr() {
        assert!(Level(0).is_ltr());
        assert!(!Level(1).is_ltr());
        assert!(Level(10).is_ltr());
        assert!(!Level(11).is_ltr());
        assert!(Level(124).is_ltr());
        assert!(!Level(125).is_ltr());
    }

    /// Odd numbers are RTL.
    #[test]
    fn test_is_rtl() {
        assert!(!Level(0).is_rtl());
        assert!(Level(1).is_rtl());
        assert!(!Level(10).is_rtl());
        assert!(Level(11).is_rtl());
        assert!(!Level(124).is_rtl());
        assert!(Level(125).is_rtl());
    }

    /// `raise` stops at 126 and leaves the level untouched on failure.
    #[test]
    fn test_raise() {
        let mut level = Level::ltr();
        assert_eq!(level.number(), 0);
        assert!(level.raise(100).is_ok());
        assert_eq!(level.number(), 100);
        assert!(level.raise(26).is_ok());
        assert_eq!(level.number(), 126);
        assert!(level.raise(1).is_err()); // invalid!
        assert!(level.raise(250).is_err()); // overflow!
        assert_eq!(level.number(), 126);
    }

    /// `raise_explicit` stops at 125 and leaves the level untouched on failure.
    #[test]
    fn test_raise_explicit() {
        let mut level = Level::ltr();
        assert_eq!(level.number(), 0);
        assert!(level.raise_explicit(100).is_ok());
        assert_eq!(level.number(), 100);
        assert!(level.raise_explicit(25).is_ok());
        assert_eq!(level.number(), 125);
        assert!(level.raise_explicit(1).is_err()); // invalid!
        assert!(level.raise_explicit(250).is_err()); // overflow!
        assert_eq!(level.number(), 125);
    }

    /// `lower` stops at 0 and leaves the level untouched on failure.
    #[test]
    fn test_lower() {
        let mut level = Level::rtl();
        assert_eq!(level.number(), 1);
        assert!(level.lower(1).is_ok());
        assert_eq!(level.number(), 0);
        assert!(level.lower(1).is_err()); // underflow!
        assert!(level.lower(250).is_err()); // underflow!
        assert_eq!(level.number(), 0);
    }

    /// `has_rtl` is true exactly when some level is odd.
    #[test]
    fn test_has_rtl() {
        assert!(!has_rtl(&Level::vec(&[0, 0, 0])));
        assert!(has_rtl(&Level::vec(&[0, 1, 0])));
        assert!(!has_rtl(&Level::vec(&[0, 2, 0])));
        assert!(has_rtl(&Level::vec(&[0, 125, 0])));
        assert!(!has_rtl(&Level::vec(&[0, 126, 0])));
    }

    /// A `Level` converts into its number.
    #[test]
    fn test_into() {
        let number = u8::from(Level::rtl());
        assert_eq!(1u8, number);
    }

    /// `Level::vec` maps each number to the matching level.
    #[test]
    fn test_vec() {
        let expected = vec![Level(0), Level(1), Level(125)];
        assert_eq!(Level::vec(&[0, 1, 125]), expected);
    }

    /// Levels compare against `&str`, with `"x"` acting as a wildcard.
    #[test]
    fn test_str_eq() {
        let levels = Level::vec(&[0, 1, 4, 125]);
        assert_eq!(levels, vec!["0", "1", "x", "125"]);
        assert_ne!(levels, vec!["0", "1", "5", "125"]);
    }

    /// Levels compare against `String` the same way as against `&str`.
    #[test]
    fn test_string_eq() {
        let expected: Vec<String> = ["0", "1", "x", "125"]
            .iter()
            .map(|s| s.to_string())
            .collect();
        assert_eq!(Level::vec(&[0, 1, 4, 125]), expected);
    }
}
#[cfg(all(feature = "serde", test))]
mod serde_tests {
    use super::*;
    use serde_test::{assert_tokens, Token};

    /// Token stream expected for a `Level` holding `number`.
    fn level_tokens(number: u8) -> [Token; 2] {
        [Token::NewtypeStruct { name: "Level" }, Token::U8(number)]
    }

    /// The two base levels round-trip as newtype structs around their number.
    #[test]
    fn test_statics() {
        assert_tokens(&Level::ltr(), &level_tokens(0));
        assert_tokens(&Level::rtl(), &level_tokens(1));
    }

    /// An arbitrary valid level round-trips the same way.
    #[test]
    fn test_new() {
        let level = Level::new(42).unwrap();
        assert_tokens(&level, &level_tokens(42));
    }
}

2330
vendor/unicode-bidi/src/lib.rs vendored Normal file

File diff suppressed because it is too large Load Diff

543
vendor/unicode-bidi/src/prepare.rs vendored Normal file
View File

@@ -0,0 +1,543 @@
// Copyright 2015 The Servo Project Developers. See the
// COPYRIGHT file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//! 3.3.3 Preparations for Implicit Processing
//!
//! <http://www.unicode.org/reports/tr9/#Preparations_for_Implicit_Processing>
use alloc::vec::Vec;
use core::cmp::max;
use core::ops::Range;
#[cfg(feature = "smallvec")]
use smallvec::{smallvec, SmallVec};
use super::level::Level;
use super::BidiClass::{self, *};
/// A maximal substring of characters with the same embedding level.
///
/// Represented as a range of byte indices.
pub type LevelRun = Range<usize>;
/// List of level runs; a `SmallVec` with an inline capacity of 8 when the `smallvec` feature
/// is enabled.
#[cfg(feature = "smallvec")]
pub type LevelRunVec = SmallVec<[LevelRun; 8]>;
/// List of level runs; a plain `Vec` when the `smallvec` feature is disabled.
#[cfg(not(feature = "smallvec"))]
pub type LevelRunVec = Vec<LevelRun>;
/// Output of `isolating_run_sequences` (steps X9-X10)
///
/// Holds the level runs belonging to one isolating run sequence together with the
/// start-of-sequence and end-of-sequence types computed for it.
#[derive(Debug, PartialEq)]
pub struct IsolatingRunSequence {
/// The level runs that make up this sequence.
pub runs: Vec<LevelRun>,
pub sos: BidiClass, // Start-of-sequence type.
pub eos: BidiClass, // End-of-sequence type.
}
/// List of isolating run sequences; a `SmallVec` with an inline capacity of 8 when the
/// `smallvec` feature is enabled.
#[cfg(feature = "smallvec")]
pub type IsolatingRunSequenceVec = SmallVec<[IsolatingRunSequence; 8]>;
/// List of isolating run sequences; a plain `Vec` when the `smallvec` feature is disabled.
#[cfg(not(feature = "smallvec"))]
pub type IsolatingRunSequenceVec = Vec<IsolatingRunSequence>;
/// Compute the set of isolating run sequences.
///
/// An isolating run sequence is a maximal sequence of level runs such that for all level runs
/// except the last one in the sequence, the last character of the run is an isolate initiator
/// whose matching PDI is the first character of the next level run in the sequence.
///
/// * `para_level`: used as the neighbouring level when no non-removed character exists before
///   or after a sequence.
/// * `original_classes`: classes indexed by text position; used to skip characters removed by
///   X9 and to recognise isolate initiators and PDIs.
/// * `levels`: levels indexed by text position.
/// * `runs`: the level runs to group; consumed by this function.
/// * `has_isolate_controls`: when `false`, every level run becomes its own sequence.
/// * `isolating_run_sequences`: output list; one entry is pushed per sequence.
///
/// # Panics
///
/// Panics if a level run is empty (on the path taken when `has_isolate_controls` is `true`
/// this is asserted explicitly).
///
/// Note: This function does *not* return the sequences in order by their first characters.
#[cfg_attr(feature = "flame_it", flamer::flame)]
pub fn isolating_run_sequences(
para_level: Level,
original_classes: &[BidiClass],
levels: &[Level],
runs: LevelRunVec,
has_isolate_controls: bool,
isolating_run_sequences: &mut IsolatingRunSequenceVec,
) {
// Per http://www.unicode.org/reports/tr9/#BD13:
// "In the absence of isolate initiators, each isolating run sequence in a paragraph
// consists of exactly one level run, and each level run constitutes a separate
// isolating run sequence."
// We can take a simplified path to handle this case.
if !has_isolate_controls {
isolating_run_sequences.reserve_exact(runs.len());
for run in runs {
// Determine the `sos` and `eos` class for the sequence.
// <http://www.unicode.org/reports/tr9/#X10>
let run_levels = &levels[run.clone()];
let run_classes = &original_classes[run.clone()];
// Level of the first non-removed character of the run (falls back to the first position).
let seq_level = run_levels[run_classes
.iter()
.position(|c| not_removed_by_x9(c))
.unwrap_or(0)];
// Level of the last non-removed character of the run (falls back to the last position).
let end_level = run_levels[run_classes
.iter()
.rposition(|c| not_removed_by_x9(c))
.unwrap_or(run.end - run.start - 1)];
// Get the level of the last non-removed char before the run.
let pred_level = match original_classes[..run.start]
.iter()
.rposition(not_removed_by_x9)
{
Some(idx) => levels[idx],
None => para_level,
};
// Get the level of the next non-removed char after the run.
let succ_level = match original_classes[run.end..]
.iter()
.position(not_removed_by_x9)
{
Some(idx) => levels[run.end + idx],
None => para_level,
};
isolating_run_sequences.push(IsolatingRunSequence {
runs: vec![run],
sos: max(seq_level, pred_level).bidi_class(),
eos: max(end_level, succ_level).bidi_class(),
});
}
return;
}
// Compute the set of isolating run sequences.
// <http://www.unicode.org/reports/tr9/#BD13>
let mut sequences = Vec::with_capacity(runs.len());
// When we encounter an isolate initiator, we push the current sequence onto the
// stack so we can resume it after the matching PDI.
// The stack starts with one empty sequence, so it is never empty (asserted below).
#[cfg(feature = "smallvec")]
let mut stack: SmallVec<[Vec<Range<usize>>; 8]> = smallvec![vec![]];
#[cfg(not(feature = "smallvec"))]
let mut stack = vec![vec![]];
for run in runs {
assert!(!run.is_empty());
assert!(!stack.is_empty());
let start_class = original_classes[run.start];
// > In rule X10, [..] skip over any BNs when [..].
// > Do the same when determining if the last character of the sequence is an isolate initiator.
//
// <https://www.unicode.org/reports/tr9/#Retaining_Explicit_Formatting_Characters>
let end_class = original_classes[run.start..run.end]
.iter()
.copied()
.rev()
.find(not_removed_by_x9)
.unwrap_or(start_class);
let mut sequence = if start_class == PDI && stack.len() > 1 {
// Continue a previous sequence interrupted by an isolate.
stack.pop().unwrap()
} else {
// Start a new sequence.
Vec::new()
};
sequence.push(run);
if matches!(end_class, RLI | LRI | FSI) {
// Resume this sequence after the isolate.
stack.push(sequence);
} else {
// This sequence is finished.
sequences.push(sequence);
}
}
// Pop any remaining sequences off the stack.
// (The initial placeholder may still be empty, hence the filter.)
sequences.extend(stack.into_iter().rev().filter(|seq| !seq.is_empty()));
// Determine the `sos` and `eos` class for each sequence.
// <http://www.unicode.org/reports/tr9/#X10>
for sequence in sequences {
assert!(!sequence.is_empty());
let start_of_seq = sequence[0].start;
let runs_len = sequence.len();
let end_of_seq = sequence[runs_len - 1].end;
// `sos`/`eos` are placeholders here; the real values are assigned further down.
let mut result = IsolatingRunSequence {
runs: sequence,
sos: L,
eos: L,
};
// > (not counting characters removed by X9)
let seq_level = levels[result
.iter_forwards_from(start_of_seq, 0)
.find(|i| not_removed_by_x9(&original_classes[*i]))
.unwrap_or(start_of_seq)];
// XXXManishearth the spec talks of a start and end level,
// but for a given IRS the two should be equivalent, yes?
let end_level = levels[result
.iter_backwards_from(end_of_seq, runs_len - 1)
.find(|i| not_removed_by_x9(&original_classes[*i]))
.unwrap_or(end_of_seq - 1)];
// In test builds, check that every non-removed character of the sequence has `seq_level`.
#[cfg(test)]
for idx in result.runs.clone().into_iter().flatten() {
if not_removed_by_x9(&original_classes[idx]) {
assert_eq!(seq_level, levels[idx]);
}
}
// Get the level of the last non-removed char before the runs.
let pred_level = match original_classes[..start_of_seq]
.iter()
.rposition(not_removed_by_x9)
{
Some(idx) => levels[idx],
None => para_level,
};
// Get the last non-removed character to check if it is an isolate initiator.
// The spec calls for an unmatched one, but matched isolate initiators
// will never be at the end of a level run (otherwise there would be more to the run).
// We unwrap_or(BN) because BN marks removed classes and it won't matter for the check.
let last_non_removed = original_classes[..end_of_seq]
.iter()
.copied()
.rev()
.find(not_removed_by_x9)
.unwrap_or(BN);
// Get the level of the next non-removed char after the runs.
let succ_level = if matches!(last_non_removed, RLI | LRI | FSI) {
para_level
} else {
match original_classes[end_of_seq..]
.iter()
.position(not_removed_by_x9)
{
Some(idx) => levels[end_of_seq + idx],
None => para_level,
}
};
result.sos = max(seq_level, pred_level).bidi_class();
result.eos = max(end_level, succ_level).bidi_class();
isolating_run_sequences.push(result);
}
}
impl IsolatingRunSequence {
    /// Given a text-relative position `pos` and the index of the level run it is in,
    /// produce an iterator over all positions at and after `pos` (`pos..`) that are in
    /// this run sequence, in ascending order.
    ///
    /// # Panics
    ///
    /// Panics if `level_run_index` is not a valid index into the sequence's runs.
    pub(crate) fn iter_forwards_from(
        &self,
        pos: usize,
        level_run_index: usize,
    ) -> impl Iterator<Item = usize> + '_ {
        let runs = &self.runs[level_run_index..];

        // Check that it is in range
        // (we can't use contains() since we want an inclusive range)
        #[cfg(feature = "std")]
        debug_assert!(runs[0].start <= pos && pos <= runs[0].end);

        // Rest of the current run, then every later run in order.
        (pos..runs[0].end).chain(runs[1..].iter().flat_map(Clone::clone))
    }

    /// Given a text-relative position `pos` and the index of the level run it is in,
    /// produce an iterator over all positions before and excluding `pos` (`..pos`) that
    /// are in this run sequence, in descending order (nearest position first).
    ///
    /// # Panics
    ///
    /// Panics if `level_run_index` is not a valid index into the sequence's runs.
    pub(crate) fn iter_backwards_from(
        &self,
        pos: usize,
        level_run_index: usize,
    ) -> impl Iterator<Item = usize> + '_ {
        let prev_runs = &self.runs[..level_run_index];
        let current = &self.runs[level_run_index];

        // Check that it is in range
        // (we can't use contains() since we want an inclusive range)
        #[cfg(feature = "std")]
        debug_assert!(current.start <= pos && pos <= current.end);

        // Walk the current run backwards, then each earlier run. Every earlier run must
        // itself be reversed: visiting the runs in reverse order alone would still yield
        // the indices inside each run in ascending order, so a `find` on this iterator
        // would return the farthest match in a previous run instead of the nearest one.
        (current.start..pos)
            .rev()
            .chain(prev_runs.iter().rev().flat_map(|run| run.clone().rev()))
    }
}
/// Splits a paragraph into its level runs.
///
/// <http://www.unicode.org/reports/tr9/#BD7>
///
/// A new run starts at every position whose level differs from the current run's level,
/// except that characters removed by rule X9 never start a new run.
///
/// This is only used by tests; normally level runs are identified during explicit::compute.
#[cfg(test)]
fn level_runs(levels: &[Level], original_classes: &[BidiClass]) -> Vec<LevelRun> {
    assert_eq!(levels.len(), original_classes.len());

    let mut runs = Vec::new();
    // An empty paragraph has no runs at all.
    let mut run_level = match levels.first() {
        Some(&level) => level,
        None => return runs,
    };
    let mut run_start = 0;

    for (idx, (&level, &class)) in levels.iter().zip(original_classes).enumerate().skip(1) {
        if !removed_by_x9(class) && level != run_level {
            // Close the run in progress and open a new one at this position.
            runs.push(run_start..idx);
            run_level = level;
            run_start = idx;
        }
    }

    // The final run always extends to the end of the paragraph.
    runs.push(run_start..levels.len());
    runs
}
/// Returns `true` if a character of this class is ignored by the steps after X9,
/// i.e. it is an explicit embedding/override control, PDF, or BN.
///
/// <http://www.unicode.org/reports/tr9/#X9>
pub fn removed_by_x9(class: BidiClass) -> bool {
    match class {
        RLE | LRE | RLO | LRO | PDF | BN => true,
        _ => false,
    }
}
/// Negation of [`removed_by_x9`], taking the class by reference so that it can be used
/// directly as a predicate for `position` / `rposition` / `find`.
pub fn not_removed_by_x9(class: &BidiClass) -> bool {
    let removed = removed_by_x9(*class);
    !removed
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_level_runs() {
    // An empty paragraph produces no runs.
    let no_levels = Level::vec(&[]);
    assert_eq!(level_runs(&no_levels, &[]), &[]);

    // Every change of level starts a new run.
    let levels = Level::vec(&[0, 0, 0, 1, 1, 2, 0, 0]);
    let classes = [L; 8];
    assert_eq!(
        level_runs(&levels, &classes),
        &[0..3, 3..5, 5..6, 6..8]
    );
}
// From <http://www.unicode.org/reports/tr9/#BD13>
#[rustfmt::skip]
#[test]
fn test_isolating_run_sequences() {
    // Computes the isolating run sequences of an LTR paragraph and returns the level
    // runs of every sequence, with the sequences ordered by their first run.
    let sorted_runs = |classes: &[BidiClass], levels: &[u8], isolate_flag: bool| {
        let levels = Level::vec(levels);
        let mut sequences = IsolatingRunSequenceVec::new();
        isolating_run_sequences(
            Level::ltr(),
            classes,
            &levels,
            level_runs(&levels, classes).into(),
            isolate_flag,
            &mut sequences);
        sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone()));
        sequences.iter().map(|s| s.runs.clone()).collect::<Vec<_>>()
    };

    // == Example 1 ==
    // text1·RLE·text2·PDF·RLE·text3·PDF·text4
    // index        0    1  2    3    4  5    6  7
    assert_eq!(
        sorted_runs(
            &[L, RLE, L, PDF, RLE, L, PDF, L],
            &[0, 1,   1, 1,   1,   1, 1,   0],
            false),
        vec![vec![0..2], vec![2..7], vec![7..8]]
    );

    // == Example 2 ==
    // text1·RLI·text2·PDI·RLI·text3·PDI·text4
    // index        0    1  2    3    4  5    6  7
    assert_eq!(
        sorted_runs(
            &[L, RLI, L, PDI, RLI, L, PDI, L],
            &[0, 0,   1, 0,   0,   1, 0,   0],
            true),
        vec![vec![0..2, 3..5, 6..8], vec![2..3], vec![5..6]]
    );

    // == Example 3 ==
    // text1·RLI·text2·LRI·text3·RLE·text4·PDF·text5·PDI·text6·PDI·text7
    // index        0    1  2    3  4    5  6    7  8    9  10  11  12
    assert_eq!(
        sorted_runs(
            &[L, RLI, L, LRI, L, RLE, L, PDF, L, PDI, L, PDI, L],
            &[0, 0,   1, 1,   2, 3,   3, 3,   2, 1,   1, 0,   0],
            true),
        vec![vec![0..2, 11..13], vec![2..4, 9..11], vec![4..6], vec![6..8], vec![8..9]]
    );
}
// From <http://www.unicode.org/reports/tr9/#X10>
#[rustfmt::skip]
#[test]
fn test_isolating_run_sequences_sos_and_eos() {
    // Checks one computed sequence against its expected runs and sos/eos classes.
    fn assert_sequence(
        actual: &IsolatingRunSequence,
        runs: Vec<LevelRun>,
        sos: BidiClass,
        eos: BidiClass,
    ) {
        assert_eq!(actual, &IsolatingRunSequence { runs, sos, eos });
    }

    // Computes the isolating run sequences of an LTR paragraph, ordered by first run.
    let sorted_sequences = |classes: &[BidiClass], levels: &[u8], isolate_flag: bool| {
        let levels = Level::vec(levels);
        let mut sequences = IsolatingRunSequenceVec::new();
        isolating_run_sequences(
            Level::ltr(),
            classes,
            &levels,
            level_runs(&levels, classes).into(),
            isolate_flag,
            &mut sequences);
        sequences.sort_by(|a, b| a.runs[0].clone().cmp(b.runs[0].clone()));
        sequences
    };

    // == Example 1 ==
    // text1·RLE·text2·LRE·text3·PDF·text4·PDF·RLE·text5·PDF·text6
    // index        0    1  2    3  4    5  6    7    8  9   10  11
    let sequences = sorted_sequences(
        &[L, RLE, L, LRE, L, PDF, L, PDF, RLE, L, PDF, L],
        &[0, 1,   1, 2,   2, 2,   1, 1,   1,   1, 1,   0],
        false);

    // text1
    assert_sequence(&sequences[0], vec![0..2], L, R);
    // text2
    assert_sequence(&sequences[1], vec![2..4], R, L);
    // text3
    assert_sequence(&sequences[2], vec![4..6], L, L);
    // text4 text5
    assert_sequence(&sequences[3], vec![6..11], L, R);
    // text6
    assert_sequence(&sequences[4], vec![11..12], R, L);

    // == Example 2 ==
    // text1·RLI·text2·LRI·text3·PDI·text4·PDI·RLI·text5·PDI·text6
    // index        0    1  2    3  4    5  6    7    8  9   10  11
    let sequences = sorted_sequences(
        &[L, RLI, L, LRI, L, PDI, L, PDI, RLI, L, PDI, L],
        &[0, 0,   1, 1,   2, 1,   1, 0,   0,   1, 0,   0],
        true);

    // text1·RLI·PDI·RLI·PDI·text6
    assert_sequence(&sequences[0], vec![0..2, 7..9, 10..12], L, L);
    // text2·LRI·PDI·text4
    assert_sequence(&sequences[1], vec![2..4, 5..7], R, R);
    // text3
    assert_sequence(&sequences[2], vec![4..5], L, L);
    // text5
    assert_sequence(&sequences[3], vec![9..10], R, R);
}
#[test]
fn test_removed_by_x9() {
    // Classes that rule X9 removes, and a sample of classes that it keeps.
    let rem_classes = &[RLE, LRE, RLO, LRO, PDF, BN];
    let not_classes = &[L, RLI, AL, LRI, PDI];
    for x in rem_classes {
        // Name the offending class on failure instead of printing `false != true`.
        assert!(removed_by_x9(*x), "{:?} should be removed by X9", x);
    }
    for x in not_classes {
        assert!(!removed_by_x9(*x), "{:?} should not be removed by X9", x);
    }
}
#[test]
fn test_not_removed_by_x9() {
    // Every class that is not an explicit embedding/override control, PDF or BN.
    let non_x9_classes = &[L, R, AL, EN, ES, ET, AN, CS, NSM, B, S, WS, ON, LRI, RLI, FSI, PDI];
    for x in non_x9_classes {
        // `x` is already a reference; pass it straight through rather than `&x`.
        assert!(not_removed_by_x9(x), "{:?} should not be removed by X9", x);
    }
}
}

795
vendor/unicode-bidi/src/utf16.rs vendored Normal file
View File

@@ -0,0 +1,795 @@
// Copyright 2023 The Mozilla Foundation. See the
// COPYRIGHT file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use super::TextSource;
use alloc::borrow::Cow;
use alloc::vec::Vec;
use core::char;
use core::ops::Range;
use crate::{
compute_bidi_info_for_para, compute_initial_info, level, para_direction, reorder_levels,
reorder_visual, visual_runs_for_line,
};
use crate::{
BidiClass, BidiDataSource, Direction, Level, LevelRun, ParagraphInfo, ParagraphInfoFlags,
};
#[cfg(feature = "hardcoded-data")]
use crate::HardcodedBidiData;
/// Initial bidi information of the text (UTF-16 version).
///
/// Contains the text paragraphs and `BidiClass` of its characters.
#[derive(PartialEq, Debug)]
pub struct InitialInfo<'text> {
    /// The text, as a slice of UTF-16 code units.
    pub text: &'text [u16],
    /// The BidiClass of the character at each code unit in the text.
    /// If a character is multiple code units, its class will appear multiple times in the vector.
    pub original_classes: Vec<BidiClass>,
    /// The boundaries and level of each paragraph within the text.
    pub paragraphs: Vec<ParagraphInfo>,
}
impl<'text> InitialInfo<'text> {
    /// Locate the paragraphs in `text` and determine the BidiClass of each code unit,
    /// using the crate's hardcoded Bidi data.
    ///
    /// <http://www.unicode.org/reports/tr9/#The_Paragraph_Level>
    ///
    /// Each First Strong Isolate initiator (FSI) has its class set to LRI or RLI if a
    /// strong character is found before the matching PDI. If no strong character is
    /// found, the class stays FSI and later stages must treat it as LRI when needed.
    ///
    /// The `hardcoded-data` Cargo feature (enabled by default) must be enabled to use this.
    #[cfg_attr(feature = "flame_it", flamer::flame)]
    #[cfg(feature = "hardcoded-data")]
    pub fn new(text: &[u16], default_para_level: Option<Level>) -> InitialInfo<'_> {
        InitialInfo::new_with_data_source(&HardcodedBidiData, text, default_para_level)
    }

    /// Locate the paragraphs in `text` and determine the BidiClass of each code unit,
    /// reading Bidi data from a custom [`BidiDataSource`]. If the hardcoded Bidi data is
    /// sufficient, use [`InitialInfo::new()`] instead (available with the default
    /// `hardcoded-data` Cargo feature).
    ///
    /// <http://www.unicode.org/reports/tr9/#The_Paragraph_Level>
    ///
    /// Each First Strong Isolate initiator (FSI) has its class set to LRI or RLI if a
    /// strong character is found before the matching PDI. If no strong character is
    /// found, the class stays FSI and later stages must treat it as LRI when needed.
    #[cfg_attr(feature = "flame_it", flamer::flame)]
    pub fn new_with_data_source<'a, D: BidiDataSource>(
        data_source: &D,
        text: &'a [u16],
        default_para_level: Option<Level>,
    ) -> InitialInfo<'a> {
        // The extended info carries extra per-paragraph flags; only the base part is public.
        let extended = InitialInfoExt::new_with_data_source(data_source, text, default_para_level);
        extended.base
    }
}
/// Extended version of InitialInfo (not public API).
///
/// Pairs the public [`InitialInfo`] with per-paragraph flags that are only needed
/// internally when computing the full bidi information.
#[derive(PartialEq, Debug)]
struct InitialInfoExt<'text> {
    /// The base InitialInfo for the text, recording its paragraphs and bidi classes.
    base: InitialInfo<'text>,
    /// Parallel to base.paragraphs, records whether each paragraph is "pure LTR" that
    /// requires no further bidi processing (i.e. there are no RTL characters or bidi
    /// control codes present).
    flags: Vec<ParagraphInfoFlags>,
}
impl<'text> InitialInfoExt<'text> {
    /// Locate the paragraphs in `text` and determine the BidiClass of each code unit,
    /// reading Bidi data from a custom [`BidiDataSource`], and additionally record the
    /// per-paragraph flags. If the hardcoded Bidi data is sufficient, use
    /// [`InitialInfo::new()`] instead (available with the default `hardcoded-data`
    /// Cargo feature).
    ///
    /// <http://www.unicode.org/reports/tr9/#The_Paragraph_Level>
    ///
    /// Each First Strong Isolate initiator (FSI) has its class set to LRI or RLI if a
    /// strong character is found before the matching PDI. If no strong character is
    /// found, the class stays FSI and later stages must treat it as LRI when needed.
    #[cfg_attr(feature = "flame_it", flamer::flame)]
    pub fn new_with_data_source<'a, D: BidiDataSource>(
        data_source: &D,
        text: &'a [u16],
        default_para_level: Option<Level>,
    ) -> InitialInfoExt<'a> {
        let mut paragraphs: Vec<ParagraphInfo> = Vec::new();
        let mut flags: Vec<ParagraphInfoFlags> = Vec::new();

        // Passing the two output vectors makes the helper fill in one entry per
        // paragraph; only the classes are needed from its return value here.
        let (original_classes, ..) = compute_initial_info(
            data_source,
            text,
            default_para_level,
            Some((&mut paragraphs, &mut flags)),
        );

        let base = InitialInfo {
            text,
            original_classes,
            paragraphs,
        };
        InitialInfoExt { base, flags }
    }
}
/// Bidi information of the text (UTF-16 version).
///
/// The `original_classes` and `levels` vectors are indexed by code unit offsets into the text. If a
/// character is multiple code units wide, then its class and level will appear multiple times in these
/// vectors.
// TODO: Impl `struct StringProperty<T> { values: Vec<T> }` and use instead of Vec<T>
#[derive(Debug, PartialEq)]
pub struct BidiInfo<'text> {
    /// The text, as a slice of UTF-16 code units.
    pub text: &'text [u16],
    /// The BidiClass of the character at each code unit in the text.
    pub original_classes: Vec<BidiClass>,
    /// The directional embedding level of each code unit in the text.
    pub levels: Vec<Level>,
    /// The boundaries and paragraph embedding level of each paragraph within the text.
    ///
    /// TODO: Use SmallVec or similar to avoid overhead when there are only one or two paragraphs?
    /// Or just don't include the first paragraph, which always starts at 0?
    pub paragraphs: Vec<ParagraphInfo>,
}
impl<'text> BidiInfo<'text> {
/// Split the text into paragraphs and determine the bidi embedding levels for each paragraph.
///
///
/// The `hardcoded-data` Cargo feature (enabled by default) must be enabled to use this.
///
/// TODO: In early steps, check for special cases that allow later steps to be skipped. like
/// text that is entirely LTR. See the `nsBidi` class from Gecko for comparison.
///
/// TODO: Support auto-RTL base direction
#[cfg_attr(feature = "flame_it", flamer::flame)]
#[cfg(feature = "hardcoded-data")]
#[inline]
pub fn new(text: &[u16], default_para_level: Option<Level>) -> BidiInfo<'_> {
Self::new_with_data_source(&HardcodedBidiData, text, default_para_level)
}
/// Split the text into paragraphs and determine the bidi embedding levels for each paragraph, with a custom [`BidiDataSource`]
/// for Bidi data. If you just wish to use the hardcoded Bidi data, please use [`BidiInfo::new()`]
/// instead (enabled with tbe default `hardcoded-data` Cargo feature).
///
/// TODO: In early steps, check for special cases that allow later steps to be skipped. like
/// text that is entirely LTR. See the `nsBidi` class from Gecko for comparison.
///
/// TODO: Support auto-RTL base direction
#[cfg_attr(feature = "flame_it", flamer::flame)]
pub fn new_with_data_source<'a, D: BidiDataSource>(
data_source: &D,
text: &'a [u16],
default_para_level: Option<Level>,
) -> BidiInfo<'a> {
let InitialInfoExt { base, flags, .. } =
InitialInfoExt::new_with_data_source(data_source, text, default_para_level);
let mut levels = Vec::<Level>::with_capacity(text.len());
let mut processing_classes = base.original_classes.clone();
for (para, flags) in base.paragraphs.iter().zip(flags.iter()) {
let text = &text[para.range.clone()];
let original_classes = &base.original_classes[para.range.clone()];
compute_bidi_info_for_para(
data_source,
para,
flags.is_pure_ltr,
flags.has_isolate_controls,
text,
original_classes,
&mut processing_classes,
&mut levels,
);
}
BidiInfo {
text,
original_classes: base.original_classes,
paragraphs: base.paragraphs,
levels,
}
}
/// Produce the levels for this paragraph as needed for reordering, one level per *byte*
/// in the paragraph. The returned vector includes bytes that are not included
/// in the `line`, but will not adjust them.
///
/// This runs [Rule L1], you can run
/// [Rule L2] by calling [`Self::reorder_visual()`].
/// If doing so, you may prefer to use [`Self::reordered_levels_per_char()`] instead
/// to avoid non-byte indices.
///
/// For an all-in-one reordering solution, consider using [`Self::reorder_visual()`].
///
/// [Rule L1]: https://www.unicode.org/reports/tr9/#L1
/// [Rule L2]: https://www.unicode.org/reports/tr9/#L2
#[cfg_attr(feature = "flame_it", flamer::flame)]
pub fn reordered_levels(&self, para: &ParagraphInfo, line: Range<usize>) -> Vec<Level> {
assert!(line.start <= self.levels.len());
assert!(line.end <= self.levels.len());
let mut levels = self.levels.clone();
let line_classes = &self.original_classes[line.clone()];
let line_levels = &mut levels[line.clone()];
let line_str: &[u16] = &self.text[line.clone()];
reorder_levels(line_classes, line_levels, line_str, para.level);
levels
}
/// Produce the levels for this paragraph as needed for reordering, one level per *character*
/// in the paragraph. The returned vector includes characters that are not included
/// in the `line`, but will not adjust them.
///
/// This runs [Rule L1], you can run
/// [Rule L2] by calling [`Self::reorder_visual()`].
/// If doing so, you may prefer to use [`Self::reordered_levels_per_char()`] instead
/// to avoid non-byte indices.
///
/// For an all-in-one reordering solution, consider using [`Self::reorder_visual()`].
///
/// [Rule L1]: https://www.unicode.org/reports/tr9/#L1
/// [Rule L2]: https://www.unicode.org/reports/tr9/#L2
#[cfg_attr(feature = "flame_it", flamer::flame)]
pub fn reordered_levels_per_char(
&self,
para: &ParagraphInfo,
line: Range<usize>,
) -> Vec<Level> {
let levels = self.reordered_levels(para, line);
self.text.char_indices().map(|(i, _)| levels[i]).collect()
}
/// Re-order a line based on resolved levels and return the line in display order.
///
/// This does not apply [Rule L3] or [Rule L4] around combining characters or mirroring.
///
/// [Rule L3]: https://www.unicode.org/reports/tr9/#L3
/// [Rule L4]: https://www.unicode.org/reports/tr9/#L4
#[cfg_attr(feature = "flame_it", flamer::flame)]
pub fn reorder_line(&self, para: &ParagraphInfo, line: Range<usize>) -> Cow<'text, [u16]> {
if !level::has_rtl(&self.levels[line.clone()]) {
return self.text[line].into();
}
let (levels, runs) = self.visual_runs(para, line.clone());
reorder_line(self.text, line, levels, runs)
}
/// Reorders pre-calculated levels of a sequence of characters.
///
/// NOTE: This is a convenience method that does not use a `Paragraph` object. It is
/// intended to be used when an application has determined the levels of the objects (character sequences)
/// and just needs to have them reordered.
///
/// the index map will result in `indexMap[visualIndex]==logicalIndex`.
///
/// This only runs [Rule L2](http://www.unicode.org/reports/tr9/#L2) as it does not have
/// information about the actual text.
///
/// Furthermore, if `levels` is an array that is aligned with code units, bytes within a codepoint may be
/// reversed. You may need to fix up the map to deal with this. Alternatively, only pass in arrays where each `Level`
/// is for a single code point.
///
///
/// # # Example
/// ```
/// use unicode_bidi::BidiInfo;
/// use unicode_bidi::Level;
///
/// let l0 = Level::from(0);
/// let l1 = Level::from(1);
/// let l2 = Level::from(2);
///
/// let levels = vec![l0, l0, l0, l0];
/// let index_map = BidiInfo::reorder_visual(&levels);
/// assert_eq!(levels.len(), index_map.len());
/// assert_eq!(index_map, [0, 1, 2, 3]);
///
/// let levels: Vec<Level> = vec![l0, l0, l0, l1, l1, l1, l2, l2];
/// let index_map = BidiInfo::reorder_visual(&levels);
/// assert_eq!(levels.len(), index_map.len());
/// assert_eq!(index_map, [0, 1, 2, 6, 7, 5, 4, 3]);
/// ```
#[cfg_attr(feature = "flame_it", flamer::flame)]
#[inline]
pub fn reorder_visual(levels: &[Level]) -> Vec<usize> {
reorder_visual(levels)
}
/// Find the level runs within a line and return them in visual order.
///
/// `line` is a range of bytes indices within `levels`.
///
/// The first return value is a vector of levels used by the reordering algorithm,
/// i.e. the result of [Rule L1]. The second return value is a vector of level runs,
/// the result of [Rule L2], showing the visual order that each level run (a run of text with the
/// same level) should be displayed. Within each run, the display order can be checked
/// against the Level vector.
///
/// This does not handle [Rule L3] (combining characters) or [Rule L4] (mirroring),
/// as that should be handled by the engine using this API.
///
/// Conceptually, this is the same as running [`Self::reordered_levels()`] followed by
/// [`Self::reorder_visual()`], however it returns the result as a list of level runs instead
/// of producing a level map, since one may wish to deal with the fact that this is operating on
/// byte rather than character indices.
///
/// <http://www.unicode.org/reports/tr9/#Reordering_Resolved_Levels>
///
/// [Rule L1]: https://www.unicode.org/reports/tr9/#L1
/// [Rule L2]: https://www.unicode.org/reports/tr9/#L2
/// [Rule L3]: https://www.unicode.org/reports/tr9/#L3
/// [Rule L4]: https://www.unicode.org/reports/tr9/#L4
#[cfg_attr(feature = "flame_it", flamer::flame)]
#[inline]
pub fn visual_runs(
&self,
para: &ParagraphInfo,
line: Range<usize>,
) -> (Vec<Level>, Vec<LevelRun>) {
let levels = self.reordered_levels(para, line.clone());
visual_runs_for_line(levels, &line)
}
/// If processed text has any computed RTL levels
///
/// This information is usually used to skip re-ordering of text when no RTL level is present
#[inline]
pub fn has_rtl(&self) -> bool {
level::has_rtl(&self.levels)
}
}
/// Bidi information of text treated as a single paragraph.
///
/// The `original_classes` and `levels` vectors are indexed by code unit offsets into the text. If a
/// character is multiple code units wide, then its class and level will appear multiple times in these
/// vectors.
#[derive(Debug, PartialEq)]
pub struct ParagraphBidiInfo<'text> {
    /// The text, as a slice of UTF-16 code units.
    pub text: &'text [u16],
    /// The BidiClass of the character at each code unit in the text.
    pub original_classes: Vec<BidiClass>,
    /// The directional embedding level of each code unit in the text.
    pub levels: Vec<Level>,
    /// The paragraph embedding level.
    pub paragraph_level: Level,
    /// Whether the paragraph is purely LTR.
    pub is_pure_ltr: bool,
}
impl<'text> ParagraphBidiInfo<'text> {
    /// Determine the bidi embedding levels of the text, treated as a single paragraph,
    /// using the crate's hardcoded Bidi data.
    ///
    /// The `hardcoded-data` Cargo feature (enabled by default) must be enabled to use this.
    ///
    /// TODO: In early steps, check for special cases that allow later steps to be skipped. like
    /// text that is entirely LTR. See the `nsBidi` class from Gecko for comparison.
    ///
    /// TODO: Support auto-RTL base direction
    #[cfg_attr(feature = "flame_it", flamer::flame)]
    #[cfg(feature = "hardcoded-data")]
    #[inline]
    pub fn new(text: &[u16], default_para_level: Option<Level>) -> ParagraphBidiInfo<'_> {
        ParagraphBidiInfo::new_with_data_source(&HardcodedBidiData, text, default_para_level)
    }

    /// Determine the bidi embedding levels of the text, treated as a single paragraph,
    /// reading Bidi data from a custom [`BidiDataSource`]. If the hardcoded Bidi data is
    /// sufficient, use [`ParagraphBidiInfo::new()`] instead (available with the default
    /// `hardcoded-data` Cargo feature).
    ///
    /// (This is the single-paragraph equivalent of BidiInfo::new_with_data_source,
    /// and should be kept in sync with it.)
    #[cfg_attr(feature = "flame_it", flamer::flame)]
    pub fn new_with_data_source<'a, D: BidiDataSource>(
        data_source: &D,
        text: &'a [u16],
        default_para_level: Option<Level>,
    ) -> ParagraphBidiInfo<'a> {
        // Here we could create a ParagraphInitialInfo struct to parallel the one
        // used by BidiInfo, but there doesn't seem any compelling reason for it.
        // Passing `None` asks for single-paragraph results instead of per-paragraph vectors.
        let (original_classes, paragraph_level, is_pure_ltr, has_isolate_controls) =
            compute_initial_info(data_source, text, default_para_level, None);

        let mut levels: Vec<Level> = Vec::with_capacity(text.len());
        let mut processing_classes = original_classes.clone();

        // The single paragraph spans the whole text.
        let whole_text = ParagraphInfo {
            range: 0..text.len(),
            level: paragraph_level,
        };

        compute_bidi_info_for_para(
            data_source,
            &whole_text,
            is_pure_ltr,
            has_isolate_controls,
            text,
            &original_classes,
            &mut processing_classes,
            &mut levels,
        );

        ParagraphBidiInfo {
            text,
            original_classes,
            levels,
            paragraph_level,
            is_pure_ltr,
        }
    }

    /// Produce the levels for this paragraph as needed for reordering, one level per *code unit*
    /// in the paragraph. The returned vector includes code units that are not included
    /// in the `line`, but will not adjust them.
    ///
    /// See BidiInfo::reordered_levels for details.
    ///
    /// (This should be kept in sync with BidiInfo::reordered_levels.)
    ///
    /// # Panics
    ///
    /// Panics if `line.start` or `line.end` is greater than the number of levels.
    #[cfg_attr(feature = "flame_it", flamer::flame)]
    pub fn reordered_levels(&self, line: Range<usize>) -> Vec<Level> {
        let level_count = self.levels.len();
        assert!(line.start <= level_count);
        assert!(line.end <= level_count);

        // Adjust a copy; only the part covered by `line` is rewritten.
        let mut adjusted = self.levels.clone();
        let line_classes = &self.original_classes[line.clone()];
        let line_levels = &mut adjusted[line.clone()];
        let line_text = self.text.subrange(line);
        reorder_levels(line_classes, line_levels, line_text, self.paragraph_level);
        adjusted
    }

    /// Produce the levels for this paragraph as needed for reordering, one level per *character*
    /// in the paragraph. The returned vector includes characters that are not included
    /// in the `line`, but will not adjust them.
    ///
    /// See BidiInfo::reordered_levels_per_char for details.
    ///
    /// (This should be kept in sync with BidiInfo::reordered_levels_per_char.)
    #[cfg_attr(feature = "flame_it", flamer::flame)]
    pub fn reordered_levels_per_char(&self, line: Range<usize>) -> Vec<Level> {
        let per_unit = self.reordered_levels(line);
        // Keep only the level found at the first code unit of each character.
        self.text
            .char_indices()
            .map(|(start, _)| per_unit[start])
            .collect()
    }

    /// Re-order a line based on resolved levels and return the line in display order.
    ///
    /// See BidiInfo::reorder_line for details.
    ///
    /// (This should be kept in sync with BidiInfo::reorder_line.)
    #[cfg_attr(feature = "flame_it", flamer::flame)]
    pub fn reorder_line(&self, line: Range<usize>) -> Cow<'text, [u16]> {
        let line_levels = &self.levels[line.clone()];
        if level::has_rtl(line_levels) {
            let (levels, runs) = self.visual_runs(line.clone());
            reorder_line(self.text, line, levels, runs)
        } else {
            // No RTL level in the line: the logical order is already the display order.
            self.text[line].into()
        }
    }

    /// Reorders pre-calculated levels of a sequence of characters.
    ///
    /// See BidiInfo::reorder_visual for details.
    #[cfg_attr(feature = "flame_it", flamer::flame)]
    #[inline]
    pub fn reorder_visual(levels: &[Level]) -> Vec<usize> {
        reorder_visual(levels)
    }

    /// Find the level runs within a line and return them in visual order.
    ///
    /// `line` is a range of code-unit indices within `levels`.
    ///
    /// See `BidiInfo::visual_runs` for details.
    ///
    /// (This should be kept in sync with BidiInfo::visual_runs.)
    #[cfg_attr(feature = "flame_it", flamer::flame)]
    #[inline]
    pub fn visual_runs(&self, line: Range<usize>) -> (Vec<Level>, Vec<LevelRun>) {
        visual_runs_for_line(self.reordered_levels(line.clone()), &line)
    }

    /// If processed text has any computed RTL levels
    ///
    /// This information is usually used to skip re-ordering of text when no RTL level is present.
    /// The answer is derived from the stored `is_pure_ltr` flag rather than by scanning `levels`.
    #[inline]
    pub fn has_rtl(&self) -> bool {
        let pure_ltr = self.is_pure_ltr;
        !pure_ltr
    }

    /// Return the paragraph's Direction (Ltr, Rtl, or Mixed) based on its levels.
    #[inline]
    pub fn direction(&self) -> Direction {
        let all_levels: &[Level] = &self.levels;
        para_direction(all_levels)
    }
}
/// Return a line of the text in display order based on resolved levels.
///
/// * `text`: the full text passed to the `BidiInfo` or `ParagraphBidiInfo` for analysis
/// * `line`: a range of code-unit indices within `text` corresponding to one line
/// * `levels`: array of `Level` values, with `line`'s levels reordered into visual order
/// * `runs`: array of `LevelRun`s in visual order
///
/// (`levels` and `runs` are the result of calling `BidiInfo::visual_runs()` or
/// `ParagraphBidiInfo::visual_runs()` for the line of interest.)
///
/// Returns: the reordered text of the line. The text is borrowed when no run needs
/// reversing, and freshly allocated otherwise.
///
/// This does not apply [Rule L3] or [Rule L4] around combining characters or mirroring.
///
/// [Rule L3]: https://www.unicode.org/reports/tr9/#L3
/// [Rule L4]: https://www.unicode.org/reports/tr9/#L4
fn reorder_line(
    text: &[u16],
    line: Range<usize>,
    levels: Vec<Level>,
    runs: Vec<LevelRun>,
) -> Cow<'_, [u16]> {
    // If every run is LTR, the logical order is already the display order.
    let has_non_ltr_run = runs.iter().any(|run| !levels[run.start].is_ltr());
    if !has_non_ltr_run {
        return text[line].into();
    }

    let mut reordered: Vec<u16> = Vec::with_capacity(line.len());
    // Scratch space for re-encoding one character (at most two code units).
    let mut utf16_buf = [0; 2];
    for run in runs {
        if levels[run.start].is_rtl() {
            // Reverse by character, not by code unit, so surrogate pairs stay in order.
            for ch in text[run].chars().rev() {
                reordered.extend_from_slice(ch.encode_utf16(&mut utf16_buf));
            }
        } else {
            reordered.extend_from_slice(&text[run]);
        }
    }
    reordered.into()
}
/// Contains a reference to a `BidiInfo` and to one of its `paragraphs`.
/// It supports the `Paragraph` operations that also need the paragraph's
/// `BidiInfo`, such as `direction`.
#[derive(Debug)]
pub struct Paragraph<'a, 'text> {
    /// The bidi information of the whole text.
    pub info: &'a BidiInfo<'text>,
    /// The paragraph within `info` that this object describes.
    pub para: &'a ParagraphInfo,
}
impl<'a, 'text> Paragraph<'a, 'text> {
    /// Pairs a `BidiInfo` with one of its paragraphs.
    #[inline]
    pub fn new(info: &'a BidiInfo<'text>, para: &'a ParagraphInfo) -> Paragraph<'a, 'text> {
        Self { info, para }
    }

    /// Returns whether the paragraph is left-to-right, right-to-left, or mixed,
    /// based on the levels inside the paragraph's range.
    #[inline]
    pub fn direction(&self) -> Direction {
        let para_range = self.para.range.clone();
        para_direction(&self.info.levels[para_range])
    }

    /// Returns the `Level` at offset `pos` (relative to the paragraph start) in the paragraph.
    ///
    /// # Panics
    ///
    /// Panics if the resulting absolute position is outside the text's levels.
    #[inline]
    pub fn level_at(&self, pos: usize) -> Level {
        self.info.levels[self.para.range.start + pos]
    }
}
// The TextSource implementation for UTF-16 text in a [u16] slice follows below.
// Note that there could be unpaired surrogates present!

/// Returns `true` if `code` is a high (leading) surrogate, i.e. in `0xD800..=0xDBFF`.
#[inline]
fn is_high_surrogate(code: u16) -> bool {
    (0xD800..=0xDBFF).contains(&code)
}
/// Returns `true` if `code` is a low (trailing) surrogate, i.e. in `0xDC00..=0xDFFF`.
#[inline]
fn is_low_surrogate(code: u16) -> bool {
    (0xDC00..=0xDFFF).contains(&code)
}
/// Implementation of TextSource for UTF-16 text in a [u16] slice.
/// Note that there could be unpaired surrogates present; they are reported as
/// U+FFFD REPLACEMENT CHARACTER occupying a single code unit.
impl<'text> TextSource<'text> for [u16] {
    type CharIter = Utf16CharIter<'text>;
    type CharIndexIter = Utf16CharIndexIter<'text>;
    type IndexLenIter = Utf16IndexLenIter<'text>;

    /// Length of the text in code units.
    #[inline]
    fn len(&self) -> usize {
        (self as &[u16]).len()
    }

    /// Returns the character starting at code-unit `index` together with its length in
    /// code units, or `None` if `index` is past the end of the text or points at the
    /// second half of a valid surrogate pair.
    fn char_at(&self, index: usize) -> Option<(char, usize)> {
        let unit = *self.get(index)?;

        // Any code unit that is not a surrogate converts directly to a char.
        if let Some(ch) = char::from_u32(unit.into()) {
            return Some((ch, 1));
        }

        // A low surrogate immediately preceded by a high surrogate is the middle of a
        // (valid) character, not the start of one.
        if is_low_surrogate(unit) && index > 0 && is_high_surrogate(self[index - 1]) {
            return None;
        }

        match char::decode_utf16(self[index..].iter().cloned()).next() {
            Some(Ok(ch)) => {
                // This must be a surrogate pair, otherwise char::from_u32() above should
                // have succeeded!
                debug_assert!(ch.len_utf16() == 2, "BMP should have already been handled");
                Some((ch, ch.len_utf16()))
            }
            // Unpaired surrogate: return REPLACEMENT_CHARACTER (not None), to continue
            // processing the following text and keep indexing correct.
            Some(Err(_)) => Some((char::REPLACEMENT_CHARACTER, 1)),
            None => {
                debug_assert!(
                    false,
                    "Why did decode_utf16 return None when we're not at the end?"
                );
                None
            }
        }
    }

    /// Returns the sub-slice of the text covered by `range` (in code units).
    #[inline]
    fn subrange(&self, range: Range<usize>) -> &Self {
        &(self as &[u16])[range]
    }

    /// Iterator over the characters of the text.
    #[inline]
    fn chars(&'text self) -> Self::CharIter {
        Utf16CharIter::new(self)
    }

    /// Iterator over `(code-unit index, char)` pairs.
    #[inline]
    fn char_indices(&'text self) -> Self::CharIndexIter {
        Utf16CharIndexIter::new(self)
    }

    /// Iterator over `(code-unit index, length in code units)` pairs.
    #[inline]
    fn indices_lengths(&'text self) -> Self::IndexLenIter {
        Utf16IndexLenIter::new(self)
    }

    /// Number of UTF-16 code units needed to encode `ch`.
    #[inline]
    fn char_len(ch: char) -> usize {
        ch.len_utf16()
    }
}
/// Iterator over UTF-16 text in a [u16] slice, returning (index, char_len) tuple.
#[derive(Debug)]
pub struct Utf16IndexLenIter<'text> {
    /// The text being iterated.
    text: &'text [u16],
    /// Code-unit index of the next character to return.
    cur_pos: usize,
}
impl<'text> Utf16IndexLenIter<'text> {
#[inline]
pub fn new(text: &'text [u16]) -> Self {
Utf16IndexLenIter { text, cur_pos: 0 }
}
}
impl Iterator for Utf16IndexLenIter<'_> {
    type Item = (usize, usize);

    /// Yields the code-unit index and code-unit length of the next character.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        let start = self.cur_pos;
        // `char_at` gives `None` once the end of the text has been reached.
        let (_, char_len) = self.text.char_at(start)?;
        self.cur_pos = start + char_len;
        Some((start, char_len))
    }
}
/// Iterator over UTF-16 text in a [u16] slice, returning (index, char) tuple.
#[derive(Debug)]
pub struct Utf16CharIndexIter<'text> {
    /// The text being iterated.
    text: &'text [u16],
    /// Code-unit index of the next character to return.
    cur_pos: usize,
}
impl<'text> Utf16CharIndexIter<'text> {
pub fn new(text: &'text [u16]) -> Self {
Utf16CharIndexIter { text, cur_pos: 0 }
}
}
impl Iterator for Utf16CharIndexIter<'_> {
    type Item = (usize, char);

    /// Yields the code-unit index of the next character together with the character.
    fn next(&mut self) -> Option<Self::Item> {
        let start = self.cur_pos;
        // `char_at` gives `None` once the end of the text has been reached.
        let (ch, char_len) = self.text.char_at(start)?;
        self.cur_pos = start + char_len;
        Some((start, ch))
    }
}
/// Iterator over UTF-16 text in a [u16] slice, returning Unicode chars.
/// (Unlike the other iterators above, this also supports reverse iteration.)
#[derive(Debug)]
pub struct Utf16CharIter<'text> {
    /// The text being iterated.
    text: &'text [u16],
    /// Code-unit index of the next character to return from the front.
    cur_pos: usize,
    /// Code-unit index just past the next character to return from the back.
    end_pos: usize,
}
impl<'text> Utf16CharIter<'text> {
pub fn new(text: &'text [u16]) -> Self {
Utf16CharIter {
text,
cur_pos: 0,
end_pos: text.len(),
}
}
}
impl Iterator for Utf16CharIter<'_> {
    type Item = char;

    /// Yields the next character from the front of the remaining text.
    ///
    /// Returns `None` once the front cursor has met the back cursor, so characters
    /// already handed out by `next_back()` are never yielded a second time.
    fn next(&mut self) -> Option<Self::Item> {
        // Everything at or beyond `end_pos` has been consumed from the back.
        if self.cur_pos >= self.end_pos {
            return None;
        }
        let (ch, char_len) = self.text.char_at(self.cur_pos)?;
        self.cur_pos += char_len;
        Some(ch)
    }
}
impl DoubleEndedIterator for Utf16CharIter<'_> {
    /// Yields the next character from the back of the remaining text.
    ///
    /// A valid surrogate pair is returned as one character; a surrogate that is not
    /// part of a pair within the remaining text is returned as U+FFFD.
    fn next_back(&mut self) -> Option<Self::Item> {
        if self.end_pos <= self.cur_pos {
            return None;
        }

        self.end_pos -= 1;
        let unit = self.text[self.end_pos];
        match char::from_u32(unit as u32) {
            // Not a surrogate: the code unit is a complete character.
            Some(ch) => Some(ch),
            None => {
                // Surrogate: see whether the preceding code unit (if still unconsumed)
                // starts a two-unit character that ends here.
                if self.end_pos > self.cur_pos {
                    if let Some((ch, 2)) = self.text.char_at(self.end_pos - 1) {
                        self.end_pos -= 1;
                        return Some(ch);
                    }
                }
                Some(char::REPLACEMENT_CHARACTER)
            }
        }
    }
}