Vendor dependencies for 0.3.0 release

This commit is contained in:
2025-09-27 10:29:08 -05:00
parent 0c8d39d483
commit 82ab7f317b
26803 changed files with 16134934 additions and 0 deletions

1
vendor/regex/.cargo-checksum.json vendored Normal file

File diff suppressed because one or more lines are too long

1697
vendor/regex/CHANGELOG.md vendored Normal file

File diff suppressed because it is too large Load Diff

431
vendor/regex/Cargo.lock generated vendored Normal file
View File

@@ -0,0 +1,431 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "aho-corasick"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
dependencies = [
"log",
"memchr",
]
[[package]]
name = "anyhow"
version = "1.0.98"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487"
[[package]]
name = "atty"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
dependencies = [
"hermit-abi",
"libc",
"winapi",
]
[[package]]
name = "bstr"
version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4"
dependencies = [
"memchr",
"serde",
]
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "doc-comment"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10"
[[package]]
name = "env_logger"
version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a12e6657c4c97ebab115a42dcee77225f7f482cdd841cf7088c657a42e9e00e7"
dependencies = [
"atty",
"humantime",
"log",
"termcolor",
]
[[package]]
name = "equivalent"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
[[package]]
name = "getrandom"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592"
dependencies = [
"cfg-if",
"libc",
"wasi",
]
[[package]]
name = "hashbrown"
version = "0.15.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "84b26c544d002229e640969970a2e74021aadf6e2f96372b9c58eff97de08eb3"
[[package]]
name = "hermit-abi"
version = "0.1.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
dependencies = [
"libc",
]
[[package]]
name = "humantime"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b112acc8b3adf4b107a8ec20977da0273a8c386765a3ec0229bd500a1443f9f"
[[package]]
name = "indexmap"
version = "2.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e"
dependencies = [
"equivalent",
"hashbrown",
]
[[package]]
name = "libc"
version = "0.2.172"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa"
[[package]]
name = "log"
version = "0.4.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94"
[[package]]
name = "memchr"
version = "2.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
dependencies = [
"log",
]
[[package]]
name = "proc-macro2"
version = "1.0.95"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quickcheck"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "588f6378e4dd99458b60ec275b4477add41ce4fa9f64dcba6f15adccb19b50d6"
dependencies = [
"rand",
]
[[package]]
name = "quote"
version = "1.0.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d"
dependencies = [
"proc-macro2",
]
[[package]]
name = "rand"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
"rand_core",
]
[[package]]
name = "rand_core"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
dependencies = [
"getrandom",
]
[[package]]
name = "regex"
version = "1.11.3"
dependencies = [
"aho-corasick",
"anyhow",
"doc-comment",
"env_logger",
"memchr",
"quickcheck",
"regex-automata",
"regex-syntax",
"regex-test",
]
[[package]]
name = "regex-automata"
version = "0.4.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "833eb9ce86d40ef33cb1306d8accf7bc8ec2bfea4355cbdebb3df68b40925cad"
dependencies = [
"aho-corasick",
"log",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001"
[[package]]
name = "regex-test"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da40f0939bc4c598b4326abdbb363a8987aa43d0526e5624aefcf3ed90344e62"
dependencies = [
"anyhow",
"bstr",
"serde",
"toml",
]
[[package]]
name = "serde"
version = "1.0.219"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.219"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "serde_spanned"
version = "0.6.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf41e0cfaf7226dca15e8197172c295a782857fcb97fad1808a166870dee75a3"
dependencies = [
"serde",
]
[[package]]
name = "syn"
version = "2.0.101"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "termcolor"
version = "1.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755"
dependencies = [
"winapi-util",
]
[[package]]
name = "toml"
version = "0.8.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362"
dependencies = [
"serde",
"serde_spanned",
"toml_datetime",
"toml_edit",
]
[[package]]
name = "toml_datetime"
version = "0.6.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c"
dependencies = [
"serde",
]
[[package]]
name = "toml_edit"
version = "0.22.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a"
dependencies = [
"indexmap",
"serde",
"serde_spanned",
"toml_datetime",
"winnow",
]
[[package]]
name = "unicode-ident"
version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-util"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
dependencies = [
"windows-sys",
]
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows-sys"
version = "0.59.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-targets"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_gnullvm",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
[[package]]
name = "windows_i686_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
[[package]]
name = "windows_i686_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
[[package]]
name = "windows_i686_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
[[package]]
name = "winnow"
version = "0.7.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "21a0236b59786fed61e2a80582dd500fe61f18b5dca67a4a067d0bc9039339cf"
dependencies = [
"memchr",
]

202
vendor/regex/Cargo.toml vendored Normal file
View File

@@ -0,0 +1,202 @@
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies.
#
# If you are reading this file be aware that the original Cargo.toml
# will likely look very different (and much more reasonable).
# See Cargo.toml.orig for the original contents.
[package]
edition = "2021"
rust-version = "1.65"
name = "regex"
version = "1.11.3"
authors = [
"The Rust Project Developers",
"Andrew Gallant <jamslam@gmail.com>",
]
build = false
exclude = [
"/fuzz/*",
"/record/*",
"/scripts/*",
"tests/fuzz/*",
"/.github/*",
]
autolib = false
autobins = false
autoexamples = false
autotests = false
autobenches = false
description = """
An implementation of regular expressions for Rust. This implementation uses
finite automata and guarantees linear time matching on all inputs.
"""
homepage = "https://github.com/rust-lang/regex"
documentation = "https://docs.rs/regex"
readme = "README.md"
categories = ["text-processing"]
license = "MIT OR Apache-2.0"
repository = "https://github.com/rust-lang/regex"
[package.metadata.docs.rs]
all-features = true
rustdoc-args = [
"--cfg",
"docsrs",
]
[features]
default = [
"std",
"perf",
"unicode",
"regex-syntax/default",
]
logging = [
"aho-corasick?/logging",
"memchr?/logging",
"regex-automata/logging",
]
pattern = []
perf = [
"perf-cache",
"perf-dfa",
"perf-onepass",
"perf-backtrack",
"perf-inline",
"perf-literal",
]
perf-backtrack = ["regex-automata/nfa-backtrack"]
perf-cache = []
perf-dfa = ["regex-automata/hybrid"]
perf-dfa-full = [
"regex-automata/dfa-build",
"regex-automata/dfa-search",
]
perf-inline = ["regex-automata/perf-inline"]
perf-literal = [
"dep:aho-corasick",
"dep:memchr",
"regex-automata/perf-literal",
]
perf-onepass = ["regex-automata/dfa-onepass"]
std = [
"aho-corasick?/std",
"memchr?/std",
"regex-automata/std",
"regex-syntax/std",
]
unicode = [
"unicode-age",
"unicode-bool",
"unicode-case",
"unicode-gencat",
"unicode-perl",
"unicode-script",
"unicode-segment",
"regex-automata/unicode",
"regex-syntax/unicode",
]
unicode-age = [
"regex-automata/unicode-age",
"regex-syntax/unicode-age",
]
unicode-bool = [
"regex-automata/unicode-bool",
"regex-syntax/unicode-bool",
]
unicode-case = [
"regex-automata/unicode-case",
"regex-syntax/unicode-case",
]
unicode-gencat = [
"regex-automata/unicode-gencat",
"regex-syntax/unicode-gencat",
]
unicode-perl = [
"regex-automata/unicode-perl",
"regex-automata/unicode-word-boundary",
"regex-syntax/unicode-perl",
]
unicode-script = [
"regex-automata/unicode-script",
"regex-syntax/unicode-script",
]
unicode-segment = [
"regex-automata/unicode-segment",
"regex-syntax/unicode-segment",
]
unstable = ["pattern"]
use_std = ["std"]
[lib]
name = "regex"
path = "src/lib.rs"
[[test]]
name = "integration"
path = "tests/lib.rs"
[dependencies.aho-corasick]
version = "1.0.0"
optional = true
default-features = false
[dependencies.memchr]
version = "2.6.0"
optional = true
default-features = false
[dependencies.regex-automata]
version = "0.4.11"
features = [
"alloc",
"syntax",
"meta",
"nfa-pikevm",
]
default-features = false
[dependencies.regex-syntax]
version = "0.8.5"
default-features = false
[dev-dependencies.anyhow]
version = "1.0.69"
[dev-dependencies.doc-comment]
version = "0.3"
[dev-dependencies.env_logger]
version = "0.9.3"
features = [
"atty",
"humantime",
"termcolor",
]
default-features = false
[dev-dependencies.quickcheck]
version = "1.0.3"
default-features = false
[dev-dependencies.regex-test]
version = "0.1.0"
[profile.bench]
debug = 2
[profile.dev]
opt-level = 3
debug = 2
[profile.release]
debug = 2
[profile.test]
opt-level = 3
debug = 2

7
vendor/regex/Cross.toml vendored Normal file
View File

@@ -0,0 +1,7 @@
[build.env]
passthrough = [
"RUST_BACKTRACE",
"RUST_LOG",
"REGEX_TEST",
"REGEX_TEST_VERBOSE",
]

201
vendor/regex/LICENSE-APACHE vendored Normal file
View File

@@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

25
vendor/regex/LICENSE-MIT vendored Normal file
View File

@@ -0,0 +1,25 @@
Copyright (c) 2014 The Rust Project Developers
Permission is hereby granted, free of charge, to any
person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the
Software without restriction, including without
limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software
is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice
shall be included in all copies or substantial portions
of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

336
vendor/regex/README.md vendored Normal file
View File

@@ -0,0 +1,336 @@
regex
=====
This crate provides routines for searching strings for matches of a [regular
expression] (aka "regex"). The regex syntax supported by this crate is similar
to other regex engines, but it lacks several features that nobody knows how to
implement efficiently. This includes, but is not limited to, look-around and
backreferences. In exchange, all regex searches in this crate have worst case
`O(m * n)` time complexity, where `m` is proportional to the size of the regex
and `n` is proportional to the size of the string being searched.
[regular expression]: https://en.wikipedia.org/wiki/Regular_expression
[![Build status](https://github.com/rust-lang/regex/workflows/ci/badge.svg)](https://github.com/rust-lang/regex/actions)
[![Crates.io](https://img.shields.io/crates/v/regex.svg)](https://crates.io/crates/regex)
### Documentation
[Module documentation with examples](https://docs.rs/regex).
The module documentation also includes a comprehensive description of the
syntax supported.
Documentation with examples for the various matching functions and iterators
can be found on the
[`Regex` type](https://docs.rs/regex/*/regex/struct.Regex.html).
### Usage
To bring this crate into your repository, either add `regex` to your
`Cargo.toml`, or run `cargo add regex`.
Here's a simple example that matches a date in YYYY-MM-DD format and prints the
year, month and day:
```rust
use regex::Regex;
fn main() {
let re = Regex::new(r"(?x)
(?P<year>\d{4}) # the year
-
(?P<month>\d{2}) # the month
-
(?P<day>\d{2}) # the day
").unwrap();
let caps = re.captures("2010-03-14").unwrap();
assert_eq!("2010", &caps["year"]);
assert_eq!("03", &caps["month"]);
assert_eq!("14", &caps["day"]);
}
```
If you have lots of dates in text that you'd like to iterate over, then it's
easy to adapt the above example with an iterator:
```rust
use regex::Regex;
fn main() {
let re = Regex::new(r"(\d{4})-(\d{2})-(\d{2})").unwrap();
let hay = "On 2010-03-14, foo happened. On 2014-10-14, bar happened.";
let mut dates = vec![];
for (_, [year, month, day]) in re.captures_iter(hay).map(|c| c.extract()) {
dates.push((year, month, day));
}
assert_eq!(dates, vec![
("2010", "03", "14"),
("2014", "10", "14"),
]);
}
```
### Usage: Avoid compiling the same regex in a loop
It is an anti-pattern to compile the same regular expression in a loop since
compilation is typically expensive. (It takes anywhere from a few microseconds
to a few **milliseconds** depending on the size of the regex.) Not only is
compilation itself expensive, but this also prevents optimizations that reuse
allocations internally to the matching engines.
In Rust, it can sometimes be a pain to pass regular expressions around if
they're used from inside a helper function. Instead, we recommend using
[`std::sync::LazyLock`] or, if you can't use the standard library, the
[`once_cell`] crate.
This example shows how to use `std::sync::LazyLock`:
```rust
use std::sync::LazyLock;
use regex::Regex;
fn some_helper_function(haystack: &str) -> bool {
static RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"...").unwrap());
RE.is_match(haystack)
}
fn main() {
assert!(some_helper_function("abc"));
assert!(!some_helper_function("ac"));
}
```
Specifically, in this example, the regex will be compiled when it is used for
the first time. On subsequent uses, it will reuse the previous compilation.
[`std::sync::LazyLock`]: https://doc.rust-lang.org/std/sync/struct.LazyLock.html
[`once_cell`]: https://crates.io/crates/once_cell
### Usage: match regular expressions on `&[u8]`
The main API of this crate (`regex::Regex`) requires the caller to pass a
`&str` for searching. In Rust, an `&str` is required to be valid UTF-8, which
means the main API can't be used for searching arbitrary bytes.
To match on arbitrary bytes, use the `regex::bytes::Regex` API. The API is
identical to the main API, except that it takes an `&[u8]` to search on instead
of an `&str`. The `&[u8]` APIs also permit disabling Unicode mode in the regex
even when the pattern would match invalid UTF-8. For example, `(?-u:.)` is
not allowed in `regex::Regex` but is allowed in `regex::bytes::Regex` since
`(?-u:.)` matches any byte except for `\n`. Conversely, `.` will match the
UTF-8 encoding of any Unicode scalar value except for `\n`.
This example shows how to find all null-terminated strings in a slice of bytes:
```rust
use regex::bytes::Regex;
let re = Regex::new(r"(?-u)(?<cstr>[^\x00]+)\x00").unwrap();
let text = b"foo\xFFbar\x00baz\x00";
// Extract all of the strings without the null terminator from each match.
// The unwrap is OK here since a match requires the `cstr` capture to match.
let cstrs: Vec<&[u8]> =
re.captures_iter(text)
.map(|c| c.name("cstr").unwrap().as_bytes())
.collect();
assert_eq!(vec![&b"foo\xFFbar"[..], &b"baz"[..]], cstrs);
```
Notice here that the `[^\x00]+` will match any *byte* except for `NUL`,
including bytes like `\xFF` which are not valid UTF-8. When using the main API,
`[^\x00]+` would instead match any valid UTF-8 sequence except for `NUL`.
### Usage: match multiple regular expressions simultaneously
This demonstrates how to use a `RegexSet` to match multiple (possibly
overlapping) regular expressions in a single scan of the search text:
```rust
use regex::RegexSet;
let set = RegexSet::new(&[
r"\w+",
r"\d+",
r"\pL+",
r"foo",
r"bar",
r"barfoo",
r"foobar",
]).unwrap();
// Iterate over and collect all of the matches.
let matches: Vec<_> = set.matches("foobar").into_iter().collect();
assert_eq!(matches, vec![0, 2, 3, 4, 6]);
// You can also test whether a particular regex matched:
let matches = set.matches("foobar");
assert!(!matches.matched(5));
assert!(matches.matched(6));
```
### Usage: regex internals as a library
The [`regex-automata` directory](./regex-automata/) contains a crate that
exposes all the internal matching engines used by the `regex` crate. The
idea is that the `regex` crate exposes a simple API for 99% of use cases, but
`regex-automata` exposes oodles of customizable behaviors.
[Documentation for `regex-automata`.](https://docs.rs/regex-automata)
### Usage: a regular expression parser
This repository contains a crate that provides a well tested regular expression
parser, abstract syntax and a high-level intermediate representation for
convenient analysis. It provides no facilities for compilation or execution.
This may be useful if you're implementing your own regex engine or otherwise
need to do analysis on the syntax of a regular expression. It is otherwise not
recommended for general use.
[Documentation for `regex-syntax`.](https://docs.rs/regex-syntax)
### Crate features
This crate comes with several features that permit tweaking the trade-off
between binary size, compilation time and runtime performance. Users of this
crate can selectively disable Unicode tables, or choose from a variety of
optimizations performed by this crate to disable.
When all of these features are disabled, runtime match performance may be much
worse, but if you're matching on short strings, or if high performance isn't
necessary, then such a configuration is perfectly serviceable. To disable
all such features, use the following `Cargo.toml` dependency configuration:
```toml
[dependencies.regex]
version = "1.3"
default-features = false
# Unless you have a specific reason not to, it's good sense to enable standard
# library support. It enables several optimizations and avoids spin locks. It
# also shouldn't meaningfully impact compile times or binary size.
features = ["std"]
```
This will reduce the dependency tree of `regex` down to two crates:
`regex-syntax` and `regex-automata`.
The full set of features one can disable is listed
[in the "Crate features" section of the documentation](https://docs.rs/regex/1.*/#crate-features).
### Performance
One of the goals of this crate is for the regex engine to be "fast." While that
is a somewhat nebulous goal, it is usually interpreted in one of two ways.
First, it means that all searches take worst case `O(m * n)` time, where
`m` is proportional to `len(regex)` and `n` is proportional to `len(haystack)`.
Second, it means that even aside from the time complexity constraint, regex
searches are "fast" in practice.
While the first interpretation is pretty unambiguous, the second one remains
nebulous. While nebulous, it guides this crate's architecture and the sorts of
trade-offs it makes. For example, here are some general architectural
statements that follow as a result of the goal to be "fast":
* When given the choice between faster regex searches and faster _Rust compile
times_, this crate will generally choose faster regex searches.
* When given the choice between faster regex searches and faster _regex compile
times_, this crate will generally choose faster regex searches. That is, it is
generally acceptable for `Regex::new` to get a little slower if it means that
searches get faster. (This is a somewhat delicate balance to strike, because
the speed of `Regex::new` needs to remain somewhat reasonable. But this is why
one should avoid re-compiling the same regex over and over again.)
* When given the choice between faster regex searches and simpler API
design, this crate will generally choose faster regex searches. For example,
if one didn't care about performance, we could likely get rid of both of
the `Regex::is_match` and `Regex::find` APIs and instead just rely on
`Regex::captures`.
There are perhaps more ways that being "fast" influences things.
While this repository used to provide its own benchmark suite, it has since
been moved to [rebar](https://github.com/BurntSushi/rebar). The benchmarks are
quite extensive, and there are many more than what is shown in rebar's README
(which is just limited to a "curated" set meant to compare performance between
regex engines). To run all of this crate's benchmarks, first start by cloning
and installing `rebar`:
```text
$ git clone https://github.com/BurntSushi/rebar
$ cd rebar
$ cargo install --path ./
```
Then build the benchmark harness for just this crate:
```text
$ rebar build -e '^rust/regex$'
```
Run all benchmarks for this crate as tests (each benchmark is executed once to
ensure it works):
```text
$ rebar measure -e '^rust/regex$' -t
```
Record measurements for all benchmarks and save them to a CSV file:
```text
$ rebar measure -e '^rust/regex$' | tee results.csv
```
Explore benchmark timings:
```text
$ rebar cmp results.csv
```
See the `rebar` documentation for more details on how it works and how to
compare results with other regex engines.
### Hacking
The `regex` crate is, for the most part, a pretty thin wrapper around the
[`meta::Regex`](https://docs.rs/regex-automata/latest/regex_automata/meta/struct.Regex.html)
from the
[`regex-automata` crate](https://docs.rs/regex-automata/latest/regex_automata/).
Therefore, if you're looking to work on the internals of this crate, you'll
likely either want to look in `regex-syntax` (for parsing) or `regex-automata`
(for construction of finite automata and the search routines).
My [blog on regex internals](https://burntsushi.net/regex-internals/)
goes into more depth.
### Minimum Rust version policy
This crate's minimum supported `rustc` version is `1.65.0`.
The policy is that the minimum Rust version required to use this crate can be
increased in minor version updates. For example, if regex 1.0 requires Rust
1.20.0, then regex 1.0.z for all values of `z` will also require Rust 1.20.0 or
newer. However, regex 1.y for `y > 0` may require a newer minimum version of
Rust.
### License
This project is licensed under either of
* Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
https://www.apache.org/licenses/LICENSE-2.0)
* MIT license ([LICENSE-MIT](LICENSE-MIT) or
https://opensource.org/licenses/MIT)
at your option.
The data in `regex-syntax/src/unicode_tables/` is licensed under the Unicode
License Agreement
([LICENSE-UNICODE](https://www.unicode.org/copyright.html#License)).

258
vendor/regex/UNICODE.md vendored Normal file
View File

@@ -0,0 +1,258 @@
# Unicode conformance
This document describes the regex crate's conformance to Unicode's
[UTS#18](https://unicode.org/reports/tr18/)
report, which lays out 3 levels of support: Basic, Extended and Tailored.
Full support for Level 1 ("Basic Unicode Support") is provided with two
exceptions:
1. Line boundaries are not Unicode aware. Namely, only the `\n`
(`END OF LINE`) character is recognized as a line boundary by default.
One can opt into `\r\n|\r|\n` being a line boundary via CRLF mode.
2. The compatibility properties specified by
[RL1.2a](https://unicode.org/reports/tr18/#RL1.2a)
are ASCII-only definitions.
Little to no support is provided for either Level 2 or Level 3. For the most
part, this is because the features are either complex/hard to implement, or at
the very least, very difficult to implement without sacrificing performance.
For example, tackling canonical equivalence such that matching worked as one
would expect regardless of normalization form would be a significant
undertaking. This is at least partially a result of the fact that this regex
engine is based on finite automata, which admit less of the flexibility
normally associated with backtracking implementations.
## RL1.1 Hex Notation
[UTS#18 RL1.1](https://unicode.org/reports/tr18/#Hex_notation)
Hex Notation refers to the ability to specify a Unicode code point in a regular
expression via its hexadecimal code point representation. This is useful in
environments that have poor Unicode font rendering or if you need to express a
code point that is not normally displayable. All forms of hexadecimal notation
are supported:
\x7F hex character code (exactly two digits)
\x{10FFFF} any hex character code corresponding to a Unicode code point
\u007F hex character code (exactly four digits)
\u{7F} any hex character code corresponding to a Unicode code point
\U0000007F hex character code (exactly eight digits)
\U{7F} any hex character code corresponding to a Unicode code point
Briefly, the `\x{...}`, `\u{...}` and `\U{...}` are all exactly equivalent ways
of expressing hexadecimal code points. Any number of digits can be written
within the brackets. In contrast, `\xNN`, `\uNNNN`, `\UNNNNNNNN` are all
fixed-width variants of the same idea.
Note that when Unicode mode is disabled, any non-ASCII Unicode codepoint is
banned. Additionally, the `\xNN` syntax represents arbitrary bytes when Unicode
mode is disabled. That is, the regex `\xFF` matches the Unicode codepoint
U+00FF (encoded as `\xC3\xBF` in UTF-8) while the regex `(?-u)\xFF` matches
the literal byte `\xFF`.
## RL1.2 Properties
[UTS#18 RL1.2](https://unicode.org/reports/tr18/#Categories)
Full support for Unicode property syntax is provided. Unicode properties
provide a convenient way to construct character classes of groups of code
points specified by Unicode. The regex crate does not provide exhaustive
support, but covers a useful subset. In particular:
* [General categories](https://unicode.org/reports/tr18/#General_Category_Property)
* [Scripts and Script Extensions](https://unicode.org/reports/tr18/#Script_Property)
* [Age](https://unicode.org/reports/tr18/#Age)
* A smattering of boolean properties, including all of those specified by
[RL1.2](https://unicode.org/reports/tr18/#RL1.2) explicitly.
In all cases, property name and value abbreviations are supported, and all
names/values are matched loosely without regard for case, whitespace or
underscores. Property name aliases can be found in Unicode's
[`PropertyAliases.txt`](https://www.unicode.org/Public/UCD/latest/ucd/PropertyAliases.txt)
file, while property value aliases can be found in Unicode's
[`PropertyValueAliases.txt`](https://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt)
file.
The syntax supported is also consistent with the UTS#18 recommendation:
* `\p{Greek}` selects the `Greek` script. Equivalent expressions follow:
`\p{sc:Greek}`, `\p{Script:Greek}`, `\p{Sc=Greek}`, `\p{script=Greek}`,
`\P{sc!=Greek}`. Similarly for `General_Category` (or `gc` for short) and
`Script_Extensions` (or `scx` for short).
* `\p{age:3.2}` selects all code points in Unicode 3.2.
* `\p{Alphabetic}` selects the "alphabetic" property and can be abbreviated
via `\p{alpha}` (for example).
* Single letter variants for properties with single letter abbreviations.
For example, `\p{Letter}` can be equivalently written as `\pL`.
The following is a list of all properties supported by the regex crate (starred
properties correspond to properties required by RL1.2):
* `General_Category` \* (including `Any`, `ASCII` and `Assigned`)
* `Script` \*
* `Script_Extensions` \*
* `Age`
* `ASCII_Hex_Digit`
* `Alphabetic` \*
* `Bidi_Control`
* `Case_Ignorable`
* `Cased`
* `Changes_When_Casefolded`
* `Changes_When_Casemapped`
* `Changes_When_Lowercased`
* `Changes_When_Titlecased`
* `Changes_When_Uppercased`
* `Dash`
* `Default_Ignorable_Code_Point` \*
* `Deprecated`
* `Diacritic`
* `Emoji`
* `Emoji_Presentation`
* `Emoji_Modifier`
* `Emoji_Modifier_Base`
* `Emoji_Component`
* `Extended_Pictographic`
* `Extender`
* `Grapheme_Base`
* `Grapheme_Cluster_Break`
* `Grapheme_Extend`
* `Hex_Digit`
* `IDS_Binary_Operator`
* `IDS_Trinary_Operator`
* `ID_Continue`
* `ID_Start`
* `Join_Control`
* `Logical_Order_Exception`
* `Lowercase` \*
* `Math`
* `Noncharacter_Code_Point` \*
* `Pattern_Syntax`
* `Pattern_White_Space`
* `Prepended_Concatenation_Mark`
* `Quotation_Mark`
* `Radical`
* `Regional_Indicator`
* `Sentence_Break`
* `Sentence_Terminal`
* `Soft_Dotted`
* `Terminal_Punctuation`
* `Unified_Ideograph`
* `Uppercase` \*
* `Variation_Selector`
* `White_Space` \*
* `Word_Break`
* `XID_Continue`
* `XID_Start`
## RL1.2a Compatibility Properties
[UTS#18 RL1.2a](https://unicode.org/reports/tr18/#RL1.2a)
The regex crate only provides ASCII definitions of the
[compatibility properties documented in UTS#18 Annex C](https://unicode.org/reports/tr18/#Compatibility_Properties)
(sans the `\X` class, for matching grapheme clusters, which isn't provided
at all). This is because it seems to be consistent with most other regular
expression engines, and in particular, because these are often referred to as
"ASCII" or "POSIX" character classes.
Note that the `\w`, `\s` and `\d` character classes **are** Unicode aware.
Their traditional ASCII definition can be used by disabling Unicode. That is,
`[[:word:]]` and `(?-u)\w` are equivalent.
## RL1.3 Subtraction and Intersection
[UTS#18 RL1.3](https://unicode.org/reports/tr18/#Subtraction_and_Intersection)
The regex crate provides full support for nested character classes, along with
union, intersection (`&&`), difference (`--`) and symmetric difference (`~~`)
operations on arbitrary character classes.
For example, to match all non-ASCII letters, you could use either
`[\p{Letter}--\p{Ascii}]` (difference) or `[\p{Letter}&&[^\p{Ascii}]]`
(intersecting the negation).
## RL1.4 Simple Word Boundaries
[UTS#18 RL1.4](https://unicode.org/reports/tr18/#Simple_Word_Boundaries)
The regex crate provides basic Unicode aware word boundary assertions. A word
boundary assertion can be written as `\b`, or `\B` as its negation. A word
boundary assertion corresponds to a zero-width match, where its adjacent
characters correspond to word and non-word, or non-word and word characters.
Conformance in this case chooses to define word character in the same way that
the `\w` character class is defined: a code point that is a member of one of
the following classes:
* `\p{Alphabetic}`
* `\p{Join_Control}`
* `\p{gc:Mark}`
* `\p{gc:Decimal_Number}`
* `\p{gc:Connector_Punctuation}`
In particular, this differs slightly from the
[prescription given in RL1.4](https://unicode.org/reports/tr18/#Simple_Word_Boundaries)
but is permissible according to
[UTS#18 Annex C](https://unicode.org/reports/tr18/#Compatibility_Properties).
Namely, it is convenient and simpler to have `\w` and `\b` be in sync with
one another.
Finally, Unicode word boundaries can be disabled, which will cause ASCII word
boundaries to be used instead. That is, `\b` is a Unicode word boundary while
`(?-u)\b` is an ASCII-only word boundary. This can occasionally be beneficial
if performance is important, since the implementation of Unicode word
boundaries is currently suboptimal on non-ASCII text.
## RL1.5 Simple Loose Matches
[UTS#18 RL1.5](https://unicode.org/reports/tr18/#Simple_Loose_Matches)
The regex crate provides full support for case-insensitive matching in
accordance with RL1.5. That is, it uses the "simple" case folding mapping. The
"simple" mapping was chosen because of a key convenient property: every
"simple" mapping is a mapping from exactly one code point to exactly one other
code point. This makes case-insensitive matching of character classes, for
example, straight-forward to implement.
When case-insensitive mode is enabled (e.g., `(?i)[a]` is equivalent to `a|A`),
then all character classes are case folded as well.
## RL1.6 Line Boundaries
[UTS#18 RL1.6](https://unicode.org/reports/tr18/#Line_Boundaries)
The regex crate only provides support for recognizing the `\n` (`END OF LINE`)
character as a line boundary by default. One can also opt into treating
`\r\n|\r|\n` as a line boundary via CRLF mode. This choice was made mostly for
implementation convenience, and to avoid performance cliffs that Unicode word
boundaries are subject to.
## RL1.7 Code Points
[UTS#18 RL1.7](https://unicode.org/reports/tr18/#Supplementary_Characters)
The regex crate provides full support for Unicode code point matching. Namely,
the fundamental atom of any match is always a single code point.
Given Rust's strong ties to UTF-8, the following guarantees are also provided:
* All matches are reported on valid UTF-8 code unit boundaries. That is, any
match range returned by the public regex API is guaranteed to successfully
slice the string that was searched.
* By consequence of the above, it is impossible to match surrogate code points.
No support for UTF-16 is provided, so this is never necessary.
Note that when Unicode mode is disabled, the fundamental atom of matching is
no longer a code point but a single byte. When Unicode mode is disabled, many
Unicode features are disabled as well. For example, `(?-u)\pL` is not a valid
regex but `\pL(?-u)\xFF` (matches any Unicode `Letter` followed by the literal
byte `\xFF`) is.

2
vendor/regex/bench/README.md vendored Normal file
View File

@@ -0,0 +1,2 @@
Benchmarks for this crate have been moved into the rebar project:
https://github.com/BurntSushi/rebar

2
vendor/regex/rustfmt.toml vendored Normal file
View File

@@ -0,0 +1,2 @@
max_width = 79
use_small_heuristics = "max"

2539
vendor/regex/src/builders.rs vendored Normal file

File diff suppressed because it is too large Load Diff

91
vendor/regex/src/bytes.rs vendored Normal file
View File

@@ -0,0 +1,91 @@
/*!
Search for regex matches in `&[u8]` haystacks.
This module provides a nearly identical API via [`Regex`] to the one found in
the top-level of this crate. There are two important differences:
1. Matching is done on `&[u8]` instead of `&str`. Additionally, `Vec<u8>`
is used where `String` would have been used in the top-level API.
2. Unicode support can be disabled even when disabling it would result in
matching invalid UTF-8 bytes.
# Example: match null terminated string
This shows how to find all null-terminated strings in a slice of bytes. This
works even if a C string contains invalid UTF-8.
```rust
use regex::bytes::Regex;
let re = Regex::new(r"(?-u)(?<cstr>[^\x00]+)\x00").unwrap();
let hay = b"foo\x00qu\xFFux\x00baz\x00";
// Extract all of the strings without the NUL terminator from each match.
// The unwrap is OK here since a match requires the `cstr` capture to match.
let cstrs: Vec<&[u8]> =
re.captures_iter(hay)
.map(|c| c.name("cstr").unwrap().as_bytes())
.collect();
assert_eq!(cstrs, vec![&b"foo"[..], &b"qu\xFFux"[..], &b"baz"[..]]);
```
# Example: selectively enable Unicode support
This shows how to match an arbitrary byte pattern followed by a UTF-8 encoded
string (e.g., to extract a title from a Matroska file):
```rust
use regex::bytes::Regex;
let re = Regex::new(
r"(?-u)\x7b\xa9(?:[\x80-\xfe]|[\x40-\xff].)(?u:(.*))"
).unwrap();
let hay = b"\x12\xd0\x3b\x5f\x7b\xa9\x85\xe2\x98\x83\x80\x98\x54\x76\x68\x65";
// Notice that despite the `.*` at the end, it will only match valid UTF-8
// because Unicode mode was enabled with the `u` flag. Without the `u` flag,
// the `.*` would match the rest of the bytes regardless of whether they were
// valid UTF-8.
let (_, [title]) = re.captures(hay).unwrap().extract();
assert_eq!(title, b"\xE2\x98\x83");
// We can UTF-8 decode the title now. And the unwrap here
// is correct because the existence of a match guarantees
// that `title` is valid UTF-8.
let title = std::str::from_utf8(title).unwrap();
assert_eq!(title, "☃");
```
In general, if the Unicode flag is enabled in a capture group and that capture
is part of the overall match, then the capture is *guaranteed* to be valid
UTF-8.
# Syntax
The supported syntax is pretty much the same as the syntax for Unicode
regular expressions with a few changes that make sense for matching arbitrary
bytes:
1. The `u` flag can be disabled even when disabling it might cause the regex to
match invalid UTF-8. When the `u` flag is disabled, the regex is said to be in
"ASCII compatible" mode.
2. In ASCII compatible mode, Unicode character classes are not allowed. Literal
Unicode scalar values outside of character classes are allowed.
3. In ASCII compatible mode, Perl character classes (`\w`, `\d` and `\s`)
revert to their typical ASCII definition. `\w` maps to `[[:word:]]`, `\d` maps
to `[[:digit:]]` and `\s` maps to `[[:space:]]`.
4. In ASCII compatible mode, word boundaries use the ASCII compatible `\w` to
determine whether a byte is a word byte or not.
5. Hexadecimal notation can be used to specify arbitrary bytes instead of
Unicode codepoints. For example, in ASCII compatible mode, `\xFF` matches the
literal byte `\xFF`, while in Unicode mode, `\xFF` is the Unicode codepoint
`U+00FF` that matches its UTF-8 encoding of `\xC3\xBF`. Similarly for octal
notation when enabled.
6. In ASCII compatible mode, `.` matches any *byte* except for `\n`. When the
`s` flag is additionally enabled, `.` matches any byte.
# Performance
In general, one should expect performance on `&[u8]` to be roughly similar to
performance on `&str`.
*/
pub use crate::{builders::bytes::*, regex::bytes::*, regexset::bytes::*};

101
vendor/regex/src/error.rs vendored Normal file
View File

@@ -0,0 +1,101 @@
use alloc::string::{String, ToString};
use regex_automata::meta;
/// An error that occurred during parsing or compiling a regular expression.
///
/// `Display` prints the human readable message for either variant. `Debug`
/// is implemented by hand (rather than derived) so that syntax errors are
/// rendered readably when a caller does `Regex::new(...).unwrap()`.
///
/// This enum is `#[non_exhaustive]`, so new variants may be added later.
#[non_exhaustive]
#[derive(Clone, PartialEq)]
pub enum Error {
/// A syntax error.
///
/// The payload is the already rendered error message. Note that
/// `Error::from_meta_build_error` also uses this variant as a catch-all for
/// build failures that are neither a syntax error nor a size limit
/// violation, so the message is not guaranteed to describe a problem with
/// the pattern's syntax.
Syntax(String),
/// The compiled program exceeded the set size
/// limit. The argument is the size limit imposed by
/// [`RegexBuilder::size_limit`](crate::RegexBuilder::size_limit). Even
/// when not configured explicitly, it defaults to a reasonable limit.
///
/// If you're getting this error, it occurred because your regex has been
/// compiled to an intermediate state that is too big. It is important to
/// note that exceeding this limit does _not_ mean the regex is too big to
/// _work_, but rather, the regex is big enough that it may wind up being
/// surprisingly slow when used in a search. In other words, this error is
/// meant to be a practical heuristic for avoiding a performance footgun,
/// and especially so for the case where the regex pattern is coming from
/// an untrusted source.
///
/// There are generally two ways to move forward if you hit this error.
/// The first is to find some way to use a smaller regex. The second is to
/// increase the size limit via `RegexBuilder::size_limit`. However, if
/// your regex pattern is not from a trusted source, then neither of these
/// approaches may be appropriate. Instead, you'll have to determine just
/// how big of a regex you want to allow.
CompiledTooBig(usize),
}
impl Error {
pub(crate) fn from_meta_build_error(err: meta::BuildError) -> Error {
if let Some(size_limit) = err.size_limit() {
Error::CompiledTooBig(size_limit)
} else if let Some(ref err) = err.syntax_error() {
Error::Syntax(err.to_string())
} else {
// This is a little suspect. Technically there are more ways for
// a meta regex to fail to build other than "exceeded size limit"
// and "syntax error." For example, if there are too many states
// or even too many patterns. But in practice this is probably
// good enough. The worst thing that happens is that Error::Syntax
// represents an error that isn't technically a syntax error, but
// the actual message will still be shown. So... it's not too bad.
//
// We really should have made the Error type in the regex crate
// completely opaque. Rookie mistake.
Error::Syntax(err.to_string())
}
}
}
#[cfg(feature = "std")]
impl std::error::Error for Error {
    // TODO: Remove this method entirely on the next breaking semver release.
    //
    // The `allow` is present because this method is kept only for backward
    // compatibility and would otherwise trigger a deprecation lint.
    #[allow(deprecated)]
    fn description(&self) -> &str {
        match self {
            Error::Syntax(msg) => msg.as_str(),
            Error::CompiledTooBig(_) => "compiled program too big",
        }
    }
}
impl core::fmt::Display for Error {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
match *self {
Error::Syntax(ref err) => err.fmt(f),
Error::CompiledTooBig(limit) => write!(
f,
"Compiled regex exceeds size limit of {limit} bytes."
),
}
}
}
// We implement our own Debug implementation so that we show nicer syntax
// errors when people use `Regex::new(...).unwrap()`. It's a little weird,
// but the `Syntax` variant is already storing a `String` anyway, so we might
// as well format it nicely.
impl core::fmt::Debug for Error {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
match *self {
Error::Syntax(ref err) => {
let hr: String = core::iter::repeat('~').take(79).collect();
writeln!(f, "Syntax(")?;
writeln!(f, "{hr}")?;
writeln!(f, "{err}")?;
writeln!(f, "{hr}")?;
write!(f, ")")?;
Ok(())
}
Error::CompiledTooBig(limit) => {
f.debug_tuple("CompiledTooBig").field(&limit).finish()
}
}
}
}

17
vendor/regex/src/find_byte.rs vendored Normal file
View File

@@ -0,0 +1,17 @@
/// Returns the offset of the first occurrence of `needle` in `haystack`, or
/// `None` if `needle` does not occur.
///
/// When the `perf-literal` feature is enabled, the search is delegated to the
/// optimized `memchr` crate. Otherwise a simple byte-at-a-time scan is used.
pub(crate) fn find_byte(needle: u8, haystack: &[u8]) -> Option<usize> {
    // Exactly one of the two blocks below survives conditional compilation
    // and becomes the function's tail expression.
    #[cfg(feature = "perf-literal")]
    {
        memchr::memchr(needle, haystack)
    }
    #[cfg(not(feature = "perf-literal"))]
    {
        haystack.iter().position(|&byte| byte == needle)
    }
}

1350
vendor/regex/src/lib.rs vendored Normal file

File diff suppressed because it is too large Load Diff

67
vendor/regex/src/pattern.rs vendored Normal file
View File

@@ -0,0 +1,67 @@
use core::str::pattern::{Pattern, SearchStep, Searcher, Utf8Pattern};
use crate::{Matches, Regex};
/// A `Searcher` that walks a haystack and reports where a [`Regex`] matches.
///
/// Values of this type are created by the `Pattern` implementation for
/// `&Regex`.
#[derive(Debug)]
pub struct RegexSearcher<'r, 't> {
/// The string being searched.
haystack: &'t str,
/// Iterator over the regex's matches in `haystack`.
it: Matches<'r, 't>,
/// Byte offset at which the most recently returned step ended. The next
/// step returned by `next` starts here.
last_step_end: usize,
/// A match already pulled from `it` but not yet reported, because a
/// `Reject` step covering the gap in front of it had to be returned first.
/// Stored as `(start, end)` byte offsets.
next_match: Option<(usize, usize)>,
}
impl<'r> Pattern for &'r Regex {
    type Searcher<'t> = RegexSearcher<'r, 't>;

    /// Builds a searcher positioned at the start of `haystack` that reports
    /// the matches produced by `Regex::find_iter`.
    fn into_searcher<'t>(self, haystack: &'t str) -> RegexSearcher<'r, 't> {
        let it = self.find_iter(haystack);
        RegexSearcher { haystack, it, last_step_end: 0, next_match: None }
    }

    /// Always returns `None`.
    fn as_utf8_pattern(&self) -> Option<Utf8Pattern<'_>> {
        None
    }
}
// SAFETY: every offset reported by `next` is either 0, the haystack length,
// or the start/end of a match yielded by `Regex::find_iter` on a `&str`.
// NOTE(review): this relies on the regex crate's guarantee that match
// offsets on `&str` haystacks fall on valid UTF-8 boundaries.
unsafe impl<'r, 't> Searcher<'t> for RegexSearcher<'r, 't> {
    /// Returns the string being searched.
    #[inline]
    fn haystack(&self) -> &'t str {
        self.haystack
    }

    /// Returns the next step of the search.
    ///
    /// Steps are emitted back to back: the text between two matches (or
    /// before the first / after the last match) is reported as one `Reject`,
    /// and each match as one `Match`. Once the end of the haystack has been
    /// reached, `Done` is returned.
    #[inline]
    fn next(&mut self) -> SearchStep {
        // A match that was held back while the gap before it was reported.
        if let Some((start, end)) = self.next_match.take() {
            self.last_step_end = end;
            return SearchStep::Match(start, end);
        }

        let prev_end = self.last_step_end;
        match self.it.next() {
            // The match begins exactly where the previous step stopped, so
            // it can be reported right away.
            Some(m) if m.start() == prev_end => {
                self.last_step_end = m.end();
                SearchStep::Match(m.start(), m.end())
            }
            // There is unmatched text before this match. Report that text
            // first and remember the match for the following call.
            Some(m) => {
                let (start, end) = (m.start(), m.end());
                self.next_match = Some((start, end));
                self.last_step_end = start;
                SearchStep::Reject(prev_end, start)
            }
            // No more matches: reject any remaining tail, then finish.
            None => {
                let hay_len = self.haystack.len();
                if prev_end < hay_len {
                    self.last_step_end = hay_len;
                    SearchStep::Reject(prev_end, hay_len)
                } else {
                    SearchStep::Done
                }
            }
        }
    }
}

2702
vendor/regex/src/regex/bytes.rs vendored Normal file

File diff suppressed because it is too large Load Diff

2
vendor/regex/src/regex/mod.rs vendored Normal file
View File

@@ -0,0 +1,2 @@
pub(crate) mod bytes;
pub(crate) mod string;

2604
vendor/regex/src/regex/string.rs vendored Normal file

File diff suppressed because it is too large Load Diff

728
vendor/regex/src/regexset/bytes.rs vendored Normal file
View File

@@ -0,0 +1,728 @@
use alloc::string::String;
use regex_automata::{meta, Input, PatternID, PatternSet, PatternSetIter};
use crate::{bytes::RegexSetBuilder, Error};
/// Match multiple, possibly overlapping, regexes in a single search.
///
/// A regex set corresponds to the union of zero or more regular expressions.
/// That is, a regex set will match a haystack when at least one of its
/// constituent regexes matches. A regex set as it's formulated here provides a
/// touch more power: it will also report *which* regular expressions in the
/// set match. Indeed, this is the key difference between regex sets and a
/// single `Regex` with many alternates, since only one alternate can match at
/// a time.
///
/// For example, consider regular expressions to match email addresses and
/// domains: `[a-z]+@[a-z]+\.(com|org|net)` and `[a-z]+\.(com|org|net)`. If a
/// regex set is constructed from those regexes, then searching the haystack
/// `foo@example.com` will report both regexes as matching. Of course, one
/// could accomplish this by compiling each regex on its own and doing two
/// searches over the haystack. The key advantage of using a regex set is
/// that it will report the matching regexes using a *single pass through the
/// haystack*. If one has hundreds or thousands of regexes to match repeatedly
/// (like a URL router for a complex web application or a user agent matcher),
/// then a regex set *can* realize huge performance gains.
///
/// Unlike the top-level [`RegexSet`](crate::RegexSet), this `RegexSet`
/// searches haystacks with type `&[u8]` instead of `&str`. Consequently, this
/// `RegexSet` is permitted to match invalid UTF-8.
///
/// # Limitations
///
/// Regex sets are limited to answering the following two questions:
///
/// 1. Does any regex in the set match?
/// 2. If so, which regexes in the set match?
///
/// As with the main [`Regex`][crate::bytes::Regex] type, it is cheaper to ask
/// (1) instead of (2) since the matching engines can stop after the first
/// match is found.
///
/// You cannot directly extract [`Match`][crate::bytes::Match] or
/// [`Captures`][crate::bytes::Captures] objects from a regex set. If you need
/// these operations, the recommended approach is to compile each pattern in
/// the set independently and scan the exact same haystack a second time with
/// those independently compiled patterns:
///
/// ```
/// use regex::bytes::{Regex, RegexSet};
///
/// let patterns = ["foo", "bar"];
/// // Both patterns will match different ranges of this string.
/// let hay = b"barfoo";
///
/// // Compile a set matching any of our patterns.
/// let set = RegexSet::new(patterns).unwrap();
/// // Compile each pattern independently.
/// let regexes: Vec<_> = set
/// .patterns()
/// .iter()
/// .map(|pat| Regex::new(pat).unwrap())
/// .collect();
///
/// // Match against the whole set first and identify the individual
/// // matching patterns.
/// let matches: Vec<&[u8]> = set
/// .matches(hay)
/// .into_iter()
/// // Dereference the match index to get the corresponding
/// // compiled pattern.
/// .map(|index| &regexes[index])
/// // To get match locations or any other info, we then have to search the
/// // exact same haystack again, using our separately-compiled pattern.
/// .map(|re| re.find(hay).unwrap().as_bytes())
/// .collect();
///
/// // Matches arrive in the order the constituent patterns were declared,
/// // not the order they appear in the haystack.
/// assert_eq!(vec![&b"foo"[..], &b"bar"[..]], matches);
/// ```
///
/// # Performance
///
/// A `RegexSet` has the same performance characteristics as `Regex`. Namely,
/// search takes `O(m * n)` time, where `m` is proportional to the size of the
/// regex set and `n` is proportional to the length of the haystack.
///
/// # Trait implementations
///
/// The `Default` trait is implemented for `RegexSet`. The default value
/// is an empty set. An empty set can also be explicitly constructed via
/// [`RegexSet::empty`].
///
/// # Example
///
/// This shows how the above two regexes (for matching email addresses and
/// domains) might work:
///
/// ```
/// use regex::bytes::RegexSet;
///
/// let set = RegexSet::new(&[
/// r"[a-z]+@[a-z]+\.(com|org|net)",
/// r"[a-z]+\.(com|org|net)",
/// ]).unwrap();
///
/// // Ask whether any regexes in the set match.
/// assert!(set.is_match(b"foo@example.com"));
///
/// // Identify which regexes in the set match.
/// let matches: Vec<_> = set.matches(b"foo@example.com").into_iter().collect();
/// assert_eq!(vec![0, 1], matches);
///
/// // Try again, but with a haystack that only matches one of the regexes.
/// let matches: Vec<_> = set.matches(b"example.com").into_iter().collect();
/// assert_eq!(vec![1], matches);
///
/// // Try again, but with a haystack that doesn't match any regex in the set.
/// let matches: Vec<_> = set.matches(b"example").into_iter().collect();
/// assert!(matches.is_empty());
/// ```
///
/// Note that it would be possible to adapt the above example to using `Regex`
/// with an expression like:
///
/// ```text
/// (?P<email>[a-z]+@(?P<email_domain>[a-z]+[.](com|org|net)))|(?P<domain>[a-z]+[.](com|org|net))
/// ```
///
/// After a match, one could then inspect the capture groups to figure out
/// which alternates matched. The problem is that it is hard to make this
/// approach scale when there are many regexes since the overlap between each
/// alternate isn't always obvious to reason about.
#[derive(Clone)]
pub struct RegexSet {
// The compiled multi-pattern matcher. Every search method on this type
// delegates to it, and `len`/`is_empty` report its pattern count.
pub(crate) meta: meta::Regex,
// The pattern strings handed back by `RegexSet::patterns`, kept in the
// order in which they were given to the set.
pub(crate) patterns: alloc::sync::Arc<[String]>,
}
impl RegexSet {
/// Create a new regex set with the given regular expressions.
///
/// This takes an iterator of `S`, where `S` is something that can produce
/// a `&str`. If any of the strings in the iterator are not valid regular
/// expressions, then an error is returned.
///
/// # Example
///
/// Create a new regex set from an iterator of strings:
///
/// ```
/// use regex::bytes::RegexSet;
///
/// let set = RegexSet::new([r"\w+", r"\d+"]).unwrap();
/// assert!(set.is_match(b"foo"));
/// ```
pub fn new<I, S>(exprs: I) -> Result<RegexSet, Error>
where
    I: IntoIterator<Item = S>,
    S: AsRef<str>,
{
    // No options are configured here: hand the patterns to a builder and
    // build it straight away.
    let builder = RegexSetBuilder::new(exprs);
    builder.build()
}
/// Create a new empty regex set.
///
/// An empty regex set never matches anything.
///
/// This is a convenience function for `RegexSet::new([])`, but doesn't
/// require one to specify the type of the input.
///
/// # Example
///
/// ```
/// use regex::bytes::RegexSet;
///
/// let set = RegexSet::empty();
/// assert!(set.is_empty());
/// // an empty set matches nothing
/// assert!(!set.is_match(b""));
/// ```
pub fn empty() -> RegexSet {
    // A typed zero-length array pins down the element type, which the
    // generic builder constructor cannot infer from a bare `[]`.
    let no_patterns: [&str; 0] = [];
    let built = RegexSetBuilder::new(no_patterns).build();
    built.unwrap()
}
/// Returns true if and only if one of the regexes in this set matches
/// the haystack given.
///
/// This method should be preferred if you only need to test whether any
/// of the regexes in the set should match, but don't care about *which*
/// regexes matched. This is because the underlying matching engine will
/// quit immediately after seeing the first match instead of continuing to
/// find all matches.
///
/// Note that as with searches using [`Regex`](crate::bytes::Regex), the
/// expression is unanchored by default. That is, if the regex does not
/// start with `^` or `\A`, or end with `$` or `\z`, then it is permitted
/// to match anywhere in the haystack.
///
/// # Example
///
/// Tests whether a set matches somewhere in a haystack:
///
/// ```
/// use regex::bytes::RegexSet;
///
/// let set = RegexSet::new([r"\w+", r"\d+"]).unwrap();
/// assert!(set.is_match(b"foo"));
/// assert!(!set.is_match("☃".as_bytes()));
/// ```
#[inline]
pub fn is_match(&self, haystack: &[u8]) -> bool {
    // A whole-haystack search is the offset-based search started at 0.
    let from_start = 0;
    self.is_match_at(haystack, from_start)
}
/// Returns true if and only if one of the regexes in this set matches the
/// haystack given, with the search starting at the offset given.
///
/// The significance of the starting point is that it takes the surrounding
/// context into consideration. For example, the `\A` anchor can only
/// match when `start == 0`.
///
/// # Panics
///
/// This panics when `start >= haystack.len() + 1`.
///
/// # Example
///
/// This example shows the significance of `start`. Namely, consider a
/// haystack `foobar` and a desire to execute a search starting at offset
/// `3`. You could search a substring explicitly, but then the look-around
/// assertions won't work correctly. Instead, you can use this method to
/// specify the start position of a search.
///
/// ```
/// use regex::bytes::RegexSet;
///
/// let set = RegexSet::new([r"\bbar\b", r"(?m)^bar$"]).unwrap();
/// let hay = b"foobar";
/// // We get a match here, but it's probably not intended.
/// assert!(set.is_match(&hay[3..]));
/// // No match because the assertions take the context into account.
/// assert!(!set.is_match_at(hay, 3));
/// ```
#[inline]
pub fn is_match_at(&self, haystack: &[u8], start: usize) -> bool {
    // The input wraps the full haystack and only the search span is
    // narrowed to `start..`, rather than slicing the haystack itself.
    let input = Input::new(haystack).span(start..haystack.len());
    self.meta.is_match(input)
}
/// Returns the set of regexes that match in the given haystack.
///
/// The set returned contains the index of each regex that matches in
/// the given haystack. The index is in correspondence with the order of
/// regular expressions given to `RegexSet`'s constructor.
///
/// The set can also be used to iterate over the matched indices. The order
/// of iteration is always ascending with respect to the matching indices.
///
/// Note that as with searches using [`Regex`](crate::bytes::Regex), the
/// expression is unanchored by default. That is, if the regex does not
/// start with `^` or `\A`, or end with `$` or `\z`, then it is permitted
/// to match anywhere in the haystack.
///
/// # Example
///
/// Tests which regular expressions match the given haystack:
///
/// ```
/// use regex::bytes::RegexSet;
///
/// let set = RegexSet::new([
/// r"\w+",
/// r"\d+",
/// r"\pL+",
/// r"foo",
/// r"bar",
/// r"barfoo",
/// r"foobar",
/// ]).unwrap();
/// let matches: Vec<_> = set.matches(b"foobar").into_iter().collect();
/// assert_eq!(matches, vec![0, 2, 3, 4, 6]);
///
/// // You can also test whether a particular regex matched:
/// let matches = set.matches(b"foobar");
/// assert!(!matches.matched(5));
/// assert!(matches.matched(6));
/// ```
#[inline]
pub fn matches(&self, haystack: &[u8]) -> SetMatches {
self.matches_at(haystack, 0)
}
/// Returns the set of regexes that match in the given haystack.
///
/// The set returned contains the index of each regex that matches in
/// the given haystack. The index is in correspondence with the order of
/// regular expressions given to `RegexSet`'s constructor.
///
/// The set can also be used to iterate over the matched indices. The order
/// of iteration is always ascending with respect to the matching indices.
///
/// The significance of the starting point is that it takes the surrounding
/// context into consideration. For example, the `\A` anchor can only
/// match when `start == 0`.
///
/// # Panics
///
/// This panics when `start >= haystack.len() + 1`.
///
/// # Example
///
/// Tests which regular expressions match the given haystack:
///
/// ```
/// use regex::bytes::RegexSet;
///
/// let set = RegexSet::new([r"\bbar\b", r"(?m)^bar$"]).unwrap();
/// let hay = b"foobar";
/// // We get matches here, but it's probably not intended.
/// let matches: Vec<_> = set.matches(&hay[3..]).into_iter().collect();
/// assert_eq!(matches, vec![0, 1]);
/// // No matches because the assertions take the context into account.
/// let matches: Vec<_> = set.matches_at(hay, 3).into_iter().collect();
/// assert_eq!(matches, vec![]);
/// ```
#[inline]
pub fn matches_at(&self, haystack: &[u8], start: usize) -> SetMatches {
    // Search the span `start..` of the full haystack.
    let search = Input::new(haystack).span(start..haystack.len());
    // One slot per pattern in the set; the search records every pattern
    // that matches into it.
    let mut found = PatternSet::new(self.meta.pattern_len());
    self.meta.which_overlapping_matches(&search, &mut found);
    SetMatches(found)
}
/// Returns the same as matches, but starts the search at the given
/// offset and stores the matches into the slice given.
///
/// The significance of the starting point is that it takes the surrounding
/// context into consideration. For example, the `\A` anchor can only
/// match when `start == 0`.
///
/// `matches` must have a length that is at least the number of regexes
/// in this set.
///
/// This method returns true if and only if at least one member of
/// `matches` is true after executing the set against `haystack`.
#[doc(hidden)]
#[inline]
pub fn matches_read_at(
    &self,
    matches: &mut [bool],
    haystack: &[u8],
    start: usize,
) -> bool {
    // This detour through a temporary PatternSet is unfortunate, but
    // regex-automata offers no way to record matches directly into an
    // arbitrary &mut [bool]. The method is doc(hidden) and therefore not
    // part of the public API, although regex-capi still calls it. A
    // possible future direction is to have regex-capi work with a
    // 'SetMatches' (a newtype around PatternSet) instead, since
    // PatternSet itself lives in regex-automata rather than regex.
    let search = Input::new(haystack).span(start..haystack.len());
    let mut found = PatternSet::new(self.meta.pattern_len());
    self.meta.which_overlapping_matches(&search, &mut found);
    // Copy each hit into the caller's slice. Slots for patterns that did
    // not match are left exactly as the caller passed them in.
    found.iter().for_each(|pid| matches[pid] = true);
    !found.is_empty()
}
/// An alias for `matches_read_at` to preserve backward compatibility.
///
/// The `regex-capi` crate used this method, so to avoid breaking that
/// crate, we continue to export it as an undocumented API.
#[doc(hidden)]
#[inline]
pub fn read_matches_at(
&self,
matches: &mut [bool],
haystack: &[u8],
start: usize,
) -> bool {
self.matches_read_at(matches, haystack, start)
}
/// Returns the total number of regexes in this set.
///
/// # Example
///
/// ```
/// use regex::bytes::RegexSet;
///
/// assert_eq!(0, RegexSet::empty().len());
/// assert_eq!(1, RegexSet::new([r"[0-9]"]).unwrap().len());
/// assert_eq!(2, RegexSet::new([r"[0-9]", r"[a-z]"]).unwrap().len());
/// ```
#[inline]
pub fn len(&self) -> usize {
self.meta.pattern_len()
}
/// Returns `true` if this set contains no regexes.
///
/// # Example
///
/// ```
/// use regex::bytes::RegexSet;
///
/// assert!(RegexSet::empty().is_empty());
/// assert!(!RegexSet::new([r"[0-9]"]).unwrap().is_empty());
/// ```
#[inline]
pub fn is_empty(&self) -> bool {
    // Empty means the compiled matcher holds zero patterns.
    let pattern_count = self.meta.pattern_len();
    pattern_count == 0
}
/// Returns the regex patterns that this regex set was constructed from.
///
/// This function can be used to determine the pattern for a match. The
/// slice returned has exactly as many patterns as were given to this regex
/// set, and the order of the slice is the same as the order of the patterns
/// provided to the set.
///
/// # Example
///
/// ```
/// use regex::bytes::RegexSet;
///
/// let set = RegexSet::new(&[
/// r"\w+",
/// r"\d+",
/// r"\pL+",
/// r"foo",
/// r"bar",
/// r"barfoo",
/// r"foobar",
/// ]).unwrap();
/// let matches: Vec<_> = set
/// .matches(b"foobar")
/// .into_iter()
/// .map(|index| &set.patterns()[index])
/// .collect();
/// assert_eq!(matches, vec![r"\w+", r"\pL+", r"foo", r"bar", r"foobar"]);
/// ```
#[inline]
pub fn patterns(&self) -> &[String] {
    // Explicitly deref the `Arc` to borrow the slice it owns.
    &*self.patterns
}
}
impl Default for RegexSet {
fn default() -> Self {
RegexSet::empty()
}
}
/// A set of matches returned by a regex set.
///
/// Values of this type are constructed by [`RegexSet::matches`] and
/// [`RegexSet::matches_at`].
#[derive(Clone, Debug)]
// A thin newtype around the `PatternSet` that the search filled in; the
// methods on this type forward to it.
pub struct SetMatches(PatternSet);
impl SetMatches {
/// Whether this set contains any matches.
///
/// # Example
///
/// ```
/// use regex::bytes::RegexSet;
///
/// let set = RegexSet::new(&[
/// r"[a-z]+@[a-z]+\.(com|org|net)",
/// r"[a-z]+\.(com|org|net)",
/// ]).unwrap();
/// let matches = set.matches(b"foo@example.com");
/// assert!(matches.matched_any());
/// ```
#[inline]
pub fn matched_any(&self) -> bool {
!self.0.is_empty()
}
/// Whether all patterns in this set matched.
///
/// # Example
///
/// ```
/// use regex::bytes::RegexSet;
///
/// let set = RegexSet::new(&[
/// r"^foo",
/// r"[a-z]+\.com",
/// ]).unwrap();
/// let matches = set.matches(b"foo.example.com");
/// assert!(matches.matched_all());
/// ```
pub fn matched_all(&self) -> bool {
self.0.is_full()
}
/// Whether the regex at the given index matched.
///
/// The index for a regex is determined by its insertion order upon the
/// initial construction of a `RegexSet`, starting at `0`.
///
/// # Panics
///
/// If `index` is greater than or equal to the number of regexes in the
/// original set that produced these matches. Equivalently, when `index`
/// is greater than or equal to [`SetMatches::len`].
///
/// # Example
///
/// ```
/// use regex::bytes::RegexSet;
///
/// let set = RegexSet::new([
/// r"[a-z]+@[a-z]+\.(com|org|net)",
/// r"[a-z]+\.(com|org|net)",
/// ]).unwrap();
/// let matches = set.matches(b"example.com");
/// assert!(!matches.matched(0));
/// assert!(matches.matched(1));
/// ```
#[inline]
pub fn matched(&self, index: usize) -> bool {
self.0.contains(PatternID::new_unchecked(index))
}
/// The total number of regexes in the set that created these matches.
///
/// **WARNING:** This always returns the same value as [`RegexSet::len`].
/// In particular, it does *not* return the number of elements yielded by
/// [`SetMatches::iter`]. The only way to determine the total number of
/// matched regexes is to iterate over them.
///
/// # Example
///
/// Notice that this method returns the total number of regexes in the
/// original set, and *not* the total number of regexes that matched.
///
/// ```
/// use regex::bytes::RegexSet;
///
/// let set = RegexSet::new([
/// r"[a-z]+@[a-z]+\.(com|org|net)",
/// r"[a-z]+\.(com|org|net)",
/// ]).unwrap();
/// let matches = set.matches(b"example.com");
/// // Total number of patterns that matched.
/// assert_eq!(1, matches.iter().count());
/// // Total number of patterns in the set.
/// assert_eq!(2, matches.len());
/// ```
#[inline]
pub fn len(&self) -> usize {
self.0.capacity()
}
/// Returns an iterator over the indices of the regexes that matched.
///
/// This will always produce matches in ascending order, where the index
/// yielded corresponds to the index of the regex that matched with respect
/// to its position when initially building the set.
///
/// # Example
///
/// ```
/// use regex::bytes::RegexSet;
///
/// let set = RegexSet::new([
/// r"[0-9]",
/// r"[a-z]",
/// r"[A-Z]",
/// r"\p{Greek}",
/// ]).unwrap();
/// let hay = "βa1".as_bytes();
/// let matches: Vec<_> = set.matches(hay).iter().collect();
/// assert_eq!(matches, vec![0, 1, 3]);
/// ```
///
/// Note that `SetMatches` also implements the `IntoIterator` trait, so
/// this method is not always needed. For example:
///
/// ```
/// use regex::bytes::RegexSet;
///
/// let set = RegexSet::new([
/// r"[0-9]",
/// r"[a-z]",
/// r"[A-Z]",
/// r"\p{Greek}",
/// ]).unwrap();
/// let hay = "βa1".as_bytes();
/// let mut matches = vec![];
/// for index in set.matches(hay) {
/// matches.push(index);
/// }
/// assert_eq!(matches, vec![0, 1, 3]);
/// ```
#[inline]
pub fn iter(&self) -> SetMatchesIter<'_> {
    // Wrap the pattern set's borrowed iterator in this crate's own
    // iterator type.
    let inner = self.0.iter();
    SetMatchesIter(inner)
}
}
impl IntoIterator for SetMatches {
type IntoIter = SetMatchesIntoIter;
type Item = usize;
fn into_iter(self) -> Self::IntoIter {
let it = 0..self.0.capacity();
SetMatchesIntoIter { patset: self.0, it }
}
}
impl<'a> IntoIterator for &'a SetMatches {
type IntoIter = SetMatchesIter<'a>;
type Item = usize;
fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}
/// An owned iterator over the set of matches from a regex set.
///
/// This will always produce matches in ascending order of index, where the
/// index corresponds to the index of the regex that matched with respect to
/// its position when initially building the set.
///
/// This iterator is created by calling `SetMatches::into_iter` via the
/// `IntoIterator` trait. This is automatically done in `for` loops.
///
/// # Example
///
/// ```
/// use regex::bytes::RegexSet;
///
/// let set = RegexSet::new([
/// r"[0-9]",
/// r"[a-z]",
/// r"[A-Z]",
/// r"\p{Greek}",
/// ]).unwrap();
/// let hay = "βa1".as_bytes();
/// let mut matches = vec![];
/// for index in set.matches(hay) {
/// matches.push(index);
/// }
/// assert_eq!(matches, vec![0, 1, 3]);
/// ```
#[derive(Debug)]
pub struct SetMatchesIntoIter {
// The owned set of matching pattern IDs, consulted for every candidate.
patset: PatternSet,
// The candidate indices still to be visited, from either end. Created as
// `0..capacity` of `patset`.
it: core::ops::Range<usize>,
}
impl Iterator for SetMatchesIntoIter {
    type Item = usize;

    /// Returns the next matching regex index in ascending order, or `None`
    /// once every candidate index has been visited.
    fn next(&mut self) -> Option<usize> {
        loop {
            // `?` ends iteration as soon as the candidate range is used up.
            let id = self.it.next()?;
            // Skip candidates whose pattern did not match.
            if self.patset.contains(PatternID::new_unchecked(id)) {
                return Some(id);
            }
        }
    }

    /// Returns bounds on the number of matching indices still to come.
    ///
    /// The remaining candidate range is only an upper bound, because any
    /// number of those candidates may be skipped as non-matching. The lower
    /// bound is therefore `0`; forwarding the range's exact hint would
    /// promise more items than this iterator may yield.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let (_, upper) = self.it.size_hint();
        (0, upper)
    }
}
impl DoubleEndedIterator for SetMatchesIntoIter {
    /// Returns the highest not-yet-visited matching regex index, or `None`
    /// once every candidate index has been visited.
    fn next_back(&mut self) -> Option<usize> {
        // Borrow the set separately so the closure does not need `self`
        // while the candidate range is being advanced.
        let patset = &self.patset;
        self.it.rfind(|&id| patset.contains(PatternID::new_unchecked(id)))
    }
}
// `next`/`next_back` return `None` only when the inner `Range<usize>` is
// exhausted, and an exhausted range keeps returning `None`.
impl core::iter::FusedIterator for SetMatchesIntoIter {}
/// A borrowed iterator over the set of matches from a regex set.
///
/// The lifetime `'a` refers to the lifetime of the [`SetMatches`] value that
/// created this iterator.
///
/// This will always produce matches in ascending order, where the index
/// corresponds to the index of the regex that matched with respect to its
/// position when initially building the set.
///
/// This iterator is created by the [`SetMatches::iter`] method.
#[derive(Clone, Debug)]
// Newtype over the pattern set's borrowed iterator; the trait impls below
// convert each yielded pattern ID to a plain `usize`.
pub struct SetMatchesIter<'a>(PatternSetIter<'a>);
impl<'a> Iterator for SetMatchesIter<'a> {
type Item = usize;
fn next(&mut self) -> Option<usize> {
self.0.next().map(|pid| pid.as_usize())
}
fn size_hint(&self) -> (usize, Option<usize>) {
self.0.size_hint()
}
}
impl<'a> DoubleEndedIterator for SetMatchesIter<'a> {
    /// Yields the next matching regex index from the back as a plain `usize`.
    fn next_back(&mut self) -> Option<usize> {
        let pid = self.0.next_back()?;
        Some(pid.as_usize())
    }
}
// NOTE(review): presumably sound because the wrapped `PatternSetIter` keeps
// returning `None` once exhausted -- confirm against regex-automata.
impl<'a> core::iter::FusedIterator for SetMatchesIter<'a> {}
impl core::fmt::Debug for RegexSet {
    /// Formats the set as `RegexSet(..)` wrapping the debug form of its
    /// pattern strings; the compiled matcher is not shown.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let pats = self.patterns();
        f.write_fmt(format_args!("RegexSet({:?})", pats))
    }
}

2
vendor/regex/src/regexset/mod.rs vendored Normal file
View File

@@ -0,0 +1,2 @@
pub(crate) mod bytes;
pub(crate) mod string;

724
vendor/regex/src/regexset/string.rs vendored Normal file
View File

@@ -0,0 +1,724 @@
use alloc::string::String;
use regex_automata::{meta, Input, PatternID, PatternSet, PatternSetIter};
use crate::{Error, RegexSetBuilder};
/// Match multiple, possibly overlapping, regexes in a single search.
///
/// A regex set corresponds to the union of zero or more regular expressions.
/// That is, a regex set will match a haystack when at least one of its
/// constituent regexes matches. A regex set as it's formulated here provides a
/// touch more power: it will also report *which* regular expressions in the
/// set match. Indeed, this is the key difference between regex sets and a
/// single `Regex` with many alternates, since only one alternate can match at
/// a time.
///
/// For example, consider regular expressions to match email addresses and
/// domains: `[a-z]+@[a-z]+\.(com|org|net)` and `[a-z]+\.(com|org|net)`. If a
/// regex set is constructed from those regexes, then searching the haystack
/// `foo@example.com` will report both regexes as matching. Of course, one
/// could accomplish this by compiling each regex on its own and doing two
/// searches over the haystack. The key advantage of using a regex set is
/// that it will report the matching regexes using a *single pass through the
/// haystack*. If one has hundreds or thousands of regexes to match repeatedly
/// (like a URL router for a complex web application or a user agent matcher),
/// then a regex set *can* realize huge performance gains.
///
/// # Limitations
///
/// Regex sets are limited to answering the following two questions:
///
/// 1. Does any regex in the set match?
/// 2. If so, which regexes in the set match?
///
/// As with the main [`Regex`][crate::Regex] type, it is cheaper to ask (1)
/// instead of (2) since the matching engines can stop after the first match
/// is found.
///
/// You cannot directly extract [`Match`][crate::Match] or
/// [`Captures`][crate::Captures] objects from a regex set. If you need these
/// operations, the recommended approach is to compile each pattern in the set
/// independently and scan the exact same haystack a second time with those
/// independently compiled patterns:
///
/// ```
/// use regex::{Regex, RegexSet};
///
/// let patterns = ["foo", "bar"];
/// // Both patterns will match different ranges of this string.
/// let hay = "barfoo";
///
/// // Compile a set matching any of our patterns.
/// let set = RegexSet::new(patterns).unwrap();
/// // Compile each pattern independently.
/// let regexes: Vec<_> = set
/// .patterns()
/// .iter()
/// .map(|pat| Regex::new(pat).unwrap())
/// .collect();
///
/// // Match against the whole set first and identify the individual
/// // matching patterns.
/// let matches: Vec<&str> = set
/// .matches(hay)
/// .into_iter()
/// // Dereference the match index to get the corresponding
/// // compiled pattern.
/// .map(|index| &regexes[index])
/// // To get match locations or any other info, we then have to search the
/// // exact same haystack again, using our separately-compiled pattern.
/// .map(|re| re.find(hay).unwrap().as_str())
/// .collect();
///
/// // Matches arrive in the order the constituent patterns were declared,
/// // not the order they appear in the haystack.
/// assert_eq!(vec!["foo", "bar"], matches);
/// ```
///
/// # Performance
///
/// A `RegexSet` has the same performance characteristics as `Regex`. Namely,
/// search takes `O(m * n)` time, where `m` is proportional to the size of the
/// regex set and `n` is proportional to the length of the haystack.
///
/// # Trait implementations
///
/// The `Default` trait is implemented for `RegexSet`. The default value
/// is an empty set. An empty set can also be explicitly constructed via
/// [`RegexSet::empty`].
///
/// # Example
///
/// This shows how the above two regexes (for matching email addresses and
/// domains) might work:
///
/// ```
/// use regex::RegexSet;
///
/// let set = RegexSet::new(&[
/// r"[a-z]+@[a-z]+\.(com|org|net)",
/// r"[a-z]+\.(com|org|net)",
/// ]).unwrap();
///
/// // Ask whether any regexes in the set match.
/// assert!(set.is_match("foo@example.com"));
///
/// // Identify which regexes in the set match.
/// let matches: Vec<_> = set.matches("foo@example.com").into_iter().collect();
/// assert_eq!(vec![0, 1], matches);
///
/// // Try again, but with a haystack that only matches one of the regexes.
/// let matches: Vec<_> = set.matches("example.com").into_iter().collect();
/// assert_eq!(vec![1], matches);
///
/// // Try again, but with a haystack that doesn't match any regex in the set.
/// let matches: Vec<_> = set.matches("example").into_iter().collect();
/// assert!(matches.is_empty());
/// ```
///
/// Note that it would be possible to adapt the above example to using `Regex`
/// with an expression like:
///
/// ```text
/// (?P<email>[a-z]+@(?P<email_domain>[a-z]+[.](com|org|net)))|(?P<domain>[a-z]+[.](com|org|net))
/// ```
///
/// After a match, one could then inspect the capture groups to figure out
/// which alternates matched. The problem is that it is hard to make this
/// approach scale when there are many regexes since the overlap between each
/// alternate isn't always obvious to reason about.
#[derive(Clone)]
pub struct RegexSet {
// The compiled multi-pattern matcher. Every search method on this type
// delegates to it, and `len`/`is_empty` report its pattern count.
pub(crate) meta: meta::Regex,
// The pattern strings handed back by `RegexSet::patterns`, kept in the
// order in which they were given to the set.
pub(crate) patterns: alloc::sync::Arc<[String]>,
}
impl RegexSet {
/// Create a new regex set with the given regular expressions.
///
/// This takes an iterator of `S`, where `S` is something that can produce
/// a `&str`. If any of the strings in the iterator are not valid regular
/// expressions, then an error is returned.
///
/// # Example
///
/// Create a new regex set from an iterator of strings:
///
/// ```
/// use regex::RegexSet;
///
/// let set = RegexSet::new([r"\w+", r"\d+"]).unwrap();
/// assert!(set.is_match("foo"));
/// ```
pub fn new<I, S>(exprs: I) -> Result<RegexSet, Error>
where
    I: IntoIterator<Item = S>,
    S: AsRef<str>,
{
    // No options are configured here: hand the patterns to a builder and
    // build it straight away.
    let builder = RegexSetBuilder::new(exprs);
    builder.build()
}
/// Create a new empty regex set.
///
/// An empty regex set never matches anything.
///
/// This is a convenience function for `RegexSet::new([])`, but doesn't
/// require one to specify the type of the input.
///
/// # Example
///
/// ```
/// use regex::RegexSet;
///
/// let set = RegexSet::empty();
/// assert!(set.is_empty());
/// // an empty set matches nothing
/// assert!(!set.is_match(""));
/// ```
pub fn empty() -> RegexSet {
    // A typed zero-length array pins down the element type, which the
    // generic builder constructor cannot infer from a bare `[]`.
    let no_patterns: [&str; 0] = [];
    let built = RegexSetBuilder::new(no_patterns).build();
    built.unwrap()
}
/// Returns true if and only if one of the regexes in this set matches
/// the haystack given.
///
/// This method should be preferred if you only need to test whether any
/// of the regexes in the set should match, but don't care about *which*
/// regexes matched. This is because the underlying matching engine will
/// quit immediately after seeing the first match instead of continuing to
/// find all matches.
///
/// Note that as with searches using [`Regex`](crate::Regex), the
/// expression is unanchored by default. That is, if the regex does not
/// start with `^` or `\A`, or end with `$` or `\z`, then it is permitted
/// to match anywhere in the haystack.
///
/// # Example
///
/// Tests whether a set matches somewhere in a haystack:
///
/// ```
/// use regex::RegexSet;
///
/// let set = RegexSet::new([r"\w+", r"\d+"]).unwrap();
/// assert!(set.is_match("foo"));
/// assert!(!set.is_match("☃"));
/// ```
#[inline]
pub fn is_match(&self, haystack: &str) -> bool {
    // A whole-haystack search is the offset-based search started at 0.
    let from_start = 0;
    self.is_match_at(haystack, from_start)
}
/// Returns true if and only if one of the regexes in this set matches the
/// haystack given, with the search starting at the offset given.
///
/// The significance of the starting point is that it takes the surrounding
/// context into consideration. For example, the `\A` anchor can only
/// match when `start == 0`.
///
/// # Panics
///
/// This panics when `start >= haystack.len() + 1`.
///
/// # Example
///
/// This example shows the significance of `start`. Namely, consider a
/// haystack `foobar` and a desire to execute a search starting at offset
/// `3`. You could search a substring explicitly, but then the look-around
/// assertions won't work correctly. Instead, you can use this method to
/// specify the start position of a search.
///
/// ```
/// use regex::RegexSet;
///
/// let set = RegexSet::new([r"\bbar\b", r"(?m)^bar$"]).unwrap();
/// let hay = "foobar";
/// // We get a match here, but it's probably not intended.
/// assert!(set.is_match(&hay[3..]));
/// // No match because the assertions take the context into account.
/// assert!(!set.is_match_at(hay, 3));
/// ```
#[inline]
pub fn is_match_at(&self, haystack: &str, start: usize) -> bool {
    // The input wraps the full haystack and only the search span is
    // narrowed to `start..`, rather than slicing the haystack itself.
    let input = Input::new(haystack).span(start..haystack.len());
    self.meta.is_match(input)
}
/// Returns the set of regexes that match in the given haystack.
///
/// The set returned contains the index of each regex that matches in
/// the given haystack. The index is in correspondence with the order of
/// regular expressions given to `RegexSet`'s constructor.
///
/// The set can also be used to iterate over the matched indices. The order
/// of iteration is always ascending with respect to the matching indices.
///
/// Note that as with searches using [`Regex`](crate::Regex), the
/// expression is unanchored by default. That is, if the regex does not
/// start with `^` or `\A`, or end with `$` or `\z`, then it is permitted
/// to match anywhere in the haystack.
///
/// # Example
///
/// Tests which regular expressions match the given haystack:
///
/// ```
/// use regex::RegexSet;
///
/// let set = RegexSet::new([
/// r"\w+",
/// r"\d+",
/// r"\pL+",
/// r"foo",
/// r"bar",
/// r"barfoo",
/// r"foobar",
/// ]).unwrap();
/// let matches: Vec<_> = set.matches("foobar").into_iter().collect();
/// assert_eq!(matches, vec![0, 2, 3, 4, 6]);
///
/// // You can also test whether a particular regex matched:
/// let matches = set.matches("foobar");
/// assert!(!matches.matched(5));
/// assert!(matches.matched(6));
/// ```
#[inline]
pub fn matches(&self, haystack: &str) -> SetMatches {
self.matches_at(haystack, 0)
}
/// Returns the set of regexes that match in the given haystack.
///
/// The set returned contains the index of each regex that matches in
/// the given haystack. The index is in correspondence with the order of
/// regular expressions given to `RegexSet`'s constructor.
///
/// The set can also be used to iterate over the matched indices. The order
/// of iteration is always ascending with respect to the matching indices.
///
/// The significance of the starting point is that it takes the surrounding
/// context into consideration. For example, the `\A` anchor can only
/// match when `start == 0`.
///
/// # Panics
///
/// This panics when `start >= haystack.len() + 1`.
///
/// # Example
///
/// Tests which regular expressions match the given haystack:
///
/// ```
/// use regex::RegexSet;
///
/// let set = RegexSet::new([r"\bbar\b", r"(?m)^bar$"]).unwrap();
/// let hay = "foobar";
/// // We get matches here, but it's probably not intended.
/// let matches: Vec<_> = set.matches(&hay[3..]).into_iter().collect();
/// assert_eq!(matches, vec![0, 1]);
/// // No matches because the assertions take the context into account.
/// let matches: Vec<_> = set.matches_at(hay, 3).into_iter().collect();
/// assert_eq!(matches, vec![]);
/// ```
#[inline]
pub fn matches_at(&self, haystack: &str, start: usize) -> SetMatches {
    // Search the span `start..` of the full haystack.
    let search = Input::new(haystack).span(start..haystack.len());
    // One slot per pattern in the set; the search records every pattern
    // that matches into it.
    let mut found = PatternSet::new(self.meta.pattern_len());
    self.meta.which_overlapping_matches(&search, &mut found);
    SetMatches(found)
}
/// Returns the same as matches, but starts the search at the given
/// offset and stores the matches into the slice given.
///
/// The significance of the starting point is that it takes the surrounding
/// context into consideration. For example, the `\A` anchor can only
/// match when `start == 0`.
///
/// `matches` must have a length that is at least the number of regexes
/// in this set.
///
/// This method returns true if and only if at least one member of
/// `matches` is true after executing the set against `haystack`.
#[doc(hidden)]
#[inline]
pub fn matches_read_at(
    &self,
    matches: &mut [bool],
    haystack: &str,
    start: usize,
) -> bool {
    // This detour through a temporary PatternSet is unfortunate, but
    // regex-automata offers no way to record matches directly into an
    // arbitrary &mut [bool]. The method is doc(hidden) and therefore not
    // part of the public API, although regex-capi still calls it. A
    // possible future direction is to have regex-capi work with a
    // 'SetMatches' (a newtype around PatternSet) instead, since
    // PatternSet itself lives in regex-automata rather than regex.
    let search = Input::new(haystack).span(start..haystack.len());
    let mut found = PatternSet::new(self.meta.pattern_len());
    self.meta.which_overlapping_matches(&search, &mut found);
    // Copy each hit into the caller's slice. Slots for patterns that did
    // not match are left exactly as the caller passed them in.
    found.iter().for_each(|pid| matches[pid] = true);
    !found.is_empty()
}
/// An alias for `matches_read_at` to preserve backward compatibility.
///
/// The `regex-capi` crate used this method, so to avoid breaking that
/// crate, we continue to export it as an undocumented API.
#[doc(hidden)]
#[inline]
pub fn read_matches_at(
&self,
matches: &mut [bool],
haystack: &str,
start: usize,
) -> bool {
self.matches_read_at(matches, haystack, start)
}
/// Returns the total number of regexes in this set.
///
/// # Example
///
/// ```
/// use regex::RegexSet;
///
/// assert_eq!(0, RegexSet::empty().len());
/// assert_eq!(1, RegexSet::new([r"[0-9]"]).unwrap().len());
/// assert_eq!(2, RegexSet::new([r"[0-9]", r"[a-z]"]).unwrap().len());
/// ```
#[inline]
pub fn len(&self) -> usize {
self.meta.pattern_len()
}
/// Returns `true` when the number of regexes in this set is zero.
///
/// # Example
///
/// ```
/// use regex::RegexSet;
///
/// assert!(RegexSet::empty().is_empty());
/// assert!(!RegexSet::new([r"[0-9]"]).unwrap().is_empty());
/// ```
#[inline]
pub fn is_empty(&self) -> bool {
    self.len() == 0
}
/// Returns the regex patterns that this regex set was constructed from.
///
/// This function can be used to determine the pattern for a match. The
/// slice returned has exactly as many patterns as were given to this regex
/// set, and the order of the slice is the same as the order of the patterns
/// provided to the set.
///
/// # Example
///
/// ```
/// use regex::RegexSet;
///
/// let set = RegexSet::new(&[
/// r"\w+",
/// r"\d+",
/// r"\pL+",
/// r"foo",
/// r"bar",
/// r"barfoo",
/// r"foobar",
/// ]).unwrap();
/// let matches: Vec<_> = set
/// .matches("foobar")
/// .into_iter()
/// .map(|index| &set.patterns()[index])
/// .collect();
/// assert_eq!(matches, vec![r"\w+", r"\pL+", r"foo", r"bar", r"foobar"]);
/// ```
#[inline]
pub fn patterns(&self) -> &[String] {
// Borrow the stored pattern strings; nothing is copied.
&self.patterns
}
}
impl Default for RegexSet {
fn default() -> Self {
RegexSet::empty()
}
}
/// A set of matches returned by a regex set.
///
/// Values of this type are constructed by [`RegexSet::matches`].
///
/// Individual regexes can be queried with [`SetMatches::matched`], and the
/// indices of all regexes that matched can be visited with
/// [`SetMatches::iter`] or by iterating over the value itself.
#[derive(Clone, Debug)]
// Newtype around the pattern set that records which regexes matched.
pub struct SetMatches(PatternSet);
impl SetMatches {
    /// Whether this set contains any matches.
    ///
    /// # Example
    ///
    /// ```
    /// use regex::RegexSet;
    ///
    /// let set = RegexSet::new(&[
    ///     r"[a-z]+@[a-z]+\.(com|org|net)",
    ///     r"[a-z]+\.(com|org|net)",
    /// ]).unwrap();
    /// let matches = set.matches("foo@example.com");
    /// assert!(matches.matched_any());
    /// ```
    #[inline]
    pub fn matched_any(&self) -> bool {
        !self.0.is_empty()
    }

    /// Whether all patterns in this set matched.
    ///
    /// # Example
    ///
    /// ```
    /// use regex::RegexSet;
    ///
    /// let set = RegexSet::new(&[
    ///     r"^foo",
    ///     r"[a-z]+\.com",
    /// ]).unwrap();
    /// let matches = set.matches("foo.example.com");
    /// assert!(matches.matched_all());
    /// ```
    // Marked `#[inline]` like every other accessor on this type, so this
    // one-line wrapper can also be inlined across crate boundaries.
    #[inline]
    pub fn matched_all(&self) -> bool {
        self.0.is_full()
    }

    /// Whether the regex at the given index matched.
    ///
    /// The index for a regex is determined by its insertion order upon the
    /// initial construction of a `RegexSet`, starting at `0`.
    ///
    /// # Panics
    ///
    /// If `index` is greater than or equal to the number of regexes in the
    /// original set that produced these matches. Equivalently, when `index`
    /// is greater than or equal to [`SetMatches::len`].
    ///
    /// # Example
    ///
    /// ```
    /// use regex::RegexSet;
    ///
    /// let set = RegexSet::new([
    ///     r"[a-z]+@[a-z]+\.(com|org|net)",
    ///     r"[a-z]+\.(com|org|net)",
    /// ]).unwrap();
    /// let matches = set.matches("example.com");
    /// assert!(!matches.matched(0));
    /// assert!(matches.matched(1));
    /// ```
    #[inline]
    pub fn matched(&self, index: usize) -> bool {
        self.0.contains(PatternID::new_unchecked(index))
    }

    /// The total number of regexes in the set that created these matches.
    ///
    /// **WARNING:** This always returns the same value as [`RegexSet::len`].
    /// In particular, it does *not* return the number of elements yielded by
    /// [`SetMatches::iter`]. The only way to determine the total number of
    /// matched regexes is to iterate over them.
    ///
    /// # Example
    ///
    /// Notice that this method returns the total number of regexes in the
    /// original set, and *not* the total number of regexes that matched.
    ///
    /// ```
    /// use regex::RegexSet;
    ///
    /// let set = RegexSet::new([
    ///     r"[a-z]+@[a-z]+\.(com|org|net)",
    ///     r"[a-z]+\.(com|org|net)",
    /// ]).unwrap();
    /// let matches = set.matches("example.com");
    /// // Total number of patterns that matched.
    /// assert_eq!(1, matches.iter().count());
    /// // Total number of patterns in the set.
    /// assert_eq!(2, matches.len());
    /// ```
    #[inline]
    pub fn len(&self) -> usize {
        self.0.capacity()
    }

    /// Returns an iterator over the indices of the regexes that matched.
    ///
    /// This will always produce matches in ascending order, where the index
    /// yielded corresponds to the index of the regex that matched with respect
    /// to its position when initially building the set.
    ///
    /// # Example
    ///
    /// ```
    /// use regex::RegexSet;
    ///
    /// let set = RegexSet::new([
    ///     r"[0-9]",
    ///     r"[a-z]",
    ///     r"[A-Z]",
    ///     r"\p{Greek}",
    /// ]).unwrap();
    /// let hay = "βa1";
    /// let matches: Vec<_> = set.matches(hay).iter().collect();
    /// assert_eq!(matches, vec![0, 1, 3]);
    /// ```
    ///
    /// Note that `SetMatches` also implements the `IntoIterator` trait, so
    /// this method is not always needed. For example:
    ///
    /// ```
    /// use regex::RegexSet;
    ///
    /// let set = RegexSet::new([
    ///     r"[0-9]",
    ///     r"[a-z]",
    ///     r"[A-Z]",
    ///     r"\p{Greek}",
    /// ]).unwrap();
    /// let hay = "βa1";
    /// let mut matches = vec![];
    /// for index in set.matches(hay) {
    ///     matches.push(index);
    /// }
    /// assert_eq!(matches, vec![0, 1, 3]);
    /// ```
    #[inline]
    pub fn iter(&self) -> SetMatchesIter<'_> {
        SetMatchesIter(self.0.iter())
    }
}
impl IntoIterator for SetMatches {
type IntoIter = SetMatchesIntoIter;
type Item = usize;
fn into_iter(self) -> Self::IntoIter {
let it = 0..self.0.capacity();
SetMatchesIntoIter { patset: self.0, it }
}
}
impl<'a> IntoIterator for &'a SetMatches {
type IntoIter = SetMatchesIter<'a>;
type Item = usize;
fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}
/// An owned iterator over the set of matches from a regex set.
///
/// This will always produce matches in ascending order of index, where the
/// index corresponds to the index of the regex that matched with respect to
/// its position when initially building the set.
///
/// This iterator is created by calling `SetMatches::into_iter` via the
/// `IntoIterator` trait. This is automatically done in `for` loops.
///
/// # Example
///
/// ```
/// use regex::RegexSet;
///
/// let set = RegexSet::new([
/// r"[0-9]",
/// r"[a-z]",
/// r"[A-Z]",
/// r"\p{Greek}",
/// ]).unwrap();
/// let hay = "βa1";
/// let mut matches = vec![];
/// for index in set.matches(hay) {
/// matches.push(index);
/// }
/// assert_eq!(matches, vec![0, 1, 3]);
/// ```
#[derive(Debug)]
pub struct SetMatchesIntoIter {
// The pattern set taken from the consumed `SetMatches`.
patset: PatternSet,
// Candidate indices that have not been inspected yet; each one is checked
// against `patset` before it is yielded.
it: core::ops::Range<usize>,
}
impl Iterator for SetMatchesIntoIter {
    type Item = usize;

    /// Returns the next candidate index, in ascending order, that is a
    /// member of the pattern set, or `None` once all candidates are used up.
    fn next(&mut self) -> Option<usize> {
        loop {
            // `?` ends the iteration as soon as the candidate range is empty.
            let id = self.it.next()?;
            if self.patset.contains(PatternID::new_unchecked(id)) {
                return Some(id);
            }
        }
    }

    /// Returns `(0, Some(n))`, where `n` is the number of candidate indices
    /// that have not been inspected yet.
    fn size_hint(&self) -> (usize, Option<usize>) {
        // `self.it` counts every remaining candidate, but `next` only yields
        // the ones contained in `patset`. The number of remaining candidates
        // is therefore only an upper bound; possibly none of them matched,
        // so the only lower bound that is always correct is zero.
        let (_, upper) = self.it.size_hint();
        (0, upper)
    }
}
impl DoubleEndedIterator for SetMatchesIntoIter {
    /// Returns the largest remaining candidate index that is a member of
    /// the pattern set, consuming candidates from the back of the range.
    fn next_back(&mut self) -> Option<usize> {
        let patset = &self.patset;
        self.it.rfind(|&id| patset.contains(PatternID::new_unchecked(id)))
    }
}
// `next` and `next_back` stop as soon as the inner `Range<usize>` is
// exhausted, and an exhausted range keeps returning `None`, so this iterator
// never resumes after finishing.
impl core::iter::FusedIterator for SetMatchesIntoIter {}
/// A borrowed iterator over the set of matches from a regex set.
///
/// The lifetime `'a` refers to the lifetime of the [`SetMatches`] value that
/// created this iterator.
///
/// This will always produce matches in ascending order, where the index
/// corresponds to the index of the regex that matched with respect to its
/// position when initially building the set.
///
/// This iterator is created by the [`SetMatches::iter`] method.
#[derive(Clone, Debug)]
// Newtype around the pattern set's own iterator; pattern IDs are converted to
// `usize` as they are yielded.
pub struct SetMatchesIter<'a>(PatternSetIter<'a>);
impl<'a> Iterator for SetMatchesIter<'a> {
type Item = usize;
fn next(&mut self) -> Option<usize> {
self.0.next().map(|pid| pid.as_usize())
}
fn size_hint(&self) -> (usize, Option<usize>) {
self.0.size_hint()
}
}
impl<'a> DoubleEndedIterator for SetMatchesIter<'a> {
    /// Yields the last remaining pattern ID of the wrapped iterator as a
    /// `usize`.
    fn next_back(&mut self) -> Option<usize> {
        let pid = self.0.next_back()?;
        Some(pid.as_usize())
    }
}
// NOTE(review): `next`/`next_back` forward directly to the wrapped
// `PatternSetIter`, so this marker presumably relies on that iterator
// continuing to return `None` once exhausted; confirm against regex-automata.
impl<'a> core::iter::FusedIterator for SetMatchesIter<'a> {}
impl core::fmt::Debug for RegexSet {
    /// Writes `RegexSet(` followed by the `Debug` rendering of the pattern
    /// slice and a closing `)`.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let patterns = self.patterns();
        f.write_fmt(format_args!("RegexSet({:?})", patterns))
    }
}

46
vendor/regex/test vendored Executable file
View File

@@ -0,0 +1,46 @@
#!/bin/bash
# Test driver for this crate: runs the default test suite, the library tests
# without default features, the doc tests, and then the integration tests once
# for every feature combination listed in `features` below.
# Stop at the first failing command so that a failed `cargo test` fails the
# whole run.
set -e
# cd to the directory containing this crate's Cargo.toml so that we don't need
# to pass --manifest-path to every `cargo` command.
cd "$(dirname "$0")"
# This is a convenience script for running a broad swath of tests across
# features. We don't test the complete space, since the complete space is quite
# large. Hopefully once we migrate the test suite to better infrastructure
# (like regex-automata), we'll be able to test more of the space.
echo "===== DEFAULT FEATURES ====="
cargo test
# no-std mode is annoyingly difficult to test. Currently, the integration tests
# don't run. So for now, we just test that library tests run. (There aren't
# many because `regex` is just a wrapper crate.)
cargo test --no-default-features --lib
echo "===== DOC TESTS ====="
cargo test --doc
# Each entry is one space-separated feature list. It is passed as a single
# argument to `--features` for one integration-test run with default features
# disabled.
features=(
"std"
"std unicode"
"std unicode-perl"
"std perf"
"std perf-cache"
"std perf-dfa"
"std perf-inline"
"std perf-literal"
"std perf-dfa-full"
"std perf-onepass"
"std perf-backtrack"
)
for f in "${features[@]}"; do
echo "===== FEATURE: $f ====="
cargo test --test integration --no-default-features --features "$f"
done
# And test the probably-forever-nightly-only 'pattern' feature...
# This only runs when `rustc --version` mentions "nightly".
if rustc --version | grep -q nightly; then
echo "===== FEATURE: std,pattern,unicode-perl ====="
cargo test --test integration --no-default-features --features std,pattern,unicode-perl
fi

22
vendor/regex/testdata/README.md vendored Normal file
View File

@@ -0,0 +1,22 @@
This directory contains a large suite of regex tests defined in a TOML format.
They are used to drive tests in `tests/lib.rs`, `regex-automata/tests/lib.rs`
and `regex-lite/tests/lib.rs`.
See the [`regex-test`][regex-test] crate documentation for an explanation of
the format and how it generates tests.
The basic idea here is that we have many different regex engines but generally
one set of tests. We want to be able to run those tests (or most of them) on
every engine. Prior to `regex 1.9`, we used to do this with a hodge podge soup
of macros and a different test executable for each engine. It overall took a
longer time to compile, was harder to maintain, and it made the test definitions
themselves less clear.
In `regex 1.9`, when we moved over to `regex-automata`, the situation got a lot
worse because of an increase in the number of engines. So I devised an engine
independent format for testing regex patterns and their semantics.
Note: the naming scheme used in these tests isn't terribly consistent. It would
be great to fix that.
[regex-test]: https://docs.rs/regex-test

127
vendor/regex/testdata/anchored.toml vendored Normal file
View File

@@ -0,0 +1,127 @@
# These tests are specifically geared toward searches with 'anchored = true'.
# While they are interesting in their own right, they are particularly
# important for testing the one-pass DFA since the one-pass DFA can't work in
# unanchored contexts.
#
# Note that "anchored" in this context does not mean "^". Anchored searches are
# searches whose matches must begin at the start of the search, which may not
# be at the start of the haystack. That's why anchored searches---and there are
# some examples below---can still report multiple matches. This occurs when the
# matches are adjacent to one another.
[[test]]
name = "greedy"
regex = '(abc)+'
haystack = "abcabcabc"
matches = [
[[0, 9], [6, 9]],
]
anchored = true
# When an "earliest" search is used, greediness doesn't really exist because
# matches are reported as soon as they are known.
[[test]]
name = "greedy-earliest"
regex = '(abc)+'
haystack = "abcabcabc"
matches = [
[[0, 3], [0, 3]],
[[3, 6], [3, 6]],
[[6, 9], [6, 9]],
]
anchored = true
search-kind = "earliest"
[[test]]
name = "nongreedy"
regex = '(abc)+?'
haystack = "abcabcabc"
matches = [
[[0, 3], [0, 3]],
[[3, 6], [3, 6]],
[[6, 9], [6, 9]],
]
anchored = true
# When "all" semantics are used, non-greediness doesn't exist since the longest
# possible match is always taken.
[[test]]
name = "nongreedy-all"
regex = '(abc)+?'
haystack = "abcabcabc"
matches = [
[[0, 9], [6, 9]],
]
anchored = true
match-kind = "all"
[[test]]
name = "word-boundary-unicode-01"
regex = '\b\w+\b'
haystack = 'βββ☃'
matches = [[0, 6]]
anchored = true
[[test]]
name = "word-boundary-nounicode-01"
regex = '\b\w+\b'
haystack = 'abcβ'
matches = [[0, 3]]
anchored = true
unicode = false
# Tests that '.c' doesn't match 'abc' when performing an anchored search from
# the beginning of the haystack. This test found two different bugs in the
# PikeVM and the meta engine.
[[test]]
name = "no-match-at-start"
regex = '.c'
haystack = 'abc'
matches = []
anchored = true
# Like above, but at a non-zero start offset.
[[test]]
name = "no-match-at-start-bounds"
regex = '.c'
haystack = 'aabc'
bounds = [1, 4]
matches = []
anchored = true
# This is like no-match-at-start, but hits the "reverse inner" optimization
# inside the meta engine. (no-match-at-start hits the "reverse suffix"
# optimization.)
[[test]]
name = "no-match-at-start-reverse-inner"
regex = '.c[a-z]'
haystack = 'abcz'
matches = []
anchored = true
# Like above, but at a non-zero start offset.
[[test]]
name = "no-match-at-start-reverse-inner-bounds"
regex = '.c[a-z]'
haystack = 'aabcz'
bounds = [1, 5]
matches = []
anchored = true
# Same as no-match-at-start, but applies to the meta engine's "reverse
# anchored" optimization.
[[test]]
name = "no-match-at-start-reverse-anchored"
regex = '.c[a-z]$'
haystack = 'abcz'
matches = []
anchored = true
# Like above, but at a non-zero start offset.
[[test]]
name = "no-match-at-start-reverse-anchored-bounds"
regex = '.c[a-z]$'
haystack = 'aabcz'
bounds = [1, 5]
matches = []
anchored = true

235
vendor/regex/testdata/bytes.toml vendored Normal file
View File

@@ -0,0 +1,235 @@
# These are tests specifically crafted for regexes that can match arbitrary
# bytes. In some cases, we also test the Unicode variant as well, just because
# it's good sense to do so. But also, these tests aren't really about Unicode,
# but whether matches are only reported at valid UTF-8 boundaries. For most
# tests in this entire collection, utf8 = true. But for these tests, we use
# utf8 = false.
[[test]]
name = "word-boundary-ascii"
regex = ' \b'
haystack = " δ"
matches = []
unicode = false
utf8 = false
[[test]]
name = "word-boundary-unicode"
regex = ' \b'
haystack = " δ"
matches = [[0, 1]]
unicode = true
utf8 = false
[[test]]
name = "word-boundary-ascii-not"
regex = ' \B'
haystack = " δ"
matches = [[0, 1]]
unicode = false
utf8 = false
[[test]]
name = "word-boundary-unicode-not"
regex = ' \B'
haystack = " δ"
matches = []
unicode = true
utf8 = false
[[test]]
name = "perl-word-ascii"
regex = '\w+'
haystack = "aδ"
matches = [[0, 1]]
unicode = false
utf8 = false
[[test]]
name = "perl-word-unicode"
regex = '\w+'
haystack = "aδ"
matches = [[0, 3]]
unicode = true
utf8 = false
[[test]]
name = "perl-decimal-ascii"
regex = '\d+'
haystack = "1२३9"
matches = [[0, 1], [7, 8]]
unicode = false
utf8 = false
[[test]]
name = "perl-decimal-unicode"
regex = '\d+'
haystack = "1२३9"
matches = [[0, 8]]
unicode = true
utf8 = false
[[test]]
name = "perl-whitespace-ascii"
regex = '\s+'
haystack = " \u1680"
matches = [[0, 1]]
unicode = false
utf8 = false
[[test]]
name = "perl-whitespace-unicode"
regex = '\s+'
haystack = " \u1680"
matches = [[0, 4]]
unicode = true
utf8 = false
# The first `(.+)` matches two Unicode codepoints, but can't match the 5th
# byte, which isn't valid UTF-8. The second (byte based) `(.+)` takes over and
# matches.
[[test]]
name = "mixed-dot"
regex = '(.+)(?-u)(.+)'
haystack = '\xCE\x93\xCE\x94\xFF'
matches = [
[[0, 5], [0, 4], [4, 5]],
]
unescape = true
unicode = true
utf8 = false
[[test]]
name = "case-one-ascii"
regex = 'a'
haystack = "A"
matches = [[0, 1]]
case-insensitive = true
unicode = false
utf8 = false
[[test]]
name = "case-one-unicode"
regex = 'a'
haystack = "A"
matches = [[0, 1]]
case-insensitive = true
unicode = true
utf8 = false
[[test]]
name = "case-class-simple-ascii"
regex = '[a-z]+'
haystack = "AaAaA"
matches = [[0, 5]]
case-insensitive = true
unicode = false
utf8 = false
[[test]]
name = "case-class-ascii"
regex = '[a-z]+'
haystack = "aA\u212AaA"
matches = [[0, 2], [5, 7]]
case-insensitive = true
unicode = false
utf8 = false
[[test]]
name = "case-class-unicode"
regex = '[a-z]+'
haystack = "aA\u212AaA"
matches = [[0, 7]]
case-insensitive = true
unicode = true
utf8 = false
[[test]]
name = "negate-ascii"
regex = '[^a]'
haystack = "δ"
matches = [[0, 1], [1, 2]]
unicode = false
utf8 = false
[[test]]
name = "negate-unicode"
regex = '[^a]'
haystack = "δ"
matches = [[0, 2]]
unicode = true
utf8 = false
# When utf8=true, this won't match, because the implicit '.*?' prefix is
# Unicode aware and will refuse to match through invalid UTF-8 bytes.
[[test]]
name = "dotstar-prefix-ascii"
regex = 'a'
haystack = '\xFFa'
matches = [[1, 2]]
unescape = true
unicode = false
utf8 = false
[[test]]
name = "dotstar-prefix-unicode"
regex = 'a'
haystack = '\xFFa'
matches = [[1, 2]]
unescape = true
unicode = true
utf8 = false
[[test]]
name = "null-bytes"
regex = '(?P<cstr>[^\x00]+)\x00'
haystack = 'foo\x00'
matches = [
[[0, 4], [0, 3]],
]
unescape = true
unicode = false
utf8 = false
[[test]]
name = "invalid-utf8-anchor-100"
regex = '\xCC?^'
haystack = '\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4'
matches = [[0, 0]]
unescape = true
unicode = false
utf8 = false
[[test]]
name = "invalid-utf8-anchor-200"
regex = '^\xf7|4\xff\d\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a##########[] d\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a##########\[] #####\x80\S7|$'
haystack = '\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4'
matches = [[22, 22]]
unescape = true
unicode = false
utf8 = false
[[test]]
name = "invalid-utf8-anchor-300"
regex = '^|ddp\xff\xffdddddlQd@\x80'
haystack = '\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4'
matches = [[0, 0]]
unescape = true
unicode = false
utf8 = false
[[test]]
name = "word-boundary-ascii-100"
regex = '\Bx\B'
haystack = "áxβ"
matches = []
unicode = false
utf8 = false
[[test]]
name = "word-boundary-ascii-200"
regex = '\B'
haystack = "0\U0007EF5E"
matches = [[2, 2], [3, 3], [4, 4], [5, 5]]
unicode = false
utf8 = false

315
vendor/regex/testdata/crazy.toml vendored Normal file
View File

@@ -0,0 +1,315 @@
[[test]]
name = "nothing-empty"
regex = []
haystack = ""
matches = []
[[test]]
name = "nothing-something"
regex = []
haystack = "wat"
matches = []
[[test]]
name = "ranges"
regex = '(?-u)\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b'
haystack = "num: 255"
matches = [[5, 8]]
[[test]]
name = "ranges-not"
regex = '(?-u)\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b'
haystack = "num: 256"
matches = []
[[test]]
name = "float1"
regex = '[-+]?[0-9]*\.?[0-9]+'
haystack = "0.1"
matches = [[0, 3]]
[[test]]
name = "float2"
regex = '[-+]?[0-9]*\.?[0-9]+'
haystack = "0.1.2"
matches = [[0, 3]]
match-limit = 1
[[test]]
name = "float3"
regex = '[-+]?[0-9]*\.?[0-9]+'
haystack = "a1.2"
matches = [[1, 4]]
[[test]]
name = "float4"
regex = '[-+]?[0-9]*\.?[0-9]+'
haystack = "1.a"
matches = [[0, 1]]
[[test]]
name = "float5"
regex = '^[-+]?[0-9]*\.?[0-9]+$'
haystack = "1.a"
matches = []
[[test]]
name = "email"
regex = '(?i-u)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b'
haystack = "mine is jam.slam@gmail.com "
matches = [[8, 26]]
[[test]]
name = "email-not"
regex = '(?i-u)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b'
haystack = "mine is jam.slam@gmail "
matches = []
[[test]]
name = "email-big"
regex = '''[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?'''
haystack = "mine is jam.slam@gmail.com "
matches = [[8, 26]]
[[test]]
name = "date1"
regex = '^(?:19|20)\d\d[- /.](?:0[1-9]|1[012])[- /.](?:0[1-9]|[12][0-9]|3[01])$'
haystack = "1900-01-01"
matches = [[0, 10]]
unicode = false
[[test]]
name = "date2"
regex = '^(?:19|20)\d\d[- /.](?:0[1-9]|1[012])[- /.](?:0[1-9]|[12][0-9]|3[01])$'
haystack = "1900-00-01"
matches = []
unicode = false
[[test]]
name = "date3"
regex = '^(?:19|20)\d\d[- /.](?:0[1-9]|1[012])[- /.](?:0[1-9]|[12][0-9]|3[01])$'
haystack = "1900-13-01"
matches = []
unicode = false
[[test]]
name = "start-end-empty"
regex = '^$'
haystack = ""
matches = [[0, 0]]
[[test]]
name = "start-end-empty-rev"
regex = '$^'
haystack = ""
matches = [[0, 0]]
[[test]]
name = "start-end-empty-many-1"
regex = '^$^$^$'
haystack = ""
matches = [[0, 0]]
[[test]]
name = "start-end-empty-many-2"
regex = '^^^$$$'
haystack = ""
matches = [[0, 0]]
[[test]]
name = "start-end-empty-rep"
regex = '(?:^$)*'
haystack = "a\nb\nc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]
[[test]]
name = "start-end-empty-rep-rev"
regex = '(?:$^)*'
haystack = "a\nb\nc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]
[[test]]
name = "neg-class-letter"
regex = '[^ac]'
haystack = "acx"
matches = [[2, 3]]
[[test]]
name = "neg-class-letter-comma"
regex = '[^a,]'
haystack = "a,x"
matches = [[2, 3]]
[[test]]
name = "neg-class-letter-space"
regex = '[^a[:space:]]'
haystack = "a x"
matches = [[2, 3]]
[[test]]
name = "neg-class-comma"
regex = '[^,]'
haystack = ",,x"
matches = [[2, 3]]
[[test]]
name = "neg-class-space"
regex = '[^[:space:]]'
haystack = " a"
matches = [[1, 2]]
[[test]]
name = "neg-class-space-comma"
regex = '[^,[:space:]]'
haystack = ", a"
matches = [[2, 3]]
[[test]]
name = "neg-class-comma-space"
regex = '[^[:space:],]'
haystack = " ,a"
matches = [[2, 3]]
[[test]]
name = "neg-class-ascii"
regex = '[^[:alpha:]Z]'
haystack = "A1"
matches = [[1, 2]]
[[test]]
name = "lazy-many-many"
regex = '(?:(?:.*)*?)='
haystack = "a=b"
matches = [[0, 2]]
[[test]]
name = "lazy-many-optional"
regex = '(?:(?:.?)*?)='
haystack = "a=b"
matches = [[0, 2]]
[[test]]
name = "lazy-one-many-many"
regex = '(?:(?:.*)+?)='
haystack = "a=b"
matches = [[0, 2]]
[[test]]
name = "lazy-one-many-optional"
regex = '(?:(?:.?)+?)='
haystack = "a=b"
matches = [[0, 2]]
[[test]]
name = "lazy-range-min-many"
regex = '(?:(?:.*){1,}?)='
haystack = "a=b"
matches = [[0, 2]]
[[test]]
name = "lazy-range-many"
regex = '(?:(?:.*){1,2}?)='
haystack = "a=b"
matches = [[0, 2]]
[[test]]
name = "greedy-many-many"
regex = '(?:(?:.*)*)='
haystack = "a=b"
matches = [[0, 2]]
[[test]]
name = "greedy-many-optional"
regex = '(?:(?:.?)*)='
haystack = "a=b"
matches = [[0, 2]]
[[test]]
name = "greedy-one-many-many"
regex = '(?:(?:.*)+)='
haystack = "a=b"
matches = [[0, 2]]
[[test]]
name = "greedy-one-many-optional"
regex = '(?:(?:.?)+)='
haystack = "a=b"
matches = [[0, 2]]
[[test]]
name = "greedy-range-min-many"
regex = '(?:(?:.*){1,})='
haystack = "a=b"
matches = [[0, 2]]
[[test]]
name = "greedy-range-many"
regex = '(?:(?:.*){1,2})='
haystack = "a=b"
matches = [[0, 2]]
[[test]]
name = "empty1"
regex = ''
haystack = ""
matches = [[0, 0]]
[[test]]
name = "empty2"
regex = ''
haystack = "abc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "empty3"
regex = '(?:)'
haystack = "abc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "empty4"
regex = '(?:)*'
haystack = "abc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "empty5"
regex = '(?:)+'
haystack = "abc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "empty6"
regex = '(?:)?'
haystack = "abc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "empty7"
regex = '(?:)(?:)'
haystack = "abc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "empty8"
regex = '(?:)+|z'
haystack = "abc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "empty9"
regex = 'z|(?:)+'
haystack = "abc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "empty10"
regex = '(?:)+|b'
haystack = "abc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "empty11"
regex = 'b|(?:)+'
haystack = "abc"
matches = [[0, 0], [1, 2], [3, 3]]

117
vendor/regex/testdata/crlf.toml vendored Normal file
View File

@@ -0,0 +1,117 @@
# This is a basic test that checks ^ and $ treat \r\n as a single line
# terminator. If ^ and $ only treated \n as a line terminator, then this would
# only match 'xyz' at the end of the haystack.
[[test]]
name = "basic"
regex = '(?mR)^[a-z]+$'
haystack = "abc\r\ndef\r\nxyz"
matches = [[0, 3], [5, 8], [10, 13]]
# Tests that a CRLF-aware '^$' assertion does not match between CR and LF.
[[test]]
name = "start-end-non-empty"
regex = '(?mR)^$'
haystack = "abc\r\ndef\r\nxyz"
matches = []
# Tests that a CRLF-aware '^$' assertion matches the empty string, just like
# a non-CRLF-aware '^$' assertion.
[[test]]
name = "start-end-empty"
regex = '(?mR)^$'
haystack = ""
matches = [[0, 0]]
# Tests that a CRLF-aware '^$' assertion matches the empty string preceding
# and following a line terminator.
[[test]]
name = "start-end-before-after"
regex = '(?mR)^$'
haystack = "\r\n"
matches = [[0, 0], [2, 2]]
# Tests that a CRLF-aware '^' assertion does not split a line terminator.
[[test]]
name = "start-no-split"
regex = '(?mR)^'
haystack = "abc\r\ndef\r\nxyz"
matches = [[0, 0], [5, 5], [10, 10]]
# Same as above, but with adjacent runs of line terminators.
[[test]]
name = "start-no-split-adjacent"
regex = '(?mR)^'
haystack = "\r\n\r\n\r\n"
matches = [[0, 0], [2, 2], [4, 4], [6, 6]]
# Same as above, but with adjacent runs of just carriage returns.
[[test]]
name = "start-no-split-adjacent-cr"
regex = '(?mR)^'
haystack = "\r\r\r"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
# Same as above, but with adjacent runs of just line feeds.
[[test]]
name = "start-no-split-adjacent-lf"
regex = '(?mR)^'
haystack = "\n\n\n"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
# Tests that a CRLF-aware '$' assertion does not split a line terminator.
[[test]]
name = "end-no-split"
regex = '(?mR)$'
haystack = "abc\r\ndef\r\nxyz"
matches = [[3, 3], [8, 8], [13, 13]]
# Same as above, but with adjacent runs of line terminators.
[[test]]
name = "end-no-split-adjacent"
regex = '(?mR)$'
haystack = "\r\n\r\n\r\n"
matches = [[0, 0], [2, 2], [4, 4], [6, 6]]
# Same as above, but with adjacent runs of just carriage returns.
[[test]]
name = "end-no-split-adjacent-cr"
regex = '(?mR)$'
haystack = "\r\r\r"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
# Same as above, but with adjacent runs of just line feeds.
[[test]]
name = "end-no-split-adjacent-lf"
regex = '(?mR)$'
haystack = "\n\n\n"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
# Tests that '.' does not match either \r or \n when CRLF mode is enabled. Note
# that this doesn't require multi-line mode to be enabled.
[[test]]
name = "dot-no-crlf"
regex = '(?R).'
haystack = "\r\n\r\n\r\n"
matches = []
# This is a test that caught a bug in the one-pass DFA where it (amazingly) was
# using 'is_end_lf' instead of 'is_end_crlf' here. It was probably a copy &
# paste bug. We insert an empty capture group here because it provokes the meta
# regex engine to first find a match and then trip over a panic because the
# one-pass DFA erroneously says there is no match.
[[test]]
name = "onepass-wrong-crlf-with-capture"
regex = '(?Rm:().$)'
haystack = "ZZ\r"
matches = [[[1, 2], [1, 1]]]
# This is like onepass-wrong-crlf-with-capture above, except it sets up the
# test so that it can be run by the one-pass DFA directly. (i.e., Make it
# anchored and start the search at the right place.)
[[test]]
name = "onepass-wrong-crlf-anchored"
regex = '(?Rm:.$)'
haystack = "ZZ\r"
matches = [[1, 2]]
anchored = true
bounds = [1, 3]

52
vendor/regex/testdata/earliest.toml vendored Normal file
View File

@@ -0,0 +1,52 @@
[[test]]
name = "no-greedy-100"
regex = 'a+'
haystack = "aaa"
matches = [[0, 1], [1, 2], [2, 3]]
search-kind = "earliest"
[[test]]
name = "no-greedy-200"
regex = 'abc+'
haystack = "zzzabccc"
matches = [[3, 6]]
search-kind = "earliest"
[[test]]
name = "is-ungreedy"
regex = 'a+?'
haystack = "aaa"
matches = [[0, 1], [1, 2], [2, 3]]
search-kind = "earliest"
[[test]]
name = "look-start-test"
regex = '^(abc|a)'
haystack = "abc"
matches = [
[[0, 1], [0, 1]],
]
search-kind = "earliest"
[[test]]
name = "look-end-test"
regex = '(abc|a)$'
haystack = "abc"
matches = [
[[0, 3], [0, 3]],
]
search-kind = "earliest"
[[test]]
name = "no-leftmost-first-100"
regex = 'abc|a'
haystack = "abc"
matches = [[0, 1]]
search-kind = "earliest"
[[test]]
name = "no-leftmost-first-200"
regex = 'aba|a'
haystack = "aba"
matches = [[0, 1], [2, 3]]
search-kind = "earliest"

113
vendor/regex/testdata/empty.toml vendored Normal file
View File

@@ -0,0 +1,113 @@
[[test]]
name = "100"
regex = "|b"
haystack = "abc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "110"
regex = "b|"
haystack = "abc"
matches = [[0, 0], [1, 2], [3, 3]]
[[test]]
name = "120"
regex = "|z"
haystack = "abc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "130"
regex = "z|"
haystack = "abc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "200"
regex = "|"
haystack = "abc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "210"
regex = "||"
haystack = "abc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "220"
regex = "||b"
haystack = "abc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "230"
regex = "b||"
haystack = "abc"
matches = [[0, 0], [1, 2], [3, 3]]
[[test]]
name = "240"
regex = "||z"
haystack = "abc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "300"
regex = "(?:)|b"
haystack = "abc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "310"
regex = "b|(?:)"
haystack = "abc"
matches = [[0, 0], [1, 2], [3, 3]]
[[test]]
name = "320"
regex = "(?:|)"
haystack = "abc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "330"
regex = "(?:|)|z"
haystack = "abc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "400"
regex = "a(?:)|b"
haystack = "abc"
matches = [[0, 1], [1, 2]]
[[test]]
name = "500"
regex = ""
haystack = ""
matches = [[0, 0]]
[[test]]
name = "510"
regex = ""
haystack = "a"
matches = [[0, 0], [1, 1]]
[[test]]
name = "520"
regex = ""
haystack = "abc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "600"
regex = '(?:|a)*'
haystack = "aaa"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "610"
regex = '(?:|a)+'
haystack = "aaa"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]

23
vendor/regex/testdata/expensive.toml vendored Normal file
View File

@@ -0,0 +1,23 @@
# This file represents tests that may be expensive to run on some regex engines.
# For example, tests that build a full DFA ahead of time and minimize it can
# take a horrendously long time on regexes that are large (or result in an
# explosion in the number of states). We group these tests together so that
# such engines can simply skip these tests.
# See: https://github.com/rust-lang/regex/issues/98
[[test]]
name = "regression-many-repeat-no-stack-overflow"
regex = '^.{1,2500}'
haystack = "a"
matches = [[0, 1]]
# This test is meant to blow the bounded backtracker's visited capacity. In
# order to do that, we need a somewhat sizeable regex. The purpose of this
# is to make sure there's at least one test that exercises this path in the
# backtracker. All other tests (at time of writing) are small enough that the
# backtracker can handle them fine.
[[test]]
name = "backtrack-blow-visited-capacity"
regex = '\pL{50}'
haystack = "abcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyZZ"
matches = [[0, 50], [50, 100], [100, 150]]

68
vendor/regex/testdata/flags.toml vendored Normal file
View File

@@ -0,0 +1,68 @@
[[test]]
name = "1"
regex = "(?i)abc"
haystack = "ABC"
matches = [[0, 3]]
[[test]]
name = "2"
regex = "(?i)a(?-i)bc"
haystack = "Abc"
matches = [[0, 3]]
[[test]]
name = "3"
regex = "(?i)a(?-i)bc"
haystack = "ABC"
matches = []
[[test]]
name = "4"
regex = "(?is)a."
haystack = "A\n"
matches = [[0, 2]]
[[test]]
name = "5"
regex = "(?is)a.(?-is)a."
haystack = "A\nab"
matches = [[0, 4]]
[[test]]
name = "6"
regex = "(?is)a.(?-is)a."
haystack = "A\na\n"
matches = []
[[test]]
name = "7"
regex = "(?is)a.(?-is:a.)?"
haystack = "A\na\n"
matches = [[0, 2]]
match-limit = 1
[[test]]
name = "8"
regex = "(?U)a+"
haystack = "aa"
matches = [[0, 1]]
match-limit = 1
[[test]]
name = "9"
regex = "(?U)a+?"
haystack = "aa"
matches = [[0, 2]]
[[test]]
name = "10"
regex = "(?U)(?-U)a+"
haystack = "aa"
matches = [[0, 2]]
[[test]]
name = "11"
regex = '(?m)(?:^\d+$\n?)+'
haystack = "123\n456\n789"
matches = [[0, 11]]
unicode = false

1611
vendor/regex/testdata/fowler/basic.toml vendored Normal file

File diff suppressed because it is too large Load Diff

25
vendor/regex/testdata/fowler/dat/README vendored Normal file
View File

@@ -0,0 +1,25 @@
Test data was taken from the Go distribution, which was in turn taken from the
testregex test suite:
http://web.archive.org/web/20150925124103/http://www2.research.att.com/~astopen/testregex/testregex.html
Unfortunately, the original web site now appears dead, but the test data lives
on.
The LICENSE in this directory corresponds to the LICENSE that the data was
originally released under.
The tests themselves were modified for RE2/Go (and marked as such). A
couple were modified further by me (Andrew Gallant) and marked with 'Rust'.
After some number of years, these tests were transformed into a TOML format
using the 'regex-cli generate fowler' command. To re-generate the
TOML files, run the following from the root of this repository:
regex-cli generate fowler tests/data/fowler tests/data/fowler/dat/*.dat
This assumes that you have 'regex-cli' installed. See 'regex-cli/README.md'
from the root of the repository for more information.
This brings the Fowler tests into a more "sensible" structured format in which
other tests can be written such that they aren't write-only.

View File

@@ -0,0 +1,223 @@
NOTE all standard compliant implementations should pass these : 2002-05-31
BE abracadabra$ abracadabracadabra (7,18)
BE a...b abababbb (2,7)
BE XXXXXX ..XXXXXX (2,8)
E \) () (1,2)
BE a] a]a (0,2)
B } } (0,1)
E \} } (0,1)
BE \] ] (0,1)
B ] ] (0,1)
E ] ] (0,1)
B { { (0,1)
B } } (0,1)
BE ^a ax (0,1)
BE \^a a^a (1,3)
BE a\^ a^ (0,2)
BE a$ aa (1,2)
BE a\$ a$ (0,2)
BE ^$ NULL (0,0)
E $^ NULL (0,0)
E a($) aa (1,2)(2,2)
E a*(^a) aa (0,1)(0,1)
E (..)*(...)* a (0,0)
E (..)*(...)* abcd (0,4)(2,4)
E (ab|a)(bc|c) abc (0,3)(0,2)(2,3)
E (ab)c|abc abc (0,3)(0,2)
E a{0}b ab (1,2)
E (a*)(b?)(b+)b{3} aaabbbbbbb (0,10)(0,3)(3,4)(4,7)
E (a*)(b{0,1})(b{1,})b{3} aaabbbbbbb (0,10)(0,3)(3,4)(4,7)
E a{9876543210} NULL BADBR
E ((a|a)|a) a (0,1)(0,1)(0,1)
E (a*)(a|aa) aaaa (0,4)(0,3)(3,4)
E a*(a.|aa) aaaa (0,4)(2,4)
E a(b)|c(d)|a(e)f aef (0,3)(?,?)(?,?)(1,2)
E (a|b)?.* b (0,1)(0,1)
E (a|b)c|a(b|c) ac (0,2)(0,1)
E (a|b)c|a(b|c) ab (0,2)(?,?)(1,2)
E (a|b)*c|(a|ab)*c abc (0,3)(1,2)
E (a|b)*c|(a|ab)*c xc (1,2)
E (.a|.b).*|.*(.a|.b) xa (0,2)(0,2)
E a?(ab|ba)ab abab (0,4)(0,2)
E a?(ac{0}b|ba)ab abab (0,4)(0,2)
E ab|abab abbabab (0,2)
E aba|bab|bba baaabbbaba (5,8)
E aba|bab baaabbbaba (6,9)
E (aa|aaa)*|(a|aaaaa) aa (0,2)(0,2)
E (a.|.a.)*|(a|.a...) aa (0,2)(0,2)
E ab|a xabc (1,3)
E ab|a xxabc (2,4)
Ei (Ab|cD)* aBcD (0,4)(2,4)
BE [^-] --a (2,3)
BE [a-]* --a (0,3)
BE [a-m-]* --amoma-- (0,4)
E :::1:::0:|:::1:1:0: :::0:::1:::1:::0: (8,17)
E :::1:::0:|:::1:1:1: :::0:::1:::1:::0: (8,17)
{E [[:upper:]] A (0,1) [[<element>]] not supported
E [[:lower:]]+ `az{ (1,3)
E [[:upper:]]+ @AZ[ (1,3)
# No collation in Go
#BE [[-]] [[-]] (2,4)
#BE [[.NIL.]] NULL ECOLLATE
#BE [[=aleph=]] NULL ECOLLATE
}
BE$ \n \n (0,1)
BEn$ \n \n (0,1)
BE$ [^a] \n (0,1)
BE$ \na \na (0,2)
E (a)(b)(c) abc (0,3)(0,1)(1,2)(2,3)
BE xxx xxx (0,3)
#E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) feb 6, (0,6)
E (?:^|[ (,;])(?:(?:(?:[Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))(?:[^0-9]|$) feb 6, (0,6) Rust
#E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) 2/7 (0,3)
E (?:^|[ (,;])(?:(?:(?:[Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))(?:[^0-9]|$) 2/7 (0,3) Rust
#E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) feb 1,Feb 6 (5,11)
E (?:^|[ (,;])(?:(?:(?:[Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))(?:[^0-9]|$) feb 1,Feb 6 (5,11) Rust
#E3 ((((((((((((((((((((((((((((((x)))))))))))))))))))))))))))))) x (0,1)(0,1)(0,1)
E (((?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:x)))))))))))))))))))))))))))))) x (0,1)(0,1)(0,1) Rust
#E3 ((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))* xx (0,2)(1,2)(1,2)
E (((?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:x))))))))))))))))))))))))))))))* xx (0,2)(1,2)(1,2) Rust
E a?(ab|ba)* ababababababababababababababababababababababababababababababababababababababababa (0,81)(79,81)
E abaa|abbaa|abbbaa|abbbbaa ababbabbbabbbabbbbabbbbaa (18,25)
E abaa|abbaa|abbbaa|abbbbaa ababbabbbabbbabbbbabaa (18,22)
E aaac|aabc|abac|abbc|baac|babc|bbac|bbbc baaabbbabac (7,11)
#BE$ .* \x01\xff (0,2)
BE$ .* \x01\x7f (0,2) Rust
E aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa (53,57)
L aaaa\nbbbb\ncccc\nddddd\neeeeee\nfffffff\ngggg\nhhhh\niiiii\njjjjj\nkkkkk\nllll XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa NOMATCH
E a*a*a*a*a*b aaaaaaaaab (0,10)
BE ^ NULL (0,0)
BE $ NULL (0,0)
BE ^$ NULL (0,0)
BE ^a$ a (0,1)
BE abc abc (0,3)
BE abc xabcy (1,4)
BE abc ababc (2,5)
BE ab*c abc (0,3)
BE ab*bc abc (0,3)
BE ab*bc abbc (0,4)
BE ab*bc abbbbc (0,6)
E ab+bc abbc (0,4)
E ab+bc abbbbc (0,6)
E ab?bc abbc (0,4)
E ab?bc abc (0,3)
E ab?c abc (0,3)
BE ^abc$ abc (0,3)
BE ^abc abcc (0,3)
BE abc$ aabc (1,4)
BE ^ abc (0,0)
BE $ abc (3,3)
BE a.c abc (0,3)
BE a.c axc (0,3)
BE a.*c axyzc (0,5)
BE a[bc]d abd (0,3)
BE a[b-d]e ace (0,3)
BE a[b-d] aac (1,3)
BE a[-b] a- (0,2)
BE a[b-] a- (0,2)
BE a] a] (0,2)
BE a[]]b a]b (0,3)
BE a[^bc]d aed (0,3)
BE a[^-b]c adc (0,3)
BE a[^]b]c adc (0,3)
E ab|cd abc (0,2)
E ab|cd abcd (0,2)
E a\(b a(b (0,3)
E a\(*b ab (0,2)
E a\(*b a((b (0,4)
E ((a)) abc (0,1)(0,1)(0,1)
E (a)b(c) abc (0,3)(0,1)(2,3)
E a+b+c aabbabc (4,7)
E a* aaa (0,3)
E (a*)* - (0,0)(0,0)
E (a*)+ - (0,0)(0,0)
E (a*|b)* - (0,0)(0,0)
E (a+|b)* ab (0,2)(1,2)
E (a+|b)+ ab (0,2)(1,2)
E (a+|b)? ab (0,1)(0,1)
BE [^ab]* cde (0,3)
E (^)* - (0,0)(0,0)
BE a* NULL (0,0)
E ([abc])*d abbbcd (0,6)(4,5)
E ([abc])*bcd abcd (0,4)(0,1)
E a|b|c|d|e e (0,1)
E (a|b|c|d|e)f ef (0,2)(0,1)
E ((a*|b))* - (0,0)(0,0)(0,0)
BE abcd*efg abcdefg (0,7)
BE ab* xabyabbbz (1,3)
BE ab* xayabbbz (1,2)
E (ab|cd)e abcde (2,5)(2,4)
BE [abhgefdc]ij hij (0,3)
E (a|b)c*d abcd (1,4)(1,2)
E (ab|ab*)bc abc (0,3)(0,1)
E a([bc]*)c* abc (0,3)(1,3)
E a([bc]*)(c*d) abcd (0,4)(1,3)(3,4)
E a([bc]+)(c*d) abcd (0,4)(1,3)(3,4)
E a([bc]*)(c+d) abcd (0,4)(1,2)(2,4)
E a[bcd]*dcdcde adcdcde (0,7)
E (ab|a)b*c abc (0,3)(0,2)
E ((a)(b)c)(d) abcd (0,4)(0,3)(0,1)(1,2)(3,4)
BE [A-Za-z_][A-Za-z0-9_]* alpha (0,5)
E ^a(bc+|b[eh])g|.h$ abh (1,3)
E (bc+d$|ef*g.|h?i(j|k)) effgz (0,5)(0,5)
E (bc+d$|ef*g.|h?i(j|k)) ij (0,2)(0,2)(1,2)
E (bc+d$|ef*g.|h?i(j|k)) reffgz (1,6)(1,6)
E (((((((((a))))))))) a (0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)
BE multiple words multiple words yeah (0,14)
E (.*)c(.*) abcde (0,5)(0,2)(3,5)
BE abcd abcd (0,4)
E a(bc)d abcd (0,4)(1,3)
E a[-]?c ac (0,3)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Qaddafi (0,15)(?,?)(10,12)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mo'ammar Gadhafi (0,16)(?,?)(11,13)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Kaddafi (0,15)(?,?)(10,12)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Qadhafi (0,15)(?,?)(10,12)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Gadafi (0,14)(?,?)(10,11)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mu'ammar Qadafi (0,15)(?,?)(11,12)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moamar Gaddafi (0,14)(?,?)(9,11)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mu'ammar Qadhdhafi (0,18)(?,?)(13,15)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Khaddafi (0,16)(?,?)(11,13)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghaddafy (0,16)(?,?)(11,13)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghadafi (0,15)(?,?)(11,12)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghaddafi (0,16)(?,?)(11,13)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muamar Kaddafi (0,14)(?,?)(9,11)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Quathafi (0,16)(?,?)(11,13)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Gheddafi (0,16)(?,?)(11,13)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moammar Khadafy (0,15)(?,?)(11,12)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moammar Qudhafi (0,15)(?,?)(10,12)
E a+(b|c)*d+ aabcdd (0,6)(3,4)
E ^.+$ vivi (0,4)
E ^(.+)$ vivi (0,4)(0,4)
E ^([^!.]+).att.com!(.+)$ gryphon.att.com!eby (0,19)(0,7)(16,19)
E ^([^!]+!)?([^!]+)$ bas (0,3)(?,?)(0,3)
E ^([^!]+!)?([^!]+)$ bar!bas (0,7)(0,4)(4,7)
E ^([^!]+!)?([^!]+)$ foo!bas (0,7)(0,4)(4,7)
E ^.+!([^!]+!)([^!]+)$ foo!bar!bas (0,11)(4,8)(8,11)
E ((foo)|(bar))!bas bar!bas (0,7)(0,3)(?,?)(0,3)
E ((foo)|(bar))!bas foo!bar!bas (4,11)(4,7)(?,?)(4,7)
E ((foo)|(bar))!bas foo!bas (0,7)(0,3)(0,3)
E ((foo)|bar)!bas bar!bas (0,7)(0,3)
E ((foo)|bar)!bas foo!bar!bas (4,11)(4,7)
E ((foo)|bar)!bas foo!bas (0,7)(0,3)(0,3)
E (foo|(bar))!bas bar!bas (0,7)(0,3)(0,3)
E (foo|(bar))!bas foo!bar!bas (4,11)(4,7)(4,7)
E (foo|(bar))!bas foo!bas (0,7)(0,3)
E (foo|bar)!bas bar!bas (0,7)(0,3)
E (foo|bar)!bas foo!bar!bas (4,11)(4,7)
E (foo|bar)!bas foo!bas (0,7)(0,3)
E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bar!bas (0,11)(0,11)(?,?)(?,?)(4,8)(8,11)
E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ bas (0,3)(?,?)(0,3)
E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ bar!bas (0,7)(0,4)(4,7)
E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ foo!bar!bas (0,11)(?,?)(?,?)(4,8)(8,11)
E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ foo!bas (0,7)(0,4)(4,7)
E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ bas (0,3)(0,3)(?,?)(0,3)
E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ bar!bas (0,7)(0,7)(0,4)(4,7)
E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bar!bas (0,11)(0,11)(?,?)(?,?)(4,8)(8,11)
E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bas (0,7)(0,7)(0,4)(4,7)
E .*(/XXX).* /XXX (0,4)(0,4)
E .*(\\XXX).* \XXX (0,4)(0,4)
E \\XXX \XXX (0,4)
E .*(/000).* /000 (0,4)(0,4)
E .*(\\000).* \000 (0,4)(0,4)
E \\000 \000 (0,4)

View File

@@ -0,0 +1,74 @@
NOTE null subexpression matches : 2002-06-06
E (a*)* a (0,1)(0,1)
E SAME x (0,0)(0,0)
E SAME aaaaaa (0,6)(0,6)
E SAME aaaaaax (0,6)(0,6)
E (a*)+ a (0,1)(0,1)
E SAME x (0,0)(0,0)
E SAME aaaaaa (0,6)(0,6)
E SAME aaaaaax (0,6)(0,6)
E (a+)* a (0,1)(0,1)
E SAME x (0,0)
E SAME aaaaaa (0,6)(0,6)
E SAME aaaaaax (0,6)(0,6)
E (a+)+ a (0,1)(0,1)
E SAME x NOMATCH
E SAME aaaaaa (0,6)(0,6)
E SAME aaaaaax (0,6)(0,6)
E ([a]*)* a (0,1)(0,1)
E SAME x (0,0)(0,0)
E SAME aaaaaa (0,6)(0,6)
E SAME aaaaaax (0,6)(0,6)
E ([a]*)+ a (0,1)(0,1)
E SAME x (0,0)(0,0)
E SAME aaaaaa (0,6)(0,6)
E SAME aaaaaax (0,6)(0,6)
E ([^b]*)* a (0,1)(0,1)
E SAME b (0,0)(0,0)
E SAME aaaaaa (0,6)(0,6)
E SAME aaaaaab (0,6)(0,6)
E ([ab]*)* a (0,1)(0,1)
E SAME aaaaaa (0,6)(0,6)
E SAME ababab (0,6)(0,6)
E SAME bababa (0,6)(0,6)
E SAME b (0,1)(0,1)
E SAME bbbbbb (0,6)(0,6)
E SAME aaaabcde (0,5)(0,5)
E ([^a]*)* b (0,1)(0,1)
E SAME bbbbbb (0,6)(0,6)
E SAME aaaaaa (0,0)(0,0)
E ([^ab]*)* ccccxx (0,6)(0,6)
E SAME ababab (0,0)(0,0)
#E ((z)+|a)* zabcde (0,2)(1,2)
E ((z)+|a)* zabcde (0,2)(1,2)(0,1) Rust
#{E a+? aaaaaa (0,1) no *? +? minimal match ops
#E (a) aaa (0,1)(0,1)
#E (a*?) aaa (0,0)(0,0)
#E (a)*? aaa (0,0)
#E (a*?)*? aaa (0,0)
#}
B \(a*\)*\(x\) x (0,1)(0,0)(0,1)
B \(a*\)*\(x\) ax (0,2)(0,1)(1,2)
B \(a*\)*\(x\) axa (0,2)(0,1)(1,2)
B \(a*\)*\(x\)\(\1\) x (0,1)(0,0)(0,1)(1,1)
B \(a*\)*\(x\)\(\1\) ax (0,2)(1,1)(1,2)(2,2)
B \(a*\)*\(x\)\(\1\) axa (0,3)(0,1)(1,2)(2,3)
B \(a*\)*\(x\)\(\1\)\(x\) axax (0,4)(0,1)(1,2)(2,3)(3,4)
B \(a*\)*\(x\)\(\1\)\(x\) axxa (0,3)(1,1)(1,2)(2,2)(2,3)
E (a*)*(x) x (0,1)(0,0)(0,1)
E (a*)*(x) ax (0,2)(0,1)(1,2)
E (a*)*(x) axa (0,2)(0,1)(1,2)
E (a*)+(x) x (0,1)(0,0)(0,1)
E (a*)+(x) ax (0,2)(0,1)(1,2)
E (a*)+(x) axa (0,2)(0,1)(1,2)
E (a*){2}(x) x (0,1)(0,0)(0,1)
E (a*){2}(x) ax (0,2)(1,1)(1,2)
E (a*){2}(x) axa (0,2)(1,1)(1,2)

View File

@@ -0,0 +1,169 @@
NOTE implicit vs. explicit repetitions : 2009-02-02
# Glenn Fowler <gsf@research.att.com>
# conforming matches (column 4) must match one of the following BREs
# NOMATCH
# (0,.)\((\(.\),\(.\))(?,?)(\2,\3)\)*
# (0,.)\((\(.\),\(.\))(\2,\3)(?,?)\)*
# i.e., each 3-tuple has two identical elements and one (?,?)
E ((..)|(.)) NULL NOMATCH
E ((..)|(.))((..)|(.)) NULL NOMATCH
E ((..)|(.))((..)|(.))((..)|(.)) NULL NOMATCH
E ((..)|(.)){1} NULL NOMATCH
E ((..)|(.)){2} NULL NOMATCH
E ((..)|(.)){3} NULL NOMATCH
E ((..)|(.))* NULL (0,0)
E ((..)|(.)) a (0,1)(0,1)(?,?)(0,1)
E ((..)|(.))((..)|(.)) a NOMATCH
E ((..)|(.))((..)|(.))((..)|(.)) a NOMATCH
E ((..)|(.)){1} a (0,1)(0,1)(?,?)(0,1)
E ((..)|(.)){2} a NOMATCH
E ((..)|(.)){3} a NOMATCH
E ((..)|(.))* a (0,1)(0,1)(?,?)(0,1)
E ((..)|(.)) aa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.))((..)|(.)) aa (0,2)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2)
E ((..)|(.))((..)|(.))((..)|(.)) aa NOMATCH
E ((..)|(.)){1} aa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.)){2} aa (0,2)(1,2)(?,?)(1,2)
E ((..)|(.)){3} aa NOMATCH
E ((..)|(.))* aa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.)) aaa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.))((..)|(.)) aaa (0,3)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3)
E ((..)|(.))((..)|(.))((..)|(.)) aaa (0,3)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2)(2,3)(?,?)(2,3)
E ((..)|(.)){1} aaa (0,2)(0,2)(0,2)(?,?)
#E ((..)|(.)){2} aaa (0,3)(2,3)(?,?)(2,3)
E ((..)|(.)){2} aaa (0,3)(2,3)(0,2)(2,3) RE2/Go
E ((..)|(.)){3} aaa (0,3)(2,3)(?,?)(2,3)
#E ((..)|(.))* aaa (0,3)(2,3)(?,?)(2,3)
E ((..)|(.))* aaa (0,3)(2,3)(0,2)(2,3) RE2/Go
E ((..)|(.)) aaaa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.))((..)|(.)) aaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
E ((..)|(.))((..)|(.))((..)|(.)) aaaa (0,4)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3)(3,4)(?,?)(3,4)
E ((..)|(.)){1} aaaa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.)){2} aaaa (0,4)(2,4)(2,4)(?,?)
#E ((..)|(.)){3} aaaa (0,4)(3,4)(?,?)(3,4)
E ((..)|(.)){3} aaaa (0,4)(3,4)(0,2)(3,4) RE2/Go
E ((..)|(.))* aaaa (0,4)(2,4)(2,4)(?,?)
E ((..)|(.)) aaaaa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.))((..)|(.)) aaaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
E ((..)|(.))((..)|(.))((..)|(.)) aaaaa (0,5)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,5)(?,?)(4,5)
E ((..)|(.)){1} aaaaa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.)){2} aaaaa (0,4)(2,4)(2,4)(?,?)
#E ((..)|(.)){3} aaaaa (0,5)(4,5)(?,?)(4,5)
E ((..)|(.)){3} aaaaa (0,5)(4,5)(2,4)(4,5) RE2/Go
#E ((..)|(.))* aaaaa (0,5)(4,5)(?,?)(4,5)
E ((..)|(.))* aaaaa (0,5)(4,5)(2,4)(4,5) RE2/Go
E ((..)|(.)) aaaaaa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.))((..)|(.)) aaaaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
E ((..)|(.))((..)|(.))((..)|(.)) aaaaaa (0,6)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,6)(4,6)(?,?)
E ((..)|(.)){1} aaaaaa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.)){2} aaaaaa (0,4)(2,4)(2,4)(?,?)
E ((..)|(.)){3} aaaaaa (0,6)(4,6)(4,6)(?,?)
E ((..)|(.))* aaaaaa (0,6)(4,6)(4,6)(?,?)
NOTE additional repetition tests graciously provided by Chris Kuklewicz www.haskell.org 2009-02-02
# These test a bug in OS X / FreeBSD / NetBSD, and libtree.
# Linux/GLIBC gets the {8,} and {8,8} wrong.
:HA#100:E X(.?){0,}Y X1234567Y (0,9)(7,8)
:HA#101:E X(.?){1,}Y X1234567Y (0,9)(7,8)
:HA#102:E X(.?){2,}Y X1234567Y (0,9)(7,8)
:HA#103:E X(.?){3,}Y X1234567Y (0,9)(7,8)
:HA#104:E X(.?){4,}Y X1234567Y (0,9)(7,8)
:HA#105:E X(.?){5,}Y X1234567Y (0,9)(7,8)
:HA#106:E X(.?){6,}Y X1234567Y (0,9)(7,8)
:HA#107:E X(.?){7,}Y X1234567Y (0,9)(7,8)
:HA#108:E X(.?){8,}Y X1234567Y (0,9)(8,8)
#:HA#110:E X(.?){0,8}Y X1234567Y (0,9)(7,8)
:HA#110:E X(.?){0,8}Y X1234567Y (0,9)(8,8) RE2/Go
#:HA#111:E X(.?){1,8}Y X1234567Y (0,9)(7,8)
:HA#111:E X(.?){1,8}Y X1234567Y (0,9)(8,8) RE2/Go
#:HA#112:E X(.?){2,8}Y X1234567Y (0,9)(7,8)
:HA#112:E X(.?){2,8}Y X1234567Y (0,9)(8,8) RE2/Go
#:HA#113:E X(.?){3,8}Y X1234567Y (0,9)(7,8)
:HA#113:E X(.?){3,8}Y X1234567Y (0,9)(8,8) RE2/Go
#:HA#114:E X(.?){4,8}Y X1234567Y (0,9)(7,8)
:HA#114:E X(.?){4,8}Y X1234567Y (0,9)(8,8) RE2/Go
#:HA#115:E X(.?){5,8}Y X1234567Y (0,9)(7,8)
:HA#115:E X(.?){5,8}Y X1234567Y (0,9)(8,8) RE2/Go
#:HA#116:E X(.?){6,8}Y X1234567Y (0,9)(7,8)
:HA#116:E X(.?){6,8}Y X1234567Y (0,9)(8,8) RE2/Go
#:HA#117:E X(.?){7,8}Y X1234567Y (0,9)(7,8)
:HA#117:E X(.?){7,8}Y X1234567Y (0,9)(8,8) RE2/Go
:HA#118:E X(.?){8,8}Y X1234567Y (0,9)(8,8)
# These test a fixed bug in my regex-tdfa that did not keep the expanded
# form properly grouped, so right association did the wrong thing with
# these ambiguous patterns (crafted just to test my code when I became
# suspicious of my implementation). The first subexpression should use
# "ab" then "a" then "bcd".
# OS X / FreeBSD / NetBSD badly fail many of these, with impossible
# results like (0,6)(4,5)(6,6).
#:HA#260:E (a|ab|c|bcd){0,}(d*) ababcd (0,6)(3,6)(6,6)
:HA#260:E (a|ab|c|bcd){0,}(d*) ababcd (0,1)(0,1)(1,1) Rust
#:HA#261:E (a|ab|c|bcd){1,}(d*) ababcd (0,6)(3,6)(6,6)
:HA#261:E (a|ab|c|bcd){1,}(d*) ababcd (0,1)(0,1)(1,1) Rust
:HA#262:E (a|ab|c|bcd){2,}(d*) ababcd (0,6)(3,6)(6,6)
:HA#263:E (a|ab|c|bcd){3,}(d*) ababcd (0,6)(3,6)(6,6)
:HA#264:E (a|ab|c|bcd){4,}(d*) ababcd NOMATCH
#:HA#265:E (a|ab|c|bcd){0,10}(d*) ababcd (0,6)(3,6)(6,6)
:HA#265:E (a|ab|c|bcd){0,10}(d*) ababcd (0,1)(0,1)(1,1) Rust
#:HA#266:E (a|ab|c|bcd){1,10}(d*) ababcd (0,6)(3,6)(6,6)
:HA#266:E (a|ab|c|bcd){1,10}(d*) ababcd (0,1)(0,1)(1,1) Rust
:HA#267:E (a|ab|c|bcd){2,10}(d*) ababcd (0,6)(3,6)(6,6)
:HA#268:E (a|ab|c|bcd){3,10}(d*) ababcd (0,6)(3,6)(6,6)
:HA#269:E (a|ab|c|bcd){4,10}(d*) ababcd NOMATCH
#:HA#270:E (a|ab|c|bcd)*(d*) ababcd (0,6)(3,6)(6,6)
:HA#270:E (a|ab|c|bcd)*(d*) ababcd (0,1)(0,1)(1,1) Rust
#:HA#271:E (a|ab|c|bcd)+(d*) ababcd (0,6)(3,6)(6,6)
:HA#271:E (a|ab|c|bcd)+(d*) ababcd (0,1)(0,1)(1,1) Rust
# The above worked on Linux/GLIBC but the following often fail.
# They also trip up OS X / FreeBSD / NetBSD:
#:HA#280:E (ab|a|c|bcd){0,}(d*) ababcd (0,6)(3,6)(6,6)
:HA#280:E (ab|a|c|bcd){0,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
#:HA#281:E (ab|a|c|bcd){1,}(d*) ababcd (0,6)(3,6)(6,6)
:HA#281:E (ab|a|c|bcd){1,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
#:HA#282:E (ab|a|c|bcd){2,}(d*) ababcd (0,6)(3,6)(6,6)
:HA#282:E (ab|a|c|bcd){2,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
#:HA#283:E (ab|a|c|bcd){3,}(d*) ababcd (0,6)(3,6)(6,6)
:HA#283:E (ab|a|c|bcd){3,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
:HA#284:E (ab|a|c|bcd){4,}(d*) ababcd NOMATCH
#:HA#285:E (ab|a|c|bcd){0,10}(d*) ababcd (0,6)(3,6)(6,6)
:HA#285:E (ab|a|c|bcd){0,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
#:HA#286:E (ab|a|c|bcd){1,10}(d*) ababcd (0,6)(3,6)(6,6)
:HA#286:E (ab|a|c|bcd){1,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
#:HA#287:E (ab|a|c|bcd){2,10}(d*) ababcd (0,6)(3,6)(6,6)
:HA#287:E (ab|a|c|bcd){2,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
#:HA#288:E (ab|a|c|bcd){3,10}(d*) ababcd (0,6)(3,6)(6,6)
:HA#288:E (ab|a|c|bcd){3,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
:HA#289:E (ab|a|c|bcd){4,10}(d*) ababcd NOMATCH
#:HA#290:E (ab|a|c|bcd)*(d*) ababcd (0,6)(3,6)(6,6)
:HA#290:E (ab|a|c|bcd)*(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
#:HA#291:E (ab|a|c|bcd)+(d*) ababcd (0,6)(3,6)(6,6)
:HA#291:E (ab|a|c|bcd)+(d*) ababcd (0,6)(4,5)(5,6) RE2/Go

View File

@@ -0,0 +1,405 @@
# !!! DO NOT EDIT !!!
# Automatically generated by 'regex-cli generate fowler'.
# Numbers in the test names correspond to the line number of the test from
# the original dat file.
[[test]]
name = "nullsubexpr3"
regex = '''(a*)*'''
haystack = '''a'''
matches = [[[0, 1], [0, 1]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr4"
regex = '''(a*)*'''
haystack = '''x'''
matches = [[[0, 0], [0, 0]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr5"
regex = '''(a*)*'''
haystack = '''aaaaaa'''
matches = [[[0, 6], [0, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr6"
regex = '''(a*)*'''
haystack = '''aaaaaax'''
matches = [[[0, 6], [0, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr7"
regex = '''(a*)+'''
haystack = '''a'''
matches = [[[0, 1], [0, 1]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr8"
regex = '''(a*)+'''
haystack = '''x'''
matches = [[[0, 0], [0, 0]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr9"
regex = '''(a*)+'''
haystack = '''aaaaaa'''
matches = [[[0, 6], [0, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr10"
regex = '''(a*)+'''
haystack = '''aaaaaax'''
matches = [[[0, 6], [0, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr11"
regex = '''(a+)*'''
haystack = '''a'''
matches = [[[0, 1], [0, 1]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr12"
regex = '''(a+)*'''
haystack = '''x'''
matches = [[[0, 0], []]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr13"
regex = '''(a+)*'''
haystack = '''aaaaaa'''
matches = [[[0, 6], [0, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr14"
regex = '''(a+)*'''
haystack = '''aaaaaax'''
matches = [[[0, 6], [0, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr15"
regex = '''(a+)+'''
haystack = '''a'''
matches = [[[0, 1], [0, 1]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr16"
regex = '''(a+)+'''
haystack = '''x'''
matches = []
match-limit = 1
[[test]]
name = "nullsubexpr17"
regex = '''(a+)+'''
haystack = '''aaaaaa'''
matches = [[[0, 6], [0, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr18"
regex = '''(a+)+'''
haystack = '''aaaaaax'''
matches = [[[0, 6], [0, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr20"
regex = '''([a]*)*'''
haystack = '''a'''
matches = [[[0, 1], [0, 1]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr21"
regex = '''([a]*)*'''
haystack = '''x'''
matches = [[[0, 0], [0, 0]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr22"
regex = '''([a]*)*'''
haystack = '''aaaaaa'''
matches = [[[0, 6], [0, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr23"
regex = '''([a]*)*'''
haystack = '''aaaaaax'''
matches = [[[0, 6], [0, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr24"
regex = '''([a]*)+'''
haystack = '''a'''
matches = [[[0, 1], [0, 1]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr25"
regex = '''([a]*)+'''
haystack = '''x'''
matches = [[[0, 0], [0, 0]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr26"
regex = '''([a]*)+'''
haystack = '''aaaaaa'''
matches = [[[0, 6], [0, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr27"
regex = '''([a]*)+'''
haystack = '''aaaaaax'''
matches = [[[0, 6], [0, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr28"
regex = '''([^b]*)*'''
haystack = '''a'''
matches = [[[0, 1], [0, 1]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr29"
regex = '''([^b]*)*'''
haystack = '''b'''
matches = [[[0, 0], [0, 0]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr30"
regex = '''([^b]*)*'''
haystack = '''aaaaaa'''
matches = [[[0, 6], [0, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr31"
regex = '''([^b]*)*'''
haystack = '''aaaaaab'''
matches = [[[0, 6], [0, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr32"
regex = '''([ab]*)*'''
haystack = '''a'''
matches = [[[0, 1], [0, 1]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr33"
regex = '''([ab]*)*'''
haystack = '''aaaaaa'''
matches = [[[0, 6], [0, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr34"
regex = '''([ab]*)*'''
haystack = '''ababab'''
matches = [[[0, 6], [0, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr35"
regex = '''([ab]*)*'''
haystack = '''bababa'''
matches = [[[0, 6], [0, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr36"
regex = '''([ab]*)*'''
haystack = '''b'''
matches = [[[0, 1], [0, 1]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr37"
regex = '''([ab]*)*'''
haystack = '''bbbbbb'''
matches = [[[0, 6], [0, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr38"
regex = '''([ab]*)*'''
haystack = '''aaaabcde'''
matches = [[[0, 5], [0, 5]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr39"
regex = '''([^a]*)*'''
haystack = '''b'''
matches = [[[0, 1], [0, 1]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr40"
regex = '''([^a]*)*'''
haystack = '''bbbbbb'''
matches = [[[0, 6], [0, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr41"
regex = '''([^a]*)*'''
haystack = '''aaaaaa'''
matches = [[[0, 0], [0, 0]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr42"
regex = '''([^ab]*)*'''
haystack = '''ccccxx'''
matches = [[[0, 6], [0, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr43"
regex = '''([^ab]*)*'''
haystack = '''ababab'''
matches = [[[0, 0], [0, 0]]]
match-limit = 1
anchored = true
# Test added by Rust regex project.
[[test]]
name = "nullsubexpr46"
regex = '''((z)+|a)*'''
haystack = '''zabcde'''
matches = [[[0, 2], [1, 2], [0, 1]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr64"
regex = '''(a*)*(x)'''
haystack = '''x'''
matches = [[[0, 1], [0, 0], [0, 1]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr65"
regex = '''(a*)*(x)'''
haystack = '''ax'''
matches = [[[0, 2], [0, 1], [1, 2]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr66"
regex = '''(a*)*(x)'''
haystack = '''axa'''
matches = [[[0, 2], [0, 1], [1, 2]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr68"
regex = '''(a*)+(x)'''
haystack = '''x'''
matches = [[[0, 1], [0, 0], [0, 1]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr69"
regex = '''(a*)+(x)'''
haystack = '''ax'''
matches = [[[0, 2], [0, 1], [1, 2]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr70"
regex = '''(a*)+(x)'''
haystack = '''axa'''
matches = [[[0, 2], [0, 1], [1, 2]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr72"
regex = '''(a*){2}(x)'''
haystack = '''x'''
matches = [[[0, 1], [0, 0], [0, 1]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr73"
regex = '''(a*){2}(x)'''
haystack = '''ax'''
matches = [[[0, 2], [1, 1], [1, 2]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr74"
regex = '''(a*){2}(x)'''
haystack = '''axa'''
matches = [[[0, 2], [1, 1], [1, 2]]]
match-limit = 1
anchored = true

View File

@@ -0,0 +1,746 @@
# !!! DO NOT EDIT !!!
# Automatically generated by 'regex-cli generate fowler'.
# Numbers in the test names correspond to the line number of the test from
# the original dat file.
[[test]]
name = "repetition10"
regex = '''((..)|(.))'''
haystack = ''''''
matches = []
match-limit = 1
[[test]]
name = "repetition11"
regex = '''((..)|(.))((..)|(.))'''
haystack = ''''''
matches = []
match-limit = 1
[[test]]
name = "repetition12"
regex = '''((..)|(.))((..)|(.))((..)|(.))'''
haystack = ''''''
matches = []
match-limit = 1
[[test]]
name = "repetition14"
regex = '''((..)|(.)){1}'''
haystack = ''''''
matches = []
match-limit = 1
[[test]]
name = "repetition15"
regex = '''((..)|(.)){2}'''
haystack = ''''''
matches = []
match-limit = 1
[[test]]
name = "repetition16"
regex = '''((..)|(.)){3}'''
haystack = ''''''
matches = []
match-limit = 1
[[test]]
name = "repetition18"
regex = '''((..)|(.))*'''
haystack = ''''''
matches = [[[0, 0], [], [], []]]
match-limit = 1
anchored = true
[[test]]
name = "repetition20"
regex = '''((..)|(.))'''
haystack = '''a'''
matches = [[[0, 1], [0, 1], [], [0, 1]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition21"
regex = '''((..)|(.))((..)|(.))'''
haystack = '''a'''
matches = []
match-limit = 1
[[test]]
name = "repetition22"
regex = '''((..)|(.))((..)|(.))((..)|(.))'''
haystack = '''a'''
matches = []
match-limit = 1
[[test]]
name = "repetition24"
regex = '''((..)|(.)){1}'''
haystack = '''a'''
matches = [[[0, 1], [0, 1], [], [0, 1]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition25"
regex = '''((..)|(.)){2}'''
haystack = '''a'''
matches = []
match-limit = 1
[[test]]
name = "repetition26"
regex = '''((..)|(.)){3}'''
haystack = '''a'''
matches = []
match-limit = 1
[[test]]
name = "repetition28"
regex = '''((..)|(.))*'''
haystack = '''a'''
matches = [[[0, 1], [0, 1], [], [0, 1]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition30"
regex = '''((..)|(.))'''
haystack = '''aa'''
matches = [[[0, 2], [0, 2], [0, 2], []]]
match-limit = 1
anchored = true
[[test]]
name = "repetition31"
regex = '''((..)|(.))((..)|(.))'''
haystack = '''aa'''
matches = [[[0, 2], [0, 1], [], [0, 1], [1, 2], [], [1, 2]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition32"
regex = '''((..)|(.))((..)|(.))((..)|(.))'''
haystack = '''aa'''
matches = []
match-limit = 1
[[test]]
name = "repetition34"
regex = '''((..)|(.)){1}'''
haystack = '''aa'''
matches = [[[0, 2], [0, 2], [0, 2], []]]
match-limit = 1
anchored = true
[[test]]
name = "repetition35"
regex = '''((..)|(.)){2}'''
haystack = '''aa'''
matches = [[[0, 2], [1, 2], [], [1, 2]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition36"
regex = '''((..)|(.)){3}'''
haystack = '''aa'''
matches = []
match-limit = 1
[[test]]
name = "repetition38"
regex = '''((..)|(.))*'''
haystack = '''aa'''
matches = [[[0, 2], [0, 2], [0, 2], []]]
match-limit = 1
anchored = true
[[test]]
name = "repetition40"
regex = '''((..)|(.))'''
haystack = '''aaa'''
matches = [[[0, 2], [0, 2], [0, 2], []]]
match-limit = 1
anchored = true
[[test]]
name = "repetition41"
regex = '''((..)|(.))((..)|(.))'''
haystack = '''aaa'''
matches = [[[0, 3], [0, 2], [0, 2], [], [2, 3], [], [2, 3]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition42"
regex = '''((..)|(.))((..)|(.))((..)|(.))'''
haystack = '''aaa'''
matches = [[[0, 3], [0, 1], [], [0, 1], [1, 2], [], [1, 2], [2, 3], [], [2, 3]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition44"
regex = '''((..)|(.)){1}'''
haystack = '''aaa'''
matches = [[[0, 2], [0, 2], [0, 2], []]]
match-limit = 1
anchored = true
# Test added by RE2/Go project.
[[test]]
name = "repetition46"
regex = '''((..)|(.)){2}'''
haystack = '''aaa'''
matches = [[[0, 3], [2, 3], [0, 2], [2, 3]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition47"
regex = '''((..)|(.)){3}'''
haystack = '''aaa'''
matches = [[[0, 3], [2, 3], [], [2, 3]]]
match-limit = 1
anchored = true
# Test added by RE2/Go project.
[[test]]
name = "repetition50"
regex = '''((..)|(.))*'''
haystack = '''aaa'''
matches = [[[0, 3], [2, 3], [0, 2], [2, 3]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition52"
regex = '''((..)|(.))'''
haystack = '''aaaa'''
matches = [[[0, 2], [0, 2], [0, 2], []]]
match-limit = 1
anchored = true
[[test]]
name = "repetition53"
regex = '''((..)|(.))((..)|(.))'''
haystack = '''aaaa'''
matches = [[[0, 4], [0, 2], [0, 2], [], [2, 4], [2, 4], []]]
match-limit = 1
anchored = true
[[test]]
name = "repetition54"
regex = '''((..)|(.))((..)|(.))((..)|(.))'''
haystack = '''aaaa'''
matches = [[[0, 4], [0, 2], [0, 2], [], [2, 3], [], [2, 3], [3, 4], [], [3, 4]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition56"
regex = '''((..)|(.)){1}'''
haystack = '''aaaa'''
matches = [[[0, 2], [0, 2], [0, 2], []]]
match-limit = 1
anchored = true
[[test]]
name = "repetition57"
regex = '''((..)|(.)){2}'''
haystack = '''aaaa'''
matches = [[[0, 4], [2, 4], [2, 4], []]]
match-limit = 1
anchored = true
# Test added by RE2/Go project.
[[test]]
name = "repetition59"
regex = '''((..)|(.)){3}'''
haystack = '''aaaa'''
matches = [[[0, 4], [3, 4], [0, 2], [3, 4]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition61"
regex = '''((..)|(.))*'''
haystack = '''aaaa'''
matches = [[[0, 4], [2, 4], [2, 4], []]]
match-limit = 1
anchored = true
[[test]]
name = "repetition63"
regex = '''((..)|(.))'''
haystack = '''aaaaa'''
matches = [[[0, 2], [0, 2], [0, 2], []]]
match-limit = 1
anchored = true
[[test]]
name = "repetition64"
regex = '''((..)|(.))((..)|(.))'''
haystack = '''aaaaa'''
matches = [[[0, 4], [0, 2], [0, 2], [], [2, 4], [2, 4], []]]
match-limit = 1
anchored = true
[[test]]
name = "repetition65"
regex = '''((..)|(.))((..)|(.))((..)|(.))'''
haystack = '''aaaaa'''
matches = [[[0, 5], [0, 2], [0, 2], [], [2, 4], [2, 4], [], [4, 5], [], [4, 5]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition67"
regex = '''((..)|(.)){1}'''
haystack = '''aaaaa'''
matches = [[[0, 2], [0, 2], [0, 2], []]]
match-limit = 1
anchored = true
[[test]]
name = "repetition68"
regex = '''((..)|(.)){2}'''
haystack = '''aaaaa'''
matches = [[[0, 4], [2, 4], [2, 4], []]]
match-limit = 1
anchored = true
# Test added by RE2/Go project.
[[test]]
name = "repetition70"
regex = '''((..)|(.)){3}'''
haystack = '''aaaaa'''
matches = [[[0, 5], [4, 5], [2, 4], [4, 5]]]
match-limit = 1
anchored = true
# Test added by RE2/Go project.
[[test]]
name = "repetition73"
regex = '''((..)|(.))*'''
haystack = '''aaaaa'''
matches = [[[0, 5], [4, 5], [2, 4], [4, 5]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition75"
regex = '''((..)|(.))'''
haystack = '''aaaaaa'''
matches = [[[0, 2], [0, 2], [0, 2], []]]
match-limit = 1
anchored = true
[[test]]
name = "repetition76"
regex = '''((..)|(.))((..)|(.))'''
haystack = '''aaaaaa'''
matches = [[[0, 4], [0, 2], [0, 2], [], [2, 4], [2, 4], []]]
match-limit = 1
anchored = true
[[test]]
name = "repetition77"
regex = '''((..)|(.))((..)|(.))((..)|(.))'''
haystack = '''aaaaaa'''
matches = [[[0, 6], [0, 2], [0, 2], [], [2, 4], [2, 4], [], [4, 6], [4, 6], []]]
match-limit = 1
anchored = true
[[test]]
name = "repetition79"
regex = '''((..)|(.)){1}'''
haystack = '''aaaaaa'''
matches = [[[0, 2], [0, 2], [0, 2], []]]
match-limit = 1
anchored = true
[[test]]
name = "repetition80"
regex = '''((..)|(.)){2}'''
haystack = '''aaaaaa'''
matches = [[[0, 4], [2, 4], [2, 4], []]]
match-limit = 1
anchored = true
[[test]]
name = "repetition81"
regex = '''((..)|(.)){3}'''
haystack = '''aaaaaa'''
matches = [[[0, 6], [4, 6], [4, 6], []]]
match-limit = 1
anchored = true
[[test]]
name = "repetition83"
regex = '''((..)|(.))*'''
haystack = '''aaaaaa'''
matches = [[[0, 6], [4, 6], [4, 6], []]]
match-limit = 1
anchored = true
[[test]]
name = "repetition-expensive90"
regex = '''X(.?){0,}Y'''
haystack = '''X1234567Y'''
matches = [[[0, 9], [7, 8]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition-expensive91"
regex = '''X(.?){1,}Y'''
haystack = '''X1234567Y'''
matches = [[[0, 9], [7, 8]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition-expensive92"
regex = '''X(.?){2,}Y'''
haystack = '''X1234567Y'''
matches = [[[0, 9], [7, 8]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition-expensive93"
regex = '''X(.?){3,}Y'''
haystack = '''X1234567Y'''
matches = [[[0, 9], [7, 8]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition-expensive94"
regex = '''X(.?){4,}Y'''
haystack = '''X1234567Y'''
matches = [[[0, 9], [7, 8]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition-expensive95"
regex = '''X(.?){5,}Y'''
haystack = '''X1234567Y'''
matches = [[[0, 9], [7, 8]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition-expensive96"
regex = '''X(.?){6,}Y'''
haystack = '''X1234567Y'''
matches = [[[0, 9], [7, 8]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition-expensive97"
regex = '''X(.?){7,}Y'''
haystack = '''X1234567Y'''
matches = [[[0, 9], [7, 8]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition-expensive98"
regex = '''X(.?){8,}Y'''
haystack = '''X1234567Y'''
matches = [[[0, 9], [8, 8]]]
match-limit = 1
anchored = true
# Test added by RE2/Go project.
[[test]]
name = "repetition-expensive100"
regex = '''X(.?){0,8}Y'''
haystack = '''X1234567Y'''
matches = [[[0, 9], [8, 8]]]
match-limit = 1
anchored = true
# Test added by RE2/Go project.
[[test]]
name = "repetition-expensive102"
regex = '''X(.?){1,8}Y'''
haystack = '''X1234567Y'''
matches = [[[0, 9], [8, 8]]]
match-limit = 1
anchored = true
# Test added by RE2/Go project.
[[test]]
name = "repetition-expensive104"
regex = '''X(.?){2,8}Y'''
haystack = '''X1234567Y'''
matches = [[[0, 9], [8, 8]]]
match-limit = 1
anchored = true
# Test added by RE2/Go project.
[[test]]
name = "repetition-expensive106"
regex = '''X(.?){3,8}Y'''
haystack = '''X1234567Y'''
matches = [[[0, 9], [8, 8]]]
match-limit = 1
anchored = true
# Test added by RE2/Go project.
[[test]]
name = "repetition-expensive108"
regex = '''X(.?){4,8}Y'''
haystack = '''X1234567Y'''
matches = [[[0, 9], [8, 8]]]
match-limit = 1
anchored = true
# Test added by RE2/Go project.
[[test]]
name = "repetition-expensive110"
regex = '''X(.?){5,8}Y'''
haystack = '''X1234567Y'''
matches = [[[0, 9], [8, 8]]]
match-limit = 1
anchored = true
# Test added by RE2/Go project.
[[test]]
name = "repetition-expensive112"
regex = '''X(.?){6,8}Y'''
haystack = '''X1234567Y'''
matches = [[[0, 9], [8, 8]]]
match-limit = 1
anchored = true
# Test added by RE2/Go project.
[[test]]
name = "repetition-expensive114"
regex = '''X(.?){7,8}Y'''
haystack = '''X1234567Y'''
matches = [[[0, 9], [8, 8]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition-expensive115"
regex = '''X(.?){8,8}Y'''
haystack = '''X1234567Y'''
matches = [[[0, 9], [8, 8]]]
match-limit = 1
anchored = true
# Test added by Rust regex project.
[[test]]
name = "repetition-expensive127"
regex = '''(a|ab|c|bcd){0,}(d*)'''
haystack = '''ababcd'''
matches = [[[0, 1], [0, 1], [1, 1]]]
match-limit = 1
anchored = true
# Test added by Rust regex project.
[[test]]
name = "repetition-expensive129"
regex = '''(a|ab|c|bcd){1,}(d*)'''
haystack = '''ababcd'''
matches = [[[0, 1], [0, 1], [1, 1]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition-expensive130"
regex = '''(a|ab|c|bcd){2,}(d*)'''
haystack = '''ababcd'''
matches = [[[0, 6], [3, 6], [6, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition-expensive131"
regex = '''(a|ab|c|bcd){3,}(d*)'''
haystack = '''ababcd'''
matches = [[[0, 6], [3, 6], [6, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition-expensive132"
regex = '''(a|ab|c|bcd){4,}(d*)'''
haystack = '''ababcd'''
matches = []
match-limit = 1
# Test added by Rust regex project.
[[test]]
name = "repetition-expensive134"
regex = '''(a|ab|c|bcd){0,10}(d*)'''
haystack = '''ababcd'''
matches = [[[0, 1], [0, 1], [1, 1]]]
match-limit = 1
anchored = true
# Test added by Rust regex project.
[[test]]
name = "repetition-expensive136"
regex = '''(a|ab|c|bcd){1,10}(d*)'''
haystack = '''ababcd'''
matches = [[[0, 1], [0, 1], [1, 1]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition-expensive137"
regex = '''(a|ab|c|bcd){2,10}(d*)'''
haystack = '''ababcd'''
matches = [[[0, 6], [3, 6], [6, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition-expensive138"
regex = '''(a|ab|c|bcd){3,10}(d*)'''
haystack = '''ababcd'''
matches = [[[0, 6], [3, 6], [6, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition-expensive139"
regex = '''(a|ab|c|bcd){4,10}(d*)'''
haystack = '''ababcd'''
matches = []
match-limit = 1
# Test added by Rust regex project.
[[test]]
name = "repetition-expensive141"
regex = '''(a|ab|c|bcd)*(d*)'''
haystack = '''ababcd'''
matches = [[[0, 1], [0, 1], [1, 1]]]
match-limit = 1
anchored = true
# Test added by Rust regex project.
[[test]]
name = "repetition-expensive143"
regex = '''(a|ab|c|bcd)+(d*)'''
haystack = '''ababcd'''
matches = [[[0, 1], [0, 1], [1, 1]]]
match-limit = 1
anchored = true
# Test added by RE2/Go project.
[[test]]
name = "repetition-expensive149"
regex = '''(ab|a|c|bcd){0,}(d*)'''
haystack = '''ababcd'''
matches = [[[0, 6], [4, 5], [5, 6]]]
match-limit = 1
anchored = true
# Test added by RE2/Go project.
[[test]]
name = "repetition-expensive151"
regex = '''(ab|a|c|bcd){1,}(d*)'''
haystack = '''ababcd'''
matches = [[[0, 6], [4, 5], [5, 6]]]
match-limit = 1
anchored = true
# Test added by RE2/Go project.
[[test]]
name = "repetition-expensive153"
regex = '''(ab|a|c|bcd){2,}(d*)'''
haystack = '''ababcd'''
matches = [[[0, 6], [4, 5], [5, 6]]]
match-limit = 1
anchored = true
# Test added by RE2/Go project.
[[test]]
name = "repetition-expensive155"
regex = '''(ab|a|c|bcd){3,}(d*)'''
haystack = '''ababcd'''
matches = [[[0, 6], [4, 5], [5, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition-expensive156"
regex = '''(ab|a|c|bcd){4,}(d*)'''
haystack = '''ababcd'''
matches = []
match-limit = 1
# Test added by RE2/Go project.
[[test]]
name = "repetition-expensive158"
regex = '''(ab|a|c|bcd){0,10}(d*)'''
haystack = '''ababcd'''
matches = [[[0, 6], [4, 5], [5, 6]]]
match-limit = 1
anchored = true
# Test added by RE2/Go project.
[[test]]
name = "repetition-expensive160"
regex = '''(ab|a|c|bcd){1,10}(d*)'''
haystack = '''ababcd'''
matches = [[[0, 6], [4, 5], [5, 6]]]
match-limit = 1
anchored = true
# Test added by RE2/Go project.
[[test]]
name = "repetition-expensive162"
regex = '''(ab|a|c|bcd){2,10}(d*)'''
haystack = '''ababcd'''
matches = [[[0, 6], [4, 5], [5, 6]]]
match-limit = 1
anchored = true
# Test added by RE2/Go project.
[[test]]
name = "repetition-expensive164"
regex = '''(ab|a|c|bcd){3,10}(d*)'''
haystack = '''ababcd'''
matches = [[[0, 6], [4, 5], [5, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition-expensive165"
regex = '''(ab|a|c|bcd){4,10}(d*)'''
haystack = '''ababcd'''
matches = []
match-limit = 1
# Test added by RE2/Go project.
[[test]]
name = "repetition-expensive167"
regex = '''(ab|a|c|bcd)*(d*)'''
haystack = '''ababcd'''
matches = [[[0, 6], [4, 5], [5, 6]]]
match-limit = 1
anchored = true
# Test added by RE2/Go project.
[[test]]
name = "repetition-expensive169"
regex = '''(ab|a|c|bcd)+(d*)'''
haystack = '''ababcd'''
matches = [[[0, 6], [4, 5], [5, 6]]]
match-limit = 1
anchored = true

143
vendor/regex/testdata/iter.toml vendored Normal file
View File

@@ -0,0 +1,143 @@
[[test]]
name = "1"
regex = "a"
haystack = "aaa"
matches = [[0, 1], [1, 2], [2, 3]]
[[test]]
name = "2"
regex = "a"
haystack = "aba"
matches = [[0, 1], [2, 3]]
[[test]]
name = "empty1"
regex = ''
haystack = ''
matches = [[0, 0]]
[[test]]
name = "empty2"
regex = ''
haystack = 'abc'
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "empty3"
regex = '(?:)'
haystack = 'abc'
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "empty4"
regex = '(?:)*'
haystack = 'abc'
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "empty5"
regex = '(?:)+'
haystack = 'abc'
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "empty6"
regex = '(?:)?'
haystack = 'abc'
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "empty7"
regex = '(?:)(?:)'
haystack = 'abc'
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "empty8"
regex = '(?:)+|z'
haystack = 'abc'
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "empty9"
regex = 'z|(?:)+'
haystack = 'abc'
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "empty10"
regex = '(?:)+|b'
haystack = 'abc'
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "empty11"
regex = 'b|(?:)+'
haystack = 'abc'
matches = [[0, 0], [1, 2], [3, 3]]
[[test]]
name = "start1"
regex = "^a"
haystack = "a"
matches = [[0, 1]]
[[test]]
name = "start2"
regex = "^a"
haystack = "aa"
matches = [[0, 1]]
[[test]]
name = "anchored1"
regex = "a"
haystack = "a"
matches = [[0, 1]]
anchored = true
# This test is pretty subtle. It demonstrates the crucial difference between
# '^a' and 'a' compiled in 'anchored' mode. The former regex exclusively
# matches at the start of a haystack and nowhere else. The latter regex has
# no such restriction, but its automaton is constructed such that it lacks a
# `.*?` prefix. So it can actually produce matches at multiple locations.
# The anchored3 test drives this point home.
[[test]]
name = "anchored2"
regex = "a"
haystack = "aa"
matches = [[0, 1], [1, 2]]
anchored = true
# Unlike anchored2, this test stops matching anything after it sees `b`
# since it lacks a `.*?` prefix. Since it is looking for 'a' but sees 'b', it
# determines that there are no remaining matches.
[[test]]
name = "anchored3"
regex = "a"
haystack = "aaba"
matches = [[0, 1], [1, 2]]
anchored = true
[[test]]
name = "nonempty-followedby-empty"
regex = 'abc|.*?'
haystack = "abczzz"
matches = [[0, 3], [4, 4], [5, 5], [6, 6]]
[[test]]
name = "nonempty-followedby-oneempty"
regex = 'abc|.*?'
haystack = "abcz"
matches = [[0, 3], [4, 4]]
[[test]]
name = "nonempty-followedby-onemixed"
regex = 'abc|.*?'
haystack = "abczabc"
matches = [[0, 3], [4, 7]]
[[test]]
name = "nonempty-followedby-twomixed"
regex = 'abc|.*?'
haystack = "abczzabc"
matches = [[0, 3], [4, 4], [5, 8]]

25
vendor/regex/testdata/leftmost-all.toml vendored Normal file
View File

@@ -0,0 +1,25 @@
[[test]]
name = "alt"
regex = 'foo|foobar'
haystack = "foobar"
matches = [[0, 6]]
match-kind = "all"
search-kind = "leftmost"
[[test]]
name = "multi"
regex = ['foo', 'foobar']
haystack = "foobar"
matches = [
{ id = 1, span = [0, 6] },
]
match-kind = "all"
search-kind = "leftmost"
[[test]]
name = "dotall"
regex = '(?s:.)'
haystack = "foobar"
matches = [[5, 6]]
match-kind = "all"
search-kind = "leftmost"

View File

@@ -0,0 +1,109 @@
# This tests that we can switch the line terminator to the NUL byte.
[[test]]
name = "nul"
regex = '(?m)^[a-z]+$'
haystack = '\x00abc\x00'
matches = [[1, 4]]
unescape = true
line-terminator = '\x00'
# This tests that '.' will not match the configured line terminator, but will
# match \n.
[[test]]
name = "dot-changes-with-line-terminator"
regex = '.'
haystack = '\x00\n'
matches = [[1, 2]]
unescape = true
line-terminator = '\x00'
# This tests that when we switch the line terminator, \n is no longer
# recognized as the terminator.
[[test]]
name = "not-line-feed"
regex = '(?m)^[a-z]+$'
haystack = '\nabc\n'
matches = []
unescape = true
line-terminator = '\x00'
# This tests that we can set the line terminator to a non-ASCII byte and have
# it behave as expected.
[[test]]
name = "non-ascii"
regex = '(?m)^[a-z]+$'
haystack = '\xFFabc\xFF'
matches = [[1, 4]]
unescape = true
line-terminator = '\xFF'
utf8 = false
# This tests a tricky case where the line terminator is set to \r. This ensures
# that the StartLF look-behind assertion is tracked when computing the start
# state.
[[test]]
name = "carriage"
regex = '(?m)^[a-z]+'
haystack = 'ABC\rabc'
matches = [[4, 7]]
bounds = [4, 7]
unescape = true
line-terminator = '\r'
# This tests that we can set the line terminator to a byte corresponding to a
# word character, and things work as expected.
[[test]]
name = "word-byte"
regex = '(?m)^[a-z]+$'
haystack = 'ZabcZ'
matches = [[1, 4]]
unescape = true
line-terminator = 'Z'
# This tests that we can set the line terminator to a byte corresponding to a
# non-word character, and things work as expected.
[[test]]
name = "non-word-byte"
regex = '(?m)^[a-z]+$'
haystack = '%abc%'
matches = [[1, 4]]
unescape = true
line-terminator = '%'
# This combines "set line terminator to a word byte" with a word boundary
# assertion, which should result in no match even though ^/$ matches.
[[test]]
name = "word-boundary"
regex = '(?m)^\b[a-z]+\b$'
haystack = 'ZabcZ'
matches = []
unescape = true
line-terminator = 'Z'
# Like 'word-boundary', but does an anchored search at the point where ^
# matches, but where \b should not.
[[test]]
name = "word-boundary-at"
regex = '(?m)^\b[a-z]+\b$'
haystack = 'ZabcZ'
matches = []
bounds = [1, 4]
anchored = true
unescape = true
line-terminator = 'Z'
# Like 'word-boundary-at', but flips the word boundary to a negation. This
# in particular tests a tricky case in DFA engines, where they must consider
# explicitly that a starting configuration from a custom line terminator may
# also require setting the "is from word byte" flag on a state. Otherwise,
# it's treated as "not from a word byte," which would result in \B not matching
# here when it should.
[[test]]
name = "not-word-boundary-at"
regex = '(?m)^\B[a-z]+\B$'
haystack = 'ZabcZ'
matches = [[1, 4]]
bounds = [1, 4]
anchored = true
unescape = true
line-terminator = 'Z'

99
vendor/regex/testdata/misc.toml vendored Normal file
View File

@@ -0,0 +1,99 @@
[[test]]
name = "ascii-literal"
regex = "a"
haystack = "a"
matches = [[0, 1]]
[[test]]
name = "ascii-literal-not"
regex = "a"
haystack = "z"
matches = []
[[test]]
name = "ascii-literal-anchored"
regex = "a"
haystack = "a"
matches = [[0, 1]]
anchored = true
[[test]]
name = "ascii-literal-anchored-not"
regex = "a"
haystack = "z"
matches = []
anchored = true
[[test]]
name = "anchor-start-end-line"
regex = '(?m)^bar$'
haystack = "foo\nbar\nbaz"
matches = [[4, 7]]
[[test]]
name = "prefix-literal-match"
regex = '^abc'
haystack = "abc"
matches = [[0, 3]]
[[test]]
name = "prefix-literal-match-ascii"
regex = '^abc'
haystack = "abc"
matches = [[0, 3]]
unicode = false
utf8 = false
[[test]]
name = "prefix-literal-no-match"
regex = '^abc'
haystack = "zabc"
matches = []
[[test]]
name = "one-literal-edge"
regex = 'abc'
haystack = "xxxxxab"
matches = []
[[test]]
name = "terminates"
regex = 'a$'
haystack = "a"
matches = [[0, 1]]
[[test]]
name = "suffix-100"
regex = '.*abcd'
haystack = "abcd"
matches = [[0, 4]]
[[test]]
name = "suffix-200"
regex = '.*(?:abcd)+'
haystack = "abcd"
matches = [[0, 4]]
[[test]]
name = "suffix-300"
regex = '.*(?:abcd)+'
haystack = "abcdabcd"
matches = [[0, 8]]
[[test]]
name = "suffix-400"
regex = '.*(?:abcd)+'
haystack = "abcdxabcd"
matches = [[0, 9]]
[[test]]
name = "suffix-500"
regex = '.*x(?:abcd)+'
haystack = "abcdxabcd"
matches = [[0, 9]]
[[test]]
name = "suffix-600"
regex = '[^abcd]*x(?:abcd)+'
haystack = "abcdxabcd"
matches = [[4, 9]]

845
vendor/regex/testdata/multiline.toml vendored Normal file
View File

@@ -0,0 +1,845 @@
[[test]]
name = "basic1"
regex = '(?m)^[a-z]+$'
haystack = "abc\ndef\nxyz"
matches = [[0, 3], [4, 7], [8, 11]]
[[test]]
name = "basic1-crlf"
regex = '(?Rm)^[a-z]+$'
haystack = "abc\ndef\nxyz"
matches = [[0, 3], [4, 7], [8, 11]]
[[test]]
name = "basic1-crlf-cr"
regex = '(?Rm)^[a-z]+$'
haystack = "abc\rdef\rxyz"
matches = [[0, 3], [4, 7], [8, 11]]
[[test]]
name = "basic2"
regex = '(?m)^$'
haystack = "abc\ndef\nxyz"
matches = []
[[test]]
name = "basic2-crlf"
regex = '(?Rm)^$'
haystack = "abc\ndef\nxyz"
matches = []
[[test]]
name = "basic2-crlf-cr"
regex = '(?Rm)^$'
haystack = "abc\rdef\rxyz"
matches = []
[[test]]
name = "basic3"
regex = '(?m)^'
haystack = "abc\ndef\nxyz"
matches = [[0, 0], [4, 4], [8, 8]]
[[test]]
name = "basic3-crlf"
regex = '(?Rm)^'
haystack = "abc\ndef\nxyz"
matches = [[0, 0], [4, 4], [8, 8]]
[[test]]
name = "basic3-crlf-cr"
regex = '(?Rm)^'
haystack = "abc\rdef\rxyz"
matches = [[0, 0], [4, 4], [8, 8]]
[[test]]
name = "basic4"
regex = '(?m)$'
haystack = "abc\ndef\nxyz"
matches = [[3, 3], [7, 7], [11, 11]]
[[test]]
name = "basic4-crlf"
regex = '(?Rm)$'
haystack = "abc\ndef\nxyz"
matches = [[3, 3], [7, 7], [11, 11]]
[[test]]
name = "basic4-crlf-cr"
regex = '(?Rm)$'
haystack = "abc\rdef\rxyz"
matches = [[3, 3], [7, 7], [11, 11]]
[[test]]
name = "basic5"
regex = '(?m)^[a-z]'
haystack = "abc\ndef\nxyz"
matches = [[0, 1], [4, 5], [8, 9]]
[[test]]
name = "basic5-crlf"
regex = '(?Rm)^[a-z]'
haystack = "abc\ndef\nxyz"
matches = [[0, 1], [4, 5], [8, 9]]
[[test]]
name = "basic5-crlf-cr"
regex = '(?Rm)^[a-z]'
haystack = "abc\rdef\rxyz"
matches = [[0, 1], [4, 5], [8, 9]]
[[test]]
name = "basic6"
regex = '(?m)[a-z]^'
haystack = "abc\ndef\nxyz"
matches = []
[[test]]
name = "basic6-crlf"
regex = '(?Rm)[a-z]^'
haystack = "abc\ndef\nxyz"
matches = []
[[test]]
name = "basic6-crlf-cr"
regex = '(?Rm)[a-z]^'
haystack = "abc\rdef\rxyz"
matches = []
[[test]]
name = "basic7"
regex = '(?m)[a-z]$'
haystack = "abc\ndef\nxyz"
matches = [[2, 3], [6, 7], [10, 11]]
[[test]]
name = "basic7-crlf"
regex = '(?Rm)[a-z]$'
haystack = "abc\ndef\nxyz"
matches = [[2, 3], [6, 7], [10, 11]]
[[test]]
name = "basic7-crlf-cr"
regex = '(?Rm)[a-z]$'
haystack = "abc\rdef\rxyz"
matches = [[2, 3], [6, 7], [10, 11]]
[[test]]
name = "basic8"
regex = '(?m)$[a-z]'
haystack = "abc\ndef\nxyz"
matches = []
[[test]]
name = "basic8-crlf"
regex = '(?Rm)$[a-z]'
haystack = "abc\ndef\nxyz"
matches = []
[[test]]
name = "basic8-crlf-cr"
regex = '(?Rm)$[a-z]'
haystack = "abc\rdef\rxyz"
matches = []
[[test]]
name = "basic9"
regex = '(?m)^$'
haystack = ""
matches = [[0, 0]]
[[test]]
name = "basic9-crlf"
regex = '(?Rm)^$'
haystack = ""
matches = [[0, 0]]
[[test]]
name = "repeat1"
regex = '(?m)(?:^$)*'
haystack = "a\nb\nc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]
[[test]]
name = "repeat1-crlf"
regex = '(?Rm)(?:^$)*'
haystack = "a\nb\nc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]
[[test]]
name = "repeat1-crlf-cr"
regex = '(?Rm)(?:^$)*'
haystack = "a\rb\rc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]
[[test]]
name = "repeat1-no-multi"
regex = '(?:^$)*'
haystack = "a\nb\nc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]
[[test]]
name = "repeat1-no-multi-crlf"
regex = '(?R)(?:^$)*'
haystack = "a\nb\nc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]
[[test]]
name = "repeat1-no-multi-crlf-cr"
regex = '(?R)(?:^$)*'
haystack = "a\rb\rc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]
[[test]]
name = "repeat2"
regex = '(?m)(?:^|a)+'
haystack = "a\naaa\n"
matches = [[0, 0], [2, 2], [3, 5], [6, 6]]
[[test]]
name = "repeat2-crlf"
regex = '(?Rm)(?:^|a)+'
haystack = "a\naaa\n"
matches = [[0, 0], [2, 2], [3, 5], [6, 6]]
[[test]]
name = "repeat2-crlf-cr"
regex = '(?Rm)(?:^|a)+'
haystack = "a\raaa\r"
matches = [[0, 0], [2, 2], [3, 5], [6, 6]]
[[test]]
name = "repeat2-no-multi"
regex = '(?:^|a)+'
haystack = "a\naaa\n"
matches = [[0, 0], [2, 5]]
[[test]]
name = "repeat2-no-multi-crlf"
regex = '(?R)(?:^|a)+'
haystack = "a\naaa\n"
matches = [[0, 0], [2, 5]]
[[test]]
name = "repeat2-no-multi-crlf-cr"
regex = '(?R)(?:^|a)+'
haystack = "a\raaa\r"
matches = [[0, 0], [2, 5]]
[[test]]
name = "repeat3"
regex = '(?m)(?:^|a)*'
haystack = "a\naaa\n"
matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]]
[[test]]
name = "repeat3-crlf"
regex = '(?Rm)(?:^|a)*'
haystack = "a\naaa\n"
matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]]
[[test]]
name = "repeat3-crlf-cr"
regex = '(?Rm)(?:^|a)*'
haystack = "a\raaa\r"
matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]]
[[test]]
name = "repeat3-no-multi"
regex = '(?:^|a)*'
haystack = "a\naaa\n"
matches = [[0, 0], [1, 1], [2, 5], [6, 6]]
[[test]]
name = "repeat3-no-multi-crlf"
regex = '(?R)(?:^|a)*'
haystack = "a\naaa\n"
matches = [[0, 0], [1, 1], [2, 5], [6, 6]]
[[test]]
name = "repeat3-no-multi-crlf-cr"
regex = '(?R)(?:^|a)*'
haystack = "a\raaa\r"
matches = [[0, 0], [1, 1], [2, 5], [6, 6]]
[[test]]
name = "repeat4"
regex = '(?m)(?:^|a+)'
haystack = "a\naaa\n"
matches = [[0, 0], [2, 2], [3, 5], [6, 6]]
[[test]]
name = "repeat4-crlf"
regex = '(?Rm)(?:^|a+)'
haystack = "a\naaa\n"
matches = [[0, 0], [2, 2], [3, 5], [6, 6]]
[[test]]
name = "repeat4-crlf-cr"
regex = '(?Rm)(?:^|a+)'
haystack = "a\raaa\r"
matches = [[0, 0], [2, 2], [3, 5], [6, 6]]
[[test]]
name = "repeat4-no-multi"
regex = '(?:^|a+)'
haystack = "a\naaa\n"
matches = [[0, 0], [2, 5]]
[[test]]
name = "repeat4-no-multi-crlf"
regex = '(?R)(?:^|a+)'
haystack = "a\naaa\n"
matches = [[0, 0], [2, 5]]
[[test]]
name = "repeat4-no-multi-crlf-cr"
regex = '(?R)(?:^|a+)'
haystack = "a\raaa\r"
matches = [[0, 0], [2, 5]]
[[test]]
name = "repeat5"
regex = '(?m)(?:^|a*)'
haystack = "a\naaa\n"
matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]]
[[test]]
name = "repeat5-crlf"
regex = '(?Rm)(?:^|a*)'
haystack = "a\naaa\n"
matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]]
[[test]]
name = "repeat5-crlf-cr"
regex = '(?Rm)(?:^|a*)'
haystack = "a\raaa\r"
matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]]
[[test]]
name = "repeat5-no-multi"
regex = '(?:^|a*)'
haystack = "a\naaa\n"
matches = [[0, 0], [1, 1], [2, 5], [6, 6]]
[[test]]
name = "repeat5-no-multi-crlf"
regex = '(?R)(?:^|a*)'
haystack = "a\naaa\n"
matches = [[0, 0], [1, 1], [2, 5], [6, 6]]
[[test]]
name = "repeat5-no-multi-crlf-cr"
regex = '(?R)(?:^|a*)'
haystack = "a\raaa\r"
matches = [[0, 0], [1, 1], [2, 5], [6, 6]]
[[test]]
name = "repeat6"
regex = '(?m)(?:^[a-z])+'
haystack = "abc\ndef\nxyz"
matches = [[0, 1], [4, 5], [8, 9]]
[[test]]
name = "repeat6-crlf"
regex = '(?Rm)(?:^[a-z])+'
haystack = "abc\ndef\nxyz"
matches = [[0, 1], [4, 5], [8, 9]]
[[test]]
name = "repeat6-crlf-cr"
regex = '(?Rm)(?:^[a-z])+'
haystack = "abc\rdef\rxyz"
matches = [[0, 1], [4, 5], [8, 9]]
[[test]]
name = "repeat6-no-multi"
regex = '(?:^[a-z])+'
haystack = "abc\ndef\nxyz"
matches = [[0, 1]]
[[test]]
name = "repeat6-no-multi-crlf"
regex = '(?R)(?:^[a-z])+'
haystack = "abc\ndef\nxyz"
matches = [[0, 1]]
[[test]]
name = "repeat6-no-multi-crlf-cr"
regex = '(?R)(?:^[a-z])+'
haystack = "abc\rdef\rxyz"
matches = [[0, 1]]
[[test]]
name = "repeat7"
regex = '(?m)(?:^[a-z]{3}\n?)+'
haystack = "abc\ndef\nxyz"
matches = [[0, 11]]
[[test]]
name = "repeat7-crlf"
regex = '(?Rm)(?:^[a-z]{3}\n?)+'
haystack = "abc\ndef\nxyz"
matches = [[0, 11]]
[[test]]
name = "repeat7-crlf-cr"
regex = '(?Rm)(?:^[a-z]{3}\r?)+'
haystack = "abc\rdef\rxyz"
matches = [[0, 11]]
[[test]]
name = "repeat7-no-multi"
regex = '(?:^[a-z]{3}\n?)+'
haystack = "abc\ndef\nxyz"
matches = [[0, 4]]
[[test]]
name = "repeat7-no-multi-crlf"
regex = '(?R)(?:^[a-z]{3}\n?)+'
haystack = "abc\ndef\nxyz"
matches = [[0, 4]]
[[test]]
name = "repeat7-no-multi-crlf-cr"
regex = '(?R)(?:^[a-z]{3}\r?)+'
haystack = "abc\rdef\rxyz"
matches = [[0, 4]]
[[test]]
name = "repeat8"
regex = '(?m)(?:^[a-z]{3}\n?)*'
haystack = "abc\ndef\nxyz"
matches = [[0, 11]]
[[test]]
name = "repeat8-crlf"
regex = '(?Rm)(?:^[a-z]{3}\n?)*'
haystack = "abc\ndef\nxyz"
matches = [[0, 11]]
[[test]]
name = "repeat8-crlf-cr"
regex = '(?Rm)(?:^[a-z]{3}\r?)*'
haystack = "abc\rdef\rxyz"
matches = [[0, 11]]
[[test]]
name = "repeat8-no-multi"
regex = '(?:^[a-z]{3}\n?)*'
haystack = "abc\ndef\nxyz"
matches = [[0, 4], [5, 5], [6, 6], [7, 7], [8, 8], [9, 9], [10, 10], [11, 11]]
[[test]]
name = "repeat8-no-multi-crlf"
regex = '(?R)(?:^[a-z]{3}\n?)*'
haystack = "abc\ndef\nxyz"
matches = [[0, 4], [5, 5], [6, 6], [7, 7], [8, 8], [9, 9], [10, 10], [11, 11]]
[[test]]
name = "repeat8-no-multi-crlf-cr"
regex = '(?R)(?:^[a-z]{3}\r?)*'
haystack = "abc\rdef\rxyz"
matches = [[0, 4], [5, 5], [6, 6], [7, 7], [8, 8], [9, 9], [10, 10], [11, 11]]
[[test]]
name = "repeat9"
regex = '(?m)(?:\n?[a-z]{3}$)+'
haystack = "abc\ndef\nxyz"
matches = [[0, 11]]
[[test]]
name = "repeat9-crlf"
regex = '(?Rm)(?:\n?[a-z]{3}$)+'
haystack = "abc\ndef\nxyz"
matches = [[0, 11]]
[[test]]
name = "repeat9-crlf-cr"
regex = '(?Rm)(?:\r?[a-z]{3}$)+'
haystack = "abc\rdef\rxyz"
matches = [[0, 11]]
[[test]]
name = "repeat9-no-multi"
regex = '(?:\n?[a-z]{3}$)+'
haystack = "abc\ndef\nxyz"
matches = [[7, 11]]
[[test]]
name = "repeat9-no-multi-crlf"
regex = '(?R)(?:\n?[a-z]{3}$)+'
haystack = "abc\ndef\nxyz"
matches = [[7, 11]]
[[test]]
name = "repeat9-no-multi-crlf-cr"
regex = '(?R)(?:\r?[a-z]{3}$)+'
haystack = "abc\rdef\rxyz"
matches = [[7, 11]]
[[test]]
name = "repeat10"
regex = '(?m)(?:\n?[a-z]{3}$)*'
haystack = "abc\ndef\nxyz"
matches = [[0, 11]]
[[test]]
name = "repeat10-crlf"
regex = '(?Rm)(?:\n?[a-z]{3}$)*'
haystack = "abc\ndef\nxyz"
matches = [[0, 11]]
[[test]]
name = "repeat10-crlf-cr"
regex = '(?Rm)(?:\r?[a-z]{3}$)*'
haystack = "abc\rdef\rxyz"
matches = [[0, 11]]
[[test]]
name = "repeat10-no-multi"
regex = '(?:\n?[a-z]{3}$)*'
haystack = "abc\ndef\nxyz"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 11]]
[[test]]
name = "repeat10-no-multi-crlf"
regex = '(?R)(?:\n?[a-z]{3}$)*'
haystack = "abc\ndef\nxyz"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 11]]
[[test]]
name = "repeat10-no-multi-crlf-cr"
regex = '(?R)(?:\r?[a-z]{3}$)*'
haystack = "abc\rdef\rxyz"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 11]]
[[test]]
name = "repeat11"
regex = '(?m)^*'
haystack = "\naa\n"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
[[test]]
name = "repeat11-crlf"
regex = '(?Rm)^*'
haystack = "\naa\n"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
[[test]]
name = "repeat11-crlf-cr"
regex = '(?Rm)^*'
haystack = "\raa\r"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
[[test]]
name = "repeat11-no-multi"
regex = '^*'
haystack = "\naa\n"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
[[test]]
name = "repeat11-no-multi-crlf"
regex = '(?R)^*'
haystack = "\naa\n"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
[[test]]
name = "repeat11-no-multi-crlf-cr"
regex = '(?R)^*'
haystack = "\raa\r"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
[[test]]
name = "repeat12"
regex = '(?m)^+'
haystack = "\naa\n"
matches = [[0, 0], [1, 1], [4, 4]]
[[test]]
name = "repeat12-crlf"
regex = '(?Rm)^+'
haystack = "\naa\n"
matches = [[0, 0], [1, 1], [4, 4]]
[[test]]
name = "repeat12-crlf-cr"
regex = '(?Rm)^+'
haystack = "\raa\r"
matches = [[0, 0], [1, 1], [4, 4]]
[[test]]
name = "repeat12-no-multi"
regex = '^+'
haystack = "\naa\n"
matches = [[0, 0]]
[[test]]
name = "repeat12-no-multi-crlf"
regex = '(?R)^+'
haystack = "\naa\n"
matches = [[0, 0]]
[[test]]
name = "repeat12-no-multi-crlf-cr"
regex = '(?R)^+'
haystack = "\raa\r"
matches = [[0, 0]]
[[test]]
name = "repeat13"
regex = '(?m)$*'
haystack = "\naa\n"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
[[test]]
name = "repeat13-crlf"
regex = '(?Rm)$*'
haystack = "\naa\n"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
[[test]]
name = "repeat13-crlf-cr"
regex = '(?Rm)$*'
haystack = "\raa\r"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
[[test]]
name = "repeat13-no-multi"
regex = '$*'
haystack = "\naa\n"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
[[test]]
name = "repeat13-no-multi-crlf"
regex = '(?R)$*'
haystack = "\naa\n"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
[[test]]
name = "repeat13-no-multi-crlf-cr"
regex = '(?R)$*'
haystack = "\raa\r"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
[[test]]
name = "repeat14"
regex = '(?m)$+'
haystack = "\naa\n"
matches = [[0, 0], [3, 3], [4, 4]]
[[test]]
name = "repeat14-crlf"
regex = '(?Rm)$+'
haystack = "\naa\n"
matches = [[0, 0], [3, 3], [4, 4]]
[[test]]
name = "repeat14-crlf-cr"
regex = '(?Rm)$+'
haystack = "\raa\r"
matches = [[0, 0], [3, 3], [4, 4]]
[[test]]
name = "repeat14-no-multi"
regex = '$+'
haystack = "\naa\n"
matches = [[4, 4]]
[[test]]
name = "repeat14-no-multi-crlf"
regex = '(?R)$+'
haystack = "\naa\n"
matches = [[4, 4]]
[[test]]
name = "repeat14-no-multi-crlf-cr"
regex = '(?R)$+'
haystack = "\raa\r"
matches = [[4, 4]]
[[test]]
name = "repeat15"
regex = '(?m)(?:$\n)+'
haystack = "\n\naaa\n\n"
matches = [[0, 2], [5, 7]]
[[test]]
name = "repeat15-crlf"
regex = '(?Rm)(?:$\n)+'
haystack = "\n\naaa\n\n"
matches = [[0, 2], [5, 7]]
[[test]]
name = "repeat15-crlf-cr"
regex = '(?Rm)(?:$\r)+'
haystack = "\r\raaa\r\r"
matches = [[0, 2], [5, 7]]
[[test]]
name = "repeat15-no-multi"
regex = '(?:$\n)+'
haystack = "\n\naaa\n\n"
matches = []
[[test]]
name = "repeat15-no-multi-crlf"
regex = '(?R)(?:$\n)+'
haystack = "\n\naaa\n\n"
matches = []
[[test]]
name = "repeat15-no-multi-crlf-cr"
regex = '(?R)(?:$\r)+'
haystack = "\r\raaa\r\r"
matches = []
[[test]]
name = "repeat16"
regex = '(?m)(?:$\n)*'
haystack = "\n\naaa\n\n"
matches = [[0, 2], [3, 3], [4, 4], [5, 7]]
[[test]]
name = "repeat16-crlf"
regex = '(?Rm)(?:$\n)*'
haystack = "\n\naaa\n\n"
matches = [[0, 2], [3, 3], [4, 4], [5, 7]]
[[test]]
name = "repeat16-crlf-cr"
regex = '(?Rm)(?:$\r)*'
haystack = "\r\raaa\r\r"
matches = [[0, 2], [3, 3], [4, 4], [5, 7]]
[[test]]
name = "repeat16-no-multi"
regex = '(?:$\n)*'
haystack = "\n\naaa\n\n"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 7]]
[[test]]
name = "repeat16-no-multi-crlf"
regex = '(?R)(?:$\n)*'
haystack = "\n\naaa\n\n"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 7]]
[[test]]
name = "repeat16-no-multi-crlf-cr"
regex = '(?R)(?:$\r)*'
haystack = "\r\raaa\r\r"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 7]]
[[test]]
name = "repeat17"
regex = '(?m)(?:$\n^)+'
haystack = "\n\naaa\n\n"
matches = [[0, 2], [5, 7]]
[[test]]
name = "repeat17-crlf"
regex = '(?Rm)(?:$\n^)+'
haystack = "\n\naaa\n\n"
matches = [[0, 2], [5, 7]]
[[test]]
name = "repeat17-crlf-cr"
regex = '(?Rm)(?:$\r^)+'
haystack = "\r\raaa\r\r"
matches = [[0, 2], [5, 7]]
[[test]]
name = "repeat17-no-multi"
regex = '(?:$\n^)+'
haystack = "\n\naaa\n\n"
matches = []
[[test]]
name = "repeat17-no-multi-crlf"
regex = '(?R)(?:$\n^)+'
haystack = "\n\naaa\n\n"
matches = []
[[test]]
name = "repeat17-no-multi-crlf-cr"
regex = '(?R)(?:$\r^)+'
haystack = "\r\raaa\r\r"
matches = []
[[test]]
name = "repeat18"
regex = '(?m)(?:^|$)+'
haystack = "\n\naaa\n\n"
matches = [[0, 0], [1, 1], [2, 2], [5, 5], [6, 6], [7, 7]]
[[test]]
name = "repeat18-crlf"
regex = '(?Rm)(?:^|$)+'
haystack = "\n\naaa\n\n"
matches = [[0, 0], [1, 1], [2, 2], [5, 5], [6, 6], [7, 7]]
[[test]]
name = "repeat18-crlf-cr"
regex = '(?Rm)(?:^|$)+'
haystack = "\r\raaa\r\r"
matches = [[0, 0], [1, 1], [2, 2], [5, 5], [6, 6], [7, 7]]
[[test]]
name = "repeat18-no-multi"
regex = '(?:^|$)+'
haystack = "\n\naaa\n\n"
matches = [[0, 0], [7, 7]]
[[test]]
name = "repeat18-no-multi-crlf"
regex = '(?R)(?:^|$)+'
haystack = "\n\naaa\n\n"
matches = [[0, 0], [7, 7]]
[[test]]
name = "repeat18-no-multi-crlf-cr"
regex = '(?R)(?:^|$)+'
haystack = "\r\raaa\r\r"
matches = [[0, 0], [7, 7]]
[[test]]
name = "match-line-100"
regex = '(?m)^.+$'
haystack = "aa\naaaaaaaaaaaaaaaaaaa\n"
matches = [[0, 2], [3, 22]]
[[test]]
name = "match-line-100-crlf"
regex = '(?Rm)^.+$'
haystack = "aa\naaaaaaaaaaaaaaaaaaa\n"
matches = [[0, 2], [3, 22]]
[[test]]
name = "match-line-100-crlf-cr"
regex = '(?Rm)^.+$'
haystack = "aa\raaaaaaaaaaaaaaaaaaa\r"
matches = [[0, 2], [3, 22]]
[[test]]
name = "match-line-200"
regex = '(?m)^.+$'
haystack = "aa\naaaaaaaaaaaaaaaaaaa\n"
matches = [[0, 2], [3, 22]]
unicode = false
utf8 = false
[[test]]
name = "match-line-200-crlf"
regex = '(?Rm)^.+$'
haystack = "aa\naaaaaaaaaaaaaaaaaaa\n"
matches = [[0, 2], [3, 22]]
unicode = false
utf8 = false
[[test]]
name = "match-line-200-crlf-cr"
regex = '(?Rm)^.+$'
haystack = "aa\raaaaaaaaaaaaaaaaaaa\r"
matches = [[0, 2], [3, 22]]
unicode = false
utf8 = false

222
vendor/regex/testdata/no-unicode.toml vendored Normal file
View File

@@ -0,0 +1,222 @@
[[test]]
name = "invalid-utf8-literal1"
regex = '\xFF'
haystack = '\xFF'
matches = [[0, 1]]
unicode = false
utf8 = false
unescape = true
[[test]]
name = "mixed"
regex = '(?:.+)(?-u)(?:.+)'
haystack = '\xCE\x93\xCE\x94\xFF'
matches = [[0, 5]]
utf8 = false
unescape = true
[[test]]
name = "case1"
regex = "a"
haystack = "A"
matches = [[0, 1]]
case-insensitive = true
unicode = false
[[test]]
name = "case2"
regex = "[a-z]+"
haystack = "AaAaA"
matches = [[0, 5]]
case-insensitive = true
unicode = false
[[test]]
name = "case3"
regex = "[a-z]+"
haystack = "aA\u212AaA"
matches = [[0, 7]]
case-insensitive = true
[[test]]
name = "case4"
regex = "[a-z]+"
haystack = "aA\u212AaA"
matches = [[0, 2], [5, 7]]
case-insensitive = true
unicode = false
[[test]]
name = "negate1"
regex = "[^a]"
haystack = "δ"
matches = [[0, 2]]
[[test]]
name = "negate2"
regex = "[^a]"
haystack = "δ"
matches = [[0, 1], [1, 2]]
unicode = false
utf8 = false
[[test]]
name = "dotstar-prefix1"
regex = "a"
haystack = '\xFFa'
matches = [[1, 2]]
unicode = false
utf8 = false
unescape = true
[[test]]
name = "dotstar-prefix2"
regex = "a"
haystack = '\xFFa'
matches = [[1, 2]]
utf8 = false
unescape = true
[[test]]
name = "null-bytes1"
regex = '[^\x00]+\x00'
haystack = 'foo\x00'
matches = [[0, 4]]
unicode = false
utf8 = false
unescape = true
[[test]]
name = "word-ascii"
regex = '\w+'
haystack = "aδ"
matches = [[0, 1]]
unicode = false
[[test]]
name = "word-unicode"
regex = '\w+'
haystack = "aδ"
matches = [[0, 3]]
[[test]]
name = "decimal-ascii"
regex = '\d+'
haystack = "1२३9"
matches = [[0, 1], [7, 8]]
unicode = false
[[test]]
name = "decimal-unicode"
regex = '\d+'
haystack = "1२३9"
matches = [[0, 8]]
[[test]]
name = "space-ascii"
regex = '\s+'
haystack = " \u1680"
matches = [[0, 1]]
unicode = false
[[test]]
name = "space-unicode"
regex = '\s+'
haystack = " \u1680"
matches = [[0, 4]]
[[test]]
# See: https://github.com/rust-lang/regex/issues/484
name = "iter1-bytes"
regex = ''
haystack = "☃"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
utf8 = false
[[test]]
# See: https://github.com/rust-lang/regex/issues/484
name = "iter1-utf8"
regex = ''
haystack = "☃"
matches = [[0, 0], [3, 3]]
[[test]]
# See: https://github.com/rust-lang/regex/issues/484
# Note that iter2-utf8 doesn't make sense here, since the input isn't UTF-8.
name = "iter2-bytes"
regex = ''
haystack = 'b\xFFr'
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
unescape = true
utf8 = false
# These test that unanchored prefixes can munch through invalid UTF-8 even when
# utf8 is enabled.
#
# This test actually reflects an interesting simplification in how the Thompson
# NFA is constructed. It used to be that the NFA could be built with an
# unanchored prefix that either matched any byte or _only_ matched valid UTF-8.
# But the latter turns out to be pretty precarious when it comes to prefilters,
# because if you search a haystack that contains invalid UTF-8 but have an
# unanchored prefix that requires UTF-8, then prefilters are no longer a valid
# optimization because you actually have to check that everything is valid
# UTF-8.
#
# Originally, I had thought that we needed a valid UTF-8 unanchored prefix in
# order to guarantee that we only match at valid UTF-8 boundaries. But this
# isn't actually true! There are really only two things to consider here:
#
# 1) Will a regex match split an encoded codepoint? No. Because by construction,
# we ensure that a MATCH state can only be reached by following valid UTF-8 (assuming
# all of the UTF-8 modes are enabled).
#
# 2) Will a regex match arbitrary bytes that aren't valid UTF-8? Again, no,
# assuming all of the UTF-8 modes are enabled.
[[test]]
name = "unanchored-invalid-utf8-match-100"
regex = '[a-z]'
haystack = '\xFFa\xFF'
matches = [[1, 2]]
unescape = true
utf8 = false
# This test shows that we can still prevent a match from occurring by requiring
# that valid UTF-8 match by inserting our own unanchored prefix. Thus, if the
# behavior of not munching through invalid UTF-8 anywhere is needed, then it
# can be achieved thusly.
[[test]]
name = "unanchored-invalid-utf8-nomatch"
regex = '^(?s:.)*?[a-z]'
haystack = '\xFFa\xFF'
matches = []
unescape = true
utf8 = false
# This is a tricky test that makes sure we don't accidentally do a kind of
# unanchored search when we've requested that a regex engine not report
# empty matches that split a codepoint. This test caught a regression during
# development where the code for skipping over bad empty matches would do so
# even if the search should have been anchored. This is ultimately what led to
# making 'anchored' an 'Input' option, so that it was always clear what kind
# of search was being performed. (Before that, whether a search was anchored
# or not was a config knob on the regex engine.) This did wind up making DFAs
# a little more complex to configure (with their 'StartKind' knob), but it
# generally smoothed out everything else.
#
# Great example of a test whose failure motivated a sweeping API refactoring.
[[test]]
name = "anchored-iter-empty-utf8"
regex = ''
haystack = 'a☃z'
matches = [[0, 0], [1, 1]]
unescape = false
utf8 = true
anchored = true

280
vendor/regex/testdata/overlapping.toml vendored Normal file
View File

@@ -0,0 +1,280 @@
# NOTE: We define a number of tests where the *match* kind is 'leftmost-first'
# but the *search* kind is 'overlapping'. This is a somewhat nonsensical
# combination and can produce odd results. Nevertheless, those results should
# be consistent so we test them here. (At the time of writing this note, I
# hadn't yet decided whether to make 'leftmost-first' with 'overlapping' result
# in unspecified behavior.)
# This demonstrates how a full overlapping search is obviously quadratic. This
# regex reports a match for every substring in the haystack.
[[test]]
name = "ungreedy-dotstar-matches-everything-100"
regex = [".*?"]
haystack = "zzz"
matches = [
{ id = 0, span = [0, 0] },
{ id = 0, span = [1, 1] },
{ id = 0, span = [0, 1] },
{ id = 0, span = [2, 2] },
{ id = 0, span = [1, 2] },
{ id = 0, span = [0, 2] },
{ id = 0, span = [3, 3] },
{ id = 0, span = [2, 3] },
{ id = 0, span = [1, 3] },
{ id = 0, span = [0, 3] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "greedy-dotstar-matches-everything-100"
regex = [".*"]
haystack = "zzz"
matches = [
{ id = 0, span = [0, 0] },
{ id = 0, span = [1, 1] },
{ id = 0, span = [0, 1] },
{ id = 0, span = [2, 2] },
{ id = 0, span = [1, 2] },
{ id = 0, span = [0, 2] },
{ id = 0, span = [3, 3] },
{ id = 0, span = [2, 3] },
{ id = 0, span = [1, 3] },
{ id = 0, span = [0, 3] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "repetition-plus-leftmost-first-100"
regex = 'a+'
haystack = "aaa"
matches = [[0, 1], [1, 2], [0, 2], [2, 3], [1, 3], [0, 3]]
match-kind = "leftmost-first"
search-kind = "overlapping"
[[test]]
name = "repetition-plus-leftmost-first-110"
regex = '☃+'
haystack = "☃☃☃"
matches = [[0, 3], [3, 6], [0, 6], [6, 9], [3, 9], [0, 9]]
match-kind = "leftmost-first"
search-kind = "overlapping"
[[test]]
name = "repetition-plus-all-100"
regex = 'a+'
haystack = "aaa"
matches = [[0, 1], [1, 2], [0, 2], [2, 3], [1, 3], [0, 3]]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "repetition-plus-all-110"
regex = '☃+'
haystack = "☃☃☃"
matches = [[0, 3], [3, 6], [0, 6], [6, 9], [3, 9], [0, 9]]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "repetition-plus-leftmost-first-200"
regex = '(abc)+'
haystack = "zzabcabczzabc"
matches = [
[[2, 5], [2, 5]],
[[5, 8], [5, 8]],
[[2, 8], [5, 8]],
]
match-kind = "leftmost-first"
search-kind = "overlapping"
[[test]]
name = "repetition-plus-all-200"
regex = '(abc)+'
haystack = "zzabcabczzabc"
matches = [
[[2, 5], [2, 5]],
[[5, 8], [5, 8]],
[[2, 8], [5, 8]],
[[10, 13], [10, 13]],
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "repetition-star-leftmost-first-100"
regex = 'a*'
haystack = "aaa"
matches = [
[0, 0],
[1, 1],
[0, 1],
[2, 2],
[1, 2],
[0, 2],
[3, 3],
[2, 3],
[1, 3],
[0, 3],
]
match-kind = "leftmost-first"
search-kind = "overlapping"
[[test]]
name = "repetition-star-all-100"
regex = 'a*'
haystack = "aaa"
matches = [
[0, 0],
[1, 1],
[0, 1],
[2, 2],
[1, 2],
[0, 2],
[3, 3],
[2, 3],
[1, 3],
[0, 3],
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "repetition-star-leftmost-first-200"
regex = '(abc)*'
haystack = "zzabcabczzabc"
matches = [
[[0, 0], []],
]
match-kind = "leftmost-first"
search-kind = "overlapping"
[[test]]
name = "repetition-star-all-200"
regex = '(abc)*'
haystack = "zzabcabczzabc"
matches = [
[[0, 0], []],
[[1, 1], []],
[[2, 2], []],
[[3, 3], []],
[[4, 4], []],
[[5, 5], []],
[[2, 5], [2, 5]],
[[6, 6], []],
[[7, 7], []],
[[8, 8], []],
[[5, 8], [5, 8]],
[[2, 8], [5, 8]],
[[9, 9], []],
[[10, 10], []],
[[11, 11], []],
[[12, 12], []],
[[13, 13], []],
[[10, 13], [10, 13]],
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "start-end-rep-leftmost-first"
regex = '(^$)*'
haystack = "abc"
matches = [
[[0, 0], []],
]
match-kind = "leftmost-first"
search-kind = "overlapping"
[[test]]
name = "start-end-rep-all"
regex = '(^$)*'
haystack = "abc"
matches = [
[[0, 0], []],
[[1, 1], []],
[[2, 2], []],
[[3, 3], []],
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "alt-leftmost-first-100"
regex = 'abc|a'
haystack = "zzabcazzaabc"
matches = [[2, 3], [2, 5]]
match-kind = "leftmost-first"
search-kind = "overlapping"
[[test]]
name = "alt-all-100"
regex = 'abc|a'
haystack = "zzabcazzaabc"
matches = [[2, 3], [2, 5], [5, 6], [8, 9], [9, 10], [9, 12]]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "empty-000"
regex = ""
haystack = "abc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "empty-alt-000"
regex = "|b"
haystack = "abc"
matches = [[0, 0], [1, 1], [2, 2], [1, 2], [3, 3]]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "empty-alt-010"
regex = "b|"
haystack = "abc"
matches = [[0, 0], [1, 1], [2, 2], [1, 2], [3, 3]]
match-kind = "all"
search-kind = "overlapping"
[[test]]
# See: https://github.com/rust-lang/regex/issues/484
name = "iter1-bytes"
regex = ''
haystack = "☃"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
utf8 = false
match-kind = "all"
search-kind = "overlapping"
[[test]]
# See: https://github.com/rust-lang/regex/issues/484
name = "iter1-utf8"
regex = ''
haystack = "☃"
matches = [[0, 0], [3, 3]]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "iter1-incomplete-utf8"
regex = ''
haystack = '\xE2\x98' # incomplete snowman
matches = [[0, 0], [1, 1], [2, 2]]
match-kind = "all"
search-kind = "overlapping"
unescape = true
utf8 = false
[[test]]
name = "scratch"
regex = ['sam', 'samwise']
haystack = "samwise"
matches = [
{ id = 0, span = [0, 3] },
]
match-kind = "leftmost-first"
search-kind = "overlapping"

98
vendor/regex/testdata/regex-lite.toml vendored Normal file
View File

@@ -0,0 +1,98 @@
# These tests are specifically written to test the regex-lite crate. While it
# largely has the same semantics as the regex crate, there are some differences
# around Unicode support and UTF-8.
#
# To be clear, regex-lite supports far fewer patterns because of its lack of
# Unicode support, nested character classes and character class set operations.
# What we're talking about here are the patterns that both crates support but
# where the semantics might differ.
# regex-lite uses ASCII definitions for Perl character classes.
[[test]]
name = "perl-class-decimal"
regex = '\d'
haystack = '᠕'
matches = []
unicode = true
# regex-lite uses ASCII definitions for Perl character classes.
[[test]]
name = "perl-class-space"
regex = '\s'
haystack = "\u2000"
matches = []
unicode = true
# regex-lite uses ASCII definitions for Perl character classes.
[[test]]
name = "perl-class-word"
regex = '\w'
haystack = 'δ'
matches = []
unicode = true
# regex-lite uses the ASCII definition of word for word boundary assertions.
[[test]]
name = "word-boundary"
regex = '\b'
haystack = 'δ'
matches = []
unicode = true
# regex-lite uses the ASCII definition of word for negated word boundary
# assertions. But note that it should still not split codepoints!
[[test]]
name = "word-boundary-negated"
regex = '\B'
haystack = 'δ'
matches = [[0, 0], [2, 2]]
unicode = true
# While we're here, the empty regex---which matches at every
# position---shouldn't split a codepoint either.
[[test]]
name = "empty-no-split-codepoint"
regex = ''
haystack = '💩'
matches = [[0, 0], [4, 4]]
unicode = true
# A dot always matches a full codepoint.
[[test]]
name = "dot-always-matches-codepoint"
regex = '.'
haystack = '💩'
matches = [[0, 4]]
unicode = false
# A negated character class also always matches a full codepoint.
[[test]]
name = "negated-class-always-matches-codepoint"
regex = '[^a]'
haystack = '💩'
matches = [[0, 4]]
unicode = false
# regex-lite only supports ASCII-aware case insensitive matching.
[[test]]
name = "case-insensitive-is-ascii-only"
regex = 's'
haystack = 'ſ'
matches = []
unicode = true
case-insensitive = true
# Negated word boundaries shouldn't split a codepoint, but they will match
# between invalid UTF-8.
#
# This test is only valid for a 'bytes' API, but that doesn't (yet) exist in
# regex-lite. This can't happen in the main API because &str can't contain
# invalid UTF-8.
# [[test]]
# name = "word-boundary-invalid-utf8"
# regex = '\B'
# haystack = '\xFF\xFF\xFF\xFF'
# unescape = true
# matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
# unicode = true
# utf8 = false

830
vendor/regex/testdata/regression.toml vendored Normal file
View File

@@ -0,0 +1,830 @@
# See: https://github.com/rust-lang/regex/issues/48
[[test]]
name = "invalid-regex-no-crash-100"
regex = '(*)'
haystack = ""
matches = []
compiles = false
# See: https://github.com/rust-lang/regex/issues/48
[[test]]
name = "invalid-regex-no-crash-200"
regex = '(?:?)'
haystack = ""
matches = []
compiles = false
# See: https://github.com/rust-lang/regex/issues/48
[[test]]
name = "invalid-regex-no-crash-300"
regex = '(?)'
haystack = ""
matches = []
compiles = false
# See: https://github.com/rust-lang/regex/issues/48
[[test]]
name = "invalid-regex-no-crash-400"
regex = '*'
haystack = ""
matches = []
compiles = false
# See: https://github.com/rust-lang/regex/issues/75
[[test]]
name = "unsorted-binary-search-100"
regex = '(?i-u)[a_]+'
haystack = "A_"
matches = [[0, 2]]
# See: https://github.com/rust-lang/regex/issues/75
[[test]]
name = "unsorted-binary-search-200"
regex = '(?i-u)[A_]+'
haystack = "a_"
matches = [[0, 2]]
# See: https://github.com/rust-lang/regex/issues/76
[[test]]
name = "unicode-case-lower-nocase-flag"
regex = '(?i)\p{Ll}+'
haystack = "ΛΘΓΔα"
matches = [[0, 10]]
# See: https://github.com/rust-lang/regex/issues/99
[[test]]
name = "negated-char-class-100"
regex = '(?i)[^x]'
haystack = "x"
matches = []
# See: https://github.com/rust-lang/regex/issues/99
[[test]]
name = "negated-char-class-200"
regex = '(?i)[^x]'
haystack = "X"
matches = []
# See: https://github.com/rust-lang/regex/issues/101
[[test]]
name = "ascii-word-underscore"
regex = '[[:word:]]'
haystack = "_"
matches = [[0, 1]]
# See: https://github.com/rust-lang/regex/issues/129
[[test]]
name = "captures-repeat"
regex = '([a-f]){2}(?P<foo>[x-z])'
haystack = "abx"
matches = [
[[0, 3], [1, 2], [2, 3]],
]
# See: https://github.com/rust-lang/regex/issues/153
[[test]]
name = "alt-in-alt-100"
regex = 'ab?|$'
haystack = "az"
matches = [[0, 1], [2, 2]]
# See: https://github.com/rust-lang/regex/issues/153
[[test]]
name = "alt-in-alt-200"
regex = '^(?:.*?)(?:\n|\r\n?|$)'
haystack = "ab\rcd"
matches = [[0, 3]]
# See: https://github.com/rust-lang/regex/issues/169
[[test]]
name = "leftmost-first-prefix"
regex = 'z*azb'
haystack = "azb"
matches = [[0, 3]]
# See: https://github.com/rust-lang/regex/issues/191
[[test]]
name = "many-alternates"
regex = '1|2|3|4|5|6|7|8|9|10|int'
haystack = "int"
matches = [[0, 3]]
# See: https://github.com/rust-lang/regex/issues/204
[[test]]
name = "word-boundary-alone-100"
regex = '\b'
haystack = "Should this (work?)"
matches = [[0, 0], [6, 6], [7, 7], [11, 11], [13, 13], [17, 17]]
# See: https://github.com/rust-lang/regex/issues/204
[[test]]
name = "word-boundary-alone-200"
regex = '\b'
haystack = "a b c"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]
# See: https://github.com/rust-lang/regex/issues/264
[[test]]
name = "word-boundary-ascii-no-capture"
regex = '\B'
haystack = "\U00028F3E"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
unicode = false
utf8 = false
# See: https://github.com/rust-lang/regex/issues/264
[[test]]
name = "word-boundary-ascii-capture"
regex = '(?:\B)'
haystack = "\U00028F3E"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
unicode = false
utf8 = false
# See: https://github.com/rust-lang/regex/issues/268
[[test]]
name = "partial-anchor"
regex = '^a|b'
haystack = "ba"
matches = [[0, 1]]
# See: https://github.com/rust-lang/regex/issues/271
[[test]]
name = "endl-or-word-boundary"
regex = '(?m:$)|(?-u:\b)'
haystack = "\U0006084E"
matches = [[4, 4]]
# See: https://github.com/rust-lang/regex/issues/271
[[test]]
name = "zero-or-end"
regex = '(?i-u:\x00)|$'
haystack = "\U000E682F"
matches = [[4, 4]]
# See: https://github.com/rust-lang/regex/issues/271
[[test]]
name = "y-or-endl"
regex = '(?i-u:y)|(?m:$)'
haystack = "\U000B4331"
matches = [[4, 4]]
# See: https://github.com/rust-lang/regex/issues/271
[[test]]
name = "word-boundary-start-x"
regex = '(?u:\b)^(?-u:X)'
haystack = "X"
matches = [[0, 1]]
# See: https://github.com/rust-lang/regex/issues/271
[[test]]
name = "word-boundary-ascii-start-x"
regex = '(?-u:\b)^(?-u:X)'
haystack = "X"
matches = [[0, 1]]
# See: https://github.com/rust-lang/regex/issues/271
[[test]]
name = "end-not-word-boundary"
regex = '$\B'
haystack = "\U0005C124\U000B576C"
matches = [[8, 8]]
unicode = false
utf8 = false
# See: https://github.com/rust-lang/regex/issues/280
[[test]]
name = "partial-anchor-alternate-begin"
regex = '^a|z'
haystack = "yyyyya"
matches = []
# See: https://github.com/rust-lang/regex/issues/280
[[test]]
name = "partial-anchor-alternate-end"
regex = 'a$|z'
haystack = "ayyyyy"
matches = []
# See: https://github.com/rust-lang/regex/issues/289
[[test]]
name = "lits-unambiguous-100"
regex = '(?:ABC|CDA|BC)X'
haystack = "CDAX"
matches = [[0, 4]]
# See: https://github.com/rust-lang/regex/issues/291
[[test]]
name = "lits-unambiguous-200"
regex = '((IMG|CAM|MG|MB2)_|(DSCN|CIMG))(?P<n>[0-9]+)$'
haystack = "CIMG2341"
matches = [
[[0, 8], [0, 4], [], [0, 4], [4, 8]],
]
# See: https://github.com/rust-lang/regex/issues/303
#
# 2022-09-19: This has now been "properly" fixed in that empty character
# classes are fully supported as something that can never match. This test
# used to be marked as 'compiles = false', but now it works.
[[test]]
name = "negated-full-byte-range"
regex = '[^\x00-\xFF]'
haystack = ""
matches = []
compiles = true
unicode = false
utf8 = false
# See: https://github.com/rust-lang/regex/issues/321
[[test]]
name = "strange-anchor-non-complete-prefix"
regex = 'a^{2}'
haystack = ""
matches = []
# See: https://github.com/rust-lang/regex/issues/321
[[test]]
name = "strange-anchor-non-complete-suffix"
regex = '${2}a'
haystack = ""
matches = []
# See: https://github.com/rust-lang/regex/issues/334
# See: https://github.com/rust-lang/regex/issues/557
[[test]]
name = "captures-after-dfa-premature-end-100"
regex = 'a(b*(X|$))?'
haystack = "abcbX"
matches = [
[[0, 1], [], []],
]
# See: https://github.com/rust-lang/regex/issues/334
# See: https://github.com/rust-lang/regex/issues/557
[[test]]
name = "captures-after-dfa-premature-end-200"
regex = 'a(bc*(X|$))?'
haystack = "abcbX"
matches = [
[[0, 1], [], []],
]
# See: https://github.com/rust-lang/regex/issues/334
# See: https://github.com/rust-lang/regex/issues/557
[[test]]
name = "captures-after-dfa-premature-end-300"
regex = '(aa$)?'
haystack = "aaz"
matches = [
[[0, 0], []],
[[1, 1], []],
[[2, 2], []],
[[3, 3], []],
]
# Plucked from "Why aren't regular expressions a lingua franca? an empirical
# study on the re-use and portability of regular expressions", The ACM Joint
# European Software Engineering Conference and Symposium on the Foundations of
# Software Engineering (ESEC/FSE), 2019.
#
# Link: https://dl.acm.org/doi/pdf/10.1145/3338906.3338909
[[test]]
name = "captures-after-dfa-premature-end-400"
regex = '(a)\d*\.?\d+\b'
haystack = "a0.0c"
matches = [
[[0, 2], [0, 1]],
]
# See: https://github.com/rust-lang/regex/issues/437
[[test]]
name = "literal-panic"
regex = 'typename type\-parameter\-[0-9]+\-[0-9]+::.+'
haystack = "test"
matches = []
# See: https://github.com/rust-lang/regex/issues/527
[[test]]
name = "empty-flag-expr"
regex = '(?:(?:(?x)))'
haystack = ""
matches = [[0, 0]]
# See: https://github.com/rust-lang/regex/issues/533
#[[tests]]
#name = "blank-matches-nothing-between-space-and-tab"
#regex = '[[:blank:]]'
#input = '\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F'
#match = false
#unescape = true
# See: https://github.com/rust-lang/regex/issues/533
#[[tests]]
#name = "blank-matches-nothing-between-space-and-tab-inverted"
#regex = '^[[:^blank:]]+$'
#input = '\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F'
#match = true
#unescape = true
# See: https://github.com/rust-lang/regex/issues/555
[[test]]
name = "invalid-repetition"
regex = '(?m){1,1}'
haystack = ""
matches = []
compiles = false
# See: https://github.com/rust-lang/regex/issues/640
[[test]]
name = "flags-are-unset"
regex = '(?:(?i)foo)|Bar'
haystack = "foo Foo bar Bar"
matches = [[0, 3], [4, 7], [12, 15]]
# Note that 'Ј' is not 'j', but cyrillic Je
# https://en.wikipedia.org/wiki/Je_(Cyrillic)
#
# See: https://github.com/rust-lang/regex/issues/659
[[test]]
name = "empty-group-with-unicode"
regex = '(?:)Ј01'
haystack = 'zЈ01'
matches = [[1, 5]]
# See: https://github.com/rust-lang/regex/issues/579
[[test]]
name = "word-boundary-weird"
regex = '\b..\b'
haystack = "I have 12, he has 2!"
matches = [[0, 2], [7, 9], [9, 11], [11, 13], [17, 19]]
# See: https://github.com/rust-lang/regex/issues/579
[[test]]
name = "word-boundary-weird-ascii"
regex = '\b..\b'
haystack = "I have 12, he has 2!"
matches = [[0, 2], [7, 9], [9, 11], [11, 13], [17, 19]]
unicode = false
utf8 = false
# See: https://github.com/rust-lang/regex/issues/579
[[test]]
name = "word-boundary-weird-minimal-ascii"
regex = '\b..\b'
haystack = "az,,b"
matches = [[0, 2], [2, 4]]
unicode = false
utf8 = false
# See: https://github.com/BurntSushi/ripgrep/issues/1203
[[test]]
name = "reverse-suffix-100"
regex = '[0-4][0-4][0-4]000'
haystack = "153.230000"
matches = [[4, 10]]
# See: https://github.com/BurntSushi/ripgrep/issues/1203
[[test]]
name = "reverse-suffix-200"
regex = '[0-9][0-9][0-9]000'
haystack = "153.230000\n"
matches = [[4, 10]]
# This is a tricky case for the reverse suffix optimization, because it
# finds the 'foobar' match but the reverse scan must fail to find a match by
# correctly dealing with the word boundary following the 'foobar' literal when
# computing the start state.
#
# This test exists because I tried to break the following assumption that
# is currently in the code: that if a suffix is found and the reverse scan
# succeeds, then it's guaranteed that there is an overall match. Namely, the
# 'is_match' routine does *not* do another forward scan in this case because of
# this assumption.
[[test]]
name = "reverse-suffix-300"
regex = '\w+foobar\b'
haystack = "xyzfoobarZ"
matches = []
unicode = false
utf8 = false
# See: https://github.com/BurntSushi/ripgrep/issues/1247
[[test]]
name = "stops"
regex = '\bs(?:[ab])'
haystack = 's\xE4'
matches = []
unescape = true
utf8 = false
# See: https://github.com/BurntSushi/ripgrep/issues/1247
[[test]]
name = "stops-ascii"
regex = '(?-u:\b)s(?:[ab])'
haystack = 's\xE4'
matches = []
unescape = true
utf8 = false
# See: https://github.com/rust-lang/regex/issues/850
[[test]]
name = "adjacent-line-boundary-100"
regex = '(?m)^(?:[^ ]+?)$'
haystack = "line1\nline2"
matches = [[0, 5], [6, 11]]
# Continued.
[[test]]
name = "adjacent-line-boundary-200"
regex = '(?m)^(?:[^ ]+?)$'
haystack = "A\nB"
matches = [[0, 1], [2, 3]]
# There is no issue for this bug.
[[test]]
name = "anchored-prefix-100"
regex = '^a[[:^space:]]'
haystack = "a "
matches = []
# There is no issue for this bug.
[[test]]
name = "anchored-prefix-200"
regex = '^a[[:^space:]]'
haystack = "foo boo a"
matches = []
# There is no issue for this bug.
[[test]]
name = "anchored-prefix-300"
regex = '^-[a-z]'
haystack = "r-f"
matches = []
# Tests that a possible Aho-Corasick optimization works correctly. It only
# kicks in when we have a lot of literals. By "works correctly," we mean that
# leftmost-first match semantics are properly respected. That is, samwise
# should match, not sam.
#
# There is no issue for this bug.
[[test]]
name = "aho-corasick-100"
regex = 'samwise|sam|a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z'
haystack = "samwise"
matches = [[0, 7]]
# See: https://github.com/rust-lang/regex/issues/921
[[test]]
name = "interior-anchor-capture"
regex = '(a$)b$'
haystack = 'ab'
matches = []
# I found this bug in the course of adding some of the regexes that Ruff uses
# to rebar. It turns out that the lazy DFA was finding a match that was being
# rejected by the one-pass DFA. Yikes. I then minimized the regex and haystack.
#
# Source: https://github.com/charliermarsh/ruff/blob/a919041ddaa64cdf6f216f90dd0480dab69fd3ba/crates/ruff/src/rules/pycodestyle/rules/whitespace_around_keywords.rs#L52
[[test]]
name = "ruff-whitespace-around-keywords"
regex = '^(a|ab)$'
haystack = "ab"
anchored = true
unicode = false
utf8 = true
matches = [[[0, 2], [0, 2]]]
# From: https://github.com/rust-lang/regex/issues/429
[[test]]
name = "i429-0"
regex = '(?:(?-u:\b)|(?u:h))+'
haystack = "h"
unicode = true
utf8 = false
matches = [[0, 0], [1, 1]]
# From: https://github.com/rust-lang/regex/issues/429
[[test]]
name = "i429-1"
regex = '(?u:\B)'
haystack = "鋸"
unicode = true
utf8 = false
matches = []
# From: https://github.com/rust-lang/regex/issues/429
[[test]]
name = "i429-2"
regex = '(?:(?u:\b)|(?s-u:.))+'
haystack = "oB"
unicode = true
utf8 = false
matches = [[0, 0], [1, 2]]
# From: https://github.com/rust-lang/regex/issues/429
[[test]]
name = "i429-3"
regex = '(?:(?-u:\B)|(?su:.))+'
haystack = "\U000FEF80"
unicode = true
utf8 = false
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
# From: https://github.com/rust-lang/regex/issues/429
[[test]]
name = "i429-3-utf8"
regex = '(?:(?-u:\B)|(?su:.))+'
haystack = "\U000FEF80"
unicode = true
utf8 = true
matches = [[0, 0], [4, 4]]
# From: https://github.com/rust-lang/regex/issues/429
[[test]]
name = "i429-4"
regex = '(?m:$)(?m:^)(?su:.)'
haystack = "\n‣"
unicode = true
utf8 = false
matches = [[0, 1]]
# From: https://github.com/rust-lang/regex/issues/429
[[test]]
name = "i429-5"
regex = '(?m:$)^(?m:^)'
haystack = "\n"
unicode = true
utf8 = false
matches = [[0, 0]]
# From: https://github.com/rust-lang/regex/issues/429
[[test]]
name = "i429-6"
regex = '(?P<kp>(?iu:do)(?m:$))*'
haystack = "dodo"
unicode = true
utf8 = false
matches = [
[[0, 0], []],
[[1, 1], []],
[[2, 4], [2, 4]],
]
# From: https://github.com/rust-lang/regex/issues/429
[[test]]
name = "i429-7"
regex = '(?u:\B)'
haystack = "䡁"
unicode = true
utf8 = false
matches = []
# From: https://github.com/rust-lang/regex/issues/429
[[test]]
name = "i429-8"
regex = '(?:(?-u:\b)|(?u:[\u{0}-W]))+'
haystack = "0"
unicode = true
utf8 = false
matches = [[0, 0], [1, 1]]
# From: https://github.com/rust-lang/regex/issues/429
[[test]]
name = "i429-9"
regex = '((?m:$)(?-u:\B)(?s-u:.)(?-u:\B)$)'
haystack = "\n\n"
unicode = true
utf8 = false
matches = [
[[1, 2], [1, 2]],
]
# From: https://github.com/rust-lang/regex/issues/429
[[test]]
name = "i429-10"
regex = '(?m:$)(?m:$)^(?su:.)'
haystack = "\n\u0081¨\u200a"
unicode = true
utf8 = false
matches = [[0, 1]]
# From: https://github.com/rust-lang/regex/issues/429
[[test]]
name = "i429-11"
regex = '(?-u:\B)(?m:^)'
haystack = "0\n"
unicode = true
utf8 = false
matches = [[2, 2]]
# From: https://github.com/rust-lang/regex/issues/429
[[test]]
name = "i429-12"
regex = '(?:(?u:\b)|(?-u:.))+'
haystack = "0"
unicode = true
utf8 = false
matches = [[0, 0], [1, 1]]
# From: https://github.com/rust-lang/regex/issues/969
[[test]]
name = "i969"
regex = 'c.*d\z'
haystack = "ababcd"
bounds = [4, 6]
search-kind = "earliest"
matches = [[4, 6]]
# I found this during the regex-automata migration. This is the fowler basic
# 154 test, but without anchored = true and without a match limit.
#
# This test caught a subtle bug in the hybrid reverse DFA search, where it
# would skip over the termination condition if it entered a start state. This
# was a double bug. Firstly, the reverse DFA shouldn't have had start states
# specialized in the first place, and thus it shouldn't have been possible to detect
# that the DFA had entered a start state. The second bug was that the start
# state handling was incorrect by jumping over the termination condition.
[[test]]
name = "fowler-basic154-unanchored"
regex = '''a([bc]*)c*'''
haystack = '''abc'''
matches = [[[0, 3], [1, 3]]]
# From: https://github.com/rust-lang/regex/issues/981
#
# This was never really a problem in the new architecture because the
# regex-automata engines are far more principled about how they deal with
# look-around. (This was one of the many reasons I wanted to re-work the
# original regex crate engines.)
[[test]]
name = "word-boundary-interact-poorly-with-literal-optimizations"
regex = '(?i:(?:\b|_)win(?:32|64|dows)?(?:\b|_))'
haystack = 'ubi-Darwin-x86_64.tar.gz'
matches = []
# This was found during fuzz testing of regex. It provoked a panic in the meta
# engine as a result of the reverse suffix optimization. Namely, it hit a case
# where a suffix match was found, a corresponding reverse match was found, but
# the forward search turned up no match. The forward search should always match
# if the suffix and reverse search match.
#
# This in turn uncovered an inconsistency between the PikeVM and the DFA (lazy
# and fully compiled) engines. It was caused by a mishandling of the collection
# of NFA state IDs in the generic determinization code (which is why both types
# of DFA were impacted). Namely, when a fail state was encountered (that's the
# `[^\s\S]` in the pattern below), then it would just stop collecting states.
# But that's not correct since a later state could lead to a match.
[[test]]
name = "impossible-branch"
regex = '.*[^\s\S]A|B'
haystack = "B"
matches = [[0, 1]]
# This was found during fuzz testing in regex-lite. The regex crate never
# suffered from this bug, but it causes regex-lite to incorrectly compile
# captures.
[[test]]
name = "captures-wrong-order"
regex = '(a){0}(a)'
haystack = 'a'
matches = [[[0, 1], [], [0, 1]]]
# This tests a bug in how quit states are handled in the DFA. At some point
# during development, the DFAs were tweaked slightly such that if they hit
# a quit state (which means, they hit a byte that the caller configured should
# stop the search), then it might not return an error necessarily. Namely, if a
# match had already been found, then it would be returned instead of an error.
#
# But this is actually wrong! Why? Because even though a match had been found,
# it wouldn't be fully correct to return it once a quit state has been seen
# because you can't determine whether the match offset returned is the correct
# greedy/leftmost-first match. Since you can't complete the search as requested
# by the caller, the DFA should just stop and return an error.
#
# Interestingly, this does seem to produce an unavoidable difference between
# 'try_is_match().unwrap()' and 'try_find().unwrap().is_some()' for the DFAs.
# The former will stop immediately once a match is known to occur and return
# 'Ok(true)', whereas the latter could find the match but quit with an
# 'Err(..)' first.
#
# Thankfully, I believe this inconsistency between 'is_match()' and 'find()'
# cannot be observed in the higher level meta regex API because it specifically
# will try another engine that won't fail in the case of a DFA failing.
#
# This regression happened in the regex crate rewrite, but before anything got
# released.
[[test]]
name = "negated-unicode-word-boundary-dfa-fail"
regex = '\B.*'
haystack = "!\u02D7"
matches = [[0, 3]]
# This failure was found in the *old* regex crate (prior to regex 1.9), but
# I didn't investigate why. My best guess is that it's a literal optimization
# bug. It didn't occur in the rewrite.
[[test]]
name = "missed-match"
regex = 'e..+e.ee>'
haystack = 'Zeee.eZZZZZZZZeee>eeeeeee>'
matches = [[1, 26]]
# This test came from the 'ignore' crate and tripped a bug in how accelerated
# DFA states were handled in an overlapping search.
[[test]]
name = "regex-to-glob"
regex = ['(?-u)^path1/[^/]*$']
haystack = "path1/foo"
matches = [[0, 9]]
utf8 = false
match-kind = "all"
search-kind = "overlapping"
# See: https://github.com/rust-lang/regex/issues/1060
[[test]]
name = "reverse-inner-plus-shorter-than-expected"
regex = '(?:(\d+)[:.])?(\d{1,2})[:.](\d{2})'
haystack = '102:12:39'
matches = [[[0, 9], [0, 3], [4, 6], [7, 9]]]
# Like reverse-inner-plus-shorter-than-expected, but using a far simpler regex
# to demonstrate the extent of the rot. Sigh.
#
# See: https://github.com/rust-lang/regex/issues/1060
[[test]]
name = "reverse-inner-short"
regex = '(?:([0-9][0-9][0-9]):)?([0-9][0-9]):([0-9][0-9])'
haystack = '102:12:39'
matches = [[[0, 9], [0, 3], [4, 6], [7, 9]]]
# This regression test was found via the RegexSet APIs. It triggered a
# particular code path where a regex was compiled with 'All' match semantics
# (to support overlapping search), but got funneled down into a standard
# leftmost search when calling 'is_match'. This is fine on its own, but the
# leftmost search will use a prefilter and that's where this went awry.
#
# Namely, since 'All' semantics were used, the aho-corasick prefilter was
# incorrectly compiled with 'Standard' semantics. This was wrong because
# 'Standard' immediately attempts to report a match at every position, even if
# that would mean reporting a match past the leftmost match before reporting
# the leftmost match. This breaks the prefilter contract of never having false
# negatives and leads overall to the engine not finding a match.
#
# See: https://github.com/rust-lang/regex/issues/1070
[[test]]
name = "prefilter-with-aho-corasick-standard-semantics"
regex = '(?m)^ *v [0-9]'
haystack = 'v 0'
matches = [
{ id = 0, spans = [[0, 3]] },
]
match-kind = "all"
search-kind = "overlapping"
unicode = true
utf8 = true
# This tests that the PikeVM and the meta regex agree on a particular regex.
# This test previously failed when the ad hoc engines inside the meta engine
# did not handle quit states correctly. Namely, the Unicode word boundary here
# combined with a non-ASCII codepoint provokes the quit state. The ad hoc
# engines were previously returning a match even after entering the quit state
# if a match had been previously detected, but this is incorrect. The reason
# is that if a quit state is found, then the search must give up *immediately*
# because it prevents the search from finding the "proper" leftmost-first
# match. If it instead returns a match that has been found, it risks reporting
# an improper match, as it did in this case.
#
# See: https://github.com/rust-lang/regex/issues/1046
[[test]]
name = "non-prefix-literal-quit-state"
regex = '.+\b\n'
haystack = "β77\n"
matches = [[0, 5]]
# This is a regression test for some errant HIR interval set operations that
# were made in the regex-syntax 0.8.0 release and then reverted in 0.8.1. The
# issue here is that the HIR produced from the regex had out-of-order ranges.
#
# See: https://github.com/rust-lang/regex/issues/1103
# Ref: https://github.com/rust-lang/regex/pull/1051
# Ref: https://github.com/rust-lang/regex/pull/1102
[[test]]
name = "hir-optimization-out-of-order-class"
regex = '^[[:alnum:]./-]+$'
haystack = "a-b"
matches = [[0, 3]]
# This is a regression test for an improper reverse suffix optimization. This
# occurred when I "broadened" the applicability of the optimization to include
# multiple possible literal suffixes instead of only sticking to a non-empty
# longest common suffix. It turns out that, at least given how the reverse
# suffix optimization works, we need to stick to the longest common suffix for
# now.
#
# See: https://github.com/rust-lang/regex/issues/1110
# See also: https://github.com/astral-sh/ruff/pull/7980
[[test]]
name = 'improper-reverse-suffix-optimization'
regex = '(\\N\{[^}]+})|([{}])'
haystack = 'hiya \N{snowman} bye'
matches = [[[5, 16], [5, 16], []]]

641
vendor/regex/testdata/set.toml vendored Normal file
View File

@@ -0,0 +1,641 @@
# Basic multi-regex tests.
[[test]]
name = "basic10"
regex = ["a", "a"]
haystack = "a"
matches = [
{ id = 0, span = [0, 1] },
{ id = 1, span = [0, 1] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "basic10-leftmost-first"
regex = ["a", "a"]
haystack = "a"
matches = [
{ id = 0, span = [0, 1] },
]
match-kind = "leftmost-first"
search-kind = "leftmost"
[[test]]
name = "basic20"
regex = ["a", "a"]
haystack = "ba"
matches = [
{ id = 0, span = [1, 2] },
{ id = 1, span = [1, 2] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "basic30"
regex = ["a", "b"]
haystack = "a"
matches = [
{ id = 0, span = [0, 1] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "basic40"
regex = ["a", "b"]
haystack = "b"
matches = [
{ id = 1, span = [0, 1] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "basic50"
regex = ["a|b", "b|a"]
haystack = "b"
matches = [
{ id = 0, span = [0, 1] },
{ id = 1, span = [0, 1] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "basic60"
regex = ["foo", "oo"]
haystack = "foo"
matches = [
{ id = 0, span = [0, 3] },
{ id = 1, span = [1, 3] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "basic60-leftmost-first"
regex = ["foo", "oo"]
haystack = "foo"
matches = [
{ id = 0, span = [0, 3] },
]
match-kind = "leftmost-first"
search-kind = "leftmost"
[[test]]
name = "basic61"
regex = ["oo", "foo"]
haystack = "foo"
matches = [
{ id = 1, span = [0, 3] },
{ id = 0, span = [1, 3] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "basic61-leftmost-first"
regex = ["oo", "foo"]
haystack = "foo"
matches = [
{ id = 1, span = [0, 3] },
]
match-kind = "leftmost-first"
search-kind = "leftmost"
[[test]]
name = "basic70"
regex = ["abcd", "bcd", "cd", "d"]
haystack = "abcd"
matches = [
{ id = 0, span = [0, 4] },
{ id = 1, span = [1, 4] },
{ id = 2, span = [2, 4] },
{ id = 3, span = [3, 4] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "basic71"
regex = ["bcd", "cd", "d", "abcd"]
haystack = "abcd"
matches = [
{ id = 3, span = [0, 4] },
]
match-kind = "leftmost-first"
search-kind = "leftmost"
[[test]]
name = "basic80"
regex = ["^foo", "bar$"]
haystack = "foo"
matches = [
{ id = 0, span = [0, 3] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "basic81"
regex = ["^foo", "bar$"]
haystack = "foo bar"
matches = [
{ id = 0, span = [0, 3] },
{ id = 1, span = [4, 7] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "basic82"
regex = ["^foo", "bar$"]
haystack = "bar"
matches = [
{ id = 1, span = [0, 3] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "basic90"
regex = ["[a-z]+$", "foo"]
haystack = "01234 foo"
matches = [
{ id = 0, span = [8, 9] },
{ id = 0, span = [7, 9] },
{ id = 0, span = [6, 9] },
{ id = 1, span = [6, 9] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "basic91"
regex = ["[a-z]+$", "foo"]
haystack = "foo 01234"
matches = [
{ id = 1, span = [0, 3] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "basic100"
regex = [".*?", "a"]
haystack = "zzza"
matches = [
{ id = 0, span = [0, 0] },
{ id = 0, span = [1, 1] },
{ id = 0, span = [0, 1] },
{ id = 0, span = [2, 2] },
{ id = 0, span = [1, 2] },
{ id = 0, span = [0, 2] },
{ id = 0, span = [3, 3] },
{ id = 0, span = [2, 3] },
{ id = 0, span = [1, 3] },
{ id = 0, span = [0, 3] },
{ id = 0, span = [4, 4] },
{ id = 0, span = [3, 4] },
{ id = 0, span = [2, 4] },
{ id = 0, span = [1, 4] },
{ id = 0, span = [0, 4] },
{ id = 1, span = [3, 4] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "basic101"
regex = [".*", "a"]
haystack = "zzza"
matches = [
{ id = 0, span = [0, 0] },
{ id = 0, span = [1, 1] },
{ id = 0, span = [0, 1] },
{ id = 0, span = [2, 2] },
{ id = 0, span = [1, 2] },
{ id = 0, span = [0, 2] },
{ id = 0, span = [3, 3] },
{ id = 0, span = [2, 3] },
{ id = 0, span = [1, 3] },
{ id = 0, span = [0, 3] },
{ id = 0, span = [4, 4] },
{ id = 0, span = [3, 4] },
{ id = 0, span = [2, 4] },
{ id = 0, span = [1, 4] },
{ id = 0, span = [0, 4] },
{ id = 1, span = [3, 4] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "basic102"
regex = [".*", "a"]
haystack = "zzz"
matches = [
{ id = 0, span = [0, 0] },
{ id = 0, span = [1, 1] },
{ id = 0, span = [0, 1] },
{ id = 0, span = [2, 2] },
{ id = 0, span = [1, 2] },
{ id = 0, span = [0, 2] },
{ id = 0, span = [3, 3] },
{ id = 0, span = [2, 3] },
{ id = 0, span = [1, 3] },
{ id = 0, span = [0, 3] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "basic110"
regex = ['\ba\b']
haystack = "hello a bye"
matches = [
{ id = 0, span = [6, 7] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "basic111"
regex = ['\ba\b', '\be\b']
haystack = "hello a bye e"
matches = [
{ id = 0, span = [6, 7] },
{ id = 1, span = [12, 13] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "basic120"
regex = ["a"]
haystack = "a"
matches = [
{ id = 0, span = [0, 1] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "basic121"
regex = [".*a"]
haystack = "a"
matches = [
{ id = 0, span = [0, 1] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "basic122"
regex = [".*a", "β"]
haystack = "β"
matches = [
{ id = 1, span = [0, 2] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "basic130"
regex = ["ab", "b"]
haystack = "ba"
matches = [
{ id = 1, span = [0, 1] },
]
match-kind = "all"
search-kind = "overlapping"
# These test cases where one of the regexes matches the empty string.
[[test]]
name = "empty10"
regex = ["", "a"]
haystack = "abc"
matches = [
{ id = 0, span = [0, 0] },
{ id = 1, span = [0, 1] },
{ id = 0, span = [1, 1] },
{ id = 0, span = [2, 2] },
{ id = 0, span = [3, 3] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "empty10-leftmost-first"
regex = ["", "a"]
haystack = "abc"
matches = [
{ id = 0, span = [0, 0] },
{ id = 0, span = [1, 1] },
{ id = 0, span = [2, 2] },
{ id = 0, span = [3, 3] },
]
match-kind = "leftmost-first"
search-kind = "leftmost"
[[test]]
name = "empty11"
regex = ["a", ""]
haystack = "abc"
matches = [
{ id = 1, span = [0, 0] },
{ id = 0, span = [0, 1] },
{ id = 1, span = [1, 1] },
{ id = 1, span = [2, 2] },
{ id = 1, span = [3, 3] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "empty11-leftmost-first"
regex = ["a", ""]
haystack = "abc"
matches = [
{ id = 0, span = [0, 1] },
{ id = 1, span = [2, 2] },
{ id = 1, span = [3, 3] },
]
match-kind = "leftmost-first"
search-kind = "leftmost"
[[test]]
name = "empty20"
regex = ["", "b"]
haystack = "abc"
matches = [
{ id = 0, span = [0, 0] },
{ id = 0, span = [1, 1] },
{ id = 1, span = [1, 2] },
{ id = 0, span = [2, 2] },
{ id = 0, span = [3, 3] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "empty20-leftmost-first"
regex = ["", "b"]
haystack = "abc"
matches = [
{ id = 0, span = [0, 0] },
{ id = 0, span = [1, 1] },
{ id = 0, span = [2, 2] },
{ id = 0, span = [3, 3] },
]
match-kind = "leftmost-first"
search-kind = "leftmost"
[[test]]
name = "empty21"
regex = ["b", ""]
haystack = "abc"
matches = [
{ id = 1, span = [0, 0] },
{ id = 1, span = [1, 1] },
{ id = 0, span = [1, 2] },
{ id = 1, span = [2, 2] },
{ id = 1, span = [3, 3] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "empty21-leftmost-first"
regex = ["b", ""]
haystack = "abc"
matches = [
{ id = 1, span = [0, 0] },
{ id = 0, span = [1, 2] },
{ id = 1, span = [3, 3] },
]
match-kind = "leftmost-first"
search-kind = "leftmost"
[[test]]
name = "empty22"
regex = ["(?:)", "b"]
haystack = "abc"
matches = [
{ id = 0, span = [0, 0] },
{ id = 0, span = [1, 1] },
{ id = 1, span = [1, 2] },
{ id = 0, span = [2, 2] },
{ id = 0, span = [3, 3] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "empty23"
regex = ["b", "(?:)"]
haystack = "abc"
matches = [
{ id = 1, span = [0, 0] },
{ id = 1, span = [1, 1] },
{ id = 0, span = [1, 2] },
{ id = 1, span = [2, 2] },
{ id = 1, span = [3, 3] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "empty30"
regex = ["", "z"]
haystack = "abc"
matches = [
{ id = 0, span = [0, 0] },
{ id = 0, span = [1, 1] },
{ id = 0, span = [2, 2] },
{ id = 0, span = [3, 3] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "empty30-leftmost-first"
regex = ["", "z"]
haystack = "abc"
matches = [
{ id = 0, span = [0, 0] },
{ id = 0, span = [1, 1] },
{ id = 0, span = [2, 2] },
{ id = 0, span = [3, 3] },
]
match-kind = "leftmost-first"
search-kind = "leftmost"
[[test]]
name = "empty31"
regex = ["z", ""]
haystack = "abc"
matches = [
{ id = 1, span = [0, 0] },
{ id = 1, span = [1, 1] },
{ id = 1, span = [2, 2] },
{ id = 1, span = [3, 3] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "empty31-leftmost-first"
regex = ["z", ""]
haystack = "abc"
matches = [
{ id = 1, span = [0, 0] },
{ id = 1, span = [1, 1] },
{ id = 1, span = [2, 2] },
{ id = 1, span = [3, 3] },
]
match-kind = "leftmost-first"
search-kind = "leftmost"
[[test]]
name = "empty40"
regex = ["c(?:)", "b"]
haystack = "abc"
matches = [
{ id = 1, span = [1, 2] },
{ id = 0, span = [2, 3] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "empty40-leftmost-first"
regex = ["c(?:)", "b"]
haystack = "abc"
matches = [
{ id = 1, span = [1, 2] },
{ id = 0, span = [2, 3] },
]
match-kind = "leftmost-first"
search-kind = "leftmost"
# These test cases where there are no matches.
[[test]]
name = "nomatch10"
regex = ["a", "a"]
haystack = "b"
matches = []
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "nomatch20"
regex = ["^foo", "bar$"]
haystack = "bar foo"
matches = []
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "nomatch30"
regex = []
haystack = "a"
matches = []
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "nomatch40"
regex = ["^rooted$", '\.log$']
haystack = "notrooted"
matches = []
match-kind = "all"
search-kind = "overlapping"
# These test multi-regex searches with capture groups.
#
# NOTE: I wrote these tests in the course of developing a first class API for
# overlapping capturing group matches, but ultimately removed that API because
# the semantics for overlapping matches aren't totally clear. However, I've
# left the tests because I believe the semantics for these patterns are clear
# and because we can still test our "which patterns matched" APIs with them.
[[test]]
name = "caps-010"
regex = ['^(\w+) (\w+)$', '^(\S+) (\S+)$']
haystack = "Bruce Springsteen"
matches = [
{ id = 0, spans = [[0, 17], [0, 5], [6, 17]] },
{ id = 1, spans = [[0, 17], [0, 5], [6, 17]] },
]
match-kind = "all"
search-kind = "overlapping"
unicode = false
utf8 = false
[[test]]
name = "caps-020"
regex = ['^(\w+) (\w+)$', '^[A-Z](\S+) [A-Z](\S+)$']
haystack = "Bruce Springsteen"
matches = [
{ id = 0, spans = [[0, 17], [0, 5], [6, 17]] },
{ id = 1, spans = [[0, 17], [1, 5], [7, 17]] },
]
match-kind = "all"
search-kind = "overlapping"
unicode = false
utf8 = false
[[test]]
name = "caps-030"
regex = ['^(\w+) (\w+)$', '^([A-Z])(\S+) ([A-Z])(\S+)$']
haystack = "Bruce Springsteen"
matches = [
{ id = 0, spans = [[0, 17], [0, 5], [6, 17]] },
{ id = 1, spans = [[0, 17], [0, 1], [1, 5], [6, 7], [7, 17]] },
]
match-kind = "all"
search-kind = "overlapping"
unicode = false
utf8 = false
[[test]]
name = "caps-110"
regex = ['(\w+) (\w+)', '(\S+) (\S+)']
haystack = "Bruce Springsteen"
matches = [
{ id = 0, spans = [[0, 17], [0, 5], [6, 17]] },
]
match-kind = "leftmost-first"
search-kind = "leftmost"
unicode = false
utf8 = false
[[test]]
name = "caps-120"
regex = ['(\w+) (\w+)', '(\S+) (\S+)']
haystack = "&ruce $pringsteen"
matches = [
{ id = 1, spans = [[0, 17], [0, 5], [6, 17]] },
]
match-kind = "leftmost-first"
search-kind = "leftmost"
unicode = false
utf8 = false
[[test]]
name = "caps-121"
regex = ['(\w+) (\w+)', '(\S+) (\S+)']
haystack = "&ruce $pringsteen Foo Bar"
matches = [
{ id = 1, spans = [[0, 17], [0, 5], [6, 17]] },
{ id = 0, spans = [[18, 25], [18, 21], [22, 25]] },
]
match-kind = "leftmost-first"
search-kind = "leftmost"
unicode = false
utf8 = false

36
vendor/regex/testdata/substring.toml vendored Normal file
View File

@@ -0,0 +1,36 @@
# These tests check that regex engines perform as expected when the search is
# instructed to only search a substring of a haystack instead of the entire
# haystack. This tends to exercise interesting edge cases that are otherwise
# difficult to provoke. (But not necessarily impossible. Regex search iterators,
# for example, make use of the "search just a substring" APIs by changing the
# starting position of a search to the end position of the previous match.)
[[test]]
name = "unicode-word-start"
regex = '\b[0-9]+\b'
haystack = "β123"
bounds = { start = 2, end = 5 }
matches = []
[[test]]
name = "unicode-word-end"
regex = '\b[0-9]+\b'
haystack = "123β"
bounds = { start = 0, end = 3 }
matches = []
[[test]]
name = "ascii-word-start"
regex = '\b[0-9]+\b'
haystack = "β123"
bounds = { start = 2, end = 5 }
matches = [[2, 5]]
unicode = false
[[test]]
name = "ascii-word-end"
regex = '\b[0-9]+\b'
haystack = "123β"
bounds = { start = 0, end = 3 }
matches = [[0, 3]]
unicode = false

517
vendor/regex/testdata/unicode.toml vendored Normal file
View File

@@ -0,0 +1,517 @@
# Basic Unicode literal support.
[[test]]
name = "literal1"
regex = '☃'
haystack = "☃"
matches = [[0, 3]]
[[test]]
name = "literal2"
regex = '☃+'
haystack = "☃"
matches = [[0, 3]]
[[test]]
name = "literal3"
regex = '☃+'
haystack = "☃"
matches = [[0, 3]]
case-insensitive = true
[[test]]
name = "literal4"
regex = 'Δ'
haystack = "δ"
matches = [[0, 2]]
case-insensitive = true
# Unicode word boundaries.
[[test]]
name = "wb-100"
regex = '\d\b'
haystack = "6δ"
matches = []
[[test]]
name = "wb-200"
regex = '\d\b'
haystack = "6"
matches = [[0, 1]]
[[test]]
name = "wb-300"
regex = '\d\B'
haystack = "6δ"
matches = [[0, 1]]
[[test]]
name = "wb-400"
regex = '\d\B'
haystack = "6"
matches = []
# Unicode character class support.
[[test]]
name = "class1"
regex = '[☃Ⅰ]+'
haystack = "☃"
matches = [[0, 3]]
[[test]]
name = "class2"
regex = '\pN'
haystack = "Ⅰ"
matches = [[0, 3]]
[[test]]
name = "class3"
regex = '\pN+'
haystack = "Ⅰ1Ⅱ2"
matches = [[0, 8]]
[[test]]
name = "class4"
regex = '\PN+'
haystack = "ab"
matches = [[0, 2]]
[[test]]
name = "class5"
regex = '[\PN]+'
haystack = "ab"
matches = [[0, 2]]
[[test]]
name = "class6"
regex = '[^\PN]+'
haystack = "abⅠ"
matches = [[2, 5]]
[[test]]
name = "class7"
regex = '\p{Lu}+'
haystack = "ΛΘΓΔα"
matches = [[0, 8]]
[[test]]
name = "class8"
regex = '\p{Lu}+'
haystack = "ΛΘΓΔα"
matches = [[0, 10]]
case-insensitive = true
[[test]]
name = "class9"
regex = '\pL+'
haystack = "ΛΘΓΔα"
matches = [[0, 10]]
[[test]]
name = "class10"
regex = '\p{Ll}+'
haystack = "ΛΘΓΔα"
matches = [[8, 10]]
# Unicode aware "Perl" character classes.
[[test]]
name = "perl1"
regex = '\w+'
haystack = "dδd"
matches = [[0, 4]]
[[test]]
name = "perl2"
regex = '\w+'
haystack = "⥡"
matches = []
[[test]]
name = "perl3"
regex = '\W+'
haystack = "⥡"
matches = [[0, 3]]
[[test]]
name = "perl4"
regex = '\d+'
haystack = "1२३9"
matches = [[0, 8]]
[[test]]
name = "perl5"
regex = '\d+'
haystack = "Ⅱ"
matches = []
[[test]]
name = "perl6"
regex = '\D+'
haystack = "Ⅱ"
matches = [[0, 3]]
[[test]]
name = "perl7"
regex = '\s+'
haystack = "\u1680"
matches = [[0, 3]]
[[test]]
name = "perl8"
regex = '\s+'
haystack = "☃"
matches = []
[[test]]
name = "perl9"
regex = '\S+'
haystack = "☃"
matches = [[0, 3]]
# Specific tests for Unicode general category classes.
[[test]]
name = "class-gencat1"
regex = '\p{Cased_Letter}'
haystack = "\uFF21"
matches = [[0, 3]]
[[test]]
name = "class-gencat2"
regex = '\p{Close_Punctuation}'
haystack = "❯"
matches = [[0, 3]]
[[test]]
name = "class-gencat3"
regex = '\p{Connector_Punctuation}'
haystack = "⁀"
matches = [[0, 3]]
[[test]]
name = "class-gencat4"
regex = '\p{Control}'
haystack = "\u009F"
matches = [[0, 2]]
[[test]]
name = "class-gencat5"
regex = '\p{Currency_Symbol}'
haystack = "\uFFE1"
matches = [[0, 3]]
[[test]]
name = "class-gencat6"
regex = '\p{Dash_Punctuation}'
haystack = "〰"
matches = [[0, 3]]
[[test]]
name = "class-gencat7"
regex = '\p{Decimal_Number}'
haystack = "𑓙"
matches = [[0, 4]]
[[test]]
name = "class-gencat8"
regex = '\p{Enclosing_Mark}'
haystack = "\uA672"
matches = [[0, 3]]
[[test]]
name = "class-gencat9"
regex = '\p{Final_Punctuation}'
haystack = "⸡"
matches = [[0, 3]]
[[test]]
name = "class-gencat10"
regex = '\p{Format}'
haystack = "\U000E007F"
matches = [[0, 4]]
[[test]]
name = "class-gencat11"
regex = '\p{Initial_Punctuation}'
haystack = "⸜"
matches = [[0, 3]]
[[test]]
name = "class-gencat12"
regex = '\p{Letter}'
haystack = "Έ"
matches = [[0, 2]]
[[test]]
name = "class-gencat13"
regex = '\p{Letter_Number}'
haystack = "ↂ"
matches = [[0, 3]]
[[test]]
name = "class-gencat14"
regex = '\p{Line_Separator}'
haystack = "\u2028"
matches = [[0, 3]]
[[test]]
name = "class-gencat15"
regex = '\p{Lowercase_Letter}'
haystack = "ϛ"
matches = [[0, 2]]
[[test]]
name = "class-gencat16"
regex = '\p{Mark}'
haystack = "\U000E01EF"
matches = [[0, 4]]
[[test]]
name = "class-gencat17"
regex = '\p{Math}'
haystack = "⋿"
matches = [[0, 3]]
[[test]]
name = "class-gencat18"
regex = '\p{Modifier_Letter}'
haystack = "𖭃"
matches = [[0, 4]]
[[test]]
name = "class-gencat19"
regex = '\p{Modifier_Symbol}'
haystack = "🏿"
matches = [[0, 4]]
[[test]]
name = "class-gencat20"
regex = '\p{Nonspacing_Mark}'
haystack = "\U0001E94A"
matches = [[0, 4]]
[[test]]
name = "class-gencat21"
regex = '\p{Number}'
haystack = "⓿"
matches = [[0, 3]]
[[test]]
name = "class-gencat22"
regex = '\p{Open_Punctuation}'
haystack = "⦅"
matches = [[0, 3]]
[[test]]
name = "class-gencat23"
regex = '\p{Other}'
haystack = "\u0BC9"
matches = [[0, 3]]
[[test]]
name = "class-gencat24"
regex = '\p{Other_Letter}'
haystack = "ꓷ"
matches = [[0, 3]]
[[test]]
name = "class-gencat25"
regex = '\p{Other_Number}'
haystack = "㉏"
matches = [[0, 3]]
[[test]]
name = "class-gencat26"
regex = '\p{Other_Punctuation}'
haystack = "𞥞"
matches = [[0, 4]]
[[test]]
name = "class-gencat27"
regex = '\p{Other_Symbol}'
haystack = "⅌"
matches = [[0, 3]]
[[test]]
name = "class-gencat28"
regex = '\p{Paragraph_Separator}'
haystack = "\u2029"
matches = [[0, 3]]
[[test]]
name = "class-gencat29"
regex = '\p{Private_Use}'
haystack = "\U0010FFFD"
matches = [[0, 4]]
[[test]]
name = "class-gencat30"
regex = '\p{Punctuation}'
haystack = "𑁍"
matches = [[0, 4]]
[[test]]
name = "class-gencat31"
regex = '\p{Separator}'
haystack = "\u3000"
matches = [[0, 3]]
[[test]]
name = "class-gencat32"
regex = '\p{Space_Separator}'
haystack = "\u205F"
matches = [[0, 3]]
[[test]]
name = "class-gencat33"
regex = '\p{Spacing_Mark}'
haystack = "\U00016F7E"
matches = [[0, 4]]
[[test]]
name = "class-gencat34"
regex = '\p{Symbol}'
haystack = "⯈"
matches = [[0, 3]]
[[test]]
name = "class-gencat35"
regex = '\p{Titlecase_Letter}'
haystack = "ῼ"
matches = [[0, 3]]
[[test]]
name = "class-gencat36"
regex = '\p{Unassigned}'
haystack = "\U0010FFFF"
matches = [[0, 4]]
[[test]]
name = "class-gencat37"
regex = '\p{Uppercase_Letter}'
haystack = "Ꝋ"
matches = [[0, 3]]
# Tests for Unicode emoji properties.
[[test]]
name = "class-emoji1"
regex = '\p{Emoji}'
haystack = "\u23E9"
matches = [[0, 3]]
[[test]]
name = "class-emoji2"
regex = '\p{emoji}'
haystack = "\U0001F21A"
matches = [[0, 4]]
[[test]]
name = "class-emoji3"
regex = '\p{extendedpictographic}'
haystack = "\U0001FA6E"
matches = [[0, 4]]
[[test]]
name = "class-emoji4"
regex = '\p{extendedpictographic}'
haystack = "\U0001FFFD"
matches = [[0, 4]]
# Tests for Unicode grapheme cluster properties.
[[test]]
name = "class-gcb1"
regex = '\p{grapheme_cluster_break=prepend}'
haystack = "\U00011D46"
matches = [[0, 4]]
[[test]]
name = "class-gcb2"
regex = '\p{gcb=regional_indicator}'
haystack = "\U0001F1E6"
matches = [[0, 4]]
[[test]]
name = "class-gcb3"
regex = '\p{gcb=ri}'
haystack = "\U0001F1E7"
matches = [[0, 4]]
[[test]]
name = "class-gcb4"
regex = '\p{regionalindicator}'
haystack = "\U0001F1FF"
matches = [[0, 4]]
[[test]]
name = "class-gcb5"
regex = '\p{gcb=lvt}'
haystack = "\uC989"
matches = [[0, 3]]
[[test]]
name = "class-gcb6"
regex = '\p{gcb=zwj}'
haystack = "\u200D"
matches = [[0, 3]]
# Tests for Unicode word boundary properties.
[[test]]
name = "class-word-break1"
regex = '\p{word_break=Hebrew_Letter}'
haystack = "\uFB46"
matches = [[0, 3]]
[[test]]
name = "class-word-break2"
regex = '\p{wb=hebrewletter}'
haystack = "\uFB46"
matches = [[0, 3]]
[[test]]
name = "class-word-break3"
regex = '\p{wb=ExtendNumLet}'
haystack = "\uFF3F"
matches = [[0, 3]]
[[test]]
name = "class-word-break4"
regex = '\p{wb=WSegSpace}'
haystack = "\u3000"
matches = [[0, 3]]
[[test]]
name = "class-word-break5"
regex = '\p{wb=numeric}'
haystack = "\U0001E950"
matches = [[0, 4]]
# Tests for Unicode sentence boundary properties.
[[test]]
name = "class-sentence-break1"
regex = '\p{sentence_break=Lower}'
haystack = "\u0469"
matches = [[0, 2]]
[[test]]
name = "class-sentence-break2"
regex = '\p{sb=lower}'
haystack = "\u0469"
matches = [[0, 2]]
[[test]]
name = "class-sentence-break3"
regex = '\p{sb=Close}'
haystack = "\uFF60"
matches = [[0, 3]]
[[test]]
name = "class-sentence-break4"
regex = '\p{sb=Close}'
haystack = "\U0001F677"
matches = [[0, 4]]
[[test]]
name = "class-sentence-break5"
regex = '\p{sb=SContinue}'
haystack = "\uFF64"
matches = [[0, 3]]

399
vendor/regex/testdata/utf8.toml vendored Normal file
View File

@@ -0,0 +1,399 @@
# These test the UTF-8 modes exposed by regex-automata. Namely, when utf8 is
# true, then we promise that the haystack is valid UTF-8. (Otherwise behavior
# is unspecified.) This also corresponds to building the regex engine with the
# following two guarantees:
#
# 1) For any non-empty match reported, its span is guaranteed to correspond to
# valid UTF-8.
# 2) All empty or zero-width matches reported must never split a UTF-8
# encoded codepoint. If the haystack has invalid UTF-8, then this results in
# unspecified behavior.
#
# The (2) is in particular what we focus our testing on since (1) is generally
# guaranteed by regex-syntax's AST-to-HIR translator and is well tested there.
# The thing with (2) is that it can't be described in the HIR, so the regex
# engines have to handle that case. Thus, we test it here.
#
# Note that it is possible to build a regex that has property (1) but not
# (2), and vice versa. This is done by building the HIR with 'utf8=true' but
# building the Thompson NFA with 'utf8=false'. We don't test that here because
# the harness doesn't expose a way to enable or disable UTF-8 mode with that
# granularity. Instead, those combinations are lightly tested via doc examples.
# That's not to say that (1) without (2) is uncommon. Indeed, ripgrep uses it
# because it cannot guarantee that its haystack is valid UTF-8.
# This tests that an empty regex doesn't split a codepoint.
[[test]]
name = "empty-utf8yes"
regex = ''
haystack = '☃'
matches = [[0, 0], [3, 3]]
unicode = true
utf8 = true
# Tests the overlapping case of the above.
[[test]]
name = "empty-utf8yes-overlapping"
regex = ''
haystack = '☃'
matches = [[0, 0], [3, 3]]
unicode = true
utf8 = true
match-kind = "all"
search-kind = "overlapping"
# This tests that an empty regex DOES split a codepoint when utf8=false.
[[test]]
name = "empty-utf8no"
regex = ''
haystack = '☃'
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
unicode = true
utf8 = false
# Tests the overlapping case of the above.
[[test]]
name = "empty-utf8no-overlapping"
regex = ''
haystack = '☃'
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
unicode = true
utf8 = false
match-kind = "all"
search-kind = "overlapping"
# This tests that an empty regex doesn't split a codepoint, even if we give
# it bounds entirely within the codepoint.
#
# This is one of the trickier cases and is what motivated the current UTF-8
# mode design. In particular, at one point, this test failed the 'is_match'
# variant of the test but not 'find'. This is because the 'is_match' code path
# is specifically optimized for "was a match found" rather than "where is the
# match." In the former case, you don't really care about the empty-vs-non-empty
# matches, and thus, the codepoint splitting filtering logic wasn't getting
# applied. (In multiple ways across multiple regex engines.) In this way, you
# can wind up with a situation where 'is_match' says "yes," but 'find' says,
# "I didn't find anything." Which is... not great.
#
# I could have decided to say that providing boundaries that themselves split
# a codepoint would have unspecified behavior. But I couldn't quite convince
# myself that such boundaries were the only way to get an inconsistency between
# 'is_match' and 'find'.
#
# Note that I also tried to come up with a test like this that fails without
# using `bounds`. Specifically, a test where 'is_match' and 'find' disagree.
# But I couldn't do it, and I'm tempted to conclude it is impossible. The
# fundamental problem is that you need to simultaneously produce an empty match
# that splits a codepoint while *not* matching before or after the codepoint.
[[test]]
name = "empty-utf8yes-bounds"
regex = ''
haystack = '𝛃'
bounds = [1, 3]
matches = []
unicode = true
utf8 = true
# Tests the overlapping case of the above.
[[test]]
name = "empty-utf8yes-bounds-overlapping"
regex = ''
haystack = '𝛃'
bounds = [1, 3]
matches = []
unicode = true
utf8 = true
match-kind = "all"
search-kind = "overlapping"
# This tests that an empty regex splits a codepoint when the bounds are
# entirely within the codepoint.
[[test]]
name = "empty-utf8no-bounds"
regex = ''
haystack = '𝛃'
bounds = [1, 3]
matches = [[1, 1], [2, 2], [3, 3]]
unicode = true
utf8 = false
# Tests the overlapping case of the above.
[[test]]
name = "empty-utf8no-bounds-overlapping"
regex = ''
haystack = '𝛃'
bounds = [1, 3]
matches = [[1, 1], [2, 2], [3, 3]]
unicode = true
utf8 = false
match-kind = "all"
search-kind = "overlapping"
# In this test, we anchor the search. Since the start position is also a UTF-8
# boundary, we get a match.
[[test]]
name = "empty-utf8yes-anchored"
regex = ''
haystack = '𝛃'
matches = [[0, 0]]
anchored = true
unicode = true
utf8 = true
# Tests the overlapping case of the above.
[[test]]
name = "empty-utf8yes-anchored-overlapping"
regex = ''
haystack = '𝛃'
matches = [[0, 0]]
anchored = true
unicode = true
utf8 = true
match-kind = "all"
search-kind = "overlapping"
# Same as above, except with UTF-8 mode disabled. It almost doesn't change the
# result, except for the fact that since this is an anchored search and we
# always find all matches, the test harness will keep reporting matches until
# none are found. Because it's anchored, matches will be reported so long as
# they are directly adjacent. Since with UTF-8 mode the next anchored search
# after the match at [0, 0] fails, iteration stops (and doesn't find the last
# match at [4, 4]).
[[test]]
name = "empty-utf8no-anchored"
regex = ''
haystack = '𝛃'
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
anchored = true
unicode = true
utf8 = false
# Tests the overlapping case of the above.
#
# Note that overlapping anchored searches are a little weird, and it's not
# totally clear what their semantics ought to be. For now, we just test the
# current behavior of our test shim that implements overlapping search. (This
# is one of the reasons why we don't really expose regex-level overlapping
# searches.)
[[test]]
name = "empty-utf8no-anchored-overlapping"
regex = ''
haystack = '𝛃'
matches = [[0, 0]]
anchored = true
unicode = true
utf8 = false
match-kind = "all"
search-kind = "overlapping"
# In this test, we anchor the search, but also set bounds. The bounds start the
# search in the middle of a codepoint, so there should never be a match.
[[test]]
name = "empty-utf8yes-anchored-bounds"
regex = ''
haystack = '𝛃'
matches = []
bounds = [1, 3]
anchored = true
unicode = true
utf8 = true
# Tests the overlapping case of the above.
[[test]]
name = "empty-utf8yes-anchored-bounds-overlapping"
regex = ''
haystack = '𝛃'
matches = []
bounds = [1, 3]
anchored = true
unicode = true
utf8 = true
match-kind = "all"
search-kind = "overlapping"
# Same as above, except with UTF-8 mode disabled. Without UTF-8 mode enabled,
# matching within a codepoint is allowed. And remember, as in the anchored test
# above with UTF-8 mode disabled, iteration will report all adjacent matches.
# The matches at [0, 0] and [4, 4] are not included because of the bounds of
# the search.
[[test]]
name = "empty-utf8no-anchored-bounds"
regex = ''
haystack = '𝛃'
bounds = [1, 3]
matches = [[1, 1], [2, 2], [3, 3]]
anchored = true
unicode = true
utf8 = false
# Tests the overlapping case of the above.
#
# Note that overlapping anchored searches are a little weird, and it's not
# totally clear what their semantics ought to be. For now, we just test the
# current behavior of our test shim that implements overlapping search. (This
# is one of the reasons why we don't really expose regex-level overlapping
# searches.)
[[test]]
name = "empty-utf8no-anchored-bounds-overlapping"
regex = ''
haystack = '𝛃'
bounds = [1, 3]
matches = [[1, 1]]
anchored = true
unicode = true
utf8 = false
match-kind = "all"
search-kind = "overlapping"
# This tests that we find the match at the end of the string when the bounds
# exclude the first match.
[[test]]
name = "empty-utf8yes-startbound"
regex = ''
haystack = '𝛃'
bounds = [1, 4]
matches = [[4, 4]]
unicode = true
utf8 = true
# Tests the overlapping case of the above.
[[test]]
name = "empty-utf8yes-startbound-overlapping"
regex = ''
haystack = '𝛃'
bounds = [1, 4]
matches = [[4, 4]]
unicode = true
utf8 = true
match-kind = "all"
search-kind = "overlapping"
# Same as above, except since UTF-8 mode is disabled, we also find the matches
# in between that split the codepoint.
[[test]]
name = "empty-utf8no-startbound"
regex = ''
haystack = '𝛃'
bounds = [1, 4]
matches = [[1, 1], [2, 2], [3, 3], [4, 4]]
unicode = true
utf8 = false
# Tests the overlapping case of the above.
[[test]]
name = "empty-utf8no-startbound-overlapping"
regex = ''
haystack = '𝛃'
bounds = [1, 4]
matches = [[1, 1], [2, 2], [3, 3], [4, 4]]
unicode = true
utf8 = false
match-kind = "all"
search-kind = "overlapping"
# This tests that we don't find any matches in an anchored search, even when
# the bounds include a match (at the end).
[[test]]
name = "empty-utf8yes-anchored-startbound"
regex = ''
haystack = '𝛃'
bounds = [1, 4]
matches = []
anchored = true
unicode = true
utf8 = true
# Tests the overlapping case of the above.
[[test]]
name = "empty-utf8yes-anchored-startbound-overlapping"
regex = ''
haystack = '𝛃'
bounds = [1, 4]
matches = []
anchored = true
unicode = true
utf8 = true
match-kind = "all"
search-kind = "overlapping"
# Same as above, except since UTF-8 mode is disabled, we also find the matches
# in between that split the codepoint. Even though this is an anchored search,
# since the matches are adjacent, we find all of them.
[[test]]
name = "empty-utf8no-anchored-startbound"
regex = ''
haystack = '𝛃'
bounds = [1, 4]
matches = [[1, 1], [2, 2], [3, 3], [4, 4]]
anchored = true
unicode = true
utf8 = false
# Tests the overlapping case of the above.
#
# Note that overlapping anchored searches are a little weird, and it's not
# totally clear what their semantics ought to be. For now, we just test the
# current behavior of our test shim that implements overlapping search. (This
# is one of the reasons why we don't really expose regex-level overlapping
# searches.)
[[test]]
name = "empty-utf8no-anchored-startbound-overlapping"
regex = ''
haystack = '𝛃'
bounds = [1, 4]
matches = [[1, 1]]
anchored = true
unicode = true
utf8 = false
match-kind = "all"
search-kind = "overlapping"
# This tests that we find the match at the end of the haystack in UTF-8 mode
# when our bounds only include the empty string at the end of the haystack.
[[test]]
name = "empty-utf8yes-anchored-endbound"
regex = ''
haystack = '𝛃'
bounds = [4, 4]
matches = [[4, 4]]
anchored = true
unicode = true
utf8 = true
# Tests the overlapping case of the above.
[[test]]
name = "empty-utf8yes-anchored-endbound-overlapping"
regex = ''
haystack = '𝛃'
bounds = [4, 4]
matches = [[4, 4]]
anchored = true
unicode = true
utf8 = true
match-kind = "all"
search-kind = "overlapping"
# Same as above, but with UTF-8 mode disabled. Results remain the same since
# the only possible match does not split a codepoint.
[[test]]
name = "empty-utf8no-anchored-endbound"
regex = ''
haystack = '𝛃'
bounds = [4, 4]
matches = [[4, 4]]
anchored = true
unicode = true
utf8 = false
# Tests the overlapping case of the above.
[[test]]
name = "empty-utf8no-anchored-endbound-overlapping"
regex = ''
haystack = '𝛃'
bounds = [4, 4]
matches = [[4, 4]]
anchored = true
unicode = true
utf8 = false
match-kind = "all"
search-kind = "overlapping"

View File

@@ -0,0 +1,687 @@
# These tests are for the "special" word boundary assertions. That is,
# \b{start}, \b{end}, \b{start-half}, \b{end-half}. These are specialty
# assertions for more niche use cases, but hitting those cases without these
# assertions is difficult. For example, \b{start-half} and \b{end-half} are
# used to implement the -w/--word-regexp flag in a grep program.
# Tests for (?-u:\b{start})
[[test]]
name = "word-start-ascii-010"
regex = '\b{start}'
haystack = "a"
matches = [[0, 0]]
unicode = false
[[test]]
name = "word-start-ascii-020"
regex = '\b{start}'
haystack = "a "
matches = [[0, 0]]
unicode = false
[[test]]
name = "word-start-ascii-030"
regex = '\b{start}'
haystack = " a "
matches = [[1, 1]]
unicode = false
[[test]]
name = "word-start-ascii-040"
regex = '\b{start}'
haystack = ""
matches = []
unicode = false
[[test]]
name = "word-start-ascii-050"
regex = '\b{start}'
haystack = "ab"
matches = [[0, 0]]
unicode = false
[[test]]
name = "word-start-ascii-060"
regex = '\b{start}'
haystack = "𝛃"
matches = []
unicode = false
[[test]]
name = "word-start-ascii-060-bounds"
regex = '\b{start}'
haystack = "𝛃"
bounds = [2, 3]
matches = []
unicode = false
[[test]]
name = "word-start-ascii-070"
regex = '\b{start}'
haystack = " 𝛃 "
matches = []
unicode = false
[[test]]
name = "word-start-ascii-080"
regex = '\b{start}'
haystack = "𝛃𐆀"
matches = []
unicode = false
[[test]]
name = "word-start-ascii-090"
regex = '\b{start}'
haystack = "𝛃b"
matches = [[4, 4]]
unicode = false
[[test]]
name = "word-start-ascii-110"
regex = '\b{start}'
haystack = "b𝛃"
matches = [[0, 0]]
unicode = false
# Tests for (?-u:\b{end})
[[test]]
name = "word-end-ascii-010"
regex = '\b{end}'
haystack = "a"
matches = [[1, 1]]
unicode = false
[[test]]
name = "word-end-ascii-020"
regex = '\b{end}'
haystack = "a "
matches = [[1, 1]]
unicode = false
[[test]]
name = "word-end-ascii-030"
regex = '\b{end}'
haystack = " a "
matches = [[2, 2]]
unicode = false
[[test]]
name = "word-end-ascii-040"
regex = '\b{end}'
haystack = ""
matches = []
unicode = false
[[test]]
name = "word-end-ascii-050"
regex = '\b{end}'
haystack = "ab"
matches = [[2, 2]]
unicode = false
[[test]]
name = "word-end-ascii-060"
regex = '\b{end}'
haystack = "𝛃"
matches = []
unicode = false
[[test]]
name = "word-end-ascii-060-bounds"
regex = '\b{end}'
haystack = "𝛃"
bounds = [2, 3]
matches = []
unicode = false
[[test]]
name = "word-end-ascii-070"
regex = '\b{end}'
haystack = " 𝛃 "
matches = []
unicode = false
[[test]]
name = "word-end-ascii-080"
regex = '\b{end}'
haystack = "𝛃𐆀"
matches = []
unicode = false
[[test]]
name = "word-end-ascii-090"
regex = '\b{end}'
haystack = "𝛃b"
matches = [[5, 5]]
unicode = false
[[test]]
name = "word-end-ascii-110"
regex = '\b{end}'
haystack = "b𝛃"
matches = [[1, 1]]
unicode = false
# Tests for \b{start}
[[test]]
name = "word-start-unicode-010"
regex = '\b{start}'
haystack = "a"
matches = [[0, 0]]
unicode = true
[[test]]
name = "word-start-unicode-020"
regex = '\b{start}'
haystack = "a "
matches = [[0, 0]]
unicode = true
[[test]]
name = "word-start-unicode-030"
regex = '\b{start}'
haystack = " a "
matches = [[1, 1]]
unicode = true
[[test]]
name = "word-start-unicode-040"
regex = '\b{start}'
haystack = ""
matches = []
unicode = true
[[test]]
name = "word-start-unicode-050"
regex = '\b{start}'
haystack = "ab"
matches = [[0, 0]]
unicode = true
[[test]]
name = "word-start-unicode-060"
regex = '\b{start}'
haystack = "𝛃"
matches = [[0, 0]]
unicode = true
[[test]]
name = "word-start-unicode-060-bounds"
regex = '\b{start}'
haystack = "𝛃"
bounds = [2, 3]
matches = []
unicode = true
[[test]]
name = "word-start-unicode-070"
regex = '\b{start}'
haystack = " 𝛃 "
matches = [[1, 1]]
unicode = true
[[test]]
name = "word-start-unicode-080"
regex = '\b{start}'
haystack = "𝛃𐆀"
matches = [[0, 0]]
unicode = true
[[test]]
name = "word-start-unicode-090"
regex = '\b{start}'
haystack = "𝛃b"
matches = [[0, 0]]
unicode = true
[[test]]
name = "word-start-unicode-110"
regex = '\b{start}'
haystack = "b𝛃"
matches = [[0, 0]]
unicode = true
# Tests for \b{end}
[[test]]
name = "word-end-unicode-010"
regex = '\b{end}'
haystack = "a"
matches = [[1, 1]]
unicode = true
[[test]]
name = "word-end-unicode-020"
regex = '\b{end}'
haystack = "a "
matches = [[1, 1]]
unicode = true
[[test]]
name = "word-end-unicode-030"
regex = '\b{end}'
haystack = " a "
matches = [[2, 2]]
unicode = true
[[test]]
name = "word-end-unicode-040"
regex = '\b{end}'
haystack = ""
matches = []
unicode = true
[[test]]
name = "word-end-unicode-050"
regex = '\b{end}'
haystack = "ab"
matches = [[2, 2]]
unicode = true
[[test]]
name = "word-end-unicode-060"
regex = '\b{end}'
haystack = "𝛃"
matches = [[4, 4]]
unicode = true
[[test]]
name = "word-end-unicode-060-bounds"
regex = '\b{end}'
haystack = "𝛃"
bounds = [2, 3]
matches = []
unicode = true
[[test]]
name = "word-end-unicode-070"
regex = '\b{end}'
haystack = " 𝛃 "
matches = [[5, 5]]
unicode = true
[[test]]
name = "word-end-unicode-080"
regex = '\b{end}'
haystack = "𝛃𐆀"
matches = [[4, 4]]
unicode = true
[[test]]
name = "word-end-unicode-090"
regex = '\b{end}'
haystack = "𝛃b"
matches = [[5, 5]]
unicode = true
[[test]]
name = "word-end-unicode-110"
regex = '\b{end}'
haystack = "b𝛃"
matches = [[5, 5]]
unicode = true
# Tests for (?-u:\b{start-half})
[[test]]
name = "word-start-half-ascii-010"
regex = '\b{start-half}'
haystack = "a"
matches = [[0, 0]]
unicode = false
[[test]]
name = "word-start-half-ascii-020"
regex = '\b{start-half}'
haystack = "a "
matches = [[0, 0], [2, 2]]
unicode = false
[[test]]
name = "word-start-half-ascii-030"
regex = '\b{start-half}'
haystack = " a "
matches = [[0, 0], [1, 1], [3, 3]]
unicode = false
[[test]]
name = "word-start-half-ascii-040"
regex = '\b{start-half}'
haystack = ""
matches = [[0, 0]]
unicode = false
[[test]]
name = "word-start-half-ascii-050"
regex = '\b{start-half}'
haystack = "ab"
matches = [[0, 0]]
unicode = false
[[test]]
name = "word-start-half-ascii-060"
regex = '\b{start-half}'
haystack = "𝛃"
matches = [[0, 0], [4, 4]]
unicode = false
[[test]]
name = "word-start-half-ascii-060-noutf8"
regex = '\b{start-half}'
haystack = "𝛃"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
unicode = false
utf8 = false
[[test]]
name = "word-start-half-ascii-060-bounds"
regex = '\b{start-half}'
haystack = "𝛃"
bounds = [2, 3]
matches = []
unicode = false
[[test]]
name = "word-start-half-ascii-070"
regex = '\b{start-half}'
haystack = " 𝛃 "
matches = [[0, 0], [1, 1], [5, 5], [6, 6]]
unicode = false
[[test]]
name = "word-start-half-ascii-080"
regex = '\b{start-half}'
haystack = "𝛃𐆀"
matches = [[0, 0], [4, 4], [8, 8]]
unicode = false
[[test]]
name = "word-start-half-ascii-090"
regex = '\b{start-half}'
haystack = "𝛃b"
matches = [[0, 0], [4, 4]]
unicode = false
[[test]]
name = "word-start-half-ascii-110"
regex = '\b{start-half}'
haystack = "b𝛃"
matches = [[0, 0], [5, 5]]
unicode = false
# Tests for (?-u:\b{end-half})
[[test]]
name = "word-end-half-ascii-010"
regex = '\b{end-half}'
haystack = "a"
matches = [[1, 1]]
unicode = false
[[test]]
name = "word-end-half-ascii-020"
regex = '\b{end-half}'
haystack = "a "
matches = [[1, 1], [2, 2]]
unicode = false
[[test]]
name = "word-end-half-ascii-030"
regex = '\b{end-half}'
haystack = " a "
matches = [[0, 0], [2, 2], [3, 3]]
unicode = false
[[test]]
name = "word-end-half-ascii-040"
regex = '\b{end-half}'
haystack = ""
matches = [[0, 0]]
unicode = false
[[test]]
name = "word-end-half-ascii-050"
regex = '\b{end-half}'
haystack = "ab"
matches = [[2, 2]]
unicode = false
[[test]]
name = "word-end-half-ascii-060"
regex = '\b{end-half}'
haystack = "𝛃"
matches = [[0, 0], [4, 4]]
unicode = false
[[test]]
name = "word-end-half-ascii-060-bounds"
regex = '\b{end-half}'
haystack = "𝛃"
bounds = [2, 3]
matches = []
unicode = false
[[test]]
name = "word-end-half-ascii-070"
regex = '\b{end-half}'
haystack = " 𝛃 "
matches = [[0, 0], [1, 1], [5, 5], [6, 6]]
unicode = false
[[test]]
name = "word-end-half-ascii-080"
regex = '\b{end-half}'
haystack = "𝛃𐆀"
matches = [[0, 0], [4, 4], [8, 8]]
unicode = false
[[test]]
name = "word-end-half-ascii-090"
regex = '\b{end-half}'
haystack = "𝛃b"
matches = [[0, 0], [5, 5]]
unicode = false
[[test]]
name = "word-end-half-ascii-110"
regex = '\b{end-half}'
haystack = "b𝛃"
matches = [[1, 1], [5, 5]]
unicode = false
# Tests for \b{start-half}
[[test]]
name = "word-start-half-unicode-010"
regex = '\b{start-half}'
haystack = "a"
matches = [[0, 0]]
unicode = true
[[test]]
name = "word-start-half-unicode-020"
regex = '\b{start-half}'
haystack = "a "
matches = [[0, 0], [2, 2]]
unicode = true
[[test]]
name = "word-start-half-unicode-030"
regex = '\b{start-half}'
haystack = " a "
matches = [[0, 0], [1, 1], [3, 3]]
unicode = true
[[test]]
name = "word-start-half-unicode-040"
regex = '\b{start-half}'
haystack = ""
matches = [[0, 0]]
unicode = true
[[test]]
name = "word-start-half-unicode-050"
regex = '\b{start-half}'
haystack = "ab"
matches = [[0, 0]]
unicode = true
[[test]]
name = "word-start-half-unicode-060"
regex = '\b{start-half}'
haystack = "𝛃"
matches = [[0, 0]]
unicode = true
[[test]]
name = "word-start-half-unicode-060-bounds"
regex = '\b{start-half}'
haystack = "𝛃"
bounds = [2, 3]
matches = []
unicode = true
[[test]]
name = "word-start-half-unicode-070"
regex = '\b{start-half}'
haystack = " 𝛃 "
matches = [[0, 0], [1, 1], [6, 6]]
unicode = true
[[test]]
name = "word-start-half-unicode-080"
regex = '\b{start-half}'
haystack = "𝛃𐆀"
matches = [[0, 0], [8, 8]]
unicode = true
[[test]]
name = "word-start-half-unicode-090"
regex = '\b{start-half}'
haystack = "𝛃b"
matches = [[0, 0]]
unicode = true
[[test]]
name = "word-start-half-unicode-110"
regex = '\b{start-half}'
haystack = "b𝛃"
matches = [[0, 0]]
unicode = true
# Tests for \b{end-half}
[[test]]
name = "word-end-half-unicode-010"
regex = '\b{end-half}'
haystack = "a"
matches = [[1, 1]]
unicode = true
[[test]]
name = "word-end-half-unicode-020"
regex = '\b{end-half}'
haystack = "a "
matches = [[1, 1], [2, 2]]
unicode = true
[[test]]
name = "word-end-half-unicode-030"
regex = '\b{end-half}'
haystack = " a "
matches = [[0, 0], [2, 2], [3, 3]]
unicode = true
[[test]]
name = "word-end-half-unicode-040"
regex = '\b{end-half}'
haystack = ""
matches = [[0, 0]]
unicode = true
[[test]]
name = "word-end-half-unicode-050"
regex = '\b{end-half}'
haystack = "ab"
matches = [[2, 2]]
unicode = true
[[test]]
name = "word-end-half-unicode-060"
regex = '\b{end-half}'
haystack = "𝛃"
matches = [[4, 4]]
unicode = true
[[test]]
name = "word-end-half-unicode-060-bounds"
regex = '\b{end-half}'
haystack = "𝛃"
bounds = [2, 3]
matches = []
unicode = true
[[test]]
name = "word-end-half-unicode-070"
regex = '\b{end-half}'
haystack = " 𝛃 "
matches = [[0, 0], [5, 5], [6, 6]]
unicode = true
[[test]]
name = "word-end-half-unicode-080"
regex = '\b{end-half}'
haystack = "𝛃𐆀"
matches = [[4, 4], [8, 8]]
unicode = true
[[test]]
name = "word-end-half-unicode-090"
regex = '\b{end-half}'
haystack = "𝛃b"
matches = [[5, 5]]
unicode = true
[[test]]
name = "word-end-half-unicode-110"
regex = '\b{end-half}'
haystack = "b𝛃"
matches = [[5, 5]]
unicode = true
# Specialty tests.
# Since \r is special cased in the start state computation (to deal with CRLF
# mode), this test ensures that the correct start state is computed when the
# pattern starts with a half word boundary assertion.
[[test]]
name = "word-start-half-ascii-carriage"
regex = '\b{start-half}[a-z]+'
haystack = 'ABC\rabc'
matches = [[4, 7]]
bounds = [4, 7]
unescape = true
# Since \n is also special cased in the start state computation, this test
# ensures that the correct start state is computed when the pattern starts with
# a half word boundary assertion.
[[test]]
name = "word-start-half-ascii-linefeed"
regex = '\b{start-half}[a-z]+'
haystack = 'ABC\nabc'
matches = [[4, 7]]
bounds = [4, 7]
unescape = true
# Like the carriage return test above, but with a custom line terminator.
[[test]]
name = "word-start-half-ascii-customlineterm"
regex = '\b{start-half}[a-z]+'
haystack = 'ABC!abc'
matches = [[4, 7]]
bounds = [4, 7]
unescape = true
line-terminator = '!'

781
vendor/regex/testdata/word-boundary.toml vendored Normal file
View File

@@ -0,0 +1,781 @@
# Some of these are cribbed from RE2's test suite.
# These test \b. Below are tests for \B.
[[test]]
name = "wb1"
regex = '\b'
haystack = ""
matches = []
unicode = false
[[test]]
name = "wb2"
regex = '\b'
haystack = "a"
matches = [[0, 0], [1, 1]]
unicode = false
[[test]]
name = "wb3"
regex = '\b'
haystack = "ab"
matches = [[0, 0], [2, 2]]
unicode = false
[[test]]
name = "wb4"
regex = '^\b'
haystack = "ab"
matches = [[0, 0]]
unicode = false
[[test]]
name = "wb5"
regex = '\b$'
haystack = "ab"
matches = [[2, 2]]
unicode = false
[[test]]
name = "wb6"
regex = '^\b$'
haystack = "ab"
matches = []
unicode = false
[[test]]
name = "wb7"
regex = '\bbar\b'
haystack = "nobar bar foo bar"
matches = [[6, 9], [14, 17]]
unicode = false
[[test]]
name = "wb8"
regex = 'a\b'
haystack = "faoa x"
matches = [[3, 4]]
unicode = false
[[test]]
name = "wb9"
regex = '\bbar'
haystack = "bar x"
matches = [[0, 3]]
unicode = false
[[test]]
name = "wb10"
regex = '\bbar'
haystack = "foo\nbar x"
matches = [[4, 7]]
unicode = false
[[test]]
name = "wb11"
regex = 'bar\b'
haystack = "foobar"
matches = [[3, 6]]
unicode = false
[[test]]
name = "wb12"
regex = 'bar\b'
haystack = "foobar\nxxx"
matches = [[3, 6]]
unicode = false
[[test]]
name = "wb13"
regex = '(?:foo|bar|[A-Z])\b'
haystack = "foo"
matches = [[0, 3]]
unicode = false
[[test]]
name = "wb14"
regex = '(?:foo|bar|[A-Z])\b'
haystack = "foo\n"
matches = [[0, 3]]
unicode = false
[[test]]
name = "wb15"
regex = '\b(?:foo|bar|[A-Z])'
haystack = "foo"
matches = [[0, 3]]
unicode = false
[[test]]
name = "wb16"
regex = '\b(?:foo|bar|[A-Z])\b'
haystack = "X"
matches = [[0, 1]]
unicode = false
[[test]]
name = "wb17"
regex = '\b(?:foo|bar|[A-Z])\b'
haystack = "XY"
matches = []
unicode = false
[[test]]
name = "wb18"
regex = '\b(?:foo|bar|[A-Z])\b'
haystack = "bar"
matches = [[0, 3]]
unicode = false
[[test]]
name = "wb19"
regex = '\b(?:foo|bar|[A-Z])\b'
haystack = "foo"
matches = [[0, 3]]
unicode = false
[[test]]
name = "wb20"
regex = '\b(?:foo|bar|[A-Z])\b'
haystack = "foo\n"
matches = [[0, 3]]
unicode = false
[[test]]
name = "wb21"
regex = '\b(?:foo|bar|[A-Z])\b'
haystack = "ffoo bbar N x"
matches = [[10, 11]]
unicode = false
[[test]]
name = "wb22"
regex = '\b(?:fo|foo)\b'
haystack = "fo"
matches = [[0, 2]]
unicode = false
[[test]]
name = "wb23"
regex = '\b(?:fo|foo)\b'
haystack = "foo"
matches = [[0, 3]]
unicode = false
[[test]]
name = "wb24"
regex = '\b\b'
haystack = ""
matches = []
unicode = false
[[test]]
name = "wb25"
regex = '\b\b'
haystack = "a"
matches = [[0, 0], [1, 1]]
unicode = false
[[test]]
name = "wb26"
regex = '\b$'
haystack = ""
matches = []
unicode = false
[[test]]
name = "wb27"
regex = '\b$'
haystack = "x"
matches = [[1, 1]]
unicode = false
[[test]]
name = "wb28"
regex = '\b$'
haystack = "y x"
matches = [[3, 3]]
unicode = false
[[test]]
name = "wb29"
regex = '(?-u:\b).$'
haystack = "x"
matches = [[0, 1]]
[[test]]
name = "wb30"
regex = '^\b(?:fo|foo)\b'
haystack = "fo"
matches = [[0, 2]]
unicode = false
[[test]]
name = "wb31"
regex = '^\b(?:fo|foo)\b'
haystack = "foo"
matches = [[0, 3]]
unicode = false
[[test]]
name = "wb32"
regex = '^\b$'
haystack = ""
matches = []
unicode = false
[[test]]
name = "wb33"
regex = '^\b$'
haystack = "x"
matches = []
unicode = false
[[test]]
name = "wb34"
regex = '^(?-u:\b).$'
haystack = "x"
matches = [[0, 1]]
[[test]]
name = "wb35"
regex = '^(?-u:\b).(?-u:\b)$'
haystack = "x"
matches = [[0, 1]]
[[test]]
name = "wb36"
regex = '^^^^^\b$$$$$'
haystack = ""
matches = []
unicode = false
[[test]]
name = "wb37"
regex = '^^^^^(?-u:\b).$$$$$'
haystack = "x"
matches = [[0, 1]]
[[test]]
name = "wb38"
regex = '^^^^^\b$$$$$'
haystack = "x"
matches = []
unicode = false
[[test]]
name = "wb39"
regex = '^^^^^(?-u:\b\b\b).(?-u:\b\b\b)$$$$$'
haystack = "x"
matches = [[0, 1]]
[[test]]
name = "wb40"
regex = '(?-u:\b).+(?-u:\b)'
haystack = "$$abc$$"
matches = [[2, 5]]
[[test]]
name = "wb41"
regex = '\b'
haystack = "a b c"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]
unicode = false
[[test]]
name = "wb42"
regex = '\bfoo\b'
haystack = "zzz foo zzz"
matches = [[4, 7]]
unicode = false
[[test]]
name = "wb43"
regex = '\b^'
haystack = "ab"
matches = [[0, 0]]
unicode = false
[[test]]
name = "wb44"
regex = '$\b'
haystack = "ab"
matches = [[2, 2]]
unicode = false
# Tests for \B. Note that \B is not allowed if UTF-8 mode is enabled, so we
# have to disable it for most of these tests. This is because \B can match at
# non-UTF-8 boundaries.
[[test]]
name = "nb1"
regex = '\Bfoo\B'
haystack = "n foo xfoox that"
matches = [[7, 10]]
unicode = false
utf8 = false
[[test]]
name = "nb2"
regex = 'a\B'
haystack = "faoa x"
matches = [[1, 2]]
unicode = false
utf8 = false
[[test]]
name = "nb3"
regex = '\Bbar'
haystack = "bar x"
matches = []
unicode = false
utf8 = false
[[test]]
name = "nb4"
regex = '\Bbar'
haystack = "foo\nbar x"
matches = []
unicode = false
utf8 = false
[[test]]
name = "nb5"
regex = 'bar\B'
haystack = "foobar"
matches = []
unicode = false
utf8 = false
[[test]]
name = "nb6"
regex = 'bar\B'
haystack = "foobar\nxxx"
matches = []
unicode = false
utf8 = false
[[test]]
name = "nb7"
regex = '(?:foo|bar|[A-Z])\B'
haystack = "foox"
matches = [[0, 3]]
unicode = false
utf8 = false
[[test]]
name = "nb8"
regex = '(?:foo|bar|[A-Z])\B'
haystack = "foo\n"
matches = []
unicode = false
utf8 = false
[[test]]
name = "nb9"
regex = '\B'
haystack = ""
matches = [[0, 0]]
unicode = false
utf8 = false
[[test]]
name = "nb10"
regex = '\B'
haystack = "x"
matches = []
unicode = false
utf8 = false
[[test]]
name = "nb11"
regex = '\B(?:foo|bar|[A-Z])'
haystack = "foo"
matches = []
unicode = false
utf8 = false
[[test]]
name = "nb12"
regex = '\B(?:foo|bar|[A-Z])\B'
haystack = "xXy"
matches = [[1, 2]]
unicode = false
utf8 = false
[[test]]
name = "nb13"
regex = '\B(?:foo|bar|[A-Z])\B'
haystack = "XY"
matches = []
unicode = false
utf8 = false
[[test]]
name = "nb14"
regex = '\B(?:foo|bar|[A-Z])\B'
haystack = "XYZ"
matches = [[1, 2]]
unicode = false
utf8 = false
[[test]]
name = "nb15"
regex = '\B(?:foo|bar|[A-Z])\B'
haystack = "abara"
matches = [[1, 4]]
unicode = false
utf8 = false
[[test]]
name = "nb16"
regex = '\B(?:foo|bar|[A-Z])\B'
haystack = "xfoo_"
matches = [[1, 4]]
unicode = false
utf8 = false
[[test]]
name = "nb17"
regex = '\B(?:foo|bar|[A-Z])\B'
haystack = "xfoo\n"
matches = []
unicode = false
utf8 = false
[[test]]
name = "nb18"
regex = '\B(?:foo|bar|[A-Z])\B'
haystack = "foo bar vNX"
matches = [[9, 10]]
unicode = false
utf8 = false
[[test]]
name = "nb19"
regex = '\B(?:fo|foo)\B'
haystack = "xfoo"
matches = [[1, 3]]
unicode = false
utf8 = false
[[test]]
name = "nb20"
regex = '\B(?:foo|fo)\B'
haystack = "xfooo"
matches = [[1, 4]]
unicode = false
utf8 = false
[[test]]
name = "nb21"
regex = '\B\B'
haystack = ""
matches = [[0, 0]]
unicode = false
utf8 = false
[[test]]
name = "nb22"
regex = '\B\B'
haystack = "x"
matches = []
unicode = false
utf8 = false
[[test]]
name = "nb23"
regex = '\B$'
haystack = ""
matches = [[0, 0]]
unicode = false
utf8 = false
[[test]]
name = "nb24"
regex = '\B$'
haystack = "x"
matches = []
unicode = false
utf8 = false
[[test]]
name = "nb25"
regex = '\B$'
haystack = "y x"
matches = []
unicode = false
utf8 = false
[[test]]
name = "nb26"
regex = '\B.$'
haystack = "x"
matches = []
unicode = false
utf8 = false
[[test]]
name = "nb27"
regex = '^\B(?:fo|foo)\B'
haystack = "fo"
matches = []
unicode = false
utf8 = false
[[test]]
name = "nb28"
regex = '^\B(?:fo|foo)\B'
haystack = "fo"
matches = []
unicode = false
utf8 = false
[[test]]
name = "nb29"
regex = '^\B'
haystack = ""
matches = [[0, 0]]
unicode = false
utf8 = false
[[test]]
name = "nb30"
regex = '^\B'
haystack = "x"
matches = []
unicode = false
utf8 = false
[[test]]
name = "nb31"
regex = '^\B\B'
haystack = ""
matches = [[0, 0]]
unicode = false
utf8 = false
[[test]]
name = "nb32"
regex = '^\B\B'
haystack = "x"
matches = []
unicode = false
utf8 = false
[[test]]
name = "nb33"
regex = '^\B$'
haystack = ""
matches = [[0, 0]]
unicode = false
utf8 = false
[[test]]
name = "nb34"
regex = '^\B$'
haystack = "x"
matches = []
unicode = false
utf8 = false
[[test]]
name = "nb35"
regex = '^\B.$'
haystack = "x"
matches = []
unicode = false
utf8 = false
[[test]]
name = "nb36"
regex = '^\B.\B$'
haystack = "x"
matches = []
unicode = false
utf8 = false
[[test]]
name = "nb37"
regex = '^^^^^\B$$$$$'
haystack = ""
matches = [[0, 0]]
unicode = false
utf8 = false
[[test]]
name = "nb38"
regex = '^^^^^\B.$$$$$'
haystack = "x"
matches = []
unicode = false
utf8 = false
[[test]]
name = "nb39"
regex = '^^^^^\B$$$$$'
haystack = "x"
matches = []
unicode = false
utf8 = false
# unicode1* and unicode2* work for both Unicode and ASCII because all matches
# are reported as byte offsets, and « and » do not correspond to word
# boundaries at either the character or byte level.
[[test]]
name = "unicode1"
regex = '\bx\b'
haystack = "«x"
matches = [[2, 3]]
[[test]]
name = "unicode1-only-ascii"
regex = '\bx\b'
haystack = "«x"
matches = [[2, 3]]
unicode = false
[[test]]
name = "unicode2"
regex = '\bx\b'
haystack = "x»"
matches = [[0, 1]]
[[test]]
name = "unicode2-only-ascii"
regex = '\bx\b'
haystack = "x»"
matches = [[0, 1]]
unicode = false
# ASCII word boundaries are completely oblivious to Unicode characters, so
# even though β is a character, an ASCII \b treats it as a word boundary
# when it is adjacent to another ASCII character. (The ASCII \b only looks
# at the leading byte of β.) For Unicode \b, the tests are precisely inverted.
[[test]]
name = "unicode3"
regex = '\bx\b'
haystack = 'áxβ'
matches = []
[[test]]
name = "unicode3-only-ascii"
regex = '\bx\b'
haystack = 'áxβ'
matches = [[2, 3]]
unicode = false
[[test]]
name = "unicode4"
regex = '\Bx\B'
haystack = 'áxβ'
matches = [[2, 3]]
[[test]]
name = "unicode4-only-ascii"
regex = '\Bx\B'
haystack = 'áxβ'
matches = []
unicode = false
utf8 = false
# The same as above, but with \b instead of \B as a sanity check.
[[test]]
name = "unicode5"
regex = '\b'
haystack = "0\U0007EF5E"
matches = [[0, 0], [1, 1]]
[[test]]
name = "unicode5-only-ascii"
regex = '\b'
haystack = "0\U0007EF5E"
matches = [[0, 0], [1, 1]]
unicode = false
utf8 = false
[[test]]
name = "unicode5-noutf8"
regex = '\b'
haystack = '0\xFF\xFF\xFF\xFF'
matches = [[0, 0], [1, 1]]
unescape = true
utf8 = false
[[test]]
name = "unicode5-noutf8-only-ascii"
regex = '\b'
haystack = '0\xFF\xFF\xFF\xFF'
matches = [[0, 0], [1, 1]]
unescape = true
unicode = false
utf8 = false
# Weird special case to ensure that ASCII \B treats each individual code unit
# as a non-word byte. (The specific codepoint is irrelevant. It's an arbitrary
# codepoint that uses 4 bytes in its UTF-8 encoding and is not a member of the
# \w character class.)
[[test]]
name = "unicode5-not"
regex = '\B'
haystack = "0\U0007EF5E"
matches = [[5, 5]]
[[test]]
name = "unicode5-not-only-ascii"
regex = '\B'
haystack = "0\U0007EF5E"
matches = [[2, 2], [3, 3], [4, 4], [5, 5]]
unicode = false
utf8 = false
# This gets no matches since \B only matches in the presence of valid UTF-8
# when Unicode is enabled, even when UTF-8 mode is disabled.
[[test]]
name = "unicode5-not-noutf8"
regex = '\B'
haystack = '0\xFF\xFF\xFF\xFF'
matches = []
unescape = true
utf8 = false
# But this DOES get matches since \B in ASCII mode only looks at individual
# bytes.
[[test]]
name = "unicode5-not-noutf8-only-ascii"
regex = '\B'
haystack = '0\xFF\xFF\xFF\xFF'
matches = [[2, 2], [3, 3], [4, 4], [5, 5]]
unescape = true
unicode = false
utf8 = false
# Some tests of no particular significance.
[[test]]
name = "unicode6"
regex = '\b[0-9]+\b'
haystack = "foo 123 bar 456 quux 789"
matches = [[4, 7], [12, 15], [21, 24]]
[[test]]
name = "unicode7"
regex = '\b[0-9]+\b'
haystack = "foo 123 bar a456 quux 789"
matches = [[4, 7], [22, 25]]
[[test]]
name = "unicode8"
regex = '\b[0-9]+\b'
haystack = "foo 123 bar 456a quux 789"
matches = [[4, 7], [22, 25]]
# A variant of the problem described here:
# https://github.com/google/re2/blob/89567f5de5b23bb5ad0c26cbafc10bdc7389d1fa/re2/dfa.cc#L658-L667
[[test]]
name = "alt-with-assertion-repetition"
regex = '(?:\b|%)+'
haystack = "z%"
bounds = [1, 2]
anchored = true
matches = [[1, 1]]

58
vendor/regex/tests/lib.rs vendored Normal file
View File

@@ -0,0 +1,58 @@
#![cfg_attr(feature = "pattern", feature(pattern))]
mod fuzz;
mod misc;
mod regression;
mod regression_fuzz;
mod replace;
#[cfg(feature = "pattern")]
mod searcher;
mod suite_bytes;
mod suite_bytes_set;
mod suite_string;
mod suite_string_set;
/// Test names that the suite modules hand to `blacklist_iter`. Nothing is
/// blacklisted at the moment, so the list is empty.
const BLACKLIST: &[&str] = &[];
fn suite() -> anyhow::Result<regex_test::RegexTests> {
let _ = env_logger::try_init();
let mut tests = regex_test::RegexTests::new();
macro_rules! load {
($name:expr) => {{
const DATA: &[u8] =
include_bytes!(concat!("../testdata/", $name, ".toml"));
tests.load_slice($name, DATA)?;
}};
}
load!("anchored");
load!("bytes");
load!("crazy");
load!("crlf");
load!("earliest");
load!("empty");
load!("expensive");
load!("flags");
load!("iter");
load!("leftmost-all");
load!("line-terminator");
load!("misc");
load!("multiline");
load!("no-unicode");
load!("overlapping");
load!("regression");
load!("set");
load!("substring");
load!("unicode");
load!("utf8");
load!("word-boundary");
load!("word-boundary-special");
load!("fowler/basic");
load!("fowler/nullsubexpr");
load!("fowler/repetition");
Ok(tests)
}

143
vendor/regex/tests/misc.rs vendored Normal file
View File

@@ -0,0 +1,143 @@
use regex::Regex;
// Compiles the given pattern into a `regex::Regex`, panicking (via `unwrap`)
// when the pattern is rejected.
macro_rules! regex {
    ($re:expr) => {
        regex::Regex::new($re).unwrap()
    };
}
// An unterminated group must be rejected, and the error text must mention the
// unclosed group.
#[test]
fn unclosed_group_error() {
    let msg = Regex::new(r"(").unwrap_err().to_string();
    assert!(msg.contains("unclosed group"), "error message: {msg:?}");
}
// `as_str` and `Display` both give back the original pattern, while `Debug`
// wraps it as `Regex("...")`.
#[test]
fn regex_string() {
    let pattern = r"[a-zA-Z0-9]+";
    let re = regex!(pattern);

    assert_eq!(pattern, re.as_str());
    assert_eq!(pattern, re.to_string());
    assert_eq!(r#"Regex("[a-zA-Z0-9]+")"#, format!("{re:?}"));
}
// The implicit whole-match group and the unnamed group report `None`; only the
// `(?P<a>...)` group reports a name. The iterator's size hint is exact.
#[test]
fn capture_names() {
    let re = regex!(r"(.)(?P<a>.)");
    assert_eq!(3, re.captures_len());

    let names = re.capture_names();
    assert_eq!((3, Some(3)), names.size_hint());

    let collected: Vec<Option<&str>> = names.collect();
    assert_eq!(vec![None, None, Some("a")], collected);
}
// Indexing by position (0 = whole match, 1 = first group) and by group name
// all yield the same text for this pattern.
#[test]
fn capture_index() {
    let re = regex!(r"^(?P<name>.+)$");
    let caps = re.captures("abc").unwrap();

    for got in [&caps[0], &caps[1], &caps["name"]] {
        assert_eq!(got, "abc");
    }
}
// Indexing captures with a group number that the pattern does not define is
// expected to panic (hence `#[should_panic]`). The pattern below only has
// groups 0 and 1.
#[test]
#[should_panic]
fn capture_index_panic_usize() {
let re = regex!(r"^(?P<name>.+)$");
let cap = re.captures("abc").unwrap();
// Index 2 is out of range for this pattern.
let _ = cap[2];
}
// Indexing captures with a group name that the pattern does not define is
// expected to panic (hence `#[should_panic]`). The only named group below is
// `name`.
#[test]
#[should_panic]
fn capture_index_panic_name() {
let re = regex!(r"^(?P<name>.+)$");
let cap = re.captures("abc").unwrap();
// No group called "bad name" exists in the pattern.
let _ = cap["bad name"];
}
// Compile-time check that captures can be indexed by name inside a helper
// that borrows its haystack from the caller. At runtime it also checks that
// the `number` group of "123" has length 3.
#[test]
fn capture_index_lifetime() {
// This is a test of whether the types on `caps["..."]` are general
// enough. If not, this will fail to typecheck.
fn inner(s: &str) -> usize {
let re = regex!(r"(?P<number>[0-9]+)");
let caps = re.captures(s).unwrap();
caps["number"].len()
}
assert_eq!(3, inner("123"));
}
// Exercises `Captures::len`, `get` and `name` on a pattern whose optional
// group `a` does not participate in the match.
#[test]
fn capture_misc() {
    let re = regex!(r"(.)(?P<a>a)?(.)(?P<b>.)");
    let caps = re.captures("abc").unwrap();

    // Small accessors so each expectation reads as a single comparison.
    let span = |i: usize| caps.get(i).map(|m| (m.start(), m.end()));
    let text = |i: usize| caps.get(i).map(|m| m.as_str());

    assert_eq!(5, caps.len());

    assert_eq!(Some((0, 3)), span(0));
    assert_eq!(None, caps.get(2));
    assert_eq!(Some((2, 3)), span(4));

    assert_eq!(Some("abc"), text(0));
    assert_eq!(None, caps.get(2));
    assert_eq!(Some("c"), text(4));

    assert_eq!(None, caps.name("a"));
    assert_eq!(Some("c"), caps.name("b").map(|m| m.as_str()));
}
// Iterating over captures yields one entry per group, with `None` for the
// alternation branch that did not participate (group 3 here).
#[test]
fn sub_capture_matches() {
    let re = regex!(r"([a-z])(([a-z])|([0-9]))");
    let caps = re.captures("a5").unwrap();

    let groups: Vec<Option<&str>> =
        caps.iter().map(|group| group.map(|m| m.as_str())).collect();

    assert_eq!(
        vec![Some("a5"), Some("a"), Some("5"), None, Some("5")],
        groups
    );
}
// Test that the DFA can handle pathological cases. (This should result in the
// DFA's cache being flushed too frequently, which should cause it to quit and
// fall back to the NFA algorithm.)
#[test]
fn dfa_handles_pathological_case() {
    // Produces `count` characters: '1' at every index divisible by three and
    // '0' everywhere else.
    fn ones_and_zeroes(count: usize) -> String {
        (0..count).map(|i| if i % 3 == 0 { '1' } else { '0' }).collect()
    }

    let re = regex!(r"[01]*1[01]{20}$");

    // A long prefix, then a '1' followed by exactly twenty more digits so the
    // anchored tail of the pattern can match.
    let mut text = ones_and_zeroes(100_000);
    text.push('1');
    text.push_str(&ones_and_zeroes(20));

    assert!(re.is_match(&text));
}

94
vendor/regex/tests/regression.rs vendored Normal file
View File

@@ -0,0 +1,94 @@
use regex::Regex;
// Compiles the given pattern into a `regex::Regex`, panicking (via `unwrap`)
// when the pattern is rejected.
macro_rules! regex {
    ($re:expr) => {
        regex::Regex::new($re).unwrap()
    };
}
// Malformed patterns must produce an error rather than a crash.
//
// See: https://github.com/rust-lang/regex/issues/48
#[test]
fn invalid_regexes_no_crash() {
    for pat in ["(*)", "(?:?)", "(?)", "*"] {
        assert!(Regex::new(pat).is_err());
    }
}
// A large bounded repetition must compile and search normally.
//
// See: https://github.com/rust-lang/regex/issues/98
#[test]
fn regression_many_repeat_stack_overflow() {
    let re = regex!("^.{1,2500}");
    let ranges: Vec<_> = re.find_iter("a").map(|m| m.range()).collect();
    assert_eq!(vec![0..1], ranges);
}
// A repetition applied directly to a flags directive is an error.
//
// See: https://github.com/rust-lang/regex/issues/555
#[test]
fn regression_invalid_repetition_expr() {
    let compiled = Regex::new("(?m){1,1}");
    assert!(compiled.is_err());
}
// A flags directive nested inside several groups is accepted.
//
// See: https://github.com/rust-lang/regex/issues/527
#[test]
fn regression_invalid_flags_expression() {
    let compiled = Regex::new("(((?x)))");
    assert!(compiled.is_ok());
}
// A named group that follows a repeated group still captures correctly.
//
// See: https://github.com/rust-lang/regex/issues/129
#[test]
fn regression_captures_rep() {
    let re = regex!(r"([a-f]){2}(?P<foo>[x-z])");
    let foo = re
        .captures("abx")
        .and_then(|caps| caps.name("foo"))
        .map(|m| m.as_str());
    assert_eq!(Some("x"), foo);
}
// A byte regex must report no match when the haystack ends in a byte that is
// not valid UTF-8 and is not in the `[ab]` class.
//
// See: https://github.com/BurntSushi/ripgrep/issues/1247
#[cfg(feature = "unicode-perl")]
#[test]
fn regression_nfa_stops1() {
    let re = regex::bytes::Regex::new(r"\bs(?:[ab])").unwrap();
    let mut found = re.find_iter(b"s\xE4");
    assert!(found.next().is_none());
}
// "win" preceded by a word character (as in "Darwin") must not match, while a
// standalone "Windows" component must.
//
// See: https://github.com/rust-lang/regex/issues/981
#[cfg(feature = "unicode")]
#[test]
fn regression_bad_word_boundary() {
    let re = regex!(r#"(?i:(?:\b|_)win(?:32|64|dows)?(?:\b|_))"#);

    assert!(!re.is_match("ubi-Darwin-x86_64.tar.gz"));
    assert!(re.is_match("ubi-Windows-x86_64.zip"));
}
// A pattern that leans heavily on `\d` and `\s` compiles when the
// `unicode-perl` feature is enabled.
//
// See: https://github.com/rust-lang/regex/issues/982
#[cfg(feature = "unicode-perl")]
#[test]
fn regression_unicode_perl_not_enabled() {
    let pat = r"(\d+\s?(years|year|y))?\s?(\d+\s?(months|month|m))?\s?(\d+\s?(weeks|week|w))?\s?(\d+\s?(days|day|d))?\s?(\d+\s?(hours|hour|h))?";
    let compiled = Regex::new(pat);
    assert!(compiled.is_ok());
}
// Stacked, enormous repetition counts must be rejected with an error.
//
// See: https://github.com/rust-lang/regex/issues/995
#[test]
fn regression_big_regex_overflow() {
    let pat = r" {2147483516}{2147483416}{5}";
    let compiled = Regex::new(pat);
    assert!(compiled.is_err());
}
// An alternation of "<lowercase letter>A" literals must not match inside an
// all-uppercase haystack.
//
// See: https://github.com/rust-lang/regex/issues/999
#[test]
fn regression_complete_literals_suffix_incorrect() {
    // Builds "aA|bA|...|zA".
    let pattern = ('a'..='z')
        .map(|letter| format!("{letter}A"))
        .collect::<Vec<_>>()
        .join("|");
    let re = regex!(&pattern);

    assert_eq!(0, re.find_iter("FUBAR").count());
}

61
vendor/regex/tests/regression_fuzz.rs vendored Normal file
View File

@@ -0,0 +1,61 @@
// These tests are only run for the "default" test target because some of them
// can take quite a long time. Some of them take long enough that it's not
// practical to run them in debug mode. :-/
use regex::Regex;
// Compiles the given pattern into a `regex::Regex`, panicking (via `unwrap`)
// when the pattern is rejected.
macro_rules! regex {
    ($re:expr) => {
        regex::Regex::new($re).unwrap()
    };
}
// Compiling this fuzzer-found pattern must succeed without panicking.
//
// See: https://oss-fuzz.com/testcase-detail/5673225499181056
//
// Ignored by default since it takes too long in debug mode (almost a minute).
#[test]
#[ignore]
fn fuzz1() {
    let _ = regex!(r"1}{55}{0}*{1}{55}{55}{5}*{1}{55}+{56}|;**");
}
// The negation of `any` must be handled without panicking; this test asserts
// that the pattern compiles.
//
// See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=26505
// See: https://github.com/rust-lang/regex/issues/722
#[test]
#[cfg(feature = "unicode")]
fn empty_any_errors_no_panic() {
    let compiled = Regex::new(r"\P{any}");
    assert!(compiled.is_ok());
}
// A very large regex must fail to compile rather than consume gratuitous
// amounts of memory. The original problem was that the compiler did not
// account correctly for the memory used by Unicode character classes.
//
// See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=33579
#[test]
fn big_regex_fails_to_compile() {
    let pat = "[\u{0}\u{e}\u{2}\\w~~>[l\t\u{0}]p?<]{971158}";
    let compiled = Regex::new(pat);
    assert!(compiled.is_err());
}
// Caught on master before a release went out: this alternation must compile.
// (The test name is a placeholder and is kept as-is.)
//
// See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=58173
#[test]
fn todo() {
    let compiled = Regex::new("(?:z|xx)@|xx");
    assert!(compiled.is_ok());
}
// This was caught by the fuzzer, and then minimized by hand.
//
// The underlying bug was in DFA determinization, which mishandled NFA fail
// states: the first branch contains the empty class `[a&&b]` and can never
// match, but that must not stop the second branch from matching.
#[test]
fn fail_branch_prevents_match() {
    let re = Regex::new(r".*[a&&b]A|B").unwrap();
    assert!(re.is_match("B"));
}

183
vendor/regex/tests/replace.rs vendored Normal file
View File

@@ -0,0 +1,183 @@
// Generates a `#[test]` named `$name` that compiles `$re`, calls the `Regex`
// method `$which` (for example `replace` or `replace_all`) with `$search` and
// `$replace`, and asserts that the output equals `$result`.
macro_rules! replace {
    (
        $name:ident, $which:ident, $re:expr,
        $search:expr, $replace:expr, $result:expr
    ) => {
        #[test]
        fn $name() {
            let re = regex::Regex::new($re).unwrap();
            assert_eq!(re.$which($search, $replace), $result);
        }
    };
}
// Basic replacement behaviour. Arguments are: test name, method, pattern,
// haystack, replacement, expected output.
//
// `replace` rewrites only the first match; `replace_all` rewrites every match.
replace!(first, replace, r"[0-9]", "age: 26", "Z", "age: Z6");
replace!(plus, replace, r"[0-9]+", "age: 26", "Z", "age: Z");
replace!(all, replace_all, r"[0-9]", "age: 26", "Z", "age: ZZ");
// `$1` / `$2` in the replacement refer to numbered capture groups.
replace!(groups, replace, r"([^ ]+)[ ]+([^ ]+)", "w1 w2", "$2 $1", "w2 w1");
// `$$` produces a literal `$`, so `$$1` comes out as the text "$1".
replace!(
double_dollar,
replace,
r"([^ ]+)[ ]+([^ ]+)",
"w1 w2",
"$2 $$1",
"w2 $1"
);
// NOTE(review): disabled test. Presumably `$1y` is read as a reference to a
// group named `1y` rather than group 1 followed by "y" (compare the
// `capture_longest_possible_name` test) -- confirm before re-enabling.
// replace!(adjacent_index, replace,
// r"([^aeiouy])ies$", "skies", "$1y", "sky");
// Named groups can be referenced as `$name`.
replace!(
named,
replace_all,
r"(?P<first>[^ ]+)[ ]+(?P<last>[^ ]+)(?P<space>[ ]*)",
"w1 w2 w3 w4",
"$last $first$space",
"w2 w1 w4 w3"
);
// Replacing with the empty string strips leading and trailing blanks.
replace!(
trim,
replace_all,
"^[ \t]+|[ \t]+$",
" \t trim me\t \t",
"",
"trim me"
);
replace!(number_hyphen, replace, r"(.)(.)", "ab", "$1-$2", "a-b");
// NOTE(review): disabled test. Presumably `$1_` is read as a group named `1_`
// -- confirm before re-enabling.
// replace!(number_underscore, replace, r"(.)(.)", "ab", "$1_$2", "a_b");
// Group expansion with `replace_all`.
replace!(
simple_expand,
replace_all,
r"([a-z]) ([a-z])",
"a b",
"$2 $1",
"b a"
);
// `$$` is an escaped dollar sign: the output contains a literal `$` and no
// group expansion takes place for the text that follows it.
replace!(
literal_dollar1,
replace_all,
r"([a-z]+) ([a-z]+)",
"a b",
"$$1",
"$1"
);
replace!(
literal_dollar2,
replace_all,
r"([a-z]+) ([a-z]+)",
"a b",
"$2 $$c $1",
"b $c a"
);
// `regex::NoExpand` inserts the replacement verbatim: neither `$2 $1` nor
// `$$1` is interpreted.
replace!(
no_expand1,
replace,
r"([^ ]+)[ ]+([^ ]+)",
"w1 w2",
regex::NoExpand("$2 $1"),
"$2 $1"
);
replace!(
no_expand2,
replace,
r"([^ ]+)[ ]+([^ ]+)",
"w1 w2",
regex::NoExpand("$$1"),
"$$1"
);
// Closures taking `&regex::Captures` can be used as the replacement. This one
// returns the first character of group 1.
replace!(
closure_returning_reference,
replace,
r"([0-9]+)",
"age: 26",
|captures: &regex::Captures<'_>| { captures[1][0..1].to_owned() },
"age: 2"
);
// This closure ignores the captures and returns a fixed string.
replace!(
closure_returning_value,
replace,
r"[0-9]+",
"age: 26",
|_captures: &regex::Captures<'_>| "Z".to_owned(),
"age: Z"
);
// See https://github.com/rust-lang/regex/issues/314
replace!(
match_at_start_replace_with_empty,
replace_all,
r"foo",
"foobar",
"",
"bar"
);
// See https://github.com/rust-lang/regex/issues/393
replace!(single_empty_match, replace, r"^", "bar", "foo", "foobar");
// See https://github.com/rust-lang/regex/issues/399
//
// `${1}a` expands group 1 and then appends "a", whereas `$1a` is treated as a
// reference to a (non-existent) group named `1a` and expands to nothing.
replace!(
capture_longest_possible_name,
replace_all,
r"(.)",
"b",
"${1}a $1a",
"ba "
);
// The remaining cases pass the same replacement text through different
// argument types (`String`, `&String`, and borrowed/owned `Cow<str>` by value
// and by reference). All of them must behave like the plain `&str` case.
replace!(
impl_string,
replace,
r"[0-9]",
"age: 26",
"Z".to_string(),
"age: Z6"
);
replace!(
impl_string_ref,
replace,
r"[0-9]",
"age: 26",
&"Z".to_string(),
"age: Z6"
);
replace!(
impl_cow_str_borrowed,
replace,
r"[0-9]",
"age: 26",
std::borrow::Cow::<'_, str>::Borrowed("Z"),
"age: Z6"
);
replace!(
impl_cow_str_borrowed_ref,
replace,
r"[0-9]",
"age: 26",
&std::borrow::Cow::<'_, str>::Borrowed("Z"),
"age: Z6"
);
replace!(
impl_cow_str_owned,
replace,
r"[0-9]",
"age: 26",
std::borrow::Cow::<'_, str>::Owned("Z".to_string()),
"age: Z6"
);
replace!(
impl_cow_str_owned_ref,
replace,
r"[0-9]",
"age: 26",
&std::borrow::Cow::<'_, str>::Owned("Z".to_string()),
"age: Z6"
);
// `replacen` with a limit of 2 rewrites only the first two matches.
#[test]
fn replacen_no_captures() {
    let digit = regex::Regex::new(r"[0-9]").unwrap();
    let replaced = digit.replacen("age: 1234", 2, "Z");
    assert_eq!(replaced, "age: ZZ34");
}
// `replacen` expands capture references for each of the (at most two)
// replaced matches.
#[test]
fn replacen_with_captures() {
    let digit = regex::Regex::new(r"([0-9])").unwrap();
    let replaced = digit.replacen("age: 1234", 2, "${1}Z");
    assert_eq!(replaced, "age: 1Z2Z34");
}

93
vendor/regex/tests/searcher.rs vendored Normal file
View File

@@ -0,0 +1,93 @@
// Generates a `#[test]` named `$name` that drives the `std::str::pattern`
// searcher obtained from the regex `$re` over `$haystack` and compares the
// sequence of `SearchStep`s (everything before `Done`) with the expected ones.
//
// The first three arms are front ends that normalise their input and then
// recurse into the last arm, which is selected by the literal `vec` marker.
macro_rules! searcher {
// No expected steps given: expect an empty step list.
($name:ident, $re:expr, $haystack:expr) => (
searcher!($name, $re, $haystack, vec vec![]);
);
// Expected steps written with a trailing comma.
($name:ident, $re:expr, $haystack:expr, $($steps:expr,)*) => (
searcher!($name, $re, $haystack, vec vec![$($steps),*]);
);
// Expected steps written without a trailing comma.
($name:ident, $re:expr, $haystack:expr, $($steps:expr),*) => (
searcher!($name, $re, $haystack, vec vec![$($steps),*]);
);
// Worker arm: emits the actual test function.
($name:ident, $re:expr, $haystack:expr, vec $expect_steps:expr) => (
#[test]
#[allow(unused_imports)]
fn $name() {
// Imported inside the function so invocations can write `Match(..)` and
// `Reject(..)` unqualified; not every invocation uses every name.
use std::str::pattern::{Pattern, Searcher};
use std::str::pattern::SearchStep::{Match, Reject, Done};
let re = regex::Regex::new($re).unwrap();
let mut se = re.into_searcher($haystack);
let mut got_steps = vec![];
// Collect every step up to, but not including, `Done`.
loop {
match se.next() {
Done => break,
step => { got_steps.push(step); }
}
}
assert_eq!(got_steps, $expect_steps);
}
);
}
// Expected step sequences. `Match(a, b)` / `Reject(a, b)` cover the byte range
// `a..b` of the haystack.
//
// The empty regex matches at every position, with the bytes in between
// reported as rejects.
searcher!(searcher_empty_regex_empty_haystack, r"", "", Match(0, 0));
searcher!(
searcher_empty_regex,
r"",
"ab",
Match(0, 0),
Reject(0, 1),
Match(1, 1),
Reject(1, 2),
Match(2, 2)
);
// A pattern that needs at least one character yields no steps on an empty
// haystack.
searcher!(searcher_empty_haystack, r"\d", "");
searcher!(searcher_one_match, r"\d", "5", Match(0, 1));
searcher!(searcher_no_match, r"\d", "a", Reject(0, 1));
searcher!(
searcher_two_adjacent_matches,
r"\d",
"56",
Match(0, 1),
Match(1, 2)
);
searcher!(
searcher_two_non_adjacent_matches,
r"\d",
"5a6",
Match(0, 1),
Reject(1, 2),
Match(2, 3)
);
searcher!(searcher_reject_first, r"\d", "a6", Reject(0, 1), Match(1, 2));
// In the two tests below, the trailing comment on each step shows the part of
// the haystack that ends at that step's end offset.
searcher!(
searcher_one_zero_length_matches,
r"\d*",
"a1b2",
Match(0, 0), // ^
Reject(0, 1), // a
Match(1, 2), // a1
Reject(2, 3), // a1b
Match(3, 4), // a1b2
);
searcher!(
searcher_many_zero_length_matches,
r"\d*",
"a1bbb2",
Match(0, 0), // ^
Reject(0, 1), // a
Match(1, 2), // a1
Reject(2, 3), // a1b
Match(3, 3), // a1b (empty match)
Reject(3, 4), // a1bb
Match(4, 4), // a1bb (empty match)
Reject(4, 5), // a1bbb
Match(5, 6), // a1bbb2
);
// Offsets are byte offsets. Each Roman numeral in the haystack occupies three
// bytes in UTF-8 and each ASCII digit one byte, so the lazy `.+?` produces one
// match per character: 0..3, 3..4, 4..7 and 7..8.
searcher!(
    searcher_unicode,
    r".+?",
    "Ⅰ1Ⅱ2",
    Match(0, 3),
    Match(3, 4),
    Match(4, 7),
    Match(7, 8)
);

108
vendor/regex/tests/suite_bytes.rs vendored Normal file
View File

@@ -0,0 +1,108 @@
use {
anyhow::Result,
regex::bytes::{Regex, RegexBuilder},
regex_test::{
CompiledRegex, Match, RegexTest, Span, TestResult, TestRunner,
},
};
/// Runs every test returned by `crate::suite()` against `regex::bytes::Regex`
/// as built by `compiler`, expanding each test into `is_match`, `find` and
/// `captures` variants.
#[test]
fn default() -> Result<()> {
    let mut runner = TestRunner::new()?;
    runner.expand(&["is_match", "find", "captures"], |test| test.compiles());
    runner.blacklist_iter(super::BLACKLIST);

    let tests = crate::suite()?;
    runner.test_iter(tests.iter(), compiler).assert();
    Ok(())
}
/// Executes one expanded test against `re` and reports what was found.
///
/// The variant to run is selected by `test.additional_name()`:
///
/// * `"is_match"` reports whether the haystack matches at all.
/// * `"find"` reports the span of each match, up to the test's match limit.
/// * `"captures"` reports the capture groups of each match, up to the test's
///   match limit.
///
/// Any other name yields a failing `TestResult`.
fn run_test(re: &Regex, test: &RegexTest) -> TestResult {
    let hay = test.haystack();
    // A test without an explicit limit consumes every match.
    let limit = test.match_limit().unwrap_or(usize::MAX);

    match test.additional_name() {
        "is_match" => TestResult::matched(re.is_match(hay)),
        "find" => TestResult::matches(re.find_iter(hay).take(limit).map(
            |m| Match {
                id: 0,
                span: Span { start: m.start(), end: m.end() },
            },
        )),
        "captures" => TestResult::captures(
            re.captures_iter(hay)
                .take(limit)
                .map(|caps| testify_captures(&caps)),
        ),
        name => TestResult::fail(&format!("unrecognized test name: {name}")),
    }
}
/// Turns a regex test into a closure that searches with a `bytes::Regex`.
///
/// Tests whose configuration the top-level API cannot express are not
/// compiled; a `CompiledRegex` that skips the test is returned instead. A
/// pattern that fails to build is reported as an error.
fn compiler(
    test: &RegexTest,
    _patterns: &[String],
) -> anyhow::Result<CompiledRegex> {
    let regexes = test.regexes();
    let bounds = test.bounds();

    let unsupported =
        // We're only testing bytes::Regex here, which supports one pattern
        // only.
        regexes.len() != 1
        // We only test is_match, find_iter and captures_iter. All of those
        // are leftmost searches.
        || !matches!(test.search_kind(), regex_test::SearchKind::Leftmost)
        // The top-level single-pattern regex API always uses leftmost-first.
        || !matches!(test.match_kind(), regex_test::MatchKind::LeftmostFirst)
        // The top-level regex API always runs unanchored searches. ... But we
        // can handle tests that are anchored but have only one match.
        || (test.anchored() && test.match_limit() != Some(1))
        // We don't support tests with explicit search bounds. We could
        // probably support this by using the 'find_at' (and such) APIs.
        || bounds.start != 0
        || bounds.end != test.haystack().len()
        // The bytes::Regex API specifically does not support enabling UTF-8
        // mode. It could I suppose, but currently it does not. That is, it
        // permits matches to have offsets that split codepoints.
        || test.utf8()
        // If the test requires Unicode but the Unicode feature isn't enabled,
        // skip it. This is a little aggressive, but the test suite doesn't
        // have any easy way of communicating which Unicode features are
        // needed.
        || (test.unicode() && !cfg!(feature = "unicode"));
    if unsupported {
        return Ok(CompiledRegex::skip());
    }

    let re = RegexBuilder::new(&regexes[0])
        .case_insensitive(test.case_insensitive())
        .unicode(test.unicode())
        .line_terminator(test.line_terminator())
        .build()?;
    Ok(CompiledRegex::compiled(move |test| run_test(&re, test)))
}
/// Translates the groups of a `bytes::Captures` into the capture
/// representation used by the test suite. Groups that did not participate
/// stay `None`.
fn testify_captures(
    caps: &regex::bytes::Captures<'_>,
) -> regex_test::Captures {
    let groups = caps.iter().map(|maybe_match| {
        maybe_match
            .map(|m| regex_test::Span { start: m.start(), end: m.end() })
    });
    // Unwrapping is fine: `caps` is assumed to describe a match, and a match
    // always has at least one group, the first of which is never `None`.
    regex_test::Captures::new(0, groups).unwrap()
}

71
vendor/regex/tests/suite_bytes_set.rs vendored Normal file
View File

@@ -0,0 +1,71 @@
use {
anyhow::Result,
regex::bytes::{RegexSet, RegexSetBuilder},
regex_test::{CompiledRegex, RegexTest, TestResult, TestRunner},
};
/// Runs every test returned by `crate::suite()` against
/// `regex::bytes::RegexSet` as built by `compiler`, expanding each test into
/// `is_match` and `which` variants.
#[test]
fn default() -> Result<()> {
    let mut runner = TestRunner::new()?;
    runner.expand(&["is_match", "which"], |test| test.compiles());
    runner.blacklist_iter(super::BLACKLIST);

    let tests = crate::suite()?;
    runner.test_iter(tests.iter(), compiler).assert();
    Ok(())
}
/// Executes one expanded test against the set `re`.
///
/// `"is_match"` reports whether any pattern matches and `"which"` reports the
/// matching patterns; any other variant name yields a failing result.
fn run_test(re: &RegexSet, test: &RegexTest) -> TestResult {
    let hay = test.haystack();
    match test.additional_name() {
        "which" => TestResult::which(re.matches(hay).iter()),
        "is_match" => TestResult::matched(re.is_match(hay)),
        name => TestResult::fail(&format!("unrecognized test name: {name}")),
    }
}
/// Converts the given regex test to a closure that searches with a
/// `bytes::RegexSet`. If the test configuration is unsupported, then a
/// `CompiledRegex` that skips the test is returned. A pattern set that fails
/// to build is reported as an error.
fn compiler(
    test: &RegexTest,
    _patterns: &[String],
) -> anyhow::Result<CompiledRegex> {
    let bounds = test.bounds();

    let unsupported =
        // The top-level RegexSet API only supports "overlapping" semantics.
        !matches!(test.search_kind(), regex_test::SearchKind::Overlapping)
        // The top-level RegexSet API only supports "all" semantics.
        || !matches!(test.match_kind(), regex_test::MatchKind::All)
        // The top-level RegexSet API always runs unanchored searches.
        || test.anchored()
        // We don't support tests with explicit search bounds.
        || bounds.start != 0
        || bounds.end != test.haystack().len()
        // The bytes::RegexSet API specifically does not support enabling
        // UTF-8 mode. It could I suppose, but currently it does not.
        || test.utf8()
        // If the test requires Unicode but the Unicode feature isn't enabled,
        // skip it. This is a little aggressive, but the test suite doesn't
        // have any easy way of communicating which Unicode features are
        // needed.
        || (test.unicode() && !cfg!(feature = "unicode"));
    if unsupported {
        return Ok(CompiledRegex::skip());
    }

    let re = RegexSetBuilder::new(test.regexes())
        .case_insensitive(test.case_insensitive())
        .unicode(test.unicode())
        .line_terminator(test.line_terminator())
        .build()?;
    Ok(CompiledRegex::compiled(move |test| run_test(&re, test)))
}

113
vendor/regex/tests/suite_string.rs vendored Normal file
View File

@@ -0,0 +1,113 @@
use {
anyhow::Result,
regex::{Regex, RegexBuilder},
regex_test::{
CompiledRegex, Match, RegexTest, Span, TestResult, TestRunner,
},
};
/// Runs every test returned by `crate::suite()` against `regex::Regex` as
/// built by `compiler`, expanding each test into `is_match`, `find` and
/// `captures` variants.
#[test]
fn default() -> Result<()> {
    let mut runner = TestRunner::new()?;
    runner.expand(&["is_match", "find", "captures"], |test| test.compiles());
    runner.blacklist_iter(super::BLACKLIST);

    let tests = crate::suite()?;
    runner.test_iter(tests.iter(), compiler).assert();
    Ok(())
}
/// Executes one expanded test against `re` and reports what was found.
///
/// The haystack must be valid UTF-8 because `regex::Regex` searches `&str`;
/// otherwise a failing `TestResult` is returned. The variant to run is
/// selected by `test.additional_name()`:
///
/// * `"is_match"` reports whether the haystack matches at all.
/// * `"find"` reports the span of each match, up to the test's match limit.
/// * `"captures"` reports the capture groups of each match, up to the test's
///   match limit.
///
/// Any other name yields a failing `TestResult`.
fn run_test(re: &Regex, test: &RegexTest) -> TestResult {
    let hay = match std::str::from_utf8(test.haystack()) {
        Ok(hay) => hay,
        Err(err) => {
            return TestResult::fail(&format!(
                "haystack is not valid UTF-8: {err}"
            ));
        }
    };
    // A test without an explicit limit consumes every match.
    let limit = test.match_limit().unwrap_or(usize::MAX);

    match test.additional_name() {
        "is_match" => TestResult::matched(re.is_match(hay)),
        "find" => TestResult::matches(re.find_iter(hay).take(limit).map(
            |m| Match {
                id: 0,
                span: Span { start: m.start(), end: m.end() },
            },
        )),
        "captures" => TestResult::captures(
            re.captures_iter(hay)
                .take(limit)
                .map(|caps| testify_captures(&caps)),
        ),
        name => TestResult::fail(&format!("unrecognized test name: {name}")),
    }
}
/// Converts the given regex test to a closure that searches with a
/// `regex::Regex`. If the test configuration is unsupported, then a
/// `CompiledRegex` that skips the test is returned. A pattern that fails to
/// build is reported as an error.
fn compiler(
    test: &RegexTest,
    _patterns: &[String],
) -> anyhow::Result<CompiledRegex> {
    let regexes = test.regexes();
    let bounds = test.bounds();

    let unsupported =
        // We're only testing Regex here, which supports one pattern only.
        regexes.len() != 1
        // We only test is_match, find_iter and captures_iter. All of those
        // are leftmost searches.
        || !matches!(test.search_kind(), regex_test::SearchKind::Leftmost)
        // The top-level single-pattern regex API always uses leftmost-first.
        || !matches!(test.match_kind(), regex_test::MatchKind::LeftmostFirst)
        // The top-level regex API always runs unanchored searches. ... But we
        // can handle tests that are anchored but have only one match.
        || (test.anchored() && test.match_limit() != Some(1))
        // We don't support tests with explicit search bounds. We could
        // probably support this by using the 'find_at' (and such) APIs.
        || bounds.start != 0
        || bounds.end != test.haystack().len()
        // The Regex API specifically does not support disabling UTF-8 mode
        // because it can only search &str which is always valid UTF-8.
        || !test.utf8()
        // If the test requires Unicode but the Unicode feature isn't enabled,
        // skip it. This is a little aggressive, but the test suite doesn't
        // have any easy way of communicating which Unicode features are
        // needed.
        || (test.unicode() && !cfg!(feature = "unicode"));
    if unsupported {
        return Ok(CompiledRegex::skip());
    }

    let re = RegexBuilder::new(&regexes[0])
        .case_insensitive(test.case_insensitive())
        .unicode(test.unicode())
        .line_terminator(test.line_terminator())
        .build()?;
    Ok(CompiledRegex::compiled(move |test| run_test(&re, test)))
}
/// Translates the groups of a `regex::Captures` into the capture
/// representation used by the test suite. Groups that did not participate
/// stay `None`.
fn testify_captures(caps: &regex::Captures<'_>) -> regex_test::Captures {
    let groups = caps.iter().map(|maybe_match| {
        maybe_match
            .map(|m| regex_test::Span { start: m.start(), end: m.end() })
    });
    // Unwrapping is fine: `caps` is assumed to describe a match, and a match
    // always has at least one group, the first of which is never `None`.
    regex_test::Captures::new(0, groups).unwrap()
}

78
vendor/regex/tests/suite_string_set.rs vendored Normal file
View File

@@ -0,0 +1,78 @@
use {
anyhow::Result,
regex::{RegexSet, RegexSetBuilder},
regex_test::{CompiledRegex, RegexTest, TestResult, TestRunner},
};
/// Runs every test returned by `crate::suite()` against `regex::RegexSet` as
/// built by `compiler`, expanding each test into `is_match` and `which`
/// variants.
#[test]
fn default() -> Result<()> {
    let mut runner = TestRunner::new()?;
    runner.expand(&["is_match", "which"], |test| test.compiles());
    runner.blacklist_iter(super::BLACKLIST);

    let tests = crate::suite()?;
    runner.test_iter(tests.iter(), compiler).assert();
    Ok(())
}
/// Executes one expanded test against the set `re`.
///
/// The haystack must be valid UTF-8, otherwise a failing result is returned.
/// `"is_match"` reports whether any pattern matches and `"which"` reports the
/// matching patterns; any other variant name yields a failing result.
fn run_test(re: &RegexSet, test: &RegexTest) -> TestResult {
    let hay = match std::str::from_utf8(test.haystack()) {
        Err(err) => {
            return TestResult::fail(&format!(
                "haystack is not valid UTF-8: {err}"
            ));
        }
        Ok(text) => text,
    };
    match test.additional_name() {
        "which" => TestResult::which(re.matches(hay).iter()),
        "is_match" => TestResult::matched(re.is_match(hay)),
        name => TestResult::fail(&format!("unrecognized test name: {name}")),
    }
}
/// Converts the given regex test to a closure that searches with a
/// `regex::RegexSet`. If the test configuration is unsupported, then a
/// `CompiledRegex` that skips the test is returned. A pattern set that fails
/// to build is reported as an error.
fn compiler(
    test: &RegexTest,
    _patterns: &[String],
) -> anyhow::Result<CompiledRegex> {
    let bounds = test.bounds();

    let unsupported =
        // The top-level RegexSet API only supports "overlapping" semantics.
        !matches!(test.search_kind(), regex_test::SearchKind::Overlapping)
        // The top-level RegexSet API only supports "all" semantics.
        || !matches!(test.match_kind(), regex_test::MatchKind::All)
        // The top-level RegexSet API always runs unanchored searches.
        || test.anchored()
        // We don't support tests with explicit search bounds.
        || bounds.start != 0
        || bounds.end != test.haystack().len()
        // The RegexSet API specifically does not support disabling UTF-8 mode
        // because it can only search &str which is always valid UTF-8.
        || !test.utf8()
        // If the test requires Unicode but the Unicode feature isn't enabled,
        // skip it. This is a little aggressive, but the test suite doesn't
        // have any easy way of communicating which Unicode features are
        // needed.
        || (test.unicode() && !cfg!(feature = "unicode"));
    if unsupported {
        return Ok(CompiledRegex::skip());
    }

    let re = RegexSetBuilder::new(test.regexes())
        .case_insensitive(test.case_insensitive())
        .unicode(test.unicode())
        .line_terminator(test.line_terminator())
        .build()?;
    Ok(CompiledRegex::compiled(move |test| run_test(&re, test)))
}