Vendor dependencies for 0.3.0 release

This commit is contained in:
2025-09-27 10:29:08 -05:00
parent 0c8d39d483
commit 82ab7f317b
26803 changed files with 16134934 additions and 0 deletions

22
vendor/regex/testdata/README.md vendored Normal file
View File

@@ -0,0 +1,22 @@
This directory contains a large suite of regex tests defined in a TOML format.
They are used to drive tests in `tests/lib.rs`, `regex-automata/tests/lib.rs`
and `regex-lite/tests/lib.rs`.
See the [`regex-test`][regex-test] crate documentation for an explanation of
the format and how it generates tests.
The basic idea here is that we have many different regex engines but generally
one set of tests. We want to be able to run those tests (or most of them) on
every engine. Prior to `regex 1.9`, we used to do this with a hodge podge soup
of macros and a different test executable for each engine. It overall took a
longer time to compile, was harder to maintain, and it made the test definitions
themselves less clear.
In `regex 1.9`, when we moved over to `regex-automata`, the situation got a lot
worse because of an increase in the number of engines. So I devised an engine
independent format for testing regex patterns and their semantics.
Note: the naming scheme used in these tests isn't terribly consistent. It would
be great to fix that.
[regex-test]: https://docs.rs/regex-test

127
vendor/regex/testdata/anchored.toml vendored Normal file
View File

@@ -0,0 +1,127 @@
# These tests are specifically geared toward searches with 'anchored = true'.
# While they are interesting in their own right, they are particularly
# important for testing the one-pass DFA since the one-pass DFA can't work in
# unanchored contexts.
#
# Note that "anchored" in this context does not mean "^". Anchored searches are
# searches whose matches must begin at the start of the search, which may not
# be at the start of the haystack. That's why anchored searches---and there are
# some examples below---can still report multiple matches. This occurs when the
# matches are adjacent to one another.
#
# In the tests below that use a capture group, each entry of 'matches' lists
# the overall match span first, followed by the span of group 1.
#
# The greedy repetition consumes the whole haystack in a single match; group 1
# is left holding the last iteration, [6, 9].
[[test]]
name = "greedy"
regex = '(abc)+'
haystack = "abcabcabc"
matches = [
[[0, 9], [6, 9]],
]
anchored = true
# When an "earliest" search is used, greediness doesn't really exist because
# matches are reported as soon as they are known.
[[test]]
name = "greedy-earliest"
regex = '(abc)+'
haystack = "abcabcabc"
matches = [
[[0, 3], [0, 3]],
[[3, 6], [3, 6]],
[[6, 9], [6, 9]],
]
anchored = true
search-kind = "earliest"
# A non-greedy repetition stops after a single iteration, so the anchored
# search reports three adjacent matches rather than one long match.
[[test]]
name = "nongreedy"
regex = '(abc)+?'
haystack = "abcabcabc"
matches = [
[[0, 3], [0, 3]],
[[3, 6], [3, 6]],
[[6, 9], [6, 9]],
]
anchored = true
# When "all" semantics are used, non-greediness doesn't exist since the longest
# possible match is always taken.
[[test]]
name = "nongreedy-all"
regex = '(abc)+?'
haystack = "abcabcabc"
matches = [
[[0, 9], [6, 9]],
]
anchored = true
match-kind = "all"
# No 'unicode' key is set here; presumably Unicode mode is the default (confirm
# against the regex-test docs). '\w+' covers the three 'β' characters, which
# are 2 bytes each in UTF-8 (hence the end offset 6), and stops before '☃'.
[[test]]
name = "word-boundary-unicode-01"
regex = '\b\w+\b'
haystack = 'βββ☃'
matches = [[0, 6]]
anchored = true
# With 'unicode = false', 'β' is not a word character, so the match ends
# after 'abc'.
[[test]]
name = "word-boundary-nounicode-01"
regex = '\b\w+\b'
haystack = 'abcβ'
matches = [[0, 3]]
anchored = true
unicode = false
# Tests that '.c' doesn't match 'abc' when performing an anchored search from
# the beginning of the haystack. This test found two different bugs in the
# PikeVM and the meta engine.
#
# (An unanchored search would find 'bc' at [1, 3]; an anchored search must not
# skip ahead to it.)
[[test]]
name = "no-match-at-start"
regex = '.c'
haystack = 'abc'
matches = []
anchored = true
# Like above, but at a non-zero start offset.
#
# 'bounds' limits the search to the given haystack range, so this anchored
# search begins at offset 1.
[[test]]
name = "no-match-at-start-bounds"
regex = '.c'
haystack = 'aabc'
bounds = [1, 4]
matches = []
anchored = true
# This is like no-match-at-start, but hits the "reverse inner" optimization
# inside the meta engine. (no-match-at-start hits the "reverse suffix"
# optimization.)
[[test]]
name = "no-match-at-start-reverse-inner"
regex = '.c[a-z]'
haystack = 'abcz'
matches = []
anchored = true
# Like above, but at a non-zero start offset.
[[test]]
name = "no-match-at-start-reverse-inner-bounds"
regex = '.c[a-z]'
haystack = 'aabcz'
bounds = [1, 5]
matches = []
anchored = true
# Same as no-match-at-start, but applies to the meta engine's "reverse
# anchored" optimization.
[[test]]
name = "no-match-at-start-reverse-anchored"
regex = '.c[a-z]$'
haystack = 'abcz'
matches = []
anchored = true
# Like above, but at a non-zero start offset.
[[test]]
name = "no-match-at-start-reverse-anchored-bounds"
regex = '.c[a-z]$'
haystack = 'aabcz'
bounds = [1, 5]
matches = []
anchored = true

235
vendor/regex/testdata/bytes.toml vendored Normal file
View File

@@ -0,0 +1,235 @@
# These are tests specifically crafted for regexes that can match arbitrary
# bytes. In some cases, we also test the Unicode variant as well, just because
# it's good sense to do so. But also, these tests aren't really about Unicode,
# but whether matches are only reported at valid UTF-8 boundaries. For most
# tests in this entire collection, utf8 = true. But for these tests, we use
# utf8 = false.
#
# Word boundaries: 'δ' is a word character only when Unicode mode is enabled.
# With 'unicode = false' there is no word boundary between ' ' and 'δ', so
# ' \b' fails and ' \B' matches; with 'unicode = true' it is the other way
# around.
[[test]]
name = "word-boundary-ascii"
regex = ' \b'
haystack = " δ"
matches = []
unicode = false
utf8 = false
[[test]]
name = "word-boundary-unicode"
regex = ' \b'
haystack = " δ"
matches = [[0, 1]]
unicode = true
utf8 = false
[[test]]
name = "word-boundary-ascii-not"
regex = ' \B'
haystack = " δ"
matches = [[0, 1]]
unicode = false
utf8 = false
[[test]]
name = "word-boundary-unicode-not"
regex = ' \B'
haystack = " δ"
matches = []
unicode = true
utf8 = false
# Perl character classes: the ASCII variants of '\w', '\d' and '\s' stop at
# the first non-ASCII character, while the Unicode variants match through it.
# Offsets are byte offsets: 'δ' is 2 bytes in UTF-8, and '२', '३' and U+1680
# are 3 bytes each.
[[test]]
name = "perl-word-ascii"
regex = '\w+'
haystack = "aδ"
matches = [[0, 1]]
unicode = false
utf8 = false
[[test]]
name = "perl-word-unicode"
regex = '\w+'
haystack = "aδ"
matches = [[0, 3]]
unicode = true
utf8 = false
[[test]]
name = "perl-decimal-ascii"
regex = '\d+'
haystack = "1२३9"
matches = [[0, 1], [7, 8]]
unicode = false
utf8 = false
[[test]]
name = "perl-decimal-unicode"
regex = '\d+'
haystack = "1२३9"
matches = [[0, 8]]
unicode = true
utf8 = false
[[test]]
name = "perl-whitespace-ascii"
regex = '\s+'
haystack = " \u1680"
matches = [[0, 1]]
unicode = false
utf8 = false
[[test]]
name = "perl-whitespace-unicode"
regex = '\s+'
haystack = " \u1680"
matches = [[0, 4]]
unicode = true
utf8 = false
# The first `(.+)` matches two Unicode codepoints, but can't match the 5th
# byte, which isn't valid UTF-8. The second (byte based) `(.+)` takes over and
# matches.
#
# NOTE(review): 'unescape = true' presumably makes the test harness decode the
# '\xNN' sequences in the literal haystack string into raw bytes; the 5-byte
# match span is consistent with that. Confirm against the regex-test docs.
[[test]]
name = "mixed-dot"
regex = '(.+)(?-u)(.+)'
haystack = '\xCE\x93\xCE\x94\xFF'
matches = [
[[0, 5], [0, 4], [4, 5]],
]
unescape = true
unicode = true
utf8 = false
# Case-insensitive matching in ASCII and Unicode modes. For plain ASCII
# letters both modes agree.
[[test]]
name = "case-one-ascii"
regex = 'a'
haystack = "A"
matches = [[0, 1]]
case-insensitive = true
unicode = false
utf8 = false
[[test]]
name = "case-one-unicode"
regex = 'a'
haystack = "A"
matches = [[0, 1]]
case-insensitive = true
unicode = true
utf8 = false
[[test]]
name = "case-class-simple-ascii"
regex = '[a-z]+'
haystack = "AaAaA"
matches = [[0, 5]]
case-insensitive = true
unicode = false
utf8 = false
# U+212A (KELVIN SIGN, 3 bytes in UTF-8) only matches '[a-z]'
# case-insensitively when Unicode mode is enabled. In ASCII mode it splits the
# haystack into two separate matches.
[[test]]
name = "case-class-ascii"
regex = '[a-z]+'
haystack = "aA\u212AaA"
matches = [[0, 2], [5, 7]]
case-insensitive = true
unicode = false
utf8 = false
[[test]]
name = "case-class-unicode"
regex = '[a-z]+'
haystack = "aA\u212AaA"
matches = [[0, 7]]
case-insensitive = true
unicode = true
utf8 = false
# In ASCII mode a negated class matches one byte at a time, so the 2-byte 'δ'
# yields two 1-byte matches. In Unicode mode it is matched as one codepoint.
[[test]]
name = "negate-ascii"
regex = '[^a]'
haystack = "δ"
matches = [[0, 1], [1, 2]]
unicode = false
utf8 = false
[[test]]
name = "negate-unicode"
regex = '[^a]'
haystack = "δ"
matches = [[0, 2]]
unicode = true
utf8 = false
# When utf8=true, this won't match, because the implicit '.*?' prefix is
# Unicode aware and will refuse to match through invalid UTF-8 bytes.
#
# With utf8=false, as used here, 'a' is found at [1, 2] in both the ASCII and
# the Unicode variant.
[[test]]
name = "dotstar-prefix-ascii"
regex = 'a'
haystack = '\xFFa'
matches = [[1, 2]]
unescape = true
unicode = false
utf8 = false
[[test]]
name = "dotstar-prefix-unicode"
regex = 'a'
haystack = '\xFFa'
matches = [[1, 2]]
unescape = true
unicode = true
utf8 = false
# The named group 'cstr' captures everything up to, but not including, the
# terminating NUL byte.
[[test]]
name = "null-bytes"
regex = '(?P<cstr>[^\x00]+)\x00'
haystack = 'foo\x00'
matches = [
[[0, 4], [0, 3]],
]
unescape = true
unicode = false
utf8 = false
# The next three tests share a haystack that is not valid UTF-8. In each case
# the only expected match is an empty one at an anchor: '^' at offset 0, or
# '$' at offset 22 (the end of the haystack).
[[test]]
name = "invalid-utf8-anchor-100"
regex = '\xCC?^'
haystack = '\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4'
matches = [[0, 0]]
unescape = true
unicode = false
utf8 = false
[[test]]
name = "invalid-utf8-anchor-200"
regex = '^\xf7|4\xff\d\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a##########[] d\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a\x8a##########\[] #####\x80\S7|$'
haystack = '\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4'
matches = [[22, 22]]
unescape = true
unicode = false
utf8 = false
[[test]]
name = "invalid-utf8-anchor-300"
regex = '^|ddp\xff\xffdddddlQd@\x80'
haystack = '\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4'
matches = [[0, 0]]
unescape = true
unicode = false
utf8 = false
# With ASCII word boundaries, 'á' and 'β' are not word characters, so 'x' has
# a word boundary on both sides and '\Bx\B' cannot match.
[[test]]
name = "word-boundary-ascii-100"
regex = '\Bx\B'
haystack = "áxβ"
matches = []
unicode = false
utf8 = false
# The haystack is '0' followed by one 4-byte codepoint. Offsets 0 and 1 are
# word boundaries around '0'; every later offset is not. That includes
# offsets 2-4, which fall inside the codepoint's UTF-8 encoding and are only
# reported because utf8 = false.
[[test]]
name = "word-boundary-ascii-200"
regex = '\B'
haystack = "0\U0007EF5E"
matches = [[2, 2], [3, 3], [4, 4], [5, 5]]
unicode = false
utf8 = false

315
vendor/regex/testdata/crazy.toml vendored Normal file
View File

@@ -0,0 +1,315 @@
# NOTE(review): 'regex = []' is presumably an empty list of patterns, i.e.
# there is nothing to match with. Confirm against the regex-test docs. No
# match is expected for either an empty or a non-empty haystack.
[[test]]
name = "nothing-empty"
regex = []
haystack = ""
matches = []
[[test]]
name = "nothing-something"
regex = []
haystack = "wat"
matches = []
# Matches a whole number in the range 0-255 delimited by ASCII word
# boundaries. '256' is out of range, and the trailing '\b' prevents a shorter
# prefix such as '25' from matching instead.
[[test]]
name = "ranges"
regex = '(?-u)\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b'
haystack = "num: 255"
matches = [[5, 8]]
[[test]]
name = "ranges-not"
regex = '(?-u)\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b'
haystack = "num: 256"
matches = []
# A simple floating point pattern: optional sign, optional integer part,
# optional '.', and at least one trailing digit.
[[test]]
name = "float1"
regex = '[-+]?[0-9]*\.?[0-9]+'
haystack = "0.1"
matches = [[0, 3]]
# NOTE(review): 'match-limit = 1' presumably restricts the check to the first
# match. The haystack also contains a second match, '.2'. Confirm against the
# regex-test docs.
[[test]]
name = "float2"
regex = '[-+]?[0-9]*\.?[0-9]+'
haystack = "0.1.2"
matches = [[0, 3]]
match-limit = 1
[[test]]
name = "float3"
regex = '[-+]?[0-9]*\.?[0-9]+'
haystack = "a1.2"
matches = [[1, 4]]
# The '.' is not followed by a digit, so only '1' matches.
[[test]]
name = "float4"
regex = '[-+]?[0-9]*\.?[0-9]+'
haystack = "1.a"
matches = [[0, 1]]
# Same haystack as float4, but the '^' and '$' anchors require the whole
# haystack to be a float, so there is no match.
[[test]]
name = "float5"
regex = '^[-+]?[0-9]*\.?[0-9]+$'
haystack = "1.a"
matches = []
# Email-like patterns. '(?i-u)' enables case-insensitive matching and disables
# Unicode mode for the pattern. The "-not" haystack lacks a top-level domain,
# so it must not match.
[[test]]
name = "email"
regex = '(?i-u)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b'
haystack = "mine is jam.slam@gmail.com "
matches = [[8, 26]]
[[test]]
name = "email-not"
regex = '(?i-u)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b'
haystack = "mine is jam.slam@gmail "
matches = []
# The regex is written as a multi-line literal string ('''...''') because it
# contains a single quote, which a regular literal string cannot hold.
[[test]]
name = "email-big"
regex = '''[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?'''
haystack = "mine is jam.slam@gmail.com "
matches = [[8, 26]]
# Dates of the form YYYY-MM-DD (also accepting ' ', '/' or '.' as separator)
# with year 19xx or 20xx, month 01-12 and day 01-31. The months '00' (date2)
# and '13' (date3) must be rejected.
[[test]]
name = "date1"
regex = '^(?:19|20)\d\d[- /.](?:0[1-9]|1[012])[- /.](?:0[1-9]|[12][0-9]|3[01])$'
haystack = "1900-01-01"
matches = [[0, 10]]
unicode = false
[[test]]
name = "date2"
regex = '^(?:19|20)\d\d[- /.](?:0[1-9]|1[012])[- /.](?:0[1-9]|[12][0-9]|3[01])$'
haystack = "1900-00-01"
matches = []
unicode = false
[[test]]
name = "date3"
regex = '^(?:19|20)\d\d[- /.](?:0[1-9]|1[012])[- /.](?:0[1-9]|[12][0-9]|3[01])$'
haystack = "1900-13-01"
matches = []
unicode = false
# Start and end assertions combined in various orders. On an empty haystack
# every combination matches the empty string at offset 0.
[[test]]
name = "start-end-empty"
regex = '^$'
haystack = ""
matches = [[0, 0]]
[[test]]
name = "start-end-empty-rev"
regex = '$^'
haystack = ""
matches = [[0, 0]]
[[test]]
name = "start-end-empty-many-1"
regex = '^$^$^$'
haystack = ""
matches = [[0, 0]]
[[test]]
name = "start-end-empty-many-2"
regex = '^^^$$$'
haystack = ""
matches = [[0, 0]]
# The '*' permits zero repetitions of the assertions, so an empty match is
# reported at every position of the haystack.
[[test]]
name = "start-end-empty-rep"
regex = '(?:^$)*'
haystack = "a\nb\nc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]
[[test]]
name = "start-end-empty-rep-rev"
regex = '(?:$^)*'
haystack = "a\nb\nc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]
# Negated character classes, including ones that nest POSIX classes such as
# '[:space:]' and '[:alpha:]' alongside literal members, in either order. In
# every test only the final character of the haystack falls outside the
# negated set.
[[test]]
name = "neg-class-letter"
regex = '[^ac]'
haystack = "acx"
matches = [[2, 3]]
[[test]]
name = "neg-class-letter-comma"
regex = '[^a,]'
haystack = "a,x"
matches = [[2, 3]]
[[test]]
name = "neg-class-letter-space"
regex = '[^a[:space:]]'
haystack = "a x"
matches = [[2, 3]]
[[test]]
name = "neg-class-comma"
regex = '[^,]'
haystack = ",,x"
matches = [[2, 3]]
[[test]]
name = "neg-class-space"
regex = '[^[:space:]]'
haystack = " a"
matches = [[1, 2]]
[[test]]
name = "neg-class-space-comma"
regex = '[^,[:space:]]'
haystack = ", a"
matches = [[2, 3]]
[[test]]
name = "neg-class-comma-space"
regex = '[^[:space:],]'
haystack = " ,a"
matches = [[2, 3]]
[[test]]
name = "neg-class-ascii"
regex = '[^[:alpha:]Z]'
haystack = "A1"
matches = [[1, 2]]
# Nested repetitions whose inner expression can match the empty string. The
# outer repetition is lazy in the first six tests and greedy in the last six;
# in every case the overall match is 'a=' at [0, 2].
[[test]]
name = "lazy-many-many"
regex = '(?:(?:.*)*?)='
haystack = "a=b"
matches = [[0, 2]]
[[test]]
name = "lazy-many-optional"
regex = '(?:(?:.?)*?)='
haystack = "a=b"
matches = [[0, 2]]
[[test]]
name = "lazy-one-many-many"
regex = '(?:(?:.*)+?)='
haystack = "a=b"
matches = [[0, 2]]
[[test]]
name = "lazy-one-many-optional"
regex = '(?:(?:.?)+?)='
haystack = "a=b"
matches = [[0, 2]]
[[test]]
name = "lazy-range-min-many"
regex = '(?:(?:.*){1,}?)='
haystack = "a=b"
matches = [[0, 2]]
[[test]]
name = "lazy-range-many"
regex = '(?:(?:.*){1,2}?)='
haystack = "a=b"
matches = [[0, 2]]
[[test]]
name = "greedy-many-many"
regex = '(?:(?:.*)*)='
haystack = "a=b"
matches = [[0, 2]]
[[test]]
name = "greedy-many-optional"
regex = '(?:(?:.?)*)='
haystack = "a=b"
matches = [[0, 2]]
[[test]]
name = "greedy-one-many-many"
regex = '(?:(?:.*)+)='
haystack = "a=b"
matches = [[0, 2]]
[[test]]
name = "greedy-one-many-optional"
regex = '(?:(?:.?)+)='
haystack = "a=b"
matches = [[0, 2]]
[[test]]
name = "greedy-range-min-many"
regex = '(?:(?:.*){1,})='
haystack = "a=b"
matches = [[0, 2]]
[[test]]
name = "greedy-range-many"
regex = '(?:(?:.*){1,2})='
haystack = "a=b"
matches = [[0, 2]]
# Regexes that can match the empty string report an empty match at every
# position of the haystack, including the position after the last character.
[[test]]
name = "empty1"
regex = ''
haystack = ""
matches = [[0, 0]]
[[test]]
name = "empty2"
regex = ''
haystack = "abc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "empty3"
regex = '(?:)'
haystack = "abc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "empty4"
regex = '(?:)*'
haystack = "abc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "empty5"
regex = '(?:)+'
haystack = "abc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "empty6"
regex = '(?:)?'
haystack = "abc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "empty7"
regex = '(?:)(?:)'
haystack = "abc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
# Alternation order matters: an empty first alternative always wins, so a
# later 'z' or 'b' alternative never contributes a match (empty8, empty10).
[[test]]
name = "empty8"
regex = '(?:)+|z'
haystack = "abc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "empty9"
regex = 'z|(?:)+'
haystack = "abc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "empty10"
regex = '(?:)+|b'
haystack = "abc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
# Here 'b' is listed first, so it is preferred over the empty match at offset
# 1 and the match [1, 2] replaces the empty matches at offsets 1 and 2.
[[test]]
name = "empty11"
regex = 'b|(?:)+'
haystack = "abc"
matches = [[0, 0], [1, 2], [3, 3]]

117
vendor/regex/testdata/crlf.toml vendored Normal file
View File

@@ -0,0 +1,117 @@
# Every regex in this file enables the 'R' flag (CRLF mode). Most of them also
# enable 'm' (multi-line mode) so that '^' and '$' match at line boundaries.
#
# This is a basic test that checks ^ and $ treat \r\n as a single line
# terminator. If ^ and $ only treated \n as a line terminator, then this would
# only match 'xyz' at the end of the haystack.
[[test]]
name = "basic"
regex = '(?mR)^[a-z]+$'
haystack = "abc\r\ndef\r\nxyz"
matches = [[0, 3], [5, 8], [10, 13]]
# Tests that a CRLF-aware '^$' assertion does not match between CR and LF.
[[test]]
name = "start-end-non-empty"
regex = '(?mR)^$'
haystack = "abc\r\ndef\r\nxyz"
matches = []
# Tests that a CRLF-aware '^$' assertion matches the empty string, just like
# a non-CRLF-aware '^$' assertion.
[[test]]
name = "start-end-empty"
regex = '(?mR)^$'
haystack = ""
matches = [[0, 0]]
# Tests that a CRLF-aware '^$' assertion matches the empty string preceding
# and following a line terminator.
[[test]]
name = "start-end-before-after"
regex = '(?mR)^$'
haystack = "\r\n"
matches = [[0, 0], [2, 2]]
# Tests that a CRLF-aware '^' assertion does not split a line terminator.
[[test]]
name = "start-no-split"
regex = '(?mR)^'
haystack = "abc\r\ndef\r\nxyz"
matches = [[0, 0], [5, 5], [10, 10]]
# Same as above, but with adjacent runs of line terminators.
[[test]]
name = "start-no-split-adjacent"
regex = '(?mR)^'
haystack = "\r\n\r\n\r\n"
matches = [[0, 0], [2, 2], [4, 4], [6, 6]]
# Same as above, but with adjacent runs of just carriage returns.
[[test]]
name = "start-no-split-adjacent-cr"
regex = '(?mR)^'
haystack = "\r\r\r"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
# Same as above, but with adjacent runs of just line feeds.
[[test]]
name = "start-no-split-adjacent-lf"
regex = '(?mR)^'
haystack = "\n\n\n"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
# Tests that a CRLF-aware '$' assertion does not split a line terminator.
[[test]]
name = "end-no-split"
regex = '(?mR)$'
haystack = "abc\r\ndef\r\nxyz"
matches = [[3, 3], [8, 8], [13, 13]]
# Same as above, but with adjacent runs of line terminators.
[[test]]
name = "end-no-split-adjacent"
regex = '(?mR)$'
haystack = "\r\n\r\n\r\n"
matches = [[0, 0], [2, 2], [4, 4], [6, 6]]
# Same as above, but with adjacent runs of just carriage returns.
[[test]]
name = "end-no-split-adjacent-cr"
regex = '(?mR)$'
haystack = "\r\r\r"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
# Same as above, but with adjacent runs of just line feeds.
[[test]]
name = "end-no-split-adjacent-lf"
regex = '(?mR)$'
haystack = "\n\n\n"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
# Tests that '.' does not match either \r or \n when CRLF mode is enabled. Note
# that this doesn't require multi-line mode to be enabled.
[[test]]
name = "dot-no-crlf"
regex = '(?R).'
haystack = "\r\n\r\n\r\n"
matches = []
# This is a test that caught a bug in the one-pass DFA where it (amazingly) was
# using 'is_end_lf' instead of 'is_end_crlf' here. It was probably a copy &
# paste bug. We insert an empty capture group here because it provokes the meta
# regex engine to first find a match and then trip over a panic because the
# one-pass DFA erroneously says there is no match.
[[test]]
name = "onepass-wrong-crlf-with-capture"
regex = '(?Rm:().$)'
haystack = "ZZ\r"
matches = [[[1, 2], [1, 1]]]
# This is like onepass-wrong-crlf-with-capture above, except it sets up the
# test so that it can be run by the one-pass DFA directly. (i.e., Make it
# anchored and start the search at the right place.)
[[test]]
name = "onepass-wrong-crlf-anchored"
regex = '(?Rm:.$)'
haystack = "ZZ\r"
matches = [[1, 2]]
anchored = true
bounds = [1, 3]

52
vendor/regex/testdata/earliest.toml vendored Normal file
View File

@@ -0,0 +1,52 @@
# Tests for searches with 'search-kind = "earliest"'. As the expected spans
# below show, such a search reports the shortest match it can confirm, so
# neither greediness nor the usual preference for the first alternative is
# observable.
#
# A greedy 'a+' still reports one 'a' at a time.
[[test]]
name = "no-greedy-100"
regex = 'a+'
haystack = "aaa"
matches = [[0, 1], [1, 2], [2, 3]]
search-kind = "earliest"
# The match ends after the first 'c' even though 'c+' is greedy.
[[test]]
name = "no-greedy-200"
regex = 'abc+'
haystack = "zzzabccc"
matches = [[3, 6]]
search-kind = "earliest"
[[test]]
name = "is-ungreedy"
regex = 'a+?'
haystack = "aaa"
matches = [[0, 1], [1, 2], [2, 3]]
search-kind = "earliest"
# With a leading '^', the shorter alternative 'a' is reported first. Each
# entry lists the overall span followed by the span of group 1.
[[test]]
name = "look-start-test"
regex = '^(abc|a)'
haystack = "abc"
matches = [
[[0, 1], [0, 1]],
]
search-kind = "earliest"
# With a trailing '$', only 'abc' can reach the end of the haystack, so the
# full span is reported.
[[test]]
name = "look-end-test"
regex = '(abc|a)$'
haystack = "abc"
matches = [
[[0, 3], [0, 3]],
]
search-kind = "earliest"
# 'abc' is the first alternative, but 'a' is reported because it is
# confirmed earlier.
[[test]]
name = "no-leftmost-first-100"
regex = 'abc|a'
haystack = "abc"
matches = [[0, 1]]
search-kind = "earliest"
[[test]]
name = "no-leftmost-first-200"
regex = 'aba|a'
haystack = "aba"
matches = [[0, 1], [2, 3]]
search-kind = "earliest"

113
vendor/regex/testdata/empty.toml vendored Normal file
View File

@@ -0,0 +1,113 @@
# Tests for empty alternation branches and empty regexes. The numeric names
# are grouped by hundreds:
#
#   1xx, 2xx: bare empty alternatives ('|b', 'b|', '||', ...)
#   3xx: empty non-capturing groups as alternatives
#   4xx: an empty group following a literal
#   5xx: the empty regex itself
#   6xx: repetition of a group with an empty alternative
#
# When the empty alternative comes first it always wins, so 'b' only produces
# the match [1, 2] in the tests where it is listed before the empty
# alternative (110, 230, 310).
[[test]]
name = "100"
regex = "|b"
haystack = "abc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "110"
regex = "b|"
haystack = "abc"
matches = [[0, 0], [1, 2], [3, 3]]
[[test]]
name = "120"
regex = "|z"
haystack = "abc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "130"
regex = "z|"
haystack = "abc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "200"
regex = "|"
haystack = "abc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "210"
regex = "||"
haystack = "abc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "220"
regex = "||b"
haystack = "abc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "230"
regex = "b||"
haystack = "abc"
matches = [[0, 0], [1, 2], [3, 3]]
[[test]]
name = "240"
regex = "||z"
haystack = "abc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "300"
regex = "(?:)|b"
haystack = "abc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "310"
regex = "b|(?:)"
haystack = "abc"
matches = [[0, 0], [1, 2], [3, 3]]
[[test]]
name = "320"
regex = "(?:|)"
haystack = "abc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "330"
regex = "(?:|)|z"
haystack = "abc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
# Neither alternative can match the empty string here, so only the
# non-empty matches 'a' and 'b' are reported.
[[test]]
name = "400"
regex = "a(?:)|b"
haystack = "abc"
matches = [[0, 1], [1, 2]]
[[test]]
name = "500"
regex = ""
haystack = ""
matches = [[0, 0]]
[[test]]
name = "510"
regex = ""
haystack = "a"
matches = [[0, 0], [1, 1]]
[[test]]
name = "520"
regex = ""
haystack = "abc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
# The empty alternative is listed first inside the group, so the repetition
# only ever reports empty matches and never consumes an 'a'.
[[test]]
name = "600"
regex = '(?:|a)*'
haystack = "aaa"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "610"
regex = '(?:|a)+'
haystack = "aaa"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]

23
vendor/regex/testdata/expensive.toml vendored Normal file
View File

@@ -0,0 +1,23 @@
# This file represents tests that may be expensive to run on some regex engines.
# For example, tests that build a full DFA ahead of time and minimize it can
# take a horrendously long time on regexes that are large (or result in an
# explosion in the number of states). We group these tests together so that
# such engines can simply skip these tests.
# See: https://github.com/rust-lang/regex/issues/98
[[test]]
name = "regression-many-repeat-no-stack-overflow"
regex = '^.{1,2500}'
haystack = "a"
matches = [[0, 1]]
# This test is meant to blow the bounded backtracker's visited capacity. In
# order to do that, we need a somewhat sizeable regex. The purpose of this
# is to make sure there's at least one test that exercises this path in the
# backtracker. All other tests (at time of writing) are small enough that the
# backtracker can handle them fine.
#
# Three consecutive 50-letter matches are expected; the letters left over at
# the end of the haystack are too few for a fourth.
[[test]]
name = "backtrack-blow-visited-capacity"
regex = '\pL{50}'
haystack = "abcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyabcdefghijklmnopqrstuvwxyZZ"
matches = [[0, 50], [50, 100], [100, 150]]

68
vendor/regex/testdata/flags.toml vendored Normal file
View File

@@ -0,0 +1,68 @@
# Tests for inline flags: 'i' (case-insensitive), 's' (dot matches '\n'),
# 'U' (swap greedy and lazy), 'm' (multi-line), and switching a flag off
# again with '(?-x)' or for a sub-expression only with '(?-x:...)'.
[[test]]
name = "1"
regex = "(?i)abc"
haystack = "ABC"
matches = [[0, 3]]
# Case-insensitivity applies only to 'a'; 'bc' must match exactly.
[[test]]
name = "2"
regex = "(?i)a(?-i)bc"
haystack = "Abc"
matches = [[0, 3]]
[[test]]
name = "3"
regex = "(?i)a(?-i)bc"
haystack = "ABC"
matches = []
[[test]]
name = "4"
regex = "(?is)a."
haystack = "A\n"
matches = [[0, 2]]
[[test]]
name = "5"
regex = "(?is)a.(?-is)a."
haystack = "A\nab"
matches = [[0, 4]]
# After '(?-is)' the second '.' can no longer match '\n', so the regex as a
# whole fails.
[[test]]
name = "6"
regex = "(?is)a.(?-is)a."
haystack = "A\na\n"
matches = []
# The optional group fails for the same reason as in test 6, leaving only
# 'A\n'.
# NOTE(review): 'match-limit = 1' presumably restricts the check to the first
# match (a second match, 'a\n' at [2, 4], also exists). Confirm against the
# regex-test docs.
[[test]]
name = "7"
regex = "(?is)a.(?-is:a.)?"
haystack = "A\na\n"
matches = [[0, 2]]
match-limit = 1
# With 'U', 'a+' behaves lazily and 'a+?' behaves greedily.
[[test]]
name = "8"
regex = "(?U)a+"
haystack = "aa"
matches = [[0, 1]]
match-limit = 1
[[test]]
name = "9"
regex = "(?U)a+?"
haystack = "aa"
matches = [[0, 2]]
# '(?-U)' restores the default, so 'a+' is greedy again.
[[test]]
name = "10"
regex = "(?U)(?-U)a+"
haystack = "aa"
matches = [[0, 2]]
# In multi-line mode '^' and '$' match at line boundaries, so the repetition
# spans all three lines in a single match.
[[test]]
name = "11"
regex = '(?m)(?:^\d+$\n?)+'
haystack = "123\n456\n789"
matches = [[0, 11]]
unicode = false

1611
vendor/regex/testdata/fowler/basic.toml vendored Normal file

File diff suppressed because it is too large Load Diff

25
vendor/regex/testdata/fowler/dat/README vendored Normal file
View File

@@ -0,0 +1,25 @@
Test data was taken from the Go distribution, which was in turn taken from the
testregex test suite:
http://web.archive.org/web/20150925124103/http://www2.research.att.com/~astopen/testregex/testregex.html
Unfortunately, the original web site now appears dead, but the test data lives
on.
The LICENSE in this directory corresponds to the LICENSE that the data was
originally released under.
The tests themselves were modified for RE2/Go (and marked as such). A
couple were modified further by me (Andrew Gallant) and marked with 'Rust'.
After some number of years, these tests were transformed into a TOML format
using the 'regex-cli generate fowler' command. To re-generate the
TOML files, run the following from the root of this repository:
regex-cli generate fowler testdata/fowler testdata/fowler/dat/*.dat
This assumes that you have 'regex-cli' installed. See 'regex-cli/README.md'
from the root of the repository for more information.
This brings the Fowler tests into a more "sensible" structured format in which
other tests can be written such that they aren't write-only.

View File

@@ -0,0 +1,223 @@
NOTE all standard compliant implementations should pass these : 2002-05-31
BE abracadabra$ abracadabracadabra (7,18)
BE a...b abababbb (2,7)
BE XXXXXX ..XXXXXX (2,8)
E \) () (1,2)
BE a] a]a (0,2)
B } } (0,1)
E \} } (0,1)
BE \] ] (0,1)
B ] ] (0,1)
E ] ] (0,1)
B { { (0,1)
B } } (0,1)
BE ^a ax (0,1)
BE \^a a^a (1,3)
BE a\^ a^ (0,2)
BE a$ aa (1,2)
BE a\$ a$ (0,2)
BE ^$ NULL (0,0)
E $^ NULL (0,0)
E a($) aa (1,2)(2,2)
E a*(^a) aa (0,1)(0,1)
E (..)*(...)* a (0,0)
E (..)*(...)* abcd (0,4)(2,4)
E (ab|a)(bc|c) abc (0,3)(0,2)(2,3)
E (ab)c|abc abc (0,3)(0,2)
E a{0}b ab (1,2)
E (a*)(b?)(b+)b{3} aaabbbbbbb (0,10)(0,3)(3,4)(4,7)
E (a*)(b{0,1})(b{1,})b{3} aaabbbbbbb (0,10)(0,3)(3,4)(4,7)
E a{9876543210} NULL BADBR
E ((a|a)|a) a (0,1)(0,1)(0,1)
E (a*)(a|aa) aaaa (0,4)(0,3)(3,4)
E a*(a.|aa) aaaa (0,4)(2,4)
E a(b)|c(d)|a(e)f aef (0,3)(?,?)(?,?)(1,2)
E (a|b)?.* b (0,1)(0,1)
E (a|b)c|a(b|c) ac (0,2)(0,1)
E (a|b)c|a(b|c) ab (0,2)(?,?)(1,2)
E (a|b)*c|(a|ab)*c abc (0,3)(1,2)
E (a|b)*c|(a|ab)*c xc (1,2)
E (.a|.b).*|.*(.a|.b) xa (0,2)(0,2)
E a?(ab|ba)ab abab (0,4)(0,2)
E a?(ac{0}b|ba)ab abab (0,4)(0,2)
E ab|abab abbabab (0,2)
E aba|bab|bba baaabbbaba (5,8)
E aba|bab baaabbbaba (6,9)
E (aa|aaa)*|(a|aaaaa) aa (0,2)(0,2)
E (a.|.a.)*|(a|.a...) aa (0,2)(0,2)
E ab|a xabc (1,3)
E ab|a xxabc (2,4)
Ei (Ab|cD)* aBcD (0,4)(2,4)
BE [^-] --a (2,3)
BE [a-]* --a (0,3)
BE [a-m-]* --amoma-- (0,4)
E :::1:::0:|:::1:1:0: :::0:::1:::1:::0: (8,17)
E :::1:::0:|:::1:1:1: :::0:::1:::1:::0: (8,17)
{E [[:upper:]] A (0,1) [[<element>]] not supported
E [[:lower:]]+ `az{ (1,3)
E [[:upper:]]+ @AZ[ (1,3)
# No collation in Go
#BE [[-]] [[-]] (2,4)
#BE [[.NIL.]] NULL ECOLLATE
#BE [[=aleph=]] NULL ECOLLATE
}
BE$ \n \n (0,1)
BEn$ \n \n (0,1)
BE$ [^a] \n (0,1)
BE$ \na \na (0,2)
E (a)(b)(c) abc (0,3)(0,1)(1,2)(2,3)
BE xxx xxx (0,3)
#E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) feb 6, (0,6)
E (?:^|[ (,;])(?:(?:(?:[Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))(?:[^0-9]|$) feb 6, (0,6) Rust
#E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) 2/7 (0,3)
E (?:^|[ (,;])(?:(?:(?:[Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))(?:[^0-9]|$) 2/7 (0,3) Rust
#E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) feb 1,Feb 6 (5,11)
E (?:^|[ (,;])(?:(?:(?:[Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))(?:[^0-9]|$) feb 1,Feb 6 (5,11) Rust
#E3 ((((((((((((((((((((((((((((((x)))))))))))))))))))))))))))))) x (0,1)(0,1)(0,1)
E (((?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:x)))))))))))))))))))))))))))))) x (0,1)(0,1)(0,1) Rust
#E3 ((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))* xx (0,2)(1,2)(1,2)
E (((?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:(?:x))))))))))))))))))))))))))))))* xx (0,2)(1,2)(1,2) Rust
E a?(ab|ba)* ababababababababababababababababababababababababababababababababababababababababa (0,81)(79,81)
E abaa|abbaa|abbbaa|abbbbaa ababbabbbabbbabbbbabbbbaa (18,25)
E abaa|abbaa|abbbaa|abbbbaa ababbabbbabbbabbbbabaa (18,22)
E aaac|aabc|abac|abbc|baac|babc|bbac|bbbc baaabbbabac (7,11)
#BE$ .* \x01\xff (0,2)
BE$ .* \x01\x7f (0,2) Rust
E aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa (53,57)
L aaaa\nbbbb\ncccc\nddddd\neeeeee\nfffffff\ngggg\nhhhh\niiiii\njjjjj\nkkkkk\nllll XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa NOMATCH
E a*a*a*a*a*b aaaaaaaaab (0,10)
BE ^ NULL (0,0)
BE $ NULL (0,0)
BE ^$ NULL (0,0)
BE ^a$ a (0,1)
BE abc abc (0,3)
BE abc xabcy (1,4)
BE abc ababc (2,5)
BE ab*c abc (0,3)
BE ab*bc abc (0,3)
BE ab*bc abbc (0,4)
BE ab*bc abbbbc (0,6)
E ab+bc abbc (0,4)
E ab+bc abbbbc (0,6)
E ab?bc abbc (0,4)
E ab?bc abc (0,3)
E ab?c abc (0,3)
BE ^abc$ abc (0,3)
BE ^abc abcc (0,3)
BE abc$ aabc (1,4)
BE ^ abc (0,0)
BE $ abc (3,3)
BE a.c abc (0,3)
BE a.c axc (0,3)
BE a.*c axyzc (0,5)
BE a[bc]d abd (0,3)
BE a[b-d]e ace (0,3)
BE a[b-d] aac (1,3)
BE a[-b] a- (0,2)
BE a[b-] a- (0,2)
BE a] a] (0,2)
BE a[]]b a]b (0,3)
BE a[^bc]d aed (0,3)
BE a[^-b]c adc (0,3)
BE a[^]b]c adc (0,3)
E ab|cd abc (0,2)
E ab|cd abcd (0,2)
E a\(b a(b (0,3)
E a\(*b ab (0,2)
E a\(*b a((b (0,4)
E ((a)) abc (0,1)(0,1)(0,1)
E (a)b(c) abc (0,3)(0,1)(2,3)
E a+b+c aabbabc (4,7)
E a* aaa (0,3)
E (a*)* - (0,0)(0,0)
E (a*)+ - (0,0)(0,0)
E (a*|b)* - (0,0)(0,0)
E (a+|b)* ab (0,2)(1,2)
E (a+|b)+ ab (0,2)(1,2)
E (a+|b)? ab (0,1)(0,1)
BE [^ab]* cde (0,3)
E (^)* - (0,0)(0,0)
BE a* NULL (0,0)
E ([abc])*d abbbcd (0,6)(4,5)
E ([abc])*bcd abcd (0,4)(0,1)
E a|b|c|d|e e (0,1)
E (a|b|c|d|e)f ef (0,2)(0,1)
E ((a*|b))* - (0,0)(0,0)(0,0)
BE abcd*efg abcdefg (0,7)
BE ab* xabyabbbz (1,3)
BE ab* xayabbbz (1,2)
E (ab|cd)e abcde (2,5)(2,4)
BE [abhgefdc]ij hij (0,3)
E (a|b)c*d abcd (1,4)(1,2)
E (ab|ab*)bc abc (0,3)(0,1)
E a([bc]*)c* abc (0,3)(1,3)
E a([bc]*)(c*d) abcd (0,4)(1,3)(3,4)
E a([bc]+)(c*d) abcd (0,4)(1,3)(3,4)
E a([bc]*)(c+d) abcd (0,4)(1,2)(2,4)
E a[bcd]*dcdcde adcdcde (0,7)
E (ab|a)b*c abc (0,3)(0,2)
E ((a)(b)c)(d) abcd (0,4)(0,3)(0,1)(1,2)(3,4)
BE [A-Za-z_][A-Za-z0-9_]* alpha (0,5)
E ^a(bc+|b[eh])g|.h$ abh (1,3)
E (bc+d$|ef*g.|h?i(j|k)) effgz (0,5)(0,5)
E (bc+d$|ef*g.|h?i(j|k)) ij (0,2)(0,2)(1,2)
E (bc+d$|ef*g.|h?i(j|k)) reffgz (1,6)(1,6)
E (((((((((a))))))))) a (0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)
BE multiple words multiple words yeah (0,14)
E (.*)c(.*) abcde (0,5)(0,2)(3,5)
BE abcd abcd (0,4)
E a(bc)d abcd (0,4)(1,3)
E a[-]?c ac (0,3)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Qaddafi (0,15)(?,?)(10,12)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mo'ammar Gadhafi (0,16)(?,?)(11,13)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Kaddafi (0,15)(?,?)(10,12)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Qadhafi (0,15)(?,?)(10,12)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Gadafi (0,14)(?,?)(10,11)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mu'ammar Qadafi (0,15)(?,?)(11,12)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moamar Gaddafi (0,14)(?,?)(9,11)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mu'ammar Qadhdhafi (0,18)(?,?)(13,15)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Khaddafi (0,16)(?,?)(11,13)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghaddafy (0,16)(?,?)(11,13)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghadafi (0,15)(?,?)(11,12)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghaddafi (0,16)(?,?)(11,13)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muamar Kaddafi (0,14)(?,?)(9,11)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Quathafi (0,16)(?,?)(11,13)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Gheddafi (0,16)(?,?)(11,13)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moammar Khadafy (0,15)(?,?)(11,12)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moammar Qudhafi (0,15)(?,?)(10,12)
E a+(b|c)*d+ aabcdd (0,6)(3,4)
E ^.+$ vivi (0,4)
E ^(.+)$ vivi (0,4)(0,4)
E ^([^!.]+).att.com!(.+)$ gryphon.att.com!eby (0,19)(0,7)(16,19)
E ^([^!]+!)?([^!]+)$ bas (0,3)(?,?)(0,3)
E ^([^!]+!)?([^!]+)$ bar!bas (0,7)(0,4)(4,7)
E ^([^!]+!)?([^!]+)$ foo!bas (0,7)(0,4)(4,7)
E ^.+!([^!]+!)([^!]+)$ foo!bar!bas (0,11)(4,8)(8,11)
E ((foo)|(bar))!bas bar!bas (0,7)(0,3)(?,?)(0,3)
E ((foo)|(bar))!bas foo!bar!bas (4,11)(4,7)(?,?)(4,7)
E ((foo)|(bar))!bas foo!bas (0,7)(0,3)(0,3)
E ((foo)|bar)!bas bar!bas (0,7)(0,3)
E ((foo)|bar)!bas foo!bar!bas (4,11)(4,7)
E ((foo)|bar)!bas foo!bas (0,7)(0,3)(0,3)
E (foo|(bar))!bas bar!bas (0,7)(0,3)(0,3)
E (foo|(bar))!bas foo!bar!bas (4,11)(4,7)(4,7)
E (foo|(bar))!bas foo!bas (0,7)(0,3)
E (foo|bar)!bas bar!bas (0,7)(0,3)
E (foo|bar)!bas foo!bar!bas (4,11)(4,7)
E (foo|bar)!bas foo!bas (0,7)(0,3)
E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bar!bas (0,11)(0,11)(?,?)(?,?)(4,8)(8,11)
E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ bas (0,3)(?,?)(0,3)
E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ bar!bas (0,7)(0,4)(4,7)
E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ foo!bar!bas (0,11)(?,?)(?,?)(4,8)(8,11)
E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ foo!bas (0,7)(0,4)(4,7)
E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ bas (0,3)(0,3)(?,?)(0,3)
E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ bar!bas (0,7)(0,7)(0,4)(4,7)
E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bar!bas (0,11)(0,11)(?,?)(?,?)(4,8)(8,11)
E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bas (0,7)(0,7)(0,4)(4,7)
E .*(/XXX).* /XXX (0,4)(0,4)
E .*(\\XXX).* \XXX (0,4)(0,4)
E \\XXX \XXX (0,4)
E .*(/000).* /000 (0,4)(0,4)
E .*(\\000).* \000 (0,4)(0,4)
E \\000 \000 (0,4)

View File

@@ -0,0 +1,74 @@
NOTE null subexpression matches : 2002-06-06
E (a*)* a (0,1)(0,1)
E SAME x (0,0)(0,0)
E SAME aaaaaa (0,6)(0,6)
E SAME aaaaaax (0,6)(0,6)
E (a*)+ a (0,1)(0,1)
E SAME x (0,0)(0,0)
E SAME aaaaaa (0,6)(0,6)
E SAME aaaaaax (0,6)(0,6)
E (a+)* a (0,1)(0,1)
E SAME x (0,0)
E SAME aaaaaa (0,6)(0,6)
E SAME aaaaaax (0,6)(0,6)
E (a+)+ a (0,1)(0,1)
E SAME x NOMATCH
E SAME aaaaaa (0,6)(0,6)
E SAME aaaaaax (0,6)(0,6)
E ([a]*)* a (0,1)(0,1)
E SAME x (0,0)(0,0)
E SAME aaaaaa (0,6)(0,6)
E SAME aaaaaax (0,6)(0,6)
E ([a]*)+ a (0,1)(0,1)
E SAME x (0,0)(0,0)
E SAME aaaaaa (0,6)(0,6)
E SAME aaaaaax (0,6)(0,6)
E ([^b]*)* a (0,1)(0,1)
E SAME b (0,0)(0,0)
E SAME aaaaaa (0,6)(0,6)
E SAME aaaaaab (0,6)(0,6)
E ([ab]*)* a (0,1)(0,1)
E SAME aaaaaa (0,6)(0,6)
E SAME ababab (0,6)(0,6)
E SAME bababa (0,6)(0,6)
E SAME b (0,1)(0,1)
E SAME bbbbbb (0,6)(0,6)
E SAME aaaabcde (0,5)(0,5)
E ([^a]*)* b (0,1)(0,1)
E SAME bbbbbb (0,6)(0,6)
E SAME aaaaaa (0,0)(0,0)
E ([^ab]*)* ccccxx (0,6)(0,6)
E SAME ababab (0,0)(0,0)
#E ((z)+|a)* zabcde (0,2)(1,2)
E ((z)+|a)* zabcde (0,2)(1,2)(0,1) Rust
#{E a+? aaaaaa (0,1) no *? +? minimal match ops
#E (a) aaa (0,1)(0,1)
#E (a*?) aaa (0,0)(0,0)
#E (a)*? aaa (0,0)
#E (a*?)*? aaa (0,0)
#}
B \(a*\)*\(x\) x (0,1)(0,0)(0,1)
B \(a*\)*\(x\) ax (0,2)(0,1)(1,2)
B \(a*\)*\(x\) axa (0,2)(0,1)(1,2)
B \(a*\)*\(x\)\(\1\) x (0,1)(0,0)(0,1)(1,1)
B \(a*\)*\(x\)\(\1\) ax (0,2)(1,1)(1,2)(2,2)
B \(a*\)*\(x\)\(\1\) axa (0,3)(0,1)(1,2)(2,3)
B \(a*\)*\(x\)\(\1\)\(x\) axax (0,4)(0,1)(1,2)(2,3)(3,4)
B \(a*\)*\(x\)\(\1\)\(x\) axxa (0,3)(1,1)(1,2)(2,2)(2,3)
E (a*)*(x) x (0,1)(0,0)(0,1)
E (a*)*(x) ax (0,2)(0,1)(1,2)
E (a*)*(x) axa (0,2)(0,1)(1,2)
E (a*)+(x) x (0,1)(0,0)(0,1)
E (a*)+(x) ax (0,2)(0,1)(1,2)
E (a*)+(x) axa (0,2)(0,1)(1,2)
E (a*){2}(x) x (0,1)(0,0)(0,1)
E (a*){2}(x) ax (0,2)(1,1)(1,2)
E (a*){2}(x) axa (0,2)(1,1)(1,2)

View File

@@ -0,0 +1,169 @@
NOTE implicit vs. explicit repetitions : 2009-02-02
# Glenn Fowler <gsf@research.att.com>
# conforming matches (column 4) must match one of the following BREs
# NOMATCH
# (0,.)\((\(.\),\(.\))(?,?)(\2,\3)\)*
# (0,.)\((\(.\),\(.\))(\2,\3)(?,?)\)*
# i.e., each 3-tuple has two identical elements and one (?,?)
E ((..)|(.)) NULL NOMATCH
E ((..)|(.))((..)|(.)) NULL NOMATCH
E ((..)|(.))((..)|(.))((..)|(.)) NULL NOMATCH
E ((..)|(.)){1} NULL NOMATCH
E ((..)|(.)){2} NULL NOMATCH
E ((..)|(.)){3} NULL NOMATCH
E ((..)|(.))* NULL (0,0)
E ((..)|(.)) a (0,1)(0,1)(?,?)(0,1)
E ((..)|(.))((..)|(.)) a NOMATCH
E ((..)|(.))((..)|(.))((..)|(.)) a NOMATCH
E ((..)|(.)){1} a (0,1)(0,1)(?,?)(0,1)
E ((..)|(.)){2} a NOMATCH
E ((..)|(.)){3} a NOMATCH
E ((..)|(.))* a (0,1)(0,1)(?,?)(0,1)
E ((..)|(.)) aa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.))((..)|(.)) aa (0,2)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2)
E ((..)|(.))((..)|(.))((..)|(.)) aa NOMATCH
E ((..)|(.)){1} aa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.)){2} aa (0,2)(1,2)(?,?)(1,2)
E ((..)|(.)){3} aa NOMATCH
E ((..)|(.))* aa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.)) aaa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.))((..)|(.)) aaa (0,3)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3)
E ((..)|(.))((..)|(.))((..)|(.)) aaa (0,3)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2)(2,3)(?,?)(2,3)
E ((..)|(.)){1} aaa (0,2)(0,2)(0,2)(?,?)
#E ((..)|(.)){2} aaa (0,3)(2,3)(?,?)(2,3)
E ((..)|(.)){2} aaa (0,3)(2,3)(0,2)(2,3) RE2/Go
E ((..)|(.)){3} aaa (0,3)(2,3)(?,?)(2,3)
#E ((..)|(.))* aaa (0,3)(2,3)(?,?)(2,3)
E ((..)|(.))* aaa (0,3)(2,3)(0,2)(2,3) RE2/Go
E ((..)|(.)) aaaa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.))((..)|(.)) aaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
E ((..)|(.))((..)|(.))((..)|(.)) aaaa (0,4)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3)(3,4)(?,?)(3,4)
E ((..)|(.)){1} aaaa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.)){2} aaaa (0,4)(2,4)(2,4)(?,?)
#E ((..)|(.)){3} aaaa (0,4)(3,4)(?,?)(3,4)
E ((..)|(.)){3} aaaa (0,4)(3,4)(0,2)(3,4) RE2/Go
E ((..)|(.))* aaaa (0,4)(2,4)(2,4)(?,?)
E ((..)|(.)) aaaaa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.))((..)|(.)) aaaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
E ((..)|(.))((..)|(.))((..)|(.)) aaaaa (0,5)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,5)(?,?)(4,5)
E ((..)|(.)){1} aaaaa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.)){2} aaaaa (0,4)(2,4)(2,4)(?,?)
#E ((..)|(.)){3} aaaaa (0,5)(4,5)(?,?)(4,5)
E ((..)|(.)){3} aaaaa (0,5)(4,5)(2,4)(4,5) RE2/Go
#E ((..)|(.))* aaaaa (0,5)(4,5)(?,?)(4,5)
E ((..)|(.))* aaaaa (0,5)(4,5)(2,4)(4,5) RE2/Go
E ((..)|(.)) aaaaaa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.))((..)|(.)) aaaaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
E ((..)|(.))((..)|(.))((..)|(.)) aaaaaa (0,6)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,6)(4,6)(?,?)
E ((..)|(.)){1} aaaaaa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.)){2} aaaaaa (0,4)(2,4)(2,4)(?,?)
E ((..)|(.)){3} aaaaaa (0,6)(4,6)(4,6)(?,?)
E ((..)|(.))* aaaaaa (0,6)(4,6)(4,6)(?,?)
NOTE additional repetition tests graciously provided by Chris Kuklewicz www.haskell.org 2009-02-02
# These test a bug in OS X / FreeBSD / NetBSD, and libtree.
# Linux/GLIBC gets the {8,} and {8,8} wrong.
:HA#100:E X(.?){0,}Y X1234567Y (0,9)(7,8)
:HA#101:E X(.?){1,}Y X1234567Y (0,9)(7,8)
:HA#102:E X(.?){2,}Y X1234567Y (0,9)(7,8)
:HA#103:E X(.?){3,}Y X1234567Y (0,9)(7,8)
:HA#104:E X(.?){4,}Y X1234567Y (0,9)(7,8)
:HA#105:E X(.?){5,}Y X1234567Y (0,9)(7,8)
:HA#106:E X(.?){6,}Y X1234567Y (0,9)(7,8)
:HA#107:E X(.?){7,}Y X1234567Y (0,9)(7,8)
:HA#108:E X(.?){8,}Y X1234567Y (0,9)(8,8)
#:HA#110:E X(.?){0,8}Y X1234567Y (0,9)(7,8)
:HA#110:E X(.?){0,8}Y X1234567Y (0,9)(8,8) RE2/Go
#:HA#111:E X(.?){1,8}Y X1234567Y (0,9)(7,8)
:HA#111:E X(.?){1,8}Y X1234567Y (0,9)(8,8) RE2/Go
#:HA#112:E X(.?){2,8}Y X1234567Y (0,9)(7,8)
:HA#112:E X(.?){2,8}Y X1234567Y (0,9)(8,8) RE2/Go
#:HA#113:E X(.?){3,8}Y X1234567Y (0,9)(7,8)
:HA#113:E X(.?){3,8}Y X1234567Y (0,9)(8,8) RE2/Go
#:HA#114:E X(.?){4,8}Y X1234567Y (0,9)(7,8)
:HA#114:E X(.?){4,8}Y X1234567Y (0,9)(8,8) RE2/Go
#:HA#115:E X(.?){5,8}Y X1234567Y (0,9)(7,8)
:HA#115:E X(.?){5,8}Y X1234567Y (0,9)(8,8) RE2/Go
#:HA#116:E X(.?){6,8}Y X1234567Y (0,9)(7,8)
:HA#116:E X(.?){6,8}Y X1234567Y (0,9)(8,8) RE2/Go
#:HA#117:E X(.?){7,8}Y X1234567Y (0,9)(7,8)
:HA#117:E X(.?){7,8}Y X1234567Y (0,9)(8,8) RE2/Go
:HA#118:E X(.?){8,8}Y X1234567Y (0,9)(8,8)
# These test a fixed bug in my regex-tdfa that did not keep the expanded
# form properly grouped, so right association did the wrong thing with
# these ambiguous patterns (crafted just to test my code when I became
# suspicious of my implementation). The first subexpression should use
# "ab" then "a" then "bcd".
# OS X / FreeBSD / NetBSD badly fail many of these, with impossible
# results like (0,6)(4,5)(6,6).
#:HA#260:E (a|ab|c|bcd){0,}(d*) ababcd (0,6)(3,6)(6,6)
:HA#260:E (a|ab|c|bcd){0,}(d*) ababcd (0,1)(0,1)(1,1) Rust
#:HA#261:E (a|ab|c|bcd){1,}(d*) ababcd (0,6)(3,6)(6,6)
:HA#261:E (a|ab|c|bcd){1,}(d*) ababcd (0,1)(0,1)(1,1) Rust
:HA#262:E (a|ab|c|bcd){2,}(d*) ababcd (0,6)(3,6)(6,6)
:HA#263:E (a|ab|c|bcd){3,}(d*) ababcd (0,6)(3,6)(6,6)
:HA#264:E (a|ab|c|bcd){4,}(d*) ababcd NOMATCH
#:HA#265:E (a|ab|c|bcd){0,10}(d*) ababcd (0,6)(3,6)(6,6)
:HA#265:E (a|ab|c|bcd){0,10}(d*) ababcd (0,1)(0,1)(1,1) Rust
#:HA#266:E (a|ab|c|bcd){1,10}(d*) ababcd (0,6)(3,6)(6,6)
:HA#266:E (a|ab|c|bcd){1,10}(d*) ababcd (0,1)(0,1)(1,1) Rust
:HA#267:E (a|ab|c|bcd){2,10}(d*) ababcd (0,6)(3,6)(6,6)
:HA#268:E (a|ab|c|bcd){3,10}(d*) ababcd (0,6)(3,6)(6,6)
:HA#269:E (a|ab|c|bcd){4,10}(d*) ababcd NOMATCH
#:HA#270:E (a|ab|c|bcd)*(d*) ababcd (0,6)(3,6)(6,6)
:HA#270:E (a|ab|c|bcd)*(d*) ababcd (0,1)(0,1)(1,1) Rust
#:HA#271:E (a|ab|c|bcd)+(d*) ababcd (0,6)(3,6)(6,6)
:HA#271:E (a|ab|c|bcd)+(d*) ababcd (0,1)(0,1)(1,1) Rust
# The above worked on Linux/GLIBC but the following often fail.
# They also trip up OS X / FreeBSD / NetBSD:
#:HA#280:E (ab|a|c|bcd){0,}(d*) ababcd (0,6)(3,6)(6,6)
:HA#280:E (ab|a|c|bcd){0,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
#:HA#281:E (ab|a|c|bcd){1,}(d*) ababcd (0,6)(3,6)(6,6)
:HA#281:E (ab|a|c|bcd){1,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
#:HA#282:E (ab|a|c|bcd){2,}(d*) ababcd (0,6)(3,6)(6,6)
:HA#282:E (ab|a|c|bcd){2,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
#:HA#283:E (ab|a|c|bcd){3,}(d*) ababcd (0,6)(3,6)(6,6)
:HA#283:E (ab|a|c|bcd){3,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
:HA#284:E (ab|a|c|bcd){4,}(d*) ababcd NOMATCH
#:HA#285:E (ab|a|c|bcd){0,10}(d*) ababcd (0,6)(3,6)(6,6)
:HA#285:E (ab|a|c|bcd){0,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
#:HA#286:E (ab|a|c|bcd){1,10}(d*) ababcd (0,6)(3,6)(6,6)
:HA#286:E (ab|a|c|bcd){1,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
#:HA#287:E (ab|a|c|bcd){2,10}(d*) ababcd (0,6)(3,6)(6,6)
:HA#287:E (ab|a|c|bcd){2,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
#:HA#288:E (ab|a|c|bcd){3,10}(d*) ababcd (0,6)(3,6)(6,6)
:HA#288:E (ab|a|c|bcd){3,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
:HA#289:E (ab|a|c|bcd){4,10}(d*) ababcd NOMATCH
#:HA#290:E (ab|a|c|bcd)*(d*) ababcd (0,6)(3,6)(6,6)
:HA#290:E (ab|a|c|bcd)*(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
#:HA#291:E (ab|a|c|bcd)+(d*) ababcd (0,6)(3,6)(6,6)
:HA#291:E (ab|a|c|bcd)+(d*) ababcd (0,6)(4,5)(5,6) RE2/Go

View File

@@ -0,0 +1,405 @@
# !!! DO NOT EDIT !!!
# Automatically generated by 'regex-cli generate fowler'.
# Numbers in the test names correspond to the line number of the test from
# the original dat file.
[[test]]
name = "nullsubexpr3"
regex = '''(a*)*'''
haystack = '''a'''
matches = [[[0, 1], [0, 1]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr4"
regex = '''(a*)*'''
haystack = '''x'''
matches = [[[0, 0], [0, 0]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr5"
regex = '''(a*)*'''
haystack = '''aaaaaa'''
matches = [[[0, 6], [0, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr6"
regex = '''(a*)*'''
haystack = '''aaaaaax'''
matches = [[[0, 6], [0, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr7"
regex = '''(a*)+'''
haystack = '''a'''
matches = [[[0, 1], [0, 1]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr8"
regex = '''(a*)+'''
haystack = '''x'''
matches = [[[0, 0], [0, 0]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr9"
regex = '''(a*)+'''
haystack = '''aaaaaa'''
matches = [[[0, 6], [0, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr10"
regex = '''(a*)+'''
haystack = '''aaaaaax'''
matches = [[[0, 6], [0, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr11"
regex = '''(a+)*'''
haystack = '''a'''
matches = [[[0, 1], [0, 1]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr12"
regex = '''(a+)*'''
haystack = '''x'''
matches = [[[0, 0], []]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr13"
regex = '''(a+)*'''
haystack = '''aaaaaa'''
matches = [[[0, 6], [0, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr14"
regex = '''(a+)*'''
haystack = '''aaaaaax'''
matches = [[[0, 6], [0, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr15"
regex = '''(a+)+'''
haystack = '''a'''
matches = [[[0, 1], [0, 1]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr16"
regex = '''(a+)+'''
haystack = '''x'''
matches = []
match-limit = 1
[[test]]
name = "nullsubexpr17"
regex = '''(a+)+'''
haystack = '''aaaaaa'''
matches = [[[0, 6], [0, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr18"
regex = '''(a+)+'''
haystack = '''aaaaaax'''
matches = [[[0, 6], [0, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr20"
regex = '''([a]*)*'''
haystack = '''a'''
matches = [[[0, 1], [0, 1]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr21"
regex = '''([a]*)*'''
haystack = '''x'''
matches = [[[0, 0], [0, 0]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr22"
regex = '''([a]*)*'''
haystack = '''aaaaaa'''
matches = [[[0, 6], [0, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr23"
regex = '''([a]*)*'''
haystack = '''aaaaaax'''
matches = [[[0, 6], [0, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr24"
regex = '''([a]*)+'''
haystack = '''a'''
matches = [[[0, 1], [0, 1]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr25"
regex = '''([a]*)+'''
haystack = '''x'''
matches = [[[0, 0], [0, 0]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr26"
regex = '''([a]*)+'''
haystack = '''aaaaaa'''
matches = [[[0, 6], [0, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr27"
regex = '''([a]*)+'''
haystack = '''aaaaaax'''
matches = [[[0, 6], [0, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr28"
regex = '''([^b]*)*'''
haystack = '''a'''
matches = [[[0, 1], [0, 1]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr29"
regex = '''([^b]*)*'''
haystack = '''b'''
matches = [[[0, 0], [0, 0]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr30"
regex = '''([^b]*)*'''
haystack = '''aaaaaa'''
matches = [[[0, 6], [0, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr31"
regex = '''([^b]*)*'''
haystack = '''aaaaaab'''
matches = [[[0, 6], [0, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr32"
regex = '''([ab]*)*'''
haystack = '''a'''
matches = [[[0, 1], [0, 1]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr33"
regex = '''([ab]*)*'''
haystack = '''aaaaaa'''
matches = [[[0, 6], [0, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr34"
regex = '''([ab]*)*'''
haystack = '''ababab'''
matches = [[[0, 6], [0, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr35"
regex = '''([ab]*)*'''
haystack = '''bababa'''
matches = [[[0, 6], [0, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr36"
regex = '''([ab]*)*'''
haystack = '''b'''
matches = [[[0, 1], [0, 1]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr37"
regex = '''([ab]*)*'''
haystack = '''bbbbbb'''
matches = [[[0, 6], [0, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr38"
regex = '''([ab]*)*'''
haystack = '''aaaabcde'''
matches = [[[0, 5], [0, 5]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr39"
regex = '''([^a]*)*'''
haystack = '''b'''
matches = [[[0, 1], [0, 1]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr40"
regex = '''([^a]*)*'''
haystack = '''bbbbbb'''
matches = [[[0, 6], [0, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr41"
regex = '''([^a]*)*'''
haystack = '''aaaaaa'''
matches = [[[0, 0], [0, 0]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr42"
regex = '''([^ab]*)*'''
haystack = '''ccccxx'''
matches = [[[0, 6], [0, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr43"
regex = '''([^ab]*)*'''
haystack = '''ababab'''
matches = [[[0, 0], [0, 0]]]
match-limit = 1
anchored = true
# Test added by Rust regex project.
[[test]]
name = "nullsubexpr46"
regex = '''((z)+|a)*'''
haystack = '''zabcde'''
matches = [[[0, 2], [1, 2], [0, 1]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr64"
regex = '''(a*)*(x)'''
haystack = '''x'''
matches = [[[0, 1], [0, 0], [0, 1]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr65"
regex = '''(a*)*(x)'''
haystack = '''ax'''
matches = [[[0, 2], [0, 1], [1, 2]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr66"
regex = '''(a*)*(x)'''
haystack = '''axa'''
matches = [[[0, 2], [0, 1], [1, 2]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr68"
regex = '''(a*)+(x)'''
haystack = '''x'''
matches = [[[0, 1], [0, 0], [0, 1]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr69"
regex = '''(a*)+(x)'''
haystack = '''ax'''
matches = [[[0, 2], [0, 1], [1, 2]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr70"
regex = '''(a*)+(x)'''
haystack = '''axa'''
matches = [[[0, 2], [0, 1], [1, 2]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr72"
regex = '''(a*){2}(x)'''
haystack = '''x'''
matches = [[[0, 1], [0, 0], [0, 1]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr73"
regex = '''(a*){2}(x)'''
haystack = '''ax'''
matches = [[[0, 2], [1, 1], [1, 2]]]
match-limit = 1
anchored = true
[[test]]
name = "nullsubexpr74"
regex = '''(a*){2}(x)'''
haystack = '''axa'''
matches = [[[0, 2], [1, 1], [1, 2]]]
match-limit = 1
anchored = true

View File

@@ -0,0 +1,746 @@
# !!! DO NOT EDIT !!!
# Automatically generated by 'regex-cli generate fowler'.
# Numbers in the test names correspond to the line number of the test from
# the original dat file.
[[test]]
name = "repetition10"
regex = '''((..)|(.))'''
haystack = ''''''
matches = []
match-limit = 1
[[test]]
name = "repetition11"
regex = '''((..)|(.))((..)|(.))'''
haystack = ''''''
matches = []
match-limit = 1
[[test]]
name = "repetition12"
regex = '''((..)|(.))((..)|(.))((..)|(.))'''
haystack = ''''''
matches = []
match-limit = 1
[[test]]
name = "repetition14"
regex = '''((..)|(.)){1}'''
haystack = ''''''
matches = []
match-limit = 1
[[test]]
name = "repetition15"
regex = '''((..)|(.)){2}'''
haystack = ''''''
matches = []
match-limit = 1
[[test]]
name = "repetition16"
regex = '''((..)|(.)){3}'''
haystack = ''''''
matches = []
match-limit = 1
[[test]]
name = "repetition18"
regex = '''((..)|(.))*'''
haystack = ''''''
matches = [[[0, 0], [], [], []]]
match-limit = 1
anchored = true
[[test]]
name = "repetition20"
regex = '''((..)|(.))'''
haystack = '''a'''
matches = [[[0, 1], [0, 1], [], [0, 1]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition21"
regex = '''((..)|(.))((..)|(.))'''
haystack = '''a'''
matches = []
match-limit = 1
[[test]]
name = "repetition22"
regex = '''((..)|(.))((..)|(.))((..)|(.))'''
haystack = '''a'''
matches = []
match-limit = 1
[[test]]
name = "repetition24"
regex = '''((..)|(.)){1}'''
haystack = '''a'''
matches = [[[0, 1], [0, 1], [], [0, 1]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition25"
regex = '''((..)|(.)){2}'''
haystack = '''a'''
matches = []
match-limit = 1
[[test]]
name = "repetition26"
regex = '''((..)|(.)){3}'''
haystack = '''a'''
matches = []
match-limit = 1
[[test]]
name = "repetition28"
regex = '''((..)|(.))*'''
haystack = '''a'''
matches = [[[0, 1], [0, 1], [], [0, 1]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition30"
regex = '''((..)|(.))'''
haystack = '''aa'''
matches = [[[0, 2], [0, 2], [0, 2], []]]
match-limit = 1
anchored = true
[[test]]
name = "repetition31"
regex = '''((..)|(.))((..)|(.))'''
haystack = '''aa'''
matches = [[[0, 2], [0, 1], [], [0, 1], [1, 2], [], [1, 2]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition32"
regex = '''((..)|(.))((..)|(.))((..)|(.))'''
haystack = '''aa'''
matches = []
match-limit = 1
[[test]]
name = "repetition34"
regex = '''((..)|(.)){1}'''
haystack = '''aa'''
matches = [[[0, 2], [0, 2], [0, 2], []]]
match-limit = 1
anchored = true
[[test]]
name = "repetition35"
regex = '''((..)|(.)){2}'''
haystack = '''aa'''
matches = [[[0, 2], [1, 2], [], [1, 2]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition36"
regex = '''((..)|(.)){3}'''
haystack = '''aa'''
matches = []
match-limit = 1
[[test]]
name = "repetition38"
regex = '''((..)|(.))*'''
haystack = '''aa'''
matches = [[[0, 2], [0, 2], [0, 2], []]]
match-limit = 1
anchored = true
[[test]]
name = "repetition40"
regex = '''((..)|(.))'''
haystack = '''aaa'''
matches = [[[0, 2], [0, 2], [0, 2], []]]
match-limit = 1
anchored = true
[[test]]
name = "repetition41"
regex = '''((..)|(.))((..)|(.))'''
haystack = '''aaa'''
matches = [[[0, 3], [0, 2], [0, 2], [], [2, 3], [], [2, 3]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition42"
regex = '''((..)|(.))((..)|(.))((..)|(.))'''
haystack = '''aaa'''
matches = [[[0, 3], [0, 1], [], [0, 1], [1, 2], [], [1, 2], [2, 3], [], [2, 3]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition44"
regex = '''((..)|(.)){1}'''
haystack = '''aaa'''
matches = [[[0, 2], [0, 2], [0, 2], []]]
match-limit = 1
anchored = true
# Test added by RE2/Go project.
[[test]]
name = "repetition46"
regex = '''((..)|(.)){2}'''
haystack = '''aaa'''
matches = [[[0, 3], [2, 3], [0, 2], [2, 3]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition47"
regex = '''((..)|(.)){3}'''
haystack = '''aaa'''
matches = [[[0, 3], [2, 3], [], [2, 3]]]
match-limit = 1
anchored = true
# Test added by RE2/Go project.
[[test]]
name = "repetition50"
regex = '''((..)|(.))*'''
haystack = '''aaa'''
matches = [[[0, 3], [2, 3], [0, 2], [2, 3]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition52"
regex = '''((..)|(.))'''
haystack = '''aaaa'''
matches = [[[0, 2], [0, 2], [0, 2], []]]
match-limit = 1
anchored = true
[[test]]
name = "repetition53"
regex = '''((..)|(.))((..)|(.))'''
haystack = '''aaaa'''
matches = [[[0, 4], [0, 2], [0, 2], [], [2, 4], [2, 4], []]]
match-limit = 1
anchored = true
[[test]]
name = "repetition54"
regex = '''((..)|(.))((..)|(.))((..)|(.))'''
haystack = '''aaaa'''
matches = [[[0, 4], [0, 2], [0, 2], [], [2, 3], [], [2, 3], [3, 4], [], [3, 4]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition56"
regex = '''((..)|(.)){1}'''
haystack = '''aaaa'''
matches = [[[0, 2], [0, 2], [0, 2], []]]
match-limit = 1
anchored = true
[[test]]
name = "repetition57"
regex = '''((..)|(.)){2}'''
haystack = '''aaaa'''
matches = [[[0, 4], [2, 4], [2, 4], []]]
match-limit = 1
anchored = true
# Test added by RE2/Go project.
[[test]]
name = "repetition59"
regex = '''((..)|(.)){3}'''
haystack = '''aaaa'''
matches = [[[0, 4], [3, 4], [0, 2], [3, 4]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition61"
regex = '''((..)|(.))*'''
haystack = '''aaaa'''
matches = [[[0, 4], [2, 4], [2, 4], []]]
match-limit = 1
anchored = true
[[test]]
name = "repetition63"
regex = '''((..)|(.))'''
haystack = '''aaaaa'''
matches = [[[0, 2], [0, 2], [0, 2], []]]
match-limit = 1
anchored = true
[[test]]
name = "repetition64"
regex = '''((..)|(.))((..)|(.))'''
haystack = '''aaaaa'''
matches = [[[0, 4], [0, 2], [0, 2], [], [2, 4], [2, 4], []]]
match-limit = 1
anchored = true
[[test]]
name = "repetition65"
regex = '''((..)|(.))((..)|(.))((..)|(.))'''
haystack = '''aaaaa'''
matches = [[[0, 5], [0, 2], [0, 2], [], [2, 4], [2, 4], [], [4, 5], [], [4, 5]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition67"
regex = '''((..)|(.)){1}'''
haystack = '''aaaaa'''
matches = [[[0, 2], [0, 2], [0, 2], []]]
match-limit = 1
anchored = true
[[test]]
name = "repetition68"
regex = '''((..)|(.)){2}'''
haystack = '''aaaaa'''
matches = [[[0, 4], [2, 4], [2, 4], []]]
match-limit = 1
anchored = true
# Test added by RE2/Go project.
[[test]]
name = "repetition70"
regex = '''((..)|(.)){3}'''
haystack = '''aaaaa'''
matches = [[[0, 5], [4, 5], [2, 4], [4, 5]]]
match-limit = 1
anchored = true
# Test added by RE2/Go project.
[[test]]
name = "repetition73"
regex = '''((..)|(.))*'''
haystack = '''aaaaa'''
matches = [[[0, 5], [4, 5], [2, 4], [4, 5]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition75"
regex = '''((..)|(.))'''
haystack = '''aaaaaa'''
matches = [[[0, 2], [0, 2], [0, 2], []]]
match-limit = 1
anchored = true
[[test]]
name = "repetition76"
regex = '''((..)|(.))((..)|(.))'''
haystack = '''aaaaaa'''
matches = [[[0, 4], [0, 2], [0, 2], [], [2, 4], [2, 4], []]]
match-limit = 1
anchored = true
[[test]]
name = "repetition77"
regex = '''((..)|(.))((..)|(.))((..)|(.))'''
haystack = '''aaaaaa'''
matches = [[[0, 6], [0, 2], [0, 2], [], [2, 4], [2, 4], [], [4, 6], [4, 6], []]]
match-limit = 1
anchored = true
[[test]]
name = "repetition79"
regex = '''((..)|(.)){1}'''
haystack = '''aaaaaa'''
matches = [[[0, 2], [0, 2], [0, 2], []]]
match-limit = 1
anchored = true
[[test]]
name = "repetition80"
regex = '''((..)|(.)){2}'''
haystack = '''aaaaaa'''
matches = [[[0, 4], [2, 4], [2, 4], []]]
match-limit = 1
anchored = true
[[test]]
name = "repetition81"
regex = '''((..)|(.)){3}'''
haystack = '''aaaaaa'''
matches = [[[0, 6], [4, 6], [4, 6], []]]
match-limit = 1
anchored = true
[[test]]
name = "repetition83"
regex = '''((..)|(.))*'''
haystack = '''aaaaaa'''
matches = [[[0, 6], [4, 6], [4, 6], []]]
match-limit = 1
anchored = true
[[test]]
name = "repetition-expensive90"
regex = '''X(.?){0,}Y'''
haystack = '''X1234567Y'''
matches = [[[0, 9], [7, 8]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition-expensive91"
regex = '''X(.?){1,}Y'''
haystack = '''X1234567Y'''
matches = [[[0, 9], [7, 8]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition-expensive92"
regex = '''X(.?){2,}Y'''
haystack = '''X1234567Y'''
matches = [[[0, 9], [7, 8]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition-expensive93"
regex = '''X(.?){3,}Y'''
haystack = '''X1234567Y'''
matches = [[[0, 9], [7, 8]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition-expensive94"
regex = '''X(.?){4,}Y'''
haystack = '''X1234567Y'''
matches = [[[0, 9], [7, 8]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition-expensive95"
regex = '''X(.?){5,}Y'''
haystack = '''X1234567Y'''
matches = [[[0, 9], [7, 8]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition-expensive96"
regex = '''X(.?){6,}Y'''
haystack = '''X1234567Y'''
matches = [[[0, 9], [7, 8]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition-expensive97"
regex = '''X(.?){7,}Y'''
haystack = '''X1234567Y'''
matches = [[[0, 9], [7, 8]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition-expensive98"
regex = '''X(.?){8,}Y'''
haystack = '''X1234567Y'''
matches = [[[0, 9], [8, 8]]]
match-limit = 1
anchored = true
# Test added by RE2/Go project.
[[test]]
name = "repetition-expensive100"
regex = '''X(.?){0,8}Y'''
haystack = '''X1234567Y'''
matches = [[[0, 9], [8, 8]]]
match-limit = 1
anchored = true
# Test added by RE2/Go project.
[[test]]
name = "repetition-expensive102"
regex = '''X(.?){1,8}Y'''
haystack = '''X1234567Y'''
matches = [[[0, 9], [8, 8]]]
match-limit = 1
anchored = true
# Test added by RE2/Go project.
[[test]]
name = "repetition-expensive104"
regex = '''X(.?){2,8}Y'''
haystack = '''X1234567Y'''
matches = [[[0, 9], [8, 8]]]
match-limit = 1
anchored = true
# Test added by RE2/Go project.
[[test]]
name = "repetition-expensive106"
regex = '''X(.?){3,8}Y'''
haystack = '''X1234567Y'''
matches = [[[0, 9], [8, 8]]]
match-limit = 1
anchored = true
# Test added by RE2/Go project.
[[test]]
name = "repetition-expensive108"
regex = '''X(.?){4,8}Y'''
haystack = '''X1234567Y'''
matches = [[[0, 9], [8, 8]]]
match-limit = 1
anchored = true
# Test added by RE2/Go project.
[[test]]
name = "repetition-expensive110"
regex = '''X(.?){5,8}Y'''
haystack = '''X1234567Y'''
matches = [[[0, 9], [8, 8]]]
match-limit = 1
anchored = true
# Test added by RE2/Go project.
[[test]]
name = "repetition-expensive112"
regex = '''X(.?){6,8}Y'''
haystack = '''X1234567Y'''
matches = [[[0, 9], [8, 8]]]
match-limit = 1
anchored = true
# Test added by RE2/Go project.
[[test]]
name = "repetition-expensive114"
regex = '''X(.?){7,8}Y'''
haystack = '''X1234567Y'''
matches = [[[0, 9], [8, 8]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition-expensive115"
regex = '''X(.?){8,8}Y'''
haystack = '''X1234567Y'''
matches = [[[0, 9], [8, 8]]]
match-limit = 1
anchored = true
# Test added by Rust regex project.
[[test]]
name = "repetition-expensive127"
regex = '''(a|ab|c|bcd){0,}(d*)'''
haystack = '''ababcd'''
matches = [[[0, 1], [0, 1], [1, 1]]]
match-limit = 1
anchored = true
# Test added by Rust regex project.
[[test]]
name = "repetition-expensive129"
regex = '''(a|ab|c|bcd){1,}(d*)'''
haystack = '''ababcd'''
matches = [[[0, 1], [0, 1], [1, 1]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition-expensive130"
regex = '''(a|ab|c|bcd){2,}(d*)'''
haystack = '''ababcd'''
matches = [[[0, 6], [3, 6], [6, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition-expensive131"
regex = '''(a|ab|c|bcd){3,}(d*)'''
haystack = '''ababcd'''
matches = [[[0, 6], [3, 6], [6, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition-expensive132"
regex = '''(a|ab|c|bcd){4,}(d*)'''
haystack = '''ababcd'''
matches = []
match-limit = 1
# Test added by Rust regex project.
[[test]]
name = "repetition-expensive134"
regex = '''(a|ab|c|bcd){0,10}(d*)'''
haystack = '''ababcd'''
matches = [[[0, 1], [0, 1], [1, 1]]]
match-limit = 1
anchored = true
# Test added by Rust regex project.
[[test]]
name = "repetition-expensive136"
regex = '''(a|ab|c|bcd){1,10}(d*)'''
haystack = '''ababcd'''
matches = [[[0, 1], [0, 1], [1, 1]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition-expensive137"
regex = '''(a|ab|c|bcd){2,10}(d*)'''
haystack = '''ababcd'''
matches = [[[0, 6], [3, 6], [6, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition-expensive138"
regex = '''(a|ab|c|bcd){3,10}(d*)'''
haystack = '''ababcd'''
matches = [[[0, 6], [3, 6], [6, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition-expensive139"
regex = '''(a|ab|c|bcd){4,10}(d*)'''
haystack = '''ababcd'''
matches = []
match-limit = 1
# Test added by Rust regex project.
[[test]]
name = "repetition-expensive141"
regex = '''(a|ab|c|bcd)*(d*)'''
haystack = '''ababcd'''
matches = [[[0, 1], [0, 1], [1, 1]]]
match-limit = 1
anchored = true
# Test added by Rust regex project.
[[test]]
name = "repetition-expensive143"
regex = '''(a|ab|c|bcd)+(d*)'''
haystack = '''ababcd'''
matches = [[[0, 1], [0, 1], [1, 1]]]
match-limit = 1
anchored = true
# Test added by RE2/Go project.
[[test]]
name = "repetition-expensive149"
regex = '''(ab|a|c|bcd){0,}(d*)'''
haystack = '''ababcd'''
matches = [[[0, 6], [4, 5], [5, 6]]]
match-limit = 1
anchored = true
# Test added by RE2/Go project.
[[test]]
name = "repetition-expensive151"
regex = '''(ab|a|c|bcd){1,}(d*)'''
haystack = '''ababcd'''
matches = [[[0, 6], [4, 5], [5, 6]]]
match-limit = 1
anchored = true
# Test added by RE2/Go project.
[[test]]
name = "repetition-expensive153"
regex = '''(ab|a|c|bcd){2,}(d*)'''
haystack = '''ababcd'''
matches = [[[0, 6], [4, 5], [5, 6]]]
match-limit = 1
anchored = true
# Test added by RE2/Go project.
[[test]]
name = "repetition-expensive155"
regex = '''(ab|a|c|bcd){3,}(d*)'''
haystack = '''ababcd'''
matches = [[[0, 6], [4, 5], [5, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition-expensive156"
regex = '''(ab|a|c|bcd){4,}(d*)'''
haystack = '''ababcd'''
matches = []
match-limit = 1
# Test added by RE2/Go project.
[[test]]
name = "repetition-expensive158"
regex = '''(ab|a|c|bcd){0,10}(d*)'''
haystack = '''ababcd'''
matches = [[[0, 6], [4, 5], [5, 6]]]
match-limit = 1
anchored = true
# Test added by RE2/Go project.
[[test]]
name = "repetition-expensive160"
regex = '''(ab|a|c|bcd){1,10}(d*)'''
haystack = '''ababcd'''
matches = [[[0, 6], [4, 5], [5, 6]]]
match-limit = 1
anchored = true
# Test added by RE2/Go project.
[[test]]
name = "repetition-expensive162"
regex = '''(ab|a|c|bcd){2,10}(d*)'''
haystack = '''ababcd'''
matches = [[[0, 6], [4, 5], [5, 6]]]
match-limit = 1
anchored = true
# Test added by RE2/Go project.
[[test]]
name = "repetition-expensive164"
regex = '''(ab|a|c|bcd){3,10}(d*)'''
haystack = '''ababcd'''
matches = [[[0, 6], [4, 5], [5, 6]]]
match-limit = 1
anchored = true
[[test]]
name = "repetition-expensive165"
regex = '''(ab|a|c|bcd){4,10}(d*)'''
haystack = '''ababcd'''
matches = []
match-limit = 1
# Test added by RE2/Go project.
[[test]]
name = "repetition-expensive167"
regex = '''(ab|a|c|bcd)*(d*)'''
haystack = '''ababcd'''
matches = [[[0, 6], [4, 5], [5, 6]]]
match-limit = 1
anchored = true
# Test added by RE2/Go project.
[[test]]
name = "repetition-expensive169"
regex = '''(ab|a|c|bcd)+(d*)'''
haystack = '''ababcd'''
matches = [[[0, 6], [4, 5], [5, 6]]]
match-limit = 1
anchored = true

143
vendor/regex/testdata/iter.toml vendored Normal file
View File

@@ -0,0 +1,143 @@
[[test]]
name = "1"
regex = "a"
haystack = "aaa"
matches = [[0, 1], [1, 2], [2, 3]]
[[test]]
name = "2"
regex = "a"
haystack = "aba"
matches = [[0, 1], [2, 3]]
[[test]]
name = "empty1"
regex = ''
haystack = ''
matches = [[0, 0]]
[[test]]
name = "empty2"
regex = ''
haystack = 'abc'
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "empty3"
regex = '(?:)'
haystack = 'abc'
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "empty4"
regex = '(?:)*'
haystack = 'abc'
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "empty5"
regex = '(?:)+'
haystack = 'abc'
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "empty6"
regex = '(?:)?'
haystack = 'abc'
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "empty7"
regex = '(?:)(?:)'
haystack = 'abc'
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "empty8"
regex = '(?:)+|z'
haystack = 'abc'
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "empty9"
regex = 'z|(?:)+'
haystack = 'abc'
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "empty10"
regex = '(?:)+|b'
haystack = 'abc'
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
[[test]]
name = "empty11"
regex = 'b|(?:)+'
haystack = 'abc'
matches = [[0, 0], [1, 2], [3, 3]]
[[test]]
name = "start1"
regex = "^a"
haystack = "a"
matches = [[0, 1]]
[[test]]
name = "start2"
regex = "^a"
haystack = "aa"
matches = [[0, 1]]
[[test]]
name = "anchored1"
regex = "a"
haystack = "a"
matches = [[0, 1]]
anchored = true
# This test is pretty subtle. It demonstrates the crucial difference between
# '^a' and 'a' compiled in 'anchored' mode. The former regex exclusively
# matches at the start of a haystack and nowhere else. The latter regex has
# no such restriction, but its automaton is constructed such that it lacks a
# `.*?` prefix. So it can actually produce matches at multiple locations.
# The anchored3 test drives this point home.
[[test]]
name = "anchored2"
regex = "a"
haystack = "aa"
matches = [[0, 1], [1, 2]]
anchored = true
# Unlike anchored2, this test stops matching anything after it sees `b`
# since it lacks a `.*?` prefix. Since it is looking for 'a' but sees 'b', it
# determines that there are no remaining matches.
[[test]]
name = "anchored3"
regex = "a"
haystack = "aaba"
matches = [[0, 1], [1, 2]]
anchored = true
[[test]]
name = "nonempty-followedby-empty"
regex = 'abc|.*?'
haystack = "abczzz"
matches = [[0, 3], [4, 4], [5, 5], [6, 6]]
[[test]]
name = "nonempty-followedby-oneempty"
regex = 'abc|.*?'
haystack = "abcz"
matches = [[0, 3], [4, 4]]
[[test]]
name = "nonempty-followedby-onemixed"
regex = 'abc|.*?'
haystack = "abczabc"
matches = [[0, 3], [4, 7]]
[[test]]
name = "nonempty-followedby-twomixed"
regex = 'abc|.*?'
haystack = "abczzabc"
matches = [[0, 3], [4, 4], [5, 8]]

25
vendor/regex/testdata/leftmost-all.toml vendored Normal file
View File

@@ -0,0 +1,25 @@
[[test]]
name = "alt"
regex = 'foo|foobar'
haystack = "foobar"
matches = [[0, 6]]
match-kind = "all"
search-kind = "leftmost"
[[test]]
name = "multi"
regex = ['foo', 'foobar']
haystack = "foobar"
matches = [
{ id = 1, span = [0, 6] },
]
match-kind = "all"
search-kind = "leftmost"
[[test]]
name = "dotall"
regex = '(?s:.)'
haystack = "foobar"
matches = [[5, 6]]
match-kind = "all"
search-kind = "leftmost"

View File

@@ -0,0 +1,109 @@
# This tests that we can switch the line terminator to the NUL byte.
[[test]]
name = "nul"
regex = '(?m)^[a-z]+$'
haystack = '\x00abc\x00'
matches = [[1, 4]]
unescape = true
line-terminator = '\x00'
# This tests that '.' will not match the configured line terminator, but will
# match \n.
[[test]]
name = "dot-changes-with-line-terminator"
regex = '.'
haystack = '\x00\n'
matches = [[1, 2]]
unescape = true
line-terminator = '\x00'
# This tests that when we switch the line terminator, \n is no longer
# recognized as the terminator.
[[test]]
name = "not-line-feed"
regex = '(?m)^[a-z]+$'
haystack = '\nabc\n'
matches = []
unescape = true
line-terminator = '\x00'
# This tests that we can set the line terminator to a non-ASCII byte and have
# it behave as expected.
[[test]]
name = "non-ascii"
regex = '(?m)^[a-z]+$'
haystack = '\xFFabc\xFF'
matches = [[1, 4]]
unescape = true
line-terminator = '\xFF'
utf8 = false
# This tests a tricky case where the line terminator is set to \r. This ensures
# that the StartLF look-behind assertion is tracked when computing the start
# state.
[[test]]
name = "carriage"
regex = '(?m)^[a-z]+'
haystack = 'ABC\rabc'
matches = [[4, 7]]
bounds = [4, 7]
unescape = true
line-terminator = '\r'
# This tests that we can set the line terminator to a byte corresponding to a
# word character, and things work as expected.
[[test]]
name = "word-byte"
regex = '(?m)^[a-z]+$'
haystack = 'ZabcZ'
matches = [[1, 4]]
unescape = true
line-terminator = 'Z'
# This tests that we can set the line terminator to a byte corresponding to a
# non-word character, and things work as expected.
[[test]]
name = "non-word-byte"
regex = '(?m)^[a-z]+$'
haystack = '%abc%'
matches = [[1, 4]]
unescape = true
line-terminator = '%'
# This combines "set line terminator to a word byte" with a word boundary
# assertion, which should result in no match even though ^/$ matches.
[[test]]
name = "word-boundary"
regex = '(?m)^\b[a-z]+\b$'
haystack = 'ZabcZ'
matches = []
unescape = true
line-terminator = 'Z'
# Like 'word-boundary', but does an anchored search at the point where ^
# matches, but where \b should not.
[[test]]
name = "word-boundary-at"
regex = '(?m)^\b[a-z]+\b$'
haystack = 'ZabcZ'
matches = []
bounds = [1, 4]
anchored = true
unescape = true
line-terminator = 'Z'
# Like 'word-boundary-at', but flips the word boundary to a negation. This
# in particular tests a tricky case in DFA engines, where they must consider
# explicitly that a starting configuration from a custom line terminator may
# also require setting the "is from word byte" flag on a state. Otherwise,
# it's treated as "not from a word byte," which would result in \B not matching
# here when it should.
[[test]]
name = "not-word-boundary-at"
regex = '(?m)^\B[a-z]+\B$'
haystack = 'ZabcZ'
matches = [[1, 4]]
bounds = [1, 4]
anchored = true
unescape = true
line-terminator = 'Z'

99
vendor/regex/testdata/misc.toml vendored Normal file
View File

@@ -0,0 +1,99 @@
[[test]]
name = "ascii-literal"
regex = "a"
haystack = "a"
matches = [[0, 1]]
[[test]]
name = "ascii-literal-not"
regex = "a"
haystack = "z"
matches = []
[[test]]
name = "ascii-literal-anchored"
regex = "a"
haystack = "a"
matches = [[0, 1]]
anchored = true
[[test]]
name = "ascii-literal-anchored-not"
regex = "a"
haystack = "z"
matches = []
anchored = true
[[test]]
name = "anchor-start-end-line"
regex = '(?m)^bar$'
haystack = "foo\nbar\nbaz"
matches = [[4, 7]]
[[test]]
name = "prefix-literal-match"
regex = '^abc'
haystack = "abc"
matches = [[0, 3]]
[[test]]
name = "prefix-literal-match-ascii"
regex = '^abc'
haystack = "abc"
matches = [[0, 3]]
unicode = false
utf8 = false
[[test]]
name = "prefix-literal-no-match"
regex = '^abc'
haystack = "zabc"
matches = []
[[test]]
name = "one-literal-edge"
regex = 'abc'
haystack = "xxxxxab"
matches = []
[[test]]
name = "terminates"
regex = 'a$'
haystack = "a"
matches = [[0, 1]]
[[test]]
name = "suffix-100"
regex = '.*abcd'
haystack = "abcd"
matches = [[0, 4]]
[[test]]
name = "suffix-200"
regex = '.*(?:abcd)+'
haystack = "abcd"
matches = [[0, 4]]
[[test]]
name = "suffix-300"
regex = '.*(?:abcd)+'
haystack = "abcdabcd"
matches = [[0, 8]]
[[test]]
name = "suffix-400"
regex = '.*(?:abcd)+'
haystack = "abcdxabcd"
matches = [[0, 9]]
[[test]]
name = "suffix-500"
regex = '.*x(?:abcd)+'
haystack = "abcdxabcd"
matches = [[0, 9]]
[[test]]
name = "suffix-600"
regex = '[^abcd]*x(?:abcd)+'
haystack = "abcdxabcd"
matches = [[4, 9]]

845
vendor/regex/testdata/multiline.toml vendored Normal file
View File

@@ -0,0 +1,845 @@
[[test]]
name = "basic1"
regex = '(?m)^[a-z]+$'
haystack = "abc\ndef\nxyz"
matches = [[0, 3], [4, 7], [8, 11]]
[[test]]
name = "basic1-crlf"
regex = '(?Rm)^[a-z]+$'
haystack = "abc\ndef\nxyz"
matches = [[0, 3], [4, 7], [8, 11]]
[[test]]
name = "basic1-crlf-cr"
regex = '(?Rm)^[a-z]+$'
haystack = "abc\rdef\rxyz"
matches = [[0, 3], [4, 7], [8, 11]]
[[test]]
name = "basic2"
regex = '(?m)^$'
haystack = "abc\ndef\nxyz"
matches = []
[[test]]
name = "basic2-crlf"
regex = '(?Rm)^$'
haystack = "abc\ndef\nxyz"
matches = []
[[test]]
name = "basic2-crlf-cr"
regex = '(?Rm)^$'
haystack = "abc\rdef\rxyz"
matches = []
[[test]]
name = "basic3"
regex = '(?m)^'
haystack = "abc\ndef\nxyz"
matches = [[0, 0], [4, 4], [8, 8]]
[[test]]
name = "basic3-crlf"
regex = '(?Rm)^'
haystack = "abc\ndef\nxyz"
matches = [[0, 0], [4, 4], [8, 8]]
[[test]]
name = "basic3-crlf-cr"
regex = '(?Rm)^'
haystack = "abc\rdef\rxyz"
matches = [[0, 0], [4, 4], [8, 8]]
[[test]]
name = "basic4"
regex = '(?m)$'
haystack = "abc\ndef\nxyz"
matches = [[3, 3], [7, 7], [11, 11]]
[[test]]
name = "basic4-crlf"
regex = '(?Rm)$'
haystack = "abc\ndef\nxyz"
matches = [[3, 3], [7, 7], [11, 11]]
[[test]]
name = "basic4-crlf-cr"
regex = '(?Rm)$'
haystack = "abc\rdef\rxyz"
matches = [[3, 3], [7, 7], [11, 11]]
[[test]]
name = "basic5"
regex = '(?m)^[a-z]'
haystack = "abc\ndef\nxyz"
matches = [[0, 1], [4, 5], [8, 9]]
[[test]]
name = "basic5-crlf"
regex = '(?Rm)^[a-z]'
haystack = "abc\ndef\nxyz"
matches = [[0, 1], [4, 5], [8, 9]]
[[test]]
name = "basic5-crlf-cr"
regex = '(?Rm)^[a-z]'
haystack = "abc\rdef\rxyz"
matches = [[0, 1], [4, 5], [8, 9]]
[[test]]
name = "basic6"
regex = '(?m)[a-z]^'
haystack = "abc\ndef\nxyz"
matches = []
[[test]]
name = "basic6-crlf"
regex = '(?Rm)[a-z]^'
haystack = "abc\ndef\nxyz"
matches = []
[[test]]
name = "basic6-crlf-cr"
regex = '(?Rm)[a-z]^'
haystack = "abc\rdef\rxyz"
matches = []
[[test]]
name = "basic7"
regex = '(?m)[a-z]$'
haystack = "abc\ndef\nxyz"
matches = [[2, 3], [6, 7], [10, 11]]
[[test]]
name = "basic7-crlf"
regex = '(?Rm)[a-z]$'
haystack = "abc\ndef\nxyz"
matches = [[2, 3], [6, 7], [10, 11]]
[[test]]
name = "basic7-crlf-cr"
regex = '(?Rm)[a-z]$'
haystack = "abc\rdef\rxyz"
matches = [[2, 3], [6, 7], [10, 11]]
[[test]]
name = "basic8"
regex = '(?m)$[a-z]'
haystack = "abc\ndef\nxyz"
matches = []
[[test]]
name = "basic8-crlf"
regex = '(?Rm)$[a-z]'
haystack = "abc\ndef\nxyz"
matches = []
[[test]]
name = "basic8-crlf-cr"
regex = '(?Rm)$[a-z]'
haystack = "abc\rdef\rxyz"
matches = []
[[test]]
name = "basic9"
regex = '(?m)^$'
haystack = ""
matches = [[0, 0]]
[[test]]
name = "basic9-crlf"
regex = '(?Rm)^$'
haystack = ""
matches = [[0, 0]]
[[test]]
name = "repeat1"
regex = '(?m)(?:^$)*'
haystack = "a\nb\nc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]
[[test]]
name = "repeat1-crlf"
regex = '(?Rm)(?:^$)*'
haystack = "a\nb\nc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]
[[test]]
name = "repeat1-crlf-cr"
regex = '(?Rm)(?:^$)*'
haystack = "a\rb\rc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]
[[test]]
name = "repeat1-no-multi"
regex = '(?:^$)*'
haystack = "a\nb\nc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]
[[test]]
name = "repeat1-no-multi-crlf"
regex = '(?R)(?:^$)*'
haystack = "a\nb\nc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]
[[test]]
name = "repeat1-no-multi-crlf-cr"
regex = '(?R)(?:^$)*'
haystack = "a\rb\rc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]
[[test]]
name = "repeat2"
regex = '(?m)(?:^|a)+'
haystack = "a\naaa\n"
matches = [[0, 0], [2, 2], [3, 5], [6, 6]]
[[test]]
name = "repeat2-crlf"
regex = '(?Rm)(?:^|a)+'
haystack = "a\naaa\n"
matches = [[0, 0], [2, 2], [3, 5], [6, 6]]
[[test]]
name = "repeat2-crlf-cr"
regex = '(?Rm)(?:^|a)+'
haystack = "a\raaa\r"
matches = [[0, 0], [2, 2], [3, 5], [6, 6]]
[[test]]
name = "repeat2-no-multi"
regex = '(?:^|a)+'
haystack = "a\naaa\n"
matches = [[0, 0], [2, 5]]
[[test]]
name = "repeat2-no-multi-crlf"
regex = '(?R)(?:^|a)+'
haystack = "a\naaa\n"
matches = [[0, 0], [2, 5]]
[[test]]
name = "repeat2-no-multi-crlf-cr"
regex = '(?R)(?:^|a)+'
haystack = "a\raaa\r"
matches = [[0, 0], [2, 5]]
[[test]]
name = "repeat3"
regex = '(?m)(?:^|a)*'
haystack = "a\naaa\n"
matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]]
[[test]]
name = "repeat3-crlf"
regex = '(?Rm)(?:^|a)*'
haystack = "a\naaa\n"
matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]]
[[test]]
name = "repeat3-crlf-cr"
regex = '(?Rm)(?:^|a)*'
haystack = "a\raaa\r"
matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]]
[[test]]
name = "repeat3-no-multi"
regex = '(?:^|a)*'
haystack = "a\naaa\n"
matches = [[0, 0], [1, 1], [2, 5], [6, 6]]
[[test]]
name = "repeat3-no-multi-crlf"
regex = '(?R)(?:^|a)*'
haystack = "a\naaa\n"
matches = [[0, 0], [1, 1], [2, 5], [6, 6]]
[[test]]
name = "repeat3-no-multi-crlf-cr"
regex = '(?R)(?:^|a)*'
haystack = "a\raaa\r"
matches = [[0, 0], [1, 1], [2, 5], [6, 6]]
[[test]]
name = "repeat4"
regex = '(?m)(?:^|a+)'
haystack = "a\naaa\n"
matches = [[0, 0], [2, 2], [3, 5], [6, 6]]
[[test]]
name = "repeat4-crlf"
regex = '(?Rm)(?:^|a+)'
haystack = "a\naaa\n"
matches = [[0, 0], [2, 2], [3, 5], [6, 6]]
[[test]]
name = "repeat4-crlf-cr"
regex = '(?Rm)(?:^|a+)'
haystack = "a\raaa\r"
matches = [[0, 0], [2, 2], [3, 5], [6, 6]]
[[test]]
name = "repeat4-no-multi"
regex = '(?:^|a+)'
haystack = "a\naaa\n"
matches = [[0, 0], [2, 5]]
[[test]]
name = "repeat4-no-multi-crlf"
regex = '(?R)(?:^|a+)'
haystack = "a\naaa\n"
matches = [[0, 0], [2, 5]]
[[test]]
name = "repeat4-no-multi-crlf-cr"
regex = '(?R)(?:^|a+)'
haystack = "a\raaa\r"
matches = [[0, 0], [2, 5]]
[[test]]
name = "repeat5"
regex = '(?m)(?:^|a*)'
haystack = "a\naaa\n"
matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]]
[[test]]
name = "repeat5-crlf"
regex = '(?Rm)(?:^|a*)'
haystack = "a\naaa\n"
matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]]
[[test]]
name = "repeat5-crlf-cr"
regex = '(?Rm)(?:^|a*)'
haystack = "a\raaa\r"
matches = [[0, 0], [1, 1], [2, 2], [3, 5], [6, 6]]
[[test]]
name = "repeat5-no-multi"
regex = '(?:^|a*)'
haystack = "a\naaa\n"
matches = [[0, 0], [1, 1], [2, 5], [6, 6]]
[[test]]
name = "repeat5-no-multi-crlf"
regex = '(?R)(?:^|a*)'
haystack = "a\naaa\n"
matches = [[0, 0], [1, 1], [2, 5], [6, 6]]
[[test]]
name = "repeat5-no-multi-crlf-cr"
regex = '(?R)(?:^|a*)'
haystack = "a\raaa\r"
matches = [[0, 0], [1, 1], [2, 5], [6, 6]]
[[test]]
name = "repeat6"
regex = '(?m)(?:^[a-z])+'
haystack = "abc\ndef\nxyz"
matches = [[0, 1], [4, 5], [8, 9]]
[[test]]
name = "repeat6-crlf"
regex = '(?Rm)(?:^[a-z])+'
haystack = "abc\ndef\nxyz"
matches = [[0, 1], [4, 5], [8, 9]]
[[test]]
name = "repeat6-crlf-cr"
regex = '(?Rm)(?:^[a-z])+'
haystack = "abc\rdef\rxyz"
matches = [[0, 1], [4, 5], [8, 9]]
[[test]]
name = "repeat6-no-multi"
regex = '(?:^[a-z])+'
haystack = "abc\ndef\nxyz"
matches = [[0, 1]]
[[test]]
name = "repeat6-no-multi-crlf"
regex = '(?R)(?:^[a-z])+'
haystack = "abc\ndef\nxyz"
matches = [[0, 1]]
[[test]]
name = "repeat6-no-multi-crlf-cr"
regex = '(?R)(?:^[a-z])+'
haystack = "abc\rdef\rxyz"
matches = [[0, 1]]
[[test]]
name = "repeat7"
regex = '(?m)(?:^[a-z]{3}\n?)+'
haystack = "abc\ndef\nxyz"
matches = [[0, 11]]
[[test]]
name = "repeat7-crlf"
regex = '(?Rm)(?:^[a-z]{3}\n?)+'
haystack = "abc\ndef\nxyz"
matches = [[0, 11]]
[[test]]
name = "repeat7-crlf-cr"
regex = '(?Rm)(?:^[a-z]{3}\r?)+'
haystack = "abc\rdef\rxyz"
matches = [[0, 11]]
[[test]]
name = "repeat7-no-multi"
regex = '(?:^[a-z]{3}\n?)+'
haystack = "abc\ndef\nxyz"
matches = [[0, 4]]
[[test]]
name = "repeat7-no-multi-crlf"
regex = '(?R)(?:^[a-z]{3}\n?)+'
haystack = "abc\ndef\nxyz"
matches = [[0, 4]]
[[test]]
name = "repeat7-no-multi-crlf-cr"
regex = '(?R)(?:^[a-z]{3}\r?)+'
haystack = "abc\rdef\rxyz"
matches = [[0, 4]]
[[test]]
name = "repeat8"
regex = '(?m)(?:^[a-z]{3}\n?)*'
haystack = "abc\ndef\nxyz"
matches = [[0, 11]]
[[test]]
name = "repeat8-crlf"
regex = '(?Rm)(?:^[a-z]{3}\n?)*'
haystack = "abc\ndef\nxyz"
matches = [[0, 11]]
[[test]]
name = "repeat8-crlf-cr"
regex = '(?Rm)(?:^[a-z]{3}\r?)*'
haystack = "abc\rdef\rxyz"
matches = [[0, 11]]
[[test]]
name = "repeat8-no-multi"
regex = '(?:^[a-z]{3}\n?)*'
haystack = "abc\ndef\nxyz"
matches = [[0, 4], [5, 5], [6, 6], [7, 7], [8, 8], [9, 9], [10, 10], [11, 11]]
[[test]]
name = "repeat8-no-multi-crlf"
regex = '(?R)(?:^[a-z]{3}\n?)*'
haystack = "abc\ndef\nxyz"
matches = [[0, 4], [5, 5], [6, 6], [7, 7], [8, 8], [9, 9], [10, 10], [11, 11]]
[[test]]
name = "repeat8-no-multi-crlf-cr"
regex = '(?R)(?:^[a-z]{3}\r?)*'
haystack = "abc\rdef\rxyz"
matches = [[0, 4], [5, 5], [6, 6], [7, 7], [8, 8], [9, 9], [10, 10], [11, 11]]
[[test]]
name = "repeat9"
regex = '(?m)(?:\n?[a-z]{3}$)+'
haystack = "abc\ndef\nxyz"
matches = [[0, 11]]
[[test]]
name = "repeat9-crlf"
regex = '(?Rm)(?:\n?[a-z]{3}$)+'
haystack = "abc\ndef\nxyz"
matches = [[0, 11]]
[[test]]
name = "repeat9-crlf-cr"
regex = '(?Rm)(?:\r?[a-z]{3}$)+'
haystack = "abc\rdef\rxyz"
matches = [[0, 11]]
[[test]]
name = "repeat9-no-multi"
regex = '(?:\n?[a-z]{3}$)+'
haystack = "abc\ndef\nxyz"
matches = [[7, 11]]
[[test]]
name = "repeat9-no-multi-crlf"
regex = '(?R)(?:\n?[a-z]{3}$)+'
haystack = "abc\ndef\nxyz"
matches = [[7, 11]]
[[test]]
name = "repeat9-no-multi-crlf-cr"
regex = '(?R)(?:\r?[a-z]{3}$)+'
haystack = "abc\rdef\rxyz"
matches = [[7, 11]]
[[test]]
name = "repeat10"
regex = '(?m)(?:\n?[a-z]{3}$)*'
haystack = "abc\ndef\nxyz"
matches = [[0, 11]]
[[test]]
name = "repeat10-crlf"
regex = '(?Rm)(?:\n?[a-z]{3}$)*'
haystack = "abc\ndef\nxyz"
matches = [[0, 11]]
[[test]]
name = "repeat10-crlf-cr"
regex = '(?Rm)(?:\r?[a-z]{3}$)*'
haystack = "abc\rdef\rxyz"
matches = [[0, 11]]
[[test]]
name = "repeat10-no-multi"
regex = '(?:\n?[a-z]{3}$)*'
haystack = "abc\ndef\nxyz"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 11]]
[[test]]
name = "repeat10-no-multi-crlf"
regex = '(?R)(?:\n?[a-z]{3}$)*'
haystack = "abc\ndef\nxyz"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 11]]
[[test]]
name = "repeat10-no-multi-crlf-cr"
regex = '(?R)(?:\r?[a-z]{3}$)*'
haystack = "abc\rdef\rxyz"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 11]]
[[test]]
name = "repeat11"
regex = '(?m)^*'
haystack = "\naa\n"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
[[test]]
name = "repeat11-crlf"
regex = '(?Rm)^*'
haystack = "\naa\n"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
[[test]]
name = "repeat11-crlf-cr"
regex = '(?Rm)^*'
haystack = "\raa\r"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
[[test]]
name = "repeat11-no-multi"
regex = '^*'
haystack = "\naa\n"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
[[test]]
name = "repeat11-no-multi-crlf"
regex = '(?R)^*'
haystack = "\naa\n"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
[[test]]
name = "repeat11-no-multi-crlf-cr"
regex = '(?R)^*'
haystack = "\raa\r"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
[[test]]
name = "repeat12"
regex = '(?m)^+'
haystack = "\naa\n"
matches = [[0, 0], [1, 1], [4, 4]]
[[test]]
name = "repeat12-crlf"
regex = '(?Rm)^+'
haystack = "\naa\n"
matches = [[0, 0], [1, 1], [4, 4]]
[[test]]
name = "repeat12-crlf-cr"
regex = '(?Rm)^+'
haystack = "\raa\r"
matches = [[0, 0], [1, 1], [4, 4]]
[[test]]
name = "repeat12-no-multi"
regex = '^+'
haystack = "\naa\n"
matches = [[0, 0]]
[[test]]
name = "repeat12-no-multi-crlf"
regex = '(?R)^+'
haystack = "\naa\n"
matches = [[0, 0]]
[[test]]
name = "repeat12-no-multi-crlf-cr"
regex = '(?R)^+'
haystack = "\raa\r"
matches = [[0, 0]]
[[test]]
name = "repeat13"
regex = '(?m)$*'
haystack = "\naa\n"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
[[test]]
name = "repeat13-crlf"
regex = '(?Rm)$*'
haystack = "\naa\n"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
[[test]]
name = "repeat13-crlf-cr"
regex = '(?Rm)$*'
haystack = "\raa\r"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
[[test]]
name = "repeat13-no-multi"
regex = '$*'
haystack = "\naa\n"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
[[test]]
name = "repeat13-no-multi-crlf"
regex = '(?R)$*'
haystack = "\naa\n"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
[[test]]
name = "repeat13-no-multi-crlf-cr"
regex = '(?R)$*'
haystack = "\raa\r"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
[[test]]
name = "repeat14"
regex = '(?m)$+'
haystack = "\naa\n"
matches = [[0, 0], [3, 3], [4, 4]]
[[test]]
name = "repeat14-crlf"
regex = '(?Rm)$+'
haystack = "\naa\n"
matches = [[0, 0], [3, 3], [4, 4]]
[[test]]
name = "repeat14-crlf-cr"
regex = '(?Rm)$+'
haystack = "\raa\r"
matches = [[0, 0], [3, 3], [4, 4]]
[[test]]
name = "repeat14-no-multi"
regex = '$+'
haystack = "\naa\n"
matches = [[4, 4]]
[[test]]
name = "repeat14-no-multi-crlf"
regex = '(?R)$+'
haystack = "\naa\n"
matches = [[4, 4]]
[[test]]
name = "repeat14-no-multi-crlf-cr"
regex = '(?R)$+'
haystack = "\raa\r"
matches = [[4, 4]]
[[test]]
name = "repeat15"
regex = '(?m)(?:$\n)+'
haystack = "\n\naaa\n\n"
matches = [[0, 2], [5, 7]]
[[test]]
name = "repeat15-crlf"
regex = '(?Rm)(?:$\n)+'
haystack = "\n\naaa\n\n"
matches = [[0, 2], [5, 7]]
[[test]]
name = "repeat15-crlf-cr"
regex = '(?Rm)(?:$\r)+'
haystack = "\r\raaa\r\r"
matches = [[0, 2], [5, 7]]
[[test]]
name = "repeat15-no-multi"
regex = '(?:$\n)+'
haystack = "\n\naaa\n\n"
matches = []
[[test]]
name = "repeat15-no-multi-crlf"
regex = '(?R)(?:$\n)+'
haystack = "\n\naaa\n\n"
matches = []
[[test]]
name = "repeat15-no-multi-crlf-cr"
regex = '(?R)(?:$\r)+'
haystack = "\r\raaa\r\r"
matches = []
[[test]]
name = "repeat16"
regex = '(?m)(?:$\n)*'
haystack = "\n\naaa\n\n"
matches = [[0, 2], [3, 3], [4, 4], [5, 7]]
[[test]]
name = "repeat16-crlf"
regex = '(?Rm)(?:$\n)*'
haystack = "\n\naaa\n\n"
matches = [[0, 2], [3, 3], [4, 4], [5, 7]]
[[test]]
name = "repeat16-crlf-cr"
regex = '(?Rm)(?:$\r)*'
haystack = "\r\raaa\r\r"
matches = [[0, 2], [3, 3], [4, 4], [5, 7]]
[[test]]
name = "repeat16-no-multi"
regex = '(?:$\n)*'
haystack = "\n\naaa\n\n"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 7]]
[[test]]
name = "repeat16-no-multi-crlf"
regex = '(?R)(?:$\n)*'
haystack = "\n\naaa\n\n"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 7]]
[[test]]
name = "repeat16-no-multi-crlf-cr"
regex = '(?R)(?:$\r)*'
haystack = "\r\raaa\r\r"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 7]]
[[test]]
name = "repeat17"
regex = '(?m)(?:$\n^)+'
haystack = "\n\naaa\n\n"
matches = [[0, 2], [5, 7]]
[[test]]
name = "repeat17-crlf"
regex = '(?Rm)(?:$\n^)+'
haystack = "\n\naaa\n\n"
matches = [[0, 2], [5, 7]]
[[test]]
name = "repeat17-crlf-cr"
regex = '(?Rm)(?:$\r^)+'
haystack = "\r\raaa\r\r"
matches = [[0, 2], [5, 7]]
[[test]]
name = "repeat17-no-multi"
regex = '(?:$\n^)+'
haystack = "\n\naaa\n\n"
matches = []
[[test]]
name = "repeat17-no-multi-crlf"
regex = '(?R)(?:$\n^)+'
haystack = "\n\naaa\n\n"
matches = []
[[test]]
name = "repeat17-no-multi-crlf-cr"
regex = '(?R)(?:$\r^)+'
haystack = "\r\raaa\r\r"
matches = []
[[test]]
name = "repeat18"
regex = '(?m)(?:^|$)+'
haystack = "\n\naaa\n\n"
matches = [[0, 0], [1, 1], [2, 2], [5, 5], [6, 6], [7, 7]]
[[test]]
name = "repeat18-crlf"
regex = '(?Rm)(?:^|$)+'
haystack = "\n\naaa\n\n"
matches = [[0, 0], [1, 1], [2, 2], [5, 5], [6, 6], [7, 7]]
[[test]]
name = "repeat18-crlf-cr"
regex = '(?Rm)(?:^|$)+'
haystack = "\r\raaa\r\r"
matches = [[0, 0], [1, 1], [2, 2], [5, 5], [6, 6], [7, 7]]
[[test]]
name = "repeat18-no-multi"
regex = '(?:^|$)+'
haystack = "\n\naaa\n\n"
matches = [[0, 0], [7, 7]]
[[test]]
name = "repeat18-no-multi-crlf"
regex = '(?R)(?:^|$)+'
haystack = "\n\naaa\n\n"
matches = [[0, 0], [7, 7]]
[[test]]
name = "repeat18-no-multi-crlf-cr"
regex = '(?R)(?:^|$)+'
haystack = "\r\raaa\r\r"
matches = [[0, 0], [7, 7]]
[[test]]
name = "match-line-100"
regex = '(?m)^.+$'
haystack = "aa\naaaaaaaaaaaaaaaaaaa\n"
matches = [[0, 2], [3, 22]]
[[test]]
name = "match-line-100-crlf"
regex = '(?Rm)^.+$'
haystack = "aa\naaaaaaaaaaaaaaaaaaa\n"
matches = [[0, 2], [3, 22]]
[[test]]
name = "match-line-100-crlf-cr"
regex = '(?Rm)^.+$'
haystack = "aa\raaaaaaaaaaaaaaaaaaa\r"
matches = [[0, 2], [3, 22]]
[[test]]
name = "match-line-200"
regex = '(?m)^.+$'
haystack = "aa\naaaaaaaaaaaaaaaaaaa\n"
matches = [[0, 2], [3, 22]]
unicode = false
utf8 = false
[[test]]
name = "match-line-200-crlf"
regex = '(?Rm)^.+$'
haystack = "aa\naaaaaaaaaaaaaaaaaaa\n"
matches = [[0, 2], [3, 22]]
unicode = false
utf8 = false
[[test]]
name = "match-line-200-crlf-cr"
regex = '(?Rm)^.+$'
haystack = "aa\raaaaaaaaaaaaaaaaaaa\r"
matches = [[0, 2], [3, 22]]
unicode = false
utf8 = false

222
vendor/regex/testdata/no-unicode.toml vendored Normal file
View File

@@ -0,0 +1,222 @@
[[test]]
name = "invalid-utf8-literal1"
regex = '\xFF'
haystack = '\xFF'
matches = [[0, 1]]
unicode = false
utf8 = false
unescape = true
[[test]]
name = "mixed"
regex = '(?:.+)(?-u)(?:.+)'
haystack = '\xCE\x93\xCE\x94\xFF'
matches = [[0, 5]]
utf8 = false
unescape = true
[[test]]
name = "case1"
regex = "a"
haystack = "A"
matches = [[0, 1]]
case-insensitive = true
unicode = false
[[test]]
name = "case2"
regex = "[a-z]+"
haystack = "AaAaA"
matches = [[0, 5]]
case-insensitive = true
unicode = false
[[test]]
name = "case3"
regex = "[a-z]+"
haystack = "aA\u212AaA"
matches = [[0, 7]]
case-insensitive = true
[[test]]
name = "case4"
regex = "[a-z]+"
haystack = "aA\u212AaA"
matches = [[0, 2], [5, 7]]
case-insensitive = true
unicode = false
[[test]]
name = "negate1"
regex = "[^a]"
haystack = "δ"
matches = [[0, 2]]
[[test]]
name = "negate2"
regex = "[^a]"
haystack = "δ"
matches = [[0, 1], [1, 2]]
unicode = false
utf8 = false
[[test]]
name = "dotstar-prefix1"
regex = "a"
haystack = '\xFFa'
matches = [[1, 2]]
unicode = false
utf8 = false
unescape = true
[[test]]
name = "dotstar-prefix2"
regex = "a"
haystack = '\xFFa'
matches = [[1, 2]]
utf8 = false
unescape = true
[[test]]
name = "null-bytes1"
regex = '[^\x00]+\x00'
haystack = 'foo\x00'
matches = [[0, 4]]
unicode = false
utf8 = false
unescape = true
[[test]]
name = "word-ascii"
regex = '\w+'
haystack = "aδ"
matches = [[0, 1]]
unicode = false
[[test]]
name = "word-unicode"
regex = '\w+'
haystack = "aδ"
matches = [[0, 3]]
[[test]]
name = "decimal-ascii"
regex = '\d+'
haystack = "1२३9"
matches = [[0, 1], [7, 8]]
unicode = false
[[test]]
name = "decimal-unicode"
regex = '\d+'
haystack = "1२३9"
matches = [[0, 8]]
[[test]]
name = "space-ascii"
regex = '\s+'
haystack = " \u1680"
matches = [[0, 1]]
unicode = false
[[test]]
name = "space-unicode"
regex = '\s+'
haystack = " \u1680"
matches = [[0, 4]]
[[test]]
# See: https://github.com/rust-lang/regex/issues/484
name = "iter1-bytes"
regex = ''
haystack = "☃"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
utf8 = false
[[test]]
# See: https://github.com/rust-lang/regex/issues/484
name = "iter1-utf8"
regex = ''
haystack = "☃"
matches = [[0, 0], [3, 3]]
[[test]]
# See: https://github.com/rust-lang/regex/issues/484
# Note that iter2-utf8 doesn't make sense here, since the input isn't UTF-8.
name = "iter2-bytes"
regex = ''
haystack = 'b\xFFr'
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
unescape = true
utf8 = false
# These test that unanchored prefixes can munch through invalid UTF-8 even when
# utf8 is enabled.
#
# This test actually reflects an interesting simplification in how the Thompson
# NFA is constructed. It used to be that the NFA could be built with an
# unanchored prefix that either matched any byte or _only_ matched valid UTF-8.
# But the latter turns out to be pretty precarious when it comes to prefilters,
# because if you search a haystack that contains invalid UTF-8 but have an
# unanchored prefix that requires UTF-8, then prefilters are no longer a valid
# optimization because you actually have to check that everything is valid
# UTF-8.
#
# Originally, I had thought that we needed a valid UTF-8 unanchored prefix in
# order to guarantee that we only match at valid UTF-8 boundaries. But this
# isn't actually true! There are really only two things to consider here:
#
# 1) Will a regex match split an encoded codepoint? No. Because by construction,
# we ensure that a MATCH state can only be reached by following valid UTF-8 (assuming
# all of the UTF-8 modes are enabled).
#
# 2) Will a regex match arbitrary bytes that aren't valid UTF-8? Again, no,
# assuming all of the UTF-8 modes are enabled.
[[test]]
name = "unanchored-invalid-utf8-match-100"
regex = '[a-z]'
haystack = '\xFFa\xFF'
matches = [[1, 2]]
unescape = true
utf8 = false
# This test shows that we can still prevent a match from occurring by requiring
# that valid UTF-8 match by inserting our own unanchored prefix. Thus, if the
# behavior of not munching through invalid UTF-8 anywhere is needed, then it
# can be achieved thusly.
[[test]]
name = "unanchored-invalid-utf8-nomatch"
regex = '^(?s:.)*?[a-z]'
haystack = '\xFFa\xFF'
matches = []
unescape = true
utf8 = false
# This is a tricky test that makes sure we don't accidentally do a kind of
# unanchored search when we've requested that a regex engine not report
# empty matches that split a codepoint. This test caught a regression during
# development where the code for skipping over bad empty matches would do so
# even if the search should have been anchored. This is ultimately what led to
# making 'anchored' an 'Input' option, so that it was always clear what kind
# of search was being performed. (Before that, whether a search was anchored
# or not was a config knob on the regex engine.) This did wind up making DFAs
# a little more complex to configure (with their 'StartKind' knob), but it
# generally smoothed out everything else.
#
# Great example of a test whose failure motivated a sweeping API refactoring.
[[test]]
name = "anchored-iter-empty-utf8"
regex = ''
haystack = 'a☃z'
matches = [[0, 0], [1, 1]]
unescape = false
utf8 = true
anchored = true

280
vendor/regex/testdata/overlapping.toml vendored Normal file
View File

@@ -0,0 +1,280 @@
# NOTE: We define a number of tests where the *match* kind is 'leftmost-first'
# but the *search* kind is 'overlapping'. This is a somewhat nonsensical
# combination and can produce odd results. Nevertheless, those results should
# be consistent so we test them here. (At the time of writing this note, I
# hadn't yet decided whether to make 'leftmost-first' with 'overlapping' result
# in unspecified behavior.)
# This demonstrates how a full overlapping search is obviously quadratic. This
# regex reports a match for every substring in the haystack.
[[test]]
name = "ungreedy-dotstar-matches-everything-100"
regex = [".*?"]
haystack = "zzz"
matches = [
{ id = 0, span = [0, 0] },
{ id = 0, span = [1, 1] },
{ id = 0, span = [0, 1] },
{ id = 0, span = [2, 2] },
{ id = 0, span = [1, 2] },
{ id = 0, span = [0, 2] },
{ id = 0, span = [3, 3] },
{ id = 0, span = [2, 3] },
{ id = 0, span = [1, 3] },
{ id = 0, span = [0, 3] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "greedy-dotstar-matches-everything-100"
regex = [".*"]
haystack = "zzz"
matches = [
{ id = 0, span = [0, 0] },
{ id = 0, span = [1, 1] },
{ id = 0, span = [0, 1] },
{ id = 0, span = [2, 2] },
{ id = 0, span = [1, 2] },
{ id = 0, span = [0, 2] },
{ id = 0, span = [3, 3] },
{ id = 0, span = [2, 3] },
{ id = 0, span = [1, 3] },
{ id = 0, span = [0, 3] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "repetition-plus-leftmost-first-100"
regex = 'a+'
haystack = "aaa"
matches = [[0, 1], [1, 2], [0, 2], [2, 3], [1, 3], [0, 3]]
match-kind = "leftmost-first"
search-kind = "overlapping"
[[test]]
name = "repetition-plus-leftmost-first-110"
regex = '☃+'
haystack = "☃☃☃"
matches = [[0, 3], [3, 6], [0, 6], [6, 9], [3, 9], [0, 9]]
match-kind = "leftmost-first"
search-kind = "overlapping"
[[test]]
name = "repetition-plus-all-100"
regex = 'a+'
haystack = "aaa"
matches = [[0, 1], [1, 2], [0, 2], [2, 3], [1, 3], [0, 3]]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "repetition-plus-all-110"
regex = '☃+'
haystack = "☃☃☃"
matches = [[0, 3], [3, 6], [0, 6], [6, 9], [3, 9], [0, 9]]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "repetition-plus-leftmost-first-200"
regex = '(abc)+'
haystack = "zzabcabczzabc"
matches = [
[[2, 5], [2, 5]],
[[5, 8], [5, 8]],
[[2, 8], [5, 8]],
]
match-kind = "leftmost-first"
search-kind = "overlapping"
[[test]]
name = "repetition-plus-all-200"
regex = '(abc)+'
haystack = "zzabcabczzabc"
matches = [
[[2, 5], [2, 5]],
[[5, 8], [5, 8]],
[[2, 8], [5, 8]],
[[10, 13], [10, 13]],
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "repetition-star-leftmost-first-100"
regex = 'a*'
haystack = "aaa"
matches = [
[0, 0],
[1, 1],
[0, 1],
[2, 2],
[1, 2],
[0, 2],
[3, 3],
[2, 3],
[1, 3],
[0, 3],
]
match-kind = "leftmost-first"
search-kind = "overlapping"
[[test]]
name = "repetition-star-all-100"
regex = 'a*'
haystack = "aaa"
matches = [
[0, 0],
[1, 1],
[0, 1],
[2, 2],
[1, 2],
[0, 2],
[3, 3],
[2, 3],
[1, 3],
[0, 3],
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "repetition-star-leftmost-first-200"
regex = '(abc)*'
haystack = "zzabcabczzabc"
matches = [
[[0, 0], []],
]
match-kind = "leftmost-first"
search-kind = "overlapping"
[[test]]
name = "repetition-star-all-200"
regex = '(abc)*'
haystack = "zzabcabczzabc"
matches = [
[[0, 0], []],
[[1, 1], []],
[[2, 2], []],
[[3, 3], []],
[[4, 4], []],
[[5, 5], []],
[[2, 5], [2, 5]],
[[6, 6], []],
[[7, 7], []],
[[8, 8], []],
[[5, 8], [5, 8]],
[[2, 8], [5, 8]],
[[9, 9], []],
[[10, 10], []],
[[11, 11], []],
[[12, 12], []],
[[13, 13], []],
[[10, 13], [10, 13]],
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "start-end-rep-leftmost-first"
regex = '(^$)*'
haystack = "abc"
matches = [
[[0, 0], []],
]
match-kind = "leftmost-first"
search-kind = "overlapping"
[[test]]
name = "start-end-rep-all"
regex = '(^$)*'
haystack = "abc"
matches = [
[[0, 0], []],
[[1, 1], []],
[[2, 2], []],
[[3, 3], []],
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "alt-leftmost-first-100"
regex = 'abc|a'
haystack = "zzabcazzaabc"
matches = [[2, 3], [2, 5]]
match-kind = "leftmost-first"
search-kind = "overlapping"
[[test]]
name = "alt-all-100"
regex = 'abc|a'
haystack = "zzabcazzaabc"
matches = [[2, 3], [2, 5], [5, 6], [8, 9], [9, 10], [9, 12]]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "empty-000"
regex = ""
haystack = "abc"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "empty-alt-000"
regex = "|b"
haystack = "abc"
matches = [[0, 0], [1, 1], [2, 2], [1, 2], [3, 3]]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "empty-alt-010"
regex = "b|"
haystack = "abc"
matches = [[0, 0], [1, 1], [2, 2], [1, 2], [3, 3]]
match-kind = "all"
search-kind = "overlapping"
[[test]]
# See: https://github.com/rust-lang/regex/issues/484
name = "iter1-bytes"
regex = ''
haystack = "☃"
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
utf8 = false
match-kind = "all"
search-kind = "overlapping"
[[test]]
# See: https://github.com/rust-lang/regex/issues/484
name = "iter1-utf8"
regex = ''
haystack = "☃"
matches = [[0, 0], [3, 3]]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "iter1-incomplete-utf8"
regex = ''
haystack = '\xE2\x98' # incomplete snowman
matches = [[0, 0], [1, 1], [2, 2]]
match-kind = "all"
search-kind = "overlapping"
unescape = true
utf8 = false
[[test]]
name = "scratch"
regex = ['sam', 'samwise']
haystack = "samwise"
matches = [
{ id = 0, span = [0, 3] },
]
match-kind = "leftmost-first"
search-kind = "overlapping"

98
vendor/regex/testdata/regex-lite.toml vendored Normal file
View File

@@ -0,0 +1,98 @@
# These tests are specifically written to test the regex-lite crate. While it
# largely has the same semantics as the regex crate, there are some differences
# around Unicode support and UTF-8.
#
# To be clear, regex-lite supports far fewer patterns because of its lack of
# Unicode support, nested character classes and character class set operations.
# What we're talking about here are the patterns that both crates support but
# where the semantics might differ.
# regex-lite uses ASCII definitions for Perl character classes.
[[test]]
name = "perl-class-decimal"
regex = '\d'
haystack = '᠕'
matches = []
unicode = true
# regex-lite uses ASCII definitions for Perl character classes.
[[test]]
name = "perl-class-space"
regex = '\s'
haystack = "\u2000"
matches = []
unicode = true
# regex-lite uses ASCII definitions for Perl character classes.
[[test]]
name = "perl-class-word"
regex = '\w'
haystack = 'δ'
matches = []
unicode = true
# regex-lite uses the ASCII definition of word for word boundary assertions.
[[test]]
name = "word-boundary"
regex = '\b'
haystack = 'δ'
matches = []
unicode = true
# regex-lite uses the ASCII definition of word for negated word boundary
# assertions. But note that it should still not split codepoints!
[[test]]
name = "word-boundary-negated"
regex = '\B'
haystack = 'δ'
matches = [[0, 0], [2, 2]]
unicode = true
# While we're here, the empty regex---which matches at every
# position---shouldn't split a codepoint either.
[[test]]
name = "empty-no-split-codepoint"
regex = ''
haystack = '💩'
matches = [[0, 0], [4, 4]]
unicode = true
# A dot always matches a full codepoint.
[[test]]
name = "dot-always-matches-codepoint"
regex = '.'
haystack = '💩'
matches = [[0, 4]]
unicode = false
# A negated character class also always matches a full codepoint.
[[test]]
name = "negated-class-always-matches-codepoint"
regex = '[^a]'
haystack = '💩'
matches = [[0, 4]]
unicode = false
# regex-lite only supports ASCII-aware case insensitive matching.
[[test]]
name = "case-insensitive-is-ascii-only"
regex = 's'
haystack = 'ſ'
matches = []
unicode = true
case-insensitive = true
# Negated word boundaries shouldn't split a codepoint, but they will match
# between invalid UTF-8.
#
# This test is only valid for a 'bytes' API, but that doesn't (yet) exist in
# regex-lite. This can't happen in the main API because &str can't contain
# invalid UTF-8.
# [[test]]
# name = "word-boundary-invalid-utf8"
# regex = '\B'
# haystack = '\xFF\xFF\xFF\xFF'
# unescape = true
# matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
# unicode = true
# utf8 = false

830
vendor/regex/testdata/regression.toml vendored Normal file
View File

@@ -0,0 +1,830 @@
# See: https://github.com/rust-lang/regex/issues/48
[[test]]
name = "invalid-regex-no-crash-100"
regex = '(*)'
haystack = ""
matches = []
compiles = false
# See: https://github.com/rust-lang/regex/issues/48
[[test]]
name = "invalid-regex-no-crash-200"
regex = '(?:?)'
haystack = ""
matches = []
compiles = false
# See: https://github.com/rust-lang/regex/issues/48
[[test]]
name = "invalid-regex-no-crash-300"
regex = '(?)'
haystack = ""
matches = []
compiles = false
# See: https://github.com/rust-lang/regex/issues/48
[[test]]
name = "invalid-regex-no-crash-400"
regex = '*'
haystack = ""
matches = []
compiles = false
# See: https://github.com/rust-lang/regex/issues/75
[[test]]
name = "unsorted-binary-search-100"
regex = '(?i-u)[a_]+'
haystack = "A_"
matches = [[0, 2]]
# See: https://github.com/rust-lang/regex/issues/75
[[test]]
name = "unsorted-binary-search-200"
regex = '(?i-u)[A_]+'
haystack = "a_"
matches = [[0, 2]]
# See: https://github.com/rust-lang/regex/issues/76
[[test]]
name = "unicode-case-lower-nocase-flag"
regex = '(?i)\p{Ll}+'
haystack = "ΛΘΓΔα"
matches = [[0, 10]]
# See: https://github.com/rust-lang/regex/issues/99
[[test]]
name = "negated-char-class-100"
regex = '(?i)[^x]'
haystack = "x"
matches = []
# See: https://github.com/rust-lang/regex/issues/99
[[test]]
name = "negated-char-class-200"
regex = '(?i)[^x]'
haystack = "X"
matches = []
# See: https://github.com/rust-lang/regex/issues/101
[[test]]
name = "ascii-word-underscore"
regex = '[[:word:]]'
haystack = "_"
matches = [[0, 1]]
# See: https://github.com/rust-lang/regex/issues/129
[[test]]
name = "captures-repeat"
regex = '([a-f]){2}(?P<foo>[x-z])'
haystack = "abx"
matches = [
[[0, 3], [1, 2], [2, 3]],
]
# See: https://github.com/rust-lang/regex/issues/153
[[test]]
name = "alt-in-alt-100"
regex = 'ab?|$'
haystack = "az"
matches = [[0, 1], [2, 2]]
# See: https://github.com/rust-lang/regex/issues/153
[[test]]
name = "alt-in-alt-200"
regex = '^(?:.*?)(?:\n|\r\n?|$)'
haystack = "ab\rcd"
matches = [[0, 3]]
# See: https://github.com/rust-lang/regex/issues/169
[[test]]
name = "leftmost-first-prefix"
regex = 'z*azb'
haystack = "azb"
matches = [[0, 3]]
# See: https://github.com/rust-lang/regex/issues/191
[[test]]
name = "many-alternates"
regex = '1|2|3|4|5|6|7|8|9|10|int'
haystack = "int"
matches = [[0, 3]]
# See: https://github.com/rust-lang/regex/issues/204
[[test]]
name = "word-boundary-alone-100"
regex = '\b'
haystack = "Should this (work?)"
matches = [[0, 0], [6, 6], [7, 7], [11, 11], [13, 13], [17, 17]]
# See: https://github.com/rust-lang/regex/issues/204
[[test]]
name = "word-boundary-alone-200"
regex = '\b'
haystack = "a b c"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]
# See: https://github.com/rust-lang/regex/issues/264
[[test]]
name = "word-boundary-ascii-no-capture"
regex = '\B'
haystack = "\U00028F3E"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
unicode = false
utf8 = false
# See: https://github.com/rust-lang/regex/issues/264
[[test]]
name = "word-boundary-ascii-capture"
regex = '(?:\B)'
haystack = "\U00028F3E"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
unicode = false
utf8 = false
# See: https://github.com/rust-lang/regex/issues/268
[[test]]
name = "partial-anchor"
regex = '^a|b'
haystack = "ba"
matches = [[0, 1]]
# See: https://github.com/rust-lang/regex/issues/271
[[test]]
name = "endl-or-word-boundary"
regex = '(?m:$)|(?-u:\b)'
haystack = "\U0006084E"
matches = [[4, 4]]
# See: https://github.com/rust-lang/regex/issues/271
[[test]]
name = "zero-or-end"
regex = '(?i-u:\x00)|$'
haystack = "\U000E682F"
matches = [[4, 4]]
# See: https://github.com/rust-lang/regex/issues/271
[[test]]
name = "y-or-endl"
regex = '(?i-u:y)|(?m:$)'
haystack = "\U000B4331"
matches = [[4, 4]]
# See: https://github.com/rust-lang/regex/issues/271
[[test]]
name = "word-boundary-start-x"
regex = '(?u:\b)^(?-u:X)'
haystack = "X"
matches = [[0, 1]]
# See: https://github.com/rust-lang/regex/issues/271
[[test]]
name = "word-boundary-ascii-start-x"
regex = '(?-u:\b)^(?-u:X)'
haystack = "X"
matches = [[0, 1]]
# See: https://github.com/rust-lang/regex/issues/271
[[test]]
name = "end-not-word-boundary"
regex = '$\B'
haystack = "\U0005C124\U000B576C"
matches = [[8, 8]]
unicode = false
utf8 = false
# See: https://github.com/rust-lang/regex/issues/280
[[test]]
name = "partial-anchor-alternate-begin"
regex = '^a|z'
haystack = "yyyyya"
matches = []
# See: https://github.com/rust-lang/regex/issues/280
[[test]]
name = "partial-anchor-alternate-end"
regex = 'a$|z'
haystack = "ayyyyy"
matches = []
# See: https://github.com/rust-lang/regex/issues/289
[[test]]
name = "lits-unambiguous-100"
regex = '(?:ABC|CDA|BC)X'
haystack = "CDAX"
matches = [[0, 4]]
# See: https://github.com/rust-lang/regex/issues/291
[[test]]
name = "lits-unambiguous-200"
regex = '((IMG|CAM|MG|MB2)_|(DSCN|CIMG))(?P<n>[0-9]+)$'
haystack = "CIMG2341"
matches = [
[[0, 8], [0, 4], [], [0, 4], [4, 8]],
]
# See: https://github.com/rust-lang/regex/issues/303
#
# 2022-09-19: This has now been "properly" fixed in that empty character
# classes are fully supported as something that can never match. This test
# used to be marked as 'compiles = false', but now it works.
[[test]]
name = "negated-full-byte-range"
regex = '[^\x00-\xFF]'
haystack = ""
matches = []
compiles = true
unicode = false
utf8 = false
# See: https://github.com/rust-lang/regex/issues/321
[[test]]
name = "strange-anchor-non-complete-prefix"
regex = 'a^{2}'
haystack = ""
matches = []
# See: https://github.com/rust-lang/regex/issues/321
[[test]]
name = "strange-anchor-non-complete-suffix"
regex = '${2}a'
haystack = ""
matches = []
# See: https://github.com/rust-lang/regex/issues/334
# See: https://github.com/rust-lang/regex/issues/557
[[test]]
name = "captures-after-dfa-premature-end-100"
regex = 'a(b*(X|$))?'
haystack = "abcbX"
matches = [
[[0, 1], [], []],
]
# See: https://github.com/rust-lang/regex/issues/334
# See: https://github.com/rust-lang/regex/issues/557
[[test]]
name = "captures-after-dfa-premature-end-200"
regex = 'a(bc*(X|$))?'
haystack = "abcbX"
matches = [
[[0, 1], [], []],
]
# See: https://github.com/rust-lang/regex/issues/334
# See: https://github.com/rust-lang/regex/issues/557
[[test]]
name = "captures-after-dfa-premature-end-300"
regex = '(aa$)?'
haystack = "aaz"
matches = [
[[0, 0], []],
[[1, 1], []],
[[2, 2], []],
[[3, 3], []],
]
# Plucked from "Why aren't regular expressions a lingua franca? An empirical
# study on the re-use and portability of regular expressions", The ACM Joint
# European Software Engineering Conference and Symposium on the Foundations of
# Software Engineering (ESEC/FSE), 2019.
#
# Link: https://dl.acm.org/doi/pdf/10.1145/3338906.3338909
[[test]]
name = "captures-after-dfa-premature-end-400"
regex = '(a)\d*\.?\d+\b'
haystack = "a0.0c"
matches = [
[[0, 2], [0, 1]],
]
# See: https://github.com/rust-lang/regex/issues/437
[[test]]
name = "literal-panic"
regex = 'typename type\-parameter\-[0-9]+\-[0-9]+::.+'
haystack = "test"
matches = []
# See: https://github.com/rust-lang/regex/issues/527
[[test]]
name = "empty-flag-expr"
regex = '(?:(?:(?x)))'
haystack = ""
matches = [[0, 0]]
# See: https://github.com/rust-lang/regex/issues/533
#[[tests]]
#name = "blank-matches-nothing-between-space-and-tab"
#regex = '[[:blank:]]'
#input = '\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F'
#match = false
#unescape = true
# See: https://github.com/rust-lang/regex/issues/533
#[[tests]]
#name = "blank-matches-nothing-between-space-and-tab-inverted"
#regex = '^[[:^blank:]]+$'
#input = '\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F'
#match = true
#unescape = true
# See: https://github.com/rust-lang/regex/issues/555
[[test]]
name = "invalid-repetition"
regex = '(?m){1,1}'
haystack = ""
matches = []
compiles = false
# See: https://github.com/rust-lang/regex/issues/640
[[test]]
name = "flags-are-unset"
regex = '(?:(?i)foo)|Bar'
haystack = "foo Foo bar Bar"
matches = [[0, 3], [4, 7], [12, 15]]
# Note that 'Ј' is not 'j', but Cyrillic Je
# https://en.wikipedia.org/wiki/Je_(Cyrillic)
#
# See: https://github.com/rust-lang/regex/issues/659
[[test]]
name = "empty-group-with-unicode"
regex = '(?:)Ј01'
haystack = 'zЈ01'
matches = [[1, 5]]
# See: https://github.com/rust-lang/regex/issues/579
[[test]]
name = "word-boundary-weird"
regex = '\b..\b'
haystack = "I have 12, he has 2!"
matches = [[0, 2], [7, 9], [9, 11], [11, 13], [17, 19]]
# See: https://github.com/rust-lang/regex/issues/579
[[test]]
name = "word-boundary-weird-ascii"
regex = '\b..\b'
haystack = "I have 12, he has 2!"
matches = [[0, 2], [7, 9], [9, 11], [11, 13], [17, 19]]
unicode = false
utf8 = false
# See: https://github.com/rust-lang/regex/issues/579
[[test]]
name = "word-boundary-weird-minimal-ascii"
regex = '\b..\b'
haystack = "az,,b"
matches = [[0, 2], [2, 4]]
unicode = false
utf8 = false
# See: https://github.com/BurntSushi/ripgrep/issues/1203
[[test]]
name = "reverse-suffix-100"
regex = '[0-4][0-4][0-4]000'
haystack = "153.230000"
matches = [[4, 10]]
# See: https://github.com/BurntSushi/ripgrep/issues/1203
[[test]]
name = "reverse-suffix-200"
regex = '[0-9][0-9][0-9]000'
haystack = "153.230000\n"
matches = [[4, 10]]
# This is a tricky case for the reverse suffix optimization, because it
# finds the 'foobar' match but the reverse scan must fail to find a match by
# correctly dealing with the word boundary following the 'foobar' literal when
# computing the start state.
#
# This test exists because I tried to break the following assumption that
# is currently in the code: that if a suffix is found and the reverse scan
# succeeds, then it's guaranteed that there is an overall match. Namely, the
# 'is_match' routine does *not* do another forward scan in this case because of
# this assumption.
[[test]]
name = "reverse-suffix-300"
regex = '\w+foobar\b'
haystack = "xyzfoobarZ"
matches = []
unicode = false
utf8 = false
# See: https://github.com/BurntSushi/ripgrep/issues/1247
[[test]]
name = "stops"
regex = '\bs(?:[ab])'
haystack = 's\xE4'
matches = []
unescape = true
utf8 = false
# See: https://github.com/BurntSushi/ripgrep/issues/1247
[[test]]
name = "stops-ascii"
regex = '(?-u:\b)s(?:[ab])'
haystack = 's\xE4'
matches = []
unescape = true
utf8 = false
# See: https://github.com/rust-lang/regex/issues/850
[[test]]
name = "adjacent-line-boundary-100"
regex = '(?m)^(?:[^ ]+?)$'
haystack = "line1\nline2"
matches = [[0, 5], [6, 11]]
# Continued.
[[test]]
name = "adjacent-line-boundary-200"
regex = '(?m)^(?:[^ ]+?)$'
haystack = "A\nB"
matches = [[0, 1], [2, 3]]
# There is no issue for this bug.
[[test]]
name = "anchored-prefix-100"
regex = '^a[[:^space:]]'
haystack = "a "
matches = []
# There is no issue for this bug.
[[test]]
name = "anchored-prefix-200"
regex = '^a[[:^space:]]'
haystack = "foo boo a"
matches = []
# There is no issue for this bug.
[[test]]
name = "anchored-prefix-300"
regex = '^-[a-z]'
haystack = "r-f"
matches = []
# Tests that a possible Aho-Corasick optimization works correctly. It only
# kicks in when we have a lot of literals. By "works correctly," we mean that
# leftmost-first match semantics are properly respected. That is, samwise
# should match, not sam.
#
# There is no issue for this bug.
[[test]]
name = "aho-corasick-100"
regex = 'samwise|sam|a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z'
haystack = "samwise"
matches = [[0, 7]]
# See: https://github.com/rust-lang/regex/issues/921
[[test]]
name = "interior-anchor-capture"
regex = '(a$)b$'
haystack = 'ab'
matches = []
# I found this bug in the course of adding some of the regexes that Ruff uses
# to rebar. It turns out that the lazy DFA was finding a match that was being
# rejected by the one-pass DFA. Yikes. I then minimized the regex and haystack.
#
# Source: https://github.com/charliermarsh/ruff/blob/a919041ddaa64cdf6f216f90dd0480dab69fd3ba/crates/ruff/src/rules/pycodestyle/rules/whitespace_around_keywords.rs#L52
[[test]]
name = "ruff-whitespace-around-keywords"
regex = '^(a|ab)$'
haystack = "ab"
anchored = true
unicode = false
utf8 = true
matches = [[[0, 2], [0, 2]]]
# From: https://github.com/rust-lang/regex/issues/429
[[test]]
name = "i429-0"
regex = '(?:(?-u:\b)|(?u:h))+'
haystack = "h"
unicode = true
utf8 = false
matches = [[0, 0], [1, 1]]
# From: https://github.com/rust-lang/regex/issues/429
[[test]]
name = "i429-1"
regex = '(?u:\B)'
haystack = "鋸"
unicode = true
utf8 = false
matches = []
# From: https://github.com/rust-lang/regex/issues/429
[[test]]
name = "i429-2"
regex = '(?:(?u:\b)|(?s-u:.))+'
haystack = "oB"
unicode = true
utf8 = false
matches = [[0, 0], [1, 2]]
# From: https://github.com/rust-lang/regex/issues/429
[[test]]
name = "i429-3"
regex = '(?:(?-u:\B)|(?su:.))+'
haystack = "\U000FEF80"
unicode = true
utf8 = false
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
# From: https://github.com/rust-lang/regex/issues/429
[[test]]
name = "i429-3-utf8"
regex = '(?:(?-u:\B)|(?su:.))+'
haystack = "\U000FEF80"
unicode = true
utf8 = true
matches = [[0, 0], [4, 4]]
# From: https://github.com/rust-lang/regex/issues/429
[[test]]
name = "i429-4"
regex = '(?m:$)(?m:^)(?su:.)'
haystack = "\n‣"
unicode = true
utf8 = false
matches = [[0, 1]]
# From: https://github.com/rust-lang/regex/issues/429
[[test]]
name = "i429-5"
regex = '(?m:$)^(?m:^)'
haystack = "\n"
unicode = true
utf8 = false
matches = [[0, 0]]
# From: https://github.com/rust-lang/regex/issues/429
[[test]]
name = "i429-6"
regex = '(?P<kp>(?iu:do)(?m:$))*'
haystack = "dodo"
unicode = true
utf8 = false
matches = [
[[0, 0], []],
[[1, 1], []],
[[2, 4], [2, 4]],
]
# From: https://github.com/rust-lang/regex/issues/429
[[test]]
name = "i429-7"
regex = '(?u:\B)'
haystack = "䡁"
unicode = true
utf8 = false
matches = []
# From: https://github.com/rust-lang/regex/issues/429
[[test]]
name = "i429-8"
regex = '(?:(?-u:\b)|(?u:[\u{0}-W]))+'
haystack = "0"
unicode = true
utf8 = false
matches = [[0, 0], [1, 1]]
# From: https://github.com/rust-lang/regex/issues/429
[[test]]
name = "i429-9"
regex = '((?m:$)(?-u:\B)(?s-u:.)(?-u:\B)$)'
haystack = "\n\n"
unicode = true
utf8 = false
matches = [
[[1, 2], [1, 2]],
]
# From: https://github.com/rust-lang/regex/issues/429
[[test]]
name = "i429-10"
regex = '(?m:$)(?m:$)^(?su:.)'
haystack = "\n\u0081¨\u200a"
unicode = true
utf8 = false
matches = [[0, 1]]
# From: https://github.com/rust-lang/regex/issues/429
[[test]]
name = "i429-11"
regex = '(?-u:\B)(?m:^)'
haystack = "0\n"
unicode = true
utf8 = false
matches = [[2, 2]]
# From: https://github.com/rust-lang/regex/issues/429
[[test]]
name = "i429-12"
regex = '(?:(?u:\b)|(?-u:.))+'
haystack = "0"
unicode = true
utf8 = false
matches = [[0, 0], [1, 1]]
# From: https://github.com/rust-lang/regex/issues/969
[[test]]
name = "i969"
regex = 'c.*d\z'
haystack = "ababcd"
bounds = [4, 6]
search-kind = "earliest"
matches = [[4, 6]]
# I found this during the regex-automata migration. This is the fowler basic
# 154 test, but without anchored = true and without a match limit.
#
# This test caught a subtle bug in the hybrid reverse DFA search, where it
# would skip over the termination condition if it entered a start state. This
# was a double bug. Firstly, the reverse DFA shouldn't have had start states
# specialized in the first place, and thus it shouldn't have been possible to
# detect that the DFA had entered a start state. The second bug was that the
# start state handling was incorrect by jumping over the termination condition.
[[test]]
name = "fowler-basic154-unanchored"
regex = '''a([bc]*)c*'''
haystack = '''abc'''
matches = [[[0, 3], [1, 3]]]
# From: https://github.com/rust-lang/regex/issues/981
#
# This was never really a problem in the new architecture because the
# regex-automata engines are far more principled about how they deal with
# look-around. (This was one of the many reasons I wanted to re-work the
# original regex crate engines.)
[[test]]
name = "word-boundary-interact-poorly-with-literal-optimizations"
regex = '(?i:(?:\b|_)win(?:32|64|dows)?(?:\b|_))'
haystack = 'ubi-Darwin-x86_64.tar.gz'
matches = []
# This was found during fuzz testing of regex. It provoked a panic in the meta
# engine as a result of the reverse suffix optimization. Namely, it hit a case
# where a suffix match was found, a corresponding reverse match was found, but
# the forward search turned up no match. The forward search should always match
# if the suffix and reverse search match.
#
# This in turn uncovered an inconsistency between the PikeVM and the DFA (lazy
# and fully compiled) engines. It was caused by a mishandling of the collection
# of NFA state IDs in the generic determinization code (which is why both types
# of DFA were impacted). Namely, when a fail state was encountered (that's the
# `[^\s\S]` in the pattern below), then it would just stop collecting states.
# But that's not correct since a later state could lead to a match.
[[test]]
name = "impossible-branch"
regex = '.*[^\s\S]A|B'
haystack = "B"
matches = [[0, 1]]
# This was found during fuzz testing in regex-lite. The regex crate never
# suffered from this bug, but it causes regex-lite to incorrectly compile
# captures.
[[test]]
name = "captures-wrong-order"
regex = '(a){0}(a)'
haystack = 'a'
matches = [[[0, 1], [], [0, 1]]]
# This tests a bug in how quit states are handled in the DFA. At some point
# during development, the DFAs were tweaked slightly such that if they hit
# a quit state (which means, they hit a byte that the caller configured should
# stop the search), then it might not return an error necessarily. Namely, if a
# match had already been found, then it would be returned instead of an error.
#
# But this is actually wrong! Why? Because even though a match had been found,
# it wouldn't be fully correct to return it once a quit state has been seen
# because you can't determine whether the match offset returned is the correct
# greedy/leftmost-first match. Since you can't complete the search as requested
# by the caller, the DFA should just stop and return an error.
#
# Interestingly, this does seem to produce an unavoidable difference between
# 'try_is_match().unwrap()' and 'try_find().unwrap().is_some()' for the DFAs.
# The former will stop immediately once a match is known to occur and return
# 'Ok(true)', whereas the latter could find the match but quit with an
# 'Err(..)' first.
#
# Thankfully, I believe this inconsistency between 'is_match()' and 'find()'
# cannot be observed in the higher level meta regex API because it specifically
# will try another engine that won't fail in the case of a DFA failing.
#
# This regression happened in the regex crate rewrite, but before anything got
# released.
[[test]]
name = "negated-unicode-word-boundary-dfa-fail"
regex = '\B.*'
haystack = "!\u02D7"
matches = [[0, 3]]
# This failure was found in the *old* regex crate (prior to regex 1.9), but
# I didn't investigate why. My best guess is that it's a literal optimization
# bug. It didn't occur in the rewrite.
[[test]]
name = "missed-match"
regex = 'e..+e.ee>'
haystack = 'Zeee.eZZZZZZZZeee>eeeeeee>'
matches = [[1, 26]]
# This test came from the 'ignore' crate and tripped a bug in how accelerated
# DFA states were handled in an overlapping search.
[[test]]
name = "regex-to-glob"
regex = ['(?-u)^path1/[^/]*$']
haystack = "path1/foo"
matches = [[0, 9]]
utf8 = false
match-kind = "all"
search-kind = "overlapping"
# See: https://github.com/rust-lang/regex/issues/1060
[[test]]
name = "reverse-inner-plus-shorter-than-expected"
regex = '(?:(\d+)[:.])?(\d{1,2})[:.](\d{2})'
haystack = '102:12:39'
matches = [[[0, 9], [0, 3], [4, 6], [7, 9]]]
# Like reverse-inner-plus-shorter-than-expected, but using a far simpler regex
# to demonstrate the extent of the rot. Sigh.
#
# See: https://github.com/rust-lang/regex/issues/1060
[[test]]
name = "reverse-inner-short"
regex = '(?:([0-9][0-9][0-9]):)?([0-9][0-9]):([0-9][0-9])'
haystack = '102:12:39'
matches = [[[0, 9], [0, 3], [4, 6], [7, 9]]]
# This regression test was found via the RegexSet APIs. It triggered a
# particular code path where a regex was compiled with 'All' match semantics
# (to support overlapping search), but got funneled down into a standard
# leftmost search when calling 'is_match'. This is fine on its own, but the
# leftmost search will use a prefilter and that's where this went awry.
#
# Namely, since 'All' semantics were used, the aho-corasick prefilter was
# incorrectly compiled with 'Standard' semantics. This was wrong because
# 'Standard' immediately attempts to report a match at every position, even if
# that would mean reporting a match past the leftmost match before reporting
# the leftmost match. This breaks the prefilter contract of never having false
# negatives and leads overall to the engine not finding a match.
#
# See: https://github.com/rust-lang/regex/issues/1070
[[test]]
name = "prefilter-with-aho-corasick-standard-semantics"
regex = '(?m)^ *v [0-9]'
haystack = 'v 0'
matches = [
{ id = 0, spans = [[0, 3]] },
]
match-kind = "all"
search-kind = "overlapping"
unicode = true
utf8 = true
# This tests that the PikeVM and the meta regex agree on a particular regex.
# This test previously failed when the ad hoc engines inside the meta engine
# did not handle quit states correctly. Namely, the Unicode word boundary here
# combined with a non-ASCII codepoint provokes the quit state. The ad hoc
# engines were previously returning a match even after entering the quit state
# if a match had been previously detected, but this is incorrect. The reason
# is that if a quit state is found, then the search must give up *immediately*
# because it prevents the search from finding the "proper" leftmost-first
# match. If it instead returns a match that has been found, it risks reporting
# an improper match, as it did in this case.
#
# See: https://github.com/rust-lang/regex/issues/1046
[[test]]
name = "non-prefix-literal-quit-state"
regex = '.+\b\n'
haystack = "β77\n"
matches = [[0, 5]]
# This is a regression test for some errant HIR interval set operations that
# were made in the regex-syntax 0.8.0 release and then reverted in 0.8.1. The
# issue here is that the HIR produced from the regex had out-of-order ranges.
#
# See: https://github.com/rust-lang/regex/issues/1103
# Ref: https://github.com/rust-lang/regex/pull/1051
# Ref: https://github.com/rust-lang/regex/pull/1102
[[test]]
name = "hir-optimization-out-of-order-class"
regex = '^[[:alnum:]./-]+$'
haystack = "a-b"
matches = [[0, 3]]
# This is a regression test for an improper reverse suffix optimization. This
# occurred when I "broadened" the applicability of the optimization to include
# multiple possible literal suffixes instead of only sticking to a non-empty
# longest common suffix. It turns out that, at least given how the reverse
# suffix optimization works, we need to stick to the longest common suffix for
# now.
#
# See: https://github.com/rust-lang/regex/issues/1110
# See also: https://github.com/astral-sh/ruff/pull/7980
[[test]]
name = 'improper-reverse-suffix-optimization'
regex = '(\\N\{[^}]+})|([{}])'
haystack = 'hiya \N{snowman} bye'
matches = [[[5, 16], [5, 16], []]]

641
vendor/regex/testdata/set.toml vendored Normal file
View File

@@ -0,0 +1,641 @@
# Basic multi-regex tests.
[[test]]
name = "basic10"
regex = ["a", "a"]
haystack = "a"
matches = [
{ id = 0, span = [0, 1] },
{ id = 1, span = [0, 1] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "basic10-leftmost-first"
regex = ["a", "a"]
haystack = "a"
matches = [
{ id = 0, span = [0, 1] },
]
match-kind = "leftmost-first"
search-kind = "leftmost"
[[test]]
name = "basic20"
regex = ["a", "a"]
haystack = "ba"
matches = [
{ id = 0, span = [1, 2] },
{ id = 1, span = [1, 2] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "basic30"
regex = ["a", "b"]
haystack = "a"
matches = [
{ id = 0, span = [0, 1] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "basic40"
regex = ["a", "b"]
haystack = "b"
matches = [
{ id = 1, span = [0, 1] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "basic50"
regex = ["a|b", "b|a"]
haystack = "b"
matches = [
{ id = 0, span = [0, 1] },
{ id = 1, span = [0, 1] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "basic60"
regex = ["foo", "oo"]
haystack = "foo"
matches = [
{ id = 0, span = [0, 3] },
{ id = 1, span = [1, 3] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "basic60-leftmost-first"
regex = ["foo", "oo"]
haystack = "foo"
matches = [
{ id = 0, span = [0, 3] },
]
match-kind = "leftmost-first"
search-kind = "leftmost"
[[test]]
name = "basic61"
regex = ["oo", "foo"]
haystack = "foo"
matches = [
{ id = 1, span = [0, 3] },
{ id = 0, span = [1, 3] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "basic61-leftmost-first"
regex = ["oo", "foo"]
haystack = "foo"
matches = [
{ id = 1, span = [0, 3] },
]
match-kind = "leftmost-first"
search-kind = "leftmost"
[[test]]
name = "basic70"
regex = ["abcd", "bcd", "cd", "d"]
haystack = "abcd"
matches = [
{ id = 0, span = [0, 4] },
{ id = 1, span = [1, 4] },
{ id = 2, span = [2, 4] },
{ id = 3, span = [3, 4] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "basic71"
regex = ["bcd", "cd", "d", "abcd"]
haystack = "abcd"
matches = [
{ id = 3, span = [0, 4] },
]
match-kind = "leftmost-first"
search-kind = "leftmost"
[[test]]
name = "basic80"
regex = ["^foo", "bar$"]
haystack = "foo"
matches = [
{ id = 0, span = [0, 3] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "basic81"
regex = ["^foo", "bar$"]
haystack = "foo bar"
matches = [
{ id = 0, span = [0, 3] },
{ id = 1, span = [4, 7] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "basic82"
regex = ["^foo", "bar$"]
haystack = "bar"
matches = [
{ id = 1, span = [0, 3] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "basic90"
regex = ["[a-z]+$", "foo"]
haystack = "01234 foo"
matches = [
{ id = 0, span = [8, 9] },
{ id = 0, span = [7, 9] },
{ id = 0, span = [6, 9] },
{ id = 1, span = [6, 9] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "basic91"
regex = ["[a-z]+$", "foo"]
haystack = "foo 01234"
matches = [
{ id = 1, span = [0, 3] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "basic100"
regex = [".*?", "a"]
haystack = "zzza"
matches = [
{ id = 0, span = [0, 0] },
{ id = 0, span = [1, 1] },
{ id = 0, span = [0, 1] },
{ id = 0, span = [2, 2] },
{ id = 0, span = [1, 2] },
{ id = 0, span = [0, 2] },
{ id = 0, span = [3, 3] },
{ id = 0, span = [2, 3] },
{ id = 0, span = [1, 3] },
{ id = 0, span = [0, 3] },
{ id = 0, span = [4, 4] },
{ id = 0, span = [3, 4] },
{ id = 0, span = [2, 4] },
{ id = 0, span = [1, 4] },
{ id = 0, span = [0, 4] },
{ id = 1, span = [3, 4] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "basic101"
regex = [".*", "a"]
haystack = "zzza"
matches = [
{ id = 0, span = [0, 0] },
{ id = 0, span = [1, 1] },
{ id = 0, span = [0, 1] },
{ id = 0, span = [2, 2] },
{ id = 0, span = [1, 2] },
{ id = 0, span = [0, 2] },
{ id = 0, span = [3, 3] },
{ id = 0, span = [2, 3] },
{ id = 0, span = [1, 3] },
{ id = 0, span = [0, 3] },
{ id = 0, span = [4, 4] },
{ id = 0, span = [3, 4] },
{ id = 0, span = [2, 4] },
{ id = 0, span = [1, 4] },
{ id = 0, span = [0, 4] },
{ id = 1, span = [3, 4] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "basic102"
regex = [".*", "a"]
haystack = "zzz"
matches = [
{ id = 0, span = [0, 0] },
{ id = 0, span = [1, 1] },
{ id = 0, span = [0, 1] },
{ id = 0, span = [2, 2] },
{ id = 0, span = [1, 2] },
{ id = 0, span = [0, 2] },
{ id = 0, span = [3, 3] },
{ id = 0, span = [2, 3] },
{ id = 0, span = [1, 3] },
{ id = 0, span = [0, 3] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "basic110"
regex = ['\ba\b']
haystack = "hello a bye"
matches = [
{ id = 0, span = [6, 7] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "basic111"
regex = ['\ba\b', '\be\b']
haystack = "hello a bye e"
matches = [
{ id = 0, span = [6, 7] },
{ id = 1, span = [12, 13] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "basic120"
regex = ["a"]
haystack = "a"
matches = [
{ id = 0, span = [0, 1] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "basic121"
regex = [".*a"]
haystack = "a"
matches = [
{ id = 0, span = [0, 1] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "basic122"
regex = [".*a", "β"]
haystack = "β"
matches = [
{ id = 1, span = [0, 2] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "basic130"
regex = ["ab", "b"]
haystack = "ba"
matches = [
{ id = 1, span = [0, 1] },
]
match-kind = "all"
search-kind = "overlapping"
# These test cases where one of the regexes matches the empty string.
[[test]]
name = "empty10"
regex = ["", "a"]
haystack = "abc"
matches = [
{ id = 0, span = [0, 0] },
{ id = 1, span = [0, 1] },
{ id = 0, span = [1, 1] },
{ id = 0, span = [2, 2] },
{ id = 0, span = [3, 3] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "empty10-leftmost-first"
regex = ["", "a"]
haystack = "abc"
matches = [
{ id = 0, span = [0, 0] },
{ id = 0, span = [1, 1] },
{ id = 0, span = [2, 2] },
{ id = 0, span = [3, 3] },
]
match-kind = "leftmost-first"
search-kind = "leftmost"
[[test]]
name = "empty11"
regex = ["a", ""]
haystack = "abc"
matches = [
{ id = 1, span = [0, 0] },
{ id = 0, span = [0, 1] },
{ id = 1, span = [1, 1] },
{ id = 1, span = [2, 2] },
{ id = 1, span = [3, 3] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "empty11-leftmost-first"
regex = ["a", ""]
haystack = "abc"
matches = [
{ id = 0, span = [0, 1] },
{ id = 1, span = [2, 2] },
{ id = 1, span = [3, 3] },
]
match-kind = "leftmost-first"
search-kind = "leftmost"
[[test]]
name = "empty20"
regex = ["", "b"]
haystack = "abc"
matches = [
{ id = 0, span = [0, 0] },
{ id = 0, span = [1, 1] },
{ id = 1, span = [1, 2] },
{ id = 0, span = [2, 2] },
{ id = 0, span = [3, 3] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "empty20-leftmost-first"
regex = ["", "b"]
haystack = "abc"
matches = [
{ id = 0, span = [0, 0] },
{ id = 0, span = [1, 1] },
{ id = 0, span = [2, 2] },
{ id = 0, span = [3, 3] },
]
match-kind = "leftmost-first"
search-kind = "leftmost"
[[test]]
name = "empty21"
regex = ["b", ""]
haystack = "abc"
matches = [
{ id = 1, span = [0, 0] },
{ id = 1, span = [1, 1] },
{ id = 0, span = [1, 2] },
{ id = 1, span = [2, 2] },
{ id = 1, span = [3, 3] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "empty21-leftmost-first"
regex = ["b", ""]
haystack = "abc"
matches = [
{ id = 1, span = [0, 0] },
{ id = 0, span = [1, 2] },
{ id = 1, span = [3, 3] },
]
match-kind = "leftmost-first"
search-kind = "leftmost"
[[test]]
name = "empty22"
regex = ["(?:)", "b"]
haystack = "abc"
matches = [
{ id = 0, span = [0, 0] },
{ id = 0, span = [1, 1] },
{ id = 1, span = [1, 2] },
{ id = 0, span = [2, 2] },
{ id = 0, span = [3, 3] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "empty23"
regex = ["b", "(?:)"]
haystack = "abc"
matches = [
{ id = 1, span = [0, 0] },
{ id = 1, span = [1, 1] },
{ id = 0, span = [1, 2] },
{ id = 1, span = [2, 2] },
{ id = 1, span = [3, 3] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "empty30"
regex = ["", "z"]
haystack = "abc"
matches = [
{ id = 0, span = [0, 0] },
{ id = 0, span = [1, 1] },
{ id = 0, span = [2, 2] },
{ id = 0, span = [3, 3] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "empty30-leftmost-first"
regex = ["", "z"]
haystack = "abc"
matches = [
{ id = 0, span = [0, 0] },
{ id = 0, span = [1, 1] },
{ id = 0, span = [2, 2] },
{ id = 0, span = [3, 3] },
]
match-kind = "leftmost-first"
search-kind = "leftmost"
[[test]]
name = "empty31"
regex = ["z", ""]
haystack = "abc"
matches = [
{ id = 1, span = [0, 0] },
{ id = 1, span = [1, 1] },
{ id = 1, span = [2, 2] },
{ id = 1, span = [3, 3] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "empty31-leftmost-first"
regex = ["z", ""]
haystack = "abc"
matches = [
{ id = 1, span = [0, 0] },
{ id = 1, span = [1, 1] },
{ id = 1, span = [2, 2] },
{ id = 1, span = [3, 3] },
]
match-kind = "leftmost-first"
search-kind = "leftmost"
[[test]]
name = "empty40"
regex = ["c(?:)", "b"]
haystack = "abc"
matches = [
{ id = 1, span = [1, 2] },
{ id = 0, span = [2, 3] },
]
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "empty40-leftmost-first"
regex = ["c(?:)", "b"]
haystack = "abc"
matches = [
{ id = 1, span = [1, 2] },
{ id = 0, span = [2, 3] },
]
match-kind = "leftmost-first"
search-kind = "leftmost"
# These test cases where there are no matches.
[[test]]
name = "nomatch10"
regex = ["a", "a"]
haystack = "b"
matches = []
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "nomatch20"
regex = ["^foo", "bar$"]
haystack = "bar foo"
matches = []
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "nomatch30"
regex = []
haystack = "a"
matches = []
match-kind = "all"
search-kind = "overlapping"
[[test]]
name = "nomatch40"
regex = ["^rooted$", '\.log$']
haystack = "notrooted"
matches = []
match-kind = "all"
search-kind = "overlapping"
# These test multi-regex searches with capture groups.
#
# NOTE: I wrote these tests in the course of developing a first class API for
# overlapping capturing group matches, but ultimately removed that API because
# the semantics for overlapping matches aren't totally clear. However, I've
# left the tests because I believe the semantics for these patterns are clear
# and because we can still test our "which patterns matched" APIs with them.
[[test]]
name = "caps-010"
regex = ['^(\w+) (\w+)$', '^(\S+) (\S+)$']
haystack = "Bruce Springsteen"
matches = [
{ id = 0, spans = [[0, 17], [0, 5], [6, 17]] },
{ id = 1, spans = [[0, 17], [0, 5], [6, 17]] },
]
match-kind = "all"
search-kind = "overlapping"
unicode = false
utf8 = false
[[test]]
name = "caps-020"
regex = ['^(\w+) (\w+)$', '^[A-Z](\S+) [A-Z](\S+)$']
haystack = "Bruce Springsteen"
matches = [
{ id = 0, spans = [[0, 17], [0, 5], [6, 17]] },
{ id = 1, spans = [[0, 17], [1, 5], [7, 17]] },
]
match-kind = "all"
search-kind = "overlapping"
unicode = false
utf8 = false
[[test]]
name = "caps-030"
regex = ['^(\w+) (\w+)$', '^([A-Z])(\S+) ([A-Z])(\S+)$']
haystack = "Bruce Springsteen"
matches = [
{ id = 0, spans = [[0, 17], [0, 5], [6, 17]] },
{ id = 1, spans = [[0, 17], [0, 1], [1, 5], [6, 7], [7, 17]] },
]
match-kind = "all"
search-kind = "overlapping"
unicode = false
utf8 = false
[[test]]
name = "caps-110"
regex = ['(\w+) (\w+)', '(\S+) (\S+)']
haystack = "Bruce Springsteen"
matches = [
{ id = 0, spans = [[0, 17], [0, 5], [6, 17]] },
]
match-kind = "leftmost-first"
search-kind = "leftmost"
unicode = false
utf8 = false
[[test]]
name = "caps-120"
regex = ['(\w+) (\w+)', '(\S+) (\S+)']
haystack = "&ruce $pringsteen"
matches = [
{ id = 1, spans = [[0, 17], [0, 5], [6, 17]] },
]
match-kind = "leftmost-first"
search-kind = "leftmost"
unicode = false
utf8 = false
[[test]]
name = "caps-121"
regex = ['(\w+) (\w+)', '(\S+) (\S+)']
haystack = "&ruce $pringsteen Foo Bar"
matches = [
{ id = 1, spans = [[0, 17], [0, 5], [6, 17]] },
{ id = 0, spans = [[18, 25], [18, 21], [22, 25]] },
]
match-kind = "leftmost-first"
search-kind = "leftmost"
unicode = false
utf8 = false

36
vendor/regex/testdata/substring.toml vendored Normal file
View File

@@ -0,0 +1,36 @@
# These tests check that regex engines perform as expected when the search is
# instructed to only search a substring of a haystack instead of the entire
# haystack. This tends to exercise interesting edge cases that are otherwise
# difficult to provoke. (But not necessarily impossible. Regex search iterators,
# for example, make use of the "search just a substring" APIs by changing the
# starting position of a search to the end position of the previous match.)
[[test]]
name = "unicode-word-start"
regex = '\b[0-9]+\b'
haystack = "β123"
bounds = { start = 2, end = 5 }
matches = []
[[test]]
name = "unicode-word-end"
regex = '\b[0-9]+\b'
haystack = "123β"
bounds = { start = 0, end = 3 }
matches = []
[[test]]
name = "ascii-word-start"
regex = '\b[0-9]+\b'
haystack = "β123"
bounds = { start = 2, end = 5 }
matches = [[2, 5]]
unicode = false
[[test]]
name = "ascii-word-end"
regex = '\b[0-9]+\b'
haystack = "123β"
bounds = { start = 0, end = 3 }
matches = [[0, 3]]
unicode = false

517
vendor/regex/testdata/unicode.toml vendored Normal file
View File

@@ -0,0 +1,517 @@
# Basic Unicode literal support.
[[test]]
name = "literal1"
regex = '☃'
haystack = "☃"
matches = [[0, 3]]
[[test]]
name = "literal2"
regex = '☃+'
haystack = "☃"
matches = [[0, 3]]
[[test]]
name = "literal3"
regex = '☃+'
haystack = "☃"
matches = [[0, 3]]
case-insensitive = true
[[test]]
name = "literal4"
regex = 'Δ'
haystack = "δ"
matches = [[0, 2]]
case-insensitive = true
# Unicode word boundaries.
[[test]]
name = "wb-100"
regex = '\d\b'
haystack = "6δ"
matches = []
[[test]]
name = "wb-200"
regex = '\d\b'
haystack = "6"
matches = [[0, 1]]
[[test]]
name = "wb-300"
regex = '\d\B'
haystack = "6δ"
matches = [[0, 1]]
[[test]]
name = "wb-400"
regex = '\d\B'
haystack = "6"
matches = []
# Unicode character class support.
[[test]]
name = "class1"
regex = '[☃Ⅰ]+'
haystack = "☃"
matches = [[0, 3]]
[[test]]
name = "class2"
regex = '\pN'
haystack = "Ⅰ"
matches = [[0, 3]]
[[test]]
name = "class3"
regex = '\pN+'
haystack = "Ⅰ1Ⅱ2"
matches = [[0, 8]]
[[test]]
name = "class4"
regex = '\PN+'
haystack = "abⅠ"
matches = [[0, 2]]
[[test]]
name = "class5"
regex = '[\PN]+'
haystack = "abⅠ"
matches = [[0, 2]]
[[test]]
name = "class6"
regex = '[^\PN]+'
haystack = "abⅠ"
matches = [[2, 5]]
[[test]]
name = "class7"
regex = '\p{Lu}+'
haystack = "ΛΘΓΔα"
matches = [[0, 8]]
[[test]]
name = "class8"
regex = '\p{Lu}+'
haystack = "ΛΘΓΔα"
matches = [[0, 10]]
case-insensitive = true
[[test]]
name = "class9"
regex = '\pL+'
haystack = "ΛΘΓΔα"
matches = [[0, 10]]
[[test]]
name = "class10"
regex = '\p{Ll}+'
haystack = "ΛΘΓΔα"
matches = [[8, 10]]
# Unicode aware "Perl" character classes.
[[test]]
name = "perl1"
regex = '\w+'
haystack = "dδd"
matches = [[0, 4]]
[[test]]
name = "perl2"
regex = '\w+'
haystack = "⥡"
matches = []
[[test]]
name = "perl3"
regex = '\W+'
haystack = "⥡"
matches = [[0, 3]]
[[test]]
name = "perl4"
regex = '\d+'
haystack = "1२३9"
matches = [[0, 8]]
[[test]]
name = "perl5"
regex = '\d+'
haystack = "Ⅱ"
matches = []
[[test]]
name = "perl6"
regex = '\D+'
haystack = "Ⅱ"
matches = [[0, 3]]
[[test]]
name = "perl7"
regex = '\s+'
haystack = "\u1680"
matches = [[0, 3]]
[[test]]
name = "perl8"
regex = '\s+'
haystack = "☃"
matches = []
[[test]]
name = "perl9"
regex = '\S+'
haystack = "☃"
matches = [[0, 3]]
# Specific tests for Unicode general category classes.
[[test]]
name = "class-gencat1"
regex = '\p{Cased_Letter}'
haystack = "ⱡ"
matches = [[0, 3]]
[[test]]
name = "class-gencat2"
regex = '\p{Close_Punctuation}'
haystack = "❯"
matches = [[0, 3]]
[[test]]
name = "class-gencat3"
regex = '\p{Connector_Punctuation}'
haystack = "⁀"
matches = [[0, 3]]
[[test]]
name = "class-gencat4"
regex = '\p{Control}'
haystack = "\u009F"
matches = [[0, 2]]
[[test]]
name = "class-gencat5"
regex = '\p{Currency_Symbol}'
haystack = "\uFFE1"
matches = [[0, 3]]
[[test]]
name = "class-gencat6"
regex = '\p{Dash_Punctuation}'
haystack = "〰"
matches = [[0, 3]]
[[test]]
name = "class-gencat7"
regex = '\p{Decimal_Number}'
haystack = "𑓙"
matches = [[0, 4]]
[[test]]
name = "class-gencat8"
regex = '\p{Enclosing_Mark}'
haystack = "\uA672"
matches = [[0, 3]]
[[test]]
name = "class-gencat9"
regex = '\p{Final_Punctuation}'
haystack = "⸡"
matches = [[0, 3]]
[[test]]
name = "class-gencat10"
regex = '\p{Format}'
haystack = "\U000E007F"
matches = [[0, 4]]
[[test]]
name = "class-gencat11"
regex = '\p{Initial_Punctuation}'
haystack = "⸜"
matches = [[0, 3]]
[[test]]
name = "class-gencat12"
regex = '\p{Letter}'
haystack = "Έ"
matches = [[0, 2]]
[[test]]
name = "class-gencat13"
regex = '\p{Letter_Number}'
haystack = "ↂ"
matches = [[0, 3]]
[[test]]
name = "class-gencat14"
regex = '\p{Line_Separator}'
haystack = "\u2028"
matches = [[0, 3]]
[[test]]
name = "class-gencat15"
regex = '\p{Lowercase_Letter}'
haystack = "ϛ"
matches = [[0, 2]]
[[test]]
name = "class-gencat16"
regex = '\p{Mark}'
haystack = "\U000E01EF"
matches = [[0, 4]]
[[test]]
name = "class-gencat17"
regex = '\p{Math}'
haystack = "⋿"
matches = [[0, 3]]
[[test]]
name = "class-gencat18"
regex = '\p{Modifier_Letter}'
haystack = "𖭃"
matches = [[0, 4]]
[[test]]
name = "class-gencat19"
regex = '\p{Modifier_Symbol}'
haystack = "🏿"
matches = [[0, 4]]
[[test]]
name = "class-gencat20"
regex = '\p{Nonspacing_Mark}'
haystack = "\U0001E94A"
matches = [[0, 4]]
[[test]]
name = "class-gencat21"
regex = '\p{Number}'
haystack = "⓿"
matches = [[0, 3]]
[[test]]
name = "class-gencat22"
regex = '\p{Open_Punctuation}'
haystack = "⦅"
matches = [[0, 3]]
[[test]]
name = "class-gencat23"
regex = '\p{Other}'
haystack = "\u0BC9"
matches = [[0, 3]]
[[test]]
name = "class-gencat24"
regex = '\p{Other_Letter}'
haystack = "ꓷ"
matches = [[0, 3]]
[[test]]
name = "class-gencat25"
regex = '\p{Other_Number}'
haystack = "㉏"
matches = [[0, 3]]
[[test]]
name = "class-gencat26"
regex = '\p{Other_Punctuation}'
haystack = "𞥞"
matches = [[0, 4]]
[[test]]
name = "class-gencat27"
regex = '\p{Other_Symbol}'
haystack = "⅌"
matches = [[0, 3]]
[[test]]
name = "class-gencat28"
regex = '\p{Paragraph_Separator}'
haystack = "\u2029"
matches = [[0, 3]]
[[test]]
name = "class-gencat29"
regex = '\p{Private_Use}'
haystack = "\U0010FFFD"
matches = [[0, 4]]
[[test]]
name = "class-gencat30"
regex = '\p{Punctuation}'
haystack = "𑁍"
matches = [[0, 4]]
[[test]]
name = "class-gencat31"
regex = '\p{Separator}'
haystack = "\u3000"
matches = [[0, 3]]
[[test]]
name = "class-gencat32"
regex = '\p{Space_Separator}'
haystack = "\u205F"
matches = [[0, 3]]
[[test]]
name = "class-gencat33"
regex = '\p{Spacing_Mark}'
haystack = "\U00016F7E"
matches = [[0, 4]]
[[test]]
name = "class-gencat34"
regex = '\p{Symbol}'
haystack = "⯈"
matches = [[0, 3]]
[[test]]
name = "class-gencat35"
regex = '\p{Titlecase_Letter}'
haystack = "ῼ"
matches = [[0, 3]]
[[test]]
name = "class-gencat36"
regex = '\p{Unassigned}'
haystack = "\U0010FFFF"
matches = [[0, 4]]
[[test]]
name = "class-gencat37"
regex = '\p{Uppercase_Letter}'
haystack = "Ꝋ"
matches = [[0, 3]]
# Tests for Unicode emoji properties.
[[test]]
name = "class-emoji1"
regex = '\p{Emoji}'
haystack = "\u23E9"
matches = [[0, 3]]
[[test]]
name = "class-emoji2"
regex = '\p{emoji}'
haystack = "\U0001F21A"
matches = [[0, 4]]
[[test]]
name = "class-emoji3"
regex = '\p{extendedpictographic}'
haystack = "\U0001FA6E"
matches = [[0, 4]]
[[test]]
name = "class-emoji4"
regex = '\p{extendedpictographic}'
haystack = "\U0001FFFD"
matches = [[0, 4]]
# Tests for Unicode grapheme cluster properties.
[[test]]
name = "class-gcb1"
regex = '\p{grapheme_cluster_break=prepend}'
haystack = "\U00011D46"
matches = [[0, 4]]
[[test]]
name = "class-gcb2"
regex = '\p{gcb=regional_indicator}'
haystack = "\U0001F1E6"
matches = [[0, 4]]
[[test]]
name = "class-gcb3"
regex = '\p{gcb=ri}'
haystack = "\U0001F1E7"
matches = [[0, 4]]
[[test]]
name = "class-gcb4"
regex = '\p{regionalindicator}'
haystack = "\U0001F1FF"
matches = [[0, 4]]
[[test]]
name = "class-gcb5"
regex = '\p{gcb=lvt}'
haystack = "\uC989"
matches = [[0, 3]]
[[test]]
name = "class-gcb6"
regex = '\p{gcb=zwj}'
haystack = "\u200D"
matches = [[0, 3]]
# Tests for Unicode word boundary properties.
[[test]]
name = "class-word-break1"
regex = '\p{word_break=Hebrew_Letter}'
haystack = "\uFB46"
matches = [[0, 3]]
[[test]]
name = "class-word-break2"
regex = '\p{wb=hebrewletter}'
haystack = "\uFB46"
matches = [[0, 3]]
[[test]]
name = "class-word-break3"
regex = '\p{wb=ExtendNumLet}'
haystack = "\uFF3F"
matches = [[0, 3]]
[[test]]
name = "class-word-break4"
regex = '\p{wb=WSegSpace}'
haystack = "\u3000"
matches = [[0, 3]]
[[test]]
name = "class-word-break5"
regex = '\p{wb=numeric}'
haystack = "\U0001E950"
matches = [[0, 4]]
# Tests for Unicode sentence boundary properties.
[[test]]
name = "class-sentence-break1"
regex = '\p{sentence_break=Lower}'
haystack = "\u0469"
matches = [[0, 2]]
[[test]]
name = "class-sentence-break2"
regex = '\p{sb=lower}'
haystack = "\u0469"
matches = [[0, 2]]
[[test]]
name = "class-sentence-break3"
regex = '\p{sb=Close}'
haystack = "\uFF60"
matches = [[0, 3]]
[[test]]
name = "class-sentence-break4"
regex = '\p{sb=Close}'
haystack = "\U0001F677"
matches = [[0, 4]]
[[test]]
name = "class-sentence-break5"
regex = '\p{sb=SContinue}'
haystack = "\uFF64"
matches = [[0, 3]]

399
vendor/regex/testdata/utf8.toml vendored Normal file
View File

@@ -0,0 +1,399 @@
# These test the UTF-8 modes exposed by regex-automata. Namely, when utf8 is
# true, then we promise that the haystack is valid UTF-8. (Otherwise behavior
# is unspecified.) This also corresponds to building the regex engine with the
# following two guarantees:
#
# 1) For any non-empty match reported, its span is guaranteed to correspond to
# valid UTF-8.
# 2) All empty or zero-width matches reported must never split a UTF-8
# encoded codepoint. If the haystack has invalid UTF-8, then this results in
# unspecified behavior.
#
# The (2) is in particular what we focus our testing on since (1) is generally
# guaranteed by regex-syntax's AST-to-HIR translator and is well tested there.
# The thing with (2) is that it can't be described in the HIR, so the regex
# engines have to handle that case. Thus, we test it here.
#
# Note that it is possible to build a regex that has property (1) but not
# (2), and vice versa. This is done by building the HIR with 'utf8=true' but
# building the Thompson NFA with 'utf8=false'. We don't test that here because
# the harness doesn't expose a way to enable or disable UTF-8 mode with that
# granularity. Instead, those combinations are lightly tested via doc examples.
# That's not to say that (1) without (2) is uncommon. Indeed, ripgrep uses it
# because it cannot guarantee that its haystack is valid UTF-8.
# This tests that an empty regex doesn't split a codepoint.
[[test]]
name = "empty-utf8yes"
regex = ''
haystack = '☃'
matches = [[0, 0], [3, 3]]
unicode = true
utf8 = true
# Tests the overlapping case of the above.
[[test]]
name = "empty-utf8yes-overlapping"
regex = ''
haystack = '☃'
matches = [[0, 0], [3, 3]]
unicode = true
utf8 = true
match-kind = "all"
search-kind = "overlapping"
# This tests that an empty regex DOES split a codepoint when utf8=false.
[[test]]
name = "empty-utf8no"
regex = ''
haystack = '☃'
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
unicode = true
utf8 = false
# Tests the overlapping case of the above.
[[test]]
name = "empty-utf8no-overlapping"
regex = ''
haystack = '☃'
matches = [[0, 0], [1, 1], [2, 2], [3, 3]]
unicode = true
utf8 = false
match-kind = "all"
search-kind = "overlapping"
# This tests that an empty regex doesn't split a codepoint, even if we give
# it bounds entirely within the codepoint.
#
# This is one of the trickier cases and is what motivated the current UTF-8
# mode design. In particular, at one point, this test failed the 'is_match'
# variant of the test but not 'find'. This is because the 'is_match' code path
# is specifically optimized for "was a match found" rather than "where is the
# match." In the former case, you don't really care about the empty-vs-non-empty
# matches, and thus, the codepoint splitting filtering logic wasn't getting
# applied. (In multiple ways across multiple regex engines.) In this way, you
# can wind up with a situation where 'is_match' says "yes," but 'find' says,
# "I didn't find anything." Which is... not great.
#
# I could have decided to say that providing boundaries that themselves split
# a codepoint would have unspecified behavior. But I couldn't quite convince
# myself that such boundaries were the only way to get an inconsistency between
# 'is_match' and 'find'.
#
# Note that I also tried to come up with a test like this that fails without
# using `bounds`. Specifically, a test where 'is_match' and 'find' disagree.
# But I couldn't do it, and I'm tempted to conclude it is impossible. The
# fundamental problem is that you need to simultaneously produce an empty match
# that splits a codepoint while *not* matching before or after the codepoint.
[[test]]
name = "empty-utf8yes-bounds"
regex = ''
haystack = '𝛃'
bounds = [1, 3]
matches = []
unicode = true
utf8 = true
# Tests the overlapping case of the above.
[[test]]
name = "empty-utf8yes-bounds-overlapping"
regex = ''
haystack = '𝛃'
bounds = [1, 3]
matches = []
unicode = true
utf8 = true
match-kind = "all"
search-kind = "overlapping"
# This tests that an empty regex splits a codepoint when the bounds are
# entirely within the codepoint.
[[test]]
name = "empty-utf8no-bounds"
regex = ''
haystack = '𝛃'
bounds = [1, 3]
matches = [[1, 1], [2, 2], [3, 3]]
unicode = true
utf8 = false
# Tests the overlapping case of the above.
[[test]]
name = "empty-utf8no-bounds-overlapping"
regex = ''
haystack = '𝛃'
bounds = [1, 3]
matches = [[1, 1], [2, 2], [3, 3]]
unicode = true
utf8 = false
match-kind = "all"
search-kind = "overlapping"
# In this test, we anchor the search. Since the start position is also a UTF-8
# boundary, we get a match.
[[test]]
name = "empty-utf8yes-anchored"
regex = ''
haystack = '𝛃'
matches = [[0, 0]]
anchored = true
unicode = true
utf8 = true
# Tests the overlapping case of the above.
[[test]]
name = "empty-utf8yes-anchored-overlapping"
regex = ''
haystack = '𝛃'
matches = [[0, 0]]
anchored = true
unicode = true
utf8 = true
match-kind = "all"
search-kind = "overlapping"
# Same as above, except with UTF-8 mode disabled. It almost doesn't change the
# result, except for the fact that since this is an anchored search and we
# always find all matches, the test harness will keep reporting matches until
# none are found. Because it's anchored, matches will be reported so long as
# they are directly adjacent. Since with UTF-8 mode the next anchored search
# after the match at [0, 0] fails, iteration stops (and doesn't find the last
# match at [4, 4]).
[[test]]
name = "empty-utf8no-anchored"
regex = ''
haystack = '𝛃'
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
anchored = true
unicode = true
utf8 = false
# Tests the overlapping case of the above.
#
# Note that overlapping anchored searches are a little weird, and it's not
# totally clear what their semantics ought to be. For now, we just test the
# current behavior of our test shim that implements overlapping search. (This
# is one of the reasons why we don't really expose regex-level overlapping
# searches.)
[[test]]
name = "empty-utf8no-anchored-overlapping"
regex = ''
haystack = '𝛃'
matches = [[0, 0]]
anchored = true
unicode = true
utf8 = false
match-kind = "all"
search-kind = "overlapping"
# In this test, we anchor the search, but also set bounds. The bounds start the
# search in the middle of a codepoint, so there should never be a match.
[[test]]
name = "empty-utf8yes-anchored-bounds"
regex = ''
haystack = '𝛃'
matches = []
bounds = [1, 3]
anchored = true
unicode = true
utf8 = true
# Tests the overlapping case of the above.
[[test]]
name = "empty-utf8yes-anchored-bounds-overlapping"
regex = ''
haystack = '𝛃'
matches = []
bounds = [1, 3]
anchored = true
unicode = true
utf8 = true
match-kind = "all"
search-kind = "overlapping"
# Same as above, except with UTF-8 mode disabled. Without UTF-8 mode enabled,
# matching within a codepoint is allowed. And remember, as in the anchored test
# above with UTF-8 mode disabled, iteration will report all adjacent matches.
# The matches at [0, 0] and [4, 4] are not included because of the bounds of
# the search.
[[test]]
name = "empty-utf8no-anchored-bounds"
regex = ''
haystack = '𝛃'
bounds = [1, 3]
matches = [[1, 1], [2, 2], [3, 3]]
anchored = true
unicode = true
utf8 = false
# Tests the overlapping case of the above.
#
# Note that overlapping anchored searches are a little weird, and it's not
# totally clear what their semantics ought to be. For now, we just test the
# current behavior of our test shim that implements overlapping search. (This
# is one of the reasons why we don't really expose regex-level overlapping
# searches.)
[[test]]
name = "empty-utf8no-anchored-bounds-overlapping"
regex = ''
haystack = '𝛃'
bounds = [1, 3]
matches = [[1, 1]]
anchored = true
unicode = true
utf8 = false
match-kind = "all"
search-kind = "overlapping"
# This tests that we find the match at the end of the string when the bounds
# exclude the first match.
[[test]]
name = "empty-utf8yes-startbound"
regex = ''
haystack = '𝛃'
bounds = [1, 4]
matches = [[4, 4]]
unicode = true
utf8 = true
# Tests the overlapping case of the above.
[[test]]
name = "empty-utf8yes-startbound-overlapping"
regex = ''
haystack = '𝛃'
bounds = [1, 4]
matches = [[4, 4]]
unicode = true
utf8 = true
match-kind = "all"
search-kind = "overlapping"
# Same as above, except since UTF-8 mode is disabled, we also find the matches
# in between that split the codepoint.
[[test]]
name = "empty-utf8no-startbound"
regex = ''
haystack = '𝛃'
bounds = [1, 4]
matches = [[1, 1], [2, 2], [3, 3], [4, 4]]
unicode = true
utf8 = false
# Tests the overlapping case of the above.
[[test]]
name = "empty-utf8no-startbound-overlapping"
regex = ''
haystack = '𝛃'
bounds = [1, 4]
matches = [[1, 1], [2, 2], [3, 3], [4, 4]]
unicode = true
utf8 = false
match-kind = "all"
search-kind = "overlapping"
# This tests that we don't find any matches in an anchored search, even when
# the bounds include a match (at the end).
[[test]]
name = "empty-utf8yes-anchored-startbound"
regex = ''
haystack = '𝛃'
bounds = [1, 4]
matches = []
anchored = true
unicode = true
utf8 = true
# Tests the overlapping case of the above.
[[test]]
name = "empty-utf8yes-anchored-startbound-overlapping"
regex = ''
haystack = '𝛃'
bounds = [1, 4]
matches = []
anchored = true
unicode = true
utf8 = true
match-kind = "all"
search-kind = "overlapping"
# Same as above, except since UTF-8 mode is disabled, we also find the matches
# in between that split the codepoint. Even though this is an anchored search,
# since the matches are adjacent, we find all of them.
[[test]]
name = "empty-utf8no-anchored-startbound"
regex = ''
haystack = '𝛃'
bounds = [1, 4]
matches = [[1, 1], [2, 2], [3, 3], [4, 4]]
anchored = true
unicode = true
utf8 = false
# Tests the overlapping case of the above.
#
# Note that overlapping anchored searches are a little weird, and it's not
# totally clear what their semantics ought to be. For now, we just test the
# current behavior of our test shim that implements overlapping search. (This
# is one of the reasons why we don't really expose regex-level overlapping
# searches.)
[[test]]
name = "empty-utf8no-anchored-startbound-overlapping"
regex = ''
haystack = '𝛃'
bounds = [1, 4]
matches = [[1, 1]]
anchored = true
unicode = true
utf8 = false
match-kind = "all"
search-kind = "overlapping"
# This tests that we find the match at the end of the haystack in UTF-8 mode
# when our bounds only include the empty string at the end of the haystack.
[[test]]
name = "empty-utf8yes-anchored-endbound"
regex = ''
haystack = '𝛃'
bounds = [4, 4]
matches = [[4, 4]]
anchored = true
unicode = true
utf8 = true
# Tests the overlapping case of the above.
[[test]]
name = "empty-utf8yes-anchored-endbound-overlapping"
regex = ''
haystack = '𝛃'
bounds = [4, 4]
matches = [[4, 4]]
anchored = true
unicode = true
utf8 = true
match-kind = "all"
search-kind = "overlapping"
# Same as above, but with UTF-8 mode disabled. Results remain the same since
# the only possible match does not split a codepoint.
[[test]]
name = "empty-utf8no-anchored-endbound"
regex = ''
haystack = '𝛃'
bounds = [4, 4]
matches = [[4, 4]]
anchored = true
unicode = true
utf8 = false
# Tests the overlapping case of the above.
[[test]]
name = "empty-utf8no-anchored-endbound-overlapping"
regex = ''
haystack = '𝛃'
bounds = [4, 4]
matches = [[4, 4]]
anchored = true
unicode = true
utf8 = false
match-kind = "all"
search-kind = "overlapping"

View File

@@ -0,0 +1,687 @@
# These tests are for the "special" word boundary assertions. That is,
# \b{start}, \b{end}, \b{start-half}, \b{end-half}. These are specialty
# assertions for more niche use cases, but hitting those cases without these
# assertions is difficult. For example, \b{start-half} and \b{end-half} are
# used to implement the -w/--word-regexp flag in a grep program.
# Tests for (?-u:\b{start})
[[test]]
name = "word-start-ascii-010"
regex = '\b{start}'
haystack = "a"
matches = [[0, 0]]
unicode = false
[[test]]
name = "word-start-ascii-020"
regex = '\b{start}'
haystack = "a "
matches = [[0, 0]]
unicode = false
[[test]]
name = "word-start-ascii-030"
regex = '\b{start}'
haystack = " a "
matches = [[1, 1]]
unicode = false
[[test]]
name = "word-start-ascii-040"
regex = '\b{start}'
haystack = ""
matches = []
unicode = false
[[test]]
name = "word-start-ascii-050"
regex = '\b{start}'
haystack = "ab"
matches = [[0, 0]]
unicode = false
[[test]]
name = "word-start-ascii-060"
regex = '\b{start}'
haystack = "𝛃"
matches = []
unicode = false
[[test]]
name = "word-start-ascii-060-bounds"
regex = '\b{start}'
haystack = "𝛃"
bounds = [2, 3]
matches = []
unicode = false
[[test]]
name = "word-start-ascii-070"
regex = '\b{start}'
haystack = " 𝛃 "
matches = []
unicode = false
[[test]]
name = "word-start-ascii-080"
regex = '\b{start}'
haystack = "𝛃𐆀"
matches = []
unicode = false
[[test]]
name = "word-start-ascii-090"
regex = '\b{start}'
haystack = "𝛃b"
matches = [[4, 4]]
unicode = false
[[test]]
name = "word-start-ascii-110"
regex = '\b{start}'
haystack = "b𝛃"
matches = [[0, 0]]
unicode = false
# Tests for (?-u:\b{end})
[[test]]
name = "word-end-ascii-010"
regex = '\b{end}'
haystack = "a"
matches = [[1, 1]]
unicode = false
[[test]]
name = "word-end-ascii-020"
regex = '\b{end}'
haystack = "a "
matches = [[1, 1]]
unicode = false
[[test]]
name = "word-end-ascii-030"
regex = '\b{end}'
haystack = " a "
matches = [[2, 2]]
unicode = false
[[test]]
name = "word-end-ascii-040"
regex = '\b{end}'
haystack = ""
matches = []
unicode = false
[[test]]
name = "word-end-ascii-050"
regex = '\b{end}'
haystack = "ab"
matches = [[2, 2]]
unicode = false
[[test]]
name = "word-end-ascii-060"
regex = '\b{end}'
haystack = "𝛃"
matches = []
unicode = false
[[test]]
name = "word-end-ascii-060-bounds"
regex = '\b{end}'
haystack = "𝛃"
bounds = [2, 3]
matches = []
unicode = false
[[test]]
name = "word-end-ascii-070"
regex = '\b{end}'
haystack = " 𝛃 "
matches = []
unicode = false
[[test]]
name = "word-end-ascii-080"
regex = '\b{end}'
haystack = "𝛃𐆀"
matches = []
unicode = false
[[test]]
name = "word-end-ascii-090"
regex = '\b{end}'
haystack = "𝛃b"
matches = [[5, 5]]
unicode = false
[[test]]
name = "word-end-ascii-110"
regex = '\b{end}'
haystack = "b𝛃"
matches = [[1, 1]]
unicode = false
# Tests for \b{start}
[[test]]
name = "word-start-unicode-010"
regex = '\b{start}'
haystack = "a"
matches = [[0, 0]]
unicode = true
[[test]]
name = "word-start-unicode-020"
regex = '\b{start}'
haystack = "a "
matches = [[0, 0]]
unicode = true
[[test]]
name = "word-start-unicode-030"
regex = '\b{start}'
haystack = " a "
matches = [[1, 1]]
unicode = true
[[test]]
name = "word-start-unicode-040"
regex = '\b{start}'
haystack = ""
matches = []
unicode = true
[[test]]
name = "word-start-unicode-050"
regex = '\b{start}'
haystack = "ab"
matches = [[0, 0]]
unicode = true
[[test]]
name = "word-start-unicode-060"
regex = '\b{start}'
haystack = "𝛃"
matches = [[0, 0]]
unicode = true
[[test]]
name = "word-start-unicode-060-bounds"
regex = '\b{start}'
haystack = "𝛃"
bounds = [2, 3]
matches = []
unicode = true
[[test]]
name = "word-start-unicode-070"
regex = '\b{start}'
haystack = " 𝛃 "
matches = [[1, 1]]
unicode = true
[[test]]
name = "word-start-unicode-080"
regex = '\b{start}'
haystack = "𝛃𐆀"
matches = [[0, 0]]
unicode = true
[[test]]
name = "word-start-unicode-090"
regex = '\b{start}'
haystack = "𝛃b"
matches = [[0, 0]]
unicode = true
[[test]]
name = "word-start-unicode-110"
regex = '\b{start}'
haystack = "b𝛃"
matches = [[0, 0]]
unicode = true
# Tests for \b{end}
[[test]]
name = "word-end-unicode-010"
regex = '\b{end}'
haystack = "a"
matches = [[1, 1]]
unicode = true
[[test]]
name = "word-end-unicode-020"
regex = '\b{end}'
haystack = "a "
matches = [[1, 1]]
unicode = true
[[test]]
name = "word-end-unicode-030"
regex = '\b{end}'
haystack = " a "
matches = [[2, 2]]
unicode = true
[[test]]
name = "word-end-unicode-040"
regex = '\b{end}'
haystack = ""
matches = []
unicode = true
[[test]]
name = "word-end-unicode-050"
regex = '\b{end}'
haystack = "ab"
matches = [[2, 2]]
unicode = true
[[test]]
name = "word-end-unicode-060"
regex = '\b{end}'
haystack = "𝛃"
matches = [[4, 4]]
unicode = true
[[test]]
name = "word-end-unicode-060-bounds"
regex = '\b{end}'
haystack = "𝛃"
bounds = [2, 3]
matches = []
unicode = true
[[test]]
name = "word-end-unicode-070"
regex = '\b{end}'
haystack = " 𝛃 "
matches = [[5, 5]]
unicode = true
[[test]]
name = "word-end-unicode-080"
regex = '\b{end}'
haystack = "𝛃𐆀"
matches = [[4, 4]]
unicode = true
[[test]]
name = "word-end-unicode-090"
regex = '\b{end}'
haystack = "𝛃b"
matches = [[5, 5]]
unicode = true
[[test]]
name = "word-end-unicode-110"
regex = '\b{end}'
haystack = "b𝛃"
matches = [[5, 5]]
unicode = true
# Tests for (?-u:\b{start-half})
[[test]]
name = "word-start-half-ascii-010"
regex = '\b{start-half}'
haystack = "a"
matches = [[0, 0]]
unicode = false
[[test]]
name = "word-start-half-ascii-020"
regex = '\b{start-half}'
haystack = "a "
matches = [[0, 0], [2, 2]]
unicode = false
[[test]]
name = "word-start-half-ascii-030"
regex = '\b{start-half}'
haystack = " a "
matches = [[0, 0], [1, 1], [3, 3]]
unicode = false
[[test]]
name = "word-start-half-ascii-040"
regex = '\b{start-half}'
haystack = ""
matches = [[0, 0]]
unicode = false
[[test]]
name = "word-start-half-ascii-050"
regex = '\b{start-half}'
haystack = "ab"
matches = [[0, 0]]
unicode = false
[[test]]
name = "word-start-half-ascii-060"
regex = '\b{start-half}'
haystack = "𝛃"
matches = [[0, 0], [4, 4]]
unicode = false
[[test]]
name = "word-start-half-ascii-060-noutf8"
regex = '\b{start-half}'
haystack = "𝛃"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4]]
unicode = false
utf8 = false
[[test]]
name = "word-start-half-ascii-060-bounds"
regex = '\b{start-half}'
haystack = "𝛃"
bounds = [2, 3]
matches = []
unicode = false
[[test]]
name = "word-start-half-ascii-070"
regex = '\b{start-half}'
haystack = " 𝛃 "
matches = [[0, 0], [1, 1], [5, 5], [6, 6]]
unicode = false
[[test]]
name = "word-start-half-ascii-080"
regex = '\b{start-half}'
haystack = "𝛃𐆀"
matches = [[0, 0], [4, 4], [8, 8]]
unicode = false
[[test]]
name = "word-start-half-ascii-090"
regex = '\b{start-half}'
haystack = "𝛃b"
matches = [[0, 0], [4, 4]]
unicode = false
[[test]]
name = "word-start-half-ascii-110"
regex = '\b{start-half}'
haystack = "b𝛃"
matches = [[0, 0], [5, 5]]
unicode = false
# Tests for (?-u:\b{end-half})
[[test]]
name = "word-end-half-ascii-010"
regex = '\b{end-half}'
haystack = "a"
matches = [[1, 1]]
unicode = false
[[test]]
name = "word-end-half-ascii-020"
regex = '\b{end-half}'
haystack = "a "
matches = [[1, 1], [2, 2]]
unicode = false
[[test]]
name = "word-end-half-ascii-030"
regex = '\b{end-half}'
haystack = " a "
matches = [[0, 0], [2, 2], [3, 3]]
unicode = false
[[test]]
name = "word-end-half-ascii-040"
regex = '\b{end-half}'
haystack = ""
matches = [[0, 0]]
unicode = false
[[test]]
name = "word-end-half-ascii-050"
regex = '\b{end-half}'
haystack = "ab"
matches = [[2, 2]]
unicode = false
[[test]]
name = "word-end-half-ascii-060"
regex = '\b{end-half}'
haystack = "𝛃"
matches = [[0, 0], [4, 4]]
unicode = false
[[test]]
name = "word-end-half-ascii-060-bounds"
regex = '\b{end-half}'
haystack = "𝛃"
bounds = [2, 3]
matches = []
unicode = false
[[test]]
name = "word-end-half-ascii-070"
regex = '\b{end-half}'
haystack = " 𝛃 "
matches = [[0, 0], [1, 1], [5, 5], [6, 6]]
unicode = false
[[test]]
name = "word-end-half-ascii-080"
regex = '\b{end-half}'
haystack = "𝛃𐆀"
matches = [[0, 0], [4, 4], [8, 8]]
unicode = false
[[test]]
name = "word-end-half-ascii-090"
regex = '\b{end-half}'
haystack = "𝛃b"
matches = [[0, 0], [5, 5]]
unicode = false
[[test]]
name = "word-end-half-ascii-110"
regex = '\b{end-half}'
haystack = "b𝛃"
matches = [[1, 1], [5, 5]]
unicode = false
# Tests for \b{start-half}
[[test]]
name = "word-start-half-unicode-010"
regex = '\b{start-half}'
haystack = "a"
matches = [[0, 0]]
unicode = true
[[test]]
name = "word-start-half-unicode-020"
regex = '\b{start-half}'
haystack = "a "
matches = [[0, 0], [2, 2]]
unicode = true
[[test]]
name = "word-start-half-unicode-030"
regex = '\b{start-half}'
haystack = " a "
matches = [[0, 0], [1, 1], [3, 3]]
unicode = true
[[test]]
name = "word-start-half-unicode-040"
regex = '\b{start-half}'
haystack = ""
matches = [[0, 0]]
unicode = true
[[test]]
name = "word-start-half-unicode-050"
regex = '\b{start-half}'
haystack = "ab"
matches = [[0, 0]]
unicode = true
[[test]]
name = "word-start-half-unicode-060"
regex = '\b{start-half}'
haystack = "𝛃"
matches = [[0, 0]]
unicode = true
[[test]]
name = "word-start-half-unicode-060-bounds"
regex = '\b{start-half}'
haystack = "𝛃"
bounds = [2, 3]
matches = []
unicode = true
[[test]]
name = "word-start-half-unicode-070"
regex = '\b{start-half}'
haystack = " 𝛃 "
matches = [[0, 0], [1, 1], [6, 6]]
unicode = true
[[test]]
name = "word-start-half-unicode-080"
regex = '\b{start-half}'
haystack = "𝛃𐆀"
matches = [[0, 0], [8, 8]]
unicode = true
[[test]]
name = "word-start-half-unicode-090"
regex = '\b{start-half}'
haystack = "𝛃b"
matches = [[0, 0]]
unicode = true
[[test]]
name = "word-start-half-unicode-110"
regex = '\b{start-half}'
haystack = "b𝛃"
matches = [[0, 0]]
unicode = true
# Tests for \b{end-half}
[[test]]
name = "word-end-half-unicode-010"
regex = '\b{end-half}'
haystack = "a"
matches = [[1, 1]]
unicode = true
[[test]]
name = "word-end-half-unicode-020"
regex = '\b{end-half}'
haystack = "a "
matches = [[1, 1], [2, 2]]
unicode = true
[[test]]
name = "word-end-half-unicode-030"
regex = '\b{end-half}'
haystack = " a "
matches = [[0, 0], [2, 2], [3, 3]]
unicode = true
[[test]]
name = "word-end-half-unicode-040"
regex = '\b{end-half}'
haystack = ""
matches = [[0, 0]]
unicode = true
[[test]]
name = "word-end-half-unicode-050"
regex = '\b{end-half}'
haystack = "ab"
matches = [[2, 2]]
unicode = true
[[test]]
name = "word-end-half-unicode-060"
regex = '\b{end-half}'
haystack = "𝛃"
matches = [[4, 4]]
unicode = true
[[test]]
name = "word-end-half-unicode-060-bounds"
regex = '\b{end-half}'
haystack = "𝛃"
bounds = [2, 3]
matches = []
unicode = true
[[test]]
name = "word-end-half-unicode-070"
regex = '\b{end-half}'
haystack = " 𝛃 "
matches = [[0, 0], [5, 5], [6, 6]]
unicode = true
[[test]]
name = "word-end-half-unicode-080"
regex = '\b{end-half}'
haystack = "𝛃𐆀"
matches = [[4, 4], [8, 8]]
unicode = true
[[test]]
name = "word-end-half-unicode-090"
regex = '\b{end-half}'
haystack = "𝛃b"
matches = [[5, 5]]
unicode = true
[[test]]
name = "word-end-half-unicode-110"
regex = '\b{end-half}'
haystack = "b𝛃"
matches = [[5, 5]]
unicode = true
# Specialty tests.
# Since \r is special cased in the start state computation (to deal with CRLF
# mode), this test ensures that the correct start state is computed when the
# pattern starts with a half word boundary assertion.
[[test]]
name = "word-start-half-ascii-carriage"
regex = '\b{start-half}[a-z]+'
haystack = 'ABC\rabc'
matches = [[4, 7]]
bounds = [4, 7]
unescape = true
# Since \n is also special cased in the start state computation, this test
# ensures that the correct start state is computed when the pattern starts with
# a half word boundary assertion.
[[test]]
name = "word-start-half-ascii-linefeed"
regex = '\b{start-half}[a-z]+'
haystack = 'ABC\nabc'
matches = [[4, 7]]
bounds = [4, 7]
unescape = true
# Like the carriage return test above, but with a custom line terminator.
[[test]]
name = "word-start-half-ascii-customlineterm"
regex = '\b{start-half}[a-z]+'
haystack = 'ABC!abc'
matches = [[4, 7]]
bounds = [4, 7]
unescape = true
line-terminator = '!'

781
vendor/regex/testdata/word-boundary.toml vendored Normal file
View File

@@ -0,0 +1,781 @@
# Some of these are cribbed from RE2's test suite.
# These test \b. Below are tests for \B.
[[test]]
name = "wb1"
regex = '\b'
haystack = ""
matches = []
unicode = false
[[test]]
name = "wb2"
regex = '\b'
haystack = "a"
matches = [[0, 0], [1, 1]]
unicode = false
[[test]]
name = "wb3"
regex = '\b'
haystack = "ab"
matches = [[0, 0], [2, 2]]
unicode = false
[[test]]
name = "wb4"
regex = '^\b'
haystack = "ab"
matches = [[0, 0]]
unicode = false
[[test]]
name = "wb5"
regex = '\b$'
haystack = "ab"
matches = [[2, 2]]
unicode = false
[[test]]
name = "wb6"
regex = '^\b$'
haystack = "ab"
matches = []
unicode = false
[[test]]
name = "wb7"
regex = '\bbar\b'
haystack = "nobar bar foo bar"
matches = [[6, 9], [14, 17]]
unicode = false
[[test]]
name = "wb8"
regex = 'a\b'
haystack = "faoa x"
matches = [[3, 4]]
unicode = false
[[test]]
name = "wb9"
regex = '\bbar'
haystack = "bar x"
matches = [[0, 3]]
unicode = false
[[test]]
name = "wb10"
regex = '\bbar'
haystack = "foo\nbar x"
matches = [[4, 7]]
unicode = false
[[test]]
name = "wb11"
regex = 'bar\b'
haystack = "foobar"
matches = [[3, 6]]
unicode = false
[[test]]
name = "wb12"
regex = 'bar\b'
haystack = "foobar\nxxx"
matches = [[3, 6]]
unicode = false
[[test]]
name = "wb13"
regex = '(?:foo|bar|[A-Z])\b'
haystack = "foo"
matches = [[0, 3]]
unicode = false
[[test]]
name = "wb14"
regex = '(?:foo|bar|[A-Z])\b'
haystack = "foo\n"
matches = [[0, 3]]
unicode = false
[[test]]
name = "wb15"
regex = '\b(?:foo|bar|[A-Z])'
haystack = "foo"
matches = [[0, 3]]
unicode = false
[[test]]
name = "wb16"
regex = '\b(?:foo|bar|[A-Z])\b'
haystack = "X"
matches = [[0, 1]]
unicode = false
[[test]]
name = "wb17"
regex = '\b(?:foo|bar|[A-Z])\b'
haystack = "XY"
matches = []
unicode = false
[[test]]
name = "wb18"
regex = '\b(?:foo|bar|[A-Z])\b'
haystack = "bar"
matches = [[0, 3]]
unicode = false
[[test]]
name = "wb19"
regex = '\b(?:foo|bar|[A-Z])\b'
haystack = "foo"
matches = [[0, 3]]
unicode = false
[[test]]
name = "wb20"
regex = '\b(?:foo|bar|[A-Z])\b'
haystack = "foo\n"
matches = [[0, 3]]
unicode = false
[[test]]
name = "wb21"
regex = '\b(?:foo|bar|[A-Z])\b'
haystack = "ffoo bbar N x"
matches = [[10, 11]]
unicode = false
[[test]]
name = "wb22"
regex = '\b(?:fo|foo)\b'
haystack = "fo"
matches = [[0, 2]]
unicode = false
[[test]]
name = "wb23"
regex = '\b(?:fo|foo)\b'
haystack = "foo"
matches = [[0, 3]]
unicode = false
[[test]]
name = "wb24"
regex = '\b\b'
haystack = ""
matches = []
unicode = false
[[test]]
name = "wb25"
regex = '\b\b'
haystack = "a"
matches = [[0, 0], [1, 1]]
unicode = false
[[test]]
name = "wb26"
regex = '\b$'
haystack = ""
matches = []
unicode = false
[[test]]
name = "wb27"
regex = '\b$'
haystack = "x"
matches = [[1, 1]]
unicode = false
[[test]]
name = "wb28"
regex = '\b$'
haystack = "y x"
matches = [[3, 3]]
unicode = false
[[test]]
name = "wb29"
regex = '(?-u:\b).$'
haystack = "x"
matches = [[0, 1]]
[[test]]
name = "wb30"
regex = '^\b(?:fo|foo)\b'
haystack = "fo"
matches = [[0, 2]]
unicode = false
[[test]]
name = "wb31"
regex = '^\b(?:fo|foo)\b'
haystack = "foo"
matches = [[0, 3]]
unicode = false
[[test]]
name = "wb32"
regex = '^\b$'
haystack = ""
matches = []
unicode = false
[[test]]
name = "wb33"
regex = '^\b$'
haystack = "x"
matches = []
unicode = false
[[test]]
name = "wb34"
regex = '^(?-u:\b).$'
haystack = "x"
matches = [[0, 1]]
[[test]]
name = "wb35"
regex = '^(?-u:\b).(?-u:\b)$'
haystack = "x"
matches = [[0, 1]]
[[test]]
name = "wb36"
regex = '^^^^^\b$$$$$'
haystack = ""
matches = []
unicode = false
[[test]]
name = "wb37"
regex = '^^^^^(?-u:\b).$$$$$'
haystack = "x"
matches = [[0, 1]]
[[test]]
name = "wb38"
regex = '^^^^^\b$$$$$'
haystack = "x"
matches = []
unicode = false
[[test]]
name = "wb39"
regex = '^^^^^(?-u:\b\b\b).(?-u:\b\b\b)$$$$$'
haystack = "x"
matches = [[0, 1]]
[[test]]
name = "wb40"
regex = '(?-u:\b).+(?-u:\b)'
haystack = "$$abc$$"
matches = [[2, 5]]
[[test]]
name = "wb41"
regex = '\b'
haystack = "a b c"
matches = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]
unicode = false
[[test]]
name = "wb42"
regex = '\bfoo\b'
haystack = "zzz foo zzz"
matches = [[4, 7]]
unicode = false
[[test]]
name = "wb43"
regex = '\b^'
haystack = "ab"
matches = [[0, 0]]
unicode = false
[[test]]
name = "wb44"
regex = '$\b'
haystack = "ab"
matches = [[2, 2]]
unicode = false
# Tests for \B. Note that the ASCII-only \B (i.e., with 'unicode = false') is
# not allowed when UTF-8 mode is enabled, so UTF-8 mode is disabled for these
# tests. This is because ASCII \B can match at positions that split the UTF-8
# encoding of a codepoint.
[[test]]
name = "nb1"
regex = '\Bfoo\B'
haystack = "n foo xfoox that"
matches = [[7, 10]]
unicode = false
utf8 = false
[[test]]
name = "nb2"
regex = 'a\B'
haystack = "faoa x"
matches = [[1, 2]]
unicode = false
utf8 = false
[[test]]
name = "nb3"
regex = '\Bbar'
haystack = "bar x"
matches = []
unicode = false
utf8 = false
[[test]]
name = "nb4"
regex = '\Bbar'
haystack = "foo\nbar x"
matches = []
unicode = false
utf8 = false
[[test]]
name = "nb5"
regex = 'bar\B'
haystack = "foobar"
matches = []
unicode = false
utf8 = false
[[test]]
name = "nb6"
regex = 'bar\B'
haystack = "foobar\nxxx"
matches = []
unicode = false
utf8 = false
[[test]]
name = "nb7"
regex = '(?:foo|bar|[A-Z])\B'
haystack = "foox"
matches = [[0, 3]]
unicode = false
utf8 = false
[[test]]
name = "nb8"
regex = '(?:foo|bar|[A-Z])\B'
haystack = "foo\n"
matches = []
unicode = false
utf8 = false
[[test]]
name = "nb9"
regex = '\B'
haystack = ""
matches = [[0, 0]]
unicode = false
utf8 = false
[[test]]
name = "nb10"
regex = '\B'
haystack = "x"
matches = []
unicode = false
utf8 = false
[[test]]
name = "nb11"
regex = '\B(?:foo|bar|[A-Z])'
haystack = "foo"
matches = []
unicode = false
utf8 = false
[[test]]
name = "nb12"
regex = '\B(?:foo|bar|[A-Z])\B'
haystack = "xXy"
matches = [[1, 2]]
unicode = false
utf8 = false
[[test]]
name = "nb13"
regex = '\B(?:foo|bar|[A-Z])\B'
haystack = "XY"
matches = []
unicode = false
utf8 = false
[[test]]
name = "nb14"
regex = '\B(?:foo|bar|[A-Z])\B'
haystack = "XYZ"
matches = [[1, 2]]
unicode = false
utf8 = false
[[test]]
name = "nb15"
regex = '\B(?:foo|bar|[A-Z])\B'
haystack = "abara"
matches = [[1, 4]]
unicode = false
utf8 = false
[[test]]
name = "nb16"
regex = '\B(?:foo|bar|[A-Z])\B'
haystack = "xfoo_"
matches = [[1, 4]]
unicode = false
utf8 = false
[[test]]
name = "nb17"
regex = '\B(?:foo|bar|[A-Z])\B'
haystack = "xfoo\n"
matches = []
unicode = false
utf8 = false
[[test]]
name = "nb18"
regex = '\B(?:foo|bar|[A-Z])\B'
haystack = "foo bar vNX"
matches = [[9, 10]]
unicode = false
utf8 = false
[[test]]
name = "nb19"
regex = '\B(?:fo|foo)\B'
haystack = "xfoo"
matches = [[1, 3]]
unicode = false
utf8 = false
[[test]]
name = "nb20"
regex = '\B(?:foo|fo)\B'
haystack = "xfooo"
matches = [[1, 4]]
unicode = false
utf8 = false
[[test]]
name = "nb21"
regex = '\B\B'
haystack = ""
matches = [[0, 0]]
unicode = false
utf8 = false
[[test]]
name = "nb22"
regex = '\B\B'
haystack = "x"
matches = []
unicode = false
utf8 = false
[[test]]
name = "nb23"
regex = '\B$'
haystack = ""
matches = [[0, 0]]
unicode = false
utf8 = false
[[test]]
name = "nb24"
regex = '\B$'
haystack = "x"
matches = []
unicode = false
utf8 = false
[[test]]
name = "nb25"
regex = '\B$'
haystack = "y x"
matches = []
unicode = false
utf8 = false
[[test]]
name = "nb26"
regex = '\B.$'
haystack = "x"
matches = []
unicode = false
utf8 = false
[[test]]
name = "nb27"
regex = '^\B(?:fo|foo)\B'
haystack = "fo"
matches = []
unicode = false
utf8 = false
[[test]]
name = "nb28"
regex = '^\B(?:fo|foo)\B'
haystack = "fo"
matches = []
unicode = false
utf8 = false
[[test]]
name = "nb29"
regex = '^\B'
haystack = ""
matches = [[0, 0]]
unicode = false
utf8 = false
[[test]]
name = "nb30"
regex = '^\B'
haystack = "x"
matches = []
unicode = false
utf8 = false
[[test]]
name = "nb31"
regex = '^\B\B'
haystack = ""
matches = [[0, 0]]
unicode = false
utf8 = false
[[test]]
name = "nb32"
regex = '^\B\B'
haystack = "x"
matches = []
unicode = false
utf8 = false
[[test]]
name = "nb33"
regex = '^\B$'
haystack = ""
matches = [[0, 0]]
unicode = false
utf8 = false
[[test]]
name = "nb34"
regex = '^\B$'
haystack = "x"
matches = []
unicode = false
utf8 = false
[[test]]
name = "nb35"
regex = '^\B.$'
haystack = "x"
matches = []
unicode = false
utf8 = false
[[test]]
name = "nb36"
regex = '^\B.\B$'
haystack = "x"
matches = []
unicode = false
utf8 = false
[[test]]
name = "nb37"
regex = '^^^^^\B$$$$$'
haystack = ""
matches = [[0, 0]]
unicode = false
utf8 = false
[[test]]
name = "nb38"
regex = '^^^^^\B.$$$$$'
haystack = "x"
matches = []
unicode = false
utf8 = false
[[test]]
name = "nb39"
regex = '^^^^^\B$$$$$'
haystack = "x"
matches = []
unicode = false
utf8 = false
# unicode1* and unicode2* work for both Unicode and ASCII because all matches
# are reported as byte offsets, and « and » do not correspond to word
# boundaries at either the character or byte level.
[[test]]
name = "unicode1"
regex = '\bx\b'
haystack = "«x"
matches = [[2, 3]]
[[test]]
name = "unicode1-only-ascii"
regex = '\bx\b'
haystack = "«x"
matches = [[2, 3]]
unicode = false
[[test]]
name = "unicode2"
regex = '\bx\b'
haystack = "x»"
matches = [[0, 1]]
[[test]]
name = "unicode2-only-ascii"
regex = '\bx\b'
haystack = "x»"
matches = [[0, 1]]
unicode = false
# ASCII word boundaries are completely oblivious to Unicode characters, so
# even though β is a word character, an ASCII \b reports a word boundary
# between β and an adjacent ASCII word character. (The ASCII \b only looks
# at the leading byte of β.) For Unicode \b, the results are precisely
# inverted.
[[test]]
name = "unicode3"
regex = '\bx\b'
haystack = 'áxβ'
matches = []
[[test]]
name = "unicode3-only-ascii"
regex = '\bx\b'
haystack = 'áxβ'
matches = [[2, 3]]
unicode = false
[[test]]
name = "unicode4"
regex = '\Bx\B'
haystack = 'áxβ'
matches = [[2, 3]]
[[test]]
name = "unicode4-only-ascii"
regex = '\Bx\B'
haystack = 'áxβ'
matches = []
unicode = false
utf8 = false
# The same haystacks as the \B tests further below (unicode5-not*), but with
# \b instead of \B as a sanity check.
[[test]]
name = "unicode5"
regex = '\b'
haystack = "0\U0007EF5E"
matches = [[0, 0], [1, 1]]
[[test]]
name = "unicode5-only-ascii"
regex = '\b'
haystack = "0\U0007EF5E"
matches = [[0, 0], [1, 1]]
unicode = false
utf8 = false
[[test]]
name = "unicode5-noutf8"
regex = '\b'
haystack = '0\xFF\xFF\xFF\xFF'
matches = [[0, 0], [1, 1]]
unescape = true
utf8 = false
[[test]]
name = "unicode5-noutf8-only-ascii"
regex = '\b'
haystack = '0\xFF\xFF\xFF\xFF'
matches = [[0, 0], [1, 1]]
unescape = true
unicode = false
utf8 = false
# Weird special case to ensure that ASCII \B treats each individual code unit
# as a non-word byte. (The specific codepoint is irrelevant. It's an arbitrary
# codepoint that uses 4 bytes in its UTF-8 encoding and is not a member of the
# \w character class.)
[[test]]
name = "unicode5-not"
regex = '\B'
haystack = "0\U0007EF5E"
matches = [[5, 5]]
[[test]]
name = "unicode5-not-only-ascii"
regex = '\B'
haystack = "0\U0007EF5E"
matches = [[2, 2], [3, 3], [4, 4], [5, 5]]
unicode = false
utf8 = false
# This gets no matches since \B only matches in the presence of valid UTF-8
# when Unicode is enabled, even when UTF-8 mode is disabled.
[[test]]
name = "unicode5-not-noutf8"
regex = '\B'
haystack = '0\xFF\xFF\xFF\xFF'
matches = []
unescape = true
utf8 = false
# But this DOES get matches since \B in ASCII mode only looks at individual
# bytes.
[[test]]
name = "unicode5-not-noutf8-only-ascii"
regex = '\B'
haystack = '0\xFF\xFF\xFF\xFF'
matches = [[2, 2], [3, 3], [4, 4], [5, 5]]
unescape = true
unicode = false
utf8 = false
# Some tests of no particular significance.
[[test]]
name = "unicode6"
regex = '\b[0-9]+\b'
haystack = "foo 123 bar 456 quux 789"
matches = [[4, 7], [12, 15], [21, 24]]
[[test]]
name = "unicode7"
regex = '\b[0-9]+\b'
haystack = "foo 123 bar a456 quux 789"
matches = [[4, 7], [22, 25]]
[[test]]
name = "unicode8"
regex = '\b[0-9]+\b'
haystack = "foo 123 bar 456a quux 789"
matches = [[4, 7], [22, 25]]
# A variant of the problem described here:
# https://github.com/google/re2/blob/89567f5de5b23bb5ad0c26cbafc10bdc7389d1fa/re2/dfa.cc#L658-L667
[[test]]
name = "alt-with-assertion-repetition"
regex = '(?:\b|%)+'
haystack = "z%"
bounds = [1, 2]
anchored = true
matches = [[1, 1]]