From cccf7bda651e6b9234e346adbd28c0f2e2330fa6 Mon Sep 17 00:00:00 2001 From: Nicholas Wilson Date: Thu, 15 May 2025 10:00:17 +0100 Subject: [PATCH] Changes to error offsets, fixing inconsistencies --- src/pcre2.h.generic | 1 + src/pcre2.h.in | 1 + src/pcre2_compile.c | 205 ++++++++----- src/pcre2_compile.h | 2 +- src/pcre2_error.c | 3 +- src/pcre2test.c | 69 ++--- testdata/grepoutput | 6 +- testdata/testinput10 | 4 + testdata/testinput12 | 4 + testdata/testinput2 | 2 + testdata/testinput5 | 14 +- testdata/testoutput10 | 8 + testdata/testoutput12-16 | 8 + testdata/testoutput12-32 | 8 + testdata/testoutput18 | 4 +- testdata/testoutput2 | 632 ++++++++++++++++++++------------------- testdata/testoutput21 | 4 +- testdata/testoutput23 | 4 +- testdata/testoutput28 | 2 +- testdata/testoutput5 | 64 ++-- 20 files changed, 564 insertions(+), 481 deletions(-) diff --git a/src/pcre2.h.generic b/src/pcre2.h.generic index 1a2f4a730..761f72175 100644 --- a/src/pcre2.h.generic +++ b/src/pcre2.h.generic @@ -345,6 +345,7 @@ pcre2_pattern_convert(). */ #define PCRE2_ERROR_PERL_ECLASS_UNEXPECTED_CHAR 216 #define PCRE2_ERROR_EXPECTED_CAPTURE_GROUP 217 #define PCRE2_ERROR_MISSING_OPENING_PARENTHESIS 218 +#define PCRE2_ERROR_MISSING_NUMBER_TERMINATOR 219 /* "Expected" matching error codes: no match and partial match. */ diff --git a/src/pcre2.h.in b/src/pcre2.h.in index 3eec16d26..93d44a636 100644 --- a/src/pcre2.h.in +++ b/src/pcre2.h.in @@ -345,6 +345,7 @@ pcre2_pattern_convert(). */ #define PCRE2_ERROR_PERL_ECLASS_UNEXPECTED_CHAR 216 #define PCRE2_ERROR_EXPECTED_CAPTURE_GROUP 217 #define PCRE2_ERROR_MISSING_OPENING_PARENTHESIS 218 +#define PCRE2_ERROR_MISSING_NUMBER_TERMINATOR 219 /* "Expected" matching error codes: no match and partial match. 
*/ diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index 5010fa64f..64ceb8048 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -1566,6 +1566,13 @@ else if ((i = escapes[c - ESCAPES_FIRST]) != 0) goto COME_FROM_NU; } #endif + + /* Improve error offset. */ + ptr = p + 2; + while (ptr < ptrend && XDIGIT(*ptr) != 0xff) ptr++; + while (ptr < ptrend && (*ptr == CHAR_SPACE || *ptr == CHAR_HT)) ptr++; + if (ptr < ptrend && *ptr == CHAR_RIGHT_CURLY_BRACKET) ptr++; + *errorcodeptr = ERR93; } @@ -1574,6 +1581,7 @@ else if ((i = escapes[c - ESCAPES_FIRST]) != 0) else if (isclass || cb == NULL) { + ptr++; /* Skip over the opening brace */ *errorcodeptr = ERR37; } @@ -1584,7 +1592,10 @@ else if ((i = escapes[c - ESCAPES_FIRST]) != 0) { if (!read_repeat_counts(&p, ptrend, NULL, NULL, errorcodeptr) && *errorcodeptr == 0) + { + ptr++; /* Skip over the opening brace */ *errorcodeptr = ERR37; + } } } } @@ -1609,7 +1620,7 @@ else c != CHAR_x && c != CHAR_g) { *errorcodeptr = ERR3; - return 0; + goto EXIT; } alt_bsux = FALSE; /* Do not modify \x handling */ } @@ -1634,7 +1645,9 @@ else because otherwise \u{ 12} (for example) would be treated as u{12}. */ case CHAR_u: - if (!alt_bsux) *errorcodeptr = ERR37; else + if (!alt_bsux) + *errorcodeptr = ERR37; + else { uint32_t xc; @@ -1760,8 +1773,8 @@ else if (p >= ptrend || *p != CHAR_GREATER_THAN_SIGN) { - /* not advancing ptr; report error at the \g character */ - *errorcodeptr = ERR57; + ptr = p; + *errorcodeptr = ERR119; /* Missing terminator for number */ break; } @@ -1797,8 +1810,8 @@ else if (p >= ptrend || *p != CHAR_RIGHT_CURLY_BRACKET) { - /* not advancing ptr; report error at the \g character */ - *errorcodeptr = ERR57; + ptr = p; + *errorcodeptr = ERR119; /* Missing terminator for number */ break; } ptr = p + 1; @@ -1955,12 +1968,12 @@ else with optional spaces or tabs after { and before }. 
*/ case CHAR_o: - if (ptr >= ptrend || *ptr++ != CHAR_LEFT_CURLY_BRACKET) + if (ptr >= ptrend || *ptr != CHAR_LEFT_CURLY_BRACKET) { - ptr--; *errorcodeptr = ERR55; break; } + ptr++; while (ptr < ptrend && (*ptr == CHAR_SPACE || *ptr == CHAR_HT)) ptr++; if (ptr >= ptrend || *ptr == CHAR_RIGHT_CURLY_BRACKET) @@ -1995,19 +2008,19 @@ else while (ptr < ptrend && *ptr >= CHAR_0 && *ptr <= CHAR_7) ptr++; *errorcodeptr = ERR34; } - else if (ptr < ptrend && *ptr++ == CHAR_RIGHT_CURLY_BRACKET) + else if (utf && c >= 0xd800 && c <= 0xdfff && + (xoptions & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) == 0) { - if (utf && c >= 0xd800 && c <= 0xdfff && - (xoptions & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) == 0) - { - ptr--; - *errorcodeptr = ERR73; - } + *errorcodeptr = ERR73; + } + else if (ptr < ptrend && *ptr == CHAR_RIGHT_CURLY_BRACKET) + { + ptr++; } else { - ptr--; *errorcodeptr = ERR64; + goto ESCAPE_FAILED_FORWARD; } break; @@ -2076,14 +2089,14 @@ else while (ptr < ptrend && XDIGIT(*ptr) != 0xff) ptr++; *errorcodeptr = ERR34; } - else if (ptr < ptrend && *ptr++ == CHAR_RIGHT_CURLY_BRACKET) + else if (utf && c >= 0xd800 && c <= 0xdfff && + (xoptions & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) == 0) { - if (utf && c >= 0xd800 && c <= 0xdfff && - (xoptions & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) == 0) - { - ptr--; - *errorcodeptr = ERR73; - } + *errorcodeptr = ERR73; + } + else if (ptr < ptrend && *ptr == CHAR_RIGHT_CURLY_BRACKET) + { + ptr++; } /* If the sequence of hex digits (followed by optional space) does not @@ -2093,8 +2106,8 @@ else else { - ptr--; *errorcodeptr = ERR67; + goto ESCAPE_FAILED_FORWARD; } } /* End of \x{} processing */ @@ -2161,7 +2174,7 @@ else if (c < 32 || c > 126) /* Excludes all non-printable ASCII */ { *errorcodeptr = ERR68; - break; + goto ESCAPE_FAILED_FORWARD; } c ^= 0x40; @@ -2179,7 +2192,13 @@ else { if (c == ebcdic_escape_c[i]) break; } - if (i < 32) c = i; else *errorcodeptr = ERR68; + if (i < 32) + c = i; + else + { + *errorcodeptr = ERR68; + goto 
ESCAPE_FAILED_FORWARD; + } } #endif /* EBCDIC */ @@ -2191,16 +2210,25 @@ else default: *errorcodeptr = ERR3; - *ptrptr = ptr - 1; /* Point to the character at fault */ - return 0; + break; } } /* Set the pointer to the next character before returning. */ +EXIT: *ptrptr = ptr; *chptr = c; return escape; + +/* Some errors need to indicate the next character. */ + +ESCAPE_FAILED_FORWARD: +ptr++; +#ifdef SUPPORT_UNICODE +if (utf) FORWARDCHARTEST(ptr, ptrend); +#endif +goto EXIT; } @@ -2540,7 +2568,7 @@ return -1; the name of a subpattern or a (*VERB) or an (*alpha_assertion). The initial pointer must be to the preceding character. If that character is '*' we are reading a verb or alpha assertion name. The pointer is updated to point after -the name, for a VERB or alpha assertion name, or after tha name's terminator +the name, for a VERB or alpha assertion name, or after the name's terminator for a subpattern name. Returning both the offset and the name pointer is redundant information, but some callers use one and some the other, so it is simplest just to return both. When the name is in braces, spaces and tabs are @@ -2593,12 +2621,14 @@ by Unicode properties, and underscores, but must not start with a digit. */ if (utf && is_group) { uint32_t c, type; + PCRE2_SPTR p = ptr; - GETCHAR(c, ptr); + GETCHARINC(c, p); /* Peek at next character */ type = UCD_CHARTYPE(c); if (type == ucp_Nd) { + ptr = p; *errorcodeptr = ERR44; goto FAILED; } @@ -2607,10 +2637,9 @@ if (utf && is_group) { if (type != ucp_Nd && PRIV(ucp_gentype)[type] != ucp_L && c != CHAR_UNDERSCORE) break; - ptr++; - FORWARDCHARTEST(ptr, ptrend); - if (ptr >= ptrend) break; - GETCHAR(c, ptr); + ptr = p; /* Accept character and peek again */ + if (p >= ptrend) break; + GETCHARINC(c, p); type = UCD_CHARTYPE(c); } } @@ -2626,6 +2655,7 @@ won't be recognized. */ { if (is_group && IS_DIGIT(*ptr)) { + ++ptr; *errorcodeptr = ERR44; goto FAILED; } @@ -2638,7 +2668,7 @@ won't be recognized. 
*/ /* Check name length */ -if (ptr > *nameptr + MAX_NAME_SIZE) +if (ptr - *nameptr > MAX_NAME_SIZE) { *errorcodeptr = ERR48; goto FAILED; @@ -3264,12 +3294,6 @@ while (ptr < ptrend) } else { - if (expect_cond_assert > 0) /* A literal is not allowed if we are */ - { /* expecting a conditional assertion, */ - ptr--; /* but an empty \Q\E sequence is OK. */ - errorcode = ERR28; - goto FAILED; - } if (inverbname) { /* Don't use PARSED_LITERAL() because it */ #if PCRE2_CODE_UNIT_WIDTH == 32 /* sets okquantifier. */ @@ -3398,6 +3422,15 @@ while (ptr < ptrend) { if (*ptr == CHAR_Q || *ptr == CHAR_E) { + /* A literal inside a \Q...\E is not allowed if we are expecting a + conditional assertion, but an empty \Q\E sequence is OK. */ + if (expect_cond_assert > 0 && *ptr == CHAR_Q && + !(ptrend - ptr >= 3 && ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)) + { + ptr--; + errorcode = ERR28; + goto FAILED; + } inescq = *ptr == CHAR_Q; ptr++; continue; @@ -3507,9 +3540,9 @@ while (ptr < ptrend) if (!ok) { - ptr--; /* Adjust error offset */ errorcode = ERR28; - goto FAILED; + if (expect_cond_assert == 2) goto FAILED; + goto FAILED_BACK; } } @@ -3585,7 +3618,7 @@ while (ptr < ptrend) else if (escape < 0) { - offset = (PCRE2_SIZE)(ptr - cb->start_pattern - 1); + offset = (PCRE2_SIZE)(ptr - cb->start_pattern); escape = -escape - 1; *parsed_pattern++ = META_BACKREF | (uint32_t)escape; if (escape < 10) @@ -3725,7 +3758,8 @@ while (ptr < ptrend) { if (p >= ptrend || *p != terminator) { - errorcode = ERR57; + ptr = p; + errorcode = ERR119; /* Missing terminator for number */ goto ESCAPE_FAILED; } ptr = p + 1; @@ -3809,7 +3843,7 @@ while (ptr < ptrend) if (!prev_okquantifier) { errorcode = ERR9; - goto FAILED_BACK; // TODO https://github.com/PCRE2Project/pcre2/issues/549 + goto FAILED; } /* Most (*VERB)s are not allowed to be quantified, but an ungreedy @@ -3898,6 +3932,7 @@ while (ptr < ptrend) check_posix_syntax(ptr, ptrend, &tempptr)) { errorcode = (*ptr-- == CHAR_COLON)? 
ERR12 : ERR13; + ptr = tempptr + 2; goto FAILED; } @@ -4125,8 +4160,9 @@ while (ptr < ptrend) /* Validate nesting depth */ if (class_depth_m1 >= ECLASS_NEST_LIMIT - 1) { - errorcode = ERR107; - goto FAILED; /* Classes too deeply nested */ + ptr--; /* Point rightwards at the paren, same as ERR19. */ + errorcode = ERR107; /* Classes too deeply nested */ + goto FAILED; } /* Process the character class start. If the first character is '^', set @@ -4241,7 +4277,8 @@ while (ptr < ptrend) if (c == CHAR_RIGHT_SQUARE_BRACKET && class_depth_m1 != 0) { errorcode = ERR14; - goto FAILED_BACK; + ptr--; /* Correct the offset */ + goto FAILED; } if (c == CHAR_RIGHT_PARENTHESIS && class_depth_m1 < 1) { @@ -4461,7 +4498,6 @@ while (ptr < ptrend) case ESC_R: case ESC_X: errorcode = ERR7; - ptr--; // TODO https://github.com/PCRE2Project/pcre2/issues/549 goto FAILED; case ESC_N: /* Not permitted by Perl either */ @@ -4533,7 +4569,6 @@ while (ptr < ptrend) case ESC_K: case ESC_C: errorcode = ERR7; - ptr--; // TODO https://github.com/PCRE2Project/pcre2/issues/549 goto FAILED; } @@ -4624,7 +4659,7 @@ while (ptr < ptrend) else if (parsed_pattern[-2] > c) /* Check range is in order */ { errorcode = ERR8; - goto FAILED_BACK; // TODO https://github.com/PCRE2Project/pcre2/issues/549 + goto FAILED; } else { @@ -4720,10 +4755,11 @@ while (ptr < ptrend) vn = alasnames; if (!read_name(&ptr, ptrend, utf, 0, &offset, &name, &namelen, &errorcode, cb)) goto FAILED; - if (ptr >= ptrend || *ptr != CHAR_COLON) + if (ptr >= ptrend) goto UNCLOSED_PARENTHESIS; + if (*ptr != CHAR_COLON) { errorcode = ERR95; /* Malformed */ - goto FAILED; + goto FAILED_FORWARD; } /* Scan the table of alpha assertion names */ @@ -5008,7 +5044,6 @@ while (ptr < ptrend) if (!hyphenok) { errorcode = ERR94; - ptr--; /* Correct the offset */ goto FAILED; } optset = &unset; @@ -5082,7 +5117,6 @@ while (ptr < ptrend) default: errorcode = ERR11; - ptr--; /* Correct the offset */ goto FAILED; } } @@ -5151,7 +5185,7 @@ while (ptr < 
ptrend) if (*ptr != CHAR_EQUALS_SIGN) { errorcode = ERR41; - goto FAILED; + goto FAILED_FORWARD; } if (!read_name(&ptr, ptrend, utf, CHAR_RIGHT_PARENTHESIS, &offset, &name, &namelen, &errorcode, cb)) goto FAILED; @@ -5179,10 +5213,16 @@ while (ptr < ptrend) case because (?- followed by a non-digit is an options setting. */ case CHAR_PLUS: - if (ptrend - ptr < 2 || !IS_DIGIT(ptr[1])) + if (ptr + 1 >= ptrend) + { + ++ptr; + goto UNCLOSED_PARENTHESIS; + } + if (!IS_DIGIT(ptr[1])) { errorcode = ERR29; /* Missing number */ - goto FAILED; + ++ptr; + goto FAILED_FORWARD; } /* Fall through */ @@ -5238,6 +5278,7 @@ while (ptr < ptrend) case CHAR_C: if ((xoptions & PCRE2_EXTRA_NEVER_CALLOUT) != 0) { + ptr++; errorcode = ERR103; goto FAILED; } @@ -5290,7 +5331,7 @@ while (ptr < ptrend) if (delimiter == 0) { errorcode = ERR82; - goto FAILED; + goto FAILED_FORWARD; } *parsed_pattern = META_CALLOUT_STRING; @@ -5434,20 +5475,31 @@ while (ptr < ptrend) references its argument twice. */ if (*ptr != CHAR_EQUALS_SIGN || (ptr++, !IS_DIGIT(*ptr))) - goto BAD_VERSION_CONDITION; + { + errorcode = ERR79; + if (!ge) goto FAILED_FORWARD; + goto FAILED; + } if (!read_number(&ptr, ptrend, -1, 1000, ERR79, &major, &errorcode)) goto FAILED; - if (ptr >= ptrend) goto BAD_VERSION_CONDITION; - if (*ptr == CHAR_DOT) + if (ptr < ptrend && *ptr == CHAR_DOT) { - if (++ptr >= ptrend || !IS_DIGIT(*ptr)) goto BAD_VERSION_CONDITION; - minor = (*ptr++ - CHAR_0) * 10; - if (ptr >= ptrend) goto BAD_VERSION_CONDITION; - if (IS_DIGIT(*ptr)) minor += *ptr++ - CHAR_0; - if (ptr >= ptrend || *ptr != CHAR_RIGHT_PARENTHESIS) - goto BAD_VERSION_CONDITION; + if (++ptr >= ptrend || !IS_DIGIT(*ptr)) + { + errorcode = ERR79; + if (ptr < ptrend) goto FAILED_FORWARD; + goto FAILED; + } + if (!read_number(&ptr, ptrend, -1, 1000, ERR79, &minor, &errorcode)) + goto FAILED; + } + if (ptr >= ptrend || *ptr != CHAR_RIGHT_PARENTHESIS) + { + errorcode = ERR79; + if (ptr < ptrend) goto FAILED_FORWARD; + goto FAILED; } 
*parsed_pattern++ = META_COND_VERSION; @@ -5481,6 +5533,7 @@ while (ptr < ptrend) terminator = CHAR_RIGHT_PARENTHESIS; ptr--; /* Point to char before name */ } + if (!read_name(&ptr, ptrend, utf, terminator, &offset, &name, &namelen, &errorcode, cb)) goto FAILED; @@ -5798,7 +5851,7 @@ while (ptr < ptrend) if (nest_depth == 0) /* Unmatched closing parenthesis */ { errorcode = ERR22; - goto FAILED_BACK; // TODO https://github.com/PCRE2Project/pcre2/issues/549 + goto FAILED; } nest_depth--; *parsed_pattern++ = META_KET; @@ -5866,12 +5919,18 @@ return errorcode; FAILED_BACK: ptr--; +#ifdef SUPPORT_UNICODE +if (utf) BACKCHAR(ptr); +#endif goto FAILED; -/* This failure happens several times. */ +/* Some errors need to indicate the next character. */ -BAD_VERSION_CONDITION: -errorcode = ERR79; +FAILED_FORWARD: +ptr++; +#ifdef SUPPORT_UNICODE +if (utf) FORWARDCHARTEST(ptr, ptrend); +#endif goto FAILED; } diff --git a/src/pcre2_compile.h b/src/pcre2_compile.h index 51d9e74a4..292c4e791 100644 --- a/src/pcre2_compile.h +++ b/src/pcre2_compile.h @@ -62,7 +62,7 @@ enum { ERR0 = COMPILE_ERROR_BASE, ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERR88, ERR89, ERR90, ERR91, ERR92, ERR93, ERR94, ERR95, ERR96, ERR97, ERR98, ERR99, ERR100, ERR101, ERR102, ERR103, ERR104, ERR105, ERR106, ERR107, ERR108, ERR109, ERR110, - ERR111, ERR112, ERR113, ERR114, ERR115, ERR116, ERR117, ERR118 }; + ERR111, ERR112, ERR113, ERR114, ERR115, ERR116, ERR117, ERR118, ERR119 }; /* Code values for parsed patterns, which are stored in a vector of 32-bit unsigned ints. Values less than META_END are literal data values. 
The coding diff --git a/src/pcre2_error.c b/src/pcre2_error.c index f7b9a6765..23f80cf00 100644 --- a/src/pcre2_error.c +++ b/src/pcre2_error.c @@ -95,7 +95,7 @@ static const unsigned char compile_error_texts[] = "a relative value of zero is not allowed\0" "conditional subpattern contains more than two branches\0" "atomic assertion expected after (?( or (?(?C)\0" - "digit expected after (?+ or (?-\0" + "digit expected after (?+\0" /* 30 */ "unknown POSIX class name\0" "internal error in pcre2_study(): should not occur\0" @@ -208,6 +208,7 @@ static const unsigned char compile_error_texts[] = "unexpected character in (?[...]) extended character class\0" "expected capture group number or name\0" "missing opening parenthesis\0" + "syntax error in subpattern number (missing terminator?)\0" ; /* Match-time and UTF error texts are in the same format. */ diff --git a/src/pcre2test.c b/src/pcre2test.c index 1260c9dbe..19f222236 100644 --- a/src/pcre2test.c +++ b/src/pcre2test.c @@ -3547,7 +3547,7 @@ else } if (left && start > p) fprintf(f, "..."); -fprintf(f, "%.*s", (int)(end - start), start); +for (; start < end; start++) fprintf(f, "%c", CHAR_OUTPUT(*start)); if (!left && end < p + p_len) fprintf(f, "..."); } #endif @@ -4623,15 +4623,16 @@ switch (rc) used when indicating an error in a capture group or lookaround parentheses. It is more user-friendly to identify the capture group by its start. 
*/ - case PCRE2_ERROR_INVALID_AFTER_PARENS_QUERY: case PCRE2_ERROR_PARENTHESES_NEST_TOO_DEEP: case PCRE2_ERROR_LOOKBEHIND_NOT_FIXED_LENGTH: case PCRE2_ERROR_TOO_MANY_CONDITION_BRANCHES: case PCRE2_ERROR_LOOKBEHIND_TOO_COMPLICATED: case PCRE2_ERROR_LOOKBEHIND_INVALID_BACKSLASH_C: + case PCRE2_ERROR_CALLOUT_NO_STRING_DELIMITER: case PCRE2_ERROR_QUERY_BARJX_NEST_TOO_DEEP: case PCRE2_ERROR_LOOKBEHIND_TOO_LONG: case PCRE2_ERROR_MAX_VAR_LOOKBEHIND_EXCEEDED: + case PCRE2_ERROR_ECLASS_NEST_TOO_DEEP: return 2; /* The standard erroroffset should occur just after the affected portion of @@ -4640,123 +4641,80 @@ switch (rc) case PCRE2_ERROR_END_BACKSLASH: case PCRE2_ERROR_END_BACKSLASH_C: - return 1; case PCRE2_ERROR_UNKNOWN_ESCAPE: - return 2; /* I'd like to fix this */ case PCRE2_ERROR_QUANTIFIER_OUT_OF_ORDER: case PCRE2_ERROR_QUANTIFIER_TOO_BIG: case PCRE2_ERROR_MISSING_SQUARE_BRACKET: - return 1; case PCRE2_ERROR_ESCAPE_INVALID_IN_CLASS: - return 2; /* I'd like to fix this */ case PCRE2_ERROR_CLASS_RANGE_ORDER: - return 3; /* I'd like to fix this */ case PCRE2_ERROR_QUANTIFIER_INVALID: - return 3; /* I'd like to fix this */ + case PCRE2_ERROR_INVALID_AFTER_PARENS_QUERY: case PCRE2_ERROR_POSIX_CLASS_NOT_IN_CLASS: - return 2; /* I'd like to fix this */ case PCRE2_ERROR_POSIX_NO_SUPPORT_COLLATING: - return 3; /* I'd like to fix this */ case PCRE2_ERROR_MISSING_CLOSING_PARENTHESIS: return 1; case PCRE2_ERROR_BAD_SUBPATTERN_REFERENCE: - return 3; /* I'd like to fix this */ + return 3; /* TODO I'd like to fix this, but some of the cases are _hard_ */ case PCRE2_ERROR_MISSING_COMMENT_CLOSING: - return 1; case PCRE2_ERROR_UNMATCHED_CLOSING_PARENTHESIS: - return 2; /* I'd like to fix this */ case PCRE2_ERROR_MISSING_CONDITION_CLOSING: case PCRE2_ERROR_ZERO_RELATIVE_REFERENCE: - return 1; case PCRE2_ERROR_CONDITION_ASSERTION_EXPECTED: - return 3; /* I'd like to fix this */ case PCRE2_ERROR_BAD_RELATIVE_REFERENCE: - return 2; /* I'd like to fix this */ case 
PCRE2_ERROR_UNKNOWN_POSIX_CLASS: case PCRE2_ERROR_CODE_POINT_TOO_BIG: - return 1; case PCRE2_ERROR_UNSUPPORTED_ESCAPE_SEQUENCE: - return 1; /* Inconsistent; \N{...} not placed same as for others */ case PCRE2_ERROR_CALLOUT_NUMBER_TOO_BIG: case PCRE2_ERROR_MISSING_CALLOUT_CLOSING: case PCRE2_ERROR_ESCAPE_INVALID_IN_VERB: - return 1; case PCRE2_ERROR_UNRECOGNIZED_AFTER_QUERY_P: - return 2; /* I'd like to fix this */ case PCRE2_ERROR_MISSING_NAME_TERMINATOR: case PCRE2_ERROR_DUPLICATE_SUBPATTERN_NAME: - return 1; case PCRE2_ERROR_INVALID_SUBPATTERN_NAME: - return 2; /* I'd like to fix this */ case PCRE2_ERROR_UNICODE_PROPERTIES_UNAVAILABLE: case PCRE2_ERROR_MALFORMED_UNICODE_PROPERTY: case PCRE2_ERROR_UNKNOWN_UNICODE_PROPERTY: case PCRE2_ERROR_SUBPATTERN_NAME_TOO_LONG: case PCRE2_ERROR_TOO_MANY_NAMED_SUBPATTERNS: - return 1; case PCRE2_ERROR_CLASS_INVALID_RANGE: - return 3; /* Inconsistent; I'd like to fix this */ case PCRE2_ERROR_OCTAL_BYTE_TOO_BIG: return 1; case PCRE2_ERROR_DEFINE_TOO_MANY_BRANCHES: - return 2; /* Not ideally placed; I'd like to fix this */ + return 2; /* TODO Not ideally placed; I'd like to fix this */ case PCRE2_ERROR_BACKSLASH_O_MISSING_BRACE: case PCRE2_ERROR_BACKSLASH_G_SYNTAX: - return 3; /* For consistency as "braced" items their parse errors should move to the left and indicate to the right*/ case PCRE2_ERROR_PARENS_QUERY_R_MISSING_CLOSING: case PCRE2_ERROR_VERB_UNKNOWN: case PCRE2_ERROR_SUBPATTERN_NUMBER_TOO_BIG: - return 1; case PCRE2_ERROR_SUBPATTERN_NAME_EXPECTED: - return 1; /* For consistency, should move one to the right if there was a non-matching character */ case PCRE2_ERROR_INVALID_OCTAL: - return 2; /* I'd like to fix this */ case PCRE2_ERROR_SUBPATTERN_NAMES_MISMATCH: case PCRE2_ERROR_MARK_MISSING_ARGUMENT: - return 1; case PCRE2_ERROR_INVALID_HEXADECIMAL: - return 2; /* I'd like to fix this */ case PCRE2_ERROR_BACKSLASH_C_SYNTAX: case PCRE2_ERROR_BACKSLASH_K_SYNTAX: - return 3; /* For consistency this should move to the right 
*/ case PCRE2_ERROR_BACKSLASH_N_IN_CLASS: case PCRE2_ERROR_CALLOUT_STRING_TOO_LONG: case PCRE2_ERROR_UNICODE_DISALLOWED_CODE_POINT: return 1; - case PCRE2_ERROR_UTF_IS_DISABLED: - case PCRE2_ERROR_UCP_IS_DISABLED: - return (erroroffset > 0)? 1 : 0; /* Tricksy - can be triggered by options or by (*UTF) */ case PCRE2_ERROR_VERB_NAME_TOO_LONG: case PCRE2_ERROR_BACKSLASH_U_CODE_POINT_TOO_BIG: case PCRE2_ERROR_MISSING_OCTAL_OR_HEX_DIGITS: - return 1; case PCRE2_ERROR_VERSION_CONDITION_SYNTAX: - return 3; /* Ideally we'd be more accurate in placing the erroroffset, but it's acceptable. */ - case PCRE2_ERROR_CALLOUT_NO_STRING_DELIMITER: - return 2; /* I'd like to fix this */ case PCRE2_ERROR_CALLOUT_BAD_STRING_DELIMITER: - return 3; /* I'd like to fix this */ case PCRE2_ERROR_BACKSLASH_C_CALLER_DISABLED: case PCRE2_ERROR_BACKSLASH_C_LIBRARY_DISABLED: - return 1; case PCRE2_ERROR_SUPPORTED_ONLY_IN_UNICODE: - return 3; /* Inconsistent placement in the middle of \N{} */ case PCRE2_ERROR_INVALID_HYPHEN_IN_OPTIONS: - return 3; /* Should be one character to the right */ case PCRE2_ERROR_ALPHA_ASSERTION_UNKNOWN: - return 1; /* Inconsistency; I'd move it one to the right */ case PCRE2_ERROR_SCRIPT_RUN_NOT_AVAILABLE: case PCRE2_ERROR_TOO_MANY_CAPTURES: case PCRE2_ERROR_MISSING_OCTAL_DIGIT: return 1; case PCRE2_ERROR_BACKSLASH_K_IN_LOOKAROUND: - return 3; /* No erroroffset implemented yet, sadly */ + return 3; /* TODO No erroroffset implemented yet, sadly */ case PCRE2_ERROR_OVERSIZE_PYTHON_OCTAL: - return 1; case PCRE2_ERROR_CALLOUT_CALLER_DISABLED: - return 1; /* Should be one to the right for consistency */ - case PCRE2_ERROR_ECLASS_NEST_TOO_DEEP: - return 1; /* Should be one to the left and pointing right for consistency */ case PCRE2_ERROR_ECLASS_INVALID_OPERATOR: case PCRE2_ERROR_ECLASS_UNEXPECTED_OPERATOR: case PCRE2_ERROR_ECLASS_EXPECTED_OPERAND: @@ -4768,8 +4726,17 @@ switch (rc) case PCRE2_ERROR_PERL_ECLASS_UNEXPECTED_CHAR: case PCRE2_ERROR_EXPECTED_CAPTURE_GROUP: case 
PCRE2_ERROR_MISSING_OPENING_PARENTHESIS: + case PCRE2_ERROR_MISSING_NUMBER_TERMINATOR: return 1; + /* These two are a little fiddly. They can be triggered by passed-in options + (when erroroffset is zero), or by text in the pattern "(*UTF)". We only + indicate a pattern error in the latter case. */ + + case PCRE2_ERROR_UTF_IS_DISABLED: + case PCRE2_ERROR_UCP_IS_DISABLED: + return (erroroffset > 0)? 1 : 0; + case PCRE2_ERROR_UTF8_ERR1: case PCRE2_ERROR_UTF8_ERR2: case PCRE2_ERROR_UTF8_ERR3: @@ -6956,7 +6923,7 @@ if (TEST(compiled_code, ==, NULL)) if (direction < 0) { fprintf(outfile, "** Error code %d not implemented in error_direction().\n", errorcode); - fprintf(outfile, " error_direction() is usually return '1' for newly-added errors,\n"); + fprintf(outfile, " error_direction() should usually return '1' for newly-added errors,\n"); fprintf(outfile, " and the offset should be just to the right of the bad character.\n"); return PR_ABEND; } @@ -10660,7 +10627,7 @@ least 128 code units, because it is used for retrieving error messages. */ for (;;) { li = strtol(arg_error, &endptr, 10); - if (S32OVERFLOW(li) || (*endptr != 0 && *endptr != CHAR_COMMA)) + if (S32OVERFLOW(li) || (*endptr != 0 && *endptr != ',')) { fprintf(stderr, "** \"%s\" is not a valid error number list\n", arg_error); yield = 1; diff --git a/testdata/grepoutput b/testdata/grepoutput index f4e899612..13b0c7dbe 100644 --- a/testdata/grepoutput +++ b/testdata/grepoutput @@ -100,7 +100,7 @@ RC=0 ./testdata/grepinputx:To pat or not to pat, that is the question. 
RC=0 ---------------------------- Test 15 ----------------------------- -pcre2grep: Error in command-line regex at offset 4: quantifier does not follow a repeatable item +pcre2grep: Error in command-line regex at offset 5: quantifier does not follow a repeatable item RC=2 ---------------------------- Test 16 ----------------------------- pcre2grep: Failed to open ./testdata/nonexistfile: No such file or directory @@ -435,7 +435,7 @@ RC=0 597:binary RC=0 ---------------------------- Test 46 ------------------------------ -pcre2grep: Error in 1st command-line regex at offset 8: unmatched closing parenthesis +pcre2grep: Error in 1st command-line regex at offset 9: unmatched closing parenthesis RC=2 pcre2grep: Error in 2nd command-line regex at offset 9: missing closing parenthesis RC=2 @@ -989,7 +989,7 @@ RC=0 ---------------------------- Test 126 ----------------------------- ABCXYZ RC=0 -pcre2grep: Error in regex in line 2 of testtemp1grep at offset 4: unmatched closing parenthesis +pcre2grep: Error in regex in line 2 of testtemp1grep at offset 5: unmatched closing parenthesis RC=2 ---------------------------- Test 127 ----------------------------- pattern diff --git a/testdata/testinput10 b/testdata/testinput10 index 7ec8b88c2..c4ee27646 100644 --- a/testdata/testinput10 +++ b/testdata/testinput10 @@ -502,6 +502,10 @@ /(?(á/utf +/^\cģ/utf + +/(?'٠ABC'...)/utf + # Invalid UTF-8 tests /.../g,match_invalid_utf diff --git a/testdata/testinput12 b/testdata/testinput12 index b0d39bb04..8f8491446 100644 --- a/testdata/testinput12 +++ b/testdata/testinput12 @@ -407,6 +407,10 @@ /(?(á/utf +/^\cģ/utf + +/(?'٠ABC'...)/utf + # Invalid UTF-16/32 tests. 
/.../g,match_invalid_utf diff --git a/testdata/testinput2 b/testdata/testinput2 index 47d580efd..0cac6a7e4 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -4107,6 +4107,8 @@ /(?(VERSION=10.101)yes|no)/ +/(?(VERSION=10z)yes|no)/ + # We should see the starting code unit, required code unit, and minimum length set for this regex: /abcd/I diff --git a/testdata/testinput5 b/testdata/testinput5 index 89ffdf333..7f9f6a2d0 100644 --- a/testdata/testinput5 +++ b/testdata/testinput5 @@ -665,8 +665,6 @@ /\g{A}xxx#bх(?'A'123) (?'A'456)/Bx,newline=any,utf -/^\cģ/utf - /(\R*)(.)/s,utf \r\n \r\r\n\n\r @@ -1657,6 +1655,10 @@ "(*UTF)(*UCP)(.UTF).+X(\V+;\^(\D|)!999}(?(?C{7(?C')\H*\S*/^\x5\xa\\xd3\x85n?(;\D*(?m).[^mH+((*UCP)(*U:F)})(?!^)(?'" +/(?(?C'')\Qé/utf + +/(?(?C'')é/utf + /[\pS#moq]/ = @@ -2068,6 +2070,12 @@ /\N{U}/ +/\N{U+}/utf + +/\N{U+1}/ + +/\N{U+1 }/ + # This tests the non-UTF Unicode NEL pattern whitespace character, only # recognized by PCRE2 with /x when there is Unicode support. @@ -2143,8 +2151,6 @@ /(?'AB၌C'...)\g{AB၌C}/utf -/(?'٠ABC'...)/utf - /(?'²ABC'...)/utf /(?'X²ABC'...)/utf diff --git a/testdata/testoutput10 b/testdata/testoutput10 index be9052b63..487316dd4 100644 --- a/testdata/testoutput10 +++ b/testdata/testoutput10 @@ -1638,6 +1638,14 @@ Failed: error 142 at offset 4: syntax error in subpattern name (missing terminat Failed: error 142 at offset 5: syntax error in subpattern name (missing terminator?) here: (?(á |<--| +/^\cģ/utf +Failed: error 168 at offset 5: \c must be followed by a printable ASCII character + here: ^\cģ |<--| + +/(?'٠ABC'...)/utf +Failed: error 144 at offset 5: subpattern name must start with a non-digit + here: (?'٠ |<--| ABC'...) 
+ # Invalid UTF-8 tests /.../g,match_invalid_utf diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16 index 7228af6c0..7ff32c4d8 100644 --- a/testdata/testoutput12-16 +++ b/testdata/testoutput12-16 @@ -1511,6 +1511,14 @@ Failed: error 142 at offset 4: syntax error in subpattern name (missing terminat Failed: error 142 at offset 4: syntax error in subpattern name (missing terminator?) here: (?(á |<--| +/^\cģ/utf +Failed: error 168 at offset 4: \c must be followed by a printable ASCII character + here: ^\cģ |<--| + +/(?'٠ABC'...)/utf +Failed: error 144 at offset 4: subpattern name must start with a non-digit + here: (?'٠ |<--| ABC'...) + # Invalid UTF-16/32 tests. /.../g,match_invalid_utf diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32 index 6dcbe20e3..6d4041e37 100644 --- a/testdata/testoutput12-32 +++ b/testdata/testoutput12-32 @@ -1510,6 +1510,14 @@ Failed: error 142 at offset 4: syntax error in subpattern name (missing terminat Failed: error 142 at offset 4: syntax error in subpattern name (missing terminator?) here: (?(á |<--| +/^\cģ/utf +Failed: error 168 at offset 4: \c must be followed by a printable ASCII character + here: ^\cģ |<--| + +/(?'٠ABC'...)/utf +Failed: error 144 at offset 4: subpattern name must start with a non-digit + here: (?'٠ |<--| ABC'...) + # Invalid UTF-16/32 tests. /.../g,match_invalid_utf diff --git a/testdata/testoutput18 b/testdata/testoutput18 index 1dcde94e3..ec628c5a9 100644 --- a/testdata/testoutput18 +++ b/testdata/testoutput18 @@ -74,7 +74,7 @@ No match: POSIX code 17: match failed 0: abc /(abc)\2/ -Failed: POSIX code 15: bad back reference at offset 6 +Failed: POSIX code 15: bad back reference at offset 7 /(abc\1)/ \= Expect no match @@ -162,7 +162,7 @@ Failed: POSIX code 4: ? * + invalid at offset 100000 ** regerror() message truncated /\[A]{1000000}**/expand,regerror_buffsize=32 -Failed: POSIX code 4: ? * + invalid at offset 1000001 +Failed: POSIX code 4: ? 
* + invalid at offset 1000002 //posix_nosub \=offset=70000 diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 966576bb3..8b463757e 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -120,12 +120,12 @@ Subject length lower bound = 3 No match /ab\idef/ -Failed: error 103 at offset 3: unrecognized character follows \ - here: ab\ |-->| idef +Failed: error 103 at offset 4: unrecognized character follows \ + here: ab\i |<--| def /(?X)ab\idef/ -Failed: error 111 at offset 2: unrecognized character after (? or (?- - here: (? |-->| X)ab\idef +Failed: error 111 at offset 3: unrecognized character after (? or (?- + here: (?X |<--| )ab\idef /x{5,4}/ Failed: error 104 at offset 5: numbers out of order in {} quantifier @@ -140,24 +140,24 @@ Failed: error 106 at offset 5: missing terminating ] for character class here: [abcd |<--| /[\B]/B -Failed: error 107 at offset 2: escape sequence is invalid in character class - here: [\ |-->| B] +Failed: error 107 at offset 3: escape sequence is invalid in character class + here: [\B |<--| ] /[\R]/B -Failed: error 107 at offset 2: escape sequence is invalid in character class - here: [\ |-->| R] +Failed: error 107 at offset 3: escape sequence is invalid in character class + here: [\R |<--| ] /[\X]/B -Failed: error 107 at offset 2: escape sequence is invalid in character class - here: [\ |-->| X] +Failed: error 107 at offset 3: escape sequence is invalid in character class + here: [\X |<--| ] /[z-a]/ -Failed: error 108 at offset 3: range out of order in character class - here: [z- |<-->| a] +Failed: error 108 at offset 4: range out of order in character class + here: [z-a |<--| ] /^*/ -Failed: error 109 at offset 1: quantifier does not follow a repeatable item - here: ^ |<-->| * +Failed: error 109 at offset 2: quantifier does not follow a repeatable item + here: ^* |<--| /(abc/ Failed: error 114 at offset 4: missing closing parenthesis @@ -168,8 +168,8 @@ Failed: error 118 at offset 7: missing ) after (?# comment 
here: (?# abc |<--| /(?z)abc/ -Failed: error 111 at offset 2: unrecognized character after (? or (?- - here: (? |-->| z)abc +Failed: error 111 at offset 3: unrecognized character after (? or (?- + here: (?z |<--| )abc /.*b/I Capture group count = 0 @@ -240,12 +240,12 @@ Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 a b Subject length lower bound = 1 /(ab\2)/ -Failed: error 115 at offset 4: reference to non-existent subpattern - here: (ab\ |<-->| 2) +Failed: error 115 at offset 5: reference to non-existent subpattern + here: (ab\2 |<-->| ) /{4,5}abc/ -Failed: error 109 at offset 4: quantifier does not follow a repeatable item - here: {4,5 |<-->| }abc +Failed: error 109 at offset 5: quantifier does not follow a repeatable item + here: {4,5} |<--| abc /(a)(b)(c)\2/I Capture group count = 3 @@ -341,8 +341,8 @@ No match No match /(a)(b)(c)(d)(e)\6/ -Failed: error 115 at offset 16: reference to non-existent subpattern - here: ...(c)(d)(e)\ |<-->| 6 +Failed: error 115 at offset 17: reference to non-existent subpattern + here: ...c)(d)(e)\6 |<-->| /the quick brown fox/I Capture group count = 0 @@ -366,8 +366,8 @@ Subject length lower bound = 19 No match /ab(?z)cd/ -Failed: error 111 at offset 4: unrecognized character after (? or (?- - here: ab(? |-->| z)cd +Failed: error 111 at offset 5: unrecognized character after (? or (?- + here: ab(?z |<--| )cd /^abc|def/I Capture group count = 0 @@ -393,8 +393,8 @@ Subject length lower bound = 3 3: def /)/ -Failed: error 122 at offset 0: unmatched closing parenthesis - here: |-->| ) +Failed: error 122 at offset 1: unmatched closing parenthesis + here: ) |<--| /a[]b/ Failed: error 106 at offset 4: missing terminating ] for character class @@ -577,16 +577,16 @@ Failed: error 124 at offset 4: missing closing parenthesis for condition here: (?(1 |<--| a)) /(?(?i))/ -Failed: error 128 at offset 2: atomic assertion expected after (?( or (?(?C) - here: (? 
|<-->| (?i)) +Failed: error 128 at offset 3: atomic assertion expected after (?( or (?(?C) + here: (?( |<--| ?i)) /(?(abc))/ Failed: error 115 at offset 3: reference to non-existent subpattern here: (?( |<-->| abc)) /(?(?| (?| a] +Failed: error 108 at offset 5: range out of order in character class + here: a[b-a |<--| ] /a[]b/ Failed: error 106 at offset 4: missing terminating ] for character class @@ -865,44 +865,44 @@ Failed: error 106 at offset 2: missing terminating ] for character class here: a[ |<--| /*a/ -Failed: error 109 at offset 0: quantifier does not follow a repeatable item - here: |<-->| *a +Failed: error 109 at offset 1: quantifier does not follow a repeatable item + here: * |<--| a /(*)b/ -Failed: error 109 at offset 1: quantifier does not follow a repeatable item - here: ( |<-->| *)b +Failed: error 109 at offset 2: quantifier does not follow a repeatable item + here: (* |<--| )b /abc)/ -Failed: error 122 at offset 3: unmatched closing parenthesis - here: abc |-->| ) +Failed: error 122 at offset 4: unmatched closing parenthesis + here: abc) |<--| /(abc/ Failed: error 114 at offset 4: missing closing parenthesis here: (abc |<--| /a**/ -Failed: error 109 at offset 2: quantifier does not follow a repeatable item - here: a* |<-->| * +Failed: error 109 at offset 3: quantifier does not follow a repeatable item + here: a** |<--| /)(/ -Failed: error 122 at offset 0: unmatched closing parenthesis - here: |-->| )( +Failed: error 122 at offset 1: unmatched closing parenthesis + here: ) |<--| ( /\1/ -Failed: error 115 at offset 1: reference to non-existent subpattern - here: \ |<-->| 1 +Failed: error 115 at offset 2: reference to non-existent subpattern + here: \1 |<-->| /\2/ -Failed: error 115 at offset 1: reference to non-existent subpattern - here: \ |<-->| 2 +Failed: error 115 at offset 2: reference to non-existent subpattern + here: \2 |<-->| /(a)|\2/ -Failed: error 115 at offset 5: reference to non-existent subpattern - here: (a)|\ |<-->| 2 +Failed: error 
115 at offset 6: reference to non-existent subpattern + here: (a)|\2 |<-->| /a[b-a]/Ii -Failed: error 108 at offset 4: range out of order in character class - here: a[b- |<-->| a] +Failed: error 108 at offset 5: range out of order in character class + here: a[b-a |<--| ] /a[]b/Ii Failed: error 106 at offset 4: missing terminating ] for character class @@ -913,28 +913,28 @@ Failed: error 106 at offset 2: missing terminating ] for character class here: a[ |<--| /*a/Ii -Failed: error 109 at offset 0: quantifier does not follow a repeatable item - here: |<-->| *a +Failed: error 109 at offset 1: quantifier does not follow a repeatable item + here: * |<--| a /(*)b/Ii -Failed: error 109 at offset 1: quantifier does not follow a repeatable item - here: ( |<-->| *)b +Failed: error 109 at offset 2: quantifier does not follow a repeatable item + here: (* |<--| )b /abc)/Ii -Failed: error 122 at offset 3: unmatched closing parenthesis - here: abc |-->| ) +Failed: error 122 at offset 4: unmatched closing parenthesis + here: abc) |<--| /(abc/Ii Failed: error 114 at offset 4: missing closing parenthesis here: (abc |<--| /a**/Ii -Failed: error 109 at offset 2: quantifier does not follow a repeatable item - here: a* |<-->| * +Failed: error 109 at offset 3: quantifier does not follow a repeatable item + here: a** |<--| /)(/Ii -Failed: error 122 at offset 0: unmatched closing parenthesis - here: |-->| )( +Failed: error 122 at offset 1: unmatched closing parenthesis + here: ) |<--| ( /:(?:/ Failed: error 114 at offset 4: missing closing parenthesis @@ -945,24 +945,24 @@ Failed: error 162 at offset 3: subpattern name expected here: (?< |<--| %)b /a(?{)b/ -Failed: error 111 at offset 3: unrecognized character after (? or (?- - here: a(? |-->| {)b +Failed: error 111 at offset 4: unrecognized character after (? or (?- + here: a(?{ |<--| )b /a(?{{})b/ -Failed: error 111 at offset 3: unrecognized character after (? or (?- - here: a(? 
|-->| {{})b +Failed: error 111 at offset 4: unrecognized character after (? or (?- + here: a(?{ |<--| {})b /a(?{}})b/ -Failed: error 111 at offset 3: unrecognized character after (? or (?- - here: a(? |-->| {}})b +Failed: error 111 at offset 4: unrecognized character after (? or (?- + here: a(?{ |<--| }})b /a(?{"{"})b/ -Failed: error 111 at offset 3: unrecognized character after (? or (?- - here: a(? |-->| {"{"})b +Failed: error 111 at offset 4: unrecognized character after (? or (?- + here: a(?{ |<--| "{"})b /a(?{"{"}})b/ -Failed: error 111 at offset 3: unrecognized character after (? or (?- - here: a(? |-->| {"{"}})b +Failed: error 111 at offset 4: unrecognized character after (? or (?- + here: a(?{ |<--| "{"}})b /(?(1?)a|b)/ Failed: error 124 at offset 4: missing closing parenthesis for condition @@ -2234,11 +2234,11 @@ Subject length lower bound = 1 /[[.ch.]]/I Failed: error 113 at offset 7: POSIX collating elements are not supported - here: [[.ch.] |<-->| ] + here: [[.ch.] |<--| ] /[[=ch=]]/I Failed: error 113 at offset 7: POSIX collating elements are not supported - here: [[=ch=] |<-->| ] + here: [[=ch=] |<--| ] /[[:rhubarb:]]/I Failed: error 130 at offset 12: unknown POSIX class name @@ -2284,8 +2284,8 @@ No match No match /[\200-\110]/I -Failed: error 108 at offset 9: range out of order in character class - here: [\200-\11 |<-->| 0] +Failed: error 108 at offset 10: range out of order in character class + here: [\200-\110 |<--| ] /^(?(0)f|b)oo/I Failed: error 115 at offset 5: reference to non-existent subpattern @@ -3228,20 +3228,20 @@ Last code unit = 'c' Subject length lower bound = 3 /a+?+/I -Failed: error 109 at offset 3: quantifier does not follow a repeatable item - here: a+? |<-->| + +Failed: error 109 at offset 4: quantifier does not follow a repeatable item + here: a+?+ |<--| /a{2,3}?+b/I -Failed: error 109 at offset 7: quantifier does not follow a repeatable item - here: a{2,3}? 
|<-->| +b +Failed: error 109 at offset 8: quantifier does not follow a repeatable item + here: a{2,3}?+ |<--| b /(?U)a+?+/I -Failed: error 109 at offset 7: quantifier does not follow a repeatable item - here: (?U)a+? |<-->| + +Failed: error 109 at offset 8: quantifier does not follow a repeatable item + here: (?U)a+?+ |<--| /a{2,3}?+b/I,ungreedy -Failed: error 109 at offset 7: quantifier does not follow a repeatable item - here: a{2,3}? |<-->| +b +Failed: error 109 at offset 8: quantifier does not follow a repeatable item + here: a{2,3}?+ |<--| b /x(?U)a++b/IB ------------------------------------------------------------------ @@ -3339,16 +3339,16 @@ Last code unit = 'b' Subject length lower bound = 3 /[.x.]/I -Failed: error 113 at offset 0: POSIX collating elements are not supported - here: |<-->| [.x.] +Failed: error 113 at offset 5: POSIX collating elements are not supported + here: [.x.] |<--| /[=x=]/I -Failed: error 113 at offset 0: POSIX collating elements are not supported - here: |<-->| [=x=] +Failed: error 113 at offset 5: POSIX collating elements are not supported + here: [=x=] |<--| /[:x:]/I -Failed: error 112 at offset 0: POSIX named classes are supported only within a class - here: |-->| [:x:] +Failed: error 112 at offset 5: POSIX named classes are supported only within a class + here: [:x:] |<--| /\F/I Failed: error 137 at offset 2: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u @@ -3363,8 +3363,8 @@ Failed: error 137 at offset 2: PCRE2 does not support \F, \L, \l, \N{name}, \U, here: \L |<--| /\N{name}/I -Failed: error 137 at offset 2: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u - here: \N |<--| {name} +Failed: error 137 at offset 3: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u + here: \N{ |<--| name} /\u/I Failed: error 137 at offset 2: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u @@ -3379,24 +3379,24 @@ Failed: error 137 at offset 2: PCRE2 does not support \F, \L, \l, \N{name}, \U, 0: abcd /\N{,}/ -Failed: 
error 137 at offset 2: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u - here: \N |<--| {,} +Failed: error 137 at offset 3: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u + here: \N{ |<--| ,} /\N{25,ab}/ -Failed: error 137 at offset 2: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u - here: \N |<--| {25,ab} +Failed: error 137 at offset 3: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u + here: \N{ |<--| 25,ab} /[\N]/ Failed: error 171 at offset 3: \N is not supported in a class here: [\N |<--| ] /[\N{4}]/ -Failed: error 137 at offset 3: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u - here: [\N |<--| {4}] +Failed: error 137 at offset 4: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u + here: [\N{ |<--| 4}] /[\N{name}]/ -Failed: error 137 at offset 3: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u - here: [\N |<--| {name}] +Failed: error 137 at offset 4: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u + here: [\N{ |<--| name}] /a{1,3}b/ungreedy ab @@ -3779,8 +3779,8 @@ Failed: error 138 at offset 6: number after (?C is greater than 255 here: (?C256 |<--| )ab /(?Cab)xx/I -Failed: error 182 at offset 3: unrecognized string delimiter follows (?C - here: (?C |<-->| ab)xx +Failed: error 182 at offset 4: unrecognized string delimiter follows (?C + here: (?Ca |<--| b)xx /(?C12vr)x/I Failed: error 139 at offset 5: closing parenthesis for (?C expected @@ -7327,8 +7327,8 @@ Failed: error 142 at offset 15: syntax error in subpattern name (missing termina here: ...c>x)(?'xyz |<--| >y) /(?P'abc'x)(?Py)/I -Failed: error 141 at offset 3: unrecognized character after (?P - here: (?P |-->| 'abc'x)(?P... +Failed: error 141 at offset 4: unrecognized character after (?P + here: (?P' |<--| abc'x)(?P<... 
/^(?:(?(ZZ)a|b)(?X))+/ bXaX @@ -7845,15 +7845,15 @@ Failed: error 115 at offset 8: reference to non-existent subpattern /^(a)\g/ Failed: error 157 at offset 6: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number - here: ^(a)\g |<-->| + here: ^(a)\g |<--| /^(a)\g{0}/ Failed: error 115 at offset 9: reference to non-existent subpattern here: ^(a)\g{0} |<-->| /^(a)\g{3/ -Failed: error 157 at offset 6: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number - here: ^(a)\g |<-->| {3 +Failed: error 219 at offset 8: syntax error in subpattern number (missing terminator?) + here: ^(a)\g{3 |<--| /^(a)\g{aa}/ Failed: error 115 at offset 7: reference to non-existent subpattern @@ -8877,12 +8877,12 @@ Failed: error 115 at offset 9: reference to non-existent subpattern here: (?)(?& |<-->| aaaaaaaaaa... /(?+-a)/ -Failed: error 129 at offset 2: digit expected after (?+ or (?- - here: (? |-->| +-a) +Failed: error 129 at offset 4: digit expected after (?+ + here: (?+- |<--| a) /(?-+a)/ -Failed: error 111 at offset 3: unrecognized character after (? or (?- - here: (?- |-->| +a) +Failed: error 111 at offset 4: unrecognized character after (? 
or (?- + here: (?-+ |<--| a) /(?(-1))/ Failed: error 115 at offset 5: reference to non-existent subpattern @@ -8914,11 +8914,11 @@ Failed: error 162 at offset 3: subpattern name expected /\k/ Failed: error 169 at offset 2: \k is not followed by a braced, angle-bracketed, or quoted name - here: \k |<-->| + here: \k |<--| /\kabc/ Failed: error 169 at offset 2: \k is not followed by a braced, angle-bracketed, or quoted name - here: \k |<-->| abc + here: \k |<--| abc /(?P=)/ Failed: error 162 at offset 4: subpattern name expected @@ -8965,8 +8965,8 @@ Failed: error 130 at offset 9: unknown POSIX class name here: [[:a\dz:] |<--| ] /(^(a|b\g<-1'c))/ -Failed: error 157 at offset 8: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number - here: (^(a|b\g |<-->| <-1'c)) +Failed: error 219 at offset 11: syntax error in subpattern number (missing terminator?) + here: ...^(a|b\g<-1 |<--| 'c)) /^(?+1)(?x|y){0}z/ xzxx @@ -9114,8 +9114,8 @@ No match 0: a /a(*FAIL)+b/ -Failed: error 109 at offset 8: quantifier does not follow a repeatable item - here: a(*FAIL) |<-->| +b +Failed: error 109 at offset 9: quantifier does not follow a repeatable item + here: a(*FAIL)+ |<--| b /(abc|pqr|123){0}[xyz]/I Capture group count = 1 @@ -10741,8 +10741,8 @@ No match #if !ebcdic /^\cģ/ -Failed: error 168 at offset 3: \c must be followed by a printable ASCII character - here: ^\c |<-->| ģ +Failed: error 168 at offset 4: \c must be followed by a printable ASCII character + here: ^\c |<--| #endif @@ -13498,21 +13498,21 @@ Subject length lower bound = 1 # End of special auto-possessive tests /^A\o{1239}B/ -Failed: error 164 at offset 8: non-octal character in \o{} (closing brace missing?) - here: ^A\o{123 |-->| 9}B +Failed: error 164 at offset 9: non-octal character in \o{} (closing brace missing?) 
+ here: ^A\o{1239 |<--| }B A\123B /^A\oB/ Failed: error 155 at offset 4: missing opening brace after \o - here: ^A\o |<-->| B + here: ^A\o |<--| B /^A\x{zz}B/ -Failed: error 167 at offset 5: non-hex character in \x{} (closing brace missing?) - here: ^A\x{ |-->| zz}B +Failed: error 167 at offset 6: non-hex character in \x{} (closing brace missing?) + here: ^A\x{z |<--| z}B /^A\x{12Z/ -Failed: error 167 at offset 7: non-hex character in \x{} (closing brace missing?) - here: ^A\x{12 |-->| Z +Failed: error 167 at offset 8: non-hex character in \x{} (closing brace missing?) + here: ^A\x{12Z |<--| /^A\x{/ Failed: error 178 at offset 5: digits missing after \x or in \x{} or \o{} or \N{U+} @@ -13552,25 +13552,25 @@ Failed: error 178 at offset 5: digits missing after \x or in \x{} or \o{} or \N{ /[a-[:digit:]]+/ Failed: error 150 at offset 12: invalid range in character class - here: ...-[:digit:] |<-->| ]+ + here: ...-[:digit:] |<--| ]+ /[A-[:digit:]]+/ Failed: error 150 at offset 12: invalid range in character class - here: ...-[:digit:] |<-->| ]+ + here: ...-[:digit:] |<--| ]+ /[a-[.xxx.]]+/ Failed: error 150 at offset 10: invalid range in character class - here: [a-[.xxx.] |<-->| ]+ + here: [a-[.xxx.] 
|<--| ]+ /[a-[=xxx=]]+/ Failed: error 150 at offset 10: invalid range in character class - here: [a-[=xxx=] |<-->| ]+ + here: [a-[=xxx=] |<--| ]+ #if !ebcdic /[a-[!xxx!]]+/ -Failed: error 108 at offset 3: range out of order in character class - here: [a- |<-->| [!xxx!]]+ +Failed: error 108 at offset 4: range out of order in character class + here: [a-[ |<--| !xxx!]]+ /[A-[!xxx!]]+/ A]]] @@ -13580,79 +13580,79 @@ Failed: error 108 at offset 3: range out of order in character class /[a-\d]+/ Failed: error 150 at offset 5: invalid range in character class - here: [a-\d |<-->| ]+ + here: [a-\d |<--| ]+ /(?<0abc>xx)/ -Failed: error 144 at offset 3: subpattern name must start with a non-digit - here: (?< |-->| 0abc>xx) +Failed: error 144 at offset 4: subpattern name must start with a non-digit + here: (?<0 |<--| abc>xx) /(?&1abc)xx(?<1abc>y)/ -Failed: error 144 at offset 3: subpattern name must start with a non-digit - here: (?& |-->| 1abc)xx(?<... +Failed: error 144 at offset 4: subpattern name must start with a non-digit + here: (?&1 |<--| abc)xx(?<1... /(?xx)/ Failed: error 142 at offset 5: syntax error in subpattern name (missing terminator?) here: (?xx) /(?'0abc'xx)/ -Failed: error 144 at offset 3: subpattern name must start with a non-digit - here: (?' 
|-->| 0abc'xx) +Failed: error 144 at offset 4: subpattern name must start with a non-digit + here: (?'0 |<--| abc'xx) /(?P<0abc>xx)/ -Failed: error 144 at offset 4: subpattern name must start with a non-digit - here: (?P< |-->| 0abc>xx) +Failed: error 144 at offset 5: subpattern name must start with a non-digit + here: (?P<0 |<--| abc>xx) /\k<5ghj>/ -Failed: error 144 at offset 3: subpattern name must start with a non-digit - here: \k< |-->| 5ghj> +Failed: error 144 at offset 4: subpattern name must start with a non-digit + here: \k<5 |<--| ghj> /\k'5ghj'/ -Failed: error 144 at offset 3: subpattern name must start with a non-digit - here: \k' |-->| 5ghj' +Failed: error 144 at offset 4: subpattern name must start with a non-digit + here: \k'5 |<--| ghj' /\k{2fgh}/ -Failed: error 144 at offset 3: subpattern name must start with a non-digit - here: \k{ |-->| 2fgh} +Failed: error 144 at offset 4: subpattern name must start with a non-digit + here: \k{2 |<--| fgh} /(?P=8yuki)/ -Failed: error 144 at offset 4: subpattern name must start with a non-digit - here: (?P= |-->| 8yuki) +Failed: error 144 at offset 5: subpattern name must start with a non-digit + here: (?P=8 |<--| yuki) /\g{4df}/ -Failed: error 157 at offset 2: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number - here: \g |<-->| {4df} +Failed: error 219 at offset 4: syntax error in subpattern number (missing terminator?) + here: \g{4 |<--| df} /(?&1abc)xx(?<1abc>y)/ -Failed: error 144 at offset 3: subpattern name must start with a non-digit - here: (?& |-->| 1abc)xx(?<... +Failed: error 144 at offset 4: subpattern name must start with a non-digit + here: (?&1 |<--| abc)xx(?<1... /(?P>1abc)xx(?<1abc>y)/ -Failed: error 144 at offset 4: subpattern name must start with a non-digit - here: (?P> |-->| 1abc)xx(?<... +Failed: error 144 at offset 5: subpattern name must start with a non-digit + here: (?P>1 |<--| abc)xx(?<1... 
/\g'3gh'/ -Failed: error 157 at offset 2: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number - here: \g |<-->| '3gh' +Failed: error 219 at offset 4: syntax error in subpattern number (missing terminator?) + here: \g'3 |<--| gh' /\g<5fg>/ -Failed: error 157 at offset 2: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number - here: \g |<-->| <5fg> +Failed: error 219 at offset 4: syntax error in subpattern number (missing terminator?) + here: \g<5 |<--| fg> /(?(<4gh>)abc)/ -Failed: error 144 at offset 4: subpattern name must start with a non-digit - here: (?(< |-->| 4gh>)abc) +Failed: error 144 at offset 5: subpattern name must start with a non-digit + here: (?(<4 |<--| gh>)abc) /(?('4gh')abc)/ -Failed: error 144 at offset 4: subpattern name must start with a non-digit - here: (?(' |-->| 4gh')abc) +Failed: error 144 at offset 5: subpattern name must start with a non-digit + here: (?('4 |<--| gh')abc) /(?(4gh)abc)/ Failed: error 124 at offset 4: missing closing parenthesis for condition here: (?(4 |<--| gh)abc) /(?(R&6yh)abc)/ -Failed: error 144 at offset 5: subpattern name must start with a non-digit - here: (?(R& |-->| 6yh)abc) +Failed: error 144 at offset 6: subpattern name must start with a non-digit + here: (?(R&6 |<--| yh)abc) /(((a\2)|(a*)\g<-1>))*a?/B ------------------------------------------------------------------ @@ -13778,15 +13778,15 @@ Failed: error 133 at offset 0: parentheses are too deeply nested (stack check) /\othing/ Failed: error 155 at offset 2: missing opening brace after \o - here: \o |<-->| thing + here: \o |<--| thing /\o{}/ Failed: error 178 at offset 3: digits missing after \x or in \x{} or \o{} or \N{U+} here: \o{ |<--| } /\o{whatever}/ -Failed: error 164 at offset 3: non-octal character in \o{} (closing brace missing?) - here: \o{ |-->| whatever} +Failed: error 164 at offset 4: non-octal character in \o{} (closing brace missing?) 
+ here: \o{w |<--| hatever} /\xthing/ Failed: error 178 at offset 2: digits missing after \x or in \x{} or \o{} or \N{U+} @@ -13805,16 +13805,16 @@ Failed: error 178 at offset 3: digits missing after \x or in \x{} or \o{} or \N{ here: \x{ |<--| } /\x{whatever}/ -Failed: error 167 at offset 3: non-hex character in \x{} (closing brace missing?) - here: \x{ |-->| whatever} +Failed: error 167 at offset 4: non-hex character in \x{} (closing brace missing?) + here: \x{w |<--| hatever} /A\8B/ -Failed: error 115 at offset 2: reference to non-existent subpattern - here: A\ |<-->| 8B +Failed: error 115 at offset 3: reference to non-existent subpattern + here: A\8 |<-->| B /A\9B/ -Failed: error 115 at offset 2: reference to non-existent subpattern - here: A\ |<-->| 9B +Failed: error 115 at offset 3: reference to non-existent subpattern + here: A\9 |<-->| B # This one is here because Perl fails to match "12" for this pattern when the $ # is present. @@ -13927,20 +13927,22 @@ No match No match /(?(VERSION<10)yes|no)/ -Failed: error 179 at offset 10: syntax error or number too big in (?(VERSION condition - here: (?(VERSION |<-->| <10)yes|no... +Failed: error 179 at offset 11: syntax error or number too big in (?(VERSION condition + here: ...?(VERSION< |<--| 10)yes|no) /(?(VERSION>10)yes|no)/ Failed: error 179 at offset 11: syntax error or number too big in (?(VERSION condition - here: ...?(VERSION> |<-->| 10)yes|no) + here: ...?(VERSION> |<--| 10)yes|no) /(?(VERSION>=10.0.0)yes|no)/ -Failed: error 179 at offset 16: syntax error or number too big in (?(VERSION condition - here: ...SION>=10.0 |<-->| .0)yes|no) +Failed: error 179 at offset 17: syntax error or number too big in (?(VERSION condition + here: ...ION>=10.0. 
|<--| 0)yes|no) /(?(VERSION=10.101)yes|no)/ -Failed: error 179 at offset 16: syntax error or number too big in (?(VERSION condition - here: ...SION=10.10 |<-->| 1)yes|no) + +/(?(VERSION=10z)yes|no)/ +Failed: error 179 at offset 14: syntax error or number too big in (?(VERSION condition + here: ...ERSION=10z |<--| )yes|no) # We should see the starting code unit, required code unit, and minimum length set for this regex: /abcd/I @@ -14313,17 +14315,17 @@ Failed: error -57 at offset 4 in replacement: bad escape sequence in replacement ZabcZ\=replace=>${1:+ yes : no } 1: Z> yes Z ZabcZ\=replace=>${1:+ \o{Z} : no } -Failed: error -57 at offset 9 in replacement: bad escape sequence in replacement string +Failed: error -57 at offset 10 in replacement: bad escape sequence in replacement string ZabcZ\=replace=>${1:+ yes : \o{Z} } -Failed: error -57 at offset 15 in replacement: bad escape sequence in replacement string +Failed: error -57 at offset 16 in replacement: bad escape sequence in replacement string ZabcZ\=replace=>${1:+ \g<1> : no } 1: Z> b Z ZabcZ\=replace=>${1:+ yes : \g<1> } 1: Z> yes Z ZabcZ\=replace=>${1:+ \g<1 : no } -Failed: error -57 at offset 8 in replacement: bad escape sequence in replacement string +Failed: error -57 at offset 10 in replacement: bad escape sequence in replacement string ZabcZ\=replace=>${1:+ yes : \g<1 } -Failed: error -57 at offset 14 in replacement: bad escape sequence in replacement string +Failed: error -57 at offset 16 in replacement: bad escape sequence in replacement string ZabcZ\=replace=>${1:+ $<1> : no } Failed: error -49 at offset 11 in replacement: unknown substring ZabcZ\=replace=>${1:+ yes : $<1> } @@ -14361,11 +14363,11 @@ Failed: error -35 at offset 4 in replacement: invalid replacement string ZabcZ\=replace=>\g< Failed: error -57 at offset 4 in replacement: bad escape sequence in replacement string ZabcZ\=replace=>\g<1 -Failed: error -57 at offset 3 in replacement: bad escape sequence in replacement string +Failed: error 
-57 at offset 5 in replacement: bad escape sequence in replacement string ZabcZ\=replace=>\g<1Z -Failed: error -57 at offset 3 in replacement: bad escape sequence in replacement string +Failed: error -57 at offset 5 in replacement: bad escape sequence in replacement string ZabcZ\=replace=>\g<1; -Failed: error -57 at offset 3 in replacement: bad escape sequence in replacement string +Failed: error -57 at offset 5 in replacement: bad escape sequence in replacement string "((?=(?(?=(?(?=(?(?=()))))))))" a @@ -14624,8 +14626,8 @@ Matched, but too many substrings # OK in conditions. /(?(?=0)?)+/ -Failed: error 109 at offset 7: quantifier does not follow a repeatable item - here: (?(?=0) |<-->| ?)+ +Failed: error 109 at offset 8: quantifier does not follow a repeatable item + here: (?(?=0)? |<--| )+ /(?(?=0)(?=00)?00765)/ 00765 @@ -14913,8 +14915,8 @@ Failed: error -52: nested recursion at the same subject position # Perl fails to diagnose the absence of an assertion "(?(?.*!.*)?)" -Failed: error 128 at offset 2: atomic assertion expected after (?( or (?(?C) - here: (? |<-->| (?.*!.*... +Failed: error 128 at offset 3: atomic assertion expected after (?( or (?(?C) + here: (?( |<--| ?.*!.*)... 
"X((?2)()*+){2}+"B ------------------------------------------------------------------ @@ -15138,8 +15140,8 @@ No match 0: /((((((((x))))))))\81/ -Failed: error 115 at offset 19: reference to non-existent subpattern - here: ...))))))))\8 |<-->| 1 +Failed: error 115 at offset 20: reference to non-existent subpattern + here: ...)))))))\81 |<-->| xx1 /((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))\80/ @@ -15162,12 +15164,12 @@ Matched, but too many substrings 14: x /\80/ -Failed: error 115 at offset 2: reference to non-existent subpattern - here: \8 |<-->| 0 +Failed: error 115 at offset 3: reference to non-existent subpattern + here: \80 |<-->| /A\8B\9C/ -Failed: error 115 at offset 2: reference to non-existent subpattern - here: A\ |<-->| 8B\9C +Failed: error 115 at offset 3: reference to non-existent subpattern + here: A\8 |<-->| B\9C A8B9C /(?x:((?'a')) # comment (with parentheses) and | vertical @@ -15246,12 +15248,12 @@ Subject length lower bound = 1 ------------------------------------------------------------------ /(\9*+(?2);\3++()2|)++{/ -Failed: error 115 at offset 2: reference to non-existent subpattern - here: (\ |<-->| 9*+(?2);\3... +Failed: error 115 at offset 3: reference to non-existent subpattern + here: (\9 |<-->| *+(?2);\3+... /\V\x85\9*+((?2)\3++()2)*:2/ -Failed: error 115 at offset 7: reference to non-existent subpattern - here: \V\x85\ |<-->| 9*+((?2)\3... +Failed: error 115 at offset 8: reference to non-existent subpattern + here: \V\x85\9 |<-->| *+((?2)\3+... 
/(((?(R)){0,2}) (?'x'((?'R')((?'R')))))/dupnames @@ -15517,8 +15519,8 @@ Failed: error 140 at offset 10: invalid escape sequence in (*VERB) name here: (*MARK:a\z |<--| b)z /(*:ab\t(d\)c)xxx/ -Failed: error 122 at offset 12: unmatched closing parenthesis - here: ...:ab\t(d\)c |-->| )xxx +Failed: error 122 at offset 13: unmatched closing parenthesis + here: ...ab\t(d\)c) |<--| xxx /(*:ab\t(d\)c)xxx/alt_verbnames,mark cxxxz @@ -16266,8 +16268,8 @@ MK: A\x00b ** Unexpected non-hex-digit 'g' at offset 3 in hex pattern: quote missing? /(?J)(?'a'))(?'a')/ -Failed: error 122 at offset 10: unmatched closing parenthesis - here: (?J)(?'a') |-->| )(?'a') +Failed: error 122 at offset 11: unmatched closing parenthesis + here: ...?J)(?'a')) |<--| (?'a') /(?<=((?C)0))/ 9010 @@ -16301,8 +16303,8 @@ Failed: error 160 at offset 3: (*VERB) not recognized or malformed here: (*U |<--| /'(*'/hex -Failed: error 109 at offset 1: quantifier does not follow a repeatable item - here: ( |<-->| * +Failed: error 109 at offset 2: quantifier does not follow a repeatable item + here: (* |<--| /'('/hex Failed: error 114 at offset 1: missing closing parenthesis @@ -16384,17 +16386,17 @@ Failed: error 125 at offset 13: length of lookbehind assertion is not limited /[a-[:digit:]]+/ Failed: error 150 at offset 12: invalid range in character class - here: ...-[:digit:] |<-->| ]+ + here: ...-[:digit:] |<--| ]+ a-a9-a /[A-[:digit:]]+/ Failed: error 150 at offset 12: invalid range in character class - here: ...-[:digit:] |<-->| ]+ + here: ...-[:digit:] |<--| ]+ A-A9-A /[a-\d]+/ Failed: error 150 at offset 5: invalid range in character class - here: [a-\d |<-->| ]+ + here: [a-\d |<--| ]+ a-a9-a /(?abc)(?(R)xyz)/B @@ -16441,12 +16443,12 @@ Failed: error 125 at offset 2: length of lookbehind assertion is not limited /{2,2{2,2/use_length /.>*?\g'0/use_length -Failed: error 157 at offset 6: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number - here: .>*?\g |<-->| '0 
+Failed: error 219 at offset 8: syntax error in subpattern number (missing terminator?) + here: .>*?\g'0 |<--| /.>*?\g'0/ -Failed: error 157 at offset 6: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number - here: .>*?\g |<-->| '0 +Failed: error 219 at offset 8: syntax error in subpattern number (missing terminator?) + here: .>*?\g'0 |<--| /{̈́̈́{'{22{2{{2{'{22{{22{2{'{22{2{{2{{222{{2{'{22{2{22{2{'{22{2{{2{'{22{2{22{2{'{'{22{2{22{2{'{22{2{{2{'{22{2{22{2{'{222{2Ą̈́̈́{'{22{2{{2{'{22{{11{2{'{22{2{{2{{'{22{2{{2{'{22{{22{1{'{22{2{{2{{222{{2{'{22{2{22{2{'{/auto_callout @@ -16458,17 +16460,17 @@ Failed: error 157 at offset 6: \g is not followed by a braced, angle-bracketed, "(?(?C))" Failed: error 128 at offset 6: atomic assertion expected after (?( or (?(?C) - here: (?(?C) |<-->| ) + here: (?(?C) |<--| ) /(?(?(?(?(?(?))))))/ -Failed: error 128 at offset 2: atomic assertion expected after (?( or (?(?C) - here: (? |<-->| (?(?(?(?(?... +Failed: error 128 at offset 3: atomic assertion expected after (?( or (?(?C) + here: (?( |<--| ?(?(?(?(?)... /(?<=(?1))((?s))/anchored /(*:ab)*/ -Failed: error 109 at offset 6: quantifier does not follow a repeatable item - here: (*:ab) |<-->| * +Failed: error 109 at offset 7: quantifier does not follow a repeatable item + here: (*:ab)* |<--| %(*:(:(svvvvvvvvvv:]*[ Z!*;[]*[^[]*!^[+.+{{2,7}' _\\\\\\\\\\\\\)?.:.. 
*w////\\\Q\\\\\\\\\\\\\\\T\\\\\+/?/////'+\\\EEE?/////'+/*+/[^K]?]//(w)%never_backslash_c,alt_verbnames,auto_callout @@ -16530,16 +16532,16 @@ Subject length lower bound = 1 /.+(?(?C'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'))?!XXXX.=X/ Failed: error 128 at offset 63: atomic assertion expected after (?( or (?(?C) - here: ...XXXXXXXX') |<-->| )?!XXXX.=X + here: ...XXXXXXXX') |<--| )?!XXXX.=X .+(?(?C'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'))?!XXXX.=X /[:[:alnum:]-[[a:lnum:]+/ Failed: error 150 at offset 12: invalid range in character class - here: ...[:alnum:]- |<-->| [[a:lnum:]... + here: ...[:alnum:]- |<--| [[a:lnum:]... /((?(?C'')\QX\E(?!((?(?C'')(?!X=X));=)r*X=X));=)/ -Failed: error 128 at offset 11: atomic assertion expected after (?( or (?(?C) - here: ...(?(?C'')\Q |<-->| X\E(?!((?(... +Failed: error 128 at offset 9: atomic assertion expected after (?( or (?(?C) + here: ((?(?C'') |<--| \QX\E(?!((... /((?(?C'')\Q\E(?!((?(?C'')(?!X=X));=)r*X=X));=)/ @@ -16555,8 +16557,8 @@ Failed: error 161 at offset 11: subpattern number is too big here: ...)(\g+65534 |<--| ) /()(\g+65533)/ -Failed: error 115 at offset 10: reference to non-existent subpattern - here: ()(\g+6553 |<-->| 3) +Failed: error 115 at offset 11: reference to non-existent subpattern + here: ...)(\g+65533 |<-->| ) 
/\x00\x00\x00(\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\2*\x00k\d+\x00‎\x00\x00\x00\x00\x00\2*\x00\x00\1*.){36}int^\x00\x00\x00(\1{50779}?)J\w2/I Capture group count = 2 @@ -16668,8 +16670,8 @@ No match /(?| {3 +Failed: error 219 at offset 4: syntax error in subpattern number (missing terminator?) + here: \g{3 |<--| /(a(?C1)(b)(c)d)+/ abcdabcd\=callout_capture @@ -17182,27 +17184,27 @@ Failed: error 171 at offset 5: \N is not supported in a class /[\s-_]/bad_escape_is_literal Failed: error 150 at offset 4: invalid range in character class - here: [\s- |<-->| _] + here: [\s- |<--| _] /[_-\s]/bad_escape_is_literal Failed: error 150 at offset 5: invalid range in character class - here: [_-\s |<-->| ] + here: [_-\s |<--| ] /[\B\R\X]/B -Failed: error 107 at offset 2: escape sequence is invalid in character class - here: [\ |-->| B\R\X] +Failed: error 107 at offset 3: escape sequence is invalid in character class + here: [\B |<--| \R\X] /[\B\R\X]/B,bad_escape_is_literal -Failed: error 107 at offset 2: escape sequence is invalid in character class - here: [\ |-->| B\R\X] +Failed: error 107 at offset 3: escape sequence is invalid in character class + here: [\B |<--| \R\X] /[A-\BP-\RV-\X]/B -Failed: error 107 at offset 4: escape sequence is invalid in character class - here: [A-\ |-->| BP-\RV-\X] +Failed: error 107 at offset 5: escape sequence is invalid in character class + here: [A-\B |<--| P-\RV-\X] 
/[A-\BP-\RV-\X]/B,bad_escape_is_literal -Failed: error 107 at offset 4: escape sequence is invalid in character class - here: [A-\ |-->| BP-\RV-\X] +Failed: error 107 at offset 5: escape sequence is invalid in character class + here: [A-\B |<--| P-\RV-\X] # ---------------------------------------------------------------------- @@ -17388,23 +17390,23 @@ No match /[[:digit:]-a]/ Failed: error 150 at offset 11: invalid range in character class - here: ...[:digit:]- |<-->| a] + here: ...[:digit:]- |<--| a] /[[:digit:]-[:print:]]/ Failed: error 150 at offset 11: invalid range in character class - here: ...[:digit:]- |<-->| [:print:]] + here: ...[:digit:]- |<--| [:print:]] /[\d-a]/ Failed: error 150 at offset 4: invalid range in character class - here: [\d- |<-->| a] + here: [\d- |<--| a] /[\H-z]/ Failed: error 150 at offset 4: invalid range in character class - here: [\H- |<-->| z] + here: [\H- |<--| z] /[\d-[:print:]]/ Failed: error 150 at offset 4: invalid range in character class - here: [\d- |<-->| [:print:]] + here: [\d- |<--| [:print:]] # Perl gets the second of these wrong, giving no match. @@ -17655,16 +17657,16 @@ No match, mark = X No match, mark = X /(?^x-i)AB/ -Failed: error 194 at offset 4: invalid hyphen in option setting - here: (?^x |<-->| -i)AB +Failed: error 194 at offset 5: invalid hyphen in option setting + here: (?^x- |<--| i)AB /(?^-i)AB/ -Failed: error 194 at offset 3: invalid hyphen in option setting - here: (?^ |<-->| -i)AB +Failed: error 194 at offset 4: invalid hyphen in option setting + here: (?^- |<--| i)AB /(?x-i-i)/ -Failed: error 194 at offset 5: invalid hyphen in option setting - here: (?x-i |<-->| -i) +Failed: error 194 at offset 6: invalid hyphen in option setting + here: (?x-i- |<--| i) /(?(?=^))b/I Capture group count = 0 @@ -17979,16 +17981,16 @@ No match No match /(?(*ACCEPT)xxx)/ -Failed: error 128 at offset 2: atomic assertion expected after (?( or (?(?C) - here: (? |<-->| (*ACCEPT)x... 
+Failed: error 128 at offset 3: atomic assertion expected after (?( or (?(?C) + here: (?( |<--| *ACCEPT)xx... /(?(*atomic:xx)xxx)/ Failed: error 128 at offset 10: atomic assertion expected after (?( or (?(?C) - here: (?(*atomic |<-->| :xx)xxx) + here: (?(*atomic |<--| :xx)xxx) /(?(*script_run:xxx)zzz)/ Failed: error 128 at offset 14: atomic assertion expected after (?( or (?(?C) - here: ...script_run |<-->| :xxx)zzz) + here: ...script_run |<--| :xxx)zzz) /foobar/ the foobar thing\=copy_matched_subject @@ -18060,12 +18062,12 @@ Failed: error 197 at offset 131075: too many capturing groups (maximum 65535) MK: XX /(*:\)?/ -Failed: error 109 at offset 5: quantifier does not follow a repeatable item - here: (*:\) |<-->| ? +Failed: error 109 at offset 6: quantifier does not follow a repeatable item + here: (*:\)? |<--| /(*:\Q \E){5}/alt_verbnames -Failed: error 109 at offset 11: quantifier does not follow a repeatable item - here: ...*:\Q \E){5 |<-->| } +Failed: error 109 at offset 12: quantifier does not follow a repeatable item + here: ...:\Q \E){5} |<--| /(?=abc)/I Capture group count = 0 @@ -18214,17 +18216,17 @@ Failed: error 114 at offset 6: missing closing parenthesis here: (*pla: |<--| /(*pla/ -Failed: error 195 at offset 5: (*alpha_assertion) not recognized +Failed: error 114 at offset 5: missing closing parenthesis here: (*pla |<--| /(*pla}abc)/ -Failed: error 195 at offset 5: (*alpha_assertion) not recognized - here: (*pla |<--| }abc) +Failed: error 195 at offset 6: (*alpha_assertion) not recognized + here: (*pla} |<--| abc) # Expect error: not allowed as a condition /(?(*napla:xx)bc)/ Failed: error 128 at offset 9: atomic assertion expected after (?( or (?(?C) - here: (?(*napla |<-->| :xx)bc) + here: (?(*napla |<--| :xx)bc) /\A(*pla:.*\b(\w++))(?>.*?\b\1\b){3}/ word1 word3 word1 word2 word3 word2 word2 word1 word3 word4 @@ -18789,7 +18791,7 @@ MK: >\x00< /(?(VERSION=0.0/ Failed: error 179 at offset 14: syntax error or number too big in (?(VERSION condition - 
here: ...ERSION=0.0 |<-->| + here: ...ERSION=0.0 |<--| # Perl has made \K in lookarounds an error. PCRE2 now rejects as well, unless # explicitly authorized. @@ -18975,8 +18977,8 @@ Failed: error 187 at offset 0: lookbehind assertion is too long here: |-->| (?| } +Failed: error 109 at offset 6: quantifier does not follow a repeatable item + here: A+{,3} |<--| /(\g{+1}Z|(A))+/ BAAZCD @@ -19312,8 +19314,8 @@ Failed: error 115 at offset 19: reference to non-existent subpattern here: ...bstring:(< |<-->| name>)a|b) /()(*scs:(1)+a)/ -Failed: error 109 at offset 11: quantifier does not follow a repeatable item - here: ...)(*scs:(1) |<-->| +a) +Failed: error 109 at offset 12: quantifier does not follow a repeatable item + here: ...(*scs:(1)+ |<--| a) /()(*scs:(1,1,1,1,1,1,1,1,2))/ Failed: error 115 at offset 25: reference to non-existent subpattern @@ -19577,7 +19579,7 @@ Failed: error 115 at offset 26: reference to non-existent subpattern /(\w++)=(?(*scs:(1)(abc))pqr|xyz)(\w++)/ Failed: error 128 at offset 14: atomic assertion expected after (?( or (?(?C) - here: ...+)=(?(*scs |<-->| :(1)(abc))... + here: ...+)=(?(*scs |<--| :(1)(abc))... 
# Tests for scan_substring @@ -20392,8 +20394,8 @@ Failed: error -57 at offset 3 in replacement: bad escape sequence in replacement 0: \x00-\x00-\x01-\x0a-\x0a3-S-S4 /\1/ -Failed: error 115 at offset 1: reference to non-existent subpattern - here: \ |<-->| 1 +Failed: error 115 at offset 2: reference to non-existent subpattern + here: \1 |<-->| /\12/ \o{12} @@ -20412,12 +20414,12 @@ Failed: error -49 at offset 2 in replacement: unknown substring 0: \x00-\x00-\x01-\x0a-\x0a3-S-S4 /\1/python_octal -Failed: error 115 at offset 1: reference to non-existent subpattern - here: \ |<-->| 1 +Failed: error 115 at offset 2: reference to non-existent subpattern + here: \1 |<-->| /\12/python_octal -Failed: error 115 at offset 2: reference to non-existent subpattern - here: \1 |<-->| 2 +Failed: error 115 at offset 3: reference to non-existent subpattern + here: \12 |<-->| /abc/substitute_extended,python_octal abc\=replace=\0-\00-\01-\012-\0123-\123-\1234 @@ -20440,8 +20442,8 @@ Failed: error -49 at offset 3 in replacement: unknown substring 0: ab /a(?C)b/never_callout -Failed: error 203 at offset 3: using callouts is disabled by the application - here: a(? 
|<--| C)b +Failed: error 203 at offset 4: using callouts is disabled by the application + here: a(?C |<--| )b # -------------- @@ -21361,11 +21363,11 @@ No match /[\d-z]/B,alt_extended_class Failed: error 150 at offset 4: invalid range in character class - here: [\d- |<-->| z] + here: [\d- |<--| z] /[z-\d]/B,alt_extended_class Failed: error 150 at offset 5: invalid range in character class - here: [z-\d |<-->| ] + here: [z-\d |<--| ] /[abc -- b]+/B,alt_extended_class ------------------------------------------------------------------ @@ -21385,12 +21387,12 @@ Failed: error 150 at offset 5: invalid range in character class No match /[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[z]&&a]]]]]]]]]]]]]]]/alt_extended_class -Failed: error 207 at offset 115: extended character class nesting is too deep - here: ...a[b]&&a[a[ |<--| z]&&a]]]]]... +Failed: error 207 at offset 114: extended character class nesting is too deep + here: ...[a[b]&&a[a |-->| [z]&&a]]]]... /[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a[b]&&a[a&&a[z]]]]]]]]]]]]]]]]/alt_extended_class -Failed: error 207 at offset 118: extended character class nesting is too deep - here: ...]&&a[a&&a[ |<--| z]]]]]]]]]... +Failed: error 207 at offset 117: extended character class nesting is too deep + here: ...b]&&a[a&&a |-->| [z]]]]]]]]... 
/[z&/alt_extended_class Failed: error 106 at offset 3: missing terminating ] for character class @@ -21434,16 +21436,16 @@ No match # bad-escape-is-literal does nothing inside (?[...]) /[ \j ]/ -Failed: error 103 at offset 3: unrecognized character follows \ - here: [ \ |-->| j ] +Failed: error 103 at offset 4: unrecognized character follows \ + here: [ \j |<--| ] /[ /\ Failed: error 101 at offset 3: \ at end of pattern here: [ \ |<--| /(?[ \j ])/ -Failed: error 103 at offset 5: unrecognized character follows \ - here: (?[ \ |-->| j ]) +Failed: error 103 at offset 6: unrecognized character follows \ + here: (?[ \j |<--| ]) /(?[ /\ Failed: error 101 at offset 5: \ at end of pattern @@ -21461,20 +21463,20 @@ Failed: error 106 at offset 3: missing terminating ] for character class here: [ \ |<--| /(?[ \j ])/bad_escape_is_literal -Failed: error 103 at offset 5: unrecognized character follows \ - here: (?[ \ |-->| j ]) +Failed: error 103 at offset 6: unrecognized character follows \ + here: (?[ \j |<--| ]) /(?[ /\bad_escape_is_literal Failed: error 101 at offset 5: \ at end of pattern here: (?[ \ |<--| /(?[ [\j] ])/bad_escape_is_literal -Failed: error 103 at offset 6: unrecognized character follows \ - here: (?[ [\ |-->| j] ]) +Failed: error 103 at offset 7: unrecognized character follows \ + here: (?[ [\j |<--| ] ]) /(?[ (\j) ])/bad_escape_is_literal -Failed: error 103 at offset 6: unrecognized character follows \ - here: (?[ (\ |-->| j) ]) +Failed: error 103 at offset 7: unrecognized character follows \ + here: (?[ (\j |<--| ) ]) # We can't test error cases in testinput1 @@ -21544,7 +21546,7 @@ Failed: error 114 at offset 6: missing closing parenthesis /(?[\n)])/ Failed: error 122 at offset 6: unmatched closing parenthesis - here: (?[\n) |-->| ]) + here: (?[\n) |<--| ]) /(?[^\n])/ Failed: error 209 at offset 4: unexpected operator in extended character class (no preceding operand) @@ -21618,54 +21620,54 @@ No match No match 
/(?[\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+([\n]&\n))))))))))))))])/ -Failed: error 207 at offset 158: extended character class nesting is too deep - here: ...+[a]&\n+([ |<--| \n]&\n))))... +Failed: error 207 at offset 157: extended character class nesting is too deep + here: ...n+[a]&\n+( |-->| [\n]&\n)))... /(?[\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n&[\n]))))))))))))))])/ -Failed: error 207 at offset 161: extended character class nesting is too deep - here: ...]&\n+(\n&[ |<--| \n])))))))... +Failed: error 207 at offset 160: extended character class nesting is too deep + here: ...a]&\n+(\n& |-->| [\n]))))))... /(?[\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+(\n+[a]&\n+((\n)&\n))))))))))))))])/ -Failed: error 207 at offset 158: extended character class nesting is too deep - here: ...+[a]&\n+(( |<--| \n)&\n))))... +Failed: error 207 at offset 157: extended character class nesting is too deep + here: ...n+[a]&\n+( |-->| (\n)&\n)))... 
# -------------- /[[:digit:] -Z]/xx Failed: error 150 at offset 14: invalid range in character class - here: ...igit:] - |<-->| Z] + here: ...igit:] - |<--| Z] /[\d -Z]/xx Failed: error 150 at offset 7: invalid range in character class - here: [\d - |<-->| Z] + here: [\d - |<--| Z] /[[:digit:]\E-H]/ Failed: error 150 at offset 13: invalid range in character class - here: ...digit:]\E- |<-->| H] + here: ...digit:]\E- |<--| H] /[[:digit:]\Q\E-H]+/ Failed: error 150 at offset 15: invalid range in character class - here: ...git:]\Q\E- |<-->| H]+ + here: ...git:]\Q\E- |<--| H]+ /[z-[:space:]]/ Failed: error 150 at offset 12: invalid range in character class - here: ...-[:space:] |<-->| ] + here: ...-[:space:] |<--| ] /[z-\d]/ Failed: error 150 at offset 5: invalid range in character class - here: [z-\d |<-->| ] + here: [z-\d |<--| ] /[[:space:]-z]/ Failed: error 150 at offset 11: invalid range in character class - here: ...[:space:]- |<-->| z] + here: ...[:space:]- |<--| z] /[\d-z]/ Failed: error 150 at offset 4: invalid range in character class - here: [\d- |<-->| z] + here: [\d- |<--| z] /[\d-\w]/ Failed: error 150 at offset 4: invalid range in character class - here: [\d- |<-->| \w] + here: [\d- |<--| \w] /[\Q/ Failed: error 106 at offset 3: missing terminating ] for character class @@ -21823,24 +21825,24 @@ Failed: error 106 at offset 3: missing terminating ] for character class #endif /[\A]/ -Failed: error 107 at offset 2: escape sequence is invalid in character class - here: [\ |-->| A] +Failed: error 107 at offset 3: escape sequence is invalid in character class + here: [\A |<--| ] /[\Z]/ -Failed: error 107 at offset 2: escape sequence is invalid in character class - here: [\ |-->| Z] +Failed: error 107 at offset 3: escape sequence is invalid in character class + here: [\Z |<--| ] /[\z]/ -Failed: error 107 at offset 2: escape sequence is invalid in character class - here: [\ |-->| z] +Failed: error 107 at offset 3: escape sequence is invalid in character class + 
here: [\z |<--| ] /[\G]/ -Failed: error 107 at offset 2: escape sequence is invalid in character class - here: [\ |-->| G] +Failed: error 107 at offset 3: escape sequence is invalid in character class + here: [\G |<--| ] /[\K]/ -Failed: error 107 at offset 2: escape sequence is invalid in character class - here: [\ |-->| K] +Failed: error 107 at offset 3: escape sequence is invalid in character class + here: [\K |<--| ] /[\g<1>]/ < diff --git a/testdata/testoutput21 b/testdata/testoutput21 index acae70b23..f17a9c035 100644 --- a/testdata/testoutput21 +++ b/testdata/testoutput21 @@ -93,7 +93,7 @@ Subject length lower bound = 5 0: X /[\C]/ -Failed: error 107 at offset 2: escape sequence is invalid in character class - here: [\ |-->| C] +Failed: error 107 at offset 3: escape sequence is invalid in character class + here: [\C |<--| ] # End of testinput21 diff --git a/testdata/testoutput23 b/testdata/testoutput23 index 808ddc90f..eea97ea2e 100644 --- a/testdata/testoutput23 +++ b/testdata/testoutput23 @@ -7,7 +7,7 @@ Failed: error 185 at offset 3: using \C is disabled in this PCRE2 library here: a\C |<--| b /a[\C]b/ -Failed: error 107 at offset 3: escape sequence is invalid in character class - here: a[\ |-->| C]b +Failed: error 107 at offset 4: escape sequence is invalid in character class + here: a[\C |<--| ]b # End of testinput23 diff --git a/testdata/testoutput28 b/testdata/testoutput28 index 6652dc39c..60a82f9ba 100644 --- a/testdata/testoutput28 +++ b/testdata/testoutput28 @@ -212,6 +212,6 @@ No match /\c&/ Failed: error 168 at offset 3: \c must be followed by a letter or one of @[\]^_? 
- here: \c& |<-->| + here: \c& |<--| # End diff --git a/testdata/testoutput5 b/testdata/testoutput5 index 9dcb3643c..115679baa 100644 --- a/testdata/testoutput5 +++ b/testdata/testoutput5 @@ -305,8 +305,8 @@ Last code unit = 'b' Subject length lower bound = 2 /[\x{200}-\x{100}]/utf -Failed: error 108 at offset 15: range out of order in character class - here: ...00}-\x{100 |<-->| }] +Failed: error 108 at offset 16: range out of order in character class + here: ...0}-\x{100} |<--| ] /[Ā-Ą]/utf \x{100} @@ -1416,10 +1416,6 @@ Partial match: X End ------------------------------------------------------------------ -/^\cģ/utf -Failed: error 168 at offset 3: \c must be followed by a printable ASCII character - here: ^\c |<-->| ģ - /(\R*)(.)/s,utf \r\n 0: \x{0d} @@ -4050,8 +4046,8 @@ global repeat returned the same match as previous 0+ "\xa\xf<(.\pZ*\P{Xwd}+^\xa8\3'3yq.::?(?J:()\xd1+!~:3'(8?:)':(?'d'(?'d'^u]!.+.+\\A\Ah(n+?9){7}+\K;(?'X'u'(?'c'(?'z'(?\xb::\xf0'|\xd3(\xae?'w(z\x8?P>l)\x8?P>a)'\H\R\xd1+!!~:3'(?:h$N{26875}\W+?\\=D{2}\x89(?i:Uy0\N({2\xa(\v\x85*){y*\A(()\p{L}+?\P{^Xan}'+?\xff\+pS\?|).{;y*\A(()\p{L}+?\8}\d?1(|)(/1){7}.+[Lp{Me}].\s\xdcC*?(?())(?))(?\g{d});\g{x}\x11\g{d}\x81\|$((?'X'\'X'(?'W''\x92()'9'\x83*))\xba*\!?^ <){)':;\xcc4'\xd1'(?'X'28))?-%--\x95$9*\4'|\xd1((''e\x94*$9:)*#(?'R')3)\x7?('P\xed')\\x16:;()\x1e\x10*:(?)\xd1+0!~:(?)'d'E:yD!\s(?'R'\x1e;\x10:U))|'\x9g!\xb0*){)\\x16:;()\x1e\x10\x87*:(?)\xd1+!~:(?)'}'\d'E:yD!\s(?'R'\x1e;\x10:U))|'))|)g!\xb0*R+9{29+)#(?'P'})*?pS\{3,}\x85,{0,}l{*UTF)(\xe{7}){3722,{9,}d{2,?|))|{)\(A?&d}}{\xa,}2}){3,}7,l{)22}(,}l:7{2,4}}29\x19+)#?'P'})*v?))\x5" -Failed: error 122 at offset 1227: unmatched closing parenthesis - here: ...#?'P'})*v? |-->| ))\x5 +Failed: error 122 at offset 1228: unmatched closing parenthesis + here: ...?'P'})*v?) 
|<--| )\x5 /$(&.+[\p{Me}].\s\xdcC*?(?())(?)\xd1+!~:(?)''(d'E:yD!\s(?'R'\x1e;\x10:U))|')g!\xb0*){29+))#(?'P'})*?/ @@ -4059,6 +4055,14 @@ Failed: error 122 at offset 1227: unmatched closing parenthesis Failed: error 162 at offset 113: subpattern name expected here: ...})(?!^)(?' |<--| +/(?(?C'')\Qé/utf +Failed: error 128 at offset 8: atomic assertion expected after (?( or (?(?C) + here: (?(?C'') |<--| \Qé + +/(?(?C'')é/utf +Failed: error 128 at offset 8: atomic assertion expected after (?( or (?(?C) + here: (?(?C'') |<--| é + /[\pS#moq]/ = 0: = @@ -4791,16 +4795,28 @@ Callout 0: last capture = 1 0: \x{1d1aa} /\N{U+}/ -Failed: error 193 at offset 2: \N{U+dddd} is supported only in Unicode (UTF) mode - here: \N |<-->| {U+} +Failed: error 193 at offset 6: \N{U+dddd} is supported only in Unicode (UTF) mode + here: \N{U+} |<--| /\N{U+}/utf Failed: error 178 at offset 5: digits missing after \x or in \x{} or \o{} or \N{U+} here: \N{U+ |<--| } /\N{U}/ -Failed: error 137 at offset 2: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u - here: \N |<--| {U} +Failed: error 137 at offset 3: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u + here: \N{ |<--| U} + +/\N{U+}/utf +Failed: error 178 at offset 5: digits missing after \x or in \x{} or \o{} or \N{U+} + here: \N{U+ |<--| } + +/\N{U+1}/ +Failed: error 193 at offset 7: \N{U+dddd} is supported only in Unicode (UTF) mode + here: \N{U+1} |<--| + +/\N{U+1 }/ +Failed: error 193 at offset 8: \N{U+dddd} is supported only in Unicode (UTF) mode + here: \N{U+1 } |<--| # This tests the non-UTF Unicode NEL pattern whitespace character, only # recognized by PCRE2 with /x when there is Unicode support. @@ -4963,10 +4979,6 @@ Subject length lower bound = 3 Failed: error 142 at offset 5: syntax error in subpattern name (missing terminator?) here: (?'AB |<--| ၌C'...)\g{... -/(?'٠ABC'...)/utf -Failed: error 144 at offset 3: subpattern name must start with a non-digit - here: (?' |-->| ٠ABC'...) 
- /(?'²ABC'...)/utf Failed: error 162 at offset 3: subpattern name expected here: (?' |<--| ²ABC'...) @@ -6297,8 +6309,8 @@ No match # Freeing memory on error test /[\x{100}-\x{400}][\x{100}-\x{300}][\x{100}-\x{200}]\8/i,utf -Failed: error 115 at offset 52: reference to non-existent subpattern - here: ...-\x{200}]\ |<-->| 8 +Failed: error 115 at offset 53: reference to non-existent subpattern + here: ...\x{200}]\8 |<-->| # Character list tests @@ -6871,19 +6883,19 @@ No match /[\p{Lu}-z]/B,alt_extended_class Failed: error 150 at offset 8: invalid range in character class - here: [\p{Lu}- |<-->| z] + here: [\p{Lu}- |<--| z] /[z-\p{Lu}]/B,alt_extended_class Failed: error 150 at offset 9: invalid range in character class - here: [z-\p{Lu} |<-->| ] + here: [z-\p{Lu} |<--| ] /[\pL-z]/B,alt_extended_class Failed: error 150 at offset 5: invalid range in character class - here: [\pL- |<-->| z] + here: [\pL- |<--| z] /[z-\pL]/B,alt_extended_class Failed: error 150 at offset 6: invalid range in character class - here: [z-\pL |<-->| ] + here: [z-\pL |<--| ] /[\p{Lu}-&&-\pL]/B,alt_extended_class ------------------------------------------------------------------ @@ -8226,19 +8238,19 @@ No match /[z-\p{Lu}]/ Failed: error 150 at offset 9: invalid range in character class - here: [z-\p{Lu} |<-->| ] + here: [z-\p{Lu} |<--| ] /[z-\pL]/ Failed: error 150 at offset 6: invalid range in character class - here: [z-\pL |<-->| ] + here: [z-\pL |<--| ] /[\p{Lu}-z]/ Failed: error 150 at offset 8: invalid range in character class - here: [\p{Lu}- |<-->| z] + here: [\p{Lu}- |<--| z] /[\pL-z]/ Failed: error 150 at offset 5: invalid range in character class - here: [\pL- |<-->| z] + here: [\pL- |<--| z] /[a\x{e1}]/iB ------------------------------------------------------------------