summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorrobert <robert@openbsd.org>2013-06-14 18:55:11 +0000
committerrobert <robert@openbsd.org>2013-06-14 18:55:11 +0000
commit6c0ee77c905b499597069f3161609a43bf472b69 (patch)
tree173b5e19c8e0fd098deee027831213dc762b8616
parentCorrect interrupt moderation setting for 82598; tested on the CX4 version (diff)
downloadwireguard-openbsd-6c0ee77c905b499597069f3161609a43bf472b69.tar.xz
wireguard-openbsd-6c0ee77c905b499597069f3161609a43bf472b69.zip
update the internal pcre library to 8.33
-rw-r--r--usr.sbin/nginx/README.OpenBSD1
-rw-r--r--usr.sbin/nginx/src/pcre/pcre.h40
-rw-r--r--usr.sbin/nginx/src/pcre/pcre_compile.c163
-rw-r--r--usr.sbin/nginx/src/pcre/pcre_exec.c440
-rw-r--r--usr.sbin/nginx/src/pcre/pcre_fullinfo.c12
-rw-r--r--usr.sbin/nginx/src/pcre/pcre_internal.h167
-rw-r--r--usr.sbin/nginx/src/pcre/pcre_tables.c15
-rw-r--r--usr.sbin/nginx/src/pcre/pcre_xclass.c16
8 files changed, 563 insertions, 291 deletions
diff --git a/usr.sbin/nginx/README.OpenBSD b/usr.sbin/nginx/README.OpenBSD
index 2a926e158fa..b157be9eb79 100644
--- a/usr.sbin/nginx/README.OpenBSD
+++ b/usr.sbin/nginx/README.OpenBSD
@@ -27,6 +27,7 @@ Modified files by OpenBSD:
- src/os/unix/ngx_process_cycle.c
- src/os/unix/ngx_process_cycle.h
- src/pcre/pcre_compile.c
+- src/pcre/pcre_exec.c
The syslog patch has been applied from:
https://github.com/yaoweibin/nginx_syslog_patch
diff --git a/usr.sbin/nginx/src/pcre/pcre.h b/usr.sbin/nginx/src/pcre/pcre.h
index a6aa4e934b2..0e479667672 100644
--- a/usr.sbin/nginx/src/pcre/pcre.h
+++ b/usr.sbin/nginx/src/pcre/pcre.h
@@ -5,7 +5,7 @@
/* This is the public header file for the PCRE library, to be #included by
applications that call the PCRE functions.
- Copyright (c) 1997-2012 University of Cambridge
+ Copyright (c) 1997-2013 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
/* The current PCRE version information. */
#define PCRE_MAJOR 8
-#define PCRE_MINOR 32
+#define PCRE_MINOR 33
#define PCRE_PRERELEASE
-#define PCRE_DATE 2012-11-30
+#define PCRE_DATE 2013-05-28
/* When an application links to a PCRE DLL in Windows, the symbols that are
imported have to be identified as such. When building PCRE, the appropriate
@@ -96,11 +96,14 @@ extern "C" {
#endif
/* Public options. Some are compile-time only, some are run-time only, and some
-are both, so we keep them all distinct. However, almost all the bits in the
-options word are now used. In the long run, we may have to re-use some of the
-compile-time only bits for runtime options, or vice versa. Any of the
-compile-time options may be inspected during studying (and therefore JIT
-compiling).
+are both. Most of the compile-time options are saved with the compiled regex so
+that they can be inspected during studying (and therefore JIT compiling). Note
+that pcre_study() has its own set of options. Originally, all the options
+defined here used distinct bits. However, almost all the bits in a 32-bit word
+are now used, so in order to conserve them, option bits that were previously
+only recognized at matching time (i.e. by pcre_exec() or pcre_dfa_exec()) may
+also be used for compile-time options that affect only compiling and are not
+relevant for studying or JIT compiling.
Some options for pcre_compile() change its behaviour but do not affect the
behaviour of the execution functions. Other options are passed through to the
@@ -142,7 +145,11 @@ with J. */
#define PCRE_AUTO_CALLOUT 0x00004000 /* C1 */
#define PCRE_PARTIAL_SOFT 0x00008000 /* E D J ) Synonyms */
#define PCRE_PARTIAL 0x00008000 /* E D J ) */
-#define PCRE_DFA_SHORTEST 0x00010000 /* D */
+
+/* This pair use the same bit. */
+#define PCRE_NEVER_UTF 0x00010000 /* C1 ) Overlaid */
+#define PCRE_DFA_SHORTEST 0x00010000 /* D ) Overlaid */
+
#define PCRE_DFA_RESTART 0x00020000 /* D */
#define PCRE_FIRSTLINE 0x00040000 /* C3 */
#define PCRE_DUPNAMES 0x00080000 /* C1 */
@@ -199,6 +206,7 @@ with J. */
#define PCRE_ERROR_DFA_BADRESTART (-30)
#define PCRE_ERROR_JIT_BADOPTION (-31)
#define PCRE_ERROR_BADLENGTH (-32)
+#define PCRE_ERROR_UNSET (-33)
/* Specific error codes for UTF-8 validity checks */
@@ -224,7 +232,7 @@ with J. */
#define PCRE_UTF8_ERR19 19
#define PCRE_UTF8_ERR20 20
#define PCRE_UTF8_ERR21 21
-#define PCRE_UTF8_ERR22 22
+#define PCRE_UTF8_ERR22 22 /* Unused (was non-character) */
/* Specific error codes for UTF-16 validity checks */
@@ -232,13 +240,13 @@ with J. */
#define PCRE_UTF16_ERR1 1
#define PCRE_UTF16_ERR2 2
#define PCRE_UTF16_ERR3 3
-#define PCRE_UTF16_ERR4 4
+#define PCRE_UTF16_ERR4 4 /* Unused (was non-character) */
/* Specific error codes for UTF-32 validity checks */
#define PCRE_UTF32_ERR0 0
#define PCRE_UTF32_ERR1 1
-#define PCRE_UTF32_ERR2 2
+#define PCRE_UTF32_ERR2 2 /* Unused (was non-character) */
#define PCRE_UTF32_ERR3 3
/* Request types for pcre_fullinfo() */
@@ -263,10 +271,12 @@ with J. */
#define PCRE_INFO_JIT 16
#define PCRE_INFO_JITSIZE 17
#define PCRE_INFO_MAXLOOKBEHIND 18
-#define PCRE_INFO_FIRSTCHARACTER 19
-#define PCRE_INFO_FIRSTCHARACTERFLAGS 20
+#define PCRE_INFO_FIRSTCHARACTER 19
+#define PCRE_INFO_FIRSTCHARACTERFLAGS 20
#define PCRE_INFO_REQUIREDCHAR 21
-#define PCRE_INFO_REQUIREDCHARFLAGS 22
+#define PCRE_INFO_REQUIREDCHARFLAGS 22
+#define PCRE_INFO_MATCHLIMIT 23
+#define PCRE_INFO_RECURSIONLIMIT 24
/* Request types for pcre_config(). Do not re-arrange, in order to remain
compatible. */
diff --git a/usr.sbin/nginx/src/pcre/pcre_compile.c b/usr.sbin/nginx/src/pcre/pcre_compile.c
index fb592a87422..2dbe9ca40a4 100644
--- a/usr.sbin/nginx/src/pcre/pcre_compile.c
+++ b/usr.sbin/nginx/src/pcre/pcre_compile.c
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2012 University of Cambridge
+ Copyright (c) 1997-2013 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -487,7 +487,7 @@ static const char error_texts[] =
"a numbered reference must not be zero\0"
"an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0"
/* 60 */
- "(*VERB) not recognized\0"
+ "(*VERB) not recognized or malformed\0"
"number is too big\0"
"subpattern name expected\0"
"digit expected after (?+\0"
@@ -508,6 +508,7 @@ static const char error_texts[] =
"name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
"character value in \\u.... sequence is too large\0"
"invalid UTF-32 string\0"
+ "setting UTF is disabled by the application\0"
;
/* Table to identify digits and hex digits. This is used when compiling
@@ -797,7 +798,8 @@ Otherwise further processing may be required. */
#ifndef EBCDIC /* ASCII/UTF-8 coding */
/* Not alphanumeric */
else if (c < CHAR_0 || c > CHAR_z) {}
-else if ((i = escapes[c - CHAR_0]) != 0) { if (i > 0) c = (pcre_uint32)i; else escape = -i; }
+else if ((i = escapes[c - CHAR_0]) != 0)
+ { if (i > 0) c = (pcre_uint32)i; else escape = -i; }
#else /* EBCDIC coding */
/* Not alphanumeric */
@@ -847,11 +849,11 @@ else
}
#if defined COMPILE_PCRE8
- if (c > (unsigned int)(utf ? 0x10ffff : 0xff))
+ if (c > (utf ? 0x10ffffU : 0xffU))
#elif defined COMPILE_PCRE16
- if (c > (utf ? 0x10ffff : 0xffff))
+ if (c > (utf ? 0x10ffffU : 0xffffU))
#elif defined COMPILE_PCRE32
- if (utf && c > 0x10ffff)
+ if (utf && c > 0x10ffffU)
#endif
{
*errorcodeptr = ERR76;
@@ -1085,11 +1087,11 @@ else
#endif
#if defined COMPILE_PCRE8
- if (c > (unsigned int)(utf ? 0x10ffff : 0xff)) { overflow = TRUE; break; }
+ if (c > (utf ? 0x10ffffU : 0xffU)) { overflow = TRUE; break; }
#elif defined COMPILE_PCRE16
- if (c > (utf ? 0x10ffff : 0xffff)) { overflow = TRUE; break; }
+ if (c > (utf ? 0x10ffffU : 0xffffU)) { overflow = TRUE; break; }
#elif defined COMPILE_PCRE32
- if (utf && c > 0x10ffff) { overflow = TRUE; break; }
+ if (utf && c > 0x10ffffU) { overflow = TRUE; break; }
#endif
}
@@ -1408,7 +1410,11 @@ if (ptr[0] == CHAR_LEFT_PARENTHESIS)
{
/* Handle specials such as (*SKIP) or (*UTF8) etc. */
- if (ptr[1] == CHAR_ASTERISK) ptr += 2;
+ if (ptr[1] == CHAR_ASTERISK)
+ {
+ ptr += 2;
+ while (ptr < cd->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
+ }
/* Handle a normal, unnamed capturing parenthesis. */
@@ -2129,9 +2135,6 @@ for (;;)
case OP_MARK:
case OP_PRUNE_ARG:
case OP_SKIP_ARG:
- code += code[1];
- break;
-
case OP_THEN_ARG:
code += code[1];
break;
@@ -2249,9 +2252,6 @@ for (;;)
case OP_MARK:
case OP_PRUNE_ARG:
case OP_SKIP_ARG:
- code += code[1];
- break;
-
case OP_THEN_ARG:
code += code[1];
break;
@@ -2616,9 +2616,6 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
case OP_MARK:
case OP_PRUNE_ARG:
case OP_SKIP_ARG:
- code += code[1];
- break;
-
case OP_THEN_ARG:
code += code[1];
break;
@@ -2924,9 +2921,6 @@ get_othercase_range(pcre_uint32 *cptr, pcre_uint32 d, pcre_uint32 *ocptr,
{
pcre_uint32 c, othercase, next;
unsigned int co;
-#if defined(__GNUC__) && __GNUC__ == 3
-othercase = 0; /* XXX gcc -Wuninitialized */
-#endif
/* Find the first character that has an other case. If it has multiple other
cases, return its case offset value. */
@@ -3097,7 +3091,8 @@ value is a character, a negative value is an escape value. */
if (*ptr == CHAR_BACKSLASH)
{
int temperrorcode = 0;
- escape = check_escape(&ptr, &next, &temperrorcode, cd->bracount, options, FALSE);
+ escape = check_escape(&ptr, &next, &temperrorcode, cd->bracount, options,
+ FALSE);
if (temperrorcode != 0) return FALSE;
ptr++; /* Point after the escape sequence */
}
@@ -4280,14 +4275,12 @@ for (;; ptr++)
if (c == CHAR_BACKSLASH)
{
- escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options, TRUE);
-
+ escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options,
+ TRUE);
if (*errorcodeptr != 0) goto FAILED;
-
- if (escape == 0)
- c = ec;
+ if (escape == 0) c = ec;
else if (escape == ESC_b) c = CHAR_BS; /* \b is backspace in a class */
- else if (escape == ESC_N) /* \N is not supported in a class */
+ else if (escape == ESC_N) /* \N is not supported in a class */
{
*errorcodeptr = ERR71;
goto FAILED;
@@ -4910,16 +4903,6 @@ for (;; ptr++)
if (repeat_max == 0) goto END_REPEAT;
- /*--------------------------------------------------------------------*/
- /* This code is obsolete from release 8.00; the restriction was finally
- removed: */
-
- /* All real repeats make it impossible to handle partial matching (maybe
- one day we will be able to remove this restriction). */
-
- /* if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL; */
- /*--------------------------------------------------------------------*/
-
/* Combine the op_type with the repeat_type */
repeat_type += op_type;
@@ -5066,16 +5049,6 @@ for (;; ptr++)
goto END_REPEAT;
}
- /*--------------------------------------------------------------------*/
- /* This code is obsolete from release 8.00; the restriction was finally
- removed: */
-
- /* All real repeats make it impossible to handle partial matching (maybe
- one day we will be able to remove this restriction). */
-
- /* if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL; */
- /*--------------------------------------------------------------------*/
-
if (repeat_min == 0 && repeat_max == -1)
*code++ = OP_CRSTAR + repeat_type;
else if (repeat_min == 1 && repeat_max == -1)
@@ -5752,6 +5725,7 @@ for (;; ptr++)
/* ------------------------------------------------------------ */
case CHAR_LEFT_PARENTHESIS:
bravalue = OP_COND; /* Conditional group */
+ tempptr = ptr;
/* A condition can be an assertion, a number (referring to a numbered
group), a name (referring to a named group), or 'R', referring to
@@ -5764,14 +5738,28 @@ for (;; ptr++)
be the recursive thing or the name 'R' (and similarly for 'R' followed
by digits), and (b) a number could be a name that consists of digits.
In both cases, we look for a name first; if not found, we try the other
- cases. */
+ cases.
+
+ For compatibility with auto-callouts, we allow a callout to be
+ specified before a condition that is an assertion. First, check for the
+ syntax of a callout; if found, adjust the temporary pointer that is
+ used to check for an assertion condition. That's all that is needed! */
+
+ if (ptr[1] == CHAR_QUESTION_MARK && ptr[2] == CHAR_C)
+ {
+ for (i = 3;; i++) if (!IS_DIGIT(ptr[i])) break;
+ if (ptr[i] == CHAR_RIGHT_PARENTHESIS)
+ tempptr += i + 1;
+ }
/* For conditions that are assertions, check the syntax, and then exit
the switch. This will take control down to where bracketed groups,
including assertions, are processed. */
- if (ptr[1] == CHAR_QUESTION_MARK && (ptr[2] == CHAR_EQUALS_SIGN ||
- ptr[2] == CHAR_EXCLAMATION_MARK || ptr[2] == CHAR_LESS_THAN_SIGN))
+ if (tempptr[1] == CHAR_QUESTION_MARK &&
+ (tempptr[2] == CHAR_EQUALS_SIGN ||
+ tempptr[2] == CHAR_EXCLAMATION_MARK ||
+ tempptr[2] == CHAR_LESS_THAN_SIGN))
break;
/* Most other conditions use OP_CREF (a couple change to OP_RREF
@@ -6741,10 +6729,9 @@ for (;; ptr++)
case CHAR_BACKSLASH:
tempptr = ptr;
escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options, FALSE);
-
if (*errorcodeptr != 0) goto FAILED;
- if (escape == 0)
+ if (escape == 0) /* The escape coded a single character */
c = ec;
else
{
@@ -6910,11 +6897,12 @@ for (;; ptr++)
can obtain the OP value by negating the escape value in the default
situation when PCRE_UCP is not set. When it *is* set, we substitute
Unicode property tests. Note that \b and \B do a one-character
- lookbehind. */
+ lookbehind, and \A also behaves as if it does. */
else
{
- if ((escape == ESC_b || escape == ESC_B) && cd->max_lookbehind == 0)
+ if ((escape == ESC_b || escape == ESC_B || escape == ESC_A) &&
+ cd->max_lookbehind == 0)
cd->max_lookbehind = 1;
#ifdef SUPPORT_UCP
if (escape >= ESC_DU && escape <= ESC_wu)
@@ -7778,12 +7766,15 @@ pcre32_compile2(PCRE_SPTR32 pattern, int options, int *errorcodeptr,
{
REAL_PCRE *re;
int length = 1; /* For final END opcode */
-pcre_uint32 firstchar, reqchar;
pcre_int32 firstcharflags, reqcharflags;
+pcre_uint32 firstchar, reqchar;
+pcre_uint32 limit_match = PCRE_UINT32_MAX;
+pcre_uint32 limit_recursion = PCRE_UINT32_MAX;
int newline;
int errorcode = 0;
int skipatstart = 0;
BOOL utf;
+BOOL never_utf = FALSE;
size_t size;
pcre_uchar *code;
const pcre_uchar *codestart;
@@ -7843,9 +7834,15 @@ if ((options & ~PUBLIC_COMPILE_OPTIONS) != 0)
goto PCRE_EARLY_ERROR_RETURN;
}
+/* If PCRE_NEVER_UTF is set, remember it. */
+
+if ((options & PCRE_NEVER_UTF) != 0) never_utf = TRUE;
+
/* Check for global one-time settings at the start of the pattern, and remember
the offset for later. */
+cd->external_flags = 0; /* Initialize here for LIMIT_MATCH/RECURSION */
+
while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
ptr[skipatstart+1] == CHAR_ASTERISK)
{
@@ -7876,6 +7873,44 @@ PCRE_UTF8 == PCRE_UTF16 == PCRE_UTF32. */
else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_NO_START_OPT_RIGHTPAR, 13) == 0)
{ skipatstart += 15; options |= PCRE_NO_START_OPTIMIZE; continue; }
+ else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LIMIT_MATCH_EQ, 12) == 0)
+ {
+ pcre_uint32 c = 0;
+ int p = skipatstart + 14;
+ while (isdigit(ptr[p]))
+ {
+ if (c > PCRE_UINT32_MAX / 10 - 1) break; /* Integer overflow */
+ c = c*10 + ptr[p++] - CHAR_0;
+ }
+ if (ptr[p++] != CHAR_RIGHT_PARENTHESIS) break;
+ if (c < limit_match)
+ {
+ limit_match = c;
+ cd->external_flags |= PCRE_MLSET;
+ }
+ skipatstart = p;
+ continue;
+ }
+
+ else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LIMIT_RECURSION_EQ, 16) == 0)
+ {
+ pcre_uint32 c = 0;
+ int p = skipatstart + 18;
+ while (isdigit(ptr[p]))
+ {
+ if (c > PCRE_UINT32_MAX / 10 - 1) break; /* Integer overflow check */
+ c = c*10 + ptr[p++] - CHAR_0;
+ }
+ if (ptr[p++] != CHAR_RIGHT_PARENTHESIS) break;
+ if (c < limit_recursion)
+ {
+ limit_recursion = c;
+ cd->external_flags |= PCRE_RLSET;
+ }
+ skipatstart = p;
+ continue;
+ }
+
if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_CR_RIGHTPAR, 3) == 0)
{ skipatstart += 5; newnl = PCRE_NEWLINE_CR; }
else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LF_RIGHTPAR, 3) == 0)
@@ -7901,6 +7936,11 @@ PCRE_UTF8 == PCRE_UTF16 == PCRE_UTF32. */
/* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */
utf = (options & PCRE_UTF8) != 0;
+if (utf && never_utf)
+ {
+ errorcode = ERR78;
+ goto PCRE_EARLY_ERROR_RETURN2;
+ }
/* Can't support UTF unless PCRE has been compiled to include the code. The
return of an error code from PRIV(valid_utf)() is a new feature, introduced in
@@ -8023,7 +8063,6 @@ cd->req_varyopt = 0;
cd->assert_depth = 0;
cd->max_lookbehind = 0;
cd->external_options = options;
-cd->external_flags = 0;
cd->open_caps = NULL;
/* Now do the pre-compile. On error, errorcode will be set non-zero, so we
@@ -8073,6 +8112,8 @@ re->magic_number = MAGIC_NUMBER;
re->size = (int)size;
re->options = cd->external_options;
re->flags = cd->external_flags;
+re->limit_match = limit_match;
+re->limit_recursion = limit_recursion;
re->first_char = 0;
re->req_char = 0;
re->name_table_offset = sizeof(REAL_PCRE) / sizeof(pcre_uchar);
@@ -8082,7 +8123,9 @@ re->ref_count = 0;
re->tables = (tables == PRIV(default_tables))? NULL : tables;
re->nullpad = NULL;
#ifdef COMPILE_PCRE32
-re->dummy1 = re->dummy2 = 0;
+re->dummy = 0;
+#else
+re->dummy1 = re->dummy2 = re->dummy3 = 0;
#endif
/* The starting points of the name/number translation table and of the code are
@@ -8142,7 +8185,7 @@ if (code - codestart > length) errorcode = ERR23;
#ifdef SUPPORT_VALGRIND
/* If the estimated length exceeds the really used length, mark the extra
-allocated memory as unadressable, so that any out-of-bound reads can be
+allocated memory as unaddressable, so that any out-of-bound reads can be
detected. */
VALGRIND_MAKE_MEM_NOACCESS(code, (length - (code - codestart)) * sizeof(pcre_uchar));
#endif
diff --git a/usr.sbin/nginx/src/pcre/pcre_exec.c b/usr.sbin/nginx/src/pcre/pcre_exec.c
index 05d0e52d33c..48ba0ef4007 100644
--- a/usr.sbin/nginx/src/pcre/pcre_exec.c
+++ b/usr.sbin/nginx/src/pcre/pcre_exec.c
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2012 University of Cambridge
+ Copyright (c) 1997-2013 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -56,6 +56,20 @@ possible. There are also some static supporting functions. */
#undef min
#undef max
+/* The md->capture_last field uses the lower 16 bits for the last captured
+substring (which can never be greater than 65535) and a bit in the top half
+to mean "capture vector overflowed". This odd way of doing things was
+implemented when it was realized that preserving and restoring the overflow bit
+whenever the last capture number was saved/restored made for a neater
+interface, and doing it this way saved on (a) another variable, which would
+have increased the stack frame size (a big NO-NO in PCRE) and (b) another
+separate set of save/restore instructions. The following defines are used in
+implementing this. */
+
+#define CAPLMASK 0x0000ffff /* The bits used for last_capture */
+#define OVFLMASK 0xffff0000 /* The bits used for the overflow flag */
+#define OVFLBIT 0x00010000 /* The bit that is set for overflow */
+
/* Values for setting in md->match_function_type to indicate two special types
of call to match(). We do it this way to save on using another stack variable,
as stack usage is to be discouraged. */
@@ -73,13 +87,17 @@ defined PCRE_ERROR_xxx codes, which are all negative. */
negative to avoid the external error codes. */
#define MATCH_ACCEPT (-999)
-#define MATCH_COMMIT (-998)
-#define MATCH_KETRPOS (-997)
-#define MATCH_ONCE (-996)
+#define MATCH_KETRPOS (-998)
+#define MATCH_ONCE (-997)
+/* The next 5 must be kept together and in sequence so that a test that checks
+for any one of them can use a range. */
+#define MATCH_COMMIT (-996)
#define MATCH_PRUNE (-995)
#define MATCH_SKIP (-994)
#define MATCH_SKIP_ARG (-993)
#define MATCH_THEN (-992)
+#define MATCH_BACKTRACK_MAX MATCH_THEN
+#define MATCH_BACKTRACK_MIN MATCH_COMMIT
/* Maximum number of ints of offset to save on the stack for recursive calls.
If the offset vector is bigger, malloc is used. This should be a multiple of 3,
@@ -219,7 +237,7 @@ if (caseless)
{
while (length-- > 0)
{
- pcre_uchar cc, cp;
+ pcre_uint32 cc, cp;
if (eptr >= md->end_subject) return -2; /* Partial match */
cc = RAWUCHARTEST(eptr);
cp = RAWUCHARTEST(p);
@@ -294,7 +312,7 @@ enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10,
RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
RM51, RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
- RM61, RM62, RM63, RM64, RM65, RM66, RM67 };
+ RM61, RM62, RM63, RM64, RM65, RM66, RM67, RM68 };
/* These versions of the macros use the stack, as normal. There are debugging
versions and production versions. Note that the "rw" argument of RMATCH isn't
@@ -416,10 +434,10 @@ typedef struct heapframe {
int Xlength;
int Xmax;
int Xmin;
- int Xnumber;
+ unsigned int Xnumber;
int Xoffset;
- int Xop;
- int Xsave_capture_last;
+ unsigned int Xop;
+ pcre_int32 Xsave_capture_last;
int Xsave_offset1, Xsave_offset2, Xsave_offset3;
int Xstacksave[REC_STACK_SAVE_MAX];
@@ -634,8 +652,8 @@ int max;
int min;
unsigned int number;
int offset;
-pcre_uchar op;
-int save_capture_last;
+unsigned int op;
+pcre_int32 save_capture_last;
int save_offset1, save_offset2, save_offset3;
int stacksave[REC_STACK_SAVE_MAX];
@@ -763,23 +781,16 @@ for (;;)
case OP_FAIL:
RRETURN(MATCH_NOMATCH);
- /* COMMIT overrides PRUNE, SKIP, and THEN */
-
case OP_COMMIT:
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
eptrb, RM52);
- if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
- rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
- rrc != MATCH_THEN)
- RRETURN(rrc);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
RRETURN(MATCH_COMMIT);
- /* PRUNE overrides THEN */
-
case OP_PRUNE:
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
eptrb, RM51);
- if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
RRETURN(MATCH_PRUNE);
case OP_PRUNE_ARG:
@@ -789,38 +800,39 @@ for (;;)
eptrb, RM56);
if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
md->mark == NULL) md->mark = ecode + 2;
- if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
RRETURN(MATCH_PRUNE);
- /* SKIP overrides PRUNE and THEN */
-
case OP_SKIP:
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
eptrb, RM53);
- if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
- RRETURN(rrc);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
md->start_match_ptr = eptr; /* Pass back current position */
RRETURN(MATCH_SKIP);
/* Note that, for Perl compatibility, SKIP with an argument does NOT set
- nomatch_mark. There is a flag that disables this opcode when re-matching a
- pattern that ended with a SKIP for which there was not a matching MARK. */
+ nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was
+ not a matching mark, we have to re-run the match, ignoring the SKIP_ARG
+ that failed and any that precede it (either they also failed, or were not
+ triggered). To do this, we maintain a count of executed SKIP_ARGs. If a
+ SKIP_ARG gets to top level, the match is re-run with md->ignore_skip_arg
+ set to the count of the one that failed. */
case OP_SKIP_ARG:
- if (md->ignore_skip_arg)
+ md->skip_arg_count++;
+ if (md->skip_arg_count <= md->ignore_skip_arg)
{
ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
break;
}
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
eptrb, RM57);
- if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
- RRETURN(rrc);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
/* Pass back the current skip name by overloading md->start_match_ptr and
returning the special MATCH_SKIP_ARG return code. This will either be
caught by a matching MARK, or get to the top, where it causes a rematch
- with the md->ignore_skip_arg flag set. */
+ with md->ignore_skip_arg set to the value of md->skip_arg_count. */
md->start_match_ptr = ecode + 2;
RRETURN(MATCH_SKIP_ARG);
@@ -1066,6 +1078,7 @@ for (;;)
/* In all other cases, we have to make another call to match(). */
save_mark = md->mark;
+ save_capture_last = md->capture_last;
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
RM2);
@@ -1097,6 +1110,7 @@ for (;;)
ecode += GET(ecode, 1);
md->mark = save_mark;
if (*ecode != OP_ALT) break;
+ md->capture_last = save_capture_last;
}
RRETURN(MATCH_NOMATCH);
@@ -1218,6 +1232,7 @@ for (;;)
POSSESSIVE_NON_CAPTURE:
matched_once = FALSE;
code_offset = (int)(ecode - md->start_code);
+ save_capture_last = md->capture_last;
for (;;)
{
@@ -1247,6 +1262,7 @@ for (;;)
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
ecode += GET(ecode, 1);
if (*ecode != OP_ALT) break;
+ md->capture_last = save_capture_last;
}
if (matched_once || allow_zero)
@@ -1291,13 +1307,16 @@ for (;;)
cb.pattern_position = GET(ecode, LINK_SIZE + 3);
cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
cb.capture_top = offset_top/2;
- cb.capture_last = md->capture_last;
+ cb.capture_last = md->capture_last & CAPLMASK;
+ /* Internal change requires this for API compatibility. */
+ if (cb.capture_last == 0) cb.capture_last = -1;
cb.callout_data = md->callout_data;
cb.mark = md->nomatch_mark;
if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
if (rrc < 0) RRETURN(rrc);
}
ecode += PRIV(OP_lengths)[OP_CALLOUT];
+ codelink -= PRIV(OP_lengths)[OP_CALLOUT];
}
condcode = ecode[LINK_SIZE+1];
@@ -1513,7 +1532,7 @@ for (;;)
to close any currently open capturing brackets. */
case OP_CLOSE:
- number = GET2(ecode, 1);
+ number = GET2(ecode, 1); /* Must be less than 65536 */
offset = number << 1;
#ifdef PCRE_DEBUG
@@ -1521,8 +1540,8 @@ for (;;)
printf("\n");
#endif
- md->capture_last = number;
- if (offset >= md->offset_max) md->offset_overflow = TRUE; else
+ md->capture_last = (md->capture_last & OVFLMASK) | number;
+ if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else
{
md->offset_vector[offset] =
md->offset_vector[md->offset_end - number];
@@ -1584,28 +1603,49 @@ for (;;)
}
else condassert = FALSE;
+ /* Loop for each branch */
+
do
{
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);
+
+ /* A match means that the assertion is true; break out of the loop
+ that matches its alternatives. */
+
if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
{
mstart = md->start_match_ptr; /* In case \K reset it */
break;
}
+
+ /* If not matched, restore the previous mark setting. */
+
md->mark = save_mark;
- /* A COMMIT failure must fail the entire assertion, without trying any
- subsequent branches. */
+ /* See comment in the code for capturing groups above about handling
+ THEN. */
- if (rrc == MATCH_COMMIT) RRETURN(MATCH_NOMATCH);
+ if (rrc == MATCH_THEN)
+ {
+ next = ecode + GET(ecode,1);
+ if (md->start_match_ptr < next &&
+ (*ecode == OP_ALT || *next == OP_ALT))
+ rrc = MATCH_NOMATCH;
+ }
- /* PCRE does not allow THEN to escape beyond an assertion; it
- is treated as NOMATCH. */
+ /* Anything other than NOMATCH causes the entire assertion to fail,
+ passing back the return code. This includes COMMIT, SKIP, PRUNE and an
+ uncaptured THEN, which means they take their normal effect. This
+ consistent approach does not always have exactly the same effect as in
+ Perl. */
- if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
ecode += GET(ecode, 1);
}
- while (*ecode == OP_ALT);
+ while (*ecode == OP_ALT); /* Continue for next alternative */
+
+ /* If we have tried all the alternative branches, the assertion has
+ failed. If not, we broke out after a match. */
if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
@@ -1613,17 +1653,16 @@ for (;;)
if (condassert) RRETURN(MATCH_MATCH);
- /* Continue from after the assertion, updating the offsets high water
- mark, since extracts may have been taken during the assertion. */
+ /* Continue from after a successful assertion, updating the offsets high
+ water mark, since extracts may have been taken during the assertion. */
do ecode += GET(ecode,1); while (*ecode == OP_ALT);
ecode += 1 + LINK_SIZE;
offset_top = md->end_offset_top;
continue;
- /* Negative assertion: all branches must fail to match. Encountering SKIP,
- PRUNE, or COMMIT means we must assume failure without checking subsequent
- branches. */
+ /* Negative assertion: all branches must fail to match for the assertion to
+ succeed. */
case OP_ASSERT_NOT:
case OP_ASSERTBACK_NOT:
@@ -1635,28 +1674,64 @@ for (;;)
}
else condassert = FALSE;
+ /* Loop for each alternative branch. */
+
do
{
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
- md->mark = save_mark;
- if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);
- if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
+ md->mark = save_mark; /* Always restore the mark setting */
+
+ switch(rrc)
{
- do ecode += GET(ecode,1); while (*ecode == OP_ALT);
+ case MATCH_MATCH: /* A successful match means */
+ case MATCH_ACCEPT: /* the assertion has failed. */
+ RRETURN(MATCH_NOMATCH);
+
+ case MATCH_NOMATCH: /* Carry on with next branch */
break;
+
+ /* See comment in the code for capturing groups above about handling
+ THEN. */
+
+ case MATCH_THEN:
+ next = ecode + GET(ecode,1);
+ if (md->start_match_ptr < next &&
+ (*ecode == OP_ALT || *next == OP_ALT))
+ {
+ rrc = MATCH_NOMATCH;
+ break;
+ }
+ /* Otherwise fall through. */
+
+ /* COMMIT, SKIP, PRUNE, and an uncaptured THEN cause the whole
+ assertion to fail to match, without considering any more alternatives.
+ Failing to match means the assertion is true. This is a consistent
+ approach, but does not always have the same effect as in Perl. */
+
+ case MATCH_COMMIT:
+ case MATCH_SKIP:
+ case MATCH_SKIP_ARG:
+ case MATCH_PRUNE:
+ do ecode += GET(ecode,1); while (*ecode == OP_ALT);
+ goto NEG_ASSERT_TRUE; /* Break out of alternation loop */
+
+ /* Anything else is an error */
+
+ default:
+ RRETURN(rrc);
}
- /* PCRE does not allow THEN to escape beyond an assertion; it is treated
- as NOMATCH. */
+ /* Continue with next branch */
- if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
ecode += GET(ecode,1);
}
while (*ecode == OP_ALT);
- if (condassert) RRETURN(MATCH_MATCH); /* Condition assertion */
+ /* All branches in the assertion failed to match. */
- ecode += 1 + LINK_SIZE;
+ NEG_ASSERT_TRUE:
+ if (condassert) RRETURN(MATCH_MATCH); /* Condition assertion */
+ ecode += 1 + LINK_SIZE; /* Continue with current branch */
continue;
/* Move the subject pointer back. This occurs only at the start of
@@ -1716,7 +1791,9 @@ for (;;)
cb.pattern_position = GET(ecode, 2);
cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
cb.capture_top = offset_top/2;
- cb.capture_last = md->capture_last;
+ cb.capture_last = md->capture_last & CAPLMASK;
+ /* Internal change requires this for API compatibility. */
+ if (cb.capture_last == 0) cb.capture_last = -1;
cb.callout_data = md->callout_data;
cb.mark = md->nomatch_mark;
if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
@@ -1762,6 +1839,7 @@ for (;;)
/* Add to "recursing stack" */
new_recursive.group_num = recno;
+ new_recursive.saved_capture_last = md->capture_last;
new_recursive.subject_position = eptr;
new_recursive.prevrec = md->recursive;
md->recursive = &new_recursive;
@@ -1785,8 +1863,9 @@ for (;;)
new_recursive.saved_max * sizeof(int));
/* OK, now we can do the recursion. After processing each alternative,
- restore the offset data. If there were nested recursions, md->recursive
- might be changed, so reset it before looping. */
+ restore the offset data and the last captured value. If there were nested
+ recursions, md->recursive might be changed, so reset it before looping.
+ */
DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
cbegroup = (*callpat >= OP_SBRA);
@@ -1797,6 +1876,7 @@ for (;;)
md, eptrb, RM6);
memcpy(md->offset_vector, new_recursive.offset_save,
new_recursive.saved_max * sizeof(int));
+ md->capture_last = new_recursive.saved_capture_last;
md->recursive = new_recursive.prevrec;
if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
{
@@ -1813,11 +1893,16 @@ for (;;)
goto RECURSION_MATCHED; /* Exit loop; end processing */
}
- /* PCRE does not allow THEN or COMMIT to escape beyond a recursion; it
- is treated as NOMATCH. */
+ /* PCRE does not allow THEN, SKIP, PRUNE or COMMIT to escape beyond a
+ recursion; they cause a NOMATCH for the entire recursion. These codes
+ are defined in a range that can be tested for. */
+
+ if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX)
+ RRETURN(MATCH_NOMATCH);
- else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN &&
- rrc != MATCH_COMMIT)
+ /* Any return code other than NOMATCH is an error. */
+
+ if (rrc != MATCH_NOMATCH)
{
DPRINTF(("Recursion gave error %d\n", rrc));
if (new_recursive.offset_save != stacksave)
@@ -1947,8 +2032,8 @@ for (;;)
/* Deal with capturing */
- md->capture_last = number;
- if (offset >= md->offset_max) md->offset_overflow = TRUE; else
+ md->capture_last = (md->capture_last & OVFLMASK) | number;
+ if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else
{
/* If offset is greater than offset_top, it means that we are
"skipping" a capturing group, and that group's offsets must be marked
@@ -2604,6 +2689,13 @@ for (;;)
}
break;
+ case PT_UCNC:
+ if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
+ c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
+ c >= 0xe000) == (op == OP_NOTPROP))
+ RRETURN(MATCH_NOMATCH);
+ break;
+
/* This should never occur */
default:
@@ -3190,7 +3282,7 @@ for (;;)
if (fc < 128)
{
- pcre_uchar cc = RAWUCHAR(eptr);
+ pcre_uint32 cc = RAWUCHAR(eptr);
if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH);
ecode++;
eptr++;
@@ -3295,7 +3387,22 @@ for (;;)
max = rep_max[c]; /* zero for max => infinity */
if (max == 0) max = INT_MAX;
- /* Common code for all repeated single-character matches. */
+ /* Common code for all repeated single-character matches. We first check
+ for the minimum number of characters. If the minimum equals the maximum, we
+ are done. Otherwise, if minimizing, check the rest of the pattern for a
+ match; if there isn't one, advance up to the maximum, one character at a
+ time.
+
+ If maximizing, advance up to the maximum number of matching characters,
+ until eptr is past the end of the maximum run. If possessive, we are
+ then done (no backing up). Otherwise, match at this position; anything
+ other than no match is immediately returned. For nomatch, back up one
+ character, unless we are matching \R and the last thing matched was
+ \r\n, in which case, back up two bytes. When we reach the first optional
+ character position, we can save stack by doing a tail recurse.
+
+ The various UTF/non-UTF and caseful/caseless cases are handled separately,
+ for speed. */
REPEATCHAR:
#ifdef SUPPORT_UTF
@@ -3379,13 +3486,12 @@ for (;;)
}
}
- if (possessive) continue;
-
+ if (possessive) continue; /* No backtracking */
for(;;)
{
+ if (eptr == pp) goto TAIL_RECURSE;
RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (eptr == pp) { RRETURN(MATCH_NOMATCH); }
#ifdef SUPPORT_UCP
eptr--;
BACKCHAR(eptr);
@@ -3439,8 +3545,7 @@ for (;;)
for (i = 1; i <= min; i++)
{
- pcre_uchar cc;
-
+ pcre_uint32 cc; /* Faster than pcre_uchar */
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
@@ -3455,8 +3560,7 @@ for (;;)
{
for (fi = min;; fi++)
{
- pcre_uchar cc;
-
+ pcre_uint32 cc; /* Faster than pcre_uchar */
RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max) RRETURN(MATCH_NOMATCH);
@@ -3476,8 +3580,7 @@ for (;;)
pp = eptr;
for (i = min; i < max; i++)
{
- pcre_uchar cc;
-
+ pcre_uint32 cc; /* Faster than pcre_uchar */
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
@@ -3488,10 +3591,10 @@ for (;;)
eptr++;
}
- if (possessive) continue;
-
- while (eptr >= pp)
+ if (possessive) continue; /* No backtracking */
+ for (;;)
{
+ if (eptr == pp) goto TAIL_RECURSE;
RMATCH(eptr, ecode, offset_top, md, eptrb, RM25);
eptr--;
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
@@ -3546,10 +3649,10 @@ for (;;)
if (fc != RAWUCHARTEST(eptr)) break;
eptr++;
}
- if (possessive) continue;
-
- while (eptr >= pp)
+ if (possessive) continue; /* No backtracking */
+ for (;;)
{
+ if (eptr == pp) goto TAIL_RECURSE;
RMATCH(eptr, ecode, offset_top, md, eptrb, RM27);
eptr--;
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
@@ -3726,7 +3829,7 @@ for (;;)
}
}
else
-#endif
+#endif /* SUPPORT_UTF */
/* Not UTF mode */
{
for (i = 1; i <= min; i++)
@@ -3764,7 +3867,7 @@ for (;;)
}
}
else
-#endif
+#endif /*SUPPORT_UTF */
/* Not UTF mode */
{
for (fi = min;; fi++)
@@ -3806,17 +3909,18 @@ for (;;)
if (fc == d || (unsigned int)foc == d) break;
eptr += len;
}
- if (possessive) continue;
+ if (possessive) continue; /* No backtracking */
for(;;)
{
+ if (eptr == pp) goto TAIL_RECURSE;
RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (eptr-- == pp) break; /* Stop if tried at original pos */
+ eptr--;
BACKCHAR(eptr);
}
}
else
-#endif
+#endif /* SUPPORT_UTF */
/* Not UTF mode */
{
for (i = min; i < max; i++)
@@ -3829,9 +3933,10 @@ for (;;)
if (fc == *eptr || foc == *eptr) break;
eptr++;
}
- if (possessive) continue;
- while (eptr >= pp)
+ if (possessive) continue; /* No backtracking */
+ for (;;)
{
+ if (eptr == pp) goto TAIL_RECURSE;
RMATCH(eptr, ecode, offset_top, md, eptrb, RM31);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
eptr--;
@@ -3941,12 +4046,13 @@ for (;;)
if (fc == d) break;
eptr += len;
}
- if (possessive) continue;
+ if (possessive) continue; /* No backtracking */
for(;;)
{
+ if (eptr == pp) goto TAIL_RECURSE;
RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (eptr-- == pp) break; /* Stop if tried at original pos */
+ eptr--;
BACKCHAR(eptr);
}
}
@@ -3964,9 +4070,10 @@ for (;;)
if (fc == *eptr) break;
eptr++;
}
- if (possessive) continue;
- while (eptr >= pp)
+ if (possessive) continue; /* No backtracking */
+ for (;;)
{
+ if (eptr == pp) goto TAIL_RECURSE;
RMATCH(eptr, ecode, offset_top, md, eptrb, RM35);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
eptr--;
@@ -4225,6 +4332,22 @@ for (;;)
}
break;
+ case PT_UCNC:
+ for (i = 1; i <= min; i++)
+ {
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
+ GETCHARINCTEST(c, eptr);
+ if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
+ c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
+ c >= 0xe000) == prop_fail_result)
+ RRETURN(MATCH_NOMATCH);
+ }
+ break;
+
/* This should not occur */
default:
@@ -4430,8 +4553,7 @@ for (;;)
case OP_DIGIT:
for (i = 1; i <= min; i++)
{
- pcre_uchar cc;
-
+ pcre_uint32 cc;
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
@@ -4448,8 +4570,7 @@ for (;;)
case OP_NOT_WHITESPACE:
for (i = 1; i <= min; i++)
{
- pcre_uchar cc;
-
+ pcre_uint32 cc;
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
@@ -4466,8 +4587,7 @@ for (;;)
case OP_WHITESPACE:
for (i = 1; i <= min; i++)
{
- pcre_uchar cc;
-
+ pcre_uint32 cc;
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
@@ -4484,8 +4604,7 @@ for (;;)
case OP_NOT_WORDCHAR:
for (i = 1; i <= min; i++)
{
- pcre_uchar cc;
-
+ pcre_uint32 cc;
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
@@ -4502,8 +4621,7 @@ for (;;)
case OP_WORDCHAR:
for (i = 1; i <= min; i++)
{
- pcre_uchar cc;
-
+ pcre_uint32 cc;
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
@@ -4976,6 +5094,25 @@ for (;;)
}
/* Control never gets here */
+ case PT_UCNC:
+ for (fi = min;; fi++)
+ {
+ RMATCH(eptr, ecode, offset_top, md, eptrb, RM68);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (fi >= max) RRETURN(MATCH_NOMATCH);
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
+ GETCHARINCTEST(c, eptr);
+ if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
+ c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
+ c >= 0xe000) == prop_fail_result)
+ RRETURN(MATCH_NOMATCH);
+ }
+ /* Control never gets here */
+
/* This should never occur */
default:
RRETURN(PCRE_ERROR_INTERNAL);
@@ -5470,18 +5607,37 @@ for (;;)
GOT_MAX:
break;
+ case PT_UCNC:
+ for (i = min; i < max; i++)
+ {
+ int len = 1;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
+ GETCHARLENTEST(c, eptr, len);
+ if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
+ c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
+ c >= 0xe000) == prop_fail_result)
+ break;
+ eptr += len;
+ }
+ break;
+
default:
RRETURN(PCRE_ERROR_INTERNAL);
}
/* eptr is now past the end of the maximum run */
- if (possessive) continue;
+ if (possessive) continue; /* No backtracking */
for(;;)
{
+ if (eptr == pp) goto TAIL_RECURSE;
RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (eptr-- == pp) break; /* Stop if tried at original pos */
+ eptr--;
if (utf) BACKCHAR(eptr);
}
}
@@ -5518,13 +5674,13 @@ for (;;)
/* eptr is now past the end of the maximum run */
- if (possessive) continue;
-
+ if (possessive) continue; /* No backtracking */
for(;;)
{
+ if (eptr == pp) goto TAIL_RECURSE;
RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (eptr-- == pp) break; /* Stop if tried at original pos */
+ eptr--;
for (;;) /* Move back over one extended */
{
if (!utf) c = *eptr; else
@@ -5799,18 +5955,13 @@ for (;;)
RRETURN(PCRE_ERROR_INTERNAL);
}
- /* eptr is now past the end of the maximum run. If possessive, we are
- done (no backing up). Otherwise, match at this position; anything other
- than no match is immediately returned. For nomatch, back up one
- character, unless we are matching \R and the last thing matched was
- \r\n, in which case, back up two bytes. */
-
- if (possessive) continue;
+ if (possessive) continue; /* No backtracking */
for(;;)
{
+ if (eptr == pp) goto TAIL_RECURSE;
RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (eptr-- == pp) break; /* Stop if tried at original pos */
+ eptr--;
BACKCHAR(eptr);
if (ctype == OP_ANYNL && eptr > pp && RAWUCHAR(eptr) == CHAR_NL &&
RAWUCHAR(eptr - 1) == CHAR_CR) eptr--;
@@ -6048,15 +6199,10 @@ for (;;)
RRETURN(PCRE_ERROR_INTERNAL);
}
- /* eptr is now past the end of the maximum run. If possessive, we are
- done (no backing up). Otherwise, match at this position; anything other
- than no match is immediately returned. For nomatch, back up one
- character (byte), unless we are matching \R and the last thing matched
- was \r\n, in which case, back up two bytes. */
-
- if (possessive) continue;
- while (eptr >= pp)
+ if (possessive) continue; /* No backtracking */
+ for (;;)
{
+ if (eptr == pp) goto TAIL_RECURSE;
RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
eptr--;
@@ -6111,7 +6257,7 @@ switch (frame->Xwhere)
LBL(32) LBL(34) LBL(42) LBL(46)
#ifdef SUPPORT_UCP
LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
- LBL(59) LBL(60) LBL(61) LBL(62) LBL(67)
+ LBL(59) LBL(60) LBL(61) LBL(62) LBL(67) LBL(68)
#endif /* SUPPORT_UCP */
#endif /* SUPPORT_UTF */
default:
@@ -6264,6 +6410,7 @@ const pcre_uint8 *start_bits = NULL;
PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
PCRE_PUCHAR end_subject;
PCRE_PUCHAR start_partial = NULL;
+PCRE_PUCHAR match_partial = NULL;
PCRE_PUCHAR req_char_ptr = start_match - 1;
const pcre_study_data *study;
@@ -6393,6 +6540,8 @@ md->callout_data = NULL;
tables = re->tables;
+/* The two limit values override the defaults, whatever their value. */
+
if (extra_data != NULL)
{
register unsigned int flags = extra_data->flags;
@@ -6407,6 +6556,15 @@ if (extra_data != NULL)
if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
}
+/* Limits in the regex override only if they are smaller. */
+
+if ((re->flags & PCRE_MLSET) != 0 && re->limit_match < md->match_limit)
+ md->match_limit = re->limit_match;
+
+if ((re->flags & PCRE_RLSET) != 0 &&
+ re->limit_recursion < md->match_limit_recursion)
+ md->match_limit_recursion = re->limit_recursion;
+
/* If the exec call supplied NULL for tables, use the inbuilt ones. This
is a feature that makes it possible to save compiled regex and re-use them
in other programs later. */
@@ -6432,7 +6590,7 @@ end_subject = md->end_subject;
md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
md->use_ucp = (re->options & PCRE_UCP) != 0;
md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
-md->ignore_skip_arg = FALSE;
+md->ignore_skip_arg = 0;
/* Some options are unpacked into BOOL variables in the hope that testing
them will be faster than individual option bits. */
@@ -6542,11 +6700,9 @@ if (re->top_backref > 0 && re->top_backref >= ocount/3)
DPRINTF(("Got memory to hold back references\n"));
}
else md->offset_vector = offsets;
-
md->offset_end = ocount;
md->offset_max = (2*ocount)/3;
-md->offset_overflow = FALSE;
-md->capture_last = -1;
+md->capture_last = 0;
/* Reset the working variable associated with each extraction. These should
never be used unless previously set, but they get saved and restored, and so we
@@ -6816,8 +6972,13 @@ for(;;)
md->match_call_count = 0;
md->match_function_type = 0;
md->end_offset_top = 0;
+ md->skip_arg_count = 0;
rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);
- if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
+ if (md->hitend && start_partial == NULL)
+ {
+ start_partial = md->start_used_ptr;
+ match_partial = start_match;
+ }
switch(rc)
{
@@ -6830,14 +6991,14 @@ for(;;)
case MATCH_SKIP_ARG:
new_start_match = start_match;
- md->ignore_skip_arg = TRUE;
+ md->ignore_skip_arg = md->skip_arg_count;
break;
- /* SKIP passes back the next starting point explicitly, but if it is the
- same as the match we have just done, treat it as NOMATCH. */
+ /* SKIP passes back the next starting point explicitly, but if it is no
+ greater than the match we have just done, treat it as NOMATCH. */
case MATCH_SKIP:
- if (md->start_match_ptr != start_match)
+ if (md->start_match_ptr > start_match)
{
new_start_match = md->start_match_ptr;
break;
@@ -6845,12 +7006,12 @@ for(;;)
/* Fall through */
/* NOMATCH and PRUNE advance by one character. THEN at this level acts
- exactly like PRUNE. Unset the ignore SKIP-with-argument flag. */
+ exactly like PRUNE. Unset ignore SKIP-with-argument. */
case MATCH_NOMATCH:
case MATCH_PRUNE:
case MATCH_THEN:
- md->ignore_skip_arg = FALSE;
+ md->ignore_skip_arg = 0;
new_start_match = start_match + 1;
#ifdef SUPPORT_UTF
if (utf)
@@ -6943,7 +7104,7 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
(arg_offset_max - 2) * sizeof(int));
DPRINTF(("Copied offsets from temporary memory\n"));
}
- if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE;
+ if (md->end_offset_top > arg_offset_max) md->capture_last |= OVFLBIT;
DPRINTF(("Freeing temporary memory\n"));
(PUBL(free))(md->offset_vector);
}
@@ -6951,7 +7112,8 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
/* Set the return code to the number of captured strings, or 0 if there were
too many to fit into the vector. */
- rc = (md->offset_overflow && md->end_offset_top >= arg_offset_max)?
+ rc = ((md->capture_last & OVFLBIT) != 0 &&
+ md->end_offset_top >= arg_offset_max)?
0 : md->end_offset_top/2;
/* If there is space in the offset vector, set any unused pairs at the end of
@@ -7024,6 +7186,8 @@ if (start_partial != NULL)
{
offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);
offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
+ if (offsetcount > 2)
+ offsets[2] = (int)(match_partial - (PCRE_PUCHAR)subject);
}
rc = PCRE_ERROR_PARTIAL;
}
diff --git a/usr.sbin/nginx/src/pcre/pcre_fullinfo.c b/usr.sbin/nginx/src/pcre/pcre_fullinfo.c
index 02c9df4a826..c4eb5c0e1ad 100644
--- a/usr.sbin/nginx/src/pcre/pcre_fullinfo.c
+++ b/usr.sbin/nginx/src/pcre/pcre_fullinfo.c
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2012 University of Cambridge
+ Copyright (c) 1997-2013 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -222,6 +222,16 @@ switch (what)
*((int *)where) = re->max_lookbehind;
break;
+ case PCRE_INFO_MATCHLIMIT:
+ if ((re->flags & PCRE_MLSET) == 0) return PCRE_ERROR_UNSET;
+ *((pcre_uint32 *)where) = re->limit_match;
+ break;
+
+ case PCRE_INFO_RECURSIONLIMIT:
+ if ((re->flags & PCRE_RLSET) == 0) return PCRE_ERROR_UNSET;
+ *((pcre_uint32 *)where) = re->limit_recursion;
+ break;
+
default: return PCRE_ERROR_BADOPTION;
}
diff --git a/usr.sbin/nginx/src/pcre/pcre_internal.h b/usr.sbin/nginx/src/pcre/pcre_internal.h
index f3cb001fea7..307069ca9d6 100644
--- a/usr.sbin/nginx/src/pcre/pcre_internal.h
+++ b/usr.sbin/nginx/src/pcre/pcre_internal.h
@@ -7,7 +7,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2012 University of Cambridge
+ Copyright (c) 1997-2013 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -194,23 +194,31 @@ preprocessor time in standard C environments. */
typedef unsigned char pcre_uint8;
#if USHRT_MAX == 65535
- typedef unsigned short pcre_uint16;
- typedef short pcre_int16;
+typedef unsigned short pcre_uint16;
+typedef short pcre_int16;
+#define PCRE_UINT16_MAX USHRT_MAX
+#define PCRE_INT16_MAX SHRT_MAX
#elif UINT_MAX == 65535
- typedef unsigned int pcre_uint16;
- typedef int pcre_int16;
+typedef unsigned int pcre_uint16;
+typedef int pcre_int16;
+#define PCRE_UINT16_MAX UINT_MAX
+#define PCRE_INT16_MAX INT_MAX
#else
-# error Cannot determine a type for 16-bit unsigned integers
+#error Cannot determine a type for 16-bit integers
#endif
-#if UINT_MAX == 4294967295
- typedef unsigned int pcre_uint32;
- typedef int pcre_int32;
-#elif ULONG_MAX == 4294967295
- typedef unsigned long int pcre_uint32;
- typedef long int pcre_int32;
+#if UINT_MAX == 4294967295U
+typedef unsigned int pcre_uint32;
+typedef int pcre_int32;
+#define PCRE_UINT32_MAX UINT_MAX
+#define PCRE_INT32_MAX INT_MAX
+#elif ULONG_MAX == 4294967295UL
+typedef unsigned long int pcre_uint32;
+typedef long int pcre_int32;
+#define PCRE_UINT32_MAX ULONG_MAX
+#define PCRE_INT32_MAX LONG_MAX
#else
-# error Cannot determine a type for 32-bit unsigned integers
+#error Cannot determine a type for 32-bit integers
#endif
/* When checking for integer overflow in pcre_compile(), we need to handle
@@ -1121,23 +1129,26 @@ other. NOTE: The values also appear in pcre_jit_compile.c. */
/* Private flags containing information about the compiled regex. They used to
-live at the top end of the options word, but that got almost full, so now they
-are in a 16-bit flags word. From release 8.00, PCRE_NOPARTIAL is unused, as
-the restrictions on partial matching have been lifted. It remains for backwards
+live at the top end of the options word, but that got almost full, so they were
+moved to a 16-bit flags word - which got almost full, so now they are in a
+32-bit flags word. From release 8.00, PCRE_NOPARTIAL is unused, as the
+restrictions on partial matching have been lifted. It remains for backwards
compatibility. */
-#define PCRE_MODE8 0x0001 /* compiled in 8 bit mode */
-#define PCRE_MODE16 0x0002 /* compiled in 16 bit mode */
-#define PCRE_MODE32 0x0004 /* compiled in 32 bit mode */
-#define PCRE_FIRSTSET 0x0010 /* first_char is set */
-#define PCRE_FCH_CASELESS 0x0020 /* caseless first char */
-#define PCRE_REQCHSET 0x0040 /* req_byte is set */
-#define PCRE_RCH_CASELESS 0x0080 /* caseless requested char */
-#define PCRE_STARTLINE 0x0100 /* start after \n for multiline */
-#define PCRE_NOPARTIAL 0x0200 /* can't use partial with this regex */
-#define PCRE_JCHANGED 0x0400 /* j option used in regex */
-#define PCRE_HASCRORLF 0x0800 /* explicit \r or \n in pattern */
-#define PCRE_HASTHEN 0x1000 /* pattern contains (*THEN) */
+#define PCRE_MODE8 0x00000001 /* compiled in 8 bit mode */
+#define PCRE_MODE16 0x00000002 /* compiled in 16 bit mode */
+#define PCRE_MODE32 0x00000004 /* compiled in 32 bit mode */
+#define PCRE_FIRSTSET 0x00000010 /* first_char is set */
+#define PCRE_FCH_CASELESS 0x00000020 /* caseless first char */
+#define PCRE_REQCHSET 0x00000040 /* req_byte is set */
+#define PCRE_RCH_CASELESS 0x00000080 /* caseless requested char */
+#define PCRE_STARTLINE 0x00000100 /* start after \n for multiline */
+#define PCRE_NOPARTIAL 0x00000200 /* can't use partial with this regex */
+#define PCRE_JCHANGED 0x00000400 /* j option used in regex */
+#define PCRE_HASCRORLF 0x00000800 /* explicit \r or \n in pattern */
+#define PCRE_HASTHEN 0x00001000 /* pattern contains (*THEN) */
+#define PCRE_MLSET 0x00002000 /* match limit set by regex */
+#define PCRE_RLSET 0x00004000 /* recursion limit set by regex */
#if defined COMPILE_PCRE8
#define PCRE_MODE PCRE_MODE8
@@ -1164,7 +1175,7 @@ time, run time, or study time, respectively. */
PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \
PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE| \
PCRE_DUPNAMES|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE| \
- PCRE_JAVASCRIPT_COMPAT|PCRE_UCP|PCRE_NO_START_OPTIMIZE)
+ PCRE_JAVASCRIPT_COMPAT|PCRE_UCP|PCRE_NO_START_OPTIMIZE|PCRE_NEVER_UTF)
#define PUBLIC_EXEC_OPTIONS \
(PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NOTEMPTY_ATSTART| \
@@ -1534,6 +1545,8 @@ a positive value. */
#define STRING_UTF_RIGHTPAR "UTF)"
#define STRING_UCP_RIGHTPAR "UCP)"
#define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)"
+#define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH="
+#define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION="
#else /* SUPPORT_UTF */
@@ -1795,6 +1808,8 @@ only. */
#define STRING_UTF_RIGHTPAR STR_U STR_T STR_F STR_RIGHT_PARENTHESIS
#define STRING_UCP_RIGHTPAR STR_U STR_C STR_P STR_RIGHT_PARENTHESIS
#define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS
+#define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN
+#define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN
#endif /* SUPPORT_UTF */
@@ -1835,6 +1850,7 @@ only. */
#define PT_PXSPACE 7 /* POSIX space - Z plus 9,10,11,12,13 */
#define PT_WORD 8 /* Word - L plus N plus underscore */
#define PT_CLIST 9 /* Pseudo-property: match character list */
+#define PT_UCNC 10 /* Universal Character nameable character */
/* Flag bits and data types for the extended class (OP_XCLASS) for classes that
contain characters with values greater than 255. */
@@ -2270,7 +2286,7 @@ enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9,
ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69,
- ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERRCOUNT };
+ ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERRCOUNT };
/* JIT compiling modes. The function list is indexed by them. */
enum { JIT_COMPILE, JIT_PARTIAL_SOFT_COMPILE, JIT_PARTIAL_HARD_COMPILE,
@@ -2280,48 +2296,49 @@ enum { JIT_COMPILE, JIT_PARTIAL_SOFT_COMPILE, JIT_PARTIAL_HARD_COMPILE,
code vector run on as long as necessary after the end. We store an explicit
offset to the name table so that if a regex is compiled on one host, saved, and
then run on another where the size of pointers is different, all might still
-be well. For the case of compiled-on-4 and run-on-8, we include an extra
-pointer that is always NULL. For future-proofing, a few dummy fields were
-originally included - even though you can never get this planning right - but
-there is only one left now.
-
-NOTE NOTE NOTE:
-Because people can now save and re-use compiled patterns, any additions to this
-structure should be made at the end, and something earlier (e.g. a new
-flag in the options or one of the dummy fields) should indicate that the new
-fields are present. Currently PCRE always sets the dummy fields to zero.
-NOTE NOTE NOTE
+be well.
+
+The size of the structure must be a multiple of 8 bytes. For the case of
+compiled-on-4 and run-on-8, we include an extra pointer that is always NULL so
+that there are an even number of pointers which therefore are a multiple of 8
+bytes.
+
+It is necessary to fork the struct for the 32 bit library, since it needs to
+use pcre_uint32 for first_char and req_char. We can't put an ifdef inside the
+typedef because pcretest needs access to the struct of the 8-, 16- and 32-bit
+variants.
+
+*** WARNING ***
+When new fields are added to these structures, remember to adjust the code in
+pcre_byte_order.c that is concerned with swapping the byte order of the fields
+when a compiled regex is reloaded on a host with different endianness.
+*** WARNING ***
+There is also similar byte-flipping code in pcretest.c, which is used for
+testing the byte-flipping features. It must also be kept in step.
+*** WARNING ***
*/
-#if defined COMPILE_PCRE8
-#define REAL_PCRE real_pcre
-#elif defined COMPILE_PCRE16
-#define REAL_PCRE real_pcre16
-#elif defined COMPILE_PCRE32
-#define REAL_PCRE real_pcre32
-#endif
-
-/* It is necessary to fork the struct for 32 bit, since it needs to use
- * pcre_uchar for first_char and req_char. Can't put an ifdef inside the
- * typedef since pcretest needs access to the struct of the 8-, 16-
- * and 32-bit variants. */
-
typedef struct real_pcre8_or_16 {
pcre_uint32 magic_number;
pcre_uint32 size; /* Total that was malloced */
pcre_uint32 options; /* Public options */
- pcre_uint16 flags; /* Private flags */
+ pcre_uint32 flags; /* Private flags */
+ pcre_uint32 limit_match; /* Limit set from regex */
+ pcre_uint32 limit_recursion; /* Limit set from regex */
+ pcre_uint16 first_char; /* Starting character */
+ pcre_uint16 req_char; /* This character must be seen */
pcre_uint16 max_lookbehind; /* Longest lookbehind (characters) */
pcre_uint16 top_bracket; /* Highest numbered group */
pcre_uint16 top_backref; /* Highest numbered back reference */
- pcre_uint16 first_char; /* Starting character */
- pcre_uint16 req_char; /* This character must be seen */
pcre_uint16 name_table_offset; /* Offset to name table that follows */
pcre_uint16 name_entry_size; /* Size of any name items */
pcre_uint16 name_count; /* Number of name items */
pcre_uint16 ref_count; /* Reference count */
+ pcre_uint16 dummy1; /* To ensure size is a multiple of 8 */
+ pcre_uint16 dummy2; /* To ensure size is a multiple of 8 */
+ pcre_uint16 dummy3; /* To ensure size is a multiple of 8 */
const pcre_uint8 *tables; /* Pointer to tables or NULL for std */
- const pcre_uint8 *nullpad; /* NULL padding */
+ void *nullpad; /* NULL padding */
} real_pcre8_or_16;
typedef struct real_pcre8_or_16 real_pcre;
@@ -2331,22 +2348,31 @@ typedef struct real_pcre32 {
pcre_uint32 magic_number;
pcre_uint32 size; /* Total that was malloced */
pcre_uint32 options; /* Public options */
- pcre_uint16 flags; /* Private flags */
+ pcre_uint32 flags; /* Private flags */
+ pcre_uint32 limit_match; /* Limit set from regex */
+ pcre_uint32 limit_recursion; /* Limit set from regex */
+ pcre_uint32 first_char; /* Starting character */
+ pcre_uint32 req_char; /* This character must be seen */
pcre_uint16 max_lookbehind; /* Longest lookbehind (characters) */
pcre_uint16 top_bracket; /* Highest numbered group */
pcre_uint16 top_backref; /* Highest numbered back reference */
- pcre_uint32 first_char; /* Starting character */
- pcre_uint32 req_char; /* This character must be seen */
pcre_uint16 name_table_offset; /* Offset to name table that follows */
pcre_uint16 name_entry_size; /* Size of any name items */
pcre_uint16 name_count; /* Number of name items */
pcre_uint16 ref_count; /* Reference count */
- pcre_uint16 dummy1; /* for later expansion */
- pcre_uint16 dummy2; /* for later expansion */
+ pcre_uint16 dummy; /* To ensure size is a multiple of 8 */
const pcre_uint8 *tables; /* Pointer to tables or NULL for std */
- void *nullpad; /* for later expansion */
+ void *nullpad; /* NULL padding */
} real_pcre32;
+#if defined COMPILE_PCRE8
+#define REAL_PCRE real_pcre
+#elif defined COMPILE_PCRE16
+#define REAL_PCRE real_pcre16
+#elif defined COMPILE_PCRE32
+#define REAL_PCRE real_pcre32
+#endif
+
/* Assert that the size of REAL_PCRE is divisible by 8 */
typedef int __assert_real_pcre_size_divisible_8[(sizeof(REAL_PCRE) % 8) == 0 ? 1 : -1];
@@ -2398,14 +2424,14 @@ typedef struct compile_data {
int names_found; /* Number of entries so far */
int name_entry_size; /* Size of each entry */
int workspace_size; /* Size of workspace */
- unsigned int bracount; /* Count of capturing parens as we compile */
+ unsigned int bracount; /* Count of capturing parens as we compile */
int final_bracount; /* Saved value after first pass */
int max_lookbehind; /* Maximum lookbehind (characters) */
int top_backref; /* Maximum back reference */
unsigned int backref_map; /* Bitmap of low back refs */
int assert_depth; /* Depth of nested assertions */
- int external_options; /* External (initial) options */
- int external_flags; /* External flag bits to be set */
+ pcre_uint32 external_options; /* External (initial) options */
+ pcre_uint32 external_flags; /* External flag bits to be set */
int req_varyopt; /* "After variable item" flag for reqbyte */
BOOL had_accept; /* (*ACCEPT) encountered */
BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */
@@ -2431,6 +2457,7 @@ typedef struct recursion_info {
unsigned int group_num; /* Number of group that was called */
int *offset_save; /* Pointer to start of saved offsets */
int saved_max; /* Number of saved offsets */
+ int saved_capture_last; /* Last capture number */
PCRE_PUCHAR subject_position; /* Position at start of recursion */
} recursion_info;
@@ -2467,12 +2494,13 @@ typedef struct match_data {
int nllen; /* Newline string length */
int name_count; /* Number of names in name table */
int name_entry_size; /* Size of entry in names table */
+ unsigned int skip_arg_count; /* For counting SKIP_ARGs */
+ unsigned int ignore_skip_arg; /* For re-run when SKIP arg name not found */
pcre_uchar *name_table; /* Table of names */
pcre_uchar nl[4]; /* Newline string when fixed */
const pcre_uint8 *lcc; /* Points to lower casing table */
const pcre_uint8 *fcc; /* Points to case-flipping table */
const pcre_uint8 *ctypes; /* Points to table of type maps */
- BOOL offset_overflow; /* Set if too many extractions */
BOOL notbol; /* NOTBOL flag */
BOOL noteol; /* NOTEOL flag */
BOOL utf; /* UTF-8 / UTF-16 flag */
@@ -2484,7 +2512,6 @@ typedef struct match_data {
BOOL hitend; /* Hit the end of the subject at some point */
BOOL bsr_anycrlf; /* \R is just any CRLF, not full Unicode */
BOOL hasthen; /* Pattern contains (*THEN) */
- BOOL ignore_skip_arg; /* For re-run when SKIP name not found */
const pcre_uchar *start_code; /* For use when recursing */
PCRE_PUCHAR start_subject; /* Start of the subject string */
PCRE_PUCHAR end_subject; /* End of the subject string */
@@ -2493,7 +2520,7 @@ typedef struct match_data {
PCRE_PUCHAR start_used_ptr; /* Earliest consulted character */
int partial; /* PARTIAL options */
int end_offset_top; /* Highwater mark at end of match */
- int capture_last; /* Most recent capture number */
+ pcre_int32 capture_last; /* Most recent capture number + overflow flag */
int start_offset; /* The start offset value */
int match_function_type; /* Set for certain special calls of MATCH() */
eptrblock *eptrchain; /* Chain of eptrblocks for tail recursions */
diff --git a/usr.sbin/nginx/src/pcre/pcre_tables.c b/usr.sbin/nginx/src/pcre/pcre_tables.c
index 34ee0488a81..f38ab52cbb8 100644
--- a/usr.sbin/nginx/src/pcre/pcre_tables.c
+++ b/usr.sbin/nginx/src/pcre/pcre_tables.c
@@ -346,6 +346,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
#define STRING_Xan0 STR_X STR_a STR_n "\0"
#define STRING_Xps0 STR_X STR_p STR_s "\0"
#define STRING_Xsp0 STR_X STR_s STR_p "\0"
+#define STRING_Xuc0 STR_X STR_u STR_c "\0"
#define STRING_Xwd0 STR_X STR_w STR_d "\0"
#define STRING_Yi0 STR_Y STR_i "\0"
#define STRING_Z0 STR_Z "\0"
@@ -493,6 +494,7 @@ const char PRIV(utt_names)[] =
STRING_Xan0
STRING_Xps0
STRING_Xsp0
+ STRING_Xuc0
STRING_Xwd0
STRING_Yi0
STRING_Z0
@@ -640,12 +642,13 @@ const ucp_type_table PRIV(utt)[] = {
{ 1011, PT_ALNUM, 0 },
{ 1015, PT_PXSPACE, 0 },
{ 1019, PT_SPACE, 0 },
- { 1023, PT_WORD, 0 },
- { 1027, PT_SC, ucp_Yi },
- { 1030, PT_GC, ucp_Z },
- { 1032, PT_PC, ucp_Zl },
- { 1035, PT_PC, ucp_Zp },
- { 1038, PT_PC, ucp_Zs }
+ { 1023, PT_UCNC, 0 },
+ { 1027, PT_WORD, 0 },
+ { 1031, PT_SC, ucp_Yi },
+ { 1034, PT_GC, ucp_Z },
+ { 1036, PT_PC, ucp_Zl },
+ { 1039, PT_PC, ucp_Zp },
+ { 1042, PT_PC, ucp_Zs }
};
const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
diff --git a/usr.sbin/nginx/src/pcre/pcre_xclass.c b/usr.sbin/nginx/src/pcre/pcre_xclass.c
index fa73cd8c9d5..d777acb57c9 100644
--- a/usr.sbin/nginx/src/pcre/pcre_xclass.c
+++ b/usr.sbin/nginx/src/pcre/pcre_xclass.c
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2012 University of Cambridge
+ Copyright (c) 1997-2013 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -180,6 +180,20 @@ while ((t = *data++) != XCL_END)
return !negated;
break;
+ case PT_UCNC:
+ if (c < 0xa0)
+ {
+ if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
+ c == CHAR_GRAVE_ACCENT) == (t == XCL_PROP))
+ return !negated;
+ }
+ else
+ {
+ if ((c < 0xd800 || c > 0xdfff) == (t == XCL_PROP))
+ return !negated;
+ }
+ break;
+
/* This should never occur, but compilers may mutter if there is no
default. */