diff options
author | 2013-06-14 18:55:11 +0000 | |
---|---|---|
committer | 2013-06-14 18:55:11 +0000 | |
commit | 6c0ee77c905b499597069f3161609a43bf472b69 (patch) | |
tree | 173b5e19c8e0fd098deee027831213dc762b8616 | |
parent | Correct interrupt moderation setting for 82598; tested on the CX4 version (diff) | |
download | wireguard-openbsd-6c0ee77c905b499597069f3161609a43bf472b69.tar.xz wireguard-openbsd-6c0ee77c905b499597069f3161609a43bf472b69.zip |
update the internal pcre library to 8.33
-rw-r--r-- | usr.sbin/nginx/README.OpenBSD | 1 | ||||
-rw-r--r-- | usr.sbin/nginx/src/pcre/pcre.h | 40 | ||||
-rw-r--r-- | usr.sbin/nginx/src/pcre/pcre_compile.c | 163 | ||||
-rw-r--r-- | usr.sbin/nginx/src/pcre/pcre_exec.c | 440 | ||||
-rw-r--r-- | usr.sbin/nginx/src/pcre/pcre_fullinfo.c | 12 | ||||
-rw-r--r-- | usr.sbin/nginx/src/pcre/pcre_internal.h | 167 | ||||
-rw-r--r-- | usr.sbin/nginx/src/pcre/pcre_tables.c | 15 | ||||
-rw-r--r-- | usr.sbin/nginx/src/pcre/pcre_xclass.c | 16 |
8 files changed, 563 insertions, 291 deletions
diff --git a/usr.sbin/nginx/README.OpenBSD b/usr.sbin/nginx/README.OpenBSD index 2a926e158fa..b157be9eb79 100644 --- a/usr.sbin/nginx/README.OpenBSD +++ b/usr.sbin/nginx/README.OpenBSD @@ -27,6 +27,7 @@ Modified files by OpenBSD: - src/os/unix/ngx_process_cycle.c - src/os/unix/ngx_process_cycle.h - src/pcre/pcre_compile.c +- src/pcre/pcre_exec.c The syslog patch has been applied from: https://github.com/yaoweibin/nginx_syslog_patch diff --git a/usr.sbin/nginx/src/pcre/pcre.h b/usr.sbin/nginx/src/pcre/pcre.h index a6aa4e934b2..0e479667672 100644 --- a/usr.sbin/nginx/src/pcre/pcre.h +++ b/usr.sbin/nginx/src/pcre/pcre.h @@ -5,7 +5,7 @@ /* This is the public header file for the PCRE library, to be #included by applications that call the PCRE functions. - Copyright (c) 1997-2012 University of Cambridge + Copyright (c) 1997-2013 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE. /* The current PCRE version information. */ #define PCRE_MAJOR 8 -#define PCRE_MINOR 32 +#define PCRE_MINOR 33 #define PCRE_PRERELEASE -#define PCRE_DATE 2012-11-30 +#define PCRE_DATE 2013-05-28 /* When an application links to a PCRE DLL in Windows, the symbols that are imported have to be identified as such. When building PCRE, the appropriate @@ -96,11 +96,14 @@ extern "C" { #endif /* Public options. Some are compile-time only, some are run-time only, and some -are both, so we keep them all distinct. However, almost all the bits in the -options word are now used. In the long run, we may have to re-use some of the -compile-time only bits for runtime options, or vice versa. Any of the -compile-time options may be inspected during studying (and therefore JIT -compiling). +are both. Most of the compile-time options are saved with the compiled regex so +that they can be inspected during studying (and therefore JIT compiling). Note +that pcre_study() has its own set of options. Originally, all the options +defined here used distinct bits. However, almost all the bits in a 32-bit word +are now used, so in order to conserve them, option bits that were previously +only recognized at matching time (i.e. by pcre_exec() or pcre_dfa_exec()) may +also be used for compile-time options that affect only compiling and are not +relevant for studying or JIT compiling. Some options for pcre_compile() change its behaviour but do not affect the behaviour of the execution functions. Other options are passed through to the @@ -142,7 +145,11 @@ with J. */ #define PCRE_AUTO_CALLOUT 0x00004000 /* C1 */ #define PCRE_PARTIAL_SOFT 0x00008000 /* E D J ) Synonyms */ #define PCRE_PARTIAL 0x00008000 /* E D J ) */ -#define PCRE_DFA_SHORTEST 0x00010000 /* D */ + +/* This pair use the same bit. */ +#define PCRE_NEVER_UTF 0x00010000 /* C1 ) Overlaid */ +#define PCRE_DFA_SHORTEST 0x00010000 /* D ) Overlaid */ + #define PCRE_DFA_RESTART 0x00020000 /* D */ #define PCRE_FIRSTLINE 0x00040000 /* C3 */ #define PCRE_DUPNAMES 0x00080000 /* C1 */ @@ -199,6 +206,7 @@ with J. */ #define PCRE_ERROR_DFA_BADRESTART (-30) #define PCRE_ERROR_JIT_BADOPTION (-31) #define PCRE_ERROR_BADLENGTH (-32) +#define PCRE_ERROR_UNSET (-33) /* Specific error codes for UTF-8 validity checks */ @@ -224,7 +232,7 @@ with J. */ #define PCRE_UTF8_ERR19 19 #define PCRE_UTF8_ERR20 20 #define PCRE_UTF8_ERR21 21 -#define PCRE_UTF8_ERR22 22 +#define PCRE_UTF8_ERR22 22 /* Unused (was non-character) */ /* Specific error codes for UTF-16 validity checks */ @@ -232,13 +240,13 @@ with J. */ #define PCRE_UTF16_ERR1 1 #define PCRE_UTF16_ERR2 2 #define PCRE_UTF16_ERR3 3 -#define PCRE_UTF16_ERR4 4 +#define PCRE_UTF16_ERR4 4 /* Unused (was non-character) */ /* Specific error codes for UTF-32 validity checks */ #define PCRE_UTF32_ERR0 0 #define PCRE_UTF32_ERR1 1 -#define PCRE_UTF32_ERR2 2 +#define PCRE_UTF32_ERR2 2 /* Unused (was non-character) */ #define PCRE_UTF32_ERR3 3 /* Request types for pcre_fullinfo() */ @@ -263,10 +271,12 @@ with J. */ #define PCRE_INFO_JIT 16 #define PCRE_INFO_JITSIZE 17 #define PCRE_INFO_MAXLOOKBEHIND 18 -#define PCRE_INFO_FIRSTCHARACTER 19 -#define PCRE_INFO_FIRSTCHARACTERFLAGS 20 +#define PCRE_INFO_FIRSTCHARACTER 19 +#define PCRE_INFO_FIRSTCHARACTERFLAGS 20 #define PCRE_INFO_REQUIREDCHAR 21 -#define PCRE_INFO_REQUIREDCHARFLAGS 22 +#define PCRE_INFO_REQUIREDCHARFLAGS 22 +#define PCRE_INFO_MATCHLIMIT 23 +#define PCRE_INFO_RECURSIONLIMIT 24 /* Request types for pcre_config(). Do not re-arrange, in order to remain compatible. */ diff --git a/usr.sbin/nginx/src/pcre/pcre_compile.c b/usr.sbin/nginx/src/pcre/pcre_compile.c index fb592a87422..2dbe9ca40a4 100644 --- a/usr.sbin/nginx/src/pcre/pcre_compile.c +++ b/usr.sbin/nginx/src/pcre/pcre_compile.c @@ -6,7 +6,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel - Copyright (c) 1997-2012 University of Cambridge + Copyright (c) 1997-2013 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -487,7 +487,7 @@ static const char error_texts[] = "a numbered reference must not be zero\0" "an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0" /* 60 */ - "(*VERB) not recognized\0" + "(*VERB) not recognized or malformed\0" "number is too big\0" "subpattern name expected\0" "digit expected after (?+\0" @@ -508,6 +508,7 @@ static const char error_texts[] = "name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0" "character value in \\u.... sequence is too large\0" "invalid UTF-32 string\0" + "setting UTF is disabled by the application\0" ; /* Table to identify digits and hex digits. This is used when compiling @@ -797,7 +798,8 @@ Otherwise further processing may be required. */ #ifndef EBCDIC /* ASCII/UTF-8 coding */ /* Not alphanumeric */ else if (c < CHAR_0 || c > CHAR_z) {} -else if ((i = escapes[c - CHAR_0]) != 0) { if (i > 0) c = (pcre_uint32)i; else escape = -i; } +else if ((i = escapes[c - CHAR_0]) != 0) + { if (i > 0) c = (pcre_uint32)i; else escape = -i; } #else /* EBCDIC coding */ /* Not alphanumeric */ @@ -847,11 +849,11 @@ else } #if defined COMPILE_PCRE8 - if (c > (unsigned int)(utf ? 0x10ffff : 0xff)) + if (c > (utf ? 0x10ffffU : 0xffU)) #elif defined COMPILE_PCRE16 - if (c > (utf ? 0x10ffff : 0xffff)) + if (c > (utf ? 0x10ffffU : 0xffffU)) #elif defined COMPILE_PCRE32 - if (utf && c > 0x10ffff) + if (utf && c > 0x10ffffU) #endif { *errorcodeptr = ERR76; @@ -1085,11 +1087,11 @@ else #endif #if defined COMPILE_PCRE8 - if (c > (unsigned int)(utf ? 0x10ffff : 0xff)) { overflow = TRUE; break; } + if (c > (utf ? 0x10ffffU : 0xffU)) { overflow = TRUE; break; } #elif defined COMPILE_PCRE16 - if (c > (utf ? 0x10ffff : 0xffff)) { overflow = TRUE; break; } + if (c > (utf ? 0x10ffffU : 0xffffU)) { overflow = TRUE; break; } #elif defined COMPILE_PCRE32 - if (utf && c > 0x10ffff) { overflow = TRUE; break; } + if (utf && c > 0x10ffffU) { overflow = TRUE; break; } #endif } @@ -1408,7 +1410,11 @@ if (ptr[0] == CHAR_LEFT_PARENTHESIS) { /* Handle specials such as (*SKIP) or (*UTF8) etc. */ - if (ptr[1] == CHAR_ASTERISK) ptr += 2; + if (ptr[1] == CHAR_ASTERISK) + { + ptr += 2; + while (ptr < cd->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++; + } /* Handle a normal, unnamed capturing parenthesis. */ @@ -2129,9 +2135,6 @@ for (;;) case OP_MARK: case OP_PRUNE_ARG: case OP_SKIP_ARG: - code += code[1]; - break; - case OP_THEN_ARG: code += code[1]; break; @@ -2249,9 +2252,6 @@ for (;;) case OP_MARK: case OP_PRUNE_ARG: case OP_SKIP_ARG: - code += code[1]; - break; - case OP_THEN_ARG: code += code[1]; break; @@ -2616,9 +2616,6 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE); case OP_MARK: case OP_PRUNE_ARG: case OP_SKIP_ARG: - code += code[1]; - break; - case OP_THEN_ARG: code += code[1]; break; @@ -2924,9 +2921,6 @@ get_othercase_range(pcre_uint32 *cptr, pcre_uint32 d, pcre_uint32 *ocptr, { pcre_uint32 c, othercase, next; unsigned int co; -#if defined(__GNUC__) && __GNUC__ == 3 -othercase = 0; /* XXX gcc -Wuninitialized */ -#endif /* Find the first character that has an other case. If it has multiple other cases, return its case offset value. */ @@ -3097,7 +3091,8 @@ value is a character, a negative value is an escape value. */ if (*ptr == CHAR_BACKSLASH) { int temperrorcode = 0; - escape = check_escape(&ptr, &next, &temperrorcode, cd->bracount, options, FALSE); + escape = check_escape(&ptr, &next, &temperrorcode, cd->bracount, options, + FALSE); if (temperrorcode != 0) return FALSE; ptr++; /* Point after the escape sequence */ } @@ -4280,14 +4275,12 @@ for (;; ptr++) if (c == CHAR_BACKSLASH) { - escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options, TRUE); - + escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options, + TRUE); if (*errorcodeptr != 0) goto FAILED; - - if (escape == 0) - c = ec; + if (escape == 0) c = ec; else if (escape == ESC_b) c = CHAR_BS; /* \b is backspace in a class */ - else if (escape == ESC_N) /* \N is not supported in a class */ + else if (escape == ESC_N) /* \N is not supported in a class */ { *errorcodeptr = ERR71; goto FAILED; @@ -4910,16 +4903,6 @@ for (;; ptr++) if (repeat_max == 0) goto END_REPEAT; - /*--------------------------------------------------------------------*/ - /* This code is obsolete from release 8.00; the restriction was finally - removed: */ - - /* All real repeats make it impossible to handle partial matching (maybe - one day we will be able to remove this restriction). */ - - /* if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL; */ - /*--------------------------------------------------------------------*/ - /* Combine the op_type with the repeat_type */ repeat_type += op_type; @@ -5066,16 +5049,6 @@ for (;; ptr++) goto END_REPEAT; } - /*--------------------------------------------------------------------*/ - /* This code is obsolete from release 8.00; the restriction was finally - removed: */ - - /* All real repeats make it impossible to handle partial matching (maybe - one day we will be able to remove this restriction). */ - - /* if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL; */ - /*--------------------------------------------------------------------*/ - if (repeat_min == 0 && repeat_max == -1) *code++ = OP_CRSTAR + repeat_type; else if (repeat_min == 1 && repeat_max == -1) @@ -5752,6 +5725,7 @@ for (;; ptr++) /* ------------------------------------------------------------ */ case CHAR_LEFT_PARENTHESIS: bravalue = OP_COND; /* Conditional group */ + tempptr = ptr; /* A condition can be an assertion, a number (referring to a numbered group), a name (referring to a named group), or 'R', referring to @@ -5764,14 +5738,28 @@ for (;; ptr++) be the recursive thing or the name 'R' (and similarly for 'R' followed by digits), and (b) a number could be a name that consists of digits. In both cases, we look for a name first; if not found, we try the other - cases. */ + cases. + + For compatibility with auto-callouts, we allow a callout to be + specified before a condition that is an assertion. First, check for the + syntax of a callout; if found, adjust the temporary pointer that is + used to check for an assertion condition. That's all that is needed! */ + + if (ptr[1] == CHAR_QUESTION_MARK && ptr[2] == CHAR_C) + { + for (i = 3;; i++) if (!IS_DIGIT(ptr[i])) break; + if (ptr[i] == CHAR_RIGHT_PARENTHESIS) + tempptr += i + 1; + } /* For conditions that are assertions, check the syntax, and then exit the switch. This will take control down to where bracketed groups, including assertions, are processed. */ - if (ptr[1] == CHAR_QUESTION_MARK && (ptr[2] == CHAR_EQUALS_SIGN || - ptr[2] == CHAR_EXCLAMATION_MARK || ptr[2] == CHAR_LESS_THAN_SIGN)) + if (tempptr[1] == CHAR_QUESTION_MARK && + (tempptr[2] == CHAR_EQUALS_SIGN || + tempptr[2] == CHAR_EXCLAMATION_MARK || + tempptr[2] == CHAR_LESS_THAN_SIGN)) break; /* Most other conditions use OP_CREF (a couple change to OP_RREF @@ -6741,10 +6729,9 @@ for (;; ptr++) case CHAR_BACKSLASH: tempptr = ptr; escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options, FALSE); - if (*errorcodeptr != 0) goto FAILED; - if (escape == 0) + if (escape == 0) /* The escape coded a single character */ c = ec; else { @@ -6910,11 +6897,12 @@ for (;; ptr++) can obtain the OP value by negating the escape value in the default situation when PCRE_UCP is not set. When it *is* set, we substitute Unicode property tests. Note that \b and \B do a one-character - lookbehind. */ + lookbehind, and \A also behaves as if it does. */ else { - if ((escape == ESC_b || escape == ESC_B) && cd->max_lookbehind == 0) + if ((escape == ESC_b || escape == ESC_B || escape == ESC_A) && + cd->max_lookbehind == 0) cd->max_lookbehind = 1; #ifdef SUPPORT_UCP if (escape >= ESC_DU && escape <= ESC_wu) @@ -7778,12 +7766,15 @@ pcre32_compile2(PCRE_SPTR32 pattern, int options, int *errorcodeptr, { REAL_PCRE *re; int length = 1; /* For final END opcode */ -pcre_uint32 firstchar, reqchar; pcre_int32 firstcharflags, reqcharflags; +pcre_uint32 firstchar, reqchar; +pcre_uint32 limit_match = PCRE_UINT32_MAX; +pcre_uint32 limit_recursion = PCRE_UINT32_MAX; int newline; int errorcode = 0; int skipatstart = 0; BOOL utf; +BOOL never_utf = FALSE; size_t size; pcre_uchar *code; const pcre_uchar *codestart; @@ -7843,9 +7834,15 @@ if ((options & ~PUBLIC_COMPILE_OPTIONS) != 0) goto PCRE_EARLY_ERROR_RETURN; } +/* If PCRE_NEVER_UTF is set, remember it. */ + +if ((options & PCRE_NEVER_UTF) != 0) never_utf = TRUE; + /* Check for global one-time settings at the start of the pattern, and remember the offset for later. */ +cd->external_flags = 0; /* Initialize here for LIMIT_MATCH/RECURSION */ + while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS && ptr[skipatstart+1] == CHAR_ASTERISK) { @@ -7876,6 +7873,44 @@ PCRE_UTF8 == PCRE_UTF16 == PCRE_UTF32. */ else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_NO_START_OPT_RIGHTPAR, 13) == 0) { skipatstart += 15; options |= PCRE_NO_START_OPTIMIZE; continue; } + else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LIMIT_MATCH_EQ, 12) == 0) + { + pcre_uint32 c = 0; + int p = skipatstart + 14; + while (isdigit(ptr[p])) + { + if (c > PCRE_UINT32_MAX / 10 - 1) break; /* Integer overflow */ + c = c*10 + ptr[p++] - CHAR_0; + } + if (ptr[p++] != CHAR_RIGHT_PARENTHESIS) break; + if (c < limit_match) + { + limit_match = c; + cd->external_flags |= PCRE_MLSET; + } + skipatstart = p; + continue; + } + + else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LIMIT_RECURSION_EQ, 16) == 0) + { + pcre_uint32 c = 0; + int p = skipatstart + 18; + while (isdigit(ptr[p])) + { + if (c > PCRE_UINT32_MAX / 10 - 1) break; /* Integer overflow check */ + c = c*10 + ptr[p++] - CHAR_0; + } + if (ptr[p++] != CHAR_RIGHT_PARENTHESIS) break; + if (c < limit_recursion) + { + limit_recursion = c; + cd->external_flags |= PCRE_RLSET; + } + skipatstart = p; + continue; + } + if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_CR_RIGHTPAR, 3) == 0) { skipatstart += 5; newnl = PCRE_NEWLINE_CR; } else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LF_RIGHTPAR, 3) == 0) @@ -7901,6 +7936,11 @@ PCRE_UTF8 == PCRE_UTF16 == PCRE_UTF32. */ /* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */ utf = (options & PCRE_UTF8) != 0; +if (utf && never_utf) + { + errorcode = ERR78; + goto PCRE_EARLY_ERROR_RETURN2; + } /* Can't support UTF unless PCRE has been compiled to include the code. The return of an error code from PRIV(valid_utf)() is a new feature, introduced in @@ -8023,7 +8063,6 @@ cd->req_varyopt = 0; cd->assert_depth = 0; cd->max_lookbehind = 0; cd->external_options = options; -cd->external_flags = 0; cd->open_caps = NULL; /* Now do the pre-compile. On error, errorcode will be set non-zero, so we @@ -8073,6 +8112,8 @@ re->magic_number = MAGIC_NUMBER; re->size = (int)size; re->options = cd->external_options; re->flags = cd->external_flags; +re->limit_match = limit_match; +re->limit_recursion = limit_recursion; re->first_char = 0; re->req_char = 0; re->name_table_offset = sizeof(REAL_PCRE) / sizeof(pcre_uchar); @@ -8082,7 +8123,9 @@ re->ref_count = 0; re->tables = (tables == PRIV(default_tables))? NULL : tables; re->nullpad = NULL; #ifdef COMPILE_PCRE32 -re->dummy1 = re->dummy2 = 0; +re->dummy = 0; +#else +re->dummy1 = re->dummy2 = re->dummy3 = 0; #endif /* The starting points of the name/number translation table and of the code are @@ -8142,7 +8185,7 @@ if (code - codestart > length) errorcode = ERR23; #ifdef SUPPORT_VALGRIND /* If the estimated length exceeds the really used length, mark the extra -allocated memory as unadressable, so that any out-of-bound reads can be +allocated memory as unaddressable, so that any out-of-bound reads can be detected. */ VALGRIND_MAKE_MEM_NOACCESS(code, (length - (code - codestart)) * sizeof(pcre_uchar)); #endif diff --git a/usr.sbin/nginx/src/pcre/pcre_exec.c b/usr.sbin/nginx/src/pcre/pcre_exec.c index 05d0e52d33c..48ba0ef4007 100644 --- a/usr.sbin/nginx/src/pcre/pcre_exec.c +++ b/usr.sbin/nginx/src/pcre/pcre_exec.c @@ -6,7 +6,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel - Copyright (c) 1997-2012 University of Cambridge + Copyright (c) 1997-2013 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -56,6 +56,20 @@ possible. There are also some static supporting functions. */ #undef min #undef max +/* The md->capture_last field uses the lower 16 bits for the last captured +substring (which can never be greater than 65535) and a bit in the top half +to mean "capture vector overflowed". This odd way of doing things was +implemented when it was realized that preserving and restoring the overflow bit +whenever the last capture number was saved/restored made for a neater +interface, and doing it this way saved on (a) another variable, which would +have increased the stack frame size (a big NO-NO in PCRE) and (b) another +separate set of save/restore instructions. The following defines are used in +implementing this. */ + +#define CAPLMASK 0x0000ffff /* The bits used for last_capture */ +#define OVFLMASK 0xffff0000 /* The bits used for the overflow flag */ +#define OVFLBIT 0x00010000 /* The bit that is set for overflow */ + /* Values for setting in md->match_function_type to indicate two special types of call to match(). We do it this way to save on using another stack variable, as stack usage is to be discouraged. */ @@ -73,13 +87,17 @@ defined PCRE_ERROR_xxx codes, which are all negative. */ negative to avoid the external error codes. */ #define MATCH_ACCEPT (-999) -#define MATCH_COMMIT (-998) -#define MATCH_KETRPOS (-997) -#define MATCH_ONCE (-996) +#define MATCH_KETRPOS (-998) +#define MATCH_ONCE (-997) +/* The next 5 must be kept together and in sequence so that a test that checks +for any one of them can use a range. */ +#define MATCH_COMMIT (-996) #define MATCH_PRUNE (-995) #define MATCH_SKIP (-994) #define MATCH_SKIP_ARG (-993) #define MATCH_THEN (-992) +#define MATCH_BACKTRACK_MAX MATCH_THEN +#define MATCH_BACKTRACK_MIN MATCH_COMMIT /* Maximum number of ints of offset to save on the stack for recursive calls. If the offset vector is bigger, malloc is used. This should be a multiple of 3, @@ -219,7 +237,7 @@ if (caseless) { while (length-- > 0) { - pcre_uchar cc, cp; + pcre_uint32 cc, cp; if (eptr >= md->end_subject) return -2; /* Partial match */ cc = RAWUCHARTEST(eptr); cp = RAWUCHARTEST(p); @@ -294,7 +312,7 @@ enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10, RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40, RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50, RM51, RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60, - RM61, RM62, RM63, RM64, RM65, RM66, RM67 }; + RM61, RM62, RM63, RM64, RM65, RM66, RM67, RM68 }; /* These versions of the macros use the stack, as normal. There are debugging versions and production versions. Note that the "rw" argument of RMATCH isn't @@ -416,10 +434,10 @@ typedef struct heapframe { int Xlength; int Xmax; int Xmin; - int Xnumber; + unsigned int Xnumber; int Xoffset; - int Xop; - int Xsave_capture_last; + unsigned int Xop; + pcre_int32 Xsave_capture_last; int Xsave_offset1, Xsave_offset2, Xsave_offset3; int Xstacksave[REC_STACK_SAVE_MAX]; @@ -634,8 +652,8 @@ int max; int min; unsigned int number; int offset; -pcre_uchar op; -int save_capture_last; +unsigned int op; +pcre_int32 save_capture_last; int save_offset1, save_offset2, save_offset3; int stacksave[REC_STACK_SAVE_MAX]; @@ -763,23 +781,16 @@ for (;;) case OP_FAIL: RRETURN(MATCH_NOMATCH); - /* COMMIT overrides PRUNE, SKIP, and THEN */ - case OP_COMMIT: RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb, RM52); - if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && - rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG && - rrc != MATCH_THEN) - RRETURN(rrc); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); RRETURN(MATCH_COMMIT); - /* PRUNE overrides THEN */ - case OP_PRUNE: RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb, RM51); - if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); RRETURN(MATCH_PRUNE); case OP_PRUNE_ARG: @@ -789,38 +800,39 @@ for (;;) eptrb, RM56); if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) && md->mark == NULL) md->mark = ecode + 2; - if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); RRETURN(MATCH_PRUNE); - /* SKIP overrides PRUNE and THEN */ - case OP_SKIP: RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb, RM53); - if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN) - RRETURN(rrc); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); md->start_match_ptr = eptr; /* Pass back current position */ RRETURN(MATCH_SKIP); /* Note that, for Perl compatibility, SKIP with an argument does NOT set - nomatch_mark. There is a flag that disables this opcode when re-matching a - pattern that ended with a SKIP for which there was not a matching MARK. */ + nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was + not a matching mark, we have to re-run the match, ignoring the SKIP_ARG + that failed and any that precede it (either they also failed, or were not + triggered). To do this, we maintain a count of executed SKIP_ARGs. If a + SKIP_ARG gets to top level, the match is re-run with md->ignore_skip_arg + set to the count of the one that failed. */ case OP_SKIP_ARG: - if (md->ignore_skip_arg) + md->skip_arg_count++; + if (md->skip_arg_count <= md->ignore_skip_arg) { ecode += PRIV(OP_lengths)[*ecode] + ecode[1]; break; } RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md, eptrb, RM57); - if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN) - RRETURN(rrc); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); /* Pass back the current skip name by overloading md->start_match_ptr and returning the special MATCH_SKIP_ARG return code. This will either be caught by a matching MARK, or get to the top, where it causes a rematch - with the md->ignore_skip_arg flag set. */ + with md->ignore_skip_arg set to the value of md->skip_arg_count. */ md->start_match_ptr = ecode + 2; RRETURN(MATCH_SKIP_ARG); @@ -1066,6 +1078,7 @@ for (;;) /* In all other cases, we have to make another call to match(). */ save_mark = md->mark; + save_capture_last = md->capture_last; RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb, RM2); @@ -1097,6 +1110,7 @@ for (;;) ecode += GET(ecode, 1); md->mark = save_mark; if (*ecode != OP_ALT) break; + md->capture_last = save_capture_last; } RRETURN(MATCH_NOMATCH); @@ -1218,6 +1232,7 @@ for (;;) POSSESSIVE_NON_CAPTURE: matched_once = FALSE; code_offset = (int)(ecode - md->start_code); + save_capture_last = md->capture_last; for (;;) { @@ -1247,6 +1262,7 @@ for (;;) if (rrc != MATCH_NOMATCH) RRETURN(rrc); ecode += GET(ecode, 1); if (*ecode != OP_ALT) break; + md->capture_last = save_capture_last; } if (matched_once || allow_zero) @@ -1291,13 +1307,16 @@ for (;;) cb.pattern_position = GET(ecode, LINK_SIZE + 3); cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE); cb.capture_top = offset_top/2; - cb.capture_last = md->capture_last; + cb.capture_last = md->capture_last & CAPLMASK; + /* Internal change requires this for API compatibility. */ + if (cb.capture_last == 0) cb.capture_last = -1; cb.callout_data = md->callout_data; cb.mark = md->nomatch_mark; if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH); if (rrc < 0) RRETURN(rrc); } ecode += PRIV(OP_lengths)[OP_CALLOUT]; + codelink -= PRIV(OP_lengths)[OP_CALLOUT]; } condcode = ecode[LINK_SIZE+1]; @@ -1513,7 +1532,7 @@ for (;;) to close any currently open capturing brackets. */ case OP_CLOSE: - number = GET2(ecode, 1); + number = GET2(ecode, 1); /* Must be less than 65536 */ offset = number << 1; #ifdef PCRE_DEBUG @@ -1521,8 +1540,8 @@ for (;;) printf("\n"); #endif - md->capture_last = number; - if (offset >= md->offset_max) md->offset_overflow = TRUE; else + md->capture_last = (md->capture_last & OVFLMASK) | number; + if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else { md->offset_vector[offset] = md->offset_vector[md->offset_end - number]; @@ -1584,28 +1603,49 @@ for (;;) } else condassert = FALSE; + /* Loop for each branch */ + do { RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4); + + /* A match means that the assertion is true; break out of the loop + that matches its alternatives. */ + if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) { mstart = md->start_match_ptr; /* In case \K reset it */ break; } + + /* If not matched, restore the previous mark setting. */ + md->mark = save_mark; - /* A COMMIT failure must fail the entire assertion, without trying any - subsequent branches. */ + /* See comment in the code for capturing groups above about handling + THEN. */ - if (rrc == MATCH_COMMIT) RRETURN(MATCH_NOMATCH); + if (rrc == MATCH_THEN) + { + next = ecode + GET(ecode,1); + if (md->start_match_ptr < next && + (*ecode == OP_ALT || *next == OP_ALT)) + rrc = MATCH_NOMATCH; + } - /* PCRE does not allow THEN to escape beyond an assertion; it - is treated as NOMATCH. */ + /* Anything other than NOMATCH causes the entire assertion to fail, + passing back the return code. This includes COMMIT, SKIP, PRUNE and an + uncaptured THEN, which means they take their normal effect. This + consistent approach does not always have exactly the same effect as in + Perl. */ - if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); ecode += GET(ecode, 1); } - while (*ecode == OP_ALT); + while (*ecode == OP_ALT); /* Continue for next alternative */ + + /* If we have tried all the alternative branches, the assertion has + failed. If not, we broke out after a match. */ if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH); @@ -1613,17 +1653,16 @@ for (;;) if (condassert) RRETURN(MATCH_MATCH); - /* Continue from after the assertion, updating the offsets high water - mark, since extracts may have been taken during the assertion. */ + /* Continue from after a successful assertion, updating the offsets high + water mark, since extracts may have been taken during the assertion. */ do ecode += GET(ecode,1); while (*ecode == OP_ALT); ecode += 1 + LINK_SIZE; offset_top = md->end_offset_top; continue; - /* Negative assertion: all branches must fail to match. Encountering SKIP, - PRUNE, or COMMIT means we must assume failure without checking subsequent - branches. */ + /* Negative assertion: all branches must fail to match for the assertion to + succeed. */ case OP_ASSERT_NOT: case OP_ASSERTBACK_NOT: @@ -1635,28 +1674,64 @@ for (;;) } else condassert = FALSE; + /* Loop for each alternative branch. */ + do { RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5); - md->mark = save_mark; - if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH); - if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT) + md->mark = save_mark; /* Always restore the mark setting */ + + switch(rrc) { - do ecode += GET(ecode,1); while (*ecode == OP_ALT); + case MATCH_MATCH: /* A successful match means */ + case MATCH_ACCEPT: /* the assertion has failed. */ + RRETURN(MATCH_NOMATCH); + + case MATCH_NOMATCH: /* Carry on with next branch */ break; + + /* See comment in the code for capturing groups above about handling + THEN. */ + + case MATCH_THEN: + next = ecode + GET(ecode,1); + if (md->start_match_ptr < next && + (*ecode == OP_ALT || *next == OP_ALT)) + { + rrc = MATCH_NOMATCH; + break; + } + /* Otherwise fall through. */ + + /* COMMIT, SKIP, PRUNE, and an uncaptured THEN cause the whole + assertion to fail to match, without considering any more alternatives. + Failing to match means the assertion is true. This is a consistent + approach, but does not always have the same effect as in Perl. */ + + case MATCH_COMMIT: + case MATCH_SKIP: + case MATCH_SKIP_ARG: + case MATCH_PRUNE: + do ecode += GET(ecode,1); while (*ecode == OP_ALT); + goto NEG_ASSERT_TRUE; /* Break out of alternation loop */ + + /* Anything else is an error */ + + default: + RRETURN(rrc); } - /* PCRE does not allow THEN to escape beyond an assertion; it is treated - as NOMATCH. */ + /* Continue with next branch */ - if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); ecode += GET(ecode,1); } while (*ecode == OP_ALT); - if (condassert) RRETURN(MATCH_MATCH); /* Condition assertion */ + /* All branches in the assertion failed to match. */ - ecode += 1 + LINK_SIZE; + NEG_ASSERT_TRUE: + if (condassert) RRETURN(MATCH_MATCH); /* Condition assertion */ + ecode += 1 + LINK_SIZE; /* Continue with current branch */ continue; /* Move the subject pointer back. This occurs only at the start of @@ -1716,7 +1791,9 @@ for (;;) cb.pattern_position = GET(ecode, 2); cb.next_item_length = GET(ecode, 2 + LINK_SIZE); cb.capture_top = offset_top/2; - cb.capture_last = md->capture_last; + cb.capture_last = md->capture_last & CAPLMASK; + /* Internal change requires this for API compatibility. */ + if (cb.capture_last == 0) cb.capture_last = -1; cb.callout_data = md->callout_data; cb.mark = md->nomatch_mark; if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH); @@ -1762,6 +1839,7 @@ for (;;) /* Add to "recursing stack" */ new_recursive.group_num = recno; + new_recursive.saved_capture_last = md->capture_last; new_recursive.subject_position = eptr; new_recursive.prevrec = md->recursive; md->recursive = &new_recursive; @@ -1785,8 +1863,9 @@ for (;;) new_recursive.saved_max * sizeof(int)); /* OK, now we can do the recursion. After processing each alternative, - restore the offset data. If there were nested recursions, md->recursive - might be changed, so reset it before looping. */ + restore the offset data and the last captured value. If there were nested + recursions, md->recursive might be changed, so reset it before looping. + */ DPRINTF(("Recursing into group %d\n", new_recursive.group_num)); cbegroup = (*callpat >= OP_SBRA); @@ -1797,6 +1876,7 @@ for (;;) md, eptrb, RM6); memcpy(md->offset_vector, new_recursive.offset_save, new_recursive.saved_max * sizeof(int)); + md->capture_last = new_recursive.saved_capture_last; md->recursive = new_recursive.prevrec; if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) { @@ -1813,11 +1893,16 @@ for (;;) goto RECURSION_MATCHED; /* Exit loop; end processing */ } - /* PCRE does not allow THEN or COMMIT to escape beyond a recursion; it - is treated as NOMATCH. */ + /* PCRE does not allow THEN, SKIP, PRUNE or COMMIT to escape beyond a + recursion; they cause a NOMATCH for the entire recursion. These codes + are defined in a range that can be tested for. */ + + if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX) + RRETURN(MATCH_NOMATCH); - else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN && - rrc != MATCH_COMMIT) + /* Any return code other than NOMATCH is an error. */ + + if (rrc != MATCH_NOMATCH) { DPRINTF(("Recursion gave error %d\n", rrc)); if (new_recursive.offset_save != stacksave) @@ -1947,8 +2032,8 @@ for (;;) /* Deal with capturing */ - md->capture_last = number; - if (offset >= md->offset_max) md->offset_overflow = TRUE; else + md->capture_last = (md->capture_last & OVFLMASK) | number; + if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else { /* If offset is greater than offset_top, it means that we are "skipping" a capturing group, and that group's offsets must be marked @@ -2604,6 +2689,13 @@ for (;;) } break; + case PT_UCNC: + if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT || + c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) || + c >= 0xe000) == (op == OP_NOTPROP)) + RRETURN(MATCH_NOMATCH); + break; + /* This should never occur */ default: @@ -3190,7 +3282,7 @@ for (;;) if (fc < 128) { - pcre_uchar cc = RAWUCHAR(eptr); + pcre_uint32 cc = RAWUCHAR(eptr); if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH); ecode++; eptr++; @@ -3295,7 +3387,22 @@ for (;;) max = rep_max[c]; /* zero for max => infinity */ if (max == 0) max = INT_MAX; - /* Common code for all repeated single-character matches. */ + /* Common code for all repeated single-character matches. We first check + for the minimum number of characters. If the minimum equals the maximum, we + are done. Otherwise, if minimizing, check the rest of the pattern for a + match; if there isn't one, advance up to the maximum, one character at a + time. + + If maximizing, advance up to the maximum number of matching characters, + until eptr is past the end of the maximum run. If possessive, we are + then done (no backing up). Otherwise, match at this position; anything + other than no match is immediately returned. For nomatch, back up one + character, unless we are matching \R and the last thing matched was + \r\n, in which case, back up two bytes. When we reach the first optional + character position, we can save stack by doing a tail recurse. + + The various UTF/non-UTF and caseful/caseless cases are handled separately, + for speed. */ REPEATCHAR: #ifdef SUPPORT_UTF @@ -3379,13 +3486,12 @@ for (;;) } } - if (possessive) continue; - + if (possessive) continue; /* No backtracking */ for(;;) { + if (eptr == pp) goto TAIL_RECURSE; RMATCH(eptr, ecode, offset_top, md, eptrb, RM23); if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (eptr == pp) { RRETURN(MATCH_NOMATCH); } #ifdef SUPPORT_UCP eptr--; BACKCHAR(eptr); @@ -3439,8 +3545,7 @@ for (;;) for (i = 1; i <= min; i++) { - pcre_uchar cc; - + pcre_uint32 cc; /* Faster than pcre_uchar */ if (eptr >= md->end_subject) { SCHECK_PARTIAL(); @@ -3455,8 +3560,7 @@ for (;;) { for (fi = min;; fi++) { - pcre_uchar cc; - + pcre_uint32 cc; /* Faster than pcre_uchar */ RMATCH(eptr, ecode, offset_top, md, eptrb, RM24); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (fi >= max) RRETURN(MATCH_NOMATCH); @@ -3476,8 +3580,7 @@ for (;;) pp = eptr; for (i = min; i < max; i++) { - pcre_uchar cc; - + pcre_uint32 cc; /* Faster than pcre_uchar */ if (eptr >= md->end_subject) { SCHECK_PARTIAL(); @@ -3488,10 +3591,10 @@ for (;;) eptr++; } - if (possessive) continue; - - while (eptr >= pp) + if (possessive) continue; /* No backtracking */ + for (;;) { + if (eptr == pp) goto TAIL_RECURSE; RMATCH(eptr, ecode, offset_top, md, eptrb, RM25); eptr--; if (rrc != MATCH_NOMATCH) RRETURN(rrc); @@ -3546,10 +3649,10 @@ for (;;) if (fc != RAWUCHARTEST(eptr)) break; eptr++; } - if (possessive) continue; - - while (eptr >= pp) + if (possessive) continue; /* No backtracking */ + for (;;) { + if (eptr == pp) goto TAIL_RECURSE; RMATCH(eptr, ecode, offset_top, md, eptrb, RM27); eptr--; if (rrc != MATCH_NOMATCH) RRETURN(rrc); @@ -3726,7 +3829,7 @@ for (;;) } } else -#endif +#endif /* SUPPORT_UTF */ /* Not UTF mode */ { for (i = 1; i <= min; i++) @@ -3764,7 +3867,7 @@ for (;;) } } else -#endif +#endif /*SUPPORT_UTF */ /* Not UTF mode */ { for (fi = min;; fi++) @@ -3806,17 +3909,18 @@ for (;;) if (fc == d || (unsigned int)foc == d) break; eptr += len; } - if (possessive) continue; + if (possessive) continue; /* No backtracking */ for(;;) { + if (eptr == pp) goto TAIL_RECURSE; RMATCH(eptr, ecode, offset_top, md, eptrb, RM30); if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (eptr-- == pp) break; /* Stop if tried at original pos */ + eptr--; BACKCHAR(eptr); } } else -#endif +#endif /* SUPPORT_UTF */ /* Not UTF mode */ { for (i = min; i < max; i++) @@ -3829,9 +3933,10 @@ for (;;) if (fc == *eptr || foc == *eptr) break; eptr++; } - if (possessive) continue; - while (eptr >= pp) + if (possessive) continue; /* No backtracking */ + for (;;) { + if (eptr == pp) goto TAIL_RECURSE; RMATCH(eptr, ecode, offset_top, md, eptrb, RM31); if (rrc != MATCH_NOMATCH) RRETURN(rrc); eptr--; @@ -3941,12 +4046,13 @@ for (;;) if (fc == d) break; eptr += len; } - if (possessive) continue; + if (possessive) continue; /* No backtracking */ for(;;) { + if (eptr == pp) goto TAIL_RECURSE; RMATCH(eptr, ecode, offset_top, md, eptrb, RM34); if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (eptr-- == pp) break; /* Stop if tried at original pos */ + eptr--; BACKCHAR(eptr); } } @@ -3964,9 +4070,10 @@ for (;;) if (fc == *eptr) break; eptr++; } - if (possessive) continue; - while (eptr >= pp) + if (possessive) continue; /* No backtracking */ + for (;;) { + if (eptr == pp) goto TAIL_RECURSE; RMATCH(eptr, ecode, offset_top, md, eptrb, RM35); if (rrc != MATCH_NOMATCH) RRETURN(rrc); eptr--; @@ -4225,6 +4332,22 @@ for (;;) } break; + case PT_UCNC: + for (i = 1; i <= min; i++) + { + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(c, eptr); + if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT || + c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) || + c >= 0xe000) == prop_fail_result) + RRETURN(MATCH_NOMATCH); + } + break; + /* This should not occur */ default: @@ -4430,8 +4553,7 @@ for (;;) case OP_DIGIT: for (i = 1; i <= min; i++) { - pcre_uchar cc; - + pcre_uint32 cc; if (eptr >= md->end_subject) { SCHECK_PARTIAL(); @@ -4448,8 +4570,7 @@ for (;;) case OP_NOT_WHITESPACE: for (i = 1; i <= min; i++) { - pcre_uchar cc; - + pcre_uint32 cc; if (eptr >= md->end_subject) { SCHECK_PARTIAL(); @@ -4466,8 +4587,7 @@ for (;;) case OP_WHITESPACE: for (i = 1; i <= min; i++) { - pcre_uchar cc; - + pcre_uint32 cc; if (eptr >= md->end_subject) { SCHECK_PARTIAL(); @@ -4484,8 +4604,7 @@ for (;;) case OP_NOT_WORDCHAR: for (i = 1; i <= min; i++) { - pcre_uchar cc; - + pcre_uint32 cc; if (eptr >= md->end_subject) { SCHECK_PARTIAL(); @@ -4502,8 +4621,7 @@ for (;;) case OP_WORDCHAR: for (i = 1; i <= min; i++) { - pcre_uchar cc; - + pcre_uint32 cc; if (eptr >= md->end_subject) { SCHECK_PARTIAL(); @@ -4976,6 +5094,25 @@ for (;;) } /* Control never gets here */ + case PT_UCNC: + for (fi = min;; fi++) + { + RMATCH(eptr, ecode, offset_top, md, eptrb, RM68); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (fi >= max) RRETURN(MATCH_NOMATCH); + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(c, eptr); + if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT || + c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) || + c >= 0xe000) == prop_fail_result) + RRETURN(MATCH_NOMATCH); + } + /* Control never gets here */ + /* This should never occur */ default: RRETURN(PCRE_ERROR_INTERNAL); @@ -5470,18 +5607,37 @@ for (;;) GOT_MAX: break; + case PT_UCNC: + for (i = min; i < max; i++) + { + int len = 1; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLENTEST(c, eptr, len); + if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT || + c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) || + c >= 0xe000) == prop_fail_result) + break; + eptr += len; + } + break; + default: RRETURN(PCRE_ERROR_INTERNAL); } /* eptr is now past the end of the maximum run */ - if (possessive) continue; + if (possessive) continue; /* No backtracking */ for(;;) { + if (eptr == pp) goto TAIL_RECURSE; RMATCH(eptr, ecode, offset_top, md, eptrb, RM44); if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (eptr-- == pp) break; /* Stop if tried at original pos */ + eptr--; if (utf) BACKCHAR(eptr); } } @@ -5518,13 +5674,13 @@ for (;;) /* eptr is now past the end of the maximum run */ - if (possessive) continue; - + if (possessive) continue; /* No backtracking */ for(;;) { + if (eptr == pp) goto TAIL_RECURSE; RMATCH(eptr, ecode, offset_top, md, eptrb, RM45); if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (eptr-- == pp) break; /* Stop if tried at original pos */ + eptr--; for (;;) /* Move back over one extended */ { if (!utf) c = *eptr; else @@ -5799,18 +5955,13 @@ for (;;) RRETURN(PCRE_ERROR_INTERNAL); } - /* eptr is now past the end of the maximum run. If possessive, we are - done (no backing up). Otherwise, match at this position; anything other - than no match is immediately returned. For nomatch, back up one - character, unless we are matching \R and the last thing matched was - \r\n, in which case, back up two bytes. */ - - if (possessive) continue; + if (possessive) continue; /* No backtracking */ for(;;) { + if (eptr == pp) goto TAIL_RECURSE; RMATCH(eptr, ecode, offset_top, md, eptrb, RM46); if (rrc != MATCH_NOMATCH) RRETURN(rrc); - if (eptr-- == pp) break; /* Stop if tried at original pos */ + eptr--; BACKCHAR(eptr); if (ctype == OP_ANYNL && eptr > pp && RAWUCHAR(eptr) == CHAR_NL && RAWUCHAR(eptr - 1) == CHAR_CR) eptr--; @@ -6048,15 +6199,10 @@ for (;;) RRETURN(PCRE_ERROR_INTERNAL); } - /* eptr is now past the end of the maximum run. If possessive, we are - done (no backing up). Otherwise, match at this position; anything other - than no match is immediately returned. For nomatch, back up one - character (byte), unless we are matching \R and the last thing matched - was \r\n, in which case, back up two bytes. */ - - if (possessive) continue; - while (eptr >= pp) + if (possessive) continue; /* No backtracking */ + for (;;) { + if (eptr == pp) goto TAIL_RECURSE; RMATCH(eptr, ecode, offset_top, md, eptrb, RM47); if (rrc != MATCH_NOMATCH) RRETURN(rrc); eptr--; @@ -6111,7 +6257,7 @@ switch (frame->Xwhere) LBL(32) LBL(34) LBL(42) LBL(46) #ifdef SUPPORT_UCP LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45) - LBL(59) LBL(60) LBL(61) LBL(62) LBL(67) + LBL(59) LBL(60) LBL(61) LBL(62) LBL(67) LBL(68) #endif /* SUPPORT_UCP */ #endif /* SUPPORT_UTF */ default: @@ -6264,6 +6410,7 @@ const pcre_uint8 *start_bits = NULL; PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset; PCRE_PUCHAR end_subject; PCRE_PUCHAR start_partial = NULL; +PCRE_PUCHAR match_partial = NULL; PCRE_PUCHAR req_char_ptr = start_match - 1; const pcre_study_data *study; @@ -6393,6 +6540,8 @@ md->callout_data = NULL; tables = re->tables; +/* The two limit values override the defaults, whatever their value. */ + if (extra_data != NULL) { register unsigned int flags = extra_data->flags; @@ -6407,6 +6556,15 @@ if (extra_data != NULL) if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables; } +/* Limits in the regex override only if they are smaller. */ + +if ((re->flags & PCRE_MLSET) != 0 && re->limit_match < md->match_limit) + md->match_limit = re->limit_match; + +if ((re->flags & PCRE_RLSET) != 0 && + re->limit_recursion < md->match_limit_recursion) + md->match_limit_recursion = re->limit_recursion; + /* If the exec call supplied NULL for tables, use the inbuilt ones. This is a feature that makes it possible to save compiled regex and re-use them in other programs later. */ @@ -6432,7 +6590,7 @@ end_subject = md->end_subject; md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0; md->use_ucp = (re->options & PCRE_UCP) != 0; md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0; -md->ignore_skip_arg = FALSE; +md->ignore_skip_arg = 0; /* Some options are unpacked into BOOL variables in the hope that testing them will be faster than individual option bits. */ @@ -6542,11 +6700,9 @@ if (re->top_backref > 0 && re->top_backref >= ocount/3) DPRINTF(("Got memory to hold back references\n")); } else md->offset_vector = offsets; - md->offset_end = ocount; md->offset_max = (2*ocount)/3; -md->offset_overflow = FALSE; -md->capture_last = -1; +md->capture_last = 0; /* Reset the working variable associated with each extraction. These should never be used unless previously set, but they get saved and restored, and so we @@ -6816,8 +6972,13 @@ for(;;) md->match_call_count = 0; md->match_function_type = 0; md->end_offset_top = 0; + md->skip_arg_count = 0; rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0); - if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr; + if (md->hitend && start_partial == NULL) + { + start_partial = md->start_used_ptr; + match_partial = start_match; + } switch(rc) { @@ -6830,14 +6991,14 @@ for(;;) case MATCH_SKIP_ARG: new_start_match = start_match; - md->ignore_skip_arg = TRUE; + md->ignore_skip_arg = md->skip_arg_count; break; - /* SKIP passes back the next starting point explicitly, but if it is the - same as the match we have just done, treat it as NOMATCH. */ + /* SKIP passes back the next starting point explicitly, but if it is no + greater than the match we have just done, treat it as NOMATCH. */ case MATCH_SKIP: - if (md->start_match_ptr != start_match) + if (md->start_match_ptr > start_match) { new_start_match = md->start_match_ptr; break; @@ -6845,12 +7006,12 @@ for(;;) /* Fall through */ /* NOMATCH and PRUNE advance by one character. THEN at this level acts - exactly like PRUNE. Unset the ignore SKIP-with-argument flag. */ + exactly like PRUNE. Unset ignore SKIP-with-argument. */ case MATCH_NOMATCH: case MATCH_PRUNE: case MATCH_THEN: - md->ignore_skip_arg = FALSE; + md->ignore_skip_arg = 0; new_start_match = start_match + 1; #ifdef SUPPORT_UTF if (utf) @@ -6943,7 +7104,7 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT) (arg_offset_max - 2) * sizeof(int)); DPRINTF(("Copied offsets from temporary memory\n")); } - if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE; + if (md->end_offset_top > arg_offset_max) md->capture_last |= OVFLBIT; DPRINTF(("Freeing temporary memory\n")); (PUBL(free))(md->offset_vector); } @@ -6951,7 +7112,8 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT) /* Set the return code to the number of captured strings, or 0 if there were too many to fit into the vector. */ - rc = (md->offset_overflow && md->end_offset_top >= arg_offset_max)? + rc = ((md->capture_last & OVFLBIT) != 0 && + md->end_offset_top >= arg_offset_max)? 0 : md->end_offset_top/2; /* If there is space in the offset vector, set any unused pairs at the end of @@ -7024,6 +7186,8 @@ if (start_partial != NULL) { offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject); offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject); + if (offsetcount > 2) + offsets[2] = (int)(match_partial - (PCRE_PUCHAR)subject); } rc = PCRE_ERROR_PARTIAL; } diff --git a/usr.sbin/nginx/src/pcre/pcre_fullinfo.c b/usr.sbin/nginx/src/pcre/pcre_fullinfo.c index 02c9df4a826..c4eb5c0e1ad 100644 --- a/usr.sbin/nginx/src/pcre/pcre_fullinfo.c +++ b/usr.sbin/nginx/src/pcre/pcre_fullinfo.c @@ -6,7 +6,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel - Copyright (c) 1997-2012 University of Cambridge + Copyright (c) 1997-2013 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -222,6 +222,16 @@ switch (what) *((int *)where) = re->max_lookbehind; break; + case PCRE_INFO_MATCHLIMIT: + if ((re->flags & PCRE_MLSET) == 0) return PCRE_ERROR_UNSET; + *((pcre_uint32 *)where) = re->limit_match; + break; + + case PCRE_INFO_RECURSIONLIMIT: + if ((re->flags & PCRE_RLSET) == 0) return PCRE_ERROR_UNSET; + *((pcre_uint32 *)where) = re->limit_recursion; + break; + default: return PCRE_ERROR_BADOPTION; } diff --git a/usr.sbin/nginx/src/pcre/pcre_internal.h b/usr.sbin/nginx/src/pcre/pcre_internal.h index f3cb001fea7..307069ca9d6 100644 --- a/usr.sbin/nginx/src/pcre/pcre_internal.h +++ b/usr.sbin/nginx/src/pcre/pcre_internal.h @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel - Copyright (c) 1997-2012 University of Cambridge + Copyright (c) 1997-2013 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -194,23 +194,31 @@ preprocessor time in standard C environments. */ typedef unsigned char pcre_uint8; #if USHRT_MAX == 65535 - typedef unsigned short pcre_uint16; - typedef short pcre_int16; +typedef unsigned short pcre_uint16; +typedef short pcre_int16; +#define PCRE_UINT16_MAX USHRT_MAX +#define PCRE_INT16_MAX SHRT_MAX #elif UINT_MAX == 65535 - typedef unsigned int pcre_uint16; - typedef int pcre_int16; +typedef unsigned int pcre_uint16; +typedef int pcre_int16; +#define PCRE_UINT16_MAX UINT_MAX +#define PCRE_INT16_MAX INT_MAX #else -# error Cannot determine a type for 16-bit unsigned integers +#error Cannot determine a type for 16-bit integers #endif -#if UINT_MAX == 4294967295 - typedef unsigned int pcre_uint32; - typedef int pcre_int32; -#elif ULONG_MAX == 4294967295 - typedef unsigned long int pcre_uint32; - typedef long int pcre_int32; +#if UINT_MAX == 4294967295U +typedef unsigned int pcre_uint32; +typedef int pcre_int32; +#define PCRE_UINT32_MAX UINT_MAX +#define PCRE_INT32_MAX INT_MAX +#elif ULONG_MAX == 4294967295UL +typedef unsigned long int pcre_uint32; +typedef long int pcre_int32; +#define PCRE_UINT32_MAX ULONG_MAX +#define PCRE_INT32_MAX LONG_MAX #else -# error Cannot determine a type for 32-bit unsigned integers +#error Cannot determine a type for 32-bit integers #endif /* When checking for integer overflow in pcre_compile(), we need to handle @@ -1121,23 +1129,26 @@ other. NOTE: The values also appear in pcre_jit_compile.c. */ /* Private flags containing information about the compiled regex. They used to -live at the top end of the options word, but that got almost full, so now they -are in a 16-bit flags word. From release 8.00, PCRE_NOPARTIAL is unused, as -the restrictions on partial matching have been lifted. It remains for backwards +live at the top end of the options word, but that got almost full, so they were +moved to a 16-bit flags word - which got almost full, so now they are in a +32-bit flags word. From release 8.00, PCRE_NOPARTIAL is unused, as the +restrictions on partial matching have been lifted. It remains for backwards compatibility. */ -#define PCRE_MODE8 0x0001 /* compiled in 8 bit mode */ -#define PCRE_MODE16 0x0002 /* compiled in 16 bit mode */ -#define PCRE_MODE32 0x0004 /* compiled in 32 bit mode */ -#define PCRE_FIRSTSET 0x0010 /* first_char is set */ -#define PCRE_FCH_CASELESS 0x0020 /* caseless first char */ -#define PCRE_REQCHSET 0x0040 /* req_byte is set */ -#define PCRE_RCH_CASELESS 0x0080 /* caseless requested char */ -#define PCRE_STARTLINE 0x0100 /* start after \n for multiline */ -#define PCRE_NOPARTIAL 0x0200 /* can't use partial with this regex */ -#define PCRE_JCHANGED 0x0400 /* j option used in regex */ -#define PCRE_HASCRORLF 0x0800 /* explicit \r or \n in pattern */ -#define PCRE_HASTHEN 0x1000 /* pattern contains (*THEN) */ +#define PCRE_MODE8 0x00000001 /* compiled in 8 bit mode */ +#define PCRE_MODE16 0x00000002 /* compiled in 16 bit mode */ +#define PCRE_MODE32 0x00000004 /* compiled in 32 bit mode */ +#define PCRE_FIRSTSET 0x00000010 /* first_char is set */ +#define PCRE_FCH_CASELESS 0x00000020 /* caseless first char */ +#define PCRE_REQCHSET 0x00000040 /* req_byte is set */ +#define PCRE_RCH_CASELESS 0x00000080 /* caseless requested char */ +#define PCRE_STARTLINE 0x00000100 /* start after \n for multiline */ +#define PCRE_NOPARTIAL 0x00000200 /* can't use partial with this regex */ +#define PCRE_JCHANGED 0x00000400 /* j option used in regex */ +#define PCRE_HASCRORLF 0x00000800 /* explicit \r or \n in pattern */ +#define PCRE_HASTHEN 0x00001000 /* pattern contains (*THEN) */ +#define PCRE_MLSET 0x00002000 /* match limit set by regex */ +#define PCRE_RLSET 0x00004000 /* recursion limit set by regex */ #if defined COMPILE_PCRE8 #define PCRE_MODE PCRE_MODE8 @@ -1164,7 +1175,7 @@ time, run time, or study time, respectively. */ PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \ PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE| \ PCRE_DUPNAMES|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE| \ - PCRE_JAVASCRIPT_COMPAT|PCRE_UCP|PCRE_NO_START_OPTIMIZE) + PCRE_JAVASCRIPT_COMPAT|PCRE_UCP|PCRE_NO_START_OPTIMIZE|PCRE_NEVER_UTF) #define PUBLIC_EXEC_OPTIONS \ (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NOTEMPTY_ATSTART| \ @@ -1534,6 +1545,8 @@ a positive value. */ #define STRING_UTF_RIGHTPAR "UTF)" #define STRING_UCP_RIGHTPAR "UCP)" #define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)" +#define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH=" +#define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION=" #else /* SUPPORT_UTF */ @@ -1795,6 +1808,8 @@ only. */ #define STRING_UTF_RIGHTPAR STR_U STR_T STR_F STR_RIGHT_PARENTHESIS #define STRING_UCP_RIGHTPAR STR_U STR_C STR_P STR_RIGHT_PARENTHESIS #define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS +#define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN +#define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN #endif /* SUPPORT_UTF */ @@ -1835,6 +1850,7 @@ only. */ #define PT_PXSPACE 7 /* POSIX space - Z plus 9,10,11,12,13 */ #define PT_WORD 8 /* Word - L plus N plus underscore */ #define PT_CLIST 9 /* Pseudo-property: match character list */ +#define PT_UCNC 10 /* Universal Character nameable character */ /* Flag bits and data types for the extended class (OP_XCLASS) for classes that contain characters with values greater than 255. */ @@ -2270,7 +2286,7 @@ enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9, ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, - ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERRCOUNT }; + ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERRCOUNT }; /* JIT compiling modes. The function list is indexed by them. */ enum { JIT_COMPILE, JIT_PARTIAL_SOFT_COMPILE, JIT_PARTIAL_HARD_COMPILE, @@ -2280,48 +2296,49 @@ enum { JIT_COMPILE, JIT_PARTIAL_SOFT_COMPILE, JIT_PARTIAL_HARD_COMPILE, code vector run on as long as necessary after the end. We store an explicit offset to the name table so that if a regex is compiled on one host, saved, and then run on another where the size of pointers is different, all might still -be well. For the case of compiled-on-4 and run-on-8, we include an extra -pointer that is always NULL. For future-proofing, a few dummy fields were -originally included - even though you can never get this planning right - but -there is only one left now. - -NOTE NOTE NOTE: -Because people can now save and re-use compiled patterns, any additions to this -structure should be made at the end, and something earlier (e.g. a new -flag in the options or one of the dummy fields) should indicate that the new -fields are present. Currently PCRE always sets the dummy fields to zero. -NOTE NOTE NOTE +be well. + +The size of the structure must be a multiple of 8 bytes. For the case of +compiled-on-4 and run-on-8, we include an extra pointer that is always NULL so +that there are an even number of pointers which therefore are a multiple of 8 +bytes. + +It is necessary to fork the struct for the 32 bit library, since it needs to +use pcre_uint32 for first_char and req_char. We can't put an ifdef inside the +typedef because pcretest needs access to the struct of the 8-, 16- and 32-bit +variants. + +*** WARNING *** +When new fields are added to these structures, remember to adjust the code in +pcre_byte_order.c that is concerned with swapping the byte order of the fields +when a compiled regex is reloaded on a host with different endianness. +*** WARNING *** +There is also similar byte-flipping code in pcretest.c, which is used for +testing the byte-flipping features. It must also be kept in step. +*** WARNING *** */ -#if defined COMPILE_PCRE8 -#define REAL_PCRE real_pcre -#elif defined COMPILE_PCRE16 -#define REAL_PCRE real_pcre16 -#elif defined COMPILE_PCRE32 -#define REAL_PCRE real_pcre32 -#endif - -/* It is necessary to fork the struct for 32 bit, since it needs to use - * pcre_uchar for first_char and req_char. Can't put an ifdef inside the - * typedef since pcretest needs access to the struct of the 8-, 16- - * and 32-bit variants. */ - typedef struct real_pcre8_or_16 { pcre_uint32 magic_number; pcre_uint32 size; /* Total that was malloced */ pcre_uint32 options; /* Public options */ - pcre_uint16 flags; /* Private flags */ + pcre_uint32 flags; /* Private flags */ + pcre_uint32 limit_match; /* Limit set from regex */ + pcre_uint32 limit_recursion; /* Limit set from regex */ + pcre_uint16 first_char; /* Starting character */ + pcre_uint16 req_char; /* This character must be seen */ pcre_uint16 max_lookbehind; /* Longest lookbehind (characters) */ pcre_uint16 top_bracket; /* Highest numbered group */ pcre_uint16 top_backref; /* Highest numbered back reference */ - pcre_uint16 first_char; /* Starting character */ - pcre_uint16 req_char; /* This character must be seen */ pcre_uint16 name_table_offset; /* Offset to name table that follows */ pcre_uint16 name_entry_size; /* Size of any name items */ pcre_uint16 name_count; /* Number of name items */ pcre_uint16 ref_count; /* Reference count */ + pcre_uint16 dummy1; /* To ensure size is a multiple of 8 */ + pcre_uint16 dummy2; /* To ensure size is a multiple of 8 */ + pcre_uint16 dummy3; /* To ensure size is a multiple of 8 */ const pcre_uint8 *tables; /* Pointer to tables or NULL for std */ - const pcre_uint8 *nullpad; /* NULL padding */ + void *nullpad; /* NULL padding */ } real_pcre8_or_16; typedef struct real_pcre8_or_16 real_pcre; @@ -2331,22 +2348,31 @@ typedef struct real_pcre32 { pcre_uint32 magic_number; pcre_uint32 size; /* Total that was malloced */ pcre_uint32 options; /* Public options */ - pcre_uint16 flags; /* Private flags */ + pcre_uint32 flags; /* Private flags */ + pcre_uint32 limit_match; /* Limit set from regex */ + pcre_uint32 limit_recursion; /* Limit set from regex */ + pcre_uint32 first_char; /* Starting character */ + pcre_uint32 req_char; /* This character must be seen */ pcre_uint16 max_lookbehind; /* Longest lookbehind (characters) */ pcre_uint16 top_bracket; /* Highest numbered group */ pcre_uint16 top_backref; /* Highest numbered back reference */ - pcre_uint32 first_char; /* Starting character */ - pcre_uint32 req_char; /* This character must be seen */ pcre_uint16 name_table_offset; /* Offset to name table that follows */ pcre_uint16 name_entry_size; /* Size of any name items */ pcre_uint16 name_count; /* Number of name items */ pcre_uint16 ref_count; /* Reference count */ - pcre_uint16 dummy1; /* for later expansion */ - pcre_uint16 dummy2; /* for later expansion */ + pcre_uint16 dummy; /* To ensure size is a multiple of 8 */ const pcre_uint8 *tables; /* Pointer to tables or NULL for std */ - void *nullpad; /* for later expansion */ + void *nullpad; /* NULL padding */ } real_pcre32; +#if defined COMPILE_PCRE8 +#define REAL_PCRE real_pcre +#elif defined COMPILE_PCRE16 +#define REAL_PCRE real_pcre16 +#elif defined COMPILE_PCRE32 +#define REAL_PCRE real_pcre32 +#endif + /* Assert that the size of REAL_PCRE is divisible by 8 */ typedef int __assert_real_pcre_size_divisible_8[(sizeof(REAL_PCRE) % 8) == 0 ? 1 : -1]; @@ -2398,14 +2424,14 @@ typedef struct compile_data { int names_found; /* Number of entries so far */ int name_entry_size; /* Size of each entry */ int workspace_size; /* Size of workspace */ - unsigned int bracount; /* Count of capturing parens as we compile */ + unsigned int bracount; /* Count of capturing parens as we compile */ int final_bracount; /* Saved value after first pass */ int max_lookbehind; /* Maximum lookbehind (characters) */ int top_backref; /* Maximum back reference */ unsigned int backref_map; /* Bitmap of low back refs */ int assert_depth; /* Depth of nested assertions */ - int external_options; /* External (initial) options */ - int external_flags; /* External flag bits to be set */ + pcre_uint32 external_options; /* External (initial) options */ + pcre_uint32 external_flags; /* External flag bits to be set */ int req_varyopt; /* "After variable item" flag for reqbyte */ BOOL had_accept; /* (*ACCEPT) encountered */ BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */ @@ -2431,6 +2457,7 @@ typedef struct recursion_info { unsigned int group_num; /* Number of group that was called */ int *offset_save; /* Pointer to start of saved offsets */ int saved_max; /* Number of saved offsets */ + int saved_capture_last; /* Last capture number */ PCRE_PUCHAR subject_position; /* Position at start of recursion */ } recursion_info; @@ -2467,12 +2494,13 @@ typedef struct match_data { int nllen; /* Newline string length */ int name_count; /* Number of names in name table */ int name_entry_size; /* Size of entry in names table */ + unsigned int skip_arg_count; /* For counting SKIP_ARGs */ + unsigned int ignore_skip_arg; /* For re-run when SKIP arg name not found */ pcre_uchar *name_table; /* Table of names */ pcre_uchar nl[4]; /* Newline string when fixed */ const pcre_uint8 *lcc; /* Points to lower casing table */ const pcre_uint8 *fcc; /* Points to case-flipping table */ const pcre_uint8 *ctypes; /* Points to table of type maps */ - BOOL offset_overflow; /* Set if too many extractions */ BOOL notbol; /* NOTBOL flag */ BOOL noteol; /* NOTEOL flag */ BOOL utf; /* UTF-8 / UTF-16 flag */ @@ -2484,7 +2512,6 @@ typedef struct match_data { BOOL hitend; /* Hit the end of the subject at some point */ BOOL bsr_anycrlf; /* \R is just any CRLF, not full Unicode */ BOOL hasthen; /* Pattern contains (*THEN) */ - BOOL ignore_skip_arg; /* For re-run when SKIP name not found */ const pcre_uchar *start_code; /* For use when recursing */ PCRE_PUCHAR start_subject; /* Start of the subject string */ PCRE_PUCHAR end_subject; /* End of the subject string */ @@ -2493,7 +2520,7 @@ typedef struct match_data { PCRE_PUCHAR start_used_ptr; /* Earliest consulted character */ int partial; /* PARTIAL options */ int end_offset_top; /* Highwater mark at end of match */ - int capture_last; /* Most recent capture number */ + pcre_int32 capture_last; /* Most recent capture number + overflow flag */ int start_offset; /* The start offset value */ int match_function_type; /* Set for certain special calls of MATCH() */ eptrblock *eptrchain; /* Chain of eptrblocks for tail recursions */ diff --git a/usr.sbin/nginx/src/pcre/pcre_tables.c b/usr.sbin/nginx/src/pcre/pcre_tables.c index 34ee0488a81..f38ab52cbb8 100644 --- a/usr.sbin/nginx/src/pcre/pcre_tables.c +++ b/usr.sbin/nginx/src/pcre/pcre_tables.c @@ -346,6 +346,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */ #define STRING_Xan0 STR_X STR_a STR_n "\0" #define STRING_Xps0 STR_X STR_p STR_s "\0" #define STRING_Xsp0 STR_X STR_s STR_p "\0" +#define STRING_Xuc0 STR_X STR_u STR_c "\0" #define STRING_Xwd0 STR_X STR_w STR_d "\0" #define STRING_Yi0 STR_Y STR_i "\0" #define STRING_Z0 STR_Z "\0" @@ -493,6 +494,7 @@ const char PRIV(utt_names)[] = STRING_Xan0 STRING_Xps0 STRING_Xsp0 + STRING_Xuc0 STRING_Xwd0 STRING_Yi0 STRING_Z0 @@ -640,12 +642,13 @@ const ucp_type_table PRIV(utt)[] = { { 1011, PT_ALNUM, 0 }, { 1015, PT_PXSPACE, 0 }, { 1019, PT_SPACE, 0 }, - { 1023, PT_WORD, 0 }, - { 1027, PT_SC, ucp_Yi }, - { 1030, PT_GC, ucp_Z }, - { 1032, PT_PC, ucp_Zl }, - { 1035, PT_PC, ucp_Zp }, - { 1038, PT_PC, ucp_Zs } + { 1023, PT_UCNC, 0 }, + { 1027, PT_WORD, 0 }, + { 1031, PT_SC, ucp_Yi }, + { 1034, PT_GC, ucp_Z }, + { 1036, PT_PC, ucp_Zl }, + { 1039, PT_PC, ucp_Zp }, + { 1042, PT_PC, ucp_Zs } }; const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table); diff --git a/usr.sbin/nginx/src/pcre/pcre_xclass.c b/usr.sbin/nginx/src/pcre/pcre_xclass.c index fa73cd8c9d5..d777acb57c9 100644 --- a/usr.sbin/nginx/src/pcre/pcre_xclass.c +++ b/usr.sbin/nginx/src/pcre/pcre_xclass.c @@ -6,7 +6,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel - Copyright (c) 1997-2012 University of Cambridge + Copyright (c) 1997-2013 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -180,6 +180,20 @@ while ((t = *data++) != XCL_END) return !negated; break; + case PT_UCNC: + if (c < 0xa0) + { + if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT || + c == CHAR_GRAVE_ACCENT) == (t == XCL_PROP)) + return !negated; + } + else + { + if ((c < 0xd800 || c > 0xdfff) == (t == XCL_PROP)) + return !negated; + } + break; + /* This should never occur, but compilers may mutter if there is no default. */ |