update the internal pcre library to 8.33

author: robert <robert@openbsd.org> 2013-06-14 18:55:11 +0000
committer: robert <robert@openbsd.org> 2013-06-14 18:55:11 +0000
commit: 6c0ee77c905b499597069f3161609a43bf472b69 (patch)
tree: 173b5e19c8e0fd098deee027831213dc762b8616
parent: Correct interrupt moderation setting for 82598; tested on the CX4 version (diff)
download: wireguard-openbsd-6c0ee77c905b499597069f3161609a43bf472b69.tar.xz
wireguard-openbsd-6c0ee77c905b499597069f3161609a43bf472b69.zip
8 files changed, 563 insertions, 291 deletions
diff --git a/usr.sbin/nginx/README.OpenBSD b/usr.sbin/nginx/README.OpenBSD
index 2a926e158fa..b157be9eb79 100644
--- a/usr.sbin/nginx/README.OpenBSD
+++ b/usr.sbin/nginx/README.OpenBSD
@@ -27,6 +27,7 @@ Modified files by OpenBSD:
 - src/os/unix/ngx_process_cycle.c
 - src/os/unix/ngx_process_cycle.h
 - src/pcre/pcre_compile.c
+- src/pcre/pcre_exec.c
 
 The syslog patch has been applied from:
 https://github.com/yaoweibin/nginx_syslog_patch
diff --git a/usr.sbin/nginx/src/pcre/pcre.h b/usr.sbin/nginx/src/pcre/pcre.h
index a6aa4e934b2..0e479667672 100644
--- a/usr.sbin/nginx/src/pcre/pcre.h
+++ b/usr.sbin/nginx/src/pcre/pcre.h
@@ -5,7 +5,7 @@
 /* This is the public header file for the PCRE library, to be #included by
 applications that call the PCRE functions.
 
-           Copyright (c) 1997-2012 University of Cambridge
+           Copyright (c) 1997-2013 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
 /* The current PCRE version information. */
 
 #define PCRE_MAJOR          8
-#define PCRE_MINOR          32
+#define PCRE_MINOR          33
 #define PCRE_PRERELEASE     
-#define PCRE_DATE           2012-11-30
+#define PCRE_DATE           2013-05-28
 
 /* When an application links to a PCRE DLL in Windows, the symbols that are
 imported have to be identified as such. When building PCRE, the appropriate
@@ -96,11 +96,14 @@ extern "C" {
 #endif
 
 /* Public options. Some are compile-time only, some are run-time only, and some
-are both, so we keep them all distinct. However, almost all the bits in the
-options word are now used. In the long run, we may have to re-use some of the
-compile-time only bits for runtime options, or vice versa. Any of the
-compile-time options may be inspected during studying (and therefore JIT
-compiling).
+are both. Most of the compile-time options are saved with the compiled regex so
+that they can be inspected during studying (and therefore JIT compiling). Note
+that pcre_study() has its own set of options. Originally, all the options
+defined here used distinct bits. However, almost all the bits in a 32-bit word
+are now used, so in order to conserve them, option bits that were previously
+only recognized at matching time (i.e. by pcre_exec() or pcre_dfa_exec()) may
+also be used for compile-time options that affect only compiling and are not
+relevant for studying or JIT compiling.
 
 Some options for pcre_compile() change its behaviour but do not affect the
 behaviour of the execution functions. Other options are passed through to the
@@ -142,7 +145,11 @@ with J. */
 #define PCRE_AUTO_CALLOUT       0x00004000  /* C1       */
 #define PCRE_PARTIAL_SOFT       0x00008000  /*    E D J  ) Synonyms */
 #define PCRE_PARTIAL            0x00008000  /*    E D J  )          */
-#define PCRE_DFA_SHORTEST       0x00010000  /*      D   */
+
+/* This pair use the same bit. */
+#define PCRE_NEVER_UTF          0x00010000  /* C1        ) Overlaid */
+#define PCRE_DFA_SHORTEST       0x00010000  /*      D    ) Overlaid */
+
 #define PCRE_DFA_RESTART        0x00020000  /*      D   */
 #define PCRE_FIRSTLINE          0x00040000  /* C3       */
 #define PCRE_DUPNAMES           0x00080000  /* C1       */
@@ -199,6 +206,7 @@ with J. */
 #define PCRE_ERROR_DFA_BADRESTART  (-30)
 #define PCRE_ERROR_JIT_BADOPTION   (-31)
 #define PCRE_ERROR_BADLENGTH       (-32)
+#define PCRE_ERROR_UNSET           (-33)
 
 /* Specific error codes for UTF-8 validity checks */
 
@@ -224,7 +232,7 @@ with J. */
 #define PCRE_UTF8_ERR19             19
 #define PCRE_UTF8_ERR20             20
 #define PCRE_UTF8_ERR21             21
-#define PCRE_UTF8_ERR22             22
+#define PCRE_UTF8_ERR22             22  /* Unused (was non-character) */
 
 /* Specific error codes for UTF-16 validity checks */
 
@@ -232,13 +240,13 @@ with J. */
 #define PCRE_UTF16_ERR1              1
 #define PCRE_UTF16_ERR2              2
 #define PCRE_UTF16_ERR3              3
-#define PCRE_UTF16_ERR4              4
+#define PCRE_UTF16_ERR4              4  /* Unused (was non-character) */
 
 /* Specific error codes for UTF-32 validity checks */
 
 #define PCRE_UTF32_ERR0              0
 #define PCRE_UTF32_ERR1              1
-#define PCRE_UTF32_ERR2              2
+#define PCRE_UTF32_ERR2              2  /* Unused (was non-character) */
 #define PCRE_UTF32_ERR3              3
 
 /* Request types for pcre_fullinfo() */
@@ -263,10 +271,12 @@ with J. */
 #define PCRE_INFO_JIT               16
 #define PCRE_INFO_JITSIZE           17
 #define PCRE_INFO_MAXLOOKBEHIND     18
-#define PCRE_INFO_FIRSTCHARACTER      19
-#define PCRE_INFO_FIRSTCHARACTERFLAGS   20
+#define PCRE_INFO_FIRSTCHARACTER    19
+#define PCRE_INFO_FIRSTCHARACTERFLAGS 20
 #define PCRE_INFO_REQUIREDCHAR      21
-#define PCRE_INFO_REQUIREDCHARFLAGS   22
+#define PCRE_INFO_REQUIREDCHARFLAGS 22
+#define PCRE_INFO_MATCHLIMIT        23
+#define PCRE_INFO_RECURSIONLIMIT    24
 
 /* Request types for pcre_config(). Do not re-arrange, in order to remain
 compatible. */
diff --git a/usr.sbin/nginx/src/pcre/pcre_compile.c b/usr.sbin/nginx/src/pcre/pcre_compile.c
index fb592a87422..2dbe9ca40a4 100644
--- a/usr.sbin/nginx/src/pcre/pcre_compile.c
+++ b/usr.sbin/nginx/src/pcre/pcre_compile.c
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
-           Copyright (c) 1997-2012 University of Cambridge
+           Copyright (c) 1997-2013 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -487,7 +487,7 @@ static const char error_texts[] =
   "a numbered reference must not be zero\0"
   "an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0"
   /* 60 */
-  "(*VERB) not recognized\0"
+  "(*VERB) not recognized or malformed\0"
   "number is too big\0"
   "subpattern name expected\0"
   "digit expected after (?+\0"
@@ -508,6 +508,7 @@ static const char error_texts[] =
   "name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
   "character value in \\u.... sequence is too large\0"
   "invalid UTF-32 string\0"
+  "setting UTF is disabled by the application\0"
   ;
 
 /* Table to identify digits and hex digits. This is used when compiling
@@ -797,7 +798,8 @@ Otherwise further processing may be required. */
 #ifndef EBCDIC  /* ASCII/UTF-8 coding */
 /* Not alphanumeric */
 else if (c < CHAR_0 || c > CHAR_z) {}
-else if ((i = escapes[c - CHAR_0]) != 0) { if (i > 0) c = (pcre_uint32)i; else escape = -i; }
+else if ((i = escapes[c - CHAR_0]) != 0)
+  { if (i > 0) c = (pcre_uint32)i; else escape = -i; }
 
 #else           /* EBCDIC coding */
 /* Not alphanumeric */
@@ -847,11 +849,11 @@ else
           }
 
 #if defined COMPILE_PCRE8
-        if (c > (unsigned int)(utf ? 0x10ffff : 0xff))
+        if (c > (utf ? 0x10ffffU : 0xffU))
 #elif defined COMPILE_PCRE16
-        if (c > (utf ? 0x10ffff : 0xffff))
+        if (c > (utf ? 0x10ffffU : 0xffffU))
 #elif defined COMPILE_PCRE32
-        if (utf && c > 0x10ffff)
+        if (utf && c > 0x10ffffU)
 #endif
           {
           *errorcodeptr = ERR76;
@@ -1085,11 +1087,11 @@ else
 #endif
 
 #if defined COMPILE_PCRE8
-        if (c > (unsigned int)(utf ? 0x10ffff : 0xff)) { overflow = TRUE; break; }
+        if (c > (utf ? 0x10ffffU : 0xffU)) { overflow = TRUE; break; }
 #elif defined COMPILE_PCRE16
-        if (c > (utf ? 0x10ffff : 0xffff)) { overflow = TRUE; break; }
+        if (c > (utf ? 0x10ffffU : 0xffffU)) { overflow = TRUE; break; }
 #elif defined COMPILE_PCRE32
-        if (utf && c > 0x10ffff) { overflow = TRUE; break; }
+        if (utf && c > 0x10ffffU) { overflow = TRUE; break; }
 #endif
         }
 
@@ -1408,7 +1410,11 @@ if (ptr[0] == CHAR_LEFT_PARENTHESIS)
   {
   /* Handle specials such as (*SKIP) or (*UTF8) etc. */
 
-  if (ptr[1] == CHAR_ASTERISK) ptr += 2;
+  if (ptr[1] == CHAR_ASTERISK)
+    {
+    ptr += 2;
+    while (ptr < cd->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
+    }
 
   /* Handle a normal, unnamed capturing parenthesis. */
 
@@ -2129,9 +2135,6 @@ for (;;)
       case OP_MARK:
       case OP_PRUNE_ARG:
       case OP_SKIP_ARG:
-      code += code[1];
-      break;
-
       case OP_THEN_ARG:
       code += code[1];
       break;
@@ -2249,9 +2252,6 @@ for (;;)
       case OP_MARK:
       case OP_PRUNE_ARG:
       case OP_SKIP_ARG:
-      code += code[1];
-      break;
-
       case OP_THEN_ARG:
       code += code[1];
       break;
@@ -2616,9 +2616,6 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
     case OP_MARK:
     case OP_PRUNE_ARG:
     case OP_SKIP_ARG:
-    code += code[1];
-    break;
-
     case OP_THEN_ARG:
     code += code[1];
     break;
@@ -2924,9 +2921,6 @@ get_othercase_range(pcre_uint32 *cptr, pcre_uint32 d, pcre_uint32 *ocptr,
 {
 pcre_uint32 c, othercase, next;
 unsigned int co;
-#if defined(__GNUC__) && __GNUC__ == 3
-othercase = 0; /* XXX gcc -Wuninitialized */
-#endif
 
 /* Find the first character that has an other case. If it has multiple other
 cases, return its case offset value. */
@@ -3097,7 +3091,8 @@ value is a character, a negative value is an escape value. */
 if (*ptr == CHAR_BACKSLASH)
   {
   int temperrorcode = 0;
-  escape = check_escape(&ptr, &next, &temperrorcode, cd->bracount, options, FALSE);
+  escape = check_escape(&ptr, &next, &temperrorcode, cd->bracount, options,
+    FALSE);
   if (temperrorcode != 0) return FALSE;
   ptr++;    /* Point after the escape sequence */
   }
@@ -4280,14 +4275,12 @@ for (;; ptr++)
 
       if (c == CHAR_BACKSLASH)
         {
-        escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options, TRUE);
-
+        escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options,
+          TRUE);
         if (*errorcodeptr != 0) goto FAILED;
-
-        if (escape == 0)
-          c = ec;
+        if (escape == 0) c = ec;
         else if (escape == ESC_b) c = CHAR_BS; /* \b is backspace in a class */
-        else if (escape == ESC_N)            /* \N is not supported in a class */
+        else if (escape == ESC_N)          /* \N is not supported in a class */
           {
           *errorcodeptr = ERR71;
           goto FAILED;
@@ -4910,16 +4903,6 @@ for (;; ptr++)
 
       if (repeat_max == 0) goto END_REPEAT;
 
-      /*--------------------------------------------------------------------*/
-      /* This code is obsolete from release 8.00; the restriction was finally
-      removed: */
-
-      /* All real repeats make it impossible to handle partial matching (maybe
-      one day we will be able to remove this restriction). */
-
-      /* if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL; */
-      /*--------------------------------------------------------------------*/
-
       /* Combine the op_type with the repeat_type */
 
       repeat_type += op_type;
@@ -5066,16 +5049,6 @@ for (;; ptr++)
         goto END_REPEAT;
         }
 
-      /*--------------------------------------------------------------------*/
-      /* This code is obsolete from release 8.00; the restriction was finally
-      removed: */
-
-      /* All real repeats make it impossible to handle partial matching (maybe
-      one day we will be able to remove this restriction). */
-
-      /* if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL; */
-      /*--------------------------------------------------------------------*/
-
       if (repeat_min == 0 && repeat_max == -1)
         *code++ = OP_CRSTAR + repeat_type;
       else if (repeat_min == 1 && repeat_max == -1)
@@ -5752,6 +5725,7 @@ for (;; ptr++)
         /* ------------------------------------------------------------ */
         case CHAR_LEFT_PARENTHESIS:
         bravalue = OP_COND;       /* Conditional group */
+        tempptr = ptr;
 
         /* A condition can be an assertion, a number (referring to a numbered
         group), a name (referring to a named group), or 'R', referring to
@@ -5764,14 +5738,28 @@ for (;; ptr++)
         be the recursive thing or the name 'R' (and similarly for 'R' followed
         by digits), and (b) a number could be a name that consists of digits.
         In both cases, we look for a name first; if not found, we try the other
-        cases. */
+        cases.
+
+        For compatibility with auto-callouts, we allow a callout to be
+        specified before a condition that is an assertion. First, check for the
+        syntax of a callout; if found, adjust the temporary pointer that is
+        used to check for an assertion condition. That's all that is needed! */
+
+        if (ptr[1] == CHAR_QUESTION_MARK && ptr[2] == CHAR_C)
+          {
+          for (i = 3;; i++) if (!IS_DIGIT(ptr[i])) break;
+          if (ptr[i] == CHAR_RIGHT_PARENTHESIS)
+            tempptr += i + 1;
+          }
 
         /* For conditions that are assertions, check the syntax, and then exit
         the switch. This will take control down to where bracketed groups,
         including assertions, are processed. */
 
-        if (ptr[1] == CHAR_QUESTION_MARK && (ptr[2] == CHAR_EQUALS_SIGN ||
-            ptr[2] == CHAR_EXCLAMATION_MARK || ptr[2] == CHAR_LESS_THAN_SIGN))
+        if (tempptr[1] == CHAR_QUESTION_MARK &&
+              (tempptr[2] == CHAR_EQUALS_SIGN ||
+               tempptr[2] == CHAR_EXCLAMATION_MARK ||
+               tempptr[2] == CHAR_LESS_THAN_SIGN))
           break;
 
         /* Most other conditions use OP_CREF (a couple change to OP_RREF
@@ -6741,10 +6729,9 @@ for (;; ptr++)
     case CHAR_BACKSLASH:
     tempptr = ptr;
     escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options, FALSE);
-
     if (*errorcodeptr != 0) goto FAILED;
 
-    if (escape == 0)
+    if (escape == 0)                  /* The escape coded a single character */
       c = ec;
     else
       {
@@ -6910,11 +6897,12 @@ for (;; ptr++)
       can obtain the OP value by negating the escape value in the default
       situation when PCRE_UCP is not set. When it *is* set, we substitute
       Unicode property tests. Note that \b and \B do a one-character
-      lookbehind. */
+      lookbehind, and \A also behaves as if it does. */
 
       else
         {
-        if ((escape == ESC_b || escape == ESC_B) && cd->max_lookbehind == 0)
+        if ((escape == ESC_b || escape == ESC_B || escape == ESC_A) &&
+             cd->max_lookbehind == 0)
           cd->max_lookbehind = 1;
 #ifdef SUPPORT_UCP
         if (escape >= ESC_DU && escape <= ESC_wu)
@@ -7778,12 +7766,15 @@ pcre32_compile2(PCRE_SPTR32 pattern, int options, int *errorcodeptr,
 {
 REAL_PCRE *re;
 int length = 1;  /* For final END opcode */
-pcre_uint32 firstchar, reqchar;
 pcre_int32 firstcharflags, reqcharflags;
+pcre_uint32 firstchar, reqchar;
+pcre_uint32 limit_match = PCRE_UINT32_MAX;
+pcre_uint32 limit_recursion = PCRE_UINT32_MAX;
 int newline;
 int errorcode = 0;
 int skipatstart = 0;
 BOOL utf;
+BOOL never_utf = FALSE;
 size_t size;
 pcre_uchar *code;
 const pcre_uchar *codestart;
@@ -7843,9 +7834,15 @@ if ((options & ~PUBLIC_COMPILE_OPTIONS) != 0)
   goto PCRE_EARLY_ERROR_RETURN;
   }
 
+/* If PCRE_NEVER_UTF is set, remember it. */
+
+if ((options & PCRE_NEVER_UTF) != 0) never_utf = TRUE;
+
 /* Check for global one-time settings at the start of the pattern, and remember
 the offset for later. */
 
+cd->external_flags = 0;   /* Initialize here for LIMIT_MATCH/RECURSION */
+
 while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
        ptr[skipatstart+1] == CHAR_ASTERISK)
   {
@@ -7876,6 +7873,44 @@ PCRE_UTF8 == PCRE_UTF16 == PCRE_UTF32. */
   else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_NO_START_OPT_RIGHTPAR, 13) == 0)
     { skipatstart += 15; options |= PCRE_NO_START_OPTIMIZE; continue; }
 
+  else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LIMIT_MATCH_EQ, 12) == 0)
+    {
+    pcre_uint32 c = 0;
+    int p = skipatstart + 14;
+    while (isdigit(ptr[p]))
+      {
+      if (c > PCRE_UINT32_MAX / 10 - 1) break;   /* Integer overflow */
+      c = c*10 + ptr[p++] - CHAR_0;
+      }
+    if (ptr[p++] != CHAR_RIGHT_PARENTHESIS) break;
+    if (c < limit_match)
+      {
+      limit_match = c;
+      cd->external_flags |= PCRE_MLSET;
+      }
+    skipatstart = p;
+    continue;
+    }
+
+  else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LIMIT_RECURSION_EQ, 16) == 0)
+    {
+    pcre_uint32 c = 0;
+    int p = skipatstart + 18;
+    while (isdigit(ptr[p]))
+      {
+      if (c > PCRE_UINT32_MAX / 10 - 1) break;   /* Integer overflow check */
+      c = c*10 + ptr[p++] - CHAR_0;
+      }
+    if (ptr[p++] != CHAR_RIGHT_PARENTHESIS) break;
+    if (c < limit_recursion)
+      {
+      limit_recursion = c;
+      cd->external_flags |= PCRE_RLSET;
+      }
+    skipatstart = p;
+    continue;
+    }
+
   if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_CR_RIGHTPAR, 3) == 0)
     { skipatstart += 5; newnl = PCRE_NEWLINE_CR; }
   else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LF_RIGHTPAR, 3)  == 0)
@@ -7901,6 +7936,11 @@ PCRE_UTF8 == PCRE_UTF16 == PCRE_UTF32. */
 
 /* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */
 utf = (options & PCRE_UTF8) != 0;
+if (utf && never_utf)
+  {
+  errorcode = ERR78;
+  goto PCRE_EARLY_ERROR_RETURN2;
+  }
 
 /* Can't support UTF unless PCRE has been compiled to include the code. The
 return of an error code from PRIV(valid_utf)() is a new feature, introduced in
@@ -8023,7 +8063,6 @@ cd->req_varyopt = 0;
 cd->assert_depth = 0;
 cd->max_lookbehind = 0;
 cd->external_options = options;
-cd->external_flags = 0;
 cd->open_caps = NULL;
 
 /* Now do the pre-compile. On error, errorcode will be set non-zero, so we
@@ -8073,6 +8112,8 @@ re->magic_number = MAGIC_NUMBER;
 re->size = (int)size;
 re->options = cd->external_options;
 re->flags = cd->external_flags;
+re->limit_match = limit_match;
+re->limit_recursion = limit_recursion;
 re->first_char = 0;
 re->req_char = 0;
 re->name_table_offset = sizeof(REAL_PCRE) / sizeof(pcre_uchar);
@@ -8082,7 +8123,9 @@ re->ref_count = 0;
 re->tables = (tables == PRIV(default_tables))? NULL : tables;
 re->nullpad = NULL;
 #ifdef COMPILE_PCRE32
-re->dummy1 = re->dummy2 = 0;
+re->dummy = 0;
+#else
+re->dummy1 = re->dummy2 = re->dummy3 = 0;
 #endif
 
 /* The starting points of the name/number translation table and of the code are
@@ -8142,7 +8185,7 @@ if (code - codestart > length) errorcode = ERR23;
 
 #ifdef SUPPORT_VALGRIND
 /* If the estimated length exceeds the really used length, mark the extra
-allocated memory as unadressable, so that any out-of-bound reads can be
+allocated memory as unaddressable, so that any out-of-bound reads can be
 detected. */
 VALGRIND_MAKE_MEM_NOACCESS(code, (length - (code - codestart)) * sizeof(pcre_uchar));
 #endif
diff --git a/usr.sbin/nginx/src/pcre/pcre_exec.c b/usr.sbin/nginx/src/pcre/pcre_exec.c
index 05d0e52d33c..48ba0ef4007 100644
--- a/usr.sbin/nginx/src/pcre/pcre_exec.c
+++ b/usr.sbin/nginx/src/pcre/pcre_exec.c
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
-           Copyright (c) 1997-2012 University of Cambridge
+           Copyright (c) 1997-2013 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -56,6 +56,20 @@ possible. There are also some static supporting functions. */
 #undef min
 #undef max
 
+/* The md->capture_last field uses the lower 16 bits for the last captured
+substring (which can never be greater than 65535) and a bit in the top half
+to mean "capture vector overflowed". This odd way of doing things was
+implemented when it was realized that preserving and restoring the overflow bit
+whenever the last capture number was saved/restored made for a neater
+interface, and doing it this way saved on (a) another variable, which would
+have increased the stack frame size (a big NO-NO in PCRE) and (b) another
+separate set of save/restore instructions. The following defines are used in
+implementing this. */
+
+#define CAPLMASK    0x0000ffff    /* The bits used for last_capture */
+#define OVFLMASK    0xffff0000    /* The bits used for the overflow flag */
+#define OVFLBIT     0x00010000    /* The bit that is set for overflow */
+
 /* Values for setting in md->match_function_type to indicate two special types
 of call to match(). We do it this way to save on using another stack variable,
 as stack usage is to be discouraged. */
@@ -73,13 +87,17 @@ defined PCRE_ERROR_xxx codes, which are all negative. */
 negative to avoid the external error codes. */
 
 #define MATCH_ACCEPT       (-999)
-#define MATCH_COMMIT       (-998)
-#define MATCH_KETRPOS      (-997)
-#define MATCH_ONCE         (-996)
+#define MATCH_KETRPOS      (-998)
+#define MATCH_ONCE         (-997)
+/* The next 5 must be kept together and in sequence so that a test that checks
+for any one of them can use a range. */
+#define MATCH_COMMIT       (-996)
 #define MATCH_PRUNE        (-995)
 #define MATCH_SKIP         (-994)
 #define MATCH_SKIP_ARG     (-993)
 #define MATCH_THEN         (-992)
+#define MATCH_BACKTRACK_MAX MATCH_THEN
+#define MATCH_BACKTRACK_MIN MATCH_COMMIT
 
 /* Maximum number of ints of offset to save on the stack for recursive calls.
 If the offset vector is bigger, malloc is used. This should be a multiple of 3,
@@ -219,7 +237,7 @@ if (caseless)
     {
     while (length-- > 0)
       {
-      pcre_uchar cc, cp;
+      pcre_uint32 cc, cp;
       if (eptr >= md->end_subject) return -2;   /* Partial match */
       cc = RAWUCHARTEST(eptr);
       cp = RAWUCHARTEST(p);
@@ -294,7 +312,7 @@ enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
        RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
        RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
        RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
-       RM61,  RM62, RM63, RM64, RM65, RM66, RM67 };
+       RM61,  RM62, RM63, RM64, RM65, RM66, RM67, RM68 };
 
 /* These versions of the macros use the stack, as normal. There are debugging
 versions and production versions. Note that the "rw" argument of RMATCH isn't
@@ -416,10 +434,10 @@ typedef struct heapframe {
   int Xlength;
   int Xmax;
   int Xmin;
-  int Xnumber;
+  unsigned int Xnumber;
   int Xoffset;
-  int Xop;
-  int Xsave_capture_last;
+  unsigned int Xop;
+  pcre_int32 Xsave_capture_last;
   int Xsave_offset1, Xsave_offset2, Xsave_offset3;
   int Xstacksave[REC_STACK_SAVE_MAX];
 
@@ -634,8 +652,8 @@ int max;
 int min;
 unsigned int number;
 int offset;
-pcre_uchar op;
-int save_capture_last;
+unsigned int op;
+pcre_int32 save_capture_last;
 int save_offset1, save_offset2, save_offset3;
 int stacksave[REC_STACK_SAVE_MAX];
 
@@ -763,23 +781,16 @@ for (;;)
     case OP_FAIL:
     RRETURN(MATCH_NOMATCH);
 
-    /* COMMIT overrides PRUNE, SKIP, and THEN */
-
     case OP_COMMIT:
     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
       eptrb, RM52);
-    if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
-        rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
-        rrc != MATCH_THEN)
-      RRETURN(rrc);
+    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
     RRETURN(MATCH_COMMIT);
 
-    /* PRUNE overrides THEN */
-
     case OP_PRUNE:
     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
       eptrb, RM51);
-    if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
+    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
     RRETURN(MATCH_PRUNE);
 
     case OP_PRUNE_ARG:
@@ -789,38 +800,39 @@ for (;;)
       eptrb, RM56);
     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
          md->mark == NULL) md->mark = ecode + 2;
-    if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
+    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
     RRETURN(MATCH_PRUNE);
 
-    /* SKIP overrides PRUNE and THEN */
-
     case OP_SKIP:
     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
       eptrb, RM53);
-    if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
-      RRETURN(rrc);
+    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
     md->start_match_ptr = eptr;   /* Pass back current position */
     RRETURN(MATCH_SKIP);
 
     /* Note that, for Perl compatibility, SKIP with an argument does NOT set
-    nomatch_mark. There is a flag that disables this opcode when re-matching a
-    pattern that ended with a SKIP for which there was not a matching MARK. */
+    nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was
+    not a matching mark, we have to re-run the match, ignoring the SKIP_ARG
+    that failed and any that precede it (either they also failed, or were not
+    triggered). To do this, we maintain a count of executed SKIP_ARGs. If a
+    SKIP_ARG gets to top level, the match is re-run with md->ignore_skip_arg
+    set to the count of the one that failed. */
 
     case OP_SKIP_ARG:
-    if (md->ignore_skip_arg)
+    md->skip_arg_count++;
+    if (md->skip_arg_count <= md->ignore_skip_arg)
       {
       ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
       break;
       }
     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
       eptrb, RM57);
-    if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
-      RRETURN(rrc);
+    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 
     /* Pass back the current skip name by overloading md->start_match_ptr and
     returning the special MATCH_SKIP_ARG return code. This will either be
     caught by a matching MARK, or get to the top, where it causes a rematch
-    with the md->ignore_skip_arg flag set. */
+    with md->ignore_skip_arg set to the value of md->skip_arg_count. */
 
     md->start_match_ptr = ecode + 2;
     RRETURN(MATCH_SKIP_ARG);
@@ -1066,6 +1078,7 @@ for (;;)
       /* In all other cases, we have to make another call to match(). */
 
       save_mark = md->mark;
+      save_capture_last = md->capture_last;
       RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
         RM2);
 
@@ -1097,6 +1110,7 @@ for (;;)
       ecode += GET(ecode, 1);
       md->mark = save_mark;
       if (*ecode != OP_ALT) break;
+      md->capture_last = save_capture_last;
       }
 
     RRETURN(MATCH_NOMATCH);
@@ -1218,6 +1232,7 @@ for (;;)
     POSSESSIVE_NON_CAPTURE:
     matched_once = FALSE;
     code_offset = (int)(ecode - md->start_code);
+    save_capture_last = md->capture_last;
 
     for (;;)
       {
@@ -1247,6 +1262,7 @@ for (;;)
       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       ecode += GET(ecode, 1);
       if (*ecode != OP_ALT) break;
+      md->capture_last = save_capture_last;
       }
 
     if (matched_once || allow_zero)
@@ -1291,13 +1307,16 @@ for (;;)
         cb.pattern_position = GET(ecode, LINK_SIZE + 3);
         cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
         cb.capture_top      = offset_top/2;
-        cb.capture_last     = md->capture_last;
+        cb.capture_last     = md->capture_last & CAPLMASK;
+        /* Internal change requires this for API compatibility. */
+        if (cb.capture_last == 0) cb.capture_last = -1;
         cb.callout_data     = md->callout_data;
         cb.mark             = md->nomatch_mark;
         if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
         if (rrc < 0) RRETURN(rrc);
         }
       ecode += PRIV(OP_lengths)[OP_CALLOUT];
+      codelink -= PRIV(OP_lengths)[OP_CALLOUT];
       }
 
     condcode = ecode[LINK_SIZE+1];
@@ -1513,7 +1532,7 @@ for (;;)
     to close any currently open capturing brackets. */
 
     case OP_CLOSE:
-    number = GET2(ecode, 1);
+    number = GET2(ecode, 1);   /* Must be less than 65536 */
     offset = number << 1;
 
 #ifdef PCRE_DEBUG
@@ -1521,8 +1540,8 @@ for (;;)
       printf("\n");
 #endif
 
-    md->capture_last = number;
-    if (offset >= md->offset_max) md->offset_overflow = TRUE; else
+    md->capture_last = (md->capture_last & OVFLMASK) | number;
+    if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else
       {
       md->offset_vector[offset] =
         md->offset_vector[md->offset_end - number];
@@ -1584,28 +1603,49 @@ for (;;)
       }
     else condassert = FALSE;
 
+    /* Loop for each branch */
+
     do
       {
       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);
+
+      /* A match means that the assertion is true; break out of the loop
+      that matches its alternatives. */
+
       if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
         {
         mstart = md->start_match_ptr;   /* In case \K reset it */
         break;
         }
+
+      /* If not matched, restore the previous mark setting. */
+
       md->mark = save_mark;
 
-      /* A COMMIT failure must fail the entire assertion, without trying any
-      subsequent branches. */
+      /* See comment in the code for capturing groups above about handling
+      THEN. */
 
-      if (rrc == MATCH_COMMIT) RRETURN(MATCH_NOMATCH);
+      if (rrc == MATCH_THEN)
+        {
+        next = ecode + GET(ecode,1);
+        if (md->start_match_ptr < next &&
+            (*ecode == OP_ALT || *next == OP_ALT))
+          rrc = MATCH_NOMATCH;
+        }
 
-      /* PCRE does not allow THEN to escape beyond an assertion; it
-      is treated as NOMATCH. */
+      /* Anything other than NOMATCH causes the entire assertion to fail,
+      passing back the return code. This includes COMMIT, SKIP, PRUNE and an
+      uncaptured THEN, which means they take their normal effect. This
+      consistent approach does not always have exactly the same effect as in
+      Perl. */
 
-      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
+      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       ecode += GET(ecode, 1);
       }
-    while (*ecode == OP_ALT);
+    while (*ecode == OP_ALT);   /* Continue for next alternative */
+
+    /* If we have tried all the alternative branches, the assertion has
+    failed. If not, we broke out after a match. */
 
     if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
 
@@ -1613,17 +1653,16 @@ for (;;)
 
     if (condassert) RRETURN(MATCH_MATCH);
 
-    /* Continue from after the assertion, updating the offsets high water
-    mark, since extracts may have been taken during the assertion. */
+    /* Continue from after a successful assertion, updating the offsets high
+    water mark, since extracts may have been taken during the assertion. */
 
     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
     ecode += 1 + LINK_SIZE;
     offset_top = md->end_offset_top;
     continue;
 
-    /* Negative assertion: all branches must fail to match. Encountering SKIP,
-    PRUNE, or COMMIT means we must assume failure without checking subsequent
-    branches. */
+    /* Negative assertion: all branches must fail to match for the assertion to
+    succeed. */
 
     case OP_ASSERT_NOT:
     case OP_ASSERTBACK_NOT:
@@ -1635,28 +1674,64 @@ for (;;)
       }
     else condassert = FALSE;
 
+    /* Loop for each alternative branch. */
+
     do
       {
       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
-      md->mark = save_mark;
-      if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);
-      if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
+      md->mark = save_mark;   /* Always restore the mark setting */
+
+      switch(rrc)
         {
-        do ecode += GET(ecode,1); while (*ecode == OP_ALT);
+        case MATCH_MATCH:            /* A successful match means */
+        case MATCH_ACCEPT:           /* the assertion has failed. */
+        RRETURN(MATCH_NOMATCH);
+
+        case MATCH_NOMATCH:          /* Carry on with next branch */
         break;
+
+        /* See comment in the code for capturing groups above about handling
+        THEN. */
+
+        case MATCH_THEN:
+        next = ecode + GET(ecode,1);
+        if (md->start_match_ptr < next &&
+            (*ecode == OP_ALT || *next == OP_ALT))
+          {
+          rrc = MATCH_NOMATCH;
+          break;
+          }
+        /* Otherwise fall through. */
+
+        /* COMMIT, SKIP, PRUNE, and an uncaptured THEN cause the whole
+        assertion to fail to match, without considering any more alternatives.
+        Failing to match means the assertion is true. This is a consistent
+        approach, but does not always have the same effect as in Perl. */
+
+        case MATCH_COMMIT:
+        case MATCH_SKIP:
+        case MATCH_SKIP_ARG:
+        case MATCH_PRUNE:
+        do ecode += GET(ecode,1); while (*ecode == OP_ALT);
+        goto NEG_ASSERT_TRUE;   /* Break out of alternation loop */
+
+        /* Anything else is an error */
+
+        default:
+        RRETURN(rrc);
         }
 
-      /* PCRE does not allow THEN to escape beyond an assertion; it is treated
-      as NOMATCH. */
+      /* Continue with next branch */
 
-      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
       ecode += GET(ecode,1);
       }
     while (*ecode == OP_ALT);
 
-    if (condassert) RRETURN(MATCH_MATCH);  /* Condition assertion */
+    /* All branches in the assertion failed to match. */
 
-    ecode += 1 + LINK_SIZE;
+    NEG_ASSERT_TRUE:
+    if (condassert) RRETURN(MATCH_MATCH);  /* Condition assertion */
+    ecode += 1 + LINK_SIZE;                /* Continue with current branch */
     continue;
 
     /* Move the subject pointer back. This occurs only at the start of
@@ -1716,7 +1791,9 @@ for (;;)
       cb.pattern_position = GET(ecode, 2);
       cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
       cb.capture_top      = offset_top/2;
-      cb.capture_last     = md->capture_last;
+      cb.capture_last     = md->capture_last & CAPLMASK;
+      /* Internal change requires this for API compatibility. */
+      if (cb.capture_last == 0) cb.capture_last = -1;
       cb.callout_data     = md->callout_data;
       cb.mark             = md->nomatch_mark;
       if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
@@ -1762,6 +1839,7 @@ for (;;)
       /* Add to "recursing stack" */
 
       new_recursive.group_num = recno;
+      new_recursive.saved_capture_last = md->capture_last;
       new_recursive.subject_position = eptr;
       new_recursive.prevrec = md->recursive;
       md->recursive = &new_recursive;
@@ -1785,8 +1863,9 @@ for (;;)
             new_recursive.saved_max * sizeof(int));
 
       /* OK, now we can do the recursion. After processing each alternative,
-      restore the offset data. If there were nested recursions, md->recursive
-      might be changed, so reset it before looping. */
+      restore the offset data and the last captured value. If there were nested
+      recursions, md->recursive might be changed, so reset it before looping.
+      */
 
       DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
       cbegroup = (*callpat >= OP_SBRA);
@@ -1797,6 +1876,7 @@ for (;;)
           md, eptrb, RM6);
         memcpy(md->offset_vector, new_recursive.offset_save,
             new_recursive.saved_max * sizeof(int));
+        md->capture_last = new_recursive.saved_capture_last;
         md->recursive = new_recursive.prevrec;
         if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
           {
@@ -1813,11 +1893,16 @@ for (;;)
           goto RECURSION_MATCHED;        /* Exit loop; end processing */
           }
 
-        /* PCRE does not allow THEN or COMMIT to escape beyond a recursion; it
-        is treated as NOMATCH. */
+        /* PCRE does not allow THEN, SKIP, PRUNE or COMMIT to escape beyond a
+        recursion; they cause a NOMATCH for the entire recursion. These codes
+        are defined in a range that can be tested for. */
+
+        if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX)
+          RRETURN(MATCH_NOMATCH);
 
-        else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN &&
-                 rrc != MATCH_COMMIT)
+        /* Any return code other than NOMATCH is an error. */
+
+        if (rrc != MATCH_NOMATCH)
           {
           DPRINTF(("Recursion gave error %d\n", rrc));
           if (new_recursive.offset_save != stacksave)
@@ -1947,8 +2032,8 @@ for (;;)
 
       /* Deal with capturing */
 
-      md->capture_last = number;
-      if (offset >= md->offset_max) md->offset_overflow = TRUE; else
+      md->capture_last = (md->capture_last & OVFLMASK) | number;
+      if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else
         {
         /* If offset is greater than offset_top, it means that we are
         "skipping" a capturing group, and that group's offsets must be marked
@@ -2604,6 +2689,13 @@ for (;;)
           }
         break;
 
+        case PT_UCNC:
+        if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
+             c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
+             c >= 0xe000) == (op == OP_NOTPROP))
+          RRETURN(MATCH_NOMATCH);
+        break;
+
         /* This should never occur */
 
         default:
@@ -3190,7 +3282,7 @@ for (;;)
 
       if (fc < 128)
         {
-        pcre_uchar cc = RAWUCHAR(eptr);
+        pcre_uint32 cc = RAWUCHAR(eptr);
         if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH);
         ecode++;
         eptr++;
@@ -3295,7 +3387,22 @@ for (;;)
     max = rep_max[c];                 /* zero for max => infinity */
     if (max == 0) max = INT_MAX;
 
-    /* Common code for all repeated single-character matches. */
+    /* Common code for all repeated single-character matches. We first check
+    for the minimum number of characters. If the minimum equals the maximum, we
+    are done. Otherwise, if minimizing, check the rest of the pattern for a
+    match; if there isn't one, advance up to the maximum, one character at a
+    time.
+
+    If maximizing, advance up to the maximum number of matching characters,
+    until eptr is past the end of the maximum run. If possessive, we are
+    then done (no backing up). Otherwise, match at this position; anything
+    other than no match is immediately returned. For nomatch, back up one
+    character, unless we are matching \R and the last thing matched was
+    \r\n, in which case, back up two bytes. When we reach the first optional
+    character position, we can save stack by doing a tail recurse.
+
+    The various UTF/non-UTF and caseful/caseless cases are handled separately,
+    for speed. */
 
     REPEATCHAR:
 #ifdef SUPPORT_UTF
@@ -3379,13 +3486,12 @@ for (;;)
               }
             }
 
-          if (possessive) continue;
-
+          if (possessive) continue;    /* No backtracking */
           for(;;)
             {
+            if (eptr == pp) goto TAIL_RECURSE;
             RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-            if (eptr == pp) { RRETURN(MATCH_NOMATCH); }
 #ifdef SUPPORT_UCP
             eptr--;
             BACKCHAR(eptr);
@@ -3439,8 +3545,7 @@ for (;;)
 
       for (i = 1; i <= min; i++)
         {
-        pcre_uchar cc;
-
+        pcre_uint32 cc;                 /* Faster than pcre_uchar */
         if (eptr >= md->end_subject)
           {
           SCHECK_PARTIAL();
@@ -3455,8 +3560,7 @@ for (;;)
         {
         for (fi = min;; fi++)
           {
-          pcre_uchar cc;
-
+          pcre_uint32 cc;               /* Faster than pcre_uchar */
           RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
           if (fi >= max) RRETURN(MATCH_NOMATCH);
@@ -3476,8 +3580,7 @@ for (;;)
         pp = eptr;
         for (i = min; i < max; i++)
           {
-          pcre_uchar cc;
-
+          pcre_uint32 cc;               /* Faster than pcre_uchar */
           if (eptr >= md->end_subject)
             {
             SCHECK_PARTIAL();
@@ -3488,10 +3591,10 @@ for (;;)
           eptr++;
           }
 
-        if (possessive) continue;
-
-        while (eptr >= pp)
+        if (possessive) continue;       /* No backtracking */
+        for (;;)
           {
+          if (eptr == pp) goto TAIL_RECURSE;
           RMATCH(eptr, ecode, offset_top, md, eptrb, RM25);
           eptr--;
           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
@@ -3546,10 +3649,10 @@ for (;;)
           if (fc != RAWUCHARTEST(eptr)) break;
           eptr++;
           }
-        if (possessive) continue;
-
-        while (eptr >= pp)
+        if (possessive) continue;    /* No backtracking */
+        for (;;)
           {
+          if (eptr == pp) goto TAIL_RECURSE;
           RMATCH(eptr, ecode, offset_top, md, eptrb, RM27);
           eptr--;
           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
@@ -3726,7 +3829,7 @@ for (;;)
           }
         }
       else
-#endif
+#endif  /* SUPPORT_UTF */
       /* Not UTF mode */
         {
         for (i = 1; i <= min; i++)
@@ -3764,7 +3867,7 @@ for (;;)
             }
           }
         else
-#endif
+#endif  /*SUPPORT_UTF */
         /* Not UTF mode */
           {
           for (fi = min;; fi++)
@@ -3806,17 +3909,18 @@ for (;;)
             if (fc == d || (unsigned int)foc == d) break;
             eptr += len;
             }
-          if (possessive) continue;
+          if (possessive) continue;    /* No backtracking */
           for(;;)
             {
+            if (eptr == pp) goto TAIL_RECURSE;
             RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-            if (eptr-- == pp) break;        /* Stop if tried at original pos */
+            eptr--;
             BACKCHAR(eptr);
             }
           }
         else
-#endif
+#endif  /* SUPPORT_UTF */
         /* Not UTF mode */
           {
           for (i = min; i < max; i++)
@@ -3829,9 +3933,10 @@ for (;;)
             if (fc == *eptr || foc == *eptr) break;
             eptr++;
             }
-          if (possessive) continue;
-          while (eptr >= pp)
+          if (possessive) continue;    /* No backtracking */
+          for (;;)
             {
+            if (eptr == pp) goto TAIL_RECURSE;
             RMATCH(eptr, ecode, offset_top, md, eptrb, RM31);
             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
             eptr--;
@@ -3941,12 +4046,13 @@ for (;;)
             if (fc == d) break;
             eptr += len;
             }
-          if (possessive) continue;
+          if (possessive) continue;    /* No backtracking */
           for(;;)
             {
+            if (eptr == pp) goto TAIL_RECURSE;
             RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);
             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-            if (eptr-- == pp) break;        /* Stop if tried at original pos */
+            eptr--;
             BACKCHAR(eptr);
             }
           }
@@ -3964,9 +4070,10 @@ for (;;)
             if (fc == *eptr) break;
             eptr++;
             }
-          if (possessive) continue;
-          while (eptr >= pp)
+          if (possessive) continue;    /* No backtracking */
+          for (;;)
             {
+            if (eptr == pp) goto TAIL_RECURSE;
             RMATCH(eptr, ecode, offset_top, md, eptrb, RM35);
             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
             eptr--;
@@ -4225,6 +4332,22 @@ for (;;)
             }
           break;
 
+          case PT_UCNC:
+          for (i = 1; i <= min; i++)
+            {
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
+              RRETURN(MATCH_NOMATCH);
+              }
+            GETCHARINCTEST(c, eptr);
+            if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
+                 c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
+                 c >= 0xe000) == prop_fail_result)
+              RRETURN(MATCH_NOMATCH);
+            }
+          break;
+
           /* This should not occur */
 
           default:
@@ -4430,8 +4553,7 @@ for (;;)
         case OP_DIGIT:
         for (i = 1; i <= min; i++)
           {
-          pcre_uchar cc;
-
+          pcre_uint32 cc;
           if (eptr >= md->end_subject)
             {
             SCHECK_PARTIAL();
@@ -4448,8 +4570,7 @@ for (;;)
         case OP_NOT_WHITESPACE:
         for (i = 1; i <= min; i++)
           {
-          pcre_uchar cc;
-
+          pcre_uint32 cc;
           if (eptr >= md->end_subject)
             {
             SCHECK_PARTIAL();
@@ -4466,8 +4587,7 @@ for (;;)
         case OP_WHITESPACE:
         for (i = 1; i <= min; i++)
           {
-          pcre_uchar cc;
-
+          pcre_uint32 cc;
           if (eptr >= md->end_subject)
             {
             SCHECK_PARTIAL();
@@ -4484,8 +4604,7 @@ for (;;)
         case OP_NOT_WORDCHAR:
         for (i = 1; i <= min; i++)
           {
-          pcre_uchar cc;
-
+          pcre_uint32 cc;
           if (eptr >= md->end_subject)
             {
             SCHECK_PARTIAL();
@@ -4502,8 +4621,7 @@ for (;;)
         case OP_WORDCHAR:
         for (i = 1; i <= min; i++)
           {
-          pcre_uchar cc;
-
+          pcre_uint32 cc;
           if (eptr >= md->end_subject)
             {
             SCHECK_PARTIAL();
@@ -4976,6 +5094,25 @@ for (;;)
             }
           /* Control never gets here */
 
+          case PT_UCNC:
+          for (fi = min;; fi++)
+            {
+            RMATCH(eptr, ecode, offset_top, md, eptrb, RM68);
+            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+            if (fi >= max) RRETURN(MATCH_NOMATCH);
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
+              RRETURN(MATCH_NOMATCH);
+              }
+            GETCHARINCTEST(c, eptr);
+            if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
+                 c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
+                 c >= 0xe000) == prop_fail_result)
+              RRETURN(MATCH_NOMATCH);
+            }
+          /* Control never gets here */
+
           /* This should never occur */
           default:
           RRETURN(PCRE_ERROR_INTERNAL);
@@ -5470,18 +5607,37 @@ for (;;)
           GOT_MAX:
           break;
 
+          case PT_UCNC:
+          for (i = min; i < max; i++)
+            {
+            int len = 1;
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
+              break;
+              }
+            GETCHARLENTEST(c, eptr, len);
+            if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
+                 c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
+                 c >= 0xe000) == prop_fail_result)
+              break;
+            eptr += len;
+            }
+          break;
+
           default:
           RRETURN(PCRE_ERROR_INTERNAL);
           }
 
         /* eptr is now past the end of the maximum run */
 
-        if (possessive) continue;
+        if (possessive) continue;    /* No backtracking */
         for(;;)
           {
+          if (eptr == pp) goto TAIL_RECURSE;
           RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-          if (eptr-- == pp) break;        /* Stop if tried at original pos */
+          eptr--;
           if (utf) BACKCHAR(eptr);
           }
         }
@@ -5518,13 +5674,13 @@ for (;;)
 
         /* eptr is now past the end of the maximum run */
 
-        if (possessive) continue;
-
+        if (possessive) continue;    /* No backtracking */
         for(;;)
           {
+          if (eptr == pp) goto TAIL_RECURSE;
           RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-          if (eptr-- == pp) break;        /* Stop if tried at original pos */
+          eptr--;
           for (;;)                        /* Move back over one extended */
             {
             if (!utf) c = *eptr; else
@@ -5799,18 +5955,13 @@ for (;;)
           RRETURN(PCRE_ERROR_INTERNAL);
           }
 
-        /* eptr is now past the end of the maximum run. If possessive, we are
-        done (no backing up). Otherwise, match at this position; anything other
-        than no match is immediately returned. For nomatch, back up one
-        character, unless we are matching \R and the last thing matched was
-        \r\n, in which case, back up two bytes. */
-
-        if (possessive) continue;
+        if (possessive) continue;    /* No backtracking */
         for(;;)
           {
+          if (eptr == pp) goto TAIL_RECURSE;
           RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);
           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-          if (eptr-- == pp) break;        /* Stop if tried at original pos */
+          eptr--;
           BACKCHAR(eptr);
           if (ctype == OP_ANYNL && eptr > pp  && RAWUCHAR(eptr) == CHAR_NL &&
               RAWUCHAR(eptr - 1) == CHAR_CR) eptr--;
@@ -6048,15 +6199,10 @@ for (;;)
           RRETURN(PCRE_ERROR_INTERNAL);
           }
 
-        /* eptr is now past the end of the maximum run. If possessive, we are
-        done (no backing up). Otherwise, match at this position; anything other
-        than no match is immediately returned. For nomatch, back up one
-        character (byte), unless we are matching \R and the last thing matched
-        was \r\n, in which case, back up two bytes. */
-
-        if (possessive) continue;
-        while (eptr >= pp)
+        if (possessive) continue;    /* No backtracking */
+        for (;;)
           {
+          if (eptr == pp) goto TAIL_RECURSE;
           RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);
           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
           eptr--;
@@ -6111,7 +6257,7 @@ switch (frame->Xwhere)
   LBL(32) LBL(34) LBL(42) LBL(46)
 #ifdef SUPPORT_UCP
   LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
-  LBL(59) LBL(60) LBL(61) LBL(62) LBL(67)
+  LBL(59) LBL(60) LBL(61) LBL(62) LBL(67) LBL(68)
 #endif  /* SUPPORT_UCP */
 #endif  /* SUPPORT_UTF */
   default:
@@ -6264,6 +6410,7 @@ const pcre_uint8 *start_bits = NULL;
 PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
 PCRE_PUCHAR end_subject;
 PCRE_PUCHAR start_partial = NULL;
+PCRE_PUCHAR match_partial = NULL;
 PCRE_PUCHAR req_char_ptr = start_match - 1;
 
 const pcre_study_data *study;
@@ -6393,6 +6540,8 @@ md->callout_data = NULL;
 
 tables = re->tables;
 
+/* The two limit values override the defaults, whatever their value. */
+
 if (extra_data != NULL)
   {
   register unsigned int flags = extra_data->flags;
@@ -6407,6 +6556,15 @@ if (extra_data != NULL)
   if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
   }
 
+/* Limits in the regex override only if they are smaller. */
+
+if ((re->flags & PCRE_MLSET) != 0 && re->limit_match < md->match_limit)
+  md->match_limit = re->limit_match;
+
+if ((re->flags & PCRE_RLSET) != 0 &&
+    re->limit_recursion < md->match_limit_recursion)
+  md->match_limit_recursion = re->limit_recursion;
+
 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
 is a feature that makes it possible to save compiled regex and re-use them
 in other programs later. */
@@ -6432,7 +6590,7 @@ end_subject = md->end_subject;
 md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
 md->use_ucp = (re->options & PCRE_UCP) != 0;
 md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
-md->ignore_skip_arg = FALSE;
+md->ignore_skip_arg = 0;
 
 /* Some options are unpacked into BOOL variables in the hope that testing
 them will be faster than individual option bits. */
@@ -6542,11 +6700,9 @@ if (re->top_backref > 0 && re->top_backref >= ocount/3)
   DPRINTF(("Got memory to hold back references\n"));
   }
 else md->offset_vector = offsets;
-
 md->offset_end = ocount;
 md->offset_max = (2*ocount)/3;
-md->offset_overflow = FALSE;
-md->capture_last = -1;
+md->capture_last = 0;
 
 /* Reset the working variable associated with each extraction. These should
 never be used unless previously set, but they get saved and restored, and so we
@@ -6816,8 +6972,13 @@ for(;;)
   md->match_call_count = 0;
   md->match_function_type = 0;
   md->end_offset_top = 0;
+  md->skip_arg_count = 0;
   rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);
-  if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
+  if (md->hitend && start_partial == NULL)
+    {
+    start_partial = md->start_used_ptr;
+    match_partial = start_match;
+    }
 
   switch(rc)
     {
@@ -6830,14 +6991,14 @@ for(;;)
 
     case MATCH_SKIP_ARG:
     new_start_match = start_match;
-    md->ignore_skip_arg = TRUE;
+    md->ignore_skip_arg = md->skip_arg_count;
     break;
 
-    /* SKIP passes back the next starting point explicitly, but if it is the
-    same as the match we have just done, treat it as NOMATCH. */
+    /* SKIP passes back the next starting point explicitly, but if it is no
+    greater than the match we have just done, treat it as NOMATCH. */
 
     case MATCH_SKIP:
-    if (md->start_match_ptr != start_match)
+    if (md->start_match_ptr > start_match)
       {
       new_start_match = md->start_match_ptr;
       break;
@@ -6845,12 +7006,12 @@ for(;;)
     /* Fall through */
 
     /* NOMATCH and PRUNE advance by one character. THEN at this level acts
-    exactly like PRUNE. Unset the ignore SKIP-with-argument flag. */
+    exactly like PRUNE. Unset ignore SKIP-with-argument. */
 
     case MATCH_NOMATCH:
     case MATCH_PRUNE:
     case MATCH_THEN:
-    md->ignore_skip_arg = FALSE;
+    md->ignore_skip_arg = 0;
     new_start_match = start_match + 1;
 #ifdef SUPPORT_UTF
     if (utf)
@@ -6943,7 +7104,7 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
         (arg_offset_max - 2) * sizeof(int));
       DPRINTF(("Copied offsets from temporary memory\n"));
       }
-    if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE;
+    if (md->end_offset_top > arg_offset_max) md->capture_last |= OVFLBIT;
     DPRINTF(("Freeing temporary memory\n"));
     (PUBL(free))(md->offset_vector);
     }
@@ -6951,7 +7112,8 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
   /* Set the return code to the number of captured strings, or 0 if there were
   too many to fit into the vector. */
 
-  rc = (md->offset_overflow && md->end_offset_top >= arg_offset_max)?
+  rc = ((md->capture_last & OVFLBIT) != 0 &&
+         md->end_offset_top >= arg_offset_max)?
     0 : md->end_offset_top/2;
 
   /* If there is space in the offset vector, set any unused pairs at the end of
@@ -7024,6 +7186,8 @@ if (start_partial != NULL)
     {
     offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);
     offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
+    if (offsetcount > 2)
+      offsets[2] = (int)(match_partial - (PCRE_PUCHAR)subject);
     }
   rc = PCRE_ERROR_PARTIAL;
   }
diff --git a/usr.sbin/nginx/src/pcre/pcre_fullinfo.c b/usr.sbin/nginx/src/pcre/pcre_fullinfo.c
index 02c9df4a826..c4eb5c0e1ad 100644
--- a/usr.sbin/nginx/src/pcre/pcre_fullinfo.c
+++ b/usr.sbin/nginx/src/pcre/pcre_fullinfo.c
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
-           Copyright (c) 1997-2012 University of Cambridge
+           Copyright (c) 1997-2013 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -222,6 +222,16 @@ switch (what)
   *((int *)where) = re->max_lookbehind;
   break;
 
+  case PCRE_INFO_MATCHLIMIT:
+  if ((re->flags & PCRE_MLSET) == 0) return PCRE_ERROR_UNSET;
+  *((pcre_uint32 *)where) = re->limit_match;
+  break;
+
+  case PCRE_INFO_RECURSIONLIMIT:
+  if ((re->flags & PCRE_RLSET) == 0) return PCRE_ERROR_UNSET;
+  *((pcre_uint32 *)where) = re->limit_recursion;
+  break;
+
   default: return PCRE_ERROR_BADOPTION;
   }
 
diff --git a/usr.sbin/nginx/src/pcre/pcre_internal.h b/usr.sbin/nginx/src/pcre/pcre_internal.h
index f3cb001fea7..307069ca9d6 100644
--- a/usr.sbin/nginx/src/pcre/pcre_internal.h
+++ b/usr.sbin/nginx/src/pcre/pcre_internal.h
@@ -7,7 +7,7 @@
 and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
-           Copyright (c) 1997-2012 University of Cambridge
+           Copyright (c) 1997-2013 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -194,23 +194,31 @@ preprocessor time in standard C environments. */
 typedef unsigned char pcre_uint8;
 
 #if USHRT_MAX == 65535
-  typedef unsigned short pcre_uint16;
-  typedef short pcre_int16;
+typedef unsigned short pcre_uint16;
+typedef short pcre_int16;
+#define PCRE_UINT16_MAX USHRT_MAX
+#define PCRE_INT16_MAX SHRT_MAX
 #elif UINT_MAX == 65535
-  typedef unsigned int pcre_uint16;
-  typedef int pcre_int16;
+typedef unsigned int pcre_uint16;
+typedef int pcre_int16;
+#define PCRE_UINT16_MAX UINT_MAX
+#define PCRE_INT16_MAX INT_MAX
 #else
-# error Cannot determine a type for 16-bit unsigned integers
+#error Cannot determine a type for 16-bit integers
 #endif
 
-#if UINT_MAX == 4294967295
-  typedef unsigned int pcre_uint32;
-  typedef int pcre_int32;
-#elif ULONG_MAX == 4294967295
-  typedef unsigned long int pcre_uint32;
-  typedef long int pcre_int32;
+#if UINT_MAX == 4294967295U
+typedef unsigned int pcre_uint32;
+typedef int pcre_int32;
+#define PCRE_UINT32_MAX UINT_MAX
+#define PCRE_INT32_MAX INT_MAX
+#elif ULONG_MAX == 4294967295UL
+typedef unsigned long int pcre_uint32;
+typedef long int pcre_int32;
+#define PCRE_UINT32_MAX ULONG_MAX
+#define PCRE_INT32_MAX LONG_MAX
 #else
-# error Cannot determine a type for 32-bit unsigned integers
+#error Cannot determine a type for 32-bit integers
 #endif
 
 /* When checking for integer overflow in pcre_compile(), we need to handle
@@ -1121,23 +1129,26 @@ other. NOTE: The values also appear in pcre_jit_compile.c. */
 
 
 /* Private flags containing information about the compiled regex. They used to
-live at the top end of the options word, but that got almost full, so now they
-are in a 16-bit flags word. From release 8.00, PCRE_NOPARTIAL is unused, as
-the restrictions on partial matching have been lifted. It remains for backwards
+live at the top end of the options word, but that got almost full, so they were
+moved to a 16-bit flags word - which got almost full, so now they are in a
+32-bit flags word. From release 8.00, PCRE_NOPARTIAL is unused, as the
+restrictions on partial matching have been lifted. It remains for backwards
 compatibility. */
 
-#define PCRE_MODE8         0x0001  /* compiled in 8 bit mode */
-#define PCRE_MODE16        0x0002  /* compiled in 16 bit mode */
-#define PCRE_MODE32        0x0004  /* compiled in 32 bit mode */
-#define PCRE_FIRSTSET      0x0010  /* first_char is set */
-#define PCRE_FCH_CASELESS  0x0020  /* caseless first char */
-#define PCRE_REQCHSET      0x0040  /* req_byte is set */
-#define PCRE_RCH_CASELESS  0x0080  /* caseless requested char */
-#define PCRE_STARTLINE     0x0100  /* start after \n for multiline */
-#define PCRE_NOPARTIAL     0x0200  /* can't use partial with this regex */
-#define PCRE_JCHANGED      0x0400  /* j option used in regex */
-#define PCRE_HASCRORLF     0x0800  /* explicit \r or \n in pattern */
-#define PCRE_HASTHEN       0x1000  /* pattern contains (*THEN) */
+#define PCRE_MODE8         0x00000001  /* compiled in 8 bit mode */
+#define PCRE_MODE16        0x00000002  /* compiled in 16 bit mode */
+#define PCRE_MODE32        0x00000004  /* compiled in 32 bit mode */
+#define PCRE_FIRSTSET      0x00000010  /* first_char is set */
+#define PCRE_FCH_CASELESS  0x00000020  /* caseless first char */
+#define PCRE_REQCHSET      0x00000040  /* req_byte is set */
+#define PCRE_RCH_CASELESS  0x00000080  /* caseless requested char */
+#define PCRE_STARTLINE     0x00000100  /* start after \n for multiline */
+#define PCRE_NOPARTIAL     0x00000200  /* can't use partial with this regex */
+#define PCRE_JCHANGED      0x00000400  /* j option used in regex */
+#define PCRE_HASCRORLF     0x00000800  /* explicit \r or \n in pattern */
+#define PCRE_HASTHEN       0x00001000  /* pattern contains (*THEN) */
+#define PCRE_MLSET         0x00002000  /* match limit set by regex */
+#define PCRE_RLSET         0x00004000  /* recursion limit set by regex */
 
 #if defined COMPILE_PCRE8
 #define PCRE_MODE          PCRE_MODE8
@@ -1164,7 +1175,7 @@ time, run time, or study time, respectively. */
    PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \
    PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE| \
    PCRE_DUPNAMES|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE| \
-   PCRE_JAVASCRIPT_COMPAT|PCRE_UCP|PCRE_NO_START_OPTIMIZE)
+   PCRE_JAVASCRIPT_COMPAT|PCRE_UCP|PCRE_NO_START_OPTIMIZE|PCRE_NEVER_UTF)
 
 #define PUBLIC_EXEC_OPTIONS \
   (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NOTEMPTY_ATSTART| \
@@ -1534,6 +1545,8 @@ a positive value. */
 #define STRING_UTF_RIGHTPAR            "UTF)"
 #define STRING_UCP_RIGHTPAR            "UCP)"
 #define STRING_NO_START_OPT_RIGHTPAR   "NO_START_OPT)"
+#define STRING_LIMIT_MATCH_EQ          "LIMIT_MATCH="
+#define STRING_LIMIT_RECURSION_EQ      "LIMIT_RECURSION="
 
 #else  /* SUPPORT_UTF */
 
@@ -1795,6 +1808,8 @@ only. */
 #define STRING_UTF_RIGHTPAR            STR_U STR_T STR_F STR_RIGHT_PARENTHESIS
 #define STRING_UCP_RIGHTPAR            STR_U STR_C STR_P STR_RIGHT_PARENTHESIS
 #define STRING_NO_START_OPT_RIGHTPAR   STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS
+#define STRING_LIMIT_MATCH_EQ          STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN
+#define STRING_LIMIT_RECURSION_EQ      STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN
 
 #endif  /* SUPPORT_UTF */
 
@@ -1835,6 +1850,7 @@ only. */
 #define PT_PXSPACE    7    /* POSIX space - Z plus 9,10,11,12,13 */
 #define PT_WORD       8    /* Word - L plus N plus underscore */
 #define PT_CLIST      9    /* Pseudo-property: match character list */
+#define PT_UCNC      10    /* Universal Character nameable character */
 
 /* Flag bits and data types for the extended class (OP_XCLASS) for classes that
 contain characters with values greater than 255. */
@@ -2270,7 +2286,7 @@ enum { ERR0,  ERR1,  ERR2,  ERR3,  ERR4,  ERR5,  ERR6,  ERR7,  ERR8,  ERR9,
        ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
        ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
        ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69,
-       ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERRCOUNT };
+       ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERRCOUNT };
 
 /* JIT compiling modes. The function list is indexed by them. */
 enum { JIT_COMPILE, JIT_PARTIAL_SOFT_COMPILE, JIT_PARTIAL_HARD_COMPILE,
@@ -2280,48 +2296,49 @@ enum { JIT_COMPILE, JIT_PARTIAL_SOFT_COMPILE, JIT_PARTIAL_HARD_COMPILE,
 code vector run on as long as necessary after the end. We store an explicit
 offset to the name table so that if a regex is compiled on one host, saved, and
 then run on another where the size of pointers is different, all might still
-be well. For the case of compiled-on-4 and run-on-8, we include an extra
-pointer that is always NULL. For future-proofing, a few dummy fields were
-originally included - even though you can never get this planning right - but
-there is only one left now.
-
-NOTE NOTE NOTE:
-Because people can now save and re-use compiled patterns, any additions to this
-structure should be made at the end, and something earlier (e.g. a new
-flag in the options or one of the dummy fields) should indicate that the new
-fields are present. Currently PCRE always sets the dummy fields to zero.
-NOTE NOTE NOTE
+be well.
+
+The size of the structure must be a multiple of 8 bytes. For the case of
+compiled-on-4 and run-on-8, we include an extra pointer that is always NULL so
+that there are an even number of pointers which therefore are a multiple of 8
+bytes.
+
+It is necessary to fork the struct for the 32 bit library, since it needs to
+use pcre_uint32 for first_char and req_char. We can't put an ifdef inside the
+typedef because pcretest needs access to the struct of the 8-, 16- and 32-bit
+variants.
+
+*** WARNING ***
+When new fields are added to these structures, remember to adjust the code in
+pcre_byte_order.c that is concerned with swapping the byte order of the fields
+when a compiled regex is reloaded on a host with different endianness.
+*** WARNING ***
+There is also similar byte-flipping code in pcretest.c, which is used for
+testing the byte-flipping features. It must also be kept in step.
+*** WARNING ***
 */
 
-#if defined COMPILE_PCRE8
-#define REAL_PCRE real_pcre
-#elif defined COMPILE_PCRE16
-#define REAL_PCRE real_pcre16
-#elif defined COMPILE_PCRE32
-#define REAL_PCRE real_pcre32
-#endif
-
-/* It is necessary to fork the struct for 32 bit, since it needs to use
- * pcre_uchar for first_char and req_char. Can't put an ifdef inside the
- * typedef since pcretest needs access to  the struct of the 8-, 16-
- * and 32-bit variants. */
-
 typedef struct real_pcre8_or_16 {
   pcre_uint32 magic_number;
   pcre_uint32 size;               /* Total that was malloced */
   pcre_uint32 options;            /* Public options */
-  pcre_uint16 flags;              /* Private flags */
+  pcre_uint32 flags;              /* Private flags */
+  pcre_uint32 limit_match;        /* Limit set from regex */
+  pcre_uint32 limit_recursion;    /* Limit set from regex */
+  pcre_uint16 first_char;         /* Starting character */
+  pcre_uint16 req_char;           /* This character must be seen */
   pcre_uint16 max_lookbehind;     /* Longest lookbehind (characters) */
   pcre_uint16 top_bracket;        /* Highest numbered group */
   pcre_uint16 top_backref;        /* Highest numbered back reference */
-  pcre_uint16 first_char;         /* Starting character */
-  pcre_uint16 req_char;           /* This character must be seen */
   pcre_uint16 name_table_offset;  /* Offset to name table that follows */
   pcre_uint16 name_entry_size;    /* Size of any name items */
   pcre_uint16 name_count;         /* Number of name items */
   pcre_uint16 ref_count;          /* Reference count */
+  pcre_uint16 dummy1;             /* To ensure size is a multiple of 8 */
+  pcre_uint16 dummy2;             /* To ensure size is a multiple of 8 */
+  pcre_uint16 dummy3;             /* To ensure size is a multiple of 8 */
   const pcre_uint8 *tables;       /* Pointer to tables or NULL for std */
-  const pcre_uint8 *nullpad;      /* NULL padding */
+  void             *nullpad;      /* NULL padding */
 } real_pcre8_or_16;
 
 typedef struct real_pcre8_or_16 real_pcre;
@@ -2331,22 +2348,31 @@ typedef struct real_pcre32 {
   pcre_uint32 magic_number;
   pcre_uint32 size;               /* Total that was malloced */
   pcre_uint32 options;            /* Public options */
-  pcre_uint16 flags;              /* Private flags */
+  pcre_uint32 flags;              /* Private flags */
+  pcre_uint32 limit_match;        /* Limit set from regex */
+  pcre_uint32 limit_recursion;    /* Limit set from regex */
+  pcre_uint32 first_char;         /* Starting character */
+  pcre_uint32 req_char;           /* This character must be seen */
   pcre_uint16 max_lookbehind;     /* Longest lookbehind (characters) */
   pcre_uint16 top_bracket;        /* Highest numbered group */
   pcre_uint16 top_backref;        /* Highest numbered back reference */
-  pcre_uint32 first_char;         /* Starting character */
-  pcre_uint32 req_char;           /* This character must be seen */
   pcre_uint16 name_table_offset;  /* Offset to name table that follows */
   pcre_uint16 name_entry_size;    /* Size of any name items */
   pcre_uint16 name_count;         /* Number of name items */
   pcre_uint16 ref_count;          /* Reference count */
-  pcre_uint16 dummy1;             /* for later expansion */
-  pcre_uint16 dummy2;             /* for later expansion */
+  pcre_uint16 dummy;              /* To ensure size is a multiple of 8 */
   const pcre_uint8 *tables;       /* Pointer to tables or NULL for std */
-  void *nullpad;                  /* for later expansion */
+  void             *nullpad;      /* NULL padding */
 } real_pcre32;
 
+#if defined COMPILE_PCRE8
+#define REAL_PCRE real_pcre
+#elif defined COMPILE_PCRE16
+#define REAL_PCRE real_pcre16
+#elif defined COMPILE_PCRE32
+#define REAL_PCRE real_pcre32
+#endif
+
 /* Assert that the size of REAL_PCRE is divisible by 8 */
 typedef int __assert_real_pcre_size_divisible_8[(sizeof(REAL_PCRE) % 8) == 0 ? 1 : -1];
 
@@ -2398,14 +2424,14 @@ typedef struct compile_data {
   int  names_found;                 /* Number of entries so far */
   int  name_entry_size;             /* Size of each entry */
   int  workspace_size;              /* Size of workspace */
-  unsigned int  bracount;           /* Count of capturing parens as we compile */
+  unsigned int bracount;            /* Count of capturing parens as we compile */
   int  final_bracount;              /* Saved value after first pass */
   int  max_lookbehind;              /* Maximum lookbehind (characters) */
   int  top_backref;                 /* Maximum back reference */
   unsigned int backref_map;         /* Bitmap of low back refs */
   int  assert_depth;                /* Depth of nested assertions */
-  int  external_options;            /* External (initial) options */
-  int  external_flags;              /* External flag bits to be set */
+  pcre_uint32 external_options;     /* External (initial) options */
+  pcre_uint32 external_flags;       /* External flag bits to be set */
   int  req_varyopt;                 /* "After variable item" flag for reqbyte */
   BOOL had_accept;                  /* (*ACCEPT) encountered */
   BOOL had_pruneorskip;             /* (*PRUNE) or (*SKIP) encountered */
@@ -2431,6 +2457,7 @@ typedef struct recursion_info {
   unsigned int group_num;         /* Number of group that was called */
   int *offset_save;               /* Pointer to start of saved offsets */
   int saved_max;                  /* Number of saved offsets */
+  int saved_capture_last;         /* Last capture number */
   PCRE_PUCHAR subject_position;   /* Position at start of recursion */
 } recursion_info;
 
@@ -2467,12 +2494,13 @@ typedef struct match_data {
   int    nllen;                   /* Newline string length */
   int    name_count;              /* Number of names in name table */
   int    name_entry_size;         /* Size of entry in names table */
+  unsigned int skip_arg_count;    /* For counting SKIP_ARGs */
+  unsigned int ignore_skip_arg;   /* For re-run when SKIP arg name not found */
   pcre_uchar *name_table;         /* Table of names */
   pcre_uchar nl[4];               /* Newline string when fixed */
   const  pcre_uint8 *lcc;         /* Points to lower casing table */
   const  pcre_uint8 *fcc;         /* Points to case-flipping table */
   const  pcre_uint8 *ctypes;      /* Points to table of type maps */
-  BOOL   offset_overflow;         /* Set if too many extractions */
   BOOL   notbol;                  /* NOTBOL flag */
   BOOL   noteol;                  /* NOTEOL flag */
   BOOL   utf;                     /* UTF-8 / UTF-16 flag */
@@ -2484,7 +2512,6 @@ typedef struct match_data {
   BOOL   hitend;                  /* Hit the end of the subject at some point */
   BOOL   bsr_anycrlf;             /* \R is just any CRLF, not full Unicode */
   BOOL   hasthen;                 /* Pattern contains (*THEN) */
-  BOOL   ignore_skip_arg;         /* For re-run when SKIP name not found */
   const  pcre_uchar *start_code;  /* For use when recursing */
   PCRE_PUCHAR start_subject;      /* Start of the subject string */
   PCRE_PUCHAR end_subject;        /* End of the subject string */
@@ -2493,7 +2520,7 @@ typedef struct match_data {
   PCRE_PUCHAR start_used_ptr;     /* Earliest consulted character */
   int    partial;                 /* PARTIAL options */
   int    end_offset_top;          /* Highwater mark at end of match */
-  int    capture_last;            /* Most recent capture number */
+  pcre_int32 capture_last;        /* Most recent capture number + overflow flag */
   int    start_offset;            /* The start offset value */
   int    match_function_type;     /* Set for certain special calls of MATCH() */
   eptrblock *eptrchain;           /* Chain of eptrblocks for tail recursions */
diff --git a/usr.sbin/nginx/src/pcre/pcre_tables.c b/usr.sbin/nginx/src/pcre/pcre_tables.c
index 34ee0488a81..f38ab52cbb8 100644
--- a/usr.sbin/nginx/src/pcre/pcre_tables.c
+++ b/usr.sbin/nginx/src/pcre/pcre_tables.c
@@ -346,6 +346,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
 #define STRING_Xan0 STR_X STR_a STR_n "\0"
 #define STRING_Xps0 STR_X STR_p STR_s "\0"
 #define STRING_Xsp0 STR_X STR_s STR_p "\0"
+#define STRING_Xuc0 STR_X STR_u STR_c "\0"
 #define STRING_Xwd0 STR_X STR_w STR_d "\0"
 #define STRING_Yi0 STR_Y STR_i "\0"
 #define STRING_Z0 STR_Z "\0"
@@ -493,6 +494,7 @@ const char PRIV(utt_names)[] =
   STRING_Xan0
   STRING_Xps0
   STRING_Xsp0
+  STRING_Xuc0
   STRING_Xwd0
   STRING_Yi0
   STRING_Z0
@@ -640,12 +642,13 @@ const ucp_type_table PRIV(utt)[] = {
   { 1011, PT_ALNUM, 0 },
   { 1015, PT_PXSPACE, 0 },
   { 1019, PT_SPACE, 0 },
-  { 1023, PT_WORD, 0 },
-  { 1027, PT_SC, ucp_Yi },
-  { 1030, PT_GC, ucp_Z },
-  { 1032, PT_PC, ucp_Zl },
-  { 1035, PT_PC, ucp_Zp },
-  { 1038, PT_PC, ucp_Zs }
+  { 1023, PT_UCNC, 0 },
+  { 1027, PT_WORD, 0 },
+  { 1031, PT_SC, ucp_Yi },
+  { 1034, PT_GC, ucp_Z },
+  { 1036, PT_PC, ucp_Zl },
+  { 1039, PT_PC, ucp_Zp },
+  { 1042, PT_PC, ucp_Zs }
 };
 
 const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
diff --git a/usr.sbin/nginx/src/pcre/pcre_xclass.c b/usr.sbin/nginx/src/pcre/pcre_xclass.c
index fa73cd8c9d5..d777acb57c9 100644
--- a/usr.sbin/nginx/src/pcre/pcre_xclass.c
+++ b/usr.sbin/nginx/src/pcre/pcre_xclass.c
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
-           Copyright (c) 1997-2012 University of Cambridge
+           Copyright (c) 1997-2013 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -180,6 +180,20 @@ while ((t = *data++) != XCL_END)
         return !negated;
       break;
 
+      case PT_UCNC:
+      if (c < 0xa0)
+        {
+        if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
+             c == CHAR_GRAVE_ACCENT) == (t == XCL_PROP))
+          return !negated;
+        }
+      else
+        {
+        if ((c < 0xd800 || c > 0xdfff) == (t == XCL_PROP))
+          return !negated;
+        }
+      break;
+
       /* This should never occur, but compilers may mutter if there is no
       default. */
author	robert <robert@openbsd.org>	2013-06-14 18:55:11 +0000
committer	robert <robert@openbsd.org>	2013-06-14 18:55:11 +0000
commit	6c0ee77c905b499597069f3161609a43bf472b69 (patch)
tree	173b5e19c8e0fd098deee027831213dc762b8616
parent	Correct interrupt moderation setting for 82598; tested on the CX4 version (diff)
download	wireguard-openbsd-6c0ee77c905b499597069f3161609a43bf472b69.tar.xz wireguard-openbsd-6c0ee77c905b499597069f3161609a43bf472b69.zip