update pcre to 8.30 to be in sync with ports

author: robert <robert@openbsd.org> 2012-02-18 11:28:28 +0000
committer: robert <robert@openbsd.org> 2012-02-18 11:28:28 +0000
commit: 4c64784e1754a12bcee2849f2480afe311febcce (patch)
tree: 2ea4aa1a762ff8e0460b68efef87581dc37a6aee /usr.sbin/nginx/src
parent: update to 1.0.12 (diff)
download: wireguard-openbsd-4c64784e1754a12bcee2849f2480afe311febcce.tar.xz
wireguard-openbsd-4c64784e1754a12bcee2849f2480afe311febcce.zip
15 files changed, 2756 insertions, 1706 deletions
diff --git a/usr.sbin/nginx/src/pcre/config.h b/usr.sbin/nginx/src/pcre/config.h
index b55d68b23b5..0bb73f32e01 100644
--- a/usr.sbin/nginx/src/pcre/config.h
+++ b/usr.sbin/nginx/src/pcre/config.h
@@ -31,8 +31,8 @@ them both to 0; an emulation function will be used. */
    character codes, define this macro as 1. On systems that can use
    "configure", this can be done via --enable-ebcdic. PCRE will then assume
    that all input strings are in EBCDIC. If you do not define this macro, PCRE
-   will assume input strings are ASCII or UTF-8 Unicode. It is not possible to
-   build a version of PCRE that supports both EBCDIC and UTF-8. */
+   will assume input strings are ASCII or UTF-8/16 Unicode. It is not possible
+   to build a version of PCRE that supports both EBCDIC and UTF-8/16. */
 /* #undef EBCDIC */
 
 /* Define to 1 if you have the `bcopy' function. */
@@ -190,7 +190,7 @@ them both to 0; an emulation function will be used. */
 #define PACKAGE_NAME "PCRE"
 
 /* Define to the full name and version of this package. */
-#define PACKAGE_STRING "PCRE 8.20"
+#define PACKAGE_STRING "PCRE 8.30"
 
 /* Define to the one symbol short name of this package. */
 #define PACKAGE_TARNAME "pcre"
@@ -199,7 +199,7 @@ them both to 0; an emulation function will be used. */
 #define PACKAGE_URL ""
 
 /* Define to the version of this package. */
-#define PACKAGE_VERSION "8.20"
+#define PACKAGE_VERSION "8.30"
 
 /* The value of PCREGREP_BUFSIZE determines the size of buffer used by
    pcregrep to hold parts of the file it is searching. On systems that support
@@ -251,20 +251,26 @@ them both to 0; an emulation function will be used. */
    handle .gz files. */
 /* #undef SUPPORT_LIBZ */
 
+/* Define to enable the 16 bit PCRE library. */
+#define SUPPORT_PCRE16 /**/
+
+/* Define to enable the 8 bit PCRE library. */
+#define SUPPORT_PCRE8 /**/
+
 /* Define to enable JIT support in pcregrep. */
 /* #undef SUPPORT_PCREGREP_JIT */
 
 /* Define to enable support for Unicode properties. */
 #define SUPPORT_UCP /**/
 
-/* Define to enable support for the UTF-8 Unicode encoding. This will work
+/* Define to enable support for the UTF-8/16 Unicode encoding. This will work
    even in an EBCDIC environment, but it is incompatible with the EBCDIC
-   macro. That is, PCRE can support *either* EBCDIC code *or* ASCII/UTF-8, but
-   not both at once. */
-#define SUPPORT_UTF8 /**/
+   macro. That is, PCRE can support *either* EBCDIC code *or* ASCII/UTF-8/16,
+   but not both at once. */
+#define SUPPORT_UTF /**/
 
 /* Version number of package */
-#define VERSION "8.20"
+#define VERSION "8.30"
 
 /* Define to empty if `const' does not conform to ANSI C. */
 /* #undef const */
diff --git a/usr.sbin/nginx/src/pcre/pcre.h b/usr.sbin/nginx/src/pcre/pcre.h
index 58ea327e9b2..712bd3d714b 100644
--- a/usr.sbin/nginx/src/pcre/pcre.h
+++ b/usr.sbin/nginx/src/pcre/pcre.h
@@ -5,7 +5,7 @@
 /* This is the public header file for the PCRE library, to be #included by
 applications that call the PCRE functions.
 
-           Copyright (c) 1997-2011 University of Cambridge
+           Copyright (c) 1997-2012 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
 /* The current PCRE version information. */
 
 #define PCRE_MAJOR          8
-#define PCRE_MINOR          21
+#define PCRE_MINOR          30
 #define PCRE_PRERELEASE     
-#define PCRE_DATE           2011-12-12
+#define PCRE_DATE           2012-02-04
 
 /* When an application links to a PCRE DLL in Windows, the symbols that are
 imported have to be identified as such. When building PCRE, the appropriate
@@ -116,9 +116,13 @@ compiling). */
 #define PCRE_NOTEOL             0x00000100  /* Exec, DFA exec */
 #define PCRE_UNGREEDY           0x00000200  /* Compile */
 #define PCRE_NOTEMPTY           0x00000400  /* Exec, DFA exec */
-#define PCRE_UTF8               0x00000800  /* Compile, used in exec, DFA exec */
+/* The next two are also used in exec and DFA exec */
+#define PCRE_UTF8               0x00000800  /* Compile (same as PCRE_UTF16) */
+#define PCRE_UTF16              0x00000800  /* Compile (same as PCRE_UTF8) */
 #define PCRE_NO_AUTO_CAPTURE    0x00001000  /* Compile */
-#define PCRE_NO_UTF8_CHECK      0x00002000  /* Compile, exec, DFA exec */
+/* The next two are also used in exec and DFA exec */
+#define PCRE_NO_UTF8_CHECK      0x00002000  /* Compile (same as PCRE_NO_UTF16_CHECK) */
+#define PCRE_NO_UTF16_CHECK     0x00002000  /* Compile (same as PCRE_NO_UTF8_CHECK) */
 #define PCRE_AUTO_CALLOUT       0x00004000  /* Compile */
 #define PCRE_PARTIAL_SOFT       0x00008000  /* Exec, DFA exec */
 #define PCRE_PARTIAL            0x00008000  /* Backwards compatible synonym */
@@ -142,34 +146,39 @@ compiling). */
 
 /* Exec-time and get/set-time error codes */
 
-#define PCRE_ERROR_NOMATCH         (-1)
-#define PCRE_ERROR_NULL            (-2)
-#define PCRE_ERROR_BADOPTION       (-3)
-#define PCRE_ERROR_BADMAGIC        (-4)
-#define PCRE_ERROR_UNKNOWN_OPCODE  (-5)
-#define PCRE_ERROR_UNKNOWN_NODE    (-5)  /* For backward compatibility */
-#define PCRE_ERROR_NOMEMORY        (-6)
-#define PCRE_ERROR_NOSUBSTRING     (-7)
-#define PCRE_ERROR_MATCHLIMIT      (-8)
-#define PCRE_ERROR_CALLOUT         (-9)  /* Never used by PCRE itself */
-#define PCRE_ERROR_BADUTF8        (-10)
-#define PCRE_ERROR_BADUTF8_OFFSET (-11)
-#define PCRE_ERROR_PARTIAL        (-12)
-#define PCRE_ERROR_BADPARTIAL     (-13)
-#define PCRE_ERROR_INTERNAL       (-14)
-#define PCRE_ERROR_BADCOUNT       (-15)
-#define PCRE_ERROR_DFA_UITEM      (-16)
-#define PCRE_ERROR_DFA_UCOND      (-17)
-#define PCRE_ERROR_DFA_UMLIMIT    (-18)
-#define PCRE_ERROR_DFA_WSSIZE     (-19)
-#define PCRE_ERROR_DFA_RECURSE    (-20)
-#define PCRE_ERROR_RECURSIONLIMIT (-21)
-#define PCRE_ERROR_NULLWSLIMIT    (-22)  /* No longer actually used */
-#define PCRE_ERROR_BADNEWLINE     (-23)
-#define PCRE_ERROR_BADOFFSET      (-24)
-#define PCRE_ERROR_SHORTUTF8      (-25)
-#define PCRE_ERROR_RECURSELOOP    (-26)
-#define PCRE_ERROR_JIT_STACKLIMIT (-27)
+#define PCRE_ERROR_NOMATCH          (-1)
+#define PCRE_ERROR_NULL             (-2)
+#define PCRE_ERROR_BADOPTION        (-3)
+#define PCRE_ERROR_BADMAGIC         (-4)
+#define PCRE_ERROR_UNKNOWN_OPCODE   (-5)
+#define PCRE_ERROR_UNKNOWN_NODE     (-5)  /* For backward compatibility */
+#define PCRE_ERROR_NOMEMORY         (-6)
+#define PCRE_ERROR_NOSUBSTRING      (-7)
+#define PCRE_ERROR_MATCHLIMIT       (-8)
+#define PCRE_ERROR_CALLOUT          (-9)  /* Never used by PCRE itself */
+#define PCRE_ERROR_BADUTF8         (-10)  /* Same for 8/16 */
+#define PCRE_ERROR_BADUTF16        (-10)  /* Same for 8/16 */
+#define PCRE_ERROR_BADUTF8_OFFSET  (-11)  /* Same for 8/16 */
+#define PCRE_ERROR_BADUTF16_OFFSET (-11)  /* Same for 8/16 */
+#define PCRE_ERROR_PARTIAL         (-12)
+#define PCRE_ERROR_BADPARTIAL      (-13)
+#define PCRE_ERROR_INTERNAL        (-14)
+#define PCRE_ERROR_BADCOUNT        (-15)
+#define PCRE_ERROR_DFA_UITEM       (-16)
+#define PCRE_ERROR_DFA_UCOND       (-17)
+#define PCRE_ERROR_DFA_UMLIMIT     (-18)
+#define PCRE_ERROR_DFA_WSSIZE      (-19)
+#define PCRE_ERROR_DFA_RECURSE     (-20)
+#define PCRE_ERROR_RECURSIONLIMIT  (-21)
+#define PCRE_ERROR_NULLWSLIMIT     (-22)  /* No longer actually used */
+#define PCRE_ERROR_BADNEWLINE      (-23)
+#define PCRE_ERROR_BADOFFSET       (-24)
+#define PCRE_ERROR_SHORTUTF8       (-25)
+#define PCRE_ERROR_SHORTUTF16      (-25)  /* Same for 8/16 */
+#define PCRE_ERROR_RECURSELOOP     (-26)
+#define PCRE_ERROR_JIT_STACKLIMIT  (-27)
+#define PCRE_ERROR_BADMODE         (-28)
+#define PCRE_ERROR_BADENDIANNESS   (-29)
 
 /* Specific error codes for UTF-8 validity checks */
 
@@ -196,6 +205,14 @@ compiling). */
 #define PCRE_UTF8_ERR20             20
 #define PCRE_UTF8_ERR21             21
 
+/* Specific error codes for UTF-16 validity checks */
+
+#define PCRE_UTF16_ERR0              0
+#define PCRE_UTF16_ERR1              1
+#define PCRE_UTF16_ERR2              2
+#define PCRE_UTF16_ERR3              3
+#define PCRE_UTF16_ERR4              4
+
 /* Request types for pcre_fullinfo() */
 
 #define PCRE_INFO_OPTIONS            0
@@ -231,13 +248,15 @@ compatible. */
 #define PCRE_CONFIG_MATCH_LIMIT_RECURSION   7
 #define PCRE_CONFIG_BSR                     8
 #define PCRE_CONFIG_JIT                     9
+#define PCRE_CONFIG_UTF16                  10
+#define PCRE_CONFIG_JITTARGET              11
 
 /* Request types for pcre_study(). Do not re-arrange, in order to remain
 compatible. */
 
 #define PCRE_STUDY_JIT_COMPILE            0x0001
 
-/* Bit flags for the pcre_extra structure. Do not re-arrange or redefine
+/* Bit flags for the pcre[16]_extra structure. Do not re-arrange or redefine
 these bits, just add new ones on the end, in order to remain compatible. */
 
 #define PCRE_EXTRA_STUDY_DATA             0x0001
@@ -253,9 +272,26 @@ these bits, just add new ones on the end, in order to remain compatible. */
 struct real_pcre;                 /* declaration; the definition is private  */
 typedef struct real_pcre pcre;
 
+struct real_pcre16;               /* declaration; the definition is private  */
+typedef struct real_pcre16 pcre16;
+
 struct real_pcre_jit_stack;       /* declaration; the definition is private  */
 typedef struct real_pcre_jit_stack pcre_jit_stack;
 
+struct real_pcre16_jit_stack;     /* declaration; the definition is private  */
+typedef struct real_pcre16_jit_stack pcre16_jit_stack;
+
+/* If PCRE is compiled with 16 bit character support, PCRE_UCHAR16 must contain
+a 16 bit wide signed data type. Otherwise it can be a dummy data type since
+pcre16 functions are not implemented. There is a check for this in pcre_internal.h. */
+#ifndef PCRE_UCHAR16
+#define PCRE_UCHAR16 unsigned short
+#endif
+
+#ifndef PCRE_SPTR16
+#define PCRE_SPTR16 const PCRE_UCHAR16 *
+#endif
+
 /* When PCRE is compiled as a C++ library, the subject pointer type can be
 replaced with a custom type. For conventional use, the public interface is a
 const char *. */
@@ -279,6 +315,19 @@ typedef struct pcre_extra {
   void *executable_jit;           /* Contains a pointer to a compiled jit code */
 } pcre_extra;
 
+/* Same structure as above, but with 16 bit char pointers. */
+
+typedef struct pcre16_extra {
+  unsigned long int flags;        /* Bits for which fields are set */
+  void *study_data;               /* Opaque data from pcre_study() */
+  unsigned long int match_limit;  /* Maximum number of calls to match() */
+  void *callout_data;             /* Data passed back in callouts */
+  const unsigned char *tables;    /* Pointer to character tables */
+  unsigned long int match_limit_recursion; /* Max recursive calls to match() */
+  PCRE_UCHAR16 **mark;            /* For passing back a mark pointer */
+  void *executable_jit;           /* Contains a pointer to a compiled jit code */
+} pcre16_extra;
+
 /* The structure for passing out data via the pcre_callout_function. We use a
 structure so that new fields can be added on the end in future versions,
 without changing the API of the function, thereby allowing old clients to work
@@ -304,6 +353,28 @@ typedef struct pcre_callout_block {
   /* ------------------------------------------------------------------ */
 } pcre_callout_block;
 
+/* Same structure as above, but with 16 bit char pointers. */
+
+typedef struct pcre16_callout_block {
+  int          version;           /* Identifies version of block */
+  /* ------------------------ Version 0 ------------------------------- */
+  int          callout_number;    /* Number compiled into pattern */
+  int         *offset_vector;     /* The offset vector */
+  PCRE_SPTR16  subject;           /* The subject being matched */
+  int          subject_length;    /* The length of the subject */
+  int          start_match;       /* Offset to start of this match attempt */
+  int          current_position;  /* Where we currently are in the subject */
+  int          capture_top;       /* Max current capture */
+  int          capture_last;      /* Most recently closed capture */
+  void        *callout_data;      /* Data passed in with the call */
+  /* ------------------- Added for Version 1 -------------------------- */
+  int          pattern_position;  /* Offset to next item in the pattern */
+  int          next_item_length;  /* Length of next item in the pattern */
+  /* ------------------- Added for Version 2 -------------------------- */
+  const PCRE_UCHAR16 *mark;       /* Pointer to current mark or NULL    */
+  /* ------------------------------------------------------------------ */
+} pcre16_callout_block;
+
 /* Indirection for store get and free functions. These can be set to
 alternative malloc/free functions if required. Special ones are used in the
 non-recursive case for "frames". There is also an optional callout function
@@ -316,58 +387,114 @@ PCRE_EXP_DECL void  (*pcre_free)(void *);
 PCRE_EXP_DECL void *(*pcre_stack_malloc)(size_t);
 PCRE_EXP_DECL void  (*pcre_stack_free)(void *);
 PCRE_EXP_DECL int   (*pcre_callout)(pcre_callout_block *);
+
+PCRE_EXP_DECL void *(*pcre16_malloc)(size_t);
+PCRE_EXP_DECL void  (*pcre16_free)(void *);
+PCRE_EXP_DECL void *(*pcre16_stack_malloc)(size_t);
+PCRE_EXP_DECL void  (*pcre16_stack_free)(void *);
+PCRE_EXP_DECL int   (*pcre16_callout)(pcre16_callout_block *);
 #else   /* VPCOMPAT */
 PCRE_EXP_DECL void *pcre_malloc(size_t);
 PCRE_EXP_DECL void  pcre_free(void *);
 PCRE_EXP_DECL void *pcre_stack_malloc(size_t);
 PCRE_EXP_DECL void  pcre_stack_free(void *);
 PCRE_EXP_DECL int   pcre_callout(pcre_callout_block *);
+
+PCRE_EXP_DECL void *pcre16_malloc(size_t);
+PCRE_EXP_DECL void  pcre16_free(void *);
+PCRE_EXP_DECL void *pcre16_stack_malloc(size_t);
+PCRE_EXP_DECL void  pcre16_stack_free(void *);
+PCRE_EXP_DECL int   pcre16_callout(pcre16_callout_block *);
 #endif  /* VPCOMPAT */
 
 /* User defined callback which provides a stack just before the match starts. */
 
 typedef pcre_jit_stack *(*pcre_jit_callback)(void *);
+typedef pcre16_jit_stack *(*pcre16_jit_callback)(void *);
 
 /* Exported PCRE functions */
 
 PCRE_EXP_DECL pcre *pcre_compile(const char *, int, const char **, int *,
                   const unsigned char *);
+PCRE_EXP_DECL pcre16 *pcre16_compile(PCRE_SPTR16, int, const char **, int *,
+                  const unsigned char *);
 PCRE_EXP_DECL pcre *pcre_compile2(const char *, int, int *, const char **,
                   int *, const unsigned char *);
+PCRE_EXP_DECL pcre16 *pcre16_compile2(PCRE_SPTR16, int, int *, const char **,
+                  int *, const unsigned char *);
 PCRE_EXP_DECL int  pcre_config(int, void *);
+PCRE_EXP_DECL int  pcre16_config(int, void *);
 PCRE_EXP_DECL int  pcre_copy_named_substring(const pcre *, const char *,
                   int *, int, const char *, char *, int);
-PCRE_EXP_DECL int  pcre_copy_substring(const char *, int *, int, int, char *,
-                  int);
+PCRE_EXP_DECL int  pcre16_copy_named_substring(const pcre16 *, PCRE_SPTR16,
+                  int *, int, PCRE_SPTR16, PCRE_UCHAR16 *, int);
+PCRE_EXP_DECL int  pcre_copy_substring(const char *, int *, int, int,
+                  char *, int);
+PCRE_EXP_DECL int  pcre16_copy_substring(PCRE_SPTR16, int *, int, int,
+                  PCRE_UCHAR16 *, int);
 PCRE_EXP_DECL int  pcre_dfa_exec(const pcre *, const pcre_extra *,
                   const char *, int, int, int, int *, int , int *, int);
+PCRE_EXP_DECL int  pcre16_dfa_exec(const pcre16 *, const pcre16_extra *,
+                  PCRE_SPTR16, int, int, int, int *, int , int *, int);
 PCRE_EXP_DECL int  pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR,
                    int, int, int, int *, int);
+PCRE_EXP_DECL int  pcre16_exec(const pcre16 *, const pcre16_extra *,
+                   PCRE_SPTR16, int, int, int, int *, int);
 PCRE_EXP_DECL void pcre_free_substring(const char *);
+PCRE_EXP_DECL void pcre16_free_substring(PCRE_SPTR16);
 PCRE_EXP_DECL void pcre_free_substring_list(const char **);
+PCRE_EXP_DECL void pcre16_free_substring_list(PCRE_SPTR16 *);
 PCRE_EXP_DECL int  pcre_fullinfo(const pcre *, const pcre_extra *, int,
                   void *);
+PCRE_EXP_DECL int  pcre16_fullinfo(const pcre16 *, const pcre16_extra *, int,
+                  void *);
 PCRE_EXP_DECL int  pcre_get_named_substring(const pcre *, const char *,
                   int *, int, const char *, const char **);
+PCRE_EXP_DECL int  pcre16_get_named_substring(const pcre16 *, PCRE_SPTR16,
+                  int *, int, PCRE_SPTR16, PCRE_SPTR16 *);
 PCRE_EXP_DECL int  pcre_get_stringnumber(const pcre *, const char *);
+PCRE_EXP_DECL int  pcre16_get_stringnumber(const pcre16 *, PCRE_SPTR16);
 PCRE_EXP_DECL int  pcre_get_stringtable_entries(const pcre *, const char *,
                   char **, char **);
+PCRE_EXP_DECL int  pcre16_get_stringtable_entries(const pcre16 *, PCRE_SPTR16,
+                  PCRE_UCHAR16 **, PCRE_UCHAR16 **);
 PCRE_EXP_DECL int  pcre_get_substring(const char *, int *, int, int,
                   const char **);
+PCRE_EXP_DECL int  pcre16_get_substring(PCRE_SPTR16, int *, int, int,
+                  PCRE_SPTR16 *);
 PCRE_EXP_DECL int  pcre_get_substring_list(const char *, int *, int,
                   const char ***);
-PCRE_EXP_DECL int  pcre_info(const pcre *, int *, int *);
+PCRE_EXP_DECL int  pcre16_get_substring_list(PCRE_SPTR16, int *, int,
+                  PCRE_SPTR16 **);
 PCRE_EXP_DECL const unsigned char *pcre_maketables(void);
+PCRE_EXP_DECL const unsigned char *pcre16_maketables(void);
 PCRE_EXP_DECL int  pcre_refcount(pcre *, int);
+PCRE_EXP_DECL int  pcre16_refcount(pcre16 *, int);
 PCRE_EXP_DECL pcre_extra *pcre_study(const pcre *, int, const char **);
+PCRE_EXP_DECL pcre16_extra *pcre16_study(const pcre16 *, int, const char **);
 PCRE_EXP_DECL void pcre_free_study(pcre_extra *);
+PCRE_EXP_DECL void pcre16_free_study(pcre16_extra *);
 PCRE_EXP_DECL const char *pcre_version(void);
+PCRE_EXP_DECL const char *pcre16_version(void);
+
+/* Utility functions for byte order swaps. */
+PCRE_EXP_DECL int  pcre_pattern_to_host_byte_order(pcre *, pcre_extra *,
+                  const unsigned char *);
+PCRE_EXP_DECL int  pcre16_pattern_to_host_byte_order(pcre16 *, pcre16_extra *,
+                  const unsigned char *);
+PCRE_EXP_DECL int  pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *,
+                  PCRE_SPTR16, int, int *, int);
 
 /* JIT compiler related functions. */
 
 PCRE_EXP_DECL pcre_jit_stack *pcre_jit_stack_alloc(int, int);
+PCRE_EXP_DECL pcre16_jit_stack *pcre16_jit_stack_alloc(int, int);
 PCRE_EXP_DECL void pcre_jit_stack_free(pcre_jit_stack *);
-PCRE_EXP_DECL void pcre_assign_jit_stack(pcre_extra *, pcre_jit_callback, void *);
+PCRE_EXP_DECL void pcre16_jit_stack_free(pcre16_jit_stack *);
+PCRE_EXP_DECL void pcre_assign_jit_stack(pcre_extra *,
+                  pcre_jit_callback, void *);
+PCRE_EXP_DECL void pcre16_assign_jit_stack(pcre16_extra *,
+                  pcre16_jit_callback, void *);
 
 #ifdef __cplusplus
 }  /* extern "C" */
diff --git a/usr.sbin/nginx/src/pcre/pcre_chartables.c b/usr.sbin/nginx/src/pcre/pcre_chartables.c
index 9117ae3c7fa..2a39e9ff33a 100644
--- a/usr.sbin/nginx/src/pcre/pcre_chartables.c
+++ b/usr.sbin/nginx/src/pcre/pcre_chartables.c
@@ -26,7 +26,7 @@ unit might reference this" and so it will always be supplied to the linker. */
 
 #include "pcre_internal.h"
 
-const unsigned char _pcre_default_tables[] = {
+const pcre_uint8 PRIV(default_tables)[] = {
 
 /* This table is a lower casing table. */
 
diff --git a/usr.sbin/nginx/src/pcre/pcre_compile.c b/usr.sbin/nginx/src/pcre/pcre_compile.c
index d3da5f62109..8070f510266 100644
--- a/usr.sbin/nginx/src/pcre/pcre_compile.c
+++ b/usr.sbin/nginx/src/pcre/pcre_compile.c
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
-           Copyright (c) 1997-2011 University of Cambridge
+           Copyright (c) 1997-2012 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -53,12 +53,16 @@ supporting internal functions that are not used by other modules. */
 #include "pcre_internal.h"
 
 
-/* When PCRE_DEBUG is defined, we need the pcre_printint() function, which is
-also used by pcretest. PCRE_DEBUG is not defined when building a production
-library. */
+/* When PCRE_DEBUG is defined, we need the pcre(16)_printint() function, which
+is also used by pcretest. PCRE_DEBUG is not defined when building a production
+library. We do not need to select pcre16_printint.c specially, because the
+COMPILE_PCREx macro will already be appropriately set. */
 
 #ifdef PCRE_DEBUG
-#include "pcre_printint.src"
+/* pcre_printint.c should not include any headers */
+#define PCRE_INCLUDED
+#include "pcre_printint.c"
+#undef PCRE_INCLUDED
 #endif
 
 
@@ -104,6 +108,14 @@ overrun before it actually does run off the end of the data block. */
 
 #define WORK_SIZE_SAFETY_MARGIN (100)
 
+/* Private flags added to firstchar and reqchar. */
+
+#define REQ_CASELESS   0x10000000l      /* Indicates caselessness */
+#define REQ_VARY       0x20000000l      /* Reqchar followed non-literal item */
+
+/* Repeated character flags. */
+
+#define UTF_LENGTH     0x10000000l      /* The char contains its length. */
 
 /* Table for handling escaped characters in the range '0'-'z'. Positive returns
 are simple data values; negative values are for special things like \d and so
@@ -238,7 +250,7 @@ static const char posix_names[] =
   STRING_graph0 STRING_print0 STRING_punct0 STRING_space0
   STRING_word0  STRING_xdigit;
 
-static const uschar posix_name_lengths[] = {
+static const pcre_uint8 posix_name_lengths[] = {
   5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };
 
 /* Table of class bit maps for each POSIX class. Each class is formed from a
@@ -273,47 +285,101 @@ substitutes must be in the order of the names, defined above, and there are
 both positive and negative cases. NULL means no substitute. */
 
 #ifdef SUPPORT_UCP
-static const uschar *substitutes[] = {
-  (uschar *)"\\P{Nd}",    /* \D */
-  (uschar *)"\\p{Nd}",    /* \d */
-  (uschar *)"\\P{Xsp}",   /* \S */       /* NOTE: Xsp is Perl space */
-  (uschar *)"\\p{Xsp}",   /* \s */
-  (uschar *)"\\P{Xwd}",   /* \W */
-  (uschar *)"\\p{Xwd}"    /* \w */
+static const pcre_uchar string_PNd[]  = {
+  CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
+  CHAR_N, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' };
+static const pcre_uchar string_pNd[]  = {
+  CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
+  CHAR_N, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' };
+static const pcre_uchar string_PXsp[] = {
+  CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
+  CHAR_X, CHAR_s, CHAR_p, CHAR_RIGHT_CURLY_BRACKET, '\0' };
+static const pcre_uchar string_pXsp[] = {
+  CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
+  CHAR_X, CHAR_s, CHAR_p, CHAR_RIGHT_CURLY_BRACKET, '\0' };
+static const pcre_uchar string_PXwd[] = {
+  CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
+  CHAR_X, CHAR_w, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' };
+static const pcre_uchar string_pXwd[] = {
+  CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
+  CHAR_X, CHAR_w, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' };
+
+static const pcre_uchar *substitutes[] = {
+  string_PNd,           /* \D */
+  string_pNd,           /* \d */
+  string_PXsp,          /* \S */       /* NOTE: Xsp is Perl space */
+  string_pXsp,          /* \s */
+  string_PXwd,          /* \W */
+  string_pXwd           /* \w */
 };
 
-static const uschar *posix_substitutes[] = {
-  (uschar *)"\\p{L}",     /* alpha */
-  (uschar *)"\\p{Ll}",    /* lower */
-  (uschar *)"\\p{Lu}",    /* upper */
-  (uschar *)"\\p{Xan}",   /* alnum */
-  NULL,                   /* ascii */
-  (uschar *)"\\h",        /* blank */
-  NULL,                   /* cntrl */
-  (uschar *)"\\p{Nd}",    /* digit */
-  NULL,                   /* graph */
-  NULL,                   /* print */
-  NULL,                   /* punct */
-  (uschar *)"\\p{Xps}",   /* space */    /* NOTE: Xps is POSIX space */
-  (uschar *)"\\p{Xwd}",   /* word */
-  NULL,                   /* xdigit */
+static const pcre_uchar string_pL[] =   {
+  CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
+  CHAR_L, CHAR_RIGHT_CURLY_BRACKET, '\0' };
+static const pcre_uchar string_pLl[] =  {
+  CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
+  CHAR_L, CHAR_l, CHAR_RIGHT_CURLY_BRACKET, '\0' };
+static const pcre_uchar string_pLu[] =  {
+  CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
+  CHAR_L, CHAR_u, CHAR_RIGHT_CURLY_BRACKET, '\0' };
+static const pcre_uchar string_pXan[] = {
+  CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
+  CHAR_X, CHAR_a, CHAR_n, CHAR_RIGHT_CURLY_BRACKET, '\0' };
+static const pcre_uchar string_h[] =    {
+  CHAR_BACKSLASH, CHAR_h, '\0' };
+static const pcre_uchar string_pXps[] = {
+  CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
+  CHAR_X, CHAR_p, CHAR_s, CHAR_RIGHT_CURLY_BRACKET, '\0' };
+static const pcre_uchar string_PL[] =   {
+  CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
+  CHAR_L, CHAR_RIGHT_CURLY_BRACKET, '\0' };
+static const pcre_uchar string_PLl[] =  {
+  CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
+  CHAR_L, CHAR_l, CHAR_RIGHT_CURLY_BRACKET, '\0' };
+static const pcre_uchar string_PLu[] =  {
+  CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
+  CHAR_L, CHAR_u, CHAR_RIGHT_CURLY_BRACKET, '\0' };
+static const pcre_uchar string_PXan[] = {
+  CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
+  CHAR_X, CHAR_a, CHAR_n, CHAR_RIGHT_CURLY_BRACKET, '\0' };
+static const pcre_uchar string_H[] =    {
+  CHAR_BACKSLASH, CHAR_H, '\0' };
+static const pcre_uchar string_PXps[] = {
+  CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
+  CHAR_X, CHAR_p, CHAR_s, CHAR_RIGHT_CURLY_BRACKET, '\0' };
+
+static const pcre_uchar *posix_substitutes[] = {
+  string_pL,            /* alpha */
+  string_pLl,           /* lower */
+  string_pLu,           /* upper */
+  string_pXan,          /* alnum */
+  NULL,                 /* ascii */
+  string_h,             /* blank */
+  NULL,                 /* cntrl */
+  string_pNd,           /* digit */
+  NULL,                 /* graph */
+  NULL,                 /* print */
+  NULL,                 /* punct */
+  string_pXps,          /* space */    /* NOTE: Xps is POSIX space */
+  string_pXwd,          /* word */
+  NULL,                 /* xdigit */
   /* Negated cases */
-  (uschar *)"\\P{L}",     /* ^alpha */
-  (uschar *)"\\P{Ll}",    /* ^lower */
-  (uschar *)"\\P{Lu}",    /* ^upper */
-  (uschar *)"\\P{Xan}",   /* ^alnum */
-  NULL,                   /* ^ascii */
-  (uschar *)"\\H",        /* ^blank */
-  NULL,                   /* ^cntrl */
-  (uschar *)"\\P{Nd}",    /* ^digit */
-  NULL,                   /* ^graph */
-  NULL,                   /* ^print */
-  NULL,                   /* ^punct */
-  (uschar *)"\\P{Xps}",   /* ^space */   /* NOTE: Xps is POSIX space */
-  (uschar *)"\\P{Xwd}",   /* ^word */
-  NULL                    /* ^xdigit */
+  string_PL,            /* ^alpha */
+  string_PLl,           /* ^lower */
+  string_PLu,           /* ^upper */
+  string_PXan,          /* ^alnum */
+  NULL,                 /* ^ascii */
+  string_H,             /* ^blank */
+  NULL,                 /* ^cntrl */
+  string_PNd,           /* ^digit */
+  NULL,                 /* ^graph */
+  NULL,                 /* ^print */
+  NULL,                 /* ^punct */
+  string_PXps,          /* ^space */   /* NOTE: Xps is POSIX space */
+  string_PXwd,          /* ^word */
+  NULL                  /* ^xdigit */
 };
-#define POSIX_SUBSIZE (sizeof(posix_substitutes)/sizeof(uschar *))
+#define POSIX_SUBSIZE (sizeof(posix_substitutes) / sizeof(pcre_uchar *))
 #endif
 
 #define STRING(a)  # a
@@ -372,7 +438,7 @@ static const char error_texts[] =
   /* 30 */
   "unknown POSIX class name\0"
   "POSIX collating elements are not supported\0"
-  "this version of PCRE is not compiled with PCRE_UTF8 support\0"
+  "this version of PCRE is compiled without UTF support\0"
   "spare error\0"  /** DEAD **/
   "character value in \\x{...} sequence is too large\0"
   /* 35 */
@@ -395,7 +461,7 @@ static const char error_texts[] =
   "too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")\0"
   /* 50 */
   "repeated subpattern is too long\0"    /** DEAD **/
-  "octal value is greater than \\377 (not in UTF-8 mode)\0"
+  "octal value is greater than \\377 in 8-bit non-UTF-8 mode\0"
   "internal error: overran compiling workspace\0"
   "internal error: previously-checked referenced subpattern not found\0"
   "DEFINE group contains more than one branch\0"
@@ -414,13 +480,15 @@ static const char error_texts[] =
   /* 65 */
   "different names for subpatterns of the same number are not allowed\0"
   "(*MARK) must have an argument\0"
-  "this version of PCRE is not compiled with PCRE_UCP support\0"
+  "this version of PCRE is not compiled with Unicode property support\0"
   "\\c must be followed by an ASCII character\0"
   "\\k is not followed by a braced, angle-bracketed, or quoted name\0"
   /* 70 */
   "internal error: unknown opcode in find_fixedlength()\0"
   "\\N is not supported in a class\0"
   "too many forward references\0"
+  "disallowed Unicode code point (>= 0xd800 && <= 0xdfff)\0"
+  "invalid UTF-16 string\0"
   ;
 
 /* Table to identify digits and hex digits. This is used when compiling
@@ -439,12 +507,18 @@ For convenience, we use the same bit definitions as in chartables:
 
 Then we can use ctype_digit and ctype_xdigit in the code. */
 
+/* Using a simple comparison for decimal numbers rather than a memory read
+is much faster, and the resulting code is simpler (the compiler turns it
+into a subtraction and unsigned comparison). */
+
+#define IS_DIGIT(x) ((x) >= CHAR_0 && (x) <= CHAR_9)
+
 #ifndef EBCDIC
 
 /* This is the "normal" case, for ASCII systems, and EBCDIC systems running in
 UTF-8 mode. */
 
-static const unsigned char digitab[] =
+static const pcre_uint8 digitab[] =
   {
   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   8- 15 */
@@ -483,7 +557,7 @@ static const unsigned char digitab[] =
 
 /* This is the "abnormal" case, for EBCDIC systems not running in UTF-8 mode. */
 
-static const unsigned char digitab[] =
+static const pcre_uint8 digitab[] =
   {
   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7  0 */
   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   8- 15    */
@@ -518,7 +592,7 @@ static const unsigned char digitab[] =
   0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c, /*  0 - 7  F0 */
   0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00};/*  8 -255    */
 
-static const unsigned char ebcdic_chartab[] = { /* chartable partial dup */
+static const pcre_uint8 ebcdic_chartab[] = { /* chartable partial dup */
   0x80,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /*   0-  7 */
   0x00,0x00,0x00,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
   0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /*  16- 23 */
@@ -557,7 +631,7 @@ static const unsigned char ebcdic_chartab[] = { /* chartable partial dup */
 /* Definition to allow mutual recursion */
 
 static BOOL
-  compile_regex(int, uschar **, const uschar **, int *, BOOL, BOOL, int, int,
+  compile_regex(int, pcre_uchar **, const pcre_uchar **, int *, BOOL, BOOL, int, int,
     int *, int *, branch_chain *, compile_data *, int *);
 
 
@@ -604,7 +678,7 @@ Returns:  0 if all went well, else an error number
 static int
 expand_workspace(compile_data *cd)
 {
-uschar *newspace;
+pcre_uchar *newspace;
 int newsize = cd->workspace_size * 2;
 
 if (newsize > COMPILE_WORK_SIZE_MAX) newsize = COMPILE_WORK_SIZE_MAX;
@@ -612,13 +686,12 @@ if (cd->workspace_size >= COMPILE_WORK_SIZE_MAX ||
     newsize - cd->workspace_size < WORK_SIZE_SAFETY_MARGIN)
  return ERR72;
 
-newspace = (pcre_malloc)(newsize);
+newspace = (PUBL(malloc))(IN_UCHARS(newsize));
 if (newspace == NULL) return ERR21;
-
-memcpy(newspace, cd->start_workspace, cd->workspace_size);
-cd->hwm = (uschar *)newspace + (cd->hwm - cd->start_workspace);
+memcpy(newspace, cd->start_workspace, cd->workspace_size * sizeof(pcre_uchar));
+cd->hwm = (pcre_uchar *)newspace + (cd->hwm - cd->start_workspace);
 if (cd->workspace_size > COMPILE_WORK_SIZE)
-  (pcre_free)((void *)cd->start_workspace);
+  (PUBL(free))((void *)cd->start_workspace);
 cd->start_workspace = newspace;
 cd->workspace_size = newsize;
 return 0;
@@ -642,17 +715,19 @@ Returns:    TRUE or FALSE
 */
 
 static BOOL
-is_counted_repeat(const uschar *p)
+is_counted_repeat(const pcre_uchar *p)
 {
-if ((digitab[*p++] & ctype_digit) == 0) return FALSE;
-while ((digitab[*p] & ctype_digit) != 0) p++;
+if (!IS_DIGIT(*p)) return FALSE;
+p++;
+while (IS_DIGIT(*p)) p++;
 if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;
 
 if (*p++ != CHAR_COMMA) return FALSE;
 if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;
 
-if ((digitab[*p++] & ctype_digit) == 0) return FALSE;
-while ((digitab[*p] & ctype_digit) != 0) p++;
+if (!IS_DIGIT(*p)) return FALSE;
+p++;
+while (IS_DIGIT(*p)) p++;
 
 return (*p == CHAR_RIGHT_CURLY_BRACKET);
 }
@@ -684,12 +759,14 @@ Returns:         zero or positive => a data character
 */
 
 static int
-check_escape(const uschar **ptrptr, int *errorcodeptr, int bracount,
+check_escape(const pcre_uchar **ptrptr, int *errorcodeptr, int bracount,
   int options, BOOL isclass)
 {
-BOOL utf8 = (options & PCRE_UTF8) != 0;
-const uschar *ptr = *ptrptr + 1;
-int c, i;
+/* PCRE_UTF16 has the same value as PCRE_UTF8. */
+BOOL utf = (options & PCRE_UTF8) != 0;
+const pcre_uchar *ptr = *ptrptr + 1;
+pcre_int32 c;
+int i;
 
 GETCHARINCTEST(c, ptr);           /* Get character value, increment pointer */
 ptr--;                            /* Set pointer back to the last byte */
@@ -703,11 +780,13 @@ in a table. A non-zero result is something that can be returned immediately.
 Otherwise further processing may be required. */
 
 #ifndef EBCDIC  /* ASCII/UTF-8 coding */
-else if (c < CHAR_0 || c > CHAR_z) {}                     /* Not alphanumeric */
+/* Not alphanumeric */
+else if (c < CHAR_0 || c > CHAR_z) {}
 else if ((i = escapes[c - CHAR_0]) != 0) c = i;
 
 #else           /* EBCDIC coding */
-else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {}   /* Not alphanumeric */
+/* Not alphanumeric */
+else if (c < 'a' || (!MAX_255(c) || (ebcdic_chartab[c] & 0x0E) == 0)) {}
 else if ((i = escapes[c - 0x48]) != 0)  c = i;
 #endif
 
@@ -715,7 +794,7 @@ else if ((i = escapes[c - 0x48]) != 0)  c = i;
 
 else
   {
-  const uschar *oldptr;
+  const pcre_uchar *oldptr;
   BOOL braced, negated;
 
   switch (c)
@@ -733,8 +812,10 @@ else
       {
       /* In JavaScript, \u must be followed by four hexadecimal numbers.
       Otherwise it is a lowercase u letter. */
-      if ((digitab[ptr[1]] & ctype_xdigit) != 0 && (digitab[ptr[2]] & ctype_xdigit) != 0
-           && (digitab[ptr[3]] & ctype_xdigit) != 0 && (digitab[ptr[4]] & ctype_xdigit) != 0)
+      if (MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0
+        && MAX_255(ptr[2]) && (digitab[ptr[2]] & ctype_xdigit) != 0
+        && MAX_255(ptr[3]) && (digitab[ptr[3]] & ctype_xdigit) != 0
+        && MAX_255(ptr[4]) && (digitab[ptr[4]] & ctype_xdigit) != 0)
         {
         c = 0;
         for (i = 0; i < 4; ++i)
@@ -788,9 +869,9 @@ else
 
     if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
       {
-      const uschar *p;
+      const pcre_uchar *p;
       for (p = ptr+2; *p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET; p++)
-        if (*p != CHAR_MINUS && (digitab[*p] & ctype_digit) == 0) break;
+        if (*p != CHAR_MINUS && !IS_DIGIT(*p)) break;
       if (*p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET)
         {
         c = -ESC_k;
@@ -808,12 +889,21 @@ else
       }
     else negated = FALSE;
 
+    /* The integer range is limited by the machine's int representation. */
     c = 0;
-    while ((digitab[ptr[1]] & ctype_digit) != 0)
+    while (IS_DIGIT(ptr[1]))
+      {
+      if (((unsigned int)c) > INT_MAX / 10) /* Integer overflow */
+        {
+        c = -1;
+        break;
+        }
       c = c * 10 + *(++ptr) - CHAR_0;
-
-    if (c < 0)   /* Integer overflow */
+      }
+    if (((unsigned int)c) > INT_MAX) /* Integer overflow */
       {
+      while (IS_DIGIT(ptr[1]))
+        ptr++;
       *errorcodeptr = ERR61;
       break;
       }
@@ -861,11 +951,21 @@ else
     if (!isclass)
       {
       oldptr = ptr;
+      /* The integer range is limited by the machine's int representation. */
       c -= CHAR_0;
-      while ((digitab[ptr[1]] & ctype_digit) != 0)
+      while (IS_DIGIT(ptr[1]))
+        {
+        if (((unsigned int)c) > INT_MAX / 10) /* Integer overflow */
+          {
+          c = -1;
+          break;
+          }
         c = c * 10 + *(++ptr) - CHAR_0;
-      if (c < 0)    /* Integer overflow */
+        }
+      if (((unsigned int)c) > INT_MAX) /* Integer overflow */
         {
+        while (IS_DIGIT(ptr[1]))
+          ptr++;
         *errorcodeptr = ERR61;
         break;
         }
@@ -891,26 +991,29 @@ else
     /* \0 always starts an octal number, but we may drop through to here with a
     larger first octal digit. The original code used just to take the least
     significant 8 bits of octal numbers (I think this is what early Perls used
-    to do). Nowadays we allow for larger numbers in UTF-8 mode, but no more
-    than 3 octal digits. */
+    to do). Nowadays we allow for larger numbers in UTF-8 mode and 16-bit mode,
+    but no more than 3 octal digits. */
 
     case CHAR_0:
     c -= CHAR_0;
     while(i++ < 2 && ptr[1] >= CHAR_0 && ptr[1] <= CHAR_7)
         c = c * 8 + *(++ptr) - CHAR_0;
-    if (!utf8 && c > 255) *errorcodeptr = ERR51;
+#ifdef COMPILE_PCRE8
+    if (!utf && c > 0xff) *errorcodeptr = ERR51;
+#endif
     break;
 
     /* \x is complicated. \x{ddd} is a character number which can be greater
-    than 0xff in utf8 mode, but only if the ddd are hex digits. If not, { is
-    treated as a data character. */
+    than 0xff in utf or non-8bit mode, but only if the ddd are hex digits.
+    If not, { is treated as a data character. */
 
     case CHAR_x:
     if ((options & PCRE_JAVASCRIPT_COMPAT) != 0)
       {
       /* In JavaScript, \x must be followed by two hexadecimal numbers.
       Otherwise it is a lowercase x letter. */
-      if ((digitab[ptr[1]] & ctype_xdigit) != 0 && (digitab[ptr[2]] & ctype_xdigit) != 0)
+      if (MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0
+        && MAX_255(ptr[2]) && (digitab[ptr[2]] & ctype_xdigit) != 0)
         {
         c = 0;
         for (i = 0; i < 2; ++i)
@@ -930,15 +1033,13 @@ else
 
     if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
       {
-      const uschar *pt = ptr + 2;
-      int count = 0;
+      const pcre_uchar *pt = ptr + 2;
 
       c = 0;
-      while ((digitab[*pt] & ctype_xdigit) != 0)
+      while (MAX_255(*pt) && (digitab[*pt] & ctype_xdigit) != 0)
         {
         register int cc = *pt++;
         if (c == 0 && cc == CHAR_0) continue;     /* Leading zeroes */
-        count++;
 
 #ifndef EBCDIC  /* ASCII/UTF-8 coding */
         if (cc >= CHAR_a) cc -= 32;               /* Convert to upper case */
@@ -947,11 +1048,25 @@ else
         if (cc >= CHAR_a && cc <= CHAR_z) cc += 64;  /* Convert to upper case */
         c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
 #endif
+
+#ifdef COMPILE_PCRE8
+        if (c > (utf ? 0x10ffff : 0xff)) { c = -1; break; }
+#else
+#ifdef COMPILE_PCRE16
+        if (c > (utf ? 0x10ffff : 0xffff)) { c = -1; break; }
+#endif
+#endif
+        }
+
+      if (c < 0)
+        {
+        while (MAX_255(*pt) && (digitab[*pt] & ctype_xdigit) != 0) pt++;
+        *errorcodeptr = ERR34;
         }
 
       if (*pt == CHAR_RIGHT_CURLY_BRACKET)
         {
-        if (c < 0 || count > (utf8? 8 : 2)) *errorcodeptr = ERR34;
+        if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;
         ptr = pt;
         break;
         }
@@ -963,7 +1078,7 @@ else
     /* Read just a single-byte hex-defined char */
 
     c = 0;
-    while (i++ < 2 && (digitab[ptr[1]] & ctype_xdigit) != 0)
+    while (i++ < 2 && MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0)
       {
       int cc;                                  /* Some compilers don't like */
       cc = *(++ptr);                           /* ++ in initializers */
@@ -1061,11 +1176,11 @@ Returns:         type value from ucp_type_table, or -1 for an invalid type
 */
 
 static int
-get_ucp(const uschar **ptrptr, BOOL *negptr, int *dptr, int *errorcodeptr)
+get_ucp(const pcre_uchar **ptrptr, BOOL *negptr, int *dptr, int *errorcodeptr)
 {
 int c, i, bot, top;
-const uschar *ptr = *ptrptr;
-char name[32];
+const pcre_uchar *ptr = *ptrptr;
+pcre_uchar name[32];
 
 c = *(++ptr);
 if (c == 0) goto ERROR_RETURN;
@@ -1082,7 +1197,7 @@ if (c == CHAR_LEFT_CURLY_BRACKET)
     *negptr = TRUE;
     ptr++;
     }
-  for (i = 0; i < (int)sizeof(name) - 1; i++)
+  for (i = 0; i < (int)(sizeof(name) / sizeof(pcre_uchar)) - 1; i++)
     {
     c = *(++ptr);
     if (c == 0) goto ERROR_RETURN;
@@ -1106,16 +1221,16 @@ else
 /* Search for a recognized property name using binary chop */
 
 bot = 0;
-top = _pcre_utt_size;
+top = PRIV(utt_size);
 
 while (bot < top)
   {
   i = (bot + top) >> 1;
-  c = strcmp(name, _pcre_utt_names + _pcre_utt[i].name_offset);
+  c = STRCMP_UC_C8(name, PRIV(utt_names) + PRIV(utt)[i].name_offset);
   if (c == 0)
     {
-    *dptr = _pcre_utt[i].value;
-    return _pcre_utt[i].type;
+    *dptr = PRIV(utt)[i].value;
+    return PRIV(utt)[i].type;
     }
   if (c > 0) bot = i + 1; else top = i;
   }
@@ -1153,8 +1268,8 @@ Returns:         pointer to '}' on success;
                  current ptr on error, with errorcodeptr set non-zero
 */
 
-static const uschar *
-read_repeat_counts(const uschar *p, int *minp, int *maxp, int *errorcodeptr)
+static const pcre_uchar *
+read_repeat_counts(const pcre_uchar *p, int *minp, int *maxp, int *errorcodeptr)
 {
 int min = 0;
 int max = -1;
@@ -1162,7 +1277,7 @@ int max = -1;
 /* Read the minimum value and do a paranoid check: a negative value indicates
 an integer overflow. */
 
-while ((digitab[*p] & ctype_digit) != 0) min = min * 10 + *p++ - CHAR_0;
+while (IS_DIGIT(*p)) min = min * 10 + *p++ - CHAR_0;
 if (min < 0 || min > 65535)
   {
   *errorcodeptr = ERR5;
@@ -1177,7 +1292,7 @@ if (*p == CHAR_RIGHT_CURLY_BRACKET) max = min; else
   if (*(++p) != CHAR_RIGHT_CURLY_BRACKET)
     {
     max = 0;
-    while((digitab[*p] & ctype_digit) != 0) max = max * 10 + *p++ - CHAR_0;
+    while(IS_DIGIT(*p)) max = max * 10 + *p++ - CHAR_0;
     if (max < 0 || max > 65535)
       {
       *errorcodeptr = ERR5;
@@ -1232,17 +1347,17 @@ Arguments:
   name         name to seek, or NULL if seeking a numbered subpattern
   lorn         name length, or subpattern number if name is NULL
   xmode        TRUE if we are in /x mode
-  utf8         TRUE if we are in UTF-8 mode
+  utf          TRUE if we are in UTF-8 / UTF-16 mode
   count        pointer to the current capturing subpattern number (updated)
 
 Returns:       the number of the named subpattern, or -1 if not found
 */
 
 static int
-find_parens_sub(uschar **ptrptr, compile_data *cd, const uschar *name, int lorn,
-  BOOL xmode, BOOL utf8, int *count)
+find_parens_sub(pcre_uchar **ptrptr, compile_data *cd, const pcre_uchar *name, int lorn,
+  BOOL xmode, BOOL utf, int *count)
 {
-uschar *ptr = *ptrptr;
+pcre_uchar *ptr = *ptrptr;
 int start_count = *count;
 int hwm_count = start_count;
 BOOL dup_parens = FALSE;
@@ -1309,7 +1424,7 @@ if (ptr[0] == CHAR_LEFT_PARENTHESIS)
         ptr[1] != CHAR_EQUALS_SIGN) || *ptr == CHAR_APOSTROPHE)
       {
       int term;
-      const uschar *thisname;
+      const pcre_uchar *thisname;
       *count += 1;
       if (name == NULL && *count == lorn) return *count;
       term = *ptr++;
@@ -1317,7 +1432,7 @@ if (ptr[0] == CHAR_LEFT_PARENTHESIS)
       thisname = ptr;
       while (*ptr != term) ptr++;
       if (name != NULL && lorn == ptr - thisname &&
-          strncmp((const char *)name, (const char *)thisname, lorn) == 0)
+          STRNCMP_UC_UC(name, thisname, lorn) == 0)
         return *count;
       term++;
       }
@@ -1360,7 +1475,7 @@ for (; ptr < cd->end_pattern; ptr++)
         {
         if (ptr[2] == CHAR_E)
           ptr+= 2;
-        else if (strncmp((const char *)ptr+2,
+        else if (STRNCMP_UC_C8(ptr + 2,
                  STR_Q STR_BACKSLASH STR_E, 3) == 0)
           ptr += 4;
         else
@@ -1408,8 +1523,8 @@ for (; ptr < cd->end_pattern; ptr++)
       {
       if (IS_NEWLINE(ptr)) { ptr += cd->nllen - 1; break; }
       ptr++;
-#ifdef SUPPORT_UTF8
-      if (utf8) while ((*ptr & 0xc0) == 0x80) ptr++;
+#ifdef SUPPORT_UTF
+      if (utf) FORWARDCHAR(ptr);
 #endif
       }
     if (*ptr == 0) goto FAIL_EXIT;
@@ -1420,7 +1535,7 @@ for (; ptr < cd->end_pattern; ptr++)
 
   if (*ptr == CHAR_LEFT_PARENTHESIS)
     {
-    int rc = find_parens_sub(&ptr, cd, name, lorn, xmode, utf8, count);
+    int rc = find_parens_sub(&ptr, cd, name, lorn, xmode, utf, count);
     if (rc > 0) return rc;
     if (*ptr == 0) goto FAIL_EXIT;
     }
@@ -1466,16 +1581,16 @@ Arguments:
   name         name to seek, or NULL if seeking a numbered subpattern
   lorn         name length, or subpattern number if name is NULL
   xmode        TRUE if we are in /x mode
-  utf8         TRUE if we are in UTF-8 mode
+  utf          TRUE if we are in UTF-8 / UTF-16 mode
 
 Returns:       the number of the found subpattern, or -1 if not found
 */
 
 static int
-find_parens(compile_data *cd, const uschar *name, int lorn, BOOL xmode,
-  BOOL utf8)
+find_parens(compile_data *cd, const pcre_uchar *name, int lorn, BOOL xmode,
+  BOOL utf)
 {
-uschar *ptr = (uschar *)cd->start_pattern;
+pcre_uchar *ptr = (pcre_uchar *)cd->start_pattern;
 int count = 0;
 int rc;
 
@@ -1486,7 +1601,7 @@ matching closing parens. That is why we have to have a loop. */
 
 for (;;)
   {
-  rc = find_parens_sub(&ptr, cd, name, lorn, xmode, utf8, &count);
+  rc = find_parens_sub(&ptr, cd, name, lorn, xmode, utf, &count);
   if (rc > 0 || *ptr++ == 0) break;
   }
 
@@ -1513,8 +1628,8 @@ Arguments:
 Returns:       pointer to the first significant opcode
 */
 
-static const uschar*
-first_significant_code(const uschar *code, BOOL skipassert)
+static const pcre_uchar*
+first_significant_code(const pcre_uchar *code, BOOL skipassert)
 {
 for (;;)
   {
@@ -1525,7 +1640,7 @@ for (;;)
     case OP_ASSERTBACK_NOT:
     if (!skipassert) return code;
     do code += GET(code, 1); while (*code == OP_ALT);
-    code += _pcre_OP_lengths[*code];
+    code += PRIV(OP_lengths)[*code];
     break;
 
     case OP_WORD_BOUNDARY:
@@ -1539,7 +1654,7 @@ for (;;)
     case OP_RREF:
     case OP_NRREF:
     case OP_DEF:
-    code += _pcre_OP_lengths[*code];
+    code += PRIV(OP_lengths)[*code];
     break;
 
     default:
@@ -1569,7 +1684,7 @@ and doing the check at the end; a flag specifies which mode we are running in.
 
 Arguments:
   code     points to the start of the pattern (the bracket)
-  utf8     TRUE in UTF-8 mode
+  utf      TRUE in UTF-8 / UTF-16 mode
   atend    TRUE if called when the pattern is complete
   cd       the "compile data" structure
 
@@ -1581,12 +1696,12 @@ Returns:   the fixed length,
 */
 
 static int
-find_fixedlength(uschar *code, BOOL utf8, BOOL atend, compile_data *cd)
+find_fixedlength(pcre_uchar *code, BOOL utf, BOOL atend, compile_data *cd)
 {
 int length = -1;
 
 register int branchlength = 0;
-register uschar *cc = code + 1 + LINK_SIZE;
+register pcre_uchar *cc = code + 1 + LINK_SIZE;
 
 /* Scan along the opcodes for this branch. If we get to the end of the
 branch, check the length against that of the other branches. */
@@ -1594,8 +1709,9 @@ branch, check the length against that of the other branches. */
 for (;;)
   {
   int d;
-  uschar *ce, *cs;
+  pcre_uchar *ce, *cs;
   register int op = *cc;
+
   switch (op)
     {
     /* We only need to continue for OP_CBRA (normal capturing bracket) and
@@ -1608,7 +1724,7 @@ for (;;)
     case OP_ONCE:
     case OP_ONCE_NC:
     case OP_COND:
-    d = find_fixedlength(cc + ((op == OP_CBRA)? 2:0), utf8, atend, cd);
+    d = find_fixedlength(cc + ((op == OP_CBRA)? IMM2_SIZE : 0), utf, atend, cd);
     if (d < 0) return d;
     branchlength += d;
     do cc += GET(cc, 1); while (*cc == OP_ALT);
@@ -1639,10 +1755,10 @@ for (;;)
 
     case OP_RECURSE:
     if (!atend) return -3;
-    cs = ce = (uschar *)cd->start_code + GET(cc, 1);  /* Start subpattern */
-    do ce += GET(ce, 1); while (*ce == OP_ALT);       /* End subpattern */
-    if (cc > cs && cc < ce) return -1;                /* Recursion */
-    d = find_fixedlength(cs + 2, utf8, atend, cd);
+    cs = ce = (pcre_uchar *)cd->start_code + GET(cc, 1);  /* Start subpattern */
+    do ce += GET(ce, 1); while (*ce == OP_ALT);           /* End subpattern */
+    if (cc > cs && cc < ce) return -1;                    /* Recursion */
+    d = find_fixedlength(cs + IMM2_SIZE, utf, atend, cd);
     if (d < 0) return d;
     branchlength += d;
     cc += 1 + LINK_SIZE;
@@ -1655,7 +1771,8 @@ for (;;)
     case OP_ASSERTBACK:
     case OP_ASSERTBACK_NOT:
     do cc += GET(cc, 1); while (*cc == OP_ALT);
-    /* Fall through */
+    cc += PRIV(OP_lengths)[*cc];
+    break;
 
     /* Skip over things that don't match chars */
 
@@ -1663,7 +1780,7 @@ for (;;)
     case OP_PRUNE_ARG:
     case OP_SKIP_ARG:
     case OP_THEN_ARG:
-    cc += cc[1] + _pcre_OP_lengths[*cc];
+    cc += cc[1] + PRIV(OP_lengths)[*cc];
     break;
 
     case OP_CALLOUT:
@@ -1690,7 +1807,7 @@ for (;;)
     case OP_SOM:
     case OP_THEN:
     case OP_WORD_BOUNDARY:
-    cc += _pcre_OP_lengths[*cc];
+    cc += PRIV(OP_lengths)[*cc];
     break;
 
     /* Handle literal characters */
@@ -1701,8 +1818,8 @@ for (;;)
     case OP_NOTI:
     branchlength++;
     cc += 2;
-#ifdef SUPPORT_UTF8
-    if (utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];
+#ifdef SUPPORT_UTF
+    if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
 #endif
     break;
 
@@ -1714,16 +1831,16 @@ for (;;)
     case OP_NOTEXACT:
     case OP_NOTEXACTI:
     branchlength += GET2(cc,1);
-    cc += 4;
-#ifdef SUPPORT_UTF8
-    if (utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];
+    cc += 2 + IMM2_SIZE;
+#ifdef SUPPORT_UTF
+    if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
 #endif
     break;
 
     case OP_TYPEEXACT:
     branchlength += GET2(cc,1);
-    if (cc[3] == OP_PROP || cc[3] == OP_NOTPROP) cc += 2;
-    cc += 4;
+    if (cc[1 + IMM2_SIZE] == OP_PROP || cc[1 + IMM2_SIZE] == OP_NOTPROP) cc += 2;
+    cc += 1 + IMM2_SIZE + 1;
     break;
 
     /* Handle single-char matchers */
@@ -1757,15 +1874,15 @@ for (;;)
 
     /* Check a class for variable quantification */
 
-#ifdef SUPPORT_UTF8
+#if defined SUPPORT_UTF || defined COMPILE_PCRE16
     case OP_XCLASS:
-    cc += GET(cc, 1) - 33;
+    cc += GET(cc, 1) - PRIV(OP_lengths)[OP_CLASS];
     /* Fall through */
 #endif
 
     case OP_CLASS:
     case OP_NCLASS:
-    cc += 33;
+    cc += PRIV(OP_lengths)[OP_CLASS];
 
     switch (*cc)
       {
@@ -1779,9 +1896,9 @@ for (;;)
 
       case OP_CRRANGE:
       case OP_CRMINRANGE:
-      if (GET2(cc,1) != GET2(cc,3)) return -1;
+      if (GET2(cc,1) != GET2(cc,1+IMM2_SIZE)) return -1;
       branchlength += GET2(cc,1);
-      cc += 5;
+      cc += 1 + 2 * IMM2_SIZE;
       break;
 
       default:
@@ -1896,14 +2013,14 @@ length.
 
 Arguments:
   code        points to start of expression
-  utf8        TRUE in UTF-8 mode
+  utf         TRUE in UTF-8 / UTF-16 mode
   number      the required bracket number or negative to find a lookbehind
 
 Returns:      pointer to the opcode for the bracket, or NULL if not found
 */
 
-const uschar *
-_pcre_find_bracket(const uschar *code, BOOL utf8, int number)
+const pcre_uchar *
+PRIV(find_bracket)(const pcre_uchar *code, BOOL utf, int number)
 {
 for (;;)
   {
@@ -1921,8 +2038,8 @@ for (;;)
 
   else if (c == OP_REVERSE)
     {
-    if (number < 0) return (uschar *)code;
-    code += _pcre_OP_lengths[c];
+    if (number < 0) return (pcre_uchar *)code;
+    code += PRIV(OP_lengths)[c];
     }
 
   /* Handle capturing bracket */
@@ -1931,8 +2048,8 @@ for (;;)
            c == OP_CBRAPOS || c == OP_SCBRAPOS)
     {
     int n = GET2(code, 1+LINK_SIZE);
-    if (n == number) return (uschar *)code;
-    code += _pcre_OP_lengths[c];
+    if (n == number) return (pcre_uchar *)code;
+    code += PRIV(OP_lengths)[c];
     }
 
   /* Otherwise, we can get the item's length from the table, except that for
@@ -1960,7 +2077,8 @@ for (;;)
       case OP_TYPEMINUPTO:
       case OP_TYPEEXACT:
       case OP_TYPEPOSUPTO:
-      if (code[3] == OP_PROP || code[3] == OP_NOTPROP) code += 2;
+      if (code[1 + IMM2_SIZE] == OP_PROP
+        || code[1 + IMM2_SIZE] == OP_NOTPROP) code += 2;
       break;
 
       case OP_MARK:
@@ -1976,14 +2094,14 @@ for (;;)
 
     /* Add in the fixed length from the table */
 
-    code += _pcre_OP_lengths[c];
+    code += PRIV(OP_lengths)[c];
 
   /* In UTF-8 mode, opcodes that are followed by a character may be followed by
   a multi-byte character. The length in the table is a minimum, so we have to
   arrange to skip the extra bytes. */
 
-#ifdef SUPPORT_UTF8
-    if (utf8) switch(c)
+#ifdef SUPPORT_UTF
+    if (utf) switch(c)
       {
       case OP_CHAR:
       case OP_CHARI:
@@ -2013,11 +2131,11 @@ for (;;)
       case OP_MINQUERYI:
       case OP_POSQUERY:
       case OP_POSQUERYI:
-      if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
+      if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
       break;
       }
 #else
-    (void)(utf8);  /* Keep compiler happy by referencing function argument */
+    (void)(utf);  /* Keep compiler happy by referencing function argument */
 #endif
     }
   }
@@ -2034,13 +2152,13 @@ instance of OP_RECURSE.
 
 Arguments:
   code        points to start of expression
-  utf8        TRUE in UTF-8 mode
+  utf         TRUE in UTF-8 / UTF-16 mode
 
 Returns:      pointer to the opcode for OP_RECURSE, or NULL if not found
 */
 
-static const uschar *
-find_recurse(const uschar *code, BOOL utf8)
+static const pcre_uchar *
+find_recurse(const pcre_uchar *code, BOOL utf)
 {
 for (;;)
   {
@@ -2079,7 +2197,8 @@ for (;;)
       case OP_TYPEUPTO:
       case OP_TYPEMINUPTO:
       case OP_TYPEEXACT:
-      if (code[3] == OP_PROP || code[3] == OP_NOTPROP) code += 2;
+      if (code[1 + IMM2_SIZE] == OP_PROP
+        || code[1 + IMM2_SIZE] == OP_NOTPROP) code += 2;
       break;
 
       case OP_MARK:
@@ -2095,14 +2214,14 @@ for (;;)
 
     /* Add in the fixed length from the table */
 
-    code += _pcre_OP_lengths[c];
+    code += PRIV(OP_lengths)[c];
 
     /* In UTF-8 mode, opcodes that are followed by a character may be followed
     by a multi-byte character. The length in the table is a minimum, so we have
     to arrange to skip the extra bytes. */
 
-#ifdef SUPPORT_UTF8
-    if (utf8) switch(c)
+#ifdef SUPPORT_UTF
+    if (utf) switch(c)
       {
       case OP_CHAR:
       case OP_CHARI:
@@ -2132,11 +2251,11 @@ for (;;)
       case OP_MINQUERYI:
       case OP_POSQUERY:
       case OP_POSQUERYI:
-      if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
+      if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
       break;
       }
 #else
-    (void)(utf8);  /* Keep compiler happy by referencing function argument */
+    (void)(utf);  /* Keep compiler happy by referencing function argument */
 #endif
     }
   }
@@ -2159,22 +2278,22 @@ bracket whose current branch will already have been scanned.
 Arguments:
   code        points to start of search
   endcode     points to where to stop
-  utf8        TRUE if in UTF8 mode
+  utf         TRUE if in UTF-8 / UTF-16 mode
   cd          contains pointers to tables etc.
 
 Returns:      TRUE if what is matched could be empty
 */
 
 static BOOL
-could_be_empty_branch(const uschar *code, const uschar *endcode, BOOL utf8,
-  compile_data *cd)
+could_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode,
+  BOOL utf, compile_data *cd)
 {
 register int c;
-for (code = first_significant_code(code + _pcre_OP_lengths[*code], TRUE);
+for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
      code < endcode;
-     code = first_significant_code(code + _pcre_OP_lengths[c], TRUE))
+     code = first_significant_code(code + PRIV(OP_lengths)[c], TRUE))
   {
-  const uschar *ccode;
+  const pcre_uchar *ccode;
 
   c = *code;
 
@@ -2197,7 +2316,7 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], TRUE);
 
   if (c == OP_RECURSE)
     {
-    const uschar *scode;
+    const pcre_uchar *scode;
     BOOL empty_branch;
 
     /* Test for forward reference */
@@ -2215,7 +2334,7 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], TRUE);
 
     do
       {
-      if (could_be_empty_branch(scode, endcode, utf8, cd))
+      if (could_be_empty_branch(scode, endcode, utf, cd))
         {
         empty_branch = TRUE;
         break;
@@ -2233,7 +2352,7 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], TRUE);
   if (c == OP_BRAZERO || c == OP_BRAMINZERO || c == OP_SKIPZERO ||
       c == OP_BRAPOSZERO)
     {
-    code += _pcre_OP_lengths[c];
+    code += PRIV(OP_lengths)[c];
     do code += GET(code, 1); while (*code == OP_ALT);
     c = *code;
     continue;
@@ -2271,7 +2390,7 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], TRUE);
       empty_branch = FALSE;
       do
         {
-        if (!empty_branch && could_be_empty_branch(code, endcode, utf8, cd))
+        if (!empty_branch && could_be_empty_branch(code, endcode, utf, cd))
           empty_branch = TRUE;
         code += GET(code, 1);
         }
@@ -2289,11 +2408,11 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], TRUE);
     {
     /* Check for quantifiers after a class. XCLASS is used for classes that
     cannot be represented just by a bit map. This includes negated single
-    high-valued characters. The length in _pcre_OP_lengths[] is zero; the
+    high-valued characters. The length in PRIV(OP_lengths)[] is zero; the
     actual length is stored in the compiled code, so we must update "code"
     here. */
 
-#ifdef SUPPORT_UTF8
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
     case OP_XCLASS:
     ccode = code += GET(code, 1);
     goto CHECK_CLASS_REPEAT;
@@ -2301,9 +2420,9 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], TRUE);
 
     case OP_CLASS:
     case OP_NCLASS:
-    ccode = code + 33;
+    ccode = code + PRIV(OP_lengths)[OP_CLASS];
 
-#ifdef SUPPORT_UTF8
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
     CHECK_CLASS_REPEAT:
 #endif
 
@@ -2376,7 +2495,8 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], TRUE);
     case OP_TYPEUPTO:
     case OP_TYPEMINUPTO:
     case OP_TYPEPOSUPTO:
-    if (code[3] == OP_PROP || code[3] == OP_NOTPROP) code += 2;
+    if (code[1 + IMM2_SIZE] == OP_PROP
+      || code[1 + IMM2_SIZE] == OP_NOTPROP) code += 2;
     break;
 
     /* End of branch */
@@ -2391,7 +2511,7 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], TRUE);
     /* In UTF-8 mode, STAR, MINSTAR, POSSTAR, QUERY, MINQUERY, POSQUERY, UPTO,
     MINUPTO, and POSUPTO may be followed by a multibyte character */
 
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
     case OP_STAR:
     case OP_STARI:
     case OP_MINSTAR:
@@ -2404,7 +2524,7 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], TRUE);
     case OP_MINQUERYI:
     case OP_POSQUERY:
     case OP_POSQUERYI:
-    if (utf8 && code[1] >= 0xc0) code += _pcre_utf8_table4[code[1] & 0x3f];
+    if (utf && HAS_EXTRALEN(code[1])) code += GET_EXTRALEN(code[1]);
     break;
 
     case OP_UPTO:
@@ -2413,7 +2533,7 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], TRUE);
     case OP_MINUPTOI:
     case OP_POSUPTO:
     case OP_POSUPTOI:
-    if (utf8 && code[3] >= 0xc0) code += _pcre_utf8_table4[code[3] & 0x3f];
+    if (utf && HAS_EXTRALEN(code[1 + IMM2_SIZE])) code += GET_EXTRALEN(code[1 + IMM2_SIZE]);
     break;
 #endif
 
@@ -2457,19 +2577,19 @@ Arguments:
   code        points to start of the recursion
   endcode     points to where to stop (current RECURSE item)
   bcptr       points to the chain of current (unclosed) branch starts
-  utf8        TRUE if in UTF-8 mode
+  utf         TRUE if in UTF-8 / UTF-16 mode
   cd          pointers to tables etc
 
 Returns:      TRUE if what is matched could be empty
 */
 
 static BOOL
-could_be_empty(const uschar *code, const uschar *endcode, branch_chain *bcptr,
-  BOOL utf8, compile_data *cd)
+could_be_empty(const pcre_uchar *code, const pcre_uchar *endcode,
+  branch_chain *bcptr, BOOL utf, compile_data *cd)
 {
 while (bcptr != NULL && bcptr->current_branch >= code)
   {
-  if (!could_be_empty_branch(bcptr->current_branch, endcode, utf8, cd))
+  if (!could_be_empty_branch(bcptr->current_branch, endcode, utf, cd))
     return FALSE;
   bcptr = bcptr->outer;
   }
@@ -2521,7 +2641,7 @@ Returns:   TRUE or FALSE
 */
 
 static BOOL
-check_posix_syntax(const uschar *ptr, const uschar **endptr)
+check_posix_syntax(const pcre_uchar *ptr, const pcre_uchar **endptr)
 {
 int terminator;          /* Don't combine these lines; the Solaris cc */
 terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */
@@ -2565,14 +2685,14 @@ Returns:     a value representing the name, or -1 if unknown
 */
 
 static int
-check_posix_name(const uschar *ptr, int len)
+check_posix_name(const pcre_uchar *ptr, int len)
 {
 const char *pn = posix_names;
 register int yield = 0;
 while (posix_name_lengths[yield] != 0)
   {
   if (len == posix_name_lengths[yield] &&
-    strncmp((const char *)ptr, pn, len) == 0) return yield;
+    STRNCMP_UC_C8(ptr, pn, len) == 0) return yield;
   pn += posix_name_lengths[yield] + 1;
   yield++;
   }
@@ -2604,7 +2724,7 @@ value in the reference (which is a group number).
 Arguments:
   group      points to the start of the group
   adjust     the amount by which the group is to be moved
-  utf8       TRUE in UTF-8 mode
+  utf        TRUE in UTF-8 / UTF-16 mode
   cd         contains pointers to tables etc.
   save_hwm   the hwm forward reference pointer at the start of the group
 
@@ -2612,15 +2732,15 @@ Returns:     nothing
 */
 
 static void
-adjust_recurse(uschar *group, int adjust, BOOL utf8, compile_data *cd,
-  uschar *save_hwm)
+adjust_recurse(pcre_uchar *group, int adjust, BOOL utf, compile_data *cd,
+  pcre_uchar *save_hwm)
 {
-uschar *ptr = group;
+pcre_uchar *ptr = group;
 
-while ((ptr = (uschar *)find_recurse(ptr, utf8)) != NULL)
+while ((ptr = (pcre_uchar *)find_recurse(ptr, utf)) != NULL)
   {
   int offset;
-  uschar *hc;
+  pcre_uchar *hc;
 
   /* See if this recursion is on the forward reference list. If so, adjust the
   reference. */
@@ -2665,14 +2785,14 @@ Arguments:
 Returns:         new code pointer
 */
 
-static uschar *
-auto_callout(uschar *code, const uschar *ptr, compile_data *cd)
+static pcre_uchar *
+auto_callout(pcre_uchar *code, const pcre_uchar *ptr, compile_data *cd)
 {
 *code++ = OP_CALLOUT;
 *code++ = 255;
 PUT(code, 0, (int)(ptr - cd->start_pattern));  /* Pattern offset */
 PUT(code, LINK_SIZE, 0);                       /* Default length */
-return code + 2*LINK_SIZE;
+return code + 2 * LINK_SIZE;
 }
 
 
@@ -2694,7 +2814,7 @@ Returns:             nothing
 */
 
 static void
-complete_callout(uschar *previous_callout, const uschar *ptr, compile_data *cd)
+complete_callout(pcre_uchar *previous_callout, const pcre_uchar *ptr, compile_data *cd)
 {
 int length = (int)(ptr - cd->start_pattern - GET(previous_callout, 2));
 PUT(previous_callout, 2 + LINK_SIZE, length);
@@ -2777,7 +2897,7 @@ switch(ptype)
           prop->chartype == ucp_Lt) == negated;
 
   case PT_GC:
-  return (pdata == _pcre_ucp_gentype[prop->chartype]) == negated;
+  return (pdata == PRIV(ucp_gentype)[prop->chartype]) == negated;
 
   case PT_PC:
   return (pdata == prop->chartype) == negated;
@@ -2788,23 +2908,23 @@ switch(ptype)
   /* These are specials */
 
   case PT_ALNUM:
-  return (_pcre_ucp_gentype[prop->chartype] == ucp_L ||
-          _pcre_ucp_gentype[prop->chartype] == ucp_N) == negated;
+  return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
+          PRIV(ucp_gentype)[prop->chartype] == ucp_N) == negated;
 
   case PT_SPACE:    /* Perl space */
-  return (_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
+  return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
           c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
           == negated;
 
   case PT_PXSPACE:  /* POSIX space */
-  return (_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
+  return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
           c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
           c == CHAR_FF || c == CHAR_CR)
           == negated;
 
   case PT_WORD:
-  return (_pcre_ucp_gentype[prop->chartype] == ucp_L ||
-          _pcre_ucp_gentype[prop->chartype] == ucp_N ||
+  return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
+          PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
           c == CHAR_UNDERSCORE) == negated;
   }
 return FALSE;
@@ -2823,7 +2943,7 @@ sense to automatically possessify the repeated item.
 
 Arguments:
   previous      pointer to the repeated opcode
-  utf8          TRUE in UTF-8 mode
+  utf           TRUE in UTF-8 / UTF-16 mode
   ptr           next character in pattern
   options       options bits
   cd            contains pointers to tables etc.
@@ -2832,10 +2952,10 @@ Returns:        TRUE if possessifying is wanted
 */
 
 static BOOL
-check_auto_possessive(const uschar *previous, BOOL utf8, const uschar *ptr,
-  int options, compile_data *cd)
+check_auto_possessive(const pcre_uchar *previous, BOOL utf,
+  const pcre_uchar *ptr, int options, compile_data *cd)
 {
-int c, next;
+pcre_int32 c, next;
 int op_code = *previous++;
 
 /* Skip whitespace and comments in extended mode */
@@ -2844,7 +2964,7 @@ if ((options & PCRE_EXTENDED) != 0)
   {
   for (;;)
     {
-    while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++;
+    while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_space) != 0) ptr++;
     if (*ptr == CHAR_NUMBER_SIGN)
       {
       ptr++;
@@ -2852,8 +2972,8 @@ if ((options & PCRE_EXTENDED) != 0)
         {
         if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }
         ptr++;
-#ifdef SUPPORT_UTF8
-        if (utf8) while ((*ptr & 0xc0) == 0x80) ptr++;
+#ifdef SUPPORT_UTF
+        if (utf) FORWARDCHAR(ptr);
 #endif
         }
       }
@@ -2871,15 +2991,13 @@ if (*ptr == CHAR_BACKSLASH)
   if (temperrorcode != 0) return FALSE;
   ptr++;    /* Point after the escape sequence */
   }
-
-else if ((cd->ctypes[*ptr] & ctype_meta) == 0)
+else if (!MAX_255(*ptr) || (cd->ctypes[*ptr] & ctype_meta) == 0)
   {
-#ifdef SUPPORT_UTF8
-  if (utf8) { GETCHARINC(next, ptr); } else
+#ifdef SUPPORT_UTF
+  if (utf) { GETCHARINC(next, ptr); } else
 #endif
   next = *ptr++;
   }
-
 else return FALSE;
 
 /* Skip whitespace and comments in extended mode */
@@ -2888,7 +3006,7 @@ if ((options & PCRE_EXTENDED) != 0)
   {
   for (;;)
     {
-    while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++;
+    while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_space) != 0) ptr++;
     if (*ptr == CHAR_NUMBER_SIGN)
       {
       ptr++;
@@ -2896,8 +3014,8 @@ if ((options & PCRE_EXTENDED) != 0)
         {
         if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }
         ptr++;
-#ifdef SUPPORT_UTF8
-        if (utf8) while ((*ptr & 0xc0) == 0x80) ptr++;
+#ifdef SUPPORT_UTF
+        if (utf) FORWARDCHAR(ptr);
 #endif
         }
       }
@@ -2908,7 +3026,7 @@ if ((options & PCRE_EXTENDED) != 0)
 /* If the next thing is itself optional, we have to give up. */
 
 if (*ptr == CHAR_ASTERISK || *ptr == CHAR_QUESTION_MARK ||
-  strncmp((char *)ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0)
+  STRNCMP_UC_C8(ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0)
     return FALSE;
 
 /* Now compare the next item with the previous opcode. First, handle cases when
@@ -2917,7 +3035,7 @@ the next item is a character. */
 if (next >= 0) switch(op_code)
   {
   case OP_CHAR:
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
   GETCHARTEST(c, previous);
 #else
   c = *previous;
@@ -2929,14 +3047,14 @@ if (next >= 0) switch(op_code)
   high-valued characters. */
 
   case OP_CHARI:
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
   GETCHARTEST(c, previous);
 #else
   c = *previous;
 #endif
   if (c == next) return FALSE;
-#ifdef SUPPORT_UTF8
-  if (utf8)
+#ifdef SUPPORT_UTF
+  if (utf)
     {
     unsigned int othercase;
     if (next < 128) othercase = cd->fcc[next]; else
@@ -2948,8 +3066,8 @@ if (next >= 0) switch(op_code)
     return (unsigned int)c != othercase;
     }
   else
-#endif  /* SUPPORT_UTF8 */
-  return (c != cd->fcc[next]);  /* Non-UTF-8 mode */
+#endif  /* SUPPORT_UTF */
+  return (c != TABLE_GET((unsigned int)next, cd->fcc, next));  /* Non-UTF-8 mode */
 
   /* For OP_NOT and OP_NOTI, the data is always a single-byte character. These
   opcodes are not used for multi-byte characters, because they are coded using
@@ -2960,8 +3078,8 @@ if (next >= 0) switch(op_code)
 
   case OP_NOTI:
   if ((c = *previous) == next) return TRUE;
-#ifdef SUPPORT_UTF8
-  if (utf8)
+#ifdef SUPPORT_UTF
+  if (utf)
     {
     unsigned int othercase;
     if (next < 128) othercase = cd->fcc[next]; else
@@ -2973,8 +3091,8 @@ if (next >= 0) switch(op_code)
     return (unsigned int)c == othercase;
     }
   else
-#endif  /* SUPPORT_UTF8 */
-  return (c == cd->fcc[next]);  /* Non-UTF-8 mode */
+#endif  /* SUPPORT_UTF */
+  return (c == (int)(TABLE_GET((unsigned int)next, cd->fcc, next)));  /* Non-UTF-8 mode */
 
   /* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not* set.
   When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
@@ -3065,7 +3183,7 @@ switch(op_code)
   {
   case OP_CHAR:
   case OP_CHARI:
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
   GETCHARTEST(c, previous);
 #else
   c = *previous;
@@ -3170,7 +3288,7 @@ switch(op_code)
       to the original \d etc. At this point, ptr will point to a zero byte. */
 
       if (*ptr == CHAR_ASTERISK || *ptr == CHAR_QUESTION_MARK ||
-        strncmp((char *)ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0)
+        STRNCMP_UC_C8(ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0)
           return FALSE;
 
       /* Do the property check. */
@@ -3248,8 +3366,8 @@ Arguments:
   codeptr        points to the pointer to the current code point
   ptrptr         points to the current pattern pointer
   errorcodeptr   points to error code variable
-  firstbyteptr   set to initial literal character, or < 0 (REQ_UNSET, REQ_NONE)
-  reqbyteptr     set to the last literal character required, else < 0
+  firstcharptr   set to initial literal character, or < 0 (REQ_UNSET, REQ_NONE)
+  reqcharptr     set to the last literal character required, else < 0
   bcptr          points to current branch chain
   cond_depth     conditional nesting depth
   cd             contains pointers to tables etc.
@@ -3261,47 +3379,54 @@ Returns:         TRUE on success
 */
 
 static BOOL
-compile_branch(int *optionsptr, uschar **codeptr, const uschar **ptrptr,
-  int *errorcodeptr, int *firstbyteptr, int *reqbyteptr, branch_chain *bcptr,
-  int cond_depth, compile_data *cd, int *lengthptr)
+compile_branch(int *optionsptr, pcre_uchar **codeptr,
+  const pcre_uchar **ptrptr, int *errorcodeptr, pcre_int32 *firstcharptr,
+  pcre_int32 *reqcharptr, branch_chain *bcptr, int cond_depth,
+  compile_data *cd, int *lengthptr)
 {
 int repeat_type, op_type;
 int repeat_min = 0, repeat_max = 0;      /* To please picky compilers */
 int bravalue = 0;
 int greedy_default, greedy_non_default;
-int firstbyte, reqbyte;
-int zeroreqbyte, zerofirstbyte;
-int req_caseopt, reqvary, tempreqvary;
+pcre_int32 firstchar, reqchar;
+pcre_int32 zeroreqchar, zerofirstchar;
+pcre_int32 req_caseopt, reqvary, tempreqvary;
 int options = *optionsptr;               /* May change dynamically */
 int after_manual_callout = 0;
 int length_prevgroup = 0;
 register int c;
-register uschar *code = *codeptr;
-uschar *last_code = code;
-uschar *orig_code = code;
-uschar *tempcode;
+register pcre_uchar *code = *codeptr;
+pcre_uchar *last_code = code;
+pcre_uchar *orig_code = code;
+pcre_uchar *tempcode;
 BOOL inescq = FALSE;
-BOOL groupsetfirstbyte = FALSE;
-const uschar *ptr = *ptrptr;
-const uschar *tempptr;
-const uschar *nestptr = NULL;
-uschar *previous = NULL;
-uschar *previous_callout = NULL;
-uschar *save_hwm = NULL;
-uschar classbits[32];
+BOOL groupsetfirstchar = FALSE;
+const pcre_uchar *ptr = *ptrptr;
+const pcre_uchar *tempptr;
+const pcre_uchar *nestptr = NULL;
+pcre_uchar *previous = NULL;
+pcre_uchar *previous_callout = NULL;
+pcre_uchar *save_hwm = NULL;
+pcre_uint8 classbits[32];
 
 /* We can fish out the UTF-8 setting once and for all into a BOOL, but we
 must not do this for other options (e.g. PCRE_EXTENDED) because they may change
 dynamically as we process the pattern. */
 
-#ifdef SUPPORT_UTF8
-BOOL class_utf8;
-BOOL utf8 = (options & PCRE_UTF8) != 0;
-uschar *class_utf8data;
-uschar *class_utf8data_base;
-uschar utf8_char[6];
+#ifdef SUPPORT_UTF
+/* PCRE_UTF16 has the same value as PCRE_UTF8. */
+BOOL utf = (options & PCRE_UTF8) != 0;
+pcre_uchar utf_chars[6];
 #else
-BOOL utf8 = FALSE;
+BOOL utf = FALSE;
+#endif
+
+/* Helper variables for OP_XCLASS opcode (for characters > 255). */
+
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+BOOL xclass;
+pcre_uchar *class_uchardata;
+pcre_uchar *class_uchardata_base;
 #endif
 
 #ifdef PCRE_DEBUG
@@ -3315,22 +3440,23 @@ greedy_non_default = greedy_default ^ 1;
 
 /* Initialize no first byte, no required byte. REQ_UNSET means "no char
 matching encountered yet". It gets changed to REQ_NONE if we hit something that
-matches a non-fixed char first char; reqbyte just remains unset if we never
+matches a non-fixed char first char; reqchar just remains unset if we never
 find one.
 
 When we hit a repeat whose minimum is zero, we may have to adjust these values
 to take the zero repeat into account. This is implemented by setting them to
-zerofirstbyte and zeroreqbyte when such a repeat is encountered. The individual
+zerofirstbyte and zeroreqchar when such a repeat is encountered. The individual
 item types that can be repeated set these backoff variables appropriately. */
 
-firstbyte = reqbyte = zerofirstbyte = zeroreqbyte = REQ_UNSET;
+firstchar = reqchar = zerofirstchar = zeroreqchar = REQ_UNSET;
 
-/* The variable req_caseopt contains either the REQ_CASELESS value or zero,
-according to the current setting of the caseless flag. REQ_CASELESS is a bit
-value > 255. It is added into the firstbyte or reqbyte variables to record the
-case status of the value. This is used only for ASCII characters. */
+/* The variable req_caseopt contains either the REQ_CASELESS value
+or zero, according to the current setting of the caseless flag. The
+REQ_CASELESS leaves the lower 28 bit empty. It is added into the
+firstchar or reqchar variables to record the case status of the
+value. This is used only for ASCII characters. */
 
-req_caseopt = ((options & PCRE_CASELESS) != 0)? REQ_CASELESS : 0;
+req_caseopt = ((options & PCRE_CASELESS) != 0)? REQ_CASELESS:0;
 
 /* Switch on next character until the end of the branch */
 
@@ -3342,20 +3468,20 @@ for (;; ptr++)
   BOOL is_quantifier;
   BOOL is_recurse;
   BOOL reset_bracount;
-  int class_charcount;
-  int class_lastchar;
+  int class_has_8bitchar;
+  int class_single_char;
   int newoptions;
   int recno;
   int refsign;
   int skipbytes;
-  int subreqbyte;
-  int subfirstbyte;
+  int subreqchar;
+  int subfirstchar;
   int terminator;
   int mclength;
   int tempbracount;
-  uschar mcbuffer[8];
+  pcre_uchar mcbuffer[8];
 
-  /* Get next byte in the pattern */
+  /* Get next character in the pattern */
 
   c = *ptr;
 
@@ -3401,8 +3527,8 @@ for (;; ptr++)
       }
 
     *lengthptr += (int)(code - last_code);
-    DPRINTF(("length=%d added %d c=%c\n", *lengthptr, (int)(code - last_code),
-      c));
+    DPRINTF(("length=%d added %d c=%c (0x%x)\n", *lengthptr,
+      (int)(code - last_code), c, c));
 
     /* If "previous" is set and it is not at the start of the work space, move
     it back to there, in order to avoid filling up the work space. Otherwise,
@@ -3412,7 +3538,7 @@ for (;; ptr++)
       {
       if (previous > orig_code)
         {
-        memmove(orig_code, previous, code - previous);
+        memmove(orig_code, previous, IN_UCHARS(code - previous));
         code -= previous - orig_code;
         previous = orig_code;
         }
@@ -3481,7 +3607,7 @@ for (;; ptr++)
 
   if ((options & PCRE_EXTENDED) != 0)
     {
-    if ((cd->ctypes[c] & ctype_space) != 0) continue;
+    if (MAX_255(*ptr) && (cd->ctypes[c] & ctype_space) != 0) continue;
     if (c == CHAR_NUMBER_SIGN)
       {
       ptr++;
@@ -3489,8 +3615,8 @@ for (;; ptr++)
         {
         if (IS_NEWLINE(ptr)) { ptr += cd->nllen - 1; break; }
         ptr++;
-#ifdef SUPPORT_UTF8
-        if (utf8) while ((*ptr & 0xc0) == 0x80) ptr++;
+#ifdef SUPPORT_UTF
+        if (utf) FORWARDCHAR(ptr);
 #endif
         }
       if (*ptr != 0) continue;
@@ -3514,8 +3640,8 @@ for (;; ptr++)
     case 0:                        /* The branch terminates at string end */
     case CHAR_VERTICAL_LINE:       /* or | or ) */
     case CHAR_RIGHT_PARENTHESIS:
-    *firstbyteptr = firstbyte;
-    *reqbyteptr = reqbyte;
+    *firstcharptr = firstchar;
+    *reqcharptr = reqchar;
     *codeptr = code;
     *ptrptr = ptr;
     if (lengthptr != NULL)
@@ -3539,7 +3665,7 @@ for (;; ptr++)
     previous = NULL;
     if ((options & PCRE_MULTILINE) != 0)
       {
-      if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
+      if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
       *code++ = OP_CIRCM;
       }
     else *code++ = OP_CIRC;
@@ -3551,12 +3677,12 @@ for (;; ptr++)
     break;
 
     /* There can never be a first char if '.' is first, whatever happens about
-    repeats. The value of reqbyte doesn't change either. */
+    repeats. The value of reqchar doesn't change either. */
 
     case CHAR_DOT:
-    if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
-    zerofirstbyte = firstbyte;
-    zeroreqbyte = reqbyte;
+    if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
+    zerofirstchar = firstchar;
+    zeroreqchar = reqchar;
     previous = code;
     *code++ = ((options & PCRE_DOTALL) != 0)? OP_ALLANY: OP_ANY;
     break;
@@ -3611,8 +3737,7 @@ for (;; ptr++)
         {
         if (ptr[1] == CHAR_E)
           ptr++;
-        else if (strncmp((const char *)ptr+1,
-                          STR_Q STR_BACKSLASH STR_E, 3) == 0)
+        else if (STRNCMP_UC_C8(ptr + 1, STR_Q STR_BACKSLASH STR_E, 3) == 0)
           ptr += 3;
         else
           break;
@@ -3631,8 +3756,8 @@ for (;; ptr++)
         (cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)
       {
       *code++ = negate_class? OP_ALLANY : OP_FAIL;
-      if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
-      zerofirstbyte = firstbyte;
+      if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
+      zerofirstchar = firstchar;
       break;
       }
 
@@ -3642,24 +3767,25 @@ for (;; ptr++)
 
     should_flip_negation = FALSE;
 
-    /* Keep a count of chars with values < 256 so that we can optimize the case
-    of just a single character (as long as it's < 256). However, For higher
-    valued UTF-8 characters, we don't yet do any optimization. */
+    /* For optimization purposes, we track some properties of the class.
+    class_has_8bitchar will be non-zero, if the class contains at least one
+    < 256 character. class_single_char will be 1 if the class contains only
+    a single character. */
 
-    class_charcount = 0;
-    class_lastchar = -1;
+    class_has_8bitchar = 0;
+    class_single_char = 0;
 
     /* Initialize the 32-char bit map to all zeros. We build the map in a
     temporary bit of memory, in case the class contains only 1 character (less
     than 256), because in that case the compiled code doesn't use the bit map.
     */
 
-    memset(classbits, 0, 32 * sizeof(uschar));
+    memset(classbits, 0, 32 * sizeof(pcre_uint8));
 
-#ifdef SUPPORT_UTF8
-    class_utf8 = FALSE;                       /* No chars >= 256 */
-    class_utf8data = code + LINK_SIZE + 2;    /* For UTF-8 items */
-    class_utf8data_base = class_utf8data;     /* For resetting in pass 1 */
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+    xclass = FALSE;                           /* No chars >= 256 */
+    class_uchardata = code + LINK_SIZE + 2;   /* For UTF-8 items */
+    class_uchardata_base = class_uchardata;   /* For resetting in pass 1 */
 #endif
 
     /* Process characters until ] is reached. By writing this as a "do" it
@@ -3668,25 +3794,26 @@ for (;; ptr++)
 
     if (c != 0) do
       {
-      const uschar *oldptr;
+      const pcre_uchar *oldptr;
 
-#ifdef SUPPORT_UTF8
-      if (utf8 && c > 127)
+#ifdef SUPPORT_UTF
+      if (utf && HAS_EXTRALEN(c))
         {                           /* Braces are required because the */
         GETCHARLEN(c, ptr, ptr);    /* macro generates multiple statements */
         }
+#endif
 
-      /* In the pre-compile phase, accumulate the length of any UTF-8 extra
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+      /* In the pre-compile phase, accumulate the length of any extra
       data and reset the pointer. This is so that very large classes that
-      contain a zillion UTF-8 characters no longer overwrite the work space
+      contain a zillion > 255 characters no longer overwrite the work space
       (which is on the stack). */
 
       if (lengthptr != NULL)
         {
-        *lengthptr += (int)(class_utf8data - class_utf8data_base);
-        class_utf8data = class_utf8data_base;
+        *lengthptr += class_uchardata - class_uchardata_base;
+        class_uchardata = class_uchardata_base;
         }
-
 #endif
 
       /* Inside \Q...\E everything is literal except \E */
@@ -3714,8 +3841,8 @@ for (;; ptr++)
         {
         BOOL local_negate = FALSE;
         int posix_class, taboffset, tabopt;
-        register const uschar *cbits = cd->cbits;
-        uschar pbits[32];
+        register const pcre_uint8 *cbits = cd->cbits;
+        pcre_uint8 pbits[32];
 
         if (ptr[1] != CHAR_COLON)
           {
@@ -3770,7 +3897,7 @@ for (;; ptr++)
         /* Copy in the first table (always present) */
 
         memcpy(pbits, cbits + posix_class_maps[posix_class],
-          32 * sizeof(uschar));
+          32 * sizeof(pcre_uint8));
 
         /* If there is a second table, add or remove it as required. */
 
@@ -3801,16 +3928,20 @@ for (;; ptr++)
           for (c = 0; c < 32; c++) classbits[c] |= pbits[c];
 
         ptr = tempptr + 1;
-        class_charcount = 10;  /* Set > 1; assumes more than 1 per class */
+        /* Every class contains at least one < 256 characters. */
+        class_has_8bitchar = 1;
+        /* Every class contains at least two characters. */
+        class_single_char = 2;
         continue;    /* End of POSIX syntax handling */
         }
 
       /* Backslash may introduce a single character, or it may introduce one
       of the specials, which just set a flag. The sequence \b is a special
       case. Inside a class (and only there) it is treated as backspace. We
-      assume that other escapes have more than one character in them, so set
-      class_charcount bigger than one. Unrecognized escapes fall through and
-      are either treated as literal characters (by default), or are faulted if
+      assume that other escapes have more than one character in them, so
+      speculatively set both class_has_8bitchar and class_single_char bigger
+      than one. Unrecognized escapes fall through and are either treated
+      as literal characters (by default), or are faulted if
       PCRE_EXTRA is set. */
 
       if (c == CHAR_BACKSLASH)
@@ -3837,8 +3968,11 @@ for (;; ptr++)
 
         if (c < 0)
           {
-          register const uschar *cbits = cd->cbits;
-          class_charcount += 2;     /* Greater than 1 is what matters */
+          register const pcre_uint8 *cbits = cd->cbits;
+          /* Every class contains at least two < 256 characters. */
+          class_has_8bitchar++;
+          /* Every class contains at least two characters. */
+          class_single_char += 2;
 
           switch (-c)
             {
@@ -3851,7 +3985,7 @@ for (;; ptr++)
             case ESC_SU:
             nestptr = ptr;
             ptr = substitutes[-c - ESC_DU] - 1;  /* Just before substitute */
-            class_charcount -= 2;                /* Undo! */
+            class_has_8bitchar--;                /* Undo! */
             continue;
 #endif
             case ESC_d:
@@ -3892,23 +4026,38 @@ for (;; ptr++)
             SETBIT(classbits, 0x09); /* VT */
             SETBIT(classbits, 0x20); /* SPACE */
             SETBIT(classbits, 0xa0); /* NSBP */
-#ifdef SUPPORT_UTF8
-            if (utf8)
+#ifndef COMPILE_PCRE8
+            xclass = TRUE;
+            *class_uchardata++ = XCL_SINGLE;
+            *class_uchardata++ = 0x1680;
+            *class_uchardata++ = XCL_SINGLE;
+            *class_uchardata++ = 0x180e;
+            *class_uchardata++ = XCL_RANGE;
+            *class_uchardata++ = 0x2000;
+            *class_uchardata++ = 0x200a;
+            *class_uchardata++ = XCL_SINGLE;
+            *class_uchardata++ = 0x202f;
+            *class_uchardata++ = XCL_SINGLE;
+            *class_uchardata++ = 0x205f;
+            *class_uchardata++ = XCL_SINGLE;
+            *class_uchardata++ = 0x3000;
+#elif defined SUPPORT_UTF
+            if (utf)
               {
-              class_utf8 = TRUE;
-              *class_utf8data++ = XCL_SINGLE;
-              class_utf8data += _pcre_ord2utf8(0x1680, class_utf8data);
-              *class_utf8data++ = XCL_SINGLE;
-              class_utf8data += _pcre_ord2utf8(0x180e, class_utf8data);
-              *class_utf8data++ = XCL_RANGE;
-              class_utf8data += _pcre_ord2utf8(0x2000, class_utf8data);
-              class_utf8data += _pcre_ord2utf8(0x200A, class_utf8data);
-              *class_utf8data++ = XCL_SINGLE;
-              class_utf8data += _pcre_ord2utf8(0x202f, class_utf8data);
-              *class_utf8data++ = XCL_SINGLE;
-              class_utf8data += _pcre_ord2utf8(0x205f, class_utf8data);
-              *class_utf8data++ = XCL_SINGLE;
-              class_utf8data += _pcre_ord2utf8(0x3000, class_utf8data);
+              xclass = TRUE;
+              *class_uchardata++ = XCL_SINGLE;
+              class_uchardata += PRIV(ord2utf)(0x1680, class_uchardata);
+              *class_uchardata++ = XCL_SINGLE;
+              class_uchardata += PRIV(ord2utf)(0x180e, class_uchardata);
+              *class_uchardata++ = XCL_RANGE;
+              class_uchardata += PRIV(ord2utf)(0x2000, class_uchardata);
+              class_uchardata += PRIV(ord2utf)(0x200a, class_uchardata);
+              *class_uchardata++ = XCL_SINGLE;
+              class_uchardata += PRIV(ord2utf)(0x202f, class_uchardata);
+              *class_uchardata++ = XCL_SINGLE;
+              class_uchardata += PRIV(ord2utf)(0x205f, class_uchardata);
+              *class_uchardata++ = XCL_SINGLE;
+              class_uchardata += PRIV(ord2utf)(0x3000, class_uchardata);
               }
 #endif
             continue;
@@ -3926,32 +4075,59 @@ for (;; ptr++)
                 }
               classbits[c] |= x;
               }
-
-#ifdef SUPPORT_UTF8
-            if (utf8)
+#ifndef COMPILE_PCRE8
+            xclass = TRUE;
+            *class_uchardata++ = XCL_RANGE;
+            *class_uchardata++ = 0x0100;
+            *class_uchardata++ = 0x167f;
+            *class_uchardata++ = XCL_RANGE;
+            *class_uchardata++ = 0x1681;
+            *class_uchardata++ = 0x180d;
+            *class_uchardata++ = XCL_RANGE;
+            *class_uchardata++ = 0x180f;
+            *class_uchardata++ = 0x1fff;
+            *class_uchardata++ = XCL_RANGE;
+            *class_uchardata++ = 0x200b;
+            *class_uchardata++ = 0x202e;
+            *class_uchardata++ = XCL_RANGE;
+            *class_uchardata++ = 0x2030;
+            *class_uchardata++ = 0x205e;
+            *class_uchardata++ = XCL_RANGE;
+            *class_uchardata++ = 0x2060;
+            *class_uchardata++ = 0x2fff;
+            *class_uchardata++ = XCL_RANGE;
+            *class_uchardata++ = 0x3001;
+#ifdef SUPPORT_UTF
+            if (utf)
+              class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata);
+            else
+#endif
+              *class_uchardata++ = 0xffff;
+#elif defined SUPPORT_UTF
+            if (utf)
               {
-              class_utf8 = TRUE;
-              *class_utf8data++ = XCL_RANGE;
-              class_utf8data += _pcre_ord2utf8(0x0100, class_utf8data);
-              class_utf8data += _pcre_ord2utf8(0x167f, class_utf8data);
-              *class_utf8data++ = XCL_RANGE;
-              class_utf8data += _pcre_ord2utf8(0x1681, class_utf8data);
-              class_utf8data += _pcre_ord2utf8(0x180d, class_utf8data);
-              *class_utf8data++ = XCL_RANGE;
-              class_utf8data += _pcre_ord2utf8(0x180f, class_utf8data);
-              class_utf8data += _pcre_ord2utf8(0x1fff, class_utf8data);
-              *class_utf8data++ = XCL_RANGE;
-              class_utf8data += _pcre_ord2utf8(0x200B, class_utf8data);
-              class_utf8data += _pcre_ord2utf8(0x202e, class_utf8data);
-              *class_utf8data++ = XCL_RANGE;
-              class_utf8data += _pcre_ord2utf8(0x2030, class_utf8data);
-              class_utf8data += _pcre_ord2utf8(0x205e, class_utf8data);
-              *class_utf8data++ = XCL_RANGE;
-              class_utf8data += _pcre_ord2utf8(0x2060, class_utf8data);
-              class_utf8data += _pcre_ord2utf8(0x2fff, class_utf8data);
-              *class_utf8data++ = XCL_RANGE;
-              class_utf8data += _pcre_ord2utf8(0x3001, class_utf8data);
-              class_utf8data += _pcre_ord2utf8(0x7fffffff, class_utf8data);
+              xclass = TRUE;
+              *class_uchardata++ = XCL_RANGE;
+              class_uchardata += PRIV(ord2utf)(0x0100, class_uchardata);
+              class_uchardata += PRIV(ord2utf)(0x167f, class_uchardata);
+              *class_uchardata++ = XCL_RANGE;
+              class_uchardata += PRIV(ord2utf)(0x1681, class_uchardata);
+              class_uchardata += PRIV(ord2utf)(0x180d, class_uchardata);
+              *class_uchardata++ = XCL_RANGE;
+              class_uchardata += PRIV(ord2utf)(0x180f, class_uchardata);
+              class_uchardata += PRIV(ord2utf)(0x1fff, class_uchardata);
+              *class_uchardata++ = XCL_RANGE;
+              class_uchardata += PRIV(ord2utf)(0x200b, class_uchardata);
+              class_uchardata += PRIV(ord2utf)(0x202e, class_uchardata);
+              *class_uchardata++ = XCL_RANGE;
+              class_uchardata += PRIV(ord2utf)(0x2030, class_uchardata);
+              class_uchardata += PRIV(ord2utf)(0x205e, class_uchardata);
+              *class_uchardata++ = XCL_RANGE;
+              class_uchardata += PRIV(ord2utf)(0x2060, class_uchardata);
+              class_uchardata += PRIV(ord2utf)(0x2fff, class_uchardata);
+              *class_uchardata++ = XCL_RANGE;
+              class_uchardata += PRIV(ord2utf)(0x3001, class_uchardata);
+              class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata);
               }
 #endif
             continue;
@@ -3962,13 +4138,18 @@ for (;; ptr++)
             SETBIT(classbits, 0x0c); /* FF */
             SETBIT(classbits, 0x0d); /* CR */
             SETBIT(classbits, 0x85); /* NEL */
-#ifdef SUPPORT_UTF8
-            if (utf8)
+#ifndef COMPILE_PCRE8
+            xclass = TRUE;
+            *class_uchardata++ = XCL_RANGE;
+            *class_uchardata++ = 0x2028;
+            *class_uchardata++ = 0x2029;
+#elif defined SUPPORT_UTF
+            if (utf)
               {
-              class_utf8 = TRUE;
-              *class_utf8data++ = XCL_RANGE;
-              class_utf8data += _pcre_ord2utf8(0x2028, class_utf8data);
-              class_utf8data += _pcre_ord2utf8(0x2029, class_utf8data);
+              xclass = TRUE;
+              *class_uchardata++ = XCL_RANGE;
+              class_uchardata += PRIV(ord2utf)(0x2028, class_uchardata);
+              class_uchardata += PRIV(ord2utf)(0x2029, class_uchardata);
               }
 #endif
             continue;
@@ -3990,16 +4171,29 @@ for (;; ptr++)
               classbits[c] |= x;
               }
 
-#ifdef SUPPORT_UTF8
-            if (utf8)
+#ifndef COMPILE_PCRE8
+            xclass = TRUE;
+            *class_uchardata++ = XCL_RANGE;
+            *class_uchardata++ = 0x0100;
+            *class_uchardata++ = 0x2027;
+            *class_uchardata++ = XCL_RANGE;
+            *class_uchardata++ = 0x202a;
+#ifdef SUPPORT_UTF
+            if (utf)
+              class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata);
+            else
+#endif
+              *class_uchardata++ = 0xffff;
+#elif defined SUPPORT_UTF
+            if (utf)
               {
-              class_utf8 = TRUE;
-              *class_utf8data++ = XCL_RANGE;
-              class_utf8data += _pcre_ord2utf8(0x0100, class_utf8data);
-              class_utf8data += _pcre_ord2utf8(0x2027, class_utf8data);
-              *class_utf8data++ = XCL_RANGE;
-              class_utf8data += _pcre_ord2utf8(0x2029, class_utf8data);
-              class_utf8data += _pcre_ord2utf8(0x7fffffff, class_utf8data);
+              xclass = TRUE;
+              *class_uchardata++ = XCL_RANGE;
+              class_uchardata += PRIV(ord2utf)(0x0100, class_uchardata);
+              class_uchardata += PRIV(ord2utf)(0x2027, class_uchardata);
+              *class_uchardata++ = XCL_RANGE;
+              class_uchardata += PRIV(ord2utf)(0x202a, class_uchardata);
+              class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata);
               }
 #endif
             continue;
@@ -4012,12 +4206,12 @@ for (;; ptr++)
               int pdata;
               int ptype = get_ucp(&ptr, &negated, &pdata, errorcodeptr);
               if (ptype < 0) goto FAILED;
-              class_utf8 = TRUE;
-              *class_utf8data++ = ((-c == ESC_p) != negated)?
+              xclass = TRUE;
+              *class_uchardata++ = ((-c == ESC_p) != negated)?
                 XCL_PROP : XCL_NOTPROP;
-              *class_utf8data++ = ptype;
-              *class_utf8data++ = pdata;
-              class_charcount -= 2;   /* Not a < 256 character */
+              *class_uchardata++ = ptype;
+              *class_uchardata++ = pdata;
+              class_has_8bitchar--;                /* Undo! */
               continue;
               }
 #endif
@@ -4031,14 +4225,15 @@ for (;; ptr++)
               *errorcodeptr = ERR7;
               goto FAILED;
               }
-            class_charcount -= 2;  /* Undo the default count from above */
-            c = *ptr;              /* Get the final character and fall through */
+            class_has_8bitchar--;    /* Undo the speculative increase. */
+            class_single_char -= 2;  /* Undo the speculative increase. */
+            c = *ptr;                /* Get the final character and fall through */
             break;
             }
           }
 
         /* Fall through if we have a single character (c >= 0). This may be
-        greater than 256 in UTF-8 mode. */
+        greater than 256. */
 
         }   /* End of backslash handling */
 
@@ -4086,8 +4281,8 @@ for (;; ptr++)
           goto LONE_SINGLE_CHARACTER;
           }
 
-#ifdef SUPPORT_UTF8
-        if (utf8)
+#ifdef SUPPORT_UTF
+        if (utf)
           {                           /* Braces are required because the */
           GETCHARLEN(d, ptr, ptr);    /* macro generates multiple statements */
           }
@@ -4131,22 +4326,36 @@ for (;; ptr++)
 
         if (d == CHAR_CR || d == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF;
 
+        /* Since we found a character range, single character optimizations
+        cannot be done anymore. */
+        class_single_char = 2;
+
         /* In UTF-8 mode, if the upper limit is > 255, or > 127 for caseless
         matching, we have to use an XCLASS with extra data items. Caseless
         matching for characters > 127 is available only if UCP support is
         available. */
 
-#ifdef SUPPORT_UTF8
-        if (utf8 && (d > 255 || ((options & PCRE_CASELESS) != 0 && d > 127)))
+#if defined SUPPORT_UTF && !(defined COMPILE_PCRE8)
+        if ((d > 255) || (utf && ((options & PCRE_CASELESS) != 0 && d > 127)))
+#elif defined  SUPPORT_UTF
+        if (utf && (d > 255 || ((options & PCRE_CASELESS) != 0 && d > 127)))
+#elif !(defined COMPILE_PCRE8)
+        if (d > 255)
+#endif
+#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
           {
-          class_utf8 = TRUE;
+          xclass = TRUE;
 
           /* With UCP support, we can find the other case equivalents of
           the relevant characters. There may be several ranges. Optimize how
           they fit with the basic range. */
 
 #ifdef SUPPORT_UCP
+#ifndef COMPILE_PCRE8
+          if (utf && (options & PCRE_CASELESS) != 0)
+#else
           if ((options & PCRE_CASELESS) != 0)
+#endif
             {
             unsigned int occ, ocd;
             unsigned int cc = c;
@@ -4172,14 +4381,14 @@ for (;; ptr++)
 
               if (occ == ocd)
                 {
-                *class_utf8data++ = XCL_SINGLE;
+                *class_uchardata++ = XCL_SINGLE;
                 }
               else
                 {
-                *class_utf8data++ = XCL_RANGE;
-                class_utf8data += _pcre_ord2utf8(occ, class_utf8data);
+                *class_uchardata++ = XCL_RANGE;
+                class_uchardata += PRIV(ord2utf)(occ, class_uchardata);
                 }
-              class_utf8data += _pcre_ord2utf8(ocd, class_utf8data);
+              class_uchardata += PRIV(ord2utf)(ocd, class_uchardata);
               }
             }
 #endif  /* SUPPORT_UCP */
@@ -4187,33 +4396,69 @@ for (;; ptr++)
           /* Now record the original range, possibly modified for UCP caseless
           overlapping ranges. */
 
-          *class_utf8data++ = XCL_RANGE;
-          class_utf8data += _pcre_ord2utf8(c, class_utf8data);
-          class_utf8data += _pcre_ord2utf8(d, class_utf8data);
+          *class_uchardata++ = XCL_RANGE;
+#ifdef SUPPORT_UTF
+#ifndef COMPILE_PCRE8
+          if (utf)
+            {
+            class_uchardata += PRIV(ord2utf)(c, class_uchardata);
+            class_uchardata += PRIV(ord2utf)(d, class_uchardata);
+            }
+          else
+            {
+            *class_uchardata++ = c;
+            *class_uchardata++ = d;
+            }
+#else
+          class_uchardata += PRIV(ord2utf)(c, class_uchardata);
+          class_uchardata += PRIV(ord2utf)(d, class_uchardata);
+#endif
+#else /* SUPPORT_UTF */
+          *class_uchardata++ = c;
+          *class_uchardata++ = d;
+#endif /* SUPPORT_UTF */
 
           /* With UCP support, we are done. Without UCP support, there is no
-          caseless matching for UTF-8 characters > 127; we can use the bit map
-          for the smaller ones. */
+          caseless matching for UTF characters > 127; we can use the bit map
+          for the smaller ones. As for 16 bit characters without UTF, we
+          can still use  */
 
 #ifdef SUPPORT_UCP
-          continue;    /* With next character in the class */
-#else
-          if ((options & PCRE_CASELESS) == 0 || c > 127) continue;
+#ifndef COMPILE_PCRE8
+          if (utf)
+#endif
+            continue;    /* With next character in the class */
+#endif  /* SUPPORT_UCP */
 
+#if defined SUPPORT_UTF && !defined(SUPPORT_UCP) && !(defined COMPILE_PCRE8)
+          if (utf)
+            {
+            if ((options & PCRE_CASELESS) == 0 || c > 127) continue;
+            /* Adjust upper limit and fall through to set up the map */
+            d = 127;
+            }
+          else
+            {
+            if (c > 255) continue;
+            /* Adjust upper limit and fall through to set up the map */
+            d = 255;
+            }
+#elif defined SUPPORT_UTF && !defined(SUPPORT_UCP)
+          if ((options & PCRE_CASELESS) == 0 || c > 127) continue;
           /* Adjust upper limit and fall through to set up the map */
-
           d = 127;
-
-#endif  /* SUPPORT_UCP */
+#else
+          if (c > 255) continue;
+          /* Adjust upper limit and fall through to set up the map */
+          d = 255;
+#endif  /* SUPPORT_UTF && !SUPPORT_UCP && !COMPILE_PCRE8 */
           }
-#endif  /* SUPPORT_UTF8 */
+#endif  /* SUPPORT_UTF || !COMPILE_PCRE8 */
 
-        /* We use the bit map for all cases when not in UTF-8 mode; else
-        ranges that lie entirely within 0-127 when there is UCP support; else
-        for partial ranges without UCP support. */
+        /* We use the bit map for 8 bit mode, or when the characters fall
+        partially or entirely to [0-255] ([0-127] for UCP) ranges. */
 
-        class_charcount += d - c + 1;
-        class_lastchar = d;
+        class_has_8bitchar = 1;
 
         /* We can save a bit of time by skipping this in the pre-compile. */
 
@@ -4222,7 +4467,7 @@ for (;; ptr++)
           classbits[c/8] |= (1 << (c&7));
           if ((options & PCRE_CASELESS) != 0)
             {
-            int uc = cd->fcc[c];           /* flip case */
+            int uc = cd->fcc[c]; /* flip case */
             classbits[uc/8] |= (1 << (uc&7));
             }
           }
@@ -4236,41 +4481,117 @@ for (;; ptr++)
 
       LONE_SINGLE_CHARACTER:
 
-      /* Handle a character that cannot go in the bit map */
+      /* Only the value of 1 matters for class_single_char. */
+      if (class_single_char < 2) class_single_char++;
+
+      /* If class_charcount is 1, we saw precisely one character. As long as
+      there were no negated characters >= 128 and there was no use of \p or \P,
+      in other words, no use of any XCLASS features, we can optimize.
 
-#ifdef SUPPORT_UTF8
-      if (utf8 && (c > 255 || ((options & PCRE_CASELESS) != 0 && c > 127)))
+      In UTF-8 mode, we can optimize the negative case only if there were no
+      characters >= 128 because OP_NOT and the related opcodes like OP_NOTSTAR
+      operate on single-bytes characters only. This is an historical hangover.
+      Maybe one day we can tidy these opcodes to handle multi-byte characters.
+
+      The optimization throws away the bit map. We turn the item into a
+      1-character OP_CHAR[I] if it's positive, or OP_NOT[I] if it's negative.
+      Note that OP_NOT[I] does not support multibyte characters. In the positive
+      case, it can cause firstchar to be set. Otherwise, there can be no first
+      char if this item is first, whatever repeat count may follow. In the case
+      of reqchar, save the previous value for reinstating. */
+
+#ifdef SUPPORT_UTF
+      if (class_single_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET
+        && (!utf || !negate_class || c < (MAX_VALUE_FOR_SINGLE_CHAR + 1)))
+#else
+      if (class_single_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
+#endif
         {
-        class_utf8 = TRUE;
-        *class_utf8data++ = XCL_SINGLE;
-        class_utf8data += _pcre_ord2utf8(c, class_utf8data);
+        ptr++;
+        zeroreqchar = reqchar;
+
+        /* The OP_NOT[I] opcodes work on single characters only. */
+
+        if (negate_class)
+          {
+          if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
+          zerofirstchar = firstchar;
+          *code++ = ((options & PCRE_CASELESS) != 0)? OP_NOTI: OP_NOT;
+          *code++ = c;
+          goto NOT_CHAR;
+          }
+
+        /* For a single, positive character, get the value into mcbuffer, and
+        then we can handle this with the normal one-character code. */
+
+#ifdef SUPPORT_UTF
+        if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR)
+          mclength = PRIV(ord2utf)(c, mcbuffer);
+        else
+#endif
+          {
+          mcbuffer[0] = c;
+          mclength = 1;
+          }
+        goto ONE_CHAR;
+        }       /* End of 1-char optimization */
+
+      /* Handle a character that cannot go in the bit map. */
+
+#if defined SUPPORT_UTF && !(defined COMPILE_PCRE8)
+      if ((c > 255) || (utf && ((options & PCRE_CASELESS) != 0 && c > 127)))
+#elif defined SUPPORT_UTF
+      if (utf && (c > 255 || ((options & PCRE_CASELESS) != 0 && c > 127)))
+#elif !(defined COMPILE_PCRE8)
+      if (c > 255)
+#endif
+
+#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
+        {
+        xclass = TRUE;
+        *class_uchardata++ = XCL_SINGLE;
+#ifdef SUPPORT_UTF
+#ifndef COMPILE_PCRE8
+        /* In non 8 bit mode, we can get here even if we are not in UTF mode. */
+        if (!utf)
+          *class_uchardata++ = c;
+        else
+#endif
+          class_uchardata += PRIV(ord2utf)(c, class_uchardata);
+#else /* SUPPORT_UTF */
+        *class_uchardata++ = c;
+#endif /* SUPPORT_UTF */
 
 #ifdef SUPPORT_UCP
+#ifdef COMPILE_PCRE8
         if ((options & PCRE_CASELESS) != 0)
+#else
+        /* In non 8 bit mode, we can get here even if we are not in UTF mode. */
+        if (utf && (options & PCRE_CASELESS) != 0)
+#endif
           {
           unsigned int othercase;
-          if ((othercase = UCD_OTHERCASE(c)) != (unsigned int)c)
+          if ((int)(othercase = UCD_OTHERCASE(c)) != c)
             {
-            *class_utf8data++ = XCL_SINGLE;
-            class_utf8data += _pcre_ord2utf8(othercase, class_utf8data);
+            *class_uchardata++ = XCL_SINGLE;
+            class_uchardata += PRIV(ord2utf)(othercase, class_uchardata);
             }
           }
 #endif  /* SUPPORT_UCP */
 
         }
       else
-#endif  /* SUPPORT_UTF8 */
+#endif  /* SUPPORT_UTF || COMPILE_PCRE16 */
 
       /* Handle a single-byte character */
         {
+        class_has_8bitchar = 1;
         classbits[c/8] |= (1 << (c&7));
         if ((options & PCRE_CASELESS) != 0)
           {
-          c = cd->fcc[c];   /* flip case */
+          c = cd->fcc[c]; /* flip case */
           classbits[c/8] |= (1 << (c&7));
           }
-        class_charcount++;
-        class_lastchar = c;
         }
       }
 
@@ -4291,66 +4612,13 @@ for (;; ptr++)
       goto FAILED;
       }
 
-    /* If class_charcount is 1, we saw precisely one character whose value is
-    less than 256. As long as there were no characters >= 128 and there was no
-    use of \p or \P, in other words, no use of any XCLASS features, we can
-    optimize.
-
-    In UTF-8 mode, we can optimize the negative case only if there were no
-    characters >= 128 because OP_NOT and the related opcodes like OP_NOTSTAR
-    operate on single-bytes characters only. This is an historical hangover.
-    Maybe one day we can tidy these opcodes to handle multi-byte characters.
-
-    The optimization throws away the bit map. We turn the item into a
-    1-character OP_CHAR[I] if it's positive, or OP_NOT[I] if it's negative.
-    Note that OP_NOT[I] does not support multibyte characters. In the positive
-    case, it can cause firstbyte to be set. Otherwise, there can be no first
-    char if this item is first, whatever repeat count may follow. In the case
-    of reqbyte, save the previous value for reinstating. */
-
-#ifdef SUPPORT_UTF8
-    if (class_charcount == 1 && !class_utf8 &&
-      (!utf8 || !negate_class || class_lastchar < 128))
-#else
-    if (class_charcount == 1)
-#endif
-      {
-      zeroreqbyte = reqbyte;
-
-      /* The OP_NOT[I] opcodes work on one-byte characters only. */
-
-      if (negate_class)
-        {
-        if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
-        zerofirstbyte = firstbyte;
-        *code++ = ((options & PCRE_CASELESS) != 0)? OP_NOTI: OP_NOT;
-        *code++ = class_lastchar;
-        break;
-        }
-
-      /* For a single, positive character, get the value into mcbuffer, and
-      then we can handle this with the normal one-character code. */
-
-#ifdef SUPPORT_UTF8
-      if (utf8 && class_lastchar > 127)
-        mclength = _pcre_ord2utf8(class_lastchar, mcbuffer);
-      else
-#endif
-        {
-        mcbuffer[0] = class_lastchar;
-        mclength = 1;
-        }
-      goto ONE_CHAR;
-      }       /* End of 1-char optimization */
-
-    /* The general case - not the one-char optimization. If this is the first
-    thing in the branch, there can be no first char setting, whatever the
-    repeat count. Any reqbyte setting must remain unchanged after any kind of
-    repeat. */
+    /* If this is the first thing in the branch, there can be no first char
+    setting, whatever the repeat count. Any reqchar setting must remain
+    unchanged after any kind of repeat. */
 
-    if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
-    zerofirstbyte = firstbyte;
-    zeroreqbyte = reqbyte;
+    if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
+    zerofirstchar = firstchar;
+    zeroreqchar = reqchar;
 
     /* If there are characters with values > 255, we have to compile an
     extended class, with its own opcode, unless there was a negated special
@@ -4360,25 +4628,30 @@ for (;; ptr++)
     be listed) there are no characters < 256, we can omit the bitmap in the
     actual compiled code. */
 
-#ifdef SUPPORT_UTF8
-    if (class_utf8 && (!should_flip_negation || (options & PCRE_UCP) != 0))
+#ifdef SUPPORT_UTF
+    if (xclass && (!should_flip_negation || (options & PCRE_UCP) != 0))
+#elif !defined COMPILE_PCRE8
+    if (xclass && !should_flip_negation)
+#endif
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
       {
-      *class_utf8data++ = XCL_END;    /* Marks the end of extra data */
+      *class_uchardata++ = XCL_END;    /* Marks the end of extra data */
       *code++ = OP_XCLASS;
       code += LINK_SIZE;
-      *code = negate_class? XCL_NOT : 0;
+      *code = negate_class? XCL_NOT:0;
 
       /* If the map is required, move up the extra data to make room for it;
       otherwise just move the code pointer to the end of the extra data. */
 
-      if (class_charcount > 0)
+      if (class_has_8bitchar > 0)
         {
         *code++ |= XCL_MAP;
-        memmove(code + 32, code, class_utf8data - code);
+        memmove(code + (32 / sizeof(pcre_uchar)), code,
+          IN_UCHARS(class_uchardata - code));
         memcpy(code, classbits, 32);
-        code = class_utf8data + 32;
+        code = class_uchardata + (32 / sizeof(pcre_uchar));
         }
-      else code = class_utf8data;
+      else code = class_uchardata;
 
       /* Now fill in the complete length of the item */
 
@@ -4394,16 +4667,14 @@ for (;; ptr++)
     negating it if necessary. */
 
     *code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS;
-    if (negate_class)
-      {
-      if (lengthptr == NULL)    /* Save time in the pre-compile phase */
-        for (c = 0; c < 32; c++) code[c] = ~classbits[c];
-      }
-    else
+    if (lengthptr == NULL)    /* Save time in the pre-compile phase */
       {
+      if (negate_class)
+        for (c = 0; c < 32; c++) classbits[c] = ~classbits[c];
       memcpy(code, classbits, 32);
       }
-    code += 32;
+    code += 32 / sizeof(pcre_uchar);
+    NOT_CHAR:
     break;
 
 
@@ -4440,8 +4711,8 @@ for (;; ptr++)
 
     if (repeat_min == 0)
       {
-      firstbyte = zerofirstbyte;    /* Adjust for zero repeat */
-      reqbyte = zeroreqbyte;        /* Ditto */
+      firstchar = zerofirstchar;    /* Adjust for zero repeat */
+      reqchar = zeroreqchar;        /* Ditto */
       }
 
     /* Remember whether this is a variable length repeat */
@@ -4483,7 +4754,7 @@ for (;; ptr++)
 
     if (*previous == OP_RECURSE)
       {
-      memmove(previous + 1 + LINK_SIZE, previous, 1 + LINK_SIZE);
+      memmove(previous + 1 + LINK_SIZE, previous, IN_UCHARS(1 + LINK_SIZE));
       *previous = OP_ONCE;
       PUT(previous, 1, 2 + 2*LINK_SIZE);
       previous[2 + 2*LINK_SIZE] = OP_KET;
@@ -4506,37 +4777,36 @@ for (;; ptr++)
 
     /* If previous was a character match, abolish the item and generate a
     repeat item instead. If a char item has a minumum of more than one, ensure
-    that it is set in reqbyte - it might not be if a sequence such as x{3} is
-    the first thing in a branch because the x will have gone into firstbyte
+    that it is set in reqchar - it might not be if a sequence such as x{3} is
+    the first thing in a branch because the x will have gone into firstchar
     instead.  */
 
     if (*previous == OP_CHAR || *previous == OP_CHARI)
       {
       op_type = (*previous == OP_CHAR)? 0 : OP_STARI - OP_STAR;
 
-      /* Deal with UTF-8 characters that take up more than one byte. It's
+      /* Deal with UTF characters that take up more than one character. It's
       easier to write this out separately than try to macrify it. Use c to
-      hold the length of the character in bytes, plus 0x80 to flag that it's a
-      length rather than a small character. */
+      hold the length of the character in bytes, plus UTF_LENGTH to flag that
+      it's a length rather than a small character. */
 
-#ifdef SUPPORT_UTF8
-      if (utf8 && (code[-1] & 0x80) != 0)
+#ifdef SUPPORT_UTF
+      if (utf && NOT_FIRSTCHAR(code[-1]))
         {
-        uschar *lastchar = code - 1;
-        while((*lastchar & 0xc0) == 0x80) lastchar--;
+        pcre_uchar *lastchar = code - 1;
+        BACKCHAR(lastchar);
         c = (int)(code - lastchar);     /* Length of UTF-8 character */
-        memcpy(utf8_char, lastchar, c); /* Save the char */
-        c |= 0x80;                      /* Flag c as a length */
+        memcpy(utf_chars, lastchar, IN_UCHARS(c)); /* Save the char */
+        c |= UTF_LENGTH;                /* Flag c as a length */
         }
       else
-#endif
-
-      /* Handle the case of a single byte - either with no UTF8 support, or
-      with UTF-8 disabled, or for a UTF-8 character < 128. */
+#endif /* SUPPORT_UTF */
 
+      /* Handle the case of a single charater - either with no UTF support, or
+      with UTF disabled, or for a single character UTF character. */
         {
         c = code[-1];
-        if (repeat_min > 1) reqbyte = c | req_caseopt | cd->req_varyopt;
+        if (repeat_min > 1) reqchar = c | req_caseopt | cd->req_varyopt;
         }
 
       /* If the repetition is unlimited, it pays to see if the next thing on
@@ -4546,7 +4816,7 @@ for (;; ptr++)
 
       if (!possessive_quantifier &&
           repeat_max < 0 &&
-          check_auto_possessive(previous, utf8, ptr + 1, options, cd))
+          check_auto_possessive(previous, utf, ptr + 1, options, cd))
         {
         repeat_type = 0;    /* Force greedy */
         possessive_quantifier = TRUE;
@@ -4567,7 +4837,7 @@ for (;; ptr++)
       c = previous[1];
       if (!possessive_quantifier &&
           repeat_max < 0 &&
-          check_auto_possessive(previous, utf8, ptr + 1, options, cd))
+          check_auto_possessive(previous, utf, ptr + 1, options, cd))
         {
         repeat_type = 0;    /* Force greedy */
         possessive_quantifier = TRUE;
@@ -4584,14 +4854,14 @@ for (;; ptr++)
 
     else if (*previous < OP_EODN)
       {
-      uschar *oldcode;
+      pcre_uchar *oldcode;
       int prop_type, prop_value;
       op_type = OP_TYPESTAR - OP_STAR;  /* Use type opcodes */
       c = *previous;
 
       if (!possessive_quantifier &&
           repeat_max < 0 &&
-          check_auto_possessive(previous, utf8, ptr + 1, options, cd))
+          check_auto_possessive(previous, utf, ptr + 1, options, cd))
         {
         repeat_type = 0;    /* Force greedy */
         possessive_quantifier = TRUE;
@@ -4671,14 +4941,14 @@ for (;; ptr++)
         we have to insert the character for the previous code. For a repeated
         Unicode property match, there are two extra bytes that define the
         required property. In UTF-8 mode, long characters have their length in
-        c, with the 0x80 bit as a flag. */
+        c, with the UTF_LENGTH bit as a flag. */
 
         if (repeat_max < 0)
           {
-#ifdef SUPPORT_UTF8
-          if (utf8 && c >= 128)
+#ifdef SUPPORT_UTF
+          if (utf && (c & UTF_LENGTH) != 0)
             {
-            memcpy(code, utf8_char, c & 7);
+            memcpy(code, utf_chars, IN_UCHARS(c & 7));
             code += c & 7;
             }
           else
@@ -4700,10 +4970,10 @@ for (;; ptr++)
 
         else if (repeat_max != repeat_min)
           {
-#ifdef SUPPORT_UTF8
-          if (utf8 && c >= 128)
+#ifdef SUPPORT_UTF
+          if (utf && (c & UTF_LENGTH) != 0)
             {
-            memcpy(code, utf8_char, c & 7);
+            memcpy(code, utf_chars, IN_UCHARS(c & 7));
             code += c & 7;
             }
           else
@@ -4730,10 +5000,10 @@ for (;; ptr++)
 
       /* The character or character type itself comes last in all cases. */
 
-#ifdef SUPPORT_UTF8
-      if (utf8 && c >= 128)
+#ifdef SUPPORT_UTF
+      if (utf && (c & UTF_LENGTH) != 0)
         {
-        memcpy(code, utf8_char, c & 7);
+        memcpy(code, utf_chars, IN_UCHARS(c & 7));
         code += c & 7;
         }
       else
@@ -4757,7 +5027,7 @@ for (;; ptr++)
 
     else if (*previous == OP_CLASS ||
              *previous == OP_NCLASS ||
-#ifdef SUPPORT_UTF8
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
              *previous == OP_XCLASS ||
 #endif
              *previous == OP_REF ||
@@ -4806,8 +5076,8 @@ for (;; ptr++)
       {
       register int i;
       int len = (int)(code - previous);
-      uschar *bralink = NULL;
-      uschar *brazeroptr = NULL;
+      pcre_uchar *bralink = NULL;
+      pcre_uchar *brazeroptr = NULL;
 
       /* Repeating a DEFINE group is pointless, but Perl allows the syntax, so
       we just ignore the repeat. */
@@ -4860,8 +5130,8 @@ for (;; ptr++)
         if (repeat_max <= 1)    /* Covers 0, 1, and unlimited */
           {
           *code = OP_END;
-          adjust_recurse(previous, 1, utf8, cd, save_hwm);
-          memmove(previous+1, previous, len);
+          adjust_recurse(previous, 1, utf, cd, save_hwm);
+          memmove(previous + 1, previous, IN_UCHARS(len));
           code++;
           if (repeat_max == 0)
             {
@@ -4884,8 +5154,8 @@ for (;; ptr++)
           {
           int offset;
           *code = OP_END;
-          adjust_recurse(previous, 2 + LINK_SIZE, utf8, cd, save_hwm);
-          memmove(previous + 2 + LINK_SIZE, previous, len);
+          adjust_recurse(previous, 2 + LINK_SIZE, utf, cd, save_hwm);
+          memmove(previous + 2 + LINK_SIZE, previous, IN_UCHARS(len));
           code += 2 + LINK_SIZE;
           *previous++ = OP_BRAZERO + repeat_type;
           *previous++ = OP_BRA;
@@ -4938,13 +5208,13 @@ for (;; ptr++)
 
           else
             {
-            if (groupsetfirstbyte && reqbyte < 0) reqbyte = firstbyte;
+            if (groupsetfirstchar && reqchar < 0) reqchar = firstchar;
 
             for (i = 1; i < repeat_min; i++)
               {
-              uschar *hc;
-              uschar *this_hwm = cd->hwm;
-              memcpy(code, previous, len);
+              pcre_uchar *hc;
+              pcre_uchar *this_hwm = cd->hwm;
+              memcpy(code, previous, IN_UCHARS(len));
 
               while (cd->hwm > cd->start_workspace + cd->workspace_size -
                      WORK_SIZE_SAFETY_MARGIN - (this_hwm - save_hwm))
@@ -4953,8 +5223,8 @@ for (;; ptr++)
                 int this_offset = this_hwm - cd->start_workspace;
                 *errorcodeptr = expand_workspace(cd);
                 if (*errorcodeptr != 0) goto FAILED;
-                save_hwm = (uschar *)cd->start_workspace + save_offset;
-                this_hwm = (uschar *)cd->start_workspace + this_offset;
+                save_hwm = (pcre_uchar *)cd->start_workspace + save_offset;
+                this_hwm = (pcre_uchar *)cd->start_workspace + this_offset;
                 }
 
               for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)
@@ -5006,8 +5276,8 @@ for (;; ptr++)
 
         else for (i = repeat_max - 1; i >= 0; i--)
           {
-          uschar *hc;
-          uschar *this_hwm = cd->hwm;
+          pcre_uchar *hc;
+          pcre_uchar *this_hwm = cd->hwm;
 
           *code++ = OP_BRAZERO + repeat_type;
 
@@ -5023,7 +5293,7 @@ for (;; ptr++)
             PUTINC(code, 0, offset);
             }
 
-          memcpy(code, previous, len);
+          memcpy(code, previous, IN_UCHARS(len));
 
           /* Ensure there is enough workspace for forward references before
           copying them. */
@@ -5035,8 +5305,8 @@ for (;; ptr++)
             int this_offset = this_hwm - cd->start_workspace;
             *errorcodeptr = expand_workspace(cd);
             if (*errorcodeptr != 0) goto FAILED;
-            save_hwm = (uschar *)cd->start_workspace + save_offset;
-            this_hwm = (uschar *)cd->start_workspace + this_offset;
+            save_hwm = (pcre_uchar *)cd->start_workspace + save_offset;
+            this_hwm = (pcre_uchar *)cd->start_workspace + this_offset;
             }
 
           for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)
@@ -5055,7 +5325,7 @@ for (;; ptr++)
           {
           int oldlinkoffset;
           int offset = (int)(code - bralink + 1);
-          uschar *bra = code - offset;
+          pcre_uchar *bra = code - offset;
           oldlinkoffset = GET(bra, 1);
           bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset;
           *code++ = OP_KET;
@@ -5091,8 +5361,8 @@ for (;; ptr++)
 
       else
         {
-        uschar *ketcode = code - 1 - LINK_SIZE;
-        uschar *bracode = ketcode - GET(ketcode, 1);
+        pcre_uchar *ketcode = code - 1 - LINK_SIZE;
+        pcre_uchar *bracode = ketcode - GET(ketcode, 1);
 
         /* Convert possessive ONCE brackets to non-capturing */
 
@@ -5114,10 +5384,10 @@ for (;; ptr++)
 
           if (lengthptr == NULL)
             {
-            uschar *scode = bracode;
+            pcre_uchar *scode = bracode;
             do
               {
-              if (could_be_empty_branch(scode, ketcode, utf8, cd))
+              if (could_be_empty_branch(scode, ketcode, utf, cd))
                 {
                 *bracode += OP_SBRA - OP_BRA;
                 break;
@@ -5140,8 +5410,8 @@ for (;; ptr++)
               {
               int nlen = (int)(code - bracode);
               *code = OP_END;
-              adjust_recurse(bracode, 1 + LINK_SIZE, utf8, cd, save_hwm);
-              memmove(bracode + 1+LINK_SIZE, bracode, nlen);
+              adjust_recurse(bracode, 1 + LINK_SIZE, utf, cd, save_hwm);
+              memmove(bracode + 1 + LINK_SIZE, bracode, IN_UCHARS(nlen));
               code += 1 + LINK_SIZE;
               nlen += 1 + LINK_SIZE;
               *bracode = OP_BRAPOS;
@@ -5210,15 +5480,16 @@ for (;; ptr++)
       int len;
 
       if (*tempcode == OP_TYPEEXACT)
-        tempcode += _pcre_OP_lengths[*tempcode] +
-          ((tempcode[3] == OP_PROP || tempcode[3] == OP_NOTPROP)? 2 : 0);
+        tempcode += PRIV(OP_lengths)[*tempcode] +
+          ((tempcode[1 + IMM2_SIZE] == OP_PROP
+          || tempcode[1 + IMM2_SIZE] == OP_NOTPROP)? 2 : 0);
 
       else if (*tempcode == OP_EXACT || *tempcode == OP_NOTEXACT)
         {
-        tempcode += _pcre_OP_lengths[*tempcode];
-#ifdef SUPPORT_UTF8
-        if (utf8 && tempcode[-1] >= 0xc0)
-          tempcode += _pcre_utf8_table4[tempcode[-1] & 0x3f];
+        tempcode += PRIV(OP_lengths)[*tempcode];
+#ifdef SUPPORT_UTF
+        if (utf && HAS_EXTRALEN(tempcode[-1]))
+          tempcode += GET_EXTRALEN(tempcode[-1]);
 #endif
         }
 
@@ -5255,8 +5526,8 @@ for (;; ptr++)
 
         default:
         *code = OP_END;
-        adjust_recurse(tempcode, 1 + LINK_SIZE, utf8, cd, save_hwm);
-        memmove(tempcode + 1+LINK_SIZE, tempcode, len);
+        adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, save_hwm);
+        memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len));
         code += 1 + LINK_SIZE;
         len += 1 + LINK_SIZE;
         tempcode[0] = OP_ONCE;
@@ -5268,7 +5539,7 @@ for (;; ptr++)
       }
 
     /* In all case we no longer have a previous item. We also set the
-    "follows varying string" flag for subsequently encountered reqbytes if
+    "follows varying string" flag for subsequently encountered reqchars if
     it isn't already set and we have just passed a varying length item. */
 
     END_REPEAT:
@@ -5291,16 +5562,18 @@ for (;; ptr++)
 
     /* First deal with various "verbs" that can be introduced by '*'. */
 
-    if (*(++ptr) == CHAR_ASTERISK &&
-         ((cd->ctypes[ptr[1]] & ctype_letter) != 0 || ptr[1] == ':'))
+    ptr++;
+    if (ptr[0] == CHAR_ASTERISK && (ptr[1] == ':'
+         || (MAX_255(ptr[1]) && ((cd->ctypes[ptr[1]] & ctype_letter) != 0))))
       {
       int i, namelen;
       int arglen = 0;
       const char *vn = verbnames;
-      const uschar *name = ptr + 1;
-      const uschar *arg = NULL;
+      const pcre_uchar *name = ptr + 1;
+      const pcre_uchar *arg = NULL;
       previous = NULL;
-      while ((cd->ctypes[*++ptr] & ctype_letter) != 0) {};
+      ptr++;
+      while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_letter) != 0) ptr++;
       namelen = (int)(ptr - name);
 
       /* It appears that Perl allows any characters whatsoever, other than
@@ -5325,7 +5598,7 @@ for (;; ptr++)
       for (i = 0; i < verbcount; i++)
         {
         if (namelen == verbs[i].len &&
-            strncmp((char *)name, vn, namelen) == 0)
+            STRNCMP_UC_C8(name, vn, namelen) == 0)
           {
           /* Check for open captures before ACCEPT and convert it to
           ASSERT_ACCEPT if in an assertion. */
@@ -5346,8 +5619,8 @@ for (;; ptr++)
               }
             *code++ = (cd->assert_depth > 0)? OP_ASSERT_ACCEPT : OP_ACCEPT;
 
-            /* Do not set firstbyte after *ACCEPT */
-            if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
+            /* Do not set firstchar after *ACCEPT */
+            if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
             }
 
           /* Handle other cases with/without an argument */
@@ -5373,7 +5646,7 @@ for (;; ptr++)
             *code = verbs[i].op_arg;
             if (*code++ == OP_THEN_ARG) cd->external_flags |= PCRE_HASTHEN;
             *code++ = arglen;
-            memcpy(code, arg, arglen);
+            memcpy(code, arg, IN_UCHARS(arglen));
             code += arglen;
             *code++ = 0;
             }
@@ -5396,8 +5669,8 @@ for (;; ptr++)
       {
       int i, set, unset, namelen;
       int *optset;
-      const uschar *name;
-      uschar *slot;
+      const pcre_uchar *name;
+      pcre_uchar *slot;
 
       switch (*(++ptr))
         {
@@ -5450,10 +5723,10 @@ for (;; ptr++)
           break;
 
         /* Most other conditions use OP_CREF (a couple change to OP_RREF
-        below), and all need to skip 3 bytes at the start of the group. */
+        below), and all need to skip 1+IMM2_SIZE bytes at the start of the group. */
 
         code[1+LINK_SIZE] = OP_CREF;
-        skipbytes = 3;
+        skipbytes = 1+IMM2_SIZE;
         refsign = -1;
 
         /* Check for a test for recursion in a named group. */
@@ -5486,7 +5759,7 @@ for (;; ptr++)
 
         /* We now expect to read a name; any thing else is an error */
 
-        if ((cd->ctypes[ptr[1]] & ctype_word) == 0)
+        if (!MAX_255(ptr[1]) || (cd->ctypes[ptr[1]] & ctype_word) == 0)
           {
           ptr += 1;  /* To get the right offset */
           *errorcodeptr = ERR28;
@@ -5497,11 +5770,10 @@ for (;; ptr++)
 
         recno = 0;
         name = ++ptr;
-        while ((cd->ctypes[*ptr] & ctype_word) != 0)
+        while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0)
           {
           if (recno >= 0)
-            recno = ((digitab[*ptr] & ctype_digit) != 0)?
-              recno * 10 + *ptr - CHAR_0 : -1;
+            recno = (IS_DIGIT(*ptr))? recno * 10 + *ptr - CHAR_0 : -1;
           ptr++;
           }
         namelen = (int)(ptr - name);
@@ -5549,7 +5821,7 @@ for (;; ptr++)
         slot = cd->name_table;
         for (i = 0; i < cd->names_found; i++)
           {
-          if (strncmp((char *)name, (char *)slot+2, namelen) == 0) break;
+          if (STRNCMP_UC_UC(name, slot+IMM2_SIZE, namelen) == 0) break;
           slot += cd->name_entry_size;
           }
 
@@ -5565,7 +5837,7 @@ for (;; ptr++)
         /* Search the pattern for a forward reference */
 
         else if ((i = find_parens(cd, name, namelen,
-                        (options & PCRE_EXTENDED) != 0, utf8)) > 0)
+                        (options & PCRE_EXTENDED) != 0, utf)) > 0)
           {
           PUT2(code, 2+LINK_SIZE, i);
           code[1+LINK_SIZE]++;
@@ -5591,7 +5863,7 @@ for (;; ptr++)
           recno = 0;
           for (i = 1; i < namelen; i++)
             {
-            if ((digitab[name[i]] & ctype_digit) == 0)
+            if (!IS_DIGIT(name[i]))
               {
               *errorcodeptr = ERR15;
               goto FAILED;
@@ -5606,7 +5878,7 @@ for (;; ptr++)
         /* Similarly, check for the (?(DEFINE) "condition", which is always
         false. */
 
-        else if (namelen == 6 && strncmp((char *)name, STRING_DEFINE, 6) == 0)
+        else if (namelen == 6 && STRNCMP_UC_C8(name, STRING_DEFINE, 6) == 0)
           {
           code[1+LINK_SIZE] = OP_DEF;
           skipbytes = 1;
@@ -5669,7 +5941,8 @@ for (;; ptr++)
           break;
 
           default:                /* Could be name define, else bad */
-          if ((cd->ctypes[ptr[1]] & ctype_word) != 0) goto DEFINE_NAME;
+          if (MAX_255(ptr[1]) && (cd->ctypes[ptr[1]] & ctype_word) != 0)
+            goto DEFINE_NAME;
           ptr++;                  /* Correct offset for error */
           *errorcodeptr = ERR24;
           goto FAILED;
@@ -5691,8 +5964,9 @@ for (;; ptr++)
         *code++ = OP_CALLOUT;
           {
           int n = 0;
-          while ((digitab[*(++ptr)] & ctype_digit) != 0)
-            n = n * 10 + *ptr - CHAR_0;
+          ptr++;
+          while(IS_DIGIT(*ptr))
+            n = n * 10 + *ptr++ - CHAR_0;
           if (*ptr != CHAR_RIGHT_PARENTHESIS)
             {
             *errorcodeptr = ERR39;
@@ -5737,7 +6011,7 @@ for (;; ptr++)
             CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
           name = ++ptr;
 
-          while ((cd->ctypes[*ptr] & ctype_word) != 0) ptr++;
+          while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0) ptr++;
           namelen = (int)(ptr - name);
 
           /* In the pre-compile phase, just do a syntax check. */
@@ -5754,9 +6028,9 @@ for (;; ptr++)
               *errorcodeptr = ERR49;
               goto FAILED;
               }
-            if (namelen + 3 > cd->name_entry_size)
+            if (namelen + IMM2_SIZE + 1 > cd->name_entry_size)
               {
-              cd->name_entry_size = namelen + 3;
+              cd->name_entry_size = namelen + IMM2_SIZE + 1;
               if (namelen > MAX_NAME_SIZE)
                 {
                 *errorcodeptr = ERR48;
@@ -5785,10 +6059,10 @@ for (;; ptr++)
 
             for (i = 0; i < cd->names_found; i++)
               {
-              int crc = memcmp(name, slot+2, namelen);
+              int crc = memcmp(name, slot+IMM2_SIZE, IN_UCHARS(namelen));
               if (crc == 0)
                 {
-                if (slot[2+namelen] == 0)
+                if (slot[IMM2_SIZE+namelen] == 0)
                   {
                   if (GET2(slot, 0) != cd->bracount + 1 &&
                       (options & PCRE_DUPNAMES) == 0)
@@ -5809,7 +6083,7 @@ for (;; ptr++)
               if (crc < 0)
                 {
                 memmove(slot + cd->name_entry_size, slot,
-                  (cd->names_found - i) * cd->name_entry_size);
+                  IN_UCHARS((cd->names_found - i) * cd->name_entry_size));
                 break;
                 }
 
@@ -5823,7 +6097,7 @@ for (;; ptr++)
 
             if (!dupname)
               {
-              uschar *cslot = cd->name_table;
+              pcre_uchar *cslot = cd->name_table;
               for (i = 0; i < cd->names_found; i++)
                 {
                 if (cslot != slot)
@@ -5840,8 +6114,8 @@ for (;; ptr++)
               }
 
             PUT2(slot, 0, cd->bracount + 1);
-            memcpy(slot + 2, name, namelen);
-            slot[2+namelen] = 0;
+            memcpy(slot + IMM2_SIZE, name, IN_UCHARS(namelen));
+            slot[IMM2_SIZE + namelen] = 0;
             }
           }
 
@@ -5867,7 +6141,7 @@ for (;; ptr++)
 
         NAMED_REF_OR_RECURSE:
         name = ++ptr;
-        while ((cd->ctypes[*ptr] & ctype_word) != 0) ptr++;
+        while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0) ptr++;
         namelen = (int)(ptr - name);
 
         /* In the pre-compile phase, do a syntax check. We used to just set
@@ -5879,7 +6153,7 @@ for (;; ptr++)
 
         if (lengthptr != NULL)
           {
-          const uschar *temp;
+          const pcre_uchar *temp;
 
           if (namelen == 0)
             {
@@ -5909,7 +6183,7 @@ for (;; ptr++)
           temp = cd->end_pattern;
           cd->end_pattern = ptr;
           recno = find_parens(cd, name, namelen,
-            (options & PCRE_EXTENDED) != 0, utf8);
+            (options & PCRE_EXTENDED) != 0, utf);
           cd->end_pattern = temp;
           if (recno < 0) recno = 0;    /* Forward ref; set dummy number */
           }
@@ -5924,8 +6198,8 @@ for (;; ptr++)
           slot = cd->name_table;
           for (i = 0; i < cd->names_found; i++)
             {
-            if (strncmp((char *)name, (char *)slot+2, namelen) == 0 &&
-                slot[2+namelen] == 0)
+            if (STRNCMP_UC_UC(name, slot+IMM2_SIZE, namelen) == 0 &&
+                slot[IMM2_SIZE+namelen] == 0)
               break;
             slot += cd->name_entry_size;
             }
@@ -5936,7 +6210,7 @@ for (;; ptr++)
             }
           else if ((recno =                /* Forward back reference */
                     find_parens(cd, name, namelen,
-                      (options & PCRE_EXTENDED) != 0, utf8)) <= 0)
+                      (options & PCRE_EXTENDED) != 0, utf)) <= 0)
             {
             *errorcodeptr = ERR15;
             goto FAILED;
@@ -5961,7 +6235,7 @@ for (;; ptr++)
         case CHAR_0: case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4:
         case CHAR_5: case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:
           {
-          const uschar *called;
+          const pcre_uchar *called;
           terminator = CHAR_RIGHT_PARENTHESIS;
 
           /* Come here from the \g<...> and \g'...' code (Oniguruma
@@ -5975,7 +6249,7 @@ for (;; ptr++)
           if ((refsign = *ptr) == CHAR_PLUS)
             {
             ptr++;
-            if ((digitab[*ptr] & ctype_digit) == 0)
+            if (!IS_DIGIT(*ptr))
               {
               *errorcodeptr = ERR63;
               goto FAILED;
@@ -5983,13 +6257,13 @@ for (;; ptr++)
             }
           else if (refsign == CHAR_MINUS)
             {
-            if ((digitab[ptr[1]] & ctype_digit) == 0)
+            if (!IS_DIGIT(ptr[1]))
               goto OTHER_CHAR_AFTER_QUERY;
             ptr++;
             }
 
           recno = 0;
-          while((digitab[*ptr] & ctype_digit) != 0)
+          while(IS_DIGIT(*ptr))
             recno = recno * 10 + *ptr++ - CHAR_0;
 
           if (*ptr != terminator)
@@ -6040,14 +6314,14 @@ for (;; ptr++)
             {
             *code = OP_END;
             if (recno != 0)
-              called = _pcre_find_bracket(cd->start_code, utf8, recno);
+              called = PRIV(find_bracket)(cd->start_code, utf, recno);
 
             /* Forward reference */
 
             if (called == NULL)
               {
               if (find_parens(cd, NULL, recno,
-                    (options & PCRE_EXTENDED) != 0, utf8) < 0)
+                    (options & PCRE_EXTENDED) != 0, utf) < 0)
                 {
                 *errorcodeptr = ERR15;
                 goto FAILED;
@@ -6077,7 +6351,7 @@ for (;; ptr++)
             conditional subpatterns will be picked up then. */
 
             else if (GET(called, 1) == 0 && cond_depth <= 0 &&
-                     could_be_empty(called, code, bcptr, utf8, cd))
+                     could_be_empty(called, code, bcptr, utf, cd))
               {
               *errorcodeptr = ERR40;
               goto FAILED;
@@ -6085,18 +6359,18 @@ for (;; ptr++)
             }
 
           /* Insert the recursion/subroutine item. It does not have a set first
-          byte (relevant if it is repeated, because it will then be wrapped
-          with ONCE brackets). */
+          character (relevant if it is repeated, because it will then be
+          wrapped with ONCE brackets). */
 
           *code = OP_RECURSE;
           PUT(code, 1, (int)(called - cd->start_code));
           code += 1 + LINK_SIZE;
-          groupsetfirstbyte = FALSE;
+          groupsetfirstchar = FALSE;
           }
 
         /* Can't determine a first byte now */
 
-        if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
+        if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
         continue;
 
 
@@ -6153,7 +6427,7 @@ for (;; ptr++)
         both phases.
 
         If we are not at the pattern start, reset the greedy defaults and the
-        case value for firstbyte and reqbyte. */
+        case value for firstchar and reqchar. */
 
         if (*ptr == CHAR_RIGHT_PARENTHESIS)
           {
@@ -6166,7 +6440,7 @@ for (;; ptr++)
             {
             greedy_default = ((newoptions & PCRE_UNGREEDY) != 0);
             greedy_non_default = greedy_default ^ 1;
-            req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? REQ_CASELESS : 0;
+            req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? REQ_CASELESS:0;
             }
 
           /* Change options at this level, and pass them back for use
@@ -6203,7 +6477,7 @@ for (;; ptr++)
       NUMBERED_GROUP:
       cd->bracount += 1;
       PUT2(code, 1+LINK_SIZE, cd->bracount);
-      skipbytes = 2;
+      skipbytes = IMM2_SIZE;
       }
 
     /* Process nested bracketed regex. Assertions used not to be repeatable,
@@ -6229,8 +6503,8 @@ for (;; ptr++)
          skipbytes,                       /* Skip over bracket number */
          cond_depth +
            ((bravalue == OP_COND)?1:0),   /* Depth of condition subpatterns */
-         &subfirstbyte,                   /* For possible first char */
-         &subreqbyte,                     /* For possible last char */
+         &subfirstchar,                   /* For possible first char */
+         &subreqchar,                     /* For possible last char */
          bcptr,                           /* Current branch chain */
          cd,                              /* Tables block */
          (lengthptr == NULL)? NULL :      /* Actual compile phase */
@@ -6258,7 +6532,7 @@ for (;; ptr++)
 
     if (bravalue == OP_COND && lengthptr == NULL)
       {
-      uschar *tc = code;
+      pcre_uchar *tc = code;
       int condcount = 0;
 
       do {
@@ -6281,7 +6555,7 @@ for (;; ptr++)
         }
 
       /* A "normal" conditional group. If there is just one branch, we must not
-      make use of its firstbyte or reqbyte, because this is equivalent to an
+      make use of its firstchar or reqchar, because this is equivalent to an
       empty second branch. */
 
       else
@@ -6291,7 +6565,7 @@ for (;; ptr++)
           *errorcodeptr = ERR27;
           goto FAILED;
           }
-        if (condcount == 1) subfirstbyte = subreqbyte = REQ_NONE;
+        if (condcount == 1) subfirstchar = subreqchar = REQ_NONE;
         }
       }
 
@@ -6335,55 +6609,55 @@ for (;; ptr++)
     /* Handle updating of the required and first characters for other types of
     group. Update for normal brackets of all kinds, and conditions with two
     branches (see code above). If the bracket is followed by a quantifier with
-    zero repeat, we have to back off. Hence the definition of zeroreqbyte and
-    zerofirstbyte outside the main loop so that they can be accessed for the
+    zero repeat, we have to back off. Hence the definition of zeroreqchar and
+    zerofirstchar outside the main loop so that they can be accessed for the
     back off. */
 
-    zeroreqbyte = reqbyte;
-    zerofirstbyte = firstbyte;
-    groupsetfirstbyte = FALSE;
+    zeroreqchar = reqchar;
+    zerofirstchar = firstchar;
+    groupsetfirstchar = FALSE;
 
     if (bravalue >= OP_ONCE)
       {
-      /* If we have not yet set a firstbyte in this branch, take it from the
+      /* If we have not yet set a firstchar in this branch, take it from the
       subpattern, remembering that it was set here so that a repeat of more
-      than one can replicate it as reqbyte if necessary. If the subpattern has
-      no firstbyte, set "none" for the whole branch. In both cases, a zero
-      repeat forces firstbyte to "none". */
+      than one can replicate it as reqchar if necessary. If the subpattern has
+      no firstchar, set "none" for the whole branch. In both cases, a zero
+      repeat forces firstchar to "none". */
 
-      if (firstbyte == REQ_UNSET)
+      if (firstchar == REQ_UNSET)
         {
-        if (subfirstbyte >= 0)
+        if (subfirstchar >= 0)
           {
-          firstbyte = subfirstbyte;
-          groupsetfirstbyte = TRUE;
+          firstchar = subfirstchar;
+          groupsetfirstchar = TRUE;
           }
-        else firstbyte = REQ_NONE;
-        zerofirstbyte = REQ_NONE;
+        else firstchar = REQ_NONE;
+        zerofirstchar = REQ_NONE;
         }
 
-      /* If firstbyte was previously set, convert the subpattern's firstbyte
-      into reqbyte if there wasn't one, using the vary flag that was in
+      /* If firstchar was previously set, convert the subpattern's firstchar
+      into reqchar if there wasn't one, using the vary flag that was in
       existence beforehand. */
 
-      else if (subfirstbyte >= 0 && subreqbyte < 0)
-        subreqbyte = subfirstbyte | tempreqvary;
+      else if (subfirstchar >= 0 && subreqchar < 0)
+        subreqchar = subfirstchar | tempreqvary;
 
       /* If the subpattern set a required byte (or set a first byte that isn't
       really the first byte - see above), set it. */
 
-      if (subreqbyte >= 0) reqbyte = subreqbyte;
+      if (subreqchar >= 0) reqchar = subreqchar;
       }
 
-    /* For a forward assertion, we take the reqbyte, if set. This can be
+    /* For a forward assertion, we take the reqchar, if set. This can be
     helpful if the pattern that follows the assertion doesn't set a different
-    char. For example, it's useful for /(?=abcde).+/. We can't set firstbyte
+    char. For example, it's useful for /(?=abcde).+/. We can't set firstchar
     for an assertion, however because it leads to incorrect effect for patterns
-    such as /(?=a)a.+/ when the "real" "a" would then become a reqbyte instead
-    of a firstbyte. This is overcome by a scan at the end if there's no
-    firstbyte, looking for an asserted first char. */
+    such as /(?=a)a.+/ when the "real" "a" would then become a reqchar instead
+    of a firstchar. This is overcome by a scan at the end if there's no
+    firstchar, looking for an asserted first char. */
 
-    else if (bravalue == OP_ASSERT && subreqbyte >= 0) reqbyte = subreqbyte;
+    else if (bravalue == OP_ASSERT && subreqchar >= 0) reqchar = subreqchar;
     break;     /* End of processing '(' */
 
 
@@ -6416,13 +6690,13 @@ for (;; ptr++)
       /* For metasequences that actually match a character, we disable the
       setting of a first character if it hasn't already been set. */
 
-      if (firstbyte == REQ_UNSET && -c > ESC_b && -c < ESC_Z)
-        firstbyte = REQ_NONE;
+      if (firstchar == REQ_UNSET && -c > ESC_b && -c < ESC_Z)
+        firstchar = REQ_NONE;
 
       /* Set values to reset to if this is followed by a zero repeat. */
 
-      zerofirstbyte = firstbyte;
-      zeroreqbyte = reqbyte;
+      zerofirstchar = firstchar;
+      zeroreqchar = reqchar;
 
       /* \g<name> or \g'name' is a subroutine call by name and \g<n> or \g'n'
       is a subroutine call by number (Oniguruma syntax). In fact, the value
@@ -6433,7 +6707,7 @@ for (;; ptr++)
 
       if (-c == ESC_g)
         {
-        const uschar *p;
+        const pcre_uchar *p;
         save_hwm = cd->hwm;   /* Normally this is set when '(' is read */
         terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
           CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
@@ -6450,10 +6724,11 @@ for (;; ptr++)
 
         if (ptr[1] != CHAR_PLUS && ptr[1] != CHAR_MINUS)
           {
-          BOOL isnumber = TRUE;
+          BOOL is_a_number = TRUE;
           for (p = ptr + 1; *p != 0 && *p != terminator; p++)
             {
-            if ((cd->ctypes[*p] & ctype_digit) == 0) isnumber = FALSE;
+            if (!MAX_255(*p)) { is_a_number = FALSE; break; }
+            if ((cd->ctypes[*p] & ctype_digit) == 0) is_a_number = FALSE;
             if ((cd->ctypes[*p] & ctype_word) == 0) break;
             }
           if (*p != terminator)
@@ -6461,7 +6736,7 @@ for (;; ptr++)
             *errorcodeptr = ERR57;
             break;
             }
-          if (isnumber)
+          if (is_a_number)
             {
             ptr++;
             goto HANDLE_NUMERICAL_RECURSION;
@@ -6473,7 +6748,7 @@ for (;; ptr++)
         /* Test a signed number in angle brackets or quotes. */
 
         p = ptr + 2;
-        while ((digitab[*p] & ctype_digit) != 0) p++;
+        while (IS_DIGIT(*p)) p++;
         if (*p != terminator)
           {
           *errorcodeptr = ERR57;
@@ -6501,7 +6776,7 @@ for (;; ptr++)
         goto NAMED_REF_OR_RECURSE;
         }
 
-      /* Back references are handled specially; must disable firstbyte if
+      /* Back references are handled specially; must disable firstchar if
       not set to cope with cases like (?=(\w+))\1: which would otherwise set
       ':' later. */
 
@@ -6511,7 +6786,7 @@ for (;; ptr++)
         recno = -c - ESC_REF;
 
         HANDLE_REFERENCE:    /* Come here from named backref handling */
-        if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
+        if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
         previous = code;
         *code++ = ((options & PCRE_CASELESS) != 0)? OP_REFI : OP_REF;
         PUT2INC(code, 0, recno);
@@ -6578,7 +6853,7 @@ for (;; ptr++)
 
           {
           previous = (-c > ESC_b && -c < ESC_Z)? code : NULL;
-          *code++ = (!utf8 && c == -ESC_C)? OP_ALLANY : -c;
+          *code++ = (!utf && c == -ESC_C)? OP_ALLANY : -c;
           }
         }
       continue;
@@ -6588,9 +6863,9 @@ for (;; ptr++)
     a value > 127. We set its representation in the length/buffer, and then
     handle it as a data character. */
 
-#ifdef SUPPORT_UTF8
-    if (utf8 && c > 127)
-      mclength = _pcre_ord2utf8(c, mcbuffer);
+#ifdef SUPPORT_UTF
+    if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR)
+      mclength = PRIV(ord2utf)(c, mcbuffer);
     else
 #endif
 
@@ -6611,12 +6886,9 @@ for (;; ptr++)
     mclength = 1;
     mcbuffer[0] = c;
 
-#ifdef SUPPORT_UTF8
-    if (utf8 && c >= 0xc0)
-      {
-      while ((ptr[1] & 0xc0) == 0x80)
-        mcbuffer[mclength++] = *(++ptr);
-      }
+#ifdef SUPPORT_UTF
+    if (utf && HAS_EXTRALEN(c))
+      ACROSSCHAR(TRUE, ptr[1], mcbuffer[mclength++] = *(++ptr));
 #endif
 
     /* At this point we have the character's bytes in mcbuffer, and the length
@@ -6634,34 +6906,34 @@ for (;; ptr++)
 
     /* Set the first and required bytes appropriately. If no previous first
     byte, set it from this character, but revert to none on a zero repeat.
-    Otherwise, leave the firstbyte value alone, and don't change it on a zero
+    Otherwise, leave the firstchar value alone, and don't change it on a zero
     repeat. */
 
-    if (firstbyte == REQ_UNSET)
+    if (firstchar == REQ_UNSET)
       {
-      zerofirstbyte = REQ_NONE;
-      zeroreqbyte = reqbyte;
+      zerofirstchar = REQ_NONE;
+      zeroreqchar = reqchar;
 
-      /* If the character is more than one byte long, we can set firstbyte
+      /* If the character is more than one byte long, we can set firstchar
       only if it is not to be matched caselessly. */
 
       if (mclength == 1 || req_caseopt == 0)
         {
-        firstbyte = mcbuffer[0] | req_caseopt;
-        if (mclength != 1) reqbyte = code[-1] | cd->req_varyopt;
+        firstchar = mcbuffer[0] | req_caseopt;
+        if (mclength != 1) reqchar = code[-1] | cd->req_varyopt;
         }
-      else firstbyte = reqbyte = REQ_NONE;
+      else firstchar = reqchar = REQ_NONE;
       }
 
-    /* firstbyte was previously set; we can set reqbyte only if the length is
+    /* firstchar was previously set; we can set reqchar only if the length is
     1 or the matching is caseful. */
 
     else
       {
-      zerofirstbyte = firstbyte;
-      zeroreqbyte = reqbyte;
+      zerofirstchar = firstchar;
+      zeroreqchar = reqchar;
       if (mclength == 1 || req_caseopt == 0)
-        reqbyte = code[-1] | req_caseopt | cd->req_varyopt;
+        reqchar = code[-1] | req_caseopt | cd->req_varyopt;
       }
 
     break;            /* End of literal character handling */
@@ -6701,8 +6973,8 @@ Arguments:
   reset_bracount TRUE to reset the count for each branch
   skipbytes      skip this many bytes at start (for brackets and OP_COND)
   cond_depth     depth of nesting for conditional subpatterns
-  firstbyteptr   place to put the first required character, or a negative number
-  reqbyteptr     place to put the last required character, or a negative number
+  firstcharptr   place to put the first required character, or a negative number
+  reqcharptr     place to put the last required character, or a negative number
   bcptr          pointer to the chain of currently open branches
   cd             points to the data block with tables pointers etc.
   lengthptr      NULL during the real compile phase
@@ -6712,20 +6984,20 @@ Returns:         TRUE on success
 */
 
 static BOOL
-compile_regex(int options, uschar **codeptr, const uschar **ptrptr,
+compile_regex(int options, pcre_uchar **codeptr, const pcre_uchar **ptrptr,
   int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, int skipbytes,
-  int cond_depth, int *firstbyteptr, int *reqbyteptr, branch_chain *bcptr,
-  compile_data *cd, int *lengthptr)
+  int cond_depth, pcre_int32 *firstcharptr, pcre_int32 *reqcharptr,
+  branch_chain *bcptr, compile_data *cd, int *lengthptr)
 {
-const uschar *ptr = *ptrptr;
-uschar *code = *codeptr;
-uschar *last_branch = code;
-uschar *start_bracket = code;
-uschar *reverse_count = NULL;
+const pcre_uchar *ptr = *ptrptr;
+pcre_uchar *code = *codeptr;
+pcre_uchar *last_branch = code;
+pcre_uchar *start_bracket = code;
+pcre_uchar *reverse_count = NULL;
 open_capitem capitem;
 int capnumber = 0;
-int firstbyte, reqbyte;
-int branchfirstbyte, branchreqbyte;
+pcre_int32 firstchar, reqchar;
+pcre_int32 branchfirstchar, branchreqchar;
 int length;
 int orig_bracount;
 int max_bracount;
@@ -6734,7 +7006,7 @@ branch_chain bc;
 bc.outer = bcptr;
 bc.current_branch = code;
 
-firstbyte = reqbyte = REQ_UNSET;
+firstchar = reqchar = REQ_UNSET;
 
 /* Accumulate the length for use in the pre-compile phase. Start with the
 length of the BRA and KET and any extra bytes that are required at the
@@ -6793,8 +7065,8 @@ for (;;)
   /* Now compile the branch; in the pre-compile phase its length gets added
   into the length. */
 
-  if (!compile_branch(&options, &code, &ptr, errorcodeptr, &branchfirstbyte,
-        &branchreqbyte, &bc, cond_depth, cd,
+  if (!compile_branch(&options, &code, &ptr, errorcodeptr, &branchfirstchar,
+        &branchreqchar, &bc, cond_depth, cd,
         (lengthptr == NULL)? NULL : &length))
     {
     *ptrptr = ptr;
@@ -6810,43 +7082,43 @@ for (;;)
 
   if (lengthptr == NULL)
     {
-    /* If this is the first branch, the firstbyte and reqbyte values for the
+    /* If this is the first branch, the firstchar and reqchar values for the
     branch become the values for the regex. */
 
     if (*last_branch != OP_ALT)
       {
-      firstbyte = branchfirstbyte;
-      reqbyte = branchreqbyte;
+      firstchar = branchfirstchar;
+      reqchar = branchreqchar;
       }
 
-    /* If this is not the first branch, the first char and reqbyte have to
+    /* If this is not the first branch, the first char and reqchar have to
     match the values from all the previous branches, except that if the
-    previous value for reqbyte didn't have REQ_VARY set, it can still match,
+    previous value for reqchar didn't have REQ_VARY set, it can still match,
     and we set REQ_VARY for the regex. */
 
     else
       {
-      /* If we previously had a firstbyte, but it doesn't match the new branch,
-      we have to abandon the firstbyte for the regex, but if there was
-      previously no reqbyte, it takes on the value of the old firstbyte. */
+      /* If we previously had a firstchar, but it doesn't match the new branch,
+      we have to abandon the firstchar for the regex, but if there was
+      previously no reqchar, it takes on the value of the old firstchar. */
 
-      if (firstbyte >= 0 && firstbyte != branchfirstbyte)
+      if (firstchar >= 0 && firstchar != branchfirstchar)
         {
-        if (reqbyte < 0) reqbyte = firstbyte;
-        firstbyte = REQ_NONE;
+        if (reqchar < 0) reqchar = firstchar;
+        firstchar = REQ_NONE;
         }
 
-      /* If we (now or from before) have no firstbyte, a firstbyte from the
-      branch becomes a reqbyte if there isn't a branch reqbyte. */
+      /* If we (now or from before) have no firstchar, a firstchar from the
+      branch becomes a reqchar if there isn't a branch reqchar. */
 
-      if (firstbyte < 0 && branchfirstbyte >= 0 && branchreqbyte < 0)
-          branchreqbyte = branchfirstbyte;
+      if (firstchar < 0 && branchfirstchar >= 0 && branchreqchar < 0)
+          branchreqchar = branchfirstchar;
 
-      /* Now ensure that the reqbytes match */
+      /* Now ensure that the reqchars match */
 
-      if ((reqbyte & ~REQ_VARY) != (branchreqbyte & ~REQ_VARY))
-        reqbyte = REQ_NONE;
-      else reqbyte |= branchreqbyte;   /* To "or" REQ_VARY */
+      if ((reqchar & ~REQ_VARY) != (branchreqchar & ~REQ_VARY))
+        reqchar = REQ_NONE;
+      else reqchar |= branchreqchar;   /* To "or" REQ_VARY */
       }
 
     /* If lookbehind, check that this branch matches a fixed-length string, and
@@ -6916,7 +7188,7 @@ for (;;)
       if (cd->open_caps->flag)
         {
         memmove(start_bracket + 1 + LINK_SIZE, start_bracket,
-          code - start_bracket);
+          IN_UCHARS(code - start_bracket));
         *start_bracket = OP_ONCE;
         code += 1 + LINK_SIZE;
         PUT(start_bracket, 1, (int)(code - start_bracket));
@@ -6936,8 +7208,8 @@ for (;;)
 
     *codeptr = code;
     *ptrptr = ptr;
-    *firstbyteptr = firstbyte;
-    *reqbyteptr = reqbyte;
+    *firstcharptr = firstchar;
+    *reqcharptr = reqchar;
     if (lengthptr != NULL)
       {
       if (OFLOW_MAX - *lengthptr < length)
@@ -7018,12 +7290,12 @@ Returns:     TRUE or FALSE
 */
 
 static BOOL
-is_anchored(register const uschar *code, unsigned int bracket_map,
+is_anchored(register const pcre_uchar *code, unsigned int bracket_map,
   unsigned int backref_map)
 {
 do {
-   const uschar *scode = first_significant_code(code + _pcre_OP_lengths[*code],
-     FALSE);
+   const pcre_uchar *scode = first_significant_code(
+     code + PRIV(OP_lengths)[*code], FALSE);
    register int op = *scode;
 
    /* Non-capturing brackets */
@@ -7095,12 +7367,12 @@ Returns:         TRUE or FALSE
 */
 
 static BOOL
-is_startline(const uschar *code, unsigned int bracket_map,
+is_startline(const pcre_uchar *code, unsigned int bracket_map,
   unsigned int backref_map)
 {
 do {
-   const uschar *scode = first_significant_code(code + _pcre_OP_lengths[*code],
-     FALSE);
+   const pcre_uchar *scode = first_significant_code(
+     code + PRIV(OP_lengths)[*code], FALSE);
    register int op = *scode;
 
    /* If we are at the start of a conditional assertion group, *both* the
@@ -7111,7 +7383,7 @@ do {
    if (op == OP_COND)
      {
      scode += 1 + LINK_SIZE;
-     if (*scode == OP_CALLOUT) scode += _pcre_OP_lengths[OP_CALLOUT];
+     if (*scode == OP_CALLOUT) scode += PRIV(OP_lengths)[OP_CALLOUT];
      switch (*scode)
        {
        case OP_CREF:
@@ -7198,14 +7470,15 @@ Returns:     -1 or the fixed first char
 */
 
 static int
-find_firstassertedchar(const uschar *code, BOOL inassert)
+find_firstassertedchar(const pcre_uchar *code, BOOL inassert)
 {
 register int c = -1;
 do {
    int d;
    int xl = (*code == OP_CBRA || *code == OP_SCBRA ||
-             *code == OP_CBRAPOS || *code == OP_SCBRAPOS)? 2:0;
-   const uschar *scode = first_significant_code(code + 1+LINK_SIZE + xl, TRUE);
+             *code == OP_CBRAPOS || *code == OP_SCBRAPOS)? IMM2_SIZE:0;
+   const pcre_uchar *scode = first_significant_code(code + 1+LINK_SIZE + xl,
+     TRUE);
    register int op = *scode;
 
    switch(op)
@@ -7229,7 +7502,7 @@ do {
      break;
 
      case OP_EXACT:
-     scode += 2;
+     scode += IMM2_SIZE;
      /* Fall through */
 
      case OP_CHAR:
@@ -7242,7 +7515,7 @@ do {
      break;
 
      case OP_EXACTI:
-     scode += 2;
+     scode += IMM2_SIZE;
      /* Fall through */
 
      case OP_CHARI:
@@ -7285,28 +7558,45 @@ Returns:        pointer to compiled data block, or NULL on error,
                 with errorptr and erroroffset set
 */
 
+#ifdef COMPILE_PCRE8
 PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
 pcre_compile(const char *pattern, int options, const char **errorptr,
   int *erroroffset, const unsigned char *tables)
+#else
+PCRE_EXP_DEFN pcre16 * PCRE_CALL_CONVENTION
+pcre16_compile(PCRE_SPTR16 pattern, int options, const char **errorptr,
+  int *erroroffset, const unsigned char *tables)
+#endif
 {
+#ifdef COMPILE_PCRE8
 return pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
+#else
+return pcre16_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
+#endif
 }
 
 
+#ifdef COMPILE_PCRE8
 PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
 pcre_compile2(const char *pattern, int options, int *errorcodeptr,
   const char **errorptr, int *erroroffset, const unsigned char *tables)
+#else
+PCRE_EXP_DEFN pcre16 * PCRE_CALL_CONVENTION
+pcre16_compile2(PCRE_SPTR16 pattern, int options, int *errorcodeptr,
+  const char **errorptr, int *erroroffset, const unsigned char *tables)
+#endif
 {
-real_pcre *re;
+REAL_PCRE *re;
 int length = 1;  /* For final END opcode */
-int firstbyte, reqbyte, newline;
+pcre_int32 firstchar, reqchar;
+int newline;
 int errorcode = 0;
 int skipatstart = 0;
-BOOL utf8;
+BOOL utf;
 size_t size;
-uschar *code;
-const uschar *codestart;
-const uschar *ptr;
+pcre_uchar *code;
+const pcre_uchar *codestart;
+const pcre_uchar *ptr;
 compile_data compile_block;
 compile_data *cd = &compile_block;
 
@@ -7317,11 +7607,11 @@ this purpose. The same space is used in the second phase for remembering where
 to fill in forward references to subpatterns. That may overflow, in which case
 new memory is obtained from malloc(). */
 
-uschar cworkspace[COMPILE_WORK_SIZE];
+pcre_uchar cworkspace[COMPILE_WORK_SIZE];
 
 /* Set this early so that early errors get offset 0. */
 
-ptr = (const uschar *)pattern;
+ptr = (const pcre_uchar *)pattern;
 
 /* We can't pass back an error message if errorptr is NULL; I guess the best we
 can do is just return NULL, but we can set a code value if there is a code
@@ -7348,7 +7638,7 @@ if (erroroffset == NULL)
 
 /* Set up pointers to the individual character tables */
 
-if (tables == NULL) tables = _pcre_default_tables;
+if (tables == NULL) tables = PRIV(default_tables);
 cd->lcc = tables + lcc_offset;
 cd->fcc = tables + fcc_offset;
 cd->cbits = tables + cbits_offset;
@@ -7371,27 +7661,33 @@ while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
   int newnl = 0;
   int newbsr = 0;
 
-  if (strncmp((char *)(ptr+skipatstart+2), STRING_UTF8_RIGHTPAR, 5) == 0)
+#ifdef COMPILE_PCRE8
+  if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF_RIGHTPAR, 5) == 0)
     { skipatstart += 7; options |= PCRE_UTF8; continue; }
-  else if (strncmp((char *)(ptr+skipatstart+2), STRING_UCP_RIGHTPAR, 4) == 0)
+#endif
+#ifdef COMPILE_PCRE16
+  if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF_RIGHTPAR, 6) == 0)
+    { skipatstart += 8; options |= PCRE_UTF16; continue; }
+#endif
+  else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UCP_RIGHTPAR, 4) == 0)
     { skipatstart += 6; options |= PCRE_UCP; continue; }
-  else if (strncmp((char *)(ptr+skipatstart+2), STRING_NO_START_OPT_RIGHTPAR, 13) == 0)
+  else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_NO_START_OPT_RIGHTPAR, 13) == 0)
     { skipatstart += 15; options |= PCRE_NO_START_OPTIMIZE; continue; }
 
-  if (strncmp((char *)(ptr+skipatstart+2), STRING_CR_RIGHTPAR, 3) == 0)
+  if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_CR_RIGHTPAR, 3) == 0)
     { skipatstart += 5; newnl = PCRE_NEWLINE_CR; }
-  else if (strncmp((char *)(ptr+skipatstart+2), STRING_LF_RIGHTPAR, 3)  == 0)
+  else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LF_RIGHTPAR, 3)  == 0)
     { skipatstart += 5; newnl = PCRE_NEWLINE_LF; }
-  else if (strncmp((char *)(ptr+skipatstart+2), STRING_CRLF_RIGHTPAR, 5)  == 0)
+  else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_CRLF_RIGHTPAR, 5)  == 0)
     { skipatstart += 7; newnl = PCRE_NEWLINE_CR + PCRE_NEWLINE_LF; }
-  else if (strncmp((char *)(ptr+skipatstart+2), STRING_ANY_RIGHTPAR, 4) == 0)
+  else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_ANY_RIGHTPAR, 4) == 0)
     { skipatstart += 6; newnl = PCRE_NEWLINE_ANY; }
-  else if (strncmp((char *)(ptr+skipatstart+2), STRING_ANYCRLF_RIGHTPAR, 8) == 0)
+  else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_ANYCRLF_RIGHTPAR, 8) == 0)
     { skipatstart += 10; newnl = PCRE_NEWLINE_ANYCRLF; }
 
-  else if (strncmp((char *)(ptr+skipatstart+2), STRING_BSR_ANYCRLF_RIGHTPAR, 12) == 0)
+  else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_BSR_ANYCRLF_RIGHTPAR, 12) == 0)
     { skipatstart += 14; newbsr = PCRE_BSR_ANYCRLF; }
-  else if (strncmp((char *)(ptr+skipatstart+2), STRING_BSR_UNICODE_RIGHTPAR, 12) == 0)
+  else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_BSR_UNICODE_RIGHTPAR, 12) == 0)
     { skipatstart += 14; newbsr = PCRE_BSR_UNICODE; }
 
   if (newnl != 0)
@@ -7401,22 +7697,27 @@ while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
   else break;
   }
 
-utf8 = (options & PCRE_UTF8) != 0;
+/* PCRE_UTF16 has the same value as PCRE_UTF8. */
+utf = (options & PCRE_UTF8) != 0;
 
-/* Can't support UTF8 unless PCRE has been compiled to include the code. The
-return of an error code from _pcre_valid_utf8() is a new feature, introduced in
+/* Can't support UTF unless PCRE has been compiled to include the code. The
+return of an error code from PRIV(valid_utf)() is a new feature, introduced in
 release 8.13. It is passed back from pcre_[dfa_]exec(), but at the moment is
 not used here. */
 
-#ifdef SUPPORT_UTF8
-if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&
-     (errorcode = _pcre_valid_utf8((USPTR)pattern, -1, erroroffset)) != 0)
+#ifdef SUPPORT_UTF
+if (utf && (options & PCRE_NO_UTF8_CHECK) == 0 &&
+     (errorcode = PRIV(valid_utf)((PCRE_PUCHAR)pattern, -1, erroroffset)) != 0)
   {
+#ifdef COMPILE_PCRE8
   errorcode = ERR44;
+#else
+  errorcode = ERR74;
+#endif
   goto PCRE_EARLY_ERROR_RETURN2;
   }
 #else
-if (utf8)
+if (utf)
   {
   errorcode = ERR32;
   goto PCRE_EARLY_ERROR_RETURN;
@@ -7492,7 +7793,10 @@ cd->backref_map = 0;
 /* Reflect pattern for debugging output */
 
 DPRINTF(("------------------------------------------------------------------\n"));
-DPRINTF(("%s\n", pattern));
+#ifdef PCRE_DEBUG
+print_puchar(stdout, (PCRE_PUCHAR)pattern);
+#endif
+DPRINTF(("\n"));
 
 /* Pretend to compile the pattern while actually just accumulating the length
 of memory required. This behaviour is triggered by passing a non-NULL final
@@ -7509,9 +7813,10 @@ cd->start_code = cworkspace;
 cd->hwm = cworkspace;
 cd->start_workspace = cworkspace;
 cd->workspace_size = COMPILE_WORK_SIZE;
-cd->start_pattern = (const uschar *)pattern;
-cd->end_pattern = (const uschar *)(pattern + strlen(pattern));
+cd->start_pattern = (const pcre_uchar *)pattern;
+cd->end_pattern = (const pcre_uchar *)(pattern + STRLEN_UC((const pcre_uchar *)pattern));
 cd->req_varyopt = 0;
+cd->assert_depth = 0;
 cd->external_options = options;
 cd->external_flags = 0;
 cd->open_caps = NULL;
@@ -7526,11 +7831,11 @@ ptr += skipatstart;
 code = cworkspace;
 *code = OP_BRA;
 (void)compile_regex(cd->external_options, &code, &ptr, &errorcode, FALSE,
-  FALSE, 0, 0, &firstbyte, &reqbyte, NULL, cd, &length);
+  FALSE, 0, 0, &firstchar, &reqchar, NULL, cd, &length);
 if (errorcode != 0) goto PCRE_EARLY_ERROR_RETURN;
 
 DPRINTF(("end pre-compile: length=%d workspace=%d\n", length,
-  cd->hwm - cworkspace));
+  (int)(cd->hwm - cworkspace)));
 
 if (length > MAX_PATTERN_SIZE)
   {
@@ -7543,8 +7848,8 @@ externally provided function. Integer overflow should no longer be possible
 because nowadays we limit the maximum value of cd->names_found and
 cd->name_entry_size. */
 
-size = length + sizeof(real_pcre) + cd->names_found * cd->name_entry_size;
-re = (real_pcre *)(pcre_malloc)(size);
+size = sizeof(REAL_PCRE) + (length + cd->names_found * cd->name_entry_size) * sizeof(pcre_uchar);
+re = (REAL_PCRE *)(PUBL(malloc))(size);
 
 if (re == NULL)
   {
@@ -7563,13 +7868,13 @@ re->size = (int)size;
 re->options = cd->external_options;
 re->flags = cd->external_flags;
 re->dummy1 = 0;
-re->first_byte = 0;
-re->req_byte = 0;
-re->name_table_offset = sizeof(real_pcre);
+re->first_char = 0;
+re->req_char = 0;
+re->name_table_offset = sizeof(REAL_PCRE) / sizeof(pcre_uchar);
 re->name_entry_size = cd->name_entry_size;
 re->name_count = cd->names_found;
 re->ref_count = 0;
-re->tables = (tables == _pcre_default_tables)? NULL : tables;
+re->tables = (tables == PRIV(default_tables))? NULL : tables;
 re->nullpad = NULL;
 
 /* The starting points of the name/number translation table and of the code are
@@ -7583,10 +7888,10 @@ cd->final_bracount = cd->bracount;  /* Save for checking forward references */
 cd->assert_depth = 0;
 cd->bracount = 0;
 cd->names_found = 0;
-cd->name_table = (uschar *)re + re->name_table_offset;
+cd->name_table = (pcre_uchar *)re + re->name_table_offset;
 codestart = cd->name_table + re->name_entry_size * re->name_count;
 cd->start_code = codestart;
-cd->hwm = (uschar *)(cd->start_workspace);
+cd->hwm = (pcre_uchar *)(cd->start_workspace);
 cd->req_varyopt = 0;
 cd->had_accept = FALSE;
 cd->check_lookbehind = FALSE;
@@ -7596,16 +7901,16 @@ cd->open_caps = NULL;
 error, errorcode will be set non-zero, so we don't need to look at the result
 of the function here. */
 
-ptr = (const uschar *)pattern + skipatstart;
-code = (uschar *)codestart;
+ptr = (const pcre_uchar *)pattern + skipatstart;
+code = (pcre_uchar *)codestart;
 *code = OP_BRA;
 (void)compile_regex(re->options, &code, &ptr, &errorcode, FALSE, FALSE, 0, 0,
-  &firstbyte, &reqbyte, NULL, cd, NULL);
+  &firstchar, &reqchar, NULL, cd, NULL);
 re->top_bracket = cd->bracount;
 re->top_backref = cd->top_backref;
-re->flags = cd->external_flags;
+re->flags = cd->external_flags | PCRE_MODE;
 
-if (cd->had_accept) reqbyte = REQ_NONE;   /* Must disable after (*ACCEPT) */
+if (cd->had_accept) reqchar = REQ_NONE;   /* Must disable after (*ACCEPT) */
 
 /* If not reached end of pattern on success, there's an excess bracket. */
 
@@ -7626,7 +7931,7 @@ references; optimize for them, as searching a large regex takes time. */
 if (cd->hwm > cd->start_workspace)
   {
   int prev_recno = -1;
-  const uschar *groupptr = NULL;
+  const pcre_uchar *groupptr = NULL;
   while (errorcode == 0 && cd->hwm > cd->start_workspace)
     {
     int offset, recno;
@@ -7635,18 +7940,18 @@ if (cd->hwm > cd->start_workspace)
     recno = GET(codestart, offset);
     if (recno != prev_recno)
       {
-      groupptr = _pcre_find_bracket(codestart, utf8, recno);
+      groupptr = PRIV(find_bracket)(codestart, utf, recno);
       prev_recno = recno;
       }
     if (groupptr == NULL) errorcode = ERR53;
-      else PUT(((uschar *)codestart), offset, (int)(groupptr - codestart));
+      else PUT(((pcre_uchar *)codestart), offset, (int)(groupptr - codestart));
     }
   }
 
 /* If the workspace had to be expanded, free the new memory. */
 
 if (cd->workspace_size > COMPILE_WORK_SIZE)
-  (pcre_free)((void *)cd->start_workspace);
+  (PUBL(free))((void *)cd->start_workspace);
 
 /* Give an error if there's back reference to a non-existent capturing
 subpattern. */
@@ -7663,21 +7968,21 @@ length, and set their lengths. */
 
 if (cd->check_lookbehind)
   {
-  uschar *cc = (uschar *)codestart;
+  pcre_uchar *cc = (pcre_uchar *)codestart;
 
   /* Loop, searching for OP_REVERSE items, and process those that do not have
   their length set. (Actually, it will also re-process any that have a length
   of zero, but that is a pathological case, and it does no harm.) When we find
   one, we temporarily terminate the branch it is in while we scan it. */
 
-  for (cc = (uschar *)_pcre_find_bracket(codestart, utf8, -1);
+  for (cc = (pcre_uchar *)PRIV(find_bracket)(codestart, utf, -1);
        cc != NULL;
-       cc = (uschar *)_pcre_find_bracket(cc, utf8, -1))
+       cc = (pcre_uchar *)PRIV(find_bracket)(cc, utf, -1))
     {
     if (GET(cc, 1) == 0)
       {
       int fixed_length;
-      uschar *be = cc - 1 - LINK_SIZE + GET(cc, -LINK_SIZE);
+      pcre_uchar *be = cc - 1 - LINK_SIZE + GET(cc, -LINK_SIZE);
       int end_op = *be;
       *be = OP_END;
       fixed_length = find_fixedlength(cc, (re->options & PCRE_UTF8) != 0, TRUE,
@@ -7700,9 +8005,9 @@ if (cd->check_lookbehind)
 
 if (errorcode != 0)
   {
-  (pcre_free)(re);
+  (PUBL(free))(re);
   PCRE_EARLY_ERROR_RETURN:
-  *erroroffset = (int)(ptr - (const uschar *)pattern);
+  *erroroffset = (int)(ptr - (const pcre_uchar *)pattern);
   PCRE_EARLY_ERROR_RETURN2:
   *errorptr = find_error_text(errorcode);
   if (errorcodeptr != NULL) *errorcodeptr = errorcode;
@@ -7725,13 +8030,38 @@ if ((re->options & PCRE_ANCHORED) == 0)
     re->options |= PCRE_ANCHORED;
   else
     {
-    if (firstbyte < 0)
-      firstbyte = find_firstassertedchar(codestart, FALSE);
-    if (firstbyte >= 0)   /* Remove caseless flag for non-caseable chars */
+    if (firstchar < 0)
+      firstchar = find_firstassertedchar(codestart, FALSE);
+    if (firstchar >= 0)   /* Remove caseless flag for non-caseable chars */
       {
-      int ch = firstbyte & 255;
-      re->first_byte = ((firstbyte & REQ_CASELESS) != 0 &&
-         cd->fcc[ch] == ch)? ch : firstbyte;
+#ifdef COMPILE_PCRE8
+      re->first_char = firstchar & 0xff;
+#else
+#ifdef COMPILE_PCRE16
+      re->first_char = firstchar & 0xffff;
+#endif
+#endif
+      if ((firstchar & REQ_CASELESS) != 0)
+        {
+#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
+        /* We ignore non-ASCII first chars in 8 bit mode. */
+        if (utf)
+          {
+          if (re->first_char < 128)
+            {
+            if (cd->fcc[re->first_char] != re->first_char)
+              re->flags |= PCRE_FCH_CASELESS;
+            }
+          else if (UCD_OTHERCASE(re->first_char) != re->first_char)
+            re->flags |= PCRE_FCH_CASELESS;
+          }
+        else
+#endif
+        if (MAX_255(re->first_char)
+            && cd->fcc[re->first_char] != re->first_char)
+          re->flags |= PCRE_FCH_CASELESS;
+        }
+
       re->flags |= PCRE_FIRSTSET;
       }
     else if (is_startline(codestart, 0, cd->backref_map))
@@ -7743,12 +8073,36 @@ if ((re->options & PCRE_ANCHORED) == 0)
 variable length item in the regex. Remove the caseless flag for non-caseable
 bytes. */
 
-if (reqbyte >= 0 &&
-     ((re->options & PCRE_ANCHORED) == 0 || (reqbyte & REQ_VARY) != 0))
+if (reqchar >= 0 &&
+     ((re->options & PCRE_ANCHORED) == 0 || (reqchar & REQ_VARY) != 0))
   {
-  int ch = reqbyte & 255;
-  re->req_byte = ((reqbyte & REQ_CASELESS) != 0 &&
-    cd->fcc[ch] == ch)? (reqbyte & ~REQ_CASELESS) : reqbyte;
+#ifdef COMPILE_PCRE8
+  re->req_char = reqchar & 0xff;
+#else
+#ifdef COMPILE_PCRE16
+  re->req_char = reqchar & 0xffff;
+#endif
+#endif
+  if ((reqchar & REQ_CASELESS) != 0)
+    {
+#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
+    /* We ignore non-ASCII first chars in 8 bit mode. */
+    if (utf)
+      {
+      if (re->req_char < 128)
+        {
+        if (cd->fcc[re->req_char] != re->req_char)
+          re->flags |= PCRE_RCH_CASELESS;
+        }
+      else if (UCD_OTHERCASE(re->req_char) != re->req_char)
+        re->flags |= PCRE_RCH_CASELESS;
+      }
+    else
+#endif
+    if (MAX_255(re->req_char) && cd->fcc[re->req_char] != re->req_char)
+      re->flags |= PCRE_RCH_CASELESS;
+    }
+
   re->flags |= PCRE_REQCHSET;
   }
 
@@ -7763,38 +8117,46 @@ printf("Options=%08x\n", re->options);
 
 if ((re->flags & PCRE_FIRSTSET) != 0)
   {
-  int ch = re->first_byte & 255;
-  const char *caseless = ((re->first_byte & REQ_CASELESS) == 0)?
-    "" : " (caseless)";
-  if (isprint(ch)) printf("First char = %c%s\n", ch, caseless);
+  pcre_uchar ch = re->first_char;
+  const char *caseless =
+    ((re->flags & PCRE_FCH_CASELESS) == 0)? "" : " (caseless)";
+  if (PRINTABLE(ch)) printf("First char = %c%s\n", ch, caseless);
     else printf("First char = \\x%02x%s\n", ch, caseless);
   }
 
 if ((re->flags & PCRE_REQCHSET) != 0)
   {
-  int ch = re->req_byte & 255;
-  const char *caseless = ((re->req_byte & REQ_CASELESS) == 0)?
-    "" : " (caseless)";
-  if (isprint(ch)) printf("Req char = %c%s\n", ch, caseless);
+  pcre_uchar ch = re->req_char;
+  const char *caseless =
+    ((re->flags & PCRE_RCH_CASELESS) == 0)? "" : " (caseless)";
+  if (PRINTABLE(ch)) printf("Req char = %c%s\n", ch, caseless);
     else printf("Req char = \\x%02x%s\n", ch, caseless);
   }
 
-pcre_printint(re, stdout, TRUE);
+#ifdef COMPILE_PCRE8
+pcre_printint((pcre *)re, stdout, TRUE);
+#else
+pcre16_printint((pcre *)re, stdout, TRUE);
+#endif
 
 /* This check is done here in the debugging case so that the code that
 was compiled can be seen. */
 
 if (code - codestart > length)
   {
-  (pcre_free)(re);
+  (PUBL(free))(re);
   *errorptr = find_error_text(ERR23);
-  *erroroffset = ptr - (uschar *)pattern;
+  *erroroffset = ptr - (pcre_uchar *)pattern;
   if (errorcodeptr != NULL) *errorcodeptr = ERR23;
   return NULL;
   }
 #endif   /* PCRE_DEBUG */
 
+#ifdef COMPILE_PCRE8
 return (pcre *)re;
+#else
+return (pcre16 *)re;
+#endif
 }
 
 /* End of pcre_compile.c */
diff --git a/usr.sbin/nginx/src/pcre/pcre_exec.c b/usr.sbin/nginx/src/pcre/pcre_exec.c
index 46498d590cf..b71535355c1 100644
--- a/usr.sbin/nginx/src/pcre/pcre_exec.c
+++ b/usr.sbin/nginx/src/pcre/pcre_exec.c
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
-           Copyright (c) 1997-2011 University of Cambridge
+           Copyright (c) 1997-2012 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -113,7 +113,7 @@ Returns:     nothing
 */
 
 static void
-pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
+pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
 {
 unsigned int c;
 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
@@ -144,11 +144,11 @@ Returns:      < 0 if not matched, otherwise the number of subject bytes matched
 */
 
 static int
-match_ref(int offset, register USPTR eptr, int length, match_data *md,
+match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md,
   BOOL caseless)
 {
-USPTR eptr_start = eptr;
-register USPTR p = md->start_subject + md->offset_vector[offset];
+PCRE_PUCHAR eptr_start = eptr;
+register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];
 
 #ifdef PCRE_DEBUG
 if (eptr >= md->end_subject)
@@ -173,9 +173,9 @@ ASCII characters. */
 
 if (caseless)
   {
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
 #ifdef SUPPORT_UCP
-  if (md->utf8)
+  if (md->utf)
     {
     /* Match characters up to the end of the reference. NOTE: the number of
     bytes matched may differ, because there are some characters whose upper and
@@ -185,7 +185,7 @@ if (caseless)
     the latter. It is important, therefore, to check the length along the
     reference, not along the subject (earlier code did this wrong). */
 
-    USPTR endptr = p + length;
+    PCRE_PUCHAR endptr = p + length;
     while (p < endptr)
       {
       int c, d;
@@ -204,7 +204,11 @@ if (caseless)
     {
     if (eptr + length > md->end_subject) return -1;
     while (length-- > 0)
-      { if (md->lcc[*p++] != md->lcc[*eptr++]) return -1; }
+      {
+      if (TABLE_GET(*p, md->lcc, *p) != TABLE_GET(*eptr, md->lcc, *eptr)) return -1;
+      p++;
+      eptr++;
+      }
     }
   }
 
@@ -307,7 +311,7 @@ argument of match(), which never changes. */
 
 #define RMATCH(ra,rb,rc,rd,re,rw)\
   {\
-  heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\
+  heapframe *newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\
   if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
   frame->Xwhere = rw; \
   newframe->Xeptr = ra;\
@@ -328,7 +332,7 @@ argument of match(), which never changes. */
   {\
   heapframe *oldframe = frame;\
   frame = oldframe->Xprevframe;\
-  (pcre_stack_free)(oldframe);\
+  if (oldframe != &frame_zero) (PUBL(stack_free))(oldframe);\
   if (frame != NULL)\
     {\
     rrc = ra;\
@@ -345,24 +349,24 @@ typedef struct heapframe {
 
   /* Function arguments that may change */
 
-  USPTR Xeptr;
-  const uschar *Xecode;
-  USPTR Xmstart;
+  PCRE_PUCHAR Xeptr;
+  const pcre_uchar *Xecode;
+  PCRE_PUCHAR Xmstart;
   int Xoffset_top;
   eptrblock *Xeptrb;
   unsigned int Xrdepth;
 
   /* Function local variables */
 
-  USPTR Xcallpat;
-#ifdef SUPPORT_UTF8
-  USPTR Xcharptr;
+  PCRE_PUCHAR Xcallpat;
+#ifdef SUPPORT_UTF
+  PCRE_PUCHAR Xcharptr;
 #endif
-  USPTR Xdata;
-  USPTR Xnext;
-  USPTR Xpp;
-  USPTR Xprev;
-  USPTR Xsaved_eptr;
+  PCRE_PUCHAR Xdata;
+  PCRE_PUCHAR Xnext;
+  PCRE_PUCHAR Xpp;
+  PCRE_PUCHAR Xprev;
+  PCRE_PUCHAR Xsaved_eptr;
 
   recursion_info Xnew_recursive;
 
@@ -375,7 +379,7 @@ typedef struct heapframe {
   int Xprop_value;
   int Xprop_fail_result;
   int Xoclength;
-  uschar Xocchars[8];
+  pcre_uchar Xocchars[6];
 #endif
 
   int Xcodelink;
@@ -440,7 +444,7 @@ the subject. */
 
 
 /* Performance note: It might be tempting to extract commonly used fields from
-the md structure (e.g. utf8, end_subject) into individual variables to improve
+the md structure (e.g. utf, end_subject) into individual variables to improve
 performance. Tests using gcc on a SPARC disproved this; in the first case, it
 made performance worse.
 
@@ -463,8 +467,9 @@ Returns:       MATCH_MATCH if matched            )  these values are >= 0
 */
 
 static int
-match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
-  int offset_top, match_data *md, eptrblock *eptrb, unsigned int rdepth)
+match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode,
+  PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb,
+  unsigned int rdepth)
 {
 /* These variables do not need to be preserved over recursion in this function,
 so they can be ordinary variables in all cases. Mark some of them with
@@ -473,20 +478,22 @@ so they can be ordinary variables in all cases. Mark some of them with
 register int  rrc;         /* Returns from recursive calls */
 register int  i;           /* Used for loops not involving calls to RMATCH() */
 register unsigned int c;   /* Character values not kept over RMATCH() calls */
-register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
+register BOOL utf;         /* Local copy of UTF flag for speed */
 
 BOOL minimize, possessive; /* Quantifier options */
 BOOL caseless;
 int condcode;
 
 /* When recursion is not being used, all "local" variables that have to be
-preserved over calls to RMATCH() are part of a "frame" which is obtained from
-heap storage. Set up the top-level frame here; others are obtained from the
-heap whenever RMATCH() does a "recursion". See the macro definitions above. */
+preserved over calls to RMATCH() are part of a "frame". We set up the top-level
+frame on the stack here; subsequent instantiations are obtained from the heap
+whenever RMATCH() does a "recursion". See the macro definitions above. Putting
+the top-level on the stack rather than malloc-ing them all gives a performance
+boost in many cases where there is not much "recursion". */
 
 #ifdef NO_RECURSE
-heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));
-if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
+heapframe frame_zero;
+heapframe *frame = &frame_zero;
 frame->Xprevframe = NULL;            /* Marks the top level */
 
 /* Copy in the original argument variables */
@@ -513,7 +520,7 @@ HEAP_RECURSE:
 
 /* Ditto for the local variables */
 
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
 #define charptr            frame->Xcharptr
 #endif
 #define callpat            frame->Xcallpat
@@ -571,15 +578,15 @@ declarations can be cut out in a block. The only declarations within blocks
 below are for variables that do not have to be preserved over a recursive call
 to RMATCH(). */
 
-#ifdef SUPPORT_UTF8
-const uschar *charptr;
+#ifdef SUPPORT_UTF
+const pcre_uchar *charptr;
 #endif
-const uschar *callpat;
-const uschar *data;
-const uschar *next;
-USPTR         pp;
-const uschar *prev;
-USPTR         saved_eptr;
+const pcre_uchar *callpat;
+const pcre_uchar *data;
+const pcre_uchar *next;
+PCRE_PUCHAR       pp;
+const pcre_uchar *prev;
+PCRE_PUCHAR       saved_eptr;
 
 recursion_info new_recursive;
 
@@ -592,7 +599,7 @@ int prop_type;
 int prop_value;
 int prop_fail_result;
 int oclength;
-uschar occhars[8];
+pcre_uchar occhars[6];
 #endif
 
 int codelink;
@@ -608,6 +615,23 @@ int save_offset1, save_offset2, save_offset3;
 int stacksave[REC_STACK_SAVE_MAX];
 
 eptrblock newptrb;
+
+/* There is a special fudge for calling match() in a way that causes it to
+measure the size of its basic stack frame when the stack is being used for
+recursion. The second argument (ecode) being NULL triggers this behaviour. It
+cannot normally ever be NULL. The return is the negated value of the frame
+size. */
+
+if (ecode == NULL)
+  {
+  if (rdepth == 0)
+    return match((PCRE_PUCHAR)&rdepth, NULL, NULL, 0, NULL, NULL, 1);
+  else
+    {
+    int len = (char *)&rdepth - (char *)eptr;
+    return (len > 0)? -len : len;
+    }
+  }
 #endif     /* NO_RECURSE */
 
 /* To save space on the stack and in the heap frame, I have doubled up on some
@@ -620,6 +644,8 @@ the alternative names that are used. */
 #define code_offset   codelink
 #define condassert    condition
 #define matched_once  prev_is_word
+#define foc           number
+#define save_mark     data
 
 /* These statements are here to stop the compiler complaining about unitialized
 variables. */
@@ -645,10 +671,10 @@ defined). However, RMATCH isn't like a function call because it's quite a
 complicated macro. It has to be used in one particular way. This shouldn't,
 however, impact performance when true recursion is being used. */
 
-#ifdef SUPPORT_UTF8
-utf8 = md->utf8;       /* Local copy of the flag */
+#ifdef SUPPORT_UTF
+utf = md->utf;       /* Local copy of the flag */
 #else
-utf8 = FALSE;
+utf = FALSE;
 #endif
 
 /* First check that we haven't called match() too many times, or that we
@@ -689,7 +715,7 @@ for (;;)
     case OP_MARK:
     md->nomatch_mark = ecode + 2;
     md->mark = NULL;    /* In case previously set by assertion */
-    RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
+    RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
       eptrb, RM55);
     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
          md->mark == NULL) md->mark = ecode + 2;
@@ -702,7 +728,7 @@ for (;;)
     unaltered. */
 
     else if (rrc == MATCH_SKIP_ARG &&
-        strcmp((char *)(ecode + 2), (char *)(md->start_match_ptr)) == 0)
+        STRCMP_UC_UC(ecode + 2, md->start_match_ptr) == 0)
       {
       md->start_match_ptr = eptr;
       RRETURN(MATCH_SKIP);
@@ -715,7 +741,7 @@ for (;;)
     /* COMMIT overrides PRUNE, SKIP, and THEN */
 
     case OP_COMMIT:
-    RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
+    RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
       eptrb, RM52);
     if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
         rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
@@ -726,7 +752,7 @@ for (;;)
     /* PRUNE overrides THEN */
 
     case OP_PRUNE:
-    RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
+    RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
       eptrb, RM51);
     if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
     RRETURN(MATCH_PRUNE);
@@ -734,7 +760,7 @@ for (;;)
     case OP_PRUNE_ARG:
     md->nomatch_mark = ecode + 2;
     md->mark = NULL;    /* In case previously set by assertion */
-    RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
+    RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
       eptrb, RM56);
     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
          md->mark == NULL) md->mark = ecode + 2;
@@ -744,7 +770,7 @@ for (;;)
     /* SKIP overrides PRUNE and THEN */
 
     case OP_SKIP:
-    RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
+    RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
       eptrb, RM53);
     if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
       RRETURN(rrc);
@@ -758,10 +784,10 @@ for (;;)
     case OP_SKIP_ARG:
     if (md->ignore_skip_arg)
       {
-      ecode += _pcre_OP_lengths[*ecode] + ecode[1];
+      ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
       break;
       }
-    RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
+    RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
       eptrb, RM57);
     if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
       RRETURN(rrc);
@@ -779,7 +805,7 @@ for (;;)
     match pointer to do this. */
 
     case OP_THEN:
-    RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
+    RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
       eptrb, RM54);
     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
     md->start_match_ptr = ecode;
@@ -788,7 +814,7 @@ for (;;)
     case OP_THEN_ARG:
     md->nomatch_mark = ecode + 2;
     md->mark = NULL;    /* In case previously set by assertion */
-    RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top,
+    RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top,
       md, eptrb, RM58);
     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
          md->mark == NULL) md->mark = ecode + 2;
@@ -812,6 +838,7 @@ for (;;)
     case OP_ONCE_NC:
     prev = ecode;
     saved_eptr = eptr;
+    save_mark = md->mark;
     do
       {
       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
@@ -830,6 +857,7 @@ for (;;)
 
       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
       ecode += GET(ecode,1);
+      md->mark = save_mark;
       }
     while (*ecode == OP_ALT);
 
@@ -909,6 +937,7 @@ for (;;)
       save_offset2 = md->offset_vector[offset+1];
       save_offset3 = md->offset_vector[md->offset_end - number];
       save_capture_last = md->capture_last;
+      save_mark = md->mark;
 
       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
       md->offset_vector[md->offset_end - number] =
@@ -917,7 +946,7 @@ for (;;)
       for (;;)
         {
         if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
-        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
+        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
           eptrb, RM1);
         if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */
 
@@ -945,6 +974,7 @@ for (;;)
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
         md->capture_last = save_capture_last;
         ecode += GET(ecode, 1);
+        md->mark = save_mark;
         if (*ecode != OP_ALT) break;
         }
 
@@ -1004,13 +1034,14 @@ for (;;)
 
       else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
         {
-        ecode += _pcre_OP_lengths[*ecode];
+        ecode += PRIV(OP_lengths)[*ecode];
         goto TAIL_RECURSE;
         }
 
       /* In all other cases, we have to make another call to match(). */
 
-      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, eptrb,
+      save_mark = md->mark;
+      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
         RM2);
 
       /* See comment in the code for capturing groups above about handling
@@ -1028,7 +1059,7 @@ for (;;)
         {
         if (rrc == MATCH_ONCE)
           {
-          const uschar *scode = ecode;
+          const pcre_uchar *scode = ecode;
           if (*scode != OP_ONCE)           /* If not at start, find it */
             {
             while (*scode == OP_ALT) scode += GET(scode, 1);
@@ -1039,6 +1070,7 @@ for (;;)
         RRETURN(rrc);
         }
       ecode += GET(ecode, 1);
+      md->mark = save_mark;
       if (*ecode != OP_ALT) break;
       }
 
@@ -1093,7 +1125,7 @@ for (;;)
         md->offset_vector[md->offset_end - number] =
           (int)(eptr - md->start_subject);
         if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
-        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
+        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
           eptrb, RM63);
         if (rrc == MATCH_KETRPOS)
           {
@@ -1165,7 +1197,7 @@ for (;;)
     for (;;)
       {
       if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
-      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
+      RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
         eptrb, RM48);
       if (rrc == MATCH_KETRPOS)
         {
@@ -1215,13 +1247,17 @@ for (;;)
 
     if (ecode[LINK_SIZE+1] == OP_CALLOUT)
       {
-      if (pcre_callout != NULL)
+      if (PUBL(callout) != NULL)
         {
-        pcre_callout_block cb;
+        PUBL(callout_block) cb;
         cb.version          = 2;   /* Version 1 of the callout block */
         cb.callout_number   = ecode[LINK_SIZE+2];
         cb.offset_vector    = md->offset_vector;
+#ifdef COMPILE_PCRE8
         cb.subject          = (PCRE_SPTR)md->start_subject;
+#else
+        cb.subject          = (PCRE_SPTR16)md->start_subject;
+#endif
         cb.subject_length   = (int)(md->end_subject - md->start_subject);
         cb.start_match      = (int)(mstart - md->start_subject);
         cb.current_position = (int)(eptr - md->start_subject);
@@ -1231,10 +1267,10 @@ for (;;)
         cb.capture_last     = md->capture_last;
         cb.callout_data     = md->callout_data;
         cb.mark             = md->nomatch_mark;
-        if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
+        if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
         if (rrc < 0) RRETURN(rrc);
         }
-      ecode += _pcre_OP_lengths[OP_CALLOUT];
+      ecode += PRIV(OP_lengths)[OP_CALLOUT];
       }
 
     condcode = ecode[LINK_SIZE+1];
@@ -1260,7 +1296,7 @@ for (;;)
 
         if (!condition && condcode == OP_NRREF)
           {
-          uschar *slotA = md->name_table;
+          pcre_uchar *slotA = md->name_table;
           for (i = 0; i < md->name_count; i++)
             {
             if (GET2(slotA, 0) == recno) break;
@@ -1273,11 +1309,11 @@ for (;;)
 
           if (i < md->name_count)
             {
-            uschar *slotB = slotA;
+            pcre_uchar *slotB = slotA;
             while (slotB > md->name_table)
               {
               slotB -= md->name_entry_size;
-              if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
+              if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
                 {
                 condition = GET2(slotB, 0) == md->recursive->group_num;
                 if (condition) break;
@@ -1293,7 +1329,7 @@ for (;;)
               for (i++; i < md->name_count; i++)
                 {
                 slotB += md->name_entry_size;
-                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
+                if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
                   {
                   condition = GET2(slotB, 0) == md->recursive->group_num;
                   if (condition) break;
@@ -1306,7 +1342,7 @@ for (;;)
 
         /* Chose branch according to the condition */
 
-        ecode += condition? 3 : GET(ecode, 1);
+        ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
         }
       }
 
@@ -1323,7 +1359,7 @@ for (;;)
       if (!condition && condcode == OP_NCREF)
         {
         int refno = offset >> 1;
-        uschar *slotA = md->name_table;
+        pcre_uchar *slotA = md->name_table;
 
         for (i = 0; i < md->name_count; i++)
           {
@@ -1337,11 +1373,11 @@ for (;;)
 
         if (i < md->name_count)
           {
-          uschar *slotB = slotA;
+          pcre_uchar *slotB = slotA;
           while (slotB > md->name_table)
             {
             slotB -= md->name_entry_size;
-            if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
+            if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
               {
               offset = GET2(slotB, 0) << 1;
               condition = offset < offset_top &&
@@ -1359,7 +1395,7 @@ for (;;)
             for (i++; i < md->name_count; i++)
               {
               slotB += md->name_entry_size;
-              if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
+              if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
                 {
                 offset = GET2(slotB, 0) << 1;
                 condition = offset < offset_top &&
@@ -1374,7 +1410,7 @@ for (;;)
 
       /* Chose branch according to the condition */
 
-      ecode += condition? 3 : GET(ecode, 1);
+      ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
       }
 
     else if (condcode == OP_DEF)     /* DEFINE - always false */
@@ -1466,7 +1502,7 @@ for (;;)
       md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
       if (offset_top <= offset) offset_top = offset + 2;
       }
-    ecode += 3;
+    ecode += 1 + IMM2_SIZE;
     break;
 
 
@@ -1513,6 +1549,7 @@ for (;;)
 
     case OP_ASSERT:
     case OP_ASSERTBACK:
+    save_mark = md->mark;
     if (md->match_function_type == MATCH_CONDASSERT)
       {
       condassert = TRUE;
@@ -1534,6 +1571,7 @@ for (;;)
 
       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
       ecode += GET(ecode, 1);
+      md->mark = save_mark;
       }
     while (*ecode == OP_ALT);
 
@@ -1557,6 +1595,7 @@ for (;;)
 
     case OP_ASSERT_NOT:
     case OP_ASSERTBACK_NOT:
+    save_mark = md->mark;
     if (md->match_function_type == MATCH_CONDASSERT)
       {
       condassert = TRUE;
@@ -1567,6 +1606,7 @@ for (;;)
     do
       {
       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
+      md->mark = save_mark;
       if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);
       if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
         {
@@ -1593,8 +1633,8 @@ for (;;)
     back a number of characters, not bytes. */
 
     case OP_REVERSE:
-#ifdef SUPPORT_UTF8
-    if (utf8)
+#ifdef SUPPORT_UTF
+    if (utf)
       {
       i = GET(ecode, 1);
       while (i-- > 0)
@@ -1625,13 +1665,17 @@ for (;;)
     function is able to force a failure. */
 
     case OP_CALLOUT:
-    if (pcre_callout != NULL)
+    if (PUBL(callout) != NULL)
       {
-      pcre_callout_block cb;
+      PUBL(callout_block) cb;
       cb.version          = 2;   /* Version 1 of the callout block */
       cb.callout_number   = ecode[1];
       cb.offset_vector    = md->offset_vector;
+#ifdef COMPILE_PCRE8
       cb.subject          = (PCRE_SPTR)md->start_subject;
+#else
+      cb.subject          = (PCRE_SPTR16)md->start_subject;
+#endif
       cb.subject_length   = (int)(md->end_subject - md->start_subject);
       cb.start_match      = (int)(mstart - md->start_subject);
       cb.current_position = (int)(eptr - md->start_subject);
@@ -1641,7 +1685,7 @@ for (;;)
       cb.capture_last     = md->capture_last;
       cb.callout_data     = md->callout_data;
       cb.mark             = md->nomatch_mark;
-      if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
+      if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
       if (rrc < 0) RRETURN(rrc);
       }
     ecode += 2 + 2*LINK_SIZE;
@@ -1700,7 +1744,7 @@ for (;;)
       else
         {
         new_recursive.offset_save =
-          (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
+          (int *)(PUBL(malloc))(new_recursive.saved_max * sizeof(int));
         if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
         }
       memcpy(new_recursive.offset_save, md->offset_vector,
@@ -1715,7 +1759,7 @@ for (;;)
       do
         {
         if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
-        RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
+        RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
           md, eptrb, RM6);
         memcpy(md->offset_vector, new_recursive.offset_save,
             new_recursive.saved_max * sizeof(int));
@@ -1724,7 +1768,7 @@ for (;;)
           {
           DPRINTF(("Recursion matched\n"));
           if (new_recursive.offset_save != stacksave)
-            (pcre_free)(new_recursive.offset_save);
+            (PUBL(free))(new_recursive.offset_save);
 
           /* Set where we got to in the subject, and reset the start in case
           it was changed by \K. This *is* propagated back out of a recursion,
@@ -1742,7 +1786,7 @@ for (;;)
           {
           DPRINTF(("Recursion gave error %d\n", rrc));
           if (new_recursive.offset_save != stacksave)
-            (pcre_free)(new_recursive.offset_save);
+            (PUBL(free))(new_recursive.offset_save);
           RRETURN(rrc);
           }
 
@@ -1754,7 +1798,7 @@ for (;;)
       DPRINTF(("Recursion didn't match\n"));
       md->recursive = new_recursive.prevrec;
       if (new_recursive.offset_save != stacksave)
-        (pcre_free)(new_recursive.offset_save);
+        (PUBL(free))(new_recursive.offset_save);
       RRETURN(MATCH_NOMATCH);
       }
 
@@ -2066,15 +2110,15 @@ for (;;)
       be "non-word" characters. Remember the earliest consulted character for
       partial matching. */
 
-#ifdef SUPPORT_UTF8
-      if (utf8)
+#ifdef SUPPORT_UTF
+      if (utf)
         {
         /* Get status of previous character */
 
         if (eptr == md->start_subject) prev_is_word = FALSE; else
           {
-          USPTR lastptr = eptr - 1;
-          while((*lastptr & 0xc0) == 0x80) lastptr--;
+          PCRE_PUCHAR lastptr = eptr - 1;
+          BACKCHAR(lastptr);
           if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
           GETCHAR(c, lastptr);
 #ifdef SUPPORT_UCP
@@ -2139,7 +2183,8 @@ for (;;)
             }
           else
 #endif
-          prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
+          prev_is_word = MAX_255(eptr[-1])
+            && ((md->ctypes[eptr[-1]] & ctype_word) != 0);
           }
 
         /* Get status of next character */
@@ -2162,7 +2207,8 @@ for (;;)
           }
         else
 #endif
-        cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
+        cur_is_word = MAX_255(*eptr)
+          && ((md->ctypes[*eptr] & ctype_word) != 0);
         }
 
       /* Now see if the situation is what we want */
@@ -2186,7 +2232,9 @@ for (;;)
       RRETURN(MATCH_NOMATCH);
       }
     eptr++;
-    if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
+#ifdef SUPPORT_UTF
+    if (utf) ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
+#endif
     ecode++;
     break;
 
@@ -2211,7 +2259,7 @@ for (;;)
       }
     GETCHARINCTEST(c, eptr);
     if (
-#ifdef SUPPORT_UTF8
+#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
        c < 256 &&
 #endif
        (md->ctypes[c] & ctype_digit) != 0
@@ -2228,8 +2276,8 @@ for (;;)
       }
     GETCHARINCTEST(c, eptr);
     if (
-#ifdef SUPPORT_UTF8
-       c >= 256 ||
+#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
+       c > 255 ||
 #endif
        (md->ctypes[c] & ctype_digit) == 0
        )
@@ -2245,7 +2293,7 @@ for (;;)
       }
     GETCHARINCTEST(c, eptr);
     if (
-#ifdef SUPPORT_UTF8
+#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
        c < 256 &&
 #endif
        (md->ctypes[c] & ctype_space) != 0
@@ -2262,8 +2310,8 @@ for (;;)
       }
     GETCHARINCTEST(c, eptr);
     if (
-#ifdef SUPPORT_UTF8
-       c >= 256 ||
+#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
+       c > 255 ||
 #endif
        (md->ctypes[c] & ctype_space) == 0
        )
@@ -2279,7 +2327,7 @@ for (;;)
       }
     GETCHARINCTEST(c, eptr);
     if (
-#ifdef SUPPORT_UTF8
+#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
        c < 256 &&
 #endif
        (md->ctypes[c] & ctype_word) != 0
@@ -2296,8 +2344,8 @@ for (;;)
       }
     GETCHARINCTEST(c, eptr);
     if (
-#ifdef SUPPORT_UTF8
-       c >= 256 ||
+#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
+       c > 255 ||
 #endif
        (md->ctypes[c] & ctype_word) == 0
        )
@@ -2475,7 +2523,7 @@ for (;;)
         break;
 
         case PT_GC:
-        if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
+        if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
           RRETURN(MATCH_NOMATCH);
         break;
 
@@ -2492,20 +2540,20 @@ for (;;)
         /* These are specials */
 
         case PT_ALNUM:
-        if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
-             _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
+        if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
+             PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
           RRETURN(MATCH_NOMATCH);
         break;
 
         case PT_SPACE:    /* Perl space */
-        if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
+        if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
              c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
                == (op == OP_NOTPROP))
           RRETURN(MATCH_NOMATCH);
         break;
 
         case PT_PXSPACE:  /* POSIX space */
-        if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
+        if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
              c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
              c == CHAR_FF || c == CHAR_CR)
                == (op == OP_NOTPROP))
@@ -2513,8 +2561,8 @@ for (;;)
         break;
 
         case PT_WORD:
-        if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
-             _pcre_ucp_gentype[prop->chartype] == ucp_N ||
+        if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
+             PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
              c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
           RRETURN(MATCH_NOMATCH);
         break;
@@ -2543,7 +2591,7 @@ for (;;)
     while (eptr < md->end_subject)
       {
       int len = 1;
-      if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }
+      if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
       if (UCD_CATEGORY(c) != ucp_M) break;
       eptr += len;
       }
@@ -2564,7 +2612,7 @@ for (;;)
     case OP_REFI:
     caseless = op == OP_REFI;
     offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
-    ecode += 3;
+    ecode += 1 + IMM2_SIZE;
 
     /* If the reference is unset, there are two possibilities:
 
@@ -2604,9 +2652,9 @@ for (;;)
       case OP_CRMINRANGE:
       minimize = (*ecode == OP_CRMINRANGE);
       min = GET2(ecode, 1);
-      max = GET2(ecode, 3);
+      max = GET2(ecode, 1 + IMM2_SIZE);
       if (max == 0) max = INT_MAX;
-      ecode += 5;
+      ecode += 1 + 2 * IMM2_SIZE;
       break;
 
       default:               /* No repeat follows */
@@ -2620,9 +2668,13 @@ for (;;)
       }
 
     /* Handle repeated back references. If the length of the reference is
-    zero, just continue with the main loop. */
+    zero, just continue with the main loop. If the length is negative, it
+    means the reference is unset in non-Java-compatible mode. If the minimum is
+    zero, we can continue at the same level without recursion. For any other
+    minimum, carrying on will result in NOMATCH. */
 
     if (length == 0) continue;
+    if (length < 0 && min == 0) continue;
 
     /* First, ensure the minimum number of matches are present. We get back
     the length of the reference string explicitly rather than passing the
@@ -2703,8 +2755,11 @@ for (;;)
     case OP_NCLASS:
     case OP_CLASS:
       {
+      /* The data variable is saved across frames, so the byte map needs to
+      be stored there. */
+#define BYTE_MAP ((pcre_uint8 *)data)
       data = ecode + 1;                /* Save for matching */
-      ecode += 33;                     /* Advance past the item */
+      ecode += 1 + (32 / sizeof(pcre_uchar)); /* Advance past the item */
 
       switch (*ecode)
         {
@@ -2725,9 +2780,9 @@ for (;;)
         case OP_CRMINRANGE:
         minimize = (*ecode == OP_CRMINRANGE);
         min = GET2(ecode, 1);
-        max = GET2(ecode, 3);
+        max = GET2(ecode, 1 + IMM2_SIZE);
         if (max == 0) max = INT_MAX;
-        ecode += 5;
+        ecode += 1 + 2 * IMM2_SIZE;
         break;
 
         default:               /* No repeat follows */
@@ -2737,9 +2792,8 @@ for (;;)
 
       /* First, ensure the minimum number of matches are present. */
 
-#ifdef SUPPORT_UTF8
-      /* UTF-8 mode */
-      if (utf8)
+#ifdef SUPPORT_UTF
+      if (utf)
         {
         for (i = 1; i <= min; i++)
           {
@@ -2754,14 +2808,12 @@ for (;;)
             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
             }
           else
-            {
-            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
-            }
+            if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
           }
         }
       else
 #endif
-      /* Not UTF-8 mode */
+      /* Not UTF mode */
         {
         for (i = 1; i <= min; i++)
           {
@@ -2771,7 +2823,14 @@ for (;;)
             RRETURN(MATCH_NOMATCH);
             }
           c = *eptr++;
-          if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
+#ifndef COMPILE_PCRE8
+          if (c > 255)
+            {
+            if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
+            }
+          else
+#endif
+            if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
           }
         }
 
@@ -2785,9 +2844,8 @@ for (;;)
 
       if (minimize)
         {
-#ifdef SUPPORT_UTF8
-        /* UTF-8 mode */
-        if (utf8)
+#ifdef SUPPORT_UTF
+        if (utf)
           {
           for (fi = min;; fi++)
             {
@@ -2805,14 +2863,12 @@ for (;;)
               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
               }
             else
-              {
-              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
-              }
+              if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
             }
           }
         else
 #endif
-        /* Not UTF-8 mode */
+        /* Not UTF mode */
           {
           for (fi = min;; fi++)
             {
@@ -2825,7 +2881,14 @@ for (;;)
               RRETURN(MATCH_NOMATCH);
               }
             c = *eptr++;
-            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
+#ifndef COMPILE_PCRE8
+            if (c > 255)
+              {
+              if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
+              }
+            else
+#endif
+              if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
             }
           }
         /* Control never gets here */
@@ -2837,9 +2900,8 @@ for (;;)
         {
         pp = eptr;
 
-#ifdef SUPPORT_UTF8
-        /* UTF-8 mode */
-        if (utf8)
+#ifdef SUPPORT_UTF
+        if (utf)
           {
           for (i = min; i < max; i++)
             {
@@ -2855,9 +2917,7 @@ for (;;)
               if (op == OP_CLASS) break;
               }
             else
-              {
-              if ((data[c/8] & (1 << (c&7))) == 0) break;
-              }
+              if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
             eptr += len;
             }
           for (;;)
@@ -2870,7 +2930,7 @@ for (;;)
           }
         else
 #endif
-          /* Not UTF-8 mode */
+          /* Not UTF mode */
           {
           for (i = min; i < max; i++)
             {
@@ -2880,7 +2940,14 @@ for (;;)
               break;
               }
             c = *eptr;
-            if ((data[c/8] & (1 << (c&7))) == 0) break;
+#ifndef COMPILE_PCRE8
+            if (c > 255)
+              {
+              if (op == OP_CLASS) break;
+              }
+            else
+#endif
+              if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
             eptr++;
             }
           while (eptr >= pp)
@@ -2893,6 +2960,7 @@ for (;;)
 
         RRETURN(MATCH_NOMATCH);
         }
+#undef BYTE_MAP
       }
     /* Control never gets here */
 
@@ -2901,7 +2969,7 @@ for (;;)
     when UTF-8 mode mode is supported. Nevertheless, we may not be in UTF-8
     mode, because Unicode properties are supported in non-UTF-8 mode. */
 
-#ifdef SUPPORT_UTF8
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
     case OP_XCLASS:
       {
       data = ecode + 1 + LINK_SIZE;                /* Save for matching */
@@ -2926,9 +2994,9 @@ for (;;)
         case OP_CRMINRANGE:
         minimize = (*ecode == OP_CRMINRANGE);
         min = GET2(ecode, 1);
-        max = GET2(ecode, 3);
+        max = GET2(ecode, 1 + IMM2_SIZE);
         if (max == 0) max = INT_MAX;
-        ecode += 5;
+        ecode += 1 + 2 * IMM2_SIZE;
         break;
 
         default:               /* No repeat follows */
@@ -2946,7 +3014,7 @@ for (;;)
           RRETURN(MATCH_NOMATCH);
           }
         GETCHARINCTEST(c, eptr);
-        if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
+        if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
         }
 
       /* If max == min we can continue with the main loop without the
@@ -2970,7 +3038,7 @@ for (;;)
             RRETURN(MATCH_NOMATCH);
             }
           GETCHARINCTEST(c, eptr);
-          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
+          if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
           }
         /* Control never gets here */
         }
@@ -2988,8 +3056,12 @@ for (;;)
             SCHECK_PARTIAL();
             break;
             }
+#ifdef SUPPORT_UTF
           GETCHARLENTEST(c, eptr, len);
-          if (!_pcre_xclass(c, data)) break;
+#else
+          c = *eptr;
+#endif
+          if (!PRIV(xclass)(c, data, utf)) break;
           eptr += len;
           }
         for(;;)
@@ -2997,7 +3069,9 @@ for (;;)
           RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
           if (eptr-- == pp) break;        /* Stop if tried at original pos */
-          if (utf8) BACKCHAR(eptr);
+#ifdef SUPPORT_UTF
+          if (utf) BACKCHAR(eptr);
+#endif
           }
         RRETURN(MATCH_NOMATCH);
         }
@@ -3009,8 +3083,8 @@ for (;;)
     /* Match a single character, casefully */
 
     case OP_CHAR:
-#ifdef SUPPORT_UTF8
-    if (utf8)
+#ifdef SUPPORT_UTF
+    if (utf)
       {
       length = 1;
       ecode++;
@@ -3024,8 +3098,7 @@ for (;;)
       }
     else
 #endif
-
-    /* Non-UTF-8 mode */
+    /* Not UTF mode */
       {
       if (md->end_subject - eptr < 1)
         {
@@ -3047,8 +3120,8 @@ for (;;)
       RRETURN(MATCH_NOMATCH);
       }
 
-#ifdef SUPPORT_UTF8
-    if (utf8)
+#ifdef SUPPORT_UTF
+    if (utf)
       {
       length = 1;
       ecode++;
@@ -3061,7 +3134,10 @@ for (;;)
 
       if (fc < 128)
         {
-        if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
+        if (md->lcc[fc]
+            != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
+        ecode++;
+        eptr++;
         }
 
       /* Otherwise we must pick up the subject character. Note that we cannot
@@ -3087,11 +3163,13 @@ for (;;)
         }
       }
     else
-#endif   /* SUPPORT_UTF8 */
+#endif   /* SUPPORT_UTF */
 
-    /* Non-UTF-8 mode */
+    /* Not UTF mode */
       {
-      if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
+      if (TABLE_GET(ecode[1], md->lcc, ecode[1])
+          != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
+      eptr++;
       ecode += 2;
       }
     break;
@@ -3101,7 +3179,7 @@ for (;;)
     case OP_EXACT:
     case OP_EXACTI:
     min = max = GET2(ecode, 1);
-    ecode += 3;
+    ecode += 1 + IMM2_SIZE;
     goto REPEATCHAR;
 
     case OP_POSUPTO:
@@ -3116,7 +3194,7 @@ for (;;)
     min = 0;
     max = GET2(ecode, 1);
     minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
-    ecode += 3;
+    ecode += 1 + IMM2_SIZE;
     goto REPEATCHAR;
 
     case OP_POSSTAR:
@@ -3164,8 +3242,8 @@ for (;;)
     /* Common code for all repeated single-character matches. */
 
     REPEATCHAR:
-#ifdef SUPPORT_UTF8
-    if (utf8)
+#ifdef SUPPORT_UTF
+    if (utf)
       {
       length = 1;
       charptr = ecode;
@@ -3181,18 +3259,18 @@ for (;;)
         unsigned int othercase;
         if (op >= OP_STARI &&     /* Caseless */
             (othercase = UCD_OTHERCASE(fc)) != fc)
-          oclength = _pcre_ord2utf8(othercase, occhars);
+          oclength = PRIV(ord2utf)(othercase, occhars);
         else oclength = 0;
 #endif  /* SUPPORT_UCP */
 
         for (i = 1; i <= min; i++)
           {
           if (eptr <= md->end_subject - length &&
-            memcmp(eptr, charptr, length) == 0) eptr += length;
+            memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
 #ifdef SUPPORT_UCP
           else if (oclength > 0 &&
                    eptr <= md->end_subject - oclength &&
-                   memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
+                   memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
 #endif  /* SUPPORT_UCP */
           else
             {
@@ -3211,11 +3289,11 @@ for (;;)
             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
             if (fi >= max) RRETURN(MATCH_NOMATCH);
             if (eptr <= md->end_subject - length &&
-              memcmp(eptr, charptr, length) == 0) eptr += length;
+              memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
 #ifdef SUPPORT_UCP
             else if (oclength > 0 &&
                      eptr <= md->end_subject - oclength &&
-                     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
+                     memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
 #endif  /* SUPPORT_UCP */
             else
               {
@@ -3232,11 +3310,11 @@ for (;;)
           for (i = min; i < max; i++)
             {
             if (eptr <= md->end_subject - length &&
-                memcmp(eptr, charptr, length) == 0) eptr += length;
+                memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
 #ifdef SUPPORT_UCP
             else if (oclength > 0 &&
                      eptr <= md->end_subject - oclength &&
-                     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
+                     memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
 #endif  /* SUPPORT_UCP */
             else
               {
@@ -3268,14 +3346,12 @@ for (;;)
       value of fc will always be < 128. */
       }
     else
-#endif  /* SUPPORT_UTF8 */
-
-    /* When not in UTF-8 mode, load a single-byte character. */
-
-    fc = *ecode++;
+#endif  /* SUPPORT_UTF */
+      /* When not in UTF-8 mode, load a single-byte character. */
+      fc = *ecode++;
 
-    /* The value of fc at this point is always less than 256, though we may or
-    may not be in UTF-8 mode. The code is duplicated for the caseless and
+    /* The value of fc at this point is always one character, though we may
+    or may not be in UTF mode. The code is duplicated for the caseless and
     caseful cases, for speed, since matching characters is likely to be quite
     common. First, ensure the minimum number of matches are present. If min =
     max, continue at the same level without recursing. Otherwise, if
@@ -3288,7 +3364,23 @@ for (;;)
 
     if (op >= OP_STARI)  /* Caseless */
       {
-      fc = md->lcc[fc];
+#ifdef COMPILE_PCRE8
+      /* fc must be < 128 if UTF is enabled. */
+      foc = md->fcc[fc];
+#else
+#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UCP
+      if (utf && fc > 127)
+        foc = UCD_OTHERCASE(fc);
+#else
+      if (utf && fc > 127)
+        foc = fc;
+#endif /* SUPPORT_UCP */
+      else
+#endif /* SUPPORT_UTF */
+        foc = TABLE_GET(fc, md->fcc, fc);
+#endif /* COMPILE_PCRE8 */
+
       for (i = 1; i <= min; i++)
         {
         if (eptr >= md->end_subject)
@@ -3296,7 +3388,8 @@ for (;;)
           SCHECK_PARTIAL();
           RRETURN(MATCH_NOMATCH);
           }
-        if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
+        if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH);
+        eptr++;
         }
       if (min == max) continue;
       if (minimize)
@@ -3311,7 +3404,8 @@ for (;;)
             SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
             }
-          if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
+          if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH);
+          eptr++;
           }
         /* Control never gets here */
         }
@@ -3325,7 +3419,7 @@ for (;;)
             SCHECK_PARTIAL();
             break;
             }
-          if (fc != md->lcc[*eptr]) break;
+          if (fc != *eptr && foc != *eptr) break;
           eptr++;
           }
 
@@ -3414,11 +3508,25 @@ for (;;)
     GETCHARINCTEST(c, eptr);
     if (op == OP_NOTI)         /* The caseless case */
       {
-#ifdef SUPPORT_UTF8
-      if (c < 256)
-#endif
-      c = md->lcc[c];
-      if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);
+      register unsigned int ch, och;
+      ch = *ecode++;
+#ifdef COMPILE_PCRE8
+      /* ch must be < 128 if UTF is enabled. */
+      och = md->fcc[ch];
+#else
+#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UCP
+      if (utf && ch > 127)
+        och = UCD_OTHERCASE(ch);
+#else
+      if (utf && ch > 127)
+        och = ch;
+#endif /* SUPPORT_UCP */
+      else
+#endif /* SUPPORT_UTF */
+        och = TABLE_GET(ch, md->fcc, ch);
+#endif /* COMPILE_PCRE8 */
+      if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
       }
     else    /* Caseful */
       {
@@ -3436,7 +3544,7 @@ for (;;)
     case OP_NOTEXACT:
     case OP_NOTEXACTI:
     min = max = GET2(ecode, 1);
-    ecode += 3;
+    ecode += 1 + IMM2_SIZE;
     goto REPEATNOTCHAR;
 
     case OP_NOTUPTO:
@@ -3446,7 +3554,7 @@ for (;;)
     min = 0;
     max = GET2(ecode, 1);
     minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
-    ecode += 3;
+    ecode += 1 + IMM2_SIZE;
     goto REPEATNOTCHAR;
 
     case OP_NOTPOSSTAR:
@@ -3478,7 +3586,7 @@ for (;;)
     possessive = TRUE;
     min = 0;
     max = GET2(ecode, 1);
-    ecode += 3;
+    ecode += 1 + IMM2_SIZE;
     goto REPEATNOTCHAR;
 
     case OP_NOTSTAR:
@@ -3517,11 +3625,25 @@ for (;;)
 
     if (op >= OP_NOTSTARI)     /* Caseless */
       {
-      fc = md->lcc[fc];
+#ifdef COMPILE_PCRE8
+      /* fc must be < 128 if UTF is enabled. */
+      foc = md->fcc[fc];
+#else
+#ifdef SUPPORT_UTF
+#ifdef SUPPORT_UCP
+      if (utf && fc > 127)
+        foc = UCD_OTHERCASE(fc);
+#else
+      if (utf && fc > 127)
+        foc = fc;
+#endif /* SUPPORT_UCP */
+      else
+#endif /* SUPPORT_UTF */
+        foc = TABLE_GET(fc, md->fcc, fc);
+#endif /* COMPILE_PCRE8 */
 
-#ifdef SUPPORT_UTF8
-      /* UTF-8 mode */
-      if (utf8)
+#ifdef SUPPORT_UTF
+      if (utf)
         {
         register unsigned int d;
         for (i = 1; i <= min; i++)
@@ -3532,14 +3654,12 @@ for (;;)
             RRETURN(MATCH_NOMATCH);
             }
           GETCHARINC(d, eptr);
-          if (d < 256) d = md->lcc[d];
-          if (fc == d) RRETURN(MATCH_NOMATCH);
+          if (fc == d || (unsigned int) foc == d) RRETURN(MATCH_NOMATCH);
           }
         }
       else
 #endif
-
-      /* Not UTF-8 mode */
+      /* Not UTF mode */
         {
         for (i = 1; i <= min; i++)
           {
@@ -3548,7 +3668,8 @@ for (;;)
             SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
             }
-          if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
+          if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
+          eptr++;
           }
         }
 
@@ -3556,9 +3677,8 @@ for (;;)
 
       if (minimize)
         {
-#ifdef SUPPORT_UTF8
-        /* UTF-8 mode */
-        if (utf8)
+#ifdef SUPPORT_UTF
+        if (utf)
           {
           register unsigned int d;
           for (fi = min;; fi++)
@@ -3572,13 +3692,12 @@ for (;;)
               RRETURN(MATCH_NOMATCH);
               }
             GETCHARINC(d, eptr);
-            if (d < 256) d = md->lcc[d];
-            if (fc == d) RRETURN(MATCH_NOMATCH);
+            if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
             }
           }
         else
 #endif
-        /* Not UTF-8 mode */
+        /* Not UTF mode */
           {
           for (fi = min;; fi++)
             {
@@ -3590,7 +3709,8 @@ for (;;)
               SCHECK_PARTIAL();
               RRETURN(MATCH_NOMATCH);
               }
-            if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
+            if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
+            eptr++;
             }
           }
         /* Control never gets here */
@@ -3602,9 +3722,8 @@ for (;;)
         {
         pp = eptr;
 
-#ifdef SUPPORT_UTF8
-        /* UTF-8 mode */
-        if (utf8)
+#ifdef SUPPORT_UTF
+        if (utf)
           {
           register unsigned int d;
           for (i = min; i < max; i++)
@@ -3616,12 +3735,11 @@ for (;;)
               break;
               }
             GETCHARLEN(d, eptr, len);
-            if (d < 256) d = md->lcc[d];
-            if (fc == d) break;
+            if (fc == d || (unsigned int)foc == d) break;
             eptr += len;
             }
-        if (possessive) continue;
-        for(;;)
+          if (possessive) continue;
+          for(;;)
             {
             RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
@@ -3631,7 +3749,7 @@ for (;;)
           }
         else
 #endif
-        /* Not UTF-8 mode */
+        /* Not UTF mode */
           {
           for (i = min; i < max; i++)
             {
@@ -3640,7 +3758,7 @@ for (;;)
               SCHECK_PARTIAL();
               break;
               }
-            if (fc == md->lcc[*eptr]) break;
+            if (fc == *eptr || foc == *eptr) break;
             eptr++;
             }
           if (possessive) continue;
@@ -3661,9 +3779,8 @@ for (;;)
 
     else
       {
-#ifdef SUPPORT_UTF8
-      /* UTF-8 mode */
-      if (utf8)
+#ifdef SUPPORT_UTF
+      if (utf)
         {
         register unsigned int d;
         for (i = 1; i <= min; i++)
@@ -3679,7 +3796,7 @@ for (;;)
         }
       else
 #endif
-      /* Not UTF-8 mode */
+      /* Not UTF mode */
         {
         for (i = 1; i <= min; i++)
           {
@@ -3696,9 +3813,8 @@ for (;;)
 
       if (minimize)
         {
-#ifdef SUPPORT_UTF8
-        /* UTF-8 mode */
-        if (utf8)
+#ifdef SUPPORT_UTF
+        if (utf)
           {
           register unsigned int d;
           for (fi = min;; fi++)
@@ -3717,7 +3833,7 @@ for (;;)
           }
         else
 #endif
-        /* Not UTF-8 mode */
+        /* Not UTF mode */
           {
           for (fi = min;; fi++)
             {
@@ -3741,9 +3857,8 @@ for (;;)
         {
         pp = eptr;
 
-#ifdef SUPPORT_UTF8
-        /* UTF-8 mode */
-        if (utf8)
+#ifdef SUPPORT_UTF
+        if (utf)
           {
           register unsigned int d;
           for (i = min; i < max; i++)
@@ -3769,7 +3884,7 @@ for (;;)
           }
         else
 #endif
-        /* Not UTF-8 mode */
+        /* Not UTF mode */
           {
           for (i = min; i < max; i++)
             {
@@ -3802,7 +3917,7 @@ for (;;)
     case OP_TYPEEXACT:
     min = max = GET2(ecode, 1);
     minimize = TRUE;
-    ecode += 3;
+    ecode += 1 + IMM2_SIZE;
     goto REPEATTYPE;
 
     case OP_TYPEUPTO:
@@ -3810,7 +3925,7 @@ for (;;)
     min = 0;
     max = GET2(ecode, 1);
     minimize = *ecode == OP_TYPEMINUPTO;
-    ecode += 3;
+    ecode += 1 + IMM2_SIZE;
     goto REPEATTYPE;
 
     case OP_TYPEPOSSTAR:
@@ -3838,7 +3953,7 @@ for (;;)
     possessive = TRUE;
     min = 0;
     max = GET2(ecode, 1);
-    ecode += 3;
+    ecode += 1 + IMM2_SIZE;
     goto REPEATTYPE;
 
     case OP_TYPESTAR:
@@ -4045,7 +4160,7 @@ for (;;)
           while (eptr < md->end_subject)
             {
             int len = 1;
-            if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }
+            if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
             if (UCD_CATEGORY(c) != ucp_M) break;
             eptr += len;
             }
@@ -4057,8 +4172,8 @@ for (;;)
 
 /* Handle all other cases when the coding is UTF-8 */
 
-#ifdef SUPPORT_UTF8
-      if (utf8) switch(ctype)
+#ifdef SUPPORT_UTF
+      if (utf) switch(ctype)
         {
         case OP_ANY:
         for (i = 1; i <= min; i++)
@@ -4070,7 +4185,7 @@ for (;;)
             }
           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
           eptr++;
-          while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
+          ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
           }
         break;
 
@@ -4083,7 +4198,7 @@ for (;;)
             RRETURN(MATCH_NOMATCH);
             }
           eptr++;
-          while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
+          ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
           }
         break;
 
@@ -4265,8 +4380,9 @@ for (;;)
             SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
             }
-          if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
+          if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_digit) == 0)
             RRETURN(MATCH_NOMATCH);
+          eptr++;
           /* No need to skip more bytes - we know it's a 1-byte character */
           }
         break;
@@ -4281,7 +4397,8 @@ for (;;)
             }
           if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
             RRETURN(MATCH_NOMATCH);
-          while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
+          eptr++;
+          ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
           }
         break;
 
@@ -4293,8 +4410,9 @@ for (;;)
             SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
             }
-          if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
+          if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_space) == 0)
             RRETURN(MATCH_NOMATCH);
+          eptr++;
           /* No need to skip more bytes - we know it's a 1-byte character */
           }
         break;
@@ -4309,7 +4427,8 @@ for (;;)
             }
           if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
             RRETURN(MATCH_NOMATCH);
-          while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
+          eptr++;
+          ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
           }
         break;
 
@@ -4321,8 +4440,9 @@ for (;;)
             SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
             }
-          if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
+          if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_word) == 0)
             RRETURN(MATCH_NOMATCH);
+          eptr++;
           /* No need to skip more bytes - we know it's a 1-byte character */
           }
         break;
@@ -4332,7 +4452,7 @@ for (;;)
         }  /* End switch(ctype) */
 
       else
-#endif     /* SUPPORT_UTF8 */
+#endif     /* SUPPORT_UTF */
 
       /* Code for the non-UTF-8 case for minimum matching of operators other
       than OP_PROP and OP_NOTPROP. */
@@ -4392,6 +4512,10 @@ for (;;)
             case 0x000b:
             case 0x000c:
             case 0x0085:
+#ifdef COMPILE_PCRE16
+            case 0x2028:
+            case 0x2029:
+#endif
             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
             break;
             }
@@ -4412,6 +4536,24 @@ for (;;)
             case 0x09:      /* HT */
             case 0x20:      /* SPACE */
             case 0xa0:      /* NBSP */
+#ifdef COMPILE_PCRE16
+            case 0x1680:    /* OGHAM SPACE MARK */
+            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
+            case 0x2000:    /* EN QUAD */
+            case 0x2001:    /* EM QUAD */
+            case 0x2002:    /* EN SPACE */
+            case 0x2003:    /* EM SPACE */
+            case 0x2004:    /* THREE-PER-EM SPACE */
+            case 0x2005:    /* FOUR-PER-EM SPACE */
+            case 0x2006:    /* SIX-PER-EM SPACE */
+            case 0x2007:    /* FIGURE SPACE */
+            case 0x2008:    /* PUNCTUATION SPACE */
+            case 0x2009:    /* THIN SPACE */
+            case 0x200A:    /* HAIR SPACE */
+            case 0x202f:    /* NARROW NO-BREAK SPACE */
+            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
+            case 0x3000:    /* IDEOGRAPHIC SPACE */
+#endif
             RRETURN(MATCH_NOMATCH);
             }
           }
@@ -4431,6 +4573,24 @@ for (;;)
             case 0x09:      /* HT */
             case 0x20:      /* SPACE */
             case 0xa0:      /* NBSP */
+#ifdef COMPILE_PCRE16
+            case 0x1680:    /* OGHAM SPACE MARK */
+            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
+            case 0x2000:    /* EN QUAD */
+            case 0x2001:    /* EM QUAD */
+            case 0x2002:    /* EN SPACE */
+            case 0x2003:    /* EM SPACE */
+            case 0x2004:    /* THREE-PER-EM SPACE */
+            case 0x2005:    /* FOUR-PER-EM SPACE */
+            case 0x2006:    /* SIX-PER-EM SPACE */
+            case 0x2007:    /* FIGURE SPACE */
+            case 0x2008:    /* PUNCTUATION SPACE */
+            case 0x2009:    /* THIN SPACE */
+            case 0x200A:    /* HAIR SPACE */
+            case 0x202f:    /* NARROW NO-BREAK SPACE */
+            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
+            case 0x3000:    /* IDEOGRAPHIC SPACE */
+#endif
             break;
             }
           }
@@ -4452,6 +4612,10 @@ for (;;)
             case 0x0c:      /* FF */
             case 0x0d:      /* CR */
             case 0x85:      /* NEL */
+#ifdef COMPILE_PCRE16
+            case 0x2028:    /* LINE SEPARATOR */
+            case 0x2029:    /* PARAGRAPH SEPARATOR */
+#endif
             RRETURN(MATCH_NOMATCH);
             }
           }
@@ -4473,6 +4637,10 @@ for (;;)
             case 0x0c:      /* FF */
             case 0x0d:      /* CR */
             case 0x85:      /* NEL */
+#ifdef COMPILE_PCRE16
+            case 0x2028:    /* LINE SEPARATOR */
+            case 0x2029:    /* PARAGRAPH SEPARATOR */
+#endif
             break;
             }
           }
@@ -4486,7 +4654,9 @@ for (;;)
             SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
             }
-          if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
+          if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0)
+            RRETURN(MATCH_NOMATCH);
+          eptr++;
           }
         break;
 
@@ -4498,7 +4668,9 @@ for (;;)
             SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
             }
-          if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
+          if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0)
+            RRETURN(MATCH_NOMATCH);
+          eptr++;
           }
         break;
 
@@ -4510,7 +4682,9 @@ for (;;)
             SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
             }
-          if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
+          if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0)
+            RRETURN(MATCH_NOMATCH);
+          eptr++;
           }
         break;
 
@@ -4522,7 +4696,9 @@ for (;;)
             SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
             }
-          if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
+          if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0)
+            RRETURN(MATCH_NOMATCH);
+          eptr++;
           }
         break;
 
@@ -4534,8 +4710,9 @@ for (;;)
             SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
             }
-          if ((md->ctypes[*eptr++] & ctype_word) != 0)
+          if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0)
             RRETURN(MATCH_NOMATCH);
+          eptr++;
           }
         break;
 
@@ -4547,8 +4724,9 @@ for (;;)
             SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
             }
-          if ((md->ctypes[*eptr++] & ctype_word) == 0)
+          if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0)
             RRETURN(MATCH_NOMATCH);
+          eptr++;
           }
         break;
 
@@ -4766,7 +4944,7 @@ for (;;)
           while (eptr < md->end_subject)
             {
             int len = 1;
-            if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }
+            if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
             if (UCD_CATEGORY(c) != ucp_M) break;
             eptr += len;
             }
@@ -4775,9 +4953,8 @@ for (;;)
       else
 #endif     /* SUPPORT_UCP */
 
-#ifdef SUPPORT_UTF8
-      /* UTF-8 mode */
-      if (utf8)
+#ifdef SUPPORT_UTF
+      if (utf)
         {
         for (fi = min;; fi++)
           {
@@ -4919,7 +5096,7 @@ for (;;)
             break;
 
             case OP_WHITESPACE:
-            if  (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
+            if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
               RRETURN(MATCH_NOMATCH);
             break;
 
@@ -4940,7 +5117,7 @@ for (;;)
         }
       else
 #endif
-      /* Not UTF-8 mode */
+      /* Not UTF mode */
         {
         for (fi = min;; fi++)
           {
@@ -4976,6 +5153,10 @@ for (;;)
               case 0x000b:
               case 0x000c:
               case 0x0085:
+#ifdef COMPILE_PCRE16
+              case 0x2028:
+              case 0x2029:
+#endif
               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
               break;
               }
@@ -4988,6 +5169,24 @@ for (;;)
               case 0x09:      /* HT */
               case 0x20:      /* SPACE */
               case 0xa0:      /* NBSP */
+#ifdef COMPILE_PCRE16
+              case 0x1680:    /* OGHAM SPACE MARK */
+              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
+              case 0x2000:    /* EN QUAD */
+              case 0x2001:    /* EM QUAD */
+              case 0x2002:    /* EN SPACE */
+              case 0x2003:    /* EM SPACE */
+              case 0x2004:    /* THREE-PER-EM SPACE */
+              case 0x2005:    /* FOUR-PER-EM SPACE */
+              case 0x2006:    /* SIX-PER-EM SPACE */
+              case 0x2007:    /* FIGURE SPACE */
+              case 0x2008:    /* PUNCTUATION SPACE */
+              case 0x2009:    /* THIN SPACE */
+              case 0x200A:    /* HAIR SPACE */
+              case 0x202f:    /* NARROW NO-BREAK SPACE */
+              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
+              case 0x3000:    /* IDEOGRAPHIC SPACE */
+#endif
               RRETURN(MATCH_NOMATCH);
               }
             break;
@@ -4999,6 +5198,24 @@ for (;;)
               case 0x09:      /* HT */
               case 0x20:      /* SPACE */
               case 0xa0:      /* NBSP */
+#ifdef COMPILE_PCRE16
+              case 0x1680:    /* OGHAM SPACE MARK */
+              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
+              case 0x2000:    /* EN QUAD */
+              case 0x2001:    /* EM QUAD */
+              case 0x2002:    /* EN SPACE */
+              case 0x2003:    /* EM SPACE */
+              case 0x2004:    /* THREE-PER-EM SPACE */
+              case 0x2005:    /* FOUR-PER-EM SPACE */
+              case 0x2006:    /* SIX-PER-EM SPACE */
+              case 0x2007:    /* FIGURE SPACE */
+              case 0x2008:    /* PUNCTUATION SPACE */
+              case 0x2009:    /* THIN SPACE */
+              case 0x200A:    /* HAIR SPACE */
+              case 0x202f:    /* NARROW NO-BREAK SPACE */
+              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
+              case 0x3000:    /* IDEOGRAPHIC SPACE */
+#endif
               break;
               }
             break;
@@ -5012,6 +5229,10 @@ for (;;)
               case 0x0c:      /* FF */
               case 0x0d:      /* CR */
               case 0x85:      /* NEL */
+#ifdef COMPILE_PCRE16
+              case 0x2028:    /* LINE SEPARATOR */
+              case 0x2029:    /* PARAGRAPH SEPARATOR */
+#endif
               RRETURN(MATCH_NOMATCH);
               }
             break;
@@ -5025,32 +5246,36 @@ for (;;)
               case 0x0c:      /* FF */
               case 0x0d:      /* CR */
               case 0x85:      /* NEL */
+#ifdef COMPILE_PCRE16
+              case 0x2028:    /* LINE SEPARATOR */
+              case 0x2029:    /* PARAGRAPH SEPARATOR */
+#endif
               break;
               }
             break;
 
             case OP_NOT_DIGIT:
-            if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
+            if (MAX_255(c) && (md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
             break;
 
             case OP_DIGIT:
-            if ((md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
+            if (!MAX_255(c) || (md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
             break;
 
             case OP_NOT_WHITESPACE:
-            if ((md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
+            if (MAX_255(c) && (md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
             break;
 
             case OP_WHITESPACE:
-            if  ((md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
+            if (!MAX_255(c) || (md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
             break;
 
             case OP_NOT_WORDCHAR:
-            if ((md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
+            if (MAX_255(c) && (md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
             break;
 
             case OP_WORDCHAR:
-            if ((md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
+            if (!MAX_255(c) || (md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
             break;
 
             default:
@@ -5239,7 +5464,7 @@ for (;;)
           RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
           if (eptr-- == pp) break;        /* Stop if tried at original pos */
-          if (utf8) BACKCHAR(eptr);
+          if (utf) BACKCHAR(eptr);
           }
         }
 
@@ -5256,13 +5481,13 @@ for (;;)
             SCHECK_PARTIAL();
             break;
             }
-          if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }
+          if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
           if (UCD_CATEGORY(c) == ucp_M) break;
           eptr += len;
           while (eptr < md->end_subject)
             {
             len = 1;
-            if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); }
+            if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
             if (UCD_CATEGORY(c) != ucp_M) break;
             eptr += len;
             }
@@ -5279,7 +5504,7 @@ for (;;)
           if (eptr-- == pp) break;        /* Stop if tried at original pos */
           for (;;)                        /* Move back over one extended */
             {
-            if (!utf8) c = *eptr; else
+            if (!utf) c = *eptr; else
               {
               BACKCHAR(eptr);
               GETCHAR(c, eptr);
@@ -5293,10 +5518,8 @@ for (;;)
       else
 #endif   /* SUPPORT_UCP */
 
-#ifdef SUPPORT_UTF8
-      /* UTF-8 mode */
-
-      if (utf8)
+#ifdef SUPPORT_UTF
+      if (utf)
         {
         switch(ctype)
           {
@@ -5312,7 +5535,7 @@ for (;;)
                 }
               if (IS_NEWLINE(eptr)) break;
               eptr++;
-              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
+              ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
               }
             }
 
@@ -5329,7 +5552,7 @@ for (;;)
                 }
               if (IS_NEWLINE(eptr)) break;
               eptr++;
-              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
+              ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
               }
             }
           break;
@@ -5345,7 +5568,7 @@ for (;;)
                 break;
                 }
               eptr++;
-              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
+              ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
               }
             }
           else
@@ -5578,9 +5801,8 @@ for (;;)
           }
         }
       else
-#endif  /* SUPPORT_UTF8 */
-
-      /* Not UTF-8 mode */
+#endif  /* SUPPORT_UTF */
+      /* Not UTF mode */
         {
         switch(ctype)
           {
@@ -5624,10 +5846,12 @@ for (;;)
               }
             else
               {
-              if (c != 0x000a &&
-                  (md->bsr_anycrlf ||
-                    (c != 0x000b && c != 0x000c && c != 0x0085)))
-                break;
+              if (c != 0x000a && (md->bsr_anycrlf ||
+                (c != 0x000b && c != 0x000c && c != 0x0085
+#ifdef COMPILE_PCRE16
+                && c != 0x2028 && c != 0x2029
+#endif
+                ))) break;
               eptr++;
               }
             }
@@ -5642,7 +5866,12 @@ for (;;)
               break;
               }
             c = *eptr;
-            if (c == 0x09 || c == 0x20 || c == 0xa0) break;
+            if (c == 0x09 || c == 0x20 || c == 0xa0
+#ifdef COMPILE_PCRE16
+              || c == 0x1680 || c == 0x180e || (c >= 0x2000 && c <= 0x200A)
+              || c == 0x202f || c == 0x205f || c == 0x3000
+#endif
+              ) break;
             eptr++;
             }
           break;
@@ -5656,7 +5885,12 @@ for (;;)
               break;
               }
             c = *eptr;
-            if (c != 0x09 && c != 0x20 && c != 0xa0) break;
+            if (c != 0x09 && c != 0x20 && c != 0xa0
+#ifdef COMPILE_PCRE16
+              && c != 0x1680 && c != 0x180e && (c < 0x2000 || c > 0x200A)
+              && c != 0x202f && c != 0x205f && c != 0x3000
+#endif
+              ) break;
             eptr++;
             }
           break;
@@ -5670,8 +5904,11 @@ for (;;)
               break;
               }
             c = *eptr;
-            if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
-              break;
+            if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85
+#ifdef COMPILE_PCRE16
+              || c == 0x2028 || c == 0x2029
+#endif
+              ) break;
             eptr++;
             }
           break;
@@ -5685,8 +5922,11 @@ for (;;)
               break;
               }
             c = *eptr;
-            if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
-              break;
+            if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85
+#ifdef COMPILE_PCRE16
+              && c != 0x2028 && c != 0x2029
+#endif
+              ) break;
             eptr++;
             }
           break;
@@ -5699,7 +5939,7 @@ for (;;)
               SCHECK_PARTIAL();
               break;
               }
-            if ((md->ctypes[*eptr] & ctype_digit) != 0) break;
+            if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0) break;
             eptr++;
             }
           break;
@@ -5712,7 +5952,7 @@ for (;;)
               SCHECK_PARTIAL();
               break;
               }
-            if ((md->ctypes[*eptr] & ctype_digit) == 0) break;
+            if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0) break;
             eptr++;
             }
           break;
@@ -5725,7 +5965,7 @@ for (;;)
               SCHECK_PARTIAL();
               break;
               }
-            if ((md->ctypes[*eptr] & ctype_space) != 0) break;
+            if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0) break;
             eptr++;
             }
           break;
@@ -5738,7 +5978,7 @@ for (;;)
               SCHECK_PARTIAL();
               break;
               }
-            if ((md->ctypes[*eptr] & ctype_space) == 0) break;
+            if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0) break;
             eptr++;
             }
           break;
@@ -5751,7 +5991,7 @@ for (;;)
               SCHECK_PARTIAL();
               break;
               }
-            if ((md->ctypes[*eptr] & ctype_word) != 0) break;
+            if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0) break;
             eptr++;
             }
           break;
@@ -5764,7 +6004,7 @@ for (;;)
               SCHECK_PARTIAL();
               break;
               }
-            if ((md->ctypes[*eptr] & ctype_word) == 0) break;
+            if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0) break;
             eptr++;
             }
           break;
@@ -5827,16 +6067,23 @@ switch (frame->Xwhere)
   LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
   LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
   LBL(65) LBL(66)
-#ifdef SUPPORT_UTF8
-  LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+  LBL(21)
+#endif
+#ifdef SUPPORT_UTF
+  LBL(16) LBL(18) LBL(20)
+  LBL(22) LBL(23) LBL(28) LBL(30)
   LBL(32) LBL(34) LBL(42) LBL(46)
 #ifdef SUPPORT_UCP
   LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
   LBL(59) LBL(60) LBL(61) LBL(62)
 #endif  /* SUPPORT_UCP */
-#endif  /* SUPPORT_UTF8 */
+#endif  /* SUPPORT_UTF */
   default:
   DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
+
+printf("+++jump error in pcre match: label %d non-existent\n", frame->Xwhere);
+
   return PCRE_ERROR_INTERNAL;
   }
 #undef LBL
@@ -5923,64 +6170,90 @@ Returns:          > 0 => success; value is the number of elements filled in
                  < -1 => some kind of unexpected problem
 */
 
+#ifdef COMPILE_PCRE8
 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
 pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
   PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
   int offsetcount)
+#else
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre16_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
+  PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
+  int offsetcount)
+#endif
 {
 int rc, ocount, arg_offset_max;
-int first_byte = -1;
-int req_byte = -1;
-int req_byte2 = -1;
 int newline;
 BOOL using_temporary_offsets = FALSE;
 BOOL anchored;
 BOOL startline;
 BOOL firstline;
-BOOL first_byte_caseless = FALSE;
-BOOL req_byte_caseless = FALSE;
-BOOL utf8;
+BOOL utf;
+BOOL has_first_char = FALSE;
+BOOL has_req_char = FALSE;
+pcre_uchar first_char = 0;
+pcre_uchar first_char2 = 0;
+pcre_uchar req_char = 0;
+pcre_uchar req_char2 = 0;
 match_data match_block;
 match_data *md = &match_block;
-const uschar *tables;
-const uschar *start_bits = NULL;
-USPTR start_match = (USPTR)subject + start_offset;
-USPTR end_subject;
-USPTR start_partial = NULL;
-USPTR req_byte_ptr = start_match - 1;
-
-pcre_study_data internal_study;
+const pcre_uint8 *tables;
+const pcre_uint8 *start_bits = NULL;
+PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
+PCRE_PUCHAR end_subject;
+PCRE_PUCHAR start_partial = NULL;
+PCRE_PUCHAR req_char_ptr = start_match - 1;
+
 const pcre_study_data *study;
+const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
 
-real_pcre internal_re;
-const real_pcre *external_re = (const real_pcre *)argument_re;
-const real_pcre *re = external_re;
+/* Check for the special magic call that measures the size of the stack used
+per recursive call of match(). */
+
+if (re == NULL && extra_data == NULL && subject == NULL && length == -999 &&
+    start_offset == -999)
+#ifdef NO_RECURSE
+  return -sizeof(heapframe);
+#else
+  return match(NULL, NULL, NULL, 0, NULL, NULL, 0);
+#endif
 
 /* Plausibility checks */
 
 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
-if (re == NULL || subject == NULL ||
-   (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
+if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))
+  return PCRE_ERROR_NULL;
 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
 if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
 
+/* Check that the first field in the block is the magic number. If it is not,
+return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
+REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
+means that the pattern is likely compiled with different endianness. */
+
+if (re->magic_number != MAGIC_NUMBER)
+  return re->magic_number == REVERSED_MAGIC_NUMBER?
+    PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
+if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
+
 /* These two settings are used in the code for checking a UTF-8 string that
 follows immediately afterwards. Other values in the md block are used only
 during "normal" pcre_exec() processing, not when the JIT support is in use,
 so they are set up later. */
 
-utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
+/* PCRE_UTF16 has the same value as PCRE_UTF8. */
+utf = md->utf = (re->options & PCRE_UTF8) != 0;
 md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
               ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
 
 /* Check a UTF-8 string if required. Pass back the character offset and error
 code for an invalid string if a results vector is available. */
 
-#ifdef SUPPORT_UTF8
-if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
+#ifdef SUPPORT_UTF
+if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
   {
   int erroroffset;
-  int errorcode = _pcre_valid_utf8((USPTR)subject, length, &erroroffset);
+  int errorcode = PRIV(valid_utf)((PCRE_PUCHAR)subject, length, &erroroffset);
   if (errorcode != 0)
     {
     if (offsetcount >= 2)
@@ -5988,13 +6261,18 @@ if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
       offsets[0] = erroroffset;
       offsets[1] = errorcode;
       }
+#ifdef COMPILE_PCRE16
+    return (errorcode <= PCRE_UTF16_ERR1 && md->partial > 1)?
+      PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
+#else
     return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
       PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
+#endif
     }
 
-  /* Check that a start_offset points to the start of a UTF-8 character. */
+  /* Check that a start_offset points to the start of a UTF character. */
   if (start_offset > 0 && start_offset < length &&
-      (((USPTR)subject)[start_offset] & 0xc0) == 0x80)
+      NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
     return PCRE_ERROR_BADUTF8_OFFSET;
   }
 #endif
@@ -6012,15 +6290,16 @@ if (extra_data != NULL
     && (extra_data->flags & PCRE_EXTRA_TABLES) == 0
     && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |
                     PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART)) == 0)
-  return _pcre_jit_exec(re, extra_data->executable_jit, subject, length,
-    start_offset, options, ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0)
+  return PRIV(jit_exec)(re, extra_data->executable_jit,
+    (const pcre_uchar *)subject, length, start_offset, options,
+    ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0)
     ? MATCH_LIMIT : extra_data->match_limit, offsets, offsetcount);
 #endif
 
 /* Carry on with non-JIT matching. This information is for finding all the
 numbers associated with a given name, for condition testing. */
 
-md->name_table = (uschar *)re + re->name_table_offset;
+md->name_table = (pcre_uchar *)re + re->name_table_offset;
 md->name_count = re->name_count;
 md->name_entry_size = re->name_entry_size;
 
@@ -6034,7 +6313,7 @@ md->callout_data = NULL;
 
 /* The table pointer is always in native byte order. */
 
-tables = external_re->tables;
+tables = re->tables;
 
 if (extra_data != NULL)
   {
@@ -6054,19 +6333,7 @@ if (extra_data != NULL)
 is a feature that makes it possible to save compiled regex and re-use them
 in other programs later. */
 
-if (tables == NULL) tables = _pcre_default_tables;
-
-/* Check that the first field in the block is the magic number. If it is not,
-test for a regex that was compiled on a host of opposite endianness. If this is
-the case, flipped values are put in internal_re and internal_study if there was
-study data too. */
-
-if (re->magic_number != MAGIC_NUMBER)
-  {
-  re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
-  if (re == NULL) return PCRE_ERROR_BADMAGIC;
-  if (study != NULL) study = &internal_study;
-  }
+if (tables == NULL) tables = PRIV(default_tables);
 
 /* Set up other data */
 
@@ -6076,10 +6343,10 @@ firstline = (re->options & PCRE_FIRSTLINE) != 0;
 
 /* The code starts after the real_pcre block and the capture name table. */
 
-md->start_code = (const uschar *)external_re + re->name_table_offset +
+md->start_code = (const pcre_uchar *)re + re->name_table_offset +
   re->name_count * re->name_entry_size;
 
-md->start_subject = (USPTR)subject;
+md->start_subject = (PCRE_PUCHAR)subject;
 md->start_offset = start_offset;
 md->end_subject = md->start_subject + length;
 end_subject = md->end_subject;
@@ -6104,6 +6371,7 @@ md->recursive = NULL;                   /* No recursion at top level */
 md->hasthen = (re->flags & PCRE_HASTHEN) != 0;
 
 md->lcc = tables + lcc_offset;
+md->fcc = tables + fcc_offset;
 md->ctypes = tables + ctypes_offset;
 
 /* Handle different \R options. */
@@ -6190,7 +6458,7 @@ arg_offset_max = (2*ocount)/3;
 if (re->top_backref > 0 && re->top_backref >= ocount/3)
   {
   ocount = re->top_backref * 3 + 3;
-  md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
+  md->offset_vector = (int *)(PUBL(malloc))(ocount * sizeof(int));
   if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
   using_temporary_offsets = TRUE;
   DPRINTF(("Got memory to hold back references\n"));
@@ -6217,7 +6485,7 @@ if (md->offset_vector != NULL)
   md->offset_vector[0] = md->offset_vector[1] = -1;
   }
 
-/* Set up the first character to match, if available. The first_byte value is
+/* Set up the first character to match, if available. The first_char value is
 never set for an anchored regular expression, but the anchoring may be forced
 at run time, so we have to test for anchoring. The first char may be unset for
 an unanchored pattern, of course. If there's no first char and the pattern was
@@ -6227,9 +6495,16 @@ if (!anchored)
   {
   if ((re->flags & PCRE_FIRSTSET) != 0)
     {
-    first_byte = re->first_byte & 255;
-    if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
-      first_byte = md->lcc[first_byte];
+    has_first_char = TRUE;
+    first_char = first_char2 = (pcre_uchar)(re->first_char);
+    if ((re->flags & PCRE_FCH_CASELESS) != 0)
+      {
+      first_char2 = TABLE_GET(first_char, md->fcc, first_char);
+#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
+      if (utf && first_char > 127)
+        first_char2 = UCD_OTHERCASE(first_char);
+#endif
+      }
     }
   else
     if (!startline && study != NULL &&
@@ -6242,14 +6517,19 @@ character" set. */
 
 if ((re->flags & PCRE_REQCHSET) != 0)
   {
-  req_byte = re->req_byte & 255;
-  req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
-  req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */
+  has_req_char = TRUE;
+  req_char = req_char2 = (pcre_uchar)(re->req_char);
+  if ((re->flags & PCRE_RCH_CASELESS) != 0)
+    {
+    req_char2 = TABLE_GET(req_char, md->fcc, req_char);
+#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
+    if (utf && req_char > 127)
+      req_char2 = UCD_OTHERCASE(req_char);
+#endif
+    }
   }
 
 
-
-
 /* ==========================================================================*/
 
 /* Loop for handling unanchored repeated matching attempts; for anchored regexs
@@ -6257,8 +6537,8 @@ the loop runs just once. */
 
 for(;;)
   {
-  USPTR save_end_subject = end_subject;
-  USPTR new_start_match;
+  PCRE_PUCHAR save_end_subject = end_subject;
+  PCRE_PUCHAR new_start_match;
 
   /* If firstline is TRUE, the start of the match is constrained to the first
   line of a multiline string. That is, the match must be before or at the first
@@ -6268,14 +6548,14 @@ for(;;)
 
   if (firstline)
     {
-    USPTR t = start_match;
-#ifdef SUPPORT_UTF8
-    if (utf8)
+    PCRE_PUCHAR t = start_match;
+#ifdef SUPPORT_UTF
+    if (utf)
       {
       while (t < md->end_subject && !IS_NEWLINE(t))
         {
         t++;
-        while (t < end_subject && (*t & 0xc0) == 0x80) t++;
+        ACROSSCHAR(t < end_subject, *t, t++);
         }
       }
     else
@@ -6292,15 +6572,16 @@ for(;;)
 
   if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
     {
-    /* Advance to a unique first byte if there is one. */
+    /* Advance to a unique first char if there is one. */
 
-    if (first_byte >= 0)
+    if (has_first_char)
       {
-      if (first_byte_caseless)
-        while (start_match < end_subject && md->lcc[*start_match] != first_byte)
+      if (first_char != first_char2)
+        while (start_match < end_subject &&
+            *start_match != first_char && *start_match != first_char2)
           start_match++;
       else
-        while (start_match < end_subject && *start_match != first_byte)
+        while (start_match < end_subject && *start_match != first_char)
           start_match++;
       }
 
@@ -6310,14 +6591,14 @@ for(;;)
       {
       if (start_match > md->start_subject + start_offset)
         {
-#ifdef SUPPORT_UTF8
-        if (utf8)
+#ifdef SUPPORT_UTF
+        if (utf)
           {
           while (start_match < end_subject && !WAS_NEWLINE(start_match))
             {
             start_match++;
-            while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
-              start_match++;
+            ACROSSCHAR(start_match < end_subject, *start_match,
+              start_match++);
             }
           }
         else
@@ -6344,13 +6625,18 @@ for(;;)
       while (start_match < end_subject)
         {
         register unsigned int c = *start_match;
+#ifndef COMPILE_PCRE8
+        if (c > 255) c = 255;
+#endif
         if ((start_bits[c/8] & (1 << (c&7))) == 0)
           {
           start_match++;
-#ifdef SUPPORT_UTF8
-          if (utf8)
-            while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
-              start_match++;
+#if defined SUPPORT_UTF && defined COMPILE_PCRE8
+          /* In non 8-bit mode, the iteration will stop for
+          characters > 255 at the beginning or not stop at all. */
+          if (utf)
+            ACROSSCHAR(start_match < end_subject, *start_match,
+              start_match++);
 #endif
           }
         else break;
@@ -6379,8 +6665,8 @@ for(;;)
       break;
       }
 
-    /* If req_byte is set, we know that that character must appear in the
-    subject for the match to succeed. If the first character is set, req_byte
+    /* If req_char is set, we know that that character must appear in the
+    subject for the match to succeed. If the first character is set, req_char
     must be later in the subject; otherwise the test starts at the match point.
     This optimization can save a huge amount of backtracking in patterns with
     nested unlimited repeats that aren't going to match. Writing separate code
@@ -6393,28 +6679,28 @@ for(;;)
     32-megabyte string... so we don't do this when the string is sufficiently
     long. */
 
-    if (req_byte >= 0 && end_subject - start_match < REQ_BYTE_MAX)
+    if (has_req_char && end_subject - start_match < REQ_BYTE_MAX)
       {
-      register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
+      register PCRE_PUCHAR p = start_match + (has_first_char? 1:0);
 
       /* We don't need to repeat the search if we haven't yet reached the
       place we found it at last time. */
 
-      if (p > req_byte_ptr)
+      if (p > req_char_ptr)
         {
-        if (req_byte_caseless)
+        if (req_char != req_char2)
           {
           while (p < end_subject)
             {
             register int pp = *p++;
-            if (pp == req_byte || pp == req_byte2) { p--; break; }
+            if (pp == req_char || pp == req_char2) { p--; break; }
             }
           }
         else
           {
           while (p < end_subject)
             {
-            if (*p++ == req_byte) { p--; break; }
+            if (*p++ == req_char) { p--; break; }
             }
           }
 
@@ -6431,7 +6717,7 @@ for(;;)
         found it, so that we don't search again next time round the loop if
         the start hasn't passed this character yet. */
 
-        req_byte_ptr = p;
+        req_char_ptr = p;
         }
       }
     }
@@ -6486,10 +6772,10 @@ for(;;)
     case MATCH_THEN:
     md->ignore_skip_arg = FALSE;
     new_start_match = start_match + 1;
-#ifdef SUPPORT_UTF8
-    if (utf8)
-      while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)
-        new_start_match++;
+#ifdef SUPPORT_UTF
+    if (utf)
+      ACROSSCHAR(new_start_match < end_subject, *new_start_match,
+        new_start_match++);
 #endif
     break;
 
@@ -6527,9 +6813,13 @@ for(;;)
 
   /* If we have just passed a CR and we are now at a LF, and the pattern does
   not contain any explicit matches for \r or \n, and the newline option is CRLF
-  or ANY or ANYCRLF, advance the match position by one more character. */
+  or ANY or ANYCRLF, advance the match position by one more character. In
+  normal matching start_match will aways be greater than the first position at
+  this stage, but a failed *SKIP can cause a return at the same point, which is
+  why the first test exists. */
 
-  if (start_match[-1] == CHAR_CR &&
+  if (start_match > (PCRE_PUCHAR)subject + start_offset &&
+      start_match[-1] == CHAR_CR &&
       start_match < end_subject &&
       *start_match == CHAR_NL &&
       (re->flags & PCRE_HASCRORLF) == 0 &&
@@ -6575,7 +6865,7 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
       }
     if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE;
     DPRINTF(("Freeing temporary memory\n"));
-    (pcre_free)(md->offset_vector);
+    (PUBL(free))(md->offset_vector);
     }
 
   /* Set the return code to the number of captured strings, or 0 if there were
@@ -6616,7 +6906,7 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
   /* Return MARK data if requested */
 
   if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
-    *(extra_data->mark) = (unsigned char *)(md->mark);
+    *(extra_data->mark) = (pcre_uchar *)md->mark;
   DPRINTF((">>>> returning %d\n", rc));
   return rc;
   }
@@ -6627,7 +6917,7 @@ attempt has failed at all permitted starting positions. */
 if (using_temporary_offsets)
   {
   DPRINTF(("Freeing temporary memory\n"));
-  (pcre_free)(md->offset_vector);
+  (PUBL(free))(md->offset_vector);
   }
 
 /* For anything other than nomatch or partial match, just return the code. */
@@ -6646,8 +6936,8 @@ if (start_partial != NULL)
   md->mark = NULL;
   if (offsetcount > 1)
     {
-    offsets[0] = (int)(start_partial - (USPTR)subject);
-    offsets[1] = (int)(end_subject - (USPTR)subject);
+    offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);
+    offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
     }
   rc = PCRE_ERROR_PARTIAL;
   }
@@ -6663,7 +6953,7 @@ else
 /* Return the MARK data if it has been requested. */
 
 if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
-  *(extra_data->mark) = (unsigned char *)(md->nomatch_mark);
+  *(extra_data->mark) = (pcre_uchar *)md->nomatch_mark;
 return rc;
 }
 
diff --git a/usr.sbin/nginx/src/pcre/pcre_fullinfo.c b/usr.sbin/nginx/src/pcre/pcre_fullinfo.c
index 4895b2aab2d..a3d1198b0e8 100644
--- a/usr.sbin/nginx/src/pcre/pcre_fullinfo.c
+++ b/usr.sbin/nginx/src/pcre/pcre_fullinfo.c
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
-           Copyright (c) 1997-2011 University of Cambridge
+           Copyright (c) 1997-2012 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -65,13 +65,17 @@ Arguments:
 Returns:           0 if data returned, negative on error
 */
 
+#ifdef COMPILE_PCRE8
 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
-pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data, int what,
-  void *where)
+pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data,
+  int what, void *where)
+#else
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre16_fullinfo(const pcre16 *argument_re, const pcre16_extra *extra_data,
+  int what, void *where)
+#endif
 {
-real_pcre internal_re;
-pcre_study_data internal_study;
-const real_pcre *re = (const real_pcre *)argument_re;
+const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
 const pcre_study_data *study = NULL;
 
 if (re == NULL || where == NULL) return PCRE_ERROR_NULL;
@@ -79,12 +83,18 @@ if (re == NULL || where == NULL) return PCRE_ERROR_NULL;
 if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0)
   study = (const pcre_study_data *)extra_data->study_data;
 
+/* Check that the first field in the block is the magic number. If it is not,
+return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
+REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
+means that the pattern is likely compiled with different endianness. */
+
 if (re->magic_number != MAGIC_NUMBER)
-  {
-  re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
-  if (re == NULL) return PCRE_ERROR_BADMAGIC;
-  if (study != NULL) study = &internal_study;
-  }
+  return re->magic_number == REVERSED_MAGIC_NUMBER?
+    PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
+
+/* Check that this pattern was compiled in the correct bit mode */
+
+if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
 
 switch (what)
   {
@@ -106,11 +116,10 @@ switch (what)
       (extra_data != NULL &&
       (extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
       extra_data->executable_jit != NULL)?
-    _pcre_jit_get_size(extra_data->executable_jit) : 0;
+    PRIV(jit_get_size)(extra_data->executable_jit) : 0;
 #else
   *((size_t *)where) = 0;
 #endif
-
   break;
 
   case PCRE_INFO_CAPTURECOUNT:
@@ -123,7 +132,7 @@ switch (what)
 
   case PCRE_INFO_FIRSTBYTE:
   *((int *)where) =
-    ((re->flags & PCRE_FIRSTSET) != 0)? re->first_byte :
+    ((re->flags & PCRE_FIRSTSET) != 0)? re->first_char :
     ((re->flags & PCRE_STARTLINE) != 0)? -1 : -2;
   break;
 
@@ -131,7 +140,7 @@ switch (what)
   block, not the internal copy (with flipped integer fields). */
 
   case PCRE_INFO_FIRSTTABLE:
-  *((const uschar **)where) =
+  *((const pcre_uint8 **)where) =
     (study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)?
       ((const pcre_study_data *)extra_data->study_data)->start_bits : NULL;
   break;
@@ -139,7 +148,7 @@ switch (what)
   case PCRE_INFO_MINLENGTH:
   *((int *)where) =
     (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0)?
-      (int)study->minlength : -1;
+      (int)(study->minlength) : -1;
   break;
 
   case PCRE_INFO_JIT:
@@ -150,7 +159,7 @@ switch (what)
 
   case PCRE_INFO_LASTLITERAL:
   *((int *)where) =
-    ((re->flags & PCRE_REQCHSET) != 0)? re->req_byte : -1;
+    ((re->flags & PCRE_REQCHSET) != 0)? re->req_char : -1;
   break;
 
   case PCRE_INFO_NAMEENTRYSIZE:
@@ -162,11 +171,11 @@ switch (what)
   break;
 
   case PCRE_INFO_NAMETABLE:
-  *((const uschar **)where) = (const uschar *)re + re->name_table_offset;
+  *((const pcre_uchar **)where) = (const pcre_uchar *)re + re->name_table_offset;
   break;
 
   case PCRE_INFO_DEFAULT_TABLES:
-  *((const uschar **)where) = (const uschar *)(_pcre_default_tables);
+  *((const pcre_uint8 **)where) = (const pcre_uint8 *)(PRIV(default_tables));
   break;
 
   /* From release 8.00 this will always return TRUE because NOPARTIAL is
diff --git a/usr.sbin/nginx/src/pcre/pcre_globals.c b/usr.sbin/nginx/src/pcre/pcre_globals.c
index 4562e0a069a..36e6ddb3a89 100644
--- a/usr.sbin/nginx/src/pcre/pcre_globals.c
+++ b/usr.sbin/nginx/src/pcre/pcre_globals.c
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
-           Copyright (c) 1997-2008 University of Cambridge
+           Copyright (c) 1997-2012 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -67,18 +67,18 @@ static void LocalPcreFree(void* aPtr)
   {
   free(aPtr);
   }
-PCRE_EXP_DATA_DEFN void *(*pcre_malloc)(size_t) = LocalPcreMalloc;
-PCRE_EXP_DATA_DEFN void  (*pcre_free)(void *) = LocalPcreFree;
-PCRE_EXP_DATA_DEFN void *(*pcre_stack_malloc)(size_t) = LocalPcreMalloc;
-PCRE_EXP_DATA_DEFN void  (*pcre_stack_free)(void *) = LocalPcreFree;
-PCRE_EXP_DATA_DEFN int   (*pcre_callout)(pcre_callout_block *) = NULL;
+PCRE_EXP_DATA_DEFN void *(*PUBL(malloc))(size_t) = LocalPcreMalloc;
+PCRE_EXP_DATA_DEFN void  (*PUBL(free))(void *) = LocalPcreFree;
+PCRE_EXP_DATA_DEFN void *(*PUBL(stack_malloc))(size_t) = LocalPcreMalloc;
+PCRE_EXP_DATA_DEFN void  (*PUBL(stack_free))(void *) = LocalPcreFree;
+PCRE_EXP_DATA_DEFN int   (*PUBL(callout))(PUBL(callout_block) *) = NULL;
 
 #elif !defined VPCOMPAT
-PCRE_EXP_DATA_DEFN void *(*pcre_malloc)(size_t) = malloc;
-PCRE_EXP_DATA_DEFN void  (*pcre_free)(void *) = free;
-PCRE_EXP_DATA_DEFN void *(*pcre_stack_malloc)(size_t) = malloc;
-PCRE_EXP_DATA_DEFN void  (*pcre_stack_free)(void *) = free;
-PCRE_EXP_DATA_DEFN int   (*pcre_callout)(pcre_callout_block *) = NULL;
+PCRE_EXP_DATA_DEFN void *(*PUBL(malloc))(size_t) = malloc;
+PCRE_EXP_DATA_DEFN void  (*PUBL(free))(void *) = free;
+PCRE_EXP_DATA_DEFN void *(*PUBL(stack_malloc))(size_t) = malloc;
+PCRE_EXP_DATA_DEFN void  (*PUBL(stack_free))(void *) = free;
+PCRE_EXP_DATA_DEFN int   (*PUBL(callout))(PUBL(callout_block) *) = NULL;
 #endif
 
 /* End of pcre_globals.c */
diff --git a/usr.sbin/nginx/src/pcre/pcre_internal.h b/usr.sbin/nginx/src/pcre/pcre_internal.h
index 6ea397a39ef..e5a4b6a526d 100644
--- a/usr.sbin/nginx/src/pcre/pcre_internal.h
+++ b/usr.sbin/nginx/src/pcre/pcre_internal.h
@@ -7,7 +7,7 @@
 and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
-           Copyright (c) 1997-2011 University of Cambridge
+           Copyright (c) 1997-2012 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -40,7 +40,8 @@ POSSIBILITY OF SUCH DAMAGE.
 
 /* This header contains definitions that are shared between the different
 modules, but which are not relevant to the exported API. This includes some
-functions whose names all begin with "_pcre_". */
+functions whose names all begin with "_pcre_" or "_pcre16_" depending on
+the PRIV macro. */
 
 #ifndef PCRE_INTERNAL_H
 #define PCRE_INTERNAL_H
@@ -51,20 +52,39 @@ functions whose names all begin with "_pcre_". */
 #define PCRE_DEBUG
 #endif
 
-/* We do not support both EBCDIC and UTF-8 at the same time. The "configure"
-script prevents both being selected, but not everybody uses "configure". */
-
-#if defined EBCDIC && defined SUPPORT_UTF8
-#error The use of both EBCDIC and SUPPORT_UTF8 is not supported.
+/* PCRE is compiled as an 8 bit library if it is not requested otherwise. */
+#ifndef COMPILE_PCRE16
+#define COMPILE_PCRE8
 #endif
 
-/* If SUPPORT_UCP is defined, SUPPORT_UTF8 must also be defined. The
+/* If SUPPORT_UCP is defined, SUPPORT_UTF must also be defined. The
 "configure" script ensures this, but not everybody uses "configure". */
 
-#if defined SUPPORT_UCP && !defined SUPPORT_UTF8
+#if defined SUPPORT_UCP && !(defined SUPPORT_UTF)
+#define SUPPORT_UTF 1
+#endif
+
+/* We define SUPPORT_UTF if SUPPORT_UTF8 is enabled for compatibility
+reasons with existing code. */
+
+#if defined SUPPORT_UTF8 && !(defined SUPPORT_UTF)
+#define SUPPORT_UTF 1
+#endif
+
+/* Fixme: SUPPORT_UTF8 should be eventually disappear from the code.
+Until then we define it if SUPPORT_UTF is defined. */
+
+#if defined SUPPORT_UTF && !(defined SUPPORT_UTF8)
 #define SUPPORT_UTF8 1
 #endif
 
+/* We do not support both EBCDIC and UTF-8/16 at the same time. The "configure"
+script prevents both being selected, but not everybody uses "configure". */
+
+#if defined EBCDIC && defined SUPPORT_UTF
+#error The use of both EBCDIC and SUPPORT_UTF8/16 is not supported.
+#endif
+
 /* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef
 inline, and there are *still* stupid compilers about that don't like indented
 pre-processor statements, or at least there were when I first wrote this. After
@@ -158,12 +178,14 @@ set, we ensure here that it has no effect. */
 #define PCRE_CALL_CONVENTION
 #endif
 
-/* We need to have types that specify unsigned 16-bit and 32-bit integers. We
+/* We need to have types that specify unsigned 8, 16 and 32-bit integers. We
 cannot determine these outside the compilation (e.g. by running a program as
 part of "configure") because PCRE is often cross-compiled for use on other
 systems. Instead we make use of the maximum sizes that are available at
 preprocessor time in standard C environments. */
 
+typedef unsigned char pcre_uint8;
+
 #if USHRT_MAX == 65535
   typedef unsigned short pcre_uint16;
   typedef short pcre_int16;
@@ -206,12 +228,47 @@ by "configure". */
 
 /* All character handling must be done as unsigned characters. Otherwise there
 are problems with top-bit-set characters and functions such as isspace().
-However, we leave the interface to the outside world as char *, because that
-should make things easier for callers. We define a short type for unsigned char
-to save lots of typing. I tried "uchar", but it causes problems on Digital
-Unix, where it is defined in sys/types, so use "uschar" instead. */
+However, we leave the interface to the outside world as char * or short *,
+because that should make things easier for callers. This character type is
+called pcre_uchar.
+
+The IN_UCHARS macro multiply its argument with the byte size of the current
+pcre_uchar type. Useful for memcpy and such operations, whose require the
+byte size of their input/output buffers.
+
+The MAX_255 macro checks whether its pcre_uchar input is less than 256.
 
-typedef unsigned char uschar;
+The TABLE_GET macro is designed for accessing elements of tables whose contain
+exactly 256 items. When the character is able to contain more than 256
+items, some check is needed before accessing these tables.
+*/
+
+#ifdef COMPILE_PCRE8
+
+typedef unsigned char pcre_uchar;
+#define IN_UCHARS(x) (x)
+#define MAX_255(c) 1
+#define TABLE_GET(c, table, default) ((table)[c])
+
+#else
+
+#ifdef COMPILE_PCRE16
+#if USHRT_MAX != 65535
+/* This is a warning message. Change PCRE_UCHAR16 to a 16 bit data type in
+pcre.h(.in) and disable (comment out) this message. */
+#error Warning: PCRE_UCHAR16 is not a 16 bit data type.
+#endif
+
+typedef pcre_uint16 pcre_uchar;
+#define IN_UCHARS(x) ((x) << 1)
+#define MAX_255(c) ((c) <= 255u)
+#define TABLE_GET(c, table, default) (MAX_255(c)? ((table)[c]):(default))
+
+#else
+#error Unsupported compiling mode
+#endif /* COMPILE_PCRE16 */
+
+#endif /* COMPILE_PCRE8 */
 
 /* This is an unsigned int value that no character can ever have. UTF-8
 characters only go up to 0x7fffffff (though Unicode doesn't go beyond
@@ -234,8 +291,8 @@ start/end of string field names are. */
 #define IS_NEWLINE(p) \
   ((NLBLOCK->nltype != NLTYPE_FIXED)? \
     ((p) < NLBLOCK->PSEND && \
-     _pcre_is_newline((p), NLBLOCK->nltype, NLBLOCK->PSEND, &(NLBLOCK->nllen),\
-       utf8)) \
+     PRIV(is_newline)((p), NLBLOCK->nltype, NLBLOCK->PSEND, \
+       &(NLBLOCK->nllen), utf)) \
     : \
     ((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \
      (p)[0] == NLBLOCK->nl[0] && \
@@ -248,8 +305,8 @@ start/end of string field names are. */
 #define WAS_NEWLINE(p) \
   ((NLBLOCK->nltype != NLTYPE_FIXED)? \
     ((p) > NLBLOCK->PSSTART && \
-     _pcre_was_newline((p), NLBLOCK->nltype, NLBLOCK->PSSTART, \
-       &(NLBLOCK->nllen), utf8)) \
+     PRIV(was_newline)((p), NLBLOCK->nltype, NLBLOCK->PSSTART, \
+       &(NLBLOCK->nllen), utf)) \
     : \
     ((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \
      (p)[-NLBLOCK->nllen] == NLBLOCK->nl[0] && \
@@ -267,15 +324,11 @@ used for the external interface and appears in pcre.h, which is why its name
 must begin with PCRE_. */
 
 #ifdef CUSTOM_SUBJECT_PTR
-#define PCRE_SPTR CUSTOM_SUBJECT_PTR
-#define USPTR CUSTOM_SUBJECT_PTR
+#define PCRE_PUCHAR CUSTOM_SUBJECT_PTR
 #else
-#define PCRE_SPTR const char *
-#define USPTR const unsigned char *
+#define PCRE_PUCHAR const pcre_uchar *
 #endif
 
-
-
 /* Include the public PCRE header and the definitions of UCP character property
 values. */
 
@@ -343,6 +396,8 @@ The macros are controlled by the value of LINK_SIZE. This defaults to 2 in
 the config.h file, but can be overridden by using -D on the command line. This
 is automated on Unix systems via the "configure" command. */
 
+#ifdef COMPILE_PCRE8
+
 #if LINK_SIZE == 2
 
 #define PUT(a,n,d)   \
@@ -379,13 +434,54 @@ is automated on Unix systems via the "configure" command. */
 #define GET(a,n) \
   (((a)[n] << 24) | ((a)[(n)+1] << 16) | ((a)[(n)+2] << 8) | (a)[(n)+3])
 
-#define MAX_PATTERN_SIZE (1 << 30)   /* Keep it positive */
+/* Keep it positive */
+#define MAX_PATTERN_SIZE (1 << 30)
 
+#else
+#error LINK_SIZE must be either 2, 3, or 4
+#endif
+
+#else /* COMPILE_PCRE8 */
+
+#ifdef COMPILE_PCRE16
+
+#if LINK_SIZE == 2
+
+#undef LINK_SIZE
+#define LINK_SIZE 1
+
+#define PUT(a,n,d)   \
+  (a[n] = (d))
+
+#define GET(a,n) \
+  (a[n])
+
+#define MAX_PATTERN_SIZE (1 << 16)
+
+#elif LINK_SIZE == 3 || LINK_SIZE == 4
+
+#undef LINK_SIZE
+#define LINK_SIZE 2
+
+#define PUT(a,n,d)   \
+  (a[n] = (d) >> 16), \
+  (a[(n)+1] = (d) & 65535)
+
+#define GET(a,n) \
+  (((a)[n] << 16) | (a)[(n)+1])
+
+/* Keep it positive */
+#define MAX_PATTERN_SIZE (1 << 30)
 
 #else
 #error LINK_SIZE must be either 2, 3, or 4
 #endif
 
+#else
+#error Unsupported compiling mode
+#endif /* COMPILE_PCRE16 */
+
+#endif /* COMPILE_PCRE8 */
 
 /* Convenience macro defined in terms of the others */
 
@@ -396,6 +492,10 @@ is automated on Unix systems via the "configure" command. */
 offsets changes. There are used for repeat counts and for other things such as
 capturing parenthesis numbers in back references. */
 
+#ifdef COMPILE_PCRE8
+
+#define IMM2_SIZE 2
+
 #define PUT2(a,n,d)   \
   a[n] = (d) >> 8; \
   a[(n)+1] = (d) & 255
@@ -403,17 +503,39 @@ capturing parenthesis numbers in back references. */
 #define GET2(a,n) \
   (((a)[n] << 8) | (a)[(n)+1])
 
-#define PUT2INC(a,n,d)  PUT2(a,n,d), a += 2
+#else /* COMPILE_PCRE8 */
+
+#ifdef COMPILE_PCRE16
+
+#define IMM2_SIZE 1
+
+#define PUT2(a,n,d)   \
+   a[n] = d
+
+#define GET2(a,n) \
+   a[n]
+
+#else
+#error Unsupported compiling mode
+#endif /* COMPILE_PCRE16 */
+
+#endif /* COMPILE_PCRE8 */
 
+#define PUT2INC(a,n,d)  PUT2(a,n,d), a += IMM2_SIZE
 
-/* When UTF-8 encoding is being used, a character is no longer just a single
-byte. The macros for character handling generate simple sequences when used in
-byte-mode, and more complicated ones for UTF-8 characters. GETCHARLENTEST is
-not used when UTF-8 is not supported, so it is not defined, and BACKCHAR should
-never be called in byte mode. To make sure they can never even appear when
-UTF-8 support is omitted, we don't even define them. */
+/* When UTF encoding is being used, a character is no longer just a single
+character. The macros for character handling generate simple sequences when
+used in character-mode, and more complicated ones for UTF characters.
+GETCHARLENTEST and other macros are not used when UTF is not supported,
+so they are not defined. To make sure they can never even appear when
+UTF support is omitted, we don't even define them. */
 
-#ifndef SUPPORT_UTF8
+#ifndef SUPPORT_UTF
+
+/* #define MAX_VALUE_FOR_SINGLE_CHAR */
+/* #define HAS_EXTRALEN(c) */
+/* #define GET_EXTRALEN(c) */
+/* #define NOT_FIRSTCHAR(c) */
 #define GETCHAR(c, eptr) c = *eptr;
 #define GETCHARTEST(c, eptr) c = *eptr;
 #define GETCHARINC(c, eptr) c = *eptr++;
@@ -421,14 +543,36 @@ UTF-8 support is omitted, we don't even define them. */
 #define GETCHARLEN(c, eptr, len) c = *eptr;
 /* #define GETCHARLENTEST(c, eptr, len) */
 /* #define BACKCHAR(eptr) */
+/* #define FORWARDCHAR(eptr) */
+/* #define ACROSSCHAR(condition, eptr, action) */
+
+#else   /* SUPPORT_UTF */
 
-#else   /* SUPPORT_UTF8 */
+#ifdef COMPILE_PCRE8
 
 /* These macros were originally written in the form of loops that used data
-from the tables whose names start with _pcre_utf8_table. They were rewritten by
+from the tables whose names start with PRIV(utf8_table). They were rewritten by
 a user so as not to use loops, because in some environments this gives a
 significant performance advantage, and it seems never to do any harm. */
 
+/* Tells the biggest code point which can be encoded as a single character. */
+
+#define MAX_VALUE_FOR_SINGLE_CHAR 127
+
+/* Tests whether the code point needs extra characters to decode. */
+
+#define HAS_EXTRALEN(c) ((c) >= 0xc0)
+
+/* Returns with the additional number of characters if IS_MULTICHAR(c) is TRUE.
+Otherwise it has an undefined behaviour. */
+
+#define GET_EXTRALEN(c) (PRIV(utf8_table4)[(c) & 0x3f])
+
+/* Returns TRUE, if the given character is not the first character
+of a UTF sequence. */
+
+#define NOT_FIRSTCHAR(c) (((c) & 0xc0) == 0x80)
+
 /* Base macro to pick up the remaining bytes of a UTF-8 character, not
 advancing the pointer. */
 
@@ -463,7 +607,7 @@ pointer. */
 
 #define GETCHARTEST(c, eptr) \
   c = *eptr; \
-  if (utf8 && c >= 0xc0) GETUTF8(c, eptr);
+  if (utf && c >= 0xc0) GETUTF8(c, eptr);
 
 /* Base macro to pick up the remaining bytes of a UTF-8 character, advancing
 the pointer. */
@@ -511,7 +655,7 @@ This is called when we don't know if we are in UTF-8 mode. */
 
 #define GETCHARINCTEST(c, eptr) \
   c = *eptr++; \
-  if (utf8 && c >= 0xc0) GETUTF8INC(c, eptr);
+  if (utf && c >= 0xc0) GETUTF8INC(c, eptr);
 
 /* Base macro to pick up the remaining bytes of a UTF-8 character, not
 advancing the pointer, incrementing the length. */
@@ -563,7 +707,7 @@ do not know if we are in UTF-8 mode. */
 
 #define GETCHARLENTEST(c, eptr, len) \
   c = *eptr; \
-  if (utf8 && c >= 0xc0) GETUTF8LEN(c, eptr, len);
+  if (utf && c >= 0xc0) GETUTF8LEN(c, eptr, len);
 
 /* If the pointer is not at the start of a character, move it back until
 it is. This is called only in UTF-8 mode - we don't put a test within the macro
@@ -571,7 +715,116 @@ because almost all calls are already within a block of UTF-8 only code. */
 
 #define BACKCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr--
 
-#endif  /* SUPPORT_UTF8 */
+/* Same as above, just in the other direction. */
+#define FORWARDCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr++
+
+/* Same as above, but it allows a fully customizable form. */
+#define ACROSSCHAR(condition, eptr, action) \
+  while((condition) && ((eptr) & 0xc0) == 0x80) action
+
+#else /* COMPILE_PCRE8 */
+
+#ifdef COMPILE_PCRE16
+
+/* Tells the biggest code point which can be encoded as a single character. */
+
+#define MAX_VALUE_FOR_SINGLE_CHAR 65535
+
+/* Tests whether the code point needs extra characters to decode. */
+
+#define HAS_EXTRALEN(c) (((c) & 0xfc00) == 0xd800)
+
+/* Returns with the additional number of characters if IS_MULTICHAR(c) is TRUE.
+Otherwise it has an undefined behaviour. */
+
+#define GET_EXTRALEN(c) 1
+
+/* Returns TRUE, if the given character is not the first character
+of a UTF sequence. */
+
+#define NOT_FIRSTCHAR(c) (((c) & 0xfc00) == 0xdc00)
+
+/* Base macro to pick up the low surrogate of a UTF-16 character, not
+advancing the pointer. */
+
+#define GETUTF16(c, eptr) \
+   { c = (((c & 0x3ff) << 10) | (eptr[1] & 0x3ff)) + 0x10000; }
+
+/* Get the next UTF-16 character, not advancing the pointer. This is called when
+we know we are in UTF-16 mode. */
+
+#define GETCHAR(c, eptr) \
+  c = *eptr; \
+  if ((c & 0xfc00) == 0xd800) GETUTF16(c, eptr);
+
+/* Get the next UTF-16 character, testing for UTF-16 mode, and not advancing the
+pointer. */
+
+#define GETCHARTEST(c, eptr) \
+  c = *eptr; \
+  if (utf && (c & 0xfc00) == 0xd800) GETUTF16(c, eptr);
+
+/* Base macro to pick up the low surrogate of a UTF-16 character, advancing
+the pointer. */
+
+#define GETUTF16INC(c, eptr) \
+   { c = (((c & 0x3ff) << 10) | (*eptr++ & 0x3ff)) + 0x10000; }
+
+/* Get the next UTF-16 character, advancing the pointer. This is called when we
+know we are in UTF-16 mode. */
+
+#define GETCHARINC(c, eptr) \
+  c = *eptr++; \
+  if ((c & 0xfc00) == 0xd800) GETUTF16INC(c, eptr);
+
+/* Get the next character, testing for UTF-16 mode, and advancing the pointer.
+This is called when we don't know if we are in UTF-16 mode. */
+
+#define GETCHARINCTEST(c, eptr) \
+  c = *eptr++; \
+  if (utf && (c & 0xfc00) == 0xd800) GETUTF16INC(c, eptr);
+
+/* Base macro to pick up the low surrogate of a UTF-16 character, not
+advancing the pointer, incrementing the length. */
+
+#define GETUTF16LEN(c, eptr, len) \
+   { c = (((c & 0x3ff) << 10) | (eptr[1] & 0x3ff)) + 0x10000; len++; }
+
+/* Get the next UTF-16 character, not advancing the pointer, incrementing
+length if there is a low surrogate. This is called when we know we are in
+UTF-16 mode. */
+
+#define GETCHARLEN(c, eptr, len) \
+  c = *eptr; \
+  if ((c & 0xfc00) == 0xd800) GETUTF16LEN(c, eptr, len);
+
+/* Get the next UTF-816character, testing for UTF-16 mode, not advancing the
+pointer, incrementing length if there is a low surrogate. This is called when
+we do not know if we are in UTF-16 mode. */
+
+#define GETCHARLENTEST(c, eptr, len) \
+  c = *eptr; \
+  if (utf && (c & 0xfc00) == 0xd800) GETUTF16LEN(c, eptr, len);
+
+/* If the pointer is not at the start of a character, move it back until
+it is. This is called only in UTF-16 mode - we don't put a test within the
+macro because almost all calls are already within a block of UTF-16 only
+code. */
+
+#define BACKCHAR(eptr) if ((*eptr & 0xfc00) == 0xdc00) eptr--
+
+/* Same as above, just in the other direction. */
+#define FORWARDCHAR(eptr) if ((*eptr & 0xfc00) == 0xdc00) eptr++
+
+/* Same as above, but it allows a fully customizable form. */
+#define ACROSSCHAR(condition, eptr, action) \
+  if ((condition) && ((eptr) & 0xfc00) == 0xdc00) action
+
+#endif
+
+#endif /* COMPILE_PCRE8 */
+
+#endif  /* SUPPORT_UTF */
 
 
 /* In case there is no definition of offsetof() provided - though any proper
@@ -588,13 +841,21 @@ are in a 16-bit flags word. From release 8.00, PCRE_NOPARTIAL is unused, as
 the restrictions on partial matching have been lifted. It remains for backwards
 compatibility. */
 
-#define PCRE_NOPARTIAL     0x0001  /* can't use partial with this regex */
-#define PCRE_FIRSTSET      0x0002  /* first_byte is set */
-#define PCRE_REQCHSET      0x0004  /* req_byte is set */
-#define PCRE_STARTLINE     0x0008  /* start after \n for multiline */
-#define PCRE_JCHANGED      0x0010  /* j option used in regex */
-#define PCRE_HASCRORLF     0x0020  /* explicit \r or \n in pattern */
-#define PCRE_HASTHEN       0x0040  /* pattern contains (*THEN) */
+#ifdef COMPILE_PCRE8
+#define PCRE_MODE          0x0001  /* compiled in 8 bit mode */
+#endif
+#ifdef COMPILE_PCRE16
+#define PCRE_MODE          0x0002  /* compiled in 16 bit mode */
+#endif
+#define PCRE_FIRSTSET      0x0010  /* first_char is set */
+#define PCRE_FCH_CASELESS  0x0020  /* caseless first char */
+#define PCRE_REQCHSET      0x0040  /* req_byte is set */
+#define PCRE_RCH_CASELESS  0x0080  /* caseless requested char */
+#define PCRE_STARTLINE     0x0100  /* start after \n for multiline */
+#define PCRE_NOPARTIAL     0x0200  /* can't use partial with this regex */
+#define PCRE_JCHANGED      0x0400  /* j option used in regex */
+#define PCRE_HASCRORLF     0x0800  /* explicit \r or \n in pattern */
+#define PCRE_HASTHEN       0x1000  /* pattern contains (*THEN) */
 
 /* Flags for the "extra" block produced by pcre_study(). */
 
@@ -628,11 +889,15 @@ time, run time, or study time, respectively. */
 #define PUBLIC_STUDY_OPTIONS \
    PCRE_STUDY_JIT_COMPILE
 
-/* Magic number to provide a small check against being handed junk. Also used
-to detect whether a pattern was compiled on a host of different endianness. */
+/* Magic number to provide a small check against being handed junk. */
 
 #define MAGIC_NUMBER  0x50435245UL   /* 'PCRE' */
 
+/* This variable is used to detect a loaded regular expression
+in different endianness. */
+
+#define REVERSED_MAGIC_NUMBER  0x45524350UL   /* 'ERCP' */
+
 /* Negative values for the firstchar and reqchar variables */
 
 #define REQ_UNSET (-2)
@@ -643,12 +908,6 @@ req_byte match. */
 
 #define REQ_BYTE_MAX 1000
 
-/* Flags added to firstbyte or reqbyte; a "non-literal" item is either a
-variable-length repeat, or a anything other than literal characters. */
-
-#define REQ_CASELESS 0x0100    /* indicates caselessness */
-#define REQ_VARY     0x0200    /* reqbyte followed non-literal item */
-
 /* Miscellaneous definitions. The #ifndef is to pacify compiler warnings in
 environments where these macros are defined elsewhere. Unfortunately, there
 is no way to do the same for the typedef. */
@@ -677,7 +936,7 @@ for) in a minority area (EBCDIC platforms), this is not sensible. Any
 application that did need both could compile two versions of the library, using
 macros to give the functions distinct names. */
 
-#ifndef SUPPORT_UTF8
+#ifndef SUPPORT_UTF
 
 /* UTF-8 support is not enabled; use the platform-dependent character literals
 so that PCRE works on both ASCII and EBCDIC platforms, in non-UTF-mode only. */
@@ -937,11 +1196,16 @@ so that PCRE works on both ASCII and EBCDIC platforms, in non-UTF-mode only. */
 #define STRING_ANYCRLF_RIGHTPAR        "ANYCRLF)"
 #define STRING_BSR_ANYCRLF_RIGHTPAR    "BSR_ANYCRLF)"
 #define STRING_BSR_UNICODE_RIGHTPAR    "BSR_UNICODE)"
-#define STRING_UTF8_RIGHTPAR           "UTF8)"
+#ifdef COMPILE_PCRE8
+#define STRING_UTF_RIGHTPAR            "UTF8)"
+#endif
+#ifdef COMPILE_PCRE16
+#define STRING_UTF_RIGHTPAR            "UTF16)"
+#endif
 #define STRING_UCP_RIGHTPAR            "UCP)"
 #define STRING_NO_START_OPT_RIGHTPAR   "NO_START_OPT)"
 
-#else  /* SUPPORT_UTF8 */
+#else  /* SUPPORT_UTF */
 
 /* UTF-8 support is enabled; always use UTF-8 (=ASCII) character codes. This
 works in both modes non-EBCDIC platforms, and on EBCDIC platforms in UTF-8 mode
@@ -1192,11 +1456,16 @@ only. */
 #define STRING_ANYCRLF_RIGHTPAR        STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
 #define STRING_BSR_ANYCRLF_RIGHTPAR    STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
 #define STRING_BSR_UNICODE_RIGHTPAR    STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS
-#define STRING_UTF8_RIGHTPAR           STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS
+#ifdef COMPILE_PCRE8
+#define STRING_UTF_RIGHTPAR            STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS
+#endif
+#ifdef COMPILE_PCRE16
+#define STRING_UTF_RIGHTPAR            STR_U STR_T STR_F STR_1 STR_6 STR_RIGHT_PARENTHESIS
+#endif
 #define STRING_UCP_RIGHTPAR            STR_U STR_C STR_P STR_RIGHT_PARENTHESIS
 #define STRING_NO_START_OPT_RIGHTPAR   STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS
 
-#endif  /* SUPPORT_UTF8 */
+#endif  /* SUPPORT_UTF */
 
 /* Escape items that are just an encoding of a particular data value. */
 
@@ -1236,7 +1505,7 @@ only. */
 #define PT_WORD       8    /* Word - L plus N plus underscore */
 
 /* Flag bits and data types for the extended class (OP_XCLASS) for classes that
-contain UTF-8 characters with values greater than 255. */
+contain characters with values greater than 255. */
 
 #define XCL_NOT    0x01    /* Flag: this is a negative class */
 #define XCL_MAP    0x02    /* Flag: a 32-byte map is present */
@@ -1252,7 +1521,7 @@ value such as \n. They must have non-zero values, as check_escape() returns
 their negation. Also, they must appear in the same order as in the opcode
 definitions below, up to ESC_z. There's a dummy for OP_ALLANY because it
 corresponds to "." in DOTALL mode rather than an escape sequence. It is also
-used for [^] in JavaScript compatibility mode, and for \C in non-utf8 mode. In
+used for [^] in JavaScript compatibility mode, and for \C in non-utf mode. In
 non-DOTALL mode, "." behaves like \N.
 
 The special values ESC_DU, ESC_du, etc. are used instead of ESC_D, ESC_d, etc.
@@ -1433,8 +1702,8 @@ enum {
   OP_CLASS,          /* 106 Match a character class, chars < 256 only */
   OP_NCLASS,         /* 107 Same, but the bitmap was created from a negative
                               class - the difference is relevant only when a
-                              UTF-8 character > 255 is encountered. */
-  OP_XCLASS,         /* 108 Extended class for handling UTF-8 chars within the
+                              character > 255 is encountered. */
+  OP_XCLASS,         /* 108 Extended class for handling > 255 chars within the
                               class. This does both positive and negative. */
   OP_REF,            /* 109 Match a back reference, casefully */
   OP_REFI,           /* 110 Match a back reference, caselessly */
@@ -1591,30 +1860,35 @@ in UTF-8 mode. The code that uses this table must know about such things. */
   2,                             /* noti                                   */ \
   /* Positive single-char repeats                             ** These are */ \
   2, 2, 2, 2, 2, 2,              /* *, *?, +, +?, ?, ??       ** minima in */ \
-  4, 4, 4,                       /* upto, minupto, exact      ** mode      */ \
-  2, 2, 2, 4,                    /* *+, ++, ?+, upto+                      */ \
+  2+IMM2_SIZE, 2+IMM2_SIZE,      /* upto, minupto             ** mode      */ \
+  2+IMM2_SIZE,                   /* exact                                  */ \
+  2, 2, 2, 2+IMM2_SIZE,          /* *+, ++, ?+, upto+                      */ \
   2, 2, 2, 2, 2, 2,              /* *I, *?I, +I, +?I, ?I, ??I ** UTF-8     */ \
-  4, 4, 4,                       /* upto I, minupto I, exact I             */ \
-  2, 2, 2, 4,                    /* *+I, ++I, ?+I, upto+I                  */ \
+  2+IMM2_SIZE, 2+IMM2_SIZE,      /* upto I, minupto I                      */ \
+  2+IMM2_SIZE,                   /* exact I                                */ \
+  2, 2, 2, 2+IMM2_SIZE,          /* *+I, ++I, ?+I, upto+I                  */ \
   /* Negative single-char repeats - only for chars < 256                   */ \
   2, 2, 2, 2, 2, 2,              /* NOT *, *?, +, +?, ?, ??                */ \
-  4, 4, 4,                       /* NOT upto, minupto, exact               */ \
-  2, 2, 2, 4,                    /* Possessive NOT *, +, ?, upto           */ \
+  2+IMM2_SIZE, 2+IMM2_SIZE,      /* NOT upto, minupto                      */ \
+  2+IMM2_SIZE,                   /* NOT exact                              */ \
+  2, 2, 2, 2+IMM2_SIZE,          /* Possessive NOT *, +, ?, upto           */ \
   2, 2, 2, 2, 2, 2,              /* NOT *I, *?I, +I, +?I, ?I, ??I          */ \
-  4, 4, 4,                       /* NOT upto I, minupto I, exact I         */ \
-  2, 2, 2, 4,                    /* Possessive NOT *I, +I, ?I, upto I      */ \
+  2+IMM2_SIZE, 2+IMM2_SIZE,      /* NOT upto I, minupto I                  */ \
+  2+IMM2_SIZE,                   /* NOT exact I                            */ \
+  2, 2, 2, 2+IMM2_SIZE,          /* Possessive NOT *I, +I, ?I, upto I      */ \
   /* Positive type repeats                                                 */ \
   2, 2, 2, 2, 2, 2,              /* Type *, *?, +, +?, ?, ??               */ \
-  4, 4, 4,                       /* Type upto, minupto, exact              */ \
-  2, 2, 2, 4,                    /* Possessive *+, ++, ?+, upto+           */ \
+  2+IMM2_SIZE, 2+IMM2_SIZE,      /* Type upto, minupto                     */ \
+  2+IMM2_SIZE,                   /* Type exact                             */ \
+  2, 2, 2, 2+IMM2_SIZE,          /* Possessive *+, ++, ?+, upto+           */ \
   /* Character class & ref repeats                                         */ \
   1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */ \
-  5, 5,                          /* CRRANGE, CRMINRANGE                    */ \
- 33,                             /* CLASS                                  */ \
- 33,                             /* NCLASS                                 */ \
+  1+2*IMM2_SIZE, 1+2*IMM2_SIZE,  /* CRRANGE, CRMINRANGE                    */ \
+  1+(32/sizeof(pcre_uchar)),     /* CLASS                                  */ \
+  1+(32/sizeof(pcre_uchar)),     /* NCLASS                                 */ \
   0,                             /* XCLASS - variable length               */ \
-  3,                             /* REF                                    */ \
-  3,                             /* REFI                                   */ \
+  1+IMM2_SIZE,                   /* REF                                    */ \
+  1+IMM2_SIZE,                   /* REFI                                   */ \
   1+LINK_SIZE,                   /* RECURSE                                */ \
   2+2*LINK_SIZE,                 /* CALLOUT                                */ \
   1+LINK_SIZE,                   /* Alt                                    */ \
@@ -1631,23 +1905,23 @@ in UTF-8 mode. The code that uses this table must know about such things. */
   1+LINK_SIZE,                   /* ONCE_NC                                */ \
   1+LINK_SIZE,                   /* BRA                                    */ \
   1+LINK_SIZE,                   /* BRAPOS                                 */ \
-  3+LINK_SIZE,                   /* CBRA                                   */ \
-  3+LINK_SIZE,                   /* CBRAPOS                                */ \
+  1+LINK_SIZE+IMM2_SIZE,         /* CBRA                                   */ \
+  1+LINK_SIZE+IMM2_SIZE,         /* CBRAPOS                                */ \
   1+LINK_SIZE,                   /* COND                                   */ \
   1+LINK_SIZE,                   /* SBRA                                   */ \
   1+LINK_SIZE,                   /* SBRAPOS                                */ \
-  3+LINK_SIZE,                   /* SCBRA                                  */ \
-  3+LINK_SIZE,                   /* SCBRAPOS                               */ \
+  1+LINK_SIZE+IMM2_SIZE,         /* SCBRA                                  */ \
+  1+LINK_SIZE+IMM2_SIZE,         /* SCBRAPOS                               */ \
   1+LINK_SIZE,                   /* SCOND                                  */ \
-  3, 3,                          /* CREF, NCREF                            */ \
-  3, 3,                          /* RREF, NRREF                            */ \
+  1+IMM2_SIZE, 1+IMM2_SIZE,      /* CREF, NCREF                            */ \
+  1+IMM2_SIZE, 1+IMM2_SIZE,      /* RREF, NRREF                            */ \
   1,                             /* DEF                                    */ \
   1, 1, 1,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO        */ \
   3, 1, 3,                       /* MARK, PRUNE, PRUNE_ARG                 */ \
   1, 3,                          /* SKIP, SKIP_ARG                         */ \
   1, 3,                          /* THEN, THEN_ARG                         */ \
   1, 1, 1, 1,                    /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT    */ \
-  3, 1                           /* CLOSE, SKIPZERO  */
+  1+IMM2_SIZE, 1                 /* CLOSE, SKIPZERO                        */
 
 /* A magic value for OP_RREF and OP_NRREF to indicate the "any recursion"
 condition. */
@@ -1665,7 +1939,7 @@ enum { ERR0,  ERR1,  ERR2,  ERR3,  ERR4,  ERR5,  ERR6,  ERR7,  ERR8,  ERR9,
        ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
        ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
        ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69,
-       ERR70, ERR71, ERR72, ERRCOUNT };
+       ERR70, ERR71, ERR72, ERR73, ERR74, ERRCOUNT };
 
 /* The real format of the start of the pcre block; the index of names and the
 code vector run on as long as necessary after the end. We store an explicit
@@ -1684,7 +1958,13 @@ fields are present. Currently PCRE always sets the dummy fields to zero.
 NOTE NOTE NOTE
 */
 
-typedef struct real_pcre {
+#ifdef COMPILE_PCRE8
+#define REAL_PCRE real_pcre
+#else
+#define REAL_PCRE real_pcre16
+#endif
+
+typedef struct REAL_PCRE {
   pcre_uint32 magic_number;
   pcre_uint32 size;               /* Total that was malloced */
   pcre_uint32 options;            /* Public options */
@@ -1692,16 +1972,16 @@ typedef struct real_pcre {
   pcre_uint16 dummy1;             /* For future use */
   pcre_uint16 top_bracket;
   pcre_uint16 top_backref;
-  pcre_uint16 first_byte;
-  pcre_uint16 req_byte;
+  pcre_uint16 first_char;         /* Starting character */
+  pcre_uint16 req_char;           /* This character must be seen */
   pcre_uint16 name_table_offset;  /* Offset to name table that follows */
   pcre_uint16 name_entry_size;    /* Size of any name items */
   pcre_uint16 name_count;         /* Number of name items */
   pcre_uint16 ref_count;          /* Reference count */
 
-  const unsigned char *tables;    /* Pointer to tables or NULL for std */
-  const unsigned char *nullpad;   /* NULL padding */
-} real_pcre;
+  const pcre_uint8 *tables;       /* Pointer to tables or NULL for std */
+  const pcre_uint8 *nullpad;      /* NULL padding */
+} REAL_PCRE;
 
 /* The format of the block used to store data from pcre_study(). The same
 remark (see NOTE above) about extending this structure applies. */
@@ -1709,7 +1989,7 @@ remark (see NOTE above) about extending this structure applies. */
 typedef struct pcre_study_data {
   pcre_uint32 size;               /* Total that was malloced */
   pcre_uint32 flags;              /* Private flags */
-  uschar start_bits[32];          /* Starting char bits */
+  pcre_uint8 start_bits[32];      /* Starting char bits */
   pcre_uint32 minlength;          /* Minimum subject length */
 } pcre_study_data;
 
@@ -1728,33 +2008,33 @@ typedef struct open_capitem {
 doing the compiling, so that they are thread-safe. */
 
 typedef struct compile_data {
-  const uschar *lcc;            /* Points to lower casing table */
-  const uschar *fcc;            /* Points to case-flipping table */
-  const uschar *cbits;          /* Points to character type table */
-  const uschar *ctypes;         /* Points to table of type maps */
-  const uschar *start_workspace;/* The start of working space */
-  const uschar *start_code;     /* The start of the compiled code */
-  const uschar *start_pattern;  /* The start of the pattern */
-  const uschar *end_pattern;    /* The end of the pattern */
-  open_capitem *open_caps;      /* Chain of open capture items */
-  uschar *hwm;                  /* High watermark of workspace */
-  uschar *name_table;           /* The name/number table */
-  int  names_found;             /* Number of entries so far */
-  int  name_entry_size;         /* Size of each entry */
-  int  workspace_size;          /* Size of workspace */
-  int  bracount;                /* Count of capturing parens as we compile */
-  int  final_bracount;          /* Saved value after first pass */
-  int  top_backref;             /* Maximum back reference */
-  unsigned int backref_map;     /* Bitmap of low back refs */
-  int  assert_depth;            /* Depth of nested assertions */
-  int  external_options;        /* External (initial) options */
-  int  external_flags;          /* External flag bits to be set */
-  int  req_varyopt;             /* "After variable item" flag for reqbyte */
-  BOOL had_accept;              /* (*ACCEPT) encountered */
-  BOOL check_lookbehind;        /* Lookbehinds need later checking */
-  int  nltype;                  /* Newline type */
-  int  nllen;                   /* Newline string length */
-  uschar nl[4];                 /* Newline string when fixed length */
+  const pcre_uint8 *lcc;            /* Points to lower casing table */
+  const pcre_uint8 *fcc;            /* Points to case-flipping table */
+  const pcre_uint8 *cbits;          /* Points to character type table */
+  const pcre_uint8 *ctypes;         /* Points to table of type maps */
+  const pcre_uchar *start_workspace;/* The start of working space */
+  const pcre_uchar *start_code;     /* The start of the compiled code */
+  const pcre_uchar *start_pattern;  /* The start of the pattern */
+  const pcre_uchar *end_pattern;    /* The end of the pattern */
+  open_capitem *open_caps;          /* Chain of open capture items */
+  pcre_uchar *hwm;                  /* High watermark of workspace */
+  pcre_uchar *name_table;           /* The name/number table */
+  int  names_found;                 /* Number of entries so far */
+  int  name_entry_size;             /* Size of each entry */
+  int  workspace_size;              /* Size of workspace */
+  int  bracount;                    /* Count of capturing parens as we compile */
+  int  final_bracount;              /* Saved value after first pass */
+  int  top_backref;                 /* Maximum back reference */
+  unsigned int backref_map;         /* Bitmap of low back refs */
+  int  assert_depth;                /* Depth of nested assertions */
+  int  external_options;            /* External (initial) options */
+  int  external_flags;              /* External flag bits to be set */
+  int  req_varyopt;                 /* "After variable item" flag for reqbyte */
+  BOOL had_accept;                  /* (*ACCEPT) encountered */
+  BOOL check_lookbehind;            /* Lookbehinds need later checking */
+  int  nltype;                      /* Newline type */
+  int  nllen;                       /* Newline string length */
+  pcre_uchar nl[4];                 /* Newline string when fixed length */
 } compile_data;
 
 /* Structure for maintaining a chain of pointers to the currently incomplete
@@ -1762,7 +2042,7 @@ branches, for testing for left recursion while compiling. */
 
 typedef struct branch_chain {
   struct branch_chain *outer;
-  uschar *current_branch;
+  pcre_uchar *current_branch;
 } branch_chain;
 
 /* Structure for items in a linked list that represents an explicit recursive
@@ -1773,7 +2053,7 @@ typedef struct recursion_info {
   int group_num;                  /* Number of group that was called */
   int *offset_save;               /* Pointer to start of saved offsets */
   int saved_max;                  /* Number of saved offsets */
-  USPTR subject_position;         /* Position at start of recursion */
+  PCRE_PUCHAR subject_position;   /* Position at start of recursion */
 } recursion_info;
 
 /* A similar structure for pcre_dfa_exec(). */
@@ -1781,7 +2061,7 @@ typedef struct recursion_info {
 typedef struct dfa_recursion_info {
   struct dfa_recursion_info *prevrec;
   int group_num;
-  USPTR subject_position;
+  PCRE_PUCHAR subject_position;
 } dfa_recursion_info;
 
 /* Structure for building a chain of data for holding the values of the subject
@@ -1791,7 +2071,7 @@ pcre_exec(). */
 
 typedef struct eptrblock {
   struct eptrblock *epb_prev;
-  USPTR epb_saved_eptr;
+  PCRE_PUCHAR epb_saved_eptr;
 } eptrblock;
 
 
@@ -1802,67 +2082,68 @@ typedef struct match_data {
   unsigned long int match_call_count;      /* As it says */
   unsigned long int match_limit;           /* As it says */
   unsigned long int match_limit_recursion; /* As it says */
-  int   *offset_vector;         /* Offset vector */
-  int    offset_end;            /* One past the end */
-  int    offset_max;            /* The maximum usable for return data */
-  int    nltype;                /* Newline type */
-  int    nllen;                 /* Newline string length */
-  int    name_count;            /* Number of names in name table */
-  int    name_entry_size;       /* Size of entry in names table */
-  uschar *name_table;           /* Table of names */
-  uschar nl[4];                 /* Newline string when fixed */
-  const  uschar *lcc;           /* Points to lower casing table */
-  const  uschar *ctypes;        /* Points to table of type maps */
-  BOOL   offset_overflow;       /* Set if too many extractions */
-  BOOL   notbol;                /* NOTBOL flag */
-  BOOL   noteol;                /* NOTEOL flag */
-  BOOL   utf8;                  /* UTF8 flag */
-  BOOL   jscript_compat;        /* JAVASCRIPT_COMPAT flag */
-  BOOL   use_ucp;               /* PCRE_UCP flag */
-  BOOL   endonly;               /* Dollar not before final \n */
-  BOOL   notempty;              /* Empty string match not wanted */
-  BOOL   notempty_atstart;      /* Empty string match at start not wanted */
-  BOOL   hitend;                /* Hit the end of the subject at some point */
-  BOOL   bsr_anycrlf;           /* \R is just any CRLF, not full Unicode */
-  BOOL   hasthen;               /* Pattern contains (*THEN) */
-  BOOL   ignore_skip_arg;       /* For re-run when SKIP name not found */
-  const  uschar *start_code;    /* For use when recursing */
-  USPTR  start_subject;         /* Start of the subject string */
-  USPTR  end_subject;           /* End of the subject string */
-  USPTR  start_match_ptr;       /* Start of matched string */
-  USPTR  end_match_ptr;         /* Subject position at end match */
-  USPTR  start_used_ptr;        /* Earliest consulted character */
-  int    partial;               /* PARTIAL options */
-  int    end_offset_top;        /* Highwater mark at end of match */
-  int    capture_last;          /* Most recent capture number */
-  int    start_offset;          /* The start offset value */
-  int    match_function_type;   /* Set for certain special calls of MATCH() */
-  eptrblock *eptrchain;         /* Chain of eptrblocks for tail recursions */
-  int    eptrn;                 /* Next free eptrblock */
-  recursion_info *recursive;    /* Linked list of recursion data */
-  void  *callout_data;          /* To pass back to callouts */
-  const  uschar *mark;          /* Mark pointer to pass back on success */
-  const  uschar *nomatch_mark;  /* Mark pointer to pass back on failure */
-  const  uschar *once_target;   /* Where to back up to for atomic groups */
+  int   *offset_vector;           /* Offset vector */
+  int    offset_end;              /* One past the end */
+  int    offset_max;              /* The maximum usable for return data */
+  int    nltype;                  /* Newline type */
+  int    nllen;                   /* Newline string length */
+  int    name_count;              /* Number of names in name table */
+  int    name_entry_size;         /* Size of entry in names table */
+  pcre_uchar *name_table;         /* Table of names */
+  pcre_uchar nl[4];               /* Newline string when fixed */
+  const  pcre_uint8 *lcc;         /* Points to lower casing table */
+  const  pcre_uint8 *fcc;         /* Points to case-flipping table */
+  const  pcre_uint8 *ctypes;      /* Points to table of type maps */
+  BOOL   offset_overflow;         /* Set if too many extractions */
+  BOOL   notbol;                  /* NOTBOL flag */
+  BOOL   noteol;                  /* NOTEOL flag */
+  BOOL   utf;                     /* UTF-8 / UTF-16 flag */
+  BOOL   jscript_compat;          /* JAVASCRIPT_COMPAT flag */
+  BOOL   use_ucp;                 /* PCRE_UCP flag */
+  BOOL   endonly;                 /* Dollar not before final \n */
+  BOOL   notempty;                /* Empty string match not wanted */
+  BOOL   notempty_atstart;        /* Empty string match at start not wanted */
+  BOOL   hitend;                  /* Hit the end of the subject at some point */
+  BOOL   bsr_anycrlf;             /* \R is just any CRLF, not full Unicode */
+  BOOL   hasthen;                 /* Pattern contains (*THEN) */
+  BOOL   ignore_skip_arg;         /* For re-run when SKIP name not found */
+  const  pcre_uchar *start_code;  /* For use when recursing */
+  PCRE_PUCHAR start_subject;      /* Start of the subject string */
+  PCRE_PUCHAR end_subject;        /* End of the subject string */
+  PCRE_PUCHAR start_match_ptr;    /* Start of matched string */
+  PCRE_PUCHAR end_match_ptr;      /* Subject position at end match */
+  PCRE_PUCHAR start_used_ptr;     /* Earliest consulted character */
+  int    partial;                 /* PARTIAL options */
+  int    end_offset_top;          /* Highwater mark at end of match */
+  int    capture_last;            /* Most recent capture number */
+  int    start_offset;            /* The start offset value */
+  int    match_function_type;     /* Set for certain special calls of MATCH() */
+  eptrblock *eptrchain;           /* Chain of eptrblocks for tail recursions */
+  int    eptrn;                   /* Next free eptrblock */
+  recursion_info *recursive;      /* Linked list of recursion data */
+  void  *callout_data;            /* To pass back to callouts */
+  const  pcre_uchar *mark;        /* Mark pointer to pass back on success */
+  const  pcre_uchar *nomatch_mark;/* Mark pointer to pass back on failure */
+  const  pcre_uchar *once_target; /* Where to back up to for atomic groups */
 } match_data;
 
 /* A similar structure is used for the same purpose by the DFA matching
 functions. */
 
 typedef struct dfa_match_data {
-  const uschar *start_code;      /* Start of the compiled pattern */
-  const uschar *start_subject;   /* Start of the subject string */
-  const uschar *end_subject;     /* End of subject string */
-  const uschar *start_used_ptr;  /* Earliest consulted character */
-  const uschar *tables;          /* Character tables */
-  int   start_offset;            /* The start offset value */
-  int   moptions;                /* Match options */
-  int   poptions;                /* Pattern options */
-  int    nltype;                 /* Newline type */
-  int    nllen;                  /* Newline string length */
-  uschar nl[4];                  /* Newline string when fixed */
-  void  *callout_data;           /* To pass back to callouts */
-  dfa_recursion_info *recursive; /* Linked list of recursion data */
+  const pcre_uchar *start_code;     /* Start of the compiled pattern */
+  const pcre_uchar *start_subject ; /* Start of the subject string */
+  const pcre_uchar *end_subject;    /* End of subject string */
+  const pcre_uchar *start_used_ptr; /* Earliest consulted character */
+  const pcre_uint8 *tables;         /* Character tables */
+  int   start_offset;               /* The start offset value */
+  int   moptions;                   /* Match options */
+  int   poptions;                   /* Pattern options */
+  int   nltype;                     /* Newline type */
+  int   nllen;                      /* Newline string length */
+  pcre_uchar nl[4];                 /* Newline string when fixed */
+  void *callout_data;               /* To pass back to callouts */
+  dfa_recursion_info *recursive;    /* Linked list of recursion data */
 } dfa_match_data;
 
 /* Bit definitions for entries in the pcre_ctypes table. */
@@ -1898,6 +2179,28 @@ total length. */
 #define ctypes_offset (cbits_offset + cbit_length)
 #define tables_length (ctypes_offset + 256)
 
+/* Internal function prefix */
+
+#ifdef COMPILE_PCRE8
+#ifndef PUBL
+#define PUBL(name) pcre_##name
+#endif
+#ifndef PRIV
+#define PRIV(name) _pcre_##name
+#endif
+#else /* COMPILE_PCRE8 */
+#ifdef COMPILE_PCRE16
+#ifndef PUBL
+#define PUBL(name) pcre16_##name
+#endif
+#ifndef PRIV
+#define PRIV(name) _pcre16_##name
+#endif
+#else
+#error Unsupported compiling mode
+#endif /* COMPILE_PCRE16 */
+#endif /* COMPILE_PCRE8 */
+
 /* Layout of the UCP type table that translates property names into types and
 codes. Each entry used to point directly to a name, but to reduce the number of
 relocations in shared libraries, it now has an offset into a single string
@@ -1915,75 +2218,115 @@ of the exported public functions. They have to be "external" in the C sense,
 but are not part of the PCRE public API. The data for these tables is in the
 pcre_tables.c module. */
 
-extern const int    _pcre_utf8_table1[];
-extern const int    _pcre_utf8_table2[];
-extern const int    _pcre_utf8_table3[];
-extern const uschar _pcre_utf8_table4[];
+#ifdef COMPILE_PCRE8
 
-#ifdef SUPPORT_JIT
-extern const uschar _pcre_utf8_char_sizes[];
-#endif
+extern const int            PRIV(utf8_table1)[];
+extern const int            PRIV(utf8_table1_size);
+extern const int            PRIV(utf8_table2)[];
+extern const int            PRIV(utf8_table3)[];
+extern const pcre_uint8     PRIV(utf8_table4)[];
 
-extern const int    _pcre_utf8_table1_size;
+#endif /* COMPILE_PCRE8 */
 
-extern const char   _pcre_utt_names[];
-extern const ucp_type_table _pcre_utt[];
-extern const int _pcre_utt_size;
+extern const char           PRIV(utt_names)[];
+extern const ucp_type_table PRIV(utt)[];
+extern const int            PRIV(utt_size);
 
-extern const uschar _pcre_default_tables[];
+extern const pcre_uint8     PRIV(default_tables)[];
 
-extern const uschar _pcre_OP_lengths[];
+extern const pcre_uint8     PRIV(OP_lengths)[];
 
 
 /* Internal shared functions. These are functions that are used by more than
 one of the exported public functions. They have to be "external" in the C
 sense, but are not part of the PCRE public API. */
 
-extern const uschar *_pcre_find_bracket(const uschar *, BOOL, int);
-extern BOOL          _pcre_is_newline(USPTR, int, USPTR, int *, BOOL);
-extern int           _pcre_ord2utf8(int, uschar *);
-extern real_pcre    *_pcre_try_flipped(const real_pcre *, real_pcre *,
-                       const pcre_study_data *, pcre_study_data *);
-extern int           _pcre_valid_utf8(USPTR, int, int *);
-extern BOOL          _pcre_was_newline(USPTR, int, USPTR, int *, BOOL);
-extern BOOL          _pcre_xclass(int, const uschar *);
+/* String comparison functions. */
+#ifdef COMPILE_PCRE8
+
+#define STRCMP_UC_UC(str1, str2) \
+  strcmp((char *)(str1), (char *)(str2))
+#define STRCMP_UC_C8(str1, str2) \
+  strcmp((char *)(str1), (str2))
+#define STRNCMP_UC_UC(str1, str2, num) \
+  strncmp((char *)(str1), (char *)(str2), (num))
+#define STRNCMP_UC_C8(str1, str2, num) \
+  strncmp((char *)(str1), (str2), (num))
+#define STRLEN_UC(str) strlen((const char *)str)
+
+#else
+
+extern int               PRIV(strcmp_uc_uc)(const pcre_uchar *,
+                           const pcre_uchar *);
+extern int               PRIV(strcmp_uc_c8)(const pcre_uchar *,
+                           const char *);
+extern int               PRIV(strncmp_uc_uc)(const pcre_uchar *,
+                           const pcre_uchar *, unsigned int num);
+extern int               PRIV(strncmp_uc_c8)(const pcre_uchar *,
+                           const char *, unsigned int num);
+extern unsigned int      PRIV(strlen_uc)(const pcre_uchar *str);
+
+#define STRCMP_UC_UC(str1, str2) \
+  PRIV(strcmp_uc_uc)((str1), (str2))
+#define STRCMP_UC_C8(str1, str2) \
+  PRIV(strcmp_uc_c8)((str1), (str2))
+#define STRNCMP_UC_UC(str1, str2, num) \
+  PRIV(strncmp_uc_uc)((str1), (str2), (num))
+#define STRNCMP_UC_C8(str1, str2, num) \
+  PRIV(strncmp_uc_c8)((str1), (str2), (num))
+#define STRLEN_UC(str) PRIV(strlen_uc)(str)
+
+#endif /* COMPILE_PCRE8 */
+
+extern const pcre_uchar *PRIV(find_bracket)(const pcre_uchar *, BOOL, int);
+extern BOOL              PRIV(is_newline)(PCRE_PUCHAR, int, PCRE_PUCHAR,
+                           int *, BOOL);
+extern int               PRIV(ord2utf)(pcre_uint32, pcre_uchar *);
+extern int               PRIV(valid_utf)(PCRE_PUCHAR, int, int *);
+extern BOOL              PRIV(was_newline)(PCRE_PUCHAR, int, PCRE_PUCHAR,
+                           int *, BOOL);
+extern BOOL              PRIV(xclass)(int, const pcre_uchar *, BOOL);
 
 #ifdef SUPPORT_JIT
-extern void          _pcre_jit_compile(const real_pcre *, pcre_extra *);
-extern int           _pcre_jit_exec(const real_pcre *, void *, PCRE_SPTR,
-                        int, int, int, int, int *, int);
-extern void          _pcre_jit_free(void *);
-extern int           _pcre_jit_get_size(void *);
+extern void              PRIV(jit_compile)(const REAL_PCRE *, PUBL(extra) *);
+extern int               PRIV(jit_exec)(const REAL_PCRE *, void *,
+                           const pcre_uchar *, int, int, int, int, int *, int);
+extern void              PRIV(jit_free)(void *);
+extern int               PRIV(jit_get_size)(void *);
+extern const char*       PRIV(jit_get_target)(void);
 #endif
 
 /* Unicode character database (UCD) */
 
 typedef struct {
-  uschar script;
-  uschar chartype;
+  pcre_uint8 script;
+  pcre_uint8 chartype;
   pcre_int32 other_case;
 } ucd_record;
 
-extern const ucd_record  _pcre_ucd_records[];
-extern const uschar      _pcre_ucd_stage1[];
-extern const pcre_uint16 _pcre_ucd_stage2[];
-extern const int         _pcre_ucp_gentype[];
+extern const ucd_record  PRIV(ucd_records)[];
+extern const pcre_uint8  PRIV(ucd_stage1)[];
+extern const pcre_uint16 PRIV(ucd_stage2)[];
+extern const int         PRIV(ucp_gentype)[];
 #ifdef SUPPORT_JIT
-extern const int         _pcre_ucp_typerange[];
+extern const int         PRIV(ucp_typerange)[];
 #endif
 
+#ifdef SUPPORT_UCP
 /* UCD access macros */
 
 #define UCD_BLOCK_SIZE 128
-#define GET_UCD(ch) (_pcre_ucd_records + \
-        _pcre_ucd_stage2[_pcre_ucd_stage1[(ch) / UCD_BLOCK_SIZE] * \
+#define GET_UCD(ch) (PRIV(ucd_records) + \
+        PRIV(ucd_stage2)[PRIV(ucd_stage1)[(ch) / UCD_BLOCK_SIZE] * \
         UCD_BLOCK_SIZE + (ch) % UCD_BLOCK_SIZE])
 
 #define UCD_CHARTYPE(ch)  GET_UCD(ch)->chartype
 #define UCD_SCRIPT(ch)    GET_UCD(ch)->script
-#define UCD_CATEGORY(ch)  _pcre_ucp_gentype[UCD_CHARTYPE(ch)]
+#define UCD_CATEGORY(ch)  PRIV(ucp_gentype)[UCD_CHARTYPE(ch)]
 #define UCD_OTHERCASE(ch) (ch + GET_UCD(ch)->other_case)
 
+#endif /* SUPPORT_UCP */
+
 #endif
 
 /* End of pcre_internal.h */
diff --git a/usr.sbin/nginx/src/pcre/pcre_newline.c b/usr.sbin/nginx/src/pcre/pcre_newline.c
index 38cf7f72f8d..a0a13c8ed11 100644
--- a/usr.sbin/nginx/src/pcre/pcre_newline.c
+++ b/usr.sbin/nginx/src/pcre/pcre_newline.c
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
-           Copyright (c) 1997-2009 University of Cambridge
+           Copyright (c) 1997-2012 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -67,16 +67,25 @@ Arguments:
   type         the newline type
   endptr       pointer to the end of the string
   lenptr       where to return the length
-  utf8         TRUE if in utf8 mode
+  utf          TRUE if in utf mode
 
 Returns:       TRUE or FALSE
 */
 
 BOOL
-_pcre_is_newline(USPTR ptr, int type, USPTR endptr, int *lenptr, BOOL utf8)
+PRIV(is_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR endptr, int *lenptr,
+  BOOL utf)
 {
 int c;
-if (utf8) { GETCHAR(c, ptr); } else c = *ptr;
+(void)utf;
+#ifdef SUPPORT_UTF
+if (utf)
+  {
+  GETCHAR(c, ptr);
+  }
+else
+#endif  /* SUPPORT_UTF */
+  c = *ptr;
 
 if (type == NLTYPE_ANYCRLF) switch(c)
   {
@@ -95,9 +104,15 @@ else switch(c)
   case 0x000c: *lenptr = 1; return TRUE;             /* FF */
   case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1;
                return TRUE;                          /* CR */
-  case 0x0085: *lenptr = utf8? 2 : 1; return TRUE;   /* NEL */
+#ifdef COMPILE_PCRE8
+  case 0x0085: *lenptr = utf? 2 : 1; return TRUE;    /* NEL */
   case 0x2028:                                       /* LS */
   case 0x2029: *lenptr = 3; return TRUE;             /* PS */
+#else
+  case 0x0085:                                       /* NEL */
+  case 0x2028:                                       /* LS */
+  case 0x2029: *lenptr = 1; return TRUE;             /* PS */
+#endif /* COMPILE_PCRE8 */
   default: return FALSE;
   }
 }
@@ -116,26 +131,27 @@ Arguments:
   type         the newline type
   startptr     pointer to the start of the string
   lenptr       where to return the length
-  utf8         TRUE if in utf8 mode
+  utf          TRUE if in utf mode
 
 Returns:       TRUE or FALSE
 */
 
 BOOL
-_pcre_was_newline(USPTR ptr, int type, USPTR startptr, int *lenptr, BOOL utf8)
+PRIV(was_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR startptr, int *lenptr,
+  BOOL utf)
 {
 int c;
+(void)utf;
 ptr--;
-#ifdef SUPPORT_UTF8
-if (utf8)
+#ifdef SUPPORT_UTF
+if (utf)
   {
   BACKCHAR(ptr);
   GETCHAR(c, ptr);
   }
-else c = *ptr;
-#else   /* no UTF-8 support */
-c = *ptr;
-#endif  /* SUPPORT_UTF8 */
+else
+#endif  /* SUPPORT_UTF */
+  c = *ptr;
 
 if (type == NLTYPE_ANYCRLF) switch(c)
   {
@@ -152,9 +168,15 @@ else switch(c)
   case 0x000b:                                      /* VT */
   case 0x000c:                                      /* FF */
   case 0x000d: *lenptr = 1; return TRUE;            /* CR */
-  case 0x0085: *lenptr = utf8? 2 : 1; return TRUE;  /* NEL */
+#ifdef COMPILE_PCRE8
+  case 0x0085: *lenptr = utf? 2 : 1; return TRUE;   /* NEL */
   case 0x2028:                                      /* LS */
   case 0x2029: *lenptr = 3; return TRUE;            /* PS */
+#else
+  case 0x0085:                                       /* NEL */
+  case 0x2028:                                       /* LS */
+  case 0x2029: *lenptr = 1; return TRUE;             /* PS */
+#endif /* COMPILE_PCRE8 */
   default: return FALSE;
   }
 }
diff --git a/usr.sbin/nginx/src/pcre/pcre_ord2utf8.c b/usr.sbin/nginx/src/pcre/pcre_ord2utf8.c
index 6f4eb9ebe95..50fca9525ac 100644
--- a/usr.sbin/nginx/src/pcre/pcre_ord2utf8.c
+++ b/usr.sbin/nginx/src/pcre/pcre_ord2utf8.c
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
-           Copyright (c) 1997-2008 University of Cambridge
+           Copyright (c) 1997-2012 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -52,35 +52,45 @@ character value into a UTF8 string. */
 *       Convert character value to UTF-8         *
 *************************************************/
 
-/* This function takes an integer value in the range 0 - 0x7fffffff
-and encodes it as a UTF-8 character in 0 to 6 bytes.
+/* This function takes an integer value in the range 0 - 0x10ffff
+and encodes it as a UTF-8 character in 1 to 6 pcre_uchars.
 
 Arguments:
   cvalue     the character value
-  buffer     pointer to buffer for result - at least 6 bytes long
+  buffer     pointer to buffer for result - at least 6 pcre_uchars long
 
 Returns:     number of characters placed in the buffer
 */
 
 int
-_pcre_ord2utf8(int cvalue, uschar *buffer)
+PRIV(ord2utf)(pcre_uint32 cvalue, pcre_uchar *buffer)
 {
-#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UTF
+
 register int i, j;
-for (i = 0; i < _pcre_utf8_table1_size; i++)
-  if (cvalue <= _pcre_utf8_table1[i]) break;
+
+/* Checking invalid cvalue character, encoded as invalid UTF-16 character.
+Should never happen in practice. */
+if ((cvalue & 0xf800) == 0xd800 || cvalue >= 0x110000)
+  cvalue = 0xfffe;
+
+for (i = 0; i < PRIV(utf8_table1_size); i++)
+  if ((int)cvalue <= PRIV(utf8_table1)[i]) break;
 buffer += i;
 for (j = i; j > 0; j--)
  {
  *buffer-- = 0x80 | (cvalue & 0x3f);
  cvalue >>= 6;
  }
-*buffer = _pcre_utf8_table2[i] | cvalue;
+*buffer = PRIV(utf8_table2)[i] | cvalue;
 return i + 1;
+
 #else
+
 (void)(cvalue);  /* Keep compiler happy; this function won't ever be */
-(void)(buffer);  /* called when SUPPORT_UTF8 is not defined. */
+(void)(buffer);  /* called when SUPPORT_UTF is not defined. */
 return 0;
+
 #endif
 }
 
diff --git a/usr.sbin/nginx/src/pcre/pcre_tables.c b/usr.sbin/nginx/src/pcre/pcre_tables.c
index 45c221181ac..c8134ec318f 100644
--- a/usr.sbin/nginx/src/pcre/pcre_tables.c
+++ b/usr.sbin/nginx/src/pcre/pcre_tables.c
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
-           Copyright (c) 1997-2009 University of Cambridge
+           Copyright (c) 1997-2012 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -37,6 +37,7 @@ POSSIBILITY OF SUCH DAMAGE.
 -----------------------------------------------------------------------------
 */
 
+#ifndef PCRE_INCLUDED
 
 /* This module contains some fixed tables that are used by more than one of the
 PCRE code modules. The tables are also #included by the pcretest program, which
@@ -50,11 +51,12 @@ clashes with the library. */
 
 #include "pcre_internal.h"
 
+#endif /* PCRE_INCLUDED */
 
 /* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
 the definition is next to the definition of the opcodes in pcre_internal.h. */
 
-const uschar _pcre_OP_lengths[] = { OP_LENGTHS };
+const pcre_uint8 PRIV(OP_lengths)[] = { OP_LENGTHS };
 
 
 
@@ -65,44 +67,38 @@ const uschar _pcre_OP_lengths[] = { OP_LENGTHS };
 /* These are the breakpoints for different numbers of bytes in a UTF-8
 character. */
 
-#ifdef SUPPORT_UTF8
+#if (defined SUPPORT_UTF && defined COMPILE_PCRE8) \
+  || (defined PCRE_INCLUDED && defined SUPPORT_PCRE16)
 
-const int _pcre_utf8_table1[] =
+/* These tables are also required by pcretest in 16 bit mode. */
+
+const int PRIV(utf8_table1)[] =
   { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
 
-const int _pcre_utf8_table1_size = sizeof(_pcre_utf8_table1)/sizeof(int);
+const int PRIV(utf8_table1_size) = sizeof(PRIV(utf8_table1)) / sizeof(int);
 
 /* These are the indicator bits and the mask for the data bits to set in the
 first byte of a character, indexed by the number of additional bytes. */
 
-const int _pcre_utf8_table2[] = { 0,    0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
-const int _pcre_utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
+const int PRIV(utf8_table2)[] = { 0,    0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
+const int PRIV(utf8_table3)[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
 
 /* Table of the number of extra bytes, indexed by the first byte masked with
 0x3f. The highest number for a valid UTF-8 first byte is in fact 0x3d. */
 
-const uschar _pcre_utf8_table4[] = {
+const pcre_uint8 PRIV(utf8_table4)[] = {
   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
   2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
   3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
 
-#ifdef SUPPORT_JIT
-/* Full table of the number of extra bytes when the
-character code is greater or equal than 0xc0.
-See _pcre_utf8_table4 above. */
+#endif /* (SUPPORT_UTF && COMPILE_PCRE8) || (PCRE_INCLUDED && SUPPORT_PCRE16)*/
 
-const uschar _pcre_utf8_char_sizes[] = {
-  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
-  3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,
-};
-#endif
+#ifdef SUPPORT_UTF
 
 /* Table to translate from particular type value to the general value. */
 
-const int _pcre_ucp_gentype[] = {
+const int PRIV(ucp_gentype)[] = {
   ucp_C, ucp_C, ucp_C, ucp_C, ucp_C,  /* Cc, Cf, Cn, Co, Cs */
   ucp_L, ucp_L, ucp_L, ucp_L, ucp_L,  /* Ll, Lu, Lm, Lo, Lt */
   ucp_M, ucp_M, ucp_M,                /* Mc, Me, Mn */
@@ -114,10 +110,10 @@ const int _pcre_ucp_gentype[] = {
 };
 
 #ifdef SUPPORT_JIT
-/* This table reverses _pcre_ucp_gentype. We can save the cost
+/* This table reverses PRIV(ucp_gentype). We can save the cost
 of a memory load. */
 
-const int _pcre_ucp_typerange[] = {
+const int PRIV(ucp_typerange)[] = {
   ucp_Cc, ucp_Cs,
   ucp_Ll, ucp_Lu,
   ucp_Mc, ucp_Mn,
@@ -126,7 +122,7 @@ const int _pcre_ucp_typerange[] = {
   ucp_Sc, ucp_So,
   ucp_Zl, ucp_Zs,
 };
-#endif
+#endif /* SUPPORT_JIT */
 
 /* The pcre_utt[] table below translates Unicode property names into type and
 code values. It is searched by binary chop, so must be in collating sequence of
@@ -284,7 +280,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
 #define STRING_Zp0 STR_Z STR_p "\0"
 #define STRING_Zs0 STR_Z STR_s "\0"
 
-const char _pcre_utt_names[] =
+const char PRIV(utt_names)[] =
   STRING_Any0
   STRING_Arabic0
   STRING_Armenian0
@@ -424,7 +420,7 @@ const char _pcre_utt_names[] =
   STRING_Zp0
   STRING_Zs0;
 
-const ucp_type_table _pcre_utt[] = {
+const ucp_type_table PRIV(utt)[] = {
   {   0, PT_ANY, 0 },
   {   4, PT_SC, ucp_Arabic },
   {  11, PT_SC, ucp_Armenian },
@@ -565,8 +561,8 @@ const ucp_type_table _pcre_utt[] = {
   { 961, PT_PC, ucp_Zs }
 };
 
-const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table);
+const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
 
-#endif  /* SUPPORT_UTF8 */
+#endif /* SUPPORT_UTF */
 
 /* End of pcre_tables.c */
diff --git a/usr.sbin/nginx/src/pcre/pcre_try_flipped.c b/usr.sbin/nginx/src/pcre/pcre_try_flipped.c
deleted file mode 100644
index 606504c0b0d..00000000000
--- a/usr.sbin/nginx/src/pcre/pcre_try_flipped.c
+++ /dev/null
@@ -1,139 +0,0 @@
-/*************************************************
-*      Perl-Compatible Regular Expressions       *
-*************************************************/
-
-/* PCRE is a library of functions to support regular expressions whose syntax
-and semantics are as close as possible to those of the Perl 5 language.
-
-                       Written by Philip Hazel
-           Copyright (c) 1997-2009 University of Cambridge
-
------------------------------------------------------------------------------
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-      this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-
-    * Neither the name of the University of Cambridge nor the names of its
-      contributors may be used to endorse or promote products derived from
-      this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
------------------------------------------------------------------------------
-*/
-
-
-/* This module contains an internal function that tests a compiled pattern to
-see if it was compiled with the opposite endianness. If so, it uses an
-auxiliary local function to flip the appropriate bytes. */
-
-
-#ifdef HAVE_CONFIG_H
-#include "config.h"
-#endif
-
-#include "pcre_internal.h"
-
-
-/*************************************************
-*         Flip bytes in an integer               *
-*************************************************/
-
-/* This function is called when the magic number in a regex doesn't match, in
-order to flip its bytes to see if we are dealing with a pattern that was
-compiled on a host of different endianness. If so, this function is used to
-flip other byte values.
-
-Arguments:
-  value        the number to flip
-  n            the number of bytes to flip (assumed to be 2 or 4)
-
-Returns:       the flipped value
-*/
-
-static unsigned long int
-byteflip(unsigned long int value, int n)
-{
-if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
-return ((value & 0x000000ff) << 24) |
-       ((value & 0x0000ff00) <<  8) |
-       ((value & 0x00ff0000) >>  8) |
-       ((value & 0xff000000) >> 24);
-}
-
-
-
-/*************************************************
-*       Test for a byte-flipped compiled regex   *
-*************************************************/
-
-/* This function is called from pcre_exec(), pcre_dfa_exec(), and also from
-pcre_fullinfo(). Its job is to test whether the regex is byte-flipped - that
-is, it was compiled on a system of opposite endianness. The function is called
-only when the native MAGIC_NUMBER test fails. If the regex is indeed flipped,
-we flip all the relevant values into a different data block, and return it.
-
-Arguments:
-  re               points to the regex
-  study            points to study data, or NULL
-  internal_re      points to a new regex block
-  internal_study   points to a new study block
-
-Returns:           the new block if is is indeed a byte-flipped regex
-                   NULL if it is not
-*/
-
-real_pcre *
-_pcre_try_flipped(const real_pcre *re, real_pcre *internal_re,
-  const pcre_study_data *study, pcre_study_data *internal_study)
-{
-if (byteflip(re->magic_number, sizeof(re->magic_number)) != MAGIC_NUMBER)
-  return NULL;
-
-*internal_re = *re;           /* To copy other fields */
-internal_re->size = byteflip(re->size, sizeof(re->size));
-internal_re->options = byteflip(re->options, sizeof(re->options));
-internal_re->flags = (pcre_uint16)byteflip(re->flags, sizeof(re->flags));
-internal_re->top_bracket =
-  (pcre_uint16)byteflip(re->top_bracket, sizeof(re->top_bracket));
-internal_re->top_backref =
-  (pcre_uint16)byteflip(re->top_backref, sizeof(re->top_backref));
-internal_re->first_byte =
-  (pcre_uint16)byteflip(re->first_byte, sizeof(re->first_byte));
-internal_re->req_byte =
-  (pcre_uint16)byteflip(re->req_byte, sizeof(re->req_byte));
-internal_re->name_table_offset =
-  (pcre_uint16)byteflip(re->name_table_offset, sizeof(re->name_table_offset));
-internal_re->name_entry_size =
-  (pcre_uint16)byteflip(re->name_entry_size, sizeof(re->name_entry_size));
-internal_re->name_count =
-  (pcre_uint16)byteflip(re->name_count, sizeof(re->name_count));
-
-if (study != NULL)
-  {
-  *internal_study = *study;   /* To copy other fields */
-  internal_study->size = byteflip(study->size, sizeof(study->size));
-  internal_study->flags = byteflip(study->flags, sizeof(study->flags));
-  internal_study->minlength = byteflip(study->minlength,
-    sizeof(study->minlength));
-  }
-
-return internal_re;
-}
-
-/* End of pcre_tryflipped.c */
diff --git a/usr.sbin/nginx/src/pcre/pcre_ucd.c b/usr.sbin/nginx/src/pcre/pcre_ucd.c
index 112cfb41a62..b25574e267e 100644
--- a/usr.sbin/nginx/src/pcre/pcre_ucd.c
+++ b/usr.sbin/nginx/src/pcre/pcre_ucd.c
@@ -18,21 +18,21 @@
 /* Instead, just supply small dummy tables. */
 
 #ifndef SUPPORT_UCP
-const ucd_record _pcre_ucd_records[] = {{0,0,0 }};
-const uschar _pcre_ucd_stage1[] = {0};
-const pcre_uint16 _pcre_ucd_stage2[] = {0};
+const ucd_record PRIV(ucd_records)[] = {{0,0,0 }};
+const pcre_uint8 PRIV(ucd_stage1)[] = {0};
+const pcre_uint16 PRIV(ucd_stage2)[] = {0};
 #else
 
 /* When recompiling tables with a new Unicode version,
 please check types in the structure definition from pcre_internal.h:
 typedef struct {
-uschar property_0;
-uschar property_1;
+pcre_uint8 property_0;
+pcre_uint8 property_1;
 pcre_int32 property_2;
 } ucd_record; */
 
 
-const ucd_record _pcre_ucd_records[] = { /* 4320 bytes, record size 8 */
+const ucd_record PRIV(ucd_records)[] = { /* 4320 bytes, record size 8 */
   {     9,      0,      0, }, /*   0 */
   {     9,     29,      0, }, /*   1 */
   {     9,     21,      0, }, /*   2 */
@@ -575,7 +575,7 @@ const ucd_record _pcre_ucd_records[] = { /* 4320 bytes, record size 8 */
   {    26,     26,      0, }, /* 539 */
 };
 
-const uschar _pcre_ucd_stage1[] = { /* 8704 bytes */
+const pcre_uint8 PRIV(ucd_stage1)[] = { /* 8704 bytes */
   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, /* U+0000 */
  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, /* U+0800 */
  32, 33, 34, 34, 35, 36, 37, 38, 39, 40, 40, 40, 41, 42, 43, 44, /* U+1000 */
@@ -1122,7 +1122,7 @@ const uschar _pcre_ucd_stage1[] = { /* 8704 bytes */
 114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,184, /* U+10F800 */
 };
 
-const pcre_uint16 _pcre_ucd_stage2[] = { /* 47360 bytes, block = 128 */
+const pcre_uint16 PRIV(ucd_stage2)[] = { /* 47360 bytes, block = 128 */
 /* block 0 */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
diff --git a/usr.sbin/nginx/src/pcre/pcre_valid_utf8.c b/usr.sbin/nginx/src/pcre/pcre_valid_utf8.c
index b94bcc98e6b..7b9d3dfa496 100644
--- a/usr.sbin/nginx/src/pcre/pcre_valid_utf8.c
+++ b/usr.sbin/nginx/src/pcre/pcre_valid_utf8.c
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
-           Copyright (c) 1997-2009 University of Cambridge
+           Copyright (c) 1997-2012 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -103,10 +103,10 @@ Returns:       = 0    if the string is a valid UTF-8 string
 */
 
 int
-_pcre_valid_utf8(USPTR string, int length, int *erroroffset)
+PRIV(valid_utf)(PCRE_PUCHAR string, int length, int *erroroffset)
 {
-#ifdef SUPPORT_UTF8
-register USPTR p;
+#ifdef SUPPORT_UTF
+register PCRE_PUCHAR p;
 
 if (length < 0)
   {
@@ -133,7 +133,7 @@ for (p = string; length-- > 0; p++)
     return PCRE_UTF8_ERR21;
     }
 
-  ab = _pcre_utf8_table4[c & 0x3f];     /* Number of additional bytes */
+  ab = PRIV(utf8_table4)[c & 0x3f];     /* Number of additional bytes */
   if (length < ab)
     {
     *erroroffset = (int)(p - string);          /* Missing bytes */
@@ -288,7 +288,7 @@ for (p = string; length-- > 0; p++)
     }
   }
 
-#else  /* SUPPORT_UTF8 */
+#else  /* SUPPORT_UTF */
 (void)(string);  /* Keep picky compilers happy */
 (void)(length);
 #endif
diff --git a/usr.sbin/nginx/src/pcre/pcre_xclass.c b/usr.sbin/nginx/src/pcre/pcre_xclass.c
index 64c1b0043fa..dca7a399403 100644
--- a/usr.sbin/nginx/src/pcre/pcre_xclass.c
+++ b/usr.sbin/nginx/src/pcre/pcre_xclass.c
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
-           Copyright (c) 1997-2010 University of Cambridge
+           Copyright (c) 1997-2012 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -64,39 +64,63 @@ Returns:      TRUE if character matches, else FALSE
 */
 
 BOOL
-_pcre_xclass(int c, const uschar *data)
+PRIV(xclass)(int c, const pcre_uchar *data, BOOL utf)
 {
 int t;
 BOOL negated = (*data & XCL_NOT) != 0;
 
+(void)utf;
+#ifdef COMPILE_PCRE8
+/* In 8 bit mode, this must always be TRUE. Help the compiler to know that. */
+utf = TRUE;
+#endif
+
 /* Character values < 256 are matched against a bitmap, if one is present. If
 not, we still carry on, because there may be ranges that start below 256 in the
 additional data. */
 
 if (c < 256)
   {
-  if ((*data & XCL_MAP) != 0 && (data[1 + c/8] & (1 << (c&7))) != 0)
-    return !negated;   /* char found */
+  if ((*data & XCL_MAP) != 0 &&
+    (((pcre_uint8 *)(data + 1))[c/8] & (1 << (c&7))) != 0)
+    return !negated; /* char found */
   }
 
 /* First skip the bit map if present. Then match against the list of Unicode
 properties or large chars or ranges that end with a large char. We won't ever
 encounter XCL_PROP or XCL_NOTPROP when UCP support is not compiled. */
 
-if ((*data++ & XCL_MAP) != 0) data += 32;
+if ((*data++ & XCL_MAP) != 0) data += 32 / sizeof(pcre_uchar);
 
 while ((t = *data++) != XCL_END)
   {
   int x, y;
   if (t == XCL_SINGLE)
     {
-    GETCHARINC(x, data);
+#ifdef SUPPORT_UTF
+    if (utf)
+      {
+      GETCHARINC(x, data); /* macro generates multiple statements */
+      }
+    else
+#endif
+      x = *data++;
     if (c == x) return !negated;
     }
   else if (t == XCL_RANGE)
     {
-    GETCHARINC(x, data);
-    GETCHARINC(y, data);
+#ifdef SUPPORT_UTF
+    if (utf)
+      {
+      GETCHARINC(x, data); /* macro generates multiple statements */
+      GETCHARINC(y, data); /* macro generates multiple statements */
+      }
+    else
+#endif
+      {
+      x = *data++;
+      y = *data++;
+      }
     if (c >= x && c <= y) return !negated;
     }
 
@@ -117,7 +141,7 @@ while ((t = *data++) != XCL_END)
       break;
 
       case PT_GC:
-      if ((data[1] == _pcre_ucp_gentype[prop->chartype]) == (t == XCL_PROP))
+      if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == (t == XCL_PROP))
         return !negated;
       break;
 
@@ -130,28 +154,28 @@ while ((t = *data++) != XCL_END)
       break;
 
       case PT_ALNUM:
-      if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
-           _pcre_ucp_gentype[prop->chartype] == ucp_N) == (t == XCL_PROP))
+      if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
+           PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (t == XCL_PROP))
         return !negated;
       break;
 
       case PT_SPACE:    /* Perl space */
-      if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
+      if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
            c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
              == (t == XCL_PROP))
         return !negated;
       break;
 
       case PT_PXSPACE:  /* POSIX space */
-      if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
+      if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
            c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
            c == CHAR_FF || c == CHAR_CR) == (t == XCL_PROP))
         return !negated;
       break;
 
       case PT_WORD:
-      if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
-           _pcre_ucp_gentype[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE)
+      if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
+           PRIV(ucp_gentype)[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE)
              == (t == XCL_PROP))
         return !negated;
       break;
author	robert <robert@openbsd.org>	2012-02-18 11:28:28 +0000
committer	robert <robert@openbsd.org>	2012-02-18 11:28:28 +0000
commit	4c64784e1754a12bcee2849f2480afe311febcce (patch)
tree	2ea4aa1a762ff8e0460b68efef87581dc37a6aee /usr.sbin/nginx/src
parent	update to 1.0.12 (diff)
download	wireguard-openbsd-4c64784e1754a12bcee2849f2480afe311febcce.tar.xz wireguard-openbsd-4c64784e1754a12bcee2849f2480afe311febcce.zip