summaryrefslogtreecommitdiffstats
path: root/gnu
diff options
context:
space:
mode:
authorafresh1 <afresh1@openbsd.org>2020-06-01 18:01:43 +0000
committerafresh1 <afresh1@openbsd.org>2020-06-01 18:01:43 +0000
commit1510771fee0a4018f433f239923dfd9a9a2a2e78 (patch)
tree68276b105bd5f595fb9b08999c1b7605117c91f6 /gnu
parentTry without cursor/keypad flags if a key doesn't exist, and limit ctrl (diff)
downloadwireguard-openbsd-1510771fee0a4018f433f239923dfd9a9a2a2e78.tar.xz
wireguard-openbsd-1510771fee0a4018f433f239923dfd9a9a2a2e78.zip
Fix various overflows and state corruption in perl
Found by: ManhND of The Tarantula Team, VinCSS (a member of Vingroup), Hugo van der Sanden, Slaven Rezic, and Sergey Aleynikov Fixed by: John Lightsey, Hugo van der Sanden, and Karl Williamson Addresses: * CVE-2020-10543 * CVE-2020-10878 * CVE-2020-12723
Diffstat (limited to 'gnu')
-rw-r--r--gnu/usr.bin/perl/embed.fnc3
-rw-r--r--gnu/usr.bin/perl/embed.h3
-rw-r--r--gnu/usr.bin/perl/proto.h5
-rw-r--r--gnu/usr.bin/perl/regcomp.c139
-rwxr-xr-xgnu/usr.bin/perl/t/re/pat.t26
5 files changed, 125 insertions, 51 deletions
diff --git a/gnu/usr.bin/perl/embed.fnc b/gnu/usr.bin/perl/embed.fnc
index aedb4baef19..0a61018b8df 100644
--- a/gnu/usr.bin/perl/embed.fnc
+++ b/gnu/usr.bin/perl/embed.fnc
@@ -2480,7 +2480,8 @@ Es |SSize_t|study_chunk |NN RExC_state_t *pRExC_state \
|NULLOK struct scan_data_t *data \
|I32 stopparen|U32 recursed_depth \
|NULLOK regnode_ssc *and_withp \
- |U32 flags|U32 depth
+ |U32 flags|U32 depth|bool was_mutate_ok
+Es |void |rck_elide_nothing|NN regnode *node
EsR |SV * |get_ANYOFM_contents|NN const regnode * n
EsRn |U32 |add_data |NN RExC_state_t* const pRExC_state \
|NN const char* const s|const U32 n
diff --git a/gnu/usr.bin/perl/embed.h b/gnu/usr.bin/perl/embed.h
index 75c91f77f45..5346ec58613 100644
--- a/gnu/usr.bin/perl/embed.h
+++ b/gnu/usr.bin/perl/embed.h
@@ -1208,6 +1208,7 @@
#define parse_lparen_question_flags(a) S_parse_lparen_question_flags(aTHX_ a)
#define parse_uniprop_string(a,b,c,d,e,f,g,h,i) Perl_parse_uniprop_string(aTHX_ a,b,c,d,e,f,g,h,i)
#define populate_ANYOF_from_invlist(a,b) S_populate_ANYOF_from_invlist(aTHX_ a,b)
+#define rck_elide_nothing(a) S_rck_elide_nothing(aTHX_ a)
#define reg(a,b,c,d) S_reg(aTHX_ a,b,c,d)
#define reg2Lanode(a,b,c,d) S_reg2Lanode(aTHX_ a,b,c,d)
#define reg_node(a,b) S_reg_node(aTHX_ a,b)
@@ -1238,7 +1239,7 @@
#define ssc_is_cp_posixl_init S_ssc_is_cp_posixl_init
#define ssc_or(a,b,c) S_ssc_or(aTHX_ a,b,c)
#define ssc_union(a,b,c) S_ssc_union(aTHX_ a,b,c)
-#define study_chunk(a,b,c,d,e,f,g,h,i,j,k) S_study_chunk(aTHX_ a,b,c,d,e,f,g,h,i,j,k)
+#define study_chunk(a,b,c,d,e,f,g,h,i,j,k,l) S_study_chunk(aTHX_ a,b,c,d,e,f,g,h,i,j,k,l)
# endif
# if defined(PERL_IN_REGCOMP_C) || defined (PERL_IN_DUMP_C)
#define _invlist_dump(a,b,c,d) Perl__invlist_dump(aTHX_ a,b,c,d)
diff --git a/gnu/usr.bin/perl/proto.h b/gnu/usr.bin/perl/proto.h
index 141ddbaee6d..d3fecc05d1b 100644
--- a/gnu/usr.bin/perl/proto.h
+++ b/gnu/usr.bin/perl/proto.h
@@ -5543,6 +5543,9 @@ PERL_CALLCONV SV * Perl_parse_uniprop_string(pTHX_ const char * const name, cons
STATIC void S_populate_ANYOF_from_invlist(pTHX_ regnode *node, SV** invlist_ptr);
#define PERL_ARGS_ASSERT_POPULATE_ANYOF_FROM_INVLIST \
assert(node); assert(invlist_ptr)
+STATIC void S_rck_elide_nothing(pTHX_ regnode *node);
+#define PERL_ARGS_ASSERT_RCK_ELIDE_NOTHING \
+ assert(node)
PERL_STATIC_NO_RET void S_re_croak2(pTHX_ bool utf8, const char* pat1, const char* pat2, ...)
__attribute__noreturn__;
#define PERL_ARGS_ASSERT_RE_CROAK2 \
@@ -5656,7 +5659,7 @@ PERL_STATIC_INLINE void S_ssc_union(pTHX_ regnode_ssc *ssc, SV* const invlist, c
#define PERL_ARGS_ASSERT_SSC_UNION \
assert(ssc); assert(invlist)
#endif
-STATIC SSize_t S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, SSize_t *minlenp, SSize_t *deltap, regnode *last, struct scan_data_t *data, I32 stopparen, U32 recursed_depth, regnode_ssc *and_withp, U32 flags, U32 depth);
+STATIC SSize_t S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, SSize_t *minlenp, SSize_t *deltap, regnode *last, struct scan_data_t *data, I32 stopparen, U32 recursed_depth, regnode_ssc *and_withp, U32 flags, U32 depth, bool was_mutate_ok);
#define PERL_ARGS_ASSERT_STUDY_CHUNK \
assert(pRExC_state); assert(scanp); assert(minlenp); assert(deltap); assert(last)
#endif
diff --git a/gnu/usr.bin/perl/regcomp.c b/gnu/usr.bin/perl/regcomp.c
index 93c8d98fbb0..b07191dbade 100644
--- a/gnu/usr.bin/perl/regcomp.c
+++ b/gnu/usr.bin/perl/regcomp.c
@@ -106,6 +106,7 @@ typedef struct scan_frame {
regnode *next_regnode; /* next node to process when last is reached */
U32 prev_recursed_depth;
I32 stopparen; /* what stopparen do we use */
+ bool in_gosub; /* this or an outer frame is for GOSUB */
struct scan_frame *this_prev_frame; /* this previous frame */
struct scan_frame *prev_frame; /* previous frame */
@@ -4450,6 +4451,44 @@ S_unwind_scan_frames(pTHX_ const void *p)
} while (f);
}
+/* Follow the next-chain of the current node and optimize away
+ all the NOTHINGs from it.
+ */
+STATIC void
+S_rck_elide_nothing(pTHX_ regnode *node)
+{
+ dVAR;
+
+ PERL_ARGS_ASSERT_RCK_ELIDE_NOTHING;
+
+ if (OP(node) != CURLYX) {
+ const int max = (reg_off_by_arg[OP(node)]
+ ? I32_MAX
+ /* I32 may be smaller than U16 on CRAYs! */
+ : (I32_MAX < U16_MAX ? I32_MAX : U16_MAX));
+ int off = (reg_off_by_arg[OP(node)] ? ARG(node) : NEXT_OFF(node));
+ int noff;
+ regnode *n = node;
+
+ /* Skip NOTHING and LONGJMP. */
+ while (
+ (n = regnext(n))
+ && (
+ (PL_regkind[OP(n)] == NOTHING && (noff = NEXT_OFF(n)))
+ || ((OP(n) == LONGJMP) && (noff = ARG(n)))
+ )
+ && off + noff < max
+ ) {
+ off += noff;
+ }
+ if (reg_off_by_arg[OP(node)])
+ ARG(node) = off;
+ else
+ NEXT_OFF(node) = off;
+ }
+ return;
+}
+
/* the return from this sub is the minimum length that could possibly match */
STATIC SSize_t
S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
@@ -4459,7 +4498,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
I32 stopparen,
U32 recursed_depth,
regnode_ssc *and_withp,
- U32 flags, U32 depth)
+ U32 flags, U32 depth, bool was_mutate_ok)
/* scanp: Start here (read-write). */
/* deltap: Write maxlen-minlen here. */
/* last: Stop before this one. */
@@ -4538,6 +4577,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
node length to get a real minimum (because
the folded version may be shorter) */
bool unfolded_multi_char = FALSE;
+ /* avoid mutating ops if we are anywhere within the recursed or
+ * enframed handling for a GOSUB: the outermost level will handle it.
+ */
+ bool mutate_ok = was_mutate_ok && !(frame && frame->in_gosub);
/* Peephole optimizer: */
DEBUG_STUDYDATA("Peep", data, depth, is_inf);
DEBUG_PEEP("Peep", scan, depth, flags);
@@ -4548,30 +4591,13 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
* parsing code, as each (?:..) is handled by a different invocation of
* reg() -- Yves
*/
- JOIN_EXACT(scan,&min_subtract, &unfolded_multi_char, 0);
-
- /* Follow the next-chain of the current node and optimize
- away all the NOTHINGs from it. */
- if (OP(scan) != CURLYX) {
- const int max = (reg_off_by_arg[OP(scan)]
- ? I32_MAX
- /* I32 may be smaller than U16 on CRAYs! */
- : (I32_MAX < U16_MAX ? I32_MAX : U16_MAX));
- int off = (reg_off_by_arg[OP(scan)] ? ARG(scan) : NEXT_OFF(scan));
- int noff;
- regnode *n = scan;
-
- /* Skip NOTHING and LONGJMP. */
- while ((n = regnext(n))
- && ((PL_regkind[OP(n)] == NOTHING && (noff = NEXT_OFF(n)))
- || ((OP(n) == LONGJMP) && (noff = ARG(n))))
- && off + noff < max)
- off += noff;
- if (reg_off_by_arg[OP(scan)])
- ARG(scan) = off;
- else
- NEXT_OFF(scan) = off;
- }
+ if (mutate_ok)
+ JOIN_EXACT(scan,&min_subtract, &unfolded_multi_char, 0);
+
+ /* Follow the next-chain of the current node and optimize
+ away all the NOTHINGs from it.
+ */
+ rck_elide_nothing(scan);
/* The principal pseudo-switch. Cannot be a switch, since we
look into several different things. */
@@ -4598,7 +4624,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
/* DEFINEP study_chunk() recursion */
(void)study_chunk(pRExC_state, &scan, &minlen,
&deltanext, next, &data_fake, stopparen,
- recursed_depth, NULL, f, depth+1);
+ recursed_depth, NULL, f, depth+1, mutate_ok);
scan = next;
} else
@@ -4666,7 +4692,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
/* recurse study_chunk() for each BRANCH in an alternation */
minnext = study_chunk(pRExC_state, &scan, minlenp,
&deltanext, next, &data_fake, stopparen,
- recursed_depth, NULL, f, depth+1);
+ recursed_depth, NULL, f, depth+1,
+ mutate_ok);
if (min1 > minnext)
min1 = minnext;
@@ -4733,9 +4760,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
}
}
- if (PERL_ENABLE_TRIE_OPTIMISATION &&
- OP( startbranch ) == BRANCH )
- {
+ if (PERL_ENABLE_TRIE_OPTIMISATION
+ && OP(startbranch) == BRANCH
+ && mutate_ok
+ ) {
/* demq.
Assuming this was/is a branch we are dealing with: 'scan'
@@ -5190,6 +5218,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
newframe->stopparen = stopparen;
newframe->prev_recursed_depth = recursed_depth;
newframe->this_prev_frame= frame;
+ newframe->in_gosub = (
+ (frame && frame->in_gosub) || OP(scan) == GOSUB
+ );
DEBUG_STUDYDATA("frame-new", data, depth, is_inf);
DEBUG_PEEP("fnew", scan, depth, flags);
@@ -5347,7 +5378,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
/* This temporary node can now be turned into EXACTFU, and
* must, as regexec.c doesn't handle it */
- if (OP(next) == EXACTFU_S_EDGE) {
+ if (OP(next) == EXACTFU_S_EDGE && mutate_ok) {
OP(next) = EXACTFU;
}
@@ -5355,8 +5386,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
&& isALPHA_A(* STRING(next))
&& ( OP(next) == EXACTFAA
|| ( OP(next) == EXACTFU
- && ! HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(* STRING(next)))))
- {
+ && ! HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(* STRING(next))))
+ && mutate_ok
+ ) {
/* These differ in just one bit */
U8 mask = ~ ('A' ^ 'a');
@@ -5443,7 +5475,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
(mincount == 0
? (f & ~SCF_DO_SUBSTR)
: f)
- ,depth+1);
+ , depth+1, mutate_ok);
if (flags & SCF_DO_STCLASS)
data->start_class = oclass;
@@ -5489,6 +5521,12 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
RExC_precomp)));
}
+ if ( ( minnext > 0 && mincount >= SSize_t_MAX / minnext )
+ || min >= SSize_t_MAX - minnext * mincount )
+ {
+ FAIL("Regexp out of space");
+ }
+
min += minnext * mincount;
is_inf_internal |= deltanext == SSize_t_MAX
|| (maxcount == REG_INFTY && minnext + deltanext > 0);
@@ -5503,7 +5541,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
if ( OP(oscan) == CURLYX && data
&& data->flags & SF_IN_PAR
&& !(data->flags & SF_HAS_EVAL)
- && !deltanext && minnext == 1 ) {
+ && !deltanext && minnext == 1
+ && mutate_ok
+ ) {
/* Try to optimize to CURLYN. */
regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS;
regnode * const nxt1 = nxt;
@@ -5553,10 +5593,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
&& !(data->flags & SF_HAS_EVAL)
&& !deltanext /* atom is fixed width */
&& minnext != 0 /* CURLYM can't handle zero width */
-
/* Nor characters whose fold at run-time may be
* multi-character */
&& ! (RExC_seen & REG_UNFOLDED_MULTI_SEEN)
+ && mutate_ok
) {
/* XXXX How to optimize if data == 0? */
/* Optimize to a simpler form. */
@@ -5609,7 +5649,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
/* recurse study_chunk() on optimised CURLYX => CURLYM */
study_chunk(pRExC_state, &nxt1, minlenp, &deltanext, nxt,
NULL, stopparen, recursed_depth, NULL, 0,
- depth+1);
+ depth+1, mutate_ok);
}
else
oscan->flags = 0;
@@ -5739,11 +5779,7 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n",
if (data && (fl & SF_HAS_EVAL))
data->flags |= SF_HAS_EVAL;
optimize_curly_tail:
- if (OP(oscan) != CURLYX) {
- while (PL_regkind[OP(next = regnext(oscan))] == NOTHING
- && NEXT_OFF(next))
- NEXT_OFF(oscan) += NEXT_OFF(next);
- }
+ rck_elide_nothing(oscan);
continue;
default:
@@ -6018,7 +6054,8 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n",
/* recurse study_chunk() for lookahead body */
minnext = study_chunk(pRExC_state, &nscan, minlenp, &deltanext,
last, &data_fake, stopparen,
- recursed_depth, NULL, f, depth+1);
+ recursed_depth, NULL, f, depth+1,
+ mutate_ok);
if (scan->flags) {
if ( deltanext < 0
|| deltanext > (I32) U8_MAX
@@ -6123,7 +6160,7 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n",
*minnextp = study_chunk(pRExC_state, &nscan, minnextp,
&deltanext, last, &data_fake,
stopparen, recursed_depth, NULL,
- f, depth+1);
+ f, depth+1, mutate_ok);
if (scan->flags) {
assert(0); /* This code has never been tested since this
is normally not compiled */
@@ -6291,7 +6328,8 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n",
/* optimise study_chunk() for TRIE */
minnext = study_chunk(pRExC_state, &scan, minlenp,
&deltanext, (regnode *)nextbranch, &data_fake,
- stopparen, recursed_depth, NULL, f, depth+1);
+ stopparen, recursed_depth, NULL, f, depth+1,
+ mutate_ok);
}
if (nextbranch && PL_regkind[OP(nextbranch)]==BRANCH)
nextbranch= regnext((regnode*)nextbranch);
@@ -7740,6 +7778,13 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
/* We have that number in RExC_npar */
RExC_total_parens = RExC_npar;
+
+ /* XXX For backporting, use long jumps if there is any possibility of
+ * overflow */
+ if (RExC_size > U16_MAX && ! RExC_use_BRANCHJ) {
+ RExC_use_BRANCHJ = TRUE;
+ flags |= RESTART_PARSE;
+ }
}
else if (! MUST_RESTART(flags)) {
ReREFCNT_dec(Rx);
@@ -8077,7 +8122,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
&data, -1, 0, NULL,
SCF_DO_SUBSTR | SCF_WHILEM_VISITED_POS | stclass_flag
| (restudied ? SCF_TRIE_DOING_RESTUDY : 0),
- 0);
+ 0, TRUE);
CHECK_RESTUDY_GOTO_butfirst(LEAVE_with_name("study_chunk"));
@@ -8206,7 +8251,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
SCF_DO_STCLASS_AND|SCF_WHILEM_VISITED_POS|(restudied
? SCF_TRIE_DOING_RESTUDY
: 0),
- 0);
+ 0, TRUE);
CHECK_RESTUDY_GOTO_butfirst(NOOP);
diff --git a/gnu/usr.bin/perl/t/re/pat.t b/gnu/usr.bin/perl/t/re/pat.t
index 9338dcc98e5..b24bd024b6c 100755
--- a/gnu/usr.bin/perl/t/re/pat.t
+++ b/gnu/usr.bin/perl/t/re/pat.t
@@ -25,7 +25,7 @@ BEGIN {
skip_all('no re module') unless defined &DynaLoader::boot_DynaLoader;
skip_all_without_unicode_tables();
-plan tests => 869; # Update this when adding/deleting tests.
+plan tests => 873; # Update this when adding/deleting tests.
run_tests() unless caller;
@@ -2141,6 +2141,30 @@ SKIP:
ok($result, "regexp correctly matched");
}
+ # gh16947: test regexp corruption (GOSUB)
+ {
+ fresh_perl_is(q{
+ 'xy' =~ /x(?0)|x(?|y|y)/ && print 'ok'
+ }, 'ok', {}, 'gh16947: test regexp corruption (GOSUB)');
+ }
+ # gh16947: test fix doesn't break SUSPEND
+ {
+ fresh_perl_is(q{ 'sx' =~ m{ss++}i; print 'ok' },
+ 'ok', {}, "gh16947: test fix doesn't break SUSPEND");
+ }
+
+ # gh17743: more regexp corruption via GOSUB
+ {
+ fresh_perl_is(q{
+ "0" =~ /((0(?0)|000(?|0000|0000)(?0))|)/; print "ok"
+ }, 'ok', {}, 'gh17743: test regexp corruption (1)');
+
+ fresh_perl_is(q{
+ "000000000000" =~ /(0(())(0((?0)())|000(?|\x{ef}\x{bf}\x{bd}|\x{ef}\x{bf}\x{bd}))|)/;
+ print "ok"
+ }, 'ok', {}, 'gh17743: test regexp corruption (2)');
+ }
+
} # End of sub run_tests
1;