diff options
author | 2016-05-26 05:46:44 +0000 | |
---|---|---|
committer | 2016-05-26 05:46:44 +0000 | |
commit | e315dedff8d9cbbf64bc080d283f4c20ae7a136a (patch) | |
tree | 0156a7008083d58b5f7af6b389fe46d8057e304c | |
parent | Remove superfluous loop counter to set alternate video interface since we (diff) | |
download | wireguard-openbsd-e315dedff8d9cbbf64bc080d283f4c20ae7a136a.tar.xz wireguard-openbsd-e315dedff8d9cbbf64bc080d283f4c20ae7a136a.zip |
Change the way regexec handles REG_STARTEND combined with REG_NOTBOL.
The new code sees this combination as a continuation of string at offset
pmatch[0].rm_so, instead of a new string which starts at that offset.
This change fixes a search quirk in vi and is needed for upcoming fixes in
ed/sed/vi.
This new behaviour is also used in gnu regex.
Lots of help from schwarze@
Manpage bits by schwarze@
OK schwarze@ and millert@
-rw-r--r-- | lib/libc/regex/engine.c | 16 | ||||
-rw-r--r-- | lib/libc/regex/regex.3 | 66 |
2 files changed, 62 insertions, 20 deletions
diff --git a/lib/libc/regex/engine.c b/lib/libc/regex/engine.c index 4acb80b0e33..261956bec96 100644 --- a/lib/libc/regex/engine.c +++ b/lib/libc/regex/engine.c @@ -1,4 +1,4 @@ -/* $OpenBSD: engine.c,v 1.22 2016/05/25 21:01:11 schwarze Exp $ */ +/* $OpenBSD: engine.c,v 1.23 2016/05/26 05:46:44 martijn Exp $ */ /*- * Copyright (c) 1992, 1993, 1994 Henry Spencer. @@ -671,12 +671,17 @@ fast(struct match *m, char *start, char *stop, sopno startst, sopno stopst) states fresh = m->fresh; states tmp = m->tmp; char *p = start; - int c = (start == m->beginp) ? OUT : *(start-1); + int c; int lastc; /* previous c */ int flagch; int i; char *coldp; /* last p after which no match was underway */ + if (start == m->offp || (start == m->beginp && !(m->eflags&REG_NOTBOL))) + c = OUT; + else + c = *(start-1); + CLEAR(st); SET1(st, startst); st = step(m->g, startst, stopst, st, NOTHING, st); @@ -754,12 +759,17 @@ slow(struct match *m, char *start, char *stop, sopno startst, sopno stopst) states empty = m->empty; states tmp = m->tmp; char *p = start; - int c = (start == m->beginp) ? OUT : *(start-1); + int c; int lastc; /* previous c */ int flagch; int i; char *matchp; /* last p at which a match ended */ + if (start == m->offp || (start == m->beginp && !(m->eflags&REG_NOTBOL))) + c = OUT; + else + c = *(start-1); + AT("slow", start, stop, startst, stopst); CLEAR(st); SET1(st, startst); diff --git a/lib/libc/regex/regex.3 b/lib/libc/regex/regex.3 index 8ee6c71b955..294a6d88251 100644 --- a/lib/libc/regex/regex.3 +++ b/lib/libc/regex/regex.3 @@ -1,4 +1,4 @@ -.\" $OpenBSD: regex.3,v 1.26 2015/11/10 23:48:18 jmc Exp $ +.\" $OpenBSD: regex.3,v 1.27 2016/05/26 05:46:44 martijn Exp $ .\" .\" Copyright (c) 1997, Phillip F Knaack. All rights reserved. 
.\" @@ -35,7 +35,7 @@ .\" .\" @(#)regex.3 8.4 (Berkeley) 3/20/94 .\" -.Dd $Mdocdate: November 10 2015 $ +.Dd $Mdocdate: May 26 2016 $ .Dt REGEXEC 3 .Os .Sh NAME @@ -225,11 +225,16 @@ argument is the bitwise of zero or more of the following values: .Bl -tag -width XREG_STARTENDX .It Dv REG_NOTBOL -The first character of -the string -is not the beginning of a line, so the -.Ql ^ -anchor should not match before it. +The first character of the string is treated as the continuation +of a line. +This means that the anchors +.Ql ^ , +.Ql [[:<:]] , +and +.Ql \e< +do not match before it; but see +.Dv REG_STARTEND +below. This does not affect the behavior of newlines under .Dv REG_NEWLINE . .It Dv REG_NOTEOL @@ -237,15 +242,16 @@ The NUL terminating the string does not end a line, so the .Ql $ -anchor should not match before it. +anchor does not match before it. This does not affect the behavior of newlines under .Dv REG_NEWLINE . .It Dv REG_STARTEND The string is considered to start at -\fIstring\fR\ + \fIpmatch\fR[0].\fIrm_so\fR -and to have a terminating NUL located at -\fIstring\fR\ + \fIpmatch\fR[0].\fIrm_eo\fR -(there need not actually be a NUL at that location), +.Fa string No + +.Fa pmatch Ns [0]. Ns Fa rm_so +and to end before the byte located at +.Fa string No + +.Fa pmatch Ns [0]. Ns Fa rm_eo , regardless of the value of .Fa nmatch . See below for the definition of @@ -257,11 +263,37 @@ compatible with but not specified by .St -p1003.2 , and should be used with caution in software intended to be portable to other systems. -Note that a non-zero \fIrm_so\fR does not imply -.Dv REG_NOTBOL ; -.Dv REG_STARTEND -affects only the location of the string, -not how it is matched. +.Pp +Without +.Dv REG_NOTBOL , +the position +.Fa rm_so +is considered the beginning of a line, such that +.Ql ^ +matches before it, and the beginning of a word if there is a word +character at this position, such that +.Ql [[:<:]] +and +.Ql \e< +match before it. 
+.Pp +With +.Dv REG_NOTBOL , +the character at position +.Fa rm_so +is treated as the continuation of a line, and if +.Fa rm_so +is greater than 0, the preceding character is taken into consideration. +If the preceding character is a newline and the regular expression was compiled +with +.Dv REG_NEWLINE , +.Ql ^ +matches before the string; if the preceding character is not a word character +but the string starts with a word character, +.Ql [[:<:]] +and +.Ql \e< +match before the string. .El .Pp See |