diff options
author | 2020-06-13 01:19:55 +0000 | |
---|---|---|
committer | 2020-06-13 01:19:55 +0000 | |
commit | ef7896159fc0611a0b45253ea37cc7c8f65d4bf5 (patch) | |
tree | 39e2e85dd108ec4abcdbbe6b1fdd72e47d50de14 | |
parent | Teach powerpc64 ddb to x, w, break, step, trace. (diff) | |
download | wireguard-openbsd-ef7896159fc0611a0b45253ea37cc7c8f65d4bf5.tar.xz wireguard-openbsd-ef7896159fc0611a0b45253ea37cc7c8f65d4bf5.zip |
POSIX doesn't permit an unescaped '/' in an extended regular expression.
Unlike upstream awk, ours has historically allowed unescaped '/'
inside a bracket expression for compatibility with other awk
implementations, but the check was too simple-minded. This improves
the matching to allow things like /[]/]/, /[^]/]/ and /[abc[:digit:]/@#]/.
To enable strict POSIX compliance, set POSIXLY_CORRECT.
-rw-r--r-- | usr.bin/awk/awk.1 | 8 | ||||
-rw-r--r-- | usr.bin/awk/awk.h | 3 | ||||
-rw-r--r-- | usr.bin/awk/lex.c | 29 | ||||
-rw-r--r-- | usr.bin/awk/main.c | 7 | ||||
-rw-r--r-- | usr.bin/awk/run.c | 9 |
5 files changed, 35 insertions, 21 deletions
diff --git a/usr.bin/awk/awk.1 b/usr.bin/awk/awk.1 index 84e4f057e73..4d320b705f2 100644 --- a/usr.bin/awk/awk.1 +++ b/usr.bin/awk/awk.1 @@ -1,4 +1,4 @@ -.\" $OpenBSD: awk.1,v 1.50 2020/06/10 21:05:02 millert Exp $ +.\" $OpenBSD: awk.1,v 1.51 2020/06/13 01:19:55 millert Exp $ .\" .\" Copyright (C) Lucent Technologies 1997 .\" All Rights Reserved @@ -22,7 +22,7 @@ .\" ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF .\" THIS SOFTWARE. .\" -.Dd $Mdocdate: June 10 2020 $ +.Dd $Mdocdate: June 13 2020 $ .Dt AWK 1 .Os .Sh NAME @@ -805,7 +805,9 @@ string argument for .Fn sub and .Fn gsub -are not collapsed. +are not collapsed and a slash +.Pq Ql / +does not need to be escaped in a bracket expression. .Pp The flags .Op Fl \&dV diff --git a/usr.bin/awk/awk.h b/usr.bin/awk/awk.h index e55af318b9a..080bf94cc22 100644 --- a/usr.bin/awk/awk.h +++ b/usr.bin/awk/awk.h @@ -1,4 +1,4 @@ -/* $OpenBSD: awk.h,v 1.23 2020/06/10 21:06:09 millert Exp $ */ +/* $OpenBSD: awk.h,v 1.24 2020/06/13 01:19:55 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -62,6 +62,7 @@ extern enum compile_states { } compile_time; extern bool safe; /* false => unsafe, true => safe */ +extern bool do_posix; /* true if POSIXLY_CORRECT set */ #define RECSIZE (8 * 1024) /* sets limit on records, fields, etc., etc. 
*/ extern int recsize; /* size of current record, orig RECSIZE */ diff --git a/usr.bin/awk/lex.c b/usr.bin/awk/lex.c index f9ee05c7a5f..18a5ecac785 100644 --- a/usr.bin/awk/lex.c +++ b/usr.bin/awk/lex.c @@ -1,4 +1,4 @@ -/* $OpenBSD: lex.c,v 1.19 2020/06/10 21:05:50 millert Exp $ */ +/* $OpenBSD: lex.c,v 1.20 2020/06/13 01:19:55 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -524,12 +524,12 @@ int regexpr(void) int c, openclass = 0; static char *buf = NULL; static int bufsz = 500; - char *bp; + char *bp, *cstart; if (buf == NULL && (buf = malloc(bufsz)) == NULL) FATAL("out of space for rex expr"); bp = buf; - for ( ; ((c = input()) != '/' || openclass == 1) && c != 0; ) { + for ( ; ((c = input()) != '/' || openclass > 0) && c != 0; ) { if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr")) FATAL("out of space for reg expr %.10s...", buf); if (c == '\n') { @@ -541,10 +541,25 @@ int regexpr(void) *bp++ = '\\'; *bp++ = input(); } else { - if (c == '[') - openclass = 1; - else if (c == ']') - openclass = 0; + /* + * POSIX requires a slash in a regexp to be escaped, + * other awks don't require it to be escaped inside + * a character class. + */ + if (!do_posix) { + if (c == '[') { + if (++openclass == 1) + cstart = bp; + } else if (c == ']' && openclass > 0) { + /* + * A ']' as the first char in a + * class is treated literally. 
+ */ + if (cstart != bp - 1 && + (cstart != bp - 2 || bp[-1] != '^')) + openclass--; + } + } *bp++ = c; } } diff --git a/usr.bin/awk/main.c b/usr.bin/awk/main.c index 75feccc0aa5..62b0d4c33dc 100644 --- a/usr.bin/awk/main.c +++ b/usr.bin/awk/main.c @@ -1,4 +1,4 @@ -/* $OpenBSD: main.c,v 1.39 2020/06/11 13:51:18 millert Exp $ */ +/* $OpenBSD: main.c,v 1.40 2020/06/13 01:19:55 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -53,7 +53,8 @@ static size_t maxpfile; /* max program filename */ static size_t npfile; /* number of filenames */ static size_t curpfile; /* current filename */ -bool safe = false; /* true => "safe" mode */ +bool safe = false; /* true => "safe" mode */ +bool do_posix = false; /* true => POSIX mode */ static noreturn void fpecatch(int n #ifdef SA_SIGINFO @@ -162,6 +163,8 @@ int main(int argc, char *argv[]) (void)signal(SIGFPE, fpecatch); #endif + do_posix = (getenv("POSIXLY_CORRECT") != NULL); + yyin = NULL; symtab = makesymtab(NSYMTAB); while (argc > 1 && argv[1][0] == '-' && argv[1][1] != '\0') { diff --git a/usr.bin/awk/run.c b/usr.bin/awk/run.c index 7bdb607b8d0..b150d90a55a 100644 --- a/usr.bin/awk/run.c +++ b/usr.bin/awk/run.c @@ -1,4 +1,4 @@ -/* $OpenBSD: run.c,v 1.57 2020/06/10 21:05:50 millert Exp $ */ +/* $OpenBSD: run.c,v 1.58 2020/06/13 01:19:55 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -2122,13 +2122,6 @@ void backsub(char **pb_ptr, const char **sptr_ptr) /* handle \\& variations */ { /* sptr[0] == '\\' */ char *pb = *pb_ptr; const char *sptr = *sptr_ptr; - static bool first = true; - static bool do_posix = false; - - if (first) { - first = false; - do_posix = (getenv("POSIXLY_CORRECT") != NULL); - } if (sptr[1] == '\\') { if (sptr[2] == '\\' && sptr[3] == '&') { /* \\\& -> \& */ |