summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authormillert <millert@openbsd.org>2020-06-13 01:19:55 +0000
committermillert <millert@openbsd.org>2020-06-13 01:19:55 +0000
commitef7896159fc0611a0b45253ea37cc7c8f65d4bf5 (patch)
tree39e2e85dd108ec4abcdbbe6b1fdd72e47d50de14
parentTeach powerpc64 ddb to x, w, break, step, trace. (diff)
downloadwireguard-openbsd-ef7896159fc0611a0b45253ea37cc7c8f65d4bf5.tar.xz
wireguard-openbsd-ef7896159fc0611a0b45253ea37cc7c8f65d4bf5.zip
POSIX doesn't permit an unescaped '/' in an extended regular expression.
Unlike upstream awk, ours has historically allowed an unescaped '/' inside a bracket expression for compatibility with other awk implementations, but the check was too simple-minded. This improves the matching to allow things like /[]/]/, /[^]/]/ and '/[abc[:digit:]/@#]/'. To enable strict POSIX compliance, set POSIXLY_CORRECT.
-rw-r--r--usr.bin/awk/awk.18
-rw-r--r--usr.bin/awk/awk.h3
-rw-r--r--usr.bin/awk/lex.c29
-rw-r--r--usr.bin/awk/main.c7
-rw-r--r--usr.bin/awk/run.c9
5 files changed, 35 insertions, 21 deletions
diff --git a/usr.bin/awk/awk.1 b/usr.bin/awk/awk.1
index 84e4f057e73..4d320b705f2 100644
--- a/usr.bin/awk/awk.1
+++ b/usr.bin/awk/awk.1
@@ -1,4 +1,4 @@
-.\" $OpenBSD: awk.1,v 1.50 2020/06/10 21:05:02 millert Exp $
+.\" $OpenBSD: awk.1,v 1.51 2020/06/13 01:19:55 millert Exp $
.\"
.\" Copyright (C) Lucent Technologies 1997
.\" All Rights Reserved
@@ -22,7 +22,7 @@
.\" ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
.\" THIS SOFTWARE.
.\"
-.Dd $Mdocdate: June 10 2020 $
+.Dd $Mdocdate: June 13 2020 $
.Dt AWK 1
.Os
.Sh NAME
@@ -805,7 +805,9 @@ string argument for
.Fn sub
and
.Fn gsub
-are not collapsed.
+are not collapsed and a slash
+.Pq Ql /
+does not need to be escaped in a bracket expression.
.Pp
The flags
.Op Fl \&dV
diff --git a/usr.bin/awk/awk.h b/usr.bin/awk/awk.h
index e55af318b9a..080bf94cc22 100644
--- a/usr.bin/awk/awk.h
+++ b/usr.bin/awk/awk.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: awk.h,v 1.23 2020/06/10 21:06:09 millert Exp $ */
+/* $OpenBSD: awk.h,v 1.24 2020/06/13 01:19:55 millert Exp $ */
/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved
@@ -62,6 +62,7 @@ extern enum compile_states {
} compile_time;
extern bool safe; /* false => unsafe, true => safe */
+extern bool do_posix; /* true if POSIXLY_CORRECT set */
#define RECSIZE (8 * 1024) /* sets limit on records, fields, etc., etc. */
extern int recsize; /* size of current record, orig RECSIZE */
diff --git a/usr.bin/awk/lex.c b/usr.bin/awk/lex.c
index f9ee05c7a5f..18a5ecac785 100644
--- a/usr.bin/awk/lex.c
+++ b/usr.bin/awk/lex.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: lex.c,v 1.19 2020/06/10 21:05:50 millert Exp $ */
+/* $OpenBSD: lex.c,v 1.20 2020/06/13 01:19:55 millert Exp $ */
/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved
@@ -524,12 +524,12 @@ int regexpr(void)
int c, openclass = 0;
static char *buf = NULL;
static int bufsz = 500;
- char *bp;
+ char *bp, *cstart;
if (buf == NULL && (buf = malloc(bufsz)) == NULL)
FATAL("out of space for rex expr");
bp = buf;
- for ( ; ((c = input()) != '/' || openclass == 1) && c != 0; ) {
+ for ( ; ((c = input()) != '/' || openclass > 0) && c != 0; ) {
if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr"))
FATAL("out of space for reg expr %.10s...", buf);
if (c == '\n') {
@@ -541,10 +541,25 @@ int regexpr(void)
*bp++ = '\\';
*bp++ = input();
} else {
- if (c == '[')
- openclass = 1;
- else if (c == ']')
- openclass = 0;
+ /*
+ * POSIX requires a slash in a regexp to be escaped,
+ * other awks don't require it to be escaped inside
+ * a character class.
+ */
+ if (!do_posix) {
+ if (c == '[') {
+ if (++openclass == 1)
+ cstart = bp;
+ } else if (c == ']' && openclass > 0) {
+ /*
+ * A ']' as the first char in a
+ * class is treated literally.
+ */
+ if (cstart != bp - 1 &&
+ (cstart != bp - 2 || bp[-1] != '^'))
+ openclass--;
+ }
+ }
*bp++ = c;
}
}
diff --git a/usr.bin/awk/main.c b/usr.bin/awk/main.c
index 75feccc0aa5..62b0d4c33dc 100644
--- a/usr.bin/awk/main.c
+++ b/usr.bin/awk/main.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: main.c,v 1.39 2020/06/11 13:51:18 millert Exp $ */
+/* $OpenBSD: main.c,v 1.40 2020/06/13 01:19:55 millert Exp $ */
/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved
@@ -53,7 +53,8 @@ static size_t maxpfile; /* max program filename */
static size_t npfile; /* number of filenames */
static size_t curpfile; /* current filename */
-bool safe = false; /* true => "safe" mode */
+bool safe = false; /* true => "safe" mode */
+bool do_posix = false; /* true => POSIX mode */
static noreturn void fpecatch(int n
#ifdef SA_SIGINFO
@@ -162,6 +163,8 @@ int main(int argc, char *argv[])
(void)signal(SIGFPE, fpecatch);
#endif
+ do_posix = (getenv("POSIXLY_CORRECT") != NULL);
+
yyin = NULL;
symtab = makesymtab(NSYMTAB);
while (argc > 1 && argv[1][0] == '-' && argv[1][1] != '\0') {
diff --git a/usr.bin/awk/run.c b/usr.bin/awk/run.c
index 7bdb607b8d0..b150d90a55a 100644
--- a/usr.bin/awk/run.c
+++ b/usr.bin/awk/run.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: run.c,v 1.57 2020/06/10 21:05:50 millert Exp $ */
+/* $OpenBSD: run.c,v 1.58 2020/06/13 01:19:55 millert Exp $ */
/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved
@@ -2122,13 +2122,6 @@ void backsub(char **pb_ptr, const char **sptr_ptr) /* handle \\& variations */
{ /* sptr[0] == '\\' */
char *pb = *pb_ptr;
const char *sptr = *sptr_ptr;
- static bool first = true;
- static bool do_posix = false;
-
- if (first) {
- first = false;
- do_posix = (getenv("POSIXLY_CORRECT") != NULL);
- }
if (sptr[1] == '\\') {
if (sptr[2] == '\\' && sptr[3] == '&') { /* \\\& -> \& */