Move dig(1) and needed DNS libraries into it's own source directory in

usr.bin/dig. From the beginning when we started to remove unneeded nameserver code, it was our goal to extract dig functionality from the bind sources, for everyone's benefit as this is easier to reason about. In total we removed about 2/3 or over 300.000 lines of code. We kept the lib/ subdirectory layout but moved the content of bin/ to the top from the old bind source directory. Previous sources and history can be found in the src/usr.sbin/bind Attic. With & OK deraadt Proposed directory layout sounds good to sthen
author: florian <florian@openbsd.org> 2020-02-07 09:58:52 +0000
committer: florian <florian@openbsd.org> 2020-02-07 09:58:52 +0000
commit: 5185a7002a54c85c252cec8ac1712898b2c4d364 (patch)
tree: 85c674e5d41ec98028ad29a3aac4db558edae254 /usr.bin/dig/lib/isc/regex.c
parent: It appears we have come full-circle, where source code starts to use (diff)
download: wireguard-openbsd-5185a7002a54c85c252cec8ac1712898b2c4d364.tar.xz
wireguard-openbsd-5185a7002a54c85c252cec8ac1712898b2c4d364.zip
1 files changed, 371 insertions, 0 deletions
diff --git a/usr.bin/dig/lib/isc/regex.c b/usr.bin/dig/lib/isc/regex.c
new file mode 100644
index 00000000000..4344a8b0339
--- /dev/null
+++ b/usr.bin/dig/lib/isc/regex.c
@@ -0,0 +1,371 @@
+/*
+ * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+
+
+
+
+#include <isc/regex.h>
+#include <string.h>
+
+#if VALREGEX_REPORT_REASON
+#define FAIL(x) do { reason = (x); goto error; } while(0)
+#else
+#define FAIL(x) goto error
+#endif
+
+/*
+ * Validate the regular expression 'C' locale.
+ */
+int
+isc_regex_validate(const char *c) {
+	enum {
+		none, parse_bracket, parse_bound,
+		parse_ce, parse_ec, parse_cc
+	} state = none;
+	/* Well known character classes. */
+	const char *cc[] = {
+		":alnum:", ":digit:", ":punct:", ":alpha:", ":graph:",
+		":space:", ":blank:", ":lower:", ":upper:", ":cntrl:",
+		":print:", ":xdigit:"
+	};
+	isc_boolean_t seen_comma = ISC_FALSE;
+	isc_boolean_t seen_high = ISC_FALSE;
+	isc_boolean_t seen_char = ISC_FALSE;
+	isc_boolean_t seen_ec = ISC_FALSE;
+	isc_boolean_t seen_ce = ISC_FALSE;
+	isc_boolean_t have_atom = ISC_FALSE;
+	int group = 0;
+	int range = 0;
+	int sub = 0;
+	isc_boolean_t empty_ok = ISC_FALSE;
+	isc_boolean_t neg = ISC_FALSE;
+	isc_boolean_t was_multiple = ISC_FALSE;
+	unsigned int low = 0;
+	unsigned int high = 0;
+	const char *ccname = NULL;
+	int range_start = 0;
+#if VALREGEX_REPORT_REASON
+	const char *reason = "";
+#endif
+
+	if (c == NULL || *c == 0)
+		FAIL("empty string");
+
+	while (c != NULL && *c != 0) {
+		switch (state) {
+		case none:
+			switch (*c) {
+			case '\\':	/* make literal */
+				++c;
+				switch (*c) {
+				case '1': case '2': case '3':
+				case '4': case '5': case '6':
+				case '7': case '8': case '9':
+					if ((*c - '0') > sub)
+						FAIL("bad back reference");
+					have_atom = ISC_TRUE;
+					was_multiple = ISC_FALSE;
+					break;
+				case 0:
+					FAIL("escaped end-of-string");
+				default:
+					goto literal;
+				}
+				++c;
+				break;
+			case '[':	/* bracket start */
+				++c;
+				neg = ISC_FALSE;
+				was_multiple = ISC_FALSE;
+				seen_char = ISC_FALSE;
+				state = parse_bracket;
+				break;
+			case '{': 	/* bound start */
+				switch (c[1]) {
+				case '0': case '1': case '2': case '3':
+				case '4': case '5': case '6': case '7':
+				case '8': case '9':
+					if (!have_atom)
+						FAIL("no atom");
+					if (was_multiple)
+						FAIL("was multiple");
+					seen_comma = ISC_FALSE;
+					seen_high = ISC_FALSE;
+					low = high = 0;
+					state = parse_bound;
+					break;
+				default:
+					goto literal;
+				}
+				++c;
+				have_atom = ISC_TRUE;
+				was_multiple = ISC_TRUE;
+				break;
+			case '}':
+				goto literal;
+			case '(':	/* group start */
+				have_atom = ISC_FALSE;
+				was_multiple = ISC_FALSE;
+				empty_ok = ISC_TRUE;
+				++group;
+				++sub;
+				++c;
+				break;
+			case ')':	/* group end */
+				if (group && !have_atom && !empty_ok)
+					FAIL("empty alternative");
+				have_atom = ISC_TRUE;
+				was_multiple = ISC_FALSE;
+				if (group != 0)
+					--group;
+				++c;
+				break;
+			case '|':	/* alternative seperator */
+				if (!have_atom)
+					FAIL("no atom");
+				have_atom = ISC_FALSE;
+				empty_ok = ISC_FALSE;
+				was_multiple = ISC_FALSE;
+				++c;
+				break;
+			case '^':
+			case '$':
+				have_atom = ISC_TRUE;
+				was_multiple = ISC_TRUE;
+				++c;
+				break;
+			case '+':
+			case '*':
+			case '?':
+				if (was_multiple)
+					FAIL("was multiple");
+				if (!have_atom)
+					FAIL("no atom");
+				have_atom = ISC_TRUE;
+				was_multiple = ISC_TRUE;
+				++c;
+				break;
+			case '.':
+			default:
+			literal:
+				have_atom = ISC_TRUE;
+				was_multiple = ISC_FALSE;
+				++c;
+				break;
+			}
+			break;
+		case parse_bound:
+			switch (*c) {
+			case '0': case '1': case '2': case '3': case '4':
+			case '5': case '6': case '7': case '8': case '9':
+				if (!seen_comma) {
+					low = low * 10 + *c - '0';
+					if (low > 255)
+						FAIL("lower bound too big");
+				} else {
+					seen_high = ISC_TRUE;
+					high = high * 10 + *c - '0';
+					if (high > 255)
+						FAIL("upper bound too big");
+				}
+				++c;
+				break;
+			case ',':
+				if (seen_comma)
+					FAIL("multiple commas");
+				seen_comma = ISC_TRUE;
+				++c;
+				break;
+			default:
+			case '{':
+				FAIL("non digit/comma");
+			case '}':
+				if (seen_high && low > high)
+					FAIL("bad parse bound");
+				seen_comma = ISC_FALSE;
+				state = none;
+				++c;
+				break;
+			}
+			break;
+		case parse_bracket:
+			switch (*c) {
+			case '^':
+				if (seen_char || neg) goto inside;
+				neg = ISC_TRUE;
+				++c;
+				break;
+			case '-':
+				if (range == 2) goto inside;
+				if (!seen_char) goto inside;
+				if (range == 1)
+					FAIL("bad range");
+				range = 2;
+				++c;
+				break;
+			case '[':
+				++c;
+				switch (*c) {
+				case '.':	/* collating element */
+					if (range != 0) --range;
+					++c;
+					state = parse_ce;
+					seen_ce = ISC_FALSE;
+					break;
+				case '=':	/* equivalence class */
+					if (range == 2)
+					    FAIL("equivalence class in range");
+					++c;
+					state = parse_ec;
+					seen_ec = ISC_FALSE;
+					break;
+				case ':':	/* character class */
+					if (range == 2)
+					      FAIL("character class in range");
+					ccname = c;
+					++c;
+					state = parse_cc;
+					break;
+				}
+				seen_char = ISC_TRUE;
+				break;
+			case ']':
+				if (!c[1] && !seen_char)
+					FAIL("unfinished brace");
+				if (!seen_char)
+					goto inside;
+				++c;
+				range = 0;
+				have_atom = ISC_TRUE;
+				state = none;
+				break;
+			default:
+			inside:
+				seen_char = ISC_TRUE;
+				if (range == 2 && (*c & 0xff) < range_start)
+					FAIL("out of order range");
+				if (range != 0)
+					--range;
+				range_start = *c & 0xff;
+				++c;
+				break;
+			};
+			break;
+		case parse_ce:
+			switch (*c) {
+			case '.':
+				++c;
+				switch (*c) {
+				case ']':
+					if (!seen_ce)
+						 FAIL("empty ce");
+					++c;
+					state = parse_bracket;
+					break;
+				default:
+					if (seen_ce)
+						range_start = 256;
+					else
+						range_start = '.';
+					seen_ce = ISC_TRUE;
+					break;
+				}
+				break;
+			default:
+				if (seen_ce)
+					range_start = 256;
+				else
+					range_start = *c;
+				seen_ce = ISC_TRUE;
+				++c;
+				break;
+			}
+			break;
+		case parse_ec:
+			switch (*c) {
+			case '=':
+				++c;
+				switch (*c) {
+				case ']':
+					if (!seen_ec)
+						FAIL("no ec");
+					++c;
+					state = parse_bracket;
+					break;
+				default:
+					seen_ec = ISC_TRUE;
+					break;
+				}
+				break;
+			default:
+				seen_ec = ISC_TRUE;
+				++c;
+				break;
+			}
+			break;
+		case parse_cc:
+			switch (*c) {
+			case ':':
+				++c;
+				switch (*c) {
+				case ']': {
+					unsigned int i;
+					isc_boolean_t found = ISC_FALSE;
+					for (i = 0;
+					     i < sizeof(cc)/sizeof(*cc);
+					     i++)
+					{
+						unsigned int len;
+						len = strlen(cc[i]);
+						if (len !=
+						    (unsigned int)(c - ccname))
+							continue;
+						if (strncmp(cc[i], ccname, len))
+							continue;
+						found = ISC_TRUE;
+					}
+					if (!found)
+						FAIL("unknown cc");
+					++c;
+					state = parse_bracket;
+					break;
+					}
+				default:
+					break;
+				}
+				break;
+			default:
+				++c;
+				break;
+			}
+			break;
+		}
+	}
+	if (group != 0)
+		FAIL("group open");
+	if (state != none)
+		FAIL("incomplete");
+	if (!have_atom)
+		FAIL("no atom");
+	return (sub);
+
+ error:
+#if VALREGEX_REPORT_REASON
+	fprintf(stderr, "%s\n", reason);
+#endif
+	return (-1);
+}
author	florian <florian@openbsd.org>	2020-02-07 09:58:52 +0000
committer	florian <florian@openbsd.org>	2020-02-07 09:58:52 +0000
commit	5185a7002a54c85c252cec8ac1712898b2c4d364 (patch)
tree	85c674e5d41ec98028ad29a3aac4db558edae254 /usr.bin/dig/lib/isc/regex.c
parent	It appears we have come full-circle, where source code starts to use (diff)
download	wireguard-openbsd-5185a7002a54c85c252cec8ac1712898b2c4d364.tar.xz wireguard-openbsd-5185a7002a54c85c252cec8ac1712898b2c4d364.zip