summary refs log tree commit diff stats
diff options
context:
space:
mode:
author schwarze <schwarze@openbsd.org> 2019-05-15 09:07:46 +0000
committer schwarze <schwarze@openbsd.org> 2019-05-15 09:07:46 +0000
commit 52e4174efd48911938e34457d3fe3659c4d68d65 (patch)
tree cbb789a15b336dd8ae3e45cc25ee8fc901660f73
parent remove some use of uninitialized values pointed out by jsg@ (diff)
download wireguard-openbsd-52e4174efd48911938e34457d3fe3659c4d68d65.tar.xz
wireguard-openbsd-52e4174efd48911938e34457d3fe3659c4d68d65.zip
Cleanup:
Delete handling of LC_COLLATE (which has no effect) and delete support for non-ASCII single-byte encodings, which allows replacing the condition byte_sort == 1 with sort_mb_cur_max == 1. Since none of our single-byte character and string functions are locale dependent, also zap inspection of LC_CTYPE while here. For now, keep the code supporting multi-byte encodings even though it is very ugly, it cannot be reached right now, and we have no plans to enable it any time soon. OK millert@ tb@
-rw-r--r-- usr.bin/sort/bwstring.c 75
-rw-r--r-- usr.bin/sort/bwstring.h 5
-rw-r--r-- usr.bin/sort/file.c 5
-rw-r--r-- usr.bin/sort/sort.1 38
-rw-r--r-- usr.bin/sort/sort.c 42
5 files changed, 21 insertions, 144 deletions
diff --git a/usr.bin/sort/bwstring.c b/usr.bin/sort/bwstring.c
index 8fb4ab26e1d..5ccb96a7a11 100644
--- a/usr.bin/sort/bwstring.c
+++ b/usr.bin/sort/bwstring.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: bwstring.c,v 1.7 2015/04/01 22:38:08 millert Exp $ */
+/* $OpenBSD: bwstring.c,v 1.8 2019/05/15 09:07:46 schwarze Exp $ */
/*-
* Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
@@ -40,9 +40,6 @@
#include "bwstring.h"
#include "sort.h"
-bool byte_sort;
-size_t sort_mb_cur_max = 1;
-
static wchar_t **wmonths;
static char **cmonths;
@@ -686,22 +683,20 @@ bwscoll(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset)
if (len1 <= offset)
return (len2 <= offset) ? 0 : -1;
- else {
+
if (len2 <= offset)
return 1;
- else {
+
len1 -= offset;
len2 -= offset;
if (sort_mb_cur_max == 1) {
const unsigned char *s1, *s2;
+ int res;
s1 = bws1->data.cstr + offset;
s2 = bws2->data.cstr + offset;
- if (byte_sort) {
- int res = 0;
-
if (len1 > len2) {
res = memcmp(s1, s2, len2);
if (!res)
@@ -714,66 +709,6 @@ bwscoll(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset)
res = memcmp(s1, s2, len1);
return res;
-
- } else {
- int res = 0;
- size_t i, maxlen;
-
- i = 0;
- maxlen = len1;
-
- if (maxlen > len2)
- maxlen = len2;
-
- while (i < maxlen) {
- /* goto next non-zero part: */
- while ((i < maxlen) &&
- !s1[i] && !s2[i])
- ++i;
-
- if (i >= maxlen)
- break;
-
- if (s1[i] == 0) {
- if (s2[i] == 0)
- /* NOTREACHED */
- err(2, "bwscoll error 01");
- else
- return -1;
- } else if (s2[i] == 0)
- return 1;
-
- res = strcoll((const char *)(s1 + i), (const char *)(s2 + i));
- if (res)
- return res;
-
- while ((i < maxlen) &&
- s1[i] && s2[i])
- ++i;
-
- if (i >= maxlen)
- break;
-
- if (s1[i] == 0) {
- if (s2[i] == 0) {
- ++i;
- continue;
- } else
- return -1;
- } else if (s2[i] == 0)
- return 1;
- else
- /* NOTREACHED */
- err(2, "bwscoll error 02");
- }
-
- if (len1 < len2)
- return -1;
- else if (len1 > len2)
- return 1;
-
- return 0;
- }
} else {
const wchar_t *s1, *s2;
size_t i, maxlen;
@@ -834,8 +769,6 @@ bwscoll(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset)
return 0;
return len1 < len2 ? -1 : 1;
}
- }
- }
}
/*
diff --git a/usr.bin/sort/bwstring.h b/usr.bin/sort/bwstring.h
index fa72bd406f9..bf42c15a13a 100644
--- a/usr.bin/sort/bwstring.h
+++ b/usr.bin/sort/bwstring.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: bwstring.h,v 1.2 2015/12/31 16:09:31 millert Exp $ */
+/* $OpenBSD: bwstring.h,v 1.3 2019/05/15 09:07:46 schwarze Exp $ */
/*-
* Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
@@ -37,8 +37,7 @@
#include "mem.h"
-extern bool byte_sort;
-extern size_t sort_mb_cur_max;
+static const size_t sort_mb_cur_max = 1;
/* wchar_t is of 4 bytes: */
#define SIZEOF_WCHAR_STRING(LEN) ((LEN)*sizeof(wchar_t))
diff --git a/usr.bin/sort/file.c b/usr.bin/sort/file.c
index a89f55289e0..9d84d7cb4f6 100644
--- a/usr.bin/sort/file.c
+++ b/usr.bin/sort/file.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: file.c,v 1.21 2016/10/17 02:58:29 lteo Exp $ */
+/* $OpenBSD: file.c,v 1.22 2019/05/15 09:07:46 schwarze Exp $ */
/*-
* Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
@@ -1078,7 +1078,8 @@ sort_list_to_file(struct sort_list *list, const char *outfile)
if (!sm->Mflag && !sm->Rflag && !sm->Vflag &&
!sm->gflag && !sm->hflag && !sm->nflag) {
- if ((sort_opts_vals.sort_method == SORT_DEFAULT) && byte_sort)
+ if (sort_opts_vals.sort_method == SORT_DEFAULT &&
+ sort_mb_cur_max == 1)
sort_opts_vals.sort_method = SORT_RADIXSORT;
} else if (sort_opts_vals.sort_method == SORT_RADIXSORT)
diff --git a/usr.bin/sort/sort.1 b/usr.bin/sort/sort.1
index 2c66f99dbb9..e02dcecc0cf 100644
--- a/usr.bin/sort/sort.1
+++ b/usr.bin/sort/sort.1
@@ -1,4 +1,4 @@
-.\" $OpenBSD: sort.1,v 1.59 2019/05/13 17:00:12 schwarze Exp $
+.\" $OpenBSD: sort.1,v 1.60 2019/05/15 09:07:46 schwarze Exp $
.\"
.\" Copyright (c) 1991, 1993
.\" The Regents of the University of California. All rights reserved.
@@ -32,7 +32,7 @@
.\"
.\" @(#)sort.1 8.1 (Berkeley) 6/6/93
.\"
-.Dd $Mdocdate: May 13 2019 $
+.Dd $Mdocdate: May 15 2019 $
.Dt SORT 1
.Os
.Sh NAME
@@ -60,9 +60,8 @@ option
A record can contain any printable or unprintable characters.
Comparisons are based on one or more sort keys extracted from
each line of input, and are performed lexicographically,
-according to the current locale's collating rules and the
-specified command-line options that can tune the actual
-sorting behavior.
+according to the specified command-line options
+that can tune the actual sorting behavior.
By default, if keys are not given,
.Nm
uses entire lines for comparison.
@@ -201,7 +200,6 @@ The files are compared by their prefixes and versions (leading
zeros are ignored in version numbers, see example below).
If an input string does not match the pattern, then it is compared
using the byte compare function.
-All string comparisons are performed in the C locale.
.Pp
For example:
.Bd -literal -offset indent
@@ -494,22 +492,6 @@ which has no
equivalent.
.Sh ENVIRONMENT
.Bl -tag -width Fl
-.It Ev LANG
-Used as a last resort to determine different kinds of locale-specific
-behavior if neither the respective environment variable nor
-.Ev LC_ALL
-are set.
-.It Ev LC_ALL
-Locale settings that override all of the other locale settings.
-This environment variable can be used to set all these settings
-to the same value at once.
-.It Ev LC_COLLATE
-Locale settings to be used to determine the collation for
-sorting records.
-.It Ev LC_CTYPE
-Locale settings to be used to case conversion and classification
-of characters, that is, which characters are considered
-whitespaces, etc.
.It Ev TMPDIR
Path to the directory in which temporary files will be stored.
Note that
@@ -553,7 +535,10 @@ The
.Nm
utility is compliant with the
.St -p1003.1-2008
-specification.
+specification, except that it ignores the user's
+.Xr locale 1
+and always assumes
+.Ev LC_ALL Ns =C.
.Pp
The flags
.Op Fl gHhiMRSsTVz
@@ -603,13 +588,10 @@ This implementation of
has no limits on input line length (other than imposed by available
memory) or any restrictions on bytes allowed within lines.
.Pp
-The performance depends highly on locale settings,
+The performance depends highly on
efficient choice of sort keys and key complexity.
-The fastest sort is with the C locale, on whole lines, with option
+The fastest sort is on whole lines, with option
.Fl s .
-In general, the C locale is the fastest, followed by single-byte
-locales with multi-byte locales being the slowest.
-The correct collation order respected in all cases.
For the key specification, the simpler to process the
lines the faster the search will be.
.Pp
diff --git a/usr.bin/sort/sort.c b/usr.bin/sort/sort.c
index 8530122eee9..0040201ec4d 100644
--- a/usr.bin/sort/sort.c
+++ b/usr.bin/sort/sort.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: sort.c,v 1.88 2019/05/13 17:00:12 schwarze Exp $ */
+/* $OpenBSD: sort.c,v 1.89 2019/05/15 09:07:46 schwarze Exp $ */
/*-
* Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
@@ -36,7 +36,6 @@
#include <errno.h>
#include <getopt.h>
#include <limits.h>
-#include <locale.h>
#include <md5.h>
#include <regex.h>
#include <signal.h>
@@ -235,37 +234,6 @@ set_hw_params(void)
}
/*
- * Set current locale symbols.
- */
-static void
-set_locale(void)
-{
- const char *locale;
-
- setlocale(LC_CTYPE, "");
- locale = setlocale(LC_COLLATE, NULL);
- if (locale != NULL) {
- char *tmpl;
- const char *byteclocale;
-
- tmpl = sort_strdup(locale);
- byteclocale = setlocale(LC_COLLATE, "C");
- if (byteclocale && strcmp(byteclocale, tmpl) == 0) {
- byte_sort = true;
- } else {
- byteclocale = setlocale(LC_COLLATE, "POSIX");
- if (byteclocale && strcmp(byteclocale, tmpl) == 0)
- byte_sort = true;
- else
- setlocale(LC_COLLATE, tmpl);
- }
- sort_free(tmpl);
- }
- if (!byte_sort)
- sort_mb_cur_max = MB_CUR_MAX;
-}
-
-/*
* Set directory temporary files.
*/
static void
@@ -846,7 +814,6 @@ main(int argc, char *argv[])
atexit(clear_tmp_files);
- set_locale();
set_tmpdir();
set_sort_opts();
@@ -1113,14 +1080,9 @@ main(int argc, char *argv[])
ks->sm.func = get_sort_func(&(ks->sm));
}
- if (debug_sort) {
+ if (debug_sort)
printf("Memory to be used for sorting: %llu\n",
available_free_memory);
- printf("Using collate rules of %s locale\n",
- setlocale(LC_COLLATE, NULL));
- if (byte_sort)
- printf("Byte sort is used\n");
- }
if (sort_opts_vals.cflag)
return check(argc ? *argv : "-");