summary | refs | log | tree | commit | diff | stats
path: root/usr.bin/less
diff options
context:
space:
mode:
author schwarze <schwarze@openbsd.org> 2019-05-15 18:18:08 +0000
committer schwarze <schwarze@openbsd.org> 2019-05-15 18:18:08 +0000
commit 358ad9abc01b96be5581c8c9e8ca89f446329de9 (patch)
tree f3e1a465cf6a7d1b4e8d43050c6b9a89e6e4d07e /usr.bin/less
parent UTF-8 cleanup in the function cmd_putstr(). (diff)
download wireguard-openbsd-358ad9abc01b96be5581c8c9e8ca89f446329de9.tar.xz
wireguard-openbsd-358ad9abc01b96be5581c8c9e8ca89f446329de9.zip
Clean up all major UTF-8 issues in cvt.c.
This also allows deleting the buggy, now unused function put_wchar(). OK millert@
Diffstat (limited to 'usr.bin/less')
-rw-r--r-- usr.bin/less/charset.c 42
-rw-r--r-- usr.bin/less/cvt.c 14
-rw-r--r-- usr.bin/less/funcs.h 1
3 files changed, 11 insertions, 46 deletions
diff --git a/usr.bin/less/charset.c b/usr.bin/less/charset.c
index 363d1a80cba..ec1365414ed 100644
--- a/usr.bin/less/charset.c
+++ b/usr.bin/less/charset.c
@@ -329,48 +329,6 @@ get_wchar(const char *p)
}
/*
- * Store a character into a UTF-8 string.
- */
-void
-put_wchar(char **pp, LWCHAR ch)
-{
- if (!utf_mode || ch < 0x80) {
- /* 0xxxxxxx */
- *(*pp)++ = (char)ch;
- } else if (ch < 0x800) {
- /* 110xxxxx 10xxxxxx */
- *(*pp)++ = (char)(0xC0 | ((ch >> 6) & 0x1F));
- *(*pp)++ = (char)(0x80 | (ch & 0x3F));
- } else if (ch < 0x10000) {
- /* 1110xxxx 10xxxxxx 10xxxxxx */
- *(*pp)++ = (char)(0xE0 | ((ch >> 12) & 0x0F));
- *(*pp)++ = (char)(0x80 | ((ch >> 6) & 0x3F));
- *(*pp)++ = (char)(0x80 | (ch & 0x3F));
- } else if (ch < 0x200000) {
- /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
- *(*pp)++ = (char)(0xF0 | ((ch >> 18) & 0x07));
- *(*pp)++ = (char)(0x80 | ((ch >> 12) & 0x3F));
- *(*pp)++ = (char)(0x80 | ((ch >> 6) & 0x3F));
- *(*pp)++ = (char)(0x80 | (ch & 0x3F));
- } else if (ch < 0x4000000) {
- /* 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */
- *(*pp)++ = (char)(0xF0 | ((ch >> 24) & 0x03));
- *(*pp)++ = (char)(0x80 | ((ch >> 18) & 0x3F));
- *(*pp)++ = (char)(0x80 | ((ch >> 12) & 0x3F));
- *(*pp)++ = (char)(0x80 | ((ch >> 6) & 0x3F));
- *(*pp)++ = (char)(0x80 | (ch & 0x3F));
- } else {
- /* 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */
- *(*pp)++ = (char)(0xF0 | ((ch >> 30) & 0x01));
- *(*pp)++ = (char)(0x80 | ((ch >> 24) & 0x3F));
- *(*pp)++ = (char)(0x80 | ((ch >> 18) & 0x3F));
- *(*pp)++ = (char)(0x80 | ((ch >> 12) & 0x3F));
- *(*pp)++ = (char)(0x80 | ((ch >> 6) & 0x3F));
- *(*pp)++ = (char)(0x80 | (ch & 0x3F));
- }
-}
-
-/*
* Step forward or backward one character in a string.
*/
LWCHAR
diff --git a/usr.bin/less/cvt.c b/usr.bin/less/cvt.c
index ed7c72d69a1..70a58851e36 100644
--- a/usr.bin/less/cvt.c
+++ b/usr.bin/less/cvt.c
@@ -60,7 +60,8 @@ cvt_text(char *odst, char *osrc, int *chpos, int *lenp, int ops)
char *edst = odst;
char *src;
char *src_end;
- LWCHAR ch;
+ wchar_t ch;
+ int len;
if (lenp != NULL)
src_end = osrc + *lenp;
@@ -70,8 +71,10 @@ cvt_text(char *odst, char *osrc, int *chpos, int *lenp, int ops)
for (src = osrc, dst = odst; src < src_end; ) {
int src_pos = src - osrc;
int dst_pos = dst - odst;
- ch = step_char(&src, +1, src_end);
+ if ((len = mbtowc(&ch, src, src_end - src)) < 1)
+ ch = L'\0';
if ((ops & CVT_BS) && ch == '\b' && dst > odst) {
+ src++;
/* Delete backspace and preceding char. */
do {
dst--;
@@ -83,11 +86,16 @@ cvt_text(char *odst, char *osrc, int *chpos, int *lenp, int ops)
while (src < src_end)
if (!is_ansi_middle(*src++))
break;
+ } else if (len < 1) {
+ *dst++ = *src++;
+ if (chpos != NULL)
+ chpos[dst_pos] = src_pos;
} else {
+ src += len;
/* Just copy the char to the destination buffer. */
if ((ops & CVT_TO_LC) && iswupper(ch))
ch = towlower(ch);
- put_wchar(&dst, ch);
+ dst += wctomb(dst, ch);
/* Record the original position of the char. */
if (chpos != NULL)
chpos[dst_pos] = src_pos;
diff --git a/usr.bin/less/funcs.h b/usr.bin/less/funcs.h
index 5c24ba89e96..87439f06e2e 100644
--- a/usr.bin/less/funcs.h
+++ b/usr.bin/less/funcs.h
@@ -62,7 +62,6 @@ char *prutfchar(LWCHAR);
int utf_len(char);
int is_utf8_well_formed(const char *);
LWCHAR get_wchar(const char *);
-void put_wchar(char **, LWCHAR);
LWCHAR step_char(char **, int, char *);
int is_composing_char(LWCHAR);
int is_ubin_char(LWCHAR);