diff options
author | 2019-05-15 18:18:08 +0000 | |
---|---|---|
committer | 2019-05-15 18:18:08 +0000 | |
commit | 358ad9abc01b96be5581c8c9e8ca89f446329de9 (patch) | |
tree | f3e1a465cf6a7d1b4e8d43050c6b9a89e6e4d07e /usr.bin/less | |
parent | UTF-8 cleanup in the function cmd_putstr(). (diff) | |
download | wireguard-openbsd-358ad9abc01b96be5581c8c9e8ca89f446329de9.tar.xz wireguard-openbsd-358ad9abc01b96be5581c8c9e8ca89f446329de9.zip |
Clean up all major UTF-8 issues in cvt.c.
This also allows deleting the buggy, now-unused function put_wchar().
OK millert@
Diffstat (limited to 'usr.bin/less')
-rw-r--r-- | usr.bin/less/charset.c | 42 | ||||
-rw-r--r-- | usr.bin/less/cvt.c | 14 | ||||
-rw-r--r-- | usr.bin/less/funcs.h | 1 |
3 files changed, 11 insertions, 46 deletions
diff --git a/usr.bin/less/charset.c b/usr.bin/less/charset.c index 363d1a80cba..ec1365414ed 100644 --- a/usr.bin/less/charset.c +++ b/usr.bin/less/charset.c @@ -329,48 +329,6 @@ get_wchar(const char *p) } /* - * Store a character into a UTF-8 string. - */ -void -put_wchar(char **pp, LWCHAR ch) -{ - if (!utf_mode || ch < 0x80) { - /* 0xxxxxxx */ - *(*pp)++ = (char)ch; - } else if (ch < 0x800) { - /* 110xxxxx 10xxxxxx */ - *(*pp)++ = (char)(0xC0 | ((ch >> 6) & 0x1F)); - *(*pp)++ = (char)(0x80 | (ch & 0x3F)); - } else if (ch < 0x10000) { - /* 1110xxxx 10xxxxxx 10xxxxxx */ - *(*pp)++ = (char)(0xE0 | ((ch >> 12) & 0x0F)); - *(*pp)++ = (char)(0x80 | ((ch >> 6) & 0x3F)); - *(*pp)++ = (char)(0x80 | (ch & 0x3F)); - } else if (ch < 0x200000) { - /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ - *(*pp)++ = (char)(0xF0 | ((ch >> 18) & 0x07)); - *(*pp)++ = (char)(0x80 | ((ch >> 12) & 0x3F)); - *(*pp)++ = (char)(0x80 | ((ch >> 6) & 0x3F)); - *(*pp)++ = (char)(0x80 | (ch & 0x3F)); - } else if (ch < 0x4000000) { - /* 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */ - *(*pp)++ = (char)(0xF0 | ((ch >> 24) & 0x03)); - *(*pp)++ = (char)(0x80 | ((ch >> 18) & 0x3F)); - *(*pp)++ = (char)(0x80 | ((ch >> 12) & 0x3F)); - *(*pp)++ = (char)(0x80 | ((ch >> 6) & 0x3F)); - *(*pp)++ = (char)(0x80 | (ch & 0x3F)); - } else { - /* 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */ - *(*pp)++ = (char)(0xF0 | ((ch >> 30) & 0x01)); - *(*pp)++ = (char)(0x80 | ((ch >> 24) & 0x3F)); - *(*pp)++ = (char)(0x80 | ((ch >> 18) & 0x3F)); - *(*pp)++ = (char)(0x80 | ((ch >> 12) & 0x3F)); - *(*pp)++ = (char)(0x80 | ((ch >> 6) & 0x3F)); - *(*pp)++ = (char)(0x80 | (ch & 0x3F)); - } -} - -/* * Step forward or backward one character in a string. 
*/ LWCHAR diff --git a/usr.bin/less/cvt.c b/usr.bin/less/cvt.c index ed7c72d69a1..70a58851e36 100644 --- a/usr.bin/less/cvt.c +++ b/usr.bin/less/cvt.c @@ -60,7 +60,8 @@ cvt_text(char *odst, char *osrc, int *chpos, int *lenp, int ops) char *edst = odst; char *src; char *src_end; - LWCHAR ch; + wchar_t ch; + int len; if (lenp != NULL) src_end = osrc + *lenp; @@ -70,8 +71,10 @@ cvt_text(char *odst, char *osrc, int *chpos, int *lenp, int ops) for (src = osrc, dst = odst; src < src_end; ) { int src_pos = src - osrc; int dst_pos = dst - odst; - ch = step_char(&src, +1, src_end); + if ((len = mbtowc(&ch, src, src_end - src)) < 1) + ch = L'\0'; if ((ops & CVT_BS) && ch == '\b' && dst > odst) { + src++; /* Delete backspace and preceding char. */ do { dst--; @@ -83,11 +86,16 @@ cvt_text(char *odst, char *osrc, int *chpos, int *lenp, int ops) while (src < src_end) if (!is_ansi_middle(*src++)) break; + } else if (len < 1) { + *dst++ = *src++; + if (chpos != NULL) + chpos[dst_pos] = src_pos; } else { + src += len; /* Just copy the char to the destination buffer. */ if ((ops & CVT_TO_LC) && iswupper(ch)) ch = towlower(ch); - put_wchar(&dst, ch); + dst += wctomb(dst, ch); /* Record the original position of the char. */ if (chpos != NULL) chpos[dst_pos] = src_pos; diff --git a/usr.bin/less/funcs.h b/usr.bin/less/funcs.h index 5c24ba89e96..87439f06e2e 100644 --- a/usr.bin/less/funcs.h +++ b/usr.bin/less/funcs.h @@ -62,7 +62,6 @@ char *prutfchar(LWCHAR); int utf_len(char); int is_utf8_well_formed(const char *); LWCHAR get_wchar(const char *); -void put_wchar(char **, LWCHAR); LWCHAR step_char(char **, int, char *); int is_composing_char(LWCHAR); int is_ubin_char(LWCHAR); |