lt

simple terminal emulator
Log | Files | Refs | git clone https://git.ne02ptzero.me/git/lt

commit 356693dd2dc9b194791e10848aa7b6dcf2bec784
parent 446e94e2a8047c79fd9c3f6de13edac4fe862ddf
Author: Ne02ptzero <louis@ne02ptzero.me>
Date:   Fri,  8 Jun 2018 18:09:28 +0200

NEW: Beginning of UTF8 support in the read part of the TTY

Signed-off-by: Ne02ptzero <louis@ne02ptzero.me>

Diffstat:
M term.c |   5 ++++-
A utf8.c | 110 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A utf8.h |  11 +++++++++++
3 files changed, 125 insertions(+), 1 deletion(-)

diff --git a/term.c b/term.c
@@ -1,6 +1,7 @@
 #include "term.h"
 #include "config.h"
 #include "utils.h"
+#include "utf8.h"
 
 #include <stdlib.h>
 #include <string.h>
@@ -223,7 +224,9 @@ int term_write(term_t *term, const char *buf, int len, bool show_ctrl)
     {
         if (IS_SET(term->mode, MODE_UTF8) && !IS_SET(term->mode, MODE_SIXEL))
         {
-            // UTF-8
+            charsize = utf8_decode(buf + n, &u, len - n);
+            if (charsize == 0)
+                break;
         }
         else
         {
diff --git a/utf8.c b/utf8.c
@@ -0,0 +1,110 @@
+#include <assert.h>
+
+#include "utf8.h"
+#include "utils.h"
+
+/* Value stored in *u whenever a sequence cannot be decoded or validated. */
+#define UTF_INVALID 0xdeadbeef
+/* Maximum number of bytes in one encoded sequence. */
+#define UTF_SIZ 4
+
+/*
+ * Lookup tables indexed by byte class: 0 is a continuation byte, 1..UTF_SIZ
+ * is the lead byte of a sequence of that many bytes.  __utf_mask/__utf_byte
+ * hold the tag bits of each class; __utf_min/__utf_max hold the code point
+ * range accepted for each sequence length (index 0: the overall range).
+ */
+static const uint8_t __utf_mask[] = { 0xC0, 0x80, 0xE0, 0xF0, 0xF8};
+static const uint8_t __utf_byte[] = { 0X80, 0, 0XC0, 0XE0, 0XF0};
+static const uint_least32_t __utf_min[] = { 0, 0, 0x80, 0x800, 0x10000};
+static const uint_least32_t __utf_max[] = {0x10FFFF, 0x7F, 0x7FF, 0xFFFF, 0x10FFFF};
+
+/*
+ * Classify one byte and extract its payload bits.
+ *
+ * On return *i is 0 for a continuation byte, 1..UTF_SIZ for the lead byte of
+ * a sequence of that length, or sizeof(__utf_mask) when no class matches (in
+ * which case 0 is returned).
+ */
+uint_least32_t utf8_decode_byte(char c, size_t *i)
+{
+    /* Plain char may be signed: use the unsigned value so bytes >= 0x80 are
+     * not sign-extended into the returned payload. */
+    const uint8_t byte = (uint8_t)c;
+
+    assert(i != NULL);
+
+    for (*i = 0; *i < sizeof(__utf_mask); (*i)++)
+    {
+        if ((byte & __utf_mask[*i]) == __utf_byte[*i])
+            return byte & (uint8_t)~__utf_mask[*i];
+    }
+
+    return 0;
+}
+
+/*
+ * Decode the first UTF-8 sequence in the clen bytes starting at c.
+ *
+ * *u receives the code point, or UTF_INVALID when the bytes are malformed or
+ * the decoded value is rejected by utf8_validate().
+ *
+ * Returns the number of bytes consumed; 0 when clen is 0 or the buffer ends
+ * before the sequence is complete.
+ */
+size_t utf8_decode(const char *c, uint_least32_t *u, size_t clen)
+{
+    size_t j = 1;
+    size_t len;
+    uint_least32_t u_decoded;
+
+    *u = UTF_INVALID;
+    if (clen == 0)
+        return 0;
+
+    u_decoded = utf8_decode_byte(c[0], &len);
+    /* Not a lead byte (assuming BETWEEN is an inclusive range test from
+     * utils.h): consume this single byte and leave *u invalid. */
+    if (!BETWEEN(len, 1, UTF_SIZ))
+        return 1;
+
+    for (size_t i = 1; i < clen && j < len; i++, j++)
+    {
+        size_t type;
+
+        u_decoded = (u_decoded << 6) | utf8_decode_byte(c[i], &type);
+        /* Anything but a continuation byte ends the sequence early. */
+        if (type != 0)
+            return j;
+    }
+
+    /* The buffer ran out before all len bytes were seen. */
+    if (j < len)
+        return 0;
+
+    *u = u_decoded;
+    utf8_validate(u, len);
+    return len;
+}
+
+/*
+ * Check *u against the range allowed for an i-byte sequence and reject
+ * UTF-16 surrogates (0xD800..0xDFFF); *u becomes UTF_INVALID on failure.
+ *
+ * Returns the shortest sequence length whose upper bound is not below the
+ * resulting *u, capped at UTF_SIZ.
+ */
+size_t utf8_validate(uint_least32_t *u, size_t i)
+{
+    /* i is used as a table subscript, so reject lengths beyond UTF_SIZ
+     * before indexing with them. */
+    if (i > UTF_SIZ || !BETWEEN(*u, __utf_min[i], __utf_max[i]) || BETWEEN(*u, 0xD800, 0xDFFF))
+        *u = UTF_INVALID;
+
+    /* UTF_INVALID exceeds every __utf_max entry, so the scan is bounded to
+     * keep it from reading past the end of the table. */
+    for (i = 1; i < UTF_SIZ && *u > __utf_max[i]; i++)
+        ;
+
+    return i;
+}
diff --git a/utf8.h b/utf8.h
@@ -0,0 +1,11 @@
+#ifndef UTF8_H
+#define UTF8_H
+
+#include <stdint.h>
+#include <stdlib.h>
+
+size_t utf8_decode(const char *c, uint_least32_t *u, size_t len);
+size_t utf8_validate(uint_least32_t *u, size_t i);
+uint_least32_t utf8_decode_byte(char c, size_t *i);
+
+#endif /* UTF8_H */