Better handling of invalid multi-byte sequences
author Jack Miller <jack@codezen.org>
Tue, 17 Mar 2015 20:25:15 +0000 (15:25 -0500)
committer Jack Miller <jack@codezen.org>
Tue, 17 Mar 2015 20:36:45 +0000 (15:36 -0500)
Occasionally, feed data has some weird multi-byte sequences that both
mbtowc and mbstowcs don't like. It looks like browsers handle them okay,
but I haven't found a single command line program (not even vim) that
can handle them well, so for now just skip over anything for which mbtowc
returns -1.

canto_curses/theme.py
canto_curses/widecurse.c

index 5f4f004..ed5e6be 100644 (file)
@@ -57,7 +57,11 @@ class FakePad():
         pass
 
     def waddch(self, ch):
-        self.x += wcwidth(ch)
+        cwidth = wcwidth(ch)
+        if cwidth < 0:
+            return
+
+        self.x += cwidth
         if self.x >= self.width:
             self.y += 1
             self.x -= self.width
@@ -120,9 +124,12 @@ def theme_print_one(pad, uni, width):
 
     for i, c in enumerate(uni):
         ec = encoder(c)
+        cwidth = wcwidth(ec)
+        if cwidth < 0:
+            continue
+
         if escaped:
             # No room
-            cwidth = wcwidth(ec)
             if cwidth > width:
                 return "\\" + uni[i:]
 
@@ -212,8 +219,6 @@ def theme_print_one(pad, uni, width):
                 if wwidth <= max_width and wwidth >= width:
                     return uni[i + 1:]
 
-            cwidth = wcwidth(ec)
-
             # Character too long (should be handled above).
             if cwidth > width:
                 return uni[i:]
@@ -221,7 +226,7 @@ def theme_print_one(pad, uni, width):
             try:
                 pad.waddch(ec)
             except Exception as e:
-                log.debug("Can't print ec: %s in: %s" % (ec, uni))
+                log.debug("Can't print ec: %s in: %s" % (ec, repr(encoder(uni))))
                 log.debug("Exception: %s" % e)
 
             width -= cwidth
@@ -272,8 +277,13 @@ def theme_len(uni):
 
     for c in uni:
         ec = encoder(c)
+
+        cwidth = wcwidth(ec)
+        if cwidth < 0:
+            continue
+
         if escaped:
-            length += wcwidth(ec)
+            length += cwidth
             escaped = False
         elif code:
             code = False
@@ -282,7 +292,7 @@ def theme_len(uni):
         elif c == "%":
             code = True
         else:
-            width = wcwidth(ec)
+            width = cwidth
             if width >= 0:
                 length += width
     return length
index 7c0afb0..934440a 100644 (file)
@@ -20,9 +20,9 @@ static PyObject *py_wcwidth(PyObject * self, PyObject * args)
        if (!PyArg_ParseTuple(args, "et", &m_enc, &message))
                return NULL;
 
-       bytes = mbtowc(dest, &message[0], 3);
-       if (bytes <= 0)
-               ret = 0;
+       bytes = mbtowc(dest, &message[0], strlen(message));
+       if (bytes < 0)
+               ret = bytes;
        else
                ret = wcwidth(dest[0]);
 
@@ -57,7 +57,7 @@ static PyObject *py_waddch(PyObject * self, PyObject * args)
                wchar_t dest[2];
                int bytes;
 
-               bytes = mbtowc(dest, &message[0], 3);
+               bytes = mbtowc(dest, &message[0], strlen(message));
 
                if (bytes > 0) {
                        waddwstr(win, dest);