# HG changeset patch # User Mikael Berthe # Date 1152344405 -7200 # Node ID a75f7a13df7b6b28d3ced3a0128040a749041e64 # Parent f4bf564893b5f25003fc2ca6fd1d71364b345c9d UTF-8 terminal support (Reimar Döffinger) This is a patch from Reimar Döffinger, slightly modified, which makes mcabber work better on UTF-8 terminals. diff -r f4bf564893b5 -r a75f7a13df7b mcabber/src/Makefile.am --- a/mcabber/src/Makefile.am Sat Jul 08 09:33:54 2006 +0200 +++ b/mcabber/src/Makefile.am Sat Jul 08 09:40:05 2006 +0200 @@ -3,10 +3,10 @@ jabglue.c jabglue.h jab_iq.c jab_priv.h \ commands.c commands.h compl.c compl.h \ hbuf.c hbuf.h screen.c screen.h logprint.h \ - settings.c settings.h hooks.c hooks.h \ + settings.c settings.h hooks.c hooks.h utf8.c utf8.h \ histolog.c histolog.h utils.c utils.h help.c help.h -LDADD = $(GLIB_LIBS) -lncurses -lpanel \ +LDADD = $(GLIB_LIBS) \ ../libjabber/liblibjabber.a ../connwrap/libconnwrap.a INCLUDES = $(GLIB_CFLAGS) diff -r f4bf564893b5 -r a75f7a13df7b mcabber/src/hbuf.c --- a/mcabber/src/hbuf.c Sat Jul 08 09:33:54 2006 +0200 +++ b/mcabber/src/hbuf.c Sat Jul 08 09:40:05 2006 +0200 @@ -23,6 +23,7 @@ #include "hbuf.h" #include "utils.h" +#include "utf8.h" /* This is a private structure type */ @@ -188,23 +189,28 @@ } // #2 Go back to head and create non-persistent blocks when needed if (width) { - char *line, *end; + char *end; curr_elt = first_elt; while (curr_elt) { hbuf_b_curr = (hbuf_block*)(curr_elt->data); - line = hbuf_b_curr->ptr; - if (strlen(line) > width) { - hbuf_block *hbuf_b_prev = hbuf_b_curr; + hbuf_block *hbuf_b_prev = hbuf_b_curr; - // We need to break where we can find a space char - char *br; // break pointer - for (br = line + width; br > line && *br != 32 && *br != 9; br--) - ; - if (br <= line) - br = line + width; + // We need to break where we can find a space char + char *br = NULL; // break pointer + char *c = hbuf_b_curr->ptr; + unsigned int cur_w = 0; + while (*c && cur_w <= width) { + if (iswblank(get_char(c))) + br = c; + 
cur_w += wcwidth(get_char(c)); + c = next_char(c); + } + if (*c && cur_w > width) { + if (!br || br == hbuf_b_curr->ptr) + br = c; else - br++; + br = next_char(br); end = hbuf_b_curr->ptr_end; hbuf_b_curr->ptr_end = br; // Create another block, non-persistent diff -r f4bf564893b5 -r a75f7a13df7b mcabber/src/screen.c --- a/mcabber/src/screen.c Sat Jul 08 09:33:54 2006 +0200 +++ b/mcabber/src/screen.c Sat Jul 08 09:40:05 2006 +0200 @@ -41,6 +41,7 @@ #endif #include "screen.h" +#include "utf8.h" #include "hbuf.h" #include "commands.h" #include "compl.h" @@ -883,8 +884,11 @@ // Wrap existing status buffer lines hbuf_rebuild(&statushbuf, maxX - Roster_Width - PREFIX_WIDTH); +#ifndef UNICODE if (utf8_mode) - scr_LogPrint(LPRINT_NORMAL, "WARNING: UTF-8 not yet supported!"); + scr_LogPrint(LPRINT_NORMAL, + "WARNING: Compiled without full UTF-8 support!"); +#endif } else { // Update panels replace_panel(rosterPanel, rosterWnd); @@ -1950,22 +1954,27 @@ // the line, then this transposes the two characters before point. void readline_transpose_chars() { - char swp; + char *c1, *c2; + unsigned a, b; if (ptr_inputline == inputLine) return; if (!*ptr_inputline) { // We're at EOL // If line is only 1 char long, nothing to do... - if (ptr_inputline == inputLine+1) return; + if (prev_char(ptr_inputline, inputLine) == inputLine) return; /* a single (possibly multi-byte) character precedes the cursor */ // Transpose the two previous characters - swp = *(ptr_inputline-2); - *(ptr_inputline-2) = *(ptr_inputline-1); - *(ptr_inputline-1) = swp; + c2 = prev_char(ptr_inputline, inputLine); + c1 = prev_char(c2, inputLine); + a = get_char(c1); + b = get_char(c2); + put_char(put_char(c1, b), a); } else { // Swap the two characters before the cursor and move right. 
- swp = *(ptr_inputline-1); - *(ptr_inputline-1) = *ptr_inputline; - *ptr_inputline++ = swp; + c2 = ptr_inputline; + c1 = prev_char(c2, inputLine); + a = get_char(c1); + b = get_char(c2); + ptr_inputline = put_char(put_char(c1, b), a); /* move right: cursor goes just past the swapped pair */ check_offset(1); } } @@ -1979,16 +1988,17 @@ if (ptr_inputline == inputLine) return; - for (c = ptr_inputline-1 ; c > inputLine ; c--) { - if (!isalnum(*c)) { - if (*c == ' ') + c = prev_char(ptr_inputline, inputLine); + for ( ; c > inputLine ; c = prev_char(c, inputLine)) { + if (!iswalnum(get_char(c))) { + if (iswblank(get_char(c))) if (!spaceallowed) break; } else spaceallowed = 0; } - if (c != inputLine || *c != ' ') - if ((c < ptr_inputline-1) && (!isalnum(*c))) - c++; + if (c != inputLine || !iswblank(get_char(c))) + if ((c < prev_char(ptr_inputline, inputLine)) && (!iswalnum(get_char(c)))) + c = next_char(c); // Modify the line ptr_inputline = c; @@ -2008,16 +2018,19 @@ if (ptr_inputline == inputLine) return; - for (ptr_inputline-- ; ptr_inputline > inputLine ; ptr_inputline--) { - if (!isalnum(*ptr_inputline)) { - if (*ptr_inputline == ' ') + for (ptr_inputline = prev_char(ptr_inputline, inputLine) ; + ptr_inputline > inputLine ; + ptr_inputline = prev_char(ptr_inputline, inputLine)) { + if (!iswalnum(get_char(ptr_inputline))) { + if (iswblank(get_char(ptr_inputline))) if (!spaceallowed) break; } else spaceallowed = 0; } - if (ptr_inputline < old_ptr_inputLine-1 - && *ptr_inputline == ' ' && *(ptr_inputline+1) != ' ') - ptr_inputline++; + if (ptr_inputline < prev_char(old_ptr_inputLine, inputLine) + && iswblank(get_char(ptr_inputline)) + && !iswblank(get_char(next_char(ptr_inputline)))) + ptr_inputline = next_char(ptr_inputline); check_offset(-1); } @@ -2029,9 +2042,9 @@ int spaceallowed = 1; while (*ptr_inputline) { - ptr_inputline++; - if (!isalnum(*ptr_inputline)) { - if (*ptr_inputline == ' ') + ptr_inputline = next_char(ptr_inputline); + if (!iswalnum(get_char(ptr_inputline))) { + if (iswblank(get_char(ptr_inputline))) if (!spaceallowed) 
break; } else spaceallowed = 0; } @@ -2064,7 +2077,7 @@ // This is a command row = 0; - for (p = inputLine ; p < ptr_inputline ; p++) { + for (p = inputLine ; p < ptr_inputline ; p = next_char(p)) { if (quote) { if (*p == '"' && *(p-1) != '\\') quote = FALSE; @@ -2101,6 +2114,8 @@ strcpy(ptr_inputline, tmpLine); } +static void scr_cancel_current_completion(void); + // scr_handle_tab() // Function called when tab is pressed. // Initiate or continue a completion... @@ -2121,7 +2136,7 @@ return; if (nrow == 0) { // Command completion - row = &inputLine[1]; + row = next_char(inputLine); compl_categ = COMPL_CMD; } else if (nrow == -1) { // Nickname completion compl_categ = COMPL_RESOURCE; @@ -2159,13 +2174,7 @@ completion_started = TRUE; } } else { // Completion already initialized - char *c; - guint back = cancel_completion(); - // Remove $back chars - ptr_inputline -= back; - c = ptr_inputline; - for ( ; *c ; c++) - *c = *(c+back); + scr_cancel_current_completion(); // Now complete again cchar = complete(); if (cchar) @@ -2176,12 +2185,16 @@ static void scr_cancel_current_completion(void) { char *c; + char *src = ptr_inputline; guint back = cancel_completion(); + guint i; // Remove $back chars - ptr_inputline -= back; + for (i = 0; i < back; i++) + ptr_inputline = prev_char(ptr_inputline, inputLine); c = ptr_inputline; - for ( ; *c ; c++) - *c = *(c+back); + for ( ; *src ; ) + *c++ = *src++; + *c = 0; } static void scr_end_current_completion(void) @@ -2195,30 +2208,47 @@ // screen. 
static inline void check_offset(int direction) { + int i; + char *c = &inputLine[inputline_offset]; // Left side if (inputline_offset && direction <= 0) { - while (ptr_inputline <= (char*)&inputLine + inputline_offset) { - if (inputline_offset) { - inputline_offset -= 5; - if (inputline_offset < 0) - inputline_offset = 0; - } else + while (ptr_inputline <= c) { + for (i = 0; i < 5; i++) + c = prev_char(c, inputLine); + if (c == inputLine) break; } } // Right side if (direction >= 0) { - while (ptr_inputline >= inputline_offset + (char*)&inputLine + maxX) - inputline_offset += 5; + int delta = wcwidth(get_char(c)); + while (ptr_inputline > c) { + c = next_char(c); + delta += wcwidth(get_char(c)); + } + c = &inputLine[inputline_offset]; + while (delta >= maxX) { + for (i = 0; i < 5; i++) { + delta -= wcwidth(get_char(c)); + c = next_char(c); + } + } } + inputline_offset = c - inputLine; } static inline void refresh_inputline(void) { mvwprintw(inputWnd, 0,0, "%s", inputLine + inputline_offset); wclrtoeol(inputWnd); - if (*ptr_inputline) - wmove(inputWnd, 0, ptr_inputline - (char*)&inputLine - inputline_offset); + if (*ptr_inputline) { + // hack to set cursor pos. Characters can have different width, + // so I know of no better way. 
+ char c = *ptr_inputline; + *ptr_inputline = 0; + mvwprintw(inputWnd, 0,0, "%s", inputLine + inputline_offset); + *ptr_inputline = c; + } } void scr_handle_CtrlC(void) @@ -2292,6 +2322,29 @@ return -1; } +static inline int match_utf8_keyseq(int *iseq) +{ + int *strp = iseq; + unsigned c = *strp++; + unsigned mask = 0x80; + int len = -1; + while (c & mask) { + mask >>= 1; + len++; + } + if (len <= 0 || len > 4) + return -1; + c &= mask - 1; + while ((*strp & 0xc0) == 0x80) { + if (len-- <= 0) // can't happen + return -1; + c = (c << 6) | (*strp++ & 0x3f); + } + if (len) + return 0; + return c; +} + void scr_Getch(keycode *kcode) { keyseq *mks = NULL; @@ -2302,6 +2355,25 @@ memset(ks, 0, sizeof(ks)); kcode->value = wgetch(inputWnd); + if (utf8_mode) { + ks[0] = kcode->value; + for (i = 0; i < MAX_KEYSEQ_LENGTH - 1; i++) { + int match = match_utf8_keyseq(ks); + if (match == -1) + break; + if (match > 0) { + kcode->value = match; + kcode->utf8 = 1; + return; + } + ks[i + 1] = wgetch(inputWnd); + if (ks[i + 1] == ERR) + break; + } + while (i > 0) + ungetch(ks[i--]); + memset(ks, 0, sizeof(ks)); + } if (kcode->value != 27) return; @@ -2337,7 +2409,8 @@ return; } -static int bindcommand(keycode kcode) { +static int bindcommand(keycode kcode) +{ gchar asciikey[16]; const gchar *boundcmd; @@ -2363,8 +2436,11 @@ } scr_LogPrint(LPRINT_NORMAL, "Unknown key=%s", asciikey); +#ifndef UNICODE if (utf8_mode) - scr_LogPrint(LPRINT_NORMAL, "WARNING: UTF-8 not yet supported!"); + scr_LogPrint(LPRINT_NORMAL, + "WARNING: Compiled without full UTF-8 support!"); +#endif return -1; } @@ -2405,25 +2481,28 @@ case 127: // Backspace too case KEY_BACKSPACE: if (ptr_inputline != (char*)&inputLine) { - char *c = --ptr_inputline; - for ( ; *c ; c++) - *c = *(c+1); + char *src = ptr_inputline; + char *c = prev_char(ptr_inputline, inputLine); + ptr_inputline = c; + for ( ; *src ; ) + *c++ = *src++; + *c = 0; check_offset(-1); } break; case KEY_DC:// Del if (*ptr_inputline) - strcpy(ptr_inputline, 
ptr_inputline+1); + strcpy(ptr_inputline, next_char(ptr_inputline)); break; case KEY_LEFT: if (ptr_inputline != (char*)&inputLine) { - ptr_inputline--; + ptr_inputline = prev_char(ptr_inputline, inputLine); check_offset(-1); } break; case KEY_RIGHT: if (*ptr_inputline) - ptr_inputline++; + ptr_inputline = next_char(ptr_inputline); check_offset(1); break; case 7: // Ctrl-g @@ -2531,9 +2610,11 @@ case 23: // Ctrl-w readline_backward_kill_word(); break; + case 515: case 516: // Ctrl-Left readline_backward_word(); break; + case 517: case 518: // Ctrl-Right readline_forward_word(); break; @@ -2559,16 +2640,16 @@ update_panels(); break; default: - if (isprint(key)) { + if (iswprint(key) && (!utf8_mode || kcode.utf8 || key < 128)) { char tmpLine[INPUTLINE_LENGTH+1]; // Check the line isn't too long - if (strlen(inputLine) >= INPUTLINE_LENGTH) + if (strlen(inputLine) + 4 > INPUTLINE_LENGTH) return 0; // Insert char strcpy(tmpLine, ptr_inputline); - *ptr_inputline++ = key; + ptr_inputline = put_char(ptr_inputline, key); strcpy(ptr_inputline, tmpLine); check_offset(1); } else { diff -r f4bf564893b5 -r a75f7a13df7b mcabber/src/screen.h --- a/mcabber/src/screen.h Sat Jul 08 09:33:54 2006 +0200 +++ b/mcabber/src/screen.h Sat Jul 08 09:40:05 2006 +0200 @@ -33,6 +33,7 @@ typedef struct { int value; + int utf8; enum { MKEY_META = 1, MKEY_EQUIV, diff -r f4bf564893b5 -r a75f7a13df7b mcabber/src/utf8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mcabber/src/utf8.c Sat Jul 08 09:40:05 2006 +0200 @@ -0,0 +1,98 @@ +/* + * utf8.c -- UTF-8 routines + * + * Copyright (C) 2006 Reimar Döffinger + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or (at + * your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + */ + +#include "utf8.h" + +char *prev_char(char *str, const char *limit) +{ + if (str <= limit) + return str; + str--; + if (utf8_mode) + while ((str > limit) && ((*str & 0xc0) == 0x80)) + str--; + return str; +} + +char *next_char(char *str) +{ + if (!*str) + return str; + str++; + if (utf8_mode) + while ((*str & 0xc0) == 0x80) + str++; + return str; +} + +unsigned get_char(const char *str) +{ + unsigned char *strp = (unsigned char *)str; + unsigned c = *strp++; + unsigned mask = 0x80; + int len = -1; + if (!utf8_mode) + return c; + while (c & mask) { + mask >>= 1; + len++; + } + if (len <= 0 || len > 4) + goto no_utf8; + c &= mask - 1; + while ((*strp & 0xc0) == 0x80) { + if (len-- <= 0) + goto no_utf8; + c = (c << 6) | (*strp++ & 0x3f); + } + if (len) + goto no_utf8; + return c; + +no_utf8: + return (unsigned char)*str; /* raw byte value; avoids sign extension where char is signed */ +} + +char *put_char(char *str, unsigned c) +{ + unsigned mask = 0xffffffc0u; /* unsigned: no shifts of negative values */ + int i = 4; + char code[5]; + if (!utf8_mode || c < 128) { + *str++ = c; + return str; + } + while (c & mask) { + code[i--] = 0x80 | (c & 0x3f); + c >>= 6; + mask = (mask >> 1) | 0x80000000u; /* keep the top bits set, as the arithmetic shift did */ + if (i < 0) { + *str++ = '?'; + return str; + } + } + code[i] = (mask << 1) | c; + for (; i < 5; i++) + *str++ = code[i]; + return str; +} + +/* vim: set expandtab cindent cinoptions=>2\:2(0: For Vim users... 
*/ diff -r f4bf564893b5 -r a75f7a13df7b mcabber/src/utf8.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mcabber/src/utf8.h Sat Jul 08 09:40:05 2006 +0200 @@ -0,0 +1,31 @@ +#ifndef __UTF8_H__ +#define __UTF8_H__ 1 + +#include <config.h> /* NOTE(review): header name was lost in extraction; config.h presumed as the source of the HAVE_* macros -- confirm */ + +#ifdef HAVE_WCHAR_H +# include <wchar.h> +# define UNICODE +#else +# define wcwidth(c) 1 +#endif + +#ifdef HAVE_WCTYPE_H +# include <wctype.h> +#else +# define iswblank(c) ((c) == ' ') +# define iswalnum(c) isalnum(c) +# define iswprint(c) isprint(c) +# undef UNICODE +#endif + +extern int utf8_mode; + +char *prev_char(char *str, const char *limit); +char *next_char(char *str); +unsigned get_char(const char *str); +char *put_char(char *str, unsigned c); + +#endif /* __UTF8_H__ */ + +/* vim: set expandtab cindent cinoptions=>2\:2(0: For Vim users... */