diff --git a/src/common/dcc.c b/src/common/dcc.c index 65f52322..881bcf78 100644 --- a/src/common/dcc.c +++ b/src/common/dcc.c @@ -487,7 +487,7 @@ dcc_write_chat (char *nick, char *text) if (dcc && dcc->dccstat == STAT_ACTIVE) { len = strlen (text); - tcp_send_real (NULL, dcc->sok, dcc->serv->encoding, text, len); + tcp_send_real (NULL, dcc->sok, dcc->serv->write_converter, text, len); send (dcc->sok, "\n", 1, 0); dcc->size += len; fe_dcc_update (dcc); @@ -509,7 +509,7 @@ dcc_chat_line (struct DCC *dcc, char *line) char portbuf[32]; message_tags_data no_tags = MESSAGE_TAGS_DATA_INIT; - line = text_invalid_encoding_to_utf8 (line, -1, dcc->serv->encoding, NULL); + line = text_convert_invalid (line, -1, dcc->serv->read_converter, unicode_fallback_string, NULL); sess = find_dialog (dcc->serv, dcc->nick); if (!sess) diff --git a/src/common/hexchat.h b/src/common/hexchat.h index 7ff4cbd4..a30a4a13 100644 --- a/src/common/hexchat.h +++ b/src/common/hexchat.h @@ -534,6 +534,9 @@ typedef struct server time_t away_time; /* when we were marked away */ char *encoding; + GIConv read_converter; /* iconv converter for converting from server encoding to UTF-8. */ + GIConv write_converter; /* iconv converter for converting from UTF-8 to server encoding. */ + GSList *favlist; /* list of channels & keys to join */ unsigned int motd_skipped:1; diff --git a/src/common/plugin.c b/src/common/plugin.c index 5ef20de8..9a1efbec 100644 --- a/src/common/plugin.c +++ b/src/common/plugin.c @@ -979,7 +979,7 @@ hexchat_command (hexchat_plugin *ph, const char *command) } /* scripts/plugins continue to send non-UTF8... *sigh* */ - command_utf8 = text_invalid_encoding_to_utf8 (command, -1, "UTF-8", NULL); + command_utf8 = text_fixup_invalid_utf8 (command, -1, NULL); handle_command (ph->context, command_utf8, FALSE); g_free (command_utf8); } diff --git a/src/common/server.c b/src/common/server.c index b837fdbe..62ba9540 100644 --- a/src/common/server.c +++ b/src/common/server.c @@ -86,12 +86,12 @@ extern pxProxyFactory *libproxy_factory; send via SSL. server/dcc both use this function. */ int -tcp_send_real (void *ssl, int sok, char *encoding, char *buf, int len) +tcp_send_real (void *ssl, int sok, GIConv write_converter, char *buf, int len) { int ret; gsize buf_encoded_len; - gchar *buf_encoded = text_invalid_utf8_to_encoding (buf, len, encoding, &buf_encoded_len); + gchar *buf_encoded = text_convert_invalid (buf, len, write_converter, "?", &buf_encoded_len); #ifdef USE_OPENSSL if (!ssl) ret = send (sok, buf_encoded, buf_encoded_len, 0); @@ -112,7 +112,7 @@ server_send_real (server *serv, char *buf, int len) url_check_line (buf); - return tcp_send_real (serv->ssl, serv->sok, serv->encoding, buf, len); + return tcp_send_real (serv->ssl, serv->sok, serv->write_converter, buf, len); } /* new throttling system, uses the same method as the Undernet @@ -258,7 +258,7 @@ static void server_inline (server *serv, char *line, gssize len) { gsize len_utf8; - line = text_invalid_encoding_to_utf8 (line, len, serv->encoding, &len_utf8); + line = text_convert_invalid (line, len, serv->read_converter, unicode_fallback_string, &len_utf8); fe_add_rawlog (serv, line, len_utf8, FALSE); @@ -1668,6 +1668,18 @@ server_set_encoding (server *serv, char *new_encoding) { serv->encoding = g_strdup ("UTF-8"); } + + if (serv->read_converter != NULL) + { + g_iconv_close (serv->read_converter); + } + serv->read_converter = g_iconv_open ("UTF-8", serv->encoding); + + if (serv->write_converter != NULL) + { + g_iconv_close (serv->write_converter); + } + serv->write_converter = g_iconv_open (serv->encoding, "UTF-8"); } server * @@ -1863,6 +1875,10 @@ server_free (server *serv) g_free (serv->bad_nick_prefixes); g_free (serv->last_away_reason); g_free (serv->encoding); + + g_iconv_close (serv->read_converter); + g_iconv_close (serv->write_converter); + if (serv->favlist) g_slist_free_full (serv->favlist, (GDestroyNotify) servlist_favchan_free); #ifdef USE_OPENSSL diff --git a/src/common/server.h b/src/common/server.h index 211f407c..ff8ef404 100644 --- a/src/common/server.h +++ b/src/common/server.h @@ -25,7 +25,7 @@ extern GSList *serv_list; /* eventually need to keep the tcp_* functions isolated to server.c */ int tcp_send_len (server *serv, char *buf, int len); void tcp_sendf (server *serv, const char *fmt, ...) G_GNUC_PRINTF (2, 3); -int tcp_send_real (void *ssl, int sok, char *encoding, char *buf, int len); +int tcp_send_real (void *ssl, int sok, GIConv write_converter, char *buf, int len); server *server_new (void); int is_server (server *serv); diff --git a/src/common/text.c b/src/common/text.c index 3f9d4441..2a8a50f7 100644 --- a/src/common/text.c +++ b/src/common/text.c @@ -51,6 +51,9 @@ #include #endif +const gchar* unicode_fallback_string = "\357\277\275"; /* The Unicode replacement character 0xFFFD */ +const gchar* arbitrary_encoding_fallback_string = "?"; + struct pevt_stage1 { int len; @@ -750,15 +753,15 @@ log_write (session *sess, char *text, time_t ts) } /** - * Converts a given string in from_encoding to to_encoding. This is similar to g_convert_with_fallback, except that it is tolerant of sequences in + * Converts a given string using the given iconv converter. This is similar to g_convert_with_fallback, except that it is tolerant of sequences in * the original input that are invalid even in from_encoding. g_convert_with_fallback fails for such text, whereas this function replaces such a * sequence with the fallback string. * * If len is -1, strlen(text) is used to calculate the length. Do not pass -1 if text is supposed to contain \0 bytes, such as if from_encoding is a * multi-byte encoding like UTF-16. */ -static gchar * -text_convert_invalid (const gchar* text, gssize len, const gchar *to_encoding, const gchar *from_encoding, const gchar *fallback, gsize *len_out) +gchar * +text_convert_invalid (const gchar* text, gssize len, GIConv converter, const gchar *fallback, gsize *len_out) { gchar *result_part; gsize result_part_len; @@ -775,7 +778,7 @@ text_convert_invalid (const gchar* text, gssize len, const gchar *to_encoding, c end = text + len; /* Find the first position of an invalid sequence. */ - result_part = g_convert (text, len, to_encoding, from_encoding, &invalid_start_pos, &result_part_len, NULL); + result_part = g_convert_with_iconv (text, len, converter, &invalid_start_pos, &result_part_len, NULL); if (result_part != NULL) { /* All text converted successfully on the first try. Return it. */ @@ -798,7 +801,7 @@ text_convert_invalid (const gchar* text, gssize len, const gchar *to_encoding, c g_assert (current_start + invalid_start_pos < end); /* Convert everything before the position of the invalid sequence. It should be successful. */ - result_part = g_convert (current_start, invalid_start_pos, to_encoding, from_encoding, &invalid_start_pos, &result_part_len, NULL); + result_part = g_convert_with_iconv (current_start, invalid_start_pos, converter, &invalid_start_pos, &result_part_len, NULL); g_assert (result_part != NULL); g_string_append_len (result, result_part, result_part_len); g_free (result_part); @@ -809,7 +812,7 @@ text_convert_invalid (const gchar* text, gssize len, const gchar *to_encoding, c /* Now try converting everything after the invalid sequence. */ current_start += invalid_start_pos + 1; - result_part = g_convert (current_start, end - current_start, to_encoding, from_encoding, &invalid_start_pos, &result_part_len, NULL); + result_part = g_convert_with_iconv (current_start, end - current_start, converter, &invalid_start_pos, &result_part_len, NULL); if (result_part != NULL) { /* The rest of the text converted successfully. Append it and return the whole converted text. */ @@ -829,16 +832,19 @@ text_convert_invalid (const gchar* text, gssize len, const gchar *to_encoding, c } } +/** + * Replaces any invalid UTF-8 in the given text with the unicode replacement character. + */ gchar * -text_invalid_utf8_to_encoding (const gchar* text, gssize len, const gchar *to_encoding, gsize *len_out) +text_fixup_invalid_utf8 (const gchar* text, gssize len, gsize *len_out) { - return text_convert_invalid (text, len, to_encoding, "UTF-8", "?", len_out); -} + static GIConv utf8_fixup_converter = NULL; + if (utf8_fixup_converter == NULL) + { + utf8_fixup_converter = g_iconv_open ("UTF-8", "UTF-8"); + } -gchar * -text_invalid_encoding_to_utf8 (const gchar* text, gssize len, const gchar *from_encoding, gsize *len_out) -{ - return text_convert_invalid (text, len, "UTF-8", from_encoding, "\357\277\275", len_out); + return text_convert_invalid (text, len, utf8_fixup_converter, unicode_fallback_string, len_out); } void @@ -858,7 +864,7 @@ PrintTextTimeStamp (session *sess, char *text, time_t timestamp) } else { - text = text_invalid_encoding_to_utf8 (text, -1, "UTF-8", NULL); + text = text_fixup_invalid_utf8 (text, -1, NULL); } log_write (sess, text, timestamp); diff --git a/src/common/text.h b/src/common/text.h index 4f47815d..28fc0c0d 100644 --- a/src/common/text.h +++ b/src/common/text.h @@ -57,12 +57,15 @@ void text_emit (int index, session *sess, char *a, char *b, char *c, char *d, time_t timestamp); int text_emit_by_name (char *name, session *sess, time_t timestamp, char *a, char *b, char *c, char *d); -gchar *text_invalid_utf8_to_encoding (const gchar* text, gssize len, const gchar *to_encoding, gsize *len_out); -gchar *text_invalid_encoding_to_utf8 (const gchar* text, gssize len, const gchar *from_encoding, gsize *len_out); +gchar *text_convert_invalid (const gchar* text, gssize len, GIConv converter, const gchar *fallback, gsize *len_out); +gchar *text_fixup_invalid_utf8 (const gchar* text, gssize len, gsize *len_out); int get_stamp_str (char *fmt, time_t tim, char **ret); void format_event (session *sess, int index, char **args, char *o, gsize sizeofo, unsigned int stripcolor_args); char *text_find_format_string (char *name); - + +extern const gchar* unicode_fallback_string; +extern const gchar* arbitrary_encoding_fallback_string; + void sound_play (const char *file, gboolean quiet); void sound_play_event (int i); void sound_beep (session *);