/* Mirror of https://github.com/moparisthebest/hexchat
 * (synced 2024-11-14 05:15:05 -05:00; 541 lines, 10 KiB, C) */
/* X-Chat
 * Copyright (C) 1998 Peter Zelezny.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <ctype.h>
|
|
#include "hexchat.h"
|
|
#include "hexchatc.h"
|
|
#include "cfgfiles.h"
|
|
#include "fe.h"
|
|
#include "tree.h"
|
|
#include "url.h"
|
|
#ifdef HAVE_STRINGS_H
|
|
#include <strings.h>
|
|
#endif
|
|
|
|
void *url_tree = NULL;
|
|
GTree *url_btree = NULL;
|
|
static int do_an_re (const char *word, int *start, int *end, int *type);
|
|
static GRegex *re_url (void);
|
|
static GRegex *re_host (void);
|
|
static GRegex *re_email (void);
|
|
static GRegex *re_nick (void);
|
|
static GRegex *re_channel (void);
|
|
static GRegex *re_path (void);
|
|
|
|
|
|
static int
|
|
url_free (char *url, void *data)
|
|
{
|
|
free (url);
|
|
return TRUE;
|
|
}
|
|
|
|
void
|
|
url_clear (void)
|
|
{
|
|
tree_foreach (url_tree, (tree_traverse_func *)url_free, NULL);
|
|
tree_destroy (url_tree);
|
|
url_tree = NULL;
|
|
g_tree_destroy (url_btree);
|
|
url_btree = NULL;
|
|
}
|
|
|
|
static int
|
|
url_save_cb (char *url, FILE *fd)
|
|
{
|
|
fprintf (fd, "%s\n", url);
|
|
return TRUE;
|
|
}
|
|
|
|
void
|
|
url_save_tree (const char *fname, const char *mode, gboolean fullpath)
|
|
{
|
|
FILE *fd;
|
|
|
|
if (fullpath)
|
|
fd = hexchat_fopen_file (fname, mode, XOF_FULLPATH);
|
|
else
|
|
fd = hexchat_fopen_file (fname, mode, 0);
|
|
if (fd == NULL)
|
|
return;
|
|
|
|
tree_foreach (url_tree, (tree_traverse_func *)url_save_cb, fd);
|
|
fclose (fd);
|
|
}
|
|
|
|
/* Append a single URL, followed by a newline, to <config>/url.log.
   Silently does nothing if the log cannot be opened. */
static void
url_save_node (char* url)
{
	/* "a": append mode */
	FILE *logfile = hexchat_fopen_file ("url.log", "a", 0);

	if (logfile != NULL)
	{
		fprintf (logfile, "%s\n", url);
		fclose (logfile);
	}
}
|
|
|
|
static int
|
|
url_find (char *urltext)
|
|
{
|
|
return (g_tree_lookup_extended (url_btree, urltext, NULL, NULL));
|
|
}
|
|
|
|
static void
|
|
url_add (char *urltext, int len)
|
|
{
|
|
char *data;
|
|
int size;
|
|
|
|
/* we don't need any URLs if we have neither URL grabbing nor URL logging enabled */
|
|
if (!prefs.hex_url_grabber && !prefs.hex_url_logging)
|
|
{
|
|
return;
|
|
}
|
|
|
|
data = malloc (len + 1);
|
|
if (!data)
|
|
{
|
|
return;
|
|
}
|
|
memcpy (data, urltext, len);
|
|
data[len] = 0;
|
|
|
|
if (data[len - 1] == '.') /* chop trailing dot */
|
|
{
|
|
len--;
|
|
data[len] = 0;
|
|
}
|
|
/* chop trailing ) but only if there's no counterpart */
|
|
if (data[len - 1] == ')' && strchr (data, '(') == NULL)
|
|
{
|
|
data[len - 1] = 0;
|
|
}
|
|
|
|
if (prefs.hex_url_logging)
|
|
{
|
|
url_save_node (data);
|
|
}
|
|
|
|
/* the URL is saved already, only continue if we need the URL grabber too */
|
|
if (!prefs.hex_url_grabber)
|
|
{
|
|
free (data);
|
|
return;
|
|
}
|
|
|
|
if (!url_tree)
|
|
{
|
|
url_tree = tree_new ((tree_cmp_func *)strcasecmp, NULL);
|
|
url_btree = g_tree_new ((GCompareFunc)strcasecmp);
|
|
}
|
|
|
|
if (url_find (data))
|
|
{
|
|
free (data);
|
|
return;
|
|
}
|
|
|
|
size = tree_size (url_tree);
|
|
/* 0 is unlimited */
|
|
if (prefs.hex_url_grabber_limit > 0 && size >= prefs.hex_url_grabber_limit)
|
|
{
|
|
/* the loop is necessary to handle having the limit lowered while
|
|
HexChat is running */
|
|
size -= prefs.hex_url_grabber_limit;
|
|
for(; size > 0; size--)
|
|
{
|
|
char *pos;
|
|
|
|
pos = tree_remove_at_pos (url_tree, 0);
|
|
g_tree_remove (url_btree, pos);
|
|
free (pos);
|
|
}
|
|
}
|
|
|
|
tree_append (url_tree, data);
|
|
g_tree_insert (url_btree, data, GINT_TO_POINTER (tree_size (url_tree) - 1));
|
|
fe_url_add (data);
|
|
}
|
|
|
|
/* check if a word is clickable. This is called on mouse motion events, so
   keep it FAST! This new version was found to be almost 3x faster than
   2.4.4 release. */

/* Match offsets and word type left behind by the most recent
   url_check_word() call; url_last() hands them out. */
static int laststart = 0, lastend = 0, lasttype = 0;
|
|
|
|
/* Return 1 if the character `c` occurs in the NUL-terminated string `s`,
   0 otherwise.  The terminating NUL itself never counts as a match. */
static int
strchrs (char c, char *s)
{
	/* advance to the first occurrence of c, or to the terminator */
	while (*s != '\0' && *s != c)
		s++;

	return *s != '\0';
}
|
|
|
|
#define NICKPRE "~+!@%%&"
|
|
int
|
|
url_check_word (const char *word)
|
|
{
|
|
laststart = lastend = lasttype = 0;
|
|
if (do_an_re (word, &laststart, &lastend, &lasttype))
|
|
{
|
|
switch (lasttype)
|
|
{
|
|
char *str;
|
|
|
|
case WORD_NICK:
|
|
if (strchrs (word[laststart], NICKPRE))
|
|
laststart++;
|
|
str = g_strndup (&word[laststart], lastend - laststart);
|
|
if (!userlist_find (current_sess, str))
|
|
lasttype = 0;
|
|
g_free (str);
|
|
return lasttype;
|
|
case WORD_EMAIL:
|
|
if (!isalnum (word[laststart]))
|
|
laststart++;
|
|
/* Fall through */
|
|
case WORD_URL:
|
|
case WORD_HOST:
|
|
case WORD_CHANNEL:
|
|
case WORD_PATH:
|
|
return lasttype;
|
|
default:
|
|
return 0; /* Should not occur */
|
|
}
|
|
}
|
|
else
|
|
return 0;
|
|
}
|
|
|
|
/* List of IRC commands for which contents (and thus possible URLs)
|
|
* are visible to the user. NOTE: Trailing blank required in each. */
|
|
static char *commands[] = {
|
|
"NOTICE ",
|
|
"PRIVMSG ",
|
|
"TOPIC ",
|
|
"332 ", /* RPL_TOPIC */
|
|
"372 " /* RPL_MOTD */
|
|
};
|
|
|
|
#define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0]))
|
|
|
|
void
|
|
url_check_line (char *buf, int len)
|
|
{
|
|
GRegex *re(void);
|
|
GMatchInfo *gmi;
|
|
char *po = buf;
|
|
int i;
|
|
|
|
/* Skip over message prefix */
|
|
if (*po == ':')
|
|
{
|
|
po = strchr (po, ' ');
|
|
if (!po)
|
|
return;
|
|
po++;
|
|
}
|
|
/* Allow only commands from the above list */
|
|
for (i = 0; i < ARRAY_SIZE (commands); i++)
|
|
{
|
|
char *cmd = commands[i];
|
|
int len = strlen (cmd);
|
|
|
|
if (strncmp (cmd, po, len) == 0)
|
|
{
|
|
po += len;
|
|
break;
|
|
}
|
|
}
|
|
if (i == ARRAY_SIZE (commands))
|
|
return;
|
|
|
|
/* Skip past the channel name or user nick */
|
|
po = strchr (po, ' ');
|
|
if (!po)
|
|
return;
|
|
po++;
|
|
|
|
g_regex_match(re_url(), po, 0, &gmi);
|
|
while (g_match_info_matches(gmi))
|
|
{
|
|
int start, end;
|
|
|
|
g_match_info_fetch_pos(gmi, 0, &start, &end);
|
|
if (po[end - 1] == '\r')
|
|
po[--end] = 0;
|
|
if (g_strstr_len (po + start, end - start, "://"))
|
|
url_add(po + start, end - start);
|
|
g_match_info_next(gmi, NULL);
|
|
}
|
|
g_match_info_free(gmi);
|
|
}
|
|
|
|
int
|
|
url_last (int *lstart, int *lend)
|
|
{
|
|
*lstart = laststart;
|
|
*lend = lastend;
|
|
return lasttype;
|
|
}
|
|
|
|
static int
|
|
do_an_re(const char *word,int *start, int *end, int *type)
|
|
{
|
|
typedef struct func_s {
|
|
GRegex *(*fn)(void);
|
|
int type;
|
|
} func_t;
|
|
func_t funcs[] =
|
|
{
|
|
{ re_email, WORD_EMAIL },
|
|
{ re_url, WORD_URL },
|
|
{ re_host, WORD_HOST },
|
|
{ re_channel, WORD_CHANNEL },
|
|
{ re_path, WORD_PATH },
|
|
{ re_nick, WORD_NICK }
|
|
};
|
|
|
|
GMatchInfo *gmi;
|
|
int k;
|
|
|
|
for (k = 0; k < sizeof funcs / sizeof (func_t); k++)
|
|
{
|
|
g_regex_match (funcs[k].fn(), word, 0, &gmi);
|
|
if (!g_match_info_matches (gmi))
|
|
{
|
|
g_match_info_free (gmi);
|
|
continue;
|
|
}
|
|
while (g_match_info_matches (gmi))
|
|
{
|
|
g_match_info_fetch_pos (gmi, 0, start, end);
|
|
g_match_info_next (gmi, NULL);
|
|
}
|
|
g_match_info_free (gmi);
|
|
*type = funcs[k].type;
|
|
return TRUE;
|
|
}
|
|
|
|
return FALSE;
|
|
}
|
|
|
|
/* Miscellaneous description --- */
/* Pattern fragments shared by the host, URL and e-mail expressions.  They
   only list lower-case letters because make_re() compiles every pattern
   with G_REGEX_CASELESS. */
#define DOMAIN   "[a-z0-9][-a-z0-9]*(\\.[-a-z0-9]+)*\\."	/* dot-separated labels, ends with a dot */
#define TLD      "[a-z][-a-z0-9]*[a-z]"			/* final label */
#define IPADDR   "[0-9]+(\\.[0-9]+){3}"				/* four dot-separated numbers */
#define HOST     "(" DOMAIN TLD "|" IPADDR ")"			/* name or dotted quad */
#define OPT_PORT "(:[1-9][0-9]{0,4})?"				/* optional :port, 1 to 5 digits */
|
|
|
|
GRegex *
|
|
make_re(char *grist, char *type)
|
|
{
|
|
GRegex *ret;
|
|
GError *err = NULL;
|
|
|
|
ret = g_regex_new (grist, G_REGEX_CASELESS + G_REGEX_OPTIMIZE, 0, &err);
|
|
g_free (grist);
|
|
return ret;
|
|
}
|
|
|
|
/* HOST description --- */
|
|
/* (see miscellaneous above) */
|
|
static GRegex *
|
|
re_host (void)
|
|
{
|
|
static GRegex *host_ret;
|
|
char *grist;
|
|
|
|
if (host_ret) return host_ret;
|
|
|
|
grist = g_strdup_printf (
|
|
"(" /* HOST */
|
|
HOST OPT_PORT
|
|
")"
|
|
);
|
|
host_ret = make_re (grist, "re_host");
|
|
return host_ret;
|
|
}
|
|
|
|
/* URL description --- */
|
|
#define SCHEME "(%s)"
|
|
#define LPAR "\\("
|
|
#define RPAR "\\)"
|
|
#define NOPARENS "[^() \t]*"
|
|
|
|
char *prefix[] = {
|
|
"irc\\.",
|
|
"ftp\\.",
|
|
"www\\.",
|
|
"irc://",
|
|
"ircs://",
|
|
"ftp://",
|
|
"http://",
|
|
"https://",
|
|
"file://",
|
|
"rtsp://",
|
|
NULL
|
|
};
|
|
|
|
static GRegex *
|
|
re_url (void)
|
|
{
|
|
static GRegex *url_ret;
|
|
char *grist;
|
|
char *scheme;
|
|
|
|
if (url_ret) return url_ret;
|
|
|
|
scheme = g_strjoinv ("|", prefix);
|
|
grist = g_strdup_printf (
|
|
"(" /* URL or HOST */
|
|
"("
|
|
SCHEME HOST OPT_PORT
|
|
"(" /* Optional "/path?query_string#fragment_id" */
|
|
"/" /* Must start with slash */
|
|
"("
|
|
"(" LPAR NOPARENS RPAR ")"
|
|
"|"
|
|
"(" NOPARENS ")"
|
|
")*" /* Zero or more occurrences of either of these */
|
|
"(?<![.,?!\\]])" /* Not allowed to end with these */
|
|
")?" /* Zero or one of this /path?query_string#fragment_id thing */
|
|
")|("
|
|
HOST OPT_PORT "/"
|
|
"(" /* Optional "path?query_string#fragment_id" */
|
|
"("
|
|
"(" LPAR NOPARENS RPAR ")"
|
|
"|"
|
|
"(" NOPARENS ")"
|
|
")*" /* Zero or more occurrences of either of these */
|
|
"(?<![.,?!\\]])" /* Not allowed to end with these */
|
|
")?" /* Zero or one of this /path?query_string#fragment_id thing */
|
|
")"
|
|
")"
|
|
, scheme
|
|
);
|
|
url_ret = make_re (grist, "re_url");
|
|
g_free (scheme);
|
|
return url_ret;
|
|
}
|
|
|
|
/* EMAIL description --- */
|
|
#define EMAIL "[a-z][-_a-z0-9]+@" "(" HOST ")"
|
|
|
|
static GRegex *
|
|
re_email (void)
|
|
{
|
|
static GRegex *email_ret;
|
|
char *grist;
|
|
|
|
if (email_ret) return email_ret;
|
|
|
|
grist = g_strdup_printf (
|
|
"(" /* EMAIL */
|
|
EMAIL
|
|
")"
|
|
);
|
|
email_ret = make_re (grist, "re_email");
|
|
return email_ret;
|
|
}
|
|
|
|
/* NICK description --- */
|
|
/* For NICKPRE see before url_check_word() */
|
|
#define NICKHYP "-"
|
|
#define NICKLET "a-z"
|
|
#define NICKDIG "0-9"
|
|
/* Note for NICKSPE: \\\\ boils down to a single \ */
|
|
#define NICKSPE "\\[\\]\\\\`_^{|}"
|
|
#define NICK0 "[" NICKPRE "]?[" NICKLET NICKSPE "]"
|
|
#define NICK1 "[" NICKHYP NICKLET NICKDIG NICKSPE "]*"
|
|
#define NICK NICK0 NICK1
|
|
|
|
static GRegex *
|
|
re_nick (void)
|
|
{
|
|
static GRegex *nick_ret;
|
|
char *grist;
|
|
|
|
if (nick_ret) return nick_ret;
|
|
|
|
grist = g_strdup_printf (
|
|
"(" /* NICK */
|
|
NICK
|
|
")"
|
|
);
|
|
nick_ret = make_re (grist, "re_nick");
|
|
return nick_ret;
|
|
}
|
|
|
|
/* CHANNEL description --- */
|
|
#define CHANNEL "#[^ \t\a,:]+"
|
|
|
|
static GRegex *
|
|
re_channel (void)
|
|
{
|
|
static GRegex *channel_ret;
|
|
char *grist;
|
|
|
|
if (channel_ret) return channel_ret;
|
|
|
|
grist = g_strdup_printf (
|
|
"(" /* CHANNEL */
|
|
CHANNEL
|
|
")"
|
|
);
|
|
channel_ret = make_re (grist, "re_channel");
|
|
return channel_ret;
|
|
}
|
|
|
|
/* PATH description --- */
|
|
#ifdef WIN32
|
|
/* Windows path can be .\ ..\ or C: D: etc */
|
|
#define PATH "^(\\.{1,2}\\\\|[a-z]:).*"
|
|
#else
|
|
/* Linux path can be / or ./ or ../ etc */
|
|
#define PATH "^(/|\\./|\\.\\./).*"
|
|
#endif
|
|
|
|
static GRegex *
|
|
re_path (void)
|
|
{
|
|
static GRegex *path_ret;
|
|
char *grist;
|
|
|
|
if (path_ret) return path_ret;
|
|
|
|
grist = g_strdup_printf (
|
|
"(" /* PATH */
|
|
PATH
|
|
")"
|
|
);
|
|
path_ret = make_re (grist, "re_path");
|
|
return path_ret;
|
|
}
|