hexchat/src/common/url.c

/* X-Chat
 * Copyright (C) 1998 Peter Zelezny.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "hexchat.h"
#include "hexchatc.h"
#include "cfgfiles.h"
#include "fe.h"
#include "tree.h"
#include "url.h"
#ifdef HAVE_STRINGS_H
#include <strings.h>
#endif

void *url_tree = NULL;
GTree *url_btree = NULL;
static int do_an_re (const char *word, int *start, int *end, int *type);
static GRegex *re_url (void);
static GRegex *re_host (void);
static GRegex *re_email (void);
static GRegex *re_nick (void);
static GRegex *re_channel (void);
static GRegex *re_path (void);


/* Traversal callback used by url_clear(): releases the heap copy of one
 * stored URL.  The second argument is ignored.
 * Always returns TRUE (presumably "keep traversing" - confirm in tree.c). */
static int
url_free (char *url, void *data)
{
	(void) data;	/* unused, required by the callback signature */

	free (url);
	return TRUE;
}

/* Forget every grabbed URL.
 *
 * Frees each stored string, then destroys both containers (the ordered
 * url_tree and the lookup index url_btree) and resets the globals to NULL
 * so that url_add() recreates them on demand.
 *
 * Safe to call when nothing has been grabbed yet: the containers are only
 * created lazily by url_add(), and g_tree_destroy() must not be handed a
 * NULL tree. */
void
url_clear (void)
{
	if (url_tree)
	{
		/* the strings are owned by us, the tree only stores the pointers */
		tree_foreach (url_tree, (tree_traverse_func *)url_free, NULL);
		tree_destroy (url_tree);
		url_tree = NULL;
	}

	if (url_btree)
	{
		/* keys alias the strings freed above; the index has no destroy
		   notifiers, so destroying it does not touch them again */
		g_tree_destroy (url_btree);
		url_btree = NULL;
	}
}

/* Traversal callback used by url_save_tree(): writes one URL followed by a
 * newline to the stream passed as user data.
 * Always returns TRUE (presumably "keep traversing" - confirm in tree.c). */
static int
url_save_cb (char *url, FILE *fd)
{
	const int keep_going = TRUE;

	fprintf (fd, "%s\n", url);
	return keep_going;
}

/* Write every URL currently held in url_tree to a file, one per line.
 *
 * fname:    file to open through hexchat_fopen_file().
 * mode:     fopen-style mode string, passed through unchanged.
 * fullpath: when true the file is opened with XOF_FULLPATH, otherwise
 *           with no flags.
 *
 * Silently does nothing if the file cannot be opened. */
void
url_save_tree (const char *fname, const char *mode, gboolean fullpath)
{
	FILE *out;

	out = fullpath ? hexchat_fopen_file (fname, mode, XOF_FULLPATH)
	               : hexchat_fopen_file (fname, mode, 0);

	if (out != NULL)
	{
		tree_foreach (url_tree, (tree_traverse_func *)url_save_cb, out);
		fclose (out);
	}
}

/* Append a single URL, followed by a newline, to "url.log" (opened through
 * hexchat_fopen_file() with no flags).  Does nothing if the log cannot be
 * opened. */
static void
url_save_node (char* url)
{
	FILE *log_file = hexchat_fopen_file ("url.log", "a", 0);

	if (log_file != NULL)
	{
		fprintf (log_file, "%s\n", url);
		fclose (log_file);
	}
}

static int
url_find (char *urltext)
{
	return (g_tree_lookup_extended (url_btree, urltext, NULL, NULL));
}

/* Record one URL found in incoming text.
 *
 * urltext: start of the URL inside a larger buffer (need not be
 *          NUL-terminated at the URL's end).
 * len:     number of bytes of urltext that belong to the URL.
 *
 * A private copy is made, with a trailing '.' removed and a trailing ')'
 * removed when the URL contains no '('.  Depending on the preferences the
 * copy is appended to url.log (hex_url_logging) and/or stored in the
 * in-memory list and announced to the front end (hex_url_grabber).
 * Duplicates (compared case-insensitively) are not stored twice.  When
 * hex_url_grabber_limit is positive the oldest entries are dropped so the
 * list does not grow beyond that limit. */
static void
url_add (char *urltext, int len)
{
	char *data;
	int size;

	/* we don't need any URLs if we have neither URL grabbing nor URL logging enabled */
	if (!prefs.hex_url_grabber && !prefs.hex_url_logging)
	{
		return;
	}

	/* nothing to copy; also keeps the data[len - 1] accesses below in bounds */
	if (!urltext || len <= 0)
	{
		return;
	}

	data = malloc (len + 1);
	if (!data)
	{
		return;
	}
	memcpy (data, urltext, len);
	data[len] = 0;

	if (data[len - 1] == '.')	/* chop trailing dot */
	{
		len--;
		data[len] = 0;
	}
	/* chop trailing ) but only if there's no counterpart */
	if (len > 0 && data[len - 1] == ')' && strchr (data, '(') == NULL)
	{
		len--;
		data[len] = 0;
	}

	/* the chopping above may have consumed the whole string */
	if (len == 0)
	{
		free (data);
		return;
	}

	if (prefs.hex_url_logging)
	{
		url_save_node (data);
	}

	/* the URL is saved already, only continue if we need the URL grabber too */
	if (!prefs.hex_url_grabber)
	{
		free (data);
		return;
	}

	if (!url_tree)
	{
		url_tree = tree_new ((tree_cmp_func *)strcasecmp, NULL);
		url_btree = g_tree_new ((GCompareFunc)strcasecmp);
	}

	if (url_find (data))
	{
		free (data);
		return;
	}

	size = tree_size (url_tree);
	/* 0 is unlimited */
	if (prefs.hex_url_grabber_limit > 0 && size >= prefs.hex_url_grabber_limit)
	{
		/* the loop is necessary to handle having the limit lowered while
		   HexChat is running; the "+ 1" makes room for the entry appended
		   below so the list ends up holding exactly the limit */
		size = size - prefs.hex_url_grabber_limit + 1;
		for(; size > 0; size--)
		{
			char *pos;

			pos = tree_remove_at_pos (url_tree, 0);
			/* drop the index entry before the key it points at is freed */
			g_tree_remove (url_btree, pos);
			free (pos);
		}
	}

	/* both containers share ownership-free pointers to data; it is freed
	   by the trimming loop above or by url_clear() */
	tree_append (url_tree, data);
	g_tree_insert (url_btree, data, GINT_TO_POINTER (tree_size (url_tree) - 1));
	fe_url_add (data);
}

/* check if a word is clickable. This is called on mouse motion events, so
   keep it FAST! This new version was found to be almost 3x faster than
   2.4.4 release. */

static int laststart = 0;
static int lastend = 0;
static int lasttype = 0;

/* Test whether character c occurs in the NUL-terminated string s.
 *
 * Returns 1 if c is one of the characters of s, 0 otherwise.  The
 * terminating NUL is never considered part of s, so c == '\0' yields 0
 * (strchr() alone would report a match on the terminator). */
static int
strchrs (char c, const char *s)
{
	return c != '\0' && strchr (s, c) != NULL;
}

#define NICKPRE "~+!@%%&"
/* Classify a single word of displayed text.
 *
 * Runs the word through do_an_re() and returns the WORD_* type that
 * matched, or 0 if nothing matched.  The matched span and type are kept
 * in laststart/lastend/lasttype so url_last() can report them afterwards.
 *
 * A nick match only counts if the name (without a leading NICKPRE status
 * character) is found by userlist_find() for current_sess; otherwise the
 * type is reset to 0. */
int
url_check_word (const char *word)
{
	char *str;

	laststart = lastend = lasttype = 0;
	if (!do_an_re (word, &laststart, &lastend, &lasttype))
		return 0;

	switch (lasttype)
	{
		case WORD_NICK:
			/* skip a status prefix such as '@' or '+' */
			if (strchrs (word[laststart], NICKPRE))
				laststart++;
			str = g_strndup (&word[laststart], lastend - laststart);
			if (!userlist_find (current_sess, str))
				lasttype = 0;
			g_free (str);
			return lasttype;
		case WORD_EMAIL:
			/* <ctype.h> functions need an unsigned char value; a plain
			   char may be negative for bytes >= 0x80 */
			if (!isalnum ((unsigned char) word[laststart]))
				laststart++;
			/* Fall through */
		case WORD_URL:
		case WORD_HOST:
		case WORD_CHANNEL:
		case WORD_PATH:
			return lasttype;
		default:
			return 0;	/* Should not occur */
	}
}

/* List of IRC commands for which contents (and thus possible URLs)
 * are visible to the user.  url_check_line() compares each entry against
 * the start of the command with strncmp(), so the trailing blank is what
 * stops e.g. "TOPIC " from matching a longer command name.
 * NOTE:  Trailing blank required in each. */
static char *commands[] = {
	"NOTICE ",
	"PRIVMSG ",
	"TOPIC ",
	"332 ",		/* RPL_TOPIC */
	"372 "		/* RPL_MOTD */
};

#define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0]))

/* Scan one raw IRC line for URLs and hand each one to url_add().
 *
 * buf: NUL-terminated line.  It may be modified: a '\r' that ends a match
 *      is overwritten with NUL.
 * len: unused; kept for the existing callers.
 *
 * Only lines whose command appears in commands[] are examined, and only
 * the text after the target (channel or nick) is searched.  Matches of
 * re_url() are recorded only when they contain "://". */
void
url_check_line (char *buf, int len)
{
	GRegex *url_re;
	GMatchInfo *gmi = NULL;
	char *po = buf;
	size_t i;

	(void) len;

	/* Skip over message prefix */
	if (*po == ':')
	{
		po = strchr (po, ' ');
		if (!po)
			return;
		po++;
	}
	/* Allow only commands from the above list */
	for (i = 0; i < ARRAY_SIZE (commands); i++)
	{
		const char *cmd = commands[i];
		size_t cmd_len = strlen (cmd);

		if (strncmp (cmd, po, cmd_len) == 0)
		{
			po += cmd_len;
			break;
		}
	}
	if (i == ARRAY_SIZE (commands))
		return;

	/* Skip past the channel name or user nick */
	po = strchr (po, ' ');
	if (!po)
		return;
	po++;

	/* without a compiled pattern g_regex_match() would leave gmi unset */
	url_re = re_url ();
	if (!url_re)
		return;

	g_regex_match (url_re, po, 0, &gmi);
	while (g_match_info_matches (gmi))
	{
		int start = 0, end = 0;

		if (g_match_info_fetch_pos (gmi, 0, &start, &end) && end > start)
		{
			/* the path part of the pattern can swallow the line's CR */
			if (po[end - 1] == '\r')
				po[--end] = 0;
			if (g_strstr_len (po + start, end - start, "://"))
				url_add (po + start, end - start);
		}
		g_match_info_next (gmi, NULL);
	}
	g_match_info_free (gmi);
}

/* Report the result of the most recent url_check_word() call.
 *
 * lstart, lend: receive the start and end offsets stored by that call.
 * Returns the stored word type (0 when nothing matched). */
int
url_last (int *lstart, int *lend)
{
	const int type = lasttype;

	*lend = lastend;
	*lstart = laststart;
	return type;
}

/* Try each word pattern in turn against word.
 *
 * The patterns are tried in the order email, URL, host, channel, path,
 * nick; the first one that matches at all wins.  For that pattern the
 * position of its LAST match in word is stored in *start / *end and its
 * WORD_* constant in *type.
 *
 * Returns TRUE if some pattern matched, FALSE otherwise (in which case
 * the output arguments are left untouched). */
static int
do_an_re(const char *word,int *start, int *end, int *type)
{
	typedef struct func_s {
		GRegex *(*fn)(void);
		int type;
	} func_t;
	/* built once instead of on every call; this runs for every word the
	   mouse moves over */
	static const func_t funcs[] =
	{
		{ re_email, WORD_EMAIL },
		{ re_url, WORD_URL },
		{ re_host, WORD_HOST },
		{ re_channel, WORD_CHANNEL },
		{ re_path, WORD_PATH },
		{ re_nick, WORD_NICK }
	};
	size_t k;

	if (!word)
		return FALSE;

	for (k = 0; k < sizeof funcs / sizeof funcs[0]; k++)
	{
		GRegex *re = funcs[k].fn ();
		GMatchInfo *gmi = NULL;

		/* pattern failed to compile: nothing to match against */
		if (!re)
			continue;

		if (!g_regex_match (re, word, 0, &gmi))
		{
			g_match_info_free (gmi);
			continue;
		}
		/* walk to the last match; each iteration overwrites the span */
		while (g_match_info_matches (gmi))
		{
			g_match_info_fetch_pos (gmi, 0, start, end);
			g_match_info_next (gmi, NULL);
		}
		g_match_info_free (gmi);
		*type = funcs[k].type;
		return TRUE;
	}

	return FALSE;
}

/*	Miscellaneous description --- */
#define DOMAIN "[a-z0-9][-a-z0-9]*(\\.[-a-z0-9]+)*\\."
#define TLD "[a-z][-a-z0-9]*[a-z]"
#define IPADDR "[0-9]+(\\.[0-9]+){3}"
#define HOST "(" DOMAIN TLD "|" IPADDR ")"
#define OPT_PORT "(:[1-9][0-9]{0,4})?"

GRegex *
make_re(char *grist, char *type)
{
	GRegex *ret;
	GError *err = NULL;

	ret = g_regex_new (grist, G_REGEX_CASELESS + G_REGEX_OPTIMIZE, 0, &err);
	g_free (grist);
	return ret;
}

/*	HOST description --- */
/* (see miscellaneous above) */
/* Return the compiled pattern for a bare host: HOST followed by an
 * optional port.  The regex is compiled on first use and cached in a
 * function-local static; without the cache check every call would
 * compile a new GRegex and leak the previous one. */
static GRegex *
re_host (void)
{
	static GRegex *host_ret;
	char *grist;

	if (host_ret) return host_ret;

	grist = g_strdup_printf (
		"("	/* HOST */
			HOST OPT_PORT
		")"
	);
	host_ret = make_re (grist, "re_host");
	return host_ret;
}

/*	URL description --- */
#define SCHEME "(%s)"
#define LPAR "\\("
#define RPAR "\\)"
#define NOPARENS "[^() \t]*"

/* Regex fragments that may introduce a URL.  re_url() joins them with "|"
 * and substitutes the result for SCHEME, so each entry must be valid
 * regex text (hence the escaped dots).  The NULL terminator is required
 * by g_strjoinv(). */
char *prefix[] = {
	"irc\\.",
	"ftp\\.",
	"www\\.",
	"irc://",
	"ircs://",
	"ftp://",
	"http://",
	"https://",
	"file://",
	"rtsp://",
	NULL
};

/* Return the compiled URL pattern, building and caching it on first use.
 *
 * The pattern accepts either
 *   - one of the prefix[] alternatives, a HOST, an optional port and an
 *     optional "/path", or
 *   - a HOST, an optional port, a mandatory "/" and an optional path.
 * Paths may contain one level of balanced parentheses and may not end in
 * one of . , ? ! ] */
static GRegex *
re_url (void)
{
	static GRegex *url_ret;

	if (!url_ret)
	{
		/* "a|b|c" alternation substituted for the %s inside SCHEME */
		char *alternatives = g_strjoinv ("|", prefix);
		char *pattern = g_strdup_printf (
			"("	/* URL or HOST */
				"("
					SCHEME HOST OPT_PORT
					"("	/* Optional "/path?query_string#fragment_id" */
						"/"	/* Must start with slash */
						"("
							"(" LPAR NOPARENS RPAR ")"
							"|"
							"(" NOPARENS ")"
						")*"	/* Zero or more occurrences of either of these */
						"(?<![.,?!\\]])"	/* Not allowed to end with these */
					")?"	/* Zero or one of this /path?query_string#fragment_id thing */
				")|("
					HOST OPT_PORT "/"
					"("	/* Optional "path?query_string#fragment_id" */
						"("
							"(" LPAR NOPARENS RPAR ")"
							"|"
							"(" NOPARENS ")"
						")*"	/* Zero or more occurrences of either of these */
						"(?<![.,?!\\]])"	/* Not allowed to end with these */
					")?"	/* Zero or one of this /path?query_string#fragment_id thing */
				")"
			")"
			, alternatives
		);

		/* make_re() takes ownership of pattern */
		url_ret = make_re (pattern, "re_url");
		g_free (alternatives);
	}

	return url_ret;
}

/*	EMAIL description --- */
#define EMAIL "[a-z][-_a-z0-9]+@" "(" HOST ")"

/* Return the compiled e-mail address pattern (EMAIL wrapped in a capture
 * group), compiling and caching it on the first call. */
static GRegex *
re_email (void)
{
	static GRegex *email_ret;

	if (!email_ret)
	{
		char *pattern = g_strdup_printf (
			"("	/* EMAIL */
				EMAIL
			")"
		);

		/* make_re() takes ownership of pattern */
		email_ret = make_re (pattern, "re_email");
	}

	return email_ret;
}

/*	NICK description --- */
/* For NICKPRE see before url_check_word() */
#define NICKHYP	"-"
#define NICKLET "a-z"
#define NICKDIG "0-9"
/*	Note for NICKSPE:  \\\\ boils down to a single \ */
#define NICKSPE	"\\[\\]\\\\`_^{|}"
#define NICK0 "[" NICKPRE "]?[" NICKLET NICKSPE "]"
#define NICK1 "[" NICKHYP NICKLET NICKDIG NICKSPE "]*"
#define NICK	NICK0 NICK1

/* Return the compiled nickname pattern (NICK wrapped in a capture group),
 * compiling and caching it on the first call.
 *
 * The pattern text goes through g_strdup_printf() on purpose: NICKPRE
 * spells its percent sign as "%%", which the printf pass turns into a
 * single '%'. */
static GRegex *
re_nick (void)
{
	static GRegex *nick_ret;

	if (!nick_ret)
	{
		char *pattern = g_strdup_printf (
			"("	/* NICK */
				NICK
			")"
		);

		/* make_re() takes ownership of pattern */
		nick_ret = make_re (pattern, "re_nick");
	}

	return nick_ret;
}

/*	CHANNEL description --- */
#define CHANNEL "#[^ \t\a,:]+"

/* Return the compiled channel-name pattern (CHANNEL wrapped in a capture
 * group), compiling and caching it on the first call. */
static GRegex *
re_channel (void)
{
	static GRegex *channel_ret;

	if (!channel_ret)
	{
		char *pattern = g_strdup_printf (
			"("	/* CHANNEL */
				CHANNEL
			")"
		);

		/* make_re() takes ownership of pattern */
		channel_ret = make_re (pattern, "re_channel");
	}

	return channel_ret;
}

/*	PATH description --- */
#ifdef WIN32
/* Windows path can be C: D: etc */
#define PATH "^([a-z]:).*"
#else
/* Linux path can be / or ./ or ../ etc */
#define PATH "^(/|\\./|\\.\\./).*"
#endif

/* Return the compiled file-system path pattern (the platform-specific
 * PATH wrapped in a capture group), compiling and caching it on the
 * first call. */
static GRegex *
re_path (void)
{
	static GRegex *path_ret;

	if (!path_ret)
	{
		char *pattern = g_strdup_printf (
			"("	/* PATH */
				PATH
			")"
		);

		/* make_re() takes ownership of pattern */
		path_ret = make_re (pattern, "re_path");
	}

	return path_ret;
}
add xchat r1489 2011-02-23 22:14:30 -05:00			`/* X-Chat`
			`* Copyright (C) 1998 Peter Zelezny.`
			`*`
			`* This program is free software; you can redistribute it and/or modify`
			`* it under the terms of the GNU General Public License as published by`
			`* the Free Software Foundation; either version 2 of the License, or`
			`* (at your option) any later version.`
			`*`
			`* This program is distributed in the hope that it will be useful,`
			`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`* GNU General Public License for more details.`
			`*`
			`* You should have received a copy of the GNU General Public License`
			`* along with this program; if not, write to the Free Software`
Fix FSF address to be Franklin Street 2013-01-02 17:58:26 -05:00			`* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA`
add xchat r1489 2011-02-23 22:14:30 -05:00			`*/`

			`#include <stdio.h>`
			`#include <stdlib.h>`
			`#include <string.h>`
			`#include <ctype.h>`
A lot more rebranding 2012-10-24 15:33:02 -04:00			`#include "hexchat.h"`
			`#include "hexchatc.h"`
add xchat r1489 2011-02-23 22:14:30 -05:00			`#include "cfgfiles.h"`
			`#include "fe.h"`
			`#include "tree.h"`
			`#include "url.h"`
			`#ifdef HAVE_STRINGS_H`
			`#include <strings.h>`
			`#endif`

			`void *url_tree = NULL;`
Fix memory leak related to url grabbing 2012-10-01 15:53:25 -04:00			`GTree *url_btree = NULL;`
overhauling of URL detection, including channel, nick, etc 'words' 2013-01-02 17:50:26 -05:00			`static int do_an_re (const char word, int start, int end, int type);`
			`static GRegex *re_url (void);`
			`static GRegex *re_host (void);`
			`static GRegex *re_email (void);`
			`static GRegex *re_nick (void);`
			`static GRegex *re_channel (void);`
			`static GRegex *re_path (void);`
add xchat r1489 2011-02-23 22:14:30 -05:00

			`static int`
			`url_free (char url, void data)`
			`{`
			`free (url);`
			`return TRUE;`
			`}`

			`void`
			`url_clear (void)`
			`{`
			`tree_foreach (url_tree, (tree_traverse_func *)url_free, NULL);`
			`tree_destroy (url_tree);`
			`url_tree = NULL;`
Fix memory leak related to url grabbing 2012-10-01 15:53:25 -04:00			`g_tree_destroy (url_btree);`
			`url_btree = NULL;`
add xchat r1489 2011-02-23 22:14:30 -05:00			`}`

			`static int`
			`url_save_cb (char url, FILE fd)`
			`{`
			`fprintf (fd, "%s\n", url);`
			`return TRUE;`
			`}`

			`void`
Save URLs to disk on-the-fly and provide an option for toggling it 2012-10-13 04:03:39 -04:00			`url_save_tree (const char fname, const char mode, gboolean fullpath)`
add xchat r1489 2011-02-23 22:14:30 -05:00			`{`
			`FILE *fd;`

			`if (fullpath)`
Nah, even more rebranding 2012-10-30 06:35:39 -04:00			`fd = hexchat_fopen_file (fname, mode, XOF_FULLPATH);`
add xchat r1489 2011-02-23 22:14:30 -05:00			`else`
Nah, even more rebranding 2012-10-30 06:35:39 -04:00			`fd = hexchat_fopen_file (fname, mode, 0);`
add xchat r1489 2011-02-23 22:14:30 -05:00			`if (fd == NULL)`
			`return;`

			`tree_foreach (url_tree, (tree_traverse_func *)url_save_cb, fd);`
			`fclose (fd);`
			`}`

Save URLs to disk on-the-fly and provide an option for toggling it 2012-10-13 04:03:39 -04:00			`static void`
			`url_save_node (char* url)`
add xchat r1489 2011-02-23 22:14:30 -05:00			`{`
Save URLs to disk on-the-fly and provide an option for toggling it 2012-10-13 04:03:39 -04:00			`FILE *fd;`

			`/* open <config>/url.log in append mode */`
Nah, even more rebranding 2012-10-30 06:35:39 -04:00			`fd = hexchat_fopen_file ("url.log", "a", 0);`
Save URLs to disk on-the-fly and provide an option for toggling it 2012-10-13 04:03:39 -04:00			`if (fd == NULL)`
			`{`
			`return;`
			`}`

			`fprintf (fd, "%s\n", url);`
			`fclose (fd);`
add xchat r1489 2011-02-23 22:14:30 -05:00			`}`

			`static int`
			`url_find (char *urltext)`
			`{`
Fix memory leak related to url grabbing 2012-10-01 15:53:25 -04:00			`return (g_tree_lookup_extended (url_btree, urltext, NULL, NULL));`
add xchat r1489 2011-02-23 22:14:30 -05:00			`}`

			`static void`
			`url_add (char *urltext, int len)`
			`{`
update xchat to r1503 2012-03-15 18:58:52 -04:00			`char *data;`
			`int size;`

Save URLs to disk on-the-fly and provide an option for toggling it 2012-10-13 04:03:39 -04:00			`/* we don't need any URLs if we have neither URL grabbing nor URL logging enabled */`
Variable cleanup for the rest 2012-10-22 09:55:43 -04:00			`if (!prefs.hex_url_grabber && !prefs.hex_url_logging)`
Save URLs to disk on-the-fly and provide an option for toggling it 2012-10-13 04:03:39 -04:00			`{`
update xchat to r1503 2012-03-15 18:58:52 -04:00			`return;`
Save URLs to disk on-the-fly and provide an option for toggling it 2012-10-13 04:03:39 -04:00			`}`
update xchat to r1503 2012-03-15 18:58:52 -04:00
			`data = malloc (len + 1);`
add xchat r1489 2011-02-23 22:14:30 -05:00			`if (!data)`
Save URLs to disk on-the-fly and provide an option for toggling it 2012-10-13 04:03:39 -04:00			`{`
add xchat r1489 2011-02-23 22:14:30 -05:00			`return;`
Save URLs to disk on-the-fly and provide an option for toggling it 2012-10-13 04:03:39 -04:00			`}`
add xchat r1489 2011-02-23 22:14:30 -05:00			`memcpy (data, urltext, len);`
			`data[len] = 0;`

			`if (data[len - 1] == '.') /* chop trailing dot */`
			`{`
			`len--;`
			`data[len] = 0;`
			`}`
Fix Wikipedia URL detection - URLs inside parentheses won't work 2012-10-07 07:27:44 -04:00			`/* chop trailing ) but only if there's no counterpart */`
			`if (data[len - 1] == ')' && strchr (data, '(') == NULL)`
Save URLs to disk on-the-fly and provide an option for toggling it 2012-10-13 04:03:39 -04:00			`{`
add xchat r1489 2011-02-23 22:14:30 -05:00			`data[len - 1] = 0;`
Save URLs to disk on-the-fly and provide an option for toggling it 2012-10-13 04:03:39 -04:00			`}`

Variable cleanup for the rest 2012-10-22 09:55:43 -04:00			`if (prefs.hex_url_logging)`
Save URLs to disk on-the-fly and provide an option for toggling it 2012-10-13 04:03:39 -04:00			`{`
			`url_save_node (data);`
			`}`

			`/* the URL is saved already, only continue if we need the URL grabber too */`
Variable cleanup for the rest 2012-10-22 09:55:43 -04:00			`if (!prefs.hex_url_grabber)`
Save URLs to disk on-the-fly and provide an option for toggling it 2012-10-13 04:03:39 -04:00			`{`
			`free (data);`
			`return;`
			`}`
add xchat r1489 2011-02-23 22:14:30 -05:00
Fix memory leak related to url grabbing 2012-10-01 15:53:25 -04:00			`if (!url_tree)`
			`{`
			`url_tree = tree_new ((tree_cmp_func *)strcasecmp, NULL);`
			`url_btree = g_tree_new ((GCompareFunc)strcasecmp);`
			`}`

add xchat r1489 2011-02-23 22:14:30 -05:00			`if (url_find (data))`
			`{`
			`free (data);`
			`return;`
			`}`

update xchat to r1503 2012-03-15 18:58:52 -04:00			`size = tree_size (url_tree);`
			`/* 0 is unlimited */`
Variable cleanup for the rest 2012-10-22 09:55:43 -04:00			`if (prefs.hex_url_grabber_limit > 0 && size >= prefs.hex_url_grabber_limit)`
update xchat to r1503 2012-03-15 18:58:52 -04:00			`{`
			`/* the loop is necessary to handle having the limit lowered while`
Nah, even more rebranding 2012-10-30 06:35:39 -04:00			`HexChat is running */`
Variable cleanup for the rest 2012-10-22 09:55:43 -04:00			`size -= prefs.hex_url_grabber_limit;`
update xchat to r1503 2012-03-15 18:58:52 -04:00			`for(; size > 0; size--)`
Fix memory leak related to url grabbing 2012-10-01 15:53:25 -04:00			`{`
			`char *pos;`

			`pos = tree_remove_at_pos (url_tree, 0);`
			`g_tree_remove (url_btree, pos);`
			`free (pos);`
			`}`
update xchat to r1503 2012-03-15 18:58:52 -04:00			`}`

			`tree_append (url_tree, data);`
Fix memory leak related to url grabbing 2012-10-01 15:53:25 -04:00			`g_tree_insert (url_btree, data, GINT_TO_POINTER (tree_size (url_tree) - 1));`
add xchat r1489 2011-02-23 22:14:30 -05:00			`fe_url_add (data);`
			`}`

			`/* check if a word is clickable. This is called on mouse motion events, so`
			`keep it FAST! This new version was found to be almost 3x faster than`
			`2.4.4 release. */`

overhauling of URL detection, including channel, nick, etc 'words' 2013-01-02 17:50:26 -05:00			`static int laststart = 0;`
			`static int lastend = 0;`
			`static int lasttype = 0;`

Correct nick recognition. Closes 372. 2013-01-11 04:39:21 -05:00			`static int`
			`strchrs (char c, char *s)`
			`{`
			`while (*s)`
			`if (c == *s++)`
			`return TRUE;`
			`return FALSE;`
			`}`

			`#define NICKPRE "~+!@%%&"`
add xchat r1489 2011-02-23 22:14:30 -05:00			`int`
overhauling of URL detection, including channel, nick, etc 'words' 2013-01-02 17:50:26 -05:00			`url_check_word (const char *word)`
add xchat r1489 2011-02-23 22:14:30 -05:00			`{`
overhauling of URL detection, including channel, nick, etc 'words' 2013-01-02 17:50:26 -05:00			`laststart = lastend = lasttype = 0;`
			`if (do_an_re (word, &laststart, &lastend, &lasttype))`
add xchat r1489 2011-02-23 22:14:30 -05:00			`{`
overhauling of URL detection, including channel, nick, etc 'words' 2013-01-02 17:50:26 -05:00			`switch (lasttype)`
add xchat r1489 2011-02-23 22:14:30 -05:00			`{`
Correct nick recognition. Closes 372. 2013-01-11 04:39:21 -05:00			`char *str;`

overhauling of URL detection, including channel, nick, etc 'words' 2013-01-02 17:50:26 -05:00			`case WORD_NICK:`
Correct nick recognition. Closes 372. 2013-01-11 04:39:21 -05:00			`if (strchrs (word[laststart], NICKPRE))`
overhauling of URL detection, including channel, nick, etc 'words' 2013-01-02 17:50:26 -05:00			`laststart++;`
Oops, Windows doesn't offer strndup(). Use g_strndup(). 2013-01-11 19:12:52 -05:00			`str = g_strndup (&word[laststart], lastend - laststart);`
Correct nick recognition. Closes 372. 2013-01-11 04:39:21 -05:00			`if (!userlist_find (current_sess, str))`
overhauling of URL detection, including channel, nick, etc 'words' 2013-01-02 17:50:26 -05:00			`lasttype = 0;`
Oops, Windows doesn't offer strndup(). Use g_strndup(). 2013-01-11 19:12:52 -05:00			`g_free (str);`
overhauling of URL detection, including channel, nick, etc 'words' 2013-01-02 17:50:26 -05:00			`return lasttype;`
			`case WORD_EMAIL:`
			`if (!isalnum (word[laststart]))`
			`laststart++;`
			`/* Fall through */`
			`case WORD_URL:`
			`case WORD_HOST:`
			`case WORD_CHANNEL:`
I forgot to add case WORD_PATH to do_an_url()'s switch statement 2013-01-02 18:03:16 -05:00			`case WORD_PATH:`
overhauling of URL detection, including channel, nick, etc 'words' 2013-01-02 17:50:26 -05:00			`return lasttype;`
			`default:`
			`return 0; /* Should not occur */`
add xchat r1489 2011-02-23 22:14:30 -05:00			`}`
			`}`
overhauling of URL detection, including channel, nick, etc 'words' 2013-01-02 17:50:26 -05:00			`else`
			`return 0;`
add xchat r1489 2011-02-23 22:14:30 -05:00			`}`

Limit url-grabbing to NOTICE, PRIVMSG, TOPIC, 332 (RPL_TOPIC), 372 (RPL_MOTD) 2012-11-13 15:06:35 -05:00			`/* List of IRC commands for which contents (and thus possible URLs)`
			`* are visible to the user. NOTE: Trailing blank required in each. */`
			`static char *commands[] = {`
			`"NOTICE ",`
			`"PRIVMSG ",`
			`"TOPIC ",`
			`"332 ", /* RPL_TOPIC */`
			`"372 " /* RPL_MOTD */`
			`};`

			`#define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0]))`

add xchat r1489 2011-02-23 22:14:30 -05:00			`void`
			`url_check_line (char *buf, int len)`
			`{`
overhauling of URL detection, including channel, nick, etc 'words' 2013-01-02 17:50:26 -05:00			`GRegex *re(void);`
			`GMatchInfo *gmi;`
add xchat r1489 2011-02-23 22:14:30 -05:00			`char *po = buf;`
overhauling of URL detection, including channel, nick, etc 'words' 2013-01-02 17:50:26 -05:00			`int i;`
Limit url-grabbing to NOTICE, PRIVMSG, TOPIC, 332 (RPL_TOPIC), 372 (RPL_MOTD) 2012-11-13 15:06:35 -05:00
			`/* Skip over message prefix */`
			`if (*po == ':')`
			`{`
			`po = strchr (po, ' ');`
			`if (!po)`
			`return;`
			`po++;`
			`}`
			`/* Allow only commands from the above list */`
			`for (i = 0; i < ARRAY_SIZE (commands); i++)`
			`{`
			`char *cmd = commands[i];`
			`int len = strlen (cmd);`

			`if (strncmp (cmd, po, len) == 0)`
			`{`
			`po += len;`
			`break;`
			`}`
			`}`
			`if (i == ARRAY_SIZE (commands))`
			`return;`

			`/* Skip past the channel name or user nick */`
			`po = strchr (po, ' ');`
			`if (!po)`
			`return;`
			`po++;`
add xchat r1489 2011-02-23 22:14:30 -05:00
overhauling of URL detection, including channel, nick, etc 'words' 2013-01-02 17:50:26 -05:00			`g_regex_match(re_url(), po, 0, &gmi);`
			`while (g_match_info_matches(gmi))`
			`{`
			`int start, end;`

			`g_match_info_fetch_pos(gmi, 0, &start, &end);`
			`if (po[end - 1] == '\r')`
			`po[--end] = 0;`
			`if (g_strstr_len (po + start, end - start, "://"))`
			`url_add(po + start, end - start);`
			`g_match_info_next(gmi, NULL);`
			`}`
			`g_match_info_free(gmi);`
			`}`

			`int`
			`url_last (int lstart, int lend)`
			`{`
			`*lstart = laststart;`
			`*lend = lastend;`
			`return lasttype;`
			`}`

			`static int`
			`do_an_re(const char word,int start, int end, int type)`
			`{`
			`typedef struct func_s {`
			`GRegex (fn)(void);`
			`int type;`
			`} func_t;`
			`func_t funcs[] =`
			`{`
			`{ re_email, WORD_EMAIL },`
			`{ re_url, WORD_URL },`
			`{ re_host, WORD_HOST },`
			`{ re_channel, WORD_CHANNEL },`
			`{ re_path, WORD_PATH },`
			`{ re_nick, WORD_NICK }`
			`};`
add xchat r1489 2011-02-23 22:14:30 -05:00
overhauling of URL detection, including channel, nick, etc 'words' 2013-01-02 17:50:26 -05:00			`GMatchInfo *gmi;`
			`int k;`
add xchat r1489 2011-02-23 22:14:30 -05:00
overhauling of URL detection, including channel, nick, etc 'words' 2013-01-02 17:50:26 -05:00			`for (k = 0; k < sizeof funcs / sizeof (func_t); k++)`
add xchat r1489 2011-02-23 22:14:30 -05:00			`{`
overhauling of URL detection, including channel, nick, etc 'words' 2013-01-02 17:50:26 -05:00			`g_regex_match (funcs[k].fn(), word, 0, &gmi);`
			`if (!g_match_info_matches (gmi))`
add xchat r1489 2011-02-23 22:14:30 -05:00			`{`
overhauling of URL detection, including channel, nick, etc 'words' 2013-01-02 17:50:26 -05:00			`g_match_info_free (gmi);`
			`continue;`
			`}`
			`while (g_match_info_matches (gmi))`
			`{`
			`g_match_info_fetch_pos (gmi, 0, start, end);`
			`g_match_info_next (gmi, NULL);`
add xchat r1489 2011-02-23 22:14:30 -05:00			`}`
overhauling of URL detection, including channel, nick, etc 'words' 2013-01-02 17:50:26 -05:00			`g_match_info_free (gmi);`
			`*type = funcs[k].type;`
			`return TRUE;`
add xchat r1489 2011-02-23 22:14:30 -05:00			`}`
overhauling of URL detection, including channel, nick, etc 'words' 2013-01-02 17:50:26 -05:00
			`return FALSE;`
			`}`

			`/* Miscellaneous description --- */`
Oops. didn't get the DOMAIN slight change last time. Here it is now. 2013-01-15 16:26:11 -05:00			`#define DOMAIN "[a-z0-9][-a-z0-9](\\.[-a-z0-9]+)\\."`
Prev called anything a URL! Reverted but slight DOMAIN change. 2013-01-15 16:19:29 -05:00			`#define TLD "[a-z][-a-z0-9]*[a-z]"`
overhauling of URL detection, including channel, nick, etc 'words' 2013-01-02 17:50:26 -05:00			`#define IPADDR "[0-9]+(\\.[0-9]+){3}"`
Prev called anything a URL! Reverted but slight DOMAIN change. 2013-01-15 16:19:29 -05:00			`#define HOST "(" DOMAIN TLD "\|" IPADDR ")"`
overhauling of URL detection, including channel, nick, etc 'words' 2013-01-02 17:50:26 -05:00			`#define OPT_PORT "(:[1-9][0-9]{0,4})?"`

			`GRegex *`
			`make_re(char grist, char type)`
			`{`
			`GRegex *ret;`
			`GError *err = NULL;`

			`ret = g_regex_new (grist, G_REGEX_CASELESS + G_REGEX_OPTIMIZE, 0, &err);`
			`g_free (grist);`
			`return ret;`
			`}`

			`/* HOST description --- */`
			`/* (see miscellaneous above) */`
			`static GRegex *`
			`re_host (void)`
			`{`
			`static GRegex *host_ret;`
			`char *grist;`
			`grist = g_strdup_printf (`
			`"(" /* HOST */`
			`HOST OPT_PORT`
			`")"`
			`);`
			`host_ret = make_re (grist, "re_host");`
			`return host_ret;`
			`}`

			`/* URL description --- */`
			`#define SCHEME "(%s)"`
			`#define LPAR "\\("`
			`#define RPAR "\\)"`
			`#define NOPARENS "[^() \t]*"`

			`char *prefix[] = {`
			`"irc\\.",`
			`"ftp\\.",`
			`"www\\.",`
			`"irc://",`
Add "ircs://" to the list of URL schemes 2013-01-08 15:55:20 -05:00			`"ircs://",`
overhauling of URL detection, including channel, nick, etc 'words' 2013-01-02 17:50:26 -05:00			`"ftp://",`
			`"http://",`
			`"https://",`
			`"file://",`
			`"rtsp://",`
			`NULL`
			`};`

			`static GRegex *`
			`re_url (void)`
			`{`
			`static GRegex *url_ret;`
			`char *grist;`
			`char *scheme;`

			`if (url_ret) return url_ret;`

			`scheme = g_strjoinv ("\|", prefix);`
			`grist = g_strdup_printf (`
			`"(" /* URL or HOST */`
Also allow URLs of of the form \.[a-z]+\/ 2013-01-03 02:47:58 -05:00			`"("`
			`SCHEME HOST OPT_PORT`
			`"(" /* Optional "/path?query_string#fragment_id" */`
			`"/" /* Must start with slash */`
			`"("`
			`"(" LPAR NOPARENS RPAR ")"`
			`"\|"`
			`"(" NOPARENS ")"`
			`")" / Zero or more occurrences of either of these */`
			`"(?<![.,?!\\]])" /* Not allowed to end with these */`
			`")?" /* Zero or one of this /path?query_string#fragment_id thing */`
			`")\|("`
			`HOST OPT_PORT "/"`
			`"(" /* Optional "path?query_string#fragment_id" */`
			`"("`
			`"(" LPAR NOPARENS RPAR ")"`
			`"\|"`
			`"(" NOPARENS ")"`
			`")" / Zero or more occurrences of either of these */`
			`"(?<![.,?!\\]])" /* Not allowed to end with these */`
			`")?" /* Zero or one of this /path?query_string#fragment_id thing */`
			`")"`
overhauling of URL detection, including channel, nick, etc 'words' 2013-01-02 17:50:26 -05:00			`")"`
Also allow URLs of of the form \.[a-z]+\/ 2013-01-03 02:47:58 -05:00			`, scheme`
overhauling of URL detection, including channel, nick, etc 'words' 2013-01-02 17:50:26 -05:00			`);`
			`url_ret = make_re (grist, "re_url");`
			`g_free (scheme);`
			`return url_ret;`
			`}`

			`/* EMAIL description --- */`
			`#define EMAIL "[a-z][-_a-z0-9]+@" "(" HOST ")"`

			`static GRegex *`
			`re_email (void)`
			`{`
			`static GRegex *email_ret;`
			`char *grist;`

			`if (email_ret) return email_ret;`

			`grist = g_strdup_printf (`
			`"(" /* EMAIL */`
			`EMAIL`
			`")"`
			`);`
			`email_ret = make_re (grist, "re_email");`
			`return email_ret;`
			`}`

			`/* NICK description --- */`
Correct nick recognition. Closes 372. 2013-01-11 04:39:21 -05:00			`/* For NICKPRE see before url_check_word() */`
overhauling of URL detection, including channel, nick, etc 'words' 2013-01-02 17:50:26 -05:00			`#define NICKHYP "-"`
			`#define NICKLET "a-z"`
			`#define NICKDIG "0-9"`
			`/* Note for NICKSPE: \\\\ boils down to a single \ */`
			#define NICKSPE "\\[\\]\\\\`_^{\|}"
For NICK0 use NICKSPE, not NICKDIG 2013-01-03 17:13:20 -05:00			`#define NICK0 "[" NICKPRE "]?[" NICKLET NICKSPE "]"`
Ywes, nicks can be as short as one character long! 2013-01-03 17:24:04 -05:00			`#define NICK1 "[" NICKHYP NICKLET NICKDIG NICKSPE "]*"`
overhauling of URL detection, including channel, nick, etc 'words' 2013-01-02 17:50:26 -05:00			`#define NICK NICK0 NICK1`

			`static GRegex *`
			`re_nick (void)`
			`{`
			`static GRegex *nick_ret;`
			`char *grist;`

			`if (nick_ret) return nick_ret;`

			`grist = g_strdup_printf (`
			`"(" /* NICK */`
			`NICK`
			`")"`
			`);`
			`nick_ret = make_re (grist, "re_nick");`
			`return nick_ret;`
			`}`

			`/* CHANNEL description --- */`
			`#define CHANNEL "#[^ \t\a,:]+"`

			`static GRegex *`
			`re_channel (void)`
			`{`
			`static GRegex *channel_ret;`
			`char *grist;`

			`if (channel_ret) return channel_ret;`

			`grist = g_strdup_printf (`
			`"(" /* CHANNEL */`
			`CHANNEL`
			`")"`
			`);`
			`channel_ret = make_re (grist, "re_channel");`
			`return channel_ret;`
			`}`

			`/* PATH description --- */`
			`#ifdef WIN32`
only detect full paths on windows. closes #302 2013-01-31 23:58:18 -05:00			`/* Windows path can be C: D: etc */`
			`#define PATH "^([a-z]:).*"`
overhauling of URL detection, including channel, nick, etc 'words' 2013-01-02 17:50:26 -05:00			`#else`
			`/* Linux path can be / or ./ or ../ etc */`
			`#define PATH "^(/\|\\./\|\\.\\./).*"`
			`#endif`

			`static GRegex *`
			`re_path (void)`
			`{`
			`static GRegex *path_ret;`
			`char *grist;`

			`if (path_ret) return path_ret;`

			`grist = g_strdup_printf (`
			`"(" /* PATH */`
			`PATH`
			`")"`
			`);`
			`path_ret = make_re (grist, "re_path");`
			`return path_ret;`
add xchat r1489 2011-02-23 22:14:30 -05:00			`}`