From 80bdd9ce114af6a6e42a20bef725bdc3dae52cf9 Mon Sep 17 00:00:00 2001
From: TingPing
Date: Sun, 7 Sep 2014 19:39:30 -0400
Subject: [PATCH] Detect utf8 urls

They might not be valid, but like many things they are still used
---
 src/common/url.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/common/url.c b/src/common/url.c
index a3922345..1321374f 100644
--- a/src/common/url.c
+++ b/src/common/url.c
@@ -415,8 +415,8 @@ regex_match (const GRegex *re, const char *word, int *start, int *end)
 }
 
 /* Miscellaneous description --- */
-#define DOMAIN "[_a-z0-9][-_a-z0-9]*(\\.[-_a-z0-9]+)*"
-#define TLD "\\.[a-z][-a-z0-9]*[a-z]"
+#define DOMAIN "[_\\pL\\pN][-_\\pL\\pN]*(\\.[-_\\pL\\pN]+)*"
+#define TLD "\\.[\\pL][-\\pL\\pN]*[\\pL]"
 #define IPADDR "[0-9]{1,3}(\\.[0-9]{1,3}){3}"
 #define IPV6GROUP "([0-9a-f]{0,4})"
 #define IPV6ADDR "((" IPV6GROUP "(:" IPV6GROUP "){7})" \