update to 3.2 version of full-text-rss, issue #694

2024-11-23 09:32:15 -05:00 · 2014-05-22 17:16:38 +03:00 · 2014-05-22 17:16:38 +03:00 · 3ec62cf95a
commit 3ec62cf95a
parent ab157bbb75
15 changed files with 4417 additions and 4174 deletions
--- a/inc/3rdparty/config.php
+++ b/inc/3rdparty/config.php
@ -19,7 +19,7 @@ if (!isset($options)) $options = new stdClass();
 // Enable service
 // ----------------------
 // Set this to false if you want to disable the service.
-// If set to false, no feed is produced and users will 
+// If set to false, no feed is produced and users will
 // be told that the service is disabled.
 $options->enabled = true;

@ -43,10 +43,64 @@ $options->default_entries = 5;
 // ----------------------
 // The maximum number of feed items to process when no access key is supplied.
 // This limits the user-supplied &max=x value. For example, if the user
-// asks for 20 items to be processed (&max=20), if max_entries is set to 
+// asks for 20 items to be processed (&max=20), if max_entries is set to
 // 10, only 10 will be processed.
 $options->max_entries = 10;

+// Full content
+// ----------------------
+// By default Full-Text RSS includes the extracted content in the output.
+// You can exclude this from the output by passing '&content=0' in the querystring.
+//
+// Possible values...
+// Always include: true
+// Never include: false
+// Include unless user overrides (&content=0): 'user' (default)
+//
+// Note: currently this does not disable full content extraction. It simply omits it
+// from the output.
+$options->content = 'user';
+
+// Excerpts
+// ----------------------
+// By default Full-Text RSS does not include excerpts in the output.
+// You can enable this by passing '&summary=1' in the querystring.
+// This will include a plain text excerpt from the extracted content.
+//
+// Possible values...
+// Always include: true (recommended for new users)
+// Never include: false
+// Don't include unless user overrides (&summary=1): 'user' (default)
+//
+// Important: if both content and excerpts are requested, the excerpt will be
+// placed in the description element and the full content inside content:encoded.
+// If excerpts are not requested, the full content will go inside the description element.
+//
+// Why are we not returning both excerpts and content by default?
+// Mainly for backward compatibility.
+// Excerpts should appear in the feed item's description element. Previous versions
+// of Full-Text RSS did not return excerpts, so the description element was always
+// used for the full content (as recommended by the RSS advisory). When returning both,
+// we need somewhere else to place the content (content:encoded).
+// Having both enabled should not create any problems for news readers, but it may create
+// problems for developers upgrading from one of our earlier versions who may now find
+// their applications are returning excerpts instead of the full content they were
+// expecting. To avoid such surprises for users who are upgrading Full-Text RSS,
+// excerpts must be explicitly requested in the querystring by default.
+//
+// Why not use a different element name for excerpts?
+// According to the RSS advisory:
+// "Publishers who employ summaries should store the summary in description and
+// the full content in content:encoded, ordering description first within the item.
+// On items with no summary, the full content should be stored in description."
+// See: http://www.rssboard.org/rss-profile#namespace-elements-content-encoded
+//
+// For more consistent element naming, we recommend new users set this option to true.
+// The full content can still be excluded via the querystring, but the element names
+// will not change: when $options->summary = true, the description element will always
+// be reserved for the excerpt and content:encoded always for full content.
+$options->summary = 'user';
+
 // Rewrite relative URLs
 // ----------------------
 // With this enabled relative URLs found in the extracted content
@ -67,7 +121,7 @@ $options->exclude_items_on_fail = 'user';
 // Enable multi-page support
 // -------------------------
 // If enabled, we will try to follow next page links on multi-page articles.
-// Currently this only happens for sites where next_page_link has been defined 
+// Currently this only happens for sites where next_page_link has been defined
 // in a site config file.
 $options->multipage = true;

@ -125,10 +179,10 @@ $options->detect_language = 1;

 // Registration key
 // ---------------
-// The registration key is optional. It is not required to use Full-Text RSS, 
-// and does not affect the normal operation of Full-Text RSS. It is currently 
-// only used on admin pages which help you update site patterns with the 
-// latest version offered by FiveFilters.org. For these admin-related 
+// The registration key is optional. It is not required to use Full-Text RSS,
+// and does not affect the normal operation of Full-Text RSS. It is currently
+// only used on admin pages which help you update site patterns with the
+// latest version offered by FiveFilters.org. For these admin-related
 // tasks to complete, we will require a valid registration key.
 // If you would like one, you can purchase the latest version of Full-Text RSS
 // at http://fivefilters.org/content-only/
@ -144,12 +198,12 @@ $options->registration_key = '';
 // ----------------------
 // Certain pages/actions, e.g. updating site patterns with our online tool, will require admin credentials.
 // To use these pages, enter a password here and you'll be prompted for it when you try to access those pages.
-// If no password or username is set, pages requiring admin privelages will be inaccessible. 
+// If no password or username is set, pages requiring admin privileges will be inaccessible.
 // The default username is 'admin'.
 // If overriding with an environment variable, separate username and password with a colon, e.g.:
 // ftr_admin_credentials: admin:my-secret-password
 // Example: $options->admin_credentials = array('username'=>'admin', 'password'=>'my-secret-password');
-$options->admin_credentials = array('username'=>'admin', 'password'=>'admin');
+$options->admin_credentials = array('username'=>'admin', 'password'=>'');

 // URLs to allow
 // ----------------------
@ -178,12 +232,12 @@ $options->key_required = false;
 // ----------------------
 // By default, when processing feeds, we assume item titles in the feed
 // have not been truncated. So after processing web pages, the extracted titles
-// are not used in the generated feed. If you prefer to have extracted titles in 
-// the feed you can either set this to false, in which case we will always favour 
-// extracted titles. Alternatively, if set to 'user' (default) we'll use the 
+// are not used in the generated feed. If you prefer to have extracted titles in
+// the feed you can either set this to false, in which case we will always favour
+// extracted titles. Alternatively, if set to 'user' (default) we'll use the
 // extracted title if you pass '&use_extracted_title' in the querystring.
 // Possible values:
-// * Favour feed titles: true 
+// * Favour feed titles: true
 // * Favour extracted titles: false
 // * Favour feed titles with user override: 'user' (default)
 // Note: this has no effect when the input URL is to a web page - in these cases
@ -192,17 +246,17 @@ $options->favour_feed_titles = 'user';

 // Access keys (password protected access)
 // ------------------------------------
-// NOTE: You do not need an API key from fivefilters.org to run your own 
+// NOTE: You do not need an API key from fivefilters.org to run your own
 // copy of the code. This is here if you'd like to restrict access to
 // _your_ copy.
 // Keys let you group users - those with a key and those without - and
 // restrict access to the service to those without a key.
 // If you want everyone to access the service in the same way, you can
 // leave the array below empty and ignore the access key options further down.
-// The options further down let you control how the service should behave 
+// The options further down let you control how the service should behave
 // in each mode.
-// Note: Explicitly including the index number (1 and 2 in the examples below) 
-// is highly recommended (when generating feeds, we encode the key and 
+// Note: Explicitly including the index number (1 and 2 in the examples below)
+// is highly recommended (when generating feeds, we encode the key and
 // refer to it by index number and hash).
 $options->api_keys = array();
 // Example:
@ -232,13 +286,13 @@ $options->max_entries_with_key = 10;
 // filter the resulting HTML for XSS attacks, making it redundant for
 // Full-Text RSS do the same. Similarly with frameworks/CMS which display
 // feed content - the content should be treated like any other user-submitted content.
-// 
+//
 // If you are writing an application yourself which is processing feeds generated by
 // Full-Text RSS, you can either filter the HTML yourself to remove potential XSS attacks
 // or enable this option. This might be useful if you are processing our generated
 // feeds with JavaScript on the client side - although there's client side xss
 // filtering available too, e.g. https://code.google.com/p/google-caja/wiki/JsHtmlSanitizer
-// 
+//
 // If enabled, we'll pass retrieved HTML content through htmLawed with
 // safe flag on and style attributes denied, see
 // http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed/htmLawed_README.htm#s3.6
@ -253,8 +307,8 @@ $options->xss_filter = 'user';
 // Allowed parsers
 // ----------------------
 // Full-Text RSS attempts to use PHP's libxml extension to process HTML.
-// While fast, on some sites it may not always produce good results. 
-// For these sites, you can specify an alternative HTML parser: 
+// While fast, on some sites it may not always produce good results.
+// For these sites, you can specify an alternative HTML parser:
 // parser: html5lib
 // The html5lib parser is bundled with Full-Text RSS.
 // see http://code.google.com/p/html5lib/
@ -273,7 +327,7 @@ $options->cors = false;

 // Use APC user cache?
 // ----------------------
-// If enabled we will store site config files (when requested 
+// If enabled we will store site config files (when requested
 // for the first time) in APC's user cache. Keys prefixed with 'sc.'
 // This improves performance by reducing disk access.
 // Note: this has no effect if APC is unavailable on your server.
@ -346,7 +400,7 @@ $options->rewrite_url = array(
 // Valid actions:
 // * 'exclude' - exclude this item from the result
 // * 'link' - create HTML link to the item
-$options->content_type_exc = array( 
+$options->content_type_exc = array(
 							   'application/pdf' => array('action'=>'link', 'name'=>'PDF'),
 							   'image' => array('action'=>'link', 'name'=>'Image'),
 							   'audio' => array('action'=>'link', 'name'=>'Audio'),
@ -375,13 +429,13 @@ $options->cache_cleanup = 100;
 /// DO NOT CHANGE ANYTHING BELOW THIS ///////////
 /////////////////////////////////////////////////

-if (!defined('_FF_FTR_VERSION')) define('_FF_FTR_VERSION', '3.1');
+if (!defined('_FF_FTR_VERSION')) define('_FF_FTR_VERSION', '3.2');

 if (basename(__FILE__) == 'config.php') {
 	if (file_exists(dirname(__FILE__).'/custom_config.php')) {
 		require_once dirname(__FILE__).'/custom_config.php';
 	}
-	
+
 	// check for environment variables - often used on cloud platforms
 	// environment variables should be prefixed with 'ftr_', e.g.
 	// ftr_max_entries: 1
--- a/inc/3rdparty/libraries/content-extractor/ContentExtractor.php
+++ b/inc/3rdparty/libraries/content-extractor/ContentExtractor.php
--- a/inc/3rdparty/libraries/content-extractor/SiteConfig.php
+++ b/inc/3rdparty/libraries/content-extractor/SiteConfig.php
@ -1,338 +1,343 @@
-<?php
-/**
- * Site Config
- * 
- * Each instance of this class should hold extraction patterns and other directives
- * for a website. See ContentExtractor class to see how it's used.
- * 
- * @version 0.7
- * @date 2012-08-27
- * @author Keyvan Minoukadeh
- * @copyright 2012 Keyvan Minoukadeh
- * @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3
- */
-
-class SiteConfig
-{
-	// Use first matching element as title (0 or more xpath expressions)
-	public $title = array();
-	
-	// Use first matching element as body (0 or more xpath expressions)
-	public $body = array();
-	
-	// Use first matching element as author (0 or more xpath expressions)
-	public $author = array();
-	
-	// Use first matching element as date (0 or more xpath expressions)
-	public $date = array();
-	
-	// Strip elements matching these xpath expressions (0 or more)
-	public $strip = array();
-	
-	// Strip elements which contain these strings (0 or more) in the id or class attribute 
-	public $strip_id_or_class = array();
-	
-	// Strip images which contain these strings (0 or more) in the src attribute 
-	public $strip_image_src = array();
-	
-	// Additional HTTP headers to send
-	// NOT YET USED
-	public $http_header = array();
-	
-	// Process HTML with tidy before creating DOM (bool or null if undeclared)
-	public $tidy = null;
-	
-	protected $default_tidy = true; // used if undeclared
-	
-	// Autodetect title/body if xpath expressions fail to produce results.
-	// Note that this applies to title and body separately, ie. 
-	//   * if we get a body match but no title match, this option will determine whether we autodetect title 
-	//   * if neither match, this determines whether we autodetect title and body.
-	// Also note that this only applies when there is at least one xpath expression in title or body, ie.
-	//   * if title and body are both empty (no xpath expressions), this option has no effect (both title and body will be auto-detected)
-	//   * if there's an xpath expression for title and none for body, body will be auto-detected and this option will determine whether we auto-detect title if the xpath expression for it fails to produce results.
-	// Usage scenario: you want to extract something specific from a set of URLs, e.g. a table, and if the table is not found, you want to ignore the entry completely. Auto-detection is unlikely to succeed here, so you construct your patterns and set this option to false. Another scenario may be a site where auto-detection has proven to fail (or worse, picked up the wrong content).
-	// bool or null if undeclared
-	public $autodetect_on_failure = null;
-	protected $default_autodetect_on_failure = true; // used if undeclared
-	
-	// Clean up content block - attempt to remove elements that appear to be superfluous
-	// bool or null if undeclared
-	public $prune = null;
-	protected $default_prune = true; // used if undeclared
-	
-	// Test URL - if present, can be used to test the config above
-	public $test_url = array();
-	
-	// Single-page link - should identify a link element or URL pointing to the page holding the entire article
-	// This is useful for sites which split their articles across multiple pages. Links to such pages tend to 
-	// display the first page with links to the other pages at the bottom. Often there is also a link to a page
-	// which displays the entire article on one page (e.g. 'print view').
-	// This should be an XPath expression identifying the link to that page. If present and we find a match,
-	// we will retrieve that page and the rest of the options in this config will be applied to the new page.
-	public $single_page_link = array();
-	
-	public $next_page_link = array();
-	
-	// Single-page link in feed? - same as above, but patterns applied to item description HTML taken from feed
-	public $single_page_link_in_feed = array();
-	
-	// Which parser to use for turning raw HTML into a DOMDocument (either 'libxml' or 'html5lib')
-	// string or null if undeclared
-	public $parser = null;
-	protected $default_parser = 'libxml'; // used if undeclared
-	
-	// Strings to search for in HTML before processing begins (used with $replace_string)
-	public $find_string = array();
-	// Strings to replace those found in $find_string before HTML processing begins
-	public $replace_string = array();
-	
-	// the options below cannot be set in the config files which this class represents
-	
-	//public $cache_in_apc = false; // used to decide if we should cache in apc or not
-	public $cache_key = null;
-	public static $debug = false;
-	protected static $apc = false;
-	protected static $config_path;
-	protected static $config_path_fallback;
-	protected static $config_cache = array();
-	const HOSTNAME_REGEX = '/^(([a-zA-Z0-9-]*[a-zA-Z0-9])\.)*([A-Za-z0-9-]*[A-Za-z0-9])$/';
-	
-	protected static function debug($msg) {
-		if (self::$debug) {
-			//$mem = round(memory_get_usage()/1024, 2);
-			//$memPeak = round(memory_get_peak_usage()/1024, 2);
-			echo '* ',$msg;
-			//echo ' - mem used: ',$mem," (peak: $memPeak)\n";
-			echo "\n";
-			ob_flush();
-			flush();
-		}
-	}
-	
-	// enable APC caching of certain site config files?
-	// If enabled the following site config files will be 
-	// cached in APC cache (when requested for first time):
-	// * anything in site_config/custom/ and its corresponding file in site_config/standard/
-	// * the site config files associated with HTML fingerprints
-	// * the global site config file
-	// returns true if enabled, false otherwise
-	public static function use_apc($apc=true) {
-		if (!function_exists('apc_add')) {
-			if ($apc) self::debug('APC will not be used (function apc_add does not exist)');
-			return false;
-		}
-		self::$apc = $apc;
-		return $apc;
-	}
-	
-	// return bool or null
-	public function tidy($use_default=true) {
-		if ($use_default) return (isset($this->tidy)) ? $this->tidy : $this->default_tidy;
-		return $this->tidy;
-	}
-	
-	// return bool or null
-	public function prune($use_default=true) {
-		if ($use_default) return (isset($this->prune)) ? $this->prune : $this->default_prune;
-		return $this->prune;
-	}
-	
-	// return string or null
-	public function parser($use_default=true) {
-		if ($use_default) return (isset($this->parser)) ? $this->parser : $this->default_parser;
-		return $this->parser;
-	}
-
-	// return bool or null
-	public function autodetect_on_failure($use_default=true) {
-		if ($use_default) return (isset($this->autodetect_on_failure)) ? $this->autodetect_on_failure : $this->default_autodetect_on_failure;
-		return $this->autodetect_on_failure;
-	}
-	
-	public static function set_config_path($path, $fallback=null) {
-		self::$config_path = $path;
-		self::$config_path_fallback = $fallback;
-	}
-	
-	public static function add_to_cache($key, SiteConfig $config, $use_apc=true) {
-		$key = strtolower($key);
-		if (substr($key, 0, 4) == 'www.') $key = substr($key, 4);
-		if ($config->cache_key) $key = $config->cache_key;
-		self::$config_cache[$key] = $config;
-		if (self::$apc && $use_apc) {
-			self::debug("Adding site config to APC cache with key sc.$key");
-			apc_add("sc.$key", $config);
-		}
-		self::debug("Cached site config with key $key");
-	}
-	
-	public static function is_cached($key) {
-		$key = strtolower($key);
-		if (substr($key, 0, 4) == 'www.') $key = substr($key, 4);
-		if (array_key_exists($key, self::$config_cache)) {
-			return true;
-		} elseif (self::$apc && (bool)apc_fetch("sc.$key")) {
-			return true;
-		}
-		return false;
-	}
-	
-	public function append(SiteConfig $newconfig) {
-		// check for commands where we accept multiple statements (no test_url)
-		foreach (array('title', 'body', 'author', 'date', 'strip', 'strip_id_or_class', 'strip_image_src', 'single_page_link', 'single_page_link_in_feed', 'next_page_link', 'http_header', 'find_string', 'replace_string') as $var) {
-			// append array elements for this config variable from $newconfig to this config
-			//$this->$var = $this->$var + $newconfig->$var;
-			$this->$var = array_unique(array_merge($this->$var, $newconfig->$var));
-		}
-		// check for single statement commands
-		// we do not overwrite existing non null values
-		foreach (array('tidy', 'prune', 'parser', 'autodetect_on_failure') as $var) {
-			if ($this->$var === null) $this->$var = $newconfig->$var;
-		}
-	}
-	
-	// returns SiteConfig instance if an appropriate one is found, false otherwise
-	// if $exact_host_match is true, we will not look for wildcard config matches
-	// by default if host is 'test.example.org' we will look for and load '.example.org.txt' if it exists
-	public static function build($host, $exact_host_match=false) {
-		$host = strtolower($host);
-		if (substr($host, 0, 4) == 'www.') $host = substr($host, 4);
-		if (!$host || (strlen($host) > 200) || !preg_match(self::HOSTNAME_REGEX, ltrim($host, '.'))) return false;
-		// check for site configuration
-		$try = array($host);
-		// should we look for wildcard matches 
-		if (!$exact_host_match) {
-			$split = explode('.', $host);
-			if (count($split) > 1) {
-				array_shift($split);
-				$try[] = '.'.implode('.', $split);
-			}
-		}
-		
-		// look for site config file in primary folder
-		self::debug(". looking for site config for $host in primary folder");
-		foreach ($try as $h) {
-			if (array_key_exists($h, self::$config_cache)) {
-				self::debug("... site config for $h already loaded in this request");
-				return self::$config_cache[$h];
-			} elseif (self::$apc && ($sconfig = apc_fetch("sc.$h"))) {
-				self::debug("... site config for $h in APC cache");
-				return $sconfig;
-			} elseif (file_exists(self::$config_path."/$h.txt")) {
-				self::debug("... found site config ($h.txt)");
-				$file_primary = self::$config_path."/$h.txt";
-				$matched_name = $h;
-				break;
-			}
-		}
-		
-		// if we found site config, process it
-		if (isset($file_primary)) {
-			$config_lines = file($file_primary, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
-			if (!$config_lines || !is_array($config_lines)) return false;
-			$config = self::build_from_array($config_lines);
-			// if APC caching is available and enabled, mark this for cache
-			//$config->cache_in_apc = true;
-			$config->cache_key = $matched_name;
-			
-			// if autodetec on failure is off (on by default) we do not need to look
-			// in secondary folder
-			if (!$config->autodetect_on_failure()) {
-				self::debug('... autodetect on failure is disabled (no other site config files will be loaded)');
-				return $config;
-			}
-		}
-		
-		// look for site config file in secondary folder
-		if (isset(self::$config_path_fallback)) {
-			self::debug(". looking for site config for $host in secondary folder");
-			foreach ($try as $h) {
-				if (file_exists(self::$config_path_fallback."/$h.txt")) {
-					self::debug("... found site config in secondary folder ($h.txt)");
-					$file_secondary = self::$config_path_fallback."/$h.txt";
-					$matched_name = $h;
-					break;
-				}
-			}
-			if (!isset($file_secondary)) {
-				self::debug("... no site config match in secondary folder");
-			}
-		}
-		
-		// return false if no config file found
-		if (!isset($file_primary) && !isset($file_secondary)) {
-			self::debug("... no site config match for $host");
-			return false;
-		}
-		
-		// return primary config if secondary not found
-		if (!isset($file_secondary) && isset($config)) {
-			return $config;
-		}
-		
-		// process secondary config file
-		$config_lines = file($file_secondary, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
-		if (!$config_lines || !is_array($config_lines)) {
-			// failed to process secondary
-			if (isset($config)) {
-				// return primary config
-				return $config;
-			} else {
-				return false;
-			}
-		}
-		
-		// merge with primary and return
-		if (isset($config)) {
-			self::debug('. merging config files');
-			$config->append(self::build_from_array($config_lines));
-			return $config;
-		} else {
-			// return just secondary
-			$config = self::build_from_array($config_lines);
-			// if APC caching is available and enabled, mark this for cache
-			//$config->cache_in_apc = true;
-			$config->cache_key = $matched_name;
-			return $config;
-		}
-	}
-	
-	public static function build_from_array(array $lines) {
-		$config = new SiteConfig();
-		foreach ($lines as $line) {
-			$line = trim($line);
-			
-			// skip comments, empty lines
-			if ($line == '' || $line[0] == '#') continue;
-			
-			// get command
-			$command = explode(':', $line, 2);
-			// if there's no colon ':', skip this line
-			if (count($command) != 2) continue;
-			$val = trim($command[1]);
-			$command = trim($command[0]);
-			if ($command == '' || $val == '') continue;
-			
-			// check for commands where we accept multiple statements
-			if (in_array($command, array('title', 'body', 'author', 'date', 'strip', 'strip_id_or_class', 'strip_image_src', 'single_page_link', 'single_page_link_in_feed', 'next_page_link', 'http_header', 'test_url', 'find_string', 'replace_string'))) {
-				array_push($config->$command, $val);
-			// check for single statement commands that evaluate to true or false
-			} elseif (in_array($command, array('tidy', 'prune', 'autodetect_on_failure'))) {
-				$config->$command = ($val == 'yes');
-			// check for single statement commands stored as strings
-			} elseif (in_array($command, array('parser'))) {
-				$config->$command = $val;
-			// check for replace_string(find): replace
-			} elseif ((substr($command, -1) == ')') && preg_match('!^([a-z0-9_]+)\((.*?)\)$!i', $command, $match)) {
-				if (in_array($match[1], array('replace_string'))) {
-					$command = $match[1];
-					array_push($config->find_string, $match[2]);
-					array_push($config->$command, $val);
-				}
-			}
-		}
-		return $config;
-	}
-}
-?>
+<?php
+/**
+ * Site Config
+ * 
+ * Each instance of this class should hold extraction patterns and other directives
+ * for a website. See ContentExtractor class to see how it's used.
+ * 
+ * @version 0.8
+ * @date 2013-04-16
+ * @author Keyvan Minoukadeh
+ * @copyright 2013 Keyvan Minoukadeh
+ * @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3
+ */
+
+class SiteConfig
+{
+	// Use first matching element as title (0 or more xpath expressions)
+	public $title = array();
+	
+	// Use first matching element as body (0 or more xpath expressions)
+	public $body = array();
+	
+	// Use first matching element as author (0 or more xpath expressions)
+	public $author = array();
+	
+	// Use first matching element as date (0 or more xpath expressions)
+	public $date = array();
+	
+	// Strip elements matching these xpath expressions (0 or more)
+	public $strip = array();
+	
+	// Strip elements which contain these strings (0 or more) in the id or class attribute 
+	public $strip_id_or_class = array();
+	
+	// Strip images which contain these strings (0 or more) in the src attribute 
+	public $strip_image_src = array();
+	
+	// Additional HTTP headers to send
+	// NOT YET USED
+	public $http_header = array();
+	
+	// Process HTML with tidy before creating DOM (bool or null if undeclared)
+	public $tidy = null;
+	
+	protected $default_tidy = true; // used if undeclared
+	
+	// Autodetect title/body if xpath expressions fail to produce results.
+	// Note that this applies to title and body separately, ie. 
+	//   * if we get a body match but no title match, this option will determine whether we autodetect title 
+	//   * if neither match, this determines whether we autodetect title and body.
+	// Also note that this only applies when there is at least one xpath expression in title or body, ie.
+	//   * if title and body are both empty (no xpath expressions), this option has no effect (both title and body will be auto-detected)
+	//   * if there's an xpath expression for title and none for body, body will be auto-detected and this option will determine whether we auto-detect title if the xpath expression for it fails to produce results.
+	// Usage scenario: you want to extract something specific from a set of URLs, e.g. a table, and if the table is not found, you want to ignore the entry completely. Auto-detection is unlikely to succeed here, so you construct your patterns and set this option to false. Another scenario may be a site where auto-detection has proven to fail (or worse, picked up the wrong content).
+	// bool or null if undeclared
+	public $autodetect_on_failure = null;
+	protected $default_autodetect_on_failure = true; // used if undeclared
+	
+	// Clean up content block - attempt to remove elements that appear to be superfluous
+	// bool or null if undeclared
+	public $prune = null;
+	protected $default_prune = true; // used if undeclared
+	
+	// Test URL - if present, can be used to test the config above
+	public $test_url = array();
+	
+	// Single-page link - should identify a link element or URL pointing to the page holding the entire article
+	// This is useful for sites which split their articles across multiple pages. Links to such pages tend to 
+	// display the first page with links to the other pages at the bottom. Often there is also a link to a page
+	// which displays the entire article on one page (e.g. 'print view').
+	// This should be an XPath expression identifying the link to that page. If present and we find a match,
+	// we will retrieve that page and the rest of the options in this config will be applied to the new page.
+	public $single_page_link = array();
+	
+	public $next_page_link = array();
+	
+	// Single-page link in feed? - same as above, but patterns applied to item description HTML taken from feed
+	public $single_page_link_in_feed = array();
+	
+	// Which parser to use for turning raw HTML into a DOMDocument (either 'libxml' or 'html5lib')
+	// string or null if undeclared
+	public $parser = null;
+	protected $default_parser = 'libxml'; // used if undeclared
+	
+	// Strings to search for in HTML before processing begins (used with $replace_string)
+	public $find_string = array();
+	// Strings to replace those found in $find_string before HTML processing begins
+	public $replace_string = array();
+	
+	// the options below cannot be set in the config files which this class represents
+	
+	//public $cache_in_apc = false; // used to decide if we should cache in apc or not
+	public $cache_key = null;
+	public static $debug = false;
+	protected static $apc = false;
+	protected static $config_path;
+	protected static $config_path_fallback;
+	protected static $config_cache = array();
+	const HOSTNAME_REGEX = '/^(([a-zA-Z0-9-]*[a-zA-Z0-9])\.)*([A-Za-z0-9-]*[A-Za-z0-9])$/';
+	
+	protected static function debug($msg) {
+		if (self::$debug) {
+			//$mem = round(memory_get_usage()/1024, 2);
+			//$memPeak = round(memory_get_peak_usage()/1024, 2);
+			echo '* ',$msg;
+			//echo ' - mem used: ',$mem," (peak: $memPeak)\n";
+			echo "\n";
+			ob_flush();
+			flush();
+		}
+	}
+	
+	// enable APC caching of certain site config files?
+	// If enabled the following site config files will be 
+	// cached in APC cache (when requested for first time):
+	// * anything in site_config/custom/ and its corresponding file in site_config/standard/
+	// * the site config files associated with HTML fingerprints
+	// * the global site config file
+	// returns true if enabled, false otherwise
+	public static function use_apc($apc=true) {
+		if (!function_exists('apc_add')) {
+			if ($apc) self::debug('APC will not be used (function apc_add does not exist)');
+			return false;
+		}
+		self::$apc = $apc;
+		return $apc;
+	}
+	
+	// return bool or null
+	public function tidy($use_default=true) {
+		if ($use_default) return (isset($this->tidy)) ? $this->tidy : $this->default_tidy;
+		return $this->tidy;
+	}
+	
+	// return bool or null
+	public function prune($use_default=true) {
+		if ($use_default) return (isset($this->prune)) ? $this->prune : $this->default_prune;
+		return $this->prune;
+	}
+	
+	// return string or null
+	public function parser($use_default=true) {
+		if ($use_default) return (isset($this->parser)) ? $this->parser : $this->default_parser;
+		return $this->parser;
+	}
+
+	// Returns the autodetect_on_failure setting (bool or null).
+	// With $use_default true, an unset value falls back to $this->default_autodetect_on_failure.
+	public function autodetect_on_failure($use_default=true) {
+		if (!$use_default || isset($this->autodetect_on_failure)) return $this->autodetect_on_failure;
+		return $this->default_autodetect_on_failure;
+	}
+	
+	// Set the primary site config folder and, optionally, the fallback folder
+	// that build() searches as the secondary folder.
+	public static function set_config_path($path, $fallback=null) {
+		self::$config_path_fallback = $fallback;
+		self::$config_path = $path;
+	}
+	
+	// Store $config in the per-request cache and, when APC is enabled and
+	// $use_apc is true, in the APC cache too (under 'sc.<key>').
+	// $key is lowercased and stripped of a leading 'www.'; if $config has its
+	// own cache_key, that value is used instead.
+	public static function add_to_cache($key, SiteConfig $config, $use_apc=true) {
+		$cache_key = strtolower($key);
+		if (strncmp($cache_key, 'www.', 4) === 0) $cache_key = substr($cache_key, 4);
+		if ($config->cache_key) $cache_key = $config->cache_key;
+		self::$config_cache[$cache_key] = $config;
+		if ($use_apc && self::$apc) {
+			self::debug("Adding site config to APC cache with key sc.$cache_key");
+			apc_add("sc.$cache_key", $config);
+		}
+		self::debug("Cached site config with key $cache_key");
+	}
+	
+	// Returns true if a site config is cached under $key (lowercased, leading
+	// 'www.' removed), either in the per-request cache or, when APC is
+	// enabled, in the APC cache; false otherwise.
+	public static function is_cached($key) {
+		$lookup = strtolower($key);
+		if (strncmp($lookup, 'www.', 4) === 0) $lookup = substr($lookup, 4);
+		if (array_key_exists($lookup, self::$config_cache)) return true;
+		return self::$apc && (bool)apc_fetch("sc.$lookup");
+	}
+	
+	// Merge the settings of $newconfig into this config.
+	// List settings are combined; single-value settings are only taken from
+	// $newconfig when this config has no value (null) for them.
+	public function append(SiteConfig $newconfig) {
+		// commands where we accept multiple statements (test_url is not merged):
+		// join both lists and drop duplicate values
+		$list_vars = array('title', 'body', 'author', 'date', 'strip', 'strip_id_or_class', 'strip_image_src', 'single_page_link', 'single_page_link_in_feed', 'next_page_link', 'http_header');
+		foreach ($list_vars as $name) {
+			$merged = array_merge($this->$name, $newconfig->$name);
+			$this->$name = array_unique($merged);
+		}
+		// single statement commands: existing non null values are kept
+		$single_vars = array('tidy', 'prune', 'parser', 'autodetect_on_failure');
+		foreach ($single_vars as $name) {
+			if (null !== $this->$name) continue;
+			$this->$name = $newconfig->$name;
+		}
+		// find_string and replace_string are joined without array_unique (thanks fabrizio!)
+		// NOTE(review): presumably because entries in the two lists correspond by position - confirm
+		foreach (array('find_string', 'replace_string') as $name) {
+			$this->$name = array_merge($this->$name, $newconfig->$name);
+		}
+	}
+	
+	// Builds the site config for $host from '<name>.txt' files in the primary and/or secondary config folder; returns a SiteConfig instance if an appropriate one is found, false otherwise (also false for an empty, overlong or malformed host)
+	// if $exact_host_match is true, we will not look for wildcard config matches
+	// by default if host is 'test.example.org' we will look for and load '.example.org.txt' if it exists (tried after the exact host name)
+	public static function build($host, $exact_host_match=false) {
+		$host = strtolower($host);
+		if (substr($host, 0, 4) == 'www.') $host = substr($host, 4);
+		if (!$host || (strlen($host) > 200) || !preg_match(self::HOSTNAME_REGEX, ltrim($host, '.'))) return false;
+		// names to try, in order: the exact host first, then (optionally) the wildcard name
+		$try = array($host);
+		// should we look for wildcard matches (first host label dropped, leading dot kept)
+		if (!$exact_host_match) {
+			$split = explode('.', $host);
+			if (count($split) > 1) {
+				array_shift($split);
+				$try[] = '.'.implode('.', $split);
+			}
+		}
+		
+		// for each name: return a config already cached (this request, then APC) straight away, otherwise look for a site config file in primary folder
+		self::debug(". looking for site config for $host in primary folder");
+		foreach ($try as $h) {
+			if (array_key_exists($h, self::$config_cache)) {
+				self::debug("... site config for $h already loaded in this request");
+				return self::$config_cache[$h];
+			} elseif (self::$apc && ($sconfig = apc_fetch("sc.$h"))) {
+				self::debug("... site config for $h in APC cache");
+				return $sconfig;
+			} elseif (file_exists(self::$config_path."/$h.txt")) {
+				self::debug("... found site config ($h.txt)");
+				$file_primary = self::$config_path."/$h.txt";
+				$matched_name = $h;
+				break;
+			}
+		}
+		
+		// if we found site config, process it (an unreadable or empty primary file makes the whole call return false)
+		if (isset($file_primary)) {
+			$config_lines = file($file_primary, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
+			if (!$config_lines || !is_array($config_lines)) return false;
+			$config = self::build_from_array($config_lines);
+			// if APC caching is available and enabled, mark this for cache
+			//$config->cache_in_apc = true;
+			$config->cache_key = $matched_name;
+			
+			// if autodetect on failure is off (on by default) we do not need to look
+			// in secondary folder
+			if (!$config->autodetect_on_failure()) {
+				self::debug('... autodetect on failure is disabled (no other site config files will be loaded)');
+				return $config;
+			}
+		}
+		
+		// look for site config file in secondary folder (only if a fallback path has been set)
+		if (isset(self::$config_path_fallback)) {
+			self::debug(". looking for site config for $host in secondary folder");
+			foreach ($try as $h) {
+				if (file_exists(self::$config_path_fallback."/$h.txt")) {
+					self::debug("... found site config in secondary folder ($h.txt)");
+					$file_secondary = self::$config_path_fallback."/$h.txt";
+					$matched_name = $h;
+					break;
+				}
+			}
+			if (!isset($file_secondary)) {
+				self::debug("... no site config match in secondary folder");
+			}
+		}
+		
+		// return false if no config file found in either folder
+		if (!isset($file_primary) && !isset($file_secondary)) {
+			self::debug("... no site config match for $host");
+			return false;
+		}
+		
+		// return primary config if secondary not found
+		if (!isset($file_secondary) && isset($config)) {
+			return $config;
+		}
+		
+		// process secondary config file (from here on $file_secondary is known to be set)
+		$config_lines = file($file_secondary, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
+		if (!$config_lines || !is_array($config_lines)) {
+			// failed to process secondary (unreadable or empty)
+			if (isset($config)) {
+				// return primary config
+				return $config;
+			} else {
+				return false;
+			}
+		}
+		
+		// merge with primary and return (the merged config keeps the primary's cache_key)
+		if (isset($config)) {
+			self::debug('. merging config files');
+			$config->append(self::build_from_array($config_lines));
+			return $config;
+		} else {
+			// return just secondary
+			$config = self::build_from_array($config_lines);
+			// if APC caching is available and enabled, mark this for cache
+			//$config->cache_in_apc = true;
+			$config->cache_key = $matched_name;
+			return $config;
+		}
+	}
+	
+	// Parse the lines of a site config file into a new SiteConfig.
+	// Usable lines have the form 'command: value'. Blank lines, lines starting
+	// with '#', lines without a colon, lines with an empty command or value and
+	// lines with an unrecognised command are all ignored.
+	public static function build_from_array(array $lines) {
+		// commands where we accept multiple statements (values are collected in order)
+		$multi = array('title', 'body', 'author', 'date', 'strip', 'strip_id_or_class', 'strip_image_src', 'single_page_link', 'single_page_link_in_feed', 'next_page_link', 'http_header', 'test_url', 'find_string', 'replace_string');
+		// single statement commands that evaluate to true or false
+		$flags = array('tidy', 'prune', 'autodetect_on_failure');
+		$config = new SiteConfig();
+		foreach ($lines as $raw) {
+			$raw = trim($raw);
+			// skip comments, empty lines
+			if ($raw == '' || $raw[0] == '#') continue;
+			// split on the first colon only, so the value may itself contain colons
+			$pieces = explode(':', $raw, 2);
+			if (count($pieces) != 2) continue;
+			$command = trim($pieces[0]);
+			$val = trim($pieces[1]);
+			if ($command == '' || $val == '') continue;
+			if (in_array($command, $multi)) {
+				array_push($config->$command, $val);
+				continue;
+			}
+			if (in_array($command, $flags)) {
+				// anything other than 'yes' is stored as false
+				$config->$command = ($val == 'yes');
+				continue;
+			}
+			// single statement command stored as a string
+			if ($command == 'parser') {
+				$config->parser = $val;
+				continue;
+			}
+			// replace_string(find): replace
+			if (substr($command, -1) != ')') continue;
+			if (!preg_match('!^([a-z0-9_]+)\((.*?)\)$!i', $command, $match)) continue;
+			if ($match[1] == 'replace_string') {
+				array_push($config->find_string, $match[2]);
+				array_push($config->replace_string, $val);
+			}
+		}
+		return $config;
+	}
+}
--- a/inc/3rdparty/libraries/feedwriter/FeedItem.php
+++ b/inc/3rdparty/libraries/feedwriter/FeedItem.php
@ -1,7 +1,7 @@
 <?php
 /**
 * Univarsel Feed Writer
- * 
+ *
 * FeedItem class - Used as feed element in FeedWriter class
 *
 * @package         UnivarselFeedWriter
@ -12,20 +12,20 @@
 {
    private $elements = array();    //Collection of feed elements
    private $version;
-    
+
    /**
-    * Constructor 
-    * 
-    * @param    contant     (RSS1/RSS2/ATOM) RSS2 is default. 
-    */ 
+    * Constructor
+    *
+    * @param    contant     (RSS1/RSS2/ATOM) RSS2 is default.
+    */
    function __construct($version = RSS2)
-    {    
+    {
        $this->version = $version;
    }

    /**
    * Set element (overwrites existing elements with $elementName)
-    * 
+    *
    * @access   public
    * @param    srting  The tag name of an element
    * @param    srting  The content of tag
@ -38,11 +38,11 @@
            unset($this->elements[$elementName]);
        }
        $this->addElement($elementName, $content, $attributes);
-    }    
-    
+    }
+
    /**
    * Add an element to elements array
-    * 
+    *
    * @access   public
    * @param    srting  The tag name of an element
    * @param    srting  The content of tag
@ -61,11 +61,11 @@
        $this->elements[$elementName][$i]['content']    = $content;
        $this->elements[$elementName][$i]['attributes'] = $attributes;
    }
-    
+
    /**
-    * Set multiple feed elements from an array. 
+    * Set multiple feed elements from an array.
    * Elements which have attributes cannot be added by this method
-    * 
+    *
    * @access   public
    * @param    array   array of elements in 'tagName' => 'tagContent' format.
    * @return   void
@ -73,15 +73,15 @@
    public function addElementArray($elementArray)
    {
        if(! is_array($elementArray)) return;
-        foreach ($elementArray as $elementName => $content) 
+        foreach ($elementArray as $elementName => $content)
        {
            $this->addElement($elementName, $content);
        }
    }
-    
+
    /**
    * Return the collection of elements in this feed item
-    * 
+    *
    * @access   public
    * @return   array
    */
@ -89,68 +89,74 @@
    {
        return $this->elements;
    }
-    
+
    // Wrapper functions ------------------------------------------------------
-    
+
    /**
    * Set the 'dscription' element of feed item
-    * 
+    *
    * @access   public
    * @param    string  The content of 'description' element
    * @return   void
    */
-    public function setDescription($description) 
+    public function setDescription($description)
    {
-        $this->setElement('description', $description);
+        $tag = ($this->version == ATOM)? 'summary' : 'description';
+        $this->setElement($tag, $description);
    }
-    
+
    /**
    * @desc     Set the 'title' element of feed item
    * @access   public
    * @param    string  The content of 'title' element
    * @return   void
    */
-    public function setTitle($title) 
+    public function setTitle($title)
    {
-        $this->setElement('title', $title);      
+        $this->setElement('title', $title);
    }
-    
+
    /**
    * Set the 'date' element of feed item
-    * 
+    *
    * @access   public
    * @param    string  The content of 'date' element
    * @return   void
    */
-    public function setDate($date) 
+    public function setDate($date)
    {
        if(! is_numeric($date))
        {
            $date = strtotime($date);
        }
-      
-        if($this->version == RSS2) 
+
+        if($this->version == ATOM)
        {
-            $tag    = 'pubDate';
-            $value  = date(DATE_RSS, $date);
+        	$tag    = 'updated';
+        	$value  = date(DATE_ATOM, $date);
        }
-        else                                
+        elseif($this->version == RSS2)
        {
-            $tag    = 'dc:date';
-            $value  = date("Y-m-d", $date);
+        	$tag    = 'pubDate';
+        	$value  = date(DATE_RSS, $date);
        }
-        
-        $this->setElement($tag, $value);    
+        else
+        {
+        	$tag    = 'dc:date';
+        	$value  = date("Y-m-d", $date);
+        }
+
+        $this->setElement($tag, $value);
    }
-    
+
    /**
    * Set the 'link' element of feed item
-    * 
+    *
    * @access   public
    * @param    string  The content of 'link' element
    * @return   void
    */
-    public function setLink($link) 
+    public function setLink($link)
    {
        if($this->version == RSS2 || $this->version == RSS1)
        {
@ -161,27 +167,27 @@
        {
            $this->setElement('link','',array('href'=>$link));
            $this->setElement('id', FeedWriter::uuid($link,'urn:uuid:'));
-        } 
-        
+        }
+
    }

    /**
    * Set the 'source' element of feed item
-    * 
+    *
    * @access   public
    * @param    string  The content of 'source' element
    * @return   void
    */
-    public function setSource($link) 
+    public function setSource($link)
    {
        $attributes = array('url'=>$link);
        $this->setElement('source', "wallabag",$attributes);
    }
-    
+
    /**
    * Set the 'encloser' element of feed item
    * For RSS 2.0 only
-    * 
+    *
    * @access   public
    * @param    string  The url attribute of encloser tag
    * @param    string  The length attribute of encloser tag
@ -193,6 +199,6 @@
        $attributes = array('url'=>$url, 'length'=>$length, 'type'=>$type);
        $this->setElement('enclosure','',$attributes);
    }
-    
+
 } // end of class FeedItem
 ?>
--- a/inc/3rdparty/libraries/feedwriter/FeedWriter.php
+++ b/inc/3rdparty/libraries/feedwriter/FeedWriter.php
@ -97,15 +97,12 @@ define('JSONP', 3, true);
              header('X-content-type-options: nosniff');
          } elseif ($this->version == JSON) {
              header('Content-type: application/json; charset=UTF-8');
+              $this->json = new stdClass();
          } elseif ($this->version == JSONP) {
              header('Content-type: application/javascript; charset=UTF-8');
+              $this->json = new stdClass();
          }
        }
-      
-        if ($this->version == JSON || $this->version == JSONP) {
-          $this->json = new stdClass();
-        }
-      

        $this->printHead();
        $this->printChannels();
@ -116,6 +113,11 @@ define('JSONP', 3, true);
        }
    }

+    /**
+    * Return this writer's $items property by reference
+    * (note the & in the signature).
+    *
+    * @access   public
+    */
+    public function &getItems()
+    {
+    	return $this->items;
+    }
+
    /**
    * Create a new FeedItem.
    *
@ -199,7 +201,8 @@ define('JSONP', 3, true);
    */
    public function setDescription($description)
    {
-        $this->setChannelElement('description', $description);
+        // the channel element is named 'subtitle' for ATOM and 'description' for every other version
+        $tag = ($this->version == ATOM)? 'subtitle' : 'description';
+        // pass the $description parameter (the misspelled $desciption was undefined)
+        $this->setChannelElement($tag, $description);
    }

    /**
@ -244,7 +247,7 @@ define('JSONP', 3, true);
        {
            $out  = '<?xml version="1.0" encoding="utf-8"?>'."\n";
            if ($this->xsl) $out .= '<?xml-stylesheet type="text/xsl" href="'.htmlspecialchars($this->xsl).'"?>' . PHP_EOL;
-            $out .= '<rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:media="http://search.yahoo.com/mrss/">' . PHP_EOL;
+            $out .= '<rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:media="http://search.yahoo.com/mrss/">' . PHP_EOL;
            echo $out;
        }
        elseif ($this->version == JSON || $this->version == JSONP)
--- a/inc/3rdparty/libraries/html5/TreeBuilder.php
+++ b/inc/3rdparty/libraries/html5/TreeBuilder.php
@ -134,6 +134,7 @@ class HTML5_TreeBuilder {

    // Namespaces for foreign content
    const NS_HTML   = null; // to prevent DOM from requiring NS on everything
+    const NS_XHTML  = 'http://www.w3.org/1999/xhtml';
    const NS_MATHML = 'http://www.w3.org/1998/Math/MathML';
    const NS_SVG    = 'http://www.w3.org/2000/svg';
    const NS_XLINK  = 'http://www.w3.org/1999/xlink';
@ -3157,11 +3158,19 @@ class HTML5_TreeBuilder {
        }

    private function insertElement($token, $append = true) {
-        $el = $this->dom->createElementNS(self::NS_HTML, $token['name']);
+        //$el = $this->dom->createElementNS(self::NS_HTML, $token['name']);
+        $namespaceURI = strpos($token['name'], ':') ? self::NS_XHTML : self::NS_HTML;
+        $el = $this->dom->createElementNS($namespaceURI, $token['name']);

        if (!empty($token['attr'])) {
            foreach($token['attr'] as $attr) {
-                if(!$el->hasAttribute($attr['name'])) {
+
+				// mike@macgirvin.com 2011-11-17, check attribute name for
+				// validity (ignoring extenders and combiners) as illegal chars in names
+				// causes everything to abort
+
+ 				$valid = preg_match('/^[a-zA-Z\_\:]([\-a-zA-Z0-9\_\:\.]+$)/',$attr['name']);
+                if($attr['name'] && (!$el->hasAttribute($attr['name'])) && ($valid)) {
                    $el->setAttribute($attr['name'], $attr['value']);
                }
            }
--- a/inc/3rdparty/libraries/humble-http-agent/CookieJar.php
+++ b/inc/3rdparty/libraries/humble-http-agent/CookieJar.php
@ -1,404 +1,403 @@
-<?php
-/**
- * Cookie Jar
- * 
- * PHP class for handling cookies, as defined by the Netscape spec: 
- * <http://curl.haxx.se/rfc/cookie_spec.html>
- *
- * This class should be used to handle cookies (storing cookies from HTTP response messages, and
- * sending out cookies in HTTP request messages). This has been adapted for FiveFilters.org 
- * from the original version used in HTTP Navigator. See http://www.keyvan.net/code/http-navigator/
- * 
- * This class is mainly based on Cookies.pm <http://search.cpan.org/author/GAAS/libwww-perl-5.65/
- * lib/HTTP/Cookies.pm> from the libwww-perl collection <http://www.linpro.no/lwp/>.
- * Unlike Cookies.pm, this class only supports the Netscape cookie spec, not RFC 2965.
- * 
- * @version 0.5
- * @date 2011-03-15
- * @see http://php.net/HttpRequestPool
- * @author Keyvan Minoukadeh
- * @copyright 2011 Keyvan Minoukadeh
- * @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3
- */
-
-class CookieJar
-{
-    /**
-    * Cookies - array containing all cookies.
-    *
-    * <pre>
-    * Cookies are stored like this:
-    *   [domain][path][name] = array
-    * where array is:
-    *   0 => value, 1 => secure, 2 => expires
-    * </pre>
-    * @var array
-    * @access private
-    */
-    public $cookies = array();
-	public $debug = false;
-
-    /**
-    * Constructor
-    */
-    function __construct() {
-    }
-
-	protected function debug($msg, $file=null, $line=null) {
-		if ($this->debug) {
-			$mem = round(memory_get_usage()/1024, 2);
-			$memPeak = round(memory_get_peak_usage()/1024, 2);
-			echo '* ',$msg;
-			if (isset($file, $line)) echo " ($file line $line)";
-			echo ' - mem used: ',$mem," (peak: $memPeak)\n";	
-			ob_flush();
-			flush();
-		}
-	}	
-	
-    /**
-    * Get matching cookies
-    *
-    * Only use this method if you cannot use add_cookie_header(), for example, if you want to use
-    * this cookie jar class without using the request class.
-    *
-    * @param array $param associative array containing 'domain', 'path', 'secure' keys
-    * @return string
-    * @see add_cookie_header()
-    */
-    public function getMatchingCookies($url)
-    {
-		if (($parts = @parse_url($url)) && isset($parts['scheme'], $parts['host'], $parts['path'])) {
-			$param['domain'] = $parts['host'];
-			$param['path'] = $parts['path'];
-			$param['secure'] = (strtolower($parts['scheme']) == 'https');
-			unset($parts);
-		} else {
-			return false;
-		}
-        // RFC 2965 notes:
-        //  If multiple cookies satisfy the criteria above, they are ordered in
-        //  the Cookie header such that those with more specific Path attributes
-        //  precede those with less specific.  Ordering with respect to other
-        //  attributes (e.g., Domain) is unspecified.
-        $domain = $param['domain'];
-        if (strpos($domain, '.') === false) $domain .= '.local';
-        $request_path = $param['path'];
-        if ($request_path == '') $request_path = '/';
-        $request_secure = $param['secure'];
-        $now = time();
-        $matched_cookies = array();
-        // domain - find matching domains
-        $this->debug('Finding matching domains for '.$domain, __FILE__, __LINE__);
-        while (strpos($domain, '.') !== false) {
-            if (isset($this->cookies[$domain])) {
-                $this->debug(' domain match found: '.$domain);
-                $cookies =& $this->cookies[$domain];
-            } else {
-                $domain = $this->_reduce_domain($domain);
-                continue;
-            }
-            // paths - find matching paths starting from most specific
-            $this->debug('  - Finding matching paths for '.$request_path);
-            $paths = array_keys($cookies);
-            usort($paths, array($this, '_cmp_length'));
-            foreach ($paths as $path) {
-                // continue to next cookie if request path does not path-match cookie path
-                if (!$this->_path_match($request_path, $path)) continue;
-                // loop through cookie names
-                $this->debug('     path match found: '.$path);
-                foreach ($cookies[$path] as $name => $values) {
-                    // if this cookie is secure but request isn't, continue to next cookie
-                    if ($values[1] && !$request_secure) continue;
-                    // if cookie is not a session cookie and has expired, continue to next cookie
-                    if (is_int($values[2]) && ($values[2] < $now)) continue;
-                    // cookie matches request
-                    $this->debug('      cookie match: '.$name.'='.$values[0]);
-                    $matched_cookies[] = $name.'='.$values[0];
-                }
-            }
-            $domain = $this->_reduce_domain($domain);
-        }
-        // return cookies
-        return implode('; ', $matched_cookies);
-    }
-
-    /**
-    * Parse Set-Cookie values.
-    *
-    * Only use this method if you cannot use extract_cookies(), for example, if you want to use
-    * this cookie jar class without using the response class.
-    *
-    * @param array $set_cookies array holding 1 or more "Set-Cookie" header values
-    * @param array $param associative array containing 'host', 'path' keys
-    * @return void
-    * @see extract_cookies()
-    */
-    public function storeCookies($url, $set_cookies)
-    {
-        if (count($set_cookies) == 0) return;
-		$param = @parse_url($url);
-		if (!is_array($param) || !isset($param['host'])) return;
-        $request_host = $param['host'];
-        if (strpos($request_host, '.') === false) $request_host .= '.local';
-        $request_path = @$param['path'];
-        if ($request_path == '') $request_path = '/';
-        //
-        // loop through set-cookie headers
-        //
-        foreach ($set_cookies as $set_cookie) {
-            $this->debug('Parsing: '.$set_cookie);
-            // temporary cookie store (before adding to jar)
-            $tmp_cookie = array();
-            $param = explode(';', $set_cookie);
-            // loop through params
-            for ($x=0; $x<count($param); $x++) {
-                $key_val = explode('=', $param[$x], 2);
-                if (count($key_val) != 2) {
-                    // if the first param isn't a name=value pair, continue to the next set-cookie
-                    // header
-                    if ($x == 0) continue 2;
-                    // check for secure flag
-                    if (strtolower(trim($key_val[0])) == 'secure') $tmp_cookie['secure'] = true;
-                    // continue to next param
-                    continue;
-                }
-                list($key, $val) = array_map('trim', $key_val);
-                // first name=value pair is the cookie name and value
-                // the name and value are stored under 'name' and 'value' to avoid conflicts
-                // with later parameters.
-                if ($x == 0) {
-                    $tmp_cookie = array('name'=>$key, 'value'=>$val);
-                    continue;
-                }
-                $key = strtolower($key);
-                if (in_array($key, array('expires', 'path', 'domain', 'secure'))) {
-                    $tmp_cookie[$key] = $val;
-                }
-            }
-            //
-            // set cookie
-            //
-            // check domain
-            if (isset($tmp_cookie['domain']) && ($tmp_cookie['domain'] != $request_host) &&
-                    ($tmp_cookie['domain'] != ".$request_host")) {
-                $domain = $tmp_cookie['domain'];
-                if ((strpos($domain, '.') === false) && ($domain != 'local')) {
-                    $this->debug(' - domain "'.$domain.'" has no dot and is not a local domain');
-                    continue;
-                }
-                if (preg_match('/\.[0-9]+$/', $domain)) {
-                    $this->debug(' - domain "'.$domain.'" appears to be an ip address');
-                    continue;
-                }
-                if (substr($domain, 0, 1) != '.') $domain = ".$domain";
-                if (!$this->_domain_match($request_host, $domain)) {
-                    $this->debug(' - request host "'.$request_host.'" does not domain-match "'.$domain.'"');
-                    continue;
-                }
-            } else {
-                // if domain is not specified in the set-cookie header, domain will default to
-                // the request host
-                $domain = $request_host;
-            }
-            // check path
-            if (isset($tmp_cookie['path']) && ($tmp_cookie['path'] != '')) {
-                $path = urldecode($tmp_cookie['path']);
-                if (!$this->_path_match($request_path, $path)) {
-                    $this->debug(' - request path "'.$request_path.'" does not path-match "'.$path.'"');
-                    continue;
-                }
-            } else {
-                $path = $request_path;
-                $path = substr($path, 0, strrpos($path, '/'));
-                if ($path == '') $path = '/';
-            }
-            // check if secure
-            $secure = (isset($tmp_cookie['secure'])) ? true : false;
-            // check expiry
-            if (isset($tmp_cookie['expires'])) {
-                if (($expires = strtotime($tmp_cookie['expires'])) < 0) {
-                    $expires = null;
-                }
-            } else {
-                $expires = null;
-            }
-            // set cookie
-            $this->set_cookie($domain, $path, $tmp_cookie['name'], $tmp_cookie['value'], $secure, $expires);
-        }
-    }
-	
-	// return array of set-cookie values extracted from HTTP response headers (string $h)
-	public function extractCookies($h) {
-        $x = 0;
-        $lines = 0;
-        $headers = array();
-        $last_match = false;
-		$h = explode("\n", $h);
-        foreach ($h as $line) {
-			$line = rtrim($line);
-            $lines++;
-
-            $trimmed_line = trim($line);
-            if (isset($line_last)) {
-                // check if we have \r\n\r\n (indicating the end of headers)
-                // some servers will not use CRLF (\r\n), so we make CR (\r) optional.
-                // if (preg_match('/\015?\012\015?\012/', $line_last.$line)) {
-                //     break;
-                // }
-                // As an alternative, we can check if the current trimmed line is empty
-                if ($trimmed_line == '') {
-                    break;
-                }
-
-                // check for continuation line...
-                // RFC 2616 Section 2.2 "Basic Rules":
-                // HTTP/1.1 header field values can be folded onto multiple lines if the
-                // continuation line begins with a space or horizontal tab. All linear
-                // white space, including folding, has the same semantics as SP. A
-                // recipient MAY replace any linear white space with a single SP before
-                // interpreting the field value or forwarding the message downstream.
-                if ($last_match && preg_match('/^\s+(.*)/', $line, $match)) {
-                    // append to previous header value
-                    $headers[$x-1] .= ' '.rtrim($match[1]);
-                    continue;
-                }
-            }
-            $line_last = $line;
-
-            // split header name and value
-            if (preg_match('/^Set-Cookie\s*:\s*(.*)/i', $line, $match)) {
-                $headers[$x++] = rtrim($match[1]);
-                $last_match = true;
-            } else {
-                $last_match = false;
-            }
-        }
-        return $headers;
-	}
-
-    /**
-    * Set Cookie
-    * @param string $domain
-    * @param string $path
-    * @param string $name cookie name
-    * @param string $value cookie value
-    * @param bool $secure
-    * @param int $expires expiry time (null if session cookie, <= 0 will delete cookie)
-    * @return void
-    */
-    function set_cookie($domain, $path, $name, $value, $secure=false, $expires=null)
-    {
-        if ($domain == '') return;
-        if ($path == '') return;
-        if ($name == '') return;
-        // check if cookie needs to go
-        if (isset($expires) && ($expires <= 0)) {
-            if (isset($this->cookies[$domain][$path][$name])) unset($this->cookies[$domain][$path][$name]);
-            return;
-        }
-        if ($value == '') return;
-        $this->cookies[$domain][$path][$name] = array($value, $secure, $expires);
-        return;
-    }
-
-    /**
-    * Clear cookies - [domain [,path [,name]]] - call method with no arguments to clear all cookies.
-    * @param string $domain
-    * @param string $path
-    * @param string $name
-    * @return void
-    */
-    function clear($domain=null, $path=null, $name=null)
-    {
-        if (!isset($domain)) {
-            $this->cookies = array();
-        } elseif (!isset($path)) {
-            if (isset($this->cookies[$domain])) unset($this->cookies[$domain]);
-        } elseif (!isset($name)) {
-            if (isset($this->cookies[$domain][$path])) unset($this->cookies[$domain][$path]);
-        } elseif (isset($name)) {
-            if (isset($this->cookies[$domain][$path][$name])) unset($this->cookies[$domain][$path][$name]);
-        }
-    }
-
-    /**
-    * Compare string length - used for sorting
-    * @access private
-    * @return int
-    */
-    function _cmp_length($a, $b)
-    {
-        $la = strlen($a); $lb = strlen($b);
-        if ($la == $lb) return 0;
-        return ($la > $lb) ? -1 : 1;
-    }
-
-    /**
-    * Reduce domain
-    * @param string $domain
-    * @return string
-    * @access private
-    */
-    function _reduce_domain($domain)
-    {
-        if ($domain == '') return '';
-        if (substr($domain, 0, 1) == '.') return substr($domain, 1);
-        return substr($domain, strpos($domain, '.'));
-    }
-
-    /**
-    * Path match - check if path1 path-matches path2
-    *
-    * From RFC 2965: 
-    *   <i>For two strings that represent paths, P1 and P2, P1 path-matches P2
-    *   if P2 is a prefix of P1 (including the case where P1 and P2 string-
-    *   compare equal).  Thus, the string /tec/waldo path-matches /tec.</i>
-    * @param string $path1
-    * @param string $path2
-    * @return bool
-    * @access private
-    */
-    function _path_match($path1, $path2)
-    {
-        return (substr($path1, 0, strlen($path2)) == $path2);
-    }
-
-    /**
-    * Domain match - check if domain1 domain-matches domain2
-    *
-    * A few extracts from RFC 2965: 
-    *  -  A Set-Cookie2 from request-host y.x.foo.com for Domain=.foo.com
-    *     would be rejected, because H is y.x and contains a dot.
-    *
-    *  -  A Set-Cookie2 from request-host x.foo.com for Domain=.foo.com
-    *     would be accepted.
-    *
-    *  -  A Set-Cookie2 with Domain=.com or Domain=.com., will always be
-    *     rejected, because there is no embedded dot.
-    *
-    *  -  A Set-Cookie2 from request-host example for Domain=.local will
-    *     be accepted, because the effective host name for the request-
-    *     host is example.local, and example.local domain-matches .local.
-    *
-    * I'm ignoring the first point for now (must check to see how other browsers handle
-    * this rule for Set-Cookie headers)
-    *
-    * @param string $domain1
-    * @param string $domain2
-    * @return bool
-    * @access private
-    */
-    function _domain_match($domain1, $domain2)
-    {
-        $domain1 = strtolower($domain1);
-        $domain2 = strtolower($domain2);
-        while (strpos($domain1, '.') !== false) {
-            if ($domain1 == $domain2) return true;
-            $domain1 = $this->_reduce_domain($domain1);
-            continue;
-        }
-        return false;
-    }
-}
-?>
+<?php
+/**
+ * Cookie Jar
+ * 
+ * PHP class for handling cookies, as defined by the Netscape spec: 
+ * <http://curl.haxx.se/rfc/cookie_spec.html>
+ *
+ * This class should be used to handle cookies (storing cookies from HTTP response messages, and
+ * sending out cookies in HTTP request messages). This has been adapted for FiveFilters.org 
+ * from the original version used in HTTP Navigator. See http://www.keyvan.net/code/http-navigator/
+ * 
+ * This class is mainly based on Cookies.pm <http://search.cpan.org/author/GAAS/libwww-perl-5.65/
+ * lib/HTTP/Cookies.pm> from the libwww-perl collection <http://www.linpro.no/lwp/>.
+ * Unlike Cookies.pm, this class only supports the Netscape cookie spec, not RFC 2965.
+ * 
+ * @version 0.5
+ * @date 2011-03-15
+ * @see http://php.net/HttpRequestPool
+ * @author Keyvan Minoukadeh
+ * @copyright 2011 Keyvan Minoukadeh
+ * @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3
+ */
+
+class CookieJar
+{
+    /**
+    * Cookies - array containing all cookies.
+    *
+    * <pre>
+    * Cookies are stored like this:
+    *   [domain][path][name] = array
+    * where array is:
+    *   0 => value, 1 => secure, 2 => expires
+    * </pre>
+    * The property is public, but storeCookies(), set_cookie() and clear() are the
+    * intended ways of changing it.
+    * @var array
+    */
+    public $cookies = array();
+
+    /**
+    * When true, debug() echoes a trace of the parsing and matching steps.
+    * @var bool
+    */
+    public $debug = false;
+
+    /**
+    * Constructor
+    */
+    function __construct() {
+    }
+
+    /**
+    * Echo a debug message, followed by current and peak memory usage (in KB).
+    * Does nothing unless $this->debug is true.
+    *
+    * @param string $msg message to print
+    * @param string $file source file name, printed only when $line is also given
+    * @param int $line source line number, printed only when $file is also given
+    * @return void
+    */
+    protected function debug($msg, $file=null, $line=null) {
+        if (!$this->debug) return;
+        $mem = round(memory_get_usage()/1024, 2);
+        $memPeak = round(memory_get_peak_usage()/1024, 2);
+        echo '* ',$msg;
+        if (isset($file, $line)) echo " ($file line $line)";
+        echo ' - mem used: ',$mem," (peak: $memPeak)\n";
+        // ob_flush() raises a notice when no output buffer is active, so only
+        // flush PHP's own buffer when there is one
+        if (ob_get_level() > 0) ob_flush();
+        flush();
+    }
+
+    /**
+    * Get matching cookies
+    *
+    * Builds the value of a Cookie request header for the given URL from the
+    * cookies currently held in the jar. Secure cookies are only returned for
+    * https URLs and cookies whose expiry time has passed are skipped.
+    *
+    * @param string $url absolute URL of the request; scheme and host are required,
+    *                    a missing path is treated as '/'
+    * @return string|false cookies formatted as "name=value; name2=value2" (empty string
+    *                      if nothing matches), or false if $url cannot be parsed
+    */
+    public function getMatchingCookies($url)
+    {
+        $parts = @parse_url($url);
+        if (!is_array($parts) || !isset($parts['scheme'], $parts['host'])) {
+            return false;
+        }
+        $domain = $parts['host'];
+        // hosts without a dot are treated as members of the .local domain
+        if (strpos($domain, '.') === false) $domain .= '.local';
+        // a URL such as http://example.org has no path component: treat it as the
+        // root path (as storeCookies() does) so cookies stored for it are found again
+        $request_path = (isset($parts['path']) && $parts['path'] != '') ? $parts['path'] : '/';
+        $request_secure = (strtolower($parts['scheme']) == 'https');
+        $now = time();
+        $matched_cookies = array();
+        // RFC 2965 notes:
+        //  If multiple cookies satisfy the criteria above, they are ordered in
+        //  the Cookie header such that those with more specific Path attributes
+        //  precede those with less specific.  Ordering with respect to other
+        //  attributes (e.g., Domain) is unspecified.
+        //
+        // domain - walk from the full host name up through its parent domains
+        $this->debug('Finding matching domains for '.$domain, __FILE__, __LINE__);
+        while (strpos($domain, '.') !== false) {
+            if (isset($this->cookies[$domain])) {
+                $this->debug(' domain match found: '.$domain);
+                $cookies = $this->cookies[$domain];
+                // paths - find matching paths starting from most specific (longest)
+                $this->debug('  - Finding matching paths for '.$request_path);
+                $paths = array_keys($cookies);
+                usort($paths, array($this, '_cmp_length'));
+                foreach ($paths as $path) {
+                    // skip cookie paths the request path does not path-match
+                    if (!$this->_path_match($request_path, $path)) continue;
+                    $this->debug('     path match found: '.$path);
+                    foreach ($cookies[$path] as $name => $values) {
+                        // if this cookie is secure but request isn't, continue to next cookie
+                        if ($values[1] && !$request_secure) continue;
+                        // if cookie is not a session cookie and has expired, continue to next cookie
+                        if (is_int($values[2]) && ($values[2] < $now)) continue;
+                        // cookie matches request
+                        $this->debug('      cookie match: '.$name.'='.$values[0]);
+                        $matched_cookies[] = $name.'='.$values[0];
+                    }
+                }
+            }
+            $domain = $this->_reduce_domain($domain);
+        }
+        return implode('; ', $matched_cookies);
+    }
+
+    /**
+    * Parse Set-Cookie values and store the acceptable cookies in the jar.
+    *
+    * A cookie is rejected when its Domain attribute has no dot (and is not 'local'),
+    * looks like an IP address, or is not domain-matched by the request host, or when
+    * its Path attribute is not a prefix of the request path.
+    *
+    * @param string $url URL of the request that produced the Set-Cookie headers
+    * @param array $set_cookies array holding 1 or more "Set-Cookie" header values
+    * @return void
+    * @see extractCookies()
+    */
+    public function storeCookies($url, $set_cookies)
+    {
+        if (!is_array($set_cookies) || count($set_cookies) == 0) return;
+        $url_parts = @parse_url($url);
+        if (!is_array($url_parts) || !isset($url_parts['host'])) return;
+        $request_host = $url_parts['host'];
+        if (strpos($request_host, '.') === false) $request_host .= '.local';
+        $request_path = isset($url_parts['path']) ? $url_parts['path'] : '';
+        if ($request_path == '') $request_path = '/';
+        //
+        // loop through set-cookie headers
+        //
+        foreach ($set_cookies as $set_cookie) {
+            $this->debug('Parsing: '.$set_cookie);
+            // temporary cookie store (before adding to jar)
+            $tmp_cookie = array();
+            $attrs = explode(';', $set_cookie);
+            $attr_count = count($attrs);
+            // loop through params
+            for ($x=0; $x<$attr_count; $x++) {
+                $key_val = explode('=', $attrs[$x], 2);
+                if (count($key_val) != 2) {
+                    // if the first param isn't a name=value pair, continue to the next set-cookie
+                    // header
+                    if ($x == 0) continue 2;
+                    // check for secure flag
+                    if (strtolower(trim($key_val[0])) == 'secure') $tmp_cookie['secure'] = true;
+                    // continue to next param
+                    continue;
+                }
+                list($key, $val) = array_map('trim', $key_val);
+                // first name=value pair is the cookie name and value
+                // the name and value are stored under 'name' and 'value' to avoid conflicts
+                // with later parameters.
+                if ($x == 0) {
+                    $tmp_cookie = array('name'=>$key, 'value'=>$val);
+                    continue;
+                }
+                $key = strtolower($key);
+                if (in_array($key, array('expires', 'path', 'domain', 'secure'))) {
+                    $tmp_cookie[$key] = $val;
+                }
+            }
+            //
+            // set cookie
+            //
+            // check domain
+            if (isset($tmp_cookie['domain']) && ($tmp_cookie['domain'] != $request_host) &&
+                    ($tmp_cookie['domain'] != ".$request_host")) {
+                $domain = $tmp_cookie['domain'];
+                if ((strpos($domain, '.') === false) && ($domain != 'local')) {
+                    $this->debug(' - domain "'.$domain.'" has no dot and is not a local domain');
+                    continue;
+                }
+                if (preg_match('/\.[0-9]+$/', $domain)) {
+                    $this->debug(' - domain "'.$domain.'" appears to be an ip address');
+                    continue;
+                }
+                if (substr($domain, 0, 1) != '.') $domain = ".$domain";
+                if (!$this->_domain_match($request_host, $domain)) {
+                    $this->debug(' - request host "'.$request_host.'" does not domain-match "'.$domain.'"');
+                    continue;
+                }
+            } else {
+                // if domain is not specified in the set-cookie header, domain will default to
+                // the request host
+                $domain = $request_host;
+            }
+            // check path
+            if (isset($tmp_cookie['path']) && ($tmp_cookie['path'] != '')) {
+                $path = urldecode($tmp_cookie['path']);
+                if (!$this->_path_match($request_path, $path)) {
+                    $this->debug(' - request path "'.$request_path.'" does not path-match "'.$path.'"');
+                    continue;
+                }
+            } else {
+                // default path: the request path up to (not including) its last slash
+                $path = substr($request_path, 0, strrpos($request_path, '/'));
+                if ($path == '') $path = '/';
+            }
+            // check if secure
+            $secure = isset($tmp_cookie['secure']);
+            // check expiry
+            $expires = null;
+            if (isset($tmp_cookie['expires'])) {
+                $expires = strtotime($tmp_cookie['expires']);
+                // strtotime() returns false for a date it cannot parse. If that false
+                // reached set_cookie() it would be read as "expired" and the cookie
+                // would be dropped, so treat an unparsable date as a session cookie.
+                if ($expires === false || $expires < 0) $expires = null;
+            }
+            // set cookie
+            $this->set_cookie($domain, $path, $tmp_cookie['name'], $tmp_cookie['value'], $secure, $expires);
+        }
+    }
+
+    /**
+    * Extract Set-Cookie header values from raw HTTP response headers.
+    *
+    * Reading stops at the first empty line after the first line (end of headers).
+    * Folded continuation lines (starting with whitespace) that directly follow a
+    * Set-Cookie header are appended to it, separated by a single space.
+    *
+    * @param string $h raw response headers, lines separated by LF or CRLF
+    * @return array list of Set-Cookie header values, in order of appearance
+    */
+    public function extractCookies($h) {
+        $headers = array();
+        $last_match = false;
+        $first_line = true;
+        foreach (explode("\n", $h) as $line) {
+            $line = rtrim($line);
+            if (!$first_line) {
+                // an empty line marks the end of the headers
+                // (some servers will not use CRLF, so the CR has already been trimmed)
+                if (trim($line) == '') {
+                    break;
+                }
+                // check for continuation line...
+                // RFC 2616 Section 2.2 "Basic Rules":
+                // HTTP/1.1 header field values can be folded onto multiple lines if the
+                // continuation line begins with a space or horizontal tab. All linear
+                // white space, including folding, has the same semantics as SP. A
+                // recipient MAY replace any linear white space with a single SP before
+                // interpreting the field value or forwarding the message downstream.
+                if ($last_match && preg_match('/^\s+(.*)/', $line, $match)) {
+                    // append to previous header value
+                    $headers[count($headers)-1] .= ' '.rtrim($match[1]);
+                    continue;
+                }
+            }
+            $first_line = false;
+
+            // split header name and value
+            if (preg_match('/^Set-Cookie\s*:\s*(.*)/i', $line, $match)) {
+                $headers[] = rtrim($match[1]);
+                $last_match = true;
+            } else {
+                $last_match = false;
+            }
+        }
+        return $headers;
+    }
+
+    /**
+    * Set Cookie
+    *
+    * Nothing is stored when $domain, $path, $name or $value is empty.
+    *
+    * @param string $domain
+    * @param string $path
+    * @param string $name cookie name
+    * @param string $value cookie value
+    * @param bool $secure
+    * @param int $expires expiry time (null if session cookie, <= 0 will delete cookie)
+    * @return void
+    */
+    public function set_cookie($domain, $path, $name, $value, $secure=false, $expires=null)
+    {
+        if ($domain == '') return;
+        if ($path == '') return;
+        if ($name == '') return;
+        // check if cookie needs to go
+        if (isset($expires) && ($expires <= 0)) {
+            if (isset($this->cookies[$domain][$path][$name])) unset($this->cookies[$domain][$path][$name]);
+            return;
+        }
+        if ($value == '') return;
+        $this->cookies[$domain][$path][$name] = array($value, $secure, $expires);
+    }
+
+    /**
+    * Clear cookies - [domain [,path [,name]]] - call method with no arguments to clear all cookies.
+    * @param string $domain
+    * @param string $path
+    * @param string $name
+    * @return void
+    */
+    public function clear($domain=null, $path=null, $name=null)
+    {
+        if (!isset($domain)) {
+            $this->cookies = array();
+        } elseif (!isset($path)) {
+            if (isset($this->cookies[$domain])) unset($this->cookies[$domain]);
+        } elseif (!isset($name)) {
+            if (isset($this->cookies[$domain][$path])) unset($this->cookies[$domain][$path]);
+        } else {
+            if (isset($this->cookies[$domain][$path][$name])) unset($this->cookies[$domain][$path][$name]);
+        }
+    }
+
+    /**
+    * Compare string length - used for sorting (longest string first)
+    * @param string $a
+    * @param string $b
+    * @return int -1 if $a is longer, 1 if $b is longer, 0 if equal length
+    */
+    public function _cmp_length($a, $b)
+    {
+        $la = strlen($a); $lb = strlen($b);
+        if ($la == $lb) return 0;
+        return ($la > $lb) ? -1 : 1;
+    }
+
+    /**
+    * Reduce domain - strip the leading dot if there is one, otherwise strip the
+    * first label (keeping the dot): www.example.org -> .example.org -> example.org
+    * @param string $domain
+    * @return string
+    */
+    public function _reduce_domain($domain)
+    {
+        if ($domain == '') return '';
+        if (substr($domain, 0, 1) == '.') return substr($domain, 1);
+        return substr($domain, strpos($domain, '.'));
+    }
+
+    /**
+    * Path match - check if path1 path-matches path2
+    *
+    * From RFC 2965:
+    *   <i>For two strings that represent paths, P1 and P2, P1 path-matches P2
+    *   if P2 is a prefix of P1 (including the case where P1 and P2 string-
+    *   compare equal).  Thus, the string /tec/waldo path-matches /tec.</i>
+    * @param string $path1
+    * @param string $path2
+    * @return bool
+    */
+    public function _path_match($path1, $path2)
+    {
+        // strncmp() compares bytes, so numeric-looking strings cannot be loosely equal
+        return (strncmp($path1, $path2, strlen($path2)) === 0);
+    }
+
+    /**
+    * Domain match - check if domain1 domain-matches domain2
+    *
+    * A few extracts from RFC 2965:
+    *  -  A Set-Cookie2 from request-host y.x.foo.com for Domain=.foo.com
+    *     would be rejected, because H is y.x and contains a dot.
+    *
+    *  -  A Set-Cookie2 from request-host x.foo.com for Domain=.foo.com
+    *     would be accepted.
+    *
+    *  -  A Set-Cookie2 with Domain=.com or Domain=.com., will always be
+    *     rejected, because there is no embedded dot.
+    *
+    *  -  A Set-Cookie2 from request-host example for Domain=.local will
+    *     be accepted, because the effective host name for the request-
+    *     host is example.local, and example.local domain-matches .local.
+    *
+    * I'm ignoring the first point for now (must check to see how other browsers handle
+    * this rule for Set-Cookie headers)
+    *
+    * @param string $domain1
+    * @param string $domain2
+    * @return bool
+    */
+    public function _domain_match($domain1, $domain2)
+    {
+        $domain1 = strtolower($domain1);
+        $domain2 = strtolower($domain2);
+        // compare domain1 and each of its parent domains against domain2
+        while (strpos($domain1, '.') !== false) {
+            if ($domain1 === $domain2) return true;
+            $domain1 = $this->_reduce_domain($domain1);
+        }
+        return false;
+    }
+}
--- a/inc/3rdparty/libraries/humble-http-agent/HumbleHttpAgent.php
+++ b/inc/3rdparty/libraries/humble-http-agent/HumbleHttpAgent.php
--- a/inc/3rdparty/libraries/humble-http-agent/SimplePie_HumbleHttpAgent.php
+++ b/inc/3rdparty/libraries/humble-http-agent/SimplePie_HumbleHttpAgent.php
@ -1,79 +1,78 @@
-<?php
-/**
- * Humble HTTP Agent extension for SimplePie_File
- * 
- * This class is designed to extend and override SimplePie_File
- * in order to prevent duplicate HTTP requests being sent out.
- * The idea is to initialise an instance of Humble HTTP Agent
- * and attach it, to a static class variable, of this class.
- * SimplePie will then automatically initialise this class
- * 
- * @date 2011-02-28
- */
-
-class SimplePie_HumbleHttpAgent extends SimplePie_File
-{
-	protected static $agent;
-	var $url;
-	var $useragent;
-	var $success = true;
-	var $headers = array();
-	var $body;
-	var $status_code;
-	var $redirects = 0;
-	var $error;
-	var $method = SIMPLEPIE_FILE_SOURCE_NONE;
-
-	public static function set_agent(HumbleHttpAgent $agent) {
-		self::$agent = $agent;
-	}
-	
-	public function __construct($url, $timeout = 10, $redirects = 5, $headers = null, $useragent = null, $force_fsockopen = false) {
-		if (class_exists('idna_convert'))
-		{
-			$idn = new idna_convert();
-			$parsed = SimplePie_Misc::parse_url($url);
-			$url = SimplePie_Misc::compress_parse_url($parsed['scheme'], $idn->encode($parsed['authority']), $parsed['path'], $parsed['query'], $parsed['fragment']);
-		}
-		$this->url = $url;
-		$this->useragent = $useragent;
-		if (preg_match('/^http(s)?:\/\//i', $url))
-		{
-			if (!is_array($headers))
-			{
-				$headers = array();
-			}
-			$this->method = SIMPLEPIE_FILE_SOURCE_REMOTE | SIMPLEPIE_FILE_SOURCE_CURL;
-			$headers2 = array();
-			foreach ($headers as $key => $value) {
-				$headers2[] = "$key: $value";
-			}
-			//TODO: allow for HTTP headers
-			// curl_setopt($fp, CURLOPT_HTTPHEADER, $headers2);
-
-			$response = self::$agent->get($url);
-			
-			if ($response === false || !isset($response['status_code'])) {
-				$this->error = 'failed to fetch URL';
-				$this->success = false;
-			} else {
-				// The extra lines at the end are there to satisfy SimplePie's HTTP parser.
-				// The class expects a full HTTP message, whereas we're giving it only
-				// headers - the new lines indicate the start of the body.
-				$parser = new SimplePie_HTTP_Parser($response['headers']."\r\n\r\n");
-				if ($parser->parse()) {
-					$this->headers = $parser->headers;
-					//$this->body = $parser->body;
-					$this->body = $response['body'];
-					$this->status_code = $parser->status_code;
-				}
-			}
-		}
-		else
-		{
-			$this->error = 'invalid URL';
-			$this->success = false;
-		}
-	}
-}
-?>
+<?php
+/**
+ * Humble HTTP Agent extension for SimplePie_File
+ * 
+ * This class is designed to extend and override SimplePie_File
+ * in order to prevent duplicate HTTP requests being sent out.
+ * The idea is to initialise an instance of Humble HTTP Agent
+ * and attach it, to a static class variable, of this class.
+ * SimplePie will then automatically initialise this class
+ * 
+ * @date 2011-02-28
+ */
+
+class SimplePie_HumbleHttpAgent extends SimplePie_File
+{
+	/**
+	 * Shared HTTP agent used to perform every request; set with set_agent().
+	 * @var HumbleHttpAgent
+	 */
+	protected static $agent;
+	public $url;
+	public $useragent;
+	public $success = true;
+	public $headers = array();
+	public $body;
+	public $status_code;
+	public $redirects = 0;
+	public $error;
+	public $method = SIMPLEPIE_FILE_SOURCE_NONE;
+
+	/**
+	 * Attach the HTTP agent that all instances of this class will use.
+	 *
+	 * @param HumbleHttpAgent $agent
+	 * @return void
+	 */
+	public static function set_agent(HumbleHttpAgent $agent) {
+		self::$agent = $agent;
+	}
+
+	/**
+	 * Fetch $url through the shared agent and populate this object's fields.
+	 *
+	 * The signature mirrors the parameters SimplePie passes to its file class
+	 * (NOTE(review): confirm against the bundled SimplePie_File), but $timeout,
+	 * $redirects, $headers and $force_fsockopen are currently not used.
+	 * On failure $success is set to false and $error holds a short message.
+	 *
+	 * @param string $url URL to fetch (must start with http:// or https://)
+	 * @param int $timeout unused
+	 * @param int $redirects unused
+	 * @param array $headers unused (see TODO below)
+	 * @param string $useragent stored in $this->useragent only
+	 * @param bool $force_fsockopen unused
+	 */
+	public function __construct($url, $timeout = 10, $redirects = 5, $headers = null, $useragent = null, $force_fsockopen = false) {
+		if (class_exists('idna_convert'))
+		{
+			// convert an internationalised host name to its ASCII form
+			$idn = new idna_convert();
+			$parsed = SimplePie_Misc::parse_url($url);
+			$url = SimplePie_Misc::compress_parse_url($parsed['scheme'], $idn->encode($parsed['authority']), $parsed['path'], $parsed['query'], $parsed['fragment']);
+		}
+		$this->url = $url;
+		$this->useragent = $useragent;
+		if (!preg_match('/^http(s)?:\/\//i', $url))
+		{
+			$this->error = 'invalid URL';
+			$this->success = false;
+			return;
+		}
+		$this->method = SIMPLEPIE_FILE_SOURCE_REMOTE | SIMPLEPIE_FILE_SOURCE_CURL;
+		//TODO: allow for HTTP headers ($headers is currently ignored)
+
+		// report a missing agent through the normal error fields instead of
+		// failing with a fatal "call to a member function on a non-object" error
+		if (!isset(self::$agent))
+		{
+			$this->error = 'no HTTP agent set (call set_agent() first)';
+			$this->success = false;
+			return;
+		}
+
+		$response = self::$agent->get($url);
+
+		if ($response === false || !isset($response['status_code'])) {
+			$this->error = 'failed to fetch URL';
+			$this->success = false;
+			return;
+		}
+		// The extra lines at the end are there to satisfy SimplePie's HTTP parser.
+		// The class expects a full HTTP message, whereas we're giving it only
+		// headers - the new lines indicate the start of the body.
+		$parser = new SimplePie_HTTP_Parser($response['headers']."\r\n\r\n");
+		// NOTE(review): if the headers cannot be parsed, headers/body/status_code stay
+		// unset while $success remains true - confirm this is the intended behaviour
+		if ($parser->parse()) {
+			$this->headers = $parser->headers;
+			// the body comes from the agent's response, not from the parser
+			$this->body = $response['body'];
+			$this->status_code = $parser->status_code;
+		}
+	}
+}
--- a/inc/3rdparty/libraries/language-detect/LanguageDetect.php
+++ b/inc/3rdparty/libraries/language-detect/LanguageDetect.php
--- a/inc/3rdparty/libraries/readability/Readability.php
+++ b/inc/3rdparty/libraries/readability/Readability.php
--- a/inc/3rdparty/makefulltextfeed.php
+++ b/inc/3rdparty/makefulltextfeed.php
@ -3,8 +3,8 @@
 // Author: Keyvan Minoukadeh
 // Copyright (c) 2013 Keyvan Minoukadeh
 // License: AGPLv3
-// Version: 3.1
-// Date: 2013-03-05
+// Version: 3.2
+// Date: 2013-05-13
 // More info: http://fivefilters.org/content-only/
 // Help: http://help.fivefilters.org

@ -25,12 +25,8 @@ along with this program.  If not, see <http://www.gnu.org/licenses/>.

 // Usage
 // -----
-// Request this file passing it your feed in the querystring: makefulltextfeed.php?url=mysite.org
-// The following options can be passed in the querystring:
-// * URL: url=[feed or website url] (required, should be URL-encoded - in php: urlencode($url))
-// * URL points to HTML (not feed): html=true (optional, by default it's automatically detected)
-// * API key: key=[api key] (optional, refer to config.php)
-// * Max entries to process: max=[max number of items] (optional)
+// Request this file passing it a web page or feed URL in the querystring: makefulltextfeed.php?url=example.org/article
+// For more request parameters, see http://help.fivefilters.org/customer/portal/articles/226660-usage

 error_reporting(E_ALL ^ E_NOTICE);
 ini_set("display_errors", 1);
@ -76,8 +72,8 @@ header('X-Robots-Tag: noindex, nofollow');
 ////////////////////////////////
 // Check if service is enabled
 ////////////////////////////////
-if (!$options->enabled) { 
-	die('The full-text RSS service is currently disabled'); 
+if (!$options->enabled) {
+	die('The full-text RSS service is currently disabled');
 }

 ////////////////////////////////
@ -121,8 +117,8 @@ $options->smart_cache = $options->smart_cache && function_exists('apc_inc');
 ////////////////////////////////
 // Check for feed URL
 ////////////////////////////////
-if (!isset($_GET['url'])) { 
-	die('No URL supplied'); 
+if (!isset($_GET['url'])) {
+	die('No URL supplied');
 }
 $url = trim($_GET['url']);
 if (strtolower(substr($url, 0, 7)) == 'feed://') {
@ -161,10 +157,12 @@ if (isset($_GET['key']) && ($key_index = array_search($_GET['key'], $options->ap
 	if (isset($_GET['links'])) $redirect .= '&links='.urlencode($_GET['links']);
 	if (isset($_GET['exc'])) $redirect .= '&exc='.urlencode($_GET['exc']);
 	if (isset($_GET['format'])) $redirect .= '&format='.urlencode($_GET['format']);
-	if (isset($_GET['callback'])) $redirect .= '&callback='.urlencode($_GET['callback']);	
+	if (isset($_GET['callback'])) $redirect .= '&callback='.urlencode($_GET['callback']);
 	if (isset($_GET['l'])) $redirect .= '&l='.urlencode($_GET['l']);
 	if (isset($_GET['xss'])) $redirect .= '&xss';
 	if (isset($_GET['use_extracted_title'])) $redirect .= '&use_extracted_title';
+	if (isset($_GET['content'])) $redirect .= '&content='.urlencode($_GET['content']);
+	if (isset($_GET['summary'])) $redirect .= '&summary='.urlencode($_GET['summary']);
 	if (isset($_GET['debug'])) $redirect .= '&debug';
 	if ($debug_mode) {
 		debug('Redirecting to hide access key, follow URL below to continue');
@ -177,7 +175,7 @@ if (isset($_GET['key']) && ($key_index = array_search($_GET['key'], $options->ap

 ///////////////////////////////////////////////
 // Set timezone.
-// Prevents warnings, but needs more testing - 
+// Prevents warnings, but needs more testing -
 // perhaps if timezone is set in php.ini we
 // don't need to set it at all...
 ///////////////////////////////////////////////
@ -199,7 +197,7 @@ if (isset($_GET['key']) && isset($_GET['hash']) && isset($options->api_keys[(int
 }
 $key_index = ($valid_key) ? (int)$_GET['key'] : 0;
 if (!$valid_key && $options->key_required) {
-	die('A valid key must be supplied'); 
+	die('A valid key must be supplied');
 }
 if (!$valid_key && isset($_GET['key']) && $_GET['key'] != '') {
 	die('The entered key is invalid');
@ -250,6 +248,28 @@ if ($options->favour_feed_titles == 'user') {
 	$favour_feed_titles = $options->favour_feed_titles;
 }

+///////////////////////////////////////////////
+// Include full content in output?
+///////////////////////////////////////////////
+if ($options->content === 'user') {
+	if (isset($_GET['content']) && $_GET['content'] === '0') {
+		$options->content = false;
+	} else {
+		$options->content = true;
+	}
+}
+
+///////////////////////////////////////////////
+// Include summaries in output?
+///////////////////////////////////////////////
+if ($options->summary === 'user') {
+	if (isset($_GET['summary']) && $_GET['summary'] === '1') {
+		$options->summary = true;
+	} else {
+		$options->summary = false;
+	}
+}
+
 ///////////////////////////////////////////////
 // Exclude items if extraction fails
 ///////////////////////////////////////////////
@ -272,15 +292,6 @@ if ($options->detect_language === 'user') {
 	$detect_language = $options->detect_language;
 }

-if ($detect_language >= 2) {
-	$language_codes = array('albanian' => 'sq','arabic' => 'ar','azeri' => 'az','bengali' => 'bn','bulgarian' => 'bg',
-	'cebuano' => 'ceb', // ISO 639-2
-	'croatian' => 'hr','czech' => 'cs','danish' => 'da','dutch' => 'nl','english' => 'en','estonian' => 'et','farsi' => 'fa','finnish' => 'fi','french' => 'fr','german' => 'de','hausa' => 'ha',
-	'hawaiian' => 'haw', // ISO 639-2 
-	'hindi' => 'hi','hungarian' => 'hu','icelandic' => 'is','indonesian' => 'id','italian' => 'it','kazakh' => 'kk','kyrgyz' => 'ky','latin' => 'la','latvian' => 'lv','lithuanian' => 'lt','macedonian' => 'mk','mongolian' => 'mn','nepali' => 'ne','norwegian' => 'no','pashto' => 'ps',
-	'pidgin' => 'cpe', // ISO 639-2  
-	'polish' => 'pl','portuguese' => 'pt','romanian' => 'ro','russian' => 'ru','serbian' => 'sr','slovak' => 'sk','slovene' => 'sl','somali' => 'so','spanish' => 'es','swahili' => 'sw','swedish' => 'sv','tagalog' => 'tl','turkish' => 'tr','ukrainian' => 'uk','urdu' => 'ur','uzbek' => 'uz','vietnamese' => 'vi','welsh' => 'cy');
-}
 $use_cld = extension_loaded('cld') && (version_compare(PHP_VERSION, '5.3.0') >= 0);

 /////////////////////////////////////
@ -330,7 +341,7 @@ if ($options->cors) header('Access-Control-Allow-Origin: *');
 //////////////////////////////////
 if ($options->caching) {
 	debug('Caching is enabled...');
-	$cache_id = md5($max.$url.$valid_key.$links.$favour_feed_titles.$xss_filter.$exclude_on_fail.$format.$detect_language.(int)isset($_GET['pubsub']));
+	$cache_id = md5($max.$url.(int)$valid_key.$links.(int)$favour_feed_titles.(int)$options->content.(int)$options->summary.(int)$xss_filter.(int)$exclude_on_fail.$format.$detect_language.(int)isset($_GET['pubsub']));
 	$check_cache = true;
 	if ($options->apc && $options->smart_cache) {
 		apc_add("cache.$cache_id", 0, 10*60);
@ -468,7 +479,7 @@ if ($img_url = $feed->get_image_url()) {
 ////////////////////////////////////////////
 // Loop through feed items
 ////////////////////////////////////////////
-$items = $feed->get_items(0, $max);	
+$items = $feed->get_items(0, $max);
 // Request all feed items in parallel (if supported)
 $urls_sanitized = array();
 $urls = array();
@ -550,24 +561,43 @@ foreach ($items as $key => $item) {
 			$is_single_page = false;
 			if ($single_page_response = getSinglePage($item, $html, $effective_url)) {
 				$is_single_page = true;
-				$html = $single_page_response['body'];
-				// remove strange things
-				$html = str_replace('</[>', '', $html);	
-				$html = convert_to_utf8($html, $single_page_response['headers']);
 				$effective_url = $single_page_response['effective_url'];
-				debug("Retrieved single-page view from $effective_url");
+				// check if action defined for returned Content-Type
+				$mime_info = get_mime_action_info($single_page_response['headers']);
+				if (isset($mime_info['action'])) {
+					if ($mime_info['action'] == 'exclude') {
+						continue; // skip this feed item entry
+					} elseif ($mime_info['action'] == 'link') {
+						if ($mime_info['type'] == 'image') {
+							$html = "<a href=\"$effective_url\"><img src=\"$effective_url\" alt=\"{$mime_info['name']}\" /></a>";
+						} else {
+							$html = "<a href=\"$effective_url\">Download {$mime_info['name']}</a>";
+						}
+						$extracted_title = $mime_info['name'];
+						$do_content_extraction = false;
+					}
+				}
+				if ($do_content_extraction) {
+					$html = $single_page_response['body'];
+					// remove strange things
+					$html = str_replace('</[>', '', $html);
+					$html = convert_to_utf8($html, $single_page_response['headers']);
+					debug("Retrieved single-page view from $effective_url");
+				}
 				unset($single_page_response);
 			}
+		}
+		if ($do_content_extraction) {
 			debug('--------');
 			debug('Attempting to extract content');
 			$extract_result = $extractor->process($html, $effective_url);
 			$readability = $extractor->readability;
-			$content_block = ($extract_result) ? $extractor->getContent() : null;			
+			$content_block = ($extract_result) ? $extractor->getContent() : null;
 			$extracted_title = ($extract_result) ? $extractor->getTitle() : '';
 			// Deal with multi-page articles
 			//die('Next: '.$extractor->getNextPageUrl());
 			$is_multi_page = (!$is_single_page && $extract_result && $extractor->getNextPageUrl());
-			if ($options->multipage && $is_multi_page) {
+			if ($options->multipage && $is_multi_page && $options->content) {
 				debug('--------');
 				debug('Attempting to process multi-page article');
 				$multi_page_urls = array();
@ -580,7 +610,7 @@ foreach ($items as $key => $item) {
 						// check it's not what we have already!
 						if (!in_array($next_page_url, $multi_page_urls)) {
 							// it's not, so let's attempt to fetch it
-							$multi_page_urls[] = $next_page_url;						
+							$multi_page_urls[] = $next_page_url;
 							$_prev_ref = $http->referer;
 							if (($response = $http->get($next_page_url, true)) && $response['status_code'] < 300) {
 								// make sure mime type is not something with a different action associated
@ -605,13 +635,15 @@ foreach ($items as $key => $item) {
 				// did we successfully deal with this multi-page article?
 				if (empty($multi_page_content)) {
 					debug('Failed to extract all parts of multi-page article, so not going to include them');
-					$multi_page_content[] = $readability->dom->createElement('p')->innerHTML = '<em>This article appears to continue on subsequent pages which we could not extract</em>';
+					$_page = $readability->dom->createElement('p');
+					$_page->innerHTML = '<em>This article appears to continue on subsequent pages which we could not extract</em>';
+					$multi_page_content[] = $_page;
 				}
 				foreach ($multi_page_content as $_page) {
 					$_page = $content_block->ownerDocument->importNode($_page, true);
 					$content_block->appendChild($_page);
 				}
-				unset($multi_page_urls, $multi_page_content, $page_mime_info, $next_page_url);
+				unset($multi_page_urls, $multi_page_content, $page_mime_info, $next_page_url, $_page);
 			}
 		}
 		// use extracted title for both feed and item title if we're using single-item dummy feed
@ -658,7 +690,7 @@ foreach ($items as $key => $item) {
 			} else {
 				$html = $content_block->ownerDocument->saveXML($content_block); // essentially outerHTML
 			}
-			unset($content_block);
+			//unset($content_block);
 			// post-processing cleanup
 			$html = preg_replace('!<p>[\s\h\v]*</p>!u', '', $html);
 			if ($links == 'remove') {
@ -671,130 +703,155 @@ foreach ($items as $key => $item) {
 		}
 	}

-		if ($valid_key && isset($_GET['pubsub'])) { // used only on fivefilters.org at the moment
-			$newitem->addElement('guid', 'http://fivefilters.org/content-only/redirect.php?url='.urlencode($item->get_permalink()), array('isPermaLink'=>'false'));
+	if ($valid_key && isset($_GET['pubsub'])) { // used only on fivefilters.org at the moment
+		$newitem->addElement('guid', 'http://fivefilters.org/content-only/redirect.php?url='.urlencode($item->get_permalink()), array('isPermaLink'=>'false'));
+	} else {
+		$newitem->addElement('guid', $item->get_permalink(), array('isPermaLink'=>'true'));
+	}
+	// filter xss?
+	if ($xss_filter) {
+		debug('Filtering HTML to remove XSS');
+		$html = htmLawed::hl($html, array('safe'=>1, 'deny_attribute'=>'style', 'comment'=>1, 'cdata'=>1));
+	}
+
+	// add content
+	if ($options->summary === true) {
+		// get summary
+		$summary = '';
+		if (!$do_content_extraction) {
+			$summary = $html;
 		} else {
-			$newitem->addElement('guid', $item->get_permalink(), array('isPermaLink'=>'true'));
-		}
-		// filter xss?
-		if ($xss_filter) {
-			debug('Filtering HTML to remove XSS');
-			$html = htmLawed::hl($html, array('safe'=>1, 'deny_attribute'=>'style', 'comment'=>1, 'cdata'=>1));
-		}
-		$newitem->setDescription($html);
-		
-		// set date
-		if ((int)$item->get_date('U') > 0) {
-			$newitem->setDate((int)$item->get_date('U'));
-		} elseif ($extractor->getDate()) {
-			$newitem->setDate($extractor->getDate());
-		}
-		
-		// add authors
-		if ($authors = $item->get_authors()) {
-			foreach ($authors as $author) {
-				// for some feeds, SimplePie stores author's name as email, e.g. http://feeds.feedburner.com/nymag/intel
-				if ($author->get_name() !== null) {
-					$newitem->addElement('dc:creator', $author->get_name());
-				} elseif ($author->get_email() !== null) {
-					$newitem->addElement('dc:creator', $author->get_email());
+			// Try to get first few paragraphs
+			if (isset($content_block) && ($content_block instanceof DOMElement)) {
+				$_paras = $content_block->getElementsByTagName('p');
+				foreach ($_paras as $_para) {
+					$summary .= preg_replace("/[\n\r\t ]+/", ' ', $_para->textContent).' ';
+					if (strlen($summary) > 200) break;
 				}
-			}
-		} elseif ($authors = $extractor->getAuthors()) {
-			//TODO: make sure the list size is reasonable
-			foreach ($authors as $author) {
-				// TODO: xpath often selects authors from other articles linked from the page.
-				// for now choose first item
-				$newitem->addElement('dc:creator', $author);
-				break;
+			} else {
+				$summary = $html;
 			}
 		}
-		
-		// add language
-		if ($detect_language) {
-			$language = $extractor->getLanguage();
-			if (!$language) $language = $feed->get_language();
-			if (($detect_language == 3 || (!$language && $detect_language == 2)) && $text_sample) {
-				try {
-					if ($use_cld) {
-						// Use PHP-CLD extension
-						$php_cld = 'CLD\detect'; // in quotes to prevent PHP 5.2 parse error
-						$res = $php_cld($text_sample);
-						if (is_array($res) && count($res) > 0) {
-							$language = $res[0]['code'];
-						}	
-					} else {
-						//die('what');
-						// Use PEAR's Text_LanguageDetect
-						if (!isset($l))	{
-							$l = new Text_LanguageDetect('libraries/language-detect/lang.dat', 'libraries/language-detect/unicode_blocks.dat');
-						}
-						$l_result = $l->detect($text_sample, 1);
-						if (count($l_result) > 0) {
-							$language = $language_codes[key($l_result)];
-						}
+		unset($_paras, $_para);
+		$summary = get_excerpt($summary);
+		$newitem->setDescription($summary);
+		if ($options->content) $newitem->setElement('content:encoded', $html);
+	} else {
+		if ($options->content) $newitem->setDescription($html);
+	}
+
+	// set date
+	if ((int)$item->get_date('U') > 0) {
+		$newitem->setDate((int)$item->get_date('U'));
+	} elseif ($extractor->getDate()) {
+		$newitem->setDate($extractor->getDate());
+	}
+
+	// add authors
+	if ($authors = $item->get_authors()) {
+		foreach ($authors as $author) {
+			// for some feeds, SimplePie stores author's name as email, e.g. http://feeds.feedburner.com/nymag/intel
+			if ($author->get_name() !== null) {
+				$newitem->addElement('dc:creator', $author->get_name());
+			} elseif ($author->get_email() !== null) {
+				$newitem->addElement('dc:creator', $author->get_email());
+			}
+		}
+	} elseif ($authors = $extractor->getAuthors()) {
+		//TODO: make sure the list size is reasonable
+		foreach ($authors as $author) {
+			// TODO: xpath often selects authors from other articles linked from the page.
+			// for now choose first item
+			$newitem->addElement('dc:creator', $author);
+			break;
+		}
+	}
+
+	// add language
+	if ($detect_language) {
+		$language = $extractor->getLanguage();
+		if (!$language) $language = $feed->get_language();
+		if (($detect_language == 3 || (!$language && $detect_language == 2)) && $text_sample) {
+			try {
+				if ($use_cld) {
+					// Use PHP-CLD extension
+					$php_cld = 'CLD\detect'; // in quotes to prevent PHP 5.2 parse error
+					$res = $php_cld($text_sample);
+					if (is_array($res) && count($res) > 0) {
+						$language = $res[0]['code'];
 					}
-				} catch (Exception $e) {
-					//die('error: '.$e);	
-					// do nothing
-				}
-			}
-			if ($language && (strlen($language) < 7)) {	
-				$newitem->addElement('dc:language', $language);
-			}
-		}
-		
-		// add MIME type (if it appeared in our exclusions lists)
-		if (isset($mime_info['mime'])) $newitem->addElement('dc:format', $mime_info['mime']);
-		// add effective URL (URL after redirects)
-		if (isset($effective_url)) {
-			//TODO: ensure $effective_url is valid witout - sometimes it causes problems, e.g.
-			//http://www.siasat.pk/forum/showthread.php?108883-Pakistan-Chowk-by-Rana-Mubashir-<2D>-25th-March-2012-Special-Program-from-Liari-(Karachi)
-			//temporary measure: use utf8_encode()
-			$newitem->addElement('dc:identifier', remove_url_cruft(utf8_encode($effective_url)));
-		} else {
-			$newitem->addElement('dc:identifier', remove_url_cruft($item->get_permalink()));
-		}
-		
-		// add categories
-		if ($categories = $item->get_categories()) {
-			foreach ($categories as $category) {
-				if ($category->get_label() !== null) {
-					$newitem->addElement('category', $category->get_label());
-				}
-			}
-		}
-		
-		// check for enclosures
-		if ($options->keep_enclosures) {
-			if ($enclosures = $item->get_enclosures()) {
-				foreach ($enclosures as $enclosure) {
-					// thumbnails
-					foreach ((array)$enclosure->get_thumbnails() as $thumbnail) {
-						$newitem->addElement('media:thumbnail', '', array('url'=>$thumbnail));
+				} else {
+					//die('what');
+					// Use PEAR's Text_LanguageDetect
+					if (!isset($l))	{
+					  $l = new Text_LanguageDetect();
+					  $l->setNameMode(2); // return ISO 639-1 codes (e.g. "en")
+					}
+					$l_result = $l->detect($text_sample, 1);
+					if (count($l_result) > 0) {
+						$language = key($l_result);
 					}
-					if (!$enclosure->get_link()) continue;
-					$enc = array();
-					// Media RSS spec ($enc): http://search.yahoo.com/mrss
-					// SimplePie methods ($enclosure): http://simplepie.org/wiki/reference/start#methods4
-					$enc['url'] = $enclosure->get_link();
-					if ($enclosure->get_length()) $enc['fileSize'] = $enclosure->get_length();
-					if ($enclosure->get_type()) $enc['type'] = $enclosure->get_type();
-					if ($enclosure->get_medium()) $enc['medium'] = $enclosure->get_medium();
-					if ($enclosure->get_expression()) $enc['expression'] = $enclosure->get_expression();
-					if ($enclosure->get_bitrate()) $enc['bitrate'] = $enclosure->get_bitrate();
-					if ($enclosure->get_framerate()) $enc['framerate'] = $enclosure->get_framerate();
-					if ($enclosure->get_sampling_rate()) $enc['samplingrate'] = $enclosure->get_sampling_rate();
-					if ($enclosure->get_channels()) $enc['channels'] = $enclosure->get_channels();
-					if ($enclosure->get_duration()) $enc['duration'] = $enclosure->get_duration();
-					if ($enclosure->get_height()) $enc['height'] = $enclosure->get_height();
-					if ($enclosure->get_width()) $enc['width'] = $enclosure->get_width();
-					if ($enclosure->get_language()) $enc['lang'] = $enclosure->get_language();
-					$newitem->addElement('media:content', '', $enc);
 				}
+			} catch (Exception $e) {
+				//die('error: '.$e);
+				// do nothing
 			}
 		}
-	/* } */
+		if ($language && (strlen($language) < 7)) {
+			$newitem->addElement('dc:language', $language);
+		}
+	}
+
+	// add MIME type (if it appeared in our exclusions lists)
+	if (isset($mime_info['mime'])) $newitem->addElement('dc:format', $mime_info['mime']);
+	// add effective URL (URL after redirects)
+	if (isset($effective_url)) {
+		//TODO: ensure $effective_url is valid witout - sometimes it causes problems, e.g.
+		//http://www.siasat.pk/forum/showthread.php?108883-Pakistan-Chowk-by-Rana-Mubashir-<2D>-25th-March-2012-Special-Program-from-Liari-(Karachi)
+		//temporary measure: use utf8_encode()
+		$newitem->addElement('dc:identifier', remove_url_cruft(utf8_encode($effective_url)));
+	} else {
+		$newitem->addElement('dc:identifier', remove_url_cruft($item->get_permalink()));
+	}
+
+	// add categories
+	if ($categories = $item->get_categories()) {
+		foreach ($categories as $category) {
+			if ($category->get_label() !== null) {
+				$newitem->addElement('category', $category->get_label());
+			}
+		}
+	}
+
+	// check for enclosures
+	if ($options->keep_enclosures) {
+		if ($enclosures = $item->get_enclosures()) {
+			foreach ($enclosures as $enclosure) {
+				// thumbnails
+				foreach ((array)$enclosure->get_thumbnails() as $thumbnail) {
+					$newitem->addElement('media:thumbnail', '', array('url'=>$thumbnail));
+				}
+				if (!$enclosure->get_link()) continue;
+				$enc = array();
+				// Media RSS spec ($enc): http://search.yahoo.com/mrss
+				// SimplePie methods ($enclosure): http://simplepie.org/wiki/reference/start#methods4
+				$enc['url'] = $enclosure->get_link();
+				if ($enclosure->get_length()) $enc['fileSize'] = $enclosure->get_length();
+				if ($enclosure->get_type()) $enc['type'] = $enclosure->get_type();
+				if ($enclosure->get_medium()) $enc['medium'] = $enclosure->get_medium();
+				if ($enclosure->get_expression()) $enc['expression'] = $enclosure->get_expression();
+				if ($enclosure->get_bitrate()) $enc['bitrate'] = $enclosure->get_bitrate();
+				if ($enclosure->get_framerate()) $enc['framerate'] = $enclosure->get_framerate();
+				if ($enclosure->get_sampling_rate()) $enc['samplingrate'] = $enclosure->get_sampling_rate();
+				if ($enclosure->get_channels()) $enc['channels'] = $enclosure->get_channels();
+				if ($enclosure->get_duration()) $enc['duration'] = $enclosure->get_duration();
+				if ($enclosure->get_height()) $enc['height'] = $enclosure->get_height();
+				if ($enclosure->get_width()) $enc['width'] = $enclosure->get_width();
+				if ($enclosure->get_language()) $enc['lang'] = $enclosure->get_language();
+				$newitem->addElement('media:content', '', $enc);
+			}
+		}
+	}
 	$output->addItem($newitem);
 	unset($html);
 	$item_count++;
--- a/inc/3rdparty/makefulltextfeedHelpers.php
+++ b/inc/3rdparty/makefulltextfeedHelpers.php
@ -66,6 +66,38 @@ class DummySingleItem {
 // HELPER FUNCTIONS
 ///////////////////////////////

+/**
+ * Build a plain-text excerpt of at most $num_words whitespace-separated words.
+ * Adapted from WordPress:
+ * http://core.trac.wordpress.org/browser/tags/3.5.1/wp-includes/formatting.php#L2173
+ *
+ * @param string      $text      Text or HTML; tags are removed with strip_tags().
+ * @param int         $num_words Maximum number of words to keep (default 55).
+ * @param string|null $more      Suffix appended when text is cut; null means '&hellip;'.
+ * @return string Words joined by single spaces, with surrounding whitespace trimmed.
+ */
+function get_excerpt($text, $num_words=55, $more=null) {
+	if (null === $more) $more = '&hellip;';
+	$text = strip_tags($text);
+	//TODO: Check if word count is based on single characters (East Asian characters)
+	// Ask for one word more than allowed so truncation can be detected.
+	$words_array = preg_split("/[\n\r\t ]+/", $text, $num_words + 1, PREG_SPLIT_NO_EMPTY);
+	$truncated = (count($words_array) > $num_words);
+	if ($truncated) {
+		// drop the overflow element, which holds the unsplit remainder of the text
+		array_pop($words_array);
+	}
+	$text = implode(' ', $words_array);
+	if ($truncated) $text .= $more;
+	// trim whitespace at beginning or end of string
+	// See: http://stackoverflow.com/questions/4166896/trim-unicode-whitespace-in-php-5-2
+	$trimmed = preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $text);
+	// preg_replace() returns null when $text is not valid UTF-8 (the /u modifier
+	// rejects it); fall back to a byte-safe trim instead of returning null.
+	if (null === $trimmed) $trimmed = trim($text);
+	return $trimmed;
+}
+
 function url_allowed($url) {
 	global $options;
 	if (!empty($options->allowed_urls)) {
@ -165,14 +197,6 @@ function convert_to_utf8($html, $header=null)
 			if (strtolower($encoding) != 'utf-8') {
 				debug('Converting to UTF-8');
 				$html = SimplePie_Misc::change_encoding($html, $encoding, 'utf-8');
-				/*
-				if (function_exists('iconv')) {
-					// iconv appears to handle certain character encodings better than mb_convert_encoding
-					$html = iconv($encoding, 'utf-8', $html);
-				} else {
-					$html = mb_convert_encoding($html, 'utf-8', $encoding);
-				}
-				*/
 			}
 		}
 	}
@ -196,7 +220,7 @@ function makeAbsolute($base, $elem) {
 }
 function makeAbsoluteAttr($base, $e, $attr) {
 	if ($e->hasAttribute($attr)) {
-		// Trim leading and trailing white space. I don't really like this but 
+		// Trim leading and trailing white space. I don't really like this but
 		// unfortunately it does appear on some sites. e.g.  <img src=" /path/to/image.jpg" />
 		$url = trim(str_replace('%20', ' ', $e->getAttribute($attr)));
 		$url = str_replace(' ', '%20', $url);
--- a/inc/3rdparty/site_config/index.php
+++ b/inc/3rdparty/site_config/index.php
@ -1,3 +1,2 @@
-<?php
-// this is here to prevent directory listing over the web
-?>
+<?php
+// this is here to prevent directory listing over the web
--- a/inc/3rdparty/site_config/standard/version.txt
+++ b/inc/3rdparty/site_config/standard/version.txt
@ -1 +1 @@
-4
+2013-05-12T22:53:07Z