mirror of
https://github.com/moparisthebest/wallabag
synced 2024-11-27 03:12:21 -05:00
update to 3.2 version of full-text-rss, issue #694
This commit is contained in:
parent
ab157bbb75
commit
3ec62cf95a
104
inc/3rdparty/config.php
vendored
104
inc/3rdparty/config.php
vendored
@ -19,7 +19,7 @@ if (!isset($options)) $options = new stdClass();
|
|||||||
// Enable service
|
// Enable service
|
||||||
// ----------------------
|
// ----------------------
|
||||||
// Set this to false if you want to disable the service.
|
// Set this to false if you want to disable the service.
|
||||||
// If set to false, no feed is produced and users will
|
// If set to false, no feed is produced and users will
|
||||||
// be told that the service is disabled.
|
// be told that the service is disabled.
|
||||||
$options->enabled = true;
|
$options->enabled = true;
|
||||||
|
|
||||||
@ -43,10 +43,64 @@ $options->default_entries = 5;
|
|||||||
// ----------------------
|
// ----------------------
|
||||||
// The maximum number of feed items to process when no access key is supplied.
|
// The maximum number of feed items to process when no access key is supplied.
|
||||||
// This limits the user-supplied &max=x value. For example, if the user
|
// This limits the user-supplied &max=x value. For example, if the user
|
||||||
// asks for 20 items to be processed (&max=20), if max_entries is set to
|
// asks for 20 items to be processed (&max=20), if max_entries is set to
|
||||||
// 10, only 10 will be processed.
|
// 10, only 10 will be processed.
|
||||||
$options->max_entries = 10;
|
$options->max_entries = 10;
|
||||||
|
|
||||||
|
// Full content
|
||||||
|
// ----------------------
|
||||||
|
// By default Full-Text RSS includes the extracted content in the output.
|
||||||
|
// You can exclude this from the output by passing '&content=0' in the querystring.
|
||||||
|
//
|
||||||
|
// Possible values...
|
||||||
|
// Always include: true
|
||||||
|
// Never include: false
|
||||||
|
// Include unless user overrides (&content=0): 'user' (default)
|
||||||
|
//
|
||||||
|
// Note: currently this does not disable full content extraction. It simply omits it
|
||||||
|
// from the output.
|
||||||
|
$options->content = 'user';
|
||||||
|
|
||||||
|
// Excerpts
|
||||||
|
// ----------------------
|
||||||
|
// By default Full-Text RSS does not include excerpts in the output.
|
||||||
|
// You can enable this by passing '&summary=1' in the querystring.
|
||||||
|
// This will include a plain text excerpt from the extracted content.
|
||||||
|
//
|
||||||
|
// Possible values...
|
||||||
|
// Always include: true (recommended for new users)
|
||||||
|
// Never include: false
|
||||||
|
// Don't include unless user overrides (&summary=1): 'user' (default)
|
||||||
|
//
|
||||||
|
// Important: if both content and excerpts are requested, the excerpt will be
|
||||||
|
// placed in the description element and the full content inside content:encoded.
|
||||||
|
// If excerpts are not requested, the full content will go inside the description element.
|
||||||
|
//
|
||||||
|
// Why are we not returning both excerpts and content by default?
|
||||||
|
// Mainly for backward compatibility.
|
||||||
|
// Excerpts should appear in the feed item's description element. Previous versions
|
||||||
|
// of Full-Text RSS did not return excerpts, so the description element was always
|
||||||
|
// used for the full content (as recommended by the RSS advisory). When returning both,
|
||||||
|
// we need somewhere else to place the content (content:encoded).
|
||||||
|
// Having both enabled should not create any problems for news readers, but it may create
|
||||||
|
// problems for developers upgrading from one of our earlier versions who may now find
|
||||||
|
// their applications are returning excerpts instead of the full content they were
|
||||||
|
// expecting. To avoid such surprises for users who are upgrading Full-Text RSS,
|
||||||
|
// excerpts must be explicitly requested in the querystring by default.
|
||||||
|
//
|
||||||
|
// Why not use a different element name for excerpts?
|
||||||
|
// According to the RSS advisory:
|
||||||
|
// "Publishers who employ summaries should store the summary in description and
|
||||||
|
// the full content in content:encoded, ordering description first within the item.
|
||||||
|
// On items with no summary, the full content should be stored in description."
|
||||||
|
// See: http://www.rssboard.org/rss-profile#namespace-elements-content-encoded
|
||||||
|
//
|
||||||
|
// For more consistent element naming, we recommend new users set this option to true.
|
||||||
|
// The full content can still be excluded via the querystring, but the element names
|
||||||
|
// will not change: when $options->summary = true, the description element will always
|
||||||
|
// be reserved for the excerpt and content:encoded always for full content.
|
||||||
|
$options->summary = 'user';
|
||||||
|
|
||||||
// Rewrite relative URLs
|
// Rewrite relative URLs
|
||||||
// ----------------------
|
// ----------------------
|
||||||
// With this enabled relative URLs found in the extracted content
|
// With this enabled relative URLs found in the extracted content
|
||||||
@ -67,7 +121,7 @@ $options->exclude_items_on_fail = 'user';
|
|||||||
// Enable multi-page support
|
// Enable multi-page support
|
||||||
// -------------------------
|
// -------------------------
|
||||||
// If enabled, we will try to follow next page links on multi-page articles.
|
// If enabled, we will try to follow next page links on multi-page articles.
|
||||||
// Currently this only happens for sites where next_page_link has been defined
|
// Currently this only happens for sites where next_page_link has been defined
|
||||||
// in a site config file.
|
// in a site config file.
|
||||||
$options->multipage = true;
|
$options->multipage = true;
|
||||||
|
|
||||||
@ -125,10 +179,10 @@ $options->detect_language = 1;
|
|||||||
|
|
||||||
// Registration key
|
// Registration key
|
||||||
// ---------------
|
// ---------------
|
||||||
// The registration key is optional. It is not required to use Full-Text RSS,
|
// The registration key is optional. It is not required to use Full-Text RSS,
|
||||||
// and does not affect the normal operation of Full-Text RSS. It is currently
|
// and does not affect the normal operation of Full-Text RSS. It is currently
|
||||||
// only used on admin pages which help you update site patterns with the
|
// only used on admin pages which help you update site patterns with the
|
||||||
// latest version offered by FiveFilters.org. For these admin-related
|
// latest version offered by FiveFilters.org. For these admin-related
|
||||||
// tasks to complete, we will require a valid registration key.
|
// tasks to complete, we will require a valid registration key.
|
||||||
// If you would like one, you can purchase the latest version of Full-Text RSS
|
// If you would like one, you can purchase the latest version of Full-Text RSS
|
||||||
// at http://fivefilters.org/content-only/
|
// at http://fivefilters.org/content-only/
|
||||||
@ -144,12 +198,12 @@ $options->registration_key = '';
|
|||||||
// ----------------------
|
// ----------------------
|
||||||
// Certain pages/actions, e.g. updating site patterns with our online tool, will require admin credentials.
|
// Certain pages/actions, e.g. updating site patterns with our online tool, will require admin credentials.
|
||||||
// To use these pages, enter a password here and you'll be prompted for it when you try to access those pages.
|
// To use these pages, enter a password here and you'll be prompted for it when you try to access those pages.
|
||||||
// If no password or username is set, pages requiring admin privileges will be inaccessible.
|
// If no password or username is set, pages requiring admin privileges will be inaccessible.
|
||||||
// The default username is 'admin'.
|
// The default username is 'admin'.
|
||||||
// If overriding with an environment variable, separate username and password with a colon, e.g.:
|
// If overriding with an environment variable, separate username and password with a colon, e.g.:
|
||||||
// ftr_admin_credentials: admin:my-secret-password
|
// ftr_admin_credentials: admin:my-secret-password
|
||||||
// Example: $options->admin_credentials = array('username'=>'admin', 'password'=>'my-secret-password');
|
// Example: $options->admin_credentials = array('username'=>'admin', 'password'=>'my-secret-password');
|
||||||
$options->admin_credentials = array('username'=>'admin', 'password'=>'admin');
|
$options->admin_credentials = array('username'=>'admin', 'password'=>'');
|
||||||
|
|
||||||
// URLs to allow
|
// URLs to allow
|
||||||
// ----------------------
|
// ----------------------
|
||||||
@ -178,12 +232,12 @@ $options->key_required = false;
|
|||||||
// ----------------------
|
// ----------------------
|
||||||
// By default, when processing feeds, we assume item titles in the feed
|
// By default, when processing feeds, we assume item titles in the feed
|
||||||
// have not been truncated. So after processing web pages, the extracted titles
|
// have not been truncated. So after processing web pages, the extracted titles
|
||||||
// are not used in the generated feed. If you prefer to have extracted titles in
|
// are not used in the generated feed. If you prefer to have extracted titles in
|
||||||
// the feed you can either set this to false, in which case we will always favour
|
// the feed you can either set this to false, in which case we will always favour
|
||||||
// extracted titles. Alternatively, if set to 'user' (default) we'll use the
|
// extracted titles. Alternatively, if set to 'user' (default) we'll use the
|
||||||
// extracted title if you pass '&use_extracted_title' in the querystring.
|
// extracted title if you pass '&use_extracted_title' in the querystring.
|
||||||
// Possible values:
|
// Possible values:
|
||||||
// * Favour feed titles: true
|
// * Favour feed titles: true
|
||||||
// * Favour extracted titles: false
|
// * Favour extracted titles: false
|
||||||
// * Favour feed titles with user override: 'user' (default)
|
// * Favour feed titles with user override: 'user' (default)
|
||||||
// Note: this has no effect when the input URL is to a web page - in these cases
|
// Note: this has no effect when the input URL is to a web page - in these cases
|
||||||
@ -192,17 +246,17 @@ $options->favour_feed_titles = 'user';
|
|||||||
|
|
||||||
// Access keys (password protected access)
|
// Access keys (password protected access)
|
||||||
// ------------------------------------
|
// ------------------------------------
|
||||||
// NOTE: You do not need an API key from fivefilters.org to run your own
|
// NOTE: You do not need an API key from fivefilters.org to run your own
|
||||||
// copy of the code. This is here if you'd like to restrict access to
|
// copy of the code. This is here if you'd like to restrict access to
|
||||||
// _your_ copy.
|
// _your_ copy.
|
||||||
// Keys let you group users - those with a key and those without - and
|
// Keys let you group users - those with a key and those without - and
|
||||||
// restrict access to the service for those without a key.
|
// restrict access to the service for those without a key.
|
||||||
// If you want everyone to access the service in the same way, you can
|
// If you want everyone to access the service in the same way, you can
|
||||||
// leave the array below empty and ignore the access key options further down.
|
// leave the array below empty and ignore the access key options further down.
|
||||||
// The options further down let you control how the service should behave
|
// The options further down let you control how the service should behave
|
||||||
// in each mode.
|
// in each mode.
|
||||||
// Note: Explicitly including the index number (1 and 2 in the examples below)
|
// Note: Explicitly including the index number (1 and 2 in the examples below)
|
||||||
// is highly recommended (when generating feeds, we encode the key and
|
// is highly recommended (when generating feeds, we encode the key and
|
||||||
// refer to it by index number and hash).
|
// refer to it by index number and hash).
|
||||||
$options->api_keys = array();
|
$options->api_keys = array();
|
||||||
// Example:
|
// Example:
|
||||||
@ -232,13 +286,13 @@ $options->max_entries_with_key = 10;
|
|||||||
// filter the resulting HTML for XSS attacks, making it redundant for
|
// filter the resulting HTML for XSS attacks, making it redundant for
|
||||||
// Full-Text RSS to do the same. Similarly with frameworks/CMS which display
|
// Full-Text RSS to do the same. Similarly with frameworks/CMS which display
|
||||||
// feed content - the content should be treated like any other user-submitted content.
|
// feed content - the content should be treated like any other user-submitted content.
|
||||||
//
|
//
|
||||||
// If you are writing an application yourself which is processing feeds generated by
|
// If you are writing an application yourself which is processing feeds generated by
|
||||||
// Full-Text RSS, you can either filter the HTML yourself to remove potential XSS attacks
|
// Full-Text RSS, you can either filter the HTML yourself to remove potential XSS attacks
|
||||||
// or enable this option. This might be useful if you are processing our generated
|
// or enable this option. This might be useful if you are processing our generated
|
||||||
// feeds with JavaScript on the client side - although there's client side xss
|
// feeds with JavaScript on the client side - although there's client side xss
|
||||||
// filtering available too, e.g. https://code.google.com/p/google-caja/wiki/JsHtmlSanitizer
|
// filtering available too, e.g. https://code.google.com/p/google-caja/wiki/JsHtmlSanitizer
|
||||||
//
|
//
|
||||||
// If enabled, we'll pass retrieved HTML content through htmLawed with
|
// If enabled, we'll pass retrieved HTML content through htmLawed with
|
||||||
// safe flag on and style attributes denied, see
|
// safe flag on and style attributes denied, see
|
||||||
// http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed/htmLawed_README.htm#s3.6
|
// http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed/htmLawed_README.htm#s3.6
|
||||||
@ -253,8 +307,8 @@ $options->xss_filter = 'user';
|
|||||||
// Allowed parsers
|
// Allowed parsers
|
||||||
// ----------------------
|
// ----------------------
|
||||||
// Full-Text RSS attempts to use PHP's libxml extension to process HTML.
|
// Full-Text RSS attempts to use PHP's libxml extension to process HTML.
|
||||||
// While fast, on some sites it may not always produce good results.
|
// While fast, on some sites it may not always produce good results.
|
||||||
// For these sites, you can specify an alternative HTML parser:
|
// For these sites, you can specify an alternative HTML parser:
|
||||||
// parser: html5lib
|
// parser: html5lib
|
||||||
// The html5lib parser is bundled with Full-Text RSS.
|
// The html5lib parser is bundled with Full-Text RSS.
|
||||||
// see http://code.google.com/p/html5lib/
|
// see http://code.google.com/p/html5lib/
|
||||||
@ -273,7 +327,7 @@ $options->cors = false;
|
|||||||
|
|
||||||
// Use APC user cache?
|
// Use APC user cache?
|
||||||
// ----------------------
|
// ----------------------
|
||||||
// If enabled we will store site config files (when requested
|
// If enabled we will store site config files (when requested
|
||||||
// for the first time) in APC's user cache. Keys prefixed with 'sc.'
|
// for the first time) in APC's user cache. Keys prefixed with 'sc.'
|
||||||
// This improves performance by reducing disk access.
|
// This improves performance by reducing disk access.
|
||||||
// Note: this has no effect if APC is unavailable on your server.
|
// Note: this has no effect if APC is unavailable on your server.
|
||||||
@ -346,7 +400,7 @@ $options->rewrite_url = array(
|
|||||||
// Valid actions:
|
// Valid actions:
|
||||||
// * 'exclude' - exclude this item from the result
|
// * 'exclude' - exclude this item from the result
|
||||||
// * 'link' - create HTML link to the item
|
// * 'link' - create HTML link to the item
|
||||||
$options->content_type_exc = array(
|
$options->content_type_exc = array(
|
||||||
'application/pdf' => array('action'=>'link', 'name'=>'PDF'),
|
'application/pdf' => array('action'=>'link', 'name'=>'PDF'),
|
||||||
'image' => array('action'=>'link', 'name'=>'Image'),
|
'image' => array('action'=>'link', 'name'=>'Image'),
|
||||||
'audio' => array('action'=>'link', 'name'=>'Audio'),
|
'audio' => array('action'=>'link', 'name'=>'Audio'),
|
||||||
@ -375,13 +429,13 @@ $options->cache_cleanup = 100;
|
|||||||
/// DO NOT CHANGE ANYTHING BELOW THIS ///////////
|
/// DO NOT CHANGE ANYTHING BELOW THIS ///////////
|
||||||
/////////////////////////////////////////////////
|
/////////////////////////////////////////////////
|
||||||
|
|
||||||
if (!defined('_FF_FTR_VERSION')) define('_FF_FTR_VERSION', '3.1');
|
if (!defined('_FF_FTR_VERSION')) define('_FF_FTR_VERSION', '3.2');
|
||||||
|
|
||||||
if (basename(__FILE__) == 'config.php') {
|
if (basename(__FILE__) == 'config.php') {
|
||||||
if (file_exists(dirname(__FILE__).'/custom_config.php')) {
|
if (file_exists(dirname(__FILE__).'/custom_config.php')) {
|
||||||
require_once dirname(__FILE__).'/custom_config.php';
|
require_once dirname(__FILE__).'/custom_config.php';
|
||||||
}
|
}
|
||||||
|
|
||||||
// check for environment variables - often used on cloud platforms
|
// check for environment variables - often used on cloud platforms
|
||||||
// environment variables should be prefixed with 'ftr_', e.g.
|
// environment variables should be prefixed with 'ftr_', e.g.
|
||||||
// ftr_max_entries: 1
|
// ftr_max_entries: 1
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -1,338 +1,343 @@
|
|||||||
<?php
|
<?php
|
||||||
/**
|
/**
|
||||||
* Site Config
|
* Site Config
|
||||||
*
|
*
|
||||||
* Each instance of this class should hold extraction patterns and other directives
|
* Each instance of this class should hold extraction patterns and other directives
|
||||||
* for a website. See ContentExtractor class to see how it's used.
|
* for a website. See ContentExtractor class to see how it's used.
|
||||||
*
|
*
|
||||||
* @version 0.7
|
* @version 0.8
|
||||||
* @date 2012-08-27
|
* @date 2013-04-16
|
||||||
* @author Keyvan Minoukadeh
|
* @author Keyvan Minoukadeh
|
||||||
* @copyright 2012 Keyvan Minoukadeh
|
* @copyright 2013 Keyvan Minoukadeh
|
||||||
* @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3
|
* @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3
|
||||||
*/
|
*/
|
||||||
|
|
||||||
class SiteConfig
|
class SiteConfig
|
||||||
{
|
{
|
||||||
// Use first matching element as title (0 or more xpath expressions)
|
// Use first matching element as title (0 or more xpath expressions)
|
||||||
public $title = array();
|
public $title = array();
|
||||||
|
|
||||||
// Use first matching element as body (0 or more xpath expressions)
|
// Use first matching element as body (0 or more xpath expressions)
|
||||||
public $body = array();
|
public $body = array();
|
||||||
|
|
||||||
// Use first matching element as author (0 or more xpath expressions)
|
// Use first matching element as author (0 or more xpath expressions)
|
||||||
public $author = array();
|
public $author = array();
|
||||||
|
|
||||||
// Use first matching element as date (0 or more xpath expressions)
|
// Use first matching element as date (0 or more xpath expressions)
|
||||||
public $date = array();
|
public $date = array();
|
||||||
|
|
||||||
// Strip elements matching these xpath expressions (0 or more)
|
// Strip elements matching these xpath expressions (0 or more)
|
||||||
public $strip = array();
|
public $strip = array();
|
||||||
|
|
||||||
// Strip elements which contain these strings (0 or more) in the id or class attribute
|
// Strip elements which contain these strings (0 or more) in the id or class attribute
|
||||||
public $strip_id_or_class = array();
|
public $strip_id_or_class = array();
|
||||||
|
|
||||||
// Strip images which contain these strings (0 or more) in the src attribute
|
// Strip images which contain these strings (0 or more) in the src attribute
|
||||||
public $strip_image_src = array();
|
public $strip_image_src = array();
|
||||||
|
|
||||||
// Additional HTTP headers to send
|
// Additional HTTP headers to send
|
||||||
// NOT YET USED
|
// NOT YET USED
|
||||||
public $http_header = array();
|
public $http_header = array();
|
||||||
|
|
||||||
// Process HTML with tidy before creating DOM (bool or null if undeclared)
|
// Process HTML with tidy before creating DOM (bool or null if undeclared)
|
||||||
public $tidy = null;
|
public $tidy = null;
|
||||||
|
|
||||||
protected $default_tidy = true; // used if undeclared
|
protected $default_tidy = true; // used if undeclared
|
||||||
|
|
||||||
// Autodetect title/body if xpath expressions fail to produce results.
|
// Autodetect title/body if xpath expressions fail to produce results.
|
||||||
// Note that this applies to title and body separately, i.e.
|
// Note that this applies to title and body separately, i.e.
|
||||||
// * if we get a body match but no title match, this option will determine whether we autodetect title
|
// * if we get a body match but no title match, this option will determine whether we autodetect title
|
||||||
// * if neither match, this determines whether we autodetect title and body.
|
// * if neither match, this determines whether we autodetect title and body.
|
||||||
// Also note that this only applies when there is at least one xpath expression in title or body, i.e.
|
// Also note that this only applies when there is at least one xpath expression in title or body, i.e.
|
||||||
// * if title and body are both empty (no xpath expressions), this option has no effect (both title and body will be auto-detected)
|
// * if title and body are both empty (no xpath expressions), this option has no effect (both title and body will be auto-detected)
|
||||||
// * if there's an xpath expression for title and none for body, body will be auto-detected and this option will determine whether we auto-detect title if the xpath expression for it fails to produce results.
|
// * if there's an xpath expression for title and none for body, body will be auto-detected and this option will determine whether we auto-detect title if the xpath expression for it fails to produce results.
|
||||||
// Usage scenario: you want to extract something specific from a set of URLs, e.g. a table, and if the table is not found, you want to ignore the entry completely. Auto-detection is unlikely to succeed here, so you construct your patterns and set this option to false. Another scenario may be a site where auto-detection has proven to fail (or worse, picked up the wrong content).
|
// Usage scenario: you want to extract something specific from a set of URLs, e.g. a table, and if the table is not found, you want to ignore the entry completely. Auto-detection is unlikely to succeed here, so you construct your patterns and set this option to false. Another scenario may be a site where auto-detection has proven to fail (or worse, picked up the wrong content).
|
||||||
// bool or null if undeclared
|
// bool or null if undeclared
|
||||||
public $autodetect_on_failure = null;
|
public $autodetect_on_failure = null;
|
||||||
protected $default_autodetect_on_failure = true; // used if undeclared
|
protected $default_autodetect_on_failure = true; // used if undeclared
|
||||||
|
|
||||||
// Clean up content block - attempt to remove elements that appear to be superfluous
|
// Clean up content block - attempt to remove elements that appear to be superfluous
|
||||||
// bool or null if undeclared
|
// bool or null if undeclared
|
||||||
public $prune = null;
|
public $prune = null;
|
||||||
protected $default_prune = true; // used if undeclared
|
protected $default_prune = true; // used if undeclared
|
||||||
|
|
||||||
// Test URL - if present, can be used to test the config above
|
// Test URL - if present, can be used to test the config above
|
||||||
public $test_url = array();
|
public $test_url = array();
|
||||||
|
|
||||||
// Single-page link - should identify a link element or URL pointing to the page holding the entire article
|
// Single-page link - should identify a link element or URL pointing to the page holding the entire article
|
||||||
// This is useful for sites which split their articles across multiple pages. Links to such pages tend to
|
// This is useful for sites which split their articles across multiple pages. Links to such pages tend to
|
||||||
// display the first page with links to the other pages at the bottom. Often there is also a link to a page
|
// display the first page with links to the other pages at the bottom. Often there is also a link to a page
|
||||||
// which displays the entire article on one page (e.g. 'print view').
|
// which displays the entire article on one page (e.g. 'print view').
|
||||||
// This should be an XPath expression identifying the link to that page. If present and we find a match,
|
// This should be an XPath expression identifying the link to that page. If present and we find a match,
|
||||||
// we will retrieve that page and the rest of the options in this config will be applied to the new page.
|
// we will retrieve that page and the rest of the options in this config will be applied to the new page.
|
||||||
public $single_page_link = array();
|
public $single_page_link = array();
|
||||||
|
|
||||||
public $next_page_link = array();
|
public $next_page_link = array();
|
||||||
|
|
||||||
// Single-page link in feed? - same as above, but patterns applied to item description HTML taken from feed
|
// Single-page link in feed? - same as above, but patterns applied to item description HTML taken from feed
|
||||||
public $single_page_link_in_feed = array();
|
public $single_page_link_in_feed = array();
|
||||||
|
|
||||||
// Which parser to use for turning raw HTML into a DOMDocument (either 'libxml' or 'html5lib')
|
// Which parser to use for turning raw HTML into a DOMDocument (either 'libxml' or 'html5lib')
|
||||||
// string or null if undeclared
|
// string or null if undeclared
|
||||||
public $parser = null;
|
public $parser = null;
|
||||||
protected $default_parser = 'libxml'; // used if undeclared
|
protected $default_parser = 'libxml'; // used if undeclared
|
||||||
|
|
||||||
// Strings to search for in HTML before processing begins (used with $replace_string)
|
// Strings to search for in HTML before processing begins (used with $replace_string)
|
||||||
public $find_string = array();
|
public $find_string = array();
|
||||||
// Strings to replace those found in $find_string before HTML processing begins
|
// Strings to replace those found in $find_string before HTML processing begins
|
||||||
public $replace_string = array();
|
public $replace_string = array();
|
||||||
|
|
||||||
// the options below cannot be set in the config files which this class represents
|
// the options below cannot be set in the config files which this class represents
|
||||||
|
|
||||||
//public $cache_in_apc = false; // used to decide if we should cache in apc or not
|
//public $cache_in_apc = false; // used to decide if we should cache in apc or not
|
||||||
public $cache_key = null;
|
public $cache_key = null;
|
||||||
public static $debug = false;
|
public static $debug = false;
|
||||||
protected static $apc = false;
|
protected static $apc = false;
|
||||||
protected static $config_path;
|
protected static $config_path;
|
||||||
protected static $config_path_fallback;
|
protected static $config_path_fallback;
|
||||||
protected static $config_cache = array();
|
protected static $config_cache = array();
|
||||||
const HOSTNAME_REGEX = '/^(([a-zA-Z0-9-]*[a-zA-Z0-9])\.)*([A-Za-z0-9-]*[A-Za-z0-9])$/';
|
const HOSTNAME_REGEX = '/^(([a-zA-Z0-9-]*[a-zA-Z0-9])\.)*([A-Za-z0-9-]*[A-Za-z0-9])$/';
|
||||||
|
|
||||||
protected static function debug($msg) {
|
protected static function debug($msg) {
|
||||||
if (self::$debug) {
|
if (self::$debug) {
|
||||||
//$mem = round(memory_get_usage()/1024, 2);
|
//$mem = round(memory_get_usage()/1024, 2);
|
||||||
//$memPeak = round(memory_get_peak_usage()/1024, 2);
|
//$memPeak = round(memory_get_peak_usage()/1024, 2);
|
||||||
echo '* ',$msg;
|
echo '* ',$msg;
|
||||||
//echo ' - mem used: ',$mem," (peak: $memPeak)\n";
|
//echo ' - mem used: ',$mem," (peak: $memPeak)\n";
|
||||||
echo "\n";
|
echo "\n";
|
||||||
ob_flush();
|
ob_flush();
|
||||||
flush();
|
flush();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// enable APC caching of certain site config files?
|
// enable APC caching of certain site config files?
|
||||||
// If enabled the following site config files will be
|
// If enabled the following site config files will be
|
||||||
// cached in APC cache (when requested for first time):
|
// cached in APC cache (when requested for first time):
|
||||||
// * anything in site_config/custom/ and its corresponding file in site_config/standard/
|
// * anything in site_config/custom/ and its corresponding file in site_config/standard/
|
||||||
// * the site config files associated with HTML fingerprints
|
// * the site config files associated with HTML fingerprints
|
||||||
// * the global site config file
|
// * the global site config file
|
||||||
// returns true if enabled, false otherwise
|
// returns true if enabled, false otherwise
|
||||||
public static function use_apc($apc=true) {
|
public static function use_apc($apc=true) {
|
||||||
if (!function_exists('apc_add')) {
|
if (!function_exists('apc_add')) {
|
||||||
if ($apc) self::debug('APC will not be used (function apc_add does not exist)');
|
if ($apc) self::debug('APC will not be used (function apc_add does not exist)');
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
self::$apc = $apc;
|
self::$apc = $apc;
|
||||||
return $apc;
|
return $apc;
|
||||||
}
|
}
|
||||||
|
|
||||||
// return bool or null
|
// return bool or null
|
||||||
public function tidy($use_default=true) {
|
public function tidy($use_default=true) {
|
||||||
if ($use_default) return (isset($this->tidy)) ? $this->tidy : $this->default_tidy;
|
if ($use_default) return (isset($this->tidy)) ? $this->tidy : $this->default_tidy;
|
||||||
return $this->tidy;
|
return $this->tidy;
|
||||||
}
|
}
|
||||||
|
|
||||||
// return bool or null
|
// return bool or null
|
||||||
public function prune($use_default=true) {
|
public function prune($use_default=true) {
|
||||||
if ($use_default) return (isset($this->prune)) ? $this->prune : $this->default_prune;
|
if ($use_default) return (isset($this->prune)) ? $this->prune : $this->default_prune;
|
||||||
return $this->prune;
|
return $this->prune;
|
||||||
}
|
}
|
||||||
|
|
||||||
// return string or null
|
// return string or null
|
||||||
public function parser($use_default=true) {
|
public function parser($use_default=true) {
|
||||||
if ($use_default) return (isset($this->parser)) ? $this->parser : $this->default_parser;
|
if ($use_default) return (isset($this->parser)) ? $this->parser : $this->default_parser;
|
||||||
return $this->parser;
|
return $this->parser;
|
||||||
}
|
}
|
||||||
|
|
||||||
// return bool or null
|
// return bool or null
|
||||||
public function autodetect_on_failure($use_default=true) {
|
public function autodetect_on_failure($use_default=true) {
|
||||||
if ($use_default) return (isset($this->autodetect_on_failure)) ? $this->autodetect_on_failure : $this->default_autodetect_on_failure;
|
if ($use_default) return (isset($this->autodetect_on_failure)) ? $this->autodetect_on_failure : $this->default_autodetect_on_failure;
|
||||||
return $this->autodetect_on_failure;
|
return $this->autodetect_on_failure;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static function set_config_path($path, $fallback=null) {
|
public static function set_config_path($path, $fallback=null) {
|
||||||
self::$config_path = $path;
|
self::$config_path = $path;
|
||||||
self::$config_path_fallback = $fallback;
|
self::$config_path_fallback = $fallback;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static function add_to_cache($key, SiteConfig $config, $use_apc=true) {
|
public static function add_to_cache($key, SiteConfig $config, $use_apc=true) {
|
||||||
$key = strtolower($key);
|
$key = strtolower($key);
|
||||||
if (substr($key, 0, 4) == 'www.') $key = substr($key, 4);
|
if (substr($key, 0, 4) == 'www.') $key = substr($key, 4);
|
||||||
if ($config->cache_key) $key = $config->cache_key;
|
if ($config->cache_key) $key = $config->cache_key;
|
||||||
self::$config_cache[$key] = $config;
|
self::$config_cache[$key] = $config;
|
||||||
if (self::$apc && $use_apc) {
|
if (self::$apc && $use_apc) {
|
||||||
self::debug("Adding site config to APC cache with key sc.$key");
|
self::debug("Adding site config to APC cache with key sc.$key");
|
||||||
apc_add("sc.$key", $config);
|
apc_add("sc.$key", $config);
|
||||||
}
|
}
|
||||||
self::debug("Cached site config with key $key");
|
self::debug("Cached site config with key $key");
|
||||||
}
|
}
|
||||||
|
|
||||||
public static function is_cached($key) {
|
public static function is_cached($key) {
|
||||||
$key = strtolower($key);
|
$key = strtolower($key);
|
||||||
if (substr($key, 0, 4) == 'www.') $key = substr($key, 4);
|
if (substr($key, 0, 4) == 'www.') $key = substr($key, 4);
|
||||||
if (array_key_exists($key, self::$config_cache)) {
|
if (array_key_exists($key, self::$config_cache)) {
|
||||||
return true;
|
return true;
|
||||||
} elseif (self::$apc && (bool)apc_fetch("sc.$key")) {
|
} elseif (self::$apc && (bool)apc_fetch("sc.$key")) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
public function append(SiteConfig $newconfig) {
|
public function append(SiteConfig $newconfig) {
|
||||||
// check for commands where we accept multiple statements (no test_url)
|
// check for commands where we accept multiple statements (no test_url)
|
||||||
foreach (array('title', 'body', 'author', 'date', 'strip', 'strip_id_or_class', 'strip_image_src', 'single_page_link', 'single_page_link_in_feed', 'next_page_link', 'http_header', 'find_string', 'replace_string') as $var) {
|
foreach (array('title', 'body', 'author', 'date', 'strip', 'strip_id_or_class', 'strip_image_src', 'single_page_link', 'single_page_link_in_feed', 'next_page_link', 'http_header') as $var) {
|
||||||
// append array elements for this config variable from $newconfig to this config
|
// append array elements for this config variable from $newconfig to this config
|
||||||
//$this->$var = $this->$var + $newconfig->$var;
|
//$this->$var = $this->$var + $newconfig->$var;
|
||||||
$this->$var = array_unique(array_merge($this->$var, $newconfig->$var));
|
$this->$var = array_unique(array_merge($this->$var, $newconfig->$var));
|
||||||
}
|
}
|
||||||
// check for single statement commands
|
// check for single statement commands
|
||||||
// we do not overwrite existing non null values
|
// we do not overwrite existing non null values
|
||||||
foreach (array('tidy', 'prune', 'parser', 'autodetect_on_failure') as $var) {
|
foreach (array('tidy', 'prune', 'parser', 'autodetect_on_failure') as $var) {
|
||||||
if ($this->$var === null) $this->$var = $newconfig->$var;
|
if ($this->$var === null) $this->$var = $newconfig->$var;
|
||||||
}
|
}
|
||||||
}
|
// treat find_string and replace_string separately (don't apply array_unique) (thanks fabrizio!)
|
||||||
|
foreach (array('find_string', 'replace_string') as $var) {
|
||||||
// returns SiteConfig instance if an appropriate one is found, false otherwise
|
// append array elements for this config variable from $newconfig to this config
|
||||||
// if $exact_host_match is true, we will not look for wildcard config matches
|
//$this->$var = $this->$var + $newconfig->$var;
|
||||||
// by default if host is 'test.example.org' we will look for and load '.example.org.txt' if it exists
|
$this->$var = array_merge($this->$var, $newconfig->$var);
|
||||||
public static function build($host, $exact_host_match=false) {
|
}
|
||||||
$host = strtolower($host);
|
}
|
||||||
if (substr($host, 0, 4) == 'www.') $host = substr($host, 4);
|
|
||||||
if (!$host || (strlen($host) > 200) || !preg_match(self::HOSTNAME_REGEX, ltrim($host, '.'))) return false;
|
// returns SiteConfig instance if an appropriate one is found, false otherwise
|
||||||
// check for site configuration
|
// if $exact_host_match is true, we will not look for wildcard config matches
|
||||||
$try = array($host);
|
// by default if host is 'test.example.org' we will look for and load '.example.org.txt' if it exists
|
||||||
// should we look for wildcard matches
|
public static function build($host, $exact_host_match=false) {
|
||||||
if (!$exact_host_match) {
|
$host = strtolower($host);
|
||||||
$split = explode('.', $host);
|
if (substr($host, 0, 4) == 'www.') $host = substr($host, 4);
|
||||||
if (count($split) > 1) {
|
if (!$host || (strlen($host) > 200) || !preg_match(self::HOSTNAME_REGEX, ltrim($host, '.'))) return false;
|
||||||
array_shift($split);
|
// check for site configuration
|
||||||
$try[] = '.'.implode('.', $split);
|
$try = array($host);
|
||||||
}
|
// should we look for wildcard matches
|
||||||
}
|
if (!$exact_host_match) {
|
||||||
|
$split = explode('.', $host);
|
||||||
// look for site config file in primary folder
|
if (count($split) > 1) {
|
||||||
self::debug(". looking for site config for $host in primary folder");
|
array_shift($split);
|
||||||
foreach ($try as $h) {
|
$try[] = '.'.implode('.', $split);
|
||||||
if (array_key_exists($h, self::$config_cache)) {
|
}
|
||||||
self::debug("... site config for $h already loaded in this request");
|
}
|
||||||
return self::$config_cache[$h];
|
|
||||||
} elseif (self::$apc && ($sconfig = apc_fetch("sc.$h"))) {
|
// look for site config file in primary folder
|
||||||
self::debug("... site config for $h in APC cache");
|
self::debug(". looking for site config for $host in primary folder");
|
||||||
return $sconfig;
|
foreach ($try as $h) {
|
||||||
} elseif (file_exists(self::$config_path."/$h.txt")) {
|
if (array_key_exists($h, self::$config_cache)) {
|
||||||
self::debug("... found site config ($h.txt)");
|
self::debug("... site config for $h already loaded in this request");
|
||||||
$file_primary = self::$config_path."/$h.txt";
|
return self::$config_cache[$h];
|
||||||
$matched_name = $h;
|
} elseif (self::$apc && ($sconfig = apc_fetch("sc.$h"))) {
|
||||||
break;
|
self::debug("... site config for $h in APC cache");
|
||||||
}
|
return $sconfig;
|
||||||
}
|
} elseif (file_exists(self::$config_path."/$h.txt")) {
|
||||||
|
self::debug("... found site config ($h.txt)");
|
||||||
// if we found site config, process it
|
$file_primary = self::$config_path."/$h.txt";
|
||||||
if (isset($file_primary)) {
|
$matched_name = $h;
|
||||||
$config_lines = file($file_primary, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
|
break;
|
||||||
if (!$config_lines || !is_array($config_lines)) return false;
|
}
|
||||||
$config = self::build_from_array($config_lines);
|
}
|
||||||
// if APC caching is available and enabled, mark this for cache
|
|
||||||
//$config->cache_in_apc = true;
|
// if we found site config, process it
|
||||||
$config->cache_key = $matched_name;
|
if (isset($file_primary)) {
|
||||||
|
$config_lines = file($file_primary, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
|
||||||
// if autodetect on failure is off (on by default) we do not need to look
|
if (!$config_lines || !is_array($config_lines)) return false;
|
||||||
// in secondary folder
|
$config = self::build_from_array($config_lines);
|
||||||
if (!$config->autodetect_on_failure()) {
|
// if APC caching is available and enabled, mark this for cache
|
||||||
self::debug('... autodetect on failure is disabled (no other site config files will be loaded)');
|
//$config->cache_in_apc = true;
|
||||||
return $config;
|
$config->cache_key = $matched_name;
|
||||||
}
|
|
||||||
}
|
// if autodetect on failure is off (on by default) we do not need to look
|
||||||
|
// in secondary folder
|
||||||
// look for site config file in secondary folder
|
if (!$config->autodetect_on_failure()) {
|
||||||
if (isset(self::$config_path_fallback)) {
|
self::debug('... autodetect on failure is disabled (no other site config files will be loaded)');
|
||||||
self::debug(". looking for site config for $host in secondary folder");
|
return $config;
|
||||||
foreach ($try as $h) {
|
}
|
||||||
if (file_exists(self::$config_path_fallback."/$h.txt")) {
|
}
|
||||||
self::debug("... found site config in secondary folder ($h.txt)");
|
|
||||||
$file_secondary = self::$config_path_fallback."/$h.txt";
|
// look for site config file in secondary folder
|
||||||
$matched_name = $h;
|
if (isset(self::$config_path_fallback)) {
|
||||||
break;
|
self::debug(". looking for site config for $host in secondary folder");
|
||||||
}
|
foreach ($try as $h) {
|
||||||
}
|
if (file_exists(self::$config_path_fallback."/$h.txt")) {
|
||||||
if (!isset($file_secondary)) {
|
self::debug("... found site config in secondary folder ($h.txt)");
|
||||||
self::debug("... no site config match in secondary folder");
|
$file_secondary = self::$config_path_fallback."/$h.txt";
|
||||||
}
|
$matched_name = $h;
|
||||||
}
|
break;
|
||||||
|
}
|
||||||
// return false if no config file found
|
}
|
||||||
if (!isset($file_primary) && !isset($file_secondary)) {
|
if (!isset($file_secondary)) {
|
||||||
self::debug("... no site config match for $host");
|
self::debug("... no site config match in secondary folder");
|
||||||
return false;
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// return primary config if secondary not found
|
// return false if no config file found
|
||||||
if (!isset($file_secondary) && isset($config)) {
|
if (!isset($file_primary) && !isset($file_secondary)) {
|
||||||
return $config;
|
self::debug("... no site config match for $host");
|
||||||
}
|
return false;
|
||||||
|
}
|
||||||
// process secondary config file
|
|
||||||
$config_lines = file($file_secondary, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
|
// return primary config if secondary not found
|
||||||
if (!$config_lines || !is_array($config_lines)) {
|
if (!isset($file_secondary) && isset($config)) {
|
||||||
// failed to process secondary
|
return $config;
|
||||||
if (isset($config)) {
|
}
|
||||||
// return primary config
|
|
||||||
return $config;
|
// process secondary config file
|
||||||
} else {
|
$config_lines = file($file_secondary, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
|
||||||
return false;
|
if (!$config_lines || !is_array($config_lines)) {
|
||||||
}
|
// failed to process secondary
|
||||||
}
|
if (isset($config)) {
|
||||||
|
// return primary config
|
||||||
// merge with primary and return
|
return $config;
|
||||||
if (isset($config)) {
|
} else {
|
||||||
self::debug('. merging config files');
|
return false;
|
||||||
$config->append(self::build_from_array($config_lines));
|
}
|
||||||
return $config;
|
}
|
||||||
} else {
|
|
||||||
// return just secondary
|
// merge with primary and return
|
||||||
$config = self::build_from_array($config_lines);
|
if (isset($config)) {
|
||||||
// if APC caching is available and enabled, mark this for cache
|
self::debug('. merging config files');
|
||||||
//$config->cache_in_apc = true;
|
$config->append(self::build_from_array($config_lines));
|
||||||
$config->cache_key = $matched_name;
|
return $config;
|
||||||
return $config;
|
} else {
|
||||||
}
|
// return just secondary
|
||||||
}
|
$config = self::build_from_array($config_lines);
|
||||||
|
// if APC caching is available and enabled, mark this for cache
|
||||||
public static function build_from_array(array $lines) {
|
//$config->cache_in_apc = true;
|
||||||
$config = new SiteConfig();
|
$config->cache_key = $matched_name;
|
||||||
foreach ($lines as $line) {
|
return $config;
|
||||||
$line = trim($line);
|
}
|
||||||
|
}
|
||||||
// skip comments, empty lines
|
|
||||||
if ($line == '' || $line[0] == '#') continue;
|
public static function build_from_array(array $lines) {
|
||||||
|
$config = new SiteConfig();
|
||||||
// get command
|
foreach ($lines as $line) {
|
||||||
$command = explode(':', $line, 2);
|
$line = trim($line);
|
||||||
// if there's no colon ':', skip this line
|
|
||||||
if (count($command) != 2) continue;
|
// skip comments, empty lines
|
||||||
$val = trim($command[1]);
|
if ($line == '' || $line[0] == '#') continue;
|
||||||
$command = trim($command[0]);
|
|
||||||
if ($command == '' || $val == '') continue;
|
// get command
|
||||||
|
$command = explode(':', $line, 2);
|
||||||
// check for commands where we accept multiple statements
|
// if there's no colon ':', skip this line
|
||||||
if (in_array($command, array('title', 'body', 'author', 'date', 'strip', 'strip_id_or_class', 'strip_image_src', 'single_page_link', 'single_page_link_in_feed', 'next_page_link', 'http_header', 'test_url', 'find_string', 'replace_string'))) {
|
if (count($command) != 2) continue;
|
||||||
array_push($config->$command, $val);
|
$val = trim($command[1]);
|
||||||
// check for single statement commands that evaluate to true or false
|
$command = trim($command[0]);
|
||||||
} elseif (in_array($command, array('tidy', 'prune', 'autodetect_on_failure'))) {
|
if ($command == '' || $val == '') continue;
|
||||||
$config->$command = ($val == 'yes');
|
|
||||||
// check for single statement commands stored as strings
|
// check for commands where we accept multiple statements
|
||||||
} elseif (in_array($command, array('parser'))) {
|
if (in_array($command, array('title', 'body', 'author', 'date', 'strip', 'strip_id_or_class', 'strip_image_src', 'single_page_link', 'single_page_link_in_feed', 'next_page_link', 'http_header', 'test_url', 'find_string', 'replace_string'))) {
|
||||||
$config->$command = $val;
|
array_push($config->$command, $val);
|
||||||
// check for replace_string(find): replace
|
// check for single statement commands that evaluate to true or false
|
||||||
} elseif ((substr($command, -1) == ')') && preg_match('!^([a-z0-9_]+)\((.*?)\)$!i', $command, $match)) {
|
} elseif (in_array($command, array('tidy', 'prune', 'autodetect_on_failure'))) {
|
||||||
if (in_array($match[1], array('replace_string'))) {
|
$config->$command = ($val == 'yes');
|
||||||
$command = $match[1];
|
// check for single statement commands stored as strings
|
||||||
array_push($config->find_string, $match[2]);
|
} elseif (in_array($command, array('parser'))) {
|
||||||
array_push($config->$command, $val);
|
$config->$command = $val;
|
||||||
}
|
// check for replace_string(find): replace
|
||||||
}
|
} elseif ((substr($command, -1) == ')') && preg_match('!^([a-z0-9_]+)\((.*?)\)$!i', $command, $match)) {
|
||||||
}
|
if (in_array($match[1], array('replace_string'))) {
|
||||||
return $config;
|
$command = $match[1];
|
||||||
}
|
array_push($config->find_string, $match[2]);
|
||||||
}
|
array_push($config->$command, $val);
|
||||||
?>
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return $config;
|
||||||
|
}
|
||||||
|
}
|
100
inc/3rdparty/libraries/feedwriter/FeedItem.php
vendored
Normal file → Executable file
100
inc/3rdparty/libraries/feedwriter/FeedItem.php
vendored
Normal file → Executable file
@ -1,7 +1,7 @@
|
|||||||
<?php
|
<?php
|
||||||
/**
|
/**
|
||||||
* Univarsel Feed Writer
|
* Univarsel Feed Writer
|
||||||
*
|
*
|
||||||
* FeedItem class - Used as feed element in FeedWriter class
|
* FeedItem class - Used as feed element in FeedWriter class
|
||||||
*
|
*
|
||||||
* @package UnivarselFeedWriter
|
* @package UnivarselFeedWriter
|
||||||
@ -12,20 +12,20 @@
|
|||||||
{
|
{
|
||||||
private $elements = array(); //Collection of feed elements
|
private $elements = array(); //Collection of feed elements
|
||||||
private $version;
|
private $version;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructor
|
* Constructor
|
||||||
*
|
*
|
||||||
* @param constant (RSS1/RSS2/ATOM) RSS2 is default.
|
* @param constant (RSS1/RSS2/ATOM) RSS2 is default.
|
||||||
*/
|
*/
|
||||||
function __construct($version = RSS2)
|
function __construct($version = RSS2)
|
||||||
{
|
{
|
||||||
$this->version = $version;
|
$this->version = $version;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set element (overwrites existing elements with $elementName)
|
* Set element (overwrites existing elements with $elementName)
|
||||||
*
|
*
|
||||||
* @access public
|
* @access public
|
||||||
* @param string The tag name of an element
|
* @param string The tag name of an element
|
||||||
* @param string The content of tag
|
* @param string The content of tag
|
||||||
@ -38,11 +38,11 @@
|
|||||||
unset($this->elements[$elementName]);
|
unset($this->elements[$elementName]);
|
||||||
}
|
}
|
||||||
$this->addElement($elementName, $content, $attributes);
|
$this->addElement($elementName, $content, $attributes);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Add an element to elements array
|
* Add an element to elements array
|
||||||
*
|
*
|
||||||
* @access public
|
* @access public
|
||||||
* @param string The tag name of an element
|
* @param string The tag name of an element
|
||||||
* @param string The content of tag
|
* @param string The content of tag
|
||||||
@ -61,11 +61,11 @@
|
|||||||
$this->elements[$elementName][$i]['content'] = $content;
|
$this->elements[$elementName][$i]['content'] = $content;
|
||||||
$this->elements[$elementName][$i]['attributes'] = $attributes;
|
$this->elements[$elementName][$i]['attributes'] = $attributes;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set multiple feed elements from an array.
|
* Set multiple feed elements from an array.
|
||||||
* Elements which have attributes cannot be added by this method
|
* Elements which have attributes cannot be added by this method
|
||||||
*
|
*
|
||||||
* @access public
|
* @access public
|
||||||
* @param array array of elements in 'tagName' => 'tagContent' format.
|
* @param array array of elements in 'tagName' => 'tagContent' format.
|
||||||
* @return void
|
* @return void
|
||||||
@ -73,15 +73,15 @@
|
|||||||
public function addElementArray($elementArray)
|
public function addElementArray($elementArray)
|
||||||
{
|
{
|
||||||
if(! is_array($elementArray)) return;
|
if(! is_array($elementArray)) return;
|
||||||
foreach ($elementArray as $elementName => $content)
|
foreach ($elementArray as $elementName => $content)
|
||||||
{
|
{
|
||||||
$this->addElement($elementName, $content);
|
$this->addElement($elementName, $content);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return the collection of elements in this feed item
|
* Return the collection of elements in this feed item
|
||||||
*
|
*
|
||||||
* @access public
|
* @access public
|
||||||
* @return array
|
* @return array
|
||||||
*/
|
*/
|
||||||
@ -89,68 +89,74 @@
|
|||||||
{
|
{
|
||||||
return $this->elements;
|
return $this->elements;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Wrapper functions ------------------------------------------------------
|
// Wrapper functions ------------------------------------------------------
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set the 'description' element of feed item
|
* Set the 'description' element of feed item
|
||||||
*
|
*
|
||||||
* @access public
|
* @access public
|
||||||
* @param string The content of 'description' element
|
* @param string The content of 'description' element
|
||||||
* @return void
|
* @return void
|
||||||
*/
|
*/
|
||||||
public function setDescription($description)
|
public function setDescription($description)
|
||||||
{
|
{
|
||||||
$this->setElement('description', $description);
|
$tag = ($this->version == ATOM)? 'summary' : 'description';
|
||||||
|
$this->setElement($tag, $description);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @desc Set the 'title' element of feed item
|
* @desc Set the 'title' element of feed item
|
||||||
* @access public
|
* @access public
|
||||||
* @param string The content of 'title' element
|
* @param string The content of 'title' element
|
||||||
* @return void
|
* @return void
|
||||||
*/
|
*/
|
||||||
public function setTitle($title)
|
public function setTitle($title)
|
||||||
{
|
{
|
||||||
$this->setElement('title', $title);
|
$this->setElement('title', $title);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set the 'date' element of feed item
|
* Set the 'date' element of feed item
|
||||||
*
|
*
|
||||||
* @access public
|
* @access public
|
||||||
* @param string The content of 'date' element
|
* @param string The content of 'date' element
|
||||||
* @return void
|
* @return void
|
||||||
*/
|
*/
|
||||||
public function setDate($date)
|
public function setDate($date)
|
||||||
{
|
{
|
||||||
if(! is_numeric($date))
|
if(! is_numeric($date))
|
||||||
{
|
{
|
||||||
$date = strtotime($date);
|
$date = strtotime($date);
|
||||||
}
|
}
|
||||||
|
|
||||||
if($this->version == RSS2)
|
if($this->version == ATOM)
|
||||||
{
|
{
|
||||||
$tag = 'pubDate';
|
$tag = 'updated';
|
||||||
$value = date(DATE_RSS, $date);
|
$value = date(DATE_ATOM, $date);
|
||||||
}
|
}
|
||||||
else
|
elseif($this->version == RSS2)
|
||||||
{
|
{
|
||||||
$tag = 'dc:date';
|
$tag = 'pubDate';
|
||||||
$value = date("Y-m-d", $date);
|
$value = date(DATE_RSS, $date);
|
||||||
}
|
}
|
||||||
|
else
|
||||||
$this->setElement($tag, $value);
|
{
|
||||||
|
$tag = 'dc:date';
|
||||||
|
$value = date("Y-m-d", $date);
|
||||||
|
}
|
||||||
|
|
||||||
|
$this->setElement($tag, $value);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set the 'link' element of feed item
|
* Set the 'link' element of feed item
|
||||||
*
|
*
|
||||||
* @access public
|
* @access public
|
||||||
* @param string The content of 'link' element
|
* @param string The content of 'link' element
|
||||||
* @return void
|
* @return void
|
||||||
*/
|
*/
|
||||||
public function setLink($link)
|
public function setLink($link)
|
||||||
{
|
{
|
||||||
if($this->version == RSS2 || $this->version == RSS1)
|
if($this->version == RSS2 || $this->version == RSS1)
|
||||||
{
|
{
|
||||||
@ -161,27 +167,27 @@
|
|||||||
{
|
{
|
||||||
$this->setElement('link','',array('href'=>$link));
|
$this->setElement('link','',array('href'=>$link));
|
||||||
$this->setElement('id', FeedWriter::uuid($link,'urn:uuid:'));
|
$this->setElement('id', FeedWriter::uuid($link,'urn:uuid:'));
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set the 'source' element of feed item
|
* Set the 'source' element of feed item
|
||||||
*
|
*
|
||||||
* @access public
|
* @access public
|
||||||
* @param string The content of 'source' element
|
* @param string The content of 'source' element
|
||||||
* @return void
|
* @return void
|
||||||
*/
|
*/
|
||||||
public function setSource($link)
|
public function setSource($link)
|
||||||
{
|
{
|
||||||
$attributes = array('url'=>$link);
|
$attributes = array('url'=>$link);
|
||||||
$this->setElement('source', "wallabag",$attributes);
|
$this->setElement('source', "wallabag",$attributes);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set the 'enclosure' element of feed item
|
* Set the 'enclosure' element of feed item
|
||||||
* For RSS 2.0 only
|
* For RSS 2.0 only
|
||||||
*
|
*
|
||||||
* @access public
|
* @access public
|
||||||
* @param string The url attribute of enclosure tag
|
* @param string The url attribute of enclosure tag
|
||||||
* @param string The length attribute of enclosure tag
|
* @param string The length attribute of enclosure tag
|
||||||
@ -193,6 +199,6 @@
|
|||||||
$attributes = array('url'=>$url, 'length'=>$length, 'type'=>$type);
|
$attributes = array('url'=>$url, 'length'=>$length, 'type'=>$type);
|
||||||
$this->setElement('enclosure','',$attributes);
|
$this->setElement('enclosure','',$attributes);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // end of class FeedItem
|
} // end of class FeedItem
|
||||||
?>
|
?>
|
17
inc/3rdparty/libraries/feedwriter/FeedWriter.php
vendored
17
inc/3rdparty/libraries/feedwriter/FeedWriter.php
vendored
@ -97,15 +97,12 @@ define('JSONP', 3, true);
|
|||||||
header('X-content-type-options: nosniff');
|
header('X-content-type-options: nosniff');
|
||||||
} elseif ($this->version == JSON) {
|
} elseif ($this->version == JSON) {
|
||||||
header('Content-type: application/json; charset=UTF-8');
|
header('Content-type: application/json; charset=UTF-8');
|
||||||
|
$this->json = new stdClass();
|
||||||
} elseif ($this->version == JSONP) {
|
} elseif ($this->version == JSONP) {
|
||||||
header('Content-type: application/javascript; charset=UTF-8');
|
header('Content-type: application/javascript; charset=UTF-8');
|
||||||
|
$this->json = new stdClass();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($this->version == JSON || $this->version == JSONP) {
|
|
||||||
$this->json = new stdClass();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
$this->printHead();
|
$this->printHead();
|
||||||
$this->printChannels();
|
$this->printChannels();
|
||||||
@ -116,6 +113,11 @@ define('JSONP', 3, true);
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public function &getItems()
|
||||||
|
{
|
||||||
|
return $this->items;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new FeedItem.
|
* Create a new FeedItem.
|
||||||
*
|
*
|
||||||
@ -199,7 +201,8 @@ define('JSONP', 3, true);
|
|||||||
*/
|
*/
|
||||||
public function setDescription($description)
|
public function setDescription($description)
|
||||||
{
|
{
|
||||||
$this->setChannelElement('description', $description);
|
$tag = ($this->version == ATOM)? 'subtitle' : 'description';
|
||||||
|
$this->setChannelElement($tag, $description);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -244,7 +247,7 @@ define('JSONP', 3, true);
|
|||||||
{
|
{
|
||||||
$out = '<?xml version="1.0" encoding="utf-8"?>'."\n";
|
$out = '<?xml version="1.0" encoding="utf-8"?>'."\n";
|
||||||
if ($this->xsl) $out .= '<?xml-stylesheet type="text/xsl" href="'.htmlspecialchars($this->xsl).'"?>' . PHP_EOL;
|
if ($this->xsl) $out .= '<?xml-stylesheet type="text/xsl" href="'.htmlspecialchars($this->xsl).'"?>' . PHP_EOL;
|
||||||
$out .= '<rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:media="http://search.yahoo.com/mrss/">' . PHP_EOL;
|
$out .= '<rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:media="http://search.yahoo.com/mrss/">' . PHP_EOL;
|
||||||
echo $out;
|
echo $out;
|
||||||
}
|
}
|
||||||
elseif ($this->version == JSON || $this->version == JSONP)
|
elseif ($this->version == JSON || $this->version == JSONP)
|
||||||
|
13
inc/3rdparty/libraries/html5/TreeBuilder.php
vendored
13
inc/3rdparty/libraries/html5/TreeBuilder.php
vendored
@ -134,6 +134,7 @@ class HTML5_TreeBuilder {
|
|||||||
|
|
||||||
// Namespaces for foreign content
|
// Namespaces for foreign content
|
||||||
const NS_HTML = null; // to prevent DOM from requiring NS on everything
|
const NS_HTML = null; // to prevent DOM from requiring NS on everything
|
||||||
|
const NS_XHTML = 'http://www.w3.org/1999/xhtml';
|
||||||
const NS_MATHML = 'http://www.w3.org/1998/Math/MathML';
|
const NS_MATHML = 'http://www.w3.org/1998/Math/MathML';
|
||||||
const NS_SVG = 'http://www.w3.org/2000/svg';
|
const NS_SVG = 'http://www.w3.org/2000/svg';
|
||||||
const NS_XLINK = 'http://www.w3.org/1999/xlink';
|
const NS_XLINK = 'http://www.w3.org/1999/xlink';
|
||||||
@ -3157,11 +3158,19 @@ class HTML5_TreeBuilder {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private function insertElement($token, $append = true) {
|
private function insertElement($token, $append = true) {
|
||||||
$el = $this->dom->createElementNS(self::NS_HTML, $token['name']);
|
//$el = $this->dom->createElementNS(self::NS_HTML, $token['name']);
|
||||||
|
$namespaceURI = strpos($token['name'], ':') ? self::NS_XHTML : self::NS_HTML;
|
||||||
|
$el = $this->dom->createElementNS($namespaceURI, $token['name']);
|
||||||
|
|
||||||
if (!empty($token['attr'])) {
|
if (!empty($token['attr'])) {
|
||||||
foreach($token['attr'] as $attr) {
|
foreach($token['attr'] as $attr) {
|
||||||
if(!$el->hasAttribute($attr['name'])) {
|
|
||||||
|
// mike@macgirvin.com 2011-11-17, check attribute name for
|
||||||
|
// validity (ignoring extenders and combiners) as illegal chars in names
|
||||||
|
// causes everything to abort
|
||||||
|
|
||||||
|
$valid = preg_match('/^[a-zA-Z\_\:]([\-a-zA-Z0-9\_\:\.]+$)/',$attr['name']);
|
||||||
|
if($attr['name'] && (!$el->hasAttribute($attr['name'])) && ($valid)) {
|
||||||
$el->setAttribute($attr['name'], $attr['value']);
|
$el->setAttribute($attr['name'], $attr['value']);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,404 +1,403 @@
|
|||||||
<?php
|
<?php
|
||||||
/**
|
/**
|
||||||
* Cookie Jar
|
* Cookie Jar
|
||||||
*
|
*
|
||||||
* PHP class for handling cookies, as defined by the Netscape spec:
|
* PHP class for handling cookies, as defined by the Netscape spec:
|
||||||
* <http://curl.haxx.se/rfc/cookie_spec.html>
|
* <http://curl.haxx.se/rfc/cookie_spec.html>
|
||||||
*
|
*
|
||||||
* This class should be used to handle cookies (storing cookies from HTTP response messages, and
|
* This class should be used to handle cookies (storing cookies from HTTP response messages, and
|
||||||
* sending out cookies in HTTP request messages). This has been adapted for FiveFilters.org
|
* sending out cookies in HTTP request messages). This has been adapted for FiveFilters.org
|
||||||
* from the original version used in HTTP Navigator. See http://www.keyvan.net/code/http-navigator/
|
* from the original version used in HTTP Navigator. See http://www.keyvan.net/code/http-navigator/
|
||||||
*
|
*
|
||||||
* This class is mainly based on Cookies.pm <http://search.cpan.org/author/GAAS/libwww-perl-5.65/
|
* This class is mainly based on Cookies.pm <http://search.cpan.org/author/GAAS/libwww-perl-5.65/
|
||||||
* lib/HTTP/Cookies.pm> from the libwww-perl collection <http://www.linpro.no/lwp/>.
|
* lib/HTTP/Cookies.pm> from the libwww-perl collection <http://www.linpro.no/lwp/>.
|
||||||
* Unlike Cookies.pm, this class only supports the Netscape cookie spec, not RFC 2965.
|
* Unlike Cookies.pm, this class only supports the Netscape cookie spec, not RFC 2965.
|
||||||
*
|
*
|
||||||
* @version 0.5
|
* @version 0.5
|
||||||
* @date 2011-03-15
|
* @date 2011-03-15
|
||||||
* @see http://php.net/HttpRequestPool
|
* @see http://php.net/HttpRequestPool
|
||||||
* @author Keyvan Minoukadeh
|
* @author Keyvan Minoukadeh
|
||||||
* @copyright 2011 Keyvan Minoukadeh
|
* @copyright 2011 Keyvan Minoukadeh
|
||||||
* @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3
|
* @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3
|
||||||
*/
|
*/
|
||||||
|
|
||||||
class CookieJar
{
    /**
     * Cookies - array containing all cookies.
     *
     * <pre>
     * Cookies are stored like this:
     *   [domain][path][name] = array
     * where array is:
     *   0 => value, 1 => secure, 2 => expires
     * </pre>
     * Domain keys are stored lower-cased (see set_cookie()). 'expires' is a
     * Unix timestamp, or null for a session cookie.
     *
     * @var array
     */
    public $cookies = array();

    /**
     * When true, debug() echoes progress messages.
     *
     * @var bool
     */
    public $debug = false;

    /**
     * Constructor
     */
    public function __construct()
    {
    }

    /**
     * Echo a debug message (with memory usage) when $this->debug is enabled.
     *
     * @param string $msg message to print
     * @param string $file optional file name, printed together with $line
     * @param int $line optional line number, printed together with $file
     * @return void
     */
    protected function debug($msg, $file=null, $line=null)
    {
        if (!$this->debug) return;
        $mem = round(memory_get_usage()/1024, 2);
        $memPeak = round(memory_get_peak_usage()/1024, 2);
        echo '* ',$msg;
        if (isset($file, $line)) echo " ($file line $line)";
        echo ' - mem used: ',$mem," (peak: $memPeak)\n";
        // ob_flush() raises a notice when no output buffer is active
        if (ob_get_level() > 0) ob_flush();
        flush();
    }

    /**
     * Get matching cookies
     *
     * Builds the value of a Cookie request header for the given URL from the
     * cookies held in this jar.
     *
     * @param string $url absolute URL of the request (scheme and host are required)
     * @return string|false matching cookies as "name=value; name2=value2"
     *                      (empty string if none match), or false if $url
     *                      cannot be parsed into a scheme and host
     */
    public function getMatchingCookies($url)
    {
        $parts = @parse_url($url);
        if (!is_array($parts) || !isset($parts['scheme'], $parts['host'])) {
            return false;
        }
        // host names are case-insensitive; jar keys are stored lower-cased
        $domain = strtolower($parts['host']);
        if (strpos($domain, '.') === false) $domain .= '.local';
        // a URL without a path (e.g. http://example.org) requests "/"
        $request_path = isset($parts['path']) ? $parts['path'] : '';
        if ($request_path == '') $request_path = '/';
        $request_secure = (strtolower($parts['scheme']) == 'https');
        unset($parts);

        // RFC 2965 notes:
        //  If multiple cookies satisfy the criteria above, they are ordered in
        //  the Cookie header such that those with more specific Path attributes
        //  precede those with less specific. Ordering with respect to other
        //  attributes (e.g., Domain) is unspecified.
        $now = time();
        $matched_cookies = array();

        // domain - find matching domains, walking from the full host name
        // towards the top-level domain
        $this->debug('Finding matching domains for '.$domain, __FILE__, __LINE__);
        while (strpos($domain, '.') !== false) {
            if (isset($this->cookies[$domain])) {
                $this->debug(' domain match found: '.$domain);
                $cookies = $this->cookies[$domain];
                // paths - find matching paths starting from most specific
                $this->debug(' - Finding matching paths for '.$request_path);
                $paths = array_keys($cookies);
                usort($paths, array($this, '_cmp_length'));
                foreach ($paths as $path) {
                    // continue to next cookie if request path does not path-match cookie path
                    if (!$this->_path_match($request_path, $path)) continue;
                    // loop through cookie names
                    $this->debug(' path match found: '.$path);
                    foreach ($cookies[$path] as $name => $values) {
                        // if this cookie is secure but request isn't, continue to next cookie
                        if ($values[1] && !$request_secure) continue;
                        // if cookie is not a session cookie and has expired, continue to next cookie
                        if (is_int($values[2]) && ($values[2] < $now)) continue;
                        // cookie matches request
                        $this->debug(' cookie match: '.$name.'='.$values[0]);
                        $matched_cookies[] = $name.'='.$values[0];
                    }
                }
            }
            $domain = $this->_reduce_domain($domain);
        }
        // return cookies
        return implode('; ', $matched_cookies);
    }

    /**
     * Parse Set-Cookie values.
     *
     * Each header value is parsed and, if its Domain and Path attributes are
     * acceptable for the request URL, stored in the jar via set_cookie().
     *
     * @param string $url URL of the request that produced the Set-Cookie headers
     * @param array $set_cookies array holding 1 or more "Set-Cookie" header values
     * @return void
     * @see extractCookies()
     */
    public function storeCookies($url, $set_cookies)
    {
        if (empty($set_cookies) || !is_array($set_cookies)) return;
        $param = @parse_url($url);
        if (!is_array($param) || !isset($param['host'])) return;
        // host names are case-insensitive; normalise before comparing/storing
        $request_host = strtolower($param['host']);
        if (strpos($request_host, '.') === false) $request_host .= '.local';
        $request_path = isset($param['path']) ? $param['path'] : '';
        if ($request_path == '') $request_path = '/';
        //
        // loop through set-cookie headers
        //
        foreach ($set_cookies as $set_cookie) {
            $this->debug('Parsing: '.$set_cookie);
            // temporary cookie store (before adding to jar)
            $tmp_cookie = array();
            $param = explode(';', $set_cookie);
            // loop through params
            $param_count = count($param);
            for ($x=0; $x<$param_count; $x++) {
                $key_val = explode('=', $param[$x], 2);
                if (count($key_val) != 2) {
                    // if the first param isn't a name=value pair, continue to the next set-cookie
                    // header
                    if ($x == 0) continue 2;
                    // check for secure flag
                    if (strtolower(trim($key_val[0])) == 'secure') $tmp_cookie['secure'] = true;
                    // continue to next param
                    continue;
                }
                list($key, $val) = array_map('trim', $key_val);
                // first name=value pair is the cookie name and value
                // the name and value are stored under 'name' and 'value' to avoid conflicts
                // with later parameters.
                if ($x == 0) {
                    $tmp_cookie = array('name'=>$key, 'value'=>$val);
                    continue;
                }
                $key = strtolower($key);
                if (in_array($key, array('expires', 'path', 'domain', 'secure'))) {
                    $tmp_cookie[$key] = $val;
                }
            }
            //
            // set cookie
            //
            // check domain
            if (isset($tmp_cookie['domain'])) {
                $tmp_cookie['domain'] = strtolower($tmp_cookie['domain']);
            }
            if (isset($tmp_cookie['domain']) && ($tmp_cookie['domain'] != $request_host) &&
                    ($tmp_cookie['domain'] != ".$request_host")) {
                $domain = $tmp_cookie['domain'];
                if ((strpos($domain, '.') === false) && ($domain != 'local')) {
                    $this->debug(' - domain "'.$domain.'" has no dot and is not a local domain');
                    continue;
                }
                if (preg_match('/\.[0-9]+$/', $domain)) {
                    $this->debug(' - domain "'.$domain.'" appears to be an ip address');
                    continue;
                }
                if (substr($domain, 0, 1) != '.') $domain = ".$domain";
                if (!$this->_domain_match($request_host, $domain)) {
                    $this->debug(' - request host "'.$request_host.'" does not domain-match "'.$domain.'"');
                    continue;
                }
            } else {
                // if domain is not specified in the set-cookie header, domain will default to
                // the request host
                $domain = $request_host;
            }
            // check path
            if (isset($tmp_cookie['path']) && ($tmp_cookie['path'] != '')) {
                $path = urldecode($tmp_cookie['path']);
                if (!$this->_path_match($request_path, $path)) {
                    $this->debug(' - request path "'.$request_path.'" does not path-match "'.$path.'"');
                    continue;
                }
            } else {
                // default path: the request path up to (not including) its last slash
                $slash = strrpos($request_path, '/');
                $path = ($slash === false) ? '' : substr($request_path, 0, $slash);
                if ($path == '') $path = '/';
            }
            // check if secure
            $secure = isset($tmp_cookie['secure']);
            // check expiry
            $expires = null;
            if (isset($tmp_cookie['expires'])) {
                $expires = strtotime($tmp_cookie['expires']);
                // strtotime() returns false for a date it cannot parse; ignore the
                // attribute in that case so the cookie is kept as a session cookie
                if ($expires === false) $expires = null;
            }
            // set cookie
            $this->set_cookie($domain, $path, $tmp_cookie['name'], $tmp_cookie['value'], $secure, $expires);
        }
    }

    /**
     * Return array of set-cookie values extracted from HTTP response headers.
     *
     * Parsing stops at the first empty line after the first line (end of
     * headers). Folded continuation lines are appended to the preceding
     * Set-Cookie value, separated by a single space.
     *
     * @param string $h raw HTTP response headers
     * @return array list of Set-Cookie header values (without the header name)
     */
    public function extractCookies($h)
    {
        $headers = array();
        $last_match = false;
        $first_line = true;
        foreach (explode("\n", $h) as $line) {
            $line = rtrim($line);
            if (!$first_line) {
                // an empty line marks the end of the headers; this also copes
                // with servers that use bare LF instead of CRLF
                if (trim($line) == '') {
                    break;
                }
                // check for continuation line...
                // RFC 2616 Section 2.2 "Basic Rules":
                //  HTTP/1.1 header field values can be folded onto multiple lines if the
                //  continuation line begins with a space or horizontal tab. All linear
                //  white space, including folding, has the same semantics as SP. A
                //  recipient MAY replace any linear white space with a single SP before
                //  interpreting the field value or forwarding the message downstream.
                if ($last_match && preg_match('/^\s+(.*)/', $line, $match)) {
                    // append to previous header value
                    $headers[count($headers)-1] .= ' '.rtrim($match[1]);
                    continue;
                }
            }
            $first_line = false;
            // split header name and value
            if (preg_match('/^Set-Cookie\s*:\s*(.*)/i', $line, $match)) {
                $headers[] = rtrim($match[1]);
                $last_match = true;
            } else {
                $last_match = false;
            }
        }
        return $headers;
    }

    /**
     * Set Cookie
     * @param string $domain (stored lower-cased)
     * @param string $path
     * @param string $name cookie name
     * @param string $value cookie value
     * @param bool $secure
     * @param int $expires expiry time (null if session cookie, <= 0 will delete cookie)
     * @return void
     */
    public function set_cookie($domain, $path, $name, $value, $secure=false, $expires=null)
    {
        if ($domain == '') return;
        if ($path == '') return;
        if ($name == '') return;
        $domain = strtolower($domain);
        // check if cookie needs to go
        if (isset($expires) && ($expires <= 0)) {
            unset($this->cookies[$domain][$path][$name]);
            return;
        }
        if ($value == '') return;
        $this->cookies[$domain][$path][$name] = array($value, $secure, $expires);
    }

    /**
     * Clear cookies - [domain [,path [,name]]] - call method with no arguments to clear all cookies.
     * @param string $domain
     * @param string $path
     * @param string $name
     * @return void
     */
    public function clear($domain=null, $path=null, $name=null)
    {
        if (!isset($domain)) {
            $this->cookies = array();
            return;
        }
        // jar keys are stored lower-cased
        $domain = strtolower($domain);
        if (!isset($path)) {
            unset($this->cookies[$domain]);
        } elseif (!isset($name)) {
            unset($this->cookies[$domain][$path]);
        } else {
            unset($this->cookies[$domain][$path][$name]);
        }
    }

    /**
     * Compare string length - used for sorting (longest string first)
     * @access private
     * @return int
     */
    public function _cmp_length($a, $b)
    {
        $la = strlen($a); $lb = strlen($b);
        if ($la == $lb) return 0;
        return ($la > $lb) ? -1 : 1;
    }

    /**
     * Reduce domain
     *
     * Strips one step from the front of a domain: a leading dot if there is
     * one, otherwise everything before the first dot
     * (www.example.com -> .example.com -> example.com -> .com -> com).
     *
     * @param string $domain
     * @return string
     * @access private
     */
    public function _reduce_domain($domain)
    {
        if ($domain == '') return '';
        if (substr($domain, 0, 1) == '.') return substr($domain, 1);
        $dot = strpos($domain, '.');
        // no dot left: nothing to strip
        if ($dot === false) return $domain;
        return substr($domain, $dot);
    }

    /**
     * Path match - check if path1 path-matches path2
     *
     * From RFC 2965:
     * <i>For two strings that represent paths, P1 and P2, P1 path-matches P2
     * if P2 is a prefix of P1 (including the case where P1 and P2 string-
     * compare equal). Thus, the string /tec/waldo path-matches /tec.</i>
     * @param string $path1
     * @param string $path2
     * @return bool
     * @access private
     */
    public function _path_match($path1, $path2)
    {
        return (substr($path1, 0, strlen($path2)) == $path2);
    }

    /**
     * Domain match - check if domain1 domain-matches domain2
     *
     * A few extracts from RFC 2965:
     * - A Set-Cookie2 from request-host y.x.foo.com for Domain=.foo.com
     *   would be rejected, because H is y.x and contains a dot.
     *
     * - A Set-Cookie2 from request-host x.foo.com for Domain=.foo.com
     *   would be accepted.
     *
     * - A Set-Cookie2 with Domain=.com or Domain=.com., will always be
     *   rejected, because there is no embedded dot.
     *
     * - A Set-Cookie2 from request-host example for Domain=.local will
     *   be accepted, because the effective host name for the request-
     *   host is example.local, and example.local domain-matches .local.
     *
     * I'm ignoring the first point for now (must check to see how other browsers handle
     * this rule for Set-Cookie headers)
     *
     * @param string $domain1
     * @param string $domain2
     * @return bool
     * @access private
     */
    public function _domain_match($domain1, $domain2)
    {
        $domain1 = strtolower($domain1);
        $domain2 = strtolower($domain2);
        // compare, then strip one step from the front of domain1, until no dot remains
        while (strpos($domain1, '.') !== false) {
            if ($domain1 == $domain2) return true;
            $domain1 = $this->_reduce_domain($domain1);
        }
        return false;
    }
}
|
||||||
?>
|
|
File diff suppressed because it is too large
Load Diff
@ -1,79 +1,78 @@
|
|||||||
<?php
|
<?php
|
||||||
/**
|
/**
|
||||||
* Humble HTTP Agent extension for SimplePie_File
|
* Humble HTTP Agent extension for SimplePie_File
|
||||||
*
|
*
|
||||||
* This class is designed to extend and override SimplePie_File
|
* This class is designed to extend and override SimplePie_File
|
||||||
* in order to prevent duplicate HTTP requests being sent out.
|
* in order to prevent duplicate HTTP requests being sent out.
|
||||||
* The idea is to initialise an instance of Humble HTTP Agent
|
* The idea is to initialise an instance of Humble HTTP Agent
|
||||||
* and attach it, to a static class variable, of this class.
|
* and attach it, to a static class variable, of this class.
|
||||||
* SimplePie will then automatically initialise this class
|
* SimplePie will then automatically initialise this class
|
||||||
*
|
*
|
||||||
* @date 2011-02-28
|
* @date 2011-02-28
|
||||||
*/
|
*/
|
||||||
|
|
||||||
class SimplePie_HumbleHttpAgent extends SimplePie_File
{
    /**
     * Agent used for every fetch made through this class; set with set_agent().
     *
     * @var HumbleHttpAgent|null
     */
    protected static $agent;

    public $url;
    public $useragent;
    public $success = true;
    public $headers = array();
    public $body;
    public $status_code;
    public $redirects = 0;
    public $error;
    public $method = SIMPLEPIE_FILE_SOURCE_NONE;

    /**
     * Attach the HumbleHttpAgent instance that instances of this class fetch with.
     *
     * @param HumbleHttpAgent $agent
     * @return void
     */
    public static function set_agent(HumbleHttpAgent $agent)
    {
        self::$agent = $agent;
    }

    /**
     * Fetch $url through the attached agent and populate the result properties.
     *
     * On failure $success is set to false and $error holds a short reason.
     * $timeout, $redirects, $headers and $force_fsockopen are accepted for
     * signature compatibility but are not used by this implementation.
     *
     * @param string $url URL to fetch (must start with http:// or https://)
     * @param int $timeout unused
     * @param int $redirects unused
     * @param array|null $headers unused (see TODO below)
     * @param string|null $useragent stored in $this->useragent
     * @param bool $force_fsockopen unused
     */
    public function __construct($url, $timeout = 10, $redirects = 5, $headers = null, $useragent = null, $force_fsockopen = false)
    {
        if (class_exists('idna_convert')) {
            // rebuild the URL with an IDNA-encoded authority
            $idn = new idna_convert();
            $parsed = SimplePie_Misc::parse_url($url);
            $url = SimplePie_Misc::compress_parse_url($parsed['scheme'], $idn->encode($parsed['authority']), $parsed['path'], $parsed['query'], $parsed['fragment']);
        }
        $this->url = $url;
        $this->useragent = $useragent;

        if (!preg_match('/^http(s)?:\/\//i', $url)) {
            $this->error = 'invalid URL';
            $this->success = false;
            return;
        }

        $this->method = SIMPLEPIE_FILE_SOURCE_REMOTE | SIMPLEPIE_FILE_SOURCE_CURL;

        //TODO: allow for HTTP headers ($headers is currently not passed on to the agent)

        if (self::$agent === null) {
            // set_agent() was never called; report it rather than failing fatally
            $this->error = 'no HTTP agent set';
            $this->success = false;
            return;
        }

        $response = self::$agent->get($url);

        if ($response === false || !isset($response['status_code'])) {
            $this->error = 'failed to fetch URL';
            $this->success = false;
            return;
        }

        // The extra lines at the end are there to satisfy SimplePie's HTTP parser.
        // The class expects a full HTTP message, whereas we're giving it only
        // headers - the new lines indicate the start of the body.
        $parser = new SimplePie_HTTP_Parser($response['headers']."\r\n\r\n");
        if ($parser->parse()) {
            $this->headers = $parser->headers;
            // the body comes from the agent's response, not from the parser
            $this->body = $response['body'];
            $this->status_code = $parser->status_code;
        } else {
            // without parsed headers there is no status code or body to hand on
            $this->error = 'failed to parse HTTP headers';
            $this->success = false;
        }
    }
}
|
||||||
?>
|
|
File diff suppressed because it is too large
Load Diff
2274
inc/3rdparty/libraries/readability/Readability.php
vendored
2274
inc/3rdparty/libraries/readability/Readability.php
vendored
File diff suppressed because it is too large
Load Diff
361
inc/3rdparty/makefulltextfeed.php
vendored
361
inc/3rdparty/makefulltextfeed.php
vendored
@ -3,8 +3,8 @@
|
|||||||
// Author: Keyvan Minoukadeh
|
// Author: Keyvan Minoukadeh
|
||||||
// Copyright (c) 2013 Keyvan Minoukadeh
|
// Copyright (c) 2013 Keyvan Minoukadeh
|
||||||
// License: AGPLv3
|
// License: AGPLv3
|
||||||
// Version: 3.1
|
// Version: 3.2
|
||||||
// Date: 2013-03-05
|
// Date: 2013-05-13
|
||||||
// More info: http://fivefilters.org/content-only/
|
// More info: http://fivefilters.org/content-only/
|
||||||
// Help: http://help.fivefilters.org
|
// Help: http://help.fivefilters.org
|
||||||
|
|
||||||
@ -25,12 +25,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||||||
|
|
||||||
// Usage
|
// Usage
|
||||||
// -----
|
// -----
|
||||||
// Request this file passing it your feed in the querystring: makefulltextfeed.php?url=mysite.org
|
// Request this file passing it a web page or feed URL in the querystring: makefulltextfeed.php?url=example.org/article
|
||||||
// The following options can be passed in the querystring:
|
// For more request parameters, see http://help.fivefilters.org/customer/portal/articles/226660-usage
|
||||||
// * URL: url=[feed or website url] (required, should be URL-encoded - in php: urlencode($url))
|
|
||||||
// * URL points to HTML (not feed): html=true (optional, by default it's automatically detected)
|
|
||||||
// * API key: key=[api key] (optional, refer to config.php)
|
|
||||||
// * Max entries to process: max=[max number of items] (optional)
|
|
||||||
|
|
||||||
error_reporting(E_ALL ^ E_NOTICE);
|
error_reporting(E_ALL ^ E_NOTICE);
|
||||||
ini_set("display_errors", 1);
|
ini_set("display_errors", 1);
|
||||||
@ -76,8 +72,8 @@ header('X-Robots-Tag: noindex, nofollow');
|
|||||||
////////////////////////////////
|
////////////////////////////////
|
||||||
// Check if service is enabled
|
// Check if service is enabled
|
||||||
////////////////////////////////
|
////////////////////////////////
|
||||||
if (!$options->enabled) {
|
if (!$options->enabled) {
|
||||||
die('The full-text RSS service is currently disabled');
|
die('The full-text RSS service is currently disabled');
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////
|
////////////////////////////////
|
||||||
@ -121,8 +117,8 @@ $options->smart_cache = $options->smart_cache && function_exists('apc_inc');
|
|||||||
////////////////////////////////
|
////////////////////////////////
|
||||||
// Check for feed URL
|
// Check for feed URL
|
||||||
////////////////////////////////
|
////////////////////////////////
|
||||||
if (!isset($_GET['url'])) {
|
if (!isset($_GET['url'])) {
|
||||||
die('No URL supplied');
|
die('No URL supplied');
|
||||||
}
|
}
|
||||||
$url = trim($_GET['url']);
|
$url = trim($_GET['url']);
|
||||||
if (strtolower(substr($url, 0, 7)) == 'feed://') {
|
if (strtolower(substr($url, 0, 7)) == 'feed://') {
|
||||||
@ -161,10 +157,12 @@ if (isset($_GET['key']) && ($key_index = array_search($_GET['key'], $options->ap
|
|||||||
if (isset($_GET['links'])) $redirect .= '&links='.urlencode($_GET['links']);
|
if (isset($_GET['links'])) $redirect .= '&links='.urlencode($_GET['links']);
|
||||||
if (isset($_GET['exc'])) $redirect .= '&exc='.urlencode($_GET['exc']);
|
if (isset($_GET['exc'])) $redirect .= '&exc='.urlencode($_GET['exc']);
|
||||||
if (isset($_GET['format'])) $redirect .= '&format='.urlencode($_GET['format']);
|
if (isset($_GET['format'])) $redirect .= '&format='.urlencode($_GET['format']);
|
||||||
if (isset($_GET['callback'])) $redirect .= '&callback='.urlencode($_GET['callback']);
|
if (isset($_GET['callback'])) $redirect .= '&callback='.urlencode($_GET['callback']);
|
||||||
if (isset($_GET['l'])) $redirect .= '&l='.urlencode($_GET['l']);
|
if (isset($_GET['l'])) $redirect .= '&l='.urlencode($_GET['l']);
|
||||||
if (isset($_GET['xss'])) $redirect .= '&xss';
|
if (isset($_GET['xss'])) $redirect .= '&xss';
|
||||||
if (isset($_GET['use_extracted_title'])) $redirect .= '&use_extracted_title';
|
if (isset($_GET['use_extracted_title'])) $redirect .= '&use_extracted_title';
|
||||||
|
if (isset($_GET['content'])) $redirect .= '&content='.urlencode($_GET['content']);
|
||||||
|
if (isset($_GET['summary'])) $redirect .= '&summary='.urlencode($_GET['summary']);
|
||||||
if (isset($_GET['debug'])) $redirect .= '&debug';
|
if (isset($_GET['debug'])) $redirect .= '&debug';
|
||||||
if ($debug_mode) {
|
if ($debug_mode) {
|
||||||
debug('Redirecting to hide access key, follow URL below to continue');
|
debug('Redirecting to hide access key, follow URL below to continue');
|
||||||
@ -177,7 +175,7 @@ if (isset($_GET['key']) && ($key_index = array_search($_GET['key'], $options->ap
|
|||||||
|
|
||||||
///////////////////////////////////////////////
|
///////////////////////////////////////////////
|
||||||
// Set timezone.
|
// Set timezone.
|
||||||
// Prevents warnings, but needs more testing -
|
// Prevents warnings, but needs more testing -
|
||||||
// perhaps if timezone is set in php.ini we
|
// perhaps if timezone is set in php.ini we
|
||||||
// don't need to set it at all...
|
// don't need to set it at all...
|
||||||
///////////////////////////////////////////////
|
///////////////////////////////////////////////
|
||||||
@ -199,7 +197,7 @@ if (isset($_GET['key']) && isset($_GET['hash']) && isset($options->api_keys[(int
|
|||||||
}
|
}
|
||||||
$key_index = ($valid_key) ? (int)$_GET['key'] : 0;
|
$key_index = ($valid_key) ? (int)$_GET['key'] : 0;
|
||||||
if (!$valid_key && $options->key_required) {
|
if (!$valid_key && $options->key_required) {
|
||||||
die('A valid key must be supplied');
|
die('A valid key must be supplied');
|
||||||
}
|
}
|
||||||
if (!$valid_key && isset($_GET['key']) && $_GET['key'] != '') {
|
if (!$valid_key && isset($_GET['key']) && $_GET['key'] != '') {
|
||||||
die('The entered key is invalid');
|
die('The entered key is invalid');
|
||||||
@ -250,6 +248,28 @@ if ($options->favour_feed_titles == 'user') {
|
|||||||
$favour_feed_titles = $options->favour_feed_titles;
|
$favour_feed_titles = $options->favour_feed_titles;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
///////////////////////////////////////////////
|
||||||
|
// Include full content in output?
|
||||||
|
///////////////////////////////////////////////
|
||||||
|
if ($options->content === 'user') {
|
||||||
|
if (isset($_GET['content']) && $_GET['content'] === '0') {
|
||||||
|
$options->content = false;
|
||||||
|
} else {
|
||||||
|
$options->content = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
///////////////////////////////////////////////
|
||||||
|
// Include summaries in output?
|
||||||
|
///////////////////////////////////////////////
|
||||||
|
if ($options->summary === 'user') {
|
||||||
|
if (isset($_GET['summary']) && $_GET['summary'] === '1') {
|
||||||
|
$options->summary = true;
|
||||||
|
} else {
|
||||||
|
$options->summary = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
///////////////////////////////////////////////
|
///////////////////////////////////////////////
|
||||||
// Exclude items if extraction fails
|
// Exclude items if extraction fails
|
||||||
///////////////////////////////////////////////
|
///////////////////////////////////////////////
|
||||||
@ -272,15 +292,6 @@ if ($options->detect_language === 'user') {
|
|||||||
$detect_language = $options->detect_language;
|
$detect_language = $options->detect_language;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($detect_language >= 2) {
|
|
||||||
$language_codes = array('albanian' => 'sq','arabic' => 'ar','azeri' => 'az','bengali' => 'bn','bulgarian' => 'bg',
|
|
||||||
'cebuano' => 'ceb', // ISO 639-2
|
|
||||||
'croatian' => 'hr','czech' => 'cs','danish' => 'da','dutch' => 'nl','english' => 'en','estonian' => 'et','farsi' => 'fa','finnish' => 'fi','french' => 'fr','german' => 'de','hausa' => 'ha',
|
|
||||||
'hawaiian' => 'haw', // ISO 639-2
|
|
||||||
'hindi' => 'hi','hungarian' => 'hu','icelandic' => 'is','indonesian' => 'id','italian' => 'it','kazakh' => 'kk','kyrgyz' => 'ky','latin' => 'la','latvian' => 'lv','lithuanian' => 'lt','macedonian' => 'mk','mongolian' => 'mn','nepali' => 'ne','norwegian' => 'no','pashto' => 'ps',
|
|
||||||
'pidgin' => 'cpe', // ISO 639-2
|
|
||||||
'polish' => 'pl','portuguese' => 'pt','romanian' => 'ro','russian' => 'ru','serbian' => 'sr','slovak' => 'sk','slovene' => 'sl','somali' => 'so','spanish' => 'es','swahili' => 'sw','swedish' => 'sv','tagalog' => 'tl','turkish' => 'tr','ukrainian' => 'uk','urdu' => 'ur','uzbek' => 'uz','vietnamese' => 'vi','welsh' => 'cy');
|
|
||||||
}
|
|
||||||
$use_cld = extension_loaded('cld') && (version_compare(PHP_VERSION, '5.3.0') >= 0);
|
$use_cld = extension_loaded('cld') && (version_compare(PHP_VERSION, '5.3.0') >= 0);
|
||||||
|
|
||||||
/////////////////////////////////////
|
/////////////////////////////////////
|
||||||
@ -330,7 +341,7 @@ if ($options->cors) header('Access-Control-Allow-Origin: *');
|
|||||||
//////////////////////////////////
|
//////////////////////////////////
|
||||||
if ($options->caching) {
|
if ($options->caching) {
|
||||||
debug('Caching is enabled...');
|
debug('Caching is enabled...');
|
||||||
$cache_id = md5($max.$url.$valid_key.$links.$favour_feed_titles.$xss_filter.$exclude_on_fail.$format.$detect_language.(int)isset($_GET['pubsub']));
|
$cache_id = md5($max.$url.(int)$valid_key.$links.(int)$favour_feed_titles.(int)$options->content.(int)$options->summary.(int)$xss_filter.(int)$exclude_on_fail.$format.$detect_language.(int)isset($_GET['pubsub']));
|
||||||
$check_cache = true;
|
$check_cache = true;
|
||||||
if ($options->apc && $options->smart_cache) {
|
if ($options->apc && $options->smart_cache) {
|
||||||
apc_add("cache.$cache_id", 0, 10*60);
|
apc_add("cache.$cache_id", 0, 10*60);
|
||||||
@ -468,7 +479,7 @@ if ($img_url = $feed->get_image_url()) {
|
|||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
// Loop through feed items
|
// Loop through feed items
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
$items = $feed->get_items(0, $max);
|
$items = $feed->get_items(0, $max);
|
||||||
// Request all feed items in parallel (if supported)
|
// Request all feed items in parallel (if supported)
|
||||||
$urls_sanitized = array();
|
$urls_sanitized = array();
|
||||||
$urls = array();
|
$urls = array();
|
||||||
@ -550,24 +561,43 @@ foreach ($items as $key => $item) {
|
|||||||
$is_single_page = false;
|
$is_single_page = false;
|
||||||
if ($single_page_response = getSinglePage($item, $html, $effective_url)) {
|
if ($single_page_response = getSinglePage($item, $html, $effective_url)) {
|
||||||
$is_single_page = true;
|
$is_single_page = true;
|
||||||
$html = $single_page_response['body'];
|
|
||||||
// remove strange things
|
|
||||||
$html = str_replace('</[>', '', $html);
|
|
||||||
$html = convert_to_utf8($html, $single_page_response['headers']);
|
|
||||||
$effective_url = $single_page_response['effective_url'];
|
$effective_url = $single_page_response['effective_url'];
|
||||||
debug("Retrieved single-page view from $effective_url");
|
// check if action defined for returned Content-Type
|
||||||
|
$mime_info = get_mime_action_info($single_page_response['headers']);
|
||||||
|
if (isset($mime_info['action'])) {
|
||||||
|
if ($mime_info['action'] == 'exclude') {
|
||||||
|
continue; // skip this feed item entry
|
||||||
|
} elseif ($mime_info['action'] == 'link') {
|
||||||
|
if ($mime_info['type'] == 'image') {
|
||||||
|
$html = "<a href=\"$effective_url\"><img src=\"$effective_url\" alt=\"{$mime_info['name']}\" /></a>";
|
||||||
|
} else {
|
||||||
|
$html = "<a href=\"$effective_url\">Download {$mime_info['name']}</a>";
|
||||||
|
}
|
||||||
|
$extracted_title = $mime_info['name'];
|
||||||
|
$do_content_extraction = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ($do_content_extraction) {
|
||||||
|
$html = $single_page_response['body'];
|
||||||
|
// remove strange things
|
||||||
|
$html = str_replace('</[>', '', $html);
|
||||||
|
$html = convert_to_utf8($html, $single_page_response['headers']);
|
||||||
|
debug("Retrieved single-page view from $effective_url");
|
||||||
|
}
|
||||||
unset($single_page_response);
|
unset($single_page_response);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
if ($do_content_extraction) {
|
||||||
debug('--------');
|
debug('--------');
|
||||||
debug('Attempting to extract content');
|
debug('Attempting to extract content');
|
||||||
$extract_result = $extractor->process($html, $effective_url);
|
$extract_result = $extractor->process($html, $effective_url);
|
||||||
$readability = $extractor->readability;
|
$readability = $extractor->readability;
|
||||||
$content_block = ($extract_result) ? $extractor->getContent() : null;
|
$content_block = ($extract_result) ? $extractor->getContent() : null;
|
||||||
$extracted_title = ($extract_result) ? $extractor->getTitle() : '';
|
$extracted_title = ($extract_result) ? $extractor->getTitle() : '';
|
||||||
// Deal with multi-page articles
|
// Deal with multi-page articles
|
||||||
//die('Next: '.$extractor->getNextPageUrl());
|
//die('Next: '.$extractor->getNextPageUrl());
|
||||||
$is_multi_page = (!$is_single_page && $extract_result && $extractor->getNextPageUrl());
|
$is_multi_page = (!$is_single_page && $extract_result && $extractor->getNextPageUrl());
|
||||||
if ($options->multipage && $is_multi_page) {
|
if ($options->multipage && $is_multi_page && $options->content) {
|
||||||
debug('--------');
|
debug('--------');
|
||||||
debug('Attempting to process multi-page article');
|
debug('Attempting to process multi-page article');
|
||||||
$multi_page_urls = array();
|
$multi_page_urls = array();
|
||||||
@ -580,7 +610,7 @@ foreach ($items as $key => $item) {
|
|||||||
// check it's not what we have already!
|
// check it's not what we have already!
|
||||||
if (!in_array($next_page_url, $multi_page_urls)) {
|
if (!in_array($next_page_url, $multi_page_urls)) {
|
||||||
// it's not, so let's attempt to fetch it
|
// it's not, so let's attempt to fetch it
|
||||||
$multi_page_urls[] = $next_page_url;
|
$multi_page_urls[] = $next_page_url;
|
||||||
$_prev_ref = $http->referer;
|
$_prev_ref = $http->referer;
|
||||||
if (($response = $http->get($next_page_url, true)) && $response['status_code'] < 300) {
|
if (($response = $http->get($next_page_url, true)) && $response['status_code'] < 300) {
|
||||||
// make sure mime type is not something with a different action associated
|
// make sure mime type is not something with a different action associated
|
||||||
@ -605,13 +635,15 @@ foreach ($items as $key => $item) {
|
|||||||
// did we successfully deal with this multi-page article?
|
// did we successfully deal with this multi-page article?
|
||||||
if (empty($multi_page_content)) {
|
if (empty($multi_page_content)) {
|
||||||
debug('Failed to extract all parts of multi-page article, so not going to include them');
|
debug('Failed to extract all parts of multi-page article, so not going to include them');
|
||||||
$multi_page_content[] = $readability->dom->createElement('p')->innerHTML = '<em>This article appears to continue on subsequent pages which we could not extract</em>';
|
$_page = $readability->dom->createElement('p');
|
||||||
|
$_page->innerHTML = '<em>This article appears to continue on subsequent pages which we could not extract</em>';
|
||||||
|
$multi_page_content[] = $_page;
|
||||||
}
|
}
|
||||||
foreach ($multi_page_content as $_page) {
|
foreach ($multi_page_content as $_page) {
|
||||||
$_page = $content_block->ownerDocument->importNode($_page, true);
|
$_page = $content_block->ownerDocument->importNode($_page, true);
|
||||||
$content_block->appendChild($_page);
|
$content_block->appendChild($_page);
|
||||||
}
|
}
|
||||||
unset($multi_page_urls, $multi_page_content, $page_mime_info, $next_page_url);
|
unset($multi_page_urls, $multi_page_content, $page_mime_info, $next_page_url, $_page);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// use extracted title for both feed and item title if we're using single-item dummy feed
|
// use extracted title for both feed and item title if we're using single-item dummy feed
|
||||||
@ -658,7 +690,7 @@ foreach ($items as $key => $item) {
|
|||||||
} else {
|
} else {
|
||||||
$html = $content_block->ownerDocument->saveXML($content_block); // essentially outerHTML
|
$html = $content_block->ownerDocument->saveXML($content_block); // essentially outerHTML
|
||||||
}
|
}
|
||||||
unset($content_block);
|
//unset($content_block);
|
||||||
// post-processing cleanup
|
// post-processing cleanup
|
||||||
$html = preg_replace('!<p>[\s\h\v]*</p>!u', '', $html);
|
$html = preg_replace('!<p>[\s\h\v]*</p>!u', '', $html);
|
||||||
if ($links == 'remove') {
|
if ($links == 'remove') {
|
||||||
@ -671,130 +703,155 @@ foreach ($items as $key => $item) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($valid_key && isset($_GET['pubsub'])) { // used only on fivefilters.org at the moment
|
if ($valid_key && isset($_GET['pubsub'])) { // used only on fivefilters.org at the moment
|
||||||
$newitem->addElement('guid', 'http://fivefilters.org/content-only/redirect.php?url='.urlencode($item->get_permalink()), array('isPermaLink'=>'false'));
|
$newitem->addElement('guid', 'http://fivefilters.org/content-only/redirect.php?url='.urlencode($item->get_permalink()), array('isPermaLink'=>'false'));
|
||||||
|
} else {
|
||||||
|
$newitem->addElement('guid', $item->get_permalink(), array('isPermaLink'=>'true'));
|
||||||
|
}
|
||||||
|
// filter xss?
|
||||||
|
if ($xss_filter) {
|
||||||
|
debug('Filtering HTML to remove XSS');
|
||||||
|
$html = htmLawed::hl($html, array('safe'=>1, 'deny_attribute'=>'style', 'comment'=>1, 'cdata'=>1));
|
||||||
|
}
|
||||||
|
|
||||||
|
// add content
|
||||||
|
if ($options->summary === true) {
|
||||||
|
// get summary
|
||||||
|
$summary = '';
|
||||||
|
if (!$do_content_extraction) {
|
||||||
|
$summary = $html;
|
||||||
} else {
|
} else {
|
||||||
$newitem->addElement('guid', $item->get_permalink(), array('isPermaLink'=>'true'));
|
// Try to get first few paragraphs
|
||||||
}
|
if (isset($content_block) && ($content_block instanceof DOMElement)) {
|
||||||
// filter xss?
|
$_paras = $content_block->getElementsByTagName('p');
|
||||||
if ($xss_filter) {
|
foreach ($_paras as $_para) {
|
||||||
debug('Filtering HTML to remove XSS');
|
$summary .= preg_replace("/[\n\r\t ]+/", ' ', $_para->textContent).' ';
|
||||||
$html = htmLawed::hl($html, array('safe'=>1, 'deny_attribute'=>'style', 'comment'=>1, 'cdata'=>1));
|
if (strlen($summary) > 200) break;
|
||||||
}
|
|
||||||
$newitem->setDescription($html);
|
|
||||||
|
|
||||||
// set date
|
|
||||||
if ((int)$item->get_date('U') > 0) {
|
|
||||||
$newitem->setDate((int)$item->get_date('U'));
|
|
||||||
} elseif ($extractor->getDate()) {
|
|
||||||
$newitem->setDate($extractor->getDate());
|
|
||||||
}
|
|
||||||
|
|
||||||
// add authors
|
|
||||||
if ($authors = $item->get_authors()) {
|
|
||||||
foreach ($authors as $author) {
|
|
||||||
// for some feeds, SimplePie stores author's name as email, e.g. http://feeds.feedburner.com/nymag/intel
|
|
||||||
if ($author->get_name() !== null) {
|
|
||||||
$newitem->addElement('dc:creator', $author->get_name());
|
|
||||||
} elseif ($author->get_email() !== null) {
|
|
||||||
$newitem->addElement('dc:creator', $author->get_email());
|
|
||||||
}
|
}
|
||||||
}
|
} else {
|
||||||
} elseif ($authors = $extractor->getAuthors()) {
|
$summary = $html;
|
||||||
//TODO: make sure the list size is reasonable
|
|
||||||
foreach ($authors as $author) {
|
|
||||||
// TODO: xpath often selects authors from other articles linked from the page.
|
|
||||||
// for now choose first item
|
|
||||||
$newitem->addElement('dc:creator', $author);
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
unset($_paras, $_para);
|
||||||
// add language
|
$summary = get_excerpt($summary);
|
||||||
if ($detect_language) {
|
$newitem->setDescription($summary);
|
||||||
$language = $extractor->getLanguage();
|
if ($options->content) $newitem->setElement('content:encoded', $html);
|
||||||
if (!$language) $language = $feed->get_language();
|
} else {
|
||||||
if (($detect_language == 3 || (!$language && $detect_language == 2)) && $text_sample) {
|
if ($options->content) $newitem->setDescription($html);
|
||||||
try {
|
}
|
||||||
if ($use_cld) {
|
|
||||||
// Use PHP-CLD extension
|
// set date
|
||||||
$php_cld = 'CLD\detect'; // in quotes to prevent PHP 5.2 parse error
|
if ((int)$item->get_date('U') > 0) {
|
||||||
$res = $php_cld($text_sample);
|
$newitem->setDate((int)$item->get_date('U'));
|
||||||
if (is_array($res) && count($res) > 0) {
|
} elseif ($extractor->getDate()) {
|
||||||
$language = $res[0]['code'];
|
$newitem->setDate($extractor->getDate());
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
//die('what');
|
// add authors
|
||||||
// Use PEAR's Text_LanguageDetect
|
if ($authors = $item->get_authors()) {
|
||||||
if (!isset($l)) {
|
foreach ($authors as $author) {
|
||||||
$l = new Text_LanguageDetect('libraries/language-detect/lang.dat', 'libraries/language-detect/unicode_blocks.dat');
|
// for some feeds, SimplePie stores author's name as email, e.g. http://feeds.feedburner.com/nymag/intel
|
||||||
}
|
if ($author->get_name() !== null) {
|
||||||
$l_result = $l->detect($text_sample, 1);
|
$newitem->addElement('dc:creator', $author->get_name());
|
||||||
if (count($l_result) > 0) {
|
} elseif ($author->get_email() !== null) {
|
||||||
$language = $language_codes[key($l_result)];
|
$newitem->addElement('dc:creator', $author->get_email());
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
} elseif ($authors = $extractor->getAuthors()) {
|
||||||
|
//TODO: make sure the list size is reasonable
|
||||||
|
foreach ($authors as $author) {
|
||||||
|
// TODO: xpath often selects authors from other articles linked from the page.
|
||||||
|
// for now choose first item
|
||||||
|
$newitem->addElement('dc:creator', $author);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// add language
|
||||||
|
if ($detect_language) {
|
||||||
|
$language = $extractor->getLanguage();
|
||||||
|
if (!$language) $language = $feed->get_language();
|
||||||
|
if (($detect_language == 3 || (!$language && $detect_language == 2)) && $text_sample) {
|
||||||
|
try {
|
||||||
|
if ($use_cld) {
|
||||||
|
// Use PHP-CLD extension
|
||||||
|
$php_cld = 'CLD\detect'; // in quotes to prevent PHP 5.2 parse error
|
||||||
|
$res = $php_cld($text_sample);
|
||||||
|
if (is_array($res) && count($res) > 0) {
|
||||||
|
$language = $res[0]['code'];
|
||||||
}
|
}
|
||||||
} catch (Exception $e) {
|
} else {
|
||||||
//die('error: '.$e);
|
//die('what');
|
||||||
// do nothing
|
// Use PEAR's Text_LanguageDetect
|
||||||
}
|
if (!isset($l)) {
|
||||||
}
|
$l = new Text_LanguageDetect();
|
||||||
if ($language && (strlen($language) < 7)) {
|
$l->setNameMode(2); // return ISO 639-1 codes (e.g. "en")
|
||||||
$newitem->addElement('dc:language', $language);
|
}
|
||||||
}
|
$l_result = $l->detect($text_sample, 1);
|
||||||
}
|
if (count($l_result) > 0) {
|
||||||
|
$language = key($l_result);
|
||||||
// add MIME type (if it appeared in our exclusions lists)
|
|
||||||
if (isset($mime_info['mime'])) $newitem->addElement('dc:format', $mime_info['mime']);
|
|
||||||
// add effective URL (URL after redirects)
|
|
||||||
if (isset($effective_url)) {
|
|
||||||
//TODO: ensure $effective_url is valid witout - sometimes it causes problems, e.g.
|
|
||||||
//http://www.siasat.pk/forum/showthread.php?108883-Pakistan-Chowk-by-Rana-Mubashir-<2D>-25th-March-2012-Special-Program-from-Liari-(Karachi)
|
|
||||||
//temporary measure: use utf8_encode()
|
|
||||||
$newitem->addElement('dc:identifier', remove_url_cruft(utf8_encode($effective_url)));
|
|
||||||
} else {
|
|
||||||
$newitem->addElement('dc:identifier', remove_url_cruft($item->get_permalink()));
|
|
||||||
}
|
|
||||||
|
|
||||||
// add categories
|
|
||||||
if ($categories = $item->get_categories()) {
|
|
||||||
foreach ($categories as $category) {
|
|
||||||
if ($category->get_label() !== null) {
|
|
||||||
$newitem->addElement('category', $category->get_label());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// check for enclosures
|
|
||||||
if ($options->keep_enclosures) {
|
|
||||||
if ($enclosures = $item->get_enclosures()) {
|
|
||||||
foreach ($enclosures as $enclosure) {
|
|
||||||
// thumbnails
|
|
||||||
foreach ((array)$enclosure->get_thumbnails() as $thumbnail) {
|
|
||||||
$newitem->addElement('media:thumbnail', '', array('url'=>$thumbnail));
|
|
||||||
}
|
}
|
||||||
if (!$enclosure->get_link()) continue;
|
|
||||||
$enc = array();
|
|
||||||
// Media RSS spec ($enc): http://search.yahoo.com/mrss
|
|
||||||
// SimplePie methods ($enclosure): http://simplepie.org/wiki/reference/start#methods4
|
|
||||||
$enc['url'] = $enclosure->get_link();
|
|
||||||
if ($enclosure->get_length()) $enc['fileSize'] = $enclosure->get_length();
|
|
||||||
if ($enclosure->get_type()) $enc['type'] = $enclosure->get_type();
|
|
||||||
if ($enclosure->get_medium()) $enc['medium'] = $enclosure->get_medium();
|
|
||||||
if ($enclosure->get_expression()) $enc['expression'] = $enclosure->get_expression();
|
|
||||||
if ($enclosure->get_bitrate()) $enc['bitrate'] = $enclosure->get_bitrate();
|
|
||||||
if ($enclosure->get_framerate()) $enc['framerate'] = $enclosure->get_framerate();
|
|
||||||
if ($enclosure->get_sampling_rate()) $enc['samplingrate'] = $enclosure->get_sampling_rate();
|
|
||||||
if ($enclosure->get_channels()) $enc['channels'] = $enclosure->get_channels();
|
|
||||||
if ($enclosure->get_duration()) $enc['duration'] = $enclosure->get_duration();
|
|
||||||
if ($enclosure->get_height()) $enc['height'] = $enclosure->get_height();
|
|
||||||
if ($enclosure->get_width()) $enc['width'] = $enclosure->get_width();
|
|
||||||
if ($enclosure->get_language()) $enc['lang'] = $enclosure->get_language();
|
|
||||||
$newitem->addElement('media:content', '', $enc);
|
|
||||||
}
|
}
|
||||||
|
} catch (Exception $e) {
|
||||||
|
//die('error: '.$e);
|
||||||
|
// do nothing
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* } */
|
if ($language && (strlen($language) < 7)) {
|
||||||
|
$newitem->addElement('dc:language', $language);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// add MIME type (if it appeared in our exclusions lists)
|
||||||
|
if (isset($mime_info['mime'])) $newitem->addElement('dc:format', $mime_info['mime']);
|
||||||
|
// add effective URL (URL after redirects)
|
||||||
|
if (isset($effective_url)) {
|
||||||
|
//TODO: ensure $effective_url is valid witout - sometimes it causes problems, e.g.
|
||||||
|
//http://www.siasat.pk/forum/showthread.php?108883-Pakistan-Chowk-by-Rana-Mubashir-<2D>-25th-March-2012-Special-Program-from-Liari-(Karachi)
|
||||||
|
//temporary measure: use utf8_encode()
|
||||||
|
$newitem->addElement('dc:identifier', remove_url_cruft(utf8_encode($effective_url)));
|
||||||
|
} else {
|
||||||
|
$newitem->addElement('dc:identifier', remove_url_cruft($item->get_permalink()));
|
||||||
|
}
|
||||||
|
|
||||||
|
// add categories
|
||||||
|
if ($categories = $item->get_categories()) {
|
||||||
|
foreach ($categories as $category) {
|
||||||
|
if ($category->get_label() !== null) {
|
||||||
|
$newitem->addElement('category', $category->get_label());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// check for enclosures
|
||||||
|
if ($options->keep_enclosures) {
|
||||||
|
if ($enclosures = $item->get_enclosures()) {
|
||||||
|
foreach ($enclosures as $enclosure) {
|
||||||
|
// thumbnails
|
||||||
|
foreach ((array)$enclosure->get_thumbnails() as $thumbnail) {
|
||||||
|
$newitem->addElement('media:thumbnail', '', array('url'=>$thumbnail));
|
||||||
|
}
|
||||||
|
if (!$enclosure->get_link()) continue;
|
||||||
|
$enc = array();
|
||||||
|
// Media RSS spec ($enc): http://search.yahoo.com/mrss
|
||||||
|
// SimplePie methods ($enclosure): http://simplepie.org/wiki/reference/start#methods4
|
||||||
|
$enc['url'] = $enclosure->get_link();
|
||||||
|
if ($enclosure->get_length()) $enc['fileSize'] = $enclosure->get_length();
|
||||||
|
if ($enclosure->get_type()) $enc['type'] = $enclosure->get_type();
|
||||||
|
if ($enclosure->get_medium()) $enc['medium'] = $enclosure->get_medium();
|
||||||
|
if ($enclosure->get_expression()) $enc['expression'] = $enclosure->get_expression();
|
||||||
|
if ($enclosure->get_bitrate()) $enc['bitrate'] = $enclosure->get_bitrate();
|
||||||
|
if ($enclosure->get_framerate()) $enc['framerate'] = $enclosure->get_framerate();
|
||||||
|
if ($enclosure->get_sampling_rate()) $enc['samplingrate'] = $enclosure->get_sampling_rate();
|
||||||
|
if ($enclosure->get_channels()) $enc['channels'] = $enclosure->get_channels();
|
||||||
|
if ($enclosure->get_duration()) $enc['duration'] = $enclosure->get_duration();
|
||||||
|
if ($enclosure->get_height()) $enc['height'] = $enclosure->get_height();
|
||||||
|
if ($enclosure->get_width()) $enc['width'] = $enclosure->get_width();
|
||||||
|
if ($enclosure->get_language()) $enc['lang'] = $enclosure->get_language();
|
||||||
|
$newitem->addElement('media:content', '', $enc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
$output->addItem($newitem);
|
$output->addItem($newitem);
|
||||||
unset($html);
|
unset($html);
|
||||||
$item_count++;
|
$item_count++;
|
||||||
|
42
inc/3rdparty/makefulltextfeedHelpers.php
vendored
42
inc/3rdparty/makefulltextfeedHelpers.php
vendored
@ -66,6 +66,38 @@ class DummySingleItem {
|
|||||||
// HELPER FUNCTIONS
|
// HELPER FUNCTIONS
|
||||||
///////////////////////////////
|
///////////////////////////////
|
||||||
|
|
||||||
|
// Adapted from WordPress
|
||||||
|
// http://core.trac.wordpress.org/browser/tags/3.5.1/wp-includes/formatting.php#L2173
|
||||||
|
function get_excerpt($text, $num_words=55, $more=null) {
|
||||||
|
if (null === $more) $more = '…';
|
||||||
|
$text = strip_tags($text);
|
||||||
|
//TODO: Check if word count is based on single characters (East Asian characters)
|
||||||
|
/*
|
||||||
|
if (1==2) {
|
||||||
|
$text = trim(preg_replace("/[\n\r\t ]+/", ' ', $text), ' ');
|
||||||
|
preg_match_all('/./u', $text, $words_array);
|
||||||
|
$words_array = array_slice($words_array[0], 0, $num_words + 1);
|
||||||
|
$sep = '';
|
||||||
|
} else {
|
||||||
|
$words_array = preg_split("/[\n\r\t ]+/", $text, $num_words + 1, PREG_SPLIT_NO_EMPTY);
|
||||||
|
$sep = ' ';
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
$words_array = preg_split("/[\n\r\t ]+/", $text, $num_words + 1, PREG_SPLIT_NO_EMPTY);
|
||||||
|
$sep = ' ';
|
||||||
|
if (count($words_array) > $num_words) {
|
||||||
|
array_pop($words_array);
|
||||||
|
$text = implode($sep, $words_array);
|
||||||
|
$text = $text.$more;
|
||||||
|
} else {
|
||||||
|
$text = implode($sep, $words_array);
|
||||||
|
}
|
||||||
|
// trim whitespace at beginning or end of string
|
||||||
|
// See: http://stackoverflow.com/questions/4166896/trim-unicode-whitespace-in-php-5-2
|
||||||
|
$text = preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $text);
|
||||||
|
return $text;
|
||||||
|
}
|
||||||
|
|
||||||
function url_allowed($url) {
|
function url_allowed($url) {
|
||||||
global $options;
|
global $options;
|
||||||
if (!empty($options->allowed_urls)) {
|
if (!empty($options->allowed_urls)) {
|
||||||
@ -165,14 +197,6 @@ function convert_to_utf8($html, $header=null)
|
|||||||
if (strtolower($encoding) != 'utf-8') {
|
if (strtolower($encoding) != 'utf-8') {
|
||||||
debug('Converting to UTF-8');
|
debug('Converting to UTF-8');
|
||||||
$html = SimplePie_Misc::change_encoding($html, $encoding, 'utf-8');
|
$html = SimplePie_Misc::change_encoding($html, $encoding, 'utf-8');
|
||||||
/*
|
|
||||||
if (function_exists('iconv')) {
|
|
||||||
// iconv appears to handle certain character encodings better than mb_convert_encoding
|
|
||||||
$html = iconv($encoding, 'utf-8', $html);
|
|
||||||
} else {
|
|
||||||
$html = mb_convert_encoding($html, 'utf-8', $encoding);
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -196,7 +220,7 @@ function makeAbsolute($base, $elem) {
|
|||||||
}
|
}
|
||||||
function makeAbsoluteAttr($base, $e, $attr) {
|
function makeAbsoluteAttr($base, $e, $attr) {
|
||||||
if ($e->hasAttribute($attr)) {
|
if ($e->hasAttribute($attr)) {
|
||||||
// Trim leading and trailing white space. I don't really like this but
|
// Trim leading and trailing white space. I don't really like this but
|
||||||
// unfortunately it does appear on some sites. e.g. <img src=" /path/to/image.jpg" />
|
// unfortunately it does appear on some sites. e.g. <img src=" /path/to/image.jpg" />
|
||||||
$url = trim(str_replace('%20', ' ', $e->getAttribute($attr)));
|
$url = trim(str_replace('%20', ' ', $e->getAttribute($attr)));
|
||||||
$url = str_replace(' ', '%20', $url);
|
$url = str_replace(' ', '%20', $url);
|
||||||
|
5
inc/3rdparty/site_config/index.php
vendored
5
inc/3rdparty/site_config/index.php
vendored
@ -1,3 +1,2 @@
|
|||||||
<?php
|
<?php
|
||||||
// this is here to prevent directory listing over the web
|
// this is here to prevent directory listing over the web
|
||||||
?>
|
|
@ -1 +1 @@
|
|||||||
4
|
2013-05-12T22:53:07Z
|
Loading…
Reference in New Issue
Block a user