mirror of
https://github.com/moparisthebest/wallabag
synced 2024-11-30 04:42:19 -05:00
up to date
This commit is contained in:
commit
35d4e27588
@ -1,4 +1,4 @@
|
|||||||
# How contributing
|
# How to contribute
|
||||||
|
|
||||||
## You found a bug
|
## You found a bug
|
||||||
Please [open a new issue](https://github.com/wallabag/wallabag/issues/new).
|
Please [open a new issue](https://github.com/wallabag/wallabag/issues/new).
|
||||||
|
104
inc/3rdparty/config.php
vendored
104
inc/3rdparty/config.php
vendored
@ -19,7 +19,7 @@ if (!isset($options)) $options = new stdClass();
|
|||||||
// Enable service
|
// Enable service
|
||||||
// ----------------------
|
// ----------------------
|
||||||
// Set this to false if you want to disable the service.
|
// Set this to false if you want to disable the service.
|
||||||
// If set to false, no feed is produced and users will
|
// If set to false, no feed is produced and users will
|
||||||
// be told that the service is disabled.
|
// be told that the service is disabled.
|
||||||
$options->enabled = true;
|
$options->enabled = true;
|
||||||
|
|
||||||
@ -43,10 +43,64 @@ $options->default_entries = 5;
|
|||||||
// ----------------------
|
// ----------------------
|
||||||
// The maximum number of feed items to process when no access key is supplied.
|
// The maximum number of feed items to process when no access key is supplied.
|
||||||
// This limits the user-supplied &max=x value. For example, if the user
|
// This limits the user-supplied &max=x value. For example, if the user
|
||||||
// asks for 20 items to be processed (&max=20), if max_entries is set to
|
// asks for 20 items to be processed (&max=20), if max_entries is set to
|
||||||
// 10, only 10 will be processed.
|
// 10, only 10 will be processed.
|
||||||
$options->max_entries = 10;
|
$options->max_entries = 10;
|
||||||
|
|
||||||
|
// Full content
|
||||||
|
// ----------------------
|
||||||
|
// By default Full-Text RSS includes the extracted content in the output.
|
||||||
|
// You can exclude this from the output by passing '&content=0' in the querystring.
|
||||||
|
//
|
||||||
|
// Possible values...
|
||||||
|
// Always include: true
|
||||||
|
// Never include: false
|
||||||
|
// Include unless user overrides (&content=0): 'user' (default)
|
||||||
|
//
|
||||||
|
// Note: currently this does not disable full content extraction. It simply omits it
|
||||||
|
// from the output.
|
||||||
|
$options->content = 'user';
|
||||||
|
|
||||||
|
// Excerpts
|
||||||
|
// ----------------------
|
||||||
|
// By default Full-Text RSS does not include excerpts in the output.
|
||||||
|
// You can enable this by passing '&summary=1' in the querystring.
|
||||||
|
// This will include a plain text excerpt from the extracted content.
|
||||||
|
//
|
||||||
|
// Possible values...
|
||||||
|
// Always include: true (recommended for new users)
|
||||||
|
// Never include: false
|
||||||
|
// Don't include unless user overrides (&summary=1): 'user' (default)
|
||||||
|
//
|
||||||
|
// Important: if both content and excerpts are requested, the excerpt will be
|
||||||
|
// placed in the description element and the full content inside content:encoded.
|
||||||
|
// If excerpts are not requested, the full content will go inside the description element.
|
||||||
|
//
|
||||||
|
// Why are we not returning both excerpts and content by default?
|
||||||
|
// Mainly for backward compatibility.
|
||||||
|
// Excerpts should appear in the feed item's description element. Previous versions
|
||||||
|
// of Full-Text RSS did not return excerpts, so the description element was always
|
||||||
|
// used for the full content (as recommended by the RSS advisory). When returning both,
|
||||||
|
// we need somewhere else to place the content (content:encoded).
|
||||||
|
// Having both enabled should not create any problems for news readers, but it may create
|
||||||
|
// problems for developers upgrading from one of our earlier versions who may now find
|
||||||
|
// their applications are returning excerpts instead of the full content they were
|
||||||
|
// expecting. To avoid such surprises for users who are upgrading Full-Text RSS,
|
||||||
|
// excerpts must be explicitly requested in the querystring by default.
|
||||||
|
//
|
||||||
|
// Why not use a different element name for excerpts?
|
||||||
|
// According to the RSS advisory:
|
||||||
|
// "Publishers who employ summaries should store the summary in description and
|
||||||
|
// the full content in content:encoded, ordering description first within the item.
|
||||||
|
// On items with no summary, the full content should be stored in description."
|
||||||
|
// See: http://www.rssboard.org/rss-profile#namespace-elements-content-encoded
|
||||||
|
//
|
||||||
|
// For more consistent element naming, we recommend new users set this option to true.
|
||||||
|
// The full content can still be excluded via the querystring, but the element names
|
||||||
|
// will not change: when $options->summary = true, the description element will always
|
||||||
|
// be reserved for the excerpt and content:encoded always for full content.
|
||||||
|
$options->summary = 'user';
|
||||||
|
|
||||||
// Rewrite relative URLs
|
// Rewrite relative URLs
|
||||||
// ----------------------
|
// ----------------------
|
||||||
// With this enabled relative URLs found in the extracted content
|
// With this enabled relative URLs found in the extracted content
|
||||||
@ -67,7 +121,7 @@ $options->exclude_items_on_fail = 'user';
|
|||||||
// Enable multi-page support
|
// Enable multi-page support
|
||||||
// -------------------------
|
// -------------------------
|
||||||
// If enabled, we will try to follow next page links on multi-page articles.
|
// If enabled, we will try to follow next page links on multi-page articles.
|
||||||
// Currently this only happens for sites where next_page_link has been defined
|
// Currently this only happens for sites where next_page_link has been defined
|
||||||
// in a site config file.
|
// in a site config file.
|
||||||
$options->multipage = true;
|
$options->multipage = true;
|
||||||
|
|
||||||
@ -125,10 +179,10 @@ $options->detect_language = 1;
|
|||||||
|
|
||||||
// Registration key
|
// Registration key
|
||||||
// ---------------
|
// ---------------
|
||||||
// The registration key is optional. It is not required to use Full-Text RSS,
|
// The registration key is optional. It is not required to use Full-Text RSS,
|
||||||
// and does not affect the normal operation of Full-Text RSS. It is currently
|
// and does not affect the normal operation of Full-Text RSS. It is currently
|
||||||
// only used on admin pages which help you update site patterns with the
|
// only used on admin pages which help you update site patterns with the
|
||||||
// latest version offered by FiveFilters.org. For these admin-related
|
// latest version offered by FiveFilters.org. For these admin-related
|
||||||
// tasks to complete, we will require a valid registration key.
|
// tasks to complete, we will require a valid registration key.
|
||||||
// If you would like one, you can purchase the latest version of Full-Text RSS
|
// If you would like one, you can purchase the latest version of Full-Text RSS
|
||||||
// at http://fivefilters.org/content-only/
|
// at http://fivefilters.org/content-only/
|
||||||
@ -144,12 +198,12 @@ $options->registration_key = '';
|
|||||||
// ----------------------
|
// ----------------------
|
||||||
// Certain pages/actions, e.g. updating site patterns with our online tool, will require admin credentials.
|
// Certain pages/actions, e.g. updating site patterns with our online tool, will require admin credentials.
|
||||||
// To use these pages, enter a password here and you'll be prompted for it when you try to access those pages.
|
// To use these pages, enter a password here and you'll be prompted for it when you try to access those pages.
|
||||||
// If no password or username is set, pages requiring admin privileges will be inaccessible.
|
// If no password or username is set, pages requiring admin privileges will be inaccessible.
|
||||||
// The default username is 'admin'.
|
// The default username is 'admin'.
|
||||||
// If overriding with an environment variable, separate username and password with a colon, e.g.:
|
// If overriding with an environment variable, separate username and password with a colon, e.g.:
|
||||||
// ftr_admin_credentials: admin:my-secret-password
|
// ftr_admin_credentials: admin:my-secret-password
|
||||||
// Example: $options->admin_credentials = array('username'=>'admin', 'password'=>'my-secret-password');
|
// Example: $options->admin_credentials = array('username'=>'admin', 'password'=>'my-secret-password');
|
||||||
$options->admin_credentials = array('username'=>'admin', 'password'=>'admin');
|
$options->admin_credentials = array('username'=>'admin', 'password'=>'');
|
||||||
|
|
||||||
// URLs to allow
|
// URLs to allow
|
||||||
// ----------------------
|
// ----------------------
|
||||||
@ -178,12 +232,12 @@ $options->key_required = false;
|
|||||||
// ----------------------
|
// ----------------------
|
||||||
// By default, when processing feeds, we assume item titles in the feed
|
// By default, when processing feeds, we assume item titles in the feed
|
||||||
// have not been truncated. So after processing web pages, the extracted titles
|
// have not been truncated. So after processing web pages, the extracted titles
|
||||||
// are not used in the generated feed. If you prefer to have extracted titles in
|
// are not used in the generated feed. If you prefer to have extracted titles in
|
||||||
// the feed you can either set this to false, in which case we will always favour
|
// the feed you can either set this to false, in which case we will always favour
|
||||||
// extracted titles. Alternatively, if set to 'user' (default) we'll use the
|
// extracted titles. Alternatively, if set to 'user' (default) we'll use the
|
||||||
// extracted title if you pass '&use_extracted_title' in the querystring.
|
// extracted title if you pass '&use_extracted_title' in the querystring.
|
||||||
// Possible values:
|
// Possible values:
|
||||||
// * Favour feed titles: true
|
// * Favour feed titles: true
|
||||||
// * Favour extracted titles: false
|
// * Favour extracted titles: false
|
||||||
// * Favour feed titles with user override: 'user' (default)
|
// * Favour feed titles with user override: 'user' (default)
|
||||||
// Note: this has no effect when the input URL is to a web page - in these cases
|
// Note: this has no effect when the input URL is to a web page - in these cases
|
||||||
@ -192,17 +246,17 @@ $options->favour_feed_titles = 'user';
|
|||||||
|
|
||||||
// Access keys (password protected access)
|
// Access keys (password protected access)
|
||||||
// ------------------------------------
|
// ------------------------------------
|
||||||
// NOTE: You do not need an API key from fivefilters.org to run your own
|
// NOTE: You do not need an API key from fivefilters.org to run your own
|
||||||
// copy of the code. This is here if you'd like to restrict access to
|
// copy of the code. This is here if you'd like to restrict access to
|
||||||
// _your_ copy.
|
// _your_ copy.
|
||||||
// Keys let you group users - those with a key and those without - and
|
// Keys let you group users - those with a key and those without - and
|
||||||
// restrict access to the service to those without a key.
|
// restrict access to the service to those without a key.
|
||||||
// If you want everyone to access the service in the same way, you can
|
// If you want everyone to access the service in the same way, you can
|
||||||
// leave the array below empty and ignore the access key options further down.
|
// leave the array below empty and ignore the access key options further down.
|
||||||
// The options further down let you control how the service should behave
|
// The options further down let you control how the service should behave
|
||||||
// in each mode.
|
// in each mode.
|
||||||
// Note: Explicitly including the index number (1 and 2 in the examples below)
|
// Note: Explicitly including the index number (1 and 2 in the examples below)
|
||||||
// is highly recommended (when generating feeds, we encode the key and
|
// is highly recommended (when generating feeds, we encode the key and
|
||||||
// refer to it by index number and hash).
|
// refer to it by index number and hash).
|
||||||
$options->api_keys = array();
|
$options->api_keys = array();
|
||||||
// Example:
|
// Example:
|
||||||
@ -232,13 +286,13 @@ $options->max_entries_with_key = 10;
|
|||||||
// filter the resulting HTML for XSS attacks, making it redundant for
|
// filter the resulting HTML for XSS attacks, making it redundant for
|
||||||
// Full-Text RSS to do the same. Similarly with frameworks/CMS which display
|
// Full-Text RSS to do the same. Similarly with frameworks/CMS which display
|
||||||
// feed content - the content should be treated like any other user-submitted content.
|
// feed content - the content should be treated like any other user-submitted content.
|
||||||
//
|
//
|
||||||
// If you are writing an application yourself which is processing feeds generated by
|
// If you are writing an application yourself which is processing feeds generated by
|
||||||
// Full-Text RSS, you can either filter the HTML yourself to remove potential XSS attacks
|
// Full-Text RSS, you can either filter the HTML yourself to remove potential XSS attacks
|
||||||
// or enable this option. This might be useful if you are processing our generated
|
// or enable this option. This might be useful if you are processing our generated
|
||||||
// feeds with JavaScript on the client side - although there's client side xss
|
// feeds with JavaScript on the client side - although there's client side xss
|
||||||
// filtering available too, e.g. https://code.google.com/p/google-caja/wiki/JsHtmlSanitizer
|
// filtering available too, e.g. https://code.google.com/p/google-caja/wiki/JsHtmlSanitizer
|
||||||
//
|
//
|
||||||
// If enabled, we'll pass retrieved HTML content through htmLawed with
|
// If enabled, we'll pass retrieved HTML content through htmLawed with
|
||||||
// safe flag on and style attributes denied, see
|
// safe flag on and style attributes denied, see
|
||||||
// http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed/htmLawed_README.htm#s3.6
|
// http://www.bioinformatics.org/phplabware/internal_utilities/htmLawed/htmLawed_README.htm#s3.6
|
||||||
@ -253,8 +307,8 @@ $options->xss_filter = 'user';
|
|||||||
// Allowed parsers
|
// Allowed parsers
|
||||||
// ----------------------
|
// ----------------------
|
||||||
// Full-Text RSS attempts to use PHP's libxml extension to process HTML.
|
// Full-Text RSS attempts to use PHP's libxml extension to process HTML.
|
||||||
// While fast, on some sites it may not always produce good results.
|
// While fast, on some sites it may not always produce good results.
|
||||||
// For these sites, you can specify an alternative HTML parser:
|
// For these sites, you can specify an alternative HTML parser:
|
||||||
// parser: html5lib
|
// parser: html5lib
|
||||||
// The html5lib parser is bundled with Full-Text RSS.
|
// The html5lib parser is bundled with Full-Text RSS.
|
||||||
// see http://code.google.com/p/html5lib/
|
// see http://code.google.com/p/html5lib/
|
||||||
@ -273,7 +327,7 @@ $options->cors = false;
|
|||||||
|
|
||||||
// Use APC user cache?
|
// Use APC user cache?
|
||||||
// ----------------------
|
// ----------------------
|
||||||
// If enabled we will store site config files (when requested
|
// If enabled we will store site config files (when requested
|
||||||
// for the first time) in APC's user cache. Keys prefixed with 'sc.'
|
// for the first time) in APC's user cache. Keys prefixed with 'sc.'
|
||||||
// This improves performance by reducing disk access.
|
// This improves performance by reducing disk access.
|
||||||
// Note: this has no effect if APC is unavailable on your server.
|
// Note: this has no effect if APC is unavailable on your server.
|
||||||
@ -346,7 +400,7 @@ $options->rewrite_url = array(
|
|||||||
// Valid actions:
|
// Valid actions:
|
||||||
// * 'exclude' - exclude this item from the result
|
// * 'exclude' - exclude this item from the result
|
||||||
// * 'link' - create HTML link to the item
|
// * 'link' - create HTML link to the item
|
||||||
$options->content_type_exc = array(
|
$options->content_type_exc = array(
|
||||||
'application/pdf' => array('action'=>'link', 'name'=>'PDF'),
|
'application/pdf' => array('action'=>'link', 'name'=>'PDF'),
|
||||||
'image' => array('action'=>'link', 'name'=>'Image'),
|
'image' => array('action'=>'link', 'name'=>'Image'),
|
||||||
'audio' => array('action'=>'link', 'name'=>'Audio'),
|
'audio' => array('action'=>'link', 'name'=>'Audio'),
|
||||||
@ -375,13 +429,13 @@ $options->cache_cleanup = 100;
|
|||||||
/// DO NOT CHANGE ANYTHING BELOW THIS ///////////
|
/// DO NOT CHANGE ANYTHING BELOW THIS ///////////
|
||||||
/////////////////////////////////////////////////
|
/////////////////////////////////////////////////
|
||||||
|
|
||||||
if (!defined('_FF_FTR_VERSION')) define('_FF_FTR_VERSION', '3.1');
|
if (!defined('_FF_FTR_VERSION')) define('_FF_FTR_VERSION', '3.2');
|
||||||
|
|
||||||
if (basename(__FILE__) == 'config.php') {
|
if (basename(__FILE__) == 'config.php') {
|
||||||
if (file_exists(dirname(__FILE__).'/custom_config.php')) {
|
if (file_exists(dirname(__FILE__).'/custom_config.php')) {
|
||||||
require_once dirname(__FILE__).'/custom_config.php';
|
require_once dirname(__FILE__).'/custom_config.php';
|
||||||
}
|
}
|
||||||
|
|
||||||
// check for environment variables - often used on cloud platforms
|
// check for environment variables - often used on cloud platforms
|
||||||
// environment variables should be prefixed with 'ftr_', e.g.
|
// environment variables should be prefixed with 'ftr_', e.g.
|
||||||
// ftr_max_entries: 1
|
// ftr_max_entries: 1
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -1,338 +1,343 @@
|
|||||||
<?php
|
<?php
|
||||||
/**
|
/**
|
||||||
* Site Config
|
* Site Config
|
||||||
*
|
*
|
||||||
* Each instance of this class should hold extraction patterns and other directives
|
* Each instance of this class should hold extraction patterns and other directives
|
||||||
* for a website. See ContentExtractor class to see how it's used.
|
* for a website. See ContentExtractor class to see how it's used.
|
||||||
*
|
*
|
||||||
* @version 0.7
|
* @version 0.8
|
||||||
* @date 2012-08-27
|
* @date 2013-04-16
|
||||||
* @author Keyvan Minoukadeh
|
* @author Keyvan Minoukadeh
|
||||||
* @copyright 2012 Keyvan Minoukadeh
|
* @copyright 2013 Keyvan Minoukadeh
|
||||||
* @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3
|
* @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3
|
||||||
*/
|
*/
|
||||||
|
|
||||||
class SiteConfig
|
class SiteConfig
|
||||||
{
|
{
|
||||||
// Use first matching element as title (0 or more xpath expressions)
|
// Use first matching element as title (0 or more xpath expressions)
|
||||||
public $title = array();
|
public $title = array();
|
||||||
|
|
||||||
// Use first matching element as body (0 or more xpath expressions)
|
// Use first matching element as body (0 or more xpath expressions)
|
||||||
public $body = array();
|
public $body = array();
|
||||||
|
|
||||||
// Use first matching element as author (0 or more xpath expressions)
|
// Use first matching element as author (0 or more xpath expressions)
|
||||||
public $author = array();
|
public $author = array();
|
||||||
|
|
||||||
// Use first matching element as date (0 or more xpath expressions)
|
// Use first matching element as date (0 or more xpath expressions)
|
||||||
public $date = array();
|
public $date = array();
|
||||||
|
|
||||||
// Strip elements matching these xpath expressions (0 or more)
|
// Strip elements matching these xpath expressions (0 or more)
|
||||||
public $strip = array();
|
public $strip = array();
|
||||||
|
|
||||||
// Strip elements which contain these strings (0 or more) in the id or class attribute
|
// Strip elements which contain these strings (0 or more) in the id or class attribute
|
||||||
public $strip_id_or_class = array();
|
public $strip_id_or_class = array();
|
||||||
|
|
||||||
// Strip images which contain these strings (0 or more) in the src attribute
|
// Strip images which contain these strings (0 or more) in the src attribute
|
||||||
public $strip_image_src = array();
|
public $strip_image_src = array();
|
||||||
|
|
||||||
// Additional HTTP headers to send
|
// Additional HTTP headers to send
|
||||||
// NOT YET USED
|
// NOT YET USED
|
||||||
public $http_header = array();
|
public $http_header = array();
|
||||||
|
|
||||||
// Process HTML with tidy before creating DOM (bool or null if undeclared)
|
// Process HTML with tidy before creating DOM (bool or null if undeclared)
|
||||||
public $tidy = null;
|
public $tidy = null;
|
||||||
|
|
||||||
protected $default_tidy = true; // used if undeclared
|
protected $default_tidy = true; // used if undeclared
|
||||||
|
|
||||||
// Autodetect title/body if xpath expressions fail to produce results.
|
// Autodetect title/body if xpath expressions fail to produce results.
|
||||||
// Note that this applies to title and body separately, ie.
|
// Note that this applies to title and body separately, ie.
|
||||||
// * if we get a body match but no title match, this option will determine whether we autodetect title
|
// * if we get a body match but no title match, this option will determine whether we autodetect title
|
||||||
// * if neither match, this determines whether we autodetect title and body.
|
// * if neither match, this determines whether we autodetect title and body.
|
||||||
// Also note that this only applies when there is at least one xpath expression in title or body, ie.
|
// Also note that this only applies when there is at least one xpath expression in title or body, ie.
|
||||||
// * if title and body are both empty (no xpath expressions), this option has no effect (both title and body will be auto-detected)
|
// * if title and body are both empty (no xpath expressions), this option has no effect (both title and body will be auto-detected)
|
||||||
// * if there's an xpath expression for title and none for body, body will be auto-detected and this option will determine whether we auto-detect title if the xpath expression for it fails to produce results.
|
// * if there's an xpath expression for title and none for body, body will be auto-detected and this option will determine whether we auto-detect title if the xpath expression for it fails to produce results.
|
||||||
// Usage scenario: you want to extract something specific from a set of URLs, e.g. a table, and if the table is not found, you want to ignore the entry completely. Auto-detection is unlikely to succeed here, so you construct your patterns and set this option to false. Another scenario may be a site where auto-detection has proven to fail (or worse, picked up the wrong content).
|
// Usage scenario: you want to extract something specific from a set of URLs, e.g. a table, and if the table is not found, you want to ignore the entry completely. Auto-detection is unlikely to succeed here, so you construct your patterns and set this option to false. Another scenario may be a site where auto-detection has proven to fail (or worse, picked up the wrong content).
|
||||||
// bool or null if undeclared
|
// bool or null if undeclared
|
||||||
public $autodetect_on_failure = null;
|
public $autodetect_on_failure = null;
|
||||||
protected $default_autodetect_on_failure = true; // used if undeclared
|
protected $default_autodetect_on_failure = true; // used if undeclared
|
||||||
|
|
||||||
// Clean up content block - attempt to remove elements that appear to be superfluous
|
// Clean up content block - attempt to remove elements that appear to be superfluous
|
||||||
// bool or null if undeclared
|
// bool or null if undeclared
|
||||||
public $prune = null;
|
public $prune = null;
|
||||||
protected $default_prune = true; // used if undeclared
|
protected $default_prune = true; // used if undeclared
|
||||||
|
|
||||||
// Test URL - if present, can be used to test the config above
|
// Test URL - if present, can be used to test the config above
|
||||||
public $test_url = array();
|
public $test_url = array();
|
||||||
|
|
||||||
// Single-page link - should identify a link element or URL pointing to the page holding the entire article
|
// Single-page link - should identify a link element or URL pointing to the page holding the entire article
|
||||||
// This is useful for sites which split their articles across multiple pages. Links to such pages tend to
|
// This is useful for sites which split their articles across multiple pages. Links to such pages tend to
|
||||||
// display the first page with links to the other pages at the bottom. Often there is also a link to a page
|
// display the first page with links to the other pages at the bottom. Often there is also a link to a page
|
||||||
// which displays the entire article on one page (e.g. 'print view').
|
// which displays the entire article on one page (e.g. 'print view').
|
||||||
// This should be an XPath expression identifying the link to that page. If present and we find a match,
|
// This should be an XPath expression identifying the link to that page. If present and we find a match,
|
||||||
// we will retrieve that page and the rest of the options in this config will be applied to the new page.
|
// we will retrieve that page and the rest of the options in this config will be applied to the new page.
|
||||||
public $single_page_link = array();
|
public $single_page_link = array();
|
||||||
|
|
||||||
public $next_page_link = array();
|
public $next_page_link = array();
|
||||||
|
|
||||||
// Single-page link in feed? - same as above, but patterns applied to item description HTML taken from feed
|
// Single-page link in feed? - same as above, but patterns applied to item description HTML taken from feed
|
||||||
public $single_page_link_in_feed = array();
|
public $single_page_link_in_feed = array();
|
||||||
|
|
||||||
// Which parser to use for turning raw HTML into a DOMDocument (either 'libxml' or 'html5lib')
|
// Which parser to use for turning raw HTML into a DOMDocument (either 'libxml' or 'html5lib')
|
||||||
// string or null if undeclared
|
// string or null if undeclared
|
||||||
public $parser = null;
|
public $parser = null;
|
||||||
protected $default_parser = 'libxml'; // used if undeclared
|
protected $default_parser = 'libxml'; // used if undeclared
|
||||||
|
|
||||||
// Strings to search for in HTML before processing begins (used with $replace_string)
|
// Strings to search for in HTML before processing begins (used with $replace_string)
|
||||||
public $find_string = array();
|
public $find_string = array();
|
||||||
// Strings to replace those found in $find_string before HTML processing begins
|
// Strings to replace those found in $find_string before HTML processing begins
|
||||||
public $replace_string = array();
|
public $replace_string = array();
|
||||||
|
|
||||||
// the options below cannot be set in the config files which this class represents
|
// the options below cannot be set in the config files which this class represents
|
||||||
|
|
||||||
//public $cache_in_apc = false; // used to decide if we should cache in apc or not
|
//public $cache_in_apc = false; // used to decide if we should cache in apc or not
|
||||||
public $cache_key = null;
|
public $cache_key = null;
|
||||||
public static $debug = false;
|
public static $debug = false;
|
||||||
protected static $apc = false;
|
protected static $apc = false;
|
||||||
protected static $config_path;
|
protected static $config_path;
|
||||||
protected static $config_path_fallback;
|
protected static $config_path_fallback;
|
||||||
protected static $config_cache = array();
|
protected static $config_cache = array();
|
||||||
const HOSTNAME_REGEX = '/^(([a-zA-Z0-9-]*[a-zA-Z0-9])\.)*([A-Za-z0-9-]*[A-Za-z0-9])$/';
|
const HOSTNAME_REGEX = '/^(([a-zA-Z0-9-]*[a-zA-Z0-9])\.)*([A-Za-z0-9-]*[A-Za-z0-9])$/';
|
||||||
|
|
||||||
protected static function debug($msg) {
|
protected static function debug($msg) {
|
||||||
if (self::$debug) {
|
if (self::$debug) {
|
||||||
//$mem = round(memory_get_usage()/1024, 2);
|
//$mem = round(memory_get_usage()/1024, 2);
|
||||||
//$memPeak = round(memory_get_peak_usage()/1024, 2);
|
//$memPeak = round(memory_get_peak_usage()/1024, 2);
|
||||||
echo '* ',$msg;
|
echo '* ',$msg;
|
||||||
//echo ' - mem used: ',$mem," (peak: $memPeak)\n";
|
//echo ' - mem used: ',$mem," (peak: $memPeak)\n";
|
||||||
echo "\n";
|
echo "\n";
|
||||||
ob_flush();
|
ob_flush();
|
||||||
flush();
|
flush();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// enable APC caching of certain site config files?
|
// enable APC caching of certain site config files?
|
||||||
// If enabled the following site config files will be
|
// If enabled the following site config files will be
|
||||||
// cached in APC cache (when requested for first time):
|
// cached in APC cache (when requested for first time):
|
||||||
// * anything in site_config/custom/ and its corresponding file in site_config/standard/
|
// * anything in site_config/custom/ and its corresponding file in site_config/standard/
|
||||||
// * the site config files associated with HTML fingerprints
|
// * the site config files associated with HTML fingerprints
|
||||||
// * the global site config file
|
// * the global site config file
|
||||||
// returns true if enabled, false otherwise
|
// returns true if enabled, false otherwise
|
||||||
public static function use_apc($apc=true) {
|
public static function use_apc($apc=true) {
|
||||||
if (!function_exists('apc_add')) {
|
if (!function_exists('apc_add')) {
|
||||||
if ($apc) self::debug('APC will not be used (function apc_add does not exist)');
|
if ($apc) self::debug('APC will not be used (function apc_add does not exist)');
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
self::$apc = $apc;
|
self::$apc = $apc;
|
||||||
return $apc;
|
return $apc;
|
||||||
}
|
}
|
||||||
|
|
||||||
// return bool or null
|
// return bool or null
|
||||||
public function tidy($use_default=true) {
|
public function tidy($use_default=true) {
|
||||||
if ($use_default) return (isset($this->tidy)) ? $this->tidy : $this->default_tidy;
|
if ($use_default) return (isset($this->tidy)) ? $this->tidy : $this->default_tidy;
|
||||||
return $this->tidy;
|
return $this->tidy;
|
||||||
}
|
}
|
||||||
|
|
||||||
// return bool or null
|
// return bool or null
|
||||||
public function prune($use_default=true) {
|
public function prune($use_default=true) {
|
||||||
if ($use_default) return (isset($this->prune)) ? $this->prune : $this->default_prune;
|
if ($use_default) return (isset($this->prune)) ? $this->prune : $this->default_prune;
|
||||||
return $this->prune;
|
return $this->prune;
|
||||||
}
|
}
|
||||||
|
|
||||||
// return string or null
|
// return string or null
|
||||||
public function parser($use_default=true) {
|
public function parser($use_default=true) {
|
||||||
if ($use_default) return (isset($this->parser)) ? $this->parser : $this->default_parser;
|
if ($use_default) return (isset($this->parser)) ? $this->parser : $this->default_parser;
|
||||||
return $this->parser;
|
return $this->parser;
|
||||||
}
|
}
|
||||||
|
|
||||||
// return bool or null
|
// return bool or null
|
||||||
public function autodetect_on_failure($use_default=true) {
|
public function autodetect_on_failure($use_default=true) {
|
||||||
if ($use_default) return (isset($this->autodetect_on_failure)) ? $this->autodetect_on_failure : $this->default_autodetect_on_failure;
|
if ($use_default) return (isset($this->autodetect_on_failure)) ? $this->autodetect_on_failure : $this->default_autodetect_on_failure;
|
||||||
return $this->autodetect_on_failure;
|
return $this->autodetect_on_failure;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static function set_config_path($path, $fallback=null) {
|
public static function set_config_path($path, $fallback=null) {
|
||||||
self::$config_path = $path;
|
self::$config_path = $path;
|
||||||
self::$config_path_fallback = $fallback;
|
self::$config_path_fallback = $fallback;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static function add_to_cache($key, SiteConfig $config, $use_apc=true) {
|
public static function add_to_cache($key, SiteConfig $config, $use_apc=true) {
|
||||||
$key = strtolower($key);
|
$key = strtolower($key);
|
||||||
if (substr($key, 0, 4) == 'www.') $key = substr($key, 4);
|
if (substr($key, 0, 4) == 'www.') $key = substr($key, 4);
|
||||||
if ($config->cache_key) $key = $config->cache_key;
|
if ($config->cache_key) $key = $config->cache_key;
|
||||||
self::$config_cache[$key] = $config;
|
self::$config_cache[$key] = $config;
|
||||||
if (self::$apc && $use_apc) {
|
if (self::$apc && $use_apc) {
|
||||||
self::debug("Adding site config to APC cache with key sc.$key");
|
self::debug("Adding site config to APC cache with key sc.$key");
|
||||||
apc_add("sc.$key", $config);
|
apc_add("sc.$key", $config);
|
||||||
}
|
}
|
||||||
self::debug("Cached site config with key $key");
|
self::debug("Cached site config with key $key");
|
||||||
}
|
}
|
||||||
|
|
||||||
public static function is_cached($key) {
|
public static function is_cached($key) {
|
||||||
$key = strtolower($key);
|
$key = strtolower($key);
|
||||||
if (substr($key, 0, 4) == 'www.') $key = substr($key, 4);
|
if (substr($key, 0, 4) == 'www.') $key = substr($key, 4);
|
||||||
if (array_key_exists($key, self::$config_cache)) {
|
if (array_key_exists($key, self::$config_cache)) {
|
||||||
return true;
|
return true;
|
||||||
} elseif (self::$apc && (bool)apc_fetch("sc.$key")) {
|
} elseif (self::$apc && (bool)apc_fetch("sc.$key")) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
public function append(SiteConfig $newconfig) {
|
public function append(SiteConfig $newconfig) {
|
||||||
// check for commands where we accept multiple statements (no test_url)
|
// check for commands where we accept multiple statements (no test_url)
|
||||||
foreach (array('title', 'body', 'author', 'date', 'strip', 'strip_id_or_class', 'strip_image_src', 'single_page_link', 'single_page_link_in_feed', 'next_page_link', 'http_header', 'find_string', 'replace_string') as $var) {
|
foreach (array('title', 'body', 'author', 'date', 'strip', 'strip_id_or_class', 'strip_image_src', 'single_page_link', 'single_page_link_in_feed', 'next_page_link', 'http_header') as $var) {
|
||||||
// append array elements for this config variable from $newconfig to this config
|
// append array elements for this config variable from $newconfig to this config
|
||||||
//$this->$var = $this->$var + $newconfig->$var;
|
//$this->$var = $this->$var + $newconfig->$var;
|
||||||
$this->$var = array_unique(array_merge($this->$var, $newconfig->$var));
|
$this->$var = array_unique(array_merge($this->$var, $newconfig->$var));
|
||||||
}
|
}
|
||||||
// check for single statement commands
|
// check for single statement commands
|
||||||
// we do not overwrite existing non null values
|
// we do not overwrite existing non null values
|
||||||
foreach (array('tidy', 'prune', 'parser', 'autodetect_on_failure') as $var) {
|
foreach (array('tidy', 'prune', 'parser', 'autodetect_on_failure') as $var) {
|
||||||
if ($this->$var === null) $this->$var = $newconfig->$var;
|
if ($this->$var === null) $this->$var = $newconfig->$var;
|
||||||
}
|
}
|
||||||
}
|
// treat find_string and replace_string separately (don't apply array_unique) (thanks fabrizio!)
|
||||||
|
foreach (array('find_string', 'replace_string') as $var) {
|
||||||
// returns SiteConfig instance if an appropriate one is found, false otherwise
|
// append array elements for this config variable from $newconfig to this config
|
||||||
// if $exact_host_match is true, we will not look for wildcard config matches
|
//$this->$var = $this->$var + $newconfig->$var;
|
||||||
// by default if host is 'test.example.org' we will look for and load '.example.org.txt' if it exists
|
$this->$var = array_merge($this->$var, $newconfig->$var);
|
||||||
public static function build($host, $exact_host_match=false) {
|
}
|
||||||
$host = strtolower($host);
|
}
|
||||||
if (substr($host, 0, 4) == 'www.') $host = substr($host, 4);
|
|
||||||
if (!$host || (strlen($host) > 200) || !preg_match(self::HOSTNAME_REGEX, ltrim($host, '.'))) return false;
|
// returns SiteConfig instance if an appropriate one is found, false otherwise
|
||||||
// check for site configuration
|
// if $exact_host_match is true, we will not look for wildcard config matches
|
||||||
$try = array($host);
|
// by default if host is 'test.example.org' we will look for and load '.example.org.txt' if it exists
|
||||||
// should we look for wildcard matches
|
public static function build($host, $exact_host_match=false) {
|
||||||
if (!$exact_host_match) {
|
$host = strtolower($host);
|
||||||
$split = explode('.', $host);
|
if (substr($host, 0, 4) == 'www.') $host = substr($host, 4);
|
||||||
if (count($split) > 1) {
|
if (!$host || (strlen($host) > 200) || !preg_match(self::HOSTNAME_REGEX, ltrim($host, '.'))) return false;
|
||||||
array_shift($split);
|
// check for site configuration
|
||||||
$try[] = '.'.implode('.', $split);
|
$try = array($host);
|
||||||
}
|
// should we look for wildcard matches
|
||||||
}
|
if (!$exact_host_match) {
|
||||||
|
$split = explode('.', $host);
|
||||||
// look for site config file in primary folder
|
if (count($split) > 1) {
|
||||||
self::debug(". looking for site config for $host in primary folder");
|
array_shift($split);
|
||||||
foreach ($try as $h) {
|
$try[] = '.'.implode('.', $split);
|
||||||
if (array_key_exists($h, self::$config_cache)) {
|
}
|
||||||
self::debug("... site config for $h already loaded in this request");
|
}
|
||||||
return self::$config_cache[$h];
|
|
||||||
} elseif (self::$apc && ($sconfig = apc_fetch("sc.$h"))) {
|
// look for site config file in primary folder
|
||||||
self::debug("... site config for $h in APC cache");
|
self::debug(". looking for site config for $host in primary folder");
|
||||||
return $sconfig;
|
foreach ($try as $h) {
|
||||||
} elseif (file_exists(self::$config_path."/$h.txt")) {
|
if (array_key_exists($h, self::$config_cache)) {
|
||||||
self::debug("... found site config ($h.txt)");
|
self::debug("... site config for $h already loaded in this request");
|
||||||
$file_primary = self::$config_path."/$h.txt";
|
return self::$config_cache[$h];
|
||||||
$matched_name = $h;
|
} elseif (self::$apc && ($sconfig = apc_fetch("sc.$h"))) {
|
||||||
break;
|
self::debug("... site config for $h in APC cache");
|
||||||
}
|
return $sconfig;
|
||||||
}
|
} elseif (file_exists(self::$config_path."/$h.txt")) {
|
||||||
|
self::debug("... found site config ($h.txt)");
|
||||||
// if we found site config, process it
|
$file_primary = self::$config_path."/$h.txt";
|
||||||
if (isset($file_primary)) {
|
$matched_name = $h;
|
||||||
$config_lines = file($file_primary, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
|
break;
|
||||||
if (!$config_lines || !is_array($config_lines)) return false;
|
}
|
||||||
$config = self::build_from_array($config_lines);
|
}
|
||||||
// if APC caching is available and enabled, mark this for cache
|
|
||||||
//$config->cache_in_apc = true;
|
// if we found site config, process it
|
||||||
$config->cache_key = $matched_name;
|
if (isset($file_primary)) {
|
||||||
|
$config_lines = file($file_primary, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
|
||||||
// if autodetec on failure is off (on by default) we do not need to look
|
if (!$config_lines || !is_array($config_lines)) return false;
|
||||||
// in secondary folder
|
$config = self::build_from_array($config_lines);
|
||||||
if (!$config->autodetect_on_failure()) {
|
// if APC caching is available and enabled, mark this for cache
|
||||||
self::debug('... autodetect on failure is disabled (no other site config files will be loaded)');
|
//$config->cache_in_apc = true;
|
||||||
return $config;
|
$config->cache_key = $matched_name;
|
||||||
}
|
|
||||||
}
|
// if autodetec on failure is off (on by default) we do not need to look
|
||||||
|
// in secondary folder
|
||||||
// look for site config file in secondary folder
|
if (!$config->autodetect_on_failure()) {
|
||||||
if (isset(self::$config_path_fallback)) {
|
self::debug('... autodetect on failure is disabled (no other site config files will be loaded)');
|
||||||
self::debug(". looking for site config for $host in secondary folder");
|
return $config;
|
||||||
foreach ($try as $h) {
|
}
|
||||||
if (file_exists(self::$config_path_fallback."/$h.txt")) {
|
}
|
||||||
self::debug("... found site config in secondary folder ($h.txt)");
|
|
||||||
$file_secondary = self::$config_path_fallback."/$h.txt";
|
// look for site config file in secondary folder
|
||||||
$matched_name = $h;
|
if (isset(self::$config_path_fallback)) {
|
||||||
break;
|
self::debug(". looking for site config for $host in secondary folder");
|
||||||
}
|
foreach ($try as $h) {
|
||||||
}
|
if (file_exists(self::$config_path_fallback."/$h.txt")) {
|
||||||
if (!isset($file_secondary)) {
|
self::debug("... found site config in secondary folder ($h.txt)");
|
||||||
self::debug("... no site config match in secondary folder");
|
$file_secondary = self::$config_path_fallback."/$h.txt";
|
||||||
}
|
$matched_name = $h;
|
||||||
}
|
break;
|
||||||
|
}
|
||||||
// return false if no config file found
|
}
|
||||||
if (!isset($file_primary) && !isset($file_secondary)) {
|
if (!isset($file_secondary)) {
|
||||||
self::debug("... no site config match for $host");
|
self::debug("... no site config match in secondary folder");
|
||||||
return false;
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// return primary config if secondary not found
|
// return false if no config file found
|
||||||
if (!isset($file_secondary) && isset($config)) {
|
if (!isset($file_primary) && !isset($file_secondary)) {
|
||||||
return $config;
|
self::debug("... no site config match for $host");
|
||||||
}
|
return false;
|
||||||
|
}
|
||||||
// process secondary config file
|
|
||||||
$config_lines = file($file_secondary, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
|
// return primary config if secondary not found
|
||||||
if (!$config_lines || !is_array($config_lines)) {
|
if (!isset($file_secondary) && isset($config)) {
|
||||||
// failed to process secondary
|
return $config;
|
||||||
if (isset($config)) {
|
}
|
||||||
// return primary config
|
|
||||||
return $config;
|
// process secondary config file
|
||||||
} else {
|
$config_lines = file($file_secondary, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
|
||||||
return false;
|
if (!$config_lines || !is_array($config_lines)) {
|
||||||
}
|
// failed to process secondary
|
||||||
}
|
if (isset($config)) {
|
||||||
|
// return primary config
|
||||||
// merge with primary and return
|
return $config;
|
||||||
if (isset($config)) {
|
} else {
|
||||||
self::debug('. merging config files');
|
return false;
|
||||||
$config->append(self::build_from_array($config_lines));
|
}
|
||||||
return $config;
|
}
|
||||||
} else {
|
|
||||||
// return just secondary
|
// merge with primary and return
|
||||||
$config = self::build_from_array($config_lines);
|
if (isset($config)) {
|
||||||
// if APC caching is available and enabled, mark this for cache
|
self::debug('. merging config files');
|
||||||
//$config->cache_in_apc = true;
|
$config->append(self::build_from_array($config_lines));
|
||||||
$config->cache_key = $matched_name;
|
return $config;
|
||||||
return $config;
|
} else {
|
||||||
}
|
// return just secondary
|
||||||
}
|
$config = self::build_from_array($config_lines);
|
||||||
|
// if APC caching is available and enabled, mark this for cache
|
||||||
public static function build_from_array(array $lines) {
|
//$config->cache_in_apc = true;
|
||||||
$config = new SiteConfig();
|
$config->cache_key = $matched_name;
|
||||||
foreach ($lines as $line) {
|
return $config;
|
||||||
$line = trim($line);
|
}
|
||||||
|
}
|
||||||
// skip comments, empty lines
|
|
||||||
if ($line == '' || $line[0] == '#') continue;
|
public static function build_from_array(array $lines) {
|
||||||
|
$config = new SiteConfig();
|
||||||
// get command
|
foreach ($lines as $line) {
|
||||||
$command = explode(':', $line, 2);
|
$line = trim($line);
|
||||||
// if there's no colon ':', skip this line
|
|
||||||
if (count($command) != 2) continue;
|
// skip comments, empty lines
|
||||||
$val = trim($command[1]);
|
if ($line == '' || $line[0] == '#') continue;
|
||||||
$command = trim($command[0]);
|
|
||||||
if ($command == '' || $val == '') continue;
|
// get command
|
||||||
|
$command = explode(':', $line, 2);
|
||||||
// check for commands where we accept multiple statements
|
// if there's no colon ':', skip this line
|
||||||
if (in_array($command, array('title', 'body', 'author', 'date', 'strip', 'strip_id_or_class', 'strip_image_src', 'single_page_link', 'single_page_link_in_feed', 'next_page_link', 'http_header', 'test_url', 'find_string', 'replace_string'))) {
|
if (count($command) != 2) continue;
|
||||||
array_push($config->$command, $val);
|
$val = trim($command[1]);
|
||||||
// check for single statement commands that evaluate to true or false
|
$command = trim($command[0]);
|
||||||
} elseif (in_array($command, array('tidy', 'prune', 'autodetect_on_failure'))) {
|
if ($command == '' || $val == '') continue;
|
||||||
$config->$command = ($val == 'yes');
|
|
||||||
// check for single statement commands stored as strings
|
// check for commands where we accept multiple statements
|
||||||
} elseif (in_array($command, array('parser'))) {
|
if (in_array($command, array('title', 'body', 'author', 'date', 'strip', 'strip_id_or_class', 'strip_image_src', 'single_page_link', 'single_page_link_in_feed', 'next_page_link', 'http_header', 'test_url', 'find_string', 'replace_string'))) {
|
||||||
$config->$command = $val;
|
array_push($config->$command, $val);
|
||||||
// check for replace_string(find): replace
|
// check for single statement commands that evaluate to true or false
|
||||||
} elseif ((substr($command, -1) == ')') && preg_match('!^([a-z0-9_]+)\((.*?)\)$!i', $command, $match)) {
|
} elseif (in_array($command, array('tidy', 'prune', 'autodetect_on_failure'))) {
|
||||||
if (in_array($match[1], array('replace_string'))) {
|
$config->$command = ($val == 'yes');
|
||||||
$command = $match[1];
|
// check for single statement commands stored as strings
|
||||||
array_push($config->find_string, $match[2]);
|
} elseif (in_array($command, array('parser'))) {
|
||||||
array_push($config->$command, $val);
|
$config->$command = $val;
|
||||||
}
|
// check for replace_string(find): replace
|
||||||
}
|
} elseif ((substr($command, -1) == ')') && preg_match('!^([a-z0-9_]+)\((.*?)\)$!i', $command, $match)) {
|
||||||
}
|
if (in_array($match[1], array('replace_string'))) {
|
||||||
return $config;
|
$command = $match[1];
|
||||||
}
|
array_push($config->find_string, $match[2]);
|
||||||
}
|
array_push($config->$command, $val);
|
||||||
?>
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return $config;
|
||||||
|
}
|
||||||
|
}
|
100
inc/3rdparty/libraries/feedwriter/FeedItem.php
vendored
Normal file → Executable file
100
inc/3rdparty/libraries/feedwriter/FeedItem.php
vendored
Normal file → Executable file
@ -1,7 +1,7 @@
|
|||||||
<?php
|
<?php
|
||||||
/**
|
/**
|
||||||
* Univarsel Feed Writer
|
* Univarsel Feed Writer
|
||||||
*
|
*
|
||||||
* FeedItem class - Used as feed element in FeedWriter class
|
* FeedItem class - Used as feed element in FeedWriter class
|
||||||
*
|
*
|
||||||
* @package UnivarselFeedWriter
|
* @package UnivarselFeedWriter
|
||||||
@ -12,20 +12,20 @@
|
|||||||
{
|
{
|
||||||
private $elements = array(); //Collection of feed elements
|
private $elements = array(); //Collection of feed elements
|
||||||
private $version;
|
private $version;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructor
|
* Constructor
|
||||||
*
|
*
|
||||||
* @param contant (RSS1/RSS2/ATOM) RSS2 is default.
|
* @param contant (RSS1/RSS2/ATOM) RSS2 is default.
|
||||||
*/
|
*/
|
||||||
function __construct($version = RSS2)
|
function __construct($version = RSS2)
|
||||||
{
|
{
|
||||||
$this->version = $version;
|
$this->version = $version;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set element (overwrites existing elements with $elementName)
|
* Set element (overwrites existing elements with $elementName)
|
||||||
*
|
*
|
||||||
* @access public
|
* @access public
|
||||||
* @param srting The tag name of an element
|
* @param srting The tag name of an element
|
||||||
* @param srting The content of tag
|
* @param srting The content of tag
|
||||||
@ -38,11 +38,11 @@
|
|||||||
unset($this->elements[$elementName]);
|
unset($this->elements[$elementName]);
|
||||||
}
|
}
|
||||||
$this->addElement($elementName, $content, $attributes);
|
$this->addElement($elementName, $content, $attributes);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Add an element to elements array
|
* Add an element to elements array
|
||||||
*
|
*
|
||||||
* @access public
|
* @access public
|
||||||
* @param srting The tag name of an element
|
* @param srting The tag name of an element
|
||||||
* @param srting The content of tag
|
* @param srting The content of tag
|
||||||
@ -61,11 +61,11 @@
|
|||||||
$this->elements[$elementName][$i]['content'] = $content;
|
$this->elements[$elementName][$i]['content'] = $content;
|
||||||
$this->elements[$elementName][$i]['attributes'] = $attributes;
|
$this->elements[$elementName][$i]['attributes'] = $attributes;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set multiple feed elements from an array.
|
* Set multiple feed elements from an array.
|
||||||
* Elements which have attributes cannot be added by this method
|
* Elements which have attributes cannot be added by this method
|
||||||
*
|
*
|
||||||
* @access public
|
* @access public
|
||||||
* @param array array of elements in 'tagName' => 'tagContent' format.
|
* @param array array of elements in 'tagName' => 'tagContent' format.
|
||||||
* @return void
|
* @return void
|
||||||
@ -73,15 +73,15 @@
|
|||||||
public function addElementArray($elementArray)
|
public function addElementArray($elementArray)
|
||||||
{
|
{
|
||||||
if(! is_array($elementArray)) return;
|
if(! is_array($elementArray)) return;
|
||||||
foreach ($elementArray as $elementName => $content)
|
foreach ($elementArray as $elementName => $content)
|
||||||
{
|
{
|
||||||
$this->addElement($elementName, $content);
|
$this->addElement($elementName, $content);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return the collection of elements in this feed item
|
* Return the collection of elements in this feed item
|
||||||
*
|
*
|
||||||
* @access public
|
* @access public
|
||||||
* @return array
|
* @return array
|
||||||
*/
|
*/
|
||||||
@ -89,68 +89,74 @@
|
|||||||
{
|
{
|
||||||
return $this->elements;
|
return $this->elements;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Wrapper functions ------------------------------------------------------
|
// Wrapper functions ------------------------------------------------------
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set the 'dscription' element of feed item
|
* Set the 'dscription' element of feed item
|
||||||
*
|
*
|
||||||
* @access public
|
* @access public
|
||||||
* @param string The content of 'description' element
|
* @param string The content of 'description' element
|
||||||
* @return void
|
* @return void
|
||||||
*/
|
*/
|
||||||
public function setDescription($description)
|
public function setDescription($description)
|
||||||
{
|
{
|
||||||
$this->setElement('description', $description);
|
$tag = ($this->version == ATOM)? 'summary' : 'description';
|
||||||
|
$this->setElement($tag, $description);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @desc Set the 'title' element of feed item
|
* @desc Set the 'title' element of feed item
|
||||||
* @access public
|
* @access public
|
||||||
* @param string The content of 'title' element
|
* @param string The content of 'title' element
|
||||||
* @return void
|
* @return void
|
||||||
*/
|
*/
|
||||||
public function setTitle($title)
|
public function setTitle($title)
|
||||||
{
|
{
|
||||||
$this->setElement('title', $title);
|
$this->setElement('title', $title);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set the 'date' element of feed item
|
* Set the 'date' element of feed item
|
||||||
*
|
*
|
||||||
* @access public
|
* @access public
|
||||||
* @param string The content of 'date' element
|
* @param string The content of 'date' element
|
||||||
* @return void
|
* @return void
|
||||||
*/
|
*/
|
||||||
public function setDate($date)
|
public function setDate($date)
|
||||||
{
|
{
|
||||||
if(! is_numeric($date))
|
if(! is_numeric($date))
|
||||||
{
|
{
|
||||||
$date = strtotime($date);
|
$date = strtotime($date);
|
||||||
}
|
}
|
||||||
|
|
||||||
if($this->version == RSS2)
|
if($this->version == ATOM)
|
||||||
{
|
{
|
||||||
$tag = 'pubDate';
|
$tag = 'updated';
|
||||||
$value = date(DATE_RSS, $date);
|
$value = date(DATE_ATOM, $date);
|
||||||
}
|
}
|
||||||
else
|
elseif($this->version == RSS2)
|
||||||
{
|
{
|
||||||
$tag = 'dc:date';
|
$tag = 'pubDate';
|
||||||
$value = date("Y-m-d", $date);
|
$value = date(DATE_RSS, $date);
|
||||||
}
|
}
|
||||||
|
else
|
||||||
$this->setElement($tag, $value);
|
{
|
||||||
|
$tag = 'dc:date';
|
||||||
|
$value = date("Y-m-d", $date);
|
||||||
|
}
|
||||||
|
|
||||||
|
$this->setElement($tag, $value);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set the 'link' element of feed item
|
* Set the 'link' element of feed item
|
||||||
*
|
*
|
||||||
* @access public
|
* @access public
|
||||||
* @param string The content of 'link' element
|
* @param string The content of 'link' element
|
||||||
* @return void
|
* @return void
|
||||||
*/
|
*/
|
||||||
public function setLink($link)
|
public function setLink($link)
|
||||||
{
|
{
|
||||||
if($this->version == RSS2 || $this->version == RSS1)
|
if($this->version == RSS2 || $this->version == RSS1)
|
||||||
{
|
{
|
||||||
@ -161,27 +167,27 @@
|
|||||||
{
|
{
|
||||||
$this->setElement('link','',array('href'=>$link));
|
$this->setElement('link','',array('href'=>$link));
|
||||||
$this->setElement('id', FeedWriter::uuid($link,'urn:uuid:'));
|
$this->setElement('id', FeedWriter::uuid($link,'urn:uuid:'));
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set the 'source' element of feed item
|
* Set the 'source' element of feed item
|
||||||
*
|
*
|
||||||
* @access public
|
* @access public
|
||||||
* @param string The content of 'source' element
|
* @param string The content of 'source' element
|
||||||
* @return void
|
* @return void
|
||||||
*/
|
*/
|
||||||
public function setSource($link)
|
public function setSource($link)
|
||||||
{
|
{
|
||||||
$attributes = array('url'=>$link);
|
$attributes = array('url'=>$link);
|
||||||
$this->setElement('source', "wallabag",$attributes);
|
$this->setElement('source', "wallabag",$attributes);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set the 'encloser' element of feed item
|
* Set the 'encloser' element of feed item
|
||||||
* For RSS 2.0 only
|
* For RSS 2.0 only
|
||||||
*
|
*
|
||||||
* @access public
|
* @access public
|
||||||
* @param string The url attribute of encloser tag
|
* @param string The url attribute of encloser tag
|
||||||
* @param string The length attribute of encloser tag
|
* @param string The length attribute of encloser tag
|
||||||
@ -193,6 +199,6 @@
|
|||||||
$attributes = array('url'=>$url, 'length'=>$length, 'type'=>$type);
|
$attributes = array('url'=>$url, 'length'=>$length, 'type'=>$type);
|
||||||
$this->setElement('enclosure','',$attributes);
|
$this->setElement('enclosure','',$attributes);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // end of class FeedItem
|
} // end of class FeedItem
|
||||||
?>
|
?>
|
15
inc/3rdparty/libraries/feedwriter/FeedWriter.php
vendored
15
inc/3rdparty/libraries/feedwriter/FeedWriter.php
vendored
@ -2,6 +2,7 @@
|
|||||||
define('RSS2', 1, true);
|
define('RSS2', 1, true);
|
||||||
define('JSON', 2, true);
|
define('JSON', 2, true);
|
||||||
define('JSONP', 3, true);
|
define('JSONP', 3, true);
|
||||||
|
define('ATOM', 4, true);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Univarsel Feed Writer class
|
* Univarsel Feed Writer class
|
||||||
@ -101,11 +102,11 @@ define('JSONP', 3, true);
|
|||||||
header('Content-type: application/javascript; charset=UTF-8');
|
header('Content-type: application/javascript; charset=UTF-8');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($this->version == JSON || $this->version == JSONP) {
|
if ($this->version == JSON || $this->version == JSONP) {
|
||||||
$this->json = new stdClass();
|
$this->json = new stdClass();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
$this->printHead();
|
$this->printHead();
|
||||||
$this->printChannels();
|
$this->printChannels();
|
||||||
@ -116,6 +117,11 @@ define('JSONP', 3, true);
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public function &getItems()
|
||||||
|
{
|
||||||
|
return $this->items;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new FeedItem.
|
* Create a new FeedItem.
|
||||||
*
|
*
|
||||||
@ -199,7 +205,8 @@ define('JSONP', 3, true);
|
|||||||
*/
|
*/
|
||||||
public function setDescription($description)
|
public function setDescription($description)
|
||||||
{
|
{
|
||||||
$this->setChannelElement('description', $description);
|
$tag = ($this->version == ATOM)? 'subtitle' : 'description';
|
||||||
|
$this->setChannelElement($tag, $description);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -244,7 +251,7 @@ define('JSONP', 3, true);
|
|||||||
{
|
{
|
||||||
$out = '<?xml version="1.0" encoding="utf-8"?>'."\n";
|
$out = '<?xml version="1.0" encoding="utf-8"?>'."\n";
|
||||||
if ($this->xsl) $out .= '<?xml-stylesheet type="text/xsl" href="'.htmlspecialchars($this->xsl).'"?>' . PHP_EOL;
|
if ($this->xsl) $out .= '<?xml-stylesheet type="text/xsl" href="'.htmlspecialchars($this->xsl).'"?>' . PHP_EOL;
|
||||||
$out .= '<rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:media="http://search.yahoo.com/mrss/">' . PHP_EOL;
|
$out .= '<rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:media="http://search.yahoo.com/mrss/">' . PHP_EOL;
|
||||||
echo $out;
|
echo $out;
|
||||||
}
|
}
|
||||||
elseif ($this->version == JSON || $this->version == JSONP)
|
elseif ($this->version == JSON || $this->version == JSONP)
|
||||||
|
13
inc/3rdparty/libraries/html5/TreeBuilder.php
vendored
13
inc/3rdparty/libraries/html5/TreeBuilder.php
vendored
@ -134,6 +134,7 @@ class HTML5_TreeBuilder {
|
|||||||
|
|
||||||
// Namespaces for foreign content
|
// Namespaces for foreign content
|
||||||
const NS_HTML = null; // to prevent DOM from requiring NS on everything
|
const NS_HTML = null; // to prevent DOM from requiring NS on everything
|
||||||
|
const NS_XHTML = 'http://www.w3.org/1999/xhtml';
|
||||||
const NS_MATHML = 'http://www.w3.org/1998/Math/MathML';
|
const NS_MATHML = 'http://www.w3.org/1998/Math/MathML';
|
||||||
const NS_SVG = 'http://www.w3.org/2000/svg';
|
const NS_SVG = 'http://www.w3.org/2000/svg';
|
||||||
const NS_XLINK = 'http://www.w3.org/1999/xlink';
|
const NS_XLINK = 'http://www.w3.org/1999/xlink';
|
||||||
@ -3157,11 +3158,19 @@ class HTML5_TreeBuilder {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private function insertElement($token, $append = true) {
|
private function insertElement($token, $append = true) {
|
||||||
$el = $this->dom->createElementNS(self::NS_HTML, $token['name']);
|
//$el = $this->dom->createElementNS(self::NS_HTML, $token['name']);
|
||||||
|
$namespaceURI = strpos($token['name'], ':') ? self::NS_XHTML : self::NS_HTML;
|
||||||
|
$el = $this->dom->createElementNS($namespaceURI, $token['name']);
|
||||||
|
|
||||||
if (!empty($token['attr'])) {
|
if (!empty($token['attr'])) {
|
||||||
foreach($token['attr'] as $attr) {
|
foreach($token['attr'] as $attr) {
|
||||||
if(!$el->hasAttribute($attr['name'])) {
|
|
||||||
|
// mike@macgirvin.com 2011-11-17, check attribute name for
|
||||||
|
// validity (ignoring extenders and combiners) as illegal chars in names
|
||||||
|
// causes everything to abort
|
||||||
|
|
||||||
|
$valid = preg_match('/^[a-zA-Z\_\:]([\-a-zA-Z0-9\_\:\.]+$)/',$attr['name']);
|
||||||
|
if($attr['name'] && (!$el->hasAttribute($attr['name'])) && ($valid)) {
|
||||||
$el->setAttribute($attr['name'], $attr['value']);
|
$el->setAttribute($attr['name'], $attr['value']);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,404 +1,403 @@
|
|||||||
<?php
|
<?php
|
||||||
/**
|
/**
|
||||||
* Cookie Jar
|
* Cookie Jar
|
||||||
*
|
*
|
||||||
* PHP class for handling cookies, as defined by the Netscape spec:
|
* PHP class for handling cookies, as defined by the Netscape spec:
|
||||||
* <http://curl.haxx.se/rfc/cookie_spec.html>
|
* <http://curl.haxx.se/rfc/cookie_spec.html>
|
||||||
*
|
*
|
||||||
* This class should be used to handle cookies (storing cookies from HTTP response messages, and
|
* This class should be used to handle cookies (storing cookies from HTTP response messages, and
|
||||||
* sending out cookies in HTTP request messages). This has been adapted for FiveFilters.org
|
* sending out cookies in HTTP request messages). This has been adapted for FiveFilters.org
|
||||||
* from the original version used in HTTP Navigator. See http://www.keyvan.net/code/http-navigator/
|
* from the original version used in HTTP Navigator. See http://www.keyvan.net/code/http-navigator/
|
||||||
*
|
*
|
||||||
* This class is mainly based on Cookies.pm <http://search.cpan.org/author/GAAS/libwww-perl-5.65/
|
* This class is mainly based on Cookies.pm <http://search.cpan.org/author/GAAS/libwww-perl-5.65/
|
||||||
* lib/HTTP/Cookies.pm> from the libwww-perl collection <http://www.linpro.no/lwp/>.
|
* lib/HTTP/Cookies.pm> from the libwww-perl collection <http://www.linpro.no/lwp/>.
|
||||||
* Unlike Cookies.pm, this class only supports the Netscape cookie spec, not RFC 2965.
|
* Unlike Cookies.pm, this class only supports the Netscape cookie spec, not RFC 2965.
|
||||||
*
|
*
|
||||||
* @version 0.5
|
* @version 0.5
|
||||||
* @date 2011-03-15
|
* @date 2011-03-15
|
||||||
* @see http://php.net/HttpRequestPool
|
* @see http://php.net/HttpRequestPool
|
||||||
* @author Keyvan Minoukadeh
|
* @author Keyvan Minoukadeh
|
||||||
* @copyright 2011 Keyvan Minoukadeh
|
* @copyright 2011 Keyvan Minoukadeh
|
||||||
* @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3
|
* @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3
|
||||||
*/
|
*/
|
||||||
|
|
||||||
class CookieJar
{
    /**
     * Cookies - array containing all cookies.
     *
     * <pre>
     * Cookies are stored like this:
     *   [domain][path][name] = array
     * where array is:
     *   0 => value, 1 => secure, 2 => expires
     * </pre>
     * "expires" is a Unix timestamp, or null for a session cookie.
     * Domain keys are lower-cased by set_cookie().
     *
     * @var array
     */
    public $cookies = array();

    /**
     * When true, debug() echoes trace messages to the output.
     *
     * @var bool
     */
    public $debug = false;

    /**
     * Constructor
     */
    public function __construct() {
    }

    /**
     * Echo a debug message together with current/peak memory usage.
     * Does nothing unless $this->debug is true.
     *
     * @param string $msg
     * @param string $file optional file name (shown only when $line is given too)
     * @param int $line optional line number
     * @return void
     */
    protected function debug($msg, $file=null, $line=null) {
        if (!$this->debug) return;
        $mem = round(memory_get_usage()/1024, 2);
        $memPeak = round(memory_get_peak_usage()/1024, 2);
        echo '* ',$msg;
        if (isset($file, $line)) echo " ($file line $line)";
        echo ' - mem used: ',$mem," (peak: $memPeak)\n";
        // ob_flush() raises a notice when there is no active output buffer
        if (ob_get_level() > 0) ob_flush();
        flush();
    }

    /**
     * Get matching cookies
     *
     * Builds the value of a "Cookie" request header for the given URL from
     * the cookies currently held in the jar.
     *
     * @param string $url absolute URL of the request (scheme and host are required,
     *                    a missing path is treated as "/")
     * @return string|false "name=value; name2=value2" (empty string if nothing matches),
     *                      or false if the URL cannot be parsed
     */
    public function getMatchingCookies($url)
    {
        $parts = @parse_url($url);
        if (!is_array($parts) || !isset($parts['scheme'], $parts['host'])) {
            return false;
        }
        // host names are case-insensitive; jar keys are stored lower-cased
        $domain = strtolower($parts['host']);
        if (strpos($domain, '.') === false) $domain .= '.local';
        // a URL such as http://example.com has no path component - default to the root
        $request_path = isset($parts['path']) ? $parts['path'] : '';
        if ($request_path == '') $request_path = '/';
        $request_secure = (strtolower($parts['scheme']) == 'https');
        unset($parts);
        // RFC 2965 notes:
        //  If multiple cookies satisfy the criteria above, they are ordered in
        //  the Cookie header such that those with more specific Path attributes
        //  precede those with less specific. Ordering with respect to other
        //  attributes (e.g., Domain) is unspecified.
        $now = time();
        $matched_cookies = array();
        // domain - walk from the full host name up through its parent domains
        $this->debug('Finding matching domains for '.$domain, __FILE__, __LINE__);
        while (strpos($domain, '.') !== false) {
            if (isset($this->cookies[$domain])) {
                $this->debug(' domain match found: '.$domain);
                $cookies = $this->cookies[$domain];
                // paths - find matching paths starting from most specific
                $this->debug(' - Finding matching paths for '.$request_path);
                $paths = array_keys($cookies);
                usort($paths, array($this, '_cmp_length'));
                foreach ($paths as $path) {
                    // skip this path if the request path does not path-match it
                    if (!$this->_path_match($request_path, $path)) continue;
                    $this->debug(' path match found: '.$path);
                    foreach ($cookies[$path] as $name => $values) {
                        // secure cookies are only sent over https
                        if ($values[1] && !$request_secure) continue;
                        // skip cookies that are not session cookies and have expired
                        if (is_int($values[2]) && ($values[2] < $now)) continue;
                        // cookie matches request
                        $this->debug(' cookie match: '.$name.'='.$values[0]);
                        $matched_cookies[] = $name.'='.$values[0];
                    }
                }
            }
            $domain = $this->_reduce_domain($domain);
        }
        // return cookies
        return implode('; ', $matched_cookies);
    }

    /**
     * Parse Set-Cookie values and store the acceptable ones in the jar.
     *
     * A cookie is rejected when its Domain attribute does not domain-match
     * the request host, or its Path attribute does not path-match the
     * request path. A missing or unparseable Expires attribute yields a
     * session cookie.
     *
     * @param string $url URL of the request that produced the headers
     * @param array $set_cookies array holding 1 or more "Set-Cookie" header values
     * @return void
     * @see extractCookies()
     */
    public function storeCookies($url, $set_cookies)
    {
        if (!is_array($set_cookies) || count($set_cookies) == 0) return;
        $param = @parse_url($url);
        if (!is_array($param) || !isset($param['host'])) return;
        // host names are case-insensitive; normalise so comparisons and jar keys agree
        $request_host = strtolower($param['host']);
        if (strpos($request_host, '.') === false) $request_host .= '.local';
        $request_path = isset($param['path']) ? $param['path'] : '';
        if ($request_path == '') $request_path = '/';
        //
        // loop through set-cookie headers
        //
        foreach ($set_cookies as $set_cookie) {
            $this->debug('Parsing: '.$set_cookie);
            // temporary cookie store (before adding to jar)
            $tmp_cookie = array();
            $attributes = explode(';', $set_cookie);
            foreach ($attributes as $x => $attribute) {
                $key_val = explode('=', $attribute, 2);
                if (count($key_val) != 2) {
                    // if the first param isn't a name=value pair, continue to the
                    // next set-cookie header
                    if ($x == 0) continue 2;
                    // check for secure flag
                    if (strtolower(trim($key_val[0])) == 'secure') $tmp_cookie['secure'] = true;
                    continue;
                }
                list($key, $val) = array_map('trim', $key_val);
                // first name=value pair is the cookie name and value; they are stored
                // under 'name' and 'value' to avoid conflicts with later parameters.
                if ($x == 0) {
                    $tmp_cookie = array('name'=>$key, 'value'=>$val);
                    continue;
                }
                $key = strtolower($key);
                if (in_array($key, array('expires', 'path', 'domain', 'secure'), true)) {
                    $tmp_cookie[$key] = $val;
                }
            }
            //
            // set cookie
            //
            // check domain - defaults to the request host when the attribute is
            // absent or names the request host itself
            $domain = $request_host;
            if (isset($tmp_cookie['domain'])) {
                $cookie_domain = strtolower($tmp_cookie['domain']);
                if (($cookie_domain != $request_host) && ($cookie_domain != ".$request_host")) {
                    $domain = $cookie_domain;
                    if ((strpos($domain, '.') === false) && ($domain != 'local')) {
                        $this->debug(' - domain "'.$domain.'" has no dot and is not a local domain');
                        continue;
                    }
                    if (preg_match('/\.[0-9]+$/', $domain)) {
                        $this->debug(' - domain "'.$domain.'" appears to be an ip address');
                        continue;
                    }
                    if (substr($domain, 0, 1) != '.') $domain = ".$domain";
                    if (!$this->_domain_match($request_host, $domain)) {
                        $this->debug(' - request host "'.$request_host.'" does not domain-match "'.$domain.'"');
                        continue;
                    }
                }
            }
            // check path
            if (isset($tmp_cookie['path']) && ($tmp_cookie['path'] != '')) {
                $path = urldecode($tmp_cookie['path']);
                if (!$this->_path_match($request_path, $path)) {
                    $this->debug(' - request path "'.$request_path.'" does not path-match "'.$path.'"');
                    continue;
                }
            } else {
                // default path: request path up to (not including) the last slash
                $slash = strrpos($request_path, '/');
                $path = ($slash === false) ? '' : substr($request_path, 0, $slash);
                if ($path == '') $path = '/';
            }
            // check if secure
            $secure = isset($tmp_cookie['secure']);
            // check expiry - strtotime() returns false (PHP >= 5.1) or -1 (older)
            // on failure; in both cases fall back to a session cookie rather than
            // letting set_cookie() treat the value as "delete"
            $expires = null;
            if (isset($tmp_cookie['expires'])) {
                $timestamp = strtotime($tmp_cookie['expires']);
                if (($timestamp !== false) && ($timestamp >= 0)) {
                    $expires = $timestamp;
                }
            }
            // set cookie
            $this->set_cookie($domain, $path, $tmp_cookie['name'], $tmp_cookie['value'], $secure, $expires);
        }
    }

    /**
     * Extract Set-Cookie values from raw HTTP response headers.
     *
     * Parsing stops at the first empty line after the first line. Folded
     * (continuation) lines following a Set-Cookie header are appended to
     * that header's value, separated by a single space.
     *
     * @param string $h raw HTTP response headers
     * @return array list of Set-Cookie header values
     */
    public function extractCookies($h) {
        $x = 0;
        $headers = array();
        $last_match = false;
        $seen_line = false;
        foreach (explode("\n", $h) as $line) {
            $line = rtrim($line);
            if ($seen_line) {
                // an empty line marks the end of the headers (CR is optional,
                // it has already been trimmed away)
                if (trim($line) == '') {
                    break;
                }
                // check for continuation line...
                // RFC 2616 Section 2.2 "Basic Rules":
                // HTTP/1.1 header field values can be folded onto multiple lines if the
                // continuation line begins with a space or horizontal tab.
                if ($last_match && preg_match('/^\s+(.*)/', $line, $match)) {
                    // append to previous header value
                    $headers[$x-1] .= ' '.rtrim($match[1]);
                    continue;
                }
            }
            $seen_line = true;
            // split header name and value
            if (preg_match('/^Set-Cookie\s*:\s*(.*)/i', $line, $match)) {
                $headers[$x++] = rtrim($match[1]);
                $last_match = true;
            } else {
                $last_match = false;
            }
        }
        return $headers;
    }

    /**
     * Set Cookie
     * @param string $domain (stored lower-cased)
     * @param string $path
     * @param string $name cookie name
     * @param string $value cookie value (cookies with an empty value are not stored)
     * @param bool $secure
     * @param int $expires expiry time (null if session cookie, <= 0 will delete cookie)
     * @return void
     */
    public function set_cookie($domain, $path, $name, $value, $secure=false, $expires=null)
    {
        if ($domain == '') return;
        if ($path == '') return;
        if ($name == '') return;
        $domain = strtolower($domain);
        // check if cookie needs to go
        if (isset($expires) && ($expires <= 0)) {
            if (isset($this->cookies[$domain][$path][$name])) unset($this->cookies[$domain][$path][$name]);
            return;
        }
        if ($value == '') return;
        $this->cookies[$domain][$path][$name] = array($value, $secure, $expires);
    }

    /**
     * Clear cookies - [domain [,path [,name]]] - call method with no arguments to clear all cookies.
     * @param string $domain
     * @param string $path
     * @param string $name
     * @return void
     */
    public function clear($domain=null, $path=null, $name=null)
    {
        if (!isset($domain)) {
            $this->cookies = array();
            return;
        }
        // jar keys are lower-cased by set_cookie()
        $domain = strtolower($domain);
        if (!isset($path)) {
            if (isset($this->cookies[$domain])) unset($this->cookies[$domain]);
        } elseif (!isset($name)) {
            if (isset($this->cookies[$domain][$path])) unset($this->cookies[$domain][$path]);
        } else {
            if (isset($this->cookies[$domain][$path][$name])) unset($this->cookies[$domain][$path][$name]);
        }
    }

    /**
     * Compare string length - used for sorting (longest string first)
     * @access private
     * @return int negative if $a is longer, positive if $b is longer, 0 if equal
     */
    public function _cmp_length($a, $b)
    {
        return strlen($b) - strlen($a);
    }

    /**
     * Reduce domain
     *
     * Strips a leading dot if there is one, otherwise strips the first label
     * (keeping the dot): "www.example.com" -> ".example.com" -> "example.com".
     * A domain without any dot is returned unchanged.
     *
     * @param string $domain
     * @return string
     * @access private
     */
    public function _reduce_domain($domain)
    {
        if ($domain == '') return '';
        if (substr($domain, 0, 1) == '.') return substr($domain, 1);
        $dot = strpos($domain, '.');
        if ($dot === false) return $domain;
        return substr($domain, $dot);
    }

    /**
     * Path match - check if path1 path-matches path2
     *
     * From RFC 2965:
     * <i>For two strings that represent paths, P1 and P2, P1 path-matches P2
     * if P2 is a prefix of P1 (including the case where P1 and P2 string-
     * compare equal). Thus, the string /tec/waldo path-matches /tec.</i>
     * @param string $path1
     * @param string $path2
     * @return bool
     * @access private
     */
    public function _path_match($path1, $path2)
    {
        return (strncmp($path1, $path2, strlen($path2)) === 0);
    }

    /**
     * Domain match - check if domain1 domain-matches domain2
     *
     * The comparison is case-insensitive. domain1 is repeatedly reduced
     * (see _reduce_domain()) and compared against domain2 until it has no
     * dot left.
     *
     * A few extracts from RFC 2965:
     * - A Set-Cookie2 from request-host y.x.foo.com for Domain=.foo.com
     *   would be rejected, because H is y.x and contains a dot.
     *
     * - A Set-Cookie2 from request-host x.foo.com for Domain=.foo.com
     *   would be accepted.
     *
     * - A Set-Cookie2 with Domain=.com or Domain=.com., will always be
     *   rejected, because there is no embedded dot.
     *
     * - A Set-Cookie2 from request-host example for Domain=.local will
     *   be accepted, because the effective host name for the request-
     *   host is example.local, and example.local domain-matches .local.
     *
     * The first point is deliberately not enforced here.
     *
     * @param string $domain1
     * @param string $domain2
     * @return bool
     * @access private
     */
    public function _domain_match($domain1, $domain2)
    {
        $domain1 = strtolower($domain1);
        $domain2 = strtolower($domain2);
        while (strpos($domain1, '.') !== false) {
            if ($domain1 == $domain2) return true;
            $domain1 = $this->_reduce_domain($domain1);
        }
        return false;
    }
}
|
||||||
?>
|
|
File diff suppressed because it is too large
Load Diff
@ -1,79 +1,78 @@
|
|||||||
<?php
|
<?php
|
||||||
/**
 * Humble HTTP Agent extension for SimplePie_File
 *
 * This class is designed to extend and override SimplePie_File
 * in order to prevent duplicate HTTP requests being sent out.
 * The idea is to initialise an instance of Humble HTTP Agent
 * and attach it to a static class variable of this class using
 * set_agent(). Every instance created afterwards fetches its URL
 * through that shared agent.
 *
 * @date 2011-02-28
 */

class SimplePie_HumbleHttpAgent extends SimplePie_File
{
    /**
     * Shared agent used by all instances to perform the HTTP request.
     * Null until set_agent() has been called.
     *
     * @var HumbleHttpAgent
     */
    protected static $agent;

    var $url;
    var $useragent;
    var $success = true;
    var $headers = array();
    var $body;
    var $status_code;
    var $redirects = 0;
    var $error;
    var $method = SIMPLEPIE_FILE_SOURCE_NONE;

    /**
     * Attach the HumbleHttpAgent instance used for all subsequent fetches.
     *
     * @param HumbleHttpAgent $agent
     * @return void
     */
    public static function set_agent(HumbleHttpAgent $agent) {
        self::$agent = $agent;
    }

    /**
     * Fetch $url through the shared agent and populate headers, body and
     * status code. On any failure $success is set to false and $error holds
     * a short message.
     *
     * $timeout, $redirects, $headers and $force_fsockopen are accepted so
     * the signature matches the original constructor, but they are not used.
     *
     * @param string $url
     * @param int $timeout unused
     * @param int $redirects unused
     * @param array $headers unused (see TODO below)
     * @param string $useragent stored in $this->useragent
     * @param bool $force_fsockopen unused
     */
    public function __construct($url, $timeout = 10, $redirects = 5, $headers = null, $useragent = null, $force_fsockopen = false) {
        // convert an internationalised host name to its ASCII form when the
        // idna_convert library is available
        if (class_exists('idna_convert'))
        {
            $idn = new idna_convert();
            $parsed = SimplePie_Misc::parse_url($url);
            $url = SimplePie_Misc::compress_parse_url($parsed['scheme'], $idn->encode($parsed['authority']), $parsed['path'], $parsed['query'], $parsed['fragment']);
        }
        $this->url = $url;
        $this->useragent = $useragent;

        if (!preg_match('/^http(s)?:\/\//i', $url))
        {
            $this->error = 'invalid URL';
            $this->success = false;
            return;
        }

        $this->method = SIMPLEPIE_FILE_SOURCE_REMOTE | SIMPLEPIE_FILE_SOURCE_CURL;

        // without an agent there is nothing to fetch with - fail cleanly
        // instead of triggering a fatal error on a null object
        if (self::$agent === null)
        {
            $this->error = 'no HTTP agent set';
            $this->success = false;
            return;
        }

        //TODO: allow for HTTP headers (the $headers argument is currently ignored)

        $response = self::$agent->get($url);

        if ($response === false || !isset($response['status_code']))
        {
            $this->error = 'failed to fetch URL';
            $this->success = false;
            return;
        }

        // The extra lines at the end are there to satisfy SimplePie's HTTP parser.
        // The class expects a full HTTP message, whereas we're giving it only
        // headers - the new lines indicate the start of the body.
        $parser = new SimplePie_HTTP_Parser($response['headers']."\r\n\r\n");
        if (!$parser->parse())
        {
            // previously this case left $success true with no headers, body or status
            $this->error = 'failed to parse HTTP headers';
            $this->success = false;
            return;
        }

        $this->headers = $parser->headers;
        // the body comes from the agent's response, not from the parser
        $this->body = $response['body'];
        $this->status_code = $parser->status_code;
    }
}
|
||||||
?>
|
|
File diff suppressed because it is too large
Load Diff
57
inc/3rdparty/libraries/language-detect/LanguageDetect/Exception.php
vendored
Normal file
57
inc/3rdparty/libraries/language-detect/LanguageDetect/Exception.php
vendored
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
<?php
|
||||||
|
class Text_LanguageDetect_Exception extends Exception
{
    // --- 1x: language database problems ---------------------------------

    /** The database file does not exist */
    const DB_NOT_FOUND = 10;

    /** The database file exists but cannot be read */
    const DB_NOT_READABLE = 11;

    /** The database file has no content */
    const DB_EMPTY = 12;

    /** The database content did not yield a PHP array */
    const DB_NOT_ARRAY = 13;

    /** Magic quotes are switched on */
    const MAGIC_QUOTES = 14;

    // --- 2x: invalid method parameters ----------------------------------

    /** A method received a parameter of the wrong type */
    const PARAM_TYPE = 20;

    /** A parameter contains an invalid character */
    const INVALID_CHAR = 21;

    // --- 30: language lookup --------------------------------------------

    /** The requested language is not present in the database */
    const UNKNOWN_LANGUAGE = 30;

    // --- 40: block detection --------------------------------------------

    /** Something went wrong during block detection */
    const BLOCK_DETECTION = 40;

    // --- 50: clustering -------------------------------------------------

    /** Something went wrong while clustering languages */
    const NO_HIGHEST_KEY = 50;
}
|
339
inc/3rdparty/libraries/language-detect/LanguageDetect/ISO639.php
vendored
Normal file
339
inc/3rdparty/libraries/language-detect/LanguageDetect/ISO639.php
vendored
Normal file
@ -0,0 +1,339 @@
|
|||||||
|
<?php
|
||||||
|
/**
|
||||||
|
* Part of Text_LanguageDetect
|
||||||
|
*
|
||||||
|
* PHP version 5
|
||||||
|
*
|
||||||
|
* @category Text
|
||||||
|
* @package Text_LanguageDetect
|
||||||
|
* @author Christian Weiske <cweiske@php.net>
|
||||||
|
* @copyright 2011 Christian Weiske <cweiske@php.net>
|
||||||
|
* @license http://www.debian.org/misc/bsd.license BSD
|
||||||
|
* @version SVN: $Id$
|
||||||
|
* @link http://pear.php.net/package/Text_LanguageDetect/
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Provides a mapping between the languages from lang.dat and the
|
||||||
|
* ISO 639-1 and ISO-639-2 codes.
|
||||||
|
*
|
||||||
|
* Note that this class contains only languages that exist in lang.dat.
|
||||||
|
*
|
||||||
|
* @category Text
|
||||||
|
* @package Text_LanguageDetect
|
||||||
|
* @author Christian Weiske <cweiske@php.net>
|
||||||
|
* @copyright 2011 Christian Weiske <cweiske@php.net>
|
||||||
|
* @license http://www.debian.org/misc/bsd.license BSD
|
||||||
|
* @link http://www.loc.gov/standards/iso639-2/php/code_list.php
|
||||||
|
*/
|
||||||
|
class Text_LanguageDetect_ISO639
{
    /**
     * Maps all language names from the language database to the
     * ISO 639-1 2-letter language code.
     *
     * NULL indicates that there is no 2-letter code.
     *
     * @var array
     */
    public static $nameToCode2 = array(
        'albanian'   => 'sq',
        'arabic'     => 'ar',
        'azeri'      => 'az',
        'bengali'    => 'bn',
        'bulgarian'  => 'bg',
        'cebuano'    => null,
        'croatian'   => 'hr',
        'czech'      => 'cs',
        'danish'     => 'da',
        'dutch'      => 'nl',
        'english'    => 'en',
        'estonian'   => 'et',
        'farsi'      => 'fa',
        'finnish'    => 'fi',
        'french'     => 'fr',
        'german'     => 'de',
        'hausa'      => 'ha',
        'hawaiian'   => null,
        'hindi'      => 'hi',
        'hungarian'  => 'hu',
        'icelandic'  => 'is',
        'indonesian' => 'id',
        'italian'    => 'it',
        'kazakh'     => 'kk',
        'kyrgyz'     => 'ky',
        'latin'      => 'la',
        'latvian'    => 'lv',
        'lithuanian' => 'lt',
        'macedonian' => 'mk',
        'mongolian'  => 'mn',
        'nepali'     => 'ne',
        'norwegian'  => 'no',
        'pashto'     => 'ps',
        'pidgin'     => null,
        'polish'     => 'pl',
        'portuguese' => 'pt',
        'romanian'   => 'ro',
        'russian'    => 'ru',
        'serbian'    => 'sr',
        'slovak'     => 'sk',
        'slovene'    => 'sl',
        'somali'     => 'so',
        'spanish'    => 'es',
        'swahili'    => 'sw',
        'swedish'    => 'sv',
        'tagalog'    => 'tl',
        'turkish'    => 'tr',
        'ukrainian'  => 'uk',
        'urdu'       => 'ur',
        'uzbek'      => 'uz',
        'vietnamese' => 'vi',
        'welsh'      => 'cy',
    );

    /**
     * Maps all language names from the language database to the
     * ISO 639-2 3-letter language code.
     *
     * @var array
     */
    public static $nameToCode3 = array(
        'albanian'   => 'sqi',
        'arabic'     => 'ara',
        'azeri'      => 'aze',
        'bengali'    => 'ben',
        'bulgarian'  => 'bul',
        'cebuano'    => 'ceb',
        'croatian'   => 'hrv',
        'czech'      => 'ces',
        'danish'     => 'dan',
        'dutch'      => 'nld',
        'english'    => 'eng',
        'estonian'   => 'est',
        'farsi'      => 'fas',
        'finnish'    => 'fin',
        'french'     => 'fra',
        'german'     => 'deu',
        'hausa'      => 'hau',
        'hawaiian'   => 'haw',
        'hindi'      => 'hin',
        'hungarian'  => 'hun',
        'icelandic'  => 'isl',
        'indonesian' => 'ind',
        'italian'    => 'ita',
        'kazakh'     => 'kaz',
        'kyrgyz'     => 'kir',
        'latin'      => 'lat',
        'latvian'    => 'lav',
        'lithuanian' => 'lit',
        'macedonian' => 'mkd',
        'mongolian'  => 'mon',
        'nepali'     => 'nep',
        'norwegian'  => 'nor',
        'pashto'     => 'pus',
        'pidgin'     => 'crp',
        'polish'     => 'pol',
        'portuguese' => 'por',
        'romanian'   => 'ron',
        'russian'    => 'rus',
        'serbian'    => 'srp',
        'slovak'     => 'slk',
        'slovene'    => 'slv',
        'somali'     => 'som',
        'spanish'    => 'spa',
        'swahili'    => 'swa',
        'swedish'    => 'swe',
        'tagalog'    => 'tgl',
        'turkish'    => 'tur',
        'ukrainian'  => 'ukr',
        'urdu'       => 'urd',
        'uzbek'      => 'uzb',
        'vietnamese' => 'vie',
        'welsh'      => 'cym',
    );

    /**
     * Maps ISO 639-1 2-letter language codes to the language names
     * in the language database.
     *
     * Not all languages have a 2-letter code, so some are missing.
     *
     * @var array
     */
    public static $code2ToName = array(
        'ar' => 'arabic',
        'az' => 'azeri',
        'bg' => 'bulgarian',
        'bn' => 'bengali',
        'cs' => 'czech',
        'cy' => 'welsh',
        'da' => 'danish',
        'de' => 'german',
        'en' => 'english',
        'es' => 'spanish',
        'et' => 'estonian',
        'fa' => 'farsi',
        'fi' => 'finnish',
        'fr' => 'french',
        'ha' => 'hausa',
        'hi' => 'hindi',
        'hr' => 'croatian',
        'hu' => 'hungarian',
        'id' => 'indonesian',
        'is' => 'icelandic',
        'it' => 'italian',
        'kk' => 'kazakh',
        'ky' => 'kyrgyz',
        'la' => 'latin',
        'lt' => 'lithuanian',
        'lv' => 'latvian',
        'mk' => 'macedonian',
        'mn' => 'mongolian',
        'ne' => 'nepali',
        'nl' => 'dutch',
        'no' => 'norwegian',
        'pl' => 'polish',
        'ps' => 'pashto',
        'pt' => 'portuguese',
        'ro' => 'romanian',
        'ru' => 'russian',
        'sk' => 'slovak',
        'sl' => 'slovene',
        'so' => 'somali',
        'sq' => 'albanian',
        'sr' => 'serbian',
        'sv' => 'swedish',
        'sw' => 'swahili',
        'tl' => 'tagalog',
        'tr' => 'turkish',
        'uk' => 'ukrainian',
        'ur' => 'urdu',
        'uz' => 'uzbek',
        'vi' => 'vietnamese',
    );

    /**
     * Maps ISO 639-2 3-letter language codes to the language names
     * in the language database.
     *
     * Kept as the exact inverse of $nameToCode3, so that a code returned
     * by nameToCode3() can always be resolved again by code3ToName().
     *
     * @var array
     */
    public static $code3ToName = array(
        'ara' => 'arabic',
        'aze' => 'azeri',
        'ben' => 'bengali',
        'bul' => 'bulgarian',
        'ceb' => 'cebuano',
        'ces' => 'czech',
        'crp' => 'pidgin',
        'cym' => 'welsh',
        'dan' => 'danish',
        'deu' => 'german',
        'eng' => 'english',
        'est' => 'estonian',
        'fas' => 'farsi',
        'fin' => 'finnish',
        'fra' => 'french',
        'hau' => 'hausa',
        'haw' => 'hawaiian',
        'hin' => 'hindi',
        'hrv' => 'croatian',
        'hun' => 'hungarian',
        'ind' => 'indonesian',
        'isl' => 'icelandic',
        'ita' => 'italian',
        'kaz' => 'kazakh',
        'kir' => 'kyrgyz',
        'lat' => 'latin',
        'lav' => 'latvian',
        'lit' => 'lithuanian',
        'mkd' => 'macedonian',
        'mon' => 'mongolian',
        'nep' => 'nepali',
        'nld' => 'dutch',
        'nor' => 'norwegian',
        'pol' => 'polish',
        'por' => 'portuguese',
        'pus' => 'pashto',
        // Was 'rom', which did not match $nameToCode3 ('romanian' => 'ron');
        // 'rom' is the ISO 639-2 code for Romany, not Romanian.
        'ron' => 'romanian',
        'rus' => 'russian',
        'slk' => 'slovak',
        'slv' => 'slovene',
        'som' => 'somali',
        'spa' => 'spanish',
        'sqi' => 'albanian',
        'srp' => 'serbian',
        'swa' => 'swahili',
        'swe' => 'swedish',
        'tgl' => 'tagalog',
        'tur' => 'turkish',
        'ukr' => 'ukrainian',
        'urd' => 'urdu',
        'uzb' => 'uzbek',
        'vie' => 'vietnamese',
    );

    /**
     * Returns the 2-letter ISO 639-1 code for the given language name.
     *
     * The lookup is case-insensitive. Languages that are listed with a
     * NULL code (no 2-letter code exists) also yield NULL.
     *
     * @param string $lang English language name like "swedish"
     *
     * @return string|null Two-letter language code (e.g. "sv") or NULL if not found
     */
    public static function nameToCode2($lang)
    {
        $lang = strtolower($lang);
        if (!isset(self::$nameToCode2[$lang])) {
            return null;
        }
        return self::$nameToCode2[$lang];
    }

    /**
     * Returns the 3-letter ISO 639-2 code for the given language name.
     *
     * The lookup is case-insensitive.
     *
     * @param string $lang English language name like "swedish"
     *
     * @return string|null Three-letter language code (e.g. "swe") or NULL if not found
     */
    public static function nameToCode3($lang)
    {
        $lang = strtolower($lang);
        if (!isset(self::$nameToCode3[$lang])) {
            return null;
        }
        return self::$nameToCode3[$lang];
    }

    /**
     * Returns the language name for the given 2-letter ISO 639-1 code.
     *
     * The lookup is case-insensitive.
     *
     * @param string $code Two-letter language code (e.g. "sv")
     *
     * @return string|null English language name like "swedish" or NULL if not found
     */
    public static function code2ToName($code)
    {
        // Look up with the lowercased code; the table keys are all lowercase.
        $code = strtolower($code);
        if (!isset(self::$code2ToName[$code])) {
            return null;
        }
        return self::$code2ToName[$code];
    }

    /**
     * Returns the language name for the given 3-letter ISO 639-2 code.
     *
     * The lookup is case-insensitive.
     *
     * @param string $code Three-letter language code (e.g. "swe")
     *
     * @return string|null English language name like "swedish" or NULL if not found
     */
    public static function code3ToName($code)
    {
        // Look up with the lowercased code; the table keys are all lowercase.
        $code = strtolower($code);
        if (!isset(self::$code3ToName[$code])) {
            return null;
        }
        return self::$code3ToName[$code];
    }
}
|
@ -8,7 +8,7 @@
|
|||||||
* @author Nicholas Pisarro
|
* @author Nicholas Pisarro
|
||||||
* @copyright 2006
|
* @copyright 2006
|
||||||
* @license BSD
|
* @license BSD
|
||||||
* @version CVS: $Id: Parser.php,v 1.5 2006/03/11 05:45:05 taak Exp $
|
* @version CVS: $Id: Parser.php 322327 2012-01-15 17:55:59Z cweiske $
|
||||||
* @link http://pear.php.net/package/Text_LanguageDetect/
|
* @link http://pear.php.net/package/Text_LanguageDetect/
|
||||||
* @link http://langdetect.blogspot.com/
|
* @link http://langdetect.blogspot.com/
|
||||||
*/
|
*/
|
||||||
@ -28,7 +28,7 @@
|
|||||||
* @author Nicholas Pisarro
|
* @author Nicholas Pisarro
|
||||||
* @copyright 2006
|
* @copyright 2006
|
||||||
* @license BSD
|
* @license BSD
|
||||||
* @version release: 0.2.3
|
* @version release: 0.3.0
|
||||||
*/
|
*/
|
||||||
class Text_LanguageDetect_Parser extends Text_LanguageDetect
|
class Text_LanguageDetect_Parser extends Text_LanguageDetect
|
||||||
{
|
{
|
||||||
@ -102,21 +102,17 @@ class Text_LanguageDetect_Parser extends Text_LanguageDetect
|
|||||||
* @access private
|
* @access private
|
||||||
* @param string $string string to be parsed
|
* @param string $string string to be parsed
|
||||||
*/
|
*/
|
||||||
function Text_LanguageDetect_Parser($string, $db=null, $unicode_db=null) {
|
function Text_LanguageDetect_Parser($string) {
|
||||||
if (isset($db)) $this->_db_filename = $db;
|
|
||||||
if (isset($unicode_db)) $this->_unicode_db_filename = $unicode_db;
|
|
||||||
$this->_string = $string;
|
$this->_string = $string;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns true if a string is suitable for parsing
|
* Returns true if a string is suitable for parsing
|
||||||
*
|
*
|
||||||
* @static
|
|
||||||
* @access public
|
|
||||||
* @param string $str input string to test
|
* @param string $str input string to test
|
||||||
* @return bool true if acceptable, false if not
|
* @return bool true if acceptable, false if not
|
||||||
*/
|
*/
|
||||||
function validateString($str) {
|
public static function validateString($str) {
|
||||||
if (!empty($str) && strlen($str) > 3 && preg_match('/\S/', $str)) {
|
if (!empty($str) && strlen($str) > 3 && preg_match('/\S/', $str)) {
|
||||||
return true;
|
return true;
|
||||||
} else {
|
} else {
|
||||||
@ -222,8 +218,7 @@ class Text_LanguageDetect_Parser extends Text_LanguageDetect
|
|||||||
|
|
||||||
// unicode startup
|
// unicode startup
|
||||||
if ($this->_compile_unicode) {
|
if ($this->_compile_unicode) {
|
||||||
$blocks =& $this->_read_unicode_block_db();
|
$blocks = $this->_read_unicode_block_db();
|
||||||
|
|
||||||
$block_count = count($blocks);
|
$block_count = count($blocks);
|
||||||
|
|
||||||
$skipped_count = 0;
|
$skipped_count = 0;
|
||||||
@ -349,6 +344,4 @@ class Text_LanguageDetect_Parser extends Text_LanguageDetect
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
|
/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
|
||||||
|
|
||||||
?>
|
|
2274
inc/3rdparty/libraries/readability/Readability.php
vendored
2274
inc/3rdparty/libraries/readability/Readability.php
vendored
File diff suppressed because it is too large
Load Diff
363
inc/3rdparty/makefulltextfeed.php
vendored
363
inc/3rdparty/makefulltextfeed.php
vendored
@ -3,8 +3,8 @@
|
|||||||
// Author: Keyvan Minoukadeh
|
// Author: Keyvan Minoukadeh
|
||||||
// Copyright (c) 2013 Keyvan Minoukadeh
|
// Copyright (c) 2013 Keyvan Minoukadeh
|
||||||
// License: AGPLv3
|
// License: AGPLv3
|
||||||
// Version: 3.1
|
// Version: 3.2
|
||||||
// Date: 2013-03-05
|
// Date: 2013-05-13
|
||||||
// More info: http://fivefilters.org/content-only/
|
// More info: http://fivefilters.org/content-only/
|
||||||
// Help: http://help.fivefilters.org
|
// Help: http://help.fivefilters.org
|
||||||
|
|
||||||
@ -25,14 +25,10 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||||||
|
|
||||||
// Usage
|
// Usage
|
||||||
// -----
|
// -----
|
||||||
// Request this file passing it your feed in the querystring: makefulltextfeed.php?url=mysite.org
|
// Request this file passing it a web page or feed URL in the querystring: makefulltextfeed.php?url=example.org/article
|
||||||
// The following options can be passed in the querystring:
|
// For more request parameters, see http://help.fivefilters.org/customer/portal/articles/226660-usage
|
||||||
// * URL: url=[feed or website url] (required, should be URL-encoded - in php: urlencode($url))
|
|
||||||
// * URL points to HTML (not feed): html=true (optional, by default it's automatically detected)
|
|
||||||
// * API key: key=[api key] (optional, refer to config.php)
|
|
||||||
// * Max entries to process: max=[max number of items] (optional)
|
|
||||||
|
|
||||||
error_reporting(E_ALL ^ E_NOTICE);
|
//error_reporting(E_ALL ^ E_NOTICE);
|
||||||
ini_set("display_errors", 1);
|
ini_set("display_errors", 1);
|
||||||
@set_time_limit(120);
|
@set_time_limit(120);
|
||||||
|
|
||||||
@ -76,8 +72,8 @@ header('X-Robots-Tag: noindex, nofollow');
|
|||||||
////////////////////////////////
|
////////////////////////////////
|
||||||
// Check if service is enabled
|
// Check if service is enabled
|
||||||
////////////////////////////////
|
////////////////////////////////
|
||||||
if (!$options->enabled) {
|
if (!$options->enabled) {
|
||||||
die('The full-text RSS service is currently disabled');
|
die('The full-text RSS service is currently disabled');
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////
|
////////////////////////////////
|
||||||
@ -121,8 +117,8 @@ $options->smart_cache = $options->smart_cache && function_exists('apc_inc');
|
|||||||
////////////////////////////////
|
////////////////////////////////
|
||||||
// Check for feed URL
|
// Check for feed URL
|
||||||
////////////////////////////////
|
////////////////////////////////
|
||||||
if (!isset($_GET['url'])) {
|
if (!isset($_GET['url'])) {
|
||||||
die('No URL supplied');
|
die('No URL supplied');
|
||||||
}
|
}
|
||||||
$url = trim($_GET['url']);
|
$url = trim($_GET['url']);
|
||||||
if (strtolower(substr($url, 0, 7)) == 'feed://') {
|
if (strtolower(substr($url, 0, 7)) == 'feed://') {
|
||||||
@ -161,10 +157,12 @@ if (isset($_GET['key']) && ($key_index = array_search($_GET['key'], $options->ap
|
|||||||
if (isset($_GET['links'])) $redirect .= '&links='.urlencode($_GET['links']);
|
if (isset($_GET['links'])) $redirect .= '&links='.urlencode($_GET['links']);
|
||||||
if (isset($_GET['exc'])) $redirect .= '&exc='.urlencode($_GET['exc']);
|
if (isset($_GET['exc'])) $redirect .= '&exc='.urlencode($_GET['exc']);
|
||||||
if (isset($_GET['format'])) $redirect .= '&format='.urlencode($_GET['format']);
|
if (isset($_GET['format'])) $redirect .= '&format='.urlencode($_GET['format']);
|
||||||
if (isset($_GET['callback'])) $redirect .= '&callback='.urlencode($_GET['callback']);
|
if (isset($_GET['callback'])) $redirect .= '&callback='.urlencode($_GET['callback']);
|
||||||
if (isset($_GET['l'])) $redirect .= '&l='.urlencode($_GET['l']);
|
if (isset($_GET['l'])) $redirect .= '&l='.urlencode($_GET['l']);
|
||||||
if (isset($_GET['xss'])) $redirect .= '&xss';
|
if (isset($_GET['xss'])) $redirect .= '&xss';
|
||||||
if (isset($_GET['use_extracted_title'])) $redirect .= '&use_extracted_title';
|
if (isset($_GET['use_extracted_title'])) $redirect .= '&use_extracted_title';
|
||||||
|
if (isset($_GET['content'])) $redirect .= '&content='.urlencode($_GET['content']);
|
||||||
|
if (isset($_GET['summary'])) $redirect .= '&summary='.urlencode($_GET['summary']);
|
||||||
if (isset($_GET['debug'])) $redirect .= '&debug';
|
if (isset($_GET['debug'])) $redirect .= '&debug';
|
||||||
if ($debug_mode) {
|
if ($debug_mode) {
|
||||||
debug('Redirecting to hide access key, follow URL below to continue');
|
debug('Redirecting to hide access key, follow URL below to continue');
|
||||||
@ -177,7 +175,7 @@ if (isset($_GET['key']) && ($key_index = array_search($_GET['key'], $options->ap
|
|||||||
|
|
||||||
///////////////////////////////////////////////
|
///////////////////////////////////////////////
|
||||||
// Set timezone.
|
// Set timezone.
|
||||||
// Prevents warnings, but needs more testing -
|
// Prevents warnings, but needs more testing -
|
||||||
// perhaps if timezone is set in php.ini we
|
// perhaps if timezone is set in php.ini we
|
||||||
// don't need to set it at all...
|
// don't need to set it at all...
|
||||||
///////////////////////////////////////////////
|
///////////////////////////////////////////////
|
||||||
@ -199,7 +197,7 @@ if (isset($_GET['key']) && isset($_GET['hash']) && isset($options->api_keys[(int
|
|||||||
}
|
}
|
||||||
$key_index = ($valid_key) ? (int)$_GET['key'] : 0;
|
$key_index = ($valid_key) ? (int)$_GET['key'] : 0;
|
||||||
if (!$valid_key && $options->key_required) {
|
if (!$valid_key && $options->key_required) {
|
||||||
die('A valid key must be supplied');
|
die('A valid key must be supplied');
|
||||||
}
|
}
|
||||||
if (!$valid_key && isset($_GET['key']) && $_GET['key'] != '') {
|
if (!$valid_key && isset($_GET['key']) && $_GET['key'] != '') {
|
||||||
die('The entered key is invalid');
|
die('The entered key is invalid');
|
||||||
@ -250,6 +248,28 @@ if ($options->favour_feed_titles == 'user') {
|
|||||||
$favour_feed_titles = $options->favour_feed_titles;
|
$favour_feed_titles = $options->favour_feed_titles;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
///////////////////////////////////////////////
|
||||||
|
// Include full content in output?
|
||||||
|
///////////////////////////////////////////////
|
||||||
|
if ($options->content === 'user') {
|
||||||
|
if (isset($_GET['content']) && $_GET['content'] === '0') {
|
||||||
|
$options->content = false;
|
||||||
|
} else {
|
||||||
|
$options->content = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
///////////////////////////////////////////////
|
||||||
|
// Include summaries in output?
|
||||||
|
///////////////////////////////////////////////
|
||||||
|
if ($options->summary === 'user') {
|
||||||
|
if (isset($_GET['summary']) && $_GET['summary'] === '1') {
|
||||||
|
$options->summary = true;
|
||||||
|
} else {
|
||||||
|
$options->summary = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
///////////////////////////////////////////////
|
///////////////////////////////////////////////
|
||||||
// Exclude items if extraction fails
|
// Exclude items if extraction fails
|
||||||
///////////////////////////////////////////////
|
///////////////////////////////////////////////
|
||||||
@ -272,15 +292,6 @@ if ($options->detect_language === 'user') {
|
|||||||
$detect_language = $options->detect_language;
|
$detect_language = $options->detect_language;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($detect_language >= 2) {
|
|
||||||
$language_codes = array('albanian' => 'sq','arabic' => 'ar','azeri' => 'az','bengali' => 'bn','bulgarian' => 'bg',
|
|
||||||
'cebuano' => 'ceb', // ISO 639-2
|
|
||||||
'croatian' => 'hr','czech' => 'cs','danish' => 'da','dutch' => 'nl','english' => 'en','estonian' => 'et','farsi' => 'fa','finnish' => 'fi','french' => 'fr','german' => 'de','hausa' => 'ha',
|
|
||||||
'hawaiian' => 'haw', // ISO 639-2
|
|
||||||
'hindi' => 'hi','hungarian' => 'hu','icelandic' => 'is','indonesian' => 'id','italian' => 'it','kazakh' => 'kk','kyrgyz' => 'ky','latin' => 'la','latvian' => 'lv','lithuanian' => 'lt','macedonian' => 'mk','mongolian' => 'mn','nepali' => 'ne','norwegian' => 'no','pashto' => 'ps',
|
|
||||||
'pidgin' => 'cpe', // ISO 639-2
|
|
||||||
'polish' => 'pl','portuguese' => 'pt','romanian' => 'ro','russian' => 'ru','serbian' => 'sr','slovak' => 'sk','slovene' => 'sl','somali' => 'so','spanish' => 'es','swahili' => 'sw','swedish' => 'sv','tagalog' => 'tl','turkish' => 'tr','ukrainian' => 'uk','urdu' => 'ur','uzbek' => 'uz','vietnamese' => 'vi','welsh' => 'cy');
|
|
||||||
}
|
|
||||||
$use_cld = extension_loaded('cld') && (version_compare(PHP_VERSION, '5.3.0') >= 0);
|
$use_cld = extension_loaded('cld') && (version_compare(PHP_VERSION, '5.3.0') >= 0);
|
||||||
|
|
||||||
/////////////////////////////////////
|
/////////////////////////////////////
|
||||||
@ -330,7 +341,7 @@ if ($options->cors) header('Access-Control-Allow-Origin: *');
|
|||||||
//////////////////////////////////
|
//////////////////////////////////
|
||||||
if ($options->caching) {
|
if ($options->caching) {
|
||||||
debug('Caching is enabled...');
|
debug('Caching is enabled...');
|
||||||
$cache_id = md5($max.$url.$valid_key.$links.$favour_feed_titles.$xss_filter.$exclude_on_fail.$format.$detect_language.(int)isset($_GET['pubsub']));
|
$cache_id = md5($max.$url.(int)$valid_key.$links.(int)$favour_feed_titles.(int)$options->content.(int)$options->summary.(int)$xss_filter.(int)$exclude_on_fail.$format.$detect_language.(int)isset($_GET['pubsub']));
|
||||||
$check_cache = true;
|
$check_cache = true;
|
||||||
if ($options->apc && $options->smart_cache) {
|
if ($options->apc && $options->smart_cache) {
|
||||||
apc_add("cache.$cache_id", 0, 10*60);
|
apc_add("cache.$cache_id", 0, 10*60);
|
||||||
@ -468,7 +479,7 @@ if ($img_url = $feed->get_image_url()) {
|
|||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
// Loop through feed items
|
// Loop through feed items
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
$items = $feed->get_items(0, $max);
|
$items = $feed->get_items(0, $max);
|
||||||
// Request all feed items in parallel (if supported)
|
// Request all feed items in parallel (if supported)
|
||||||
$urls_sanitized = array();
|
$urls_sanitized = array();
|
||||||
$urls = array();
|
$urls = array();
|
||||||
@ -550,24 +561,43 @@ foreach ($items as $key => $item) {
|
|||||||
$is_single_page = false;
|
$is_single_page = false;
|
||||||
if ($single_page_response = getSinglePage($item, $html, $effective_url)) {
|
if ($single_page_response = getSinglePage($item, $html, $effective_url)) {
|
||||||
$is_single_page = true;
|
$is_single_page = true;
|
||||||
$html = $single_page_response['body'];
|
|
||||||
// remove strange things
|
|
||||||
$html = str_replace('</[>', '', $html);
|
|
||||||
$html = convert_to_utf8($html, $single_page_response['headers']);
|
|
||||||
$effective_url = $single_page_response['effective_url'];
|
$effective_url = $single_page_response['effective_url'];
|
||||||
debug("Retrieved single-page view from $effective_url");
|
// check if action defined for returned Content-Type
|
||||||
|
$mime_info = get_mime_action_info($single_page_response['headers']);
|
||||||
|
if (isset($mime_info['action'])) {
|
||||||
|
if ($mime_info['action'] == 'exclude') {
|
||||||
|
continue; // skip this feed item entry
|
||||||
|
} elseif ($mime_info['action'] == 'link') {
|
||||||
|
if ($mime_info['type'] == 'image') {
|
||||||
|
$html = "<a href=\"$effective_url\"><img src=\"$effective_url\" alt=\"{$mime_info['name']}\" /></a>";
|
||||||
|
} else {
|
||||||
|
$html = "<a href=\"$effective_url\">Download {$mime_info['name']}</a>";
|
||||||
|
}
|
||||||
|
$extracted_title = $mime_info['name'];
|
||||||
|
$do_content_extraction = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ($do_content_extraction) {
|
||||||
|
$html = $single_page_response['body'];
|
||||||
|
// remove strange things
|
||||||
|
$html = str_replace('</[>', '', $html);
|
||||||
|
$html = convert_to_utf8($html, $single_page_response['headers']);
|
||||||
|
debug("Retrieved single-page view from $effective_url");
|
||||||
|
}
|
||||||
unset($single_page_response);
|
unset($single_page_response);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
if ($do_content_extraction) {
|
||||||
debug('--------');
|
debug('--------');
|
||||||
debug('Attempting to extract content');
|
debug('Attempting to extract content');
|
||||||
$extract_result = $extractor->process($html, $effective_url);
|
$extract_result = $extractor->process($html, $effective_url);
|
||||||
$readability = $extractor->readability;
|
$readability = $extractor->readability;
|
||||||
$content_block = ($extract_result) ? $extractor->getContent() : null;
|
$content_block = ($extract_result) ? $extractor->getContent() : null;
|
||||||
$extracted_title = ($extract_result) ? $extractor->getTitle() : '';
|
$extracted_title = ($extract_result) ? $extractor->getTitle() : '';
|
||||||
// Deal with multi-page articles
|
// Deal with multi-page articles
|
||||||
//die('Next: '.$extractor->getNextPageUrl());
|
//die('Next: '.$extractor->getNextPageUrl());
|
||||||
$is_multi_page = (!$is_single_page && $extract_result && $extractor->getNextPageUrl());
|
$is_multi_page = (!$is_single_page && $extract_result && $extractor->getNextPageUrl());
|
||||||
if ($options->multipage && $is_multi_page) {
|
if ($options->multipage && $is_multi_page && $options->content) {
|
||||||
debug('--------');
|
debug('--------');
|
||||||
debug('Attempting to process multi-page article');
|
debug('Attempting to process multi-page article');
|
||||||
$multi_page_urls = array();
|
$multi_page_urls = array();
|
||||||
@ -580,7 +610,7 @@ foreach ($items as $key => $item) {
|
|||||||
// check it's not what we have already!
|
// check it's not what we have already!
|
||||||
if (!in_array($next_page_url, $multi_page_urls)) {
|
if (!in_array($next_page_url, $multi_page_urls)) {
|
||||||
// it's not, so let's attempt to fetch it
|
// it's not, so let's attempt to fetch it
|
||||||
$multi_page_urls[] = $next_page_url;
|
$multi_page_urls[] = $next_page_url;
|
||||||
$_prev_ref = $http->referer;
|
$_prev_ref = $http->referer;
|
||||||
if (($response = $http->get($next_page_url, true)) && $response['status_code'] < 300) {
|
if (($response = $http->get($next_page_url, true)) && $response['status_code'] < 300) {
|
||||||
// make sure mime type is not something with a different action associated
|
// make sure mime type is not something with a different action associated
|
||||||
@ -605,13 +635,15 @@ foreach ($items as $key => $item) {
|
|||||||
// did we successfully deal with this multi-page article?
|
// did we successfully deal with this multi-page article?
|
||||||
if (empty($multi_page_content)) {
|
if (empty($multi_page_content)) {
|
||||||
debug('Failed to extract all parts of multi-page article, so not going to include them');
|
debug('Failed to extract all parts of multi-page article, so not going to include them');
|
||||||
$multi_page_content[] = $readability->dom->createElement('p')->innerHTML = '<em>This article appears to continue on subsequent pages which we could not extract</em>';
|
$_page = $readability->dom->createElement('p');
|
||||||
|
$_page->innerHTML = '<em>This article appears to continue on subsequent pages which we could not extract</em>';
|
||||||
|
$multi_page_content[] = $_page;
|
||||||
}
|
}
|
||||||
foreach ($multi_page_content as $_page) {
|
foreach ($multi_page_content as $_page) {
|
||||||
$_page = $content_block->ownerDocument->importNode($_page, true);
|
$_page = $content_block->ownerDocument->importNode($_page, true);
|
||||||
$content_block->appendChild($_page);
|
$content_block->appendChild($_page);
|
||||||
}
|
}
|
||||||
unset($multi_page_urls, $multi_page_content, $page_mime_info, $next_page_url);
|
unset($multi_page_urls, $multi_page_content, $page_mime_info, $next_page_url, $_page);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// use extracted title for both feed and item title if we're using single-item dummy feed
|
// use extracted title for both feed and item title if we're using single-item dummy feed
|
||||||
@ -658,7 +690,7 @@ foreach ($items as $key => $item) {
|
|||||||
} else {
|
} else {
|
||||||
$html = $content_block->ownerDocument->saveXML($content_block); // essentially outerHTML
|
$html = $content_block->ownerDocument->saveXML($content_block); // essentially outerHTML
|
||||||
}
|
}
|
||||||
unset($content_block);
|
//unset($content_block);
|
||||||
// post-processing cleanup
|
// post-processing cleanup
|
||||||
$html = preg_replace('!<p>[\s\h\v]*</p>!u', '', $html);
|
$html = preg_replace('!<p>[\s\h\v]*</p>!u', '', $html);
|
||||||
if ($links == 'remove') {
|
if ($links == 'remove') {
|
||||||
@ -671,130 +703,155 @@ foreach ($items as $key => $item) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($valid_key && isset($_GET['pubsub'])) { // used only on fivefilters.org at the moment
|
if ($valid_key && isset($_GET['pubsub'])) { // used only on fivefilters.org at the moment
|
||||||
$newitem->addElement('guid', 'http://fivefilters.org/content-only/redirect.php?url='.urlencode($item->get_permalink()), array('isPermaLink'=>'false'));
|
$newitem->addElement('guid', 'http://fivefilters.org/content-only/redirect.php?url='.urlencode($item->get_permalink()), array('isPermaLink'=>'false'));
|
||||||
|
} else {
|
||||||
|
$newitem->addElement('guid', $item->get_permalink(), array('isPermaLink'=>'true'));
|
||||||
|
}
|
||||||
|
// filter xss?
|
||||||
|
if ($xss_filter) {
|
||||||
|
debug('Filtering HTML to remove XSS');
|
||||||
|
$html = htmLawed::hl($html, array('safe'=>1, 'deny_attribute'=>'style', 'comment'=>1, 'cdata'=>1));
|
||||||
|
}
|
||||||
|
|
||||||
|
// add content
|
||||||
|
if ($options->summary === true) {
|
||||||
|
// get summary
|
||||||
|
$summary = '';
|
||||||
|
if (!$do_content_extraction) {
|
||||||
|
$summary = $html;
|
||||||
} else {
|
} else {
|
||||||
$newitem->addElement('guid', $item->get_permalink(), array('isPermaLink'=>'true'));
|
// Try to get first few paragraphs
|
||||||
}
|
if (isset($content_block) && ($content_block instanceof DOMElement)) {
|
||||||
// filter xss?
|
$_paras = $content_block->getElementsByTagName('p');
|
||||||
if ($xss_filter) {
|
foreach ($_paras as $_para) {
|
||||||
debug('Filtering HTML to remove XSS');
|
$summary .= preg_replace("/[\n\r\t ]+/", ' ', $_para->textContent).' ';
|
||||||
$html = htmLawed::hl($html, array('safe'=>1, 'deny_attribute'=>'style', 'comment'=>1, 'cdata'=>1));
|
if (strlen($summary) > 200) break;
|
||||||
}
|
|
||||||
$newitem->setDescription($html);
|
|
||||||
|
|
||||||
// set date
|
|
||||||
if ((int)$item->get_date('U') > 0) {
|
|
||||||
$newitem->setDate((int)$item->get_date('U'));
|
|
||||||
} elseif ($extractor->getDate()) {
|
|
||||||
$newitem->setDate($extractor->getDate());
|
|
||||||
}
|
|
||||||
|
|
||||||
// add authors
|
|
||||||
if ($authors = $item->get_authors()) {
|
|
||||||
foreach ($authors as $author) {
|
|
||||||
// for some feeds, SimplePie stores author's name as email, e.g. http://feeds.feedburner.com/nymag/intel
|
|
||||||
if ($author->get_name() !== null) {
|
|
||||||
$newitem->addElement('dc:creator', $author->get_name());
|
|
||||||
} elseif ($author->get_email() !== null) {
|
|
||||||
$newitem->addElement('dc:creator', $author->get_email());
|
|
||||||
}
|
}
|
||||||
}
|
} else {
|
||||||
} elseif ($authors = $extractor->getAuthors()) {
|
$summary = $html;
|
||||||
//TODO: make sure the list size is reasonable
|
|
||||||
foreach ($authors as $author) {
|
|
||||||
// TODO: xpath often selects authors from other articles linked from the page.
|
|
||||||
// for now choose first item
|
|
||||||
$newitem->addElement('dc:creator', $author);
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
unset($_paras, $_para);
|
||||||
// add language
|
$summary = get_excerpt($summary);
|
||||||
if ($detect_language) {
|
$newitem->setDescription($summary);
|
||||||
$language = $extractor->getLanguage();
|
if ($options->content) $newitem->setElement('content:encoded', $html);
|
||||||
if (!$language) $language = $feed->get_language();
|
} else {
|
||||||
if (($detect_language == 3 || (!$language && $detect_language == 2)) && $text_sample) {
|
if ($options->content) $newitem->setDescription($html);
|
||||||
try {
|
}
|
||||||
if ($use_cld) {
|
|
||||||
// Use PHP-CLD extension
|
// set date
|
||||||
$php_cld = 'CLD\detect'; // in quotes to prevent PHP 5.2 parse error
|
if ((int)$item->get_date('U') > 0) {
|
||||||
$res = $php_cld($text_sample);
|
$newitem->setDate((int)$item->get_date('U'));
|
||||||
if (is_array($res) && count($res) > 0) {
|
} elseif ($extractor->getDate()) {
|
||||||
$language = $res[0]['code'];
|
$newitem->setDate($extractor->getDate());
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
//die('what');
|
// add authors
|
||||||
// Use PEAR's Text_LanguageDetect
|
if ($authors = $item->get_authors()) {
|
||||||
if (!isset($l)) {
|
foreach ($authors as $author) {
|
||||||
$l = new Text_LanguageDetect('libraries/language-detect/lang.dat', 'libraries/language-detect/unicode_blocks.dat');
|
// for some feeds, SimplePie stores author's name as email, e.g. http://feeds.feedburner.com/nymag/intel
|
||||||
}
|
if ($author->get_name() !== null) {
|
||||||
$l_result = $l->detect($text_sample, 1);
|
$newitem->addElement('dc:creator', $author->get_name());
|
||||||
if (count($l_result) > 0) {
|
} elseif ($author->get_email() !== null) {
|
||||||
$language = $language_codes[key($l_result)];
|
$newitem->addElement('dc:creator', $author->get_email());
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
} elseif ($authors = $extractor->getAuthors()) {
|
||||||
|
//TODO: make sure the list size is reasonable
|
||||||
|
foreach ($authors as $author) {
|
||||||
|
// TODO: xpath often selects authors from other articles linked from the page.
|
||||||
|
// for now choose first item
|
||||||
|
$newitem->addElement('dc:creator', $author);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// add language
|
||||||
|
if ($detect_language) {
|
||||||
|
$language = $extractor->getLanguage();
|
||||||
|
if (!$language) $language = $feed->get_language();
|
||||||
|
if (($detect_language == 3 || (!$language && $detect_language == 2)) && $text_sample) {
|
||||||
|
try {
|
||||||
|
if ($use_cld) {
|
||||||
|
// Use PHP-CLD extension
|
||||||
|
$php_cld = 'CLD\detect'; // in quotes to prevent PHP 5.2 parse error
|
||||||
|
$res = $php_cld($text_sample);
|
||||||
|
if (is_array($res) && count($res) > 0) {
|
||||||
|
$language = $res[0]['code'];
|
||||||
}
|
}
|
||||||
} catch (Exception $e) {
|
} else {
|
||||||
//die('error: '.$e);
|
//die('what');
|
||||||
// do nothing
|
// Use PEAR's Text_LanguageDetect
|
||||||
}
|
if (!isset($l)) {
|
||||||
}
|
$l = new Text_LanguageDetect();
|
||||||
if ($language && (strlen($language) < 7)) {
|
$l->setNameMode(2); // return ISO 639-1 codes (e.g. "en")
|
||||||
$newitem->addElement('dc:language', $language);
|
}
|
||||||
}
|
$l_result = $l->detect($text_sample, 1);
|
||||||
}
|
if (count($l_result) > 0) {
|
||||||
|
$language = key($l_result);
|
||||||
// add MIME type (if it appeared in our exclusions lists)
|
|
||||||
if (isset($mime_info['mime'])) $newitem->addElement('dc:format', $mime_info['mime']);
|
|
||||||
// add effective URL (URL after redirects)
|
|
||||||
if (isset($effective_url)) {
|
|
||||||
//TODO: ensure $effective_url is valid witout - sometimes it causes problems, e.g.
|
|
||||||
//http://www.siasat.pk/forum/showthread.php?108883-Pakistan-Chowk-by-Rana-Mubashir-<2D>-25th-March-2012-Special-Program-from-Liari-(Karachi)
|
|
||||||
//temporary measure: use utf8_encode()
|
|
||||||
$newitem->addElement('dc:identifier', remove_url_cruft(utf8_encode($effective_url)));
|
|
||||||
} else {
|
|
||||||
$newitem->addElement('dc:identifier', remove_url_cruft($item->get_permalink()));
|
|
||||||
}
|
|
||||||
|
|
||||||
// add categories
|
|
||||||
if ($categories = $item->get_categories()) {
|
|
||||||
foreach ($categories as $category) {
|
|
||||||
if ($category->get_label() !== null) {
|
|
||||||
$newitem->addElement('category', $category->get_label());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// check for enclosures
|
|
||||||
if ($options->keep_enclosures) {
|
|
||||||
if ($enclosures = $item->get_enclosures()) {
|
|
||||||
foreach ($enclosures as $enclosure) {
|
|
||||||
// thumbnails
|
|
||||||
foreach ((array)$enclosure->get_thumbnails() as $thumbnail) {
|
|
||||||
$newitem->addElement('media:thumbnail', '', array('url'=>$thumbnail));
|
|
||||||
}
|
}
|
||||||
if (!$enclosure->get_link()) continue;
|
|
||||||
$enc = array();
|
|
||||||
// Media RSS spec ($enc): http://search.yahoo.com/mrss
|
|
||||||
// SimplePie methods ($enclosure): http://simplepie.org/wiki/reference/start#methods4
|
|
||||||
$enc['url'] = $enclosure->get_link();
|
|
||||||
if ($enclosure->get_length()) $enc['fileSize'] = $enclosure->get_length();
|
|
||||||
if ($enclosure->get_type()) $enc['type'] = $enclosure->get_type();
|
|
||||||
if ($enclosure->get_medium()) $enc['medium'] = $enclosure->get_medium();
|
|
||||||
if ($enclosure->get_expression()) $enc['expression'] = $enclosure->get_expression();
|
|
||||||
if ($enclosure->get_bitrate()) $enc['bitrate'] = $enclosure->get_bitrate();
|
|
||||||
if ($enclosure->get_framerate()) $enc['framerate'] = $enclosure->get_framerate();
|
|
||||||
if ($enclosure->get_sampling_rate()) $enc['samplingrate'] = $enclosure->get_sampling_rate();
|
|
||||||
if ($enclosure->get_channels()) $enc['channels'] = $enclosure->get_channels();
|
|
||||||
if ($enclosure->get_duration()) $enc['duration'] = $enclosure->get_duration();
|
|
||||||
if ($enclosure->get_height()) $enc['height'] = $enclosure->get_height();
|
|
||||||
if ($enclosure->get_width()) $enc['width'] = $enclosure->get_width();
|
|
||||||
if ($enclosure->get_language()) $enc['lang'] = $enclosure->get_language();
|
|
||||||
$newitem->addElement('media:content', '', $enc);
|
|
||||||
}
|
}
|
||||||
|
} catch (Exception $e) {
|
||||||
|
//die('error: '.$e);
|
||||||
|
// do nothing
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* } */
|
if ($language && (strlen($language) < 7)) {
|
||||||
|
$newitem->addElement('dc:language', $language);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// add MIME type (if it appeared in our exclusions lists)
|
||||||
|
if (isset($mime_info['mime'])) $newitem->addElement('dc:format', $mime_info['mime']);
|
||||||
|
// add effective URL (URL after redirects)
|
||||||
|
if (isset($effective_url)) {
|
||||||
|
//TODO: ensure $effective_url is valid witout - sometimes it causes problems, e.g.
|
||||||
|
//http://www.siasat.pk/forum/showthread.php?108883-Pakistan-Chowk-by-Rana-Mubashir-<2D>-25th-March-2012-Special-Program-from-Liari-(Karachi)
|
||||||
|
//temporary measure: use utf8_encode()
|
||||||
|
$newitem->addElement('dc:identifier', remove_url_cruft(utf8_encode($effective_url)));
|
||||||
|
} else {
|
||||||
|
$newitem->addElement('dc:identifier', remove_url_cruft($item->get_permalink()));
|
||||||
|
}
|
||||||
|
|
||||||
|
// add categories
|
||||||
|
if ($categories = $item->get_categories()) {
|
||||||
|
foreach ($categories as $category) {
|
||||||
|
if ($category->get_label() !== null) {
|
||||||
|
$newitem->addElement('category', $category->get_label());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// check for enclosures
|
||||||
|
if ($options->keep_enclosures) {
|
||||||
|
if ($enclosures = $item->get_enclosures()) {
|
||||||
|
foreach ($enclosures as $enclosure) {
|
||||||
|
// thumbnails
|
||||||
|
foreach ((array)$enclosure->get_thumbnails() as $thumbnail) {
|
||||||
|
$newitem->addElement('media:thumbnail', '', array('url'=>$thumbnail));
|
||||||
|
}
|
||||||
|
if (!$enclosure->get_link()) continue;
|
||||||
|
$enc = array();
|
||||||
|
// Media RSS spec ($enc): http://search.yahoo.com/mrss
|
||||||
|
// SimplePie methods ($enclosure): http://simplepie.org/wiki/reference/start#methods4
|
||||||
|
$enc['url'] = $enclosure->get_link();
|
||||||
|
if ($enclosure->get_length()) $enc['fileSize'] = $enclosure->get_length();
|
||||||
|
if ($enclosure->get_type()) $enc['type'] = $enclosure->get_type();
|
||||||
|
if ($enclosure->get_medium()) $enc['medium'] = $enclosure->get_medium();
|
||||||
|
if ($enclosure->get_expression()) $enc['expression'] = $enclosure->get_expression();
|
||||||
|
if ($enclosure->get_bitrate()) $enc['bitrate'] = $enclosure->get_bitrate();
|
||||||
|
if ($enclosure->get_framerate()) $enc['framerate'] = $enclosure->get_framerate();
|
||||||
|
if ($enclosure->get_sampling_rate()) $enc['samplingrate'] = $enclosure->get_sampling_rate();
|
||||||
|
if ($enclosure->get_channels()) $enc['channels'] = $enclosure->get_channels();
|
||||||
|
if ($enclosure->get_duration()) $enc['duration'] = $enclosure->get_duration();
|
||||||
|
if ($enclosure->get_height()) $enc['height'] = $enclosure->get_height();
|
||||||
|
if ($enclosure->get_width()) $enc['width'] = $enclosure->get_width();
|
||||||
|
if ($enclosure->get_language()) $enc['lang'] = $enclosure->get_language();
|
||||||
|
$newitem->addElement('media:content', '', $enc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
$output->addItem($newitem);
|
$output->addItem($newitem);
|
||||||
unset($html);
|
unset($html);
|
||||||
$item_count++;
|
$item_count++;
|
||||||
|
42
inc/3rdparty/makefulltextfeedHelpers.php
vendored
42
inc/3rdparty/makefulltextfeedHelpers.php
vendored
@ -66,6 +66,38 @@ class DummySingleItem {
|
|||||||
// HELPER FUNCTIONS
|
// HELPER FUNCTIONS
|
||||||
///////////////////////////////
|
///////////////////////////////
|
||||||
|
|
||||||
|
// Adapted from WordPress
|
||||||
|
// http://core.trac.wordpress.org/browser/tags/3.5.1/wp-includes/formatting.php#L2173
|
||||||
|
function get_excerpt($text, $num_words=55, $more=null) {
|
||||||
|
if (null === $more) $more = '…';
|
||||||
|
$text = strip_tags($text);
|
||||||
|
//TODO: Check if word count is based on single characters (East Asian characters)
|
||||||
|
/*
|
||||||
|
if (1==2) {
|
||||||
|
$text = trim(preg_replace("/[\n\r\t ]+/", ' ', $text), ' ');
|
||||||
|
preg_match_all('/./u', $text, $words_array);
|
||||||
|
$words_array = array_slice($words_array[0], 0, $num_words + 1);
|
||||||
|
$sep = '';
|
||||||
|
} else {
|
||||||
|
$words_array = preg_split("/[\n\r\t ]+/", $text, $num_words + 1, PREG_SPLIT_NO_EMPTY);
|
||||||
|
$sep = ' ';
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
$words_array = preg_split("/[\n\r\t ]+/", $text, $num_words + 1, PREG_SPLIT_NO_EMPTY);
|
||||||
|
$sep = ' ';
|
||||||
|
if (count($words_array) > $num_words) {
|
||||||
|
array_pop($words_array);
|
||||||
|
$text = implode($sep, $words_array);
|
||||||
|
$text = $text.$more;
|
||||||
|
} else {
|
||||||
|
$text = implode($sep, $words_array);
|
||||||
|
}
|
||||||
|
// trim whitespace at beginning or end of string
|
||||||
|
// See: http://stackoverflow.com/questions/4166896/trim-unicode-whitespace-in-php-5-2
|
||||||
|
$text = preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $text);
|
||||||
|
return $text;
|
||||||
|
}
|
||||||
|
|
||||||
function url_allowed($url) {
|
function url_allowed($url) {
|
||||||
global $options;
|
global $options;
|
||||||
if (!empty($options->allowed_urls)) {
|
if (!empty($options->allowed_urls)) {
|
||||||
@ -165,14 +197,6 @@ function convert_to_utf8($html, $header=null)
|
|||||||
if (strtolower($encoding) != 'utf-8') {
|
if (strtolower($encoding) != 'utf-8') {
|
||||||
debug('Converting to UTF-8');
|
debug('Converting to UTF-8');
|
||||||
$html = SimplePie_Misc::change_encoding($html, $encoding, 'utf-8');
|
$html = SimplePie_Misc::change_encoding($html, $encoding, 'utf-8');
|
||||||
/*
|
|
||||||
if (function_exists('iconv')) {
|
|
||||||
// iconv appears to handle certain character encodings better than mb_convert_encoding
|
|
||||||
$html = iconv($encoding, 'utf-8', $html);
|
|
||||||
} else {
|
|
||||||
$html = mb_convert_encoding($html, 'utf-8', $encoding);
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -196,7 +220,7 @@ function makeAbsolute($base, $elem) {
|
|||||||
}
|
}
|
||||||
function makeAbsoluteAttr($base, $e, $attr) {
|
function makeAbsoluteAttr($base, $e, $attr) {
|
||||||
if ($e->hasAttribute($attr)) {
|
if ($e->hasAttribute($attr)) {
|
||||||
// Trim leading and trailing white space. I don't really like this but
|
// Trim leading and trailing white space. I don't really like this but
|
||||||
// unfortunately it does appear on some sites. e.g. <img src=" /path/to/image.jpg" />
|
// unfortunately it does appear on some sites. e.g. <img src=" /path/to/image.jpg" />
|
||||||
$url = trim(str_replace('%20', ' ', $e->getAttribute($attr)));
|
$url = trim(str_replace('%20', ' ', $e->getAttribute($attr)));
|
||||||
$url = str_replace(' ', '%20', $url);
|
$url = str_replace(' ', '%20', $url);
|
||||||
|
12
inc/3rdparty/site_config/custom/dailymotion.com.txt
vendored
Executable file
12
inc/3rdparty/site_config/custom/dailymotion.com.txt
vendored
Executable file
@ -0,0 +1,12 @@
|
|||||||
|
title: //title
|
||||||
|
body: //iframe
|
||||||
|
|
||||||
|
replace_string(<![CDATA[): _
|
||||||
|
replace_string(]]>): _
|
||||||
|
|
||||||
|
single_page_link: //link[@type='application/xml+oembed']
|
||||||
|
|
||||||
|
prune: no
|
||||||
|
tidy: no
|
||||||
|
|
||||||
|
http://www.dailymotion.com/video/x1vk5oh_before-they-were-on-game-of-thrones_people
|
3
inc/3rdparty/site_config/custom/index.php
vendored
Normal file
3
inc/3rdparty/site_config/custom/index.php
vendored
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
<?php
|
||||||
|
// this is here to prevent directory listing over the web
|
||||||
|
?>
|
11
inc/3rdparty/site_config/custom/ted.com.txt
vendored
Executable file
11
inc/3rdparty/site_config/custom/ted.com.txt
vendored
Executable file
@ -0,0 +1,11 @@
|
|||||||
|
title: //title
|
||||||
|
body: //div[@class='talk-article__body talk-transcript__body'] | //div[@class='media__image media__image--thumb talk-link__image']
|
||||||
|
|
||||||
|
strip_id_or_class: talk-transcript__para__time
|
||||||
|
|
||||||
|
single_page_link: //a[@id='hero-transcript-link']
|
||||||
|
|
||||||
|
#prune: no
|
||||||
|
tidy: no
|
||||||
|
|
||||||
|
test_url: http://www.ted.com/talks/andrew_solomon_how_the_worst_moments_in_our_lives_make_us_who_we_are
|
5
inc/3rdparty/site_config/index.php
vendored
5
inc/3rdparty/site_config/index.php
vendored
@ -1,3 +1,2 @@
|
|||||||
<?php
|
<?php
|
||||||
// this is here to prevent directory listing over the web
|
// this is here to prevent directory listing over the web
|
||||||
?>
|
|
@ -1 +1 @@
|
|||||||
4
|
2013-05-12T22:53:07Z
|
@ -1083,11 +1083,10 @@ class Poche
|
|||||||
$config = $this->store->getConfigUser($user_id);
|
$config = $this->store->getConfigUser($user_id);
|
||||||
|
|
||||||
if ($config == null) {
|
if ($config == null) {
|
||||||
die(_('User with this id (' . $user_id . ') does not exist.'));
|
die(sprintf(_('User with this id (%d) does not exist.'), $user_id));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!in_array($type, $allowed_types) ||
|
if (!in_array($type, $allowed_types) || $token != $config['token']) {
|
||||||
$token != $config['token']) {
|
|
||||||
die(_('Uh, there is a problem while generating feeds.'));
|
die(_('Uh, there is a problem while generating feeds.'));
|
||||||
}
|
}
|
||||||
// Check the token
|
// Check the token
|
||||||
@ -1145,16 +1144,18 @@ class Poche
|
|||||||
$config = HTMLPurifier_Config::createDefault();
|
$config = HTMLPurifier_Config::createDefault();
|
||||||
$config->set('Cache.SerializerPath', CACHE);
|
$config->set('Cache.SerializerPath', CACHE);
|
||||||
$config->set('HTML.SafeIframe', true);
|
$config->set('HTML.SafeIframe', true);
|
||||||
$config->set('URI.SafeIframeRegexp', '%^(https?:)?//(www\.youtube(?:-nocookie)?\.com/embed/|player\.vimeo\.com/video/)%'); //allow YouTube and Vimeo$purifier = new HTMLPurifier($config);
|
|
||||||
|
//allow YouTube, Vimeo and dailymotion videos
|
||||||
|
$config->set('URI.SafeIframeRegexp', '%^(https?:)?//(www\.youtube(?:-nocookie)?\.com/embed/|player\.vimeo\.com/video/|www\.dailymotion\.com/embed/video/)%');
|
||||||
|
|
||||||
return new HTMLPurifier($config);
|
return new HTMLPurifier($config);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* handle epub
|
* handle epub
|
||||||
*/
|
*/
|
||||||
public function createEpub() {
|
public function createEpub() {
|
||||||
|
|
||||||
switch ($_GET['method']) {
|
switch ($_GET['method']) {
|
||||||
case 'id':
|
case 'id':
|
||||||
$entryID = filter_var($_GET['id'],FILTER_SANITIZE_NUMBER_INT);
|
$entryID = filter_var($_GET['id'],FILTER_SANITIZE_NUMBER_INT);
|
||||||
@ -1190,7 +1191,7 @@ class Poche
|
|||||||
break;
|
break;
|
||||||
case 'default':
|
case 'default':
|
||||||
die(_('Uh, there is a problem while generating epub.'));
|
die(_('Uh, there is a problem while generating epub.'));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
$content_start =
|
$content_start =
|
||||||
@ -1203,10 +1204,9 @@ class Poche
|
|||||||
. "<body>\n";
|
. "<body>\n";
|
||||||
|
|
||||||
$bookEnd = "</body>\n</html>\n";
|
$bookEnd = "</body>\n</html>\n";
|
||||||
|
|
||||||
$log = new Logger("wallabag", TRUE);
|
$log = new Logger("wallabag", TRUE);
|
||||||
$fileDir = CACHE;
|
$fileDir = CACHE;
|
||||||
|
|
||||||
|
|
||||||
$book = new EPub(EPub::BOOK_VERSION_EPUB3, DEBUG_POCHE);
|
$book = new EPub(EPub::BOOK_VERSION_EPUB3, DEBUG_POCHE);
|
||||||
$log->logLine("new EPub()");
|
$log->logLine("new EPub()");
|
||||||
@ -1215,7 +1215,7 @@ class Poche
|
|||||||
$log->logLine("Zip version: " . Zip::VERSION);
|
$log->logLine("Zip version: " . Zip::VERSION);
|
||||||
$log->logLine("getCurrentServerURL: " . $book->getCurrentServerURL());
|
$log->logLine("getCurrentServerURL: " . $book->getCurrentServerURL());
|
||||||
$log->logLine("getCurrentPageURL..: " . $book->getCurrentPageURL());
|
$log->logLine("getCurrentPageURL..: " . $book->getCurrentPageURL());
|
||||||
|
|
||||||
$book->setTitle(_('wallabag\'s articles'));
|
$book->setTitle(_('wallabag\'s articles'));
|
||||||
$book->setIdentifier("http://$_SERVER[HTTP_HOST]", EPub::IDENTIFIER_URI); // Could also be the ISBN number, prefered for published books, or a UUID.
|
$book->setIdentifier("http://$_SERVER[HTTP_HOST]", EPub::IDENTIFIER_URI); // Could also be the ISBN number, prefered for published books, or a UUID.
|
||||||
//$book->setLanguage("en"); // Not needed, but included for the example, Language is mandatory, but EPub defaults to "en". Use RFC3066 Language codes, such as "en", "da", "fr" etc.
|
//$book->setLanguage("en"); // Not needed, but included for the example, Language is mandatory, but EPub defaults to "en". Use RFC3066 Language codes, such as "en", "da", "fr" etc.
|
||||||
@ -1225,39 +1225,39 @@ class Poche
|
|||||||
$book->setDate(time()); // Strictly not needed as the book date defaults to time().
|
$book->setDate(time()); // Strictly not needed as the book date defaults to time().
|
||||||
//$book->setRights("Copyright and licence information specific for the book."); // As this is generated, this _could_ contain the name or licence information of the user who purchased the book, if needed. If this is used that way, the identifier must also be made unique for the book.
|
//$book->setRights("Copyright and licence information specific for the book."); // As this is generated, this _could_ contain the name or licence information of the user who purchased the book, if needed. If this is used that way, the identifier must also be made unique for the book.
|
||||||
$book->setSourceURL("http://$_SERVER[HTTP_HOST]");
|
$book->setSourceURL("http://$_SERVER[HTTP_HOST]");
|
||||||
|
|
||||||
$book->addDublinCoreMetadata(DublinCore::CONTRIBUTOR, "PHP");
|
$book->addDublinCoreMetadata(DublinCore::CONTRIBUTOR, "PHP");
|
||||||
$book->addDublinCoreMetadata(DublinCore::CONTRIBUTOR, "wallabag");
|
$book->addDublinCoreMetadata(DublinCore::CONTRIBUTOR, "wallabag");
|
||||||
|
|
||||||
$cssData = "body {\n margin-left: .5em;\n margin-right: .5em;\n text-align: justify;\n}\n\np {\n font-family: serif;\n font-size: 10pt;\n text-align: justify;\n text-indent: 1em;\n margin-top: 0px;\n margin-bottom: 1ex;\n}\n\nh1, h2 {\n font-family: sans-serif;\n font-style: italic;\n text-align: center;\n background-color: #6b879c;\n color: white;\n width: 100%;\n}\n\nh1 {\n margin-bottom: 2px;\n}\n\nh2 {\n margin-top: -2px;\n margin-bottom: 2px;\n}\n";
|
$cssData = "body {\n margin-left: .5em;\n margin-right: .5em;\n text-align: justify;\n}\n\np {\n font-family: serif;\n font-size: 10pt;\n text-align: justify;\n text-indent: 1em;\n margin-top: 0px;\n margin-bottom: 1ex;\n}\n\nh1, h2 {\n font-family: sans-serif;\n font-style: italic;\n text-align: center;\n background-color: #6b879c;\n color: white;\n width: 100%;\n}\n\nh1 {\n margin-bottom: 2px;\n}\n\nh2 {\n margin-top: -2px;\n margin-bottom: 2px;\n}\n";
|
||||||
|
|
||||||
$log->logLine("Add Cover");
|
$log->logLine("Add Cover");
|
||||||
|
|
||||||
$fullTitle = "<h1> " . $bookTitle . "</h1>\n";
|
$fullTitle = "<h1> " . $bookTitle . "</h1>\n";
|
||||||
|
|
||||||
$book->setCoverImage("Cover.png", file_get_contents("themes/baggy/img/apple-touch-icon-152.png"), "image/png", $fullTitle);
|
$book->setCoverImage("Cover.png", file_get_contents("themes/baggy/img/apple-touch-icon-152.png"), "image/png", $fullTitle);
|
||||||
|
|
||||||
$cover = $content_start . '<div style="text-align:center;"><p>' . _('Produced by wallabag with PHPePub') . '</p><p>'. _('Please open <a href="https://github.com/wallabag/wallabag/issues" >an issue</a> if you have trouble with the display of this E-Book on your device.') . '</p></div>' . $bookEnd;
|
$cover = $content_start . '<div style="text-align:center;"><p>' . _('Produced by wallabag with PHPePub') . '</p><p>'. _('Please open <a href="https://github.com/wallabag/wallabag/issues" >an issue</a> if you have trouble with the display of this E-Book on your device.') . '</p></div>' . $bookEnd;
|
||||||
|
|
||||||
//$book->addChapter("Table of Contents", "TOC.xhtml", NULL, false, EPub::EXTERNAL_REF_IGNORE);
|
//$book->addChapter("Table of Contents", "TOC.xhtml", NULL, false, EPub::EXTERNAL_REF_IGNORE);
|
||||||
$book->addChapter("Notices", "Cover2.html", $cover);
|
$book->addChapter("Notices", "Cover2.html", $cover);
|
||||||
|
|
||||||
$book->buildTOC();
|
$book->buildTOC();
|
||||||
|
|
||||||
foreach ($entries as $entry) { //set tags as subjects
|
foreach ($entries as $entry) { //set tags as subjects
|
||||||
$tags = $this->store->retrieveTagsByEntry($entry['id']);
|
$tags = $this->store->retrieveTagsByEntry($entry['id']);
|
||||||
foreach ($tags as $tag) {
|
foreach ($tags as $tag) {
|
||||||
$book->setSubject($tag['value']);
|
$book->setSubject($tag['value']);
|
||||||
}
|
}
|
||||||
|
|
||||||
$log->logLine("Set up parameters");
|
$log->logLine("Set up parameters");
|
||||||
|
|
||||||
$chapter = $content_start . $entry['content'] . $bookEnd;
|
$chapter = $content_start . $entry['content'] . $bookEnd;
|
||||||
$book->addChapter($entry['title'], htmlspecialchars($entry['title']) . ".html", $chapter, true, EPub::EXTERNAL_REF_ADD);
|
$book->addChapter($entry['title'], htmlspecialchars($entry['title']) . ".html", $chapter, true, EPub::EXTERNAL_REF_ADD);
|
||||||
$log->logLine("Added chapter " . $entry['title']);
|
$log->logLine("Added chapter " . $entry['title']);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (DEBUG_POCHE) {
|
if (DEBUG_POCHE) {
|
||||||
$epuplog = $book->getLog();
|
$epuplog = $book->getLog();
|
||||||
$book->addChapter("Log", "Log.html", $content_start . $log->getLog() . "\n</pre>" . $bookEnd); // log generation
|
$book->addChapter("Log", "Log.html", $content_start . $log->getLog() . "\n</pre>" . $bookEnd); // log generation
|
||||||
}
|
}
|
||||||
|
@ -18,8 +18,6 @@ class Tools
|
|||||||
die(_('Oops, it seems you don\'t have PHP 5.'));
|
die(_('Oops, it seems you don\'t have PHP 5.'));
|
||||||
}
|
}
|
||||||
|
|
||||||
error_reporting(E_ALL);
|
|
||||||
|
|
||||||
function stripslashesDeep($value) {
|
function stripslashesDeep($value) {
|
||||||
return is_array($value)
|
return is_array($value)
|
||||||
? array_map('stripslashesDeep', $value)
|
? array_map('stripslashesDeep', $value)
|
||||||
@ -60,7 +58,11 @@ class Tools
|
|||||||
}
|
}
|
||||||
|
|
||||||
$host = (isset($_SERVER['HTTP_X_FORWARDED_HOST']) ? $_SERVER['HTTP_X_FORWARDED_HOST'] : (isset($_SERVER['HTTP_HOST']) ? $_SERVER['HTTP_HOST'] : $_SERVER['SERVER_NAME']));
|
$host = (isset($_SERVER['HTTP_X_FORWARDED_HOST']) ? $_SERVER['HTTP_X_FORWARDED_HOST'] : (isset($_SERVER['HTTP_HOST']) ? $_SERVER['HTTP_HOST'] : $_SERVER['SERVER_NAME']));
|
||||||
|
|
||||||
|
if (strpos($host, ':') !== false) {
|
||||||
|
$serverport = '';
|
||||||
|
}
|
||||||
|
|
||||||
return 'http' . ($https ? 's' : '') . '://'
|
return 'http' . ($https ? 's' : '') . '://'
|
||||||
. $host . $serverport . $scriptname;
|
. $host . $serverport . $scriptname;
|
||||||
}
|
}
|
||||||
|
@ -30,7 +30,12 @@
|
|||||||
|
|
||||||
@define ('MODE_DEMO', FALSE);
|
@define ('MODE_DEMO', FALSE);
|
||||||
@define ('DEBUG_POCHE', FALSE);
|
@define ('DEBUG_POCHE', FALSE);
|
||||||
@define ('DOWNLOAD_PICTURES', FALSE);
|
|
||||||
|
//default level of error reporting in application. Developers should override it in their config.inc.php: set to E_ALL.
|
||||||
|
@define ('ERROR_REPORTING', E_ALL & ~E_NOTICE);
|
||||||
|
|
||||||
|
@define ('DOWNLOAD_PICTURES', FALSE); # This can slow down the process of adding articles
|
||||||
|
@define ('REGENERATE_PICTURES_QUALITY', 75);
|
||||||
@define ('CONVERT_LINKS_FOOTNOTES', FALSE);
|
@define ('CONVERT_LINKS_FOOTNOTES', FALSE);
|
||||||
@define ('REVERT_FORCED_PARAGRAPH_ELEMENTS', FALSE);
|
@define ('REVERT_FORCED_PARAGRAPH_ELEMENTS', FALSE);
|
||||||
@define ('SHARE_TWITTER', TRUE);
|
@define ('SHARE_TWITTER', TRUE);
|
||||||
|
@ -14,6 +14,7 @@
|
|||||||
function filtre_picture($content, $url, $id)
|
function filtre_picture($content, $url, $id)
|
||||||
{
|
{
|
||||||
$matches = array();
|
$matches = array();
|
||||||
|
$processing_pictures = array(); // list of pictures already processed, to avoid processing the same picture twice
|
||||||
preg_match_all('#<\s*(img)[^>]+src="([^"]*)"[^>]*>#Si', $content, $matches, PREG_SET_ORDER);
|
preg_match_all('#<\s*(img)[^>]+src="([^"]*)"[^>]*>#Si', $content, $matches, PREG_SET_ORDER);
|
||||||
foreach($matches as $i => $link) {
|
foreach($matches as $i => $link) {
|
||||||
$link[1] = trim($link[1]);
|
$link[1] = trim($link[1]);
|
||||||
@ -22,8 +23,17 @@ function filtre_picture($content, $url, $id)
|
|||||||
$filename = basename(parse_url($absolute_path, PHP_URL_PATH));
|
$filename = basename(parse_url($absolute_path, PHP_URL_PATH));
|
||||||
$directory = create_assets_directory($id);
|
$directory = create_assets_directory($id);
|
||||||
$fullpath = $directory . '/' . $filename;
|
$fullpath = $directory . '/' . $filename;
|
||||||
download_pictures($absolute_path, $fullpath);
|
|
||||||
$content = str_replace($matches[$i][2], $fullpath, $content);
|
if (in_array($absolute_path, $processing_pictures) === true) {
|
||||||
|
// replace the picture's URL only if processing is OK; picture already processed -> go to next picture
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (download_pictures($absolute_path, $fullpath) === true) {
|
||||||
|
$content = str_replace($matches[$i][2], $fullpath, $content);
|
||||||
|
}
|
||||||
|
|
||||||
|
$processing_pictures[] = $absolute_path;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -64,6 +74,8 @@ function get_absolute_link($relative_link, $url) {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Téléchargement des images
|
* Téléchargement des images
|
||||||
|
*
|
||||||
|
* @return bool true if the download and processing succeeded, false otherwise
|
||||||
*/
|
*/
|
||||||
function download_pictures($absolute_path, $fullpath)
|
function download_pictures($absolute_path, $fullpath)
|
||||||
{
|
{
|
||||||
@ -73,9 +85,44 @@ function download_pictures($absolute_path, $fullpath)
|
|||||||
if(file_exists($fullpath)) {
|
if(file_exists($fullpath)) {
|
||||||
unlink($fullpath);
|
unlink($fullpath);
|
||||||
}
|
}
|
||||||
$fp = fopen($fullpath, 'x');
|
|
||||||
fwrite($fp, $rawdata);
|
// check extension
|
||||||
fclose($fp);
|
$file_ext = strrchr($fullpath, '.');
|
||||||
|
$whitelist = array(".jpg",".jpeg",".gif",".png");
|
||||||
|
if (!(in_array($file_ext, $whitelist))) {
|
||||||
|
Tools::logm('processed image with not allowed extension. Skipping ' . $fullpath);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// check headers
|
||||||
|
$imageinfo = getimagesize($absolute_path);
|
||||||
|
if ($imageinfo['mime'] != 'image/gif' && $imageinfo['mime'] != 'image/jpeg'&& $imageinfo['mime'] != 'image/jpg'&& $imageinfo['mime'] != 'image/png') {
|
||||||
|
Tools::logm('processed image with bad header. Skipping ' . $fullpath);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// regenerate image
|
||||||
|
$im = imagecreatefromstring($rawdata);
|
||||||
|
if ($im === false) {
|
||||||
|
Tools::logm('error while regenerating image ' . $fullpath);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
switch ($imageinfo['mime']) {
|
||||||
|
case 'image/gif':
|
||||||
|
$result = imagegif($im, $fullpath);
|
||||||
|
break;
|
||||||
|
case 'image/jpeg':
|
||||||
|
case 'image/jpg':
|
||||||
|
$result = imagejpeg($im, $fullpath, REGENERATE_PICTURES_QUALITY);
|
||||||
|
break;
|
||||||
|
case 'image/png':
|
||||||
|
$result = imagepng($im, $fullpath, ceil(REGENERATE_PICTURES_QUALITY / 100 * 9));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
imagedestroy($im);
|
||||||
|
|
||||||
|
return $result;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -8,10 +8,15 @@
|
|||||||
* @license http://www.wtfpl.net/ see COPYING file
|
* @license http://www.wtfpl.net/ see COPYING file
|
||||||
*/
|
*/
|
||||||
|
|
||||||
define ('POCHE', '1.6.1');
|
define ('POCHE', '1.7.0');
|
||||||
require 'check_setup.php';
|
require 'check_setup.php';
|
||||||
require_once 'inc/poche/global.inc.php';
|
require_once 'inc/poche/global.inc.php';
|
||||||
|
|
||||||
|
# Set error reporting level
|
||||||
|
if (defined('ERROR_REPORTING')) {
|
||||||
|
error_reporting(ERROR_REPORTING);
|
||||||
|
}
|
||||||
|
|
||||||
# Start session
|
# Start session
|
||||||
Session::$sessionName = 'poche';
|
Session::$sessionName = 'poche';
|
||||||
Session::init();
|
Session::init();
|
||||||
|
@ -979,8 +979,8 @@ blockquote {
|
|||||||
content: none;
|
content: none;
|
||||||
}
|
}
|
||||||
.logo {
|
.logo {
|
||||||
width: 1.5em;
|
width: 1.25em;
|
||||||
height: 1.5em;
|
height: 1.25em;
|
||||||
left: 0;
|
left: 0;
|
||||||
top: 0;
|
top: 0;
|
||||||
}
|
}
|
||||||
@ -1030,6 +1030,7 @@ blockquote {
|
|||||||
margin-left: 1.5em;
|
margin-left: 1.5em;
|
||||||
padding-right: 1.5em;
|
padding-right: 1.5em;
|
||||||
position: static;
|
position: static;
|
||||||
|
margin-top: 3em;
|
||||||
}
|
}
|
||||||
#article_toolbar .topPosF {
|
#article_toolbar .topPosF {
|
||||||
display: none;
|
display: none;
|
||||||
|
14
themes/default/_search-form.twig
Normal file → Executable file
14
themes/default/_search-form.twig
Normal file → Executable file
@ -7,17 +7,3 @@
|
|||||||
</p>
|
</p>
|
||||||
</form>
|
</form>
|
||||||
</div>
|
</div>
|
||||||
<script type="text/javascript">
|
|
||||||
$(document).ready(function() {
|
|
||||||
|
|
||||||
$("#search-form").hide();
|
|
||||||
|
|
||||||
$("#search").click(function(){
|
|
||||||
$("#search-form").toggle();
|
|
||||||
$("#search").toggleClass("current");
|
|
||||||
$("#search-arrow").toggleClass("arrow-down");
|
|
||||||
});
|
|
||||||
|
|
||||||
|
|
||||||
});
|
|
||||||
</script>
|
|
@ -384,8 +384,8 @@ a#bagit-form-close {
|
|||||||
background-color: #000;
|
background-color: #000;
|
||||||
color: #fff;
|
color: #fff;
|
||||||
padding: 0 4px 1px 3px;
|
padding: 0 4px 1px 3px;
|
||||||
font-weight: bold;
|
font-weight: bold;
|
||||||
font-size: 0.7em;
|
font-size: 0.7em;
|
||||||
border-radius: 4px;
|
border-radius: 4px;
|
||||||
}
|
}
|
||||||
.add-to-wallabag-link-after:hover, .add-to-wallabag-link-after:active {
|
.add-to-wallabag-link-after:hover, .add-to-wallabag-link-after:active {
|
||||||
@ -394,6 +394,23 @@ a#bagit-form-close {
|
|||||||
.add-to-wallabag-link-after:visited {
|
.add-to-wallabag-link-after:visited {
|
||||||
color: #999;
|
color: #999;
|
||||||
}
|
}
|
||||||
|
a.add-to-wallabag-link-after {
|
||||||
|
visibility: hidden;
|
||||||
|
position: absolute;
|
||||||
|
opacity: 0;
|
||||||
|
transition-duration: 2s;
|
||||||
|
transition-timing-function: ease-out;
|
||||||
|
}
|
||||||
|
#article article a:hover + a.add-to-wallabag-link-after, a.add-to-wallabag-link-after:hover {
|
||||||
|
opacity: 1;
|
||||||
|
visibility: visible;
|
||||||
|
transition-duration: .3s;
|
||||||
|
transition-timing-function: ease-in;
|
||||||
|
}
|
||||||
|
a.add-to-wallabag-link-after:after {
|
||||||
|
content: "w";
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
#add-link-result {
|
#add-link-result {
|
||||||
display: inline;
|
display: inline;
|
||||||
|
Loading…
Reference in New Issue
Block a user