poche now uses Full Text RSS to fetch content

This commit is contained in:
Nicolas Lœuillet 2013-08-25 20:10:23 +02:00
parent fccd59e2e3
commit ec3972361d
53 changed files with 20042 additions and 82 deletions

View File

@ -1,5 +1,6 @@
poche is based on :
* PHP Readability https://bitbucket.org/fivefilters/php-readability
* Full Text RSS http://code.fivefilters.org/full-text-rss/src
* Encoding https://github.com/neitanod/forceutf8
* logo by Brightmix http://www.iconfinder.com/icondetails/43256/128/jeans_monotone_pocket_icon
* icons http://icomoon.io

View File

@ -0,0 +1,612 @@
<?php
/**
* Content Extractor
*
* Uses patterns specified in site config files and auto detection (hNews/PHP Readability)
* to extract content from HTML files.
*
* @version 0.8
* @date 2012-02-21
* @author Keyvan Minoukadeh
* @copyright 2011 Keyvan Minoukadeh
* @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3
*/
class ContentExtractor
{
protected static $tidy_config = array(
'clean' => true,
'output-xhtml' => true,
'logical-emphasis' => true,
'show-body-only' => false,
'new-blocklevel-tags' => 'article, aside, footer, header, hgroup, menu, nav, section, details, datagrid',
'new-inline-tags' => 'mark, time, meter, progress, data',
'wrap' => 0,
'drop-empty-paras' => true,
'drop-proprietary-attributes' => false,
'enclose-text' => true,
'enclose-block-text' => true,
'merge-divs' => true,
'merge-spans' => true,
'char-encoding' => 'utf8',
'hide-comments' => true
);
protected $html;
protected $config;
protected $title;
protected $author = array();
protected $language;
protected $date;
protected $body;
protected $success = false;
public $fingerprints = array();
public $readability;
public $debug = false;
function __construct($path, $fallback=null) {
SiteConfig::set_config_path($path, $fallback);
}
protected function debug($msg) {
if ($this->debug) {
$mem = round(memory_get_usage()/1024, 2);
$memPeak = round(memory_get_peak_usage()/1024, 2);
echo '* ',$msg;
echo ' - mem used: ',$mem," (peak: $memPeak)\n";
ob_flush();
flush();
}
}
public function reset() {
$this->html = null;
$this->readability = null;
$this->config = null;
$this->title = null;
$this->body = null;
$this->author = array();
$this->language = null;
$this->date = null;
$this->success = false;
}
public function findHostUsingFingerprints($html) {
$this->debug('Checking fingerprints...');
$head = substr($html, 0, 8000);
foreach ($this->fingerprints as $_fp => $_fphost) {
$lookin = 'html';
if (is_array($_fphost)) {
if (isset($_fphost['head']) && $_fphost['head']) {
$lookin = 'head';
}
$_fphost = $_fphost['hostname'];
}
if (strpos($$lookin, $_fp) !== false) {
$this->debug("Found match: $_fphost");
return $_fphost;
}
}
return false;
}
// returns true on success, false on failure
// $smart_tidy indicates that if tidy is used and no results are produced, we will
// try again without it. Tidy helps us deal with PHP's patchy HTML parsing most of the time
// but it has problems of its own which we try to avoid with this option.
public function process($html, $url, $smart_tidy=true) {
$this->reset();
// extract host name
$host = @parse_url($url, PHP_URL_HOST);
if (!($this->config = SiteConfig::build($host))) {
// no match, check HTML for fingerprints
if (!empty($this->fingerprints) && ($_fphost = $this->findHostUsingFingerprints($html))) {
$this->config = SiteConfig::build($_fphost);
}
unset($_fphost);
if (!$this->config) {
// no match, so use defaults
$this->config = new SiteConfig();
}
}
// store copy of config in our static cache array in case we need to process another URL
SiteConfig::add_to_cache($host, $this->config);
// do string replacements
foreach ($this->config->replace_string as $_repl) {
$html = str_replace($_repl[0], $_repl[1], $html);
}
unset($_repl);
// use tidy (if it exists)?
// This fixes problems with some sites which would otherwise
// trouble DOMDocument's HTML parsing. (Although sometimes it
// makes matters worse, which is why you can override it in site config files.)
$tidied = false;
if ($this->config->tidy && function_exists('tidy_parse_string') && $smart_tidy) {
$this->debug('Using Tidy');
$tidy = tidy_parse_string($html, self::$tidy_config, 'UTF8');
if (tidy_clean_repair($tidy)) {
$original_html = $html;
$tidied = true;
// $html = $tidy->value;
}
$body = $tidy->body();
if (preg_replace('/\s+/', '', $body->value) !== "<body></body>") {
$html = $tidy->value;
}
unset($tidy);
}
// load and parse html
$this->readability = new Readability($html, $url);
// we use xpath to find elements in the given HTML document
// see http://en.wikipedia.org/wiki/XPath_1.0
$xpath = new DOMXPath($this->readability->dom);
// try to get title
foreach ($this->config->title as $pattern) {
$elems = @$xpath->evaluate($pattern, $this->readability->dom);
if (is_string($elems)) {
$this->debug('Title expression evaluated as string');
$this->title = trim($elems);
break;
} elseif ($elems instanceof DOMNodeList && $elems->length > 0) {
$this->debug('Title matched');
$this->title = $elems->item(0)->textContent;
// remove title from document
try {
$elems->item(0)->parentNode->removeChild($elems->item(0));
} catch (DOMException $e) {
// do nothing
}
break;
}
}
// try to get author (if it hasn't already been set)
if (empty($this->author)) {
foreach ($this->config->author as $pattern) {
$elems = @$xpath->evaluate($pattern, $this->readability->dom);
if (is_string($elems)) {
$this->debug('Author expression evaluated as string');
if (trim($elems) != '') {
$this->author[] = trim($elems);
break;
}
} elseif ($elems instanceof DOMNodeList && $elems->length > 0) {
foreach ($elems as $elem) {
if (!isset($elem->parentNode)) continue;
$this->author[] = trim($elem->textContent);
}
if (!empty($this->author)) break;
}
}
}
// try to get language
$_lang_xpath = array('//html[@lang]/@lang', '//meta[@name="DC.language"]/@content');
foreach ($_lang_xpath as $pattern) {
$elems = @$xpath->evaluate($pattern, $this->readability->dom);
if (is_string($elems)) {
if (trim($elems) != '') {
$this->language = trim($elems);
break;
}
} elseif ($elems instanceof DOMNodeList && $elems->length > 0) {
foreach ($elems as $elem) {
if (!isset($elem->parentNode)) continue;
$this->language = trim($elem->textContent);
}
if ($this->language) break;
}
}
// try to get date
foreach ($this->config->date as $pattern) {
$elems = @$xpath->evaluate($pattern, $this->readability->dom);
if (is_string($elems)) {
$this->debug('Date expression evaluated as string');
$this->date = strtotime(trim($elems, "; \t\n\r\0\x0B"));
} elseif ($elems instanceof DOMNodeList && $elems->length > 0) {
$this->debug('Date matched');
$this->date = $elems->item(0)->textContent;
$this->date = strtotime(trim($this->date, "; \t\n\r\0\x0B"));
// remove date from document
// $elems->item(0)->parentNode->removeChild($elems->item(0));
}
if (!$this->date) {
$this->date = null;
} else {
break;
}
}
// strip elements (using xpath expressions)
foreach ($this->config->strip as $pattern) {
$elems = @$xpath->query($pattern, $this->readability->dom);
// check for matches
if ($elems && $elems->length > 0) {
$this->debug('Stripping '.$elems->length.' elements (strip)');
for ($i=$elems->length-1; $i >= 0; $i--) {
$elems->item($i)->parentNode->removeChild($elems->item($i));
}
}
}
// strip elements (using id and class attribute values)
foreach ($this->config->strip_id_or_class as $string) {
$string = strtr($string, array("'"=>'', '"'=>''));
$elems = @$xpath->query("//*[contains(@class, '$string') or contains(@id, '$string')]", $this->readability->dom);
// check for matches
if ($elems && $elems->length > 0) {
$this->debug('Stripping '.$elems->length.' elements (strip_id_or_class)');
for ($i=$elems->length-1; $i >= 0; $i--) {
$elems->item($i)->parentNode->removeChild($elems->item($i));
}
}
}
// strip images (using src attribute values)
foreach ($this->config->strip_image_src as $string) {
$string = strtr($string, array("'"=>'', '"'=>''));
$elems = @$xpath->query("//img[contains(@src, '$string')]", $this->readability->dom);
// check for matches
if ($elems && $elems->length > 0) {
$this->debug('Stripping '.$elems->length.' image elements');
for ($i=$elems->length-1; $i >= 0; $i--) {
$elems->item($i)->parentNode->removeChild($elems->item($i));
}
}
}
// strip elements using Readability.com and Instapaper.com ignore class names
// .entry-unrelated and .instapaper_ignore
// See https://www.readability.com/publishers/guidelines/#view-plainGuidelines
// and http://blog.instapaper.com/post/730281947
$elems = @$xpath->query("//*[contains(concat(' ',normalize-space(@class),' '),' entry-unrelated ') or contains(concat(' ',normalize-space(@class),' '),' instapaper_ignore ')]", $this->readability->dom);
// check for matches
if ($elems && $elems->length > 0) {
$this->debug('Stripping '.$elems->length.' .entry-unrelated,.instapaper_ignore elements');
for ($i=$elems->length-1; $i >= 0; $i--) {
$elems->item($i)->parentNode->removeChild($elems->item($i));
}
}
// strip elements that contain style="display: none;"
$elems = @$xpath->query("//*[contains(@style,'display:none')]", $this->readability->dom);
// check for matches
if ($elems && $elems->length > 0) {
$this->debug('Stripping '.$elems->length.' elements with inline display:none style');
for ($i=$elems->length-1; $i >= 0; $i--) {
$elems->item($i)->parentNode->removeChild($elems->item($i));
}
}
// try to get body
foreach ($this->config->body as $pattern) {
$elems = @$xpath->query($pattern, $this->readability->dom);
// check for matches
if ($elems && $elems->length > 0) {
$this->debug('Body matched');
if ($elems->length == 1) {
$this->body = $elems->item(0);
// prune (clean up elements that may not be content)
if ($this->config->prune) {
$this->debug('Pruning content');
$this->readability->prepArticle($this->body);
}
break;
} else {
$this->body = $this->readability->dom->createElement('div');
$this->debug($elems->length.' body elems found');
foreach ($elems as $elem) {
if (!isset($elem->parentNode)) continue;
$isDescendant = false;
foreach ($this->body->childNodes as $parent) {
if ($this->isDescendant($parent, $elem)) {
$isDescendant = true;
break;
}
}
if ($isDescendant) {
$this->debug('Element is child of another body element, skipping.');
} else {
// prune (clean up elements that may not be content)
if ($this->config->prune) {
$this->debug('Pruning content');
$this->readability->prepArticle($elem);
}
$this->debug('Element added to body');
$this->body->appendChild($elem);
}
}
}
}
}
// auto detect?
$detect_title = $detect_body = $detect_author = $detect_date = false;
// detect title?
if (!isset($this->title)) {
if (empty($this->config->title) || $this->config->autodetect_on_failure) {
$detect_title = true;
}
}
// detect body?
if (!isset($this->body)) {
if (empty($this->config->body) || $this->config->autodetect_on_failure) {
$detect_body = true;
}
}
// detect author?
if (empty($this->author)) {
if (empty($this->config->author) || $this->config->autodetect_on_failure) {
$detect_author = true;
}
}
// detect date?
if (!isset($this->date)) {
if (empty($this->config->date) || $this->config->autodetect_on_failure) {
$detect_date = true;
}
}
// check for hNews
if ($detect_title || $detect_body) {
// check for hentry
$elems = @$xpath->query("//*[contains(concat(' ',normalize-space(@class),' '),' hentry ')]", $this->readability->dom);
if ($elems && $elems->length > 0) {
$this->debug('hNews: found hentry');
$hentry = $elems->item(0);
if ($detect_title) {
// check for entry-title
$elems = @$xpath->query(".//*[contains(concat(' ',normalize-space(@class),' '),' entry-title ')]", $hentry);
if ($elems && $elems->length > 0) {
$this->debug('hNews: found entry-title');
$this->title = $elems->item(0)->textContent;
// remove title from document
$elems->item(0)->parentNode->removeChild($elems->item(0));
$detect_title = false;
}
}
if ($detect_date) {
// check for time element with pubdate attribute
$elems = @$xpath->query(".//time[@pubdate] | .//abbr[contains(concat(' ',normalize-space(@class),' '),' published ')]", $hentry);
if ($elems && $elems->length > 0) {
$this->debug('hNews: found publication date');
$this->date = strtotime(trim($elems->item(0)->textContent));
// remove date from document
//$elems->item(0)->parentNode->removeChild($elems->item(0));
if ($this->date) {
$detect_date = false;
} else {
$this->date = null;
}
}
}
if ($detect_author) {
// check for time element with pubdate attribute
$elems = @$xpath->query(".//*[contains(concat(' ',normalize-space(@class),' '),' vcard ') and (contains(concat(' ',normalize-space(@class),' '),' author ') or contains(concat(' ',normalize-space(@class),' '),' byline '))]", $hentry);
if ($elems && $elems->length > 0) {
$this->debug('hNews: found author');
$author = $elems->item(0);
$fn = @$xpath->query(".//*[contains(concat(' ',normalize-space(@class),' '),' fn ')]", $author);
if ($fn && $fn->length > 0) {
foreach ($fn as $_fn) {
if (trim($_fn->textContent) != '') {
$this->author[] = trim($_fn->textContent);
}
}
} else {
if (trim($author->textContent) != '') {
$this->author[] = trim($author->textContent);
}
}
$detect_author = empty($this->author);
}
}
// check for entry-content.
// according to hAtom spec, if there are multiple elements marked entry-content,
// we include all of these in the order they appear - see http://microformats.org/wiki/hatom#Entry_Content
if ($detect_body) {
$elems = @$xpath->query(".//*[contains(concat(' ',normalize-space(@class),' '),' entry-content ')]", $hentry);
if ($elems && $elems->length > 0) {
$this->debug('hNews: found entry-content');
if ($elems->length == 1) {
// what if it's empty? (some sites misuse hNews - place their content outside an empty entry-content element)
$e = $elems->item(0);
if (($e->tagName == 'img') || (trim($e->textContent) != '')) {
$this->body = $elems->item(0);
// prune (clean up elements that may not be content)
if ($this->config->prune) {
$this->debug('Pruning content');
$this->readability->prepArticle($this->body);
}
$detect_body = false;
} else {
$this->debug('hNews: skipping entry-content - appears not to contain content');
}
unset($e);
} else {
$this->body = $this->readability->dom->createElement('div');
$this->debug($elems->length.' entry-content elems found');
foreach ($elems as $elem) {
if (!isset($elem->parentNode)) continue;
$isDescendant = false;
foreach ($this->body->childNodes as $parent) {
if ($this->isDescendant($parent, $elem)) {
$isDescendant = true;
break;
}
}
if ($isDescendant) {
$this->debug('Element is child of another body element, skipping.');
} else {
// prune (clean up elements that may not be content)
if ($this->config->prune) {
$this->debug('Pruning content');
$this->readability->prepArticle($elem);
}
$this->debug('Element added to body');
$this->body->appendChild($elem);
}
}
$detect_body = false;
}
}
}
}
}
// check for elements marked with instapaper_title
if ($detect_title) {
// check for instapaper_title
$elems = @$xpath->query("//*[contains(concat(' ',normalize-space(@class),' '),' instapaper_title ')]", $this->readability->dom);
if ($elems && $elems->length > 0) {
$this->debug('title found (.instapaper_title)');
$this->title = $elems->item(0)->textContent;
// remove title from document
$elems->item(0)->parentNode->removeChild($elems->item(0));
$detect_title = false;
}
}
// check for elements marked with instapaper_body
if ($detect_body) {
$elems = @$xpath->query("//*[contains(concat(' ',normalize-space(@class),' '),' instapaper_body ')]", $this->readability->dom);
if ($elems && $elems->length > 0) {
$this->debug('body found (.instapaper_body)');
$this->body = $elems->item(0);
// prune (clean up elements that may not be content)
if ($this->config->prune) {
$this->debug('Pruning content');
$this->readability->prepArticle($this->body);
}
$detect_body = false;
}
}
// Find author in rel="author" marked element
// We only use this if there's exactly one.
// If there's more than one, it could indicate more than
// one author, but it could also indicate that we're processing
// a page listing different articles with different authors.
if ($detect_author) {
$elems = @$xpath->query("//a[contains(concat(' ',normalize-space(@rel),' '),' author ')]", $this->readability->dom);
if ($elems && $elems->length == 1) {
$this->debug('Author found (rel="author")');
$author = trim($elems->item(0)->textContent);
if ($author != '') {
$this->author[] = $author;
$detect_author = false;
}
}
}
// Find date in pubdate marked time element
// For the same reason given above, we only use this
// if there's exactly one element.
if ($detect_date) {
$elems = @$xpath->query("//time[@pubdate]", $this->readability->dom);
if ($elems && $elems->length == 1) {
$this->debug('Date found (pubdate marked time element)');
$this->date = strtotime(trim($elems->item(0)->textContent));
// remove date from document
//$elems->item(0)->parentNode->removeChild($elems->item(0));
if ($this->date) {
$detect_date = false;
} else {
$this->date = null;
}
}
}
// still missing title or body, so we detect using Readability
if ($detect_title || $detect_body) {
$this->debug('Using Readability');
// clone body if we're only using Readability for title (otherwise it may interfere with body element)
if (isset($this->body)) $this->body = $this->body->cloneNode(true);
$success = $this->readability->init();
}
if ($detect_title) {
$this->debug('Detecting title');
$this->title = $this->readability->getTitle()->textContent;
}
if ($detect_body && $success) {
$this->debug('Detecting body');
$this->body = $this->readability->getContent();
if ($this->body->childNodes->length == 1 && $this->body->firstChild->nodeType === XML_ELEMENT_NODE) {
$this->body = $this->body->firstChild;
}
// prune (clean up elements that may not be content)
if ($this->config->prune) {
$this->debug('Pruning content');
$this->readability->prepArticle($this->body);
}
}
if (isset($this->body)) {
// remove scripts
$this->readability->removeScripts($this->body);
// remove any h1-h6 elements that appear as first thing in the body
// and which match our title
if (isset($this->title) && ($this->title != '')) {
$firstChild = $this->body->firstChild;
while ($firstChild->nodeType && ($firstChild->nodeType !== XML_ELEMENT_NODE)) {
$firstChild = $firstChild->nextSibling;
}
if (($firstChild->nodeType === XML_ELEMENT_NODE)
&& in_array(strtolower($firstChild->tagName), array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'))
&& (strtolower(trim($firstChild->textContent)) == strtolower(trim($this->title)))) {
$this->body->removeChild($firstChild);
}
}
$this->success = true;
}
// if we've had no success and we've used tidy, there's a chance
// that tidy has messed up. So let's try again without tidy...
if (!$this->success && $tidied && $smart_tidy) {
$this->debug('Trying again without tidy');
$this->process($original_html, $url, false);
}
return $this->success;
}
private function isDescendant(DOMElement $parent, DOMElement $child) {
$node = $child->parentNode;
while ($node != null) {
if ($node->isSameNode($parent)) return true;
$node = $node->parentNode;
}
return false;
}
public function getContent() {
return $this->body;
}
public function getTitle() {
return $this->title;
}
public function getAuthors() {
return $this->author;
}
public function getLanguage() {
return $this->language;
}
public function getDate() {
return $this->date;
}
public function getSiteConfig() {
return $this->config;
}
}
?>

View File

@ -0,0 +1,184 @@
<?php
/**
* Site Config
*
* Each instance of this class should hold extraction patterns and other directives
* for a website. See ContentExtractor class to see how it's used.
*
* @version 0.6
* @date 2011-10-30
* @author Keyvan Minoukadeh
* @copyright 2011 Keyvan Minoukadeh
* @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3
*/
class SiteConfig
{
// Use first matching element as title (0 or more xpath expressions)
public $title = array();
// Use first matching element as body (0 or more xpath expressions)
public $body = array();
// Use first matching element as author (0 or more xpath expressions)
public $author = array();
// Use first matching element as date (0 or more xpath expressions)
public $date = array();
// Strip elements matching these xpath expressions (0 or more)
public $strip = array();
// Strip elements which contain these strings (0 or more) in the id or class attribute
public $strip_id_or_class = array();
// Strip images which contain these strings (0 or more) in the src attribute
public $strip_image_src = array();
// Additional HTTP headers to send
// NOT YET USED
public $http_header = array();
// Process HTML with tidy before creating DOM
public $tidy = true;
// Autodetect title/body if xpath expressions fail to produce results.
// Note that this applies to title and body separately, ie.
// * if we get a body match but no title match, this option will determine whether we autodetect title
// * if neither match, this determines whether we autodetect title and body.
// Also note that this only applies when there is at least one xpath expression in title or body, ie.
// * if title and body are both empty (no xpath expressions), this option has no effect (both title and body will be auto-detected)
// * if there's an xpath expression for title and none for body, body will be auto-detected and this option will determine whether we auto-detect title if the xpath expression for it fails to produce results.
// Usage scenario: you want to extract something specific from a set of URLs, e.g. a table, and if the table is not found, you want to ignore the entry completely. Auto-detection is unlikely to succeed here, so you construct your patterns and set this option to false. Another scenario may be a site where auto-detection has proven to fail (or worse, picked up the wrong content).
public $autodetect_on_failure = true;
// Clean up content block - attempt to remove elements that appear to be superfluous
public $prune = true;
// Test URL - if present, can be used to test the config above
public $test_url = null;
// Single-page link - should identify a link element or URL pointing to the page holding the entire article
// This is useful for sites which split their articles across multiple pages. Links to such pages tend to
// display the first page with links to the other pages at the bottom. Often there is also a link to a page
// which displays the entire article on one page (e.g. 'print view').
// This should be an XPath expression identifying the link to that page. If present and we find a match,
// we will retrieve that page and the rest of the options in this config will be applied to the new page.
public $single_page_link = array();
// Single-page link in feed? - same as above, but patterns applied to item description HTML taken from feed
public $single_page_link_in_feed = array();
// TODO: which parser to use for turning raw HTML into a DOMDocument
public $parser = 'libxml';
// String replacement to be made on HTML before processing begins
public $replace_string = array();
// the options below cannot be set in the config files which this class represents
public static $debug = false;
protected static $config_path;
protected static $config_path_fallback;
protected static $config_cache = array();
const HOSTNAME_REGEX = '/^(([a-zA-Z0-9-]*[a-zA-Z0-9])\.)*([A-Za-z0-9-]*[A-Za-z0-9])$/';
protected static function debug($msg) {
if (self::$debug) {
$mem = round(memory_get_usage()/1024, 2);
$memPeak = round(memory_get_peak_usage()/1024, 2);
echo '* ',$msg;
echo ' - mem used: ',$mem," (peak: $memPeak)\n";
ob_flush();
flush();
}
}
public static function set_config_path($path, $fallback=null) {
self::$config_path = $path;
self::$config_path_fallback = $fallback;
}
public static function add_to_cache($host, SiteConfig $config) {
$host = strtolower($host);
self::$config_cache[$host] = $config;
}
// returns SiteConfig instance if an appropriate one is found, false otherwise
public static function build($host) {
$host = strtolower($host);
if (substr($host, 0, 4) == 'www.') $host = substr($host, 4);
if (!$host || (strlen($host) > 200) || !preg_match(self::HOSTNAME_REGEX, $host)) return false;
// check for site configuration
$try = array($host);
$split = explode('.', $host);
if (count($split) > 1) {
array_shift($split);
$try[] = '.'.implode('.', $split);
}
foreach ($try as $h) {
if (array_key_exists($h, self::$config_cache)) {
self::debug("... cached ($h)");
return self::$config_cache[$h];
} elseif (file_exists(self::$config_path."/$h.txt")) {
self::debug("... from file ($h)");
$file = self::$config_path."/$h.txt";
break;
}
}
if (!isset($file)) {
if (isset(self::$config_path_fallback)) {
self::debug("... trying fallback ($host)");
foreach ($try as $h) {
if (file_exists(self::$config_path_fallback."/$h.txt")) {
self::debug("... from fallback file ($h)");
$file = self::$config_path_fallback."/$h.txt";
break;
}
}
if (!isset($file)) {
self::debug("... no match in fallback directory");
return false;
}
} else {
self::debug("... no match ($host)");
return false;
}
}
$config_file = file($file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
if (!$config_file || !is_array($config_file)) return false;
$config = new SiteConfig();
foreach ($config_file as $line) {
$line = trim($line);
// skip comments, empty lines
if ($line == '' || $line[0] == '#') continue;
// get command
$command = explode(':', $line, 2);
// if there's no colon ':', skip this line
if (count($command) != 2) continue;
$val = trim($command[1]);
$command = trim($command[0]);
if ($command == '' || $val == '') continue;
// check for commands where we accept multiple statements
if (in_array($command, array('title', 'body', 'author', 'date', 'strip', 'strip_id_or_class', 'strip_image_src', 'single_page_link', 'single_page_link_in_feed', 'http_header'))) {
array_push($config->$command, $val);
// check for single statement commands that evaluate to true or false
} elseif (in_array($command, array('tidy', 'prune', 'autodetect_on_failure'))) {
$config->$command = ($val == 'yes');
// check for single statement commands stored as strings
} elseif (in_array($command, array('test_url', 'parser'))) {
$config->$command = $val;
} elseif ((substr($command, -1) == ')') && preg_match('!^([a-z0-9_]+)\((.*?)\)$!i', $command, $match)) {
if (in_array($match[1], array('replace_string'))) {
$command = $match[1];
array_push($config->$command, array($match[2], $val));
}
}
}
return $config;
}
}
?>

View File

@ -0,0 +1,24 @@
<?php
// create single item dummy feed object
class DummySingleItemFeed {
public $item;
function __construct($url) { $this->item = new DummySingleItem($url); }
public function get_title() { return ''; }
public function get_description() { return 'Content extracted from '.$this->item->url; }
public function get_link() { return $this->item->url; }
public function get_language() { return false; }
public function get_image_url() { return false; }
public function get_items($start=0, $max=1) { return array(0=>$this->item); }
}
class DummySingleItem {
public $url;
function __construct($url) { $this->url = $url; }
public function get_permalink() { return $this->url; }
public function get_title() { return ''; }
public function get_date($format='') { return false; }
public function get_author($key=0) { return null; }
public function get_authors() { return null; }
public function get_description() { return ''; }
public function get_enclosure($key=0, $prefer=null) { return null; }
public function get_enclosures() { return null; }
}

167
inc/3rdparty/feedwriter/FeedItem.php vendored Normal file
View File

@ -0,0 +1,167 @@
<?php
/**
* Univarsel Feed Writer
*
* FeedItem class - Used as feed element in FeedWriter class
*
* @package UnivarselFeedWriter
* @author Anis uddin Ahmad <anisniit@gmail.com>
* @link http://www.ajaxray.com/projects/rss
*/
class FeedItem
{
private $elements = array(); //Collection of feed elements
private $version;
/**
* Constructor
*
* @param contant (RSS1/RSS2/ATOM) RSS2 is default.
*/
function __construct($version = RSS2)
{
$this->version = $version;
}
/**
* Add an element to elements array
*
* @access public
* @param srting The tag name of an element
* @param srting The content of tag
* @param array Attributes(if any) in 'attrName' => 'attrValue' format
* @return void
*/
public function addElement($elementName, $content, $attributes = null)
{
$this->elements[$elementName]['name'] = $elementName;
$this->elements[$elementName]['content'] = $content;
$this->elements[$elementName]['attributes'] = $attributes;
}
/**
* Set multiple feed elements from an array.
* Elements which have attributes cannot be added by this method
*
* @access public
* @param array array of elements in 'tagName' => 'tagContent' format.
* @return void
*/
public function addElementArray($elementArray)
{
if(! is_array($elementArray)) return;
foreach ($elementArray as $elementName => $content)
{
$this->addElement($elementName, $content);
}
}
/**
* Return the collection of elements in this feed item
*
* @access public
* @return array
*/
public function getElements()
{
return $this->elements;
}
// Wrapper functions ------------------------------------------------------
/**
* Set the 'dscription' element of feed item
*
* @access public
* @param string The content of 'description' element
* @return void
*/
public function setDescription($description)
{
$tag = ($this->version == ATOM)? 'summary' : 'description';
$this->addElement($tag, $description);
}
/**
* @desc Set the 'title' element of feed item
* @access public
* @param string The content of 'title' element
* @return void
*/
public function setTitle($title)
{
$this->addElement('title', $title);
}
/**
* Set the 'date' element of feed item
*
* @access public
* @param string The content of 'date' element
* @return void
*/
public function setDate($date)
{
if(! is_numeric($date))
{
$date = strtotime($date);
}
if($this->version == ATOM)
{
$tag = 'updated';
$value = date(DATE_ATOM, $date);
}
elseif($this->version == RSS2)
{
$tag = 'pubDate';
$value = date(DATE_RSS, $date);
}
else
{
$tag = 'dc:date';
$value = date("Y-m-d", $date);
}
$this->addElement($tag, $value);
}
/**
* Set the 'link' element of feed item
*
* @access public
* @param string The content of 'link' element
* @return void
*/
public function setLink($link)
{
if($this->version == RSS2 || $this->version == RSS1)
{
$this->addElement('link', $link);
}
else
{
$this->addElement('link','',array('href'=>$link));
$this->addElement('id', FeedWriter::uuid($link,'urn:uuid:'));
}
}
/**
* Set the 'encloser' element of feed item
* For RSS 2.0 only
*
* @access public
* @param string The url attribute of encloser tag
* @param string The length attribute of encloser tag
* @param string The type attribute of encloser tag
* @return void
*/
public function setEncloser($url, $length, $type)
{
$attributes = array('url'=>$url, 'length'=>$length, 'type'=>$type);
$this->addElement('enclosure','',$attributes);
}
} // end of class FeedItem
?>

434
inc/3rdparty/feedwriter/FeedWriter.php vendored Normal file
View File

@ -0,0 +1,434 @@
<?php
define('RSS2', 1, true);
define('JSON', 2, true);
define('ATOM', 3, true);
/**
* Univarsel Feed Writer class
*
* Genarate RSS2 or JSON (original: RSS 1.0, RSS2.0 and ATOM Feed)
*
* Modified for FiveFilters.org's Full-Text RSS project
* to allow for inclusion of hubs, JSON output.
* Stripped RSS1 and ATOM support.
*
* @package UnivarselFeedWriter
* @author Anis uddin Ahmad <anisniit@gmail.com>
* @link http://www.ajaxray.com/projects/rss
*/
class FeedWriter
{
private $self = null; // self URL - http://feed2.w3.org/docs/warning/MissingAtomSelfLink.html
private $hubs = array(); // PubSubHubbub hubs
private $channels = array(); // Collection of channel elements
private $items = array(); // Collection of items as object of FeedItem class.
private $data = array(); // Store some other version wise data
private $CDATAEncoding = array(); // The tag names which have to encoded as CDATA
private $xsl = null; // stylesheet to render RSS (used by Chrome)
private $json = null; // JSON object
private $version = null;
/**
* Constructor
*
* @param constant the version constant (RSS2 or JSON).
*/
function __construct($version = RSS2)
{
$this->version = $version;
// Setting default value for assential channel elements
$this->channels['title'] = $version . ' Feed';
$this->channels['link'] = 'http://www.ajaxray.com/blog';
//Tag names to encode in CDATA
$this->CDATAEncoding = array('description', 'content:encoded', 'content', 'subtitle', 'summary');
}
public function setFormat($format) {
$this->version = $format;
}
// Start # public functions ---------------------------------------------
/**
* Set a channel element
* @access public
* @param srting name of the channel tag
* @param string content of the channel tag
* @return void
*/
public function setChannelElement($elementName, $content)
{
$this->channels[$elementName] = $content ;
}
/**
* Set multiple channel elements from an array. Array elements
* should be 'channelName' => 'channelContent' format.
*
* @access public
* @param array array of channels
* @return void
*/
public function setChannelElementsFromArray($elementArray)
{
if(! is_array($elementArray)) return;
foreach ($elementArray as $elementName => $content)
{
$this->setChannelElement($elementName, $content);
}
}
/**
* Genarate the actual RSS/JSON file
*
* @access public
* @return void
*/
public function genarateFeed()
{
if ($this->version == RSS2) {
header('Content-type: text/xml; charset=UTF-8');
} elseif ($this->version == JSON) {
header('Content-type: application/json; charset=UTF-8');
$this->json = new stdClass();
}
$this->printHead();
$this->printChannels();
$this->printItems();
$this->printTale();
if ($this->version == JSON) {
echo json_encode($this->json);
}
}
/**
* Create a new FeedItem.
*
* @access public
* @return object instance of FeedItem class
*/
public function createNewItem()
{
$Item = new FeedItem($this->version);
return $Item;
}
/**
* Add a FeedItem to the main class
*
* @access public
* @param object instance of FeedItem class
* @return void
*/
public function addItem($feedItem)
{
$this->items[] = $feedItem;
}
// Wrapper functions -------------------------------------------------------------------
/**
* Set the 'title' channel element
*
* @access public
* @param srting value of 'title' channel tag
* @return void
*/
public function setTitle($title)
{
$this->setChannelElement('title', $title);
}
/**
* Add a hub to the channel element
*
* @access public
* @param string URL
* @return void
*/
public function addHub($hub)
{
$this->hubs[] = $hub;
}
/**
* Set XSL URL
*
* @access public
* @param string URL
* @return void
*/
public function setXsl($xsl)
{
$this->xsl = $xsl;
}
/**
* Set self URL
*
* @access public
* @param string URL
* @return void
*/
public function setSelf($self)
{
$this->self = $self;
}
/**
* Set the 'description' channel element
*
* @access public
* @param srting value of 'description' channel tag
* @return void
*/
public function setDescription($desciption)
{
$tag = ($this->version == ATOM)? 'subtitle' : 'description';
$this->setChannelElement($tag, $desciption);
}
/**
* Set the 'link' channel element
*
* @access public
* @param srting value of 'link' channel tag
* @return void
*/
public function setLink($link)
{
$this->setChannelElement('link', $link);
}
/**
* Set the 'image' channel element
*
* @access public
* @param srting title of image
* @param srting link url of the imahe
* @param srting path url of the image
* @return void
*/
public function setImage($title, $link, $url)
{
$this->setChannelElement('image', array('title'=>$title, 'link'=>$link, 'url'=>$url));
}
// End # public functions ----------------------------------------------
// Start # private functions ----------------------------------------------
/**
* Prints the xml and rss namespace
*
* @access private
* @return void
*/
private function printHead()
{
if ($this->version == RSS2)
{
$out = '<?xml version="1.0" encoding="utf-8"?>'."\n";
if ($this->xsl) $out .= '<?xml-stylesheet type="text/xsl" href="'.htmlspecialchars($this->xsl).'"?>' . PHP_EOL;
$out .= '<rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:media="http://search.yahoo.com/mrss/">' . PHP_EOL;
echo $out;
}
elseif ($this->version == JSON)
{
$this->json->rss = array('@attributes' => array('version' => '2.0'));
}
}
/**
* Closes the open tags at the end of file
*
* @access private
* @return void
*/
private function printTale()
{
if ($this->version == RSS2)
{
echo '</channel>',PHP_EOL,'</rss>';
}
// do nothing for JSON
}
/**
* Creates a single node as xml format
*
* @access private
* @param string name of the tag
* @param mixed tag value as string or array of nested tags in 'tagName' => 'tagValue' format
* @param array Attributes(if any) in 'attrName' => 'attrValue' format
* @return string formatted xml tag
*/
private function makeNode($tagName, $tagContent, $attributes = null)
{
if ($this->version == RSS2)
{
$nodeText = '';
$attrText = '';
if (is_array($attributes))
{
foreach ($attributes as $key => $value)
{
$attrText .= " $key=\"$value\" ";
}
}
$nodeText .= "<{$tagName}{$attrText}>";
if (is_array($tagContent))
{
foreach ($tagContent as $key => $value)
{
$nodeText .= $this->makeNode($key, $value);
}
}
else
{
//$nodeText .= (in_array($tagName, $this->CDATAEncoding))? $tagContent : htmlentities($tagContent);
$nodeText .= htmlspecialchars($tagContent);
}
//$nodeText .= (in_array($tagName, $this->CDATAEncoding))? "]]></$tagName>" : "</$tagName>";
$nodeText .= "</$tagName>";
return $nodeText . PHP_EOL;
}
elseif ($this->version == JSON)
{
$tagName = (string)$tagName;
$tagName = strtr($tagName, ':', '_');
$node = null;
if (!$tagContent && is_array($attributes) && count($attributes))
{
$node = array('@attributes' => $this->json_keys($attributes));
} else {
if (is_array($tagContent)) {
$node = $this->json_keys($tagContent);
} else {
$node = $tagContent;
}
}
return $node;
}
return ''; // should not get here
}
private function json_keys(array $array) {
$new = array();
foreach ($array as $key => $val) {
if (is_string($key)) $key = strtr($key, ':', '_');
if (is_array($val)) {
$new[$key] = $this->json_keys($val);
} else {
$new[$key] = $val;
}
}
return $new;
}
/**
* @desc Print channels
* @access private
* @return void
*/
private function printChannels()
{
//Start channel tag
switch ($this->version)
{
case RSS2:
echo '<channel>' . PHP_EOL;
// add hubs
foreach ($this->hubs as $hub) {
//echo $this->makeNode('link', '', array('rel'=>'hub', 'href'=>$hub, 'xmlns'=>'http://www.w3.org/2005/Atom'));
echo '<link rel="hub" href="'.htmlspecialchars($hub).'" xmlns="http://www.w3.org/2005/Atom" />' . PHP_EOL;
}
// add self
if (isset($this->self)) {
//echo $this->makeNode('link', '', array('rel'=>'self', 'href'=>$this->self, 'xmlns'=>'http://www.w3.org/2005/Atom'));
echo '<link rel="self" href="'.htmlspecialchars($this->self).'" xmlns="http://www.w3.org/2005/Atom" />' . PHP_EOL;
}
//Print Items of channel
foreach ($this->channels as $key => $value)
{
echo $this->makeNode($key, $value);
}
break;
case JSON:
$this->json->rss['channel'] = (object)$this->json_keys($this->channels);
break;
}
}
/**
* Prints formatted feed items
*
* @access private
* @return void
*/
private function printItems()
{
foreach ($this->items as $item)
{
$thisItems = $item->getElements();
echo $this->startItem();
if ($this->version == JSON) {
$json_item = array();
}
foreach ($thisItems as $feedItem )
{
if ($this->version == RSS2) {
echo $this->makeNode($feedItem['name'], $feedItem['content'], $feedItem['attributes']);
} elseif ($this->version == JSON) {
$json_item[strtr($feedItem['name'], ':', '_')] = $this->makeNode($feedItem['name'], $feedItem['content'], $feedItem['attributes']);
}
}
echo $this->endItem();
if ($this->version == JSON) {
if (count($this->items) > 1) {
$this->json->rss['channel']->item[] = $json_item;
} else {
$this->json->rss['channel']->item = $json_item;
}
}
}
}
/**
* Make the starting tag of channels
*
* @access private
* @return void
*/
private function startItem()
{
if ($this->version == RSS2)
{
echo '<item>' . PHP_EOL;
}
// nothing for JSON
}
/**
* Closes feed item tag
*
* @access private
* @return void
*/
private function endItem()
{
if ($this->version == RSS2)
{
echo '</item>' . PHP_EOL;
}
// nothing for JSON
}
// End # private functions ----------------------------------------------
}

View File

@ -0,0 +1,404 @@
<?php
/**
* Cookie Jar
*
* PHP class for handling cookies, as defined by the Netscape spec:
* <http://curl.haxx.se/rfc/cookie_spec.html>
*
* This class should be used to handle cookies (storing cookies from HTTP response messages, and
* sending out cookies in HTTP request messages). This has been adapted for FiveFilters.org
* from the original version used in HTTP Navigator. See http://www.keyvan.net/code/http-navigator/
*
* This class is mainly based on Cookies.pm <http://search.cpan.org/author/GAAS/libwww-perl-5.65/
* lib/HTTP/Cookies.pm> from the libwww-perl collection <http://www.linpro.no/lwp/>.
* Unlike Cookies.pm, this class only supports the Netscape cookie spec, not RFC 2965.
*
* @version 0.5
* @date 2011-03-15
* @see http://php.net/HttpRequestPool
* @author Keyvan Minoukadeh
* @copyright 2011 Keyvan Minoukadeh
* @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3
*/
class CookieJar
{
/**
* Cookies - array containing all cookies.
*
* <pre>
* Cookies are stored like this:
* [domain][path][name] = array
* where array is:
* 0 => value, 1 => secure, 2 => expires
* </pre>
* @var array
* @access private
*/
public $cookies = array();
public $debug = false;
/**
* Constructor
*/
function __construct() {
}
protected function debug($msg, $file=null, $line=null) {
if ($this->debug) {
$mem = round(memory_get_usage()/1024, 2);
$memPeak = round(memory_get_peak_usage()/1024, 2);
echo '* ',$msg;
if (isset($file, $line)) echo " ($file line $line)";
echo ' - mem used: ',$mem," (peak: $memPeak)\n";
ob_flush();
flush();
}
}
/**
* Get matching cookies
*
* Only use this method if you cannot use add_cookie_header(), for example, if you want to use
* this cookie jar class without using the request class.
*
* @param array $param associative array containing 'domain', 'path', 'secure' keys
* @return string
* @see add_cookie_header()
*/
public function getMatchingCookies($url)
{
if (($parts = @parse_url($url)) && isset($parts['scheme'], $parts['host'], $parts['path'])) {
$param['domain'] = $parts['host'];
$param['path'] = $parts['path'];
$param['secure'] = (strtolower($parts['scheme']) == 'https');
unset($parts);
} else {
return false;
}
// RFC 2965 notes:
// If multiple cookies satisfy the criteria above, they are ordered in
// the Cookie header such that those with more specific Path attributes
// precede those with less specific. Ordering with respect to other
// attributes (e.g., Domain) is unspecified.
$domain = $param['domain'];
if (strpos($domain, '.') === false) $domain .= '.local';
$request_path = $param['path'];
if ($request_path == '') $request_path = '/';
$request_secure = $param['secure'];
$now = time();
$matched_cookies = array();
// domain - find matching domains
$this->debug('Finding matching domains for '.$domain, __FILE__, __LINE__);
while (strpos($domain, '.') !== false) {
if (isset($this->cookies[$domain])) {
$this->debug(' domain match found: '.$domain);
$cookies =& $this->cookies[$domain];
} else {
$domain = $this->_reduce_domain($domain);
continue;
}
// paths - find matching paths starting from most specific
$this->debug(' - Finding matching paths for '.$request_path);
$paths = array_keys($cookies);
usort($paths, array($this, '_cmp_length'));
foreach ($paths as $path) {
// continue to next cookie if request path does not path-match cookie path
if (!$this->_path_match($request_path, $path)) continue;
// loop through cookie names
$this->debug(' path match found: '.$path);
foreach ($cookies[$path] as $name => $values) {
// if this cookie is secure but request isn't, continue to next cookie
if ($values[1] && !$request_secure) continue;
// if cookie is not a session cookie and has expired, continue to next cookie
if (is_int($values[2]) && ($values[2] < $now)) continue;
// cookie matches request
$this->debug(' cookie match: '.$name.'='.$values[0]);
$matched_cookies[] = $name.'='.$values[0];
}
}
$domain = $this->_reduce_domain($domain);
}
// return cookies
return implode('; ', $matched_cookies);
}
/**
* Parse Set-Cookie values.
*
* Only use this method if you cannot use extract_cookies(), for example, if you want to use
* this cookie jar class without using the response class.
*
* @param array $set_cookies array holding 1 or more "Set-Cookie" header values
* @param array $param associative array containing 'host', 'path' keys
* @return void
* @see extract_cookies()
*/
public function storeCookies($url, $set_cookies)
{
if (count($set_cookies) == 0) return;
$param = @parse_url($url);
if (!is_array($param) || !isset($param['host'])) return;
$request_host = $param['host'];
if (strpos($request_host, '.') === false) $request_host .= '.local';
$request_path = @$param['path'];
if ($request_path == '') $request_path = '/';
//
// loop through set-cookie headers
//
foreach ($set_cookies as $set_cookie) {
$this->debug('Parsing: '.$set_cookie);
// temporary cookie store (before adding to jar)
$tmp_cookie = array();
$param = explode(';', $set_cookie);
// loop through params
for ($x=0; $x<count($param); $x++) {
$key_val = explode('=', $param[$x], 2);
if (count($key_val) != 2) {
// if the first param isn't a name=value pair, continue to the next set-cookie
// header
if ($x == 0) continue 2;
// check for secure flag
if (strtolower(trim($key_val[0])) == 'secure') $tmp_cookie['secure'] = true;
// continue to next param
continue;
}
list($key, $val) = array_map('trim', $key_val);
// first name=value pair is the cookie name and value
// the name and value are stored under 'name' and 'value' to avoid conflicts
// with later parameters.
if ($x == 0) {
$tmp_cookie = array('name'=>$key, 'value'=>$val);
continue;
}
$key = strtolower($key);
if (in_array($key, array('expires', 'path', 'domain', 'secure'))) {
$tmp_cookie[$key] = $val;
}
}
//
// set cookie
//
// check domain
if (isset($tmp_cookie['domain']) && ($tmp_cookie['domain'] != $request_host) &&
($tmp_cookie['domain'] != ".$request_host")) {
$domain = $tmp_cookie['domain'];
if ((strpos($domain, '.') === false) && ($domain != 'local')) {
$this->debug(' - domain "'.$domain.'" has no dot and is not a local domain');
continue;
}
if (preg_match('/\.[0-9]+$/', $domain)) {
$this->debug(' - domain "'.$domain.'" appears to be an ip address');
continue;
}
if (substr($domain, 0, 1) != '.') $domain = ".$domain";
if (!$this->_domain_match($request_host, $domain)) {
$this->debug(' - request host "'.$request_host.'" does not domain-match "'.$domain.'"');
continue;
}
} else {
// if domain is not specified in the set-cookie header, domain will default to
// the request host
$domain = $request_host;
}
// check path
if (isset($tmp_cookie['path']) && ($tmp_cookie['path'] != '')) {
$path = urldecode($tmp_cookie['path']);
if (!$this->_path_match($request_path, $path)) {
$this->debug(' - request path "'.$request_path.'" does not path-match "'.$path.'"');
continue;
}
} else {
$path = $request_path;
$path = substr($path, 0, strrpos($path, '/'));
if ($path == '') $path = '/';
}
// check if secure
$secure = (isset($tmp_cookie['secure'])) ? true : false;
// check expiry
if (isset($tmp_cookie['expires'])) {
if (($expires = strtotime($tmp_cookie['expires'])) < 0) {
$expires = null;
}
} else {
$expires = null;
}
// set cookie
$this->set_cookie($domain, $path, $tmp_cookie['name'], $tmp_cookie['value'], $secure, $expires);
}
}
// return array of set-cookie values extracted from HTTP response headers (string $h)
public function extractCookies($h) {
$x = 0;
$lines = 0;
$headers = array();
$last_match = false;
$h = explode("\n", $h);
foreach ($h as $line) {
$line = rtrim($line);
$lines++;
$trimmed_line = trim($line);
if (isset($line_last)) {
// check if we have \r\n\r\n (indicating the end of headers)
// some servers will not use CRLF (\r\n), so we make CR (\r) optional.
// if (preg_match('/\015?\012\015?\012/', $line_last.$line)) {
// break;
// }
// As an alternative, we can check if the current trimmed line is empty
if ($trimmed_line == '') {
break;
}
// check for continuation line...
// RFC 2616 Section 2.2 "Basic Rules":
// HTTP/1.1 header field values can be folded onto multiple lines if the
// continuation line begins with a space or horizontal tab. All linear
// white space, including folding, has the same semantics as SP. A
// recipient MAY replace any linear white space with a single SP before
// interpreting the field value or forwarding the message downstream.
if ($last_match && preg_match('/^\s+(.*)/', $line, $match)) {
// append to previous header value
$headers[$x-1] .= ' '.rtrim($match[1]);
continue;
}
}
$line_last = $line;
// split header name and value
if (preg_match('/^Set-Cookie\s*:\s*(.*)/i', $line, $match)) {
$headers[$x++] = rtrim($match[1]);
$last_match = true;
} else {
$last_match = false;
}
}
return $headers;
}
/**
* Set Cookie
* @param string $domain
* @param string $path
* @param string $name cookie name
* @param string $value cookie value
* @param bool $secure
* @param int $expires expiry time (null if session cookie, <= 0 will delete cookie)
* @return void
*/
function set_cookie($domain, $path, $name, $value, $secure=false, $expires=null)
{
if ($domain == '') return;
if ($path == '') return;
if ($name == '') return;
// check if cookie needs to go
if (isset($expires) && ($expires <= 0)) {
if (isset($this->cookies[$domain][$path][$name])) unset($this->cookies[$domain][$path][$name]);
return;
}
if ($value == '') return;
$this->cookies[$domain][$path][$name] = array($value, $secure, $expires);
return;
}
/**
* Clear cookies - [domain [,path [,name]]] - call method with no arguments to clear all cookies.
* @param string $domain
* @param string $path
* @param string $name
* @return void
*/
function clear($domain=null, $path=null, $name=null)
{
if (!isset($domain)) {
$this->cookies = array();
} elseif (!isset($path)) {
if (isset($this->cookies[$domain])) unset($this->cookies[$domain]);
} elseif (!isset($name)) {
if (isset($this->cookies[$domain][$path])) unset($this->cookies[$domain][$path]);
} elseif (isset($name)) {
if (isset($this->cookies[$domain][$path][$name])) unset($this->cookies[$domain][$path][$name]);
}
}
/**
* Compare string length - used for sorting
* @access private
* @return int
*/
function _cmp_length($a, $b)
{
$la = strlen($a); $lb = strlen($b);
if ($la == $lb) return 0;
return ($la > $lb) ? -1 : 1;
}
/**
* Reduce domain
* @param string $domain
* @return string
* @access private
*/
function _reduce_domain($domain)
{
if ($domain == '') return '';
if (substr($domain, 0, 1) == '.') return substr($domain, 1);
return substr($domain, strpos($domain, '.'));
}
/**
* Path match - check if path1 path-matches path2
*
* From RFC 2965:
* <i>For two strings that represent paths, P1 and P2, P1 path-matches P2
* if P2 is a prefix of P1 (including the case where P1 and P2 string-
* compare equal). Thus, the string /tec/waldo path-matches /tec.</i>
* @param string $path1
* @param string $path2
* @return bool
* @access private
*/
function _path_match($path1, $path2)
{
return (substr($path1, 0, strlen($path2)) == $path2);
}
/**
* Domain match - check if domain1 domain-matches domain2
*
* A few extracts from RFC 2965:
* - A Set-Cookie2 from request-host y.x.foo.com for Domain=.foo.com
* would be rejected, because H is y.x and contains a dot.
*
* - A Set-Cookie2 from request-host x.foo.com for Domain=.foo.com
* would be accepted.
*
* - A Set-Cookie2 with Domain=.com or Domain=.com., will always be
* rejected, because there is no embedded dot.
*
* - A Set-Cookie2 from request-host example for Domain=.local will
* be accepted, because the effective host name for the request-
* host is example.local, and example.local domain-matches .local.
*
* I'm ignoring the first point for now (must check to see how other browsers handle
* this rule for Set-Cookie headers)
*
* @param string $domain1
* @param string $domain2
* @return bool
* @access private
*/
function _domain_match($domain1, $domain2)
{
$domain1 = strtolower($domain1);
$domain2 = strtolower($domain2);
while (strpos($domain1, '.') !== false) {
if ($domain1 == $domain2) return true;
$domain1 = $this->_reduce_domain($domain1);
continue;
}
return false;
}
}
?>

View File

@ -0,0 +1,720 @@
<?php
/**
* Humble HTTP Agent
*
* This class is designed to take advantage of parallel HTTP requests
* offered by PHP's PECL HTTP extension or the curl_multi_* functions.
* For environments which do not have these options, it reverts to standard sequential
* requests (using file_get_contents())
*
* @version 1.0
* @date 2012-02-09
* @see http://php.net/HttpRequestPool
* @author Keyvan Minoukadeh
* @copyright 2011-2012 Keyvan Minoukadeh
* @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3
*/
class HumbleHttpAgent
{
const METHOD_REQUEST_POOL = 1;
const METHOD_CURL_MULTI = 2;
const METHOD_FILE_GET_CONTENTS = 4;
//const UA_BROWSER = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:2.0.1) Gecko/20100101 Firefox/4.0.1';
const UA_BROWSER = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.92 Safari/535.2';
const UA_PHP = 'PHP/5.2';
const REF_GOOGLE = 'http://www.google.co.uk/url?sa=t&source=web&cd=1';
protected $requests = array();
protected $redirectQueue = array();
protected $requestOptions;
protected $maxParallelRequests = 5;
protected $cache = null; //TODO
protected $httpContext;
protected $minimiseMemoryUse = false; //TODO
protected $debug = false;
protected $method;
protected $cookieJar;
public $rewriteHashbangFragment = true; // see http://code.google.com/web/ajaxcrawling/docs/specification.html
public $maxRedirects = 5;
public $userAgentMap = array();
public $rewriteUrls = array();
public $userAgentDefault;
public $referer;
//public $userAgent = 'Mozilla/5.0';
// Prevent certain file/mime types
// HTTP responses which match these content types will
// be returned without body.
public $headerOnlyTypes = array();
// URLs ending with one of these extensions will
// prompt Humble HTTP Agent to send a HEAD request first
// to see if returned content type matches $headerOnlyTypes.
public $headerOnlyClues = array('pdf','mp3','zip','exe','gif','gzip','gz','jpeg','jpg','mpg','mpeg','png','ppt','mov');
//TODO: set max file size
//TODO: normalise headers
function __construct($requestOptions=null, $method=null) {
$this->userAgentDefault = self::UA_BROWSER;
$this->referer = self::REF_GOOGLE;
// set the request method
if (in_array($method, array(1,2,4))) {
$this->method = $method;
} else {
if (class_exists('HttpRequestPool')) {
$this->method = self::METHOD_REQUEST_POOL;
} elseif (function_exists('curl_multi_init')) {
$this->method = self::METHOD_CURL_MULTI;
} else {
$this->method = self::METHOD_FILE_GET_CONTENTS;
}
}
if ($this->method == self::METHOD_CURL_MULTI) {
require_once(dirname(__FILE__).'/RollingCurl.php');
}
// create cookie jar
$this->cookieJar = new CookieJar();
// set request options (redirect must be 0)
$this->requestOptions = array(
'timeout' => 15,
'redirect' => 0 // we handle redirects manually so we can rewrite the new hashbang URLs that are creeping up over the web
// TODO: test onprogress?
);
if (is_array($requestOptions)) {
$this->requestOptions = array_merge($this->requestOptions, $requestOptions);
}
$this->httpContext = array(
'http' => array(
'ignore_errors' => true,
'timeout' => $this->requestOptions['timeout'],
'max_redirects' => $this->requestOptions['redirect'],
'header' => "Accept: */*\r\n"
)
);
}
protected function debug($msg) {
if ($this->debug) {
$mem = round(memory_get_usage()/1024, 2);
$memPeak = round(memory_get_peak_usage()/1024, 2);
echo '* ',$msg;
echo ' - mem used: ',$mem," (peak: $memPeak)\n";
ob_flush();
flush();
}
}
protected function getUserAgent($url, $asArray=false) {
$host = @parse_url($url, PHP_URL_HOST);
if (strtolower(substr($host, 0, 4)) == 'www.') {
$host = substr($host, 4);
}
if ($host) {
$try = array($host);
$split = explode('.', $host);
if (count($split) > 1) {
array_shift($split);
$try[] = '.'.implode('.', $split);
}
foreach ($try as $h) {
if (isset($this->userAgentMap[$h])) {
$ua = $this->userAgentMap[$h];
break;
}
}
}
if (!isset($ua)) $ua = $this->userAgentDefault;
if ($asArray) {
return array('User-Agent' => $ua);
} else {
return 'User-Agent: '.$ua;
}
}
public function rewriteHashbangFragment($url) {
// return $url if there's no '#!'
if (strpos($url, '#!') === false) return $url;
// split $url and rewrite
// TODO: is SimplePie_IRI included?
$iri = new SimplePie_IRI($url);
$fragment = substr($iri->fragment, 1); // strip '!'
$iri->fragment = null;
if (isset($iri->query)) {
parse_str($iri->query, $query);
} else {
$query = array();
}
$query['_escaped_fragment_'] = (string)$fragment;
$iri->query = str_replace('%2F', '/', http_build_query($query)); // needed for some sites
return $iri->get_iri();
}
public function removeFragment($url) {
$pos = strpos($url, '#');
if ($pos === false) {
return $url;
} else {
return substr($url, 0, $pos);
}
}
public function rewriteUrls($url) {
foreach ($this->rewriteUrls as $find => $action) {
if (strpos($url, $find) !== false) {
if (is_array($action)) {
return strtr($url, $action);
}
}
}
return $url;
}
public function enableDebug($bool=true) {
$this->debug = (bool)$bool;
}
public function minimiseMemoryUse($bool = true) {
$this->minimiseMemoryUse = $bool;
}
public function setMaxParallelRequests($max) {
$this->maxParallelRequests = $max;
}
public function validateUrl($url) {
$url = filter_var($url, FILTER_SANITIZE_URL);
$test = filter_var($url, FILTER_VALIDATE_URL, FILTER_FLAG_SCHEME_REQUIRED);
// deal with bug http://bugs.php.net/51192 (present in PHP 5.2.13 and PHP 5.3.2)
if ($test === false) {
$test = filter_var(strtr($url, '-', '_'), FILTER_VALIDATE_URL, FILTER_FLAG_SCHEME_REQUIRED);
}
if ($test !== false && $test !== null && preg_match('!^https?://!', $url)) {
return $url;
} else {
return false;
}
}
public function fetchAll(array $urls) {
$this->fetchAllOnce($urls, $isRedirect=false);
$redirects = 0;
while (!empty($this->redirectQueue) && ++$redirects <= $this->maxRedirects) {
$this->debug("Following redirects #$redirects...");
$this->fetchAllOnce($this->redirectQueue, $isRedirect=true);
}
}
// fetch all URLs without following redirects
public function fetchAllOnce(array $urls, $isRedirect=false) {
if (!$isRedirect) $urls = array_unique($urls);
if (empty($urls)) return;
//////////////////////////////////////////////////////
// parallel (HttpRequestPool)
if ($this->method == self::METHOD_REQUEST_POOL) {
$this->debug('Starting parallel fetch (HttpRequestPool)');
try {
while (count($urls) > 0) {
$this->debug('Processing set of '.min($this->maxParallelRequests, count($urls)));
$subset = array_splice($urls, 0, $this->maxParallelRequests);
$pool = new HttpRequestPool();
foreach ($subset as $orig => $url) {
if (!$isRedirect) $orig = $url;
unset($this->redirectQueue[$orig]);
$this->debug("...$url");
if (!$isRedirect && isset($this->requests[$url])) {
$this->debug("......in memory");
/*
} elseif ($this->isCached($url)) {
$this->debug("......is cached");
if (!$this->minimiseMemoryUse) {
$this->requests[$url] = $this->getCached($url);
}
*/
} else {
$this->debug("......adding to pool");
$req_url = $this->rewriteUrls($url);
$req_url = ($this->rewriteHashbangFragment) ? $this->rewriteHashbangFragment($req_url) : $req_url;
$req_url = $this->removeFragment($req_url);
if (!empty($this->headerOnlyTypes) && !isset($this->requests[$orig]['wrongGuess']) && $this->possibleUnsupportedType($req_url)) {
$_meth = HttpRequest::METH_HEAD;
} else {
$_meth = HttpRequest::METH_GET;
unset($this->requests[$orig]['wrongGuess']);
}
$httpRequest = new HttpRequest($req_url, $_meth, $this->requestOptions);
// send cookies, if we have any
if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) {
$this->debug("......sending cookies: $cookies");
$httpRequest->addHeaders(array('Cookie' => $cookies));
}
//$httpRequest->addHeaders(array('User-Agent' => $this->userAgent));
$httpRequest->addHeaders($this->getUserAgent($req_url, true));
// add referer for picky sites
$httpRequest->addheaders(array('Referer' => $this->referer));
$this->requests[$orig] = array('headers'=>null, 'body'=>null, 'httpRequest'=>$httpRequest);
$this->requests[$orig]['original_url'] = $orig;
$pool->attach($httpRequest);
}
}
// did we get anything into the pool?
if (count($pool) > 0) {
$this->debug('Sending request...');
try {
$pool->send();
} catch (HttpRequestPoolException $e) {
// do nothing
}
$this->debug('Received responses');
foreach($subset as $orig => $url) {
if (!$isRedirect) $orig = $url;
$request = $this->requests[$orig]['httpRequest'];
//$this->requests[$orig]['headers'] = $this->headersToString($request->getResponseHeader());
// getResponseHeader() doesn't return status line, so, for consistency...
$this->requests[$orig]['headers'] = substr($request->getRawResponseMessage(), 0, $request->getResponseInfo('header_size'));
// check content type
// TODO: use getResponseHeader('content-type') or getResponseInfo()
if ($this->headerOnlyType($this->requests[$orig]['headers'])) {
$this->requests[$orig]['body'] = '';
$_header_only_type = true;
$this->debug('Header only type returned');
} else {
$this->requests[$orig]['body'] = $request->getResponseBody();
$_header_only_type = false;
}
$this->requests[$orig]['effective_url'] = $request->getResponseInfo('effective_url');
$this->requests[$orig]['status_code'] = $status_code = $request->getResponseCode();
// is redirect?
if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && $request->getResponseHeader('location')) {
$redirectURL = $request->getResponseHeader('location');
if (!preg_match('!^https?://!i', $redirectURL)) {
$redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url);
}
if ($this->validateURL($redirectURL)) {
$this->debug('Redirect detected. Valid URL: '.$redirectURL);
// store any cookies
$cookies = $request->getResponseHeader('set-cookie');
if ($cookies && !is_array($cookies)) $cookies = array($cookies);
if ($cookies) $this->cookieJar->storeCookies($url, $cookies);
$this->redirectQueue[$orig] = $redirectURL;
} else {
$this->debug('Redirect detected. Invalid URL: '.$redirectURL);
}
} elseif (!$_header_only_type && $request->getMethod() === HttpRequest::METH_HEAD) {
// the response content-type did not match our 'header only' types,
// but we'd issues a HEAD request because we assumed it would. So
// let's queue a proper GET request for this item...
$this->debug('Wrong guess at content-type, queing GET request');
$this->requests[$orig]['wrongGuess'] = true;
$this->redirectQueue[$orig] = $this->requests[$orig]['effective_url'];
}
//die($url.' -multi- '.$request->getResponseInfo('effective_url'));
$pool->detach($request);
unset($this->requests[$orig]['httpRequest'], $request);
/*
if ($this->minimiseMemoryUse) {
if ($this->cache($url)) {
unset($this->requests[$url]);
}
}
*/
}
}
}
} catch (HttpException $e) {
$this->debug($e);
return false;
}
}
//////////////////////////////////////////////////////////
// parallel (curl_multi_*)
elseif ($this->method == self::METHOD_CURL_MULTI) {
$this->debug('Starting parallel fetch (curl_multi_*)');
while (count($urls) > 0) {
$this->debug('Processing set of '.min($this->maxParallelRequests, count($urls)));
$subset = array_splice($urls, 0, $this->maxParallelRequests);
$pool = new RollingCurl(array($this, 'handleCurlResponse'));
$pool->window_size = count($subset);
foreach ($subset as $orig => $url) {
if (!$isRedirect) $orig = $url;
unset($this->redirectQueue[$orig]);
$this->debug("...$url");
if (!$isRedirect && isset($this->requests[$url])) {
$this->debug("......in memory");
/*
} elseif ($this->isCached($url)) {
$this->debug("......is cached");
if (!$this->minimiseMemoryUse) {
$this->requests[$url] = $this->getCached($url);
}
*/
} else {
$this->debug("......adding to pool");
$req_url = $this->rewriteUrls($url);
$req_url = ($this->rewriteHashbangFragment) ? $this->rewriteHashbangFragment($req_url) : $req_url;
$req_url = $this->removeFragment($req_url);
if (!empty($this->headerOnlyTypes) && !isset($this->requests[$orig]['wrongGuess']) && $this->possibleUnsupportedType($req_url)) {
$_meth = 'HEAD';
} else {
$_meth = 'GET';
unset($this->requests[$orig]['wrongGuess']);
}
$headers = array();
//$headers[] = 'User-Agent: '.$this->userAgent;
$headers[] = $this->getUserAgent($req_url);
// add referer for picky sites
$headers[] = 'Referer: '.$this->referer;
// send cookies, if we have any
if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) {
$this->debug("......sending cookies: $cookies");
$headers[] = 'Cookie: '.$cookies;
}
$httpRequest = new RollingCurlRequest($req_url, $_meth, null, $headers, array(
CURLOPT_CONNECTTIMEOUT => $this->requestOptions['timeout'],
CURLOPT_TIMEOUT => $this->requestOptions['timeout']
));
$httpRequest->set_original_url($orig);
$this->requests[$orig] = array('headers'=>null, 'body'=>null, 'httpRequest'=>$httpRequest);
$this->requests[$orig]['original_url'] = $orig; // TODO: is this needed anymore?
$pool->add($httpRequest);
}
}
// did we get anything into the pool?
if (count($pool) > 0) {
$this->debug('Sending request...');
$pool->execute(); // this will call handleCurlResponse() and populate $this->requests[$orig]
$this->debug('Received responses');
foreach($subset as $orig => $url) {
if (!$isRedirect) $orig = $url;
// $this->requests[$orig]['headers']
// $this->requests[$orig]['body']
// $this->requests[$orig]['effective_url']
// check content type
if ($this->headerOnlyType($this->requests[$orig]['headers'])) {
$this->requests[$orig]['body'] = '';
$_header_only_type = true;
$this->debug('Header only type returned');
} else {
$_header_only_type = false;
}
$status_code = $this->requests[$orig]['status_code'];
if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && isset($this->requests[$orig]['location'])) {
$redirectURL = $this->requests[$orig]['location'];
if (!preg_match('!^https?://!i', $redirectURL)) {
$redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url);
}
if ($this->validateURL($redirectURL)) {
$this->debug('Redirect detected. Valid URL: '.$redirectURL);
// store any cookies
$cookies = $this->cookieJar->extractCookies($this->requests[$orig]['headers']);
if (!empty($cookies)) $this->cookieJar->storeCookies($url, $cookies);
$this->redirectQueue[$orig] = $redirectURL;
} else {
$this->debug('Redirect detected. Invalid URL: '.$redirectURL);
}
} elseif (!$_header_only_type && $this->requests[$orig]['method'] == 'HEAD') {
// the response content-type did not match our 'header only' types,
// but we'd issues a HEAD request because we assumed it would. So
// let's queue a proper GET request for this item...
$this->debug('Wrong guess at content-type, queing GET request');
$this->requests[$orig]['wrongGuess'] = true;
$this->redirectQueue[$orig] = $this->requests[$orig]['effective_url'];
}
// die($url.' -multi- '.$request->getResponseInfo('effective_url'));
unset($this->requests[$orig]['httpRequest'], $this->requests[$orig]['method']);
}
}
}
}
//////////////////////////////////////////////////////
// sequential (file_get_contents)
else {
$this->debug('Starting sequential fetch (file_get_contents)');
$this->debug('Processing set of '.count($urls));
foreach ($urls as $orig => $url) {
if (!$isRedirect) $orig = $url;
unset($this->redirectQueue[$orig]);
$this->debug("...$url");
if (!$isRedirect && isset($this->requests[$url])) {
$this->debug("......in memory");
/*
} elseif ($this->isCached($url)) {
$this->debug("......is cached");
if (!$this->minimiseMemoryUse) {
$this->requests[$url] = $this->getCached($url);
}
*/
} else {
$this->debug("Sending request for $url");
$this->requests[$orig]['original_url'] = $orig;
$req_url = $this->rewriteUrls($url);
$req_url = ($this->rewriteHashbangFragment) ? $this->rewriteHashbangFragment($req_url) : $req_url;
$req_url = $this->removeFragment($req_url);
// send cookies, if we have any
$httpContext = $this->httpContext;
$httpContext['http']['header'] .= $this->getUserAgent($req_url)."\r\n";
// add referer for picky sites
$httpContext['http']['header'] .= 'Referer: '.$this->referer."\r\n";
if ($cookies = $this->cookieJar->getMatchingCookies($req_url)) {
$this->debug("......sending cookies: $cookies");
$httpContext['http']['header'] .= 'Cookie: '.$cookies."\r\n";
}
if (false !== ($html = @file_get_contents($req_url, false, stream_context_create($httpContext)))) {
$this->debug('Received response');
// get status code
if (!isset($http_response_header[0]) || !preg_match('!^HTTP/\d+\.\d+\s+(\d+)!', trim($http_response_header[0]), $match)) {
$this->debug('Error: no status code found');
// TODO: handle error - no status code
} else {
$this->requests[$orig]['headers'] = $this->headersToString($http_response_header, false);
// check content type
if ($this->headerOnlyType($this->requests[$orig]['headers'])) {
$this->requests[$orig]['body'] = '';
} else {
$this->requests[$orig]['body'] = $html;
}
$this->requests[$orig]['effective_url'] = $req_url;
$this->requests[$orig]['status_code'] = $status_code = (int)$match[1];
unset($match);
// handle redirect
if (preg_match('/^Location:(.*?)$/m', $this->requests[$orig]['headers'], $match)) {
$this->requests[$orig]['location'] = trim($match[1]);
}
if ((in_array($status_code, array(300, 301, 302, 303, 307)) || $status_code > 307 && $status_code < 400) && isset($this->requests[$orig]['location'])) {
$redirectURL = $this->requests[$orig]['location'];
if (!preg_match('!^https?://!i', $redirectURL)) {
$redirectURL = SimplePie_Misc::absolutize_url($redirectURL, $url);
}
if ($this->validateURL($redirectURL)) {
$this->debug('Redirect detected. Valid URL: '.$redirectURL);
// store any cookies
$cookies = $this->cookieJar->extractCookies($this->requests[$orig]['headers']);
if (!empty($cookies)) $this->cookieJar->storeCookies($url, $cookies);
$this->redirectQueue[$orig] = $redirectURL;
} else {
$this->debug('Redirect detected. Invalid URL: '.$redirectURL);
}
}
}
} else {
$this->debug('Error retrieving URL');
//print_r($req_url);
//print_r($http_response_header);
//print_r($html);
// TODO: handle error - failed to retrieve URL
}
}
}
}
}
public function handleCurlResponse($response, $info, $request) {
$orig = $request->url_original;
$this->requests[$orig]['headers'] = substr($response, 0, $info['header_size']);
$this->requests[$orig]['body'] = substr($response, $info['header_size']);
$this->requests[$orig]['method'] = $request->method;
$this->requests[$orig]['effective_url'] = $info['url'];
$this->requests[$orig]['status_code'] = (int)$info['http_code'];
if (preg_match('/^Location:(.*?)$/m', $this->requests[$orig]['headers'], $match)) {
$this->requests[$orig]['location'] = trim($match[1]);
}
}
protected function headersToString(array $headers, $associative=true) {
if (!$associative) {
return implode("\n", $headers);
} else {
$str = '';
foreach ($headers as $key => $val) {
if (is_array($val)) {
foreach ($val as $v) $str .= "$key: $v\n";
} else {
$str .= "$key: $val\n";
}
}
return rtrim($str);
}
}
public function get($url, $remove=false, $gzdecode=true) {
$url = "$url";
if (isset($this->requests[$url]) && isset($this->requests[$url]['body'])) {
$this->debug("URL already fetched - in memory ($url, effective: {$this->requests[$url]['effective_url']})");
$response = $this->requests[$url];
/*
} elseif ($this->isCached($url)) {
$this->debug("URL already fetched - in disk cache ($url)");
$response = $this->getCached($url);
$this->requests[$url] = $response;
*/
} else {
$this->debug("Fetching URL ($url)");
$this->fetchAll(array($url));
if (isset($this->requests[$url]) && isset($this->requests[$url]['body'])) {
$response = $this->requests[$url];
} else {
$this->debug("Request failed");
$response = false;
}
}
/*
if ($this->minimiseMemoryUse && $response) {
$this->cache($url);
unset($this->requests[$url]);
}
*/
if ($remove && $response) unset($this->requests[$url]);
if ($gzdecode && stripos($response['headers'], 'Content-Encoding: gzip')) {
if ($html = gzdecode($response['body'])) {
$response['body'] = $html;
}
}
return $response;
}
public function parallelSupport() {
return class_exists('HttpRequestPool') || function_exists('curl_multi_init');
}
private function headerOnlyType($headers) {
if (preg_match('!^Content-Type:\s*(([a-z-]+)/([^;\r\n ]+))!im', $headers, $match)) {
// look for full mime type (e.g. image/jpeg) or just type (e.g. image)
$match[1] = strtolower(trim($match[1]));
$match[2] = strtolower(trim($match[2]));
foreach (array($match[1], $match[2]) as $mime) {
if (in_array($mime, $this->headerOnlyTypes)) return true;
}
}
return false;
}
private function possibleUnsupportedType($url) {
$path = @parse_url($url, PHP_URL_PATH);
if ($path && strpos($path, '.') !== false) {
$ext = strtolower(trim(pathinfo($path, PATHINFO_EXTENSION)));
return in_array($ext, $this->headerOnlyClues);
}
return false;
}
}
// gzdecode from http://www.php.net/manual/en/function.gzdecode.php#82930
if (!function_exists('gzdecode')) {
function gzdecode($data,&$filename='',&$error='',$maxlength=null)
{
$len = strlen($data);
if ($len < 18 || strcmp(substr($data,0,2),"\x1f\x8b")) {
$error = "Not in GZIP format.";
return null; // Not GZIP format (See RFC 1952)
}
$method = ord(substr($data,2,1)); // Compression method
$flags = ord(substr($data,3,1)); // Flags
if ($flags & 31 != $flags) {
$error = "Reserved bits not allowed.";
return null;
}
// NOTE: $mtime may be negative (PHP integer limitations)
$mtime = unpack("V", substr($data,4,4));
$mtime = $mtime[1];
$xfl = substr($data,8,1);
$os = substr($data,8,1);
$headerlen = 10;
$extralen = 0;
$extra = "";
if ($flags & 4) {
// 2-byte length prefixed EXTRA data in header
if ($len - $headerlen - 2 < 8) {
return false; // invalid
}
$extralen = unpack("v",substr($data,8,2));
$extralen = $extralen[1];
if ($len - $headerlen - 2 - $extralen < 8) {
return false; // invalid
}
$extra = substr($data,10,$extralen);
$headerlen += 2 + $extralen;
}
$filenamelen = 0;
$filename = "";
if ($flags & 8) {
// C-style string
if ($len - $headerlen - 1 < 8) {
return false; // invalid
}
$filenamelen = strpos(substr($data,$headerlen),chr(0));
if ($filenamelen === false || $len - $headerlen - $filenamelen - 1 < 8) {
return false; // invalid
}
$filename = substr($data,$headerlen,$filenamelen);
$headerlen += $filenamelen + 1;
}
$commentlen = 0;
$comment = "";
if ($flags & 16) {
// C-style string COMMENT data in header
if ($len - $headerlen - 1 < 8) {
return false; // invalid
}
$commentlen = strpos(substr($data,$headerlen),chr(0));
if ($commentlen === false || $len - $headerlen - $commentlen - 1 < 8) {
return false; // Invalid header format
}
$comment = substr($data,$headerlen,$commentlen);
$headerlen += $commentlen + 1;
}
$headercrc = "";
if ($flags & 2) {
// 2-bytes (lowest order) of CRC32 on header present
if ($len - $headerlen - 2 < 8) {
return false; // invalid
}
$calccrc = crc32(substr($data,0,$headerlen)) & 0xffff;
$headercrc = unpack("v", substr($data,$headerlen,2));
$headercrc = $headercrc[1];
if ($headercrc != $calccrc) {
$error = "Header checksum failed.";
return false; // Bad header CRC
}
$headerlen += 2;
}
// GZIP FOOTER
$datacrc = unpack("V",substr($data,-8,4));
$datacrc = sprintf('%u',$datacrc[1] & 0xFFFFFFFF);
$isize = unpack("V",substr($data,-4));
$isize = $isize[1];
// decompression:
$bodylen = $len-$headerlen-8;
if ($bodylen < 1) {
// IMPLEMENTATION BUG!
return null;
}
$body = substr($data,$headerlen,$bodylen);
$data = "";
if ($bodylen > 0) {
switch ($method) {
case 8:
// Currently the only supported compression method:
$data = gzinflate($body,$maxlength);
break;
default:
$error = "Unknown compression method.";
return false;
}
} // zero-byte body content is allowed
// Verifiy CRC32
$crc = sprintf("%u",crc32($data));
$crcOK = $crc == $datacrc;
$lenOK = $isize == strlen($data);
if (!$lenOK || !$crcOK) {
$error = ( $lenOK ? '' : 'Length check FAILED. ') . ( $crcOK ? '' : 'Checksum FAILED.');
return false;
}
return $data;
}
}
?>

View File

@ -0,0 +1,402 @@
<?php
/*
Authored by Josh Fraser (www.joshfraser.com)
Released under Apache License 2.0
Maintained by Alexander Makarov, http://rmcreative.ru/
Modified by Keyvan Minoukadeh for the Five Filters project: http://fivefilters.org
*/
/**
* Class that represent a single curl request
*/
class RollingCurlRequest {
public $url = false;
public $url_original = false; // used for tracking redirects
public $method = 'GET';
public $post_data = null;
public $headers = null;
public $options = null;
/**
* @param string $url
* @param string $method
* @param $post_data
* @param $headers
* @param $options
* @return void
*/
function __construct($url, $method = "GET", $post_data = null, $headers = null, $options = null) {
$this->url = $url;
$this->url_original = $url;
$this->method = $method;
$this->post_data = $post_data;
$this->headers = $headers;
$this->options = $options;
}
/**
* @param string $url
* @return void
*/
public function set_original_url($url) {
$this->url_original = $url;
}
/**
* @return void
*/
public function __destruct() {
unset($this->url, $this->url_original, $this->method, $this->post_data, $this->headers, $this->options);
}
}
/**
* RollingCurl custom exception
*/
class RollingCurlException extends Exception {
}
/**
* Class that holds a rolling queue of curl requests.
*
* @throws RollingCurlException
*/
class RollingCurl implements Countable {
/**
* @var int
*
* Window size is the max number of simultaneous connections allowed.
*
* REMEMBER TO RESPECT THE SERVERS:
* Sending too many requests at one time can easily be perceived
* as a DOS attack. Increase this window_size if you are making requests
* to multiple servers or have permission from the receving server admins.
*/
private $window_size = 5;
/**
* @var float
*
* Timeout is the timeout used for curl_multi_select.
*/
private $timeout = 10;
/**
* @var string|array
*
* Callback function to be applied to each result.
*/
private $callback;
/**
* @var array
*
* Set your base options that you want to be used with EVERY request.
*/
protected $options = array(
CURLOPT_SSL_VERIFYPEER => 0,
CURLOPT_RETURNTRANSFER => 1,
CURLOPT_CONNECTTIMEOUT => 30,
CURLOPT_TIMEOUT => 30
);
/**
* @var array
*/
private $headers = array();
/**
* @var Request[]
*
* The request queue
*/
private $requests = array();
/**
* @var RequestMap[]
*
* Maps handles to request indexes
*/
private $requestMap = array();
/**
* @param $callback
* Callback function to be applied to each result.
*
* Can be specified as 'my_callback_function'
* or array($object, 'my_callback_method').
*
* Function should take three parameters: $response, $info, $request.
* $response is response body, $info is additional curl info.
* $request is the original request
*
* @return void
*/
function __construct($callback = null) {
$this->callback = $callback;
}
/**
* @param string $name
* @return mixed
*/
public function __get($name) {
return (isset($this->{$name})) ? $this->{$name} : null;
}
/**
* @param string $name
* @param mixed $value
* @return bool
*/
public function __set($name, $value) {
// append the base options & headers
if ($name == "options" || $name == "headers") {
$this->{$name} = $value + $this->{$name};
} else {
$this->{$name} = $value;
}
return true;
}
/**
* Count number of requests added (Countable interface)
*
* @return int
*/
public function count() {
return count($this->requests);
}
/**
* Add a request to the request queue
*
* @param Request $request
* @return bool
*/
public function add($request) {
$this->requests[] = $request;
return true;
}
/**
* Create new Request and add it to the request queue
*
* @param string $url
* @param string $method
* @param $post_data
* @param $headers
* @param $options
* @return bool
*/
public function request($url, $method = "GET", $post_data = null, $headers = null, $options = null) {
$this->requests[] = new RollingCurlRequest($url, $method, $post_data, $headers, $options);
return true;
}
/**
* Perform GET request
*
* @param string $url
* @param $headers
* @param $options
* @return bool
*/
public function get($url, $headers = null, $options = null) {
return $this->request($url, "GET", null, $headers, $options);
}
/**
* Perform POST request
*
* @param string $url
* @param $post_data
* @param $headers
* @param $options
* @return bool
*/
public function post($url, $post_data = null, $headers = null, $options = null) {
return $this->request($url, "POST", $post_data, $headers, $options);
}
/**
* Execute processing
*
* @param int $window_size Max number of simultaneous connections
* @return string|bool
*/
public function execute($window_size = null) {
// rolling curl window must always be greater than 1
if (sizeof($this->requests) == 1) {
return $this->single_curl();
} else {
// start the rolling curl. window_size is the max number of simultaneous connections
return $this->rolling_curl($window_size);
}
}
/**
* Performs a single curl request
*
* @access private
* @return string
*/
private function single_curl() {
$ch = curl_init();
$request = array_shift($this->requests);
$options = $this->get_options($request);
curl_setopt_array($ch, $options);
$output = curl_exec($ch);
$info = curl_getinfo($ch);
// it's not neccesary to set a callback for one-off requests
if ($this->callback) {
$callback = $this->callback;
if (is_callable($this->callback)) {
call_user_func($callback, $output, $info, $request);
}
}
else
return $output;
return true;
}
/**
* Performs multiple curl requests
*
* @access private
* @throws RollingCurlException
* @param int $window_size Max number of simultaneous connections
* @return bool
*/
private function rolling_curl($window_size = null) {
if ($window_size)
$this->window_size = $window_size;
// make sure the rolling window isn't greater than the # of urls
if (sizeof($this->requests) < $this->window_size)
$this->window_size = sizeof($this->requests);
if ($this->window_size < 2) {
throw new RollingCurlException("Window size must be greater than 1");
}
$master = curl_multi_init();
// start the first batch of requests
for ($i = 0; $i < $this->window_size; $i++) {
$ch = curl_init();
$options = $this->get_options($this->requests[$i]);
curl_setopt_array($ch, $options);
curl_multi_add_handle($master, $ch);
// Add to our request Maps
$key = (string) $ch;
$this->requestMap[$key] = $i;
}
do {
while (($execrun = curl_multi_exec($master, $running)) == CURLM_CALL_MULTI_PERFORM) ;
if ($execrun != CURLM_OK)
break;
// a request was just completed -- find out which one
while ($done = curl_multi_info_read($master)) {
// get the info and content returned on the request
$info = curl_getinfo($done['handle']);
$output = curl_multi_getcontent($done['handle']);
// send the return values to the callback function.
$callback = $this->callback;
if (is_callable($callback)) {
$key = (string) $done['handle'];
$request = $this->requests[$this->requestMap[$key]];
unset($this->requestMap[$key]);
call_user_func($callback, $output, $info, $request);
}
// start a new request (it's important to do this before removing the old one)
if ($i < sizeof($this->requests) && isset($this->requests[$i]) && $i < count($this->requests)) {
$ch = curl_init();
$options = $this->get_options($this->requests[$i]);
curl_setopt_array($ch, $options);
curl_multi_add_handle($master, $ch);
// Add to our request Maps
$key = (string) $ch;
$this->requestMap[$key] = $i;
$i++;
}
// remove the curl handle that just completed
curl_multi_remove_handle($master, $done['handle']);
}
// Block for data in / output; error handling is done by curl_multi_exec
//if ($running) curl_multi_select($master, $this->timeout);
// removing timeout as it causes problems on Windows with PHP 5.3.5 and Curl 7.20.0
if ($running) curl_multi_select($master);
} while ($running);
curl_multi_close($master);
return true;
}
/**
* Helper function to set up a new request by setting the appropriate options
*
* @access private
* @param Request $request
* @return array
*/
private function get_options($request) {
// options for this entire curl object
$options = $this->__get('options');
// We're managing reirects in PHP - allows us to intervene and rewrite/block URLs
// before the next request goes out.
$options[CURLOPT_FOLLOWLOCATION] = 0;
$options[CURLOPT_MAXREDIRS] = 0;
//if (ini_get('safe_mode') == 'Off' || !ini_get('safe_mode')) {
// $options[CURLOPT_FOLLOWLOCATION] = 1;
// $options[CURLOPT_MAXREDIRS] = 5;
//}
$headers = $this->__get('headers');
// append custom headers for this specific request
if ($request->headers) {
$headers = $headers + $request->headers;
}
// append custom options for this specific request
if ($request->options) {
$options = $request->options + $options;
}
// set the request URL
$options[CURLOPT_URL] = $request->url;
if ($headers) {
$options[CURLOPT_HTTPHEADER] = $headers;
}
// return response headers
$options[CURLOPT_HEADER] = 1;
// send HEAD request?
if ($request->method == 'HEAD') {
$options[CURLOPT_NOBODY] = 1;
}
return $options;
}
/**
* @return void
*/
public function __destruct() {
unset($this->window_size, $this->callback, $this->options, $this->headers, $this->requests);
}
}

View File

@ -0,0 +1,79 @@
<?php
/**
* Humble HTTP Agent extension for SimplePie_File
*
* This class is designed to extend and override SimplePie_File
* in order to prevent duplicate HTTP requests being sent out.
* The idea is to initialise an instance of Humble HTTP Agent
* and attach it, to a static class variable, of this class.
* SimplePie will then automatically initialise this class
*
* @date 2011-02-28
*/
class SimplePie_HumbleHttpAgent extends SimplePie_File
{
protected static $agent;
var $url;
var $useragent;
var $success = true;
var $headers = array();
var $body;
var $status_code;
var $redirects = 0;
var $error;
var $method = SIMPLEPIE_FILE_SOURCE_NONE;
public static function set_agent(HumbleHttpAgent $agent) {
self::$agent = $agent;
}
public function __construct($url, $timeout = 10, $redirects = 5, $headers = null, $useragent = null, $force_fsockopen = false) {
if (class_exists('idna_convert'))
{
$idn = new idna_convert();
$parsed = SimplePie_Misc::parse_url($url);
$url = SimplePie_Misc::compress_parse_url($parsed['scheme'], $idn->encode($parsed['authority']), $parsed['path'], $parsed['query'], $parsed['fragment']);
}
$this->url = $url;
$this->useragent = $useragent;
if (preg_match('/^http(s)?:\/\//i', $url))
{
if (!is_array($headers))
{
$headers = array();
}
$this->method = SIMPLEPIE_FILE_SOURCE_REMOTE | SIMPLEPIE_FILE_SOURCE_CURL;
$headers2 = array();
foreach ($headers as $key => $value) {
$headers2[] = "$key: $value";
}
//TODO: allow for HTTP headers
// curl_setopt($fp, CURLOPT_HTTPHEADER, $headers2);
$response = self::$agent->get($url);
if ($response === false || !isset($response['status_code'])) {
$this->error = 'failed to fetch URL';
$this->success = false;
} else {
// The extra lines at the end are there to satisfy SimplePie's HTTP parser.
// The class expects a full HTTP message, whereas we're giving it only
// headers - the new lines indicate the start of the body.
$parser = new SimplePie_HTTP_Parser($response['headers']."\r\n\r\n");
if ($parser->parse()) {
$this->headers = $parser->headers;
//$this->body = $parser->body;
$this->body = $response['body'];
$this->status_code = $parser->status_code;
}
}
}
else
{
$this->error = 'invalid URL';
$this->success = false;
}
}
}
?>

26
inc/3rdparty/simplepie/LICENSE.txt vendored Normal file
View File

@ -0,0 +1,26 @@
Copyright (c) 2004-2007, Ryan Parman and Geoffrey Sneddon.
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are
permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this list of
conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice, this list
of conditions and the following disclaimer in the documentation and/or other materials
provided with the distribution.
* Neither the name of the SimplePie Team nor the names of its contributors may be used
to endorse or promote products derived from this software without specific prior
written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

56
inc/3rdparty/simplepie/SimplePie.php vendored Normal file
View File

@ -0,0 +1,56 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
/**
* SimplePie class.
*
* Class for backward compatibility.
*
* @package SimplePie
*/
class SimplePie extends SimplePie_Core
{
}

View File

@ -0,0 +1,103 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
class SimplePie_Author
{
var $name;
var $link;
var $email;
// Constructor, used to input the data
public function __construct($name = null, $link = null, $email = null)
{
$this->name = $name;
$this->link = $link;
$this->email = $email;
}
public function __toString()
{
// There is no $this->data here
return md5(serialize($this));
}
public function get_name()
{
if ($this->name !== null)
{
return $this->name;
}
else
{
return null;
}
}
public function get_link()
{
if ($this->link !== null)
{
return $this->link;
}
else
{
return null;
}
}
public function get_email()
{
if ($this->email !== null)
{
return $this->email;
}
else
{
return null;
}
}
}

View File

@ -0,0 +1,109 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
class SimplePie_Cache
{
/**
* Cache handler classes
*
* These receive 3 parameters to their constructor, as documented in
* {@see register()}
* @var array
*/
protected static $handlers = array(
'mysql' => 'SimplePie_Cache_MySQL',
'memcache' => 'SimplePie_Cache_Memcache',
);
/**
* Don't call the constructor. Please.
*/
private function __construct() { }
/**
* Create a new SimplePie_Cache object
*/
public static function create($location, $filename, $extension)
{
$type = explode(':', $location, 2);
$type = $type[0];
if (!empty(self::$handlers[$type]))
{
$class = self::$handlers[$type];
return new $class($location, $filename, $extension);
}
return new SimplePie_Cache_File($location, $filename, $extension);
}
/**
* Register a handler
*
* @param string $type DSN type to register for
* @param string $class Name of handler class. Must implement SimplePie_Cache_Base
*/
public static function register($type, $class)
{
self::$handlers[$type] = $class;
}
/**
* Parse a URL into an array
*
* @param string $url
* @return array
*/
public static function parse_URL($url)
{
$params = parse_url($url);
$params['extras'] = array();
if (isset($params['query']))
{
parse_str($params['query'], $params['extras']);
}
return $params;
}
}

View File

@ -0,0 +1,102 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
interface SimplePie_Cache_Base
{
/**
* Feed cache type
*/
const TYPE_FEED = 'spc';
/**
* Image cache type
*/
const TYPE_IMAGE = 'spi';
/**
* Create a new cache object
*
* @param string $location Location string (from SimplePie::$cache_location)
* @param string $name Unique ID for the cache
* @param string $type Either TYPE_FEED for SimplePie data, or TYPE_IMAGE for image data
*/
public function __construct($location, $name, $type);
/**
* Save data to the cache
*
* @param array|SimplePie $data Data to store in the cache. If passed a SimplePie object, only cache the $data property
*/
public function save($data);
/**
* Retrieve the data saved to the cache
*
* @return array Data for SimplePie::$data
*/
public function load();
/**
* Retrieve the last modified time for the cache
*
* @return int Timestamp
*/
public function mtime();
/**
* Set the last modified time to the current time
*
* @return bool Success status
*/
public function touch();
/**
* Remove the cache
*
* @return bool Success status
*/
public function unlink();
}

View File

@ -0,0 +1,124 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
abstract class SimplePie_Cache_DB implements SimplePie_Cache_Base
{
protected static function prepare_simplepie_object_for_cache(&$data)
{
$items = $data->get_items();
$items_by_id = array();
if (!empty($items))
{
foreach ($items as $item)
{
$items_by_id[$item->get_id()] = $item;
}
if (count($items_by_id) !== count($items))
{
$items_by_id = array();
foreach ($items as $item)
{
$items_by_id[$item->get_id(true)] = $item;
}
}
if (isset($data->data['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['feed'][0]))
{
$channel =& $data->data['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['feed'][0];
}
elseif (isset($data->data['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['feed'][0]))
{
$channel =& $data->data['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['feed'][0];
}
elseif (isset($data->data['child'][SIMPLEPIE_NAMESPACE_RDF]['RDF'][0]))
{
$channel =& $data->data['child'][SIMPLEPIE_NAMESPACE_RDF]['RDF'][0];
}
elseif (isset($data->data['child'][SIMPLEPIE_NAMESPACE_RSS_20]['rss'][0]['child'][SIMPLEPIE_NAMESPACE_RSS_20]['channel'][0]))
{
$channel =& $data->data['child'][SIMPLEPIE_NAMESPACE_RSS_20]['rss'][0]['child'][SIMPLEPIE_NAMESPACE_RSS_20]['channel'][0];
}
else
{
$channel = null;
}
if ($channel !== null)
{
if (isset($channel['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['entry']))
{
unset($channel['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['entry']);
}
if (isset($channel['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['entry']))
{
unset($channel['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['entry']);
}
if (isset($channel['child'][SIMPLEPIE_NAMESPACE_RSS_10]['item']))
{
unset($channel['child'][SIMPLEPIE_NAMESPACE_RSS_10]['item']);
}
if (isset($channel['child'][SIMPLEPIE_NAMESPACE_RSS_090]['item']))
{
unset($channel['child'][SIMPLEPIE_NAMESPACE_RSS_090]['item']);
}
if (isset($channel['child'][SIMPLEPIE_NAMESPACE_RSS_20]['item']))
{
unset($channel['child'][SIMPLEPIE_NAMESPACE_RSS_20]['item']);
}
}
if (isset($data->data['items']))
{
unset($data->data['items']);
}
if (isset($data->data['ordered_items']))
{
unset($data->data['ordered_items']);
}
}
return array(serialize($data->data), $items_by_id);
}
}

View File

@ -0,0 +1,112 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
class SimplePie_Cache_File implements SimplePie_Cache_Base
{
protected $location;
protected $filename;
protected $extension;
protected $name;
public function __construct($location, $filename, $extension)
{
$this->location = $location;
$this->filename = $filename;
$this->extension = $extension;
$this->name = "$this->location/$this->filename.$this->extension";
}
public function save($data)
{
if (file_exists($this->name) && is_writeable($this->name) || file_exists($this->location) && is_writeable($this->location))
{
if (is_a($data, 'SimplePie'))
{
$data = $data->data;
}
$data = serialize($data);
return (bool) file_put_contents($this->name, $data);
}
return false;
}
public function load()
{
if (file_exists($this->name) && is_readable($this->name))
{
return unserialize(file_get_contents($this->name));
}
return false;
}
public function mtime()
{
if (file_exists($this->name))
{
return filemtime($this->name);
}
return false;
}
public function touch()
{
if (file_exists($this->name))
{
return touch($this->name);
}
return false;
}
public function unlink()
{
if (file_exists($this->name))
{
return unlink($this->name);
}
return false;
}
}

View File

@ -0,0 +1,118 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
class SimplePie_Cache_Memcache implements SimplePie_Cache_Base
{
protected $cache;
protected $options;
protected $name;
public function __construct($url, $filename, $extension)
{
$this->options = array(
'host' => '127.0.0.1',
'port' => 11211,
'extras' => array(
'timeout' => 3600, // one hour
'prefix' => 'simplepie_',
),
);
$this->options = array_merge_recursive($this->options, SimplePie_Cache::parse_URL($url));
$this->name = $this->options['extras']['prefix'] . md5("$filename:$extension");
$this->cache = new Memcache();
$this->cache->addServer($this->options['host'], (int) $this->options['port']);
}
public function save($data)
{
if (is_a($data, 'SimplePie'))
{
$data = $data->data;
}
return $this->cache->set($this->name, serialize($data), MEMCACHE_COMPRESSED, (int) $this->options['extras']['timeout']);
}
public function load()
{
$data = $this->cache->get($this->name);
if ($data !== false)
{
return unserialize($data);
}
return false;
}
public function mtime()
{
$data = $this->cache->get($this->name);
if ($data !== false)
{
// essentially ignore the mtime because Memcache expires on it's own
return time();
}
return false;
}
public function touch()
{
$data = $this->cache->get($this->name);
if ($data !== false)
{
return $this->cache->set($this->name, $data, MEMCACHE_COMPRESSED, (int) $this->duration);
}
return false;
}
public function unlink()
{
return $this->cache->delete($this->name);
}
}

View File

@ -0,0 +1,378 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
class SimplePie_Cache_MySQL extends SimplePie_Cache_DB
{
protected $mysql;
protected $options;
protected $id;
public function __construct($url, $name, $extension)
{
$this->options = array(
'user' => null,
'pass' => null,
'host' => '127.0.0.1',
'port' => '3306',
'path' => '',
'extras' => array(
'prefix' => '',
),
);
$this->options = array_merge_recursive($this->options, SimplePie_Cache::parse_URL($url));
// Path is prefixed with a "/"
$this->options['dbname'] = substr($this->options['path'], 1);
try
{
$this->mysql = new PDO("mysql:dbname={$this->options['dbname']};host={$this->options['host']};port={$this->options['port']}", $this->options['user'], $this->options['pass'], array(PDO::MYSQL_ATTR_INIT_COMMAND => 'SET NAMES utf8'));
}
catch (PDOException $e)
{
$this->mysql = null;
return;
}
$this->id = $name . $extension;
if (!$query = $this->mysql->query('SHOW TABLES'))
{
$this->mysql = null;
return;
}
$db = array();
while ($row = $query->fetchColumn())
{
$db[] = $row;
}
if (!in_array($this->options['extras']['prefix'] . 'cache_data', $db))
{
$query = $this->mysql->exec('CREATE TABLE `' . $this->options['extras']['prefix'] . 'cache_data` (`id` TEXT CHARACTER SET utf8 NOT NULL, `items` SMALLINT NOT NULL DEFAULT 0, `data` BLOB NOT NULL, `mtime` INT UNSIGNED NOT NULL, UNIQUE (`id`(125)))');
if ($query === false)
{
$this->mysql = null;
}
}
if (!in_array($this->options['extras']['prefix'] . 'items', $db))
{
$query = $this->mysql->exec('CREATE TABLE `' . $this->options['extras']['prefix'] . 'items` (`feed_id` TEXT CHARACTER SET utf8 NOT NULL, `id` TEXT CHARACTER SET utf8 NOT NULL, `data` TEXT CHARACTER SET utf8 NOT NULL, `posted` INT UNSIGNED NOT NULL, INDEX `feed_id` (`feed_id`(125)))');
if ($query === false)
{
$this->mysql = null;
}
}
}
public function save($data)
{
if ($this->mysql === null)
{
return false;
}
if (is_a($data, 'SimplePie'))
{
$data = clone $data;
$prepared = self::prepare_simplepie_object_for_cache($data);
$query = $this->mysql->prepare('SELECT COUNT(*) FROM `' . $this->options['extras']['prefix'] . 'cache_data` WHERE `id` = :feed');
$query->bindValue(':feed', $this->id);
if ($query->execute())
{
if ($query->fetchColumn() > 0)
{
$items = count($prepared[1]);
if ($items)
{
$sql = 'UPDATE `' . $this->options['extras']['prefix'] . 'cache_data` SET `items` = :items, `data` = :data, `mtime` = :time WHERE `id` = :feed';
$query = $this->mysql->prepare($sql);
$query->bindValue(':items', $items);
}
else
{
$sql = 'UPDATE `' . $this->options['extras']['prefix'] . 'cache_data` SET `data` = :data, `mtime` = :time WHERE `id` = :feed';
$query = $this->mysql->prepare($sql);
}
$query->bindValue(':data', $prepared[0]);
$query->bindValue(':time', time());
$query->bindValue(':feed', $this->id);
if (!$query->execute())
{
return false;
}
}
else
{
$query = $this->mysql->prepare('INSERT INTO `' . $this->options['extras']['prefix'] . 'cache_data` (`id`, `items`, `data`, `mtime`) VALUES(:feed, :count, :data, :time)');
$query->bindValue(':feed', $this->id);
$query->bindValue(':count', count($prepared[1]));
$query->bindValue(':data', $prepared[0]);
$query->bindValue(':time', time());
if (!$query->execute())
{
return false;
}
}
$ids = array_keys($prepared[1]);
if (!empty($ids))
{
foreach ($ids as $id)
{
$database_ids[] = $this->mysql->quote($id);
}
$query = $this->mysql->prepare('SELECT `id` FROM `' . $this->options['extras']['prefix'] . 'items` WHERE `id` = ' . implode(' OR `id` = ', $database_ids) . ' AND `feed_id` = :feed');
$query->bindValue(':feed', $this->id);
if ($query->execute())
{
$existing_ids = array();
while ($row = $query->fetchColumn())
{
$existing_ids[] = $row;
}
$new_ids = array_diff($ids, $existing_ids);
foreach ($new_ids as $new_id)
{
if (!($date = $prepared[1][$new_id]->get_date('U')))
{
$date = time();
}
$query = $this->mysql->prepare('INSERT INTO `' . $this->options['extras']['prefix'] . 'items` (`feed_id`, `id`, `data`, `posted`) VALUES(:feed, :id, :data, :date)');
$query->bindValue(':feed', $this->id);
$query->bindValue(':id', $new_id);
$query->bindValue(':data', serialize($prepared[1][$new_id]->data));
$query->bindValue(':date', $date);
if (!$query->execute())
{
return false;
}
}
return true;
}
}
else
{
return true;
}
}
}
else
{
$query = $this->mysql->prepare('SELECT `id` FROM `' . $this->options['extras']['prefix'] . 'cache_data` WHERE `id` = :feed');
$query->bindValue(':feed', $this->id);
if ($query->execute())
{
if ($query->rowCount() > 0)
{
$query = $this->mysql->prepare('UPDATE `' . $this->options['extras']['prefix'] . 'cache_data` SET `items` = 0, `data` = :data, `mtime` = :time WHERE `id` = :feed');
$query->bindValue(':data', serialize($data));
$query->bindValue(':time', time());
$query->bindValue(':feed', $this->id);
if ($this->execute())
{
return true;
}
}
else
{
$query = $this->mysql->prepare('INSERT INTO `' . $this->options['extras']['prefix'] . 'cache_data` (`id`, `items`, `data`, `mtime`) VALUES(:id, 0, :data, :time)');
$query->bindValue(':id', $this->id);
$query->bindValue(':data', serialize($data));
$query->bindValue(':time', time());
if ($query->execute())
{
return true;
}
}
}
}
return false;
}
public function load()
{
if ($this->mysql === null)
{
return false;
}
$query = $this->mysql->prepare('SELECT `items`, `data` FROM `' . $this->options['extras']['prefix'] . 'cache_data` WHERE `id` = :id');
$query->bindValue(':id', $this->id);
if ($query->execute() && ($row = $query->fetch()))
{
$data = unserialize($row[1]);
if (isset($this->options['items'][0]))
{
$items = (int) $this->options['items'][0];
}
else
{
$items = (int) $row[0];
}
if ($items !== 0)
{
if (isset($data['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['feed'][0]))
{
$feed =& $data['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['feed'][0];
}
elseif (isset($data['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['feed'][0]))
{
$feed =& $data['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['feed'][0];
}
elseif (isset($data['child'][SIMPLEPIE_NAMESPACE_RDF]['RDF'][0]))
{
$feed =& $data['child'][SIMPLEPIE_NAMESPACE_RDF]['RDF'][0];
}
elseif (isset($data['child'][SIMPLEPIE_NAMESPACE_RSS_20]['rss'][0]))
{
$feed =& $data['child'][SIMPLEPIE_NAMESPACE_RSS_20]['rss'][0];
}
else
{
$feed = null;
}
if ($feed !== null)
{
$sql = 'SELECT `data` FROM `' . $this->options['extras']['prefix'] . 'items` WHERE `feed_id` = :feed ORDER BY `posted` DESC';
if ($items > 0)
{
$sql .= ' LIMIT ' . $items;
}
$query = $this->mysql->prepare($sql);
$query->bindValue(':feed', $this->id);
if ($query->execute())
{
while ($row = $query->fetchColumn())
{
$feed['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['entry'][] = unserialize($row);
}
}
else
{
return false;
}
}
}
return $data;
}
return false;
}
public function mtime()
{
if ($this->mysql === null)
{
return false;
}
$query = $this->mysql->prepare('SELECT `mtime` FROM `' . $this->options['extras']['prefix'] . 'cache_data` WHERE `id` = :id');
$query->bindValue(':id', $this->id);
if ($query->execute() && ($time = $query->fetchColumn()))
{
return $time;
}
else
{
return false;
}
}
public function touch()
{
if ($this->mysql === null)
{
return false;
}
$query = $this->mysql->prepare('UPDATE `' . $this->options['extras']['prefix'] . 'cache_data` SET `mtime` = :time WHERE `id` = :id');
$query->bindValue(':time', time());
$query->bindValue(':id', $this->id);
if ($query->execute() && $query->rowCount() > 0)
{
return true;
}
else
{
return false;
}
}
public function unlink()
{
if ($this->mysql === null)
{
return false;
}
$query = $this->mysql->prepare('DELETE FROM `' . $this->options['extras']['prefix'] . 'cache_data` WHERE `id` = :id');
$query->bindValue(':id', $this->id);
$query2 = $this->mysql->prepare('DELETE FROM `' . $this->options['extras']['prefix'] . 'items` WHERE `feed_id` = :id');
$query2->bindValue(':id', $this->id);
if ($query->execute() && $query2->execute())
{
return true;
}
else
{
return false;
}
}
}

View File

@ -0,0 +1,131 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
class SimplePie_Caption
{
var $type;
var $lang;
var $startTime;
var $endTime;
var $text;
// Constructor, used to input the data
public function __construct($type = null, $lang = null, $startTime = null, $endTime = null, $text = null)
{
$this->type = $type;
$this->lang = $lang;
$this->startTime = $startTime;
$this->endTime = $endTime;
$this->text = $text;
}
public function __toString()
{
// There is no $this->data here
return md5(serialize($this));
}
public function get_endtime()
{
if ($this->endTime !== null)
{
return $this->endTime;
}
else
{
return null;
}
}
public function get_language()
{
if ($this->lang !== null)
{
return $this->lang;
}
else
{
return null;
}
}
public function get_starttime()
{
if ($this->startTime !== null)
{
return $this->startTime;
}
else
{
return null;
}
}
public function get_text()
{
if ($this->text !== null)
{
return $this->text;
}
else
{
return null;
}
}
public function get_type()
{
if ($this->type !== null)
{
return $this->type;
}
else
{
return null;
}
}
}

View File

@ -0,0 +1,103 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
class SimplePie_Category
{
var $term;
var $scheme;
var $label;
// Constructor, used to input the data
public function __construct($term = null, $scheme = null, $label = null)
{
$this->term = $term;
$this->scheme = $scheme;
$this->label = $label;
}
public function __toString()
{
// There is no $this->data here
return md5(serialize($this));
}
public function get_term()
{
if ($this->term !== null)
{
return $this->term;
}
else
{
return null;
}
}
public function get_scheme()
{
if ($this->scheme !== null)
{
return $this->scheme;
}
else
{
return null;
}
}
public function get_label()
{
if ($this->label !== null)
{
return $this->label;
}
else
{
return $this->get_term();
}
}
}

View File

@ -0,0 +1,325 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
/**
* Content-type sniffing
*
* Based on the rules in http://tools.ietf.org/html/draft-abarth-mime-sniff-06
* @package SimplePie
*/
class SimplePie_Content_Type_Sniffer
{
/**
* File object
*
* @var SimplePie_File
*/
var $file;
/**
* Create an instance of the class with the input file
*
* @param SimplePie_Content_Type_Sniffer $file Input file
*/
public function __construct($file)
{
$this->file = $file;
}
/**
* Get the Content-Type of the specified file
*
* @return string Actual Content-Type
*/
public function get_type()
{
if (isset($this->file->headers['content-type']))
{
if (!isset($this->file->headers['content-encoding'])
&& ($this->file->headers['content-type'] === 'text/plain'
|| $this->file->headers['content-type'] === 'text/plain; charset=ISO-8859-1'
|| $this->file->headers['content-type'] === 'text/plain; charset=iso-8859-1'
|| $this->file->headers['content-type'] === 'text/plain; charset=UTF-8'))
{
return $this->text_or_binary();
}
if (($pos = strpos($this->file->headers['content-type'], ';')) !== false)
{
$official = substr($this->file->headers['content-type'], 0, $pos);
}
else
{
$official = $this->file->headers['content-type'];
}
$official = trim(strtolower($official));
if ($official === 'unknown/unknown'
|| $official === 'application/unknown')
{
return $this->unknown();
}
elseif (substr($official, -4) === '+xml'
|| $official === 'text/xml'
|| $official === 'application/xml')
{
return $official;
}
elseif (substr($official, 0, 6) === 'image/')
{
if ($return = $this->image())
{
return $return;
}
else
{
return $official;
}
}
elseif ($official === 'text/html')
{
return $this->feed_or_html();
}
else
{
return $official;
}
}
else
{
return $this->unknown();
}
}
/**
* Sniff text or binary
*
* @return string Actual Content-Type
*/
public function text_or_binary()
{
if (substr($this->file->body, 0, 2) === "\xFE\xFF"
|| substr($this->file->body, 0, 2) === "\xFF\xFE"
|| substr($this->file->body, 0, 4) === "\x00\x00\xFE\xFF"
|| substr($this->file->body, 0, 3) === "\xEF\xBB\xBF")
{
return 'text/plain';
}
elseif (preg_match('/[\x00-\x08\x0E-\x1A\x1C-\x1F]/', $this->file->body))
{
return 'application/octect-stream';
}
else
{
return 'text/plain';
}
}
/**
* Sniff unknown
*
* @return string Actual Content-Type
*/
public function unknown()
{
$ws = strspn($this->file->body, "\x09\x0A\x0B\x0C\x0D\x20");
if (strtolower(substr($this->file->body, $ws, 14)) === '<!doctype html'
|| strtolower(substr($this->file->body, $ws, 5)) === '<html'
|| strtolower(substr($this->file->body, $ws, 7)) === '<script')
{
return 'text/html';
}
elseif (substr($this->file->body, 0, 5) === '%PDF-')
{
return 'application/pdf';
}
elseif (substr($this->file->body, 0, 11) === '%!PS-Adobe-')
{
return 'application/postscript';
}
elseif (substr($this->file->body, 0, 6) === 'GIF87a'
|| substr($this->file->body, 0, 6) === 'GIF89a')
{
return 'image/gif';
}
elseif (substr($this->file->body, 0, 8) === "\x89\x50\x4E\x47\x0D\x0A\x1A\x0A")
{
return 'image/png';
}
elseif (substr($this->file->body, 0, 3) === "\xFF\xD8\xFF")
{
return 'image/jpeg';
}
elseif (substr($this->file->body, 0, 2) === "\x42\x4D")
{
return 'image/bmp';
}
elseif (substr($this->file->body, 0, 4) === "\x00\x00\x01\x00")
{
return 'image/vnd.microsoft.icon';
}
else
{
return $this->text_or_binary();
}
}
/**
* Sniff images
*
* @return string Actual Content-Type
*/
public function image()
{
if (substr($this->file->body, 0, 6) === 'GIF87a'
|| substr($this->file->body, 0, 6) === 'GIF89a')
{
return 'image/gif';
}
elseif (substr($this->file->body, 0, 8) === "\x89\x50\x4E\x47\x0D\x0A\x1A\x0A")
{
return 'image/png';
}
elseif (substr($this->file->body, 0, 3) === "\xFF\xD8\xFF")
{
return 'image/jpeg';
}
elseif (substr($this->file->body, 0, 2) === "\x42\x4D")
{
return 'image/bmp';
}
elseif (substr($this->file->body, 0, 4) === "\x00\x00\x01\x00")
{
return 'image/vnd.microsoft.icon';
}
else
{
return false;
}
}
/**
* Sniff HTML
*
* @return string Actual Content-Type
*/
public function feed_or_html()
{
$len = strlen($this->file->body);
$pos = strspn($this->file->body, "\x09\x0A\x0D\x20");
while ($pos < $len)
{
switch ($this->file->body[$pos])
{
case "\x09":
case "\x0A":
case "\x0D":
case "\x20":
$pos += strspn($this->file->body, "\x09\x0A\x0D\x20", $pos);
continue 2;
case '<':
$pos++;
break;
default:
return 'text/html';
}
if (substr($this->file->body, $pos, 3) === '!--')
{
$pos += 3;
if ($pos < $len && ($pos = strpos($this->file->body, '-->', $pos)) !== false)
{
$pos += 3;
}
else
{
return 'text/html';
}
}
elseif (substr($this->file->body, $pos, 1) === '!')
{
if ($pos < $len && ($pos = strpos($this->file->body, '>', $pos)) !== false)
{
$pos++;
}
else
{
return 'text/html';
}
}
elseif (substr($this->file->body, $pos, 1) === '?')
{
if ($pos < $len && ($pos = strpos($this->file->body, '?>', $pos)) !== false)
{
$pos += 2;
}
else
{
return 'text/html';
}
}
elseif (substr($this->file->body, $pos, 3) === 'rss'
|| substr($this->file->body, $pos, 7) === 'rdf:RDF')
{
return 'application/rss+xml';
}
elseif (substr($this->file->body, $pos, 4) === 'feed')
{
return 'application/atom+xml';
}
else
{
return 'text/html';
}
}
return 'text/html';
}
}

View File

@ -0,0 +1,89 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
class SimplePie_Copyright
{
var $url;
var $label;
// Constructor, used to input the data
public function __construct($url = null, $label = null)
{
$this->url = $url;
$this->label = $label;
}
public function __toString()
{
// There is no $this->data here
return md5(serialize($this));
}
public function get_url()
{
if ($this->url !== null)
{
return $this->url;
}
else
{
return null;
}
}
public function get_attribution()
{
if ($this->label !== null)
{
return $this->label;
}
else
{
return null;
}
}
}

2838
inc/3rdparty/simplepie/SimplePie/Core.php vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,103 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
class SimplePie_Credit
{
var $role;
var $scheme;
var $name;
// Constructor, used to input the data
public function __construct($role = null, $scheme = null, $name = null)
{
$this->role = $role;
$this->scheme = $scheme;
$this->name = $name;
}
public function __toString()
{
// There is no $this->data here
return md5(serialize($this));
}
public function get_role()
{
if ($this->role !== null)
{
return $this->role;
}
else
{
return null;
}
}
public function get_scheme()
{
if ($this->scheme !== null)
{
return $this->scheme;
}
else
{
return null;
}
}
public function get_name()
{
if ($this->name !== null)
{
return $this->name;
}
else
{
return null;
}
}
}

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,990 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
class SimplePie_Enclosure
{
var $bitrate;
var $captions;
var $categories;
var $channels;
var $copyright;
var $credits;
var $description;
var $duration;
var $expression;
var $framerate;
var $handler;
var $hashes;
var $height;
var $javascript;
var $keywords;
var $lang;
var $length;
var $link;
var $medium;
var $player;
var $ratings;
var $restrictions;
var $samplingrate;
var $thumbnails;
var $title;
var $type;
var $width;
// Constructor, used to input the data
public function __construct($link = null, $type = null, $length = null, $javascript = null, $bitrate = null, $captions = null, $categories = null, $channels = null, $copyright = null, $credits = null, $description = null, $duration = null, $expression = null, $framerate = null, $hashes = null, $height = null, $keywords = null, $lang = null, $medium = null, $player = null, $ratings = null, $restrictions = null, $samplingrate = null, $thumbnails = null, $title = null, $width = null)
{
$this->bitrate = $bitrate;
$this->captions = $captions;
$this->categories = $categories;
$this->channels = $channels;
$this->copyright = $copyright;
$this->credits = $credits;
$this->description = $description;
$this->duration = $duration;
$this->expression = $expression;
$this->framerate = $framerate;
$this->hashes = $hashes;
$this->height = $height;
$this->keywords = $keywords;
$this->lang = $lang;
$this->length = $length;
$this->link = $link;
$this->medium = $medium;
$this->player = $player;
$this->ratings = $ratings;
$this->restrictions = $restrictions;
$this->samplingrate = $samplingrate;
$this->thumbnails = $thumbnails;
$this->title = $title;
$this->type = $type;
$this->width = $width;
if (class_exists('idna_convert'))
{
$idn = new idna_convert();
$parsed = SimplePie_Misc::parse_url($link);
$this->link = SimplePie_Misc::compress_parse_url($parsed['scheme'], $idn->encode($parsed['authority']), $parsed['path'], $parsed['query'], $parsed['fragment']);
}
$this->handler = $this->get_handler(); // Needs to load last
}
public function __toString()
{
// There is no $this->data here
return md5(serialize($this));
}
public function get_bitrate()
{
if ($this->bitrate !== null)
{
return $this->bitrate;
}
else
{
return null;
}
}
public function get_caption($key = 0)
{
$captions = $this->get_captions();
if (isset($captions[$key]))
{
return $captions[$key];
}
else
{
return null;
}
}
public function get_captions()
{
if ($this->captions !== null)
{
return $this->captions;
}
else
{
return null;
}
}
public function get_category($key = 0)
{
$categories = $this->get_categories();
if (isset($categories[$key]))
{
return $categories[$key];
}
else
{
return null;
}
}
public function get_categories()
{
if ($this->categories !== null)
{
return $this->categories;
}
else
{
return null;
}
}
public function get_channels()
{
if ($this->channels !== null)
{
return $this->channels;
}
else
{
return null;
}
}
public function get_copyright()
{
if ($this->copyright !== null)
{
return $this->copyright;
}
else
{
return null;
}
}
public function get_credit($key = 0)
{
$credits = $this->get_credits();
if (isset($credits[$key]))
{
return $credits[$key];
}
else
{
return null;
}
}
public function get_credits()
{
if ($this->credits !== null)
{
return $this->credits;
}
else
{
return null;
}
}
public function get_description()
{
if ($this->description !== null)
{
return $this->description;
}
else
{
return null;
}
}
public function get_duration($convert = false)
{
if ($this->duration !== null)
{
if ($convert)
{
$time = SimplePie_Misc::time_hms($this->duration);
return $time;
}
else
{
return $this->duration;
}
}
else
{
return null;
}
}
public function get_expression()
{
if ($this->expression !== null)
{
return $this->expression;
}
else
{
return 'full';
}
}
public function get_extension()
{
if ($this->link !== null)
{
$url = SimplePie_Misc::parse_url($this->link);
if ($url['path'] !== '')
{
return pathinfo($url['path'], PATHINFO_EXTENSION);
}
}
return null;
}
public function get_framerate()
{
if ($this->framerate !== null)
{
return $this->framerate;
}
else
{
return null;
}
}
public function get_handler()
{
return $this->get_real_type(true);
}
public function get_hash($key = 0)
{
$hashes = $this->get_hashes();
if (isset($hashes[$key]))
{
return $hashes[$key];
}
else
{
return null;
}
}
public function get_hashes()
{
if ($this->hashes !== null)
{
return $this->hashes;
}
else
{
return null;
}
}
public function get_height()
{
if ($this->height !== null)
{
return $this->height;
}
else
{
return null;
}
}
public function get_language()
{
if ($this->lang !== null)
{
return $this->lang;
}
else
{
return null;
}
}
public function get_keyword($key = 0)
{
$keywords = $this->get_keywords();
if (isset($keywords[$key]))
{
return $keywords[$key];
}
else
{
return null;
}
}
public function get_keywords()
{
if ($this->keywords !== null)
{
return $this->keywords;
}
else
{
return null;
}
}
public function get_length()
{
if ($this->length !== null)
{
return $this->length;
}
else
{
return null;
}
}
public function get_link()
{
if ($this->link !== null)
{
return urldecode($this->link);
}
else
{
return null;
}
}
public function get_medium()
{
if ($this->medium !== null)
{
return $this->medium;
}
else
{
return null;
}
}
public function get_player()
{
if ($this->player !== null)
{
return $this->player;
}
else
{
return null;
}
}
public function get_rating($key = 0)
{
$ratings = $this->get_ratings();
if (isset($ratings[$key]))
{
return $ratings[$key];
}
else
{
return null;
}
}
public function get_ratings()
{
if ($this->ratings !== null)
{
return $this->ratings;
}
else
{
return null;
}
}
public function get_restriction($key = 0)
{
$restrictions = $this->get_restrictions();
if (isset($restrictions[$key]))
{
return $restrictions[$key];
}
else
{
return null;
}
}
public function get_restrictions()
{
if ($this->restrictions !== null)
{
return $this->restrictions;
}
else
{
return null;
}
}
public function get_sampling_rate()
{
if ($this->samplingrate !== null)
{
return $this->samplingrate;
}
else
{
return null;
}
}
public function get_size()
{
$length = $this->get_length();
if ($length !== null)
{
return round($length/1048576, 2);
}
else
{
return null;
}
}
public function get_thumbnail($key = 0)
{
$thumbnails = $this->get_thumbnails();
if (isset($thumbnails[$key]))
{
return $thumbnails[$key];
}
else
{
return null;
}
}
public function get_thumbnails()
{
if ($this->thumbnails !== null)
{
return $this->thumbnails;
}
else
{
return null;
}
}
public function get_title()
{
if ($this->title !== null)
{
return $this->title;
}
else
{
return null;
}
}
public function get_type()
{
if ($this->type !== null)
{
return $this->type;
}
else
{
return null;
}
}
public function get_width()
{
if ($this->width !== null)
{
return $this->width;
}
else
{
return null;
}
}
public function native_embed($options='')
{
return $this->embed($options, true);
}
/**
* @todo If the dimensions for media:content are defined, use them when width/height are set to 'auto'.
*/
public function embed($options = '', $native = false)
{
// Set up defaults
$audio = '';
$video = '';
$alt = '';
$altclass = '';
$loop = 'false';
$width = 'auto';
$height = 'auto';
$bgcolor = '#ffffff';
$mediaplayer = '';
$widescreen = false;
$handler = $this->get_handler();
$type = $this->get_real_type();
// Process options and reassign values as necessary
if (is_array($options))
{
extract($options);
}
else
{
$options = explode(',', $options);
foreach($options as $option)
{
$opt = explode(':', $option, 2);
if (isset($opt[0], $opt[1]))
{
$opt[0] = trim($opt[0]);
$opt[1] = trim($opt[1]);
switch ($opt[0])
{
case 'audio':
$audio = $opt[1];
break;
case 'video':
$video = $opt[1];
break;
case 'alt':
$alt = $opt[1];
break;
case 'altclass':
$altclass = $opt[1];
break;
case 'loop':
$loop = $opt[1];
break;
case 'width':
$width = $opt[1];
break;
case 'height':
$height = $opt[1];
break;
case 'bgcolor':
$bgcolor = $opt[1];
break;
case 'mediaplayer':
$mediaplayer = $opt[1];
break;
case 'widescreen':
$widescreen = $opt[1];
break;
}
}
}
}
$mime = explode('/', $type, 2);
$mime = $mime[0];
// Process values for 'auto'
if ($width === 'auto')
{
if ($mime === 'video')
{
if ($height === 'auto')
{
$width = 480;
}
elseif ($widescreen)
{
$width = round((intval($height)/9)*16);
}
else
{
$width = round((intval($height)/3)*4);
}
}
else
{
$width = '100%';
}
}
if ($height === 'auto')
{
if ($mime === 'audio')
{
$height = 0;
}
elseif ($mime === 'video')
{
if ($width === 'auto')
{
if ($widescreen)
{
$height = 270;
}
else
{
$height = 360;
}
}
elseif ($widescreen)
{
$height = round((intval($width)/16)*9);
}
else
{
$height = round((intval($width)/4)*3);
}
}
else
{
$height = 376;
}
}
elseif ($mime === 'audio')
{
$height = 0;
}
// Set proper placeholder value
if ($mime === 'audio')
{
$placeholder = $audio;
}
elseif ($mime === 'video')
{
$placeholder = $video;
}
$embed = '';
// Odeo Feed MP3's
if ($handler === 'odeo')
{
if ($native)
{
$embed .= '<embed src="http://odeo.com/flash/audio_player_fullsize.swf" pluginspage="http://adobe.com/go/getflashplayer" type="application/x-shockwave-flash" quality="high" width="440" height="80" wmode="transparent" allowScriptAccess="any" flashvars="valid_sample_rate=true&external_url=' . $this->get_link() . '"></embed>';
}
else
{
$embed .= '<script type="text/javascript">embed_odeo("' . $this->get_link() . '");</script>';
}
}
// Flash
elseif ($handler === 'flash')
{
if ($native)
{
$embed .= "<embed src=\"" . $this->get_link() . "\" pluginspage=\"http://adobe.com/go/getflashplayer\" type=\"$type\" quality=\"high\" width=\"$width\" height=\"$height\" bgcolor=\"$bgcolor\" loop=\"$loop\"></embed>";
}
else
{
$embed .= "<script type='text/javascript'>embed_flash('$bgcolor', '$width', '$height', '" . $this->get_link() . "', '$loop', '$type');</script>";
}
}
// Flash Media Player file types.
// Preferred handler for MP3 file types.
elseif ($handler === 'fmedia' || ($handler === 'mp3' && $mediaplayer !== ''))
{
$height += 20;
if ($native)
{
$embed .= "<embed src=\"$mediaplayer\" pluginspage=\"http://adobe.com/go/getflashplayer\" type=\"application/x-shockwave-flash\" quality=\"high\" width=\"$width\" height=\"$height\" wmode=\"transparent\" flashvars=\"file=" . rawurlencode($this->get_link().'?file_extension=.'.$this->get_extension()) . "&autostart=false&repeat=$loop&showdigits=true&showfsbutton=false\"></embed>";
}
else
{
$embed .= "<script type='text/javascript'>embed_flv('$width', '$height', '" . rawurlencode($this->get_link().'?file_extension=.'.$this->get_extension()) . "', '$placeholder', '$loop', '$mediaplayer');</script>";
}
}
// QuickTime 7 file types. Need to test with QuickTime 6.
// Only handle MP3's if the Flash Media Player is not present.
elseif ($handler === 'quicktime' || ($handler === 'mp3' && $mediaplayer === ''))
{
$height += 16;
if ($native)
{
if ($placeholder !== '')
{
$embed .= "<embed type=\"$type\" style=\"cursor:hand; cursor:pointer;\" href=\"" . $this->get_link() . "\" src=\"$placeholder\" width=\"$width\" height=\"$height\" autoplay=\"false\" target=\"myself\" controller=\"false\" loop=\"$loop\" scale=\"aspect\" bgcolor=\"$bgcolor\" pluginspage=\"http://apple.com/quicktime/download/\"></embed>";
}
else
{
$embed .= "<embed type=\"$type\" style=\"cursor:hand; cursor:pointer;\" src=\"" . $this->get_link() . "\" width=\"$width\" height=\"$height\" autoplay=\"false\" target=\"myself\" controller=\"true\" loop=\"$loop\" scale=\"aspect\" bgcolor=\"$bgcolor\" pluginspage=\"http://apple.com/quicktime/download/\"></embed>";
}
}
else
{
$embed .= "<script type='text/javascript'>embed_quicktime('$type', '$bgcolor', '$width', '$height', '" . $this->get_link() . "', '$placeholder', '$loop');</script>";
}
}
// Windows Media
elseif ($handler === 'wmedia')
{
$height += 45;
if ($native)
{
$embed .= "<embed type=\"application/x-mplayer2\" src=\"" . $this->get_link() . "\" autosize=\"1\" width=\"$width\" height=\"$height\" showcontrols=\"1\" showstatusbar=\"0\" showdisplay=\"0\" autostart=\"0\"></embed>";
}
else
{
$embed .= "<script type='text/javascript'>embed_wmedia('$width', '$height', '" . $this->get_link() . "');</script>";
}
}
// Everything else
else $embed .= '<a href="' . $this->get_link() . '" class="' . $altclass . '">' . $alt . '</a>';
return $embed;
}
public function get_real_type($find_handler = false)
{
// If it's Odeo, let's get it out of the way.
if (substr(strtolower($this->get_link()), 0, 15) === 'http://odeo.com')
{
return 'odeo';
}
// Mime-types by handler.
$types_flash = array('application/x-shockwave-flash', 'application/futuresplash'); // Flash
$types_fmedia = array('video/flv', 'video/x-flv','flv-application/octet-stream'); // Flash Media Player
$types_quicktime = array('audio/3gpp', 'audio/3gpp2', 'audio/aac', 'audio/x-aac', 'audio/aiff', 'audio/x-aiff', 'audio/mid', 'audio/midi', 'audio/x-midi', 'audio/mp4', 'audio/m4a', 'audio/x-m4a', 'audio/wav', 'audio/x-wav', 'video/3gpp', 'video/3gpp2', 'video/m4v', 'video/x-m4v', 'video/mp4', 'video/mpeg', 'video/x-mpeg', 'video/quicktime', 'video/sd-video'); // QuickTime
$types_wmedia = array('application/asx', 'application/x-mplayer2', 'audio/x-ms-wma', 'audio/x-ms-wax', 'video/x-ms-asf-plugin', 'video/x-ms-asf', 'video/x-ms-wm', 'video/x-ms-wmv', 'video/x-ms-wvx'); // Windows Media
$types_mp3 = array('audio/mp3', 'audio/x-mp3', 'audio/mpeg', 'audio/x-mpeg'); // MP3
if ($this->get_type() !== null)
{
$type = strtolower($this->type);
}
else
{
$type = null;
}
// If we encounter an unsupported mime-type, check the file extension and guess intelligently.
if (!in_array($type, array_merge($types_flash, $types_fmedia, $types_quicktime, $types_wmedia, $types_mp3)))
{
switch (strtolower($this->get_extension()))
{
// Audio mime-types
case 'aac':
case 'adts':
$type = 'audio/acc';
break;
case 'aif':
case 'aifc':
case 'aiff':
case 'cdda':
$type = 'audio/aiff';
break;
case 'bwf':
$type = 'audio/wav';
break;
case 'kar':
case 'mid':
case 'midi':
case 'smf':
$type = 'audio/midi';
break;
case 'm4a':
$type = 'audio/x-m4a';
break;
case 'mp3':
case 'swa':
$type = 'audio/mp3';
break;
case 'wav':
$type = 'audio/wav';
break;
case 'wax':
$type = 'audio/x-ms-wax';
break;
case 'wma':
$type = 'audio/x-ms-wma';
break;
// Video mime-types
case '3gp':
case '3gpp':
$type = 'video/3gpp';
break;
case '3g2':
case '3gp2':
$type = 'video/3gpp2';
break;
case 'asf':
$type = 'video/x-ms-asf';
break;
case 'flv':
$type = 'video/x-flv';
break;
case 'm1a':
case 'm1s':
case 'm1v':
case 'm15':
case 'm75':
case 'mp2':
case 'mpa':
case 'mpeg':
case 'mpg':
case 'mpm':
case 'mpv':
$type = 'video/mpeg';
break;
case 'm4v':
$type = 'video/x-m4v';
break;
case 'mov':
case 'qt':
$type = 'video/quicktime';
break;
case 'mp4':
case 'mpg4':
$type = 'video/mp4';
break;
case 'sdv':
$type = 'video/sd-video';
break;
case 'wm':
$type = 'video/x-ms-wm';
break;
case 'wmv':
$type = 'video/x-ms-wmv';
break;
case 'wvx':
$type = 'video/x-ms-wvx';
break;
// Flash mime-types
case 'spl':
$type = 'application/futuresplash';
break;
case 'swf':
$type = 'application/x-shockwave-flash';
break;
}
}
if ($find_handler)
{
if (in_array($type, $types_flash))
{
return 'flash';
}
elseif (in_array($type, $types_fmedia))
{
return 'fmedia';
}
elseif (in_array($type, $types_quicktime))
{
return 'quicktime';
}
elseif (in_array($type, $types_wmedia))
{
return 'wmedia';
}
elseif (in_array($type, $types_mp3))
{
return 'mp3';
}
else
{
return null;
}
}
else
{
return $type;
}
}
}

View File

@ -0,0 +1,278 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
/**
* @todo Move to properly supporting RFC2616 (HTTP/1.1)
*/
class SimplePie_File
{
var $url;
var $useragent;
var $success = true;
var $headers = array();
var $body;
var $status_code;
var $redirects = 0;
var $error;
var $method = SIMPLEPIE_FILE_SOURCE_NONE;
public function __construct($url, $timeout = 10, $redirects = 5, $headers = null, $useragent = null, $force_fsockopen = false)
{
if (class_exists('idna_convert'))
{
$idn = new idna_convert();
$parsed = SimplePie_Misc::parse_url($url);
$url = SimplePie_Misc::compress_parse_url($parsed['scheme'], $idn->encode($parsed['authority']), $parsed['path'], $parsed['query'], $parsed['fragment']);
}
$this->url = $url;
$this->useragent = $useragent;
if (preg_match('/^http(s)?:\/\//i', $url))
{
if ($useragent === null)
{
$useragent = ini_get('user_agent');
$this->useragent = $useragent;
}
if (!is_array($headers))
{
$headers = array();
}
if (!$force_fsockopen && function_exists('curl_exec'))
{
$this->method = SIMPLEPIE_FILE_SOURCE_REMOTE | SIMPLEPIE_FILE_SOURCE_CURL;
$fp = curl_init();
$headers2 = array();
foreach ($headers as $key => $value)
{
$headers2[] = "$key: $value";
}
if (version_compare(SimplePie_Misc::get_curl_version(), '7.10.5', '>='))
{
curl_setopt($fp, CURLOPT_ENCODING, '');
}
curl_setopt($fp, CURLOPT_URL, $url);
curl_setopt($fp, CURLOPT_HEADER, 1);
curl_setopt($fp, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($fp, CURLOPT_TIMEOUT, $timeout);
curl_setopt($fp, CURLOPT_CONNECTTIMEOUT, $timeout);
curl_setopt($fp, CURLOPT_REFERER, $url);
curl_setopt($fp, CURLOPT_USERAGENT, $useragent);
curl_setopt($fp, CURLOPT_HTTPHEADER, $headers2);
if (!ini_get('open_basedir') && !ini_get('safe_mode') && version_compare(SimplePie_Misc::get_curl_version(), '7.15.2', '>='))
{
curl_setopt($fp, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt($fp, CURLOPT_MAXREDIRS, $redirects);
}
$this->headers = curl_exec($fp);
if (curl_errno($fp) === 23 || curl_errno($fp) === 61)
{
curl_setopt($fp, CURLOPT_ENCODING, 'none');
$this->headers = curl_exec($fp);
}
if (curl_errno($fp))
{
$this->error = 'cURL error ' . curl_errno($fp) . ': ' . curl_error($fp);
$this->success = false;
}
else
{
$info = curl_getinfo($fp);
curl_close($fp);
$this->headers = explode("\r\n\r\n", $this->headers, $info['redirect_count'] + 1);
$this->headers = array_pop($this->headers);
$parser = new SimplePie_HTTP_Parser($this->headers);
if ($parser->parse())
{
$this->headers = $parser->headers;
$this->body = $parser->body;
$this->status_code = $parser->status_code;
if ((in_array($this->status_code, array(300, 301, 302, 303, 307)) || $this->status_code > 307 && $this->status_code < 400) && isset($this->headers['location']) && $this->redirects < $redirects)
{
$this->redirects++;
$location = SimplePie_Misc::absolutize_url($this->headers['location'], $url);
return $this->__construct($location, $timeout, $redirects, $headers, $useragent, $force_fsockopen);
}
}
}
}
else
{
$this->method = SIMPLEPIE_FILE_SOURCE_REMOTE | SIMPLEPIE_FILE_SOURCE_FSOCKOPEN;
$url_parts = parse_url($url);
$socket_host = $url_parts['host'];
if (isset($url_parts['scheme']) && strtolower($url_parts['scheme']) === 'https')
{
$socket_host = "ssl://$url_parts[host]";
$url_parts['port'] = 443;
}
if (!isset($url_parts['port']))
{
$url_parts['port'] = 80;
}
$fp = @fsockopen($socket_host, $url_parts['port'], $errno, $errstr, $timeout);
if (!$fp)
{
$this->error = 'fsockopen error: ' . $errstr;
$this->success = false;
}
else
{
stream_set_timeout($fp, $timeout);
if (isset($url_parts['path']))
{
if (isset($url_parts['query']))
{
$get = "$url_parts[path]?$url_parts[query]";
}
else
{
$get = $url_parts['path'];
}
}
else
{
$get = '/';
}
$out = "GET $get HTTP/1.1\r\n";
$out .= "Host: $url_parts[host]\r\n";
$out .= "User-Agent: $useragent\r\n";
if (extension_loaded('zlib'))
{
$out .= "Accept-Encoding: x-gzip,gzip,deflate\r\n";
}
if (isset($url_parts['user']) && isset($url_parts['pass']))
{
$out .= "Authorization: Basic " . base64_encode("$url_parts[user]:$url_parts[pass]") . "\r\n";
}
foreach ($headers as $key => $value)
{
$out .= "$key: $value\r\n";
}
$out .= "Connection: Close\r\n\r\n";
fwrite($fp, $out);
$info = stream_get_meta_data($fp);
$this->headers = '';
while (!$info['eof'] && !$info['timed_out'])
{
$this->headers .= fread($fp, 1160);
$info = stream_get_meta_data($fp);
}
if (!$info['timed_out'])
{
$parser = new SimplePie_HTTP_Parser($this->headers);
if ($parser->parse())
{
$this->headers = $parser->headers;
$this->body = $parser->body;
$this->status_code = $parser->status_code;
if ((in_array($this->status_code, array(300, 301, 302, 303, 307)) || $this->status_code > 307 && $this->status_code < 400) && isset($this->headers['location']) && $this->redirects < $redirects)
{
$this->redirects++;
$location = SimplePie_Misc::absolutize_url($this->headers['location'], $url);
return $this->__construct($location, $timeout, $redirects, $headers, $useragent, $force_fsockopen);
}
if (isset($this->headers['content-encoding']))
{
// Hey, we act dumb elsewhere, so let's do that here too
switch (strtolower(trim($this->headers['content-encoding'], "\x09\x0A\x0D\x20")))
{
case 'gzip':
case 'x-gzip':
$decoder = new SimplePie_gzdecode($this->body);
if (!$decoder->parse())
{
$this->error = 'Unable to decode HTTP "gzip" stream';
$this->success = false;
}
else
{
$this->body = $decoder->data;
}
break;
case 'deflate':
if (($body = gzuncompress($this->body)) === false)
{
if (($body = gzinflate($this->body)) === false)
{
$this->error = 'Unable to decode HTTP "deflate" stream';
$this->success = false;
}
}
$this->body = $body;
break;
default:
$this->error = 'Unknown content coding';
$this->success = false;
}
}
}
}
else
{
$this->error = 'fsocket timed out';
$this->success = false;
}
fclose($fp);
}
}
}
else
{
$this->method = SIMPLEPIE_FILE_SOURCE_LOCAL | SIMPLEPIE_FILE_SOURCE_FILE_GET_CONTENTS;
if (!$this->body = file_get_contents($url))
{
$this->error = 'file_get_contents could not read the file';
$this->success = false;
}
}
}
}

View File

@ -0,0 +1,492 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
/**
* HTTP Response Parser
*
* @package SimplePie
*/
class SimplePie_HTTP_Parser
{
/**
* HTTP Version
*
* @var float
*/
public $http_version = 0.0;
/**
* Status code
*
* @var int
*/
public $status_code = 0;
/**
* Reason phrase
*
* @var string
*/
public $reason = '';
/**
* Key/value pairs of the headers
*
* @var array
*/
public $headers = array();
/**
* Body of the response
*
* @var string
*/
public $body = '';
/**
* Current state of the state machine
*
* @var string
*/
protected $state = 'http_version';
/**
* Input data
*
* @var string
*/
protected $data = '';
/**
* Input data length (to avoid calling strlen() everytime this is needed)
*
* @var int
*/
protected $data_length = 0;
/**
* Current position of the pointer
*
* @var int
*/
protected $position = 0;
/**
* Name of the hedaer currently being parsed
*
* @var string
*/
protected $name = '';
/**
* Value of the hedaer currently being parsed
*
* @var string
*/
protected $value = '';
/**
* Create an instance of the class with the input data
*
* @param string $data Input data
*/
public function __construct($data)
{
$this->data = $data;
$this->data_length = strlen($this->data);
}
/**
* Parse the input data
*
* @return bool true on success, false on failure
*/
public function parse()
{
while ($this->state && $this->state !== 'emit' && $this->has_data())
{
$state = $this->state;
$this->$state();
}
$this->data = '';
if ($this->state === 'emit' || $this->state === 'body')
{
return true;
}
else
{
$this->http_version = '';
$this->status_code = '';
$this->reason = '';
$this->headers = array();
$this->body = '';
return false;
}
}
/**
* Check whether there is data beyond the pointer
*
* @return bool true if there is further data, false if not
*/
protected function has_data()
{
return (bool) ($this->position < $this->data_length);
}
/**
* See if the next character is LWS
*
* @return bool true if the next character is LWS, false if not
*/
protected function is_linear_whitespace()
{
return (bool) ($this->data[$this->position] === "\x09"
|| $this->data[$this->position] === "\x20"
|| ($this->data[$this->position] === "\x0A"
&& isset($this->data[$this->position + 1])
&& ($this->data[$this->position + 1] === "\x09" || $this->data[$this->position + 1] === "\x20")));
}
/**
* Parse the HTTP version
*/
protected function http_version()
{
if (strpos($this->data, "\x0A") !== false && strtoupper(substr($this->data, 0, 5)) === 'HTTP/')
{
$len = strspn($this->data, '0123456789.', 5);
$this->http_version = substr($this->data, 5, $len);
$this->position += 5 + $len;
if (substr_count($this->http_version, '.') <= 1)
{
$this->http_version = (float) $this->http_version;
$this->position += strspn($this->data, "\x09\x20", $this->position);
$this->state = 'status';
}
else
{
$this->state = false;
}
}
else
{
$this->state = false;
}
}
/**
* Parse the status code
*/
protected function status()
{
if ($len = strspn($this->data, '0123456789', $this->position))
{
$this->status_code = (int) substr($this->data, $this->position, $len);
$this->position += $len;
$this->state = 'reason';
}
else
{
$this->state = false;
}
}
/**
* Parse the reason phrase
*/
protected function reason()
{
$len = strcspn($this->data, "\x0A", $this->position);
$this->reason = trim(substr($this->data, $this->position, $len), "\x09\x0D\x20");
$this->position += $len + 1;
$this->state = 'new_line';
}
/**
* Deal with a new line, shifting data around as needed
*/
protected function new_line()
{
$this->value = trim($this->value, "\x0D\x20");
if ($this->name !== '' && $this->value !== '')
{
$this->name = strtolower($this->name);
// We should only use the last Content-Type header. c.f. issue #1
if (isset($this->headers[$this->name]) && $this->name !== 'content-type')
{
$this->headers[$this->name] .= ', ' . $this->value;
}
else
{
$this->headers[$this->name] = $this->value;
}
}
$this->name = '';
$this->value = '';
if (substr($this->data[$this->position], 0, 2) === "\x0D\x0A")
{
$this->position += 2;
$this->state = 'body';
}
elseif ($this->data[$this->position] === "\x0A")
{
$this->position++;
$this->state = 'body';
}
else
{
$this->state = 'name';
}
}
/**
* Parse a header name
*/
protected function name()
{
$len = strcspn($this->data, "\x0A:", $this->position);
if (isset($this->data[$this->position + $len]))
{
if ($this->data[$this->position + $len] === "\x0A")
{
$this->position += $len;
$this->state = 'new_line';
}
else
{
$this->name = substr($this->data, $this->position, $len);
$this->position += $len + 1;
$this->state = 'value';
}
}
else
{
$this->state = false;
}
}
/**
* Parse LWS, replacing consecutive LWS characters with a single space
*/
protected function linear_whitespace()
{
do
{
if (substr($this->data, $this->position, 2) === "\x0D\x0A")
{
$this->position += 2;
}
elseif ($this->data[$this->position] === "\x0A")
{
$this->position++;
}
$this->position += strspn($this->data, "\x09\x20", $this->position);
} while ($this->has_data() && $this->is_linear_whitespace());
$this->value .= "\x20";
}
/**
* See what state to move to while within non-quoted header values
*/
protected function value()
{
if ($this->is_linear_whitespace())
{
$this->linear_whitespace();
}
else
{
switch ($this->data[$this->position])
{
case '"':
// Workaround for ETags: we have to include the quotes as
// part of the tag.
if (strtolower($this->name) === 'etag')
{
$this->value .= '"';
$this->position++;
$this->state = 'value_char';
break;
}
$this->position++;
$this->state = 'quote';
break;
case "\x0A":
$this->position++;
$this->state = 'new_line';
break;
default:
$this->state = 'value_char';
break;
}
}
}
/**
* Parse a header value while outside quotes
*/
protected function value_char()
{
$len = strcspn($this->data, "\x09\x20\x0A\"", $this->position);
$this->value .= substr($this->data, $this->position, $len);
$this->position += $len;
$this->state = 'value';
}
/**
* See what state to move to while within quoted header values
*/
protected function quote()
{
if ($this->is_linear_whitespace())
{
$this->linear_whitespace();
}
else
{
switch ($this->data[$this->position])
{
case '"':
$this->position++;
$this->state = 'value';
break;
case "\x0A":
$this->position++;
$this->state = 'new_line';
break;
case '\\':
$this->position++;
$this->state = 'quote_escaped';
break;
default:
$this->state = 'quote_char';
break;
}
}
}
/**
* Parse a header value while within quotes
*/
protected function quote_char()
{
$len = strcspn($this->data, "\x09\x20\x0A\"\\", $this->position);
$this->value .= substr($this->data, $this->position, $len);
$this->position += $len;
$this->state = 'value';
}
/**
* Parse an escaped character within quotes
*/
protected function quote_escaped()
{
$this->value .= $this->data[$this->position];
$this->position++;
$this->state = 'quote';
}
/**
* Parse the body
*/
protected function body()
{
$this->body = substr($this->data, $this->position);
if (!empty($this->headers['transfer-encoding']))
{
unset($this->headers['transfer-encoding']);
$this->state = 'chunked';
}
else
{
$this->state = 'emit';
}
}
/**
* Parsed a "Transfer-Encoding: chunked" body
*/
protected function chunked()
{
if (!preg_match('/^[0-9a-f]+(\s|\r|\n)+/mi', trim($this->body)))
{
$this->state = 'emit';
return;
}
$decoded = '';
$encoded = $this->body;
while (true)
{
$is_chunked = (bool) preg_match( '/^([0-9a-f]+)(\s|\r|\n)+/mi', $encoded, $matches );
if (!$is_chunked)
{
// Looks like it's not chunked after all
$this->state = 'emit';
return;
}
$length = hexdec($matches[1]);
$chunk_length = strlen($matches[0]);
$decoded .= $part = substr($encoded, $chunk_length, $length);
$encoded = ltrim(substr($encoded, $chunk_length + $length), "\r\n");
if (trim($encoded) === '0')
{
$this->state = 'emit';
$this->body = $decoded;
return;
}
}
}
}

997
inc/3rdparty/simplepie/SimplePie/IRI.php vendored Normal file
View File

@ -0,0 +1,997 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
/**
* IRI parser/serialiser
*
* @package SimplePie
*/
class SimplePie_IRI
{
/**
* Scheme
*
* @access private
* @var string
*/
var $scheme;
/**
* User Information
*
* @access private
* @var string
*/
var $userinfo;
/**
* Host
*
* @access private
* @var string
*/
var $host;
/**
* Port
*
* @access private
* @var string
*/
var $port;
/**
* Path
*
* @access private
* @var string
*/
var $path;
/**
* Query
*
* @access private
* @var string
*/
var $query;
/**
* Fragment
*
* @access private
* @var string
*/
var $fragment;
/**
* Whether the object represents a valid IRI
*
* @access private
* @var array
*/
var $valid = array();
/**
* Return the entire IRI when you try and read the object as a string
*
* @access public
* @return string
*/
public function __toString()
{
return $this->get_iri();
}
/**
* Create a new IRI object, from a specified string
*
* @access public
* @param string $iri
* @return SimplePie_IRI
*/
public function __construct($iri)
{
$iri = (string) $iri;
if ($iri !== '')
{
$parsed = $this->parse_iri($iri);
$this->set_scheme($parsed['scheme']);
$this->set_authority($parsed['authority']);
$this->set_path($parsed['path']);
$this->set_query($parsed['query']);
$this->set_fragment($parsed['fragment']);
}
}
/**
* Create a new IRI object by resolving a relative IRI
*
* @static
* @access public
* @param SimplePie_IRI $base Base IRI
* @param string $relative Relative IRI
* @return SimplePie_IRI
*/
public static function absolutize($base, $relative)
{
$relative = (string) $relative;
if ($relative !== '')
{
$relative = new SimplePie_IRI($relative);
if ($relative->get_scheme() !== null)
{
$target = $relative;
}
elseif ($base->get_iri() !== null)
{
if ($relative->get_authority() !== null)
{
$target = $relative;
$target->set_scheme($base->get_scheme());
}
else
{
$target = new SimplePie_IRI('');
$target->set_scheme($base->get_scheme());
$target->set_userinfo($base->get_userinfo());
$target->set_host($base->get_host());
$target->set_port($base->get_port());
if ($relative->get_path() !== null)
{
if (strpos($relative->get_path(), '/') === 0)
{
$target->set_path($relative->get_path());
}
elseif (($base->get_userinfo() !== null || $base->get_host() !== null || $base->get_port() !== null) && $base->get_path() === null)
{
$target->set_path('/' . $relative->get_path());
}
elseif (($last_segment = strrpos($base->get_path(), '/')) !== false)
{
$target->set_path(substr($base->get_path(), 0, $last_segment + 1) . $relative->get_path());
}
else
{
$target->set_path($relative->get_path());
}
$target->set_query($relative->get_query());
}
else
{
$target->set_path($base->get_path());
if ($relative->get_query() !== null)
{
$target->set_query($relative->get_query());
}
elseif ($base->get_query() !== null)
{
$target->set_query($base->get_query());
}
}
}
$target->set_fragment($relative->get_fragment());
}
else
{
// No base URL, just return the relative URL
$target = $relative;
}
}
else
{
$target = $base;
}
return $target;
}
/**
* Parse an IRI into scheme/authority/path/query/fragment segments
*
* @access private
* @param string $iri
* @return array
*/
public function parse_iri($iri)
{
preg_match('/^(([^:\/?#]+):)?(\/\/([^\/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?$/', $iri, $match);
for ($i = count($match); $i <= 9; $i++)
{
$match[$i] = '';
}
return array('scheme' => $match[2], 'authority' => $match[4], 'path' => $match[5], 'query' => $match[7], 'fragment' => $match[9]);
}
/**
* Remove dot segments from a path
*
* @access private
* @param string $input
* @return string
*/
public function remove_dot_segments($input)
{
$output = '';
while (strpos($input, './') !== false || strpos($input, '/.') !== false || $input === '.' || $input === '..')
{
// A: If the input buffer begins with a prefix of "../" or "./", then remove that prefix from the input buffer; otherwise,
if (strpos($input, '../') === 0)
{
$input = substr($input, 3);
}
elseif (strpos($input, './') === 0)
{
$input = substr($input, 2);
}
// B: if the input buffer begins with a prefix of "/./" or "/.", where "." is a complete path segment, then replace that prefix with "/" in the input buffer; otherwise,
elseif (strpos($input, '/./') === 0)
{
$input = substr_replace($input, '/', 0, 3);
}
elseif ($input === '/.')
{
$input = '/';
}
// C: if the input buffer begins with a prefix of "/../" or "/..", where ".." is a complete path segment, then replace that prefix with "/" in the input buffer and remove the last segment and its preceding "/" (if any) from the output buffer; otherwise,
elseif (strpos($input, '/../') === 0)
{
$input = substr_replace($input, '/', 0, 4);
$output = substr_replace($output, '', strrpos($output, '/'));
}
elseif ($input === '/..')
{
$input = '/';
$output = substr_replace($output, '', strrpos($output, '/'));
}
// D: if the input buffer consists only of "." or "..", then remove that from the input buffer; otherwise,
elseif ($input === '.' || $input === '..')
{
$input = '';
}
// E: move the first path segment in the input buffer to the end of the output buffer, including the initial "/" character (if any) and any subsequent characters up to, but not including, the next "/" character or the end of the input buffer
elseif (($pos = strpos($input, '/', 1)) !== false)
{
$output .= substr($input, 0, $pos);
$input = substr_replace($input, '', 0, $pos);
}
else
{
$output .= $input;
$input = '';
}
}
return $output . $input;
}
/**
* Replace invalid character with percent encoding
*
* @param string $string Input string
* @param string $valid_chars Valid characters not in iunreserved or iprivate (this is ASCII-only)
* @param int $case Normalise case
* @param bool $iprivate Allow iprivate
* @return string
*/
protected function replace_invalid_with_pct_encoding($string, $valid_chars, $case = SIMPLEPIE_SAME_CASE, $iprivate = false)
{
// Normalize as many pct-encoded sections as possible
$string = preg_replace_callback('/(?:%[A-Fa-f0-9]{2})+/', array(&$this, 'remove_iunreserved_percent_encoded'), $string);
// Replace invalid percent characters
$string = preg_replace('/%(?![A-Fa-f0-9]{2})/', '%25', $string);
// Add unreserved and % to $valid_chars (the latter is safe because all
// pct-encoded sections are now valid).
$valid_chars .= 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~%';
// Now replace any bytes that aren't allowed with their pct-encoded versions
$position = 0;
$strlen = strlen($string);
while (($position += strspn($string, $valid_chars, $position)) < $strlen)
{
$value = ord($string[$position]);
// Start position
$start = $position;
// By default we are valid
$valid = true;
// No one byte sequences are valid due to the while.
// Two byte sequence:
if (($value & 0xE0) === 0xC0)
{
$character = ($value & 0x1F) << 6;
$length = 2;
$remaining = 1;
}
// Three byte sequence:
elseif (($value & 0xF0) === 0xE0)
{
$character = ($value & 0x0F) << 12;
$length = 3;
$remaining = 2;
}
// Four byte sequence:
elseif (($value & 0xF8) === 0xF0)
{
$character = ($value & 0x07) << 18;
$length = 4;
$remaining = 3;
}
// Invalid byte:
else
{
$valid = false;
$length = 1;
$remaining = 0;
}
if ($remaining)
{
if ($position + $length <= $strlen)
{
for ($position++; $remaining; $position++)
{
$value = ord($string[$position]);
// Check that the byte is valid, then add it to the character:
if (($value & 0xC0) === 0x80)
{
$character |= ($value & 0x3F) << (--$remaining * 6);
}
// If it is invalid, count the sequence as invalid and reprocess the current byte:
else
{
$valid = false;
$position--;
break;
}
}
}
else
{
$position = $strlen - 1;
$valid = false;
}
}
// Percent encode anything invalid or not in ucschar
if (
// Invalid sequences
!$valid
// Non-shortest form sequences are invalid
|| $length > 1 && $character <= 0x7F
|| $length > 2 && $character <= 0x7FF
|| $length > 3 && $character <= 0xFFFF
// Outside of range of ucschar codepoints
// Noncharacters
|| ($character & 0xFFFE) === 0xFFFE
|| $character >= 0xFDD0 && $character <= 0xFDEF
|| (
// Everything else not in ucschar
$character > 0xD7FF && $character < 0xF900
|| $character < 0xA0
|| $character > 0xEFFFD
)
&& (
// Everything not in iprivate, if it applies
!$iprivate
|| $character < 0xE000
|| $character > 0x10FFFD
)
)
{
// If we were a character, pretend we weren't, but rather an error.
if ($valid)
$position--;
for ($j = $start; $j <= $position; $j++)
{
$string = substr_replace($string, sprintf('%%%02X', ord($string[$j])), $j, 1);
$j += 2;
$position += 2;
$strlen += 2;
}
}
}
// Normalise case
if ($case & SIMPLEPIE_LOWERCASE)
{
$string = strtolower($string);
}
elseif ($case & SIMPLEPIE_UPPERCASE)
{
$string = strtoupper($string);
}
return $string;
}
/**
* Callback function for preg_replace_callback.
*
* Removes sequences of percent encoded bytes that represent UTF-8
* encoded characters in iunreserved
*
* @param array $match PCRE match
* @return string Replacement
*/
protected function remove_iunreserved_percent_encoded($match)
{
// As we just have valid percent encoded sequences we can just explode
// and ignore the first member of the returned array (an empty string).
$bytes = explode('%', $match[0]);
// Initialize the new string (this is what will be returned) and that
// there are no bytes remaining in the current sequence (unsurprising
// at the first byte!).
$string = '';
$remaining = 0;
// Loop over each and every byte, and set $value to its value
for ($i = 1, $len = count($bytes); $i < $len; $i++)
{
$value = hexdec($bytes[$i]);
// If we're the first byte of sequence:
if (!$remaining)
{
// Start position
$start = $i;
// By default we are valid
$valid = true;
// One byte sequence:
if ($value <= 0x7F)
{
$character = $value;
$length = 1;
}
// Two byte sequence:
elseif (($value & 0xE0) === 0xC0)
{
$character = ($value & 0x1F) << 6;
$length = 2;
$remaining = 1;
}
// Three byte sequence:
elseif (($value & 0xF0) === 0xE0)
{
$character = ($value & 0x0F) << 12;
$length = 3;
$remaining = 2;
}
// Four byte sequence:
elseif (($value & 0xF8) === 0xF0)
{
$character = ($value & 0x07) << 18;
$length = 4;
$remaining = 3;
}
// Invalid byte:
else
{
$valid = false;
$remaining = 0;
}
}
// Continuation byte:
else
{
// Check that the byte is valid, then add it to the character:
if (($value & 0xC0) === 0x80)
{
$remaining--;
$character |= ($value & 0x3F) << ($remaining * 6);
}
// If it is invalid, count the sequence as invalid and reprocess the current byte as the start of a sequence:
else
{
$valid = false;
$remaining = 0;
$i--;
}
}
// If we've reached the end of the current byte sequence, append it to Unicode::$data
if (!$remaining)
{
// Percent encode anything invalid or not in iunreserved
if (
// Invalid sequences
!$valid
// Non-shortest form sequences are invalid
|| $length > 1 && $character <= 0x7F
|| $length > 2 && $character <= 0x7FF
|| $length > 3 && $character <= 0xFFFF
// Outside of range of iunreserved codepoints
|| $character < 0x2D
|| $character > 0xEFFFD
// Noncharacters
|| ($character & 0xFFFE) === 0xFFFE
|| $character >= 0xFDD0 && $character <= 0xFDEF
// Everything else not in iunreserved (this is all BMP)
|| $character === 0x2F
|| $character > 0x39 && $character < 0x41
|| $character > 0x5A && $character < 0x61
|| $character > 0x7A && $character < 0x7E
|| $character > 0x7E && $character < 0xA0
|| $character > 0xD7FF && $character < 0xF900
)
{
for ($j = $start; $j <= $i; $j++)
{
$string .= '%' . strtoupper($bytes[$j]);
}
}
else
{
for ($j = $start; $j <= $i; $j++)
{
$string .= chr(hexdec($bytes[$j]));
}
}
}
}
// If we have any bytes left over they are invalid (i.e., we are
// mid-way through a multi-byte sequence)
if ($remaining)
{
for ($j = $start; $j < $len; $j++)
{
$string .= '%' . strtoupper($bytes[$j]);
}
}
return $string;
}
/**
* Check if the object represents a valid IRI
*
* @access public
* @return bool
*/
public function is_valid()
{
return array_sum($this->valid) === count($this->valid);
}
/**
* Set the scheme. Returns true on success, false on failure (if there are
* any invalid characters).
*
* @access public
* @param string $scheme
* @return bool
*/
public function set_scheme($scheme)
{
if ($scheme === null || $scheme === '')
{
$this->scheme = null;
}
else
{
$len = strlen($scheme);
switch (true)
{
case $len > 1:
if (!strspn($scheme, 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-.', 1))
{
$this->scheme = null;
$this->valid[__FUNCTION__] = false;
return false;
}
case $len > 0:
if (!strspn($scheme, 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', 0, 1))
{
$this->scheme = null;
$this->valid[__FUNCTION__] = false;
return false;
}
}
$this->scheme = strtolower($scheme);
}
$this->valid[__FUNCTION__] = true;
return true;
}
/**
* Set the authority. Returns true on success, false on failure (if there are
* any invalid characters).
*
* @access public
* @param string $authority
* @return bool
*/
public function set_authority($authority)
{
if (($userinfo_end = strrpos($authority, '@')) !== false)
{
$userinfo = substr($authority, 0, $userinfo_end);
$authority = substr($authority, $userinfo_end + 1);
}
else
{
$userinfo = null;
}
if (($port_start = strpos($authority, ':')) !== false)
{
$port = substr($authority, $port_start + 1);
if ($port === false)
{
$port = null;
}
$authority = substr($authority, 0, $port_start);
}
else
{
$port = null;
}
return $this->set_userinfo($userinfo) && $this->set_host($authority) && $this->set_port($port);
}
/**
* Set the userinfo.
*
* @access public
* @param string $userinfo
* @return bool
*/
public function set_userinfo($userinfo)
{
if ($userinfo === null || $userinfo === '')
{
$this->userinfo = null;
}
else
{
$this->userinfo = $this->replace_invalid_with_pct_encoding($userinfo, 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~!$&\'()*+,;=:');
}
$this->valid[__FUNCTION__] = true;
return true;
}
/**
* Set the host. Returns true on success, false on failure (if there are
* any invalid characters).
*
* @access public
* @param string $host
* @return bool
*/
public function set_host($host)
{
if ($host === null || $host === '')
{
$this->host = null;
$this->valid[__FUNCTION__] = true;
return true;
}
elseif ($host[0] === '[' && substr($host, -1) === ']')
{
if (SimplePie_Net_IPv6::checkIPv6(substr($host, 1, -1)))
{
$this->host = $host;
$this->valid[__FUNCTION__] = true;
return true;
}
else
{
$this->host = null;
$this->valid[__FUNCTION__] = false;
return false;
}
}
else
{
$this->host = $this->replace_invalid_with_pct_encoding($host, 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~!$&\'()*+,;=', SIMPLEPIE_LOWERCASE);
$this->valid[__FUNCTION__] = true;
return true;
}
}
/**
* Set the port. Returns true on success, false on failure (if there are
* any invalid characters).
*
* @access public
* @param string $port
* @return bool
*/
public function set_port($port)
{
if ($port === null || $port === '')
{
$this->port = null;
$this->valid[__FUNCTION__] = true;
return true;
}
elseif (strspn($port, '0123456789') === strlen($port))
{
$this->port = (int) $port;
$this->valid[__FUNCTION__] = true;
return true;
}
else
{
$this->port = null;
$this->valid[__FUNCTION__] = false;
return false;
}
}
/**
* Set the path.
*
* @access public
* @param string $path
* @return bool
*/
public function set_path($path)
{
if ($path === null || $path === '')
{
$this->path = null;
$this->valid[__FUNCTION__] = true;
return true;
}
elseif (substr($path, 0, 2) === '//' && $this->userinfo === null && $this->host === null && $this->port === null)
{
$this->path = null;
$this->valid[__FUNCTION__] = false;
return false;
}
else
{
$this->path = $this->replace_invalid_with_pct_encoding($path, 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~!$&\'()*+,;=@/');
if ($this->scheme !== null)
{
$this->path = $this->remove_dot_segments($this->path);
}
$this->valid[__FUNCTION__] = true;
return true;
}
}
/**
* Set the query.
*
* @access public
* @param string $query
* @return bool
*/
public function set_query($query)
{
if ($query === null || $query === '')
{
$this->query = null;
}
else
{
$this->query = $this->replace_invalid_with_pct_encoding($query, 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~!$\'()*+,;:@/?&=');
}
$this->valid[__FUNCTION__] = true;
return true;
}
/**
* Set the fragment.
*
* @access public
* @param string $fragment
* @return bool
*/
public function set_fragment($fragment)
{
if ($fragment === null || $fragment === '')
{
$this->fragment = null;
}
else
{
$this->fragment = $this->replace_invalid_with_pct_encoding($fragment, 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~!$&\'()*+,;=:@/?');
}
$this->valid[__FUNCTION__] = true;
return true;
}
/**
* Get the complete IRI
*
* @access public
* @return string
*/
public function get_iri()
{
$iri = '';
if ($this->scheme !== null)
{
$iri .= $this->scheme . ':';
}
if (($authority = $this->get_authority()) !== null)
{
$iri .= '//' . $authority;
}
if ($this->path !== null)
{
$iri .= $this->path;
}
if ($this->query !== null)
{
$iri .= '?' . $this->query;
}
if ($this->fragment !== null)
{
$iri .= '#' . $this->fragment;
}
if ($iri !== '')
{
return $iri;
}
else
{
return null;
}
}
/**
* Get the scheme
*
* @access public
* @return string
*/
public function get_scheme()
{
return $this->scheme;
}
/**
* Get the complete authority
*
* @access public
* @return string
*/
public function get_authority()
{
$authority = '';
if ($this->userinfo !== null)
{
$authority .= $this->userinfo . '@';
}
if ($this->host !== null)
{
$authority .= $this->host;
}
if ($this->port !== null)
{
$authority .= ':' . $this->port;
}
if ($authority !== '')
{
return $authority;
}
else
{
return null;
}
}
/**
* Get the user information
*
* @access public
* @return string
*/
public function get_userinfo()
{
return $this->userinfo;
}
/**
* Get the host
*
* @access public
* @return string
*/
public function get_host()
{
return $this->host;
}
/**
* Get the port
*
* @access public
* @return string
*/
public function get_port()
{
return $this->port;
}
/**
* Get the path
*
* @access public
* @return string
*/
public function get_path()
{
return $this->path;
}
/**
* Get the query
*
* @access public
* @return string
*/
public function get_query()
{
return $this->query;
}
/**
* Get the fragment
*
* @access public
* @return string
*/
public function get_fragment()
{
return $this->fragment;
}
}

2576
inc/3rdparty/simplepie/SimplePie/Item.php vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,314 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
class SimplePie_Locator
{
var $useragent;
var $timeout;
var $file;
var $local = array();
var $elsewhere = array();
var $file_class = 'SimplePie_File';
var $cached_entities = array();
var $http_base;
var $base;
var $base_location = 0;
var $checked_feeds = 0;
var $max_checked_feeds = 10;
var $content_type_sniffer_class = 'SimplePie_Content_Type_Sniffer';
public function __construct(&$file, $timeout = 10, $useragent = null, $file_class = 'SimplePie_File', $max_checked_feeds = 10, $content_type_sniffer_class = 'SimplePie_Content_Type_Sniffer')
{
$this->file =& $file;
$this->file_class = $file_class;
$this->useragent = $useragent;
$this->timeout = $timeout;
$this->max_checked_feeds = $max_checked_feeds;
$this->content_type_sniffer_class = $content_type_sniffer_class;
}
public function find($type = SIMPLEPIE_LOCATOR_ALL, &$working)
{
if ($this->is_feed($this->file))
{
return $this->file;
}
if ($this->file->method & SIMPLEPIE_FILE_SOURCE_REMOTE)
{
$sniffer = new $this->content_type_sniffer_class($this->file);
if ($sniffer->get_type() !== 'text/html')
{
return null;
}
}
if ($type & ~SIMPLEPIE_LOCATOR_NONE)
{
$this->get_base();
}
if ($type & SIMPLEPIE_LOCATOR_AUTODISCOVERY && $working = $this->autodiscovery())
{
return $working[0];
}
if ($type & (SIMPLEPIE_LOCATOR_LOCAL_EXTENSION | SIMPLEPIE_LOCATOR_LOCAL_BODY | SIMPLEPIE_LOCATOR_REMOTE_EXTENSION | SIMPLEPIE_LOCATOR_REMOTE_BODY) && $this->get_links())
{
if ($type & SIMPLEPIE_LOCATOR_LOCAL_EXTENSION && $working = $this->extension($this->local))
{
return $working;
}
if ($type & SIMPLEPIE_LOCATOR_LOCAL_BODY && $working = $this->body($this->local))
{
return $working;
}
if ($type & SIMPLEPIE_LOCATOR_REMOTE_EXTENSION && $working = $this->extension($this->elsewhere))
{
return $working;
}
if ($type & SIMPLEPIE_LOCATOR_REMOTE_BODY && $working = $this->body($this->elsewhere))
{
return $working;
}
}
return null;
}
public function is_feed(&$file)
{
if ($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE)
{
$sniffer = new $this->content_type_sniffer_class($file);
$sniffed = $sniffer->get_type();
if (in_array($sniffed, array('application/rss+xml', 'application/rdf+xml', 'text/rdf', 'application/atom+xml', 'text/xml', 'application/xml')))
{
return true;
}
else
{
return false;
}
}
elseif ($file->method & SIMPLEPIE_FILE_SOURCE_LOCAL)
{
return true;
}
else
{
return false;
}
}
public function get_base()
{
$this->http_base = $this->file->url;
$this->base = $this->http_base;
$elements = SimplePie_Misc::get_element('base', $this->file->body);
foreach ($elements as $element)
{
if ($element['attribs']['href']['data'] !== '')
{
$this->base = SimplePie_Misc::absolutize_url(trim($element['attribs']['href']['data']), $this->http_base);
$this->base_location = $element['offset'];
break;
}
}
}
public function autodiscovery()
{
$links = array_merge(SimplePie_Misc::get_element('link', $this->file->body), SimplePie_Misc::get_element('a', $this->file->body), SimplePie_Misc::get_element('area', $this->file->body));
$done = array();
$feeds = array();
foreach ($links as $link)
{
if ($this->checked_feeds === $this->max_checked_feeds)
{
break;
}
if (isset($link['attribs']['href']['data']) && isset($link['attribs']['rel']['data']))
{
$rel = array_unique(SimplePie_Misc::space_seperated_tokens(strtolower($link['attribs']['rel']['data'])));
if ($this->base_location < $link['offset'])
{
$href = SimplePie_Misc::absolutize_url(trim($link['attribs']['href']['data']), $this->base);
}
else
{
$href = SimplePie_Misc::absolutize_url(trim($link['attribs']['href']['data']), $this->http_base);
}
if (!in_array($href, $done) && in_array('feed', $rel) || (in_array('alternate', $rel) && !in_array('stylesheet', $rel) && !empty($link['attribs']['type']['data']) && in_array(strtolower(SimplePie_Misc::parse_mime($link['attribs']['type']['data'])), array('application/rss+xml', 'application/atom+xml'))) && !isset($feeds[$href]))
{
$this->checked_feeds++;
$headers = array(
'Accept' => 'application/atom+xml, application/rss+xml, application/rdf+xml;q=0.9, application/xml;q=0.8, text/xml;q=0.8, text/html;q=0.7, unknown/unknown;q=0.1, application/unknown;q=0.1, */*;q=0.1',
);
$feed = new $this->file_class($href, $this->timeout, 5, $headers, $this->useragent);
if ($feed->success && ($feed->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($feed->status_code === 200 || $feed->status_code > 206 && $feed->status_code < 300)) && $this->is_feed($feed))
{
$feeds[$href] = $feed;
}
}
$done[] = $href;
}
}
if (!empty($feeds))
{
return array_values($feeds);
}
else
{
return null;
}
}
public function get_links()
{
$links = SimplePie_Misc::get_element('a', $this->file->body);
foreach ($links as $link)
{
if (isset($link['attribs']['href']['data']))
{
$href = trim($link['attribs']['href']['data']);
$parsed = SimplePie_Misc::parse_url($href);
if ($parsed['scheme'] === '' || preg_match('/^(http(s)|feed)?$/i', $parsed['scheme']))
{
if ($this->base_location < $link['offset'])
{
$href = SimplePie_Misc::absolutize_url(trim($link['attribs']['href']['data']), $this->base);
}
else
{
$href = SimplePie_Misc::absolutize_url(trim($link['attribs']['href']['data']), $this->http_base);
}
$current = SimplePie_Misc::parse_url($this->file->url);
if ($parsed['authority'] === '' || $parsed['authority'] === $current['authority'])
{
$this->local[] = $href;
}
else
{
$this->elsewhere[] = $href;
}
}
}
}
$this->local = array_unique($this->local);
$this->elsewhere = array_unique($this->elsewhere);
if (!empty($this->local) || !empty($this->elsewhere))
{
return true;
}
return null;
}
public function extension(&$array)
{
foreach ($array as $key => $value)
{
if ($this->checked_feeds === $this->max_checked_feeds)
{
break;
}
if (in_array(strtolower(strrchr($value, '.')), array('.rss', '.rdf', '.atom', '.xml')))
{
$this->checked_feeds++;
$headers = array(
'Accept' => 'application/atom+xml, application/rss+xml, application/rdf+xml;q=0.9, application/xml;q=0.8, text/xml;q=0.8, text/html;q=0.7, unknown/unknown;q=0.1, application/unknown;q=0.1, */*;q=0.1',
);
$feed = new $this->file_class($value, $this->timeout, 5, $headers, $this->useragent);
if ($feed->success && ($feed->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($feed->status_code === 200 || $feed->status_code > 206 && $feed->status_code < 300)) && $this->is_feed($feed))
{
return $feed;
}
else
{
unset($array[$key]);
}
}
}
return null;
}
public function body(&$array)
{
foreach ($array as $key => $value)
{
if ($this->checked_feeds === $this->max_checked_feeds)
{
break;
}
if (preg_match('/(rss|rdf|atom|xml)/i', $value))
{
$this->checked_feeds++;
$headers = array(
'Accept' => 'application/atom+xml, application/rss+xml, application/rdf+xml;q=0.9, application/xml;q=0.8, text/xml;q=0.8, text/html;q=0.7, unknown/unknown;q=0.1, application/unknown;q=0.1, */*;q=0.1',
);
$feed = new $this->file_class($value, $this->timeout, 5, null, $this->useragent);
if ($feed->success && ($feed->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($feed->status_code === 200 || $feed->status_code > 206 && $feed->status_code < 300)) && $this->is_feed($feed))
{
return $feed;
}
else
{
unset($array[$key]);
}
}
}
return null;
}
}

2365
inc/3rdparty/simplepie/SimplePie/Misc.php vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,258 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
/**
* Class to validate and to work with IPv6 addresses.
*
* @package SimplePie
* @copyright 2003-2005 The PHP Group
* @license http://www.opensource.org/licenses/bsd-license.php
* @link http://pear.php.net/package/Net_IPv6
* @author Alexander Merz <alexander.merz@web.de>
* @author elfrink at introweb dot nl
* @author Josh Peck <jmp at joshpeck dot org>
* @author Geoffrey Sneddon <geoffers@gmail.com>
*/
class SimplePie_Net_IPv6
{
/**
* Removes a possible existing netmask specification of an IP address.
*
* @param string $ip the (compressed) IP as Hex representation
* @return string the IP the without netmask
* @since 1.1.0
* @access public
* @static
*/
public static function removeNetmaskSpec($ip)
{
if (strpos($ip, '/') !== false)
{
list($addr, $nm) = explode('/', $ip);
}
else
{
$addr = $ip;
}
return $addr;
}
/**
* Uncompresses an IPv6 address
*
* RFC 2373 allows you to compress zeros in an address to '::'. This
* function expects an valid IPv6 address and expands the '::' to
* the required zeros.
*
* Example: FF01::101 -> FF01:0:0:0:0:0:0:101
* ::1 -> 0:0:0:0:0:0:0:1
*
* @access public
* @static
* @param string $ip a valid IPv6-address (hex format)
* @return string the uncompressed IPv6-address (hex format)
*/
public static function Uncompress($ip)
{
$uip = SimplePie_Net_IPv6::removeNetmaskSpec($ip);
$c1 = -1;
$c2 = -1;
if (strpos($ip, '::') !== false)
{
list($ip1, $ip2) = explode('::', $ip);
if ($ip1 === '')
{
$c1 = -1;
}
else
{
$pos = 0;
if (($pos = substr_count($ip1, ':')) > 0)
{
$c1 = $pos;
}
else
{
$c1 = 0;
}
}
if ($ip2 === '')
{
$c2 = -1;
}
else
{
$pos = 0;
if (($pos = substr_count($ip2, ':')) > 0)
{
$c2 = $pos;
}
else
{
$c2 = 0;
}
}
if (strstr($ip2, '.'))
{
$c2++;
}
// ::
if ($c1 === -1 && $c2 === -1)
{
$uip = '0:0:0:0:0:0:0:0';
}
// ::xxx
else if ($c1 === -1)
{
$fill = str_repeat('0:', 7 - $c2);
$uip = str_replace('::', $fill, $uip);
}
// xxx::
else if ($c2 === -1)
{
$fill = str_repeat(':0', 7 - $c1);
$uip = str_replace('::', $fill, $uip);
}
// xxx::xxx
else
{
$fill = str_repeat(':0:', 6 - $c2 - $c1);
$uip = str_replace('::', $fill, $uip);
$uip = str_replace('::', ':', $uip);
}
}
return $uip;
}
/**
* Splits an IPv6 address into the IPv6 and a possible IPv4 part
*
* RFC 2373 allows you to note the last two parts of an IPv6 address as
* an IPv4 compatible address
*
* Example: 0:0:0:0:0:0:13.1.68.3
* 0:0:0:0:0:FFFF:129.144.52.38
*
* @access public
* @static
* @param string $ip a valid IPv6-address (hex format)
* @return array [0] contains the IPv6 part, [1] the IPv4 part (hex format)
*/
public static function SplitV64($ip)
{
$ip = SimplePie_Net_IPv6::Uncompress($ip);
if (strstr($ip, '.'))
{
$pos = strrpos($ip, ':');
$ip[$pos] = '_';
$ipPart = explode('_', $ip);
return $ipPart;
}
else
{
return array($ip, '');
}
}
/**
* Checks an IPv6 address
*
* Checks if the given IP is IPv6-compatible
*
* @access public
* @static
* @param string $ip a valid IPv6-address
* @return bool true if $ip is an IPv6 address
*/
public static function checkIPv6($ip)
{
$ipPart = SimplePie_Net_IPv6::SplitV64($ip);
$count = 0;
if (!empty($ipPart[0]))
{
$ipv6 = explode(':', $ipPart[0]);
for ($i = 0; $i < count($ipv6); $i++)
{
$dec = hexdec($ipv6[$i]);
$hex = strtoupper(preg_replace('/^[0]{1,3}(.*[0-9a-fA-F])$/', '\\1', $ipv6[$i]));
if ($ipv6[$i] >= 0 && $dec <= 65535 && $hex === strtoupper(dechex($dec)))
{
$count++;
}
}
if ($count === 8)
{
return true;
}
elseif ($count === 6 && !empty($ipPart[1]))
{
$ipv4 = explode('.', $ipPart[1]);
$count = 0;
foreach ($ipv4 as $ipv4_part)
{
if ($ipv4_part >= 0 && $ipv4_part <= 255 && preg_match('/^\d{1,3}$/', $ipv4_part))
{
$count++;
}
}
if ($count === 4)
{
return true;
}
}
else
{
return false;
}
}
else
{
return false;
}
}
}

View File

@ -0,0 +1,983 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
/**
* Date Parser
*
* @package SimplePie
*/
class SimplePie_Parse_Date
{
/**
* Input data
*
* @access protected
* @var string
*/
var $date;
/**
* List of days, calendar day name => ordinal day number in the week
*
* @access protected
* @var array
*/
var $day = array(
// English
'mon' => 1,
'monday' => 1,
'tue' => 2,
'tuesday' => 2,
'wed' => 3,
'wednesday' => 3,
'thu' => 4,
'thursday' => 4,
'fri' => 5,
'friday' => 5,
'sat' => 6,
'saturday' => 6,
'sun' => 7,
'sunday' => 7,
// Dutch
'maandag' => 1,
'dinsdag' => 2,
'woensdag' => 3,
'donderdag' => 4,
'vrijdag' => 5,
'zaterdag' => 6,
'zondag' => 7,
// French
'lundi' => 1,
'mardi' => 2,
'mercredi' => 3,
'jeudi' => 4,
'vendredi' => 5,
'samedi' => 6,
'dimanche' => 7,
// German
'montag' => 1,
'dienstag' => 2,
'mittwoch' => 3,
'donnerstag' => 4,
'freitag' => 5,
'samstag' => 6,
'sonnabend' => 6,
'sonntag' => 7,
// Italian
'lunedì' => 1,
'martedì' => 2,
'mercoledì' => 3,
'giovedì' => 4,
'venerdì' => 5,
'sabato' => 6,
'domenica' => 7,
// Spanish
'lunes' => 1,
'martes' => 2,
'miércoles' => 3,
'jueves' => 4,
'viernes' => 5,
'sábado' => 6,
'domingo' => 7,
// Finnish
'maanantai' => 1,
'tiistai' => 2,
'keskiviikko' => 3,
'torstai' => 4,
'perjantai' => 5,
'lauantai' => 6,
'sunnuntai' => 7,
// Hungarian
'hétfő' => 1,
'kedd' => 2,
'szerda' => 3,
'csütörtok' => 4,
'péntek' => 5,
'szombat' => 6,
'vasárnap' => 7,
// Greek
'Δευ' => 1,
'Τρι' => 2,
'Τετ' => 3,
'Πεμ' => 4,
'Παρ' => 5,
'Σαβ' => 6,
'Κυρ' => 7,
);
/**
* List of months, calendar month name => calendar month number
*
* @access protected
* @var array
*/
var $month = array(
// English
'jan' => 1,
'january' => 1,
'feb' => 2,
'february' => 2,
'mar' => 3,
'march' => 3,
'apr' => 4,
'april' => 4,
'may' => 5,
// No long form of May
'jun' => 6,
'june' => 6,
'jul' => 7,
'july' => 7,
'aug' => 8,
'august' => 8,
'sep' => 9,
'september' => 8,
'oct' => 10,
'october' => 10,
'nov' => 11,
'november' => 11,
'dec' => 12,
'december' => 12,
// Dutch
'januari' => 1,
'februari' => 2,
'maart' => 3,
'april' => 4,
'mei' => 5,
'juni' => 6,
'juli' => 7,
'augustus' => 8,
'september' => 9,
'oktober' => 10,
'november' => 11,
'december' => 12,
// French
'janvier' => 1,
'février' => 2,
'mars' => 3,
'avril' => 4,
'mai' => 5,
'juin' => 6,
'juillet' => 7,
'août' => 8,
'septembre' => 9,
'octobre' => 10,
'novembre' => 11,
'décembre' => 12,
// German
'januar' => 1,
'februar' => 2,
'märz' => 3,
'april' => 4,
'mai' => 5,
'juni' => 6,
'juli' => 7,
'august' => 8,
'september' => 9,
'oktober' => 10,
'november' => 11,
'dezember' => 12,
// Italian
'gennaio' => 1,
'febbraio' => 2,
'marzo' => 3,
'aprile' => 4,
'maggio' => 5,
'giugno' => 6,
'luglio' => 7,
'agosto' => 8,
'settembre' => 9,
'ottobre' => 10,
'novembre' => 11,
'dicembre' => 12,
// Spanish
'enero' => 1,
'febrero' => 2,
'marzo' => 3,
'abril' => 4,
'mayo' => 5,
'junio' => 6,
'julio' => 7,
'agosto' => 8,
'septiembre' => 9,
'setiembre' => 9,
'octubre' => 10,
'noviembre' => 11,
'diciembre' => 12,
// Finnish
'tammikuu' => 1,
'helmikuu' => 2,
'maaliskuu' => 3,
'huhtikuu' => 4,
'toukokuu' => 5,
'kesäkuu' => 6,
'heinäkuu' => 7,
'elokuu' => 8,
'suuskuu' => 9,
'lokakuu' => 10,
'marras' => 11,
'joulukuu' => 12,
// Hungarian
'január' => 1,
'február' => 2,
'március' => 3,
'április' => 4,
'május' => 5,
'június' => 6,
'július' => 7,
'augusztus' => 8,
'szeptember' => 9,
'október' => 10,
'november' => 11,
'december' => 12,
// Greek
'Ιαν' => 1,
'Φεβ' => 2,
'Μάώ' => 3,
'Μαώ' => 3,
'Απρ' => 4,
'Μάι' => 5,
'Μαϊ' => 5,
'Μαι' => 5,
'Ιούν' => 6,
'Ιον' => 6,
'Ιούλ' => 7,
'Ιολ' => 7,
'Αύγ' => 8,
'Αυγ' => 8,
'Σεπ' => 9,
'Οκτ' => 10,
'Νοέ' => 11,
'Δεκ' => 12,
);
/**
* List of timezones, abbreviation => offset from UTC
*
* @access protected
* @var array
*/
var $timezone = array(
'ACDT' => 37800,
'ACIT' => 28800,
'ACST' => 34200,
'ACT' => -18000,
'ACWDT' => 35100,
'ACWST' => 31500,
'AEDT' => 39600,
'AEST' => 36000,
'AFT' => 16200,
'AKDT' => -28800,
'AKST' => -32400,
'AMDT' => 18000,
'AMT' => -14400,
'ANAST' => 46800,
'ANAT' => 43200,
'ART' => -10800,
'AZOST' => -3600,
'AZST' => 18000,
'AZT' => 14400,
'BIOT' => 21600,
'BIT' => -43200,
'BOT' => -14400,
'BRST' => -7200,
'BRT' => -10800,
'BST' => 3600,
'BTT' => 21600,
'CAST' => 18000,
'CAT' => 7200,
'CCT' => 23400,
'CDT' => -18000,
'CEDT' => 7200,
'CET' => 3600,
'CGST' => -7200,
'CGT' => -10800,
'CHADT' => 49500,
'CHAST' => 45900,
'CIST' => -28800,
'CKT' => -36000,
'CLDT' => -10800,
'CLST' => -14400,
'COT' => -18000,
'CST' => -21600,
'CVT' => -3600,
'CXT' => 25200,
'DAVT' => 25200,
'DTAT' => 36000,
'EADT' => -18000,
'EAST' => -21600,
'EAT' => 10800,
'ECT' => -18000,
'EDT' => -14400,
'EEST' => 10800,
'EET' => 7200,
'EGT' => -3600,
'EKST' => 21600,
'EST' => -18000,
'FJT' => 43200,
'FKDT' => -10800,
'FKST' => -14400,
'FNT' => -7200,
'GALT' => -21600,
'GEDT' => 14400,
'GEST' => 10800,
'GFT' => -10800,
'GILT' => 43200,
'GIT' => -32400,
'GST' => 14400,
'GST' => -7200,
'GYT' => -14400,
'HAA' => -10800,
'HAC' => -18000,
'HADT' => -32400,
'HAE' => -14400,
'HAP' => -25200,
'HAR' => -21600,
'HAST' => -36000,
'HAT' => -9000,
'HAY' => -28800,
'HKST' => 28800,
'HMT' => 18000,
'HNA' => -14400,
'HNC' => -21600,
'HNE' => -18000,
'HNP' => -28800,
'HNR' => -25200,
'HNT' => -12600,
'HNY' => -32400,
'IRDT' => 16200,
'IRKST' => 32400,
'IRKT' => 28800,
'IRST' => 12600,
'JFDT' => -10800,
'JFST' => -14400,
'JST' => 32400,
'KGST' => 21600,
'KGT' => 18000,
'KOST' => 39600,
'KOVST' => 28800,
'KOVT' => 25200,
'KRAST' => 28800,
'KRAT' => 25200,
'KST' => 32400,
'LHDT' => 39600,
'LHST' => 37800,
'LINT' => 50400,
'LKT' => 21600,
'MAGST' => 43200,
'MAGT' => 39600,
'MAWT' => 21600,
'MDT' => -21600,
'MESZ' => 7200,
'MEZ' => 3600,
'MHT' => 43200,
'MIT' => -34200,
'MNST' => 32400,
'MSDT' => 14400,
'MSST' => 10800,
'MST' => -25200,
'MUT' => 14400,
'MVT' => 18000,
'MYT' => 28800,
'NCT' => 39600,
'NDT' => -9000,
'NFT' => 41400,
'NMIT' => 36000,
'NOVST' => 25200,
'NOVT' => 21600,
'NPT' => 20700,
'NRT' => 43200,
'NST' => -12600,
'NUT' => -39600,
'NZDT' => 46800,
'NZST' => 43200,
'OMSST' => 25200,
'OMST' => 21600,
'PDT' => -25200,
'PET' => -18000,
'PETST' => 46800,
'PETT' => 43200,
'PGT' => 36000,
'PHOT' => 46800,
'PHT' => 28800,
'PKT' => 18000,
'PMDT' => -7200,
'PMST' => -10800,
'PONT' => 39600,
'PST' => -28800,
'PWT' => 32400,
'PYST' => -10800,
'PYT' => -14400,
'RET' => 14400,
'ROTT' => -10800,
'SAMST' => 18000,
'SAMT' => 14400,
'SAST' => 7200,
'SBT' => 39600,
'SCDT' => 46800,
'SCST' => 43200,
'SCT' => 14400,
'SEST' => 3600,
'SGT' => 28800,
'SIT' => 28800,
'SRT' => -10800,
'SST' => -39600,
'SYST' => 10800,
'SYT' => 7200,
'TFT' => 18000,
'THAT' => -36000,
'TJT' => 18000,
'TKT' => -36000,
'TMT' => 18000,
'TOT' => 46800,
'TPT' => 32400,
'TRUT' => 36000,
'TVT' => 43200,
'TWT' => 28800,
'UYST' => -7200,
'UYT' => -10800,
'UZT' => 18000,
'VET' => -14400,
'VLAST' => 39600,
'VLAT' => 36000,
'VOST' => 21600,
'VUT' => 39600,
'WAST' => 7200,
'WAT' => 3600,
'WDT' => 32400,
'WEST' => 3600,
'WFT' => 43200,
'WIB' => 25200,
'WIT' => 32400,
'WITA' => 28800,
'WKST' => 18000,
'WST' => 28800,
'YAKST' => 36000,
'YAKT' => 32400,
'YAPT' => 36000,
'YEKST' => 21600,
'YEKT' => 18000,
);
/**
* Cached PCRE for SimplePie_Parse_Date::$day
*
* @access protected
* @var string
*/
var $day_pcre;
/**
* Cached PCRE for SimplePie_Parse_Date::$month
*
* @access protected
* @var string
*/
var $month_pcre;
/**
* Array of user-added callback methods
*
* @access private
* @var array
*/
var $built_in = array();
/**
* Array of user-added callback methods
*
* @access private
* @var array
*/
var $user = array();
/**
* Create new SimplePie_Parse_Date object, and set self::day_pcre,
* self::month_pcre, and self::built_in
*
* @access private
*/
public function __construct()
{
$this->day_pcre = '(' . implode(array_keys($this->day), '|') . ')';
$this->month_pcre = '(' . implode(array_keys($this->month), '|') . ')';
static $cache;
if (!isset($cache[get_class($this)]))
{
$all_methods = get_class_methods($this);
foreach ($all_methods as $method)
{
if (strtolower(substr($method, 0, 5)) === 'date_')
{
$cache[get_class($this)][] = $method;
}
}
}
foreach ($cache[get_class($this)] as $method)
{
$this->built_in[] = $method;
}
}
/**
* Get the object
*
* @access public
*/
public static function get()
{
static $object;
if (!$object)
{
$object = new SimplePie_Parse_Date;
}
return $object;
}
/**
* Parse a date
*
* @final
* @access public
* @param string $date Date to parse
* @return int Timestamp corresponding to date string, or false on failure
*/
public function parse($date)
{
foreach ($this->user as $method)
{
if (($returned = call_user_func($method, $date)) !== false)
{
return $returned;
}
}
foreach ($this->built_in as $method)
{
if (($returned = call_user_func(array(&$this, $method), $date)) !== false)
{
return $returned;
}
}
return false;
}
/**
* Add a callback method to parse a date
*
* @final
* @access public
* @param callback $callback
*/
public function add_callback($callback)
{
if (is_callable($callback))
{
$this->user[] = $callback;
}
else
{
trigger_error('User-supplied function must be a valid callback', E_USER_WARNING);
}
}
/**
* Parse a superset of W3C-DTF (allows hyphens and colons to be omitted, as
* well as allowing any of upper or lower case "T", horizontal tabs, or
* spaces to be used as the time seperator (including more than one))
*
* @access protected
* @return int Timestamp
*/
public function date_w3cdtf($date)
{
static $pcre;
if (!$pcre)
{
$year = '([0-9]{4})';
$month = $day = $hour = $minute = $second = '([0-9]{2})';
$decimal = '([0-9]*)';
$zone = '(?:(Z)|([+\-])([0-9]{1,2}):?([0-9]{1,2}))';
$pcre = '/^' . $year . '(?:-?' . $month . '(?:-?' . $day . '(?:[Tt\x09\x20]+' . $hour . '(?::?' . $minute . '(?::?' . $second . '(?:.' . $decimal . ')?)?)?' . $zone . ')?)?)?$/';
}
if (preg_match($pcre, $date, $match))
{
/*
Capturing subpatterns:
1: Year
2: Month
3: Day
4: Hour
5: Minute
6: Second
7: Decimal fraction of a second
8: Zulu
9: Timezone ±
10: Timezone hours
11: Timezone minutes
*/
// Fill in empty matches
for ($i = count($match); $i <= 3; $i++)
{
$match[$i] = '1';
}
for ($i = count($match); $i <= 7; $i++)
{
$match[$i] = '0';
}
// Numeric timezone
if (isset($match[9]) && $match[9] !== '')
{
$timezone = $match[10] * 3600;
$timezone += $match[11] * 60;
if ($match[9] === '-')
{
$timezone = 0 - $timezone;
}
}
else
{
$timezone = 0;
}
// Convert the number of seconds to an integer, taking decimals into account
$second = round($match[6] + $match[7] / pow(10, strlen($match[7])));
return gmmktime($match[4], $match[5], $second, $match[2], $match[3], $match[1]) - $timezone;
}
else
{
return false;
}
}
/**
* Remove RFC822 comments
*
* @access protected
* @param string $data Data to strip comments from
* @return string Comment stripped string
*/
public function remove_rfc2822_comments($string)
{
$string = (string) $string;
$position = 0;
$length = strlen($string);
$depth = 0;
$output = '';
while ($position < $length && ($pos = strpos($string, '(', $position)) !== false)
{
$output .= substr($string, $position, $pos - $position);
$position = $pos + 1;
if ($string[$pos - 1] !== '\\')
{
$depth++;
while ($depth && $position < $length)
{
$position += strcspn($string, '()', $position);
if ($string[$position - 1] === '\\')
{
$position++;
continue;
}
elseif (isset($string[$position]))
{
switch ($string[$position])
{
case '(':
$depth++;
break;
case ')':
$depth--;
break;
}
$position++;
}
else
{
break;
}
}
}
else
{
$output .= '(';
}
}
$output .= substr($string, $position);
return $output;
}
/**
* Parse RFC2822's date format
*
* @access protected
* @return int Timestamp
*/
public function date_rfc2822($date)
{
static $pcre;
if (!$pcre)
{
$wsp = '[\x09\x20]';
$fws = '(?:' . $wsp . '+|' . $wsp . '*(?:\x0D\x0A' . $wsp . '+)+)';
$optional_fws = $fws . '?';
$day_name = $this->day_pcre;
$month = $this->month_pcre;
$day = '([0-9]{1,2})';
$hour = $minute = $second = '([0-9]{2})';
$year = '([0-9]{2,4})';
$num_zone = '([+\-])([0-9]{2})([0-9]{2})';
$character_zone = '([A-Z]{1,5})';
$zone = '(?:' . $num_zone . '|' . $character_zone . ')';
$pcre = '/(?:' . $optional_fws . $day_name . $optional_fws . ',)?' . $optional_fws . $day . $fws . $month . $fws . $year . $fws . $hour . $optional_fws . ':' . $optional_fws . $minute . '(?:' . $optional_fws . ':' . $optional_fws . $second . ')?' . $fws . $zone . '/i';
}
if (preg_match($pcre, $this->remove_rfc2822_comments($date), $match))
{
/*
Capturing subpatterns:
1: Day name
2: Day
3: Month
4: Year
5: Hour
6: Minute
7: Second
8: Timezone ±
9: Timezone hours
10: Timezone minutes
11: Alphabetic timezone
*/
// Find the month number
$month = $this->month[strtolower($match[3])];
// Numeric timezone
if ($match[8] !== '')
{
$timezone = $match[9] * 3600;
$timezone += $match[10] * 60;
if ($match[8] === '-')
{
$timezone = 0 - $timezone;
}
}
// Character timezone
elseif (isset($this->timezone[strtoupper($match[11])]))
{
$timezone = $this->timezone[strtoupper($match[11])];
}
// Assume everything else to be -0000
else
{
$timezone = 0;
}
// Deal with 2/3 digit years
if ($match[4] < 50)
{
$match[4] += 2000;
}
elseif ($match[4] < 1000)
{
$match[4] += 1900;
}
// Second is optional, if it is empty set it to zero
if ($match[7] !== '')
{
$second = $match[7];
}
else
{
$second = 0;
}
return gmmktime($match[5], $match[6], $second, $month, $match[2], $match[4]) - $timezone;
}
else
{
return false;
}
}
/**
* Parse RFC850's date format
*
* @access protected
* @return int Timestamp
*/
public function date_rfc850($date)
{
static $pcre;
if (!$pcre)
{
$space = '[\x09\x20]+';
$day_name = $this->day_pcre;
$month = $this->month_pcre;
$day = '([0-9]{1,2})';
$year = $hour = $minute = $second = '([0-9]{2})';
$zone = '([A-Z]{1,5})';
$pcre = '/^' . $day_name . ',' . $space . $day . '-' . $month . '-' . $year . $space . $hour . ':' . $minute . ':' . $second . $space . $zone . '$/i';
}
if (preg_match($pcre, $date, $match))
{
/*
Capturing subpatterns:
1: Day name
2: Day
3: Month
4: Year
5: Hour
6: Minute
7: Second
8: Timezone
*/
// Month
$month = $this->month[strtolower($match[3])];
// Character timezone
if (isset($this->timezone[strtoupper($match[8])]))
{
$timezone = $this->timezone[strtoupper($match[8])];
}
// Assume everything else to be -0000
else
{
$timezone = 0;
}
// Deal with 2 digit year
if ($match[4] < 50)
{
$match[4] += 2000;
}
else
{
$match[4] += 1900;
}
return gmmktime($match[5], $match[6], $match[7], $month, $match[2], $match[4]) - $timezone;
}
else
{
return false;
}
}
/**
* Parse C99's asctime()'s date format
*
* @access protected
* @return int Timestamp
*/
public function date_asctime($date)
{
static $pcre;
if (!$pcre)
{
$space = '[\x09\x20]+';
$wday_name = $this->day_pcre;
$mon_name = $this->month_pcre;
$day = '([0-9]{1,2})';
$hour = $sec = $min = '([0-9]{2})';
$year = '([0-9]{4})';
$terminator = '\x0A?\x00?';
$pcre = '/^' . $wday_name . $space . $mon_name . $space . $day . $space . $hour . ':' . $min . ':' . $sec . $space . $year . $terminator . '$/i';
}
if (preg_match($pcre, $date, $match))
{
/*
Capturing subpatterns:
1: Day name
2: Month
3: Day
4: Hour
5: Minute
6: Second
7: Year
*/
$month = $this->month[strtolower($match[2])];
return gmmktime($match[4], $match[5], $match[6], $month, $match[3], $match[7]);
}
else
{
return false;
}
}
/**
* Parse dates using strtotime()
*
* @access protected
* @return int Timestamp
*/
public function date_strtotime($date)
{
$strtotime = strtotime($date);
if ($strtotime === -1 || $strtotime === false)
{
return false;
}
else
{
return $strtotime;
}
}
}

View File

@ -0,0 +1,387 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
class SimplePie_Parser
{
var $error_code;
var $error_string;
var $current_line;
var $current_column;
var $current_byte;
var $separator = ' ';
var $namespace = array('');
var $element = array('');
var $xml_base = array('');
var $xml_base_explicit = array(false);
var $xml_lang = array('');
var $data = array();
var $datas = array(array());
var $current_xhtml_construct = -1;
var $encoding;
public function parse(&$data, $encoding)
{
// Use UTF-8 if we get passed US-ASCII, as every US-ASCII character is a UTF-8 character
if (strtoupper($encoding) === 'US-ASCII')
{
$this->encoding = 'UTF-8';
}
else
{
$this->encoding = $encoding;
}
// Strip BOM:
// UTF-32 Big Endian BOM
if (substr($data, 0, 4) === "\x00\x00\xFE\xFF")
{
$data = substr($data, 4);
}
// UTF-32 Little Endian BOM
elseif (substr($data, 0, 4) === "\xFF\xFE\x00\x00")
{
$data = substr($data, 4);
}
// UTF-16 Big Endian BOM
elseif (substr($data, 0, 2) === "\xFE\xFF")
{
$data = substr($data, 2);
}
// UTF-16 Little Endian BOM
elseif (substr($data, 0, 2) === "\xFF\xFE")
{
$data = substr($data, 2);
}
// UTF-8 BOM
elseif (substr($data, 0, 3) === "\xEF\xBB\xBF")
{
$data = substr($data, 3);
}
if (substr($data, 0, 5) === '<?xml' && strspn(substr($data, 5, 1), "\x09\x0A\x0D\x20") && ($pos = strpos($data, '?>')) !== false)
{
$declaration = new SimplePie_XML_Declaration_Parser(substr($data, 5, $pos - 5));
if ($declaration->parse())
{
$data = substr($data, $pos + 2);
$data = '<?xml version="' . $declaration->version . '" encoding="' . $encoding . '" standalone="' . (($declaration->standalone) ? 'yes' : 'no') . '"?>' . $data;
}
else
{
$this->error_string = 'SimplePie bug! Please report this!';
return false;
}
}
$return = true;
static $xml_is_sane = null;
if ($xml_is_sane === null)
{
$parser_check = xml_parser_create();
xml_parse_into_struct($parser_check, '<foo>&amp;</foo>', $values);
xml_parser_free($parser_check);
$xml_is_sane = isset($values[0]['value']);
}
// Create the parser
if ($xml_is_sane)
{
$xml = xml_parser_create_ns($this->encoding, $this->separator);
xml_parser_set_option($xml, XML_OPTION_SKIP_WHITE, 1);
xml_parser_set_option($xml, XML_OPTION_CASE_FOLDING, 0);
xml_set_object($xml, $this);
xml_set_character_data_handler($xml, 'cdata');
xml_set_element_handler($xml, 'tag_open', 'tag_close');
// Parse!
if (!xml_parse($xml, $data, true))
{
$this->error_code = xml_get_error_code($xml);
$this->error_string = xml_error_string($this->error_code);
$return = false;
}
$this->current_line = xml_get_current_line_number($xml);
$this->current_column = xml_get_current_column_number($xml);
$this->current_byte = xml_get_current_byte_index($xml);
xml_parser_free($xml);
return $return;
}
else
{
libxml_clear_errors();
$xml = new XMLReader();
$xml->xml($data);
while (@$xml->read())
{
switch ($xml->nodeType)
{
case constant('XMLReader::END_ELEMENT'):
if ($xml->namespaceURI !== '')
{
$tagName = $xml->namespaceURI . $this->separator . $xml->localName;
}
else
{
$tagName = $xml->localName;
}
$this->tag_close(null, $tagName);
break;
case constant('XMLReader::ELEMENT'):
$empty = $xml->isEmptyElement;
if ($xml->namespaceURI !== '')
{
$tagName = $xml->namespaceURI . $this->separator . $xml->localName;
}
else
{
$tagName = $xml->localName;
}
$attributes = array();
while ($xml->moveToNextAttribute())
{
if ($xml->namespaceURI !== '')
{
$attrName = $xml->namespaceURI . $this->separator . $xml->localName;
}
else
{
$attrName = $xml->localName;
}
$attributes[$attrName] = $xml->value;
}
$this->tag_open(null, $tagName, $attributes);
if ($empty)
{
$this->tag_close(null, $tagName);
}
break;
case constant('XMLReader::TEXT'):
case constant('XMLReader::CDATA'):
$this->cdata(null, $xml->value);
break;
}
}
if ($error = libxml_get_last_error())
{
$this->error_code = $error->code;
$this->error_string = $error->message;
$this->current_line = $error->line;
$this->current_column = $error->column;
return false;
}
else
{
return true;
}
}
}
public function get_error_code()
{
return $this->error_code;
}
public function get_error_string()
{
return $this->error_string;
}
public function get_current_line()
{
return $this->current_line;
}
public function get_current_column()
{
return $this->current_column;
}
public function get_current_byte()
{
return $this->current_byte;
}
public function get_data()
{
return $this->data;
}
public function tag_open($parser, $tag, $attributes)
{
list($this->namespace[], $this->element[]) = $this->split_ns($tag);
$attribs = array();
foreach ($attributes as $name => $value)
{
list($attrib_namespace, $attribute) = $this->split_ns($name);
$attribs[$attrib_namespace][$attribute] = $value;
}
if (isset($attribs[SIMPLEPIE_NAMESPACE_XML]['base']))
{
$this->xml_base[] = SimplePie_Misc::absolutize_url($attribs[SIMPLEPIE_NAMESPACE_XML]['base'], end($this->xml_base));
$this->xml_base_explicit[] = true;
}
else
{
$this->xml_base[] = end($this->xml_base);
$this->xml_base_explicit[] = end($this->xml_base_explicit);
}
if (isset($attribs[SIMPLEPIE_NAMESPACE_XML]['lang']))
{
$this->xml_lang[] = $attribs[SIMPLEPIE_NAMESPACE_XML]['lang'];
}
else
{
$this->xml_lang[] = end($this->xml_lang);
}
if ($this->current_xhtml_construct >= 0)
{
$this->current_xhtml_construct++;
if (end($this->namespace) === SIMPLEPIE_NAMESPACE_XHTML)
{
$this->data['data'] .= '<' . end($this->element);
if (isset($attribs['']))
{
foreach ($attribs[''] as $name => $value)
{
$this->data['data'] .= ' ' . $name . '="' . htmlspecialchars($value, ENT_COMPAT, $this->encoding) . '"';
}
}
$this->data['data'] .= '>';
}
}
else
{
$this->datas[] =& $this->data;
$this->data =& $this->data['child'][end($this->namespace)][end($this->element)][];
$this->data = array('data' => '', 'attribs' => $attribs, 'xml_base' => end($this->xml_base), 'xml_base_explicit' => end($this->xml_base_explicit), 'xml_lang' => end($this->xml_lang));
if ((end($this->namespace) === SIMPLEPIE_NAMESPACE_ATOM_03 && in_array(end($this->element), array('title', 'tagline', 'copyright', 'info', 'summary', 'content')) && isset($attribs['']['mode']) && $attribs['']['mode'] === 'xml')
|| (end($this->namespace) === SIMPLEPIE_NAMESPACE_ATOM_10 && in_array(end($this->element), array('rights', 'subtitle', 'summary', 'info', 'title', 'content')) && isset($attribs['']['type']) && $attribs['']['type'] === 'xhtml'))
{
$this->current_xhtml_construct = 0;
}
}
}
public function cdata($parser, $cdata)
{
if ($this->current_xhtml_construct >= 0)
{
$this->data['data'] .= htmlspecialchars($cdata, ENT_QUOTES, $this->encoding);
}
else
{
$this->data['data'] .= $cdata;
}
}
public function tag_close($parser, $tag)
{
if ($this->current_xhtml_construct >= 0)
{
$this->current_xhtml_construct--;
if (end($this->namespace) === SIMPLEPIE_NAMESPACE_XHTML && !in_array(end($this->element), array('area', 'base', 'basefont', 'br', 'col', 'frame', 'hr', 'img', 'input', 'isindex', 'link', 'meta', 'param')))
{
$this->data['data'] .= '</' . end($this->element) . '>';
}
}
if ($this->current_xhtml_construct === -1)
{
$this->data =& $this->datas[count($this->datas) - 1];
array_pop($this->datas);
}
array_pop($this->element);
array_pop($this->namespace);
array_pop($this->xml_base);
array_pop($this->xml_base_explicit);
array_pop($this->xml_lang);
}
public function split_ns($string)
{
static $cache = array();
if (!isset($cache[$string]))
{
if ($pos = strpos($string, $this->separator))
{
static $separator_length;
if (!$separator_length)
{
$separator_length = strlen($this->separator);
}
$namespace = substr($string, 0, $pos);
$local_name = substr($string, $pos + $separator_length);
if (strtolower($namespace) === SIMPLEPIE_NAMESPACE_ITUNES)
{
$namespace = SIMPLEPIE_NAMESPACE_ITUNES;
}
// Normalize the Media RSS namespaces
if ($namespace === SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG ||
$namespace === SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG2 ||
$namespace === SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG3 ||
$namespace === SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG4 ||
$namespace === SIMPLEPIE_NAMESPACE_MEDIARSS_WRONG5 )
{
$namespace = SIMPLEPIE_NAMESPACE_MEDIARSS;
}
$cache[$string] = array($namespace, $local_name);
}
else
{
$cache[$string] = array('', $string);
}
}
return $cache[$string];
}
}

View File

@ -0,0 +1,88 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
class SimplePie_Rating
{
var $scheme;
var $value;
// Constructor, used to input the data
public function __construct($scheme = null, $value = null)
{
$this->scheme = $scheme;
$this->value = $value;
}
public function __toString()
{
// There is no $this->data here
return md5(serialize($this));
}
public function get_scheme()
{
if ($this->scheme !== null)
{
return $this->scheme;
}
else
{
return null;
}
}
public function get_value()
{
if ($this->value !== null)
{
return $this->value;
}
else
{
return null;
}
}
}

View File

@ -0,0 +1,102 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
class SimplePie_Restriction
{
var $relationship;
var $type;
var $value;
// Constructor, used to input the data
public function __construct($relationship = null, $type = null, $value = null)
{
$this->relationship = $relationship;
$this->type = $type;
$this->value = $value;
}
public function __toString()
{
// There is no $this->data here
return md5(serialize($this));
}
public function get_relationship()
{
if ($this->relationship !== null)
{
return $this->relationship;
}
else
{
return null;
}
}
public function get_type()
{
if ($this->type !== null)
{
return $this->type;
}
else
{
return null;
}
}
public function get_value()
{
if ($this->value !== null)
{
return $this->value;
}
else
{
return null;
}
}
}

View File

@ -0,0 +1,400 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
/**
* @todo Move to using an actual HTML parser (this will allow tags to be properly stripped, and to switch between HTML and XHTML), this will also make it easier to shorten a string while preserving HTML tags
*/
class SimplePie_Sanitize
{
// Private vars
var $base;
// Options
var $remove_div = true;
var $image_handler = '';
var $strip_htmltags = array('base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style');
var $encode_instead_of_strip = false;
var $strip_attributes = array('bgsound', 'class', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc');
var $strip_comments = false;
var $output_encoding = 'UTF-8';
var $enable_cache = true;
var $cache_location = './cache';
var $cache_name_function = 'md5';
var $cache_class = 'SimplePie_Cache';
var $file_class = 'SimplePie_File';
var $timeout = 10;
var $useragent = '';
var $force_fsockopen = false;
var $replace_url_attributes = array(
'a' => 'href',
'area' => 'href',
'blockquote' => 'cite',
'del' => 'cite',
'form' => 'action',
'img' => array('longdesc', 'src'),
'input' => 'src',
'ins' => 'cite',
'q' => 'cite'
);
public function remove_div($enable = true)
{
$this->remove_div = (bool) $enable;
}
public function set_image_handler($page = false)
{
if ($page)
{
$this->image_handler = (string) $page;
}
else
{
$this->image_handler = false;
}
}
public function pass_cache_data($enable_cache = true, $cache_location = './cache', $cache_name_function = 'md5', $cache_class = 'SimplePie_Cache')
{
if (isset($enable_cache))
{
$this->enable_cache = (bool) $enable_cache;
}
if ($cache_location)
{
$this->cache_location = (string) $cache_location;
}
if ($cache_name_function)
{
$this->cache_name_function = (string) $cache_name_function;
}
if ($cache_class)
{
$this->cache_class = (string) $cache_class;
}
}
public function pass_file_data($file_class = 'SimplePie_File', $timeout = 10, $useragent = '', $force_fsockopen = false)
{
if ($file_class)
{
$this->file_class = (string) $file_class;
}
if ($timeout)
{
$this->timeout = (string) $timeout;
}
if ($useragent)
{
$this->useragent = (string) $useragent;
}
if ($force_fsockopen)
{
$this->force_fsockopen = (string) $force_fsockopen;
}
}
public function strip_htmltags($tags = array('base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style'))
{
if ($tags)
{
if (is_array($tags))
{
$this->strip_htmltags = $tags;
}
else
{
$this->strip_htmltags = explode(',', $tags);
}
}
else
{
$this->strip_htmltags = false;
}
}
public function encode_instead_of_strip($encode = false)
{
$this->encode_instead_of_strip = (bool) $encode;
}
public function strip_attributes($attribs = array('bgsound', 'class', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc'))
{
if ($attribs)
{
if (is_array($attribs))
{
$this->strip_attributes = $attribs;
}
else
{
$this->strip_attributes = explode(',', $attribs);
}
}
else
{
$this->strip_attributes = false;
}
}
public function strip_comments($strip = false)
{
$this->strip_comments = (bool) $strip;
}
public function set_output_encoding($encoding = 'UTF-8')
{
$this->output_encoding = (string) $encoding;
}
/**
* Set element/attribute key/value pairs of HTML attributes
* containing URLs that need to be resolved relative to the feed
*
* @access public
* @since 1.0
* @param array $element_attribute Element/attribute key/value pairs
*/
public function set_url_replacements($element_attribute = array('a' => 'href', 'area' => 'href', 'blockquote' => 'cite', 'del' => 'cite', 'form' => 'action', 'img' => array('longdesc', 'src'), 'input' => 'src', 'ins' => 'cite', 'q' => 'cite'))
{
$this->replace_url_attributes = (array) $element_attribute;
}
public function sanitize($data, $type, $base = '')
{
$data = trim($data);
if ($data !== '' || $type & SIMPLEPIE_CONSTRUCT_IRI)
{
if ($type & SIMPLEPIE_CONSTRUCT_MAYBE_HTML)
{
if (preg_match('/(&(#(x[0-9a-fA-F]+|[0-9]+)|[a-zA-Z0-9]+)|<\/[A-Za-z][^\x09\x0A\x0B\x0C\x0D\x20\x2F\x3E]*' . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . '>)/', $data))
{
$type |= SIMPLEPIE_CONSTRUCT_HTML;
}
else
{
$type |= SIMPLEPIE_CONSTRUCT_TEXT;
}
}
if ($type & SIMPLEPIE_CONSTRUCT_BASE64)
{
$data = base64_decode($data);
}
if ($type & SIMPLEPIE_CONSTRUCT_XHTML)
{
if ($this->remove_div)
{
$data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '', $data);
$data = preg_replace('/<\/div>$/', '', $data);
}
else
{
$data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '<div>', $data);
}
}
if ($type & (SIMPLEPIE_CONSTRUCT_HTML | SIMPLEPIE_CONSTRUCT_XHTML))
{
// Strip comments
if ($this->strip_comments)
{
$data = SimplePie_Misc::strip_comments($data);
}
// Strip out HTML tags and attributes that might cause various security problems.
// Based on recommendations by Mark Pilgrim at:
// http://diveintomark.org/archives/2003/06/12/how_to_consume_rss_safely
if ($this->strip_htmltags)
{
foreach ($this->strip_htmltags as $tag)
{
$pcre = "/<($tag)" . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . "(>(.*)<\/$tag" . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . '>|(\/)?>)/siU';
while (preg_match($pcre, $data))
{
$data = preg_replace_callback($pcre, array(&$this, 'do_strip_htmltags'), $data);
}
}
}
if ($this->strip_attributes)
{
foreach ($this->strip_attributes as $attrib)
{
$data = preg_replace('/(<[A-Za-z][^\x09\x0A\x0B\x0C\x0D\x20\x2F\x3E]*)' . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . trim($attrib) . '(?:\s*=\s*(?:"(?:[^"]*)"|\'(?:[^\']*)\'|(?:[^\x09\x0A\x0B\x0C\x0D\x20\x22\x27\x3E][^\x09\x0A\x0B\x0C\x0D\x20\x3E]*)?))?' . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . '>/', '\1\2\3>', $data);
}
}
// Replace relative URLs
$this->base = $base;
foreach ($this->replace_url_attributes as $element => $attributes)
{
$data = $this->replace_urls($data, $element, $attributes);
}
// If image handling (caching, etc.) is enabled, cache and rewrite all the image tags.
if (isset($this->image_handler) && ((string) $this->image_handler) !== '' && $this->enable_cache)
{
$images = SimplePie_Misc::get_element('img', $data);
foreach ($images as $img)
{
if (isset($img['attribs']['src']['data']))
{
$image_url = call_user_func($this->cache_name_function, $img['attribs']['src']['data']);
$cache = call_user_func(array($this->cache_class, 'create'), $this->cache_location, $image_url, 'spi');
if ($cache->load())
{
$img['attribs']['src']['data'] = $this->image_handler . $image_url;
$data = str_replace($img['full'], SimplePie_Misc::element_implode($img), $data);
}
else
{
$file = new $this->file_class($img['attribs']['src']['data'], $this->timeout, 5, array('X-FORWARDED-FOR' => $_SERVER['REMOTE_ADDR']), $this->useragent, $this->force_fsockopen);
$headers = $file->headers;
if ($file->success && ($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($file->status_code === 200 || $file->status_code > 206 && $file->status_code < 300)))
{
if ($cache->save(array('headers' => $file->headers, 'body' => $file->body)))
{
$img['attribs']['src']['data'] = $this->image_handler . $image_url;
$data = str_replace($img['full'], SimplePie_Misc::element_implode($img), $data);
}
else
{
trigger_error("$this->cache_location is not writeable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING);
}
}
}
}
}
}
// Having (possibly) taken stuff out, there may now be whitespace at the beginning/end of the data
$data = trim($data);
}
if ($type & SIMPLEPIE_CONSTRUCT_IRI)
{
$data = SimplePie_Misc::absolutize_url($data, $base);
}
if ($type & (SIMPLEPIE_CONSTRUCT_TEXT | SIMPLEPIE_CONSTRUCT_IRI))
{
$data = htmlspecialchars($data, ENT_COMPAT, 'UTF-8');
}
if ($this->output_encoding !== 'UTF-8')
{
$data = SimplePie_Misc::change_encoding($data, 'UTF-8', $this->output_encoding);
}
}
return $data;
}
public function replace_urls($data, $tag, $attributes)
{
if (!is_array($this->strip_htmltags) || !in_array($tag, $this->strip_htmltags))
{
$elements = SimplePie_Misc::get_element($tag, $data);
foreach ($elements as $element)
{
if (is_array($attributes))
{
foreach ($attributes as $attribute)
{
if (isset($element['attribs'][$attribute]['data']))
{
$element['attribs'][$attribute]['data'] = SimplePie_Misc::absolutize_url($element['attribs'][$attribute]['data'], $this->base);
$new_element = SimplePie_Misc::element_implode($element);
$data = str_replace($element['full'], $new_element, $data);
$element['full'] = $new_element;
}
}
}
elseif (isset($element['attribs'][$attributes]['data']))
{
$element['attribs'][$attributes]['data'] = SimplePie_Misc::absolutize_url($element['attribs'][$attributes]['data'], $this->base);
$data = str_replace($element['full'], SimplePie_Misc::element_implode($element), $data);
}
}
}
return $data;
}
public function do_strip_htmltags($match)
{
if ($this->encode_instead_of_strip)
{
if (isset($match[4]) && !in_array(strtolower($match[1]), array('script', 'style')))
{
$match[1] = htmlspecialchars($match[1], ENT_COMPAT, 'UTF-8');
$match[2] = htmlspecialchars($match[2], ENT_COMPAT, 'UTF-8');
return "&lt;$match[1]$match[2]&gt;$match[3]&lt;/$match[1]&gt;";
}
else
{
return htmlspecialchars($match[0], ENT_COMPAT, 'UTF-8');
}
}
elseif (isset($match[4]) && !in_array(strtolower($match[1]), array('script', 'style')))
{
return $match[4];
}
else
{
return '';
}
}
}

View File

@ -0,0 +1,597 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
class SimplePie_Source
{
var $item;
var $data = array();
public function __construct($item, $data)
{
$this->item = $item;
$this->data = $data;
}
public function __toString()
{
return md5(serialize($this->data));
}
public function get_source_tags($namespace, $tag)
{
if (isset($this->data['child'][$namespace][$tag]))
{
return $this->data['child'][$namespace][$tag];
}
else
{
return null;
}
}
public function get_base($element = array())
{
return $this->item->get_base($element);
}
public function sanitize($data, $type, $base = '')
{
return $this->item->sanitize($data, $type, $base);
}
public function get_item()
{
return $this->item;
}
public function get_title()
{
if ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'title'))
{
return $this->sanitize($return[0]['data'], SimplePie_Misc::atom_10_construct_type($return[0]['attribs']), $this->get_base($return[0]));
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'title'))
{
return $this->sanitize($return[0]['data'], SimplePie_Misc::atom_03_construct_type($return[0]['attribs']), $this->get_base($return[0]));
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_RSS_10, 'title'))
{
return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_MAYBE_HTML, $this->get_base($return[0]));
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_RSS_090, 'title'))
{
return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_MAYBE_HTML, $this->get_base($return[0]));
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'title'))
{
return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_MAYBE_HTML, $this->get_base($return[0]));
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_DC_11, 'title'))
{
return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_TEXT);
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_DC_10, 'title'))
{
return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_TEXT);
}
else
{
return null;
}
}
public function get_category($key = 0)
{
$categories = $this->get_categories();
if (isset($categories[$key]))
{
return $categories[$key];
}
else
{
return null;
}
}
public function get_categories()
{
$categories = array();
foreach ((array) $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'category') as $category)
{
$term = null;
$scheme = null;
$label = null;
if (isset($category['attribs']['']['term']))
{
$term = $this->sanitize($category['attribs']['']['term'], SIMPLEPIE_CONSTRUCT_TEXT);
}
if (isset($category['attribs']['']['scheme']))
{
$scheme = $this->sanitize($category['attribs']['']['scheme'], SIMPLEPIE_CONSTRUCT_TEXT);
}
if (isset($category['attribs']['']['label']))
{
$label = $this->sanitize($category['attribs']['']['label'], SIMPLEPIE_CONSTRUCT_TEXT);
}
$categories[] = new $this->item->feed->category_class($term, $scheme, $label);
}
foreach ((array) $this->get_source_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'category') as $category)
{
// This is really the label, but keep this as the term also for BC.
// Label will also work on retrieving because that falls back to term.
$term = $this->sanitize($category['data'], SIMPLEPIE_CONSTRUCT_TEXT);
if (isset($category['attribs']['']['domain']))
{
$scheme = $this->sanitize($category['attribs']['']['domain'], SIMPLEPIE_CONSTRUCT_TEXT);
}
else
{
$scheme = null;
}
$categories[] = new $this->item->feed->category_class($term, $scheme, null);
}
foreach ((array) $this->get_source_tags(SIMPLEPIE_NAMESPACE_DC_11, 'subject') as $category)
{
$categories[] = new $this->item->feed->category_class($this->sanitize($category['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null);
}
foreach ((array) $this->get_source_tags(SIMPLEPIE_NAMESPACE_DC_10, 'subject') as $category)
{
$categories[] = new $this->item->feed->category_class($this->sanitize($category['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null);
}
if (!empty($categories))
{
return SimplePie_Misc::array_unique($categories);
}
else
{
return null;
}
}
public function get_author($key = 0)
{
$authors = $this->get_authors();
if (isset($authors[$key]))
{
return $authors[$key];
}
else
{
return null;
}
}
public function get_authors()
{
$authors = array();
foreach ((array) $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'author') as $author)
{
$name = null;
$uri = null;
$email = null;
if (isset($author['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['name'][0]['data']))
{
$name = $this->sanitize($author['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['name'][0]['data'], SIMPLEPIE_CONSTRUCT_TEXT);
}
if (isset($author['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['uri'][0]['data']))
{
$uri = $this->sanitize($author['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['uri'][0]['data'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($author['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['uri'][0]));
}
if (isset($author['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['email'][0]['data']))
{
$email = $this->sanitize($author['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['email'][0]['data'], SIMPLEPIE_CONSTRUCT_TEXT);
}
if ($name !== null || $email !== null || $uri !== null)
{
$authors[] = new $this->item->feed->author_class($name, $uri, $email);
}
}
if ($author = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'author'))
{
$name = null;
$url = null;
$email = null;
if (isset($author[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['name'][0]['data']))
{
$name = $this->sanitize($author[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['name'][0]['data'], SIMPLEPIE_CONSTRUCT_TEXT);
}
if (isset($author[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['url'][0]['data']))
{
$url = $this->sanitize($author[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['url'][0]['data'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($author[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['url'][0]));
}
if (isset($author[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['email'][0]['data']))
{
$email = $this->sanitize($author[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['email'][0]['data'], SIMPLEPIE_CONSTRUCT_TEXT);
}
if ($name !== null || $email !== null || $url !== null)
{
$authors[] = new $this->item->feed->author_class($name, $url, $email);
}
}
foreach ((array) $this->get_source_tags(SIMPLEPIE_NAMESPACE_DC_11, 'creator') as $author)
{
$authors[] = new $this->item->feed->author_class($this->sanitize($author['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null);
}
foreach ((array) $this->get_source_tags(SIMPLEPIE_NAMESPACE_DC_10, 'creator') as $author)
{
$authors[] = new $this->item->feed->author_class($this->sanitize($author['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null);
}
foreach ((array) $this->get_source_tags(SIMPLEPIE_NAMESPACE_ITUNES, 'author') as $author)
{
$authors[] = new $this->item->feed->author_class($this->sanitize($author['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null);
}
if (!empty($authors))
{
return SimplePie_Misc::array_unique($authors);
}
else
{
return null;
}
}
public function get_contributor($key = 0)
{
$contributors = $this->get_contributors();
if (isset($contributors[$key]))
{
return $contributors[$key];
}
else
{
return null;
}
}
public function get_contributors()
{
$contributors = array();
foreach ((array) $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'contributor') as $contributor)
{
$name = null;
$uri = null;
$email = null;
if (isset($contributor['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['name'][0]['data']))
{
$name = $this->sanitize($contributor['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['name'][0]['data'], SIMPLEPIE_CONSTRUCT_TEXT);
}
if (isset($contributor['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['uri'][0]['data']))
{
$uri = $this->sanitize($contributor['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['uri'][0]['data'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($contributor['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['uri'][0]));
}
if (isset($contributor['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['email'][0]['data']))
{
$email = $this->sanitize($contributor['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['email'][0]['data'], SIMPLEPIE_CONSTRUCT_TEXT);
}
if ($name !== null || $email !== null || $uri !== null)
{
$contributors[] = new $this->item->feed->author_class($name, $uri, $email);
}
}
foreach ((array) $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'contributor') as $contributor)
{
$name = null;
$url = null;
$email = null;
if (isset($contributor['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['name'][0]['data']))
{
$name = $this->sanitize($contributor['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['name'][0]['data'], SIMPLEPIE_CONSTRUCT_TEXT);
}
if (isset($contributor['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['url'][0]['data']))
{
$url = $this->sanitize($contributor['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['url'][0]['data'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($contributor['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['url'][0]));
}
if (isset($contributor['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['email'][0]['data']))
{
$email = $this->sanitize($contributor['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['email'][0]['data'], SIMPLEPIE_CONSTRUCT_TEXT);
}
if ($name !== null || $email !== null || $url !== null)
{
$contributors[] = new $this->item->feed->author_class($name, $url, $email);
}
}
if (!empty($contributors))
{
return SimplePie_Misc::array_unique($contributors);
}
else
{
return null;
}
}
public function get_link($key = 0, $rel = 'alternate')
{
$links = $this->get_links($rel);
if (isset($links[$key]))
{
return $links[$key];
}
else
{
return null;
}
}
/**
* Added for parity between the parent-level and the item/entry-level.
*/
public function get_permalink()
{
return $this->get_link(0);
}
public function get_links($rel = 'alternate')
{
if (!isset($this->data['links']))
{
$this->data['links'] = array();
if ($links = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'link'))
{
foreach ($links as $link)
{
if (isset($link['attribs']['']['href']))
{
$link_rel = (isset($link['attribs']['']['rel'])) ? $link['attribs']['']['rel'] : 'alternate';
$this->data['links'][$link_rel][] = $this->sanitize($link['attribs']['']['href'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($link));
}
}
}
if ($links = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'link'))
{
foreach ($links as $link)
{
if (isset($link['attribs']['']['href']))
{
$link_rel = (isset($link['attribs']['']['rel'])) ? $link['attribs']['']['rel'] : 'alternate';
$this->data['links'][$link_rel][] = $this->sanitize($link['attribs']['']['href'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($link));
}
}
}
if ($links = $this->get_source_tags(SIMPLEPIE_NAMESPACE_RSS_10, 'link'))
{
$this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($links[0]));
}
if ($links = $this->get_source_tags(SIMPLEPIE_NAMESPACE_RSS_090, 'link'))
{
$this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($links[0]));
}
if ($links = $this->get_source_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'link'))
{
$this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($links[0]));
}
$keys = array_keys($this->data['links']);
foreach ($keys as $key)
{
if (SimplePie_Misc::is_isegment_nz_nc($key))
{
if (isset($this->data['links'][SIMPLEPIE_IANA_LINK_RELATIONS_REGISTRY . $key]))
{
$this->data['links'][SIMPLEPIE_IANA_LINK_RELATIONS_REGISTRY . $key] = array_merge($this->data['links'][$key], $this->data['links'][SIMPLEPIE_IANA_LINK_RELATIONS_REGISTRY . $key]);
$this->data['links'][$key] =& $this->data['links'][SIMPLEPIE_IANA_LINK_RELATIONS_REGISTRY . $key];
}
else
{
$this->data['links'][SIMPLEPIE_IANA_LINK_RELATIONS_REGISTRY . $key] =& $this->data['links'][$key];
}
}
elseif (substr($key, 0, 41) === SIMPLEPIE_IANA_LINK_RELATIONS_REGISTRY)
{
$this->data['links'][substr($key, 41)] =& $this->data['links'][$key];
}
$this->data['links'][$key] = array_unique($this->data['links'][$key]);
}
}
if (isset($this->data['links'][$rel]))
{
return $this->data['links'][$rel];
}
else
{
return null;
}
}
public function get_description()
{
if ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'subtitle'))
{
return $this->sanitize($return[0]['data'], SimplePie_Misc::atom_10_construct_type($return[0]['attribs']), $this->get_base($return[0]));
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'tagline'))
{
return $this->sanitize($return[0]['data'], SimplePie_Misc::atom_03_construct_type($return[0]['attribs']), $this->get_base($return[0]));
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_RSS_10, 'description'))
{
return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_MAYBE_HTML, $this->get_base($return[0]));
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_RSS_090, 'description'))
{
return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_MAYBE_HTML, $this->get_base($return[0]));
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'description'))
{
return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_MAYBE_HTML, $this->get_base($return[0]));
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_DC_11, 'description'))
{
return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_TEXT);
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_DC_10, 'description'))
{
return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_TEXT);
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ITUNES, 'summary'))
{
return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_HTML, $this->get_base($return[0]));
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ITUNES, 'subtitle'))
{
return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_HTML, $this->get_base($return[0]));
}
else
{
return null;
}
}
public function get_copyright()
{
if ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'rights'))
{
return $this->sanitize($return[0]['data'], SimplePie_Misc::atom_10_construct_type($return[0]['attribs']), $this->get_base($return[0]));
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'copyright'))
{
return $this->sanitize($return[0]['data'], SimplePie_Misc::atom_03_construct_type($return[0]['attribs']), $this->get_base($return[0]));
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'copyright'))
{
return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_TEXT);
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_DC_11, 'rights'))
{
return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_TEXT);
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_DC_10, 'rights'))
{
return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_TEXT);
}
else
{
return null;
}
}
public function get_language()
{
if ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'language'))
{
return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_TEXT);
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_DC_11, 'language'))
{
return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_TEXT);
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_DC_10, 'language'))
{
return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_TEXT);
}
elseif (isset($this->data['xml_lang']))
{
return $this->sanitize($this->data['xml_lang'], SIMPLEPIE_CONSTRUCT_TEXT);
}
else
{
return null;
}
}
public function get_latitude()
{
if ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_W3C_BASIC_GEO, 'lat'))
{
return (float) $return[0]['data'];
}
elseif (($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_GEORSS, 'point')) && preg_match('/^((?:-)?[0-9]+(?:\.[0-9]+)) ((?:-)?[0-9]+(?:\.[0-9]+))$/', trim($return[0]['data']), $match))
{
return (float) $match[1];
}
else
{
return null;
}
}
public function get_longitude()
{
if ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_W3C_BASIC_GEO, 'long'))
{
return (float) $return[0]['data'];
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_W3C_BASIC_GEO, 'lon'))
{
return (float) $return[0]['data'];
}
elseif (($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_GEORSS, 'point')) && preg_match('/^((?:-)?[0-9]+(?:\.[0-9]+)) ((?:-)?[0-9]+(?:\.[0-9]+))$/', trim($return[0]['data']), $match))
{
return (float) $match[2];
}
else
{
return null;
}
}
public function get_image_url()
{
if ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ITUNES, 'image'))
{
return $this->sanitize($return[0]['attribs']['']['href'], SIMPLEPIE_CONSTRUCT_IRI);
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'logo'))
{
return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($return[0]));
}
elseif ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'icon'))
{
return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($return[0]));
}
else
{
return null;
}
}
}

View File

@ -0,0 +1,362 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
/**
* Parses the XML Declaration
*
* @package SimplePie
*/
class SimplePie_XML_Declaration_Parser
{
/**
* XML Version
*
* @access public
* @var string
*/
var $version = '1.0';
/**
* Encoding
*
* @access public
* @var string
*/
var $encoding = 'UTF-8';
/**
* Standalone
*
* @access public
* @var bool
*/
var $standalone = false;
/**
* Current state of the state machine
*
* @access private
* @var string
*/
var $state = 'before_version_name';
/**
* Input data
*
* @access private
* @var string
*/
var $data = '';
/**
* Input data length (to avoid calling strlen() everytime this is needed)
*
* @access private
* @var int
*/
var $data_length = 0;
/**
* Current position of the pointer
*
* @var int
* @access private
*/
var $position = 0;
/**
* Create an instance of the class with the input data
*
* @access public
* @param string $data Input data
*/
public function __construct($data)
{
$this->data = $data;
$this->data_length = strlen($this->data);
}
/**
* Parse the input data
*
* @access public
* @return bool true on success, false on failure
*/
public function parse()
{
while ($this->state && $this->state !== 'emit' && $this->has_data())
{
$state = $this->state;
$this->$state();
}
$this->data = '';
if ($this->state === 'emit')
{
return true;
}
else
{
$this->version = '';
$this->encoding = '';
$this->standalone = '';
return false;
}
}
/**
* Check whether there is data beyond the pointer
*
* @access private
* @return bool true if there is further data, false if not
*/
public function has_data()
{
return (bool) ($this->position < $this->data_length);
}
/**
* Advance past any whitespace
*
* @return int Number of whitespace characters passed
*/
public function skip_whitespace()
{
$whitespace = strspn($this->data, "\x09\x0A\x0D\x20", $this->position);
$this->position += $whitespace;
return $whitespace;
}
/**
* Read value
*/
public function get_value()
{
$quote = substr($this->data, $this->position, 1);
if ($quote === '"' || $quote === "'")
{
$this->position++;
$len = strcspn($this->data, $quote, $this->position);
if ($this->has_data())
{
$value = substr($this->data, $this->position, $len);
$this->position += $len + 1;
return $value;
}
}
return false;
}
public function before_version_name()
{
if ($this->skip_whitespace())
{
$this->state = 'version_name';
}
else
{
$this->state = false;
}
}
public function version_name()
{
if (substr($this->data, $this->position, 7) === 'version')
{
$this->position += 7;
$this->skip_whitespace();
$this->state = 'version_equals';
}
else
{
$this->state = false;
}
}
public function version_equals()
{
if (substr($this->data, $this->position, 1) === '=')
{
$this->position++;
$this->skip_whitespace();
$this->state = 'version_value';
}
else
{
$this->state = false;
}
}
public function version_value()
{
if ($this->version = $this->get_value())
{
$this->skip_whitespace();
if ($this->has_data())
{
$this->state = 'encoding_name';
}
else
{
$this->state = 'emit';
}
}
else
{
$this->state = false;
}
}
public function encoding_name()
{
if (substr($this->data, $this->position, 8) === 'encoding')
{
$this->position += 8;
$this->skip_whitespace();
$this->state = 'encoding_equals';
}
else
{
$this->state = 'standalone_name';
}
}
public function encoding_equals()
{
if (substr($this->data, $this->position, 1) === '=')
{
$this->position++;
$this->skip_whitespace();
$this->state = 'encoding_value';
}
else
{
$this->state = false;
}
}
public function encoding_value()
{
if ($this->encoding = $this->get_value())
{
$this->skip_whitespace();
if ($this->has_data())
{
$this->state = 'standalone_name';
}
else
{
$this->state = 'emit';
}
}
else
{
$this->state = false;
}
}
public function standalone_name()
{
if (substr($this->data, $this->position, 10) === 'standalone')
{
$this->position += 10;
$this->skip_whitespace();
$this->state = 'standalone_equals';
}
else
{
$this->state = false;
}
}
public function standalone_equals()
{
if (substr($this->data, $this->position, 1) === '=')
{
$this->position++;
$this->skip_whitespace();
$this->state = 'standalone_value';
}
else
{
$this->state = false;
}
}
public function standalone_value()
{
if ($standalone = $this->get_value())
{
switch ($standalone)
{
case 'yes':
$this->standalone = true;
break;
case 'no':
$this->standalone = false;
break;
default:
$this->state = false;
return;
}
$this->skip_whitespace();
if ($this->has_data())
{
$this->state = false;
}
else
{
$this->state = 'emit';
}
}
else
{
$this->state = false;
}
}
}

View File

@ -0,0 +1,355 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
/**
* gzdecode
*
* @package SimplePie
*/
class SimplePie_gzdecode
{
/**
* Compressed data
*
* @access private
* @see gzdecode::$data
*/
var $compressed_data;
/**
* Size of compressed data
*
* @access private
*/
var $compressed_size;
/**
* Minimum size of a valid gzip string
*
* @access private
*/
var $min_compressed_size = 18;
/**
* Current position of pointer
*
* @access private
*/
var $position = 0;
/**
* Flags (FLG)
*
* @access private
*/
var $flags;
/**
* Uncompressed data
*
* @access public
* @see gzdecode::$compressed_data
*/
var $data;
/**
* Modified time
*
* @access public
*/
var $MTIME;
/**
* Extra Flags
*
* @access public
*/
var $XFL;
/**
* Operating System
*
* @access public
*/
var $OS;
/**
* Subfield ID 1
*
* @access public
* @see gzdecode::$extra_field
* @see gzdecode::$SI2
*/
var $SI1;
/**
* Subfield ID 2
*
* @access public
* @see gzdecode::$extra_field
* @see gzdecode::$SI1
*/
var $SI2;
/**
* Extra field content
*
* @access public
* @see gzdecode::$SI1
* @see gzdecode::$SI2
*/
var $extra_field;
/**
* Original filename
*
* @access public
*/
var $filename;
/**
* Human readable comment
*
* @access public
*/
var $comment;
/**
* Don't allow anything to be set
*
* @access public
*/
public function __set($name, $value)
{
trigger_error("Cannot write property $name", E_USER_ERROR);
}
/**
* Set the compressed string and related properties
*
* @access public
*/
public function __construct($data)
{
$this->compressed_data = $data;
$this->compressed_size = strlen($data);
}
/**
* Decode the GZIP stream
*
* @access public
*/
public function parse()
{
if ($this->compressed_size >= $this->min_compressed_size)
{
// Check ID1, ID2, and CM
if (substr($this->compressed_data, 0, 3) !== "\x1F\x8B\x08")
{
return false;
}
// Get the FLG (FLaGs)
$this->flags = ord($this->compressed_data[3]);
// FLG bits above (1 << 4) are reserved
if ($this->flags > 0x1F)
{
return false;
}
// Advance the pointer after the above
$this->position += 4;
// MTIME
$mtime = substr($this->compressed_data, $this->position, 4);
// Reverse the string if we're on a big-endian arch because l is the only signed long and is machine endianness
if (current(unpack('S', "\x00\x01")) === 1)
{
$mtime = strrev($mtime);
}
$this->MTIME = current(unpack('l', $mtime));
$this->position += 4;
// Get the XFL (eXtra FLags)
$this->XFL = ord($this->compressed_data[$this->position++]);
// Get the OS (Operating System)
$this->OS = ord($this->compressed_data[$this->position++]);
// Parse the FEXTRA
if ($this->flags & 4)
{
// Read subfield IDs
$this->SI1 = $this->compressed_data[$this->position++];
$this->SI2 = $this->compressed_data[$this->position++];
// SI2 set to zero is reserved for future use
if ($this->SI2 === "\x00")
{
return false;
}
// Get the length of the extra field
$len = current(unpack('v', substr($this->compressed_data, $this->position, 2)));
$this->position += 2;
// Check the length of the string is still valid
$this->min_compressed_size += $len + 4;
if ($this->compressed_size >= $this->min_compressed_size)
{
// Set the extra field to the given data
$this->extra_field = substr($this->compressed_data, $this->position, $len);
$this->position += $len;
}
else
{
return false;
}
}
// Parse the FNAME
if ($this->flags & 8)
{
// Get the length of the filename
$len = strcspn($this->compressed_data, "\x00", $this->position);
// Check the length of the string is still valid
$this->min_compressed_size += $len + 1;
if ($this->compressed_size >= $this->min_compressed_size)
{
// Set the original filename to the given string
$this->filename = substr($this->compressed_data, $this->position, $len);
$this->position += $len + 1;
}
else
{
return false;
}
}
// Parse the FCOMMENT
if ($this->flags & 16)
{
// Get the length of the comment
$len = strcspn($this->compressed_data, "\x00", $this->position);
// Check the length of the string is still valid
$this->min_compressed_size += $len + 1;
if ($this->compressed_size >= $this->min_compressed_size)
{
// Set the original comment to the given string
$this->comment = substr($this->compressed_data, $this->position, $len);
$this->position += $len + 1;
}
else
{
return false;
}
}
// Parse the FHCRC
if ($this->flags & 2)
{
// Check the length of the string is still valid
$this->min_compressed_size += $len + 2;
if ($this->compressed_size >= $this->min_compressed_size)
{
// Read the CRC
$crc = current(unpack('v', substr($this->compressed_data, $this->position, 2)));
// Check the CRC matches
if ((crc32(substr($this->compressed_data, 0, $this->position)) & 0xFFFF) === $crc)
{
$this->position += 2;
}
else
{
return false;
}
}
else
{
return false;
}
}
// Decompress the actual data
if (($this->data = gzinflate(substr($this->compressed_data, $this->position, -8))) === false)
{
return false;
}
else
{
$this->position = $this->compressed_size - 8;
}
// Check CRC of data
$crc = current(unpack('V', substr($this->compressed_data, $this->position, 4)));
$this->position += 4;
/*if (extension_loaded('hash') && sprintf('%u', current(unpack('V', hash('crc32b', $this->data)))) !== sprintf('%u', $crc))
{
return false;
}*/
// Check ISIZE of data
$isize = current(unpack('V', substr($this->compressed_data, $this->position, 4)));
$this->position += 4;
if (sprintf('%u', strlen($this->data) & 0xFFFFFFFF) !== sprintf('%u', $isize))
{
return false;
}
// Wow, against all odds, we've actually got a valid gzip string
return true;
}
else
{
return false;
}
}
}

View File

@ -0,0 +1,80 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3-dev
* @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @todo phpDoc comments
*/
// autoloader
spl_autoload_register(array(new SimplePie_Autoloader(), 'autoload'));
/**
* SimplePie Autoloader class.
*
* @package SimplePie
*/
class SimplePie_Autoloader
{
/**
* Constructor.
*/
public function __construct()
{
$this->path = dirname(__FILE__);
}
/**
* Autoloader.
*
* @param string $class The name of the class to attempt to load.
*/
public function autoload($class)
{
// see if this request should be handled by this autoloader
if (strpos($class, 'SimplePie') !== 0) {
return;
}
$filename = $this->path . DIRECTORY_SEPARATOR . str_replace('_', DIRECTORY_SEPARATOR, $class) . '.php';
include $filename;
}
}

6
inc/3rdparty/site_config/README.txt vendored Normal file
View File

@ -0,0 +1,6 @@
Full-Text RSS Site Patterns
---------------------------
Site patterns allow you to specify what should be extracted from specific sites.
Please see http://help.fivefilters.org/customer/portal/articles/223153-site-patterns for more information.

View File

@ -0,0 +1,7 @@
title: //title
body: //div[@class='post-content']
prune: no
tidy: no
test_url: http://www.inthepoche.com/?post/poche-hosting

3
inc/3rdparty/site_config/index.php vendored Normal file
View File

@ -0,0 +1,3 @@
<?php
// this is here to prevent directory listing over the web
?>

View File

@ -0,0 +1,19 @@
title: //h1[@id='firstHeading']
body: //div[@id = 'bodyContent']
strip_id_or_class: editsection
#strip_id_or_class: toc
strip_id_or_class: vertical-navbox
strip: //table[@id='toc']
strip: //div[@id='catlinks']
strip: //div[@id='jump-to-nav']
strip: //div[@class='thumbcaption']//div[@class='magnify']
strip: //table[@class='navbox']
strip: //table[contains(@class, 'infobox')]
strip: //div[@class='dablink']
strip: //div[@id='contentSub']
strip: //table[contains(@class, 'metadata')]
strip: //*[contains(@class, 'noprint')]
strip: //span[@title='pronunciation:']
prune: no
tidy: no
test_url: http://en.wikipedia.org/wiki/Christopher_Lloyd

View File

@ -0,0 +1,3 @@
<?php
// this is here to prevent directory listing over the web
?>

View File

@ -0,0 +1,2 @@
<?php
return 1;

View File

@ -156,36 +156,31 @@ class Poche
switch ($action)
{
case 'add':
if($parametres_url = $url->fetchContent()) {
if ($this->store->add($url->getUrl(), $parametres_url['title'], $parametres_url['content'], $this->user->getId())) {
Tools::logm('add link ' . $url->getUrl());
$sequence = '';
if (STORAGE == 'postgres') {
$sequence = 'entries_id_seq';
}
$last_id = $this->store->getLastId($sequence);
if (DOWNLOAD_PICTURES) {
$content = filtre_picture($parametres_url['content'], $url->getUrl(), $last_id);
Tools::logm('updating content article');
$this->store->updateContent($last_id, $content, $this->user->getId());
}
if (!$import) {
$this->messages->add('s', _('the link has been added successfully'));
}
$content = $url->extract();
if ($this->store->add($url->getUrl(), $content['title'], $content['body'], $this->user->getId())) {
Tools::logm('add link ' . $url->getUrl());
$sequence = '';
if (STORAGE == 'postgres') {
$sequence = 'entries_id_seq';
}
else {
if (!$import) {
$this->messages->add('e', _('error during insertion : the link wasn\'t added'));
Tools::logm('error during insertion : the link wasn\'t added ' . $url->getUrl());
}
$last_id = $this->store->getLastId($sequence);
if (DOWNLOAD_PICTURES) {
$content = filtre_picture($parametres_url['body'], $url->getUrl(), $last_id);
Tools::logm('updating content article');
$this->store->updateContent($last_id, $content, $this->user->getId());
}
if (!$import) {
$this->messages->add('s', _('the link has been added successfully'));
}
}
else {
if (!$import) {
$this->messages->add('e', _('error during fetching content : the link wasn\'t added'));
Tools::logm('error during content fetch ' . $url->getUrl());
$this->messages->add('e', _('error during insertion : the link wasn\'t added'));
Tools::logm('error during insertion : the link wasn\'t added ' . $url->getUrl());
}
}
if (!$import) {
Tools::redirect();
}
@ -220,7 +215,6 @@ class Poche
}
break;
default:
Tools::logm('action ' . $action . 'doesn\'t exist');
break;
}
}

View File

@ -12,6 +12,45 @@ class Url
{
public $url;
private $fingerprints = array(
// Posterous
'<meta name="generator" content="Posterous"' => array('hostname'=>'fingerprint.posterous.com', 'head'=>true),
// Blogger
'<meta content=\'blogger\' name=\'generator\'' => array('hostname'=>'fingerprint.blogspot.com', 'head'=>true),
'<meta name="generator" content="Blogger"' => array('hostname'=>'fingerprint.blogspot.com', 'head'=>true),
// WordPress (self-hosted and hosted)
'<meta name="generator" content="WordPress' => array('hostname'=>'fingerprint.wordpress.com', 'head'=>true)
);
private $user_agents = array( 'lifehacker.com' => 'PHP/5.2',
'gawker.com' => 'PHP/5.2',
'deadspin.com' => 'PHP/5.2',
'kotaku.com' => 'PHP/5.2',
'jezebel.com' => 'PHP/5.2',
'io9.com' => 'PHP/5.2',
'jalopnik.com' => 'PHP/5.2',
'gizmodo.com' => 'PHP/5.2',
'.wikipedia.org' => 'Mozilla/5.2'
);
private $content_type_exc = array(
'application/pdf' => array('action'=>'link', 'name'=>'PDF'),
'image' => array('action'=>'link', 'name'=>'Image'),
'audio' => array('action'=>'link', 'name'=>'Audio'),
'video' => array('action'=>'link', 'name'=>'Video')
);
private $rewrite_url = array(
// Rewrite public Google Docs URLs to point to HTML view:
// if a URL contains docs.google.com, replace /Doc? with /View?
'docs.google.com' => array('/Doc?' => '/View?'),
'tnr.com' => array('tnr.com/article/' => 'tnr.com/print/article/'),
'.m.wikipedia.org' => array('.m.wikipedia.org' => '.wikipedia.org')
);
private $rewrite_relative_urls = true;
private $error_message = '[unable to retrieve full-text content]';
function __construct($url)
{
$this->url = base64_decode($url);
@ -25,76 +64,332 @@ class Url
$this->url = $url;
}
public function isCorrect()
{
public function isCorrect() {
return filter_var($this->url, FILTER_VALIDATE_URL) !== FALSE;
}
public function clean()
{
$url = html_entity_decode(trim($this->url));
public function extract() {
global $http, $extractor;
$extractor = new ContentExtractor(dirname(__FILE__).'/../3rdparty/site_config/custom', dirname(__FILE__).'/../3rdparty/site_config/standard');
$extractor->fingerprints = $this->fingerprints;
$stuff = strpos($url,'&utm_source=');
if ($stuff !== FALSE)
$url = substr($url, 0, $stuff);
$stuff = strpos($url,'?utm_source=');
if ($stuff !== FALSE)
$url = substr($url, 0, $stuff);
$stuff = strpos($url,'#xtor=RSS-');
if ($stuff !== FALSE)
$url = substr($url, 0, $stuff);
$http = new HumbleHttpAgent();
$http->userAgentMap = $this->user_agents;
$http->headerOnlyTypes = array_keys($this->content_type_exc);
$http->rewriteUrls = $this->rewrite_url;
$http->userAgentDefault = HumbleHttpAgent::UA_PHP;
// configure SimplePie HTTP extension class to use our HumbleHttpAgent instance
SimplePie_HumbleHttpAgent::set_agent($http);
$feed = new SimplePie();
// some feeds use the text/html content type - force_feed tells SimplePie to process anyway
$feed->force_feed(true);
$feed->set_file_class('SimplePie_HumbleHttpAgent');
$feed->feed_url = $this->url;
$feed->set_autodiscovery_level(SIMPLEPIE_LOCATOR_NONE);
$feed->set_timeout(20);
$feed->enable_cache(false);
$feed->set_stupidly_fast(true);
$feed->enable_order_by_date(false); // we don't want to do anything to the feed
$feed->set_url_replacements(array());
// initialise the feed
// the @ suppresses notices which on some servers causes a 500 internal server error
$result = @$feed->init();
if ($result && (!is_array($feed->data) || count($feed->data) == 0)) {
die('Sorry, no feed items found');
}
// from now on, we'll identify ourselves as a browser
$http->userAgentDefault = HumbleHttpAgent::UA_BROWSER;
unset($feed, $result);
$this->url = $url;
$feed = new DummySingleItemFeed($this->url);
$items = $feed->get_items(0, 1);
// Request all feed items in parallel (if supported)
$urls_sanitized = array();
$urls = array();
foreach ($items as $key => $item) {
$permalink = htmlspecialchars_decode($item->get_permalink());
// Colons in URL path segments get encoded by SimplePie, yet some sites expect them unencoded
$permalink = str_replace('%3A', ':', $permalink);
if ($permalink) {
$urls_sanitized[] = $permalink;
}
$urls[$key] = $permalink;
}
$http->fetchAll($urls_sanitized);
foreach ($items as $key => $item) {
$do_content_extraction = true;
$extract_result = false;
$permalink = $urls[$key];
// TODO: Allow error codes - some sites return correct content with error status
// e.g. prospectmagazine.co.uk returns 403
if ($permalink && ($response = $http->get($permalink, true)) && ($response['status_code'] < 300 || $response['status_code'] > 400)) {
$effective_url = $response['effective_url'];
// check if action defined for returned Content-Type
$type = null;
if (preg_match('!^Content-Type:\s*(([-\w]+)/([-\w\+]+))!im', $response['headers'], $match)) {
// look for full mime type (e.g. image/jpeg) or just type (e.g. image)
$match[1] = strtolower(trim($match[1]));
$match[2] = strtolower(trim($match[2]));
foreach (array($match[1], $match[2]) as $_mime) {
if (isset($this->content_type_exc[$_mime])) {
$type = $match[1];
$_act = $this->content_type_exc[$_mime]['action'];
$_name = $this->content_type_exc[$_mime]['name'];
if ($_act == 'exclude') {
continue 2; // skip this feed item entry
} elseif ($_act == 'link') {
if ($match[2] == 'image') {
$html = "<a href=\"$effective_url\"><img src=\"$effective_url\" alt=\"$_name\" /></a>";
} else {
$html = "<a href=\"$effective_url\">Download $_name</a>";
}
$title = $_name;
$do_content_extraction = false;
break;
}
}
}
unset($_mime, $_act, $_name, $match);
}
if ($do_content_extraction) {
$html = $response['body'];
// remove strange things
$html = str_replace('</[>', '', $html);
$html = $this->convert_to_utf8($html, $response['headers']);
// check site config for single page URL - fetch it if found
if ($single_page_response = $this->getSinglePage($item, $html, $effective_url)) {
$html = $single_page_response['body'];
// remove strange things
$html = str_replace('</[>', '', $html);
$html = $this->convert_to_utf8($html, $single_page_response['headers']);
$effective_url = $single_page_response['effective_url'];
unset($single_page_response);
}
$extract_result = $extractor->process($html, $effective_url);
$readability = $extractor->readability;
$content_block = ($extract_result) ? $extractor->getContent() : null;
}
}
if ($do_content_extraction) {
// if we failed to extract content...
if (!$extract_result) {
$html = $this->error_message;
// keep the original item description
$html .= $item->get_description();
} else {
$readability->clean($content_block, 'select');
if ($this->rewrite_relative_urls) $this->makeAbsolute($effective_url, $content_block);
if ($content_block->childNodes->length == 1 && $content_block->firstChild->nodeType === XML_ELEMENT_NODE) {
$html = $content_block->firstChild->innerHTML;
} else {
$html = $content_block->innerHTML;
}
// post-processing cleanup
$html = preg_replace('!<p>[\s\h\v]*</p>!u', '', $html);
}
}
}
$title = ($extractor->getTitle() != '' ? $extractor->getTitle() : _('Untitled'));
$content = array ('title' => $title, 'body' => $html);
return $content;
}
public function fetchContent()
private function convert_to_utf8($html, $header=null)
{
if ($this->isCorrect()) {
$this->clean();
$html = Encoding::toUTF8(Tools::getFile($this->getUrl()));
# if Tools::getFile() if not able to retrieve HTTPS content, try the same URL with HTTP protocol
if (!preg_match('!^https?://!i', $this->getUrl()) && (!isset($html) || strlen($html) <= 0)) {
$this->setUrl('http://' . $this->getUrl());
$html = Encoding::toUTF8(Tools::getFile($this->getUrl()));
$encoding = null;
if ($html || $header) {
if (is_array($header)) $header = implode("\n", $header);
if (!$header || !preg_match_all('/^Content-Type:\s+([^;]+)(?:;\s*charset=["\']?([^;"\'\n]*))?/im', $header, $match, PREG_SET_ORDER)) {
// error parsing the response
} else {
$match = end($match); // get last matched element (in case of redirects)
if (isset($match[2])) $encoding = trim($match[2], "\"' \r\n\0\x0B\t");
}
if (function_exists('tidy_parse_string')) {
$tidy = tidy_parse_string($html, array(), 'UTF8');
$tidy->cleanRepair();
//Warning: tidy might fail so, ensure there is still a content
$body = $tidy->body();
//hasChildren does not seem to work, just check the string
//returned (and do not forget to clean the white spaces)
if (preg_replace('/\s+/', '', $body->value) !== "<body></body>") {
$html = $tidy->value;
// TODO: check to see if encoding is supported (can we convert it?)
// If it's not, result will be empty string.
// For now we'll check for invalid encoding types returned by some sites, e.g. 'none'
// Problem URL: http://facta.co.jp/blog/archives/20111026001026.html
if (!$encoding || $encoding == 'none') {
// search for encoding in HTML - only look at the first 35000 characters
$html_head = substr($html, 0, 40000);
if (preg_match('/^<\?xml\s+version=(?:"[^"]*"|\'[^\']*\')\s+encoding=("[^"]*"|\'[^\']*\')/s', $html_head, $match)) {
$encoding = trim($match[1], '"\'');
} elseif (preg_match('/<meta\s+http-equiv=["\']?Content-Type["\']? content=["\'][^;]+;\s*charset=["\']?([^;"\'>]+)/i', $html_head, $match)) {
$encoding = trim($match[1]);
} elseif (preg_match_all('/<meta\s+([^>]+)>/i', $html_head, $match)) {
foreach ($match[1] as $_test) {
if (preg_match('/charset=["\']?([^"\']+)/i', $_test, $_m)) {
$encoding = trim($_m[1]);
break;
}
}
}
}
$parameters = array();
if (isset($html) and strlen($html) > 0)
{
$readability = new Readability($html, $this->getUrl());
$readability->convertLinksToFootnotes = CONVERT_LINKS_FOOTNOTES;
$readability->revertForcedParagraphElements = REVERT_FORCED_PARAGRAPH_ELEMENTS;
if($readability->init())
{
$content = $readability->articleContent->innerHTML;
$parameters['title'] = ($readability->articleTitle->innerHTML != '' ? $readability->articleTitle->innerHTML : _('Untitled'));
$parameters['content'] = $content;
return $parameters;
if (isset($encoding)) $encoding = trim($encoding);
// trim is important here!
if (!$encoding || (strtolower($encoding) == 'iso-8859-1')) {
// replace MS Word smart qutoes
$trans = array();
$trans[chr(130)] = '&sbquo;'; // Single Low-9 Quotation Mark
$trans[chr(131)] = '&fnof;'; // Latin Small Letter F With Hook
$trans[chr(132)] = '&bdquo;'; // Double Low-9 Quotation Mark
$trans[chr(133)] = '&hellip;'; // Horizontal Ellipsis
$trans[chr(134)] = '&dagger;'; // Dagger
$trans[chr(135)] = '&Dagger;'; // Double Dagger
$trans[chr(136)] = '&circ;'; // Modifier Letter Circumflex Accent
$trans[chr(137)] = '&permil;'; // Per Mille Sign
$trans[chr(138)] = '&Scaron;'; // Latin Capital Letter S With Caron
$trans[chr(139)] = '&lsaquo;'; // Single Left-Pointing Angle Quotation Mark
$trans[chr(140)] = '&OElig;'; // Latin Capital Ligature OE
$trans[chr(145)] = '&lsquo;'; // Left Single Quotation Mark
$trans[chr(146)] = '&rsquo;'; // Right Single Quotation Mark
$trans[chr(147)] = '&ldquo;'; // Left Double Quotation Mark
$trans[chr(148)] = '&rdquo;'; // Right Double Quotation Mark
$trans[chr(149)] = '&bull;'; // Bullet
$trans[chr(150)] = '&ndash;'; // En Dash
$trans[chr(151)] = '&mdash;'; // Em Dash
$trans[chr(152)] = '&tilde;'; // Small Tilde
$trans[chr(153)] = '&trade;'; // Trade Mark Sign
$trans[chr(154)] = '&scaron;'; // Latin Small Letter S With Caron
$trans[chr(155)] = '&rsaquo;'; // Single Right-Pointing Angle Quotation Mark
$trans[chr(156)] = '&oelig;'; // Latin Small Ligature OE
$trans[chr(159)] = '&Yuml;'; // Latin Capital Letter Y With Diaeresis
$html = strtr($html, $trans);
}
if (!$encoding) {
$encoding = 'utf-8';
} else {
if (strtolower($encoding) != 'utf-8') {
$html = SimplePie_Misc::change_encoding($html, $encoding, 'utf-8');
/*
if (function_exists('iconv')) {
// iconv appears to handle certain character encodings better than mb_convert_encoding
$html = iconv($encoding, 'utf-8', $html);
} else {
$html = mb_convert_encoding($html, 'utf-8', $encoding);
}
*/
}
}
}
else {
#$msg->add('e', _('error during url preparation : the link is not valid'));
Tools::logm($this->getUrl() . ' is not a valid url');
}
return $html;
}
return FALSE;
private function makeAbsolute($base, $elem) {
$base = new SimplePie_IRI($base);
// remove '//' in URL path (used to prevent URLs from resolving properly)
// TODO: check if this is still the case
if (isset($base->path)) $base->path = preg_replace('!//+!', '/', $base->path);
foreach(array('a'=>'href', 'img'=>'src') as $tag => $attr) {
$elems = $elem->getElementsByTagName($tag);
for ($i = $elems->length-1; $i >= 0; $i--) {
$e = $elems->item($i);
//$e->parentNode->replaceChild($articleContent->ownerDocument->createTextNode($e->textContent), $e);
$this->makeAbsoluteAttr($base, $e, $attr);
}
if (strtolower($elem->tagName) == $tag) $this->makeAbsoluteAttr($base, $elem, $attr);
}
}
private function makeAbsoluteAttr($base, $e, $attr) {
if ($e->hasAttribute($attr)) {
// Trim leading and trailing white space. I don't really like this but
// unfortunately it does appear on some sites. e.g. <img src=" /path/to/image.jpg" />
$url = trim(str_replace('%20', ' ', $e->getAttribute($attr)));
$url = str_replace(' ', '%20', $url);
if (!preg_match('!https?://!i', $url)) {
if ($absolute = SimplePie_IRI::absolutize($base, $url)) {
$e->setAttribute($attr, $absolute);
}
}
}
}
private function makeAbsoluteStr($base, $url) {
$base = new SimplePie_IRI($base);
// remove '//' in URL path (causes URLs not to resolve properly)
if (isset($base->path)) $base->path = preg_replace('!//+!', '/', $base->path);
if (preg_match('!^https?://!i', $url)) {
// already absolute
return $url;
} else {
if ($absolute = SimplePie_IRI::absolutize($base, $url)) {
return $absolute;
}
return false;
}
}
// returns single page response, or false if not found
private function getSinglePage($item, $html, $url) {
global $http, $extractor;
$host = @parse_url($url, PHP_URL_HOST);
$site_config = SiteConfig::build($host);
if ($site_config === false) {
// check for fingerprints
if (!empty($extractor->fingerprints) && ($_fphost = $extractor->findHostUsingFingerprints($html))) {
$site_config = SiteConfig::build($_fphost);
}
if ($site_config === false) $site_config = new SiteConfig();
SiteConfig::add_to_cache($host, $site_config);
return false;
} else {
SiteConfig::add_to_cache($host, $site_config);
}
$splink = null;
if (!empty($site_config->single_page_link)) {
$splink = $site_config->single_page_link;
} elseif (!empty($site_config->single_page_link_in_feed)) {
// single page link xpath is targeted at feed
$splink = $site_config->single_page_link_in_feed;
// so let's replace HTML with feed item description
$html = $item->get_description();
}
if (isset($splink)) {
// Build DOM tree from HTML
$readability = new Readability($html, $url);
$xpath = new DOMXPath($readability->dom);
// Loop through single_page_link xpath expressions
$single_page_url = null;
foreach ($splink as $pattern) {
$elems = @$xpath->evaluate($pattern, $readability->dom);
if (is_string($elems)) {
$single_page_url = trim($elems);
break;
} elseif ($elems instanceof DOMNodeList && $elems->length > 0) {
foreach ($elems as $item) {
if ($item instanceof DOMElement && $item->hasAttribute('href')) {
$single_page_url = $item->getAttribute('href');
break;
} elseif ($item instanceof DOMAttr && $item->value) {
$single_page_url = $item->value;
break;
}
}
}
}
// If we've got URL, resolve against $url
if (isset($single_page_url) && ($single_page_url = $this->makeAbsoluteStr($url, $single_page_url))) {
// check it's not what we have already!
if ($single_page_url != $url) {
// it's not, so let's try to fetch it...
$_prev_ref = $http->referer;
$http->referer = $single_page_url;
if (($response = $http->get($single_page_url, true)) && $response['status_code'] < 300) {
$http->referer = $_prev_ref;
return $response;
}
$http->referer = $_prev_ref;
}
}
}
return false;
}
}

View File

@ -30,10 +30,25 @@ require_once __DIR__ . '/../../inc/3rdparty/simple_html_dom.php';
require_once __DIR__ . '/../../inc/3rdparty/paginator.php';
require_once __DIR__ . '/../../inc/3rdparty/Session.class.php';
require_once __DIR__ . '/../../inc/3rdparty/simplepie/SimplePieAutoloader.php';
require_once __DIR__ . '/../../inc/3rdparty/simplepie/SimplePie/Core.php';
require_once __DIR__ . '/../../inc/3rdparty/content-extractor/ContentExtractor.php';
require_once __DIR__ . '/../../inc/3rdparty/content-extractor/SiteConfig.php';
require_once __DIR__ . '/../../inc/3rdparty/humble-http-agent/HumbleHttpAgent.php';
require_once __DIR__ . '/../../inc/3rdparty/humble-http-agent/SimplePie_HumbleHttpAgent.php';
require_once __DIR__ . '/../../inc/3rdparty/humble-http-agent/CookieJar.php';
require_once __DIR__ . '/../../inc/3rdparty/feedwriter/FeedItem.php';
require_once __DIR__ . '/../../inc/3rdparty/feedwriter/FeedWriter.php';
require_once __DIR__ . '/../../inc/3rdparty/feedwriter/DummySingleItemFeed.php';
if (DOWNLOAD_PICTURES) {
require_once __DIR__ . '/../../inc/poche/pochePictures.php';
}
if (!ini_get('date.timezone') || !@date_default_timezone_set(ini_get('date.timezone'))) {
date_default_timezone_set('UTC');
}
$poche = new Poche();
#XSRF protection with token
// if (!empty($_POST)) {

View File

@ -17,8 +17,6 @@ define ('STORAGE_PASSWORD', 'postgres'); # leave blank for sqlite
define ('MODE_DEMO', FALSE);
define ('DEBUG_POCHE', FALSE);
define ('CONVERT_LINKS_FOOTNOTES', FALSE);
define ('REVERT_FORCED_PARAGRAPH_ELEMENTS', FALSE);
define ('DOWNLOAD_PICTURES', FALSE);
define ('SHARE_TWITTER', TRUE);
define ('SHARE_MAIL', TRUE);