/', '', $data);
+ $data = preg_replace('/<\/div>$/', '', $data);
+ }
+ else
+ {
+ $data = preg_replace('/^
/', '
', $data);
+ }
+ }
+
+ if ($type & SIMPLEPIE_CONSTRUCT_IRI)
+ {
+ $absolute = $this->registry->call('Misc', 'absolutize_url', array($data, $base));
+ if ($absolute !== false)
+ {
+ $data = $absolute;
+ }
+ }
+
+ if ($type & (SIMPLEPIE_CONSTRUCT_TEXT | SIMPLEPIE_CONSTRUCT_IRI))
+ {
+ $data = htmlspecialchars($data, ENT_COMPAT, 'UTF-8');
+ }
+
+ if ($this->output_encoding !== 'UTF-8')
+ {
+ $data = $this->registry->call('Misc', 'change_encoding', array($data, 'UTF-8', $this->output_encoding));
+ }
+ }
+ return $data;
+ }
+
+ /**
+ * Build the string returned for a fragment by surrounding $html with
+ * fixed prefix/suffix literals selected by $type.
+ *
+ * NOTE(review): several literals below are empty or hold only a line
+ * break, $content_type is assigned but never read, and the comment about
+ * a wrapping div has no matching markup in the code. The markup inside
+ * these literals looks like it was lost from this copy - confirm against
+ * the upstream file before relying on this method.
+ *
+ * @param string $html Fragment to wrap
+ * @param int $type Bitmask tested against SIMPLEPIE_CONSTRUCT_XHTML
+ * @return string $html surrounded by the literals appended to $ret
+ */
+ protected function preprocess($html, $type)
+ {
+ $ret = '';
+ // True when any bit other than SIMPLEPIE_CONSTRUCT_XHTML is set in $type
+ if ($type & ~SIMPLEPIE_CONSTRUCT_XHTML)
+ {
+ // Atom XHTML constructs are wrapped with a div by default
+ // Note: No protection if $html contains a stray
!
+ $html = '
' . $html . '
';
+ $ret .= '';
+ $content_type = 'text/html';
+ }
+ else
+ {
+ $ret .= '';
+ $content_type = 'application/xhtml+xml';
+ }
+
+ $ret .= '';
+ $ret .= '
';
+ $ret .= '' . $html . '';
+ return $ret;
+ }
+
+ /**
+ * Make the given attributes of every $tag element absolute.
+ *
+ * Nothing is done when $tag is listed in $this->strip_htmltags. Each
+ * attribute value is resolved against $this->base through the registry's
+ * Misc::absolutize_url; a value is only overwritten when that call does
+ * not return false.
+ *
+ * @param object $document Document queried with getElementsByTagName()
+ * @param string $tag Element name to look up
+ * @param string|array $attributes Attribute name, or list of names
+ */
+ public function replace_urls($document, $tag, $attributes)
+ {
+ $names = is_array($attributes) ? $attributes : array($attributes);
+
+ // Skip tags that appear in the strip list
+ if (is_array($this->strip_htmltags) && in_array($tag, $this->strip_htmltags))
+ {
+ return;
+ }
+
+ foreach ($document->getElementsByTagName($tag) as $node)
+ {
+ foreach ($names as $name)
+ {
+ if (!$node->hasAttribute($name))
+ {
+ continue;
+ }
+
+ $resolved = $this->registry->call('Misc', 'absolutize_url', array($node->getAttribute($name), $this->base));
+ if ($resolved !== false)
+ {
+ $node->setAttribute($name, $resolved);
+ }
+ }
+ }
+ }
+
+ /**
+ * Produce the replacement text for one matched tag.
+ *
+ * $match is indexed like a regex match array (presumably from a
+ * preg_replace_callback whose pattern is defined elsewhere - confirm):
+ * [0] whole match, [1] tag name, [2] attribute text, [3]/[4] content,
+ * with [4] only set for tags that have content.
+ *
+ * When $this->encode_instead_of_strip is set the tag is returned as
+ * visible text, so its angle brackets must be entity-encoded; otherwise
+ * the tag is dropped and only its content (if any) is kept. The content
+ * of script and style tags is never kept as-is.
+ *
+ * @param array $match Match array as described above
+ * @return string Replacement text
+ */
+ public function do_strip_htmltags($match)
+ {
+ $keeps_content = isset($match[4]) && !in_array(strtolower($match[1]), array('script', 'style'));
+
+ if ($this->encode_instead_of_strip)
+ {
+ if ($keeps_content)
+ {
+ $match[1] = htmlspecialchars($match[1], ENT_COMPAT, 'UTF-8');
+ $match[2] = htmlspecialchars($match[2], ENT_COMPAT, 'UTF-8');
+ // Brackets are written as entities: emitting literal < and > here
+ // would hand the tag back unencoded.
+ return "&lt;$match[1]$match[2]&gt;$match[3]&lt;/$match[1]&gt;";
+ }
+
+ return htmlspecialchars($match[0], ENT_COMPAT, 'UTF-8');
+ }
+
+ return $keeps_content ? $match[4] : '';
+ }
+
+ /**
+ * Strip (or encode) every element matched by the XPath "body//$tag".
+ *
+ * - encode_instead_of_strip set: the element is replaced by its children,
+ * surrounded by text nodes spelling out the opening and closing tag.
+ * For script and style only the children are kept, without the tag text.
+ * - otherwise, script/style: the element is removed with its content.
+ * - otherwise: the element is replaced by its children.
+ *
+ * @param string $tag Element name to strip
+ * @param DOMDocument $document Document to modify in place
+ * @param int $type Bitmask of SIMPLEPIE_CONSTRUCT_* flags; decides how
+ * empty attribute values are written in encode mode
+ */
+ protected function strip_tag($tag, $document, $type)
+ {
+ $xpath = new DOMXPath($document);
+ $elements = $xpath->query('body//' . $tag);
+ $is_raw_text_tag = in_array($tag, array('script', 'style'));
+
+ if (!$this->encode_instead_of_strip && $is_raw_text_tag)
+ {
+ foreach ($elements as $element)
+ {
+ $element->parentNode->removeChild($element);
+ }
+
+ return;
+ }
+
+ // Tag text is only written in encode mode, and never for script/style
+ $show_tag_text = $this->encode_instead_of_strip && !$is_raw_text_tag;
+
+ foreach ($elements as $element)
+ {
+ $fragment = $document->createDocumentFragment();
+
+ if ($show_tag_text)
+ {
+ $text = '<' . $tag;
+ if ($element->hasAttributes())
+ {
+ $attrs = array();
+ foreach ($element->attributes as $name => $attr)
+ {
+ $value = $attr->value;
+
+ // Compare against '' rather than using empty(), which would
+ // also treat a legitimate value of "0" as missing.
+ if ($value === '')
+ {
+ // In XHTML, empty values should never exist, so we repeat the name
+ if ($type & SIMPLEPIE_CONSTRUCT_XHTML)
+ {
+ $value = $name;
+ }
+ // For HTML, a bare attribute name is fine
+ elseif ($type & SIMPLEPIE_CONSTRUCT_HTML)
+ {
+ $attrs[] = $name;
+ continue;
+ }
+ }
+
+ // Use the adjusted $value so the XHTML substitution takes effect
+ $attrs[] = $name . '="' . $value . '"';
+ }
+ $text .= ' ' . implode(' ', $attrs);
+ }
+ $text .= '>';
+ $fragment->appendChild(new DOMText($text));
+ }
+
+ // Appending a child to the fragment detaches it from $element,
+ // so this drains the element front to back.
+ while ($element->firstChild !== null)
+ {
+ $fragment->appendChild($element->firstChild);
+ }
+
+ if ($show_tag_text)
+ {
+ $fragment->appendChild(new DOMText('</' . $tag . '>'));
+ }
+
+ $element->parentNode->replaceChild($fragment, $element);
+ }
+ }
+
+ /**
+ * Remove the attribute $attrib from every element that carries it.
+ *
+ * @param string $attrib Attribute name, inserted into the XPath as-is
+ * @param DOMDocument $document Document to modify in place
+ */
+ protected function strip_attr($attrib, $document)
+ {
+ $finder = new DOMXPath($document);
+
+ foreach ($finder->query('//*[@' . $attrib . ']') as $node)
+ {
+ $node->removeAttribute($attrib);
+ }
+ }
+}
diff --git a/vendor/full-text-rss/libraries/simplepie/library/SimplePie/Source.php b/vendor/full-text-rss/libraries/simplepie/library/SimplePie/Source.php
new file mode 100644
index 0000000..51d8e6c
--- /dev/null
+++ b/vendor/full-text-rss/libraries/simplepie/library/SimplePie/Source.php
@@ -0,0 +1,611 @@
+/**
+ * Handles `<atom:source>`
+ *
+ * Used by {@see SimplePie_Item::get_source()}
+ *
+ * This class can be overloaded with {@see SimplePie::set_source_class()}
+ *
+ * @package SimplePie
+ * @subpackage API
+ */
+class SimplePie_Source
+{
+ var $item;
+ var $data = array();
+ protected $registry;
+
+ /**
+ * Remember the owning item and the data array for this source.
+ *
+ * @param object $item Object that get_base() and sanitize() delegate to
+ * @param array $data Data array read by the get_*() methods
+ */
+ public function __construct($item, $data)
+ {
+ $this->data = $data;
+ $this->item = $item;
+ }
+
+ /**
+ * Inject the registry used for Misc helper calls and object creation.
+ *
+ * @param SimplePie_Registry $registry
+ */
+ public function set_registry(SimplePie_Registry $registry)
+ {
+ $this->registry = $registry;
+ }
+
+ /**
+ * String form of the source: the MD5 hash of its serialized data.
+ *
+ * @return string
+ */
+ public function __toString()
+ {
+ $serialized = serialize($this->data);
+ return md5($serialized);
+ }
+
+ /**
+ * Look up the child elements stored for a namespace/tag pair.
+ *
+ * @param string $namespace Namespace key under $this->data['child']
+ * @param string $tag Tag key under that namespace
+ * @return mixed The stored entry, or null when there is none
+ */
+ public function get_source_tags($namespace, $tag)
+ {
+ if (!isset($this->data['child'][$namespace][$tag]))
+ {
+ return null;
+ }
+
+ return $this->data['child'][$namespace][$tag];
+ }
+
+ /**
+ * Delegate base lookup for an element to the owning item.
+ *
+ * @param array $element Element passed through to the item
+ * @return mixed Whatever the item's get_base() returns
+ */
+ public function get_base($element = array())
+ {
+ $base = $this->item->get_base($element);
+ return $base;
+ }
+
+ /**
+ * Delegate sanitizing to the owning item.
+ *
+ * @param mixed $data Value to sanitize
+ * @param int $type SIMPLEPIE_CONSTRUCT_* value passed through
+ * @param string $base Base passed through
+ * @return mixed Whatever the item's sanitize() returns
+ */
+ public function sanitize($data, $type, $base = '')
+ {
+ $clean = $this->item->sanitize($data, $type, $base);
+ return $clean;
+ }
+
+ /**
+ * Return the item this source was constructed with.
+ *
+ * @return object
+ */
+ public function get_item()
+ {
+ $owner = $this->item;
+ return $owner;
+ }
+
+ /**
+ * Return the sanitized title, taken from the first of these that exists:
+ * Atom 1.0, Atom 0.3, RSS 1.0, RSS 0.90, RSS 2.0, DC 1.1, DC 1.0.
+ *
+ * @return mixed Sanitized title, or null when no title element is present
+ */
+ public function get_title()
+ {
+ // Atom: the construct type is derived from the element's attributes
+ if ($tags = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'title'))
+ {
+ $type = $this->registry->call('Misc', 'atom_10_construct_type', array($tags[0]['attribs']));
+ return $this->sanitize($tags[0]['data'], $type, $this->get_base($tags[0]));
+ }
+ if ($tags = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'title'))
+ {
+ $type = $this->registry->call('Misc', 'atom_03_construct_type', array($tags[0]['attribs']));
+ return $this->sanitize($tags[0]['data'], $type, $this->get_base($tags[0]));
+ }
+
+ // RSS flavours: treated as "maybe HTML", resolved against the element base
+ foreach (array(SIMPLEPIE_NAMESPACE_RSS_10, SIMPLEPIE_NAMESPACE_RSS_090, SIMPLEPIE_NAMESPACE_RSS_20) as $namespace)
+ {
+ if ($tags = $this->get_source_tags($namespace, 'title'))
+ {
+ return $this->sanitize($tags[0]['data'], SIMPLEPIE_CONSTRUCT_MAYBE_HTML, $this->get_base($tags[0]));
+ }
+ }
+
+ // Dublin Core: plain text
+ foreach (array(SIMPLEPIE_NAMESPACE_DC_11, SIMPLEPIE_NAMESPACE_DC_10) as $namespace)
+ {
+ if ($tags = $this->get_source_tags($namespace, 'title'))
+ {
+ return $this->sanitize($tags[0]['data'], SIMPLEPIE_CONSTRUCT_TEXT);
+ }
+ }
+
+ return null;
+ }
+
+ /**
+ * Return one category from get_categories().
+ *
+ * @param int $key Index into the category list
+ * @return mixed The category at $key, or null when it is not set
+ */
+ public function get_category($key = 0)
+ {
+ $all = $this->get_categories();
+ return isset($all[$key]) ? $all[$key] : null;
+ }
+
+ /**
+ * Collect categories from Atom 1.0 category, RSS 2.0 category and
+ * Dublin Core 1.1/1.0 subject elements, in that order.
+ *
+ * @return array|null De-duplicated list of Category objects created
+ * through the registry, or null when none were found
+ */
+ public function get_categories()
+ {
+ $found = array();
+
+ foreach ((array) $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'category') as $entry)
+ {
+ // term, scheme and label each come from the attribute of the same name
+ $parts = array('term' => null, 'scheme' => null, 'label' => null);
+ foreach (array_keys($parts) as $attribute)
+ {
+ if (isset($entry['attribs'][''][$attribute]))
+ {
+ $parts[$attribute] = $this->sanitize($entry['attribs'][''][$attribute], SIMPLEPIE_CONSTRUCT_TEXT);
+ }
+ }
+ $found[] = $this->registry->create('Category', array($parts['term'], $parts['scheme'], $parts['label']));
+ }
+
+ foreach ((array) $this->get_source_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'category') as $entry)
+ {
+ // This is really the label, but keep this as the term also for BC.
+ // Label will also work on retrieving because that falls back to term.
+ $term = $this->sanitize($entry['data'], SIMPLEPIE_CONSTRUCT_TEXT);
+ $scheme = isset($entry['attribs']['']['domain'])
+ ? $this->sanitize($entry['attribs']['']['domain'], SIMPLEPIE_CONSTRUCT_TEXT)
+ : null;
+ $found[] = $this->registry->create('Category', array($term, $scheme, null));
+ }
+
+ foreach (array(SIMPLEPIE_NAMESPACE_DC_11, SIMPLEPIE_NAMESPACE_DC_10) as $namespace)
+ {
+ foreach ((array) $this->get_source_tags($namespace, 'subject') as $entry)
+ {
+ $found[] = $this->registry->create('Category', array($this->sanitize($entry['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null));
+ }
+ }
+
+ return empty($found) ? null : array_unique($found);
+ }
+
+ /**
+ * Return one author from get_authors().
+ *
+ * @param int $key Index into the author list
+ * @return mixed The author at $key, or null when it is not set
+ */
+ public function get_author($key = 0)
+ {
+ $all = $this->get_authors();
+ return isset($all[$key]) ? $all[$key] : null;
+ }
+
+ /**
+ * Collect authors from, in order: every Atom 1.0 author, the first
+ * Atom 0.3 author, Dublin Core 1.1/1.0 creator and iTunes author.
+ *
+ * Atom entries without any of name, URI or e-mail are skipped.
+ *
+ * @return array|null De-duplicated list of Author objects created
+ * through the registry, or null when none were found
+ */
+ public function get_authors()
+ {
+ $found = array();
+
+ foreach ((array) $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'author') as $entry)
+ {
+ $name = isset($entry['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['name'][0]['data'])
+ ? $this->sanitize($entry['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['name'][0]['data'], SIMPLEPIE_CONSTRUCT_TEXT)
+ : null;
+ $uri = isset($entry['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['uri'][0]['data'])
+ ? $this->sanitize($entry['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['uri'][0]['data'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($entry['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['uri'][0]))
+ : null;
+ $email = isset($entry['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['email'][0]['data'])
+ ? $this->sanitize($entry['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['email'][0]['data'], SIMPLEPIE_CONSTRUCT_TEXT)
+ : null;
+
+ if ($name !== null || $email !== null || $uri !== null)
+ {
+ $found[] = $this->registry->create('Author', array($name, $uri, $email));
+ }
+ }
+
+ // Only the first Atom 0.3 author element is read
+ if ($legacy = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'author'))
+ {
+ $name = isset($legacy[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['name'][0]['data'])
+ ? $this->sanitize($legacy[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['name'][0]['data'], SIMPLEPIE_CONSTRUCT_TEXT)
+ : null;
+ $url = isset($legacy[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['url'][0]['data'])
+ ? $this->sanitize($legacy[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['url'][0]['data'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($legacy[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['url'][0]))
+ : null;
+ $email = isset($legacy[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['email'][0]['data'])
+ ? $this->sanitize($legacy[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['email'][0]['data'], SIMPLEPIE_CONSTRUCT_TEXT)
+ : null;
+
+ if ($name !== null || $email !== null || $url !== null)
+ {
+ $found[] = $this->registry->create('Author', array($name, $url, $email));
+ }
+ }
+
+ // Name-only sources: the element text becomes the author name
+ $name_only = array(
+ array(SIMPLEPIE_NAMESPACE_DC_11, 'creator'),
+ array(SIMPLEPIE_NAMESPACE_DC_10, 'creator'),
+ array(SIMPLEPIE_NAMESPACE_ITUNES, 'author'),
+ );
+ foreach ($name_only as $pair)
+ {
+ foreach ((array) $this->get_source_tags($pair[0], $pair[1]) as $entry)
+ {
+ $found[] = $this->registry->create('Author', array($this->sanitize($entry['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null));
+ }
+ }
+
+ return empty($found) ? null : array_unique($found);
+ }
+
+ /**
+ * Return one contributor from get_contributors().
+ *
+ * @param int $key Index into the contributor list
+ * @return mixed The contributor at $key, or null when it is not set
+ */
+ public function get_contributor($key = 0)
+ {
+ $all = $this->get_contributors();
+ return isset($all[$key]) ? $all[$key] : null;
+ }
+
+ /**
+ * Collect contributors from Atom 1.0 and then Atom 0.3 contributor
+ * elements. The two formats differ only in namespace and in the name
+ * of the link child (uri vs. url).
+ *
+ * Entries without any of name, link or e-mail are skipped.
+ *
+ * @return array|null De-duplicated list of Author objects created
+ * through the registry, or null when none were found
+ */
+ public function get_contributors()
+ {
+ $found = array();
+
+ $formats = array(
+ array(SIMPLEPIE_NAMESPACE_ATOM_10, 'uri'),
+ array(SIMPLEPIE_NAMESPACE_ATOM_03, 'url'),
+ );
+ foreach ($formats as $format)
+ {
+ $namespace = $format[0];
+ $link_tag = $format[1];
+
+ foreach ((array) $this->get_source_tags($namespace, 'contributor') as $entry)
+ {
+ $name = isset($entry['child'][$namespace]['name'][0]['data'])
+ ? $this->sanitize($entry['child'][$namespace]['name'][0]['data'], SIMPLEPIE_CONSTRUCT_TEXT)
+ : null;
+ $link = isset($entry['child'][$namespace][$link_tag][0]['data'])
+ ? $this->sanitize($entry['child'][$namespace][$link_tag][0]['data'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($entry['child'][$namespace][$link_tag][0]))
+ : null;
+ $email = isset($entry['child'][$namespace]['email'][0]['data'])
+ ? $this->sanitize($entry['child'][$namespace]['email'][0]['data'], SIMPLEPIE_CONSTRUCT_TEXT)
+ : null;
+
+ if ($name !== null || $email !== null || $link !== null)
+ {
+ $found[] = $this->registry->create('Author', array($name, $link, $email));
+ }
+ }
+ }
+
+ return empty($found) ? null : array_unique($found);
+ }
+
+ /**
+ * Return one link of the given relation from get_links().
+ *
+ * @param int $key Index into the list of links for $rel
+ * @param string $rel Link relation
+ * @return mixed The link at $key, or null when it is not set
+ */
+ public function get_link($key = 0, $rel = 'alternate')
+ {
+ $all = $this->get_links($rel);
+ return isset($all[$key]) ? $all[$key] : null;
+ }
+
+ /**
+ * Shortcut for the first link with the default relation.
+ *
+ * Added for parity between the parent-level and the item/entry-level.
+ *
+ * @return mixed Result of get_link(0)
+ */
+ public function get_permalink()
+ {
+ $first = $this->get_link(0);
+ return $first;
+ }
+
+ /**
+ * Return the links stored for a relation.
+ *
+ * On the first call the link table is built and cached in
+ * $this->data['links'], keyed by relation: Atom 1.0 and Atom 0.3 link
+ * elements contribute their href under their rel attribute (default
+ * 'alternate'); the first RSS 1.0, RSS 0.90 and RSS 2.0 link element
+ * each contribute their text under 'alternate'. Afterwards short relation
+ * names and their SIMPLEPIE_IANA_LINK_RELATIONS_REGISTRY-prefixed forms
+ * are made to share one array by reference, and each list is
+ * de-duplicated.
+ *
+ * @param string $rel Relation to look up
+ * @return array|null Links for $rel, or null when there are none
+ */
+ public function get_links($rel = 'alternate')
+ {
+ // Build the table only once; later calls reuse the cached result
+ if (!isset($this->data['links']))
+ {
+ $this->data['links'] = array();
+ if ($links = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'link'))
+ {
+ foreach ($links as $link)
+ {
+ if (isset($link['attribs']['']['href']))
+ {
+ $link_rel = (isset($link['attribs']['']['rel'])) ? $link['attribs']['']['rel'] : 'alternate';
+ $this->data['links'][$link_rel][] = $this->sanitize($link['attribs']['']['href'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($link));
+ }
+ }
+ }
+ if ($links = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'link'))
+ {
+ foreach ($links as $link)
+ {
+ if (isset($link['attribs']['']['href']))
+ {
+ $link_rel = (isset($link['attribs']['']['rel'])) ? $link['attribs']['']['rel'] : 'alternate';
+ $this->data['links'][$link_rel][] = $this->sanitize($link['attribs']['']['href'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($link));
+
+ }
+ }
+ }
+ // RSS: only the first link element of each flavour is used
+ if ($links = $this->get_source_tags(SIMPLEPIE_NAMESPACE_RSS_10, 'link'))
+ {
+ $this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($links[0]));
+ }
+ if ($links = $this->get_source_tags(SIMPLEPIE_NAMESPACE_RSS_090, 'link'))
+ {
+ $this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($links[0]));
+ }
+ if ($links = $this->get_source_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'link'))
+ {
+ $this->data['links']['alternate'][] = $this->sanitize($links[0]['data'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($links[0]));
+ }
+
+ // Snapshot the keys first: the loop below adds new keys to the table
+ $keys = array_keys($this->data['links']);
+ foreach ($keys as $key)
+ {
+ if ($this->registry->call('Misc', 'is_isegment_nz_nc', array($key)))
+ {
+ if (isset($this->data['links'][SIMPLEPIE_IANA_LINK_RELATIONS_REGISTRY . $key]))
+ {
+ // Both forms exist: merge them, then alias the short name to the merged list
+ $this->data['links'][SIMPLEPIE_IANA_LINK_RELATIONS_REGISTRY . $key] = array_merge($this->data['links'][$key], $this->data['links'][SIMPLEPIE_IANA_LINK_RELATIONS_REGISTRY . $key]);
+ $this->data['links'][$key] =& $this->data['links'][SIMPLEPIE_IANA_LINK_RELATIONS_REGISTRY . $key];
+ }
+ else
+ {
+ // Only the short form exists: expose it under the prefixed name too
+ $this->data['links'][SIMPLEPIE_IANA_LINK_RELATIONS_REGISTRY . $key] =& $this->data['links'][$key];
+ }
+ }
+ // NOTE(review): 41 is presumably the length of
+ // SIMPLEPIE_IANA_LINK_RELATIONS_REGISTRY (defined elsewhere) - confirm
+ elseif (substr($key, 0, 41) === SIMPLEPIE_IANA_LINK_RELATIONS_REGISTRY)
+ {
+ // Only the prefixed form exists: expose it under the short name too
+ $this->data['links'][substr($key, 41)] =& $this->data['links'][$key];
+ }
+ $this->data['links'][$key] = array_unique($this->data['links'][$key]);
+ }
+ }
+
+ if (isset($this->data['links'][$rel]))
+ {
+ return $this->data['links'][$rel];
+ }
+ else
+ {
+ return null;
+ }
+ }
+
+ /**
+ * Return the sanitized description, taken from the first of these that
+ * exists: Atom 1.0 subtitle, Atom 0.3 tagline, RSS 1.0/0.90/2.0
+ * description, DC 1.1/1.0 description, iTunes summary, iTunes subtitle.
+ *
+ * @return mixed Sanitized description, or null when none is present
+ */
+ public function get_description()
+ {
+ // Atom: the construct type is derived from the element's attributes
+ if ($tags = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'subtitle'))
+ {
+ $type = $this->registry->call('Misc', 'atom_10_construct_type', array($tags[0]['attribs']));
+ return $this->sanitize($tags[0]['data'], $type, $this->get_base($tags[0]));
+ }
+ if ($tags = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'tagline'))
+ {
+ $type = $this->registry->call('Misc', 'atom_03_construct_type', array($tags[0]['attribs']));
+ return $this->sanitize($tags[0]['data'], $type, $this->get_base($tags[0]));
+ }
+
+ foreach (array(SIMPLEPIE_NAMESPACE_RSS_10, SIMPLEPIE_NAMESPACE_RSS_090, SIMPLEPIE_NAMESPACE_RSS_20) as $namespace)
+ {
+ if ($tags = $this->get_source_tags($namespace, 'description'))
+ {
+ return $this->sanitize($tags[0]['data'], SIMPLEPIE_CONSTRUCT_MAYBE_HTML, $this->get_base($tags[0]));
+ }
+ }
+
+ foreach (array(SIMPLEPIE_NAMESPACE_DC_11, SIMPLEPIE_NAMESPACE_DC_10) as $namespace)
+ {
+ if ($tags = $this->get_source_tags($namespace, 'description'))
+ {
+ return $this->sanitize($tags[0]['data'], SIMPLEPIE_CONSTRUCT_TEXT);
+ }
+ }
+
+ // iTunes: summary is preferred over subtitle; both are handled as HTML
+ foreach (array('summary', 'subtitle') as $tag)
+ {
+ if ($tags = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ITUNES, $tag))
+ {
+ return $this->sanitize($tags[0]['data'], SIMPLEPIE_CONSTRUCT_HTML, $this->get_base($tags[0]));
+ }
+ }
+
+ return null;
+ }
+
+ /**
+ * Return the sanitized copyright notice, taken from the first of these
+ * that exists: Atom 1.0 rights, Atom 0.3 copyright, RSS 2.0 copyright,
+ * DC 1.1 rights, DC 1.0 rights.
+ *
+ * @return mixed Sanitized notice, or null when none is present
+ */
+ public function get_copyright()
+ {
+ // Atom: the construct type is derived from the element's attributes
+ if ($tags = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'rights'))
+ {
+ $type = $this->registry->call('Misc', 'atom_10_construct_type', array($tags[0]['attribs']));
+ return $this->sanitize($tags[0]['data'], $type, $this->get_base($tags[0]));
+ }
+ if ($tags = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'copyright'))
+ {
+ $type = $this->registry->call('Misc', 'atom_03_construct_type', array($tags[0]['attribs']));
+ return $this->sanitize($tags[0]['data'], $type, $this->get_base($tags[0]));
+ }
+
+ // The remaining sources are all plain text
+ $plain = array(
+ array(SIMPLEPIE_NAMESPACE_RSS_20, 'copyright'),
+ array(SIMPLEPIE_NAMESPACE_DC_11, 'rights'),
+ array(SIMPLEPIE_NAMESPACE_DC_10, 'rights'),
+ );
+ foreach ($plain as $pair)
+ {
+ if ($tags = $this->get_source_tags($pair[0], $pair[1]))
+ {
+ return $this->sanitize($tags[0]['data'], SIMPLEPIE_CONSTRUCT_TEXT);
+ }
+ }
+
+ return null;
+ }
+
+ /**
+ * Return the sanitized language, taken from the first language element
+ * found in RSS 2.0, DC 1.1 or DC 1.0, falling back to the stored
+ * xml_lang value.
+ *
+ * @return mixed Sanitized language, or null when none is present
+ */
+ public function get_language()
+ {
+ foreach (array(SIMPLEPIE_NAMESPACE_RSS_20, SIMPLEPIE_NAMESPACE_DC_11, SIMPLEPIE_NAMESPACE_DC_10) as $namespace)
+ {
+ if ($tags = $this->get_source_tags($namespace, 'language'))
+ {
+ return $this->sanitize($tags[0]['data'], SIMPLEPIE_CONSTRUCT_TEXT);
+ }
+ }
+
+ if (isset($this->data['xml_lang']))
+ {
+ return $this->sanitize($this->data['xml_lang'], SIMPLEPIE_CONSTRUCT_TEXT);
+ }
+
+ return null;
+ }
+
+ /**
+ * Return the latitude of the source.
+ *
+ * Read from the W3C Basic Geo lat element when present, otherwise from
+ * the first number of a GeoRSS point ("lat lon"). Whole-degree values
+ * such as "45 -122" are accepted as well as decimals, and the two
+ * numbers may be separated by any run of whitespace.
+ *
+ * @return float|null Latitude, or null when no usable value is present
+ */
+ public function get_latitude()
+ {
+ if ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_W3C_BASIC_GEO, 'lat'))
+ {
+ return (float) $return[0]['data'];
+ }
+
+ // The fractional part is optional; requiring it rejected integer coordinates
+ $point = '/^(-?[0-9]+(?:\.[0-9]+)?)\s+(-?[0-9]+(?:\.[0-9]+)?)$/';
+ if (($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_GEORSS, 'point')) && preg_match($point, trim($return[0]['data']), $match))
+ {
+ return (float) $match[1];
+ }
+
+ return null;
+ }
+
+ /**
+ * Return the longitude of the source.
+ *
+ * Read from the W3C Basic Geo long element, then its lon spelling, and
+ * otherwise from the second number of a GeoRSS point ("lat lon").
+ * Whole-degree values such as "45 -122" are accepted as well as
+ * decimals, and the two numbers may be separated by any run of
+ * whitespace.
+ *
+ * @return float|null Longitude, or null when no usable value is present
+ */
+ public function get_longitude()
+ {
+ foreach (array('long', 'lon') as $tag)
+ {
+ if ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_W3C_BASIC_GEO, $tag))
+ {
+ return (float) $return[0]['data'];
+ }
+ }
+
+ // The fractional part is optional; requiring it rejected integer coordinates
+ $point = '/^(-?[0-9]+(?:\.[0-9]+)?)\s+(-?[0-9]+(?:\.[0-9]+)?)$/';
+ if (($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_GEORSS, 'point')) && preg_match($point, trim($return[0]['data']), $match))
+ {
+ return (float) $match[2];
+ }
+
+ return null;
+ }
+
+ /**
+ * Return the sanitized image URL of the source.
+ *
+ * Sources are tried in order: the href attribute of the iTunes image
+ * element, the Atom 1.0 logo, the Atom 1.0 icon. An iTunes image element
+ * without an href attribute is ignored so that the Atom fallbacks are
+ * still consulted instead of reading an undefined index.
+ *
+ * @return mixed Sanitized URL, or null when no image is present
+ */
+ public function get_image_url()
+ {
+ $return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ITUNES, 'image');
+ if ($return && isset($return[0]['attribs']['']['href']))
+ {
+ return $this->sanitize($return[0]['attribs']['']['href'], SIMPLEPIE_CONSTRUCT_IRI);
+ }
+
+ foreach (array('logo', 'icon') as $tag)
+ {
+ if ($return = $this->get_source_tags(SIMPLEPIE_NAMESPACE_ATOM_10, $tag))
+ {
+ return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_IRI, $this->get_base($return[0]));
+ }
+ }
+
+ return null;
+ }
+}
+
diff --git a/vendor/full-text-rss/libraries/simplepie/library/SimplePie/XML/Declaration/Parser.php b/vendor/full-text-rss/libraries/simplepie/library/SimplePie/XML/Declaration/Parser.php
new file mode 100644
index 0000000..aec19f1
--- /dev/null
+++ b/vendor/full-text-rss/libraries/simplepie/library/SimplePie/XML/Declaration/Parser.php
@@ -0,0 +1,362 @@
+data = $data;
+ $this->data_length = strlen($this->data);
+ }
+
+ /**
+ * Parse the input data
+ *
+ * Calls the method named by $this->state repeatedly until the state is
+ * false, the state is 'emit', or no input is left. The input buffer is
+ * cleared afterwards; on failure version, encoding and standalone are
+ * reset to empty strings.
+ *
+ * @access public
+ * @return bool true on success, false on failure
+ */
+ public function parse()
+ {
+ while ($this->state && $this->state !== 'emit' && $this->has_data())
+ {
+ $handler = $this->state;
+ $this->$handler();
+ }
+
+ $this->data = '';
+
+ if ($this->state !== 'emit')
+ {
+ $this->version = '';
+ $this->encoding = '';
+ $this->standalone = '';
+ return false;
+ }
+
+ return true;
+ }
+
+ /**
+ * Check whether there is data beyond the pointer
+ *
+ * @return bool true if the position is before the end of the data, false if not
+ */
+ public function has_data()
+ {
+ $remaining = $this->position < $this->data_length;
+ return $remaining;
+ }
+
+ /**
+ * Advance past any whitespace
+ *
+ * Tab, line feed, carriage return and space count as whitespace.
+ *
+ * @return int Number of whitespace characters passed
+ */
+ public function skip_whitespace()
+ {
+ $skipped = strspn($this->data, "\x09\x0A\x0D\x20", $this->position);
+ $this->position += $skipped;
+ return $skipped;
+ }
+
+ /**
+ * Read a quoted value starting at the pointer
+ *
+ * The value must begin with a single or double quote and end with the
+ * same quote character. On success the pointer is left just after the
+ * closing quote. A value whose closing quote is missing is rejected
+ * rather than being read up to the end of the data.
+ *
+ * @return string|false The text between the quotes, or false on failure
+ */
+ public function get_value()
+ {
+ $quote = substr($this->data, $this->position, 1);
+ if ($quote !== '"' && $quote !== "'")
+ {
+ return false;
+ }
+
+ // Step over the opening quote
+ $this->position++;
+ if (!$this->has_data())
+ {
+ return false;
+ }
+
+ $len = strcspn($this->data, $quote, $this->position);
+
+ // strcspn() runs to the end of the data when the quote never closes;
+ // in that case there is no character left at position + len.
+ if ($this->position + $len >= $this->data_length)
+ {
+ return false;
+ }
+
+ $value = substr($this->data, $this->position, $len);
+ $this->position += $len + 1;
+ return $value;
+ }
+
+ /**
+ * State handler: require whitespace before the version name.
+ *
+ * Moves to 'version_name' when at least one whitespace character was
+ * skipped, otherwise marks the parse as failed.
+ */
+ public function before_version_name()
+ {
+ $this->state = $this->skip_whitespace() ? 'version_name' : false;
+ }
+
+ /**
+ * State handler: expect the literal name "version".
+ *
+ * On a match the name and any following whitespace are consumed and the
+ * state becomes 'version_equals'; otherwise the parse fails.
+ */
+ public function version_name()
+ {
+ if (substr($this->data, $this->position, 7) !== 'version')
+ {
+ $this->state = false;
+ return;
+ }
+
+ $this->position += 7;
+ $this->skip_whitespace();
+ $this->state = 'version_equals';
+ }
+
+ /**
+ * State handler: expect "=" after the version name.
+ *
+ * On a match the sign and any following whitespace are consumed and the
+ * state becomes 'version_value'; otherwise the parse fails.
+ */
+ public function version_equals()
+ {
+ if (substr($this->data, $this->position, 1) !== '=')
+ {
+ $this->state = false;
+ return;
+ }
+
+ $this->position++;
+ $this->skip_whitespace();
+ $this->state = 'version_value';
+ }
+
+ /**
+ * State handler: read the quoted version value into $this->version.
+ *
+ * A falsy value fails the parse. Otherwise trailing whitespace is
+ * skipped and the state becomes 'encoding_name' when input remains, or
+ * 'emit' when the data is exhausted.
+ */
+ public function version_value()
+ {
+ $this->version = $this->get_value();
+ if (!$this->version)
+ {
+ $this->state = false;
+ return;
+ }
+
+ $this->skip_whitespace();
+ $this->state = $this->has_data() ? 'encoding_name' : 'emit';
+ }
+
+ /**
+ * State handler: look for the optional name "encoding".
+ *
+ * On a match the name and any following whitespace are consumed and the
+ * state becomes 'encoding_equals'. Without a match nothing is consumed
+ * and the state becomes 'standalone_name'.
+ */
+ public function encoding_name()
+ {
+ if (substr($this->data, $this->position, 8) !== 'encoding')
+ {
+ $this->state = 'standalone_name';
+ return;
+ }
+
+ $this->position += 8;
+ $this->skip_whitespace();
+ $this->state = 'encoding_equals';
+ }
+
+ /**
+ * State handler: expect "=" after the encoding name.
+ *
+ * On a match the sign and any following whitespace are consumed and the
+ * state becomes 'encoding_value'; otherwise the parse fails.
+ */
+ public function encoding_equals()
+ {
+ if (substr($this->data, $this->position, 1) !== '=')
+ {
+ $this->state = false;
+ return;
+ }
+
+ $this->position++;
+ $this->skip_whitespace();
+ $this->state = 'encoding_value';
+ }
+
+ /**
+ * State handler: read the quoted encoding value into $this->encoding.
+ *
+ * A falsy value fails the parse. Otherwise trailing whitespace is
+ * skipped and the state becomes 'standalone_name' when input remains,
+ * or 'emit' when the data is exhausted.
+ */
+ public function encoding_value()
+ {
+ $this->encoding = $this->get_value();
+ if (!$this->encoding)
+ {
+ $this->state = false;
+ return;
+ }
+
+ $this->skip_whitespace();
+ $this->state = $this->has_data() ? 'standalone_name' : 'emit';
+ }
+
+ /**
+ * State handler: expect the literal name "standalone".
+ *
+ * On a match the name and any following whitespace are consumed and the
+ * state becomes 'standalone_equals'; otherwise the parse fails.
+ */
+ public function standalone_name()
+ {
+ if (substr($this->data, $this->position, 10) !== 'standalone')
+ {
+ $this->state = false;
+ return;
+ }
+
+ $this->position += 10;
+ $this->skip_whitespace();
+ $this->state = 'standalone_equals';
+ }
+
+ /**
+ * State handler: expect "=" after the standalone name.
+ *
+ * On a match the sign and any following whitespace are consumed and the
+ * state becomes 'standalone_value'; otherwise the parse fails.
+ */
+ public function standalone_equals()
+ {
+ if (substr($this->data, $this->position, 1) !== '=')
+ {
+ $this->state = false;
+ return;
+ }
+
+ $this->position++;
+ $this->skip_whitespace();
+ $this->state = 'standalone_value';
+ }
+
+ /**
+ * State handler: read the quoted standalone value.
+ *
+ * Only "yes" (stored as true) and "no" (stored as false) are accepted.
+ * After the value nothing but whitespace may follow: leftover input
+ * fails the parse, otherwise the state becomes 'emit'.
+ */
+ public function standalone_value()
+ {
+ $raw = $this->get_value();
+ if (!$raw)
+ {
+ $this->state = false;
+ return;
+ }
+
+ if ($raw === 'yes')
+ {
+ $this->standalone = true;
+ }
+ elseif ($raw === 'no')
+ {
+ $this->standalone = false;
+ }
+ else
+ {
+ $this->state = false;
+ return;
+ }
+
+ $this->skip_whitespace();
+ $this->state = $this->has_data() ? false : 'emit';
+ }
+}
diff --git a/vendor/full-text-rss/libraries/simplepie/library/SimplePie/gzdecode.php b/vendor/full-text-rss/libraries/simplepie/library/SimplePie/gzdecode.php
new file mode 100644
index 0000000..52e024e
--- /dev/null
+++ b/vendor/full-text-rss/libraries/simplepie/library/SimplePie/gzdecode.php
@@ -0,0 +1,371 @@
+compressed_data = $data;
+ $this->compressed_size = strlen($data);
+ }
+
+ /**
+ * Decode the GZIP stream
+ *
+ * Walks the member header (magic bytes and compression method, flags,
+ * MTIME, XFL, OS, then the optional FEXTRA, FNAME, FCOMMENT and FHCRC
+ * fields selected by the flags), inflates the payload into $this->data
+ * and checks the trailing ISIZE against the inflated length.
+ * $this->min_compressed_size is raised as each optional field is found
+ * so that truncated input is rejected before it is read.
+ *
+ * @return bool Successfulness
+ */
+ public function parse()
+ {
+ if ($this->compressed_size >= $this->min_compressed_size)
+ {
+ // Check ID1, ID2, and CM
+ if (substr($this->compressed_data, 0, 3) !== "\x1F\x8B\x08")
+ {
+ return false;
+ }
+
+ // Get the FLG (FLaGs)
+ $this->flags = ord($this->compressed_data[3]);
+
+ // FLG bits above (1 << 4) are reserved
+ if ($this->flags > 0x1F)
+ {
+ return false;
+ }
+
+ // Advance the pointer after the above
+ $this->position += 4;
+
+ // MTIME
+ $mtime = substr($this->compressed_data, $this->position, 4);
+ // Reverse the string if we're on a big-endian arch because l is the only signed long and is machine endianness
+ if (current(unpack('S', "\x00\x01")) === 1)
+ {
+ $mtime = strrev($mtime);
+ }
+ $this->MTIME = current(unpack('l', $mtime));
+ $this->position += 4;
+
+ // Get the XFL (eXtra FLags)
+ $this->XFL = ord($this->compressed_data[$this->position++]);
+
+ // Get the OS (Operating System)
+ $this->OS = ord($this->compressed_data[$this->position++]);
+
+ // Parse the FEXTRA
+ if ($this->flags & 4)
+ {
+ // Read subfield IDs
+ $this->SI1 = $this->compressed_data[$this->position++];
+ $this->SI2 = $this->compressed_data[$this->position++];
+
+ // SI2 set to zero is reserved for future use
+ if ($this->SI2 === "\x00")
+ {
+ return false;
+ }
+
+ // Get the length of the extra field
+ $len = current(unpack('v', substr($this->compressed_data, $this->position, 2)));
+ $this->position += 2;
+
+ // Check the length of the string is still valid
+ $this->min_compressed_size += $len + 4;
+ if ($this->compressed_size >= $this->min_compressed_size)
+ {
+ // Set the extra field to the given data
+ $this->extra_field = substr($this->compressed_data, $this->position, $len);
+ $this->position += $len;
+ }
+ else
+ {
+ return false;
+ }
+ }
+
+ // Parse the FNAME
+ if ($this->flags & 8)
+ {
+ // Get the length of the filename
+ $len = strcspn($this->compressed_data, "\x00", $this->position);
+
+ // Check the length of the string is still valid
+ $this->min_compressed_size += $len + 1;
+ if ($this->compressed_size >= $this->min_compressed_size)
+ {
+ // Set the original filename to the given string
+ $this->filename = substr($this->compressed_data, $this->position, $len);
+ $this->position += $len + 1;
+ }
+ else
+ {
+ return false;
+ }
+ }
+
+ // Parse the FCOMMENT
+ if ($this->flags & 16)
+ {
+ // Get the length of the comment
+ $len = strcspn($this->compressed_data, "\x00", $this->position);
+
+ // Check the length of the string is still valid
+ $this->min_compressed_size += $len + 1;
+ if ($this->compressed_size >= $this->min_compressed_size)
+ {
+ // Set the original comment to the given string
+ $this->comment = substr($this->compressed_data, $this->position, $len);
+ $this->position += $len + 1;
+ }
+ else
+ {
+ return false;
+ }
+ }
+
+ // Parse the FHCRC
+ if ($this->flags & 2)
+ {
+ // Check the length of the string is still valid.
+ // The header CRC is a fixed two bytes; $len belongs to whichever
+ // optional field was parsed before (or is undefined), so it must
+ // not be counted again here.
+ $this->min_compressed_size += 2;
+ if ($this->compressed_size >= $this->min_compressed_size)
+ {
+ // Read the CRC
+ $crc = current(unpack('v', substr($this->compressed_data, $this->position, 2)));
+
+ // Check the CRC matches
+ if ((crc32(substr($this->compressed_data, 0, $this->position)) & 0xFFFF) === $crc)
+ {
+ $this->position += 2;
+ }
+ else
+ {
+ return false;
+ }
+ }
+ else
+ {
+ return false;
+ }
+ }
+
+ // Decompress the actual data (everything up to the 8-byte trailer)
+ if (($this->data = gzinflate(substr($this->compressed_data, $this->position, -8))) === false)
+ {
+ return false;
+ }
+ else
+ {
+ $this->position = $this->compressed_size - 8;
+ }
+
+ // Check CRC of data
+ $crc = current(unpack('V', substr($this->compressed_data, $this->position, 4)));
+ $this->position += 4;
+ /*if (extension_loaded('hash') && sprintf('%u', current(unpack('V', hash('crc32b', $this->data)))) !== sprintf('%u', $crc))
+ {
+ return false;
+ }*/
+
+ // Check ISIZE of data
+ $isize = current(unpack('V', substr($this->compressed_data, $this->position, 4)));
+ $this->position += 4;
+ if (sprintf('%u', strlen($this->data) & 0xFFFFFFFF) !== sprintf('%u', $isize))
+ {
+ return false;
+ }
+
+ // Wow, against all odds, we've actually got a valid gzip string
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+ }
+}
diff --git a/vendor/full-text-rss/license.txt b/vendor/full-text-rss/license.txt
new file mode 100644
index 0000000..dba13ed
--- /dev/null
+++ b/vendor/full-text-rss/license.txt
@@ -0,0 +1,661 @@
+ GNU AFFERO GENERAL PUBLIC LICENSE
+ Version 3, 19 November 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The GNU Affero General Public License is a free, copyleft license for
+software and other kinds of works, specifically designed to ensure
+cooperation with the community in the case of network server software.
+
+ The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works. By contrast,
+our General Public Licenses are intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+ Developers that use our General Public Licenses protect your rights
+with two steps: (1) assert copyright on the software, and (2) offer
+you this License which gives you legal permission to copy, distribute
+and/or modify the software.
+
+ A secondary benefit of defending all users' freedom is that
+improvements made in alternate versions of the program, if they
+receive widespread use, become available for other developers to
+incorporate. Many developers of free software are heartened and
+encouraged by the resulting cooperation. However, in the case of
+software used on network servers, this result may fail to come about.
+The GNU General Public License permits making a modified version and
+letting the public access it on a server without ever releasing its
+source code to the public.
+
+ The GNU Affero General Public License is designed specifically to
+ensure that, in such cases, the modified source code becomes available
+to the community. It requires the operator of a network server to
+provide the source code of the modified version running there to the
+users of that server. Therefore, public use of a modified version, on
+a publicly accessible server, gives the public access to the source
+code of the modified version.
+
+ An older license, called the Affero General Public License and
+published by Affero, was designed to accomplish similar goals. This is
+a different license, not a version of the Affero GPL, but Affero has
+released a new version of the Affero GPL which permits relicensing under
+this license.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ TERMS AND CONDITIONS
+
+ 0. Definitions.
+
+ "This License" refers to version 3 of the GNU Affero General Public License.
+
+ "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+ "The Program" refers to any copyrightable work licensed under this
+License. Each licensee is addressed as "you". "Licensees" and
+"recipients" may be individuals or organizations.
+
+ To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy. The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+ A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+ To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy. Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+ To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies. Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+ An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License. If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+ 1. Source Code.
+
+ The "source code" for a work means the preferred form of the work
+for making modifications to it. "Object code" means any non-source
+form of a work.
+
+ A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+ The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form. A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+ The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities. However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work. For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+ The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+ The Corresponding Source for a work in source code form is that
+same work.
+
+ 2. Basic Permissions.
+
+ All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met. This License explicitly affirms your unlimited
+permission to run the unmodified Program. The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work. This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+ You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force. You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright. Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+ Conveying under any other circumstances is permitted solely under
+the conditions stated below. Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+ 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+ No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+ When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+ 4. Conveying Verbatim Copies.
+
+ You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+ You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+ 5. Conveying Modified Source Versions.
+
+ You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+ a) The work must carry prominent notices stating that you modified
+ it, and giving a relevant date.
+
+ b) The work must carry prominent notices stating that it is
+ released under this License and any conditions added under section
+ 7. This requirement modifies the requirement in section 4 to
+ "keep intact all notices".
+
+ c) You must license the entire work, as a whole, under this
+ License to anyone who comes into possession of a copy. This
+ License will therefore apply, along with any applicable section 7
+ additional terms, to the whole of the work, and all its parts,
+ regardless of how they are packaged. This License gives no
+ permission to license the work in any other way, but it does not
+ invalidate such permission if you have separately received it.
+
+ d) If the work has interactive user interfaces, each must display
+ Appropriate Legal Notices; however, if the Program has interactive
+ interfaces that do not display Appropriate Legal Notices, your
+ work need not make them do so.
+
+ A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit. Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+ 6. Conveying Non-Source Forms.
+
+ You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+ a) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by the
+ Corresponding Source fixed on a durable physical medium
+ customarily used for software interchange.
+
+ b) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by a
+ written offer, valid for at least three years and valid for as
+ long as you offer spare parts or customer support for that product
+ model, to give anyone who possesses the object code either (1) a
+ copy of the Corresponding Source for all the software in the
+ product that is covered by this License, on a durable physical
+ medium customarily used for software interchange, for a price no
+ more than your reasonable cost of physically performing this
+ conveying of source, or (2) access to copy the
+ Corresponding Source from a network server at no charge.
+
+ c) Convey individual copies of the object code with a copy of the
+ written offer to provide the Corresponding Source. This
+ alternative is allowed only occasionally and noncommercially, and
+ only if you received the object code with such an offer, in accord
+ with subsection 6b.
+
+ d) Convey the object code by offering access from a designated
+ place (gratis or for a charge), and offer equivalent access to the
+ Corresponding Source in the same way through the same place at no
+ further charge. You need not require recipients to copy the
+ Corresponding Source along with the object code. If the place to
+ copy the object code is a network server, the Corresponding Source
+ may be on a different server (operated by you or a third party)
+ that supports equivalent copying facilities, provided you maintain
+ clear directions next to the object code saying where to find the
+ Corresponding Source. Regardless of what server hosts the
+ Corresponding Source, you remain obligated to ensure that it is
+ available for as long as needed to satisfy these requirements.
+
+ e) Convey the object code using peer-to-peer transmission, provided
+ you inform other peers where the object code and Corresponding
+ Source of the work are being offered to the general public at no
+ charge under subsection 6d.
+
+ A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+ A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling. In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage. For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product. A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+ "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source. The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+ If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information. But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+ The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed. Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+ Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+ 7. Additional Terms.
+
+ "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law. If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+ When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it. (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.) You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+ Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+ a) Disclaiming warranty or limiting liability differently from the
+ terms of sections 15 and 16 of this License; or
+
+ b) Requiring preservation of specified reasonable legal notices or
+ author attributions in that material or in the Appropriate Legal
+ Notices displayed by works containing it; or
+
+ c) Prohibiting misrepresentation of the origin of that material, or
+ requiring that modified versions of such material be marked in
+ reasonable ways as different from the original version; or
+
+ d) Limiting the use for publicity purposes of names of licensors or
+ authors of the material; or
+
+ e) Declining to grant rights under trademark law for use of some
+ trade names, trademarks, or service marks; or
+
+ f) Requiring indemnification of licensors and authors of that
+ material by anyone who conveys the material (or modified versions of
+ it) with contractual assumptions of liability to the recipient, for
+ any liability that these contractual assumptions directly impose on
+ those licensors and authors.
+
+ All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10. If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term. If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+ If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+ Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+ 8. Termination.
+
+ You may not propagate or modify a covered work except as expressly
+provided under this License. Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+ However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+ Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+ Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License. If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+ 9. Acceptance Not Required for Having Copies.
+
+ You are not required to accept this License in order to receive or
+run a copy of the Program. Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance. However,
+nothing other than this License grants you permission to propagate or
+modify any covered work. These actions infringe copyright if you do
+not accept this License. Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+ 10. Automatic Licensing of Downstream Recipients.
+
+ Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License. You are not responsible
+for enforcing compliance by third parties with this License.
+
+ An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations. If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+ You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License. For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+ 11. Patents.
+
+ A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based. The
+work thus licensed is called the contributor's "contributor version".
+
+ A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version. For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+ Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+ In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement). To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+ If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients. "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+ If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+ A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License. You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+ Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+ 12. No Surrender of Others' Freedom.
+
+ If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all. For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+ 13. Remote Network Interaction; Use with the GNU General Public License.
+
+ Notwithstanding any other provision of this License, if you modify the
+Program, your modified version must prominently offer all users
+interacting with it remotely through a computer network (if your version
+supports such interaction) an opportunity to receive the Corresponding
+Source of your version by providing access to the Corresponding Source
+from a network server at no charge, through some standard or customary
+means of facilitating copying of software. This Corresponding Source
+shall include the Corresponding Source for any work covered by version 3
+of the GNU General Public License that is incorporated pursuant to the
+following paragraph.
+
+ Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU General Public License into a single
+combined work, and to convey the resulting work. The terms of this
+License will continue to apply to the part which is the covered work,
+but the work with which it is combined will remain governed by version
+3 of the GNU General Public License.
+
+ 14. Revised Versions of this License.
+
+ The Free Software Foundation may publish revised and/or new versions of
+the GNU Affero General Public License from time to time. Such new versions
+will be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the
+Program specifies that a certain numbered version of the GNU Affero General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation. If the Program does not specify a version number of the
+GNU Affero General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+ If the Program specifies that a proxy can decide which future
+versions of the GNU Affero General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+ Later license versions may give you additional or different
+permissions. However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+ 15. Disclaimer of Warranty.
+
+ THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. Limitation of Liability.
+
+ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+ 17. Interpretation of Sections 15 and 16.
+
+ If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+ If your software can interact with users remotely through a computer
+network, you should also make sure that it provides a way for users to
+get its source. For example, if your program is a web application, its
+interface could display a "Source" link that leads users to an archive
+of the code. There are many ways you could offer source, and different
+solutions will be better for different programs; see section 13 for the
+specific requirements.
+
+ You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU AGPL, see
+<http://www.gnu.org/licenses/>.
diff --git a/vendor/full-text-rss/makefulltextfeed.php b/vendor/full-text-rss/makefulltextfeed.php
new file mode 100644
index 0000000..5a1b90f
--- /dev/null
+++ b/vendor/full-text-rss/makefulltextfeed.php
@@ -0,0 +1,1195 @@
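+<?php
+/*
+This file is part of Full-Text RSS and is licensed under the GNU Affero General
+Public License. For the full license text, see <http://www.gnu.org/licenses/>.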
+*/
+
+// Usage
+// -----
+// Request this file passing it your feed in the querystring: makefulltextfeed.php?url=mysite.org
+// The following options can be passed in the querystring:
+// * URL: url=[feed or website url] (required, should be URL-encoded - in php: urlencode($url))
+// * URL points to HTML (not feed): html=true (optional, by default it's automatically detected)
+// * API key: key=[api key] (optional, refer to config.php)
+// * Max entries to process: max=[max number of items] (optional)
+
+// Report everything except notices, show errors in the response, and ask for
+// up to 120 seconds of run time (errors from set_time_limit are suppressed).
+error_reporting(E_ALL ^ E_NOTICE);
+ini_set("display_errors", 1);
+@set_time_limit(120);
+
+// Deal with magic quotes: undo the automatic escaping old PHP versions applied
+// to request data.
+// get_magic_quotes_gpc() and each() no longer exist in PHP 8, so the function is
+// only called when available and the work queue is walked by index instead.
+if (function_exists('get_magic_quotes_gpc') && @get_magic_quotes_gpc()) {
+    $process = array(&$_GET, &$_POST, &$_REQUEST);
+    // $process grows while it is walked: nested arrays are appended by reference
+    // so that their own keys and values are unescaped on a later pass.
+    for ($key = 0; $key < count($process); $key++) {
+        // iterate over a snapshot so the live entries can be rewritten safely
+        $val = $process[$key];
+        foreach ($val as $k => $v) {
+            unset($process[$key][$k]);
+            if (is_array($v)) {
+                $process[$key][stripslashes($k)] = $v;
+                $process[] = &$process[$key][stripslashes($k)];
+            } else {
+                $process[$key][stripslashes($k)] = stripslashes($v);
+            }
+        }
+    }
+    unset($process);
+}
+
+// set include path
+set_include_path(realpath(dirname(__FILE__).'/libraries').PATH_SEPARATOR.get_include_path());
+// Autoloading of classes allows us to include files only when they're
+// needed. If we've got a cached copy, for example, only Zend_Cache is loaded.
+/**
+ * Autoloader for the bundled libraries.
+ *
+ * Looks $class_name up in a fixed class-to-file table and, on a match, requires
+ * that file from the libraries/ directory next to this script.
+ *
+ * @param string $class_name Name of the class being loaded
+ * @return bool true if the class is in the table (its file was required), false otherwise
+ */
+function autoload($class_name) {
+    // class name => path relative to libraries/
+    static $mapping = array(
+        // Include FeedCreator for RSS/Atom creation
+        'FeedWriter' => 'feedwriter/FeedWriter.php',
+        'FeedItem' => 'feedwriter/FeedItem.php',
+        // Include ContentExtractor and Readability for identifying and extracting content from URLs
+        'ContentExtractor' => 'content-extractor/ContentExtractor.php',
+        'SiteConfig' => 'content-extractor/SiteConfig.php',
+        'Readability' => 'readability/Readability.php',
+        // Include Humble HTTP Agent to allow parallel requests and response caching
+        'HumbleHttpAgent' => 'humble-http-agent/HumbleHttpAgent.php',
+        'SimplePie_HumbleHttpAgent' => 'humble-http-agent/SimplePie_HumbleHttpAgent.php',
+        'CookieJar' => 'humble-http-agent/CookieJar.php',
+        // Include Zend Cache to improve performance (cache results)
+        'Zend_Cache' => 'Zend/Cache.php',
+        // Language detect
+        'Text_LanguageDetect' => 'language-detect/LanguageDetect.php',
+        // HTML5 Lib
+        'HTML5_Parser' => 'html5/Parser.php',
+        // htmLawed - used if XSS filter is enabled (xss_filter)
+        'htmLawed' => 'htmLawed/htmLawed.php'
+    );
+    // base directory, worked out once per request
+    static $dir = null;
+    if ($dir === null) {
+        $dir = dirname(__FILE__).'/libraries/';
+    }
+    // not one of ours: let any other registered autoloader try
+    if (!isset($mapping[$class_name])) {
+        return false;
+    }
+    debug("** Loading class $class_name ({$mapping[$class_name]})");
+    require $dir.$mapping[$class_name];
+    return true;
+}
+spl_autoload_register('autoload');
+require dirname(__FILE__).'/libraries/simplepie/autoloader.php';
+
+////////////////////////////////
+// Load config file
+////////////////////////////////
+require dirname(__FILE__).'/config.php';
+
+////////////////////////////////
+// Prevent indexing/following by search engines because:
+// 1. The content is already public and presumably indexed (why create duplicates?)
+// 2. Not doing so might increase number of requests from search engines, thus increasing server load
+// Note: feed readers and services such as Yahoo Pipes will not be affected by this header.
+// Note: Using Disallow in a robots.txt file will be more effective (search engines will check
+// that before even requesting makefulltextfeed.php).
+////////////////////////////////
+header('X-Robots-Tag: noindex, nofollow');
+
+////////////////////////////////
+// Check if service is enabled
+////////////////////////////////
+if (!$options->enabled) {
+    die('The full-text RSS service is currently disabled');
+}
+
+////////////////////////////////
+// Debug mode?
+// See the config file for debug options.
+////////////////////////////////
+// Debug output is produced only when ?debug is in the query string and the
+// configured policy allows it: true or 'user' admits anyone, 'admin' requires
+// an authenticated session. A refused debug request ends the script.
+$debug_mode = false;
+if (isset($_GET['debug'])) {
+    if ($options->debug === true || $options->debug == 'user') {
+        $debug_mode = true;
+    } elseif ($options->debug == 'admin') {
+        session_start();
+        $debug_mode = (@$_SESSION['auth'] == 1);
+    }
+    if (!$debug_mode) {
+        die(($options->debug == 'admin')
+            ? 'You must be logged in to the admin area to see debug output.'
+            : 'Debugging is disabled.');
+    }
+    // debug output is plain text
+    header('Content-Type: text/plain; charset=utf-8');
+}
+
+////////////////////////////////
+// Check for APC
+////////////////////////////////
+// APC is used only when it is switched on in the config AND the extension's
+// functions exist on this server.
+$options->apc = $options->apc && function_exists('apc_add');
+debug($options->apc
+    ? 'APC is enabled and available on server'
+    : 'APC is disabled or not available on server');
+
+////////////////////////////////
+// Check for smart cache
+////////////////////////////////
+// smart cache additionally needs apc_inc()
+$options->smart_cache = $options->smart_cache && function_exists('apc_inc');
+
+////////////////////////////////
+// Check for feed URL
+////////////////////////////////
+// The url parameter is required and must be a single string (url[]=... would
+// otherwise hand an array to trim()).
+if (!isset($_GET['url']) || !is_string($_GET['url'])) {
+    die('No URL supplied');
+}
+$url = trim($_GET['url']);
+// feed:// is treated as an alias for http://
+if (strtolower(substr($url, 0, 7)) == 'feed://') {
+    $url = 'http://'.substr($url, 7);
+}
+// default to http:// when no http(s) scheme was given
+if (!preg_match('!^https?://.+!i', $url)) {
+    $url = 'http://'.$url;
+}
+
+$url = filter_var($url, FILTER_SANITIZE_URL);
+// FILTER_VALIDATE_URL already requires a scheme. The FILTER_FLAG_SCHEME_REQUIRED
+// flag formerly passed here was redundant, is deprecated since PHP 7.3 and is
+// gone in PHP 8 (using it there is a fatal error), so it is not passed.
+$test = filter_var($url, FILTER_VALIDATE_URL);
+// deal with bug http://bugs.php.net/51192 (present in PHP 5.2.13 and PHP 5.3.2)
+if ($test === false) {
+    $test = filter_var(strtr($url, '-', '_'), FILTER_VALIDATE_URL);
+}
+if ($test !== false && $test !== null && preg_match('!^https?://!', $url)) {
+    // all okay
+    unset($test);
+} else {
+    die('Invalid URL supplied');
+}
+debug("Supplied URL: $url");
+
+/////////////////////////////////
+// Redirect to hide API key
+/////////////////////////////////
+// When the request carries one of the configured API keys, send the client to
+// an equivalent URL that holds only the key's index and a hash of key+URL, so
+// the key itself does not remain in the address. All other recognised
+// parameters are carried over.
+if (isset($_GET['key']) && ($key_index = array_search($_GET['key'], $options->api_keys)) !== false) {
+    $host = $_SERVER['HTTP_HOST'];
+    $path = rtrim(dirname($_SERVER['SCRIPT_NAME']), '/\\');
+    // drop a leading http:// from the url parameter
+    $_qs_url = $url;
+    if (strtolower(substr($url, 0, 7)) == 'http://') {
+        $_qs_url = substr($url, 7);
+    }
+    $redirect = 'http://'.htmlspecialchars($host.$path).'/makefulltextfeed.php?url='.urlencode($_qs_url)
+        .'&key='.$key_index
+        .'&hash='.urlencode(sha1($_GET['key'].$url));
+    if (isset($_GET['html'])) {
+        $redirect .= '&html='.urlencode($_GET['html']);
+    }
+    if (isset($_GET['max'])) {
+        $redirect .= '&max='.(int)$_GET['max'];
+    }
+    // parameters whose value is passed through URL-encoded
+    foreach (array('links', 'exc', 'format', 'callback', 'l') as $_param) {
+        if (isset($_GET[$_param])) {
+            $redirect .= '&'.$_param.'='.urlencode($_GET[$_param]);
+        }
+    }
+    // flags: only their presence matters
+    foreach (array('xss', 'use_extracted_title', 'debug') as $_param) {
+        if (isset($_GET[$_param])) {
+            $redirect .= '&'.$_param;
+        }
+    }
+    if (!$debug_mode) {
+        header("Location: $redirect");
+    } else {
+        // in debug mode show the target instead of redirecting
+        debug('Redirecting to hide access key, follow URL below to continue');
+        debug("Location: $redirect");
+    }
+    exit;
+}
+
+///////////////////////////////////////////////
+// Set timezone.
+// Prevents warnings, but needs more testing -
+// perhaps if timezone is set in php.ini we
+// don't need to set it at all...
+///////////////////////////////////////////////
+// Use the timezone from php.ini when one is set and PHP accepts it; otherwise
+// fall back to UTC.
+$_ini_timezone = ini_get('date.timezone');
+if (!$_ini_timezone || !@date_default_timezone_set($_ini_timezone)) {
+    date_default_timezone_set('UTC');
+}
+
+///////////////////////////////////////////////
+// Check if the request is explicitly for an HTML page
+///////////////////////////////////////////////
+// html=1 or html=true: treat the URL as a web page, skip feed parsing
+$html_only = (isset($_GET['html']) && ($_GET['html'] == '1' || $_GET['html'] == 'true'));
+
+///////////////////////////////////////////////
+// Check if valid key supplied
+///////////////////////////////////////////////
+// At this point 'key' is expected to be the index of a key in
+// $options->api_keys and 'hash' to be sha1(key . url), as produced by the
+// key-hiding redirect.
+$valid_key = false;
+if (isset($_GET['key']) && isset($_GET['hash']) && is_string($_GET['hash']) && isset($options->api_keys[(int)$_GET['key']])) {
+    // Strict comparison on purpose: with == two different digests that both
+    // look like numbers in exponent notation ("0e1234..." vs "0e9876...") are
+    // compared numerically and count as equal.
+    $valid_key = ($_GET['hash'] === sha1($options->api_keys[(int)$_GET['key']].$url));
+}
+$key_index = ($valid_key) ? (int)$_GET['key'] : 0;
+if (!$valid_key && $options->key_required) {
+    die('A valid key must be supplied');
+}
+// a key was offered but did not check out
+if (!$valid_key && isset($_GET['key']) && $_GET['key'] != '') {
+    die('The entered key is invalid');
+}
+
+// optional site-specific initialisation (looked up relative to the working directory)
+if (file_exists('custom_init.php')) require 'custom_init.php';
+
+///////////////////////////////////////////////
+// Check URL against list of blacklisted URLs
+///////////////////////////////////////////////
+if (!url_allowed($url)) die('URL blocked');
+
+///////////////////////////////////////////////
+// Max entries
+// see config.php to find these values
+///////////////////////////////////////////////
+// Number of feed items to process. A positive max from the request is capped
+// by the configured ceiling; otherwise the configured default applies.
+// Requests with a valid key get their own ceiling and default.
+if ($valid_key) {
+    $_max_ceiling = $options->max_entries_with_key;
+    $_max_default = $options->default_entries_with_key;
+} else {
+    $_max_ceiling = $options->max_entries;
+    $_max_default = $options->default_entries;
+}
+if (isset($_GET['max']) && (int)$_GET['max'] > 0) {
+    $max = min((int)$_GET['max'], $_max_ceiling);
+} else {
+    // Also taken for max=0 and negative values. $max is later passed to
+    // get_items() as the item count, where SimplePie documents 0 as "all
+    // items", so letting 0 (or a negative count) through would bypass the ceiling.
+    $max = $_max_default;
+}
+unset($_max_ceiling, $_max_default);
+
+///////////////////////////////////////////////
+// Link handling
+///////////////////////////////////////////////
+// How links in the extracted content are treated; anything unrecognised means 'preserve'.
+$links = 'preserve';
+if (isset($_GET['links']) && in_array($_GET['links'], array('preserve', 'footnotes', 'remove'))) {
+    $links = $_GET['links'];
+}
+
+///////////////////////////////////////////////
+// Favour item titles in feed?
+///////////////////////////////////////////////
+// Compared with === like the other 'user'-switchable options: with == a config
+// value of boolean true also equals 'user', which would hand the decision to
+// the request instead of enforcing the configured value.
+if ($options->favour_feed_titles === 'user') {
+    $favour_feed_titles = !isset($_GET['use_extracted_title']);
+} else {
+    $favour_feed_titles = $options->favour_feed_titles;
+}
+
+///////////////////////////////////////////////
+// Exclude items if extraction fails
+///////////////////////////////////////////////
+if ($options->exclude_items_on_fail === 'user') {
+    $exclude_on_fail = (isset($_GET['exc']) && ($_GET['exc'] == '1'));
+} else {
+    $exclude_on_fail = $options->exclude_items_on_fail;
+}
+
+///////////////////////////////////////////////
+// Detect language
+///////////////////////////////////////////////
+// Language detection level: taken from the config, or - when the config says
+// 'user' - from the l parameter (1 if absent).
+$detect_language = $options->detect_language;
+if ($options->detect_language === 'user') {
+    $detect_language = isset($_GET['l']) ? (int)$_GET['l'] : 1;
+}
+
+// Levels 2 and above may guess the language from the text, which needs this
+// table of language name => language code.
+if ($detect_language >= 2) {
+    $language_codes = array(
+        'albanian' => 'sq', 'arabic' => 'ar', 'azeri' => 'az', 'bengali' => 'bn', 'bulgarian' => 'bg',
+        'cebuano' => 'ceb', // ISO 639-2
+        'croatian' => 'hr', 'czech' => 'cs', 'danish' => 'da', 'dutch' => 'nl', 'english' => 'en',
+        'estonian' => 'et', 'farsi' => 'fa', 'finnish' => 'fi', 'french' => 'fr', 'german' => 'de',
+        'hausa' => 'ha',
+        'hawaiian' => 'haw', // ISO 639-2
+        'hindi' => 'hi', 'hungarian' => 'hu', 'icelandic' => 'is', 'indonesian' => 'id', 'italian' => 'it',
+        'kazakh' => 'kk', 'kyrgyz' => 'ky', 'latin' => 'la', 'latvian' => 'lv', 'lithuanian' => 'lt',
+        'macedonian' => 'mk', 'mongolian' => 'mn', 'nepali' => 'ne', 'norwegian' => 'no', 'pashto' => 'ps',
+        'pidgin' => 'cpe', // ISO 639-2
+        'polish' => 'pl', 'portuguese' => 'pt', 'romanian' => 'ro', 'russian' => 'ru', 'serbian' => 'sr',
+        'slovak' => 'sk', 'slovene' => 'sl', 'somali' => 'so', 'spanish' => 'es', 'swahili' => 'sw',
+        'swedish' => 'sv', 'tagalog' => 'tl', 'turkish' => 'tr', 'ukrainian' => 'uk', 'urdu' => 'ur',
+        'uzbek' => 'uz', 'vietnamese' => 'vi', 'welsh' => 'cy'
+    );
+}
+// the cld extension is used only when loaded and running on PHP 5.3 or later
+$use_cld = extension_loaded('cld') && (version_compare(PHP_VERSION, '5.3.0') >= 0);
+
+/////////////////////////////////////
+// Check for valid format
+// (stick to RSS (or RSS as JSON) for the time being)
+/////////////////////////////////////
+// Output format: 'json' only when explicitly requested, RSS otherwise.
+$format = (isset($_GET['format']) && $_GET['format'] == 'json') ? 'json' : 'rss';
+
+/////////////////////////////////////
+// Should we do XSS filtering?
+/////////////////////////////////////
+// 'user' in the config lets the request switch filtering on with ?xss;
+// any other config value is used as is.
+$xss_filter = ($options->xss_filter === 'user') ? isset($_GET['xss']) : $options->xss_filter;
+// refuse rather than silently serve unfiltered content to a caller who asked for filtering
+if (!$xss_filter && isset($_GET['xss'])) {
+    die('XSS filtering is disabled in config');
+}
+
+/////////////////////////////////////
+// Check for JSONP
+// Regex from https://gist.github.com/1217080
+/////////////////////////////////////
+// JSONP callback name (JSON output only). Each dot-separated part must be a
+// JavaScript identifier, optionally followed by subscripts: ["..."], ['...'] or [123].
+$callback = null;
+if ($format =='json' && isset($_GET['callback'])) {
+    $callback = trim($_GET['callback']);
+    foreach (explode('.', $callback) as $_identifier) {
+        // Quoted subscripts may not contain their own quote character or a
+        // backslash. The looser ".+" form accepted input such as
+        // a["x"]);alert(1);//["y"], letting arbitrary script through validation.
+        if (!preg_match('/^[a-zA-Z_$][0-9a-zA-Z_$]*(?:\[(?:"[^"\\\\]+"|\'[^\'\\\\]+\'|\d+)\])*?$/', $_identifier)) {
+            die('Invalid JSONP callback');
+        }
+    }
+    debug("JSONP callback: $callback");
+}
+
+//////////////////////////////////
+// Enable Cross-Origin Resource Sharing (CORS)
+//////////////////////////////////
+if ($options->cors) header('Access-Control-Allow-Origin: *');
+
+//////////////////////////////////
+// Check for cached copy
+//////////////////////////////////
+// Serve a previously generated copy when caching is on and one is stored.
+if ($options->caching) {
+    debug('Caching is enabled...');
+    // every request option that influences the output goes into the cache key
+    $cache_id = md5($max.$url.$valid_key.$links.$favour_feed_titles.$xss_filter.$exclude_on_fail.$format.$detect_language.(int)isset($_GET['pubsub']));
+    $check_cache = true;
+    if ($options->apc && $options->smart_cache) {
+        // Smart cache: keep a per-key request counter in APC (created with a
+        // 10 minute TTL) and only look at the disk cache once the counter,
+        // read before this request is added, has reached 2.
+        apc_add("cache.$cache_id", 0, 10*60);
+        $apc_cache_hits = (int)apc_fetch("cache.$cache_id");
+        $check_cache = ($apc_cache_hits >= 2);
+        apc_inc("cache.$cache_id");
+        if ($check_cache) {
+            debug('Cache key found in APC, we\'ll try to load cache file from disk');
+        } else {
+            debug('Cache key not found in APC');
+        }
+    }
+    if ($check_cache) {
+        $cache = get_cache();
+        if ($data = $cache->load($cache_id)) {
+            if ($debug_mode) {
+                debug('Loaded cached copy');
+                exit;
+            }
+            // Check before calling header(): once output has started, the
+            // content-type headers below can no longer be sent.
+            if (headers_sent()) die('Some data has already been output, can\'t send RSS file');
+            if ($format == 'json') {
+                if ($callback === null) {
+                    header('Content-type: application/json; charset=UTF-8');
+                } else {
+                    header('Content-type: application/javascript; charset=UTF-8');
+                }
+            } else {
+                header('Content-type: text/xml; charset=UTF-8');
+                header('X-content-type-options: nosniff');
+            }
+            // the cached data does not include the JSONP wrapper
+            if ($callback) {
+                echo "$callback($data);";
+            } else {
+                echo $data;
+            }
+            exit;
+        }
+    }
+}
+
+//////////////////////////////////
+// Set Expires header
+//////////////////////////////////
+// Outside debug mode, tell clients the response may be reused for 10 minutes.
+if (!$debug_mode) {
+ header('Expires: ' . gmdate('D, d M Y H:i:s', time()+(60*10)) . ' GMT');
+}
+
+//////////////////////////////////
+// Set up HTTP agent
+//////////////////////////////////
+// One HumbleHttpAgent instance is shared by the feed fetch and the item
+// fetches below; it is configured from the values in $options.
+$http = new HumbleHttpAgent();
+$http->debug = $debug_mode;
+$http->userAgentMap = $options->user_agents;
+// the keys of content_type_exc are handed to the agent as its "header only" types
+$http->headerOnlyTypes = array_keys($options->content_type_exc);
+$http->rewriteUrls = $options->rewrite_url;
+
+//////////////////////////////////
+// Set up Content Extractor
+//////////////////////////////////
+// The extractor is given two site config directories: custom and standard.
+$extractor = new ContentExtractor(dirname(__FILE__).'/site_config/custom', dirname(__FILE__).'/site_config/standard');
+$extractor->debug = $debug_mode;
+SiteConfig::$debug = $debug_mode;
+SiteConfig::use_apc($options->apc);
+$extractor->fingerprints = $options->fingerprints;
+$extractor->allowedParsers = $options->allowed_parsers;
+
+////////////////////////////////
+// Get RSS/Atom feed
+////////////////////////////////
+// Unless the caller said the URL is an HTML page, try to load it as a feed
+// with SimplePie. $result holds the outcome of init(); a falsy value makes the
+// code that follows fall back to a single-item dummy feed.
+if (!$html_only) {
+ debug('--------');
+ debug("Attempting to process URL as feed");
+ // Send user agent header showing PHP (prevents a HTML response from feedburner)
+ $http->userAgentDefault = HumbleHttpAgent::UA_PHP;
+ // configure SimplePie HTTP extension class to use our HumbleHttpAgent instance
+ SimplePie_HumbleHttpAgent::set_agent($http);
+ $feed = new SimplePie();
+ // some feeds use the text/html content type - force_feed tells SimplePie to process anyway
+ $feed->force_feed(true);
+ $feed->set_file_class('SimplePie_HumbleHttpAgent');
+ //$feed->set_feed_url($url); // colons appearing in the URL's path get encoded
+ // the property is set directly to avoid the encoding done by set_feed_url() (see line above)
+ $feed->feed_url = $url;
+ $feed->set_autodiscovery_level(SIMPLEPIE_LOCATOR_NONE);
+ $feed->set_timeout(20);
+ $feed->enable_cache(false);
+ $feed->set_stupidly_fast(true);
+ $feed->enable_order_by_date(false); // we don't want to do anything to the feed
+ $feed->set_url_replacements(array());
+ // initialise the feed
+ // the @ suppresses notices which on some servers causes a 500 internal server error
+ $result = @$feed->init();
+ //$feed->handle_content_type();
+ //$feed->get_title();
+ // init() succeeded but produced no data: nothing to build a feed from
+ if ($result && (!is_array($feed->data) || count($feed->data) == 0)) {
+ die('Sorry, no feed items found');
+ }
+ // from now on, we'll identify ourselves as a browser
+ $http->userAgentDefault = HumbleHttpAgent::UA_BROWSER;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Our given URL is not a feed, so let's create our own feed with a single item:
+// the given URL. This basically treats all non-feed URLs as if they were
+// single-item feeds.
+////////////////////////////////////////////////////////////////////////////////
+$isDummyFeed = false;
+if ($html_only || !$result) {
+    debug('--------');
+    debug("Constructing a single-item feed from URL");
+    $isDummyFeed = true;
+    unset($feed, $result);
+
+    /**
+     * Stand-in for a parsed feed that contains exactly one item: the requested URL.
+     * Offers the feed-level getters the rest of this script calls.
+     */
+    class DummySingleItemFeed {
+        /** @var DummySingleItem the only item of this feed */
+        public $item;
+
+        function __construct($url) {
+            $this->item = new DummySingleItem($url);
+        }
+        public function get_title() {
+            return '';
+        }
+        public function get_description() {
+            return 'Content extracted from '.$this->item->url;
+        }
+        public function get_link() {
+            return $this->item->url;
+        }
+        public function get_language() {
+            return false;
+        }
+        public function get_image_url() {
+            return false;
+        }
+        // $start and $max are accepted for call compatibility and ignored
+        public function get_items($start=0, $max=1) {
+            return array(0=>$this->item);
+        }
+    }
+
+    /**
+     * Stand-in for a feed item that knows nothing but its URL; every other
+     * getter reports "no value".
+     */
+    class DummySingleItem {
+        /** @var string URL this item points to */
+        public $url;
+
+        function __construct($url) {
+            $this->url = $url;
+        }
+        public function get_permalink() {
+            return $this->url;
+        }
+        public function get_title() {
+            return null;
+        }
+        public function get_date($format='') {
+            return false;
+        }
+        public function get_author($key=0) {
+            return null;
+        }
+        public function get_authors() {
+            return null;
+        }
+        public function get_description() {
+            return '';
+        }
+        public function get_enclosure($key=0, $prefer=null) {
+            return null;
+        }
+        public function get_enclosures() {
+            return null;
+        }
+        public function get_categories() {
+            return null;
+        }
+    }
+
+    $feed = new DummySingleItemFeed($url);
+}
+
+////////////////////////////////////////////
+// Create full-text feed
+////////////////////////////////////////////
+// Start the output feed and copy the source feed's metadata into it.
+$output = new FeedWriter();
+$output->setTitle(strip_tags($feed->get_title()));
+$output->setDescription(strip_tags($feed->get_description()));
+$output->setXsl('css/feed.xsl'); // Chrome uses this, most browsers ignore it
+$_pubsub_requested = ($valid_key && isset($_GET['pubsub'])); // used only on fivefilters.org at the moment
+if ($_pubsub_requested) {
+    foreach (array('http://fivefilters.superfeedr.com/', 'http://pubsubhubbub.appspot.com/') as $_hub) {
+        $output->addHub($_hub);
+    }
+    $output->setSelf('http://'.$_SERVER['HTTP_HOST'].$_SERVER['REQUEST_URI']);
+}
+$output->setLink($feed->get_link()); // Google Reader uses this for pulling in favicons
+$img_url = $feed->get_image_url();
+if ($img_url) {
+    $output->setImage($feed->get_title(), $feed->get_link(), $img_url);
+}
+
+////////////////////////////////////////////
+// Loop through feed items
+////////////////////////////////////////////
+$items = $feed->get_items(0, $max);
+// Request all feed items in parallel (if supported)
+// $urls keeps one entry per item (same keys as $items);
+// $urls_sanitized holds only the non-empty ones, for the batch fetch.
+$urls = array();
+$urls_sanitized = array();
+foreach ($items as $key => $item) {
+    // Colons in URL path segments get encoded by SimplePie, yet some sites expect them unencoded
+    $permalink = str_replace('%3A', ':', htmlspecialchars_decode($item->get_permalink()));
+    // validateUrl() strips non-ascii characters
+    // simplepie already sanitizes URLs so let's not do it again here.
+    //$permalink = $http->validateUrl($permalink);
+    $urls[$key] = $permalink;
+    if ($permalink) {
+        $urls_sanitized[] = $permalink;
+    }
+}
+debug('--------');
+debug('Fetching feed items');
+$http->fetchAll($urls_sanitized);
+//$http->cacheAll();
+
+// count number of items added to full feed
+$item_count = 0;
+
+foreach ($items as $key => $item) {
+ debug('--------');
+ debug('Processing feed item '.($item_count+1));
+ // Per-item state. $effective_url and $mime_info are only assigned when this
+ // item's page is fetched successfully, so clear whatever the previous item
+ // left behind; otherwise an item whose fetch fails would be tagged with the
+ // previous item's URL and MIME type (dc:identifier / dc:format below).
+ unset($effective_url, $mime_info);
+ $do_content_extraction = true;
+ $extract_result = false;
+ $text_sample = null;
+ $permalink = $urls[$key];
+ debug("Item URL: $permalink");
+ $extracted_title = '';
+ // title from the feed, reduced to plain text
+ $feed_item_title = $item->get_title();
+ if ($feed_item_title !== null) {
+     $feed_item_title = strip_tags(htmlspecialchars_decode($feed_item_title));
+ }
+ $newitem = $output->createNewItem();
+ $newitem->setTitle($feed_item_title);
+ // Link to the decoded permalink, or to what the feed item reports when that is false.
+ $_item_link = ($permalink !== false) ? $permalink : $item->get_permalink();
+ if ($valid_key && isset($_GET['pubsub'])) { // used only on fivefilters.org at the moment
+     // pubsub feeds link through the fivefilters.org redirect script
+     $_item_link = 'http://fivefilters.org/content-only/redirect.php?url='.urlencode($_item_link);
+ }
+ $newitem->setLink($_item_link);
+ //if ($permalink && ($response = $http->get($permalink, true)) && $response['status_code'] < 300) {
+ // Allowing error codes - some sites return correct content with error status
+ // e.g. prospectmagazine.co.uk returns 403
+ if ($permalink && ($response = $http->get($permalink, true)) && ($response['status_code'] < 300 || $response['status_code'] > 400)) {
+ $effective_url = $response['effective_url'];
+ if (!url_allowed($effective_url)) continue;
+ // check if action defined for returned Content-Type
+ $mime_info = get_mime_action_info($response['headers']);
+ if (isset($mime_info['action'])) {
+ if ($mime_info['action'] == 'exclude') {
+ continue; // skip this feed item entry
+ } elseif ($mime_info['action'] == 'link') {
+ // 'link' action: instead of extracting content, the item body becomes a link
+ // to the resource (an inline image for image types). Both values are escaped
+ // before being placed in markup.
+ $_link_href = htmlspecialchars($effective_url, ENT_QUOTES, 'UTF-8');
+ $_link_name = htmlspecialchars($mime_info['name'], ENT_QUOTES, 'UTF-8');
+ if ($mime_info['type'] == 'image') {
+     $html = "<a href=\"$_link_href\"><img src=\"$_link_href\" alt=\"$_link_name\" /></a>";
+ } else {
+     $html = "<a href=\"$_link_href\">Download $_link_name</a>";
+ }
+ $extracted_title = $mime_info['name'];
+ $do_content_extraction = false;
+ }
+ }
+ // Extract the article from the fetched page: prefer the site's single-page
+ // view when the site config offers one, otherwise follow "next page" links
+ // and append the content of each further page.
+ if ($do_content_extraction) {
+     $html = $response['body'];
+     // remove strange things
+     $html = str_replace('</[>', '', $html);
+     $html = convert_to_utf8($html, $response['headers']);
+     // check site config for single page URL - fetch it if found
+     $is_single_page = false;
+     if ($single_page_response = getSinglePage($item, $html, $effective_url)) {
+         $is_single_page = true;
+         $html = $single_page_response['body'];
+         // remove strange things
+         $html = str_replace('</[>', '', $html);
+         $html = convert_to_utf8($html, $single_page_response['headers']);
+         $effective_url = $single_page_response['effective_url'];
+         debug("Retrieved single-page view from $effective_url");
+         unset($single_page_response);
+     }
+     debug('--------');
+     debug('Attempting to extract content');
+     $extract_result = $extractor->process($html, $effective_url);
+     $readability = $extractor->readability;
+     $content_block = ($extract_result) ? $extractor->getContent() : null;
+     $extracted_title = ($extract_result) ? $extractor->getTitle() : '';
+     // Deal with multi-page articles
+     //die('Next: '.$extractor->getNextPageUrl());
+     $is_multi_page = (!$is_single_page && $extract_result && $extractor->getNextPageUrl());
+     if ($options->multipage && $is_multi_page) {
+         debug('--------');
+         debug('Attempting to process multi-page article');
+         $multi_page_urls = array();
+         $multi_page_content = array();
+         // Each successfully processed page ends in 'continue'; any failure
+         // falls through to the bottom of the loop body, which discards what
+         // was collected and stops.
+         while ($next_page_url = $extractor->getNextPageUrl()) {
+             debug('--------');
+             debug('Processing next page: '.$next_page_url);
+             // If we've got URL, resolve against $url
+             if ($next_page_url = makeAbsoluteStr($effective_url, $next_page_url)) {
+                 // check it's not what we have already!
+                 if (!in_array($next_page_url, $multi_page_urls)) {
+                     // it's not, so let's attempt to fetch it
+                     $multi_page_urls[] = $next_page_url;
+                     $_prev_ref = $http->referer;
+                     if (($response = $http->get($next_page_url, true)) && $response['status_code'] < 300) {
+                         // make sure mime type is not something with a different action associated
+                         $page_mime_info = get_mime_action_info($response['headers']);
+                         if (!isset($page_mime_info['action'])) {
+                             $html = $response['body'];
+                             // remove strange things
+                             $html = str_replace('</[>', '', $html);
+                             $html = convert_to_utf8($html, $response['headers']);
+                             if ($extractor->process($html, $next_page_url)) {
+                                 $multi_page_content[] = $extractor->getContent();
+                                 continue;
+                             } else { debug('Failed to extract content'); }
+                         } else { debug('MIME type requires different action'); }
+                     } else { debug('Failed to fetch URL'); }
+                 } else { debug('URL already processed'); }
+             } else { debug('Failed to resolve against '.$effective_url); }
+             // failed to process next_page_url, so cancel further requests
+             $multi_page_content = array();
+             break;
+         }
+         // did we successfully deal with this multi-page article?
+         if (empty($multi_page_content)) {
+             debug('Failed to extract all parts of multi-page article, so not going to include them');
+             // Queue a real <p> node carrying the notice. The list must hold
+             // DOM nodes because every entry goes through importNode() below;
+             // a bare string there cannot be imported.
+             $_notice_doc = $content_block->ownerDocument;
+             $_notice = $_notice_doc->createElement('p');
+             $_notice->appendChild($_notice_doc->createTextNode('This article appears to continue on subsequent pages which we could not extract'));
+             $multi_page_content[] = $_notice;
+             unset($_notice_doc, $_notice);
+         }
+         foreach ($multi_page_content as $_page) {
+             $_page = $content_block->ownerDocument->importNode($_page, true);
+             $content_block->appendChild($_page);
+         }
+         unset($multi_page_urls, $multi_page_content, $page_mime_info, $next_page_url);
+     }
+ }
+ // Decide which title the item carries.
+ if ($isDummyFeed) {
+     // single-item dummy feed: the extracted title names both the feed and the item
+     $output->setTitle($extracted_title);
+     $newitem->setTitle($extracted_title);
+ } elseif (!$favour_feed_titles && $extracted_title != '') {
+     // real feed, but the extracted title is preferred over the feed's
+     debug('Using extracted title in generated feed');
+     $newitem->setTitle($extracted_title);
+ }
+ }
+ // Turn the extraction result into the HTML used as the item description.
+ if ($do_content_extraction) {
+     // if we failed to extract content...
+     if (!$extract_result) {
+         if ($exclude_on_fail) {
+             debug('Failed to extract, so skipping (due to exclude on fail parameter)');
+             continue; // skip this and move to next item
+         }
+         //TODO: get text sample for language detection
+         $html = $options->error_message;
+         // keep the original item description
+         $html .= $item->get_description();
+     } else {
+         $readability->clean($content_block, 'select');
+         if ($options->rewrite_relative_urls) makeAbsolute($effective_url, $content_block);
+         // footnotes
+         if (($links == 'footnotes') && (strpos($effective_url, 'wikipedia.org') === false)) {
+             $readability->addFootnotes($content_block);
+         }
+         // remove nesting: <div><div><div><p>test</p></div></div></div> = <p>test</p>
+         while ($content_block->childNodes->length == 1 && $content_block->firstChild->nodeType === XML_ELEMENT_NODE) {
+             // only follow these tag names
+             if (!in_array(strtolower($content_block->tagName), array('div', 'article', 'section', 'header', 'footer'))) break;
+             //$html = $content_block->firstChild->innerHTML; // FTR 2.9.5
+             $content_block = $content_block->firstChild;
+         }
+         // convert content block to HTML string
+         // Need to preserve things like body: //img[@id='feature']
+         if (in_array(strtolower($content_block->tagName), array('div', 'article', 'section', 'header', 'footer'))) {
+             $html = $content_block->innerHTML;
+         } else {
+             $html = $content_block->ownerDocument->saveXML($content_block); // essentially outerHTML
+         }
+         unset($content_block);
+         // post-processing cleanup: drop paragraphs that contain only whitespace
+         $html = preg_replace('!<p>[\s\h\v]*</p>!u', '', $html);
+         if ($links == 'remove') {
+             // Strip opening and closing <a> tags but keep the link text. The
+             // tag name must end right after the "a" so that tags such as
+             // <abbr> or <article> are left alone.
+             $html = preg_replace('!</?a(?:\s[^>]*)?>!i', '', $html);
+         }
+         // get text sample for language detection
+         $text_sample = strip_tags(substr($html, 0, 500));
+         $html = make_substitutions($options->message_to_prepend).$html;
+         $html .= make_substitutions($options->message_to_append);
+     }
+ }
+
+ // guid: the item's permalink, or - for pubsub requests - the fivefilters.org
+ // redirect URL, which is flagged as not being a permalink.
+ $_is_pubsub = ($valid_key && isset($_GET['pubsub'])); // used only on fivefilters.org at the moment
+ $_guid = $item->get_permalink();
+ if ($_is_pubsub) {
+     $_guid = 'http://fivefilters.org/content-only/redirect.php?url='.urlencode($_guid);
+ }
+ $newitem->addElement('guid', $_guid, array('isPermaLink'=>($_is_pubsub ? 'false' : 'true')));
+ // filter xss?
+ if ($xss_filter) {
+     debug('Filtering HTML to remove XSS');
+     $html = htmLawed::hl($html, array('safe'=>1, 'deny_attribute'=>'style', 'comment'=>1, 'cdata'=>1));
+ }
+ $newitem->setDescription($html);
+
+ // set date: the feed item's timestamp if it has one, else whatever the extractor found
+ $_item_timestamp = (int)$item->get_date('U');
+ if ($_item_timestamp > 0) {
+     $newitem->setDate($_item_timestamp);
+ } elseif ($extractor->getDate()) {
+     $newitem->setDate($extractor->getDate());
+ }
+
+ // add authors: those listed in the feed item, else the first one the extractor found
+ if ($authors = $item->get_authors()) {
+     foreach ($authors as $author) {
+         // for some feeds, SimplePie stores author's name as email, e.g. http://feeds.feedburner.com/nymag/intel
+         $_creator = $author->get_name();
+         if ($_creator === null) {
+             $_creator = $author->get_email();
+         }
+         if ($_creator !== null) {
+             $newitem->addElement('dc:creator', $_creator);
+         }
+     }
+ } elseif ($authors = $extractor->getAuthors()) {
+     //TODO: make sure the list size is reasonable
+     foreach ($authors as $author) {
+         // TODO: xpath often selects authors from other articles linked from the page.
+         // for now choose first item
+         $newitem->addElement('dc:creator', $author);
+         break;
+     }
+ }
+
+ // add language
+ if ($detect_language) {
+     // start from what the extractor reports, then the feed's own language
+     $language = $extractor->getLanguage();
+     if (!$language) {
+         $language = $feed->get_language();
+     }
+     // Guess from the text sample when one exists and either level 3 is set,
+     // or level 2 is set and no language is known yet.
+     $_guess_from_text = ($text_sample && ($detect_language == 3 || ($detect_language == 2 && !$language)));
+     if ($_guess_from_text) {
+         try {
+             if (!$use_cld) {
+                 //die('what');
+                 // Use PEAR's Text_LanguageDetect (created once, reused for later items)
+                 if (!isset($l)) {
+                     $l = new Text_LanguageDetect('libraries/language-detect/lang.dat', 'libraries/language-detect/unicode_blocks.dat');
+                 }
+                 $l_result = $l->detect($text_sample, 1);
+                 if (count($l_result) > 0) {
+                     // the result is keyed by language name; map it to a code
+                     $language = $language_codes[key($l_result)];
+                 }
+             } else {
+                 // Use PHP-CLD extension
+                 $php_cld = 'CLD\detect'; // in quotes to prevent PHP 5.2 parse error
+                 $res = $php_cld($text_sample);
+                 if (is_array($res) && count($res) > 0) {
+                     $language = $res[0]['code'];
+                 }
+             }
+         } catch (Exception $e) {
+             //die('error: '.$e);
+             // do nothing
+         }
+     }
+     // only short values are written out
+     if ($language && (strlen($language) < 7)) {
+         $newitem->addElement('dc:language', $language);
+     }
+ }
+
+ // add MIME type (if it appeared in our exclusions lists)
+ if (isset($mime_info['mime'])) {
+     $newitem->addElement('dc:format', $mime_info['mime']);
+ }
+ // add effective URL (URL after redirects), or the permalink when there is none
+ //TODO: ensure $effective_url is valid witout - sometimes it causes problems, e.g.
+ //http://www.siasat.pk/forum/showthread.php?108883-Pakistan-Chowk-by-Rana-Mubashir--25th-March-2012-Special-Program-from-Liari-(Karachi)
+ //temporary measure: use utf8_encode()
+ $_identifier = isset($effective_url) ? utf8_encode($effective_url) : $item->get_permalink();
+ $newitem->addElement('dc:identifier', remove_url_cruft($_identifier));
+
+ // add categories (only those that have a label)
+ if ($categories = $item->get_categories()) {
+     foreach ($categories as $category) {
+         $_label = $category->get_label();
+         if ($_label !== null) {
+             $newitem->addElement('category', $_label);
+         }
+     }
+ }
+
+ // check for enclosures
+ if ($options->keep_enclosures && ($enclosures = $item->get_enclosures())) {
+     // Media RSS spec ($enc): http://search.yahoo.com/mrss
+     // SimplePie methods ($enclosure): http://simplepie.org/wiki/reference/start#methods4
+     // media:content attribute => enclosure getter, in output order
+     $_enc_getters = array(
+         'fileSize' => 'get_length',
+         'type' => 'get_type',
+         'medium' => 'get_medium',
+         'expression' => 'get_expression',
+         'bitrate' => 'get_bitrate',
+         'framerate' => 'get_framerate',
+         'samplingrate' => 'get_sampling_rate',
+         'channels' => 'get_channels',
+         'duration' => 'get_duration',
+         'height' => 'get_height',
+         'width' => 'get_width',
+         'lang' => 'get_language'
+     );
+     foreach ($enclosures as $enclosure) {
+         // thumbnails
+         foreach ((array)$enclosure->get_thumbnails() as $thumbnail) {
+             $newitem->addElement('media:thumbnail', '', array('url'=>$thumbnail));
+         }
+         // an enclosure without a link yields no media:content element
+         if (!$enclosure->get_link()) continue;
+         $enc = array('url' => $enclosure->get_link());
+         // copy over every attribute the enclosure has a value for
+         foreach ($_enc_getters as $_enc_attr => $_enc_getter) {
+             $_enc_value = $enclosure->$_enc_getter();
+             if ($_enc_value) {
+                 $enc[$_enc_attr] = $_enc_value;
+             }
+         }
+         $newitem->addElement('media:content', '', $enc);
+     }
+ }
+ /* } */
+ $output->addItem($newitem);
+ unset($html);
+ $item_count++;
+}
+
+ // Emit the finished feed. Nothing is emitted here in debug mode.
+ debug('Done!');
+ /*
+ if ($debug_mode) {
+     $_apc_data = apc_cache_info('user');
+     var_dump($_apc_data); exit;
+ }
+ */
+ if (!$debug_mode) {
+     // if $callback is set, $format also == 'json'; the payload is wrapped as callback( ... );
+     if ($callback) {
+         echo "$callback(";
+     }
+     if ($format == 'json') {
+         $output->setFormat(($callback === null) ? JSON : JSONP);
+     }
+
+     // Decide whether this response should be written to the cache.
+     $add_to_cache = $options->caching;
+     if ($add_to_cache && $options->apc && $options->smart_cache) {
+         // smart cache mode: only cache from the second request for this URL onwards
+         $add_to_cache = ($apc_cache_hits >= 2);
+         // occasionally (1 in cache_cleanup requests) purge expired APC user entries
+         // apc purge code adapted from http://www.thimbleopensource.com/tutorials-snippets/php-apc-expunge-script
+         if ($options->cache_cleanup > 0 && rand(1, $options->cache_cleanup) == 1) {
+             $_apc_data = apc_cache_info('user');
+             foreach ($_apc_data['cache_list'] as $_apc_item) {
+                 if (!($_apc_item['ttl'] > 0)) {
+                     continue; // entry has no expiry
+                 }
+                 if ($_apc_item['ttl'] + $_apc_item['creation_time'] < time()) {
+                     apc_delete($_apc_item['info']);
+                 }
+             }
+         }
+     }
+
+     if (!$add_to_cache) {
+         // not caching: stream the feed straight to the client
+         $output->genarateFeed();
+     } else {
+         // capture the generated feed so the same bytes can be cached and sent
+         ob_start();
+         $output->genarateFeed();
+         $output = ob_get_clean();
+         // do not cache an empty html-only result - in case of temporary server glitch at source URL
+         if (!$html_only || $item_count != 0) {
+             $cache = get_cache();
+             $cache->save($output, $cache_id);
+         }
+         echo $output;
+     }
+
+     if ($callback) {
+         echo ');';
+     }
+ }
+
+///////////////////////////////
+// HELPER FUNCTIONS
+///////////////////////////////
+
+ // Decide whether $url may be processed, based on $options->allowed_urls / $options->blocked_urls.
+ // A non-empty allow-list takes precedence: the URL must contain one of its entries
+ // (case-insensitive substring match) and the block-list is not consulted.
+ // Otherwise the URL is rejected if it contains any block-list entry.
+ function url_allowed($url) {
+     global $options;
+     $use_allow_list = !empty($options->allowed_urls);
+     $patterns = $use_allow_list ? $options->allowed_urls : $options->blocked_urls;
+     foreach ($patterns as $pattern) {
+         if (stristr($url, $pattern) !== false) {
+             // a hit admits the URL in allow-list mode and rejects it in block-list mode
+             return $use_allow_list;
+         }
+     }
+     // no hit: rejected in allow-list mode, admitted in block-list mode
+     return !$use_allow_list;
+ }
+
+//////////////////////////////////////////////
+// Convert $html to UTF8
+// (uses HTTP headers and HTML to find encoding)
+// adapted from http://stackoverflow.com/questions/910793/php-detect-encoding-and-make-everything-utf-8
+//////////////////////////////////////////////
+function convert_to_utf8($html, $header=null)
+{
+ $encoding = null;
+ if ($html || $header) {
+ if (is_array($header)) $header = implode("\n", $header);
+ if (!$header || !preg_match_all('/^Content-Type:\s+([^;]+)(?:;\s*charset=["\']?([^;"\'\n]*))?/im', $header, $match, PREG_SET_ORDER)) {
+ // error parsing the response
+ debug('Could not find Content-Type header in HTTP response');
+ } else {
+ $match = end($match); // get last matched element (in case of redirects)
+ if (isset($match[2])) $encoding = trim($match[2], "\"' \r\n\0\x0B\t");
+ }
+ // TODO: check to see if encoding is supported (can we convert it?)
+ // If it's not, result will be empty string.
+ // For now we'll check for invalid encoding types returned by some sites, e.g. 'none'
+ // Problem URL: http://facta.co.jp/blog/archives/20111026001026.html
+ if (!$encoding || $encoding == 'none') {
+ // search for encoding in HTML - only look at the first 50000 characters
+ // Why 50000? See, for example, http://www.lemonde.fr/festival-de-cannes/article/2012/05/23/deux-cretes-en-goguette-sur-la-croisette_1705732_766360.html
+ // TODO: improve this so it looks at smaller chunks first
+ $html_head = substr($html, 0, 50000);
+ if (preg_match('/^<\?xml\s+version=(?:"[^"]*"|\'[^\']*\')\s+encoding=("[^"]*"|\'[^\']*\')/s', $html_head, $match)) {
+ $encoding = trim($match[1], '"\'');
+ } elseif (preg_match('/]+)/i', $html_head, $match)) {
+ $encoding = trim($match[1]);
+ } elseif (preg_match_all('/]+)>/i', $html_head, $match)) {
+ foreach ($match[1] as $_test) {
+ if (preg_match('/charset=["\']?([^"\']+)/i', $_test, $_m)) {
+ $encoding = trim($_m[1]);
+ break;
+ }
+ }
+ }
+ }
+ if (isset($encoding)) $encoding = trim($encoding);
+ // trim is important here!
+ if (!$encoding || (strtolower($encoding) == 'iso-8859-1')) {
+ // replace MS Word smart qutoes
+ $trans = array();
+ $trans[chr(130)] = '‚'; // Single Low-9 Quotation Mark
+ $trans[chr(131)] = 'ƒ'; // Latin Small Letter F With Hook
+ $trans[chr(132)] = '„'; // Double Low-9 Quotation Mark
+ $trans[chr(133)] = '…'; // Horizontal Ellipsis
+ $trans[chr(134)] = '†'; // Dagger
+ $trans[chr(135)] = '‡'; // Double Dagger
+ $trans[chr(136)] = 'ˆ'; // Modifier Letter Circumflex Accent
+ $trans[chr(137)] = '‰'; // Per Mille Sign
+ $trans[chr(138)] = 'Š'; // Latin Capital Letter S With Caron
+ $trans[chr(139)] = '‹'; // Single Left-Pointing Angle Quotation Mark
+ $trans[chr(140)] = 'Œ'; // Latin Capital Ligature OE
+ $trans[chr(145)] = '‘'; // Left Single Quotation Mark
+ $trans[chr(146)] = '’'; // Right Single Quotation Mark
+ $trans[chr(147)] = '“'; // Left Double Quotation Mark
+ $trans[chr(148)] = '”'; // Right Double Quotation Mark
+ $trans[chr(149)] = '•'; // Bullet
+ $trans[chr(150)] = '–'; // En Dash
+ $trans[chr(151)] = '—'; // Em Dash
+ $trans[chr(152)] = '˜'; // Small Tilde
+ $trans[chr(153)] = '™'; // Trade Mark Sign
+ $trans[chr(154)] = 'š'; // Latin Small Letter S With Caron
+ $trans[chr(155)] = '›'; // Single Right-Pointing Angle Quotation Mark
+ $trans[chr(156)] = 'œ'; // Latin Small Ligature OE
+ $trans[chr(159)] = 'Ÿ'; // Latin Capital Letter Y With Diaeresis
+ $html = strtr($html, $trans);
+ }
+ if (!$encoding) {
+ debug('No character encoding found, so treating as UTF-8');
+ $encoding = 'utf-8';
+ } else {
+ debug('Character encoding: '.$encoding);
+ if (strtolower($encoding) != 'utf-8') {
+ debug('Converting to UTF-8');
+ $html = SimplePie_Misc::change_encoding($html, $encoding, 'utf-8');
+ /*
+ if (function_exists('iconv')) {
+ // iconv appears to handle certain character encodings better than mb_convert_encoding
+ $html = iconv($encoding, 'utf-8', $html);
+ } else {
+ $html = mb_convert_encoding($html, 'utf-8', $encoding);
+ }
+ */
+ }
+ }
+ }
+ return $html;
+}
+
+ // Make the href of every <a> and the src of every <img> inside $elem absolute,
+ // resolving against $base. $elem itself is processed too when it is an <a> or <img>.
+ function makeAbsolute($base, $elem) {
+     $base = new SimplePie_IRI($base);
+     // remove '//' in URL path (used to prevent URLs from resolving properly)
+     // TODO: check if this is still the case
+     if (isset($base->path)) {
+         $base->path = preg_replace('!//+!', '/', $base->path);
+     }
+     $url_attr_for = array('a'=>'href', 'img'=>'src');
+     foreach ($url_attr_for as $tag => $attr) {
+         $nodes = $elem->getElementsByTagName($tag);
+         // visit the matched descendants from last to first
+         $i = $nodes->length;
+         while ($i-- > 0) {
+             makeAbsoluteAttr($base, $nodes->item($i), $attr);
+         }
+         if (strtolower($elem->tagName) == $tag) {
+             makeAbsoluteAttr($base, $elem, $attr);
+         }
+     }
+ }
+ // Rewrite the URL held in attribute $attr of element $e to an absolute URL.
+ // $base - base IRI, passed straight to SimplePie_IRI::absolutize()
+ // $e    - DOM element, updated in place
+ // $attr - attribute name, e.g. 'href' or 'src'
+ // Values that already start with http:// or https:// are left untouched, as is
+ // the attribute when absolutize() returns a falsy result.
+ function makeAbsoluteAttr($base, $e, $attr) {
+     if ($e->hasAttribute($attr)) {
+         // Trim leading and trailing white space (plain or %20-encoded), then
+         // re-encode the remaining spaces. Not pretty, but such padded URLs do
+         // appear on some sites.
+         $url = trim(str_replace('%20', ' ', $e->getAttribute($attr)));
+         $url = str_replace(' ', '%20', $url);
+         // Anchored at the start (as in makeAbsoluteStr): a relative URL that merely
+         // contains "http://" somewhere, e.g. /out?u=http://example.com/, must still be resolved.
+         if (!preg_match('!^https?://!i', $url)) {
+             if ($absolute = SimplePie_IRI::absolutize($base, $url)) {
+                 $e->setAttribute($attr, $absolute);
+             }
+         }
+     }
+ }
+ // Resolve $url against $base and return the absolute URL.
+ // A $url that already starts with http:// or https:// is returned unchanged;
+ // false is returned when SimplePie_IRI::absolutize() yields a falsy result.
+ function makeAbsoluteStr($base, $url) {
+     $base = new SimplePie_IRI($base);
+     // remove '//' in URL path (causes URLs not to resolve properly)
+     if (isset($base->path)) {
+         $base->path = preg_replace('!//+!', '/', $base->path);
+     }
+     if (preg_match('!^https?://!i', $url)) {
+         return $url; // already absolute
+     }
+     $absolute = SimplePie_IRI::absolutize($base, $url);
+     return $absolute ? $absolute : false;
+ }
+ // Look up a "single page" link for $url via the site config and fetch it.
+ // $item - feed item; its description replaces $html when the site config
+ //         targets the feed (single_page_link_in_feed)
+ // $html - HTML in which to look for the link
+ // $url  - URL of the page, also used as base for resolving the link
+ // returns single page response, or false if not found
+ function getSinglePage($item, $html, $url) {
+     global $http, $extractor;
+     debug('Looking for site config files to see if single page link exists');
+     $site_config = $extractor->buildSiteConfig($url, $html);
+     if (!empty($site_config->single_page_link)) {
+         $splink = $site_config->single_page_link;
+     } elseif (!empty($site_config->single_page_link_in_feed)) {
+         // single page link xpath is targeted at feed
+         $splink = $site_config->single_page_link_in_feed;
+         // so let's replace HTML with feed item description
+         $html = $item->get_description();
+     } else {
+         // the site config defines no single page link
+         return false;
+     }
+
+     // Build DOM tree from HTML
+     $readability = new Readability($html, $url);
+     $xpath = new DOMXPath($readability->dom);
+
+     // Try each xpath expression in turn; the first one producing a URL wins
+     $single_page_url = null;
+     foreach ($splink as $pattern) {
+         $found = @$xpath->evaluate($pattern, $readability->dom);
+         if (is_string($found)) {
+             $single_page_url = trim($found);
+             break;
+         }
+         if (!($found instanceof DOMNodeList) || !($found->length > 0)) {
+             continue;
+         }
+         foreach ($found as $node) {
+             if ($node instanceof DOMElement && $node->hasAttribute('href')) {
+                 $single_page_url = $node->getAttribute('href');
+                 break 2;
+             }
+             if ($node instanceof DOMAttr && $node->value) {
+                 $single_page_url = $node->value;
+                 break 2;
+             }
+         }
+     }
+     if (!isset($single_page_url)) {
+         return false;
+     }
+
+     // If we've got URL, resolve against $url
+     $single_page_url = makeAbsoluteStr($url, $single_page_url);
+     // give up if it could not be resolved or is what we have already
+     if (!$single_page_url || $single_page_url == $url) {
+         return false;
+     }
+
+     // fetch it with the referer temporarily pointing at the single page URL
+     $_prev_ref = $http->referer;
+     $http->referer = $single_page_url;
+     $response = $http->get($single_page_url, true);
+     $http->referer = $_prev_ref;
+     if ($response && $response['status_code'] < 300) {
+         return $response;
+     }
+     return false;
+ }
+
+ // based on content-type http header, decide what to do
+ // param: HTTP headers string
+ // return: array with keys: 'mime', 'type', 'subtype', 'action', 'name'
+ // e.g. array('mime'=>'image/jpeg', 'type'=>'image', 'subtype'=>'jpeg', 'action'=>'link', 'name'=>'Image')
+ // 'action' and 'name' are only present when $options->content_type_exc has an entry
+ // for the full mime type or, failing that, for the bare type.
+ // An empty array is returned when no Content-Type header is found.
+ function get_mime_action_info($headers) {
+     global $options;
+     // check if action defined for returned Content-Type
+     $info = array();
+     // The subtype may contain dots (e.g. application/vnd.ms-excel), so '.' is part of the class.
+     if (preg_match_all('!^Content-Type:\s*(([-\w]+)/([-\w+.]+))!im', $headers, $matches, PREG_SET_ORDER)) {
+         // use the last header found, as convert_to_utf8() does (in case of redirects)
+         $match = end($matches);
+         // look for full mime type (e.g. image/jpeg) or just type (e.g. image)
+         // match[1] = full mime type, e.g. image/jpeg
+         // match[2] = first part, e.g. image
+         // match[3] = last part, e.g. jpeg
+         $info['mime'] = strtolower(trim($match[1]));
+         $info['type'] = strtolower(trim($match[2]));
+         $info['subtype'] = strtolower(trim($match[3]));
+         foreach (array($info['mime'], $info['type']) as $_mime) {
+             if (isset($options->content_type_exc[$_mime])) {
+                 $info['action'] = $options->content_type_exc[$_mime]['action'];
+                 $info['name'] = $options->content_type_exc[$_mime]['name'];
+                 break;
+             }
+         }
+     }
+     return $info;
+ }
+
+ // Remove Google Analytics campaign parameters (utm_*) from $url.
+ // The rest of the query string and any #fragment are preserved, and the result
+ // stays well-formed: ?utm_source=x&id=5 becomes ?id=5 (not &id=5).
+ // URLs without utm_* parameters are returned unchanged.
+ function remove_url_cruft($url) {
+     // remove google analytics for the time being
+     // regex adapted from http://navitronic.co.uk/2010/12/removing-google-analytics-cruft-from-urls/
+     // https://gist.github.com/758177
+     // Drop each utm_* pair together with the '&' that follows it; the value stops
+     // at '&' or '#' so a fragment is never swallowed, and may be empty.
+     $cleaned = preg_replace('/(?<=[?&])utm_[a-z]+=[^&#]*&?/', '', $url, -1, $removed);
+     if (!$removed) return $url;
+     // removing the last parameter leaves a dangling '?' or '&' - drop it
+     return preg_replace('/[?&](?=#|\z)/', '', $cleaned, 1);
+ }
+
+ // Replace the placeholders {url} and {effective-url} in $string with the
+ // HTML-escaped permalink of the global $item and the HTML-escaped global
+ // $effective_url. An empty $string is returned as is.
+ function make_substitutions($string) {
+     if ($string == '') return $string;
+     global $item, $effective_url;
+     $placeholders = array('{url}', '{effective-url}');
+     $values = array(
+         htmlspecialchars($item->get_permalink()),
+         htmlspecialchars($effective_url)
+     );
+     return str_replace($placeholders, $values, $string);
+ }
+
+ // Return the shared Zend_Cache object (Core frontend, File backend).
+ // It is created on the first call and reused on every later call.
+ function get_cache() {
+     global $options, $valid_key;
+     static $cache = null;
+     if ($cache !== null) {
+         return $cache;
+     }
+     // getting a Zend_Cache_Core object
+     $cache = Zend_Cache::factory('Core', 'File',
+         // frontend options
+         array(
+             'lifetime' => 10*60, // cache lifetime of 10 minutes
+             'automatic_serialization' => false,
+             'write_control' => false,
+             'automatic_cleaning_factor' => $options->cache_cleanup,
+             'ignore_user_abort' => false
+         ),
+         // backend options
+         array(
+             // directory where to put the cache files; requests with a valid key get their own directory
+             'cache_dir' => ($valid_key) ? $options->cache_dir.'/rss-with-key/' : $options->cache_dir.'/rss/',
+             'file_locking' => false,
+             'read_control' => true,
+             'read_control_type' => 'strlen',
+             'hashed_directory_level' => $options->cache_directory_level,
+             'hashed_directory_perm' => 0777,
+             'cache_file_perm' => 0664,
+             'file_name_prefix' => 'ff'
+         )
+     );
+     return $cache;
+ }
+
+ // Print a debug message as "* message" followed by a newline and flush it
+ // immediately. Does nothing unless the global $debug_mode is truthy.
+ function debug($msg) {
+     global $debug_mode;
+     if (!$debug_mode) {
+         return;
+     }
+     echo '* ' . $msg . "\n";
+     // flush the output buffer, then the system write buffer
+     ob_flush();
+     flush();
+ }
diff --git a/vendor/full-text-rss/manifest.yml b/vendor/full-text-rss/manifest.yml
new file mode 100644
index 0000000..746d538
--- /dev/null
+++ b/vendor/full-text-rss/manifest.yml
@@ -0,0 +1,14 @@
+---
+applications:
+ .:
+# name: full-text-rss
+ framework:
+ name: php
+ info:
+ mem: 512M
+ description: PHP Application
+ exec:
+ infra: aws
+# url: ${name}.${target-base}
+ mem: 512M
+ instances: 1
diff --git a/vendor/full-text-rss/site_config/README.txt b/vendor/full-text-rss/site_config/README.txt
new file mode 100644
index 0000000..e966ee7
--- /dev/null
+++ b/vendor/full-text-rss/site_config/README.txt
@@ -0,0 +1,6 @@
+Full-Text RSS Site Patterns
+---------------------------
+
+Site patterns allow you to specify what should be extracted from specific sites.
+
+Please see http://help.fivefilters.org/customer/portal/articles/223153-site-patterns for more information.
\ No newline at end of file
diff --git a/vendor/full-text-rss/site_config/custom/index.php b/vendor/full-text-rss/site_config/custom/index.php
new file mode 100644
index 0000000..a1b767f
--- /dev/null
+++ b/vendor/full-text-rss/site_config/custom/index.php
@@ -0,0 +1,3 @@
+
\ No newline at end of file
diff --git a/vendor/full-text-rss/site_config/index.php b/vendor/full-text-rss/site_config/index.php
new file mode 100644
index 0000000..a1b767f
--- /dev/null
+++ b/vendor/full-text-rss/site_config/index.php
@@ -0,0 +1,3 @@
+
\ No newline at end of file
diff --git a/vendor/full-text-rss/site_config/standard/.about.com.txt b/vendor/full-text-rss/site_config/standard/.about.com.txt
new file mode 100644
index 0000000..4a01e93
--- /dev/null
+++ b/vendor/full-text-rss/site_config/standard/.about.com.txt
@@ -0,0 +1,4 @@
+title: //*[@id='title']//h1
+body: //*[(@id = "articlebody")]
+date: //*[(@id = "date")]
+test_url: http://nutrition.about.com/od/changeyourdiet/qt/healthysnacks.htm
\ No newline at end of file
diff --git a/vendor/full-text-rss/site_config/standard/.allthingsd.com.txt b/vendor/full-text-rss/site_config/standard/.allthingsd.com.txt
new file mode 100644
index 0000000..bbb0f7b
--- /dev/null
+++ b/vendor/full-text-rss/site_config/standard/.allthingsd.com.txt
@@ -0,0 +1,6 @@
+body: //div[@id='content-left']/div[@class='post']
+strip_id_or_class: social
+strip_id_or_class: atd-disqus-disclaimer
+tidy: no
+
+test_url: http://mediamemo.allthingsd.com/20110516/bit-ly-gets-a-new-boss/
\ No newline at end of file
diff --git a/vendor/full-text-rss/site_config/standard/.blog.163.com.txt b/vendor/full-text-rss/site_config/standard/.blog.163.com.txt
new file mode 100644
index 0000000..f5b0142
--- /dev/null
+++ b/vendor/full-text-rss/site_config/standard/.blog.163.com.txt
@@ -0,0 +1,26 @@
+# To administrator:
+# Please replace the hostname with "*.blog.163.com"
+
+# This filter is tested on:
+# http://wangzeke.blog.163.com/blog/static/933015402012410105922228/
+# http://wavow.blog.163.com/blog/static/532284320124117211245/
+# http://elainejeff.blog.163.com/blog/static/1671902912012498727253/
+
+
+strip://*[contains(@class, 'mcnt ztag')]//span[@style = 'display:none;']
+strip://*[contains(@id, 'divTopLink')]
+strip://*[contains(@class, 'phide')]
+strip://*[contains(@class, 'thide')]
+strip://*[contains(@id, 'topbar')]
+strip://*[contains(@class, 'tbar')]
+strip://*[contains(@class, 'snl')]
+strip://*[contains(@id, 'banner')]
+
+
+title://h3
+author://span[contains(@class, 'ztag pre')]
+date://span[contains(@class, 'blogsep')]
+body://div[contains(@class, 'mcnt ztag')]
+
+convert_double_br_tags: yes
+test_url: http://lvqiuluwei1510.blog.163.com/blog/static/9028525120124146100841/
\ No newline at end of file
diff --git a/vendor/full-text-rss/site_config/standard/.blogs.nytimes.com.txt b/vendor/full-text-rss/site_config/standard/.blogs.nytimes.com.txt
new file mode 100644
index 0000000..564a35f
--- /dev/null
+++ b/vendor/full-text-rss/site_config/standard/.blogs.nytimes.com.txt
@@ -0,0 +1,17 @@
+body: //div[@class='entry-content']
+title: //h1[@class='entry-title']
+# Two author lines because krugman.blogs.nytimes.com is a special case
+author: substring-after(//div[@class="box module nocontent"]/h4, "About ")
+author: //address/a
+date: //meta[@name="PUD"]/@content
+date: //*[@class='date']
+
+#Removes related content but cleans up article text
+strip: //ul[@class='toolsList wrap']
+strip_id_or_class:inlineModule
+strip_id_or_class:module
+strip_id_or_class:toolsListContainer
+prune: no
+test_url: http://opinionator.blogs.nytimes.com/2011/02/03/lost-and-gone-forever/
+test_url: http://krugman.blogs.nytimes.com/2012/09/12/a-vote-of-confidence/
+test_url: http://bits.blogs.nytimes.com/2012/01/16/wikipedia-plans-to-go-dark-on-wednesday-to-protest-sopa/
\ No newline at end of file
diff --git a/vendor/full-text-rss/site_config/standard/.blogspot.com.txt b/vendor/full-text-rss/site_config/standard/.blogspot.com.txt
new file mode 100644
index 0000000..570262e
--- /dev/null
+++ b/vendor/full-text-rss/site_config/standard/.blogspot.com.txt
@@ -0,0 +1,11 @@
+date: //*[contains(@class, 'date-header')]
+title://*[contains(@class,'post-title')]
+body://div[contains(@class,'post-body')]
+body://div[contains(@class,'entry-content')]
+strip_comments:no
+prune:no
+
+tidy:yes
+
+test_url: http://themerryone.blogspot.com/2010/08/new-move-new-blog.html
+test_url: http://strobist.blogspot.com/2012/01/qa-down-phase-one-rabbit-hole.html
\ No newline at end of file
diff --git a/vendor/full-text-rss/site_config/standard/.businessinsider.com.txt b/vendor/full-text-rss/site_config/standard/.businessinsider.com.txt
new file mode 100644
index 0000000..d8a8bba
--- /dev/null
+++ b/vendor/full-text-rss/site_config/standard/.businessinsider.com.txt
@@ -0,0 +1,9 @@
+title://div[@class="sl-layout-post"]/h1
+body: //div[contains(@class, 'post-content') or contains(@class, 'KonaBody')]
+strip: //div[contains(@class, "post-sidebar")]
+strip: //div[@id='related-links']
+author://div[@class="byline"]/a
+date://div[@class="byline"]/span[@class="date"]
+prune: no
+
+test_url: http://www.businessinsider.com/as-europe-booms-on-bailout-deal-john-boehner-just-confirmed-that-the-us-is-nowhere-2011-7
\ No newline at end of file
diff --git a/vendor/full-text-rss/site_config/standard/.calepin.co.txt b/vendor/full-text-rss/site_config/standard/.calepin.co.txt
new file mode 100644
index 0000000..5672ebb
--- /dev/null
+++ b/vendor/full-text-rss/site_config/standard/.calepin.co.txt
@@ -0,0 +1,5 @@
+author: //address/a
+date: //article/abbr/@title
+
+# Please consider *.calepin.co
+test_url: http://jokull.calepin.co/nutrition-pt-1.html
\ No newline at end of file
diff --git a/vendor/full-text-rss/site_config/standard/.cnet.com.txt b/vendor/full-text-rss/site_config/standard/.cnet.com.txt
new file mode 100644
index 0000000..74f46ba
--- /dev/null
+++ b/vendor/full-text-rss/site_config/standard/.cnet.com.txt
@@ -0,0 +1,16 @@
+title: //meta[@property="og:title"]/@content
+body: //div[contains(@class, 'postBody')]
+date: //div[@id='nameAndTime']/time
+author: //div[@id='nameAndTime']/span[@class='author']
+
+strip_id_or_class: image-credit
+strip_id_or_class: noAutolink
+strip_id_or_class: related
+
+prune: no
+tidy: no
+
+# early end
+replace_string(Download today's podcast): Download today's podcast
+
+test_url: http://www.cnet.com/8301-13952_1-57367607-81/the-404-981-where-the-world-is-a-vampire-podcast/
\ No newline at end of file
diff --git a/vendor/full-text-rss/site_config/standard/.ctv.ca.txt b/vendor/full-text-rss/site_config/standard/.ctv.ca.txt
new file mode 100644
index 0000000..01327e0
--- /dev/null
+++ b/vendor/full-text-rss/site_config/standard/.ctv.ca.txt
@@ -0,0 +1,8 @@
+title: //h3[@class='jhl']
+body: //div[@class='storyBody']
+strip: //p[contains(., 'Please Add Comments')]//following-sibling::*
+strip: //p[contains(., 'Please Add Comments')]
+strip: //p[em[contains(., 'This story has been updated from its original version')]]
+strip: //hr
+
+test_url: http://montreal.ctv.ca/servlet/an/local/CTVNews/20110914/mtl_construction_110914/20110915?hub=MontrealHome
\ No newline at end of file
diff --git a/vendor/full-text-rss/site_config/standard/.dreamwidth.org.txt b/vendor/full-text-rss/site_config/standard/.dreamwidth.org.txt
new file mode 100644
index 0000000..1392136
--- /dev/null
+++ b/vendor/full-text-rss/site_config/standard/.dreamwidth.org.txt
@@ -0,0 +1,7 @@
+# Please convert this to *.dreamwidth.org, as users receive individual subdomains.
+convert_double_br_tags: yes
+strip_image_src: 'dreamwidth.org'
+strip_id_or_class: 'currents'
+title: //div[contains(@id, 'entrysubj')]
+body: //div[contains(@class, 'usercontent')]
+test_url: http://dw-news.dreamwidth.org/28922.html
\ No newline at end of file
diff --git a/vendor/full-text-rss/site_config/standard/.elpais.com.txt b/vendor/full-text-rss/site_config/standard/.elpais.com.txt
new file mode 100644
index 0000000..86d64b7
--- /dev/null
+++ b/vendor/full-text-rss/site_config/standard/.elpais.com.txt
@@ -0,0 +1,13 @@
+title: //meta[@name='DC.title']/@content
+date: //meta[@name='DC.date']/@content
+body: //div[@class='columna_texto']
+body: //div[@id='cuerpo_noticia']
+prune: no
+
+strip_id_or_class: disposicion_vertical
+strip_id_or_class: ampliar_foto
+
+strip: //div[starts-with(@id, 'sumario') and contains(., 'más información')]
+
+test_url: http://economia.elpais.com/economia/2012/02/07/actualidad/1328611790_342868.html
+test_url: http://internacional.elpais.com/internacional/2012/02/07/actualidad/1328602145_448315.html
\ No newline at end of file
diff --git a/vendor/full-text-rss/site_config/standard/.estadao.com.br.txt b/vendor/full-text-rss/site_config/standard/.estadao.com.br.txt
new file mode 100644
index 0000000..c6349ef
--- /dev/null
+++ b/vendor/full-text-rss/site_config/standard/.estadao.com.br.txt
@@ -0,0 +1,15 @@
+title: //span[@id='ctl00_ctl00_MainContent_MainContent_RecipeImage1_lblRecipeTitle']
+body: //div[@class='img_article'] | //div[@class='article']//div[@class='article_header' or @class='article_content']
+body: //div[@class='texto-noticia']
+
+author: //div[@class='autor']//em
+author: //div[@class='bb-md-noticia-autor']
+
+strip_id_or_class: divulgar
+strip_id_or_class: innerRight
+strip: //div[@class='size' or @class='imprimir']
+
+prune: no
+
+test_url: http://revistapiaui.estadao.com.br/edicao-62/carta-de-havana/la-vida-por-la-izquierda
+test_url: http://economia.estadao.com.br/noticias/economia,pf-panamericano-tambem-irrigou-contas-de-executivos-do-grupo-silvio-santos,94648,0.htm
\ No newline at end of file
diff --git a/vendor/full-text-rss/site_config/standard/.ew.com.txt b/vendor/full-text-rss/site_config/standard/.ew.com.txt
new file mode 100644
index 0000000..1ce69c8
--- /dev/null
+++ b/vendor/full-text-rss/site_config/standard/.ew.com.txt
@@ -0,0 +1,14 @@
+next_page_link: //span[@class='paging-next']/a[contains(., 'NEXT')]
+strip_id_or_class: article-paging
+strip_id_or_class: eyebrow
+strip_id_or_class: underbar
+strip_id_or_class: extras
+strip_id_or_class: share
+strip_id_or_class: recap-links
+strip_id_or_class: tvr-author
+strip_id_or_class: pub-date
+strip_id_or_class: post-title
+
+title: //h1[@class='post-title']
+
+test_url: http://tvrecaps.ew.com/recap/fringe-season-4-episode-2/
\ No newline at end of file
diff --git a/vendor/full-text-rss/site_config/standard/.expressen.se.txt b/vendor/full-text-rss/site_config/standard/.expressen.se.txt
new file mode 100644
index 0000000..d214ba6
--- /dev/null
+++ b/vendor/full-text-rss/site_config/standard/.expressen.se.txt
@@ -0,0 +1,6 @@
+body: //div[@id='article']
+title: //div[@id='article']//div[contains(@class, 'content')]/h1
+strip: //div[@class='art-right']
+strip: //img[contains(@src, 'img/px.gif')]
+prune: no
+test_url: http://kvp.expressen.se/nyheter/1.2575726/kvinna-misstankt-for-angelholmsmord
\ No newline at end of file
diff --git a/vendor/full-text-rss/site_config/standard/.finance.yahoo.com.txt b/vendor/full-text-rss/site_config/standard/.finance.yahoo.com.txt
new file mode 100644
index 0000000..81c18fd
--- /dev/null
+++ b/vendor/full-text-rss/site_config/standard/.finance.yahoo.com.txt
@@ -0,0 +1,12 @@
+title: //meta[@property='og:title']/@content
+body: //div[@id='y-article-bd']
+body: //div[contains(@class, 'yom-art-content')]
+strip: //div[contains(@class, 'related-companies')]
+strip: //div[@id='y-article-related']
+strip: //div[@id='ypf-article-related']
+prune: no
+
+single_page_link: //div[@class='ft']//a[contains(@href, 'page=all')]
+
+test_url: http://sg.finance.yahoo.com/news/Motorola-takes-wraps-249-rsg-3508842732.html?x=0&.v=1
+test_url: http://finance.yahoo.com/news/super-young-retirement-savers.html
\ No newline at end of file
diff --git a/vendor/full-text-rss/site_config/standard/.fivefilters.org.txt b/vendor/full-text-rss/site_config/standard/.fivefilters.org.txt
new file mode 100644
index 0000000..dc1db43
--- /dev/null
+++ b/vendor/full-text-rss/site_config/standard/.fivefilters.org.txt
@@ -0,0 +1 @@
+prune: no
\ No newline at end of file
diff --git a/vendor/full-text-rss/site_config/standard/.fok.nl.txt b/vendor/full-text-rss/site_config/standard/.fok.nl.txt
new file mode 100644
index 0000000..731fbbf
--- /dev/null
+++ b/vendor/full-text-rss/site_config/standard/.fok.nl.txt
@@ -0,0 +1,11 @@
+title: //h1[@class='title']
+body: //div[@id='itemBody']
+
+strip_id_or_class: itemFooter
+
+replace_string(90%;">Lees ook): 0%;">