mirror of
https://github.com/moparisthebest/wallabag
synced 2024-11-27 11:22:17 -05:00
fix bug #209: titles with colon badly parsed
This commit is contained in:
parent
084ec2a63d
commit
b9523a0ba0
46
inc/poche/PocheReadability.php
Normal file
46
inc/poche/PocheReadability.php
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
/**
 * Readability subclass that overrides how the article title is derived
 * from the document.
 *
 * Only " | " and " - " are treated as separators between the article title
 * and the rest of the <title> text (e.g. a site name); no other punctuation
 * splits the title.
 */
class PocheReadability extends Readability
{
    /**
     * Get the article title as an H1.
     *
     * The title is taken from the document's <title> element. If it contains
     * a " | " or " - " separator, the part before the last separator is kept
     * (or, when that part has fewer than three words, the part after the
     * first separator). If there is no separator and the title is longer
     * than 150 or shorter than 15 characters, the text of the document's
     * only <h1> is used instead, when exactly one <h1> exists. Finally, a
     * candidate of four words or fewer is discarded in favour of the
     * original, unmodified <title> text.
     *
     * @return DOMElement
     */
    protected function getArticleTitle() {
        $curTitle = '';
        $origTitle = '';

        try {
            // item(0) is null when the document has no <title> element.
            $titleNode = $this->dom->getElementsByTagName('title')->item(0);
            if ($titleNode !== null) {
                $curTitle = $origTitle = $this->getInnerText($titleNode);
            }
        } catch (Exception $e) {
            // Deliberate best effort: a failure while reading the <title>
            // leaves both titles empty and the fallbacks below take over.
        }

        if (preg_match('/ [\|\-] /', $curTitle)) {
            // Keep everything before the last separator.
            $curTitle = preg_replace('/(.*)[\|\-] .*/i', '$1', $origTitle);

            // Too few words left: keep what follows the first separator instead.
            if (count(explode(' ', $curTitle)) < 3) {
                $curTitle = preg_replace('/[^\|\-]*[\|\-](.*)/i', '$1', $origTitle);
            }
        } else {
            // Measure the title in characters, not bytes, so that multibyte
            // UTF-8 titles are compared correctly against the thresholds.
            // DOM text is UTF-8; strlen() is only the fallback when the
            // mbstring extension is not loaded.
            $titleLength = function_exists('mb_strlen')
                ? mb_strlen($curTitle, 'UTF-8')
                : strlen($curTitle);

            if ($titleLength > 150 || $titleLength < 15) {
                $hOnes = $this->dom->getElementsByTagName('h1');
                // Only trust the <h1> when it is unambiguous.
                if ($hOnes->length == 1) {
                    $curTitle = $this->getInnerText($hOnes->item(0));
                }
            }
        }

        $curTitle = trim($curTitle);

        // A very short result is more likely a site name or a fragment than
        // the real title, so fall back to the full original <title> text.
        if (count(explode(' ', $curTitle)) <= 4) {
            $curTitle = $origTitle;
        }

        $articleTitle = $this->dom->createElement('h1');
        // NOTE(review): innerHTML is not a native DOMElement property; this
        // presumably relies on the element class registered by the parent
        // Readability - confirm before changing how the text is assigned.
        $articleTitle->innerHTML = $curTitle;

        return $articleTitle;
    }
}
|
@ -354,7 +354,7 @@ class Url
|
|||||||
}
|
}
|
||||||
if (isset($splink)) {
|
if (isset($splink)) {
|
||||||
// Build DOM tree from HTML
|
// Build DOM tree from HTML
|
||||||
$readability = new Readability($html, $url);
|
$readability = new PocheReadability($html, $url);
|
||||||
$xpath = new DOMXPath($readability->dom);
|
$xpath = new DOMXPath($readability->dom);
|
||||||
// Loop through single_page_link xpath expressions
|
// Loop through single_page_link xpath expressions
|
||||||
$single_page_url = null;
|
$single_page_url = null;
|
||||||
|
@ -20,6 +20,7 @@ require_once __DIR__ . '/../../inc/poche/Url.class.php';
|
|||||||
require_once __DIR__ . '/../../inc/3rdparty/class.messages.php';
|
require_once __DIR__ . '/../../inc/3rdparty/class.messages.php';
|
||||||
require_once __DIR__ . '/../../inc/poche/Poche.class.php';
|
require_once __DIR__ . '/../../inc/poche/Poche.class.php';
|
||||||
require_once __DIR__ . '/../../inc/3rdparty/Readability.php';
|
require_once __DIR__ . '/../../inc/3rdparty/Readability.php';
|
||||||
|
require_once __DIR__ . '/../../inc/poche/PocheReadability.php';
|
||||||
require_once __DIR__ . '/../../inc/3rdparty/Encoding.php';
|
require_once __DIR__ . '/../../inc/3rdparty/Encoding.php';
|
||||||
require_once __DIR__ . '/../../inc/poche/Database.class.php';
|
require_once __DIR__ . '/../../inc/poche/Database.class.php';
|
||||||
require_once __DIR__ . '/../../vendor/autoload.php';
|
require_once __DIR__ . '/../../vendor/autoload.php';
|
||||||
@ -48,10 +49,3 @@ if (!ini_get('date.timezone') || !@date_default_timezone_set(ini_get('date.timez
|
|||||||
}
|
}
|
||||||
|
|
||||||
$poche = new Poche();
|
$poche = new Poche();
|
||||||
#XSRF protection with token
|
|
||||||
// if (!empty($_POST)) {
|
|
||||||
// if (!Session::isToken($_POST['token'])) {
|
|
||||||
// die(_('Wrong token'));
|
|
||||||
// }
|
|
||||||
// unset($_SESSION['tokens']);
|
|
||||||
// }
|
|
Loading…
Reference in New Issue
Block a user