From b9523a0ba09b5641e93fcd8300dd1a9f5145da2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20L=C5=93uillet?= Date: Thu, 12 Sep 2013 19:28:59 +0200 Subject: [PATCH] fix bug #209: titles with colon bad parsed --- inc/poche/PocheReadability.php | 46 ++++++++++++++++++++++++++++++++++ inc/poche/Url.class.php | 2 +- inc/poche/config.inc.php | 10 ++------ 3 files changed, 49 insertions(+), 9 deletions(-) create mode 100644 inc/poche/PocheReadability.php diff --git a/inc/poche/PocheReadability.php b/inc/poche/PocheReadability.php new file mode 100644 index 0000000..48ae90d --- /dev/null +++ b/inc/poche/PocheReadability.php @@ -0,0 +1,46 @@ +getInnerText($this->dom->getElementsByTagName('title')->item(0)); + } catch(Exception $e) {} + + if (preg_match('/ [\|\-] /', $curTitle)) + { + $curTitle = preg_replace('/(.*)[\|\-] .*/i', '$1', $origTitle); + + if (count(explode(' ', $curTitle)) < 3) { + $curTitle = preg_replace('/[^\|\-]*[\|\-](.*)/i', '$1', $origTitle); + } + } + else if(strlen($curTitle) > 150 || strlen($curTitle) < 15) + { + $hOnes = $this->dom->getElementsByTagName('h1'); + if($hOnes->length == 1) + { + $curTitle = $this->getInnerText($hOnes->item(0)); + } + } + + $curTitle = trim($curTitle); + + if (count(explode(' ', $curTitle)) <= 4) { + $curTitle = $origTitle; + } + + $articleTitle = $this->dom->createElement('h1'); + $articleTitle->innerHTML = $curTitle; + + return $articleTitle; + } +} \ No newline at end of file diff --git a/inc/poche/Url.class.php b/inc/poche/Url.class.php index 5a89301..600a216 100644 --- a/inc/poche/Url.class.php +++ b/inc/poche/Url.class.php @@ -354,7 +354,7 @@ class Url } if (isset($splink)) { // Build DOM tree from HTML - $readability = new Readability($html, $url); + $readability = new PocheReadability($html, $url); $xpath = new DOMXPath($readability->dom); // Loop through single_page_link xpath expressions $single_page_url = null; diff --git a/inc/poche/config.inc.php b/inc/poche/config.inc.php index 4552669..aaa26af 100755 --- a/inc/poche/config.inc.php +++ b/inc/poche/config.inc.php @@ -20,6 +20,7 @@ require_once __DIR__ . '/../../inc/poche/Url.class.php'; require_once __DIR__ . '/../../inc/3rdparty/class.messages.php'; require_once __DIR__ . '/../../inc/poche/Poche.class.php'; require_once __DIR__ . '/../../inc/3rdparty/Readability.php'; +require_once __DIR__ . '/../../inc/poche/PocheReadability.php'; require_once __DIR__ . '/../../inc/3rdparty/Encoding.php'; require_once __DIR__ . '/../../inc/poche/Database.class.php'; require_once __DIR__ . '/../../vendor/autoload.php'; @@ -47,11 +48,4 @@ if (!ini_get('date.timezone') || !@date_default_timezone_set(ini_get('date.timez date_default_timezone_set('UTC'); } -$poche = new Poche(); -#XSRF protection with token -// if (!empty($_POST)) { -// if (!Session::isToken($_POST['token'])) { -// die(_('Wrong token')); -// } -// unset($_SESSION['tokens']); -// } \ No newline at end of file +$poche = new Poche(); \ No newline at end of file