From 1c182b6c9d74875054dc8efe75564ceab1bc1abe Mon Sep 17 00:00:00 2001 From: nicosomb Date: Wed, 17 Apr 2013 15:11:57 +0200 Subject: [PATCH] =?UTF-8?q?t=C3=A9l=C3=A9chargement=20des=20images=20d'un?= =?UTF-8?q?=20article?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- inc/config.php | 5 +- inc/db.php | 7 ++- inc/functions.php | 138 +++++++++++++++++++++++++++++++++++++++++----- index.php | 2 +- view.php | 2 +- 5 files changed, 136 insertions(+), 18 deletions(-) diff --git a/inc/config.php b/inc/config.php index d468228..51a25d3 100644 --- a/inc/config.php +++ b/inc/config.php @@ -7,7 +7,10 @@ * @copyright 2013 * @license http://www.wtfpl.net/ see COPYING file */ + +if(!is_dir('db/')){mkdir('db/',0705);} define ('DB_PATH', 'sqlite:./db/poche.sqlite'); +define ('ABS_PATH', 'archiveImg/'); include 'db.php'; include 'functions.php'; @@ -44,4 +47,4 @@ $token = (isset ($_REQUEST['token'])) ? $_REQUEST['token'] : ''; if ($action != '') { action_to_do($action, $id, $url, $token); -} \ No newline at end of file +} diff --git a/inc/db.php b/inc/db.php index 705fc0c..a1674ea 100644 --- a/inc/db.php +++ b/inc/db.php @@ -10,12 +10,15 @@ class db { var $handle; - function __construct($path) { + function __construct($path) + { $this->handle = new PDO($path); + $this->handle->exec('CREATE TABLE IF NOT EXISTS "entries" ("id" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL UNIQUE , "title" VARCHAR, "url" VARCHAR UNIQUE , "is_read" INTEGER DEFAULT 0, "is_fav" INTEGER DEFAULT 0, "content" BLOB)'); $this->handle->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION); } - public function getHandle() { + public function getHandle() + { return $this->handle; } } \ No newline at end of file diff --git a/inc/functions.php b/inc/functions.php index 02d2c9d..37abafd 100755 --- a/inc/functions.php +++ b/inc/functions.php @@ -16,8 +16,9 @@ function get_poche_url() } // function define to retrieve url content -function get_external_file($url, $timeout) +function get_external_file($url) { + $timeout = 15; // spoofing FireFox 18.0 $useragent="Mozilla/5.0 (Windows NT 5.1; rv:18.0) Gecko/20100101 Firefox/18.0"; @@ -45,10 +46,7 @@ function get_external_file($url, $timeout) } else { // create http context and add timeout and user-agent - $context = stream_context_create(array('http'=>array('timeout' => $timeout, // Timeout : time until we stop waiting for the response. - 'header'=> "User-Agent: ".$useragent, // spoot Mozilla Firefox - 'follow_location' => true - ))); + $context = stream_context_create(array('http'=>array('timeout' => $timeout,'header'=> "User-Agent: ".$useragent,/*spoot Mozilla Firefox*/'follow_location' => true))); // only download page lesser than 4MB $data = @file_get_contents($url, false, $context, -1, 4000000); // We download at most 4 MB from source. @@ -90,7 +88,7 @@ function get_external_file($url, $timeout) /** * Préparation de l'URL avec récupération du contenu avant insertion en base */ -function prepare_url($url) +function prepare_url($url, $id) { $parametres = array(); $url = html_entity_decode(trim($url)); @@ -112,19 +110,127 @@ function prepare_url($url) $r->convertLinksToFootnotes = TRUE; if($r->init()) { - $title = $r->articleTitle->innerHTML; + $content = $r->articleContent->innerHTML; + $parametres['title'] = $r->articleTitle->innerHTML; + $parametres['content'] = filtre_picture($content, $url, $id); + return $parametres; } } - $parametres['title'] = $title; - $parametres['content'] = $r->articleContent->innerHTML; + logm('error during url preparation'); + return FALSE; +} - return $parametres; +/** + * On modifie les URLS des images dans le corps de l'article + */ +function filtre_picture($content, $url, $id) +{ + $matches = array(); + preg_match_all('#<\s*(img)[^>]+src="([^"]*)"[^>]*>#Si', $content, $matches, PREG_SET_ORDER); + foreach($matches as $i => $link) + { + $link[1] = trim($link[1]); + if (!preg_match('#^(([a-z]+://)|(\#))#', $link[1]) ) + { + $absolute_path = get_absolute_link($link[2],$url); + $filename = basename(parse_url($absolute_path, PHP_URL_PATH)); + $directory = create_assets_directory($id); + $fullpath = $directory . '/' . $filename; + download_pictures($absolute_path, $fullpath); + $content = str_replace($matches[$i][2], $fullpath, $content); + } + + } + + return $content; +} + +/** + * Retourne le lien absolu + */ +function get_absolute_link($relative_link, $url) +{ + /* return if already absolute URL */ + if (parse_url($relative_link, PHP_URL_SCHEME) != '') return $relative_link; + + /* queries and anchors */ + if ($relative_link[0]=='#' || $relative_link[0]=='?') return $url . $relative_link; + + /* parse base URL and convert to local variables: + $scheme, $host, $path */ + extract(parse_url($url)); + + /* remove non-directory element from path */ + $path = preg_replace('#/[^/]*$#', '', $path); + + /* destroy path if relative url points to root */ + if ($relative_link[0] == '/') $path = ''; + + /* dirty absolute URL */ + $abs = $host . $path . '/' . $relative_link; + + /* replace '//' or '/./' or '/foo/../' with '/' */ + $re = array('#(/\.?/)#', '#/(?!\.\.)[^/]+/\.\./#'); + for($n=1; $n>0; $abs=preg_replace($re, '/', $abs, -1, $n)) {} + + /* absolute URL is ready! */ + return $scheme.'://'.$abs; +} + +/** + * Téléchargement des images + */ + +function download_pictures($absolute_path, $fullpath) +{ + $rawdata = get_external_file($absolute_path); + + if(file_exists($fullpath)) { + unlink($fullpath); + } + $fp = fopen($fullpath, 'x'); + fwrite($fp, $rawdata); + fclose($fp); +} + +/** + * Crée un répertoire de médias pour l'article + */ +function create_assets_directory($id) +{ + $assets_path = ABS_PATH; + if(!is_dir($assets_path)) { + mkdir($assets_path, 0705); + } + + $article_directory = $assets_path . $id; + if(!is_dir($article_directory)) { + mkdir($article_directory, 0705); + } + + return $article_directory; +} + +/** + * Suppression du répertoire d'images + */ +function remove_directory($directory) +{ + if(is_dir($directory)) { + $files = array_diff(scandir($directory), array('.','..')); + foreach ($files as $file) { + // FIXME c'est quoi delTree ?? + (is_dir("$directory/$file")) ? delTree("$directory/$file") : unlink("$directory/$file"); + } + return rmdir($directory); + } } /** * Appel d'une action (mark as fav, archive, delete) */ + function action_to_do($action, $id, $url, $token) { global $db; @@ -135,13 +241,19 @@ function action_to_do($action, $id, $url, $token) if ($url == '') continue; - $parametres_url = prepare_url($url); - $sql_action = 'INSERT INTO entries ( url, title, content ) VALUES (?, ?, ?)'; - $params_action = array($url, $parametres_url['title'], $parametres_url['content']); + $req = $db->getHandle()->query("SELECT id FROM entries ORDER BY id DESC"); + $id = $req->fetchColumn()+1; + + if($parametres_url = prepare_url($url, $id)) { + $sql_action = 'INSERT INTO entries ( id, url, title, content ) VALUES (?,?, ?, ?)'; + $params_action = array($id,$url, $parametres_url['title'], $parametres_url['content']); + } + logm('add link ' . $url); break; case 'delete': if (verif_token($token)) { + remove_directory(ABS_PATH . $id); $sql_action = "DELETE FROM entries WHERE id=?"; $params_action = array($id); logm('delete link #' . $id); diff --git a/index.php b/index.php index c303f8e..d0d008e 100755 --- a/index.php +++ b/index.php @@ -23,4 +23,4 @@ $tpl->draw('head'); $tpl->draw('home'); $tpl->draw('entries'); $tpl->draw('js'); -$tpl->draw('footer'); \ No newline at end of file +$tpl->draw('footer'); diff --git a/view.php b/view.php index 9ba6f62..9aa9a44 100755 --- a/view.php +++ b/view.php @@ -20,7 +20,7 @@ if(!empty($id)) { $tpl->assign('id', $entry[0]['id']); $tpl->assign('url', $entry[0]['url']); $tpl->assign('title', $entry[0]['title']); - $tpl->assign('content', $entry[0]['content']); + $tpl->assign('content', gzinflate(base64_decode($entry[0]['content']))); $tpl->assign('is_fav', $entry[0]['is_fav']); $tpl->assign('is_read', $entry[0]['is_read']); $tpl->assign('load_all_js', 0);