diff --git a/inc/3rdparty/site_config/standard/37signals.com.txt b/inc/3rdparty/site_config/standard/37signals.com.txt new file mode 100644 index 0000000..43a10ae --- /dev/null +++ b/inc/3rdparty/site_config/standard/37signals.com.txt @@ -0,0 +1,6 @@ +title: //div[@class='post_header']//h2/a +author: //span[@class='author'] +date: //span[@class='date'] +body: //div[@id='Content'] + +test_url: http://37signals.com/svn/posts/2785-the-end-of-the-it-department \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/3quarksdaily.com.txt b/inc/3rdparty/site_config/standard/3quarksdaily.com.txt new file mode 100644 index 0000000..c4e7940 --- /dev/null +++ b/inc/3rdparty/site_config/standard/3quarksdaily.com.txt @@ -0,0 +1,9 @@ +body: //div[@class='content'] +date: //div[@class='content']/h2 +strip: //div[@class='content']/h2 +title: //div[@class='content']/h3 + +strip: //div[@id='postmenu'] +strip: //div[@class='trackback'] +tidy: no +test_url: http://www.3quarksdaily.com/3quarksdaily/2012/01/martin-luther-king-i-have-a-dream.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/3voor12.vpro.nl.txt b/inc/3rdparty/site_config/standard/3voor12.vpro.nl.txt new file mode 100644 index 0000000..b846b05 --- /dev/null +++ b/inc/3rdparty/site_config/standard/3voor12.vpro.nl.txt @@ -0,0 +1,11 @@ +body: //div[@id='main'] +title: //div[@class='intro']/h1 +author: //ul[@class='text-data']/li[@class='author'] +date: //ul[@class='text-data']/li[@class='date'] +convert_double_br_tags: yes +tidy: no + +strip: //div[@class='share'] +strip: //*[@class='zoom'] +strip: //div[@id='disqus_thread'] +test_url: http://3voor12.vpro.nl/nieuws/2012/januari/Ook-website-GroenLinks-woensdag-op-zwart-i-v-m--SOPA.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/43folders.com.txt b/inc/3rdparty/site_config/standard/43folders.com.txt new file mode 100644 index 0000000..e8073f6 --- /dev/null +++ 
b/inc/3rdparty/site_config/standard/43folders.com.txt @@ -0,0 +1,4 @@ +body: //*[@class = 'content'] +author: //*[@class = 'submitted']/a +date: substring-after(//*[@class = 'submitted']/text(), '|') +test_url: http://www.43folders.com/2011/04/22/cranking \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/500px.com.txt b/inc/3rdparty/site_config/standard/500px.com.txt new file mode 100644 index 0000000..68e6b2d --- /dev/null +++ b/inc/3rdparty/site_config/standard/500px.com.txt @@ -0,0 +1,27 @@ +# very loose setup for both 500px.com/photo/* and 500px.com/blog/* +# photo page example: http://500px.com/photo/4181666 +# blog page example: http://500px.com/blog/110 + +# avoid "no text" error +tidy:no +prune:no + +# reorganize photo page elements +#body://div[contains(@class,'container')] +move_into(body)://div[contains(@id,'thephoto')] +move_into(body)://div[contains(@id,'description')] +move_into(body)://div[contains(@id,'tags')] +move_into(body)://div[contains(@id,'photo-info')] + +# clean photo page info +strip://span[contains(@id,'copyright')] +strip://*[contains(@id,'store')] +strip://*[contains(@id,'user-info')] +strip://*[contains(@id,'photo-stats')] +strip://*[contains(@id,'voting_controls_container')] +strip://*[contains(@id,'more-photos')] +strip://*[contains(@id,'embed-photo')] + +# clean blog page side bar +strip://*[contains(@class,'col d3 clearafter')] +test_url: http://500px.com/photo/3641041?from=editors \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/512pixels.net.txt b/inc/3rdparty/site_config/standard/512pixels.net.txt new file mode 100644 index 0000000..e458980 --- /dev/null +++ b/inc/3rdparty/site_config/standard/512pixels.net.txt @@ -0,0 +1,2 @@ +title: substring-before(//title, '—') +test_url: http://512pixels.net/more-on-linked-lists/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/5by5.tv.txt b/inc/3rdparty/site_config/standard/5by5.tv.txt new file mode 100644 index 
0000000..dce0df4 --- /dev/null +++ b/inc/3rdparty/site_config/standard/5by5.tv.txt @@ -0,0 +1,9 @@ +body: //*[@id="episode"] +prune: no +tidy: no + +autodetect_next_page: no +strip_id_or_class: player + +strip://*[@id="header"] +test_url: http://5by5.tv/buildanalyze/60 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/944.com.txt b/inc/3rdparty/site_config/standard/944.com.txt new file mode 100644 index 0000000..84380e7 --- /dev/null +++ b/inc/3rdparty/site_config/standard/944.com.txt @@ -0,0 +1,9 @@ +title: //h2[@class='border'] +body: //div[@class='padding'] + +convert_double_br_tags: yes + +strip: //div[@id='social_sharing'] +strip: //div[@class='socialLinks'] + +test_url: http://www.944.com/articles/mild-obsessions-frock-la-get-to-know-victoria-tik-s-haute-sustainable-fashion-line/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/aachener-nachrichten.de.txt b/inc/3rdparty/site_config/standard/aachener-nachrichten.de.txt new file mode 100644 index 0000000..379592e --- /dev/null +++ b/inc/3rdparty/site_config/standard/aachener-nachrichten.de.txt @@ -0,0 +1,10 @@ +title: //meta[@property='og:title']/@content +body: //*[@class='fliesstext_detail' or @class='detail_fliesstext'] | //img[@itemprop="image" and starts-with(@src, "/sixcms/media.php/")] + +strip_id_or_class: socialshareprivacy1 +strip_id_or_class: zvaFacebookButton + +tidy: no +prune: no + +test_url: http://www.aachener-nachrichten.de/lokales/aachen-detail-an/2517757 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/aachener-zeitung.de.txt b/inc/3rdparty/site_config/standard/aachener-zeitung.de.txt new file mode 100644 index 0000000..4d76fac --- /dev/null +++ b/inc/3rdparty/site_config/standard/aachener-zeitung.de.txt @@ -0,0 +1,10 @@ +title: //meta[@property='og:title']/@content +body: //*[@class='fliesstext_detail' or @class='detail_fliesstext'] | //img[@itemprop="image" and starts-with(@src, "/sixcms/media.php/")] + 
+strip_id_or_class: socialshareprivacy1 +strip_id_or_class: zvaFacebookButton + +tidy: no +prune: no + +test_url: http://www.aachener-zeitung.de/sixcms/detail.php?template=az_detail&id=2552718 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/abc.es.txt b/inc/3rdparty/site_config/standard/abc.es.txt new file mode 100644 index 0000000..a99833d --- /dev/null +++ b/inc/3rdparty/site_config/standard/abc.es.txt @@ -0,0 +1,7 @@ +title: //meta[@property='og:title']/@content +body: //div[@class='datosi' or @class='date' or @class='photo-alt1' or @class='text'] +strip_id_or_class: colB + +prune: no + +test_url: http://www.abc.es/20120209/tv-series/abci-house-ultima-temporada-201202090936.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/abc.net.au.txt b/inc/3rdparty/site_config/standard/abc.net.au.txt new file mode 100644 index 0000000..5e6269c --- /dev/null +++ b/inc/3rdparty/site_config/standard/abc.net.au.txt @@ -0,0 +1,10 @@ +title: //h1 +author: //div[@class="byline"]/a +date: //span[@class="timestamp"] + +strip: //p[@class="topics"] +strip: //h1 +strip: //div[@class="byline"] +strip: //p[@class="published"] +strip: //div[contains(@class,"featured-scroller")] +test_url: http://www.abc.net.au/news/2011-11-08/crabb-carbon-legislation-abbott-demolition/3652544 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/abcnews.go.com.txt b/inc/3rdparty/site_config/standard/abcnews.go.com.txt new file mode 100644 index 0000000..c515d3e --- /dev/null +++ b/inc/3rdparty/site_config/standard/abcnews.go.com.txt @@ -0,0 +1,27 @@ +title: //h1[@class='headline'] +body: //div[@id='storyText'] +# for video entries +body: //img[@id='ff-img'] | //div[@id='meta']//div[contains(@class, 'overview')] +author: //div[@class='byline'] +date: //div[@class='date'] +strip: //*[@id='date_partner'] + +strip: //div[@class='breadcrumb'] +strip: //div[contains(@class,'show_tools')] +strip: //div[@id='sponsoredByAd'] +strip: 
//div[contains(@class,'rel_container')] +strip: //p[a[starts-with(@href, 'http://www.twitter.com')]] +strip: //p[a[starts-with(@href, 'http://www.facebook.com')]] +strip: //p[contains(., 'Click here to return to')] +#strip_id_or_class: media +strip_id_or_class: mediaplayer + +replace_string( +replace_string:
Price: + +strip_id_or_class: collapsePS +strip_id_or_class: expandPS +strip_id_or_class: psPlaceHolde +strip: //li[contains(., 'update product info') or contains(., 'give feedback on images')] + +test_url: http://www.amazon.com/Common-Sense-Forestry-Living-Mother/dp/1931498210/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/americandrink.net.txt b/inc/3rdparty/site_config/standard/americandrink.net.txt new file mode 100644 index 0000000..dee0e86 --- /dev/null +++ b/inc/3rdparty/site_config/standard/americandrink.net.txt @@ -0,0 +1,6 @@ +title: //div[@class='head']/h2/a +author: //div[@class='head']/a +date: //div[@class='head']/p[@class='date']/a +body: //div[@class='copy'] +strip: //p[@class='meta'] +test_url: http://americandrink.net/post/10567188712/free-the-hooch \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/americascup.com.txt b/inc/3rdparty/site_config/standard/americascup.com.txt new file mode 100644 index 0000000..b1673b6 --- /dev/null +++ b/inc/3rdparty/site_config/standard/americascup.com.txt @@ -0,0 +1,10 @@ +title: //div[@class="editorial-content"]/h3 +body: //div[@class="hero-image" or @class="editorial-content"] + +strip: //ul[@class="hero-caption"] +strip_id_or_class: footer + +prune: no +tidy: no + +test_url: http://www.americascup.com/en/Latest/News/2012/3/Coutts-and-Peyron-tell-transformative-tale-at-Global-Sports-Forum/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/americastestkitchenfeed.com.txt b/inc/3rdparty/site_config/standard/americastestkitchenfeed.com.txt new file mode 100644 index 0000000..8bf31ec --- /dev/null +++ b/inc/3rdparty/site_config/standard/americastestkitchenfeed.com.txt @@ -0,0 +1,5 @@ +title: //h1[@class="post-title"] +author: //span[@class="author"]/a +date: //span[@class="date"] +body: //div[@class="post-content main"] +test_url: 
http://www.americastestkitchenfeed.com/gadgets-and-gear/2012/07/chill-out-with-tovolos-king-cube-silicone-ice-cube-tray/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/anandtech.com.txt b/inc/3rdparty/site_config/standard/anandtech.com.txt new file mode 100644 index 0000000..8067e03 --- /dev/null +++ b/inc/3rdparty/site_config/standard/anandtech.com.txt @@ -0,0 +1,11 @@ +author: //a[@class='b'][1] +date: substring-after(substring-before(//div, 'Posted in'), ' on ') +strip_image_src: /content/images/globals/ +strip: //h2[. = 'Page 1']/preceding::p +strip: //h2 + +prune: no + +single_page_link: concat('http://www.anandtech.com/print/', substring-after(//meta[@property='og:url']/@content, '/show/')) + +test_url: http://www.anandtech.com/show/5812/eurocom-monster-10-clevos-little-monster/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/andyrutledge.com.txt b/inc/3rdparty/site_config/standard/andyrutledge.com.txt new file mode 100644 index 0000000..f9ffd3c --- /dev/null +++ b/inc/3rdparty/site_config/standard/andyrutledge.com.txt @@ -0,0 +1,9 @@ +title: //h2 +author: string('Andy Rutledge') +date: //div[@class='articledate'] +body: //div[@class='copybody'] + +strip: //*[@class='space'] +strip: //*[@class='articleFoot'] + +test_url: http://www.andyrutledge.com/hungry-for-a-better-menu.php \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/annatravelling.wordpress.com.txt b/inc/3rdparty/site_config/standard/annatravelling.wordpress.com.txt new file mode 100644 index 0000000..a5c7c08 --- /dev/null +++ b/inc/3rdparty/site_config/standard/annatravelling.wordpress.com.txt @@ -0,0 +1,9 @@ +title: //h1[@class="title"] + +author: ("Anna Manasova") +# is ignored, unfortunately + +date: //p[@class="date"] + +body: //div[@class="entry"] +test_url: http://annatravelling.wordpress.com/2011/11/07/a-day-of-cooking-thai/ \ No newline at end of file diff --git 
a/inc/3rdparty/site_config/standard/applature.com.txt b/inc/3rdparty/site_config/standard/applature.com.txt new file mode 100644 index 0000000..a78a615 --- /dev/null +++ b/inc/3rdparty/site_config/standard/applature.com.txt @@ -0,0 +1,18 @@ +title: //h1[contains(@class, 'title')] +body: //div[@id='mainContent']//div[contains(@class, 'section_content')] | //ul[@class='section_footer'] +date: //div[@class='date'] + +strip_id_or_class: sharethis +strip_id_or_class: stats +strip_id_or_class: apply_form +strip_id_or_class: job_map +strip_id_or_class: respond +strip: //h1//span[@class='type'] +strip: //li[@class='print' or @class='map'] + +replace_string(