diff --git a/inc/3rdparty/site_config/standard/24ways.org.txt b/inc/3rdparty/site_config/standard/24ways.org.txt old mode 100644 new mode 100755 index 03bd195..86c9e07 --- a/inc/3rdparty/site_config/standard/24ways.org.txt +++ b/inc/3rdparty/site_config/standard/24ways.org.txt @@ -1,6 +1,6 @@ -title: //div[@class='meta']/h2/a -author: //div[@class='meta']/h2/following-sibling::p/a/text() -date://div[@class='meta']/h2/strong -body: //div[@id='article'] +title: //div[@class='meta']/h2/a +author: //div[@class='meta']/h2/following-sibling::p/a/text() +date://div[@class='meta']/h2/strong +body: //div[@id='article'] strip: //div[@class='domore'] test_url: http://24ways.org/2011/composing-the-new-canon \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/36kr.com.txt b/inc/3rdparty/site_config/standard/36kr.com.txt new file mode 100755 index 0000000..d73d7de --- /dev/null +++ b/inc/3rdparty/site_config/standard/36kr.com.txt @@ -0,0 +1,8 @@ +title: //h1[contains(@class, 'entry-title')] +date: //meta[@name='weibo: article:create_at']/@content +body: //div[contains(@class, 'mainContent')] +strip_id_or_class: related_topics + +prune: no + +test_url: http://www.36kr.com/p/207879.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/37signals.com.txt b/inc/3rdparty/site_config/standard/37signals.com.txt old mode 100644 new mode 100755 index 43a10ae..531cac1 --- a/inc/3rdparty/site_config/standard/37signals.com.txt +++ b/inc/3rdparty/site_config/standard/37signals.com.txt @@ -1,6 +1,6 @@ -title: //div[@class='post_header']//h2/a -author: //span[@class='author'] -date: //span[@class='date'] -body: //div[@id='Content'] +title: //div[@class='post_header']//h2/a +author: //span[@class='author'] +date: //span[@class='date'] +body: //div[@id='Content'] test_url: http://37signals.com/svn/posts/2785-the-end-of-the-it-department \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/3quarksdaily.com.txt b/inc/3rdparty/site_config/standard/3quarksdaily.com.txt old mode 100644 new mode 100755 index c4e7940..80a3958 --- a/inc/3rdparty/site_config/standard/3quarksdaily.com.txt +++ b/inc/3rdparty/site_config/standard/3quarksdaily.com.txt @@ -1,9 +1,9 @@ -body: //div[@class='content'] -date: //div[@class='content']/h2 -strip: //div[@class='content']/h2 -title: //div[@class='content']/h3 - -strip: //div[@id='postmenu'] -strip: //div[@class='trackback'] -tidy: no +body: //div[@class='content'] +date: //div[@class='content']/h2 +strip: //div[@class='content']/h2 +title: //div[@class='content']/h3 + +strip: //div[@id='postmenu'] +strip: //div[@class='trackback'] +tidy: no test_url: http://www.3quarksdaily.com/3quarksdaily/2012/01/martin-luther-king-i-have-a-dream.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/3voor12.vpro.nl.txt b/inc/3rdparty/site_config/standard/3voor12.vpro.nl.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/43folders.com.txt b/inc/3rdparty/site_config/standard/43folders.com.txt old mode 100644 new mode 100755 index e8073f6..3777c66 --- a/inc/3rdparty/site_config/standard/43folders.com.txt +++ b/inc/3rdparty/site_config/standard/43folders.com.txt @@ -1,4 +1,4 @@ -body: //*[@class = 'content'] -author: //*[@class = 'submitted']/a +body: //*[@class = 'content'] +author: //*[@class = 'submitted']/a date: substring-after(//*[@class = 'submitted']/text(), '|') test_url: http://www.43folders.com/2011/04/22/cranking \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/500px.com.txt b/inc/3rdparty/site_config/standard/500px.com.txt old mode 100644 new mode 100755 index 68e6b2d..b9b7e9d --- a/inc/3rdparty/site_config/standard/500px.com.txt +++ b/inc/3rdparty/site_config/standard/500px.com.txt @@ -1,27 +1,27 @@ -# very loose setup for both 500px.com/photo/* and 500px.com/blog/* -# photo page example: http://500px.com/photo/4181666 -# blog page example: http://500px.com/blog/110 - -# avoid "no text" error -tidy:no -prune:no - -# reorganize photo page elements -#body://div[contains(@class,'container')] -move_into(body)://div[contains(@id,'thephoto')] -move_into(body)://div[contains(@id,'description')] -move_into(body)://div[contains(@id,'tags')] -move_into(body)://div[contains(@id,'photo-info')] - -# clean photo page info -strip://span[contains(@id,'copyright')] -strip://*[contains(@id,'store')] -strip://*[contains(@id,'user-info')] -strip://*[contains(@id,'photo-stats')] -strip://*[contains(@id,'voting_controls_container')] -strip://*[contains(@id,'more-photos')] -strip://*[contains(@id,'embed-photo')] - -# clean blog page side bar +# very loose setup for both 500px.com/photo/* and 500px.com/blog/* +# photo page example: http://500px.com/photo/4181666 +# blog page example: http://500px.com/blog/110 + +# avoid "no text" error +tidy:no +prune:no + +# reorganize photo page elements +#body://div[contains(@class,'container')] +move_into(body)://div[contains(@id,'thephoto')] +move_into(body)://div[contains(@id,'description')] +move_into(body)://div[contains(@id,'tags')] +move_into(body)://div[contains(@id,'photo-info')] + +# clean photo page info +strip://span[contains(@id,'copyright')] +strip://*[contains(@id,'store')] +strip://*[contains(@id,'user-info')] +strip://*[contains(@id,'photo-stats')] +strip://*[contains(@id,'voting_controls_container')] +strip://*[contains(@id,'more-photos')] +strip://*[contains(@id,'embed-photo')] + +# clean blog page side bar strip://*[contains(@class,'col d3 clearafter')] test_url: http://500px.com/photo/3641041?from=editors \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/512pixels.net.txt b/inc/3rdparty/site_config/standard/512pixels.net.txt old mode 100644 new mode 100755 diff --git a/inc/3rdparty/site_config/standard/5by5.tv.txt b/inc/3rdparty/site_config/standard/5by5.tv.txt old mode 100644 new mode 100755 index dce0df4..59b70a9 --- a/inc/3rdparty/site_config/standard/5by5.tv.txt +++ b/inc/3rdparty/site_config/standard/5by5.tv.txt @@ -1,9 +1,9 @@ -body: //*[@id="episode"] -prune: no -tidy: no - -autodetect_next_page: no -strip_id_or_class: player - +body: //*[@id="episode"] +prune: no +tidy: no + +autodetect_next_page: no +strip_id_or_class: player + strip://*[@id="header"] test_url: http://5by5.tv/buildanalyze/60 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/7newsbelize.com.txt b/inc/3rdparty/site_config/standard/7newsbelize.com.txt new file mode 100755 index 0000000..46d09f8 --- /dev/null +++ b/inc/3rdparty/site_config/standard/7newsbelize.com.txt @@ -0,0 +1,7 @@ +title: //*[@id='sstitle'] +body: //div[@id='sstory'] +strip_id_or_class: newsoptions +prune: no + +test_url: http://www.7newsbelize.com/sstory.php?nid=25654 +test_url: http://www.7newsbelize.com/7news.xml \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/944.com.txt b/inc/3rdparty/site_config/standard/944.com.txt old mode 100644 new mode 100755 index 84380e7..8bf6a4c --- a/inc/3rdparty/site_config/standard/944.com.txt +++ b/inc/3rdparty/site_config/standard/944.com.txt @@ -1,9 +1,9 @@ -title: //h2[@class='border'] -body: //div[@class='padding'] - -convert_double_br_tags: yes - -strip: //div[@id='social_sharing'] -strip: //div[@class='socialLinks'] +title: //h2[@class='border'] +body: //div[@class='padding'] + +convert_double_br_tags: yes + +strip: //div[@id='social_sharing'] +strip: //div[@class='socialLinks'] test_url: http://www.944.com/articles/mild-obsessions-frock-la-get-to-know-victoria-tik-s-haute-sustainable-fashion-line/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/README.md b/inc/3rdparty/site_config/standard/README.md new file mode 100755 index 0000000..9040ba8 --- /dev/null +++ b/inc/3rdparty/site_config/standard/README.md @@ -0,0 +1,38 @@ +Full-Text RSS site config files +================ + +[Full-Text RSS](http://fivefilters.org/content-only/), our article extraction tool, makes use of site-specific extraction rules to improve results. Each time a URL is processed, it checks to see if there are extraction rules for the site being processed. If there are no site patterns, it tries to detect the content block automatically. + +This repository contains the site config files we use in Full-Text RSS. + +### Contributing changes + +We chose GitHub for this set of files because they offer one feature which we hope will make contributing changes easier: [file editing](https://github.com/blog/844-forking-with-the-edit-button) through the web interface. + +You can now make changes to any of our site config files and request that your changes be pulled into the main set we maintain. This is what GitHub calls the Fork and Pull model: + +> The Fork & Pull Model lets anyone fork an existing repository and push changes to their personal fork without requiring access be granted to the source repository. The changes must then be pulled into the source repository by the project maintainer. This model reduces the amount of friction for new contributors and is popular with open source projects because it allows people to work independently without upfront coordination. + +When we receive a pull request we'll review the changes and if everything's okay we'll update our copy. + +If a site is not in our set, you can create a file for it in the same way. See [Creating files on GitHub](https://github.com/blog/1327-creating-files-on-github). + +### How to write a site config file + +The quickest and simplest way is to use our [point-and-click interface](http://siteconfig.fivefilters.org). It's a simple tool only intended to create a rule to extract the correct content block. + +For further refinements, e.g. selecting the title, stripping elements, dealing with multi-page articles, please see our [help page](http://help.fivefilters.org/customer/portal/articles/223153-site-patterns). + +### Instapaper + +When we introduced site patterns, we chose to adopt the [same format](http://blog.instapaper.com/post/730281947) used by Instapaper. This allows us to make use of the existing extraction rules contributed by Instapaper users. + +Marco, Instapaper's creator, graciously opened up the database of contributions to everyone: + +> And, recognizing that your efforts could be useful to a wide range of other tools and services, I'll make the list of all of these site-specific configurations available to the public, free, with no strings attached. + +Most of the extraction rules in our set are borrowed from Instapaper. You can see the list maintained by Instapaper at [instapaper.com/bodytext/](http://instapaper.com/bodytext/) (login required). + +### Testing site config files + +Currently you will have to have a copy of Full-Text RSS to test changes to the site config files. In the future we will try to make this process easier. diff --git a/inc/3rdparty/site_config/standard/aachener-nachrichten.de.txt b/inc/3rdparty/site_config/standard/aachener-nachrichten.de.txt old mode 100644 new mode 100755 index 379592e..b60c15d --- a/inc/3rdparty/site_config/standard/aachener-nachrichten.de.txt +++ b/inc/3rdparty/site_config/standard/aachener-nachrichten.de.txt @@ -1,10 +1,10 @@ -title: //meta[@property='og:title']/@content -body: //*[@class='fliesstext_detail' or @class='detail_fliesstext'] | //img[@itemprop="image" and starts-with(@src, "/sixcms/media.php/")] - -strip_id_or_class: socialshareprivacy1 -strip_id_or_class: zvaFacebookButton - -tidy: no -prune: no - +title: //meta[@property='og:title']/@content +body: //*[@class='fliesstext_detail' or @class='detail_fliesstext'] | //img[@itemprop="image" and starts-with(@src, "/sixcms/media.php/")] + +strip_id_or_class: socialshareprivacy1 +strip_id_or_class: zvaFacebookButton + +tidy: no +prune: no + test_url: http://www.aachener-nachrichten.de/lokales/aachen-detail-an/2517757 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/aachener-zeitung.de.txt b/inc/3rdparty/site_config/standard/aachener-zeitung.de.txt old mode 100644 new mode 100755 index 4d76fac..013afa4 --- a/inc/3rdparty/site_config/standard/aachener-zeitung.de.txt +++ b/inc/3rdparty/site_config/standard/aachener-zeitung.de.txt @@ -1,10 +1,10 @@ -title: //meta[@property='og:title']/@content -body: //*[@class='fliesstext_detail' or @class='detail_fliesstext'] | //img[@itemprop="image" and starts-with(@src, "/sixcms/media.php/")] - -strip_id_or_class: socialshareprivacy1 -strip_id_or_class: zvaFacebookButton - -tidy: no -prune: no - +title: //meta[@property='og:title']/@content +body: //*[@class='fliesstext_detail' or @class='detail_fliesstext'] | //img[@itemprop="image" and starts-with(@src, "/sixcms/media.php/")] + +strip_id_or_class: socialshareprivacy1 +strip_id_or_class: zvaFacebookButton + +tidy: no +prune: no + test_url: http://www.aachener-zeitung.de/sixcms/detail.php?template=az_detail&id=2552718 \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/abc.es.txt b/inc/3rdparty/site_config/standard/abc.es.txt old mode 100644 new mode 100755 index a99833d..43aadc4 --- a/inc/3rdparty/site_config/standard/abc.es.txt +++ b/inc/3rdparty/site_config/standard/abc.es.txt @@ -1,7 +1,7 @@ -title: //meta[@property='og:title']/@content -body: //div[@class='datosi' or @class='date' or @class='photo-alt1' or @class='text'] -strip_id_or_class: colB - -prune: no +title: //meta[@property='og:title']/@content +body: //div[@class='datosi' or @class='date' or @class='photo-alt1' or @class='text' or @itemprop='articleBody'] +strip_id_or_class: colB + +prune: no test_url: http://www.abc.es/20120209/tv-series/abci-house-ultima-temporada-201202090936.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/abc.net.au.txt b/inc/3rdparty/site_config/standard/abc.net.au.txt old mode 100644 new mode 100755 index 5e6269c..22b3a0f --- a/inc/3rdparty/site_config/standard/abc.net.au.txt +++ b/inc/3rdparty/site_config/standard/abc.net.au.txt @@ -1,10 +1,18 @@ -title: //h1 -author: //div[@class="byline"]/a -date: //span[@class="timestamp"] - -strip: //p[@class="topics"] -strip: //h1 -strip: //div[@class="byline"] -strip: //p[@class="published"] +title: //div[@class='article section']//h1 +author: //div[@class="byline"]/a +date: //span[@class="timestamp"] +body: //div[@class="page section"] + +strip: //a[@class="inline-caption"] +strip: //p[@class="ticker section noprint"] +strip: //p[@class="topics"] +strip: //h1 +strip: //div[@class="byline"] +strip: //p[@class="published"] strip: //div[contains(@class,"featured-scroller")] -test_url: http://www.abc.net.au/news/2011-11-08/crabb-carbon-legislation-abbott-demolition/3652544 \ No newline at end of file +strip_id_or_class: footer + +tidy: no + +test_url: http://www.abc.net.au/news/2013-03-27/open-speed-highways-change-clp-giles/4597892 +test_url: http://www.abc.net.au/news/2013-04-30/credit-growth-remains-subdued/4660054?section=business diff --git a/inc/3rdparty/site_config/standard/abcnews.go.com.txt b/inc/3rdparty/site_config/standard/abcnews.go.com.txt old mode 100644 new mode 100755 index c515d3e..8d36735 --- a/inc/3rdparty/site_config/standard/abcnews.go.com.txt +++ b/inc/3rdparty/site_config/standard/abcnews.go.com.txt @@ -1,27 +1,27 @@ -title: //h1[@class='headline'] -body: //div[@id='storyText'] -# for video entries -body: //img[@id='ff-img'] | //div[@id='meta']//div[contains(@class, 'overview')] -author: //div[@class='byline'] -date: //div[@class='date'] -strip: //*[@id='date_partner'] - -strip: //div[@class='breadcrumb'] -strip: //div[contains(@class,'show_tools')] -strip: //div[@id='sponsoredByAd'] -strip: //div[contains(@class,'rel_container')] -strip: //p[a[starts-with(@href, 'http://www.twitter.com')]] -strip: //p[a[starts-with(@href, 'http://www.facebook.com')]] -strip: //p[contains(., 'Click here to return to')] -#strip_id_or_class: media -strip_id_or_class: mediaplayer - -replace_string( -replace_string:
Price: - -strip_id_or_class: collapsePS -strip_id_or_class: expandPS -strip_id_or_class: psPlaceHolde -strip: //li[contains(., 'update product info') or contains(., 'give feedback on images')] - +title: //span[@id = 'btAsinTitle'] +body: (//*[@id='prodImageCell']//a)[1] | //div[@id = 'ps-content'] | //span[@id='actualPriceValue'] | //h2[.='Product Details']/following-sibling::div | //div[@class='h2' and .='Product Description']/following-sibling::div +#strip_id_or_class: quantityDropdownDiv +#strip_id_or_class: addToCartSpan +#strip_id_or_class: oneClickDiv +strip_id_or_class: nocontent +strip_id_or_class: masDynamicConten +strip_id_or_class: dynamic-content +prune: no + +find_string: +replace_string:
Price: + +strip_id_or_class: collapsePS +strip_id_or_class: expandPS +strip_id_or_class: psPlaceHolde +strip: //li[contains(., 'update product info') or contains(., 'give feedback on images')] + test_url: http://www.amazon.com/Common-Sense-Forestry-Living-Mother/dp/1931498210/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/americandrink.net.txt b/inc/3rdparty/site_config/standard/americandrink.net.txt old mode 100644 new mode 100755 index dee0e86..7145f3f --- a/inc/3rdparty/site_config/standard/americandrink.net.txt +++ b/inc/3rdparty/site_config/standard/americandrink.net.txt @@ -1,6 +1,6 @@ -title: //div[@class='head']/h2/a -author: //div[@class='head']/a -date: //div[@class='head']/p[@class='date']/a -body: //div[@class='copy'] +title: //div[@class='head']/h2/a +author: //div[@class='head']/a +date: //div[@class='head']/p[@class='date']/a +body: //div[@class='copy'] strip: //p[@class='meta'] test_url: http://americandrink.net/post/10567188712/free-the-hooch \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/americascup.com.txt b/inc/3rdparty/site_config/standard/americascup.com.txt old mode 100644 new mode 100755 index b1673b6..31723f8 --- a/inc/3rdparty/site_config/standard/americascup.com.txt +++ b/inc/3rdparty/site_config/standard/americascup.com.txt @@ -1,10 +1,10 @@ -title: //div[@class="editorial-content"]/h3 -body: //div[@class="hero-image" or @class="editorial-content"] - -strip: //ul[@class="hero-caption"] -strip_id_or_class: footer - -prune: no -tidy: no - +title: //div[@class="editorial-content"]/h3 +body: //div[@class="hero-image" or @class="editorial-content"] + +strip: //ul[@class="hero-caption"] +strip_id_or_class: footer + +prune: no +tidy: no + test_url: http://www.americascup.com/en/Latest/News/2012/3/Coutts-and-Peyron-tell-transformative-tale-at-Global-Sports-Forum/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/americastestkitchenfeed.com.txt b/inc/3rdparty/site_config/standard/americastestkitchenfeed.com.txt old mode 100644 new mode 100755 index 8bf31ec..c2b62b5 --- a/inc/3rdparty/site_config/standard/americastestkitchenfeed.com.txt +++ b/inc/3rdparty/site_config/standard/americastestkitchenfeed.com.txt @@ -1,5 +1,5 @@ -title: //h1[@class="post-title"] -author: //span[@class="author"]/a -date: //span[@class="date"] +title: //h1[@class="post-title"] +author: //span[@class="author"]/a +date: //span[@class="date"] body: //div[@class="post-content main"] test_url: http://www.americastestkitchenfeed.com/gadgets-and-gear/2012/07/chill-out-with-tovolos-king-cube-silicone-ice-cube-tray/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/amptoons.com.txt b/inc/3rdparty/site_config/standard/amptoons.com.txt new file mode 100755 index 0000000..87547c6 --- /dev/null +++ b/inc/3rdparty/site_config/standard/amptoons.com.txt @@ -0,0 +1,8 @@ +title: //title + +body: //div[@class="entry-content"] + +author: //span[@class="author vcard"] + +date: //span[@class="entry-date"] +test_url: http://www.amptoons.com/blog/2013/03/14/open-thread-and-link-farm-i-hate-being-sick-edition/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/anandtech.com.txt b/inc/3rdparty/site_config/standard/anandtech.com.txt old mode 100644 new mode 100755 index 8067e03..7d80491 --- a/inc/3rdparty/site_config/standard/anandtech.com.txt +++ b/inc/3rdparty/site_config/standard/anandtech.com.txt @@ -1,11 +1,11 @@ -author: //a[@class='b'][1] -date: substring-after(substring-before(//div, 'Posted in'), ' on ') -strip_image_src: /content/images/globals/ -strip: //h2[. = 'Page 1']/preceding::p -strip: //h2 - -prune: no - -single_page_link: concat('http://www.anandtech.com/print/', substring-after(//meta[@property='og:url']/@content, '/show/')) - +author: //a[@class='b'][1] +date: substring-after(substring-before(//div, 'Posted in'), ' on ') +strip_image_src: /content/images/globals/ +strip: //h2[. = 'Page 1']/preceding::p +strip: //h2 + +prune: no + +single_page_link: concat('http://www.anandtech.com/print/', substring-after(//meta[@property='og:url']/@content, '/show/')) + test_url: http://www.anandtech.com/show/5812/eurocom-monster-10-clevos-little-monster/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/androidpolice.com.txt b/inc/3rdparty/site_config/standard/androidpolice.com.txt new file mode 100755 index 0000000..8f9b1a2 --- /dev/null +++ b/inc/3rdparty/site_config/standard/androidpolice.com.txt @@ -0,0 +1,5 @@ +body: //div[@class='post_content'] +date: //div[@class='date_day'] | div[@class='date_month'] + +test_url: http://www.androidpolice.com/2014/03/30/music-boss-for-pebble-can-now-control-playback-and-volume-on-chromecast-content-from-your-smartwatch/ + diff --git a/inc/3rdparty/site_config/standard/andyrutledge.com.txt b/inc/3rdparty/site_config/standard/andyrutledge.com.txt old mode 100644 new mode 100755 index f9ffd3c..ce31fcf --- a/inc/3rdparty/site_config/standard/andyrutledge.com.txt +++ b/inc/3rdparty/site_config/standard/andyrutledge.com.txt @@ -1,9 +1,9 @@ -title: //h2 -author: string('Andy Rutledge') -date: //div[@class='articledate'] -body: //div[@class='copybody'] - -strip: //*[@class='space'] -strip: //*[@class='articleFoot'] - +title: //h2 +author: string('Andy Rutledge') +date: //div[@class='articledate'] +body: //div[@class='copybody'] + +strip: //*[@class='space'] +strip: //*[@class='articleFoot'] + test_url: http://www.andyrutledge.com/hungry-for-a-better-menu.php \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/annatravelling.wordpress.com.txt b/inc/3rdparty/site_config/standard/annatravelling.wordpress.com.txt old mode 100644 new mode 100755 index a5c7c08..2d8937f --- a/inc/3rdparty/site_config/standard/annatravelling.wordpress.com.txt +++ b/inc/3rdparty/site_config/standard/annatravelling.wordpress.com.txt @@ -1,9 +1,9 @@ -title: //h1[@class="title"] - -author: ("Anna Manasova") -# is ignored, unfortunately - -date: //p[@class="date"] - +title: //h1[@class="title"] + +author: ("Anna Manasova") +# is ignored, unfortunately + +date: //p[@class="date"] + body: //div[@class="entry"] test_url: http://annatravelling.wordpress.com/2011/11/07/a-day-of-cooking-thai/ \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/applature.com.txt b/inc/3rdparty/site_config/standard/applature.com.txt old mode 100644 new mode 100755 index a78a615..a820bba --- a/inc/3rdparty/site_config/standard/applature.com.txt +++ b/inc/3rdparty/site_config/standard/applature.com.txt @@ -1,18 +1,18 @@ -title: //h1[contains(@class, 'title')# -body: //div[@id='mainContent']//div[contains(@class, 'section_content')] | //ul[@class='section_footer'] -date: //div[@class='date'] - -strip_id_or_class: sharethis -strip_id_or_class: stats -strip_id_or_class: apply_form -strip_id_or_class: job_map -strip_id_or_class: respond -strip: //h1//span[@class='type'] -strip: //li[@class='print' or @class='map'] - -replace_string(