diff --git a/inc/3rdparty/site_config/standard/37signals.com.txt b/inc/3rdparty/site_config/standard/37signals.com.txt
new file mode 100644
index 0000000..43a10ae
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/37signals.com.txt
@@ -0,0 +1,6 @@
+title: //div[@class='post_header']//h2/a
+author: //span[@class='author']
+date: //span[@class='date']
+body: //div[@id='Content']
+
+test_url: http://37signals.com/svn/posts/2785-the-end-of-the-it-department
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/3quarksdaily.com.txt b/inc/3rdparty/site_config/standard/3quarksdaily.com.txt
new file mode 100644
index 0000000..c4e7940
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/3quarksdaily.com.txt
@@ -0,0 +1,9 @@
+body: //div[@class='content']
+date: //div[@class='content']/h2
+strip: //div[@class='content']/h2
+title: //div[@class='content']/h3
+
+strip: //div[@id='postmenu']
+strip: //div[@class='trackback']
+tidy: no
+test_url: http://www.3quarksdaily.com/3quarksdaily/2012/01/martin-luther-king-i-have-a-dream.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/3voor12.vpro.nl.txt b/inc/3rdparty/site_config/standard/3voor12.vpro.nl.txt
new file mode 100644
index 0000000..b846b05
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/3voor12.vpro.nl.txt
@@ -0,0 +1,11 @@
+body: //div[@id='main']
+title: //div[@class='intro']/h1
+author: //ul[@class='text-data']/li[@class='author']
+date: //ul[@class='text-data']/li[@class='date']
+convert_double_br_tags: yes
+tidy: no
+
+strip: //div[@class='share']
+strip: //*[@class='zoom']
+strip: //div[@id='disqus_thread']
+test_url: http://3voor12.vpro.nl/nieuws/2012/januari/Ook-website-GroenLinks-woensdag-op-zwart-i-v-m--SOPA.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/43folders.com.txt b/inc/3rdparty/site_config/standard/43folders.com.txt
new file mode 100644
index 0000000..e8073f6
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/43folders.com.txt
@@ -0,0 +1,4 @@
+body: //*[@class = 'content']
+author: //*[@class = 'submitted']/a
+date: substring-after(//*[@class = 'submitted']/text(), '|')
+test_url: http://www.43folders.com/2011/04/22/cranking
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/500px.com.txt b/inc/3rdparty/site_config/standard/500px.com.txt
new file mode 100644
index 0000000..68e6b2d
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/500px.com.txt
@@ -0,0 +1,27 @@
+# very loose setup for both 500px.com/photo/* and 500px.com/blog/*
+# photo page example: http://500px.com/photo/4181666
+# blog page example: http://500px.com/blog/110
+
+# avoid "no text" error
+tidy:no
+prune:no
+
+# reorganize photo page elements
+#body://div[contains(@class,'container')]
+move_into(body)://div[contains(@id,'thephoto')]
+move_into(body)://div[contains(@id,'description')]
+move_into(body)://div[contains(@id,'tags')]
+move_into(body)://div[contains(@id,'photo-info')]
+
+# clean photo page info
+strip://span[contains(@id,'copyright')]
+strip://*[contains(@id,'store')]
+strip://*[contains(@id,'user-info')]
+strip://*[contains(@id,'photo-stats')]
+strip://*[contains(@id,'voting_controls_container')]
+strip://*[contains(@id,'more-photos')]
+strip://*[contains(@id,'embed-photo')]
+
+# clean blog page side bar
+strip://*[contains(@class,'col d3 clearafter')]
+test_url: http://500px.com/photo/3641041?from=editors
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/512pixels.net.txt b/inc/3rdparty/site_config/standard/512pixels.net.txt
new file mode 100644
index 0000000..e458980
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/512pixels.net.txt
@@ -0,0 +1,2 @@
+title: substring-before(//title, '—')
+test_url: http://512pixels.net/more-on-linked-lists/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/5by5.tv.txt b/inc/3rdparty/site_config/standard/5by5.tv.txt
new file mode 100644
index 0000000..dce0df4
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/5by5.tv.txt
@@ -0,0 +1,9 @@
+body: //*[@id="episode"]
+prune: no
+tidy: no
+
+autodetect_next_page: no
+strip_id_or_class: player
+
+strip://*[@id="header"]
+test_url: http://5by5.tv/buildanalyze/60
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/944.com.txt b/inc/3rdparty/site_config/standard/944.com.txt
new file mode 100644
index 0000000..84380e7
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/944.com.txt
@@ -0,0 +1,9 @@
+title: //h2[@class='border']
+body: //div[@class='padding']
+
+convert_double_br_tags: yes
+
+strip: //div[@id='social_sharing']
+strip: //div[@class='socialLinks']
+
+test_url: http://www.944.com/articles/mild-obsessions-frock-la-get-to-know-victoria-tik-s-haute-sustainable-fashion-line/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/aachener-nachrichten.de.txt b/inc/3rdparty/site_config/standard/aachener-nachrichten.de.txt
new file mode 100644
index 0000000..379592e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/aachener-nachrichten.de.txt
@@ -0,0 +1,10 @@
+title: //meta[@property='og:title']/@content
+body: //*[@class='fliesstext_detail' or @class='detail_fliesstext'] | //img[@itemprop="image" and starts-with(@src, "/sixcms/media.php/")]
+
+strip_id_or_class: socialshareprivacy1
+strip_id_or_class: zvaFacebookButton
+
+tidy: no
+prune: no
+
+test_url: http://www.aachener-nachrichten.de/lokales/aachen-detail-an/2517757
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/aachener-zeitung.de.txt b/inc/3rdparty/site_config/standard/aachener-zeitung.de.txt
new file mode 100644
index 0000000..4d76fac
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/aachener-zeitung.de.txt
@@ -0,0 +1,10 @@
+title: //meta[@property='og:title']/@content
+body: //*[@class='fliesstext_detail' or @class='detail_fliesstext'] | //img[@itemprop="image" and starts-with(@src, "/sixcms/media.php/")]
+
+strip_id_or_class: socialshareprivacy1
+strip_id_or_class: zvaFacebookButton
+
+tidy: no
+prune: no
+
+test_url: http://www.aachener-zeitung.de/sixcms/detail.php?template=az_detail&id=2552718
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/abc.es.txt b/inc/3rdparty/site_config/standard/abc.es.txt
new file mode 100644
index 0000000..a99833d
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/abc.es.txt
@@ -0,0 +1,7 @@
+title: //meta[@property='og:title']/@content
+body: //div[@class='datosi' or @class='date' or @class='photo-alt1' or @class='text']
+strip_id_or_class: colB
+
+prune: no
+
+test_url: http://www.abc.es/20120209/tv-series/abci-house-ultima-temporada-201202090936.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/abc.net.au.txt b/inc/3rdparty/site_config/standard/abc.net.au.txt
new file mode 100644
index 0000000..5e6269c
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/abc.net.au.txt
@@ -0,0 +1,10 @@
+title: //h1
+author: //div[@class="byline"]/a
+date: //span[@class="timestamp"]
+
+strip: //p[@class="topics"]
+strip: //h1
+strip: //div[@class="byline"]
+strip: //p[@class="published"]
+strip: //div[contains(@class,"featured-scroller")]
+test_url: http://www.abc.net.au/news/2011-11-08/crabb-carbon-legislation-abbott-demolition/3652544
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/abcnews.go.com.txt b/inc/3rdparty/site_config/standard/abcnews.go.com.txt
new file mode 100644
index 0000000..c515d3e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/abcnews.go.com.txt
@@ -0,0 +1,27 @@
+title: //h1[@class='headline']
+body: //div[@id='storyText']
+# for video entries
+body: //img[@id='ff-img'] | //div[@id='meta']//div[contains(@class, 'overview')]
+author: //div[@class='byline']
+date: //div[@class='date']
+strip: //*[@id='date_partner']
+
+strip: //div[@class='breadcrumb']
+strip: //div[contains(@class,'show_tools')]
+strip: //div[@id='sponsoredByAd']
+strip: //div[contains(@class,'rel_container')]
+strip: //p[a[starts-with(@href, 'http://www.twitter.com')]]
+strip: //p[a[starts-with(@href, 'http://www.facebook.com')]]
+strip: //p[contains(., 'Click here to return to')]
+#strip_id_or_class: media
+strip_id_or_class: mediaplayer
+
+replace_string(
+replace_string: Price:
+
+strip_id_or_class: collapsePS
+strip_id_or_class: expandPS
+strip_id_or_class: psPlaceHolde
+strip: //li[contains(., 'update product info') or contains(., 'give feedback on images')]
+
+test_url: http://www.amazon.com/Common-Sense-Forestry-Living-Mother/dp/1931498210/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/americandrink.net.txt b/inc/3rdparty/site_config/standard/americandrink.net.txt
new file mode 100644
index 0000000..dee0e86
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/americandrink.net.txt
@@ -0,0 +1,6 @@
+title: //div[@class='head']/h2/a
+author: //div[@class='head']/a
+date: //div[@class='head']/p[@class='date']/a
+body: //div[@class='copy']
+strip: //p[@class='meta']
+test_url: http://americandrink.net/post/10567188712/free-the-hooch
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/americascup.com.txt b/inc/3rdparty/site_config/standard/americascup.com.txt
new file mode 100644
index 0000000..b1673b6
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/americascup.com.txt
@@ -0,0 +1,10 @@
+title: //div[@class="editorial-content"]/h3
+body: //div[@class="hero-image" or @class="editorial-content"]
+
+strip: //ul[@class="hero-caption"]
+strip_id_or_class: footer
+
+prune: no
+tidy: no
+
+test_url: http://www.americascup.com/en/Latest/News/2012/3/Coutts-and-Peyron-tell-transformative-tale-at-Global-Sports-Forum/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/americastestkitchenfeed.com.txt b/inc/3rdparty/site_config/standard/americastestkitchenfeed.com.txt
new file mode 100644
index 0000000..8bf31ec
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/americastestkitchenfeed.com.txt
@@ -0,0 +1,5 @@
+title: //h1[@class="post-title"]
+author: //span[@class="author"]/a
+date: //span[@class="date"]
+body: //div[@class="post-content main"]
+test_url: http://www.americastestkitchenfeed.com/gadgets-and-gear/2012/07/chill-out-with-tovolos-king-cube-silicone-ice-cube-tray/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/anandtech.com.txt b/inc/3rdparty/site_config/standard/anandtech.com.txt
new file mode 100644
index 0000000..8067e03
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/anandtech.com.txt
@@ -0,0 +1,11 @@
+author: //a[@class='b'][1]
+date: substring-after(substring-before(//div, 'Posted in'), ' on ')
+strip_image_src: /content/images/globals/
+strip: //h2[. = 'Page 1']/preceding::p
+strip: //h2
+
+prune: no
+
+single_page_link: concat('http://www.anandtech.com/print/', substring-after(//meta[@property='og:url']/@content, '/show/'))
+
+test_url: http://www.anandtech.com/show/5812/eurocom-monster-10-clevos-little-monster/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/andyrutledge.com.txt b/inc/3rdparty/site_config/standard/andyrutledge.com.txt
new file mode 100644
index 0000000..f9ffd3c
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/andyrutledge.com.txt
@@ -0,0 +1,9 @@
+title: //h2
+author: string('Andy Rutledge')
+date: //div[@class='articledate']
+body: //div[@class='copybody']
+
+strip: //*[@class='space']
+strip: //*[@class='articleFoot']
+
+test_url: http://www.andyrutledge.com/hungry-for-a-better-menu.php
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/annatravelling.wordpress.com.txt b/inc/3rdparty/site_config/standard/annatravelling.wordpress.com.txt
new file mode 100644
index 0000000..a5c7c08
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/annatravelling.wordpress.com.txt
@@ -0,0 +1,9 @@
+title: //h1[@class="title"]
+
+author: ("Anna Manasova")
+# is ignored, unfortunately
+
+date: //p[@class="date"]
+
+body: //div[@class="entry"]
+test_url: http://annatravelling.wordpress.com/2011/11/07/a-day-of-cooking-thai/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/applature.com.txt b/inc/3rdparty/site_config/standard/applature.com.txt
new file mode 100644
index 0000000..a78a615
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/applature.com.txt
@@ -0,0 +1,18 @@
+title: //h1[contains(@class, 'title')]
+body: //div[@id='mainContent']//div[contains(@class, 'section_content')] | //ul[@class='section_footer']
+date: //div[@class='date']
+
+strip_id_or_class: sharethis
+strip_id_or_class: stats
+strip_id_or_class: apply_form
+strip_id_or_class: job_map
+strip_id_or_class: respond
+strip: //h1//span[@class='type']
+strip: //li[@class='print' or @class='map']
+
+replace_string(
):
+replace_string():
+
+prune: no
+
+dissolve: //h2
+test_url: http://www.bbc.co.uk/news/business-15060862
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/benoitmaison.org.txt b/inc/3rdparty/site_config/standard/benoitmaison.org.txt
new file mode 100644
index 0000000..f341d59
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/benoitmaison.org.txt
@@ -0,0 +1,16 @@
+body: //div[@class="entry-content"]
+
+# Remove text ‘Tweet’
+strip: //div[@class="entry-content"]/div[last()]
+
+title: //h1[@class="entry-title"]
+
+# If the Instapaper text parser worked with HTML5 tags, we would use:
+date: //time[@class="entry-date"]
+
+# But since it does not, use this more complicated rule:
+date: //div[@class="entry-meta"]/a[@rel="bookmark"]
+
+# Unfortunately, the following rule is overridden by the automatically found author.
+author: ("Benoit Maison")
+test_url: http://www.benoitmaison.org/2011/12/06/why-siri-had-to-start-in-beta/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/berlingske.dk.txt b/inc/3rdparty/site_config/standard/berlingske.dk.txt
new file mode 100644
index 0000000..607c998
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/berlingske.dk.txt
@@ -0,0 +1,3 @@
+title: //h1[@class='headline']
+body: //div[contains(@class, 'article-wrapper')]
+test_url: http://www.berlingske.dk/danmark/festen-er-flyttet-nordpaa
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/betabeat.com.txt b/inc/3rdparty/site_config/standard/betabeat.com.txt
new file mode 100644
index 0000000..7815cf2
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/betabeat.com.txt
@@ -0,0 +1,2 @@
+body: //div[@class="entry-content"]
+test_url: http://www.betabeat.com/2011/07/04/sheryl-sandberg-breaks-through-silicon-valleys-boys-club-sort-of/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/betanews.com.txt b/inc/3rdparty/site_config/standard/betanews.com.txt
new file mode 100644
index 0000000..0eaf085
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/betanews.com.txt
@@ -0,0 +1,7 @@
+# For some articles at this site, like the one below, normal
+# processing doesn't seem to pick up the article body;
+# other articles come through fine.
+# http://www.betanews.com/joewilcox/article
+# /Google-is-a-marketing-sensation/1309708375
+body: //*[@id="article"]
+test_url: http://www.betanews.com/joewilcox/article/Google-is-a-marketing-sensation/1309708375
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/biography.com.txt b/inc/3rdparty/site_config/standard/biography.com.txt
new file mode 100644
index 0000000..dc07129
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/biography.com.txt
@@ -0,0 +1,8 @@
+title: //div[contains(@class, 'main-content')]//h1
+body: //div[@class='summary-column'] | //div[contains(@class, 'main-content')]
+
+prune: no
+
+single_page_link: //div[@id='biography-action-links']//a[contains(@href, '/print/')]
+
+test_url: http://www.biography.com/print/profile/martin-luther-9389283
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/bitelia.com.txt b/inc/3rdparty/site_config/standard/bitelia.com.txt
new file mode 100644
index 0000000..7bffae9
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/bitelia.com.txt
@@ -0,0 +1,2 @@
+body: //*[(@class = "historia")]
+test_url: http://bitelia.com/2011/09/klout-midiendo-influencia
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/bjango.com.txt b/inc/3rdparty/site_config/standard/bjango.com.txt
new file mode 100644
index 0000000..6cb0463
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/bjango.com.txt
@@ -0,0 +1,7 @@
+title: //h1[@class='articlehead']
+body: //div[@class='column']
+strip: //h1
+strip: //div[@class='help']
+
+#no author or date/time provided in current layout
+test_url: http://bjango.com/articles/actions/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blog.arsln.org.txt b/inc/3rdparty/site_config/standard/blog.arsln.org.txt
new file mode 100644
index 0000000..1f43f49
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/blog.arsln.org.txt
@@ -0,0 +1,8 @@
+tidy: no
+prune: no
+date: //article/header/h6/time
+title: //article/header/h3
+author: //meta[@name='author']/@content
+body: //article//post
+
+test_url: http://blog.arsln.org/aska-ayip-oluyor/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blog.asmartbear.com.txt b/inc/3rdparty/site_config/standard/blog.asmartbear.com.txt
new file mode 100644
index 0000000..81c3bda
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/blog.asmartbear.com.txt
@@ -0,0 +1,7 @@
+title: //title
+author: //span[@class='author vcard']/a
+date: //p[@class='headline_meta']/abbr[@class='published']
+body: //div[@class='format_text entry-content']
+
+strip: //div[@id='dd_ajax_float']
+test_url: http://blog.asmartbear.com/how-to-get-quality-freelance-graphics-design-work-on-a-budget.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blog.cloudflare.com.txt b/inc/3rdparty/site_config/standard/blog.cloudflare.com.txt
new file mode 100644
index 0000000..a4c5aae
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/blog.cloudflare.com.txt
@@ -0,0 +1,9 @@
+# Instapaper gets this back to front and only gets the blog title instead of the article title.
+title: substring-before(//title, '-')
+
+author: //a[ contains(@href, '/people') ]
+
+body: //div[ @class='post' ]
+
+# Date is impossible to retrieve since they use those stupid "fuzzy" dates, inserted through javascript, at posterous.
+test_url: http://blog.cloudflare.com/understanding-analytics-when-is-a-page-view-n
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blog.fefe.de.txt b/inc/3rdparty/site_config/standard/blog.fefe.de.txt
new file mode 100644
index 0000000..92272b7
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/blog.fefe.de.txt
@@ -0,0 +1,5 @@
+title: //h2
+date: //h3
+body: //ul
+
+test_url: http://blog.fefe.de/?ts=b063bf55
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blog.instagram.com.txt b/inc/3rdparty/site_config/standard/blog.instagram.com.txt
new file mode 100644
index 0000000..3065dd8
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/blog.instagram.com.txt
@@ -0,0 +1,11 @@
+# clean Instagram blog a little bit
+
+tidy:no
+prune:no
+
+body://div[contains(@id,'content')]
+
+strip_id_or_class:meta
+strip_id_or_class:notes
+strip_id_or_class:pagination
+test_url: http://blog.instagram.com/post/8757832007/fromwhereistand
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blog.jaysalvat.com.txt b/inc/3rdparty/site_config/standard/blog.jaysalvat.com.txt
new file mode 100644
index 0000000..4e467fe
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/blog.jaysalvat.com.txt
@@ -0,0 +1,4 @@
+date: //span[contains(@class, 'date-links')]
+author: //span[contains(@class, 'author-links')]
+body: //div[contains(@class, 'entry-content')]
+test_url: http://blog.jaysalvat.com/article/celui-qui-avait-refait-son-site-web
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blog.kaelig.fr.txt b/inc/3rdparty/site_config/standard/blog.kaelig.fr.txt
new file mode 100644
index 0000000..ac18ad1
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/blog.kaelig.fr.txt
@@ -0,0 +1,5 @@
+body: //*[contains(@class, 'post_content')]
+author: string('Kaelig Deloumeau-Prigent')
+title: //h1[@class='title']
+date: //span[@class='date']
+test_url: http://blog.kaelig.fr/post/24877648508/preprocesseurs-css-renoncer-par-choix-ou-par
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blog.naver.com.txt b/inc/3rdparty/site_config/standard/blog.naver.com.txt
new file mode 100644
index 0000000..702789a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/blog.naver.com.txt
@@ -0,0 +1,6 @@
+title: //span[@class='pcol1 itemSubjectBoldfont']
+body: //div[@id='postListBody']
+date: //p[@class='date fil5 pcol2']
+single_page_link: /html/frameset/frame[1]/attribute::src
+strip: //div[@class='post-btn']
+test_url: http://blog.naver.com/how2invest/110135068757
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blog.pchome.net.txt b/inc/3rdparty/site_config/standard/blog.pchome.net.txt
new file mode 100644
index 0000000..3089001
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/blog.pchome.net.txt
@@ -0,0 +1,12 @@
+# PCHOME blog, a popular Chinese blog host
+# Oct 15, 2011
+#
+
+title://*[contains(@class,'imp')]/h2
+
+date://*[contains(@class,'imp')]/span
+body://div[contains(@id,'blog_content')]
+
+
+
+test_url: http://blog.pchome.net/article/462502.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blog.pinboard.in.txt b/inc/3rdparty/site_config/standard/blog.pinboard.in.txt
new file mode 100644
index 0000000..b7afe45
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/blog.pinboard.in.txt
@@ -0,0 +1,6 @@
+title: //a[@class="blog_title"]
+date: //p[@class="when"]/a
+body: //div[@class="blog_entry"]
+strip_id_or_class:blog_title
+strip_id_or_class:when
+test_url: http://blog.pinboard.in/2011/11/the_social_graph_is_neither/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blog.sina.com.cn.txt b/inc/3rdparty/site_config/standard/blog.sina.com.cn.txt
new file mode 100644
index 0000000..acb9ce8
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/blog.sina.com.cn.txt
@@ -0,0 +1,26 @@
+# Sina blog, the most popular blog host in China.
+# Its source code is horrible.
+#
+# Issue:
+# Only the first image in the article is displayed.
+# The remaining images are replaced with a 1x1 transparent GIF by the Sina blog host.
+#
+
+title://*[contains(@class,'titName SG_txta')]
+author://*[contains(@id,'ownernick')]
+date://*[contains(@class,'time SG_txtc')]
+body://div[contains(@class,'articalContent')]
+
+# Remove redundant content in spans whose class contains "MASS"
+# Example
+strip://span[contains(@class,'MASS')]
+
+# Remove comment
+strip://div[contains(@class,'allComm')]
+
+# Remove hidden text and links
+strip://ins
+
+tidy:no
+convert_double_br_tags:yes
+test_url: http://blog.sina.com.cn/s/blog_5054769e0102dtja.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blog.spu.edu.txt b/inc/3rdparty/site_config/standard/blog.spu.edu.txt
new file mode 100644
index 0000000..68bd4e3
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/blog.spu.edu.txt
@@ -0,0 +1,2 @@
+body://div[@class='post']
+test_url: http://blog.spu.edu/lectio/from-the-frying-pan-into-the-fire/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blog.wells.ee.txt b/inc/3rdparty/site_config/standard/blog.wells.ee.txt
new file mode 100644
index 0000000..8c8b383
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/blog.wells.ee.txt
@@ -0,0 +1,6 @@
+title: //h2/a[@class="no-link title"]
+author: //h2[@id="blog_owner"]
+date: //time
+strip: //h2/a[@class="no-link title"]
+test_url: http://blog.wells.ee/retina
+test_url: http://blog.wells.ee/skeuomorphism
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blogs.aljazeera.net.txt b/inc/3rdparty/site_config/standard/blogs.aljazeera.net.txt
new file mode 100644
index 0000000..f630127
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/blogs.aljazeera.net.txt
@@ -0,0 +1,8 @@
+# 2011-08-23 [carlo@...] Initial version.
+
+author: //div[@id="blogauthordatebox-node"]//a[@title="View user profile."]/text()
+
+# why yes, I do feel a bit dirty
+date: substring-before( substring-after( substring-after( //div[@id="blogauthordatebox-node"]//td[3], "on " ), ", "), " " )
+
+test_url: http://blogs.aljazeera.net/asia/2011/08/22/peoples-hero
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blogs.forbes.com.txt b/inc/3rdparty/site_config/standard/blogs.forbes.com.txt
new file mode 100644
index 0000000..86580d2
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/blogs.forbes.com.txt
@@ -0,0 +1,2 @@
+body: //div[@class='entry']
+test_url: http://blogs.forbes.com/adamhartung/2011/04/08/apple-is-better-managed-than-microsoft/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blogs.hbr.org.txt b/inc/3rdparty/site_config/standard/blogs.hbr.org.txt
new file mode 100644
index 0000000..3664d16
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/blogs.hbr.org.txt
@@ -0,0 +1,4 @@
+title: //div[@id='pageFeature']/h1
+body: //div[@id='articleBody']
+strip: //div[@class='module wide']
+test_url: http://blogs.hbr.org/bregman/2011/04/the-1-killer-of-meetings-and-w.html?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+harvardbusiness+%28HBR.org%29
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blogs.msdn.com.txt b/inc/3rdparty/site_config/standard/blogs.msdn.com.txt
new file mode 100644
index 0000000..3d3ec02
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/blogs.msdn.com.txt
@@ -0,0 +1,6 @@
+title: //h3[@class="post-name"]
+author: //span[@class="user-name"]
+date: //div[@class="post-date"]
+body: //div[@class="post-content user-defined-markup"]
+footnotes: no
+test_url: http://blogs.msdn.com/b/b8/archive/2011/10/04/designing-the-start-screen.aspx
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blogs.reuters.com.txt b/inc/3rdparty/site_config/standard/blogs.reuters.com.txt
new file mode 100644
index 0000000..6907bcb
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/blogs.reuters.com.txt
@@ -0,0 +1,3 @@
+title: //div[@id='single']/h1
+body: //div[@id='postcontent']
+test_url: http://blogs.reuters.com/felix-salmon/2010/07/16/the-value-of-a-strong-brand-apple-edition/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blogs.scientificamerican.com.txt b/inc/3rdparty/site_config/standard/blogs.scientificamerican.com.txt
new file mode 100644
index 0000000..a7d1508
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/blogs.scientificamerican.com.txt
@@ -0,0 +1,16 @@
+# meta data
+title://h1[@class = 'postTitle']
+author:substring-before(substring-after(//span[@class = 'byline'],'By '),'|')
+date://span[@class = 'datestamp']
+
+#body content
+body://div[@id = 'singleBlogPost']
+
+#reclaim author info
+move_into(//div[@id = 'singleBlogPost'])://div[@id = 'aboutAuthorDiv']
+strip://p[@class = 'moreLink mobileHide']
+
+#cleanup comments, there might be some open sections
+strip://div[@id = 'comments2']
+strip://h3[a[@href = '#add-comment']]
+test_url: http://blogs.scientificamerican.com/a-blog-around-the-clock/2012/07/10/science-blogs-definition-and-a-history/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blogs.smithsonianmag.com.txt b/inc/3rdparty/site_config/standard/blogs.smithsonianmag.com.txt
new file mode 100644
index 0000000..ba8bc6e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/blogs.smithsonianmag.com.txt
@@ -0,0 +1,15 @@
+# metadata
+author://div[@class = 'post']/div[@class='meta']/a[1]
+date://div[@id = 'rap']/h2[1]
+body://div[@class = 'post']
+
+# wrapping caption and image
+wrap_in(fieldset)://div[contains(@class, 'wp-caption')]
+
+
+# clean up
+strip://div[@class = 'post']/h3[@class = 'storytitle']
+strip://div[@class = 'post']/div[@class = 'social']
+strip://img[@style = 'display:none;']
+strip://img[@height='0' and @width='0']
+test_url: http://blogs.smithsonianmag.com/adventure/2011/10/tips-for-women-traveling-in-turkey/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/blogs.technet.com.txt b/inc/3rdparty/site_config/standard/blogs.technet.com.txt
new file mode 100644
index 0000000..a2909fd
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/blogs.technet.com.txt
@@ -0,0 +1,6 @@
+title: //h3[@class="post-name"]
+author: //span[@class="user-name"]
+date: //div[@class="post-date"]
+body: //div[@class="post-content user-defined-markup"]
+footnotes: no
+test_url: http://blogs.technet.com/b/dlemson/archive/2004/03/03/83304.aspx
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/bluetouff.com.txt b/inc/3rdparty/site_config/standard/bluetouff.com.txt
new file mode 100644
index 0000000..fbe7a5c
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/bluetouff.com.txt
@@ -0,0 +1,4 @@
+body://div[@class='entry']
+date://div[@class='meta']
+strip://a[@class='FlattrButton']
+test_url: http://bluetouff.com/2012/03/02/polemique-google-vie-privee/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/boagworld.com.txt b/inc/3rdparty/site_config/standard/boagworld.com.txt
new file mode 100644
index 0000000..91e48fd
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/boagworld.com.txt
@@ -0,0 +1,8 @@
+title: //h1[@class="entry-title"][2]
+author: string("Paul Boag")
+date: substring(//span[@class="meta"], 11)
+body: //article
+strip: //h2
+strip: //h1
+strip: //div[@id="callsToAction"]
+test_url: http://boagworld.com/working-in-web-design/dealing-with-the-dickheads/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/boingboing.net.txt b/inc/3rdparty/site_config/standard/boingboing.net.txt
new file mode 100644
index 0000000..9169e8f
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/boingboing.net.txt
@@ -0,0 +1,11 @@
+# This is far from perfect, but so is BoingBoing's markup
+title: //h2[@class="headline"]
+single_page_link: //h2[@class="headline"]/a
+#date: //p[@class="byline"]
+body: //div[@class="post"]
+
+strip_id_or_class: shareMe
+strip_id_or_class: authorbox
+strip_id_or_class: byline
+
+test_url: http://boingboing.net/2011/10/23/understanding-the-hyperrich-through-the-lens-of-tomorrows-history.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/boldizsar.palotas.eu.txt b/inc/3rdparty/site_config/standard/boldizsar.palotas.eu.txt
new file mode 100644
index 0000000..4cc4904
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/boldizsar.palotas.eu.txt
@@ -0,0 +1,3 @@
+title: //h2[@class='entry-title']
+body: //div[@class='entry-content']
+test_url: http://boldizsar.palotas.eu/blog/?p=1394
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/book.douban.com.txt b/inc/3rdparty/site_config/standard/book.douban.com.txt
new file mode 100644
index 0000000..8b95856
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/book.douban.com.txt
@@ -0,0 +1,6 @@
+body: //span[@property='v:description']
+date: //span[@property='v:dtreviewed']
+author: //span[@property='v:reviewer']
+prune: no
+
+test_url: http://book.douban.com/review/2422662/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/bookforum.com.txt b/inc/3rdparty/site_config/standard/bookforum.com.txt
new file mode 100644
index 0000000..331f415
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/bookforum.com.txt
@@ -0,0 +1,19 @@
+#metadata
+title://div[@class = 'Topper']/h1
+author://div[@class = 'Topper']/h3
+date://div[@class = 'Topper']/h6
+body://div[@class = 'Core']
+
+
+
+# clean up
+strip://div[@class = 'Topper']/h1
+strip://div[@class = 'Topper']/h3
+strip://div[@class = 'Topper']/h4
+strip://div[@class = 'Topper']/h5
+strip://div[@class = 'Topper']/h6
+strip://br[@clear = 'all']
+strip://div[@class = 'adCore']
+strip://div[@class = 'BookR']
+strip://div[@class = 'InfoBox']
+test_url: http://bookforum.com/inprint/018_04/8595
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/borderhouseblog.com.txt b/inc/3rdparty/site_config/standard/borderhouseblog.com.txt
new file mode 100644
index 0000000..190738d
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/borderhouseblog.com.txt
@@ -0,0 +1,7 @@
+title://h1
+author://div[@class="meta"]/span/a
+date://div[@class="date"]
+body://div[@class="content article"]
+strip://div[@class="content article"]/h1
+
+test_url: http://borderhouseblog.com/?p=7832
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/bostonglobe.com.txt b/inc/3rdparty/site_config/standard/bostonglobe.com.txt
new file mode 100644
index 0000000..d3e6f43
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/bostonglobe.com.txt
@@ -0,0 +1,16 @@
+# NOTE: If testing this configuration yields bad results, including junk text like "Try BostonGlobe.com today" and "THIS STORY APPEARED IN", please replace the Test URL with a current-day headline link from bostonglobe.com.
+
+title: //div[@class="header"]/h1
+author: substring-after(//div[@class="byline"]/h2[@class="author"],"By ")
+date: //div[@class="byline"]/p[last()]
+body: //div[@class="article-body"]
+
+strip_id_or_class: aside
+strip_id_or_class: promo
+strip_id_or_class: skip-nav
+strip_id_or_class: article-more
+strip_id_or_class: article-bar
+
+# This removes image captions. If the parser starts saving images from bostonglobe.com (currently, it does not), then this directive should be removed.
+strip_id_or_class: figure
+test_url: http://bostonglobe.com/news/nation/2012/03/17/illinois-primary-could-pivotal/PsDzFZqvhEYyXbOcF9FOkO/story.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/bostonreview.net.txt b/inc/3rdparty/site_config/standard/bostonreview.net.txt
new file mode 100644
index 0000000..6856701
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/bostonreview.net.txt
@@ -0,0 +1,15 @@
+#basics
+title://h3[@class = 'article_title']
+date://span[@class = 'article_date']
+body://div[@id = 'center_column_article']
+# The author XPath below is correct, but the author is not being picked up in preview
+author://span[@class = 'article_author']
+
+#strips basics from article
+strip_id_or_class:article_title
+strip_id_or_class:article_date
+strip_id_or_class:article_author
+
+#strips pull quotes
+strip_id_or_class:pull_quote
+test_url: http://www.bostonreview.net/BR36.4/megan_pugh_agnes_de_mille_dance.php
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/boundlessline.org.txt b/inc/3rdparty/site_config/standard/boundlessline.org.txt
new file mode 100644
index 0000000..bfc3f3d
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/boundlessline.org.txt
@@ -0,0 +1,5 @@
+title: substring-before(//title, '|')
+body: //div[@class="entry"]
+# Remove the author's picture
+strip: //div[@class="entry"]/a[1]
+test_url: http://www.boundlessline.org/2011/06/the-nyts-on-gender-over-the-weekend.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/brainfacts.org.txt b/inc/3rdparty/site_config/standard/brainfacts.org.txt
new file mode 100644
index 0000000..94b0f56
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/brainfacts.org.txt
@@ -0,0 +1,10 @@
+title: //div[@class="standard"]/h1
+author: string("BrainFacts.org")
+date: //div[@class="meta"]/strong
+
+strip: //p[@class="skip"]
+strip: //div[@class="meta"]
+strip: //div[@class="standard"]/h1
+strip: //div[@class="modal"]
+strip: //div[@class="columnRight"]
+test_url: http://brainfacts.org/diseases-disorders/childhood-disorders/articles/2011/autism-the-pervasive-developmental-disorder/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/brandeins.de.txt b/inc/3rdparty/site_config/standard/brandeins.de.txt
new file mode 100644
index 0000000..3753ce6
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/brandeins.de.txt
@@ -0,0 +1,7 @@
+# set body
+body: //div[@id='theContent']
+
+# set title
+title: //div[@id='theContent']/h3
+strip: //div[@id='theContent']/h3
+test_url: http://www.brandeins.de/archiv/magazin/gegessen-wird-immer/artikel/hunger.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/brandingstrategyinsider.com.txt b/inc/3rdparty/site_config/standard/brandingstrategyinsider.com.txt
new file mode 100644
index 0000000..1950484
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/brandingstrategyinsider.com.txt
@@ -0,0 +1,3 @@
+date://h2[@class="date-header"]
+body://div[@class="entry-content"]
+test_url: http://www.brandingstrategyinsider.com/2011/12/top-twelve-branding-keys-for-2012.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/brettterpstra.com.txt b/inc/3rdparty/site_config/standard/brettterpstra.com.txt
new file mode 100644
index 0000000..f6f7377
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/brettterpstra.com.txt
@@ -0,0 +1,5 @@
+body: //div[@class='post full']
+title: //h1
+author: substring-after(//title, '- ')
+date: //span[@class='date']
+test_url: http://brettterpstra.com/byword-for-ios/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/brisbanetimes.com.au.txt b/inc/3rdparty/site_config/standard/brisbanetimes.com.au.txt
new file mode 100644
index 0000000..27e6b70
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/brisbanetimes.com.au.txt
@@ -0,0 +1,2 @@
+body: //div[@class='articleBody']
+test_url: http://www.brisbanetimes.com.au/opinion/blogs/blunt-instrument/losing-our-minds--for-24-hours-20120118-1q682.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/brookings.edu.txt b/inc/3rdparty/site_config/standard/brookings.edu.txt
new file mode 100644
index 0000000..9f4fc4e
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/brookings.edu.txt
@@ -0,0 +1,13 @@
+title: //div[@id='contentheader']/h1
+author: //p[@class='attribution']/span[@class='author']/*
+# TODO: is there a way to pull multiple authors? The author XPath above only grabs the first one.
+
+date: /html/head/meta[@name="date"]/@content
+body: //div[@class='main-content']
+
+strip: //p[@class='byline']
+strip: //div[@class='img-gallery']
+strip: //div[@class='callout']
+strip: //div[@class='add-your-view']
+convert_double_br_tags: yes
+test_url: http://www.brookings.edu/opinions/2011/1018_cyberattack_libya_goldsmith.aspx
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/brooksreview.net.txt b/inc/3rdparty/site_config/standard/brooksreview.net.txt
new file mode 100644
index 0000000..71cafcd
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/brooksreview.net.txt
@@ -0,0 +1,6 @@
+title: //h1
+body: //div[@class='article']
+body: //div[@class='post']
+date: //*[@id='single']/span
+prune: no
+test_url: http://brooksreview.net/2011/11/readability-agency/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/buquad.com.txt b/inc/3rdparty/site_config/standard/buquad.com.txt
new file mode 100644
index 0000000..a75fa04
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/buquad.com.txt
@@ -0,0 +1,8 @@
+title: //h1
+author: //h2/a
+date: substring-after(//h2, '|')
+strip_id_or_class: attachment
+strip: //h3
+
+body: //div[@class='entry']
+test_url: http://buquad.com/2012/04/09/paul-ryan/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/businessinsider.com.txt b/inc/3rdparty/site_config/standard/businessinsider.com.txt
new file mode 100644
index 0000000..c773db8
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/businessinsider.com.txt
@@ -0,0 +1,12 @@
+title://div[@class="sl-layout-post"]/h1
+body: //div[contains(@class, 'post-content') or contains(@class, 'KonaBody')]
+strip: //div[contains(@class, "post-sidebar")]
+strip: //div[@id='related-links']
+author://div[@class="byline"]/a
+date://div[@class="byline"]/span[@class="date"]
+prune: no
+
+strip://*[contains(@class,'sponsored-text')]
+strip: //div[@id='post_footer']
+
+test_url: http://www.businessinsider.com/microsoft-just-put-one-of-its-hardcore-technical-geniuses-on-xbox-2012-1
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/businessnews.com.tn.txt b/inc/3rdparty/site_config/standard/businessnews.com.tn.txt
new file mode 100644
index 0000000..714cfc9
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/businessnews.com.tn.txt
@@ -0,0 +1,12 @@
+body: //div[@id='article_detail']
+title: //meta[@property='og:title']/@content
+date: //div[@id='date_com_art']//a[@class='date']
+author: //div[@id='article_detail']//font[@class='auteur']
+
+strip_id_or_class: porte_titre_theme
+strip_id_or_class: cont_param
+strip_id_or_class: date_com_art
+
+prune: no
+
+test_url: http://www.businessnews.com.tn/details_article.php?a=31073&t=522&lang=fr&temp=1
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/businessweek.com.txt b/inc/3rdparty/site_config/standard/businessweek.com.txt
new file mode 100644
index 0000000..7b3d063
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/businessweek.com.txt
@@ -0,0 +1,30 @@
+# story has several pages, should be detected
+body: //div[@id='storyBody']
+body: //div[@id='article_body']
+body: //div[@id='story_body']
+
+title://h1[@id='article_headline']
+
+# article author
+author: //p[@class='author']/a
+# story author(s)
+author: substring-after(//p[@class='byline'], 'By ')
+
+# article date
+date: //span[@class='published_date']
+# story date
+date: //span[@class='date']
+
+date: substring-after(//div[contains(@class,'attributor')],'on')
+strip_id_or_class: inset
+strip: //p/span[@class='photoCredit']
+strip: //h1
+
+strip_id_or_class: page_count
+strip_id_or_class: tools
+strip_id_or_class: pagination
+
+single_page_link: //li[@id='stPrint']/a
+
+test_url: http://www.businessweek.com/magazine/buyback-insurance-a-good-deal-for-retailers-07282011.html
+test_url: http://www.businessweek.com/articles/2012-06-06/american-pain-the-largest-u-dot-s-dot-pill-mills-rise-and-fall
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/buzzfeed.com.txt b/inc/3rdparty/site_config/standard/buzzfeed.com.txt
new file mode 100644
index 0000000..6df8bc4
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/buzzfeed.com.txt
@@ -0,0 +1,15 @@
+# Creator: Greg Leuch
+
+# It can be messy.
+tidy:no
+
+# The basic template.
+title: //h1[@data-print='title']
+author: //a[@data-print='author']
+date: //time[@data-print='date']
+body: //div[@data-print='body']
+body: //section[@data-print='body']
+
+# Strip any element the page marks with data-print="ignore".
+strip: //*[@data-print="ignore"]
+test_url: http://www.buzzfeed.com/hgrant/35-reasons-why-dogs-hate-the-holidays
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/bygonebureau.com.txt b/inc/3rdparty/site_config/standard/bygonebureau.com.txt
new file mode 100644
index 0000000..0abb643
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/bygonebureau.com.txt
@@ -0,0 +1,6 @@
+title: //h1
+author: //a[contains(@href, '/author/')]
+date: //*[@class='post-date']
+strip: //*[@class='post-date']
+strip: //h1
+test_url: http://bygonebureau.com/2011/06/20/an-existential-psychoanalysis/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/cardboardconnection.com.txt b/inc/3rdparty/site_config/standard/cardboardconnection.com.txt
new file mode 100644
index 0000000..3adc7a3
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/cardboardconnection.com.txt
@@ -0,0 +1,8 @@
+title: //h1[@class='producttabbed-title']
+body: //div[@class='postTabs_divs postTabs_curr_div']
+strip: //div[@class='ratingblock2']
+strip: //p[@id='breadcrumbs']
+strip: //div[@style='display: none']
+
+
+test_url: http://www.cardboardconnection.com/2012-topps-archives-baseball-cards
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/carpeaqua.com.txt b/inc/3rdparty/site_config/standard/carpeaqua.com.txt
new file mode 100644
index 0000000..7ba1ed7
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/carpeaqua.com.txt
@@ -0,0 +1,6 @@
+title: //h2
+body: //div[@class='entry']
+
+prune: no
+# "prune: no" above is required; otherwise the footnotes are removed
+test_url: http://carpeaqua.com/2011/03/27/the-intersection-of-power-and-portability/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/catb.org.txt b/inc/3rdparty/site_config/standard/catb.org.txt
new file mode 100644
index 0000000..8908292
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/catb.org.txt
@@ -0,0 +1,7 @@
+body: //div[@class='article']
+strip: //div[@class='revhistory']
+strip: //div[@class='toc']
+tidy: no
+prune: no
+
+test_url: http://catb.org/~esr/faqs/smart-questions.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/cbc.ca.txt b/inc/3rdparty/site_config/standard/cbc.ca.txt
new file mode 100644
index 0000000..2530510
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/cbc.ca.txt
@@ -0,0 +1,5 @@
+title: //div[contains(@class, 'headline')]/h1
+author: //h5[contains(@class, 'byline')]
+date: substring-after(//h4[contains(@class, 'posted')], 'Posted: ')
+body: //div[@id="storyboard"]
+test_url: http://www.cbc.ca/news/world/story/2012/01/16/cruise-ship-monday.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/cbsnews.com.txt b/inc/3rdparty/site_config/standard/cbsnews.com.txt
new file mode 100644
index 0000000..4ba3da1
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/cbsnews.com.txt
@@ -0,0 +1,14 @@
+date: //meta[@name="published"]/@content
+date: //div[@class="timeLine"]
+title: //div[@id='contentBody']//h1
+author: //dl[@class="storyBlogByline"]/dd/a
+body: //div[@id='storyMediaBox'] | //div[contains(@class, 'storyText')]
+
+# Content Pruning
+strip: //div[@class="scrollingArrows"]
+strip: //div[@class="timeLine"]
+strip: //dl[@class="storyBlogByline"]
+
+prune: no
+
+test_url: http://www.cbsnews.com/8301-201_162-57366361/rescued-americans-dad-proud-of-the-u.s/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/chareidi.org.txt b/inc/3rdparty/site_config/standard/chareidi.org.txt
new file mode 100644
index 0000000..de34a7d
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/chareidi.org.txt
@@ -0,0 +1,2 @@
+title: //h1
+test_url: http://www.chareidi.org/archives5772/tetzaveh/TZV72adraft.htm
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/chinamining.org.txt b/inc/3rdparty/site_config/standard/chinamining.org.txt
new file mode 100644
index 0000000..ea0df2a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/chinamining.org.txt
@@ -0,0 +1,10 @@
+title: //*[@id='Content']/span[1]
+author: substring-after(substring-before(//*[@id='Content']/span[2], ')'), '(')
+date: substring-before(substring-after(//*[@id='Content']/span[2], 'Updated: '), 'Counter')
+
+strip: //*[@id='Content']/span[1]
+strip: //*[@id='Content']/span[2]
+
+body: //*[@id='Content']
+
+test_url: http://www.chinamining.org/News/2011-07-22/1311319069d48087.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/chomsky.info.txt b/inc/3rdparty/site_config/standard/chomsky.info.txt
new file mode 100644
index 0000000..1d29410
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/chomsky.info.txt
@@ -0,0 +1,5 @@
+title: //div[@class='title']
+author: //div[@class='author']
+prune: no
+
+test_url: http://www.chomsky.info/onchomsky/2002----.htm
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/christianitytoday.com.txt b/inc/3rdparty/site_config/standard/christianitytoday.com.txt
new file mode 100644
index 0000000..44288a4
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/christianitytoday.com.txt
@@ -0,0 +1,13 @@
+title://div[@class='title']
+author://div[@class='byline']/b
+date:substring-after(//div[@class='byline'], 'posted')
+body://div[@id='body']
+wrap_in(h2)://span[@class='subhead']
+wrap_in(i)://p[@class='bio']
+wrap_in(i)://p[@class='copyright']
+strip://div[@class='title']
+strip://div[@class='deck']
+strip://div[@class='byline']
+strip://div[@class='copyright']
+strip://br
+test_url: http://www.christianitytoday.com/ct/2012/aprilweb-only/my-god-forsaken-me.html
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/christianpf.com.txt b/inc/3rdparty/site_config/standard/christianpf.com.txt
new file mode 100644
index 0000000..7f089c5
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/christianpf.com.txt
@@ -0,0 +1,5 @@
+title: //h1[@class="entry-title"]
+author: //*[@class="author vcard fn"]
+date: //*[@class="published"]
+body: //div[(@class = "dd_content_wrap")]
+test_url: http://christianpf.com/do-ibuys-lead-to-more-buying/
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/christies.com.txt b/inc/3rdparty/site_config/standard/christies.com.txt
new file mode 100644
index 0000000..5c5889a
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/christies.com.txt
@@ -0,0 +1,6 @@
+tidy: no
+prune: no
+date: //article//time[@pubdate]
+title: //article/header/h2
+body: //article
+test_url: http://www.christies.com/LotFinder/custom/lot_details_MultiLanguage.aspx?from=salesummary&intObjectID=5556662&sid=e536ed1a-b763-41c4-afcf-c94815ec6eee&LID=3
\ No newline at end of file
diff --git a/inc/3rdparty/site_config/standard/chrome.google.com.txt b/inc/3rdparty/site_config/standard/chrome.google.com.txt
new file mode 100644
index 0000000..d4cc858
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/chrome.google.com.txt
@@ -0,0 +1,9 @@
+body: //pre[@id='cx-desc-text']
+body: //div[contains(@class, 'overview-tab-right-bar-info')]
+title: //h1[contains(@class, 'detail-dialog-title')]
+tidy: no
+prune: no
+replace_string(