2014-07-13 04:15:40 -04:00
|
|
|
title: //div[@class='articleHead']//h1
|
|
|
|
author: //div[@class="author-name"]/a[1]
|
|
|
|
body: //div[@class="main"]
|
|
|
|
|
|
|
|
# remove 'From the Lab' and 'Recent posts' text
|
|
|
|
strip: //div[@class='blogLabel']
|
|
|
|
|
|
|
|
# remove h1 heading, byline and meta info
|
|
|
|
strip: //h1
|
|
|
|
strip: //div[@class="article-meta"]
|
|
|
|
strip: //div[@class="author-info"]
|
|
|
|
|
|
|
|
# strip tags and categories
|
|
|
|
strip: //div[@class="department"]
|
|
|
|
|
|
|
|
# strip product cap links
|
|
|
|
strip: //div[@class="cap-main"]
|
|
|
|
strip: //div[@id="compare-lede"]
|
2013-12-06 04:13:03 -05:00
|
|
|
test_url: http://www.pcworld.com/article/262034/are-printer-companies-gouging-us-on-laser-toner-pricing.html
|