2014-07-13 04:15:40 -04:00
# Headline: the <h1 class="title"> inside the div with class "article-title".
title://div[@class="article-title"]/h1[@class="title"]
# Publication date: the paragraph with class "article-date".
date: //p[@class="article-date"]
# Main content: any div whose class attribute contains "article-body".
body://div[contains(@class, "article-body")]
# Trim out related posts at bottom of article (the blockquote with class "memo").
strip://blockquote[@class="memo"]
# Turn off the HTML Tidy step for this site.
# NOTE(review): the reason is not recorded here; confirm it is still needed.
tidy: no
# Author byline: the <p class="title"> inside the article header div.
# This rule is known not to take effect on this site. The XPath also requires
# an exact match of the header div's full class string.
author://div[@class="page-header article-header clearfix"]/p[@class="title"]
2013-12-06 04:13:03 -05:00
# [Marco:] The author rule won't work here because the page marks the "home" link under the author's name with rel="author", and a rel="author" link defined by the page always takes priority over the XPath rule.
2014-07-13 04:15:40 -04:00
test_url: http://allthingsd.com/20120513/exclusive-yahoos-thompson-out-levinsohn-in-board-settlement-with-loeb-nears-completion/
test_url: http://allthingsd.com/20131010/google-cio-ben-fried-on-how-google-works/