# Content-extraction rules for cbc.ca news article pages.
# NOTE(review): this looks like a Full-Text RSS style site config
# (one "directive: XPath" per line) -- confirm against the consuming tool.

# Headline: an <h1> that is a direct child of any <div> whose class
# attribute contains the substring 'headline'.
title: //div[contains(@class, 'headline')]/h1
# Byline: any <h5> whose class attribute contains 'byline'.
author: //h5[contains(@class, 'byline')]
# Publication date: the text following the literal prefix 'Posted: ' in the
# string value of the first <h4> (document order) whose class contains
# 'posted'. Evaluates to an empty string if the prefix is absent.
date: substring-after(//h4[contains(@class, 'posted')], 'Posted: ')
# Article body: the <div> whose id is exactly "storyboard".
body: //div[@id="storyboard"]
# Sample article URL; presumably used to check the selectors above -- verify.
test_url: http://www.cbc.ca/news/world/story/2012/01/16/cruise-ship-monday.html