1
0
mirror of https://github.com/moparisthebest/wallabag synced 2024-11-15 21:55:09 -05:00
wallabag/vendor/full-text-rss/site_config/standard/neh.gov.txt

17 lines
581 B
Plaintext
Raw Normal View History

2013-12-10 10:19:15 -05:00
# Site extraction rules for neh.gov.
# Host configuration: these rules are intended for article pages under
# http://www.neh.gov/news/humanities/

# Metadata
# The expressions below imply a page <title> with at least two ':' separators,
# presumably "<prefix>:<date>:<article title>" - confirm against a live page.
# Title: everything after the second ':' in the page <title>.
title:substring-after(substring-after(//title,':'),':')
# Author: the text following 'By' in the <h2 class="subHead"> byline.
author:substring-after(//h2[@class = 'subHead'],'By')
# Date: the text between the first and second ':' in the page <title>.
date:substring-before(substring-after(//title,':'),':')

# Image and caption handling
# Selects every text node inside a <p> nested in a <table> that is a direct
# child of <div id="mainContent">; presumably these are image captions, which
# get wrapped in <small> - confirm against a live page.
wrap_in(small)://div[@id = 'mainContent']/table/descendant::p/descendant::text()
# Selects each such <table> itself (presumably the image plus its caption),
# to be wrapped in <fieldset>.
wrap_in(fieldset)://div[@id = 'mainContent']/table

# Clean up
# Targets tables carrying class "marginpaddingTop" for removal.
strip: //table[@class = 'marginpaddingTop']
# Targets the byline heading for removal; the same element is the source of
# the author field above.
strip: //h2[@class = 'subHead']

# Sample article URL for exercising this configuration.
test_url: http://www.neh.gov/news/humanities/2011-11/IslamicScholar.html