mirror of
https://github.com/moparisthebest/wallabag
synced 2024-12-21 06:48:56 -05:00
17 lines
770 B
Plaintext
17 lines
770 B
Plaintext
author: substring-before(substring-after(//div[@class='post-byline'], 'By '), ', on')
|
|
date: substring-after(//div[@class='post-byline'], ', on')
|
|
|
|
# The title rule below is commented out because, for some reason, it produces a "no text [48]" error
|
|
#title: //div[@class='post-headline']
|
|
|
|
# for some reason, the following doesn't appear to isolate just the body copy
|
|
body: //div[@class='post-bodycopy']
|
|
|
|
# Workaround for the body rule not isolating the body copy: strip out everything else we don't want
|
|
# The strip_id_or_class rules below can probably all be removed once the body: rule above works as intended
|
|
strip_id_or_class: reply
|
|
strip_id_or_class: left
|
|
strip_id_or_class: post-headline
|
|
strip_id_or_class: post-byline
|
|
strip_id_or_class: footer
|
|
test_url: http://www.uni-watch.com/2011/10/18/the-curious-case-of-steve-debergs-microphone-and-speaker/ |