mirror of
https://github.com/moparisthebest/wallabag
synced 2024-11-27 11:22:17 -05:00
Merge pull request #816 from zinnober/dev
Complete rework of faz.net-template
This commit is contained in:
commit
19438d3021
45
inc/3rdparty/site_config/custom/blogs.faz.net.txt
vendored
Normal file
45
inc/3rdparty/site_config/custom/blogs.faz.net.txt
vendored
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
# Author: zinnober
|
||||||
|
|
||||||
|
tidy: no
|
||||||
|
prune: no
|
||||||
|
|
||||||
|
# Set author
|
||||||
|
author: //a[@rel='author']
|
||||||
|
|
||||||
|
# Set date
|
||||||
|
date: //span[@class='Datum']
|
||||||
|
|
||||||
|
# Content is here
|
||||||
|
body: //div[@class='Artikel']
|
||||||
|
|
||||||
|
# Tidy up before article
|
||||||
|
strip: //div[@id='FAZHeaderNeu']
|
||||||
|
strip: //h2[@itemprop='headline']
|
||||||
|
strip: //span[@class='Datum']
|
||||||
|
strip: //span[@class='Autor']
|
||||||
|
strip_id_or_class: ArticlePagerTop
|
||||||
|
strip: //div[@class='FAZArtikelEinleitung']/h2
|
||||||
|
|
||||||
|
# General cleanup
|
||||||
|
strip: //div[@class='clear']
|
||||||
|
strip: //span[@class='Bildnachweis']
|
||||||
|
strip: //iframe
|
||||||
|
strip_id_or_class: Community
|
||||||
|
strip: ' · '
|
||||||
|
|
||||||
|
# Remove tracking and ads
|
||||||
|
strip_image_src: /l.gif?
|
||||||
|
strip: //img[@width='1']
|
||||||
|
strip_id_or_class: invisible
|
||||||
|
strip_id_or_class: Anzeige
|
||||||
|
strip_id_or_class: billboard
|
||||||
|
|
||||||
|
# Remove clutter after article
|
||||||
|
strip_id_or_class: Tagline
|
||||||
|
strip_id_or_class: ArtikelAbbinder
|
||||||
|
strip_id_or_class: FAZArtikelKommentare
|
||||||
|
strip_id_or_class: ArtikelKommentieren
|
||||||
|
strip_id_or_class: FAZContentRight
|
||||||
|
|
||||||
|
# Try it yourself
|
||||||
|
test_url: http://blogs.faz.net/wost/2014/08/17/viel-fuck-und-wenig-guter-sex-1239/
|
111
inc/3rdparty/site_config/standard/faz.net.txt
vendored
Executable file → Normal file
111
inc/3rdparty/site_config/standard/faz.net.txt
vendored
Executable file → Normal file
@ -1,36 +1,101 @@
|
|||||||
|
# Author: zinnober
|
||||||
|
# Complete rewrite of the faz.net template as the standard one is broken
|
||||||
|
# I tried to consider as many page variants as possible, which was some serious work
|
||||||
|
|
||||||
|
tidy: no
|
||||||
|
prune: no
|
||||||
|
|
||||||
# Title
|
# Title
|
||||||
title: //p[@class='Content HeadlineShort']
|
title: //p[@class='Content HeadlineShort']
|
||||||
|
|
||||||
# Authors
|
# Set author
|
||||||
# some are known and have a link, others don't
|
author: substring-after(//span[@class='Autor'], 'von ')
|
||||||
author: substring-after(//span[@class='Autor'], 'Von')
|
author: //span[@class='caps last']/span[@class='caps last']
|
||||||
|
author: //a[@rel='author']
|
||||||
|
|
||||||
# Date
|
# Set date
|
||||||
date: //span[@class='Datum']
|
date: //span[@class='Datum']
|
||||||
|
date: //span[@class='Datum'],/span
|
||||||
|
|
||||||
# Body
|
# Fetch full multipage articles
|
||||||
|
next_page_link: //a[@title='Nächste Seite']
|
||||||
|
|
||||||
|
# Content is here
|
||||||
body: //div[@class='Artikel']
|
body: //div[@class='Artikel']
|
||||||
|
|
||||||
# Removements before body text
|
# Tidy up before article
|
||||||
strip: //div[@class='Breadcrumbs']
|
strip: //div[@id='FAZHeaderNeu']
|
||||||
strip: //div[@class='QuickSearchBox']
|
strip: //h2[@itemprop='headline']
|
||||||
strip: //div[@class='FAZArtikelEinleitung']
|
strip: //span[@class='Datum']
|
||||||
strip: //div[@class='FAZArtikelReiter']
|
strip: //span[@class='Autor']
|
||||||
|
strip_id_or_class: ArticlePagerTop
|
||||||
|
|
||||||
|
# General cleanup
|
||||||
strip: //div[@class='clear']
|
strip: //div[@class='clear']
|
||||||
|
strip: //a[@title='Zur Homepage FAZ.NET']
|
||||||
|
strip: //iframe
|
||||||
|
replace_string( · ):
|
||||||
|
|
||||||
# General removements
|
# Remove tracking and ads
|
||||||
strip: //span[@class='Bildnachweis']
|
strip_image_src: /l.gif?
|
||||||
strip: //img[@class='MediaIcon']
|
strip: //div[contains(@style, 'background-image')]
|
||||||
strip: //div[@class='ArtikelMediaLink']
|
strip: //img[@width='1']
|
||||||
dissolve: //a[img]
|
strip_id_or_class: invisible
|
||||||
|
strip_id_or_class: Anzeige
|
||||||
|
strip_id_or_class: billboard
|
||||||
|
|
||||||
# Removements after body text
|
# Remove various text boxes and social media foo
|
||||||
strip: //div[@class='ArtikelAbbinder']
|
strip_id_or_class: WeitereBeitraege
|
||||||
strip: //div[@class='ArtikelKommentieren Artikelfuss GETS;tk;boxen.top-lesermeinungen;tp;content']
|
strip_id_or_class: WBListe
|
||||||
strip: //div[@class='FAZArtikelKommentare FAZArtikelContent']
|
strip_id_or_class: AutorenModul
|
||||||
strip: //div[@class='FAZArtikelFunktionen']
|
strip_id_or_class: Community
|
||||||
strip: //div[@id='FAZContentRight']
|
strip_id_or_class: SocialMediaStatus
|
||||||
|
strip_id_or_class: RelatedLinkBox
|
||||||
|
strip_id_or_class: MultimediaNavigation
|
||||||
|
strip_id_or_class: IndexTitel
|
||||||
|
|
||||||
# Fix picture captions
|
# Fix picture captions and pictures (use better resolution and remove clutter)
|
||||||
wrap_in(small): //span[@class='Bildunterschrift']/text()
|
strip_id_or_class: LightBoxOverlay
|
||||||
|
strip_id_or_class: exitLarge
|
||||||
|
strip_id_or_class: PagerBox
|
||||||
|
strip_id_or_class: Bildnachweis
|
||||||
|
strip_id_or_class: Bildueberschrift
|
||||||
|
strip_id_or_class: Bildbeschreibung
|
||||||
|
strip_id_or_class: ArtikelBild610
|
||||||
|
strip_id_or_class: MediaLink
|
||||||
|
strip_id_or_class: FotoBoxInnerLeft
|
||||||
|
strip_id_or_class: BilderRelatedLinks
|
||||||
|
|
||||||
|
# Remove clutter after article
|
||||||
|
strip_id_or_class: ArticlePagerBottom
|
||||||
|
strip_id_or_class: backToHome
|
||||||
|
strip_id_or_class: ArtikelAbbinder
|
||||||
|
strip_id_or_class: lesermeinungscontainer
|
||||||
|
strip_id_or_class: ThemenLinks
|
||||||
|
strip_id_or_class: rechtehinweis
|
||||||
|
strip_id_or_class: FAZArtikelMap
|
||||||
|
strip_id_or_class: FAZArtikelKommentare
|
||||||
|
strip_id_or_class: ArtikelKommentieren
|
||||||
|
strip_id_or_class: FAZArtikelFunktionen
|
||||||
|
strip_id_or_class: mailLB
|
||||||
|
strip_id_or_class: FAZContentRight
|
||||||
|
strip_id_or_class: stageModule
|
||||||
|
strip_id_or_class: ContentFooter
|
||||||
|
strip_id_or_class: ServicesFooter
|
||||||
|
strip_id_or_class: FAZFooter
|
||||||
|
|
||||||
|
# Clean up stuff present just in some articles
|
||||||
|
strip_id_or_class: Teaser620
|
||||||
|
strip_id_or_class: TeaserMultimedia
|
||||||
|
strip_id_or_class: VideoBox
|
||||||
|
|
||||||
|
# Remove as soon as Wallabag might be able to embed flash video
|
||||||
|
strip_id_or_class: mmoObjectAsTeaserInArticle
|
||||||
|
strip_id_or_class: additionalStylesAudioVideo
|
||||||
|
strip_id_or_class: hideMMElements
|
||||||
|
|
||||||
|
# Try it yourself
|
||||||
test_url: http://www.faz.net/aktuell/feuilleton/zum-tod-von-margaret-thatcher-die-reizfigur-12141919.html#Drucken
|
test_url: http://www.faz.net/aktuell/feuilleton/zum-tod-von-margaret-thatcher-die-reizfigur-12141919.html#Drucken
|
||||||
|
test_url: http://www.faz.net/aktuell/politik/inland/allensbach-analyse-im-namen-des-volkes-13106492.html
|
||||||
|
test_url: http://www.faz.net/aktuell/feuilleton/kino/video-filmkritiken/video-filmkritik-when-animals-dream-zerrissene-jugend-13105772.html
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user