Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master'
Browse files Browse the repository at this point in the history
  • Loading branch information
j0k3r committed Nov 9, 2017
2 parents 68378e8 + 12ceb4c commit ae319c8
Show file tree
Hide file tree
Showing 9 changed files with 31 additions and 4 deletions.
2 changes: 2 additions & 0 deletions apple.news.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
single_page_link: //p//a[contains(., 'Click here')]
test_url: https://apple.news/AHQREjzH0Ts6iikKhNe6o8w
6 changes: 6 additions & 0 deletions heise.de.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,12 @@ strip_id_or_class: ad_
# Some optimizations
replace_string(<h5>): <h2>
replace_string(</h5>): </h2>
replace_string(<title>Druckversion - ): <title>
replace_string( | heise online</title>): </title>
replace_string( | c't Magazin</title>): </title>
replace_string( | Telepolis</title>): </title>
replace_string( | heise Security</title>): </title>
replace_string( | heise Autos</title>): </title>
# The following rule is disabled because it breaks the parser
#replace_string(<span class="bild_rechts" style="width:): <p "
replace_string(<div class="heisebox">): <blockquote>
Expand Down
5 changes: 5 additions & 0 deletions iphon.fr.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
date: //meta[@name="date"]/@content
author: //meta[@name="author"]/@content
strip_id_or_class: follow

test_url: http://www.iphon.fr/post/iphone-8-x-recharge-sans-fil-cable-simultanee-891682
5 changes: 5 additions & 0 deletions lemonde.fr.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@ body: //div[@id='articleBody']
find_string: <a target='_blank' onclick='return false;' class='lien_interne conjug'
replace_string: <input type='hidden' style='display:none;'

# Remove the excessive cross-linking category links ("Toute l’actualité")
find_string: <a class="lien_interne rub"
replace_string: <input type="hidden" style="display:none;"

prune: no

test_url: http://www.lemonde.fr/economie/article/2011/07/05/moody-s-abaisse-la-note-du-portugal-de-quatre-crans_1545237_3234.html
test_url: http://www.lemonde.fr/big-browser/article/2017/10/27/assassinat-de-kennedy-ce-qu-on-a-appris-dans-les-documents-declassifies_5207029_4832693.html
2 changes: 2 additions & 0 deletions lenta.ru.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
body: //div[@itemprop='articleBody'] | //img[@itemprop='image']
strip_id_or_class: b-inline-topics-box
strip: //iframe[@name='banner']

# do not prune embedded videos
prune: no

test_url: https://lenta.ru/news/2016/07/08/weakpound/
test_url: https://lenta.ru/news/2017/11/01/bigdata/
2 changes: 1 addition & 1 deletion nature.com.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
body: //div[@id='content']
body: //div[@id='article']
test_url: https://www.nature.com/npp/journal/v42/n11/full/npp201786a.html
2 changes: 1 addition & 1 deletion slate.fr.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@

body: //article[contains(concat(' ',normalize-space(@class),' '),' full_article ')]

strip_id_or_class: media_legend
strip_id_or_class: article_author
strip_id_or_class: tag_articles
strip_id_or_class: article_insert
strip_id_or_class: col-right

strip_id_or_class: tag
strip_id_or_class: category
Expand Down
7 changes: 7 additions & 0 deletions sowetanlive.co.za.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
body: //div[contains(concat(' ',normalize-space(@class),' '),' article-widgets ')]
strip_id_or_class: related_articles
find_string:=s180"
replace_string:=s750"

test_url: https://www.sowetanlive.co.za/rss/?publication=sowetan-live
test_url: https://www.sowetanlive.co.za/sundayworld/news/2017-11-08-manana-should-pay-medical-bills-for-assault-victims-court-told/
4 changes: 2 additions & 2 deletions wsj.com.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@ body: //div[@id='wsj-article-wrap']
# TODO: confirm whether this selector is still used
body: //div[@id='article_story_body']

author: //h3[@class='byline']/a
author: //meta[@name="author"]/@content
# for slide show content
body: //ul[@id='imageSlide']//li[@class='firstSlide']//img | (//div[@class='txt_body']//p)[1]
date: //li[@class='dateStamp']/small
date: //meta[@itemprop="dateCreated"]/@content

strip_id_or_class: insetFullBracket
strip_id_or_class: insettipBox
Expand Down

0 comments on commit ae319c8

Please sign in to comment.