Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master'
Browse files Browse the repository at this point in the history
  • Loading branch information
j0k3r committed Jan 18, 2017
2 parents 4ec2e13 + d6a8ba0 commit 5fd1cfc
Show file tree
Hide file tree
Showing 18 changed files with 77 additions and 35 deletions.
3 changes: 3 additions & 0 deletions LICENSE.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
These files are released to the public domain.

See https://creativecommons.org/publicdomain/zero/1.0/ for more information.
7 changes: 6 additions & 1 deletion bloomberg.com.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,10 @@ date: //*[@itemprop='datePublished']/@datetime
author: //meta[@name="sailthru.author"]/@content
body: //*[@itemprop='articleBody' or @itemprop='description']

strip_id_or_class: inline-newsletter
strip_id_or_class: terminal-tout-container
strip_id_or_class: lede

test_url: http://www.bloomberg.com/news/articles/2015-12-30/will-your-job-disappear-by-2024-
test_contains: Every week, hosts Tori Stilwell
test_contains: Every week, hosts Tori Stilwell
test_url: https://www.bloomberg.com/news/articles/2016-12-06/apple-to-start-publishing-ai-research-to-hasten-deep-learning
8 changes: 8 additions & 0 deletions daserste.ndr.de.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Generated by FiveFilters.org's web-based selection tool
# Place this file inside your site_config/custom/ folder
# Source: http://siteconfig.fivefilters.org/grab.php?url=http%3A%2F%2Fdaserste.ndr.de%2Fpanorama%2Faktuell%2FChronik-Rechtsextreme-Vorfaelle-in-der-AfD-2016%2Cafd892.html

title: //h1[contains(concat(' ',normalize-space(@class),' '),' headline ')]
next_page_link: //li[@class='next']/a
body: //div[contains(concat(' ',normalize-space(@class),' '),' modCon ')]//div[contains(concat(' ',normalize-space(@class),' '),' mod ') and (contains(concat(' ',normalize-space(@class),' '),' modA ')) and (contains(concat(' ',normalize-space(@class),' '),' modParagraph '))]//div[contains(concat(' ',normalize-space(@class),' '),' boxCon ')]//div[contains(concat(' ',normalize-space(@class),' '),' box ')]
test_url: http://daserste.ndr.de/panorama/aktuell/Chronik-Rechtsextreme-Vorfaelle-in-der-AfD-2016,afd892.html
33 changes: 8 additions & 25 deletions elektroniknet.de.txt
Original file line number Diff line number Diff line change
@@ -1,27 +1,10 @@
title: //h1
date: //div[@class='datum']
single_page_link: //a[contains(@href, '?type=99')]
date: //time
next_page_link: //li[@class='next']//a

# this hack preserves the intro text, because it would be stripped otherwise if the title is set to //h1
dissolve: //div[@class='artikelMeldung']
# 1 page
test_url: http://www.elektroniknet.de/elektronik-automotive/wirtschaft/aus-quattro-gmbh-wird-audi-sport-136464.html
test_contains: etwa 1.200 Mitarbeiter an den Standorten Neckarsulm und Ingolstadt


strip_id_or_class: anzeige
strip_id_or_class: top_page_navigation
strip_id_or_class: cr_image_container
strip_id_or_class: cr_image_reference
strip_id_or_class: cr_image_icon
strip_id_or_class: _close_txt
strip_id_or_class: _close_ico
strip_id_or_class: clearer

strip://h1
strip://h6
strip://div[contains(@id, 'plista')]
strip://img[contains(@id,'tiny')]
strip://img[@class='cr_image']

# strip url at the top
strip: //p[@style='font-size: 10px;']

test_url: http://www.elektroniknet.de/automotive/technik-know-how/sicherheitselektronik/article/87717/0/Besser_als_die_Wirklichkeit/
# 2 pages
test_url: http://www.elektroniknet.de/elektronik-automotive/sonstiges/machine-learning-wird-demnaechst-massiv-an-bedeutung-gewinnen-136362.html
test_contains: Ende 2014 übernahm er seine heutige Position als CEO von MBRDNA
6 changes: 6 additions & 0 deletions facebook.com.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@ body: //div[@id='m_story_permalink_view' or contains(@data-sigil, 'm-story-view'
strip_id_or_class: commentable
strip: //div[contains(@data-sigil, 'm-mentions-expand')]

http_header(user-agent): PHP/5.3
strip_id_or_class: copyright
# this removes the cookie policy banner
strip_id_or_class: fbPageBannerInner

prune: no
tidy: no

Expand All @@ -12,3 +17,4 @@ tidy: no

test_url: https://www.facebook.com/permalink.php?story_fbid=10154584776550183&id=294468630182
test_contains: holding an extraordinary session in Brussels this month
test_url: https://www.facebook.com/notes/protect-the-graph/retiring-sha-1-certificates/1814716098768533
5 changes: 5 additions & 0 deletions foley.com.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
http_header(user-agent): Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.92 Safari/535.2

test_url: http://www.foley.com/rss/rss.aspx?id=2
test_url: https://www.foley.com/solicitor-general-sides-with-sandoz-on-interpretation-of-biosimilar-statute-12-13-2016/
test_contains: The Solicitor General of the United States
2 changes: 1 addition & 1 deletion heise.de.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

prune: no

title: //article/h1 | //h1
date: //p[@class='news_datum']
author: //span[@class='author']

Expand Down Expand Up @@ -59,3 +58,4 @@ test_url: http://www.heise.de/newsticker/meldung/Ueberwachungstechnik-Die-global
test_url: http://www.heise.de/newsticker/meldung/Bodenradar-fuer-selbstfahrende-Autos-horcht-unter-die-Strasse-3273941.html
test_url: http://www.heise.de/tp/artikel/49/49473/1.html
test_url: http://www.heise.de/ct/artikel/Die-Neuerungen-von-Linux-3-15-2196231.html
test_url: http://heise.de/-3527918
12 changes: 12 additions & 0 deletions ici.radio-canada.ca.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
body: //article
body: //p[@class='TexteChronique']
body: //div[@class='src-content']

strip: //header
strip: //figure
strip: //div[@class='framed']
strip: //form

test_url: http://ici.radio-canada.ca/nouvelle/1003322/lexique-mots-neige-hiver-guy-bertrand
test_url: http://ici.radio-canada.ca/tele/deuxieme-chance/inscription/
test_url: http://ici.radio-canada.ca/emissions/aujourd_hui_l_histoire/2016-2017/chronique.asp?idChronique=423294
5 changes: 5 additions & 0 deletions indiehackers.com.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
prune: no
body: //div[@itemprop="articleBody"]
strip: //aside

test_url: https://www.indiehackers.com/businesses/paleo-meal-plans
1 change: 0 additions & 1 deletion jdubuzz.com.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ next_page_link: //div[@class="post-content"]/div[@class='row pagination']/a[cont

strip_id_or_class: jdg-recommend
strip_id_or_class: proofreader-bloc
strip_id_or_class: single-test

body: //div[contains(concat(' ',normalize-space(@class),' '),' post-content ')]
test_url: http://www.jdubuzz.com/2015/09/11/le-meilleur-du-jduzap-cest-maintenant/
Expand Down
1 change: 0 additions & 1 deletion journaldugamer.com.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ next_page_link: //div[@class="post-content"]/div[@class='row pagination']/a[cont

strip_id_or_class: jdg-recommend
strip_id_or_class: proofreader-bloc
strip_id_or_class: single-test

body: //div[contains(concat(' ',normalize-space(@class),' '),' post-content ')]
test_url: http://www.journaldugamer.com/2015/09/14/financier-desormais-tete-nintendo/
7 changes: 7 additions & 0 deletions lapresse.ca.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
body: //div[@class='article-page']//p[@class='amorce'] | //div[@class='article-page']//div[contains(@class, 'entry')]
author: //div[@class='infosAuteur']

strip: //ul[@class='stories']

test_url: http://www.lapresse.ca/actualites/national/201611/30/01-5046565-coup-dur-pour-les-radars-photo-plusieurs-constats-pourraient-etre-annules.php
test_url: http://www.lapresse.ca/le-soleil/vivre-ici/la-science-au-quotidien/201610/01/01-5026482-les-vertus-de-leau-degout.php
1 change: 0 additions & 1 deletion news.pixelistes.com.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ next_page_link: //div[@class="post-content"]/div[@class='row pagination']/a[cont

strip_id_or_class: jdg-recommend
strip_id_or_class: proofreader-bloc
strip_id_or_class: single-test

body: //div[contains(concat(' ',normalize-space(@class),' '),' post-content ')]
test_url: http://news.pixelistes.com/pixelistes-partenaire-du-salon-de-la-photo-de-paris/
4 changes: 2 additions & 2 deletions reuters.com.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
title: //h1[@class='headline3']
author: substring-after(//p[@class="byline"], 'By ')
date: //meta[@name="REVISION_DATE"]/@content
body: //div[@id='articleImage' or @id='frame_fd1fade'] | //span[@id='articleText'] | //div[@class='pageNavigation']
body: //div[@id='articleImage' or @id='frame_fd1fade'] | //span[@id='article-text'] | //div[@class='pageNavigation']
strip: //li[@class='next']
strip: //span[@class='articleLocation']
prune: no
tidy: no

test_url: http://www.reuters.com/article/2011/04/08/us-ivorycoast-killings-idUSTRE73732A20110408
test_url: http://www.reuters.com/article/2011/04/08/us-ivorycoast-killings-idUSTRE73732A20110408
3 changes: 2 additions & 1 deletion sz.de.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@ strip: //p[@class="anzeige"]
strip: //section[@class="authors"]
strip: //div[contains(@class, "embed")]

test_url: http://www.sueddeutsche.de/muenchen/mietshaus-am-gaertnerplatz-alles-muss-raus-1.1556693
test_url: http://sz.de/1.1556693
test_contains: ist selbst der alte Eigentümer erstaunt
5 changes: 3 additions & 2 deletions tagesschau.de.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@ strip_id_or_class: teaserImTeaser
strip_id_or_class: Comments
strip_id_or_class: mediaInfo
strip: //div[contains(@class, 'mediaCon')]//iframe
strip_id_or_class: metablockwrapper

prune: no

test_url: http://www.tagesschau.de/ausland/snowden-dateien-entschluesselung-101.html
test_contains: Snowden hatte zunächst für
test_url: http://www.tagesschau.de/ausland/aleppo-477.html
test_contains: bevor aus Aleppo ein einziger großer Friedhof wird

test_url: http://www.tagesschau.de/xml/rss2
1 change: 1 addition & 0 deletions tweakers.net.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ body: //div[@itemprop='articleBody']
single_page_link: //a[contains(., 'Singlepage-weergave')]

strip: //q[@class='streamer']
strip_id_or_class: notificationsContainer
prune: no

test_url: http://tweakers.net/feeds/mixed.xml
Expand Down
8 changes: 8 additions & 0 deletions vox.com.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Generated by FiveFilters.org's web-based selection tool
# Place this file inside your site_config/custom/ folder

title: //h1[contains(concat(' ',normalize-space(@class),' '),' c-page-title ')]
body: //div[contains(concat(' ',normalize-space(@class),' '),' c-entry-content ')]
date: //time[contains(concat(' ',normalize-space(@class),' '),' c-byline__item ')]
strip: //h3
test_url: http://www.vox.com/policy-and-politics/2016/11/28/13728086/trump-literally-and-seriously

0 comments on commit 5fd1cfc

Please sign in to comment.