Skip to content

Commit

Permalink
feat: Add getFeed method (#743)
Browse files Browse the repository at this point in the history
* feat: Add `getFeed` method

Extracted from `htmlparser2`. The `FeedHandler` class was equivalent to what this is doing.

* use `as const`
  • Loading branch information
fb55 authored Aug 27, 2021
1 parent d3db022 commit a910757
Show file tree
Hide file tree
Showing 7 changed files with 533 additions and 0 deletions.
27 changes: 27 additions & 0 deletions src/__fixtures__/Documents/Atom_Example.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- http://en.wikipedia.org/wiki/Atom_%28standard%29 -->
<feed xmlns="http://www.w3.org/2005/Atom">
<title>Example Feed</title>
<subtitle>A subtitle.</subtitle>
<link href="http://example.org/feed/" rel="self" />
<link href="http://example.org/" />
<id>urn:uuid:60a76c80-d399-11d9-b91C-0003939e0af6</id>
<updated>2003-12-13T18:30:02Z</updated>
<author>
<name>John Doe</name>
<email>[email protected]</email>
</author>

<entry>
<title>Atom-Powered Robots Run Amok</title>
<link href="http://example.org/2003/12/13/atom03" />
<link rel="alternate" type="text/html" href="http://example.org/2003/12/13/atom03.html"/>
<link rel="edit" href="http://example.org/2003/12/13/atom03/edit"/>
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
<updated>2003-12-13T18:30:02Z</updated>
<content type="html"><p>Some content.</p></content>
</entry>

<entry/>

</feed>
63 changes: 63 additions & 0 deletions src/__fixtures__/Documents/RDF_Example.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
<?xml version="1.0" encoding="UTF-8"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/" xmlns:ev="http://purl.org/rss/1.0/modules/event/" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:syn="http://purl.org/rss/1.0/modules/syndication/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:admin="http://webns.net/mvcb/">
<channel rdf:about="https://github.com/fb55/htmlparser2/">
<title>A title to parse and remember</title>
<link>https://github.com/fb55/htmlparser2/</link>
<description/>
<dc:language>en-us</dc:language>
<dc:rights>Copyright 2015 the authors</dc:rights>
<dc:publisher>[email protected]</dc:publisher>
<dc:creator>[email protected]</dc:creator>
<dc:source>https://github.com/fb55/htmlparser2/</dc:source>
<dc:title>A title to parse and remember</dc:title>
<dc:type>Collection</dc:type>
<syn:updateBase>2011-11-04T09:39:10-07:00</syn:updateBase>
<syn:updateFrequency>4</syn:updateFrequency>
<syn:updatePeriod>hourly</syn:updatePeriod>
<items>
<rdf:Seq>
<rdf:li rdf:resource="http://somefakesite/path/to/something.html"/>
</rdf:Seq>
</items>
</channel>
<item rdf:about="http://somefakesite/path/to/something.html">
<title><![CDATA[ Fast HTML Parsing ]]></title>
<link>
http://somefakesite/path/to/something.html
</link>
<description><![CDATA[
Great test content<br>A link: <a href="http://github.com">Github</a>
]]></description>
<dc:date>2011-11-04T09:35:17-07:00</dc:date>
<dc:language>en-us</dc:language>
<dc:rights>Copyright 2015 the authors</dc:rights>
<dc:source>
http://somefakesite/path/to/something.html
</dc:source>
<dc:title><![CDATA[ Fast HTML Parsing ]]></dc:title>
<dc:type>text</dc:type>
<dcterms:issued>2011-11-04T09:35:17-07:00</dcterms:issued>
</item>
<item rdf:about="http://somefakesite/path/to/something-else.html">
<title><![CDATA[
This space intentionally left blank
]]></title>
<link>
http://somefakesite/path/to/something-else.html
</link>
<description><![CDATA[
The early bird gets the worm
]]></description>
<dc:date>2011-11-04T09:34:54-07:00</dc:date>
<dc:language>en-us</dc:language>
<dc:rights>Copyright 2015 the authors</dc:rights>
<dc:source>
http://somefakesite/path/to/something-else.html
</dc:source>
<dc:title><![CDATA[
This space intentionally left blank
]]></dc:title>
<dc:type>text</dc:type>
<dcterms:issued>2011-11-04T09:34:54-07:00</dcterms:issued>
</item>
</rdf:RDF>
49 changes: 49 additions & 0 deletions src/__fixtures__/Documents/RSS_Example.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
<?xml version="1.0"?>
<!-- http://cyber.law.harvard.edu/rss/examples/rss2sample.xml -->
<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/">
<channel>
<title>Liftoff News</title>
<link>http://liftoff.msfc.nasa.gov/</link>
<description>Liftoff to Space Exploration.</description>
<language>en-us</language>
<pubDate>Tue, 10 Jun 2003 04:00:00 GMT</pubDate>

<lastBuildDate>Tue, 10 Jun 2003 09:41:01 GMT</lastBuildDate>
<docs>http://blogs.law.harvard.edu/tech/rss</docs>
<generator>Weblog Editor 2.0</generator>
<managingEditor>[email protected]</managingEditor>
<webMaster>[email protected]</webMaster>
<item>

<title>Star City</title>
<link>http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp</link>
<description>How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's &lt;a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm"&gt;Star City&lt;/a&gt;.</description>
<pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>
<guid>http://liftoff.msfc.nasa.gov/2003/06/03.html#item573</guid>

</item>
<item>
<description>Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a &lt;a href="http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm"&gt;partial eclipse of the Sun&lt;/a&gt; on Saturday, May 31st.</description>
<pubDate>Fri, 30 May 2003 11:06:42 GMT</pubDate>
<guid>http://liftoff.msfc.nasa.gov/2003/05/30.html#item572</guid>

</item>
<item>
<title>The Engine That Does More</title>
<link>http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp</link>
<description>Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly. The proposed VASIMR engine would do that.</description>
<pubDate>Tue, 27 May 2003 08:37:32 GMT</pubDate>
<guid>http://liftoff.msfc.nasa.gov/2003/05/27.html#item571</guid>

</item>
<item>
<title>Astronauts' Dirty Laundry</title>
<link>http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp</link>
<description>Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them. Instead, astronauts have other options.</description>
<pubDate>Tue, 20 May 2003 08:56:02 GMT</pubDate>
<guid>http://liftoff.msfc.nasa.gov/2003/05/20.html#item570</guid>

<media:content height="200" medium="image" url="https://picsum.photos/200" width="200"/>
</item>
</channel>
</rss>
101 changes: 101 additions & 0 deletions src/__snapshots__/feeds.spec.ts.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
// Jest Snapshot v1, https://goo.gl/fbAQLP

exports[`getFeed Atom_Example.xml 1`] = `
Object {
"author": "[email protected]",
"description": "A subtitle.",
"id": "urn:uuid:60a76c80-d399-11d9-b91C-0003939e0af6",
"items": Array [
Object {
"description": "Some content.",
"id": "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a",
"link": "http://example.org/2003/12/13/atom03",
"media": Array [],
"pubDate": 2003-12-13T18:30:02.000Z,
"title": "Atom-Powered Robots Run Amok",
},
Object {
"media": Array [],
},
],
"link": "http://example.org/feed/",
"title": "Example Feed",
"type": "atom",
"updated": 2003-12-13T18:30:02.000Z,
}
`;

exports[`getFeed RDF_Example.xml 1`] = `
Object {
"id": "",
"items": Array [
Object {
"description": "Great test content<br>A link: <a href=\\"http://github.com\\">Github</a>",
"link": "http://somefakesite/path/to/something.html",
"media": Array [],
"title": "Fast HTML Parsing",
},
Object {
"description": "The early bird gets the worm",
"link": "http://somefakesite/path/to/something-else.html",
"media": Array [],
"title": "This space intentionally left blank",
},
],
"link": "https://github.com/fb55/htmlparser2/",
"title": "A title to parse and remember",
"type": "rdf",
}
`;

exports[`getFeed RSS_Example.xml 1`] = `
Object {
"author": "[email protected]",
"description": "Liftoff to Space Exploration.",
"id": "",
"items": Array [
Object {
"description": "How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's <a href=\\"http://howe.iki.rssi.ru/GCTC/gctc_e.htm\\">Star City</a>.",
"id": "http://liftoff.msfc.nasa.gov/2003/06/03.html#item573",
"link": "http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp",
"media": Array [],
"pubDate": 2003-06-03T09:39:21.000Z,
"title": "Star City",
},
Object {
"description": "Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a <a href=\\"http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm\\">partial eclipse of the Sun</a> on Saturday, May 31st.",
"id": "http://liftoff.msfc.nasa.gov/2003/05/30.html#item572",
"media": Array [],
"pubDate": 2003-05-30T11:06:42.000Z,
},
Object {
"description": "Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly. The proposed VASIMR engine would do that.",
"id": "http://liftoff.msfc.nasa.gov/2003/05/27.html#item571",
"link": "http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp",
"media": Array [],
"pubDate": 2003-05-27T08:37:32.000Z,
"title": "The Engine That Does More",
},
Object {
"description": "Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them. Instead, astronauts have other options.",
"id": "http://liftoff.msfc.nasa.gov/2003/05/20.html#item570",
"link": "http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp",
"media": Array [
Object {
"height": 200,
"isDefault": false,
"medium": "image",
"url": "https://picsum.photos/200",
"width": 200,
},
],
"pubDate": 2003-05-20T08:56:02.000Z,
"title": "Astronauts' Dirty Laundry",
},
],
"link": "http://liftoff.msfc.nasa.gov/",
"title": "Liftoff News",
"type": "rss",
"updated": 2003-06-10T09:41:01.000Z,
}
`;
23 changes: 23 additions & 0 deletions src/feeds.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// Runs tests for feeds

import { getFeed } from "./feeds";
import fs from "fs";
import path from "path";
import { parseDocument } from "htmlparser2";

// Directory holding the XML feed fixtures; each file in it becomes one test case.
const documents = path.join(__dirname, "__fixtures__", "Documents");

/*
 * Snapshot suite for `getFeed`.
 *
 * The fixture directory is listed synchronously while the suite is being
 * defined, and one test is registered per entry, named after the file.
 * Each test reads the fixture as UTF-8, parses it in XML mode, hands the
 * document's top-level nodes to `getFeed`, and compares the result with
 * the stored snapshot.
 */
describe("getFeed", () => {
    fs.readdirSync(documents).forEach((name) => {
        test(name, async () => {
            const fixturePath = path.join(documents, name);
            const xml = await fs.promises.readFile(fixturePath, "utf8");

            // `xmlMode` is passed because the fixtures are XML feeds, not HTML.
            const { children } = parseDocument(xml, { xmlMode: true });
            const parsedFeed = getFeed(children);

            expect(parsedFeed).toMatchSnapshot();
        });
    });
});
Loading

0 comments on commit a910757

Please sign in to comment.