-
Notifications
You must be signed in to change notification settings - Fork 2
/
rss.ts
438 lines (347 loc) · 19.7 KB
/
rss.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
import * as Colors from "https://deno.land/[email protected]/fmt/colors.ts";
import {
Context,
Expression,
Invocation,
is_expression,
new_macro,
} from "./tsgen.ts";
import { out_file_absolute, write_file_absolute } from "./out.ts";
import { get_root_url } from "./root_url.ts";
import { get_root_directory } from "./linkname.ts";
import { createHash } from "npm:sha256-uint8array";
import { def_key } from "./defref.ts";
import { new_name } from "./names.ts";
/*
General principle of creating rss feeds: use the `create_rss_item` macro to add an item to some feed identified by a title (internally it calls `rss_add_item`; this system supports multiple feeds generated in the same build process).
Later, use the `build_rss_feeds` macro (exactly once) to aggregate all items into one xml document per feed (title).
*/
/** Key under which this module keeps its data in `ctx.state`. */
const statekey = Symbol("Rss");

/** Data the RSS macros accumulate over the course of a build. */
interface RssState {
  /** Items collected so far, grouped by the title of the feed they belong to. */
  feeds: Map<string, RssItem[]>;
}

/**
 * Returns the RSS state stored in `ctx.state`.
 *
 * On first access an empty state (no feeds) is created, stored under
 * `statekey`, and returned.
 */
function rss_state(ctx: Context): RssState {
  const existing = ctx.state.get(statekey);
  if (!existing) {
    const fresh: RssState = { feeds: new Map() };
    ctx.state.set(statekey, fresh);
    return fresh;
  }
  return existing as RssState;
}
/**
 * Metadata describing a single feed item, as passed to `create_rss_item`.
 */
export interface RssFeedItemMeta {
// Identifier of the item. It is registered as a name, used as the URL fragment of the item's link/guid, as the `id` of the rendered HTML anchor, and as the base name of the preview files.
name: string,
// Human-readable title. When absent (or empty), `name` is displayed instead in the rendered HTML.
title?: string,
// Publication date of the item; emitted as the item's <pubDate> and shown in the rendered HTML.
pubDate: Date,
// Formatter for the date shown in the rendered HTML. Defaults to an `en-GB` `Intl.DateTimeFormat`.
format?: Intl.DateTimeFormat,
}
/**
 * Creates an expression that adds `description` as an item to the feed titled
 * `feedTitle` and evaluates to the item's HTML rendering.
 *
 * Once `description` has been expanded, the macro
 * 1. adds an item to the feed whose `<description>` is the expanded content
 *    wrapped in CDATA, with link and guid both pointing at the item's anchor
 *    on the changes page,
 * 2. registers `item.name` as a "def" name with class `rssItem`,
 * 3. writes the rendered HTML to `previews/<name>.html` and a hex digest of it
 *    to `previews/<name>.etag` below the root directory.
 *
 * @param feedTitle Title of the feed the item belongs to.
 * @param item Metadata of the item.
 * @param description Content of the item.
 * @returns An invocation that expands to the rendered item, or to the empty
 *   string when `new_name` returns `null` (presumably because the name could
 *   not be registered — confirm against `names.ts`).
 */
export function create_rss_item(
  feedTitle: string,
  item: RssFeedItemMeta,
  description: Expression,
): Expression {
  const macro = new_macro(
    undefined,
    (expanded, ctx) => {
      const url = build_url(item.name, ctx);

      // A literal "]]>" inside the content would terminate the CDATA section
      // early and produce malformed XML. Splitting it across two adjacent
      // CDATA sections keeps the content intact for XML parsers.
      const cdata_safe = `${expanded}`.split("]]>").join("]]]]><![CDATA[>");

      rss_add_item(feedTitle, {
        title: item.title,
        link: url,
        description: `<![CDATA[${cdata_safe}]]>`,
        pubDate: item.pubDate,
        guid: {
          guid: url,
          isPermaLink: true,
        },
      }, ctx);

      const state = new_name(item.name, "def", ctx);
      if (state === null) {
        return "";
      }
      state.set(def_key, {
        id: item.name,
        singular: item.title ? item.title : item.name,
        clazz: "rssItem",
      });

      const rendered = render_rss_feed_item(item, expanded);
      const previews = [...get_root_directory(ctx), "previews"];
      write_file_absolute([...previews, `${item.name}.html`], rendered, ctx);

      // Create etag: a hex digest of the rendered preview.
      const hash = createHash().update(rendered).digest("hex");
      write_file_absolute([...previews, `${item.name}.etag`], hash, ctx);

      return rendered;
    },
  );
  return new Invocation(macro, [description]);
}
/**
 * Builds the absolute URL of the item with the given id: the anchor `#id` on
 * the `more/changes/index.html` page below the root URL.
 */
function build_url(id: string, ctx: Context): string {
  const root = get_root_url(ctx);
  return `${root}more/changes/index.html#${id}`;
}
/** Date formatter used for items that do not supply their own (`en-GB` locale). */
const default_format = new Intl.DateTimeFormat("en-GB");

/**
 * Renders the HTML for a feed item: a self-linking title anchor, the formatted
 * publication date, and the description.
 *
 * @param item Metadata of the item; `title` falls back to `name`, `format`
 *   falls back to `default_format`.
 * @param description Already rendered HTML of the item body; inserted verbatim.
 */
function render_rss_feed_item(item: RssFeedItemMeta, description: string): string {
  const heading = item.title || item.name;
  const formatter = item.format || default_format;
  const date = formatter.format(item.pubDate);

  const lines = [
    `<div class="rssItem">`,
    `<a class="rss_item_title" id="${item.name}" href="#${item.name}">${heading}</a> <time class="rss_item_time">${date}</time>`,
    `<div class="render_rss_feed_itemDescription">${description}</div>`,
    `</div>`,
  ];
  return lines.join("\n");
}
/**
 * Appends `item` to the list of items collected for the feed titled
 * `feed_title`, starting a new list if this is the feed's first item.
 */
function rss_add_item(feed_title: string, item: RssItem, ctx: Context) {
  const { feeds } = rss_state(ctx);
  const collected = feeds.get(feed_title);
  if (!collected) {
    feeds.set(feed_title, [item]);
    return;
  }
  collected.push(item);
}
/**
 * Configuration of one feed to be rendered by `build_rss_feeds`.
 */
export interface RssFeedConfig {
// Path segments of the xml file. `build_rss_feeds` prepends "build" to them and passes the result to `out_file_absolute`.
path: string[];
// Sets the elements of the xml <channel> element. The title determines which of the items from prior `rss_add_item` calls get added to this feed.
feed_info: RssFeed;
}
/**
 * Creates an expression that renders every configured feed to its xml file.
 *
 * For each entry of `feeds`, the items collected under the feed's title (none,
 * if nothing was added) are serialized together with the channel information
 * and emitted through `out_file_absolute` at `["build", ...path]`.
 *
 * If items were collected for a title that is not among `feeds`, an error is
 * reported, the build is halted, and the macro evaluates to the empty string.
 */
export function build_rss_feeds(feeds: RssFeedConfig[]): Expression {
  const macro = new_macro(
    (_args, ctx) => {
      const collected = rss_state(ctx).feeds;
      const rendered_titles = new Set<string>();

      const outputs: Expression[] = feeds.map(({ path, feed_info }) => {
        rendered_titles.add(feed_info.title);
        const items = collected.get(feed_info.title) || [];
        return out_file_absolute(["build", ...path], false, feed_to_xml(feed_info, items));
      });

      // Every title that received items must correspond to a rendered feed.
      for (const title of collected.keys()) {
        if (rendered_titles.has(title)) {
          continue;
        }
        ctx.error(`Created an item for an rss feed ${Colors.green(title)}, but no such feed was rendered.`);
        ctx.halt();
        return "";
      }

      return outputs;
    },
  );
  return new Invocation(macro, []);
}
/**
 * Channel-level information of an RSS feed.
 * According to https://www.rssboard.org/rss-specification
 *
 * The channel's <pubDate> and <lastBuildDate> are not configurable: when the feed is
 * serialized, both are set to the pubDate of the first item of the feed (if it has one).
 */
export interface RssFeed {
/* mandatory */
// The name of the channel. It's how people refer to your service. If you have an HTML website that contains the same information as your RSS file, the title of your channel should be the same as the title of your website.
title: string;
// The URL to the HTML website corresponding to the channel.
link: string;
// Phrase or sentence describing the channel.
description: string;
/* optional */
// The language the channel is written in. This allows aggregators to group all Italian language sites, for example, on a single page. A list of allowable values for this element, as provided by Netscape, is [here](https://www.rssboard.org/rss-language-codes). You may also use values defined by the [W3C](https://www.w3.org/TR/REC-html40/struct/dirlang.html#langcodes).
language?: string;
// Copyright notice for content in the channel.
copyright?: string;
// Email address for person responsible for editorial content.
managingEditor?: string;
// Email address for person responsible for technical issues relating to channel.
webMaster?: string;
/* Created automatically by the macros, set to the pubDate of the topmost item. */
// The publication date for the content in the channel. For example, the New York Times publishes on a daily basis, the publication date flips once every 24 hours. That's when the pubDate of the channel changes. All date-times in RSS conform to the Date and Time Specification of [RFC 822](https://datatracker.ietf.org/doc/html/rfc822#section-5), with the exception that the year may be expressed with two characters or four characters (four preferred).
// pubDate?: Date;
/* Created automatically by the macros, set to the pubDate of the topmost item. */
// The last time the content of the channel changed.
// lastBuildDate?: Date;
// Specify one or more categories that the channel belongs to. Follows the same rules as the <item>-level [category](https://www.rssboard.org/rss-specification#ltcategorygtSubelementOfLtitemgt) element. More [info](https://www.rssboard.org/rss-specification#syndic8).
category?: RssCategory[];
// A string indicating the program used to generate the channel.
generator?: string;
// A URL that points to the [documentation](https://www.rssboard.org/rss-specification) for the format used in the RSS file. It's probably a pointer to this page. It's for people who might stumble across an RSS file on a Web server 25 years from now and wonder what it is.
docs?: string;
// Allows processes to register with a cloud to be notified of updates to the channel, implementing a lightweight publish-subscribe protocol for RSS feeds. More info [here](https://www.rssboard.org/rss-specification#ltcloudgtSubelementOfLtchannelgt).
cloud?: string;
// ttl stands for time to live. It's a number of minutes that indicates how long a channel can be cached before refreshing from the source. More info [here](https://www.rssboard.org/rss-specification#ltttlgtSubelementOfLtchannelgt).
ttl?: number;
// Specifies a GIF, JPEG or PNG image that can be displayed with the channel. More info [here](https://www.rssboard.org/rss-specification#ltimagegtSubelementOfLtchannelgt).
image?: RssImage;
// The [PICS](https://www.w3.org/PICS/) rating for the channel.
rating?: string;
// Specifies a text input box that can be displayed with the channel. More info [here](https://www.rssboard.org/rss-specification#lttextinputgtSubelementOfLtchannelgt).
textInput?: RssTextInput;
// A hint for aggregators telling them which hours they can skip. This element contains up to 24 <hour> sub-elements whose value is a number between 0 and 23, representing a time in GMT, when aggregators, if they support the feature, may not read the channel on hours listed in the <skipHours> element. The hour beginning at midnight is hour zero.
skipHours?: number[];
// A hint for aggregators telling them which days they can skip. This element contains up to seven <day> sub-elements whose value is Monday, Tuesday, Wednesday, Thursday, Friday, Saturday or Sunday. Aggregators may not read the channel during days listed in the <skipDays> element.
skipDays?: Day[];
// According to the RSS Advisory Board's Best Practices Profile, identifying a feed's URL within the feed makes it more portable, self-contained, and easier to cache. For these reasons, a feed should contain an atom:link used for this purpose.
atomSelf?: string;
}
/**
 * A <category> element, usable on both channels and items.
 */
interface RssCategory {
// A string that identifies a categorization taxonomy. Emitted as the `domain` attribute when present.
domain?: string;
// The value of the element is a forward-slash-separated string that identifies a hierarchic location in the indicated taxonomy. Processors may establish conventions for the interpretation of categories.
category: string;
}
/**
 * The <image> sub-element of a channel: an image that can be displayed with the channel.
 */
interface RssImage {
// Is the URL of a GIF, JPEG or PNG image that represents the channel.
url: string;
// Describes the image, it's used in the ALT attribute of the HTML <img> tag when the channel is rendered in HTML.
title: string;
// Is the URL of the site, when the channel is rendered, the image is a link to the site. (Note, in practice the image <title> and <link> should have the same value as the channel's <title> and <link>.)
link: string;
// Number, indicating the width of the image in pixels.
width?: number;
// Number, indicating the height of the image in pixels.
height?: number;
// Contains text that is included in the TITLE attribute of the link formed around the image in the HTML rendering.
description?: string;
}
/**
 * A channel may optionally contain a <textInput> sub-element, which contains four required sub-elements.
 *
 * The purpose of the <textInput> element is something of a mystery. You can use it to specify a search engine box. Or to allow a reader to provide feedback. Most aggregators ignore it.
 */
interface RssTextInput {
// The label of the Submit button in the text input area.
title: string;
// Explains the text input area.
description: string;
// The name of the text object in the text input area.
name: string;
// The URL of the CGI script that processes text input requests.
link: string;
}
/**
 * Days of the week, as used for the <day> sub-elements of a channel's <skipDays> element.
 * A numeric enum: Monday is 0 through Sunday, which is 6.
 */
enum Day {
Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday,
}
/**
 * A channel may contain any number of <item>s. An item may represent a "story" -- much like a story in a newspaper or magazine; if so its description is a synopsis of the story, and the link points to the full story. An item may also be complete in itself, if so, the description contains the text (entity-encoded HTML is allowed; see [examples](https://www.rssboard.org/rss-encoding-examples)), and the link and title may be omitted. All elements of an item are optional, however at least one of title or description must be present.
 */
interface RssItem {
// The title of the item.
title?: string;
// The URL of the item.
link?: string;
// The item synopsis.
description?: string;
// Email address of the author of the item.
//
// It's the email address of the author of the item. For newspapers and magazines syndicating via RSS, the author is the person who wrote the article that the <item> describes. For collaborative weblogs, the author of the item might be different from the managing editor or webmaster. For a weblog authored by a single individual it would make sense to omit the <author> element.
author?: string;
// Includes the item in one or more categories.
categories?: RssCategory[];
// URL of a page for comments relating to the item. More about comments [here](https://www.rssboard.org/rss-weblog-comments-use-case).
comments?: string;
// Describes a media object that is attached to the item.
enclosure?: RssEnclosure;
// A string that uniquely identifies the item.
guid?: RssGuid;
// Indicates when the item was published.
// Its value is a [date](https://datatracker.ietf.org/doc/html/rfc822#section-5), indicating when the item was published. If it's a date in the future, aggregators may choose to not display the item until that date.
pubDate?: Date;
// The RSS channel that the item came from.
source?: RssSource;
}
/**
 * Describes a media object that is attached to an item.
 */
interface RssEnclosure {
// Says where the enclosure is located, must be an http url.
url: string;
// Says how big it is in bytes.
length: number;
// Says what its type is, a standard MIME type.
type: string;
}
/**
 * Guid stands for globally unique identifier. It's a string that uniquely identifies the item. When present, an aggregator may choose to use this string to determine if an item is new.
 */
interface RssGuid {
// There are no rules for the syntax of a guid. Aggregators must view them as a string. It's up to the source of the feed to establish the uniqueness of the string.
guid: string;
// If the guid element has an attribute named isPermaLink with a value of true, the reader may assume that it is a permalink to the item, that is, a url that can be opened in a Web browser, that points to the full item described by the <item> element.
isPermaLink: boolean;
}
/**
 * The <source> element of an item.
 * The purpose of this element is to propagate credit for links, to publicize the sources of news items. It can be used in the Post command of an aggregator. It should be generated automatically when forwarding an item from an aggregator to a weblog authoring tool.
 */
interface RssSource {
// The name of the RSS channel that the item came from, derived from its <title>.
source: string;
// Links to the XMLization of the source. Emitted as the `url` attribute.
url: string;
}
/** Serializes a channel's <textInput> element with its four required sub-elements. */
function text_input_to_xml(input: RssTextInput): string {
  return [
    `<textInput>`,
    `<title>${input.title}</title>`,
    `<description>${input.description}</description>`,
    `<name>${input.name}</name>`,
    `<link>${input.link}</link>`,
    `</textInput>`,
  ].join("\n");
}

/**
 * Serializes a feed and its items into a complete RSS 2.0 document.
 *
 * Mandatory channel elements are always emitted; optional ones only when set.
 * The channel's <pubDate> and <lastBuildDate> are both taken from the pubDate
 * of the first item, if there is one. All values are inserted verbatim, so
 * callers are responsible for any XML escaping.
 *
 * @param feed Channel-level information.
 * @param items Items of the channel, emitted in the given order.
 * @returns The xml document as a string.
 */
function feed_to_xml(feed: RssFeed, items: RssItem[]): string {
  // One entry per line of the channel header (entries may span several lines).
  const channel: string[] = [
    `<title>${feed.title}</title>`,
    `<link>${feed.link}</link>`,
    `<description>${feed.description}</description>`,
  ];

  if (feed.atomSelf) {
    channel.push(`<atom:link href="${feed.atomSelf}" rel="self" type="application/rss+xml" />`);
  }
  if (feed.language) {
    channel.push(`<language>${feed.language}</language>`);
  }
  if (feed.copyright) {
    channel.push(`<copyright>${feed.copyright}</copyright>`);
  }
  if (feed.managingEditor) {
    channel.push(`<managingEditor>${feed.managingEditor}</managingEditor>`);
  }
  if (feed.webMaster) {
    channel.push(`<webMaster>${feed.webMaster}</webMaster>`);
  }

  const newest = items.length > 0 ? items[0].pubDate : undefined;
  if (newest) {
    const stamp = format_date_rfc822(newest);
    channel.push(`<pubDate>${stamp}</pubDate>`);
    channel.push(`<lastBuildDate>${stamp}</lastBuildDate>`);
  }

  if (feed.category) {
    channel.push(feed.category.map(category_to_xml).join("\n"));
  }
  if (feed.generator) {
    channel.push(`<generator>${feed.generator}</generator>`);
  }
  if (feed.docs) {
    channel.push(`<docs>${feed.docs}</docs>`);
  }
  if (feed.cloud) {
    channel.push(`<cloud>${feed.cloud}</cloud>`);
  }
  // Compared against null/undefined rather than by truthiness: a ttl of 0 is emitted.
  if (feed.ttl != undefined) {
    channel.push(`<ttl>${feed.ttl}</ttl>`);
  }
  if (feed.image) {
    channel.push(image_to_xml(feed.image));
  }
  if (feed.rating) {
    channel.push(`<rating>${feed.rating}</rating>`);
  }
  if (feed.textInput) {
    // Serialize the sub-elements; interpolating the object itself would yield "[object Object]".
    channel.push(text_input_to_xml(feed.textInput));
  }
  if (feed.skipHours) {
    const hours = feed.skipHours.map((hour) => `<hour>${hour}</hour>`).join("\n");
    channel.push(`<skipHours>${hours}</skipHours>`);
  }
  if (feed.skipDays) {
    const days = feed.skipDays.map(day_to_xml).join("\n");
    channel.push(`<skipDays>${days}</skipDays>`);
  }

  // Each item ends with a newline and items are additionally separated by a newline.
  const rendered_items = items.map((item) => `${item_to_xml(item)}\n`).join("\n");

  return [
    `<?xml version="1.0" encoding="UTF-8" ?>`,
    `<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">`,
    `<channel>`,
    channel.join("\n"),
    `${rendered_items}</channel>`,
    `</rss>`,
  ].join("\n");
}
/**
 * Serializes a category as a <category> element. The `domain` attribute is
 * only emitted when the category has a (non-empty) domain.
 */
function category_to_xml(category: RssCategory): string {
  let domain_attribute = "";
  if (category.domain) {
    domain_attribute = ` domain="${category.domain}"`;
  }
  return `<category${domain_attribute}>${category.category}</category>`;
}
/**
 * Serializes a channel image as an <image> element. `url`, `title` and `link`
 * are always emitted; `width`, `height` and `description` only when truthy.
 */
function image_to_xml(image: RssImage): string {
  const lines = [
    `<image>`,
    `<url>${image.url}</url>`,
    `<title>${image.title}</title>`,
    `<link>${image.link}</link>`,
  ];
  if (image.width) {
    lines.push(`<width>${image.width}</width>`);
  }
  if (image.height) {
    lines.push(`<height>${image.height}</height>`);
  }
  if (image.description) {
    lines.push(`<description>${image.description}</description>`);
  }
  lines.push(`</image>`);
  return lines.join("\n");
}
/**
 * Serializes a day as a <day> element containing the English name of the day.
 * Any value that is not one of Tuesday through Sunday is rendered as Monday.
 */
function day_to_xml(day: Day): string {
  switch (day) {
    case Day.Tuesday:
      return `<day>Tuesday</day>`;
    case Day.Wednesday:
      return `<day>Wednesday</day>`;
    case Day.Thursday:
      return `<day>Thursday</day>`;
    case Day.Friday:
      return `<day>Friday</day>`;
    case Day.Saturday:
      return `<day>Saturday</day>`;
    case Day.Sunday:
      return `<day>Sunday</day>`;
    default:
      return `<day>Monday</day>`;
  }
}
/**
 * Serializes a feed item as an <item> element.
 *
 * Only the sub-elements whose value is set (truthy) are emitted, each on its
 * own line. Categories are emitted as sibling <category> elements directly
 * inside the <item>, as the RSS 2.0 specification prescribes; there is no
 * wrapping element. Values are inserted verbatim, so callers are responsible
 * for any XML escaping.
 */
function item_to_xml(item: RssItem): string {
  const lines: string[] = [`<item>`];

  if (item.title) {
    lines.push(`<title>${item.title}</title>`);
  }
  if (item.link) {
    lines.push(`<link>${item.link}</link>`);
  }
  if (item.description) {
    lines.push(`<description>${item.description}</description>`);
  }
  if (item.author) {
    lines.push(`<author>${item.author}</author>`);
  }
  // Skip empty lists so that no blank line ends up in the output.
  if (item.categories && item.categories.length > 0) {
    lines.push(item.categories.map(category_to_xml).join("\n"));
  }
  if (item.comments) {
    lines.push(`<comments>${item.comments}</comments>`);
  }
  if (item.enclosure) {
    lines.push(enclosure_to_xml(item.enclosure));
  }
  if (item.guid) {
    lines.push(guid_to_xml(item.guid));
  }
  if (item.pubDate) {
    lines.push(`<pubDate>${format_date_rfc822(item.pubDate)}</pubDate>`);
  }
  if (item.source) {
    lines.push(source_to_xml(item.source));
  }

  lines.push(`</item>`);
  return lines.join("\n");
}
/**
 * Serializes an enclosure as an <enclosure> element.
 *
 * RSS 2.0 defines <enclosure> as an empty element carrying three required
 * attributes (`url`, `length`, `type`), not as an element with children.
 * Values are inserted verbatim, so callers are responsible for any escaping.
 */
function enclosure_to_xml(enclosure: RssEnclosure): string {
  return `<enclosure url="${enclosure.url}" length="${enclosure.length}" type="${enclosure.type}" />`;
}
/**
 * Serializes a guid as a <guid> element whose `isPermaLink` attribute is
 * always written out explicitly as "true" or "false".
 */
function guid_to_xml(category: RssGuid): string {
  const perma_link = category.isPermaLink ? "true" : "false";
  return `<guid isPermaLink="${perma_link}">${category.guid}</guid>`;
}
/**
 * Serializes an item's source as a <source> element: the channel name is the
 * element's content, the channel's url its `url` attribute.
 */
function source_to_xml(source: RssSource): string {
  const { url, source: channel_name } = source;
  return `<source url="${url}">${channel_name}</source>`;
}
// RFC 822 date formatting, adapted from https://whitep4nth3r.com/blog/how-to-format-dates-for-rss-feeds-rfc-822/

/** Left-pads the decimal representation of `num_` with zeros to at least two characters. */
function pad_number(num_: number) {
  return `${num_}`.padStart(2, "0");
}

/**
 * Formats `date` in the RFC 822 style used by RSS, e.g.
 * `Mon, 15 Jan 2024 09:05:07 +0100`.
 *
 * Day, date and time are taken in the local time zone of the runtime, and the
 * local offset from UTC is appended as `+hhmm` / `-hhmm`.
 */
function format_date_rfc822(date: Date) {
  const dayStrings = ["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"];
  const monthStrings = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"];

  const day = dayStrings[date.getDay()];
  const dayNumber = pad_number(date.getDate());
  const month = monthStrings[date.getMonth()];
  const year = date.getFullYear();
  const time = [date.getHours(), date.getMinutes(), date.getSeconds()].map(pad_number).join(":");

  // getTimezoneOffset() yields (UTC - local time) in minutes: it is positive
  // for zones *behind* UTC and negative for zones ahead of it. The sign of the
  // printed offset is therefore the opposite of the sign of this value.
  const offset = date.getTimezoneOffset();
  const sign = offset <= 0 ? "+" : "-";
  const offset_hours = Math.floor(Math.abs(offset) / 60);
  const offset_minutes = Math.abs(offset) % 60;
  const timezone = `${sign}${pad_number(offset_hours)}${pad_number(offset_minutes)}`;

  return `${day}, ${dayNumber} ${month} ${year} ${time} ${timezone}`;
}