forked from the-draupnir-project/Draupnir
-
Notifications
You must be signed in to change notification settings - Fork 0
/
utils.ts
608 lines (561 loc) · 25.1 KB
/
utils.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
/**
* Copyright (C) 2022 Gnuxie <[email protected]>
* All rights reserved.
*
* This file is modified and is NOT licensed under the Apache License.
* This modified file incorperates work from mjolnir
* https://github.com/matrix-org/mjolnir
* which included the following license notice:
Copyright 2019-2021 The Matrix.org Foundation C.I.C.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*
* However, this file is modified and the modifications in this file
* are NOT distributed, contributed, committed, or licensed under the Apache License.
*/
import opentelemetry from "@opentelemetry/api";
import {
LogLevel,
LogService,
MatrixGlob,
getRequestFn,
setRequestFn,
MatrixError,
} from "matrix-bot-sdk";
import { ClientRequest, IncomingMessage } from "http";
import { default as parseDuration } from "parse-duration";
import * as Sentry from '@sentry/node';
import * as _ from '@sentry/tracing'; // Performing the import activates tracing.
import ManagementRoomOutput from "./ManagementRoomOutput";
import { IConfig } from "./config";
import { MatrixSendClient } from "./MatrixEmitter";
// Define a few aliases to simplify parsing durations.
parseDuration["days"] = parseDuration["day"];
parseDuration["weeks"] = parseDuration["week"] = parseDuration["wk"];
parseDuration["months"] = parseDuration["month"];
parseDuration["years"] = parseDuration["year"];
// ... and reexport it
export { parseDuration };
export function htmlEscape(input: string): string {
return input.replace(/["&<>]/g, (char: string) => ({
['"'.charCodeAt(0)]: """,
["&".charCodeAt(0)]: "&",
["<".charCodeAt(0)]: "<",
[">".charCodeAt(0)]: ">"
})[char.charCodeAt(0)]);
}
export function setToArray<T>(set: Set<T>): T[] {
const arr: T[] = [];
for (const v of set) {
arr.push(v);
}
return arr;
}
export function isTrueJoinEvent(event: any): boolean {
const membership = event['content']['membership'] || 'join';
let prevMembership = "leave";
if (event['unsigned'] && event['unsigned']['prev_content']) {
prevMembership = event['unsigned']['prev_content']['membership'] || 'leave';
}
// We look at the previous membership to filter out profile changes
return membership === 'join' && prevMembership !== "join";
}
/**
* Redact a user's messages in a set of rooms.
* See `getMessagesByUserIn`.
*
* @param client Client to redact the messages with.
* @param managementRoom Management room to log messages back to.
* @param userIdOrGlob A mxid or a glob which is applied to the whole sender field of events in the room, which will be redacted if they match.
* See `MatrixGlob` in matrix-bot-sdk.
* @param targetRoomIds Rooms to redact the messages from.
* @param limit The number of messages to redact from most recent first. If the limit is reached then no further messages will be redacted.
* @param noop Whether to operate in noop mode.
*/
export async function redactUserMessagesIn(client: MatrixSendClient, managementRoom: ManagementRoomOutput, userIdOrGlob: string, targetRoomIds: string[], limit = 1000, noop = false) {
for (const targetRoomId of targetRoomIds) {
await managementRoom.logMessage(LogLevel.DEBUG, "utils#redactUserMessagesIn", `Fetching sent messages for ${userIdOrGlob} in ${targetRoomId} to redact...`, targetRoomId);
try {
await getMessagesByUserIn(client, userIdOrGlob, targetRoomId, limit, async (eventsToRedact) => {
for (const victimEvent of eventsToRedact) {
await managementRoom.logMessage(LogLevel.DEBUG, "utils#redactUserMessagesIn", `Redacting ${victimEvent['event_id']} in ${targetRoomId}`, targetRoomId);
if (!noop) {
await client.redactEvent(targetRoomId, victimEvent['event_id']);
} else {
await managementRoom.logMessage(LogLevel.WARN, "utils#redactUserMessagesIn", `Tried to redact ${victimEvent['event_id']} in ${targetRoomId} but Mjolnir is running in no-op mode`, targetRoomId);
}
}
});
} catch (error) {
await managementRoom.logMessage(LogLevel.ERROR, "utils#redactUserMessagesIn", `Error while trying to redact messages for ${userIdOrGlob} in ${targetRoomId}: ${error}`, targetRoomId);
}
}
}
/**
* Gets all the events sent by a user (or users if using wildcards) in a given room ID, since
* the time they joined.
* @param {MatrixSendClient} client The client to use.
* @param {string} sender The sender. A matrix user id or a wildcard to match multiple senders e.g. *.example.com.
* Can also be used to generically search the sender field e.g. *bob* for all events from senders with "bob" in them.
* See `MatrixGlob` in matrix-bot-sdk.
* @param {string} roomId The room ID to search in.
* @param {number} limit The maximum number of messages to search. Defaults to 1000. This will be a greater or equal
* number of events that are provided to the callback if a wildcard is used, as not all events paginated
* will match the glob. The reason the limit is calculated this way is so that a caller cannot accidentally
* traverse the entire room history.
* @param {function} cb Callback function to handle the events as they are received.
* The callback will only be called if there are any relevant events.
* @returns {Promise<void>} Resolves when either: the limit has been reached, no relevant events could be found or there is no more timeline to paginate.
*/
export async function getMessagesByUserIn(client: MatrixSendClient, sender: string, roomId: string, limit: number, cb: (events: any[]) => void): Promise<void> {
const isGlob = sender.includes("*");
const roomEventFilter = {
rooms: [roomId],
...isGlob ? {} : { senders: [sender] }
};
const matcher = new MatrixGlob(sender);
function testUser(userId: string): boolean {
if (isGlob) {
return matcher.test(userId);
} else {
return userId === sender;
}
}
/**
* The response returned from `backfill`
* See https://spec.matrix.org/latest/client-server-api/#get_matrixclientv3roomsroomidmessages
* for what the fields mean in detail. You have to read the spec even with the summary.
* The `chunk` contains the events in reverse-chronological order.
* The `end` is a token for the end of the `chunk` (where the older events are).
* The `start` is a token for the beginning of the `chunk` (where the most recent events are).
*/
interface BackfillResponse {
chunk?: any[],
end?: string,
start: string
}
/**
* Call `/messages` "backwards".
* @param from a token that was returned previously from this API to start paginating from or
* if `null`, start from the most recent point in the timeline.
* @returns The response part of the `/messages` API, see `BackfillResponse`.
*/
async function backfill(from: string | null): Promise<BackfillResponse> {
const qs = {
filter: JSON.stringify(roomEventFilter),
dir: "b",
...from ? { from } : {}
};
LogService.info("utils", "Backfilling with token: " + from);
return client.doRequest("GET", `/_matrix/client/v3/rooms/${encodeURIComponent(roomId)}/messages`, qs);
}
let processed = 0;
/**
* Filter events from the timeline to events that are from a matching sender and under the limit that can be processed by the callback.
* @param events Events from the room timeline.
* @returns Events that can safely be processed by the callback.
*/
function filterEvents(events: any[]) {
const messages: any[] = [];
for (const event of events) {
if (processed >= limit) return messages; // we have provided enough events.
processed++;
if (testUser(event['sender'])) messages.push(event);
}
return messages;
}
// We check that we have the token because rooms/messages is not required to provide one
// and will not provide one when there is no more history to paginate.
let token: string | null = null;
do {
const bfMessages: BackfillResponse = await backfill(token);
const previousToken: string | null = token;
token = bfMessages['end'] ?? null;
const events = filterEvents(bfMessages['chunk'] || []);
// If we are using a glob, there may be no relevant events in this chunk.
if (events.length > 0) {
await cb(events);
}
// This check exists only because of a Synapse compliance bug https://github.com/matrix-org/synapse/issues/12102.
// We also check after processing events as the `previousToken` can be 'null' if we are at the start of the steam
// and `token` can also be 'null' as we have paginated the entire timeline, but there would be unprocessed events in the
// chunk that was returned in this request.
if (previousToken === token) {
LogService.debug("utils", "Backfill returned same end token - returning early.");
return;
}
} while (token && processed < limit)
}
let isMatrixClientPatchedForConciseExceptions = false;
// The fact that MatrixHttpClient logs every http error as error
// is unacceptable really.
// We will provide our own utility for logging outgoing requests as debug.
LogService.muteModule("MatrixHttpClient");
function isMatrixError(path: string): boolean {
return /^\/_matrix/.test(path)
}
/**
* Patch `MatrixClient` into something that throws concise exceptions.
*
* By default, instances of `MatrixClient` throw instances of `IncomingMessage`
* in case of many errors. Unfortunately, these instances are unusable:
*
* - they are logged as ~800 *lines of code*;
* - there is no error message;
* - they offer no stack.
*
* This method configures `MatrixClient` to ensure that methods that may throw
* instead throws more reasonable insetances of `Error`.
*/
function patchMatrixClientForConciseExceptions() {
if (isMatrixClientPatchedForConciseExceptions) {
return;
}
let originalRequestFn = getRequestFn();
setRequestFn((params: { [k: string]: any }, cb: any) => {
// Store an error early, to maintain *some* semblance of stack.
// We'll only throw the error if there is one.
let error = new Error("STACK CAPTURE");
originalRequestFn(params, function conciseExceptionRequestFn(
err: { [key: string]: unknown }, response: { [key: string]: any }, resBody: unknown
) {
if (!err && (response?.statusCode < 200 || response?.statusCode >= 300)) {
// Normally, converting HTTP Errors into rejections is done by the caller
// of `requestFn` within matrix-bot-sdk. However, this always ends up rejecting
// with an `IncomingMessage` - exactly what we wish to avoid here.
err = response;
// Safety note: In the calling code within matrix-bot-sdk, if we return
// an IncomingMessage as an error, we end up logging an unredacted response,
// which may include tokens, passwords, etc. This could be a grave privacy
// leak. The matrix-bot-sdk typically handles this by sanitizing the data
// before logging it but, by converting the HTTP Error into a rejection
// earlier than expected by the matrix-bot-sdk, we skip this step of
// sanitization.
//
// However, since the error we're creating is an `IncomingMessage`, we
// rewrite it into an `Error` ourselves in this function. Our `Error`
// is even more sanitized (we only include the URL, HTTP method and
// the error response) so we are NOT causing a privacy leak.
if (!(err instanceof IncomingMessage)) {
// Safety check.
throw new TypeError("Internal error: at this stage, the error should be an IncomingMessage");
}
}
if (!(err instanceof IncomingMessage)) {
// In most cases, we're happy with the result.
return cb(err, response, resBody);
}
// However, MatrixClient has a tendency of throwing
// instances of `IncomingMessage` instead of instances
// of `Error`. The former take ~800 lines of log and
// provide no stack trace, which makes them typically
// useless.
const method: string | undefined = err.method
? err.method
: "req" in err && err.req instanceof ClientRequest
? err.req.method
: params.method;
const path: string = err.url
? err.url
: "req" in err && err.req instanceof ClientRequest
? err.req.path
: params.uri ?? '';
let body: unknown = null;
if ("body" in err) {
body = err.body;
}
// Calling code may use `body` to check for errors, so let's
// make sure that we're providing it.
if (typeof body === 'string') {
try {
body = JSON.parse(body, jsonReviver);
} catch (ex) {
// Not JSON.
}
}
let message = `Error during MatrixClient request ${method} ${path}: ${err.statusCode} ${err.statusMessage} -- ${JSON.stringify(body)}`;
error.message = message;
if (body) {
// Define the property but don't make it visible during logging.
Object.defineProperty(error, "body", {
value: body,
enumerable: false,
});
}
// Calling code may use `statusCode` to check for errors, so let's
// make sure that we're providing it.
if ("statusCode" in err) {
// Define the property but don't make it visible during logging.
Object.defineProperty(error, "statusCode", {
value: err.statusCode,
enumerable: false,
});
}
// matrix-appservice-bridge depends on errors being matrix-bot-sdk's MatrixError.
// Since https://github.com/turt2live/matrix-bot-sdk/blob/836c2da7145668b20af7e0d75094b6162164f3dc/src/http.ts#L109
// we wrote this, matrix-bot-sdk has updated so that there is now a MatrixError that is thrown
// when there are errors in the response.
if (isMatrixError(path)) {
const matrixError = new MatrixError(body as any, err.statusCode as any);
matrixError.stack = error.stack;
return cb(matrixError, response, resBody)
} else {
return cb(error, response, resBody);
}
})
});
isMatrixClientPatchedForConciseExceptions = true;
}
const MAX_REQUEST_ATTEMPTS = 15;
const REQUEST_RETRY_BASE_DURATION_MS = 100;
const TRACE_CONCURRENT_REQUESTS = false;
let numberOfConcurrentRequests = 0;
let isMatrixClientPatchedForRetryWhenThrottled = false;
/**
* Patch instances of MatrixClient to make sure that it retries requests
* in case of throttling.
*
* Note: As of this writing, we do not re-attempt requests that timeout,
* only request that are throttled by the server. The rationale is that,
* in case of DoS, we do not wish to make the situation even worse.
*/
function patchMatrixClientForRetry() {
if (isMatrixClientPatchedForRetryWhenThrottled) {
return;
}
let originalRequestFn = getRequestFn();
setRequestFn(async (params: { [k: string]: any }, cb: any) => {
let attempt = 1;
numberOfConcurrentRequests += 1;
if (TRACE_CONCURRENT_REQUESTS) {
console.trace("Current number of concurrent requests", numberOfConcurrentRequests);
}
try {
while (true) {
try {
let result: any[] = await new Promise((resolve, reject) => {
originalRequestFn(params, function requestFnWithRetry(
err: { [key: string]: any }, response: { [key: string]: unknown }, resBody: unknown
) {
// Note: There is no data race on `attempt` as we `await` before continuing
// to the next iteration of the loop.
if (attempt < MAX_REQUEST_ATTEMPTS && err?.body?.errcode === 'M_LIMIT_EXCEEDED') {
// We need to retry.
reject(err);
} else {
if (attempt >= MAX_REQUEST_ATTEMPTS) {
LogService.warn('Mjolnir.client', `Retried request ${params.method} ${params.uri} ${attempt} times, giving up.`);
}
// No need-to-retry error? Lucky us!
// Note that this may very well be an error, just not
// one we need to retry.
resolve([err, response, resBody]);
}
});
});
// This is our final result.
// Pass result, whether success or error.
return cb(...result);
} catch (err) {
// Need to retry.
let retryAfterMs = attempt * attempt * REQUEST_RETRY_BASE_DURATION_MS;
if ("retry_after_ms" in err) {
try {
retryAfterMs = Number.parseInt(err.retry_after_ms, 10);
} catch (ex) {
// Use default value.
}
}
LogService.debug("Mjolnir.client", `Waiting ${retryAfterMs}ms before retrying ${params.method} ${params.uri}`);
await new Promise(resolve => setTimeout(resolve, retryAfterMs));
attempt += 1;
}
}
} finally {
numberOfConcurrentRequests -= 1;
}
});
isMatrixClientPatchedForRetryWhenThrottled = true;
}
let isMatrixClientPatchedForPrototypePollution = false;
function jsonReviver(key: string, value: any): any {
if (key === '__proto__' || key === 'constructor') {
return undefined;
} else {
return value;
}
}
/**
* https://github.com/turt2live/matrix-bot-sdk/blob/c7d16776502c26bbb547a3d667ec92eb50e7026c/src/http.ts#L77-L101 💀 fucking hell!!!!
*
* The following is an inefficient workaround, but you gotta do what you can.
*/
function patchMatrixClientForPrototypePollution() {
if (isMatrixClientPatchedForPrototypePollution) {
return;
}
const originalRequestFn = getRequestFn();
setRequestFn((params: { [k: string]: any }, cb: any) => {
originalRequestFn(params, function conciseExceptionRequestFn(
error: { [key: string]: any }, response: { [key: string]: any }, resBody: unknown
) {
// https://github.com/turt2live/matrix-bot-sdk/blob/c7d16776502c26bbb547a3d667ec92eb50e7026c/src/http.ts#L77-L101
// bring forwards this step and do it safely.
if (typeof resBody === 'string') {
try {
resBody = JSON.parse(resBody, jsonReviver);
} catch (e) {
// we don't care if we fail to parse the JSON as it probably isn't JSON.
}
}
if (typeof response?.body === 'string') {
try {
response.body = JSON.parse(response.body, jsonReviver);
} catch (e) {
// we don't care if we fail to parse the JSON as it probably isn't JSON.
}
}
return cb(error, response, resBody);
})
});
isMatrixClientPatchedForPrototypePollution = true;
}
/**
* Perform any patching deemed necessary to MatrixClient.
*/
export function patchMatrixClient() {
// Note that the order of patches is meaningful.
//
// - `patchMatrixClientForPrototypePollution` converts all JSON bodies to safe JSON before client code can
// parse and use the JSON inappropriately.
// - `patchMatrixClientForConciseExceptions` converts all `IncomingMessage`
// errors into instances of `Error` handled as errors;
// - `patchMatrixClientForRetry` expects that all errors are returned as
// errors.
patchMatrixClientForPrototypePollution();
patchMatrixClientForConciseExceptions();
patchMatrixClientForRetry();
}
patchMatrixClient();
/**
* Initialize Sentry for error monitoring and reporting.
*
* This method is idempotent. If `config` specifies that Sentry
* should not be used, it does nothing.
*/
export function initializeSentry(config: IConfig) {
if (sentryInitialized) {
return;
}
if (config.health.sentry) {
// Configure error monitoring with Sentry.
let sentry = config.health.sentry;
Sentry.init({
dsn: sentry.dsn,
tracesSampleRate: sentry.tracesSampleRate,
});
sentryInitialized = true;
}
}
// Set to `true` once we have initialized `Sentry` to ensure
// that we do not attempt to initialize it more than once.
let sentryInitialized = false;
/**
* Adds a nested span around a function
*
* @param target The function thats annotated
* @param context The content of the function
* @returns The result of the function
*/
export function trace(spanName: string) {
return (_target: any, memberName: string, propertyDescriptor: PropertyDescriptor) => {
return {
get() {
const wrapperFn = (...args: any[]) => {
const tracer = opentelemetry.trace.getTracer(
'draupnir-appservice-tracer'
);
return tracer.startActiveSpan(spanName, async (parentSpan) => {
const result = propertyDescriptor.value.apply(this, args);
parentSpan.end();
return result;
});
}
Object.defineProperty(this, memberName, {
value: wrapperFn,
configurable: true,
writable: true
});
return wrapperFn;
}
}
}
}
/**
* Adds a nested span around a sync function
*
* @param target The function thats annotated
* @param context The content of the function
* @returns The result of the function
*/
export function traceSync(spanName: string) {
return (_target: any, memberName: string, propertyDescriptor: PropertyDescriptor) => {
return {
get() {
const wrapperFn = (...args: any[]) => {
const tracer = opentelemetry.trace.getTracer(
'draupnir-appservice-tracer'
);
return tracer.startActiveSpan(spanName, (parentSpan) => {
const result = propertyDescriptor.value.apply(this, args);
parentSpan.end();
return result;
});
}
Object.defineProperty(this, memberName, {
value: wrapperFn,
configurable: true,
writable: true
});
return wrapperFn;
}
}
}
}
/**
* Adds a independent span around a function
*
* @param target The function thats annotated
* @param context The content of the function
* @returns The result of the function
*/
export function independentTrace(spanName: string) {
return (_target: any, memberName: string, propertyDescriptor: PropertyDescriptor) => {
return {
get() {
const wrapperFn = (...args: any[]) => {
const tracer = opentelemetry.trace.getTracer(
'draupnir-appservice-tracer'
);
const span = tracer.startSpan(spanName);
const result = propertyDescriptor.value.apply(this, args);
span.end();
return result;
}
Object.defineProperty(this, memberName, {
value: wrapperFn,
configurable: true,
writable: true
});
return wrapperFn;
}
}
}
}