diff --git a/.changeset/breezy-plants-act.md b/.changeset/breezy-plants-act.md new file mode 100644 index 000000000..d1c3673b0 --- /dev/null +++ b/.changeset/breezy-plants-act.md @@ -0,0 +1,5 @@ +--- +'myst-parser': patch +--- + +Remove .py from linkify domains diff --git a/package-lock.json b/package-lock.json index d121f350e..88f7f8f54 100644 --- a/package-lock.json +++ b/package-lock.json @@ -11787,6 +11787,14 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/tlds": { + "version": "1.250.0", + "resolved": "https://registry.npmjs.org/tlds/-/tlds-1.250.0.tgz", + "integrity": "sha512-rWsBfFCWKrjM/o2Q1TTUeYQv6tHSd/umUutDjVs6taTuEgRDIreVYIBgWRWW4ot7jp6n0UVUuxhTLWBtUmPu/w==", + "bin": { + "tlds": "bin.js" + } + }, "node_modules/tmp": { "version": "0.0.33", "resolved": "https://registry.npmjs.org/tmp/-/tmp-0.0.33.tgz", @@ -14308,6 +14316,7 @@ "myst-directives": "^1.0.22", "myst-roles": "^1.0.22", "myst-spec": "^0.0.5", + "tlds": "^1.250.0", "unified": "^10.1.1", "unist-builder": "^3.0.0", "unist-util-remove": "^3.1.0", diff --git a/packages/myst-parser/package.json b/packages/myst-parser/package.json index b3c0143b8..4074154f3 100644 --- a/packages/myst-parser/package.json +++ b/packages/myst-parser/package.json @@ -55,6 +55,7 @@ "myst-directives": "^1.0.22", "myst-roles": "^1.0.22", "myst-spec": "^0.0.5", + "tlds": "^1.250.0", "unified": "^10.1.1", "unist-builder": "^3.0.0", "unist-util-remove": "^3.1.0", diff --git a/packages/myst-parser/src/config.ts b/packages/myst-parser/src/config.ts index 72f1e9f7b..664031b45 100644 --- a/packages/myst-parser/src/config.ts +++ b/packages/myst-parser/src/config.ts @@ -67,3 +67,6 @@ export const MARKDOWN_IT_CONFIG = { }, }, }; + +// List of valid TLDs to exclude from linkify +export const EXCLUDE_TLDS = ['py', 'md']; diff --git a/packages/myst-parser/src/myst.ts b/packages/myst-parser/src/myst.ts index c8c05085b..1bf82ad55 100644 --- a/packages/myst-parser/src/myst.ts +++ b/packages/myst-parser/src/myst.ts @@ -3,7 +3,8 @@ import { defaultDirectives } from 'myst-directives'; import { defaultRoles } from 'myst-roles'; import type { Plugin } from 'unified'; import { VFile } from 'vfile'; -import { MARKDOWN_IT_CONFIG } from './config.js'; +import tlds from 'tlds'; +import { EXCLUDE_TLDS, MARKDOWN_IT_CONFIG } from './config.js'; import { tokensToMyst } from './tokensToMyst.js'; import { mathPlugin, @@ -70,6 +71,9 @@ export function createTokenizer(opts?: Options) { } as any, markdownit, ); + if (markdownit.linkify) { + tokenizer.linkify.tlds(tlds.filter((tld) => !EXCLUDE_TLDS.includes(tld))); + } if (extensions.smartquotes) tokenizer.enable('smartquotes'); if (extensions.tables) tokenizer.enable('table'); if (extensions.colonFences) tokenizer.use(colonFencePlugin); diff --git a/packages/myst-parser/tests/linkify.spec.ts b/packages/myst-parser/tests/linkify.spec.ts index 36b66c900..f6d460ffc 100644 --- a/packages/myst-parser/tests/linkify.spec.ts +++ b/packages/myst-parser/tests/linkify.spec.ts @@ -104,4 +104,18 @@ describe('linkify', () => { ], }); }); + it('dont linkify .py', () => { + const content = 'Link in paragraph: example.py'; + const expected = { + type: 'root', + children: [ + { + type: 'paragraph', + children: [{ type: 'text', value: 'Link in paragraph: example.py' }], + }, + ], + }; + expect(stripPositions(mystParse(content))).toEqual(expected); + expect(stripPositions(mystParse(content, { markdownit: { linkify: true } }))).toEqual(expected); + }); });