Skip to content

Commit

Permalink
chore: Make improvements to bring the package to a spotless state.
Browse files Browse the repository at this point in the history
  • Loading branch information
vxern committed Mar 2, 2024
1 parent 646754d commit b3fe6a0
Show file tree
Hide file tree
Showing 13 changed files with 79 additions and 34 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
## 0.2.1

- Add documentation for the remaining, undocumented API members.
- Rename `MatchingModes` to `MatchingMode` as a union type.

## 0.2.0

- Document all API members, export them as well.
Expand Down
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ npm run setup

Afterwards, just to ensure the setup ran smoothly and the repository is ready to go, run the test suite:
```
npm run test
npm test
```

> Optional: If you are contributing to the original project, feel free to skip this step.
Expand Down
2 changes: 1 addition & 1 deletion jsr.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"name": "@vxern/dexonline-scraper",
"version": "0.2.0",
"version": "0.2.1",
"exports": "./src/index.ts"
}
4 changes: 2 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"name": "dexonline-scraper",
"description": "A tiny, battle-tested, performant and documented scraper for dexonline.ro.",
"license": "MIT",
"version": "0.2.0",
"version": "0.2.1",
"type": "module",
"main": "./dist/index.js",
"types": "./dist/index.d.ts",
Expand Down Expand Up @@ -39,7 +39,7 @@
"format": "biome check src/ --apply-unsafe --organize-imports-enabled true",
"build": "tsc",
"test": "mocha --no-warnings",
"publish": "npm run build && npm publish"
"prepublishOnly": "npm test && npm run build && jsr publish"
},
"dependencies": {
"cheerio": "^1.0.0-rc.12"
Expand Down
21 changes: 12 additions & 9 deletions src/constants/copyright.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,4 @@
/**
* @remarks
* This is a list of dictionary identifiers that are under copyright, and cannot be queried without explicit permission.
*
* `dexonline-scraper` filters entries out from them by default; however, this can be overridden in the case of
* having obtained explicit permission for a given dictionary.
*/
export default Object.freeze([
const _copyright = [
"Petro-Sedim",
"Legislație",
"DLR",
Expand Down Expand Up @@ -65,4 +58,14 @@ export default Object.freeze([
"DAN",
"Șăineanu, ed. I",
"DASLR",
] satisfies string[]);
];

/**
* This is a list of dictionary identifiers that are under copyright, and cannot be queried without explicit permission.
*
* `dexonline-scraper` filters entries out from them by default; however, this can be overridden in the case of
* having obtained explicit permission for a given dictionary.
*/
const copyright: readonly string[] = Object.freeze(_copyright);

export default copyright;
12 changes: 10 additions & 2 deletions src/constants/expressions.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
export default Object.freeze({
const _expressions = {
treeType: /^type-(\w+)$/,
relationType: /^me-(\d+)$/,
tableLemmaWithIndex: /((?:[a-zA-ZăĂâÂîÎșȘțȚ-]+))(<sup>(\d+)<\/sup>)?/,
} as const satisfies Record<string, RegExp>);
} as const;

/**
* This is a collection of regular expressions used internally by `dexonline-scraper` for resolving
* dictionary entries.
*/
const expressions: typeof _expressions = Object.freeze(_expressions);

export default expressions;
9 changes: 7 additions & 2 deletions src/constants/links.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
export default Object.freeze({
const _links = {
definition: (word: string): string => `https://dexonline.ro/definitie/${word}`,
} as const satisfies Record<string, unknown>);
} as const;

/** This is a collection of links used internally by `dexonline-scraper` for resolving dictionary entries. */
const links: typeof _links = Object.freeze(_links);

export default links;
12 changes: 10 additions & 2 deletions src/constants/selectors.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { ContentTabs } from "../options.js";

export default Object.freeze({
const _selectors = {
contentTab: (tab: ContentTabs): string => `#tab_${tab}`,
contentTabs: {
synthesis: {
Expand Down Expand Up @@ -51,4 +51,12 @@ export default Object.freeze({
},
},
},
} as const);
} as const;

/**
* This is a collection of DOM selectors used internally by `dexonline-scraper` for locating different elements
* during scraping of dictionary entries.
*/
const selectors: typeof _selectors = Object.freeze(_selectors);

export default selectors;
24 changes: 13 additions & 11 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,17 @@ import copyrightedDictionaries from "./constants/copyright.js";
import Expressions from "./constants/expressions.js";
import Links from "./constants/links.js";
import Selectors from "./constants/selectors.js";
import { DictionaryFlags, MatchingModes, ParserOptions, SearchOptionsWithWord } from "./options.js";
import { DictionaryFlags, MatchingMode, ParserOptions, SearchOptionsWithWord } from "./options.js";

/**
* A namespace containing functions to scrape inflection models from the inflection ("conjugări / declinări")
* tab on Dexonline.
*/
import * as Inflection from "./tabs/inflection.js";

/**
* A namespace containing functions to scrape dictionary entries from the synthesis ("sinteză") tab on Dexonline.
*/
import * as Synthesis from "./tabs/synthesis.js";

/** The default search options. */
Expand All @@ -22,7 +31,9 @@ const defaultSearchOptionsWithWord = Object.freeze({

/** Represents the results of a word search using `dexonline-scraper`, grouped by the tab they were scraped from. */
export interface Results {
/** The dictionary entries scraped from the synthesis ("sinteză") tab. */
readonly synthesis: Synthesis.DictionaryEntry[];
/** The inflection models scraped from the inflection ("conjugări / declinări") tab. */
readonly inflection: Inflection.InflectionModel[];
}

Expand Down Expand Up @@ -77,14 +88,5 @@ export function parse(contents: string, options: SearchOptionsWithWord<true> = d
return { synthesis, inflection };
}

export {
DictionaryFlags,
MatchingModes,
Synthesis,
Inflection,
Links,
Expressions,
Selectors,
copyrightedDictionaries,
};
export { DictionaryFlags, MatchingMode, Synthesis, Inflection, Links, Expressions, Selectors, copyrightedDictionaries };
export type { ParserOptions };
4 changes: 2 additions & 2 deletions src/options.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ export enum ContentTabs {
}

/** Specifies the strictness of word matching. */
export type MatchingModes =
export type MatchingMode =
/** Consider only lemmas that match the search term exactly. */
| "strict"
/** Consider all lemmas similar to the search term. */
Expand All @@ -27,7 +27,7 @@ export interface ParserOptions {
*
* @defaultValue `"lax"`
*/
readonly mode: MatchingModes;
readonly mode: MatchingMode;

/**
* Specifies whether the parser should exclude copyrighted dictionaries.
Expand Down
11 changes: 11 additions & 0 deletions src/tabs/synthesis.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,13 @@ export type RelationType = "synonym" | "antonym" | "diminutive" | "augmentative"

/** An object containing the relations between a given lemma and other lemmas, grouped by kind of relation. */
export interface Relations {
/** A list of synonyms of the given lemma. */
readonly synonyms: string[];
/** A list of antonyms of the given lemma. */
readonly antonyms: string[];
/** A list of diminutive forms of the given lemma. */
readonly diminutives: string[];
/** A list of augmentative forms of the given lemma. */
readonly augmentatives: string[];
}

Expand All @@ -59,16 +63,23 @@ export interface Example extends Row.Row {}

/** A row containing a definition for a given lemma. */
export interface Definition extends Row.Row {
/** A list of sub-definitions of this definition. */
readonly definitions: Definition[];
/** A list of examples for this definition. */
readonly examples: Example[];
/** A list of expressions for this definition. */
readonly expressions: Expression[];
/** The relations between the given lemma and other lemmas for this definition. */
readonly relations: Relations;
}

/** A row containing an expression featuring a given lemma. */
export interface Expression extends Row.Row {
/** A list of examples for this expression. */
readonly examples: Example[];
/** A list of sub-expressions of this expression. */
readonly expressions: Expression[];
/** The relations between the given lemma and other lemmas for this expression. */
readonly relations: Relations;
}

Expand Down
3 changes: 3 additions & 0 deletions src/tabs/synthesis/row.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,11 @@ import { ParserOptions } from "../../options.js";

/** The contents of a row. */
interface Contents {
/** A list of tags or "labels" found beside a row. */
readonly tags: string[];
/** A list of sources (dictionaries) from which a dictionary entry was sourced. */
readonly sources: string[];
/** The text in the row. */
readonly value: string;
}

Expand Down

0 comments on commit b3fe6a0

Please sign in to comment.