From da92a1460a1bcee1d34892629a1bd8edaf6689b2 Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Wed, 16 Sep 2015 15:12:36 +0200 Subject: [PATCH] Refactor with final changes before 1.0.0 * Update docs; * Remove benchmark. Closes GH-23. --- benchmark.js | 88 ------------------------------- bower.json | 2 +- package.json | 4 +- readme.md | 142 ++++++++++++++++++++++++++++----------------------- 4 files changed, 80 insertions(+), 156 deletions(-) delete mode 100644 benchmark.js diff --git a/benchmark.js b/benchmark.js deleted file mode 100644 index 77368acd..00000000 --- a/benchmark.js +++ /dev/null @@ -1,88 +0,0 @@ -/** - * @author Titus Wormer - * @copyright 2014-2015 Titus Wormer. - * @license MIT - * @module retext - * @fileoverview Benchmark suite for `retext`. - */ - -'use strict'; - -/* global bench suite */ - -/* eslint-env node */ - -/* - * Module dependencies (retext). - */ - -var Retext = require('./'); - -/* - * Test data. - * - * This includes: - * - * - An average sentence (w/ 20 words); - * - An average paragraph (w/ 5 sentences); - * - A (big?) section (w/ 10 paragraphs); - * - A (big?) article (w/ 10 sections); - * - * Source: - * http://www.gutenberg.org/files/10745/10745-h/10745-h.htm - */ - -var sentence = 'Where she had stood was clear, and she was gone since Sir ' + - 'Kay does not choose to assume my quarrel.'; - -var paragraph = 'Thou art a churlish knight to so affront a lady ' + - 'he could not sit upon his horse any longer. ' + - 'For methinks something hath befallen my lord and that he ' + - 'then, after a while, he cried out in great voice. ' + - 'For that light in the sky lieth in the south ' + - 'then Queen Helen fell down in a swoon, and lay. ' + - 'Touch me not, for I am not mortal, but Fay ' + - 'so the Lady of the Lake vanished away, everything behind. ' + - sentence; - -var section = paragraph + Array(10).join('\n\n' + paragraph); - -var article = section + Array(10).join('\n\n' + section); - -/* - * Benchmark suite. 
- */ - -suite('retext.parse(value, callback);', function () { - var retext = new Retext(); - - /* - * Benchmark a paragraph. - */ - - bench('A paragraph (5 sentences, 100 words)', - function () { - retext.parse(paragraph); - } - ); - - /* - * Benchmark a section. - */ - - bench('A section (10 paragraphs, 50 sentences, 1,000 words)', - function () { - retext.parse(section); - } - ); - - /* - * Benchmark an article. - */ - - bench('An article (100 paragraphs, 500 sentences, 10,000 words)', - function () { - retext.parse(article); - } - ); -}); diff --git a/bower.json b/bower.json index 7fc58daf..55122fab 100644 --- a/bower.json +++ b/bower.json @@ -26,8 +26,8 @@ "build/", "components/", "coverage/", + "lib/", "node_modules/", - "benchmark.js", "build.js", "index.js", "test.js", diff --git a/package.json b/package.json index 181d23f9..a6a07962 100644 --- a/package.json +++ b/package.json @@ -32,7 +32,6 @@ "istanbul": "^0.3.0", "jscs": "^2.0.0", "jscs-jsdoc": "^1.0.0", - "matcha": "^0.6.0", "mdast": "^1.0.0", "mdast-comment-config": "^1.0.0", "mdast-github": "^1.0.0", @@ -54,7 +53,6 @@ "build-bundle": "browserify index.js -s Retext > retext.js", "postbuild-bundle": "esmangle retext.js > retext.min.js", "build-md": "mdast . --quiet", - "build": "npm run build-md && npm run build-bundle", - "benchmark": "matcha benchmark.js" + "build": "npm run build-md && npm run build-bundle" } } diff --git a/readme.md b/readme.md index f4fd744d..2cc54bb4 100644 --- a/readme.md +++ b/readme.md @@ -2,23 +2,19 @@ [![Build Status](https://img.shields.io/travis/wooorm/retext.svg)](https://travis-ci.org/wooorm/retext) [![Coverage Status](https://img.shields.io/codecov/c/github/wooorm/retext.svg)](https://codecov.io/github/wooorm/retext) [![Code Climate](http://img.shields.io/codeclimate/github/wooorm/retext.svg)](https://codeclimate.com/github/wooorm/retext) -> **Retext is going to [change -> soon](https://github.com/wooorm/retext/issues/23). 
You probably wan’t to use -> the [next, stable, version](https://github.com/wooorm/retext/tree/feature/stable).** - -**retext** is an extensible natural language system—by default using -[**parse-latin**](https://github.com/wooorm/parse-latin) to transform natural -language into **[NLCST](https://github.com/wooorm/nlcst/)**. -**Retext** provides a pluggable system for analysing and manipulating natural -language in JavaScript. NodeJS and the browser. Tests provide 100% coverage. +**retext** is an extensible natural language processor with support for +multiple languages. **Retext** provides a pluggable system for analysing +and manipulating natural language in JavaScript. Node and the browser. +100% coverage. > Rather than being a do-all library for Natural Language Processing (such as > [NLTK](http://www.nltk.org) or [OpenNLP](https://opennlp.apache.org)), -> **retext** aims to be useful for more practical use cases (such as censoring -> profane words or decoding emoticons, but the possibilities are endless) -> instead of more academic goals (research purposes). +> **retext** aims to be useful for more practical use cases (such as checking +> for [insensitive words](https://github.com/wooorm/alex) or decoding +> [emoticons](https://github.com/wooorm/retext-emoji)) instead of more academic +> goals (research purposes). > **retext** is inherently modular—it uses plugins (similar to -> [rework](https://github.com/reworkcss/rework/) for CSS) instead of providing +> [mdast](https://github.com/wooorm/mdast/) for markdown) instead of providing > everything out of the box (such as > [Natural](https://github.com/NaturalNode/natural)). This makes **retext** a > viable tool for use on the web. @@ -38,8 +34,8 @@ globals module, [uncompressed](retext.js) and [compressed](retext.min.js). 
## Usage The following example uses [**retext-emoji**](https://github.com/wooorm/retext-emoji) -(to show emoji) and [**retext-smartypants**](https://github.com/wooorm/retext-smartypants) -(for smart punctuation). +to show emoji and [**retext-smartypants**](https://github.com/wooorm/retext-smartypants) +for smart punctuation. Require dependencies: @@ -60,28 +56,27 @@ var processor = retext().use(smartypants).use(emoji, { Process a document: ```javascript -var doc = processor.process( - 'The three wise monkeys [. . .] sometimes called the ' + - 'three mystic apes--are a pictorial maxim. Together ' + - 'they embody the proverbial principle to ("see no evil, ' + - 'hear no evil, speak no evil"). The three monkeys are ' + - 'Mizaru (:see_no_evil:), covering his eyes, who sees no ' + - 'evil; Kikazaru (:hear_no_evil:), covering his ears, ' + - 'who hears no evil; and Iwazaru (:speak_no_evil:), ' + - 'covering his mouth, who speaks no evil.' -); +var doc = processor.process([ + 'The three wise monkeys [. . .] sometimes called the three mystic', + 'apes--are a pictorial maxim. Together they embody the proverbial', + 'principle to ("see no evil, hear no evil, speak no evil"). The', + 'three monkeys are Mizaru (:see_no_evil:), covering his eyes, who', + 'sees no evil; Kikazaru (:hear_no_evil:), covering his ears, who', + 'hears no evil; and Iwazaru (:speak_no_evil:), covering his mouth,', + 'who speaks no evil.' +].join('\n')); ``` Yields (you need a browser which supports emoji to see them): ```text -The three wise monkeys […] sometimes called the three -mystic apes—are a pictorial maxim. Together they -embody the proverbial principle to (“see no evil, -hear no evil, speak no evil”). The three monkeys are -Mizaru (🙈), covering his eyes, who sees no evil; -Kikazaru (🙉), covering his ears, who hears no evil; -and Iwazaru (🙊), covering his mouth, who speaks no evil. +The three wise monkeys […] sometimes called the three mystic +apes—are a pictorial maxim. 
Together they embody the proverbial +principle to (“see no evil, hear no evil, speak no evil”). The +three monkeys are Mizaru (🙈), covering his eyes, who +sees no evil; Kikazaru (🙉), covering his ears, who +hears no evil; and Iwazaru (🙊), covering his mouth, +who speaks no evil. ``` ## API @@ -106,13 +101,13 @@ Change the way [**retext**](#api) works by using a [plugin](#plugin). **Returns** -`Object`: an instance of Retext: The returned object functions just like +`Object` — an instance of Retext: The returned object functions just like **retext** (it has the same methods), but caches the `use`d plugins. This provides the ability to chain `use` calls to use multiple plugins, but ensures the functioning of the **retext** module does not change for other dependents. -### [retext](#api).process(value\[, done\]) +### [retext](#api).process(value\[, [done](#function-doneerr-file-doc)\]) Parse a text document, apply plugins to it, and compile it into something else. @@ -123,30 +118,47 @@ something else. **Parameters** -* `value` (`string`) — Text document; +* `value` ([`VFile`](https://github.com/wooorm/vfile) or `string`) + — Text document; -* `done` (`function(err, doc, file)`, optional) — Callback invoked when the - output is generated with either an error, or a result. Only strictly - needed when async plugins are used. +* `done` ([`Function`](#function-doneerr-file-doc), optional). **Returns** -`string` or `null`: A document. Formatted in whatever plugins generate. -The result is `null` if a plugin is asynchronous, in which case the callback -`done` should’ve been passed (don’t worry: plugin creators make sure you know -its async). +`string?`: A document. Formatted in whatever plugins generate. The result is +`null` if a plugin is asynchronous, in which case the callback `done` should’ve +been passed (don’t worry: plugin creators make sure you know it’s async).
+ +### function done(err, [file](https://github.com/wooorm/vfile), doc) + +Callback invoked when the output is generated with either an error, or the +processed document (represented as a virtual file and a string). + +**Parameters** + +* `err` (`Error?`) — Reason for failure; +* `file` ([`VFile?`](https://github.com/wooorm/vfile)) — Virtual file; +* `doc` (`string?`) — Generated document. + +## Plugin + +### function attacher([retext](#api)\[, options\]) + +A plugin is a function, which takes the **Retext** instance a user attached +the plugin to as a first parameter and optional configuration as a second +parameter. + +A plugin can return a `transformer`. -### plugin +### function transformer([node](https://github.com/wooorm/nlcst), [file](https://github.com/wooorm/vfile)\[, next\]) -A plugin is simply a function, with `function(retext[, options])` as its -signature. The first argument is the **Retext** instance a user attached the -plugin to. The plugin is invoked when a user `use`s the plugin (not when a -document is parsed) and enables the plugin to modify retext. +A transformer changes the provided document (represented as a node and a +virtual file). -The plugin can return another function: `function(NLCSTNode, file[, next])`. -This function is invoked when a document is parsed. +Transformers can be asynchronous, in which case `next` must be invoked +(optionally with an error) when done. -## Plugins +## List of Plugins * [retext-directionality](https://github.com/wooorm/retext-directionality) — (**[demo](http://wooorm.github.io/retext-directionality/)**) @@ -160,10 +172,19 @@ This function is invoked when a document is parsed.
— (**[demo](http://wooorm.github.io/retext-double-metaphone/)**) — Implementation of the Double Metaphone algorithm; +* [retext-dutch](https://github.com/wooorm/retext-dutch) + — Dutch language support; + +* [retext-english](https://github.com/wooorm/retext-english) + — English language support; + * [retext-emoji](https://github.com/wooorm/retext-emoji) — (**[demo](http://wooorm.github.io/retext-emoji/)**) — Encode or decode [Gemojis](https://github.com/github/gemoji); +* [retext-equality](https://github.com/wooorm/retext-equality) + — Warn about possible insensitive, inconsiderate language; + * [retext-keywords](https://github.com/wooorm/retext-keywords) — (**[demo](http://wooorm.github.io/retext-keywords/)**) — Extract keywords and keyphrases; @@ -206,8 +227,8 @@ This function is invoked when a document is parsed. ## List of Utilities -Although not **retext** plug-ins, the following projects are useful when -working with the [CST](https://github.com/wooorm/nlcst): +The following projects are useful when working with the syntax tree, +[NLCST](https://github.com/wooorm/nlcst): * [wooorm/nlcst-to-string](https://github.com/wooorm/nlcst-to-string) — Stringify a node; @@ -215,6 +236,9 @@ working with the [CST](https://github.com/wooorm/nlcst): * [wooorm/nlcst-is-literal](https://github.com/wooorm/nlcst-is-literal) — Check whether a node is meant literally; +* [wooorm/nlcst-test](https://github.com/wooorm/nlcst-test) + — Validate a NLCST node; + In addition, see [`wooorm/unist`](https://github.com/wooorm/unist#unist-node-utilties) for other utilities which work with **retext** nodes, but also with [**mdast**](https://github.com/wooorm/mdast) nodes. @@ -222,21 +246,11 @@ for other utilities which work with **retext** nodes, but also with And finally, see [`wooorm/vfile`](https://github.com/wooorm/vfile#related-tools) for a list of utilities for working with virtual files. 
-## Benchmark - -On a MacBook Air, it parses about 2 big articles, 25 sections, or 230 -paragraphs per second. - -```text - retext.parse(value, callback); - 325 op/s » A paragraph (5 sentences, 100 words) - 33 op/s » A section (10 paragraphs, 50 sentences, 1,000 words) - 3 op/s » An article (100 paragraphs, 500 sentences, 10,000 words) -``` - ## Related * [nlcst](https://github.com/wooorm/nlcst) +* [unist](https://github.com/wooorm/unist) +* [unified](https://github.com/wooorm/unified) ## License