Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add scrubber POC #3

Merged
merged 7 commits into from
Mar 26, 2021
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .eslintrc.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,7 @@ module.exports = {
],
rules: {
"no-console": "off",
"no-plusplus": "off",
"no-continue": "off",
},
};
16 changes: 16 additions & 0 deletions index.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,19 @@
import cli from "./cli";

import Scrubber from "./scrubber/scrubber";
import { ScrubberAction } from "./scrubber/scrubberTypes";

async function scrub() {
try {
const actions: ScrubberAction[] = [{ type: "remove", tags: ["@remove"] }];
const scrubber = new Scrubber();

await scrubber.parseConfig("scrubber/scrubberConfig.json");
await scrubber.start(actions);
Comment on lines +11 to +12
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To confirm my understanding, default tag settings go in the config file, and results from CLI user prompts are passed as the actions argument?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, that is how it is currently set up.
Alternatively, we can just pass all tags through the function and not put any in the config. Thoughts? @alexguo8

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The CLI handles default arguments, so if that would reduce the complexity of the code by a lot, then I agree we can just pass all tags through the CLI.
However, both are fine for me.

} catch (err) {
console.log(err);
}
}

cli(process.argv);
scrub();
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
"scripts": {
"dev": "ts-node index.ts",
"lint": "eslint . --ext .ts,.js",
"lint-fix": "eslint . --ext .ts,.js --fix && prettier --write **/*.ts **/*.js",
"lint-fix": "eslint . --ext .ts,.js --fix",
"prod": "tsc -p . && node bin/index.js"
xinhaoz marked this conversation as resolved.
Show resolved Hide resolved
},
"devDependencies": {
Expand Down
156 changes: 156 additions & 0 deletions scrubber/scrubber.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
import fs from "fs";
import path from "path";
import {
ScrubberAction,
TagNameToAction,
ScrubberConfig,
} from "./scrubberTypes";

// Brace expected after a tag name on the line that opens a tagged region.
const TAG_START_CHAR = "{";
// Brace expected before a tag name on the line that closes a tagged region.
const TAG_END_CHAR = "}";

// Line-comment prefix keyed by file extension.
// NOTE(review): not referenced by the code visible in this file — confirm intended use.
const FILE_TYPE_COMMENT: Record<string, string> = {
  js: "//",
  ts: "//",
  py: "#",
};

/**
 * Flattens a list of scrubber actions into a tag-name lookup.
 *
 * Each tag listed under an action is mapped to that action's type. When the
 * same tag appears more than once, the last occurrence wins.
 *
 * @param actions - Actions whose tags should be indexed.
 * @returns A dictionary from tag name to action type.
 */
function scrubberActionsToDict(actions: ScrubberAction[]): TagNameToAction {
  return actions.reduce((tagToAction: TagNameToAction, entry) => {
    entry.tags.forEach((tagName: string) => {
      tagToAction[tagName] = entry.type;
    });
    return tagToAction;
  }, {});
}

/**
 * Reads and parses the scrubber configuration file.
 *
 * The file is read asynchronously and parsed as JSON. The parsed value must
 * be an object with an `actions` array and a `dirs` array; anything else is
 * rejected here so that callers do not fail later with an opaque TypeError.
 *
 * @param filename - Path of the JSON config file.
 * @returns The parsed configuration.
 * @throws The underlying read/parse error, or an Error describing the
 *   invalid shape. The failure is also logged to stderr before rethrowing.
 */
async function getConfigFile(filename: string): Promise<ScrubberConfig> {
  try {
    // fs.promises keeps the read non-blocking; awaiting readFileSync did not.
    const configString = await fs.promises.readFile(filename, "utf8");
    const parsed: unknown = JSON.parse(configString);

    const candidate = parsed as { actions?: unknown; dirs?: unknown } | null;
    if (
      typeof candidate !== "object" ||
      candidate === null ||
      !Array.isArray(candidate.actions) ||
      !Array.isArray(candidate.dirs)
    ) {
      throw new Error(
        `Invalid scrubber config in ${filename}: expected "actions" and "dirs" arrays`,
      );
    }

    return parsed as ScrubberConfig;
  } catch (err) {
    console.error("Failed to read file ", filename);
    throw err;
  }
}

/**
 * Scrubs tagged regions out of a single file.
 *
 * The file is processed line by line. A tag line is either `<tag> {`, which
 * opens a region, or `} <tag>`, which closes it, where `<tag>` is a key of
 * `tags`. Tag lines are always dropped from the output. Lines inside a region
 * whose tag maps to "remove" are dropped as well, including blank lines.
 * Every other line is kept verbatim. Nested regions are not supported.
 *
 * @param filePath - File to scrub. Rewritten in place only when at least one
 *   line was dropped and `isDryRun` is false.
 * @param tags - Maps each recognised tag name to its action.
 * @param isDryRun - When true, logs the lines that would be removed and
 *   leaves the file untouched.
 * @returns A promise that settles once the file has been processed.
 * @throws Error when a tag line has the wrong brace; file-system errors from
 *   reading or writing are propagated as rejections.
 */
async function scrubFile(
  filePath: string,
  tags: TagNameToAction,
  isDryRun: boolean,
): Promise<void> {
  const text = await fs.promises.readFile(filePath, { encoding: "utf8" });

  // Own-property check: `in` would also match inherited keys such as
  // "constructor" or "toString" and misread ordinary code as a tag.
  const isTag = (token: string | undefined): boolean =>
    token !== undefined && Object.prototype.hasOwnProperty.call(tags, token);

  const whitespace = /\s+/;
  const lines = text.split("\n");
  const scrubbedLines: string[] = [];
  let skip = false;

  for (let i = 0; i < lines.length; ++i) {
    const line = lines[i];
    const trimmed = line.trim();
    // Blank lines yield no tokens, so they are treated as ordinary content
    // and are removed when they sit inside a skipped region.
    const tokens = trimmed.length === 0 ? [] : trimmed.split(whitespace);
    const [first, second] = tokens;

    if (isTag(first) && tokens.length !== 2) {
      console.warn(
        `WARNING line ${
          i + 1
        }: possible malformed tag; tags must be on their own line preceded by '}' or followed by '{'`,
      );
      // The suspicious line is dropped, as tags never reach the final file.
      continue;
    }

    if (isTag(first) || isTag(second)) {
      const opensRegion = isTag(first);
      const tag = opensRegion ? first : second;
      const brace = opensRegion ? second : first;

      if (opensRegion && brace !== TAG_START_CHAR) {
        throw new Error(
          `Malformed tag line (${filePath}:${i + 1}): expected '{' after tag name`,
        );
      }

      if (!opensRegion && brace !== TAG_END_CHAR) {
        throw new Error(
          `Malformed tag line (${filePath}:${i + 1}): expected '}' before tag name`,
        );
      }

      // NOTE: nested tagging is not currently expected and will lead to unexpected behaviour.

      if (tags[tag] === "remove") {
        skip = opensRegion;
      }

      // We always scrub tags from the final file.
      continue;
    }

    if (skip) {
      if (isDryRun) {
        console.log(`Skipping line ${i + 1}`);
      }
      continue;
    }

    scrubbedLines.push(line);
  }

  if (skip) {
    console.warn(
      `WARNING ${filePath}: tag region was never closed; the rest of the file was treated as removed`,
    );
  }

  if (isDryRun) return;

  // Lines are only ever dropped, so equal length means identical content.
  // Skipping the write avoids touching files that contain no tags.
  if (scrubbedLines.length === lines.length) return;

  await fs.promises.writeFile(filePath, scrubbedLines.join("\n"));
}

/**
 * Recursively scrubs every regular file under a directory.
 *
 * Each entry of `dir` is stat-ed: regular files are passed to `scrubFile`,
 * sub-directories are scrubbed recursively, and any other entry type is
 * ignored. All entries are processed concurrently.
 *
 * @param dir - Directory to walk.
 * @param tags - Maps each recognised tag name to its action.
 * @param isDryRun - Forwarded to `scrubFile`; when true no file is modified.
 * @returns A promise that resolves once every entry has been processed and
 *   rejects with the first error raised while reading or scrubbing.
 */
async function scrubDir(
  dir: string,
  tags: TagNameToAction,
  isDryRun: boolean,
): Promise<void> {
  // Asynchronous fs calls: the sync variants blocked the event loop even
  // though they were awaited.
  const names = await fs.promises.readdir(dir);

  await Promise.all(
    names.map(
      async (name: string): Promise<void> => {
        const filePath = path.join(dir, name);
        // stat (not lstat) so symlinks are followed, as statSync did.
        const stat = await fs.promises.stat(filePath);
        if (stat.isFile()) {
          await scrubFile(filePath, tags, isDryRun);
        } else if (stat.isDirectory()) {
          await scrubDir(filePath, tags, isDryRun);
        }
      },
    ),
  );
}

/**
 * Entry point of the scrubber tool: loads default tag actions and target
 * directories from a config file, then scrubs those directories.
 */
class Scrubber {
  /** Tag actions loaded from the config file. */
  tags: TagNameToAction = {};

  /** Directories to scrub, as listed in the config file. */
  dirs: string[] = [];

  /**
   * Loads the config file and stores its tag actions and directories.
   *
   * @param filename - Path of the JSON config file.
   */
  async parseConfig(filename: string): Promise<void> {
    // TODO validate config (e.g.properly formed tag names)
    const loaded = await getConfigFile(filename);
    const configuredTags = scrubberActionsToDict(loaded.actions);
    this.tags = configuredTags;
    this.dirs = loaded.dirs;
  }

  /**
   * Scrubs every configured directory.
   *
   * @param actions - Extra actions; their tags override the configured ones.
   * @param isDryRun - When true, nothing is written to disk.
   */
  async start(
    actions: ScrubberAction[],
    isDryRun: boolean = false,
  ): Promise<void> {
    const overrides = scrubberActionsToDict(actions);
    const tags = { ...this.tags, ...overrides };

    // TODO: specify file extensions?
    const pending = this.dirs.map((dir) => scrubDir(dir, tags, isDryRun));
    await Promise.all(pending);
  }
}

export default Scrubber;
9 changes: 9 additions & 0 deletions scrubber/scrubberConfig.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"actions": [
{
"type": "keep",
"tags": ["@remove", "@remove2"]
}
],
"dirs": ["test_dir"]
}
17 changes: 17 additions & 0 deletions scrubber/scrubberTypes.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
/*
Types required by the scrubber tool.
*/

/**
 * What to do with a tagged region. With "remove" the lines between the
 * opening and closing tag lines are dropped; with "keep" they are retained.
 * In both cases the tag lines themselves are stripped from the output.
 */
export type ScrubberActionType = "remove" | "keep";

/** One action together with the tag names it applies to. */
export type ScrubberAction = {
type: ScrubberActionType;
tags: string[];
};

/**
 * Shape of the scrubber config file: the default tag actions and the
 * directories that are scrubbed recursively.
 */
export type ScrubberConfig = {
actions: ScrubberAction[];
dirs: string[];
};

/** Lookup from tag name to the action configured for it. */
export type TagNameToAction = { [key: string]: ScrubberActionType };
5 changes: 5 additions & 0 deletions test_dir/test
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
@remove {
this should be gone
} @remove

hello world
5 changes: 5 additions & 0 deletions test_dir/test1
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
@remove {
this should be gone
} @remove

hello world
2 changes: 1 addition & 1 deletion tsconfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

/* Basic Options */
// "incremental": true, /* Enable incremental compilation */
"target": "es5", /* Specify ECMAScript target version: 'ES3' (default), 'ES5', 'ES2015', 'ES2016', 'ES2017', 'ES2018', 'ES2019', 'ES2020', or 'ESNEXT'. */
"target": "es2017", /* Specify ECMAScript target version: 'ES3' (default), 'ES5', 'ES2015', 'ES2016', 'ES2017', 'ES2018', 'ES2019', 'ES2020', or 'ESNEXT'. */
"module": "commonjs", /* Specify module code generation: 'none', 'commonjs', 'amd', 'system', 'umd', 'es2015', 'es2020', or 'ESNext'. */
// "lib": [], /* Specify library files to be included in the compilation. */
// "allowJs": true, /* Allow javascript files to be compiled. */
Expand Down