Skip to content

Commit

Permalink
Added options for custom output and image paths.
Browse files Browse the repository at this point in the history
  • Loading branch information
gautamdhameja committed May 29, 2021
1 parent 3318889 commit 399efd8
Show file tree
Hide file tree
Showing 11 changed files with 2,830 additions and 415 deletions.
12 changes: 12 additions & 0 deletions .eslintrc.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"env": {
"browser": true,
"commonjs": true,
"es2021": true
},
"extends": "eslint:recommended",
"parserOptions": {
"ecmaVersion": 12
},
"rules": {}
}
63 changes: 34 additions & 29 deletions index.js
Original file line number Diff line number Diff line change
@@ -1,30 +1,35 @@
#!/usr/bin/env node
'use strict';

const program = require('commander');
const workflow = require('./lib/workflow');
const packageJson = require('./package.json');

// Register the `convertLocal` sub-command. `program.version()` returns the
// root program and `.command()` returns the new sub-command, so everything
// configured below applies to `convertLocal` only.
const convertLocalCommand = program
  .version(packageJson.version)
  .command('convertLocal [inputDirectory]');

// [flags, description] pairs for the boolean switches of `convertLocal`.
const convertLocalOptions = [
  ['-d, --drafts', 'Convert drafts too.'],
  ['-f, --frontMatter', 'Add front-matter.'],
  ['-i, --images', 'Download images in local directory.'],
];

convertLocalCommand.description('Converts Medium exported html files to markdown from a local directory.');
for (const [flags, description] of convertLocalOptions) {
  convertLocalCommand.option(flags, description);
}
convertLocalCommand.action(workflow.processAll);

// Convert from url has been removed.
// Medium posts seem to have updated (random) css classes and html attributes,
// and the reader is unable to extract the article content from the html body.

// program
// .version(packageJson.version)
// .command('convertUrl [url]')
// .description('Converts Medium post to markdown from its url.')
// .option('-o, --outputDir <>', 'Output directory path.')
// .option('-f, --frontMatter', 'Add front-matter.')
// .option('-i, --images', 'Download images in local directory.')
// .action(workflow.processSingle);

// Parse the real command line and dispatch to the matching command action.
program.parse(process.argv);
"use strict";

const program = require("commander");
const workflow = require("./lib/workflow");
const packageJson = require("./package.json");

// Register the `convertLocal` sub-command. `.command()` hands back the new
// sub-command object, which is configured separately below.
const localCommand = program
  .version(packageJson.version)
  .command("convertLocal [inputDirectory]");

localCommand.description(
  "Converts Medium exported html files to markdown from a local directory."
);

// Boolean switches.
localCommand
  .option("-d, --drafts", "Convert drafts too.")
  .option("-f, --frontMatter", "Add front-matter.")
  .option("-i, --images", "Download images at default path.");

// Options that take a value.
// NOTE(review): `-op` and `-ip` are multi-character short flags; Commander
// documents short flags as a single character — confirm these parse as
// intended with the Commander version this package pins.
localCommand
  .option("-op, --path <path>", "Custom path for saving markdown files.")
  .option("-ip, --img-path <imgpath>", "Custom path for downloading images.");

localCommand.action(workflow.processAll);

// Convert from url has been removed.
// Medium posts seem to have updated (random) css classes and html attributes,
// and the reader is unable to extract the article content from the html body.

// program
// .version(packageJson.version)
// .command('convertUrl [url]')
// .description('Converts Medium post to markdown from its url.')
// .option('-o, --outputDir <>', 'Output directory path.')
// .option('-f, --frontMatter', 'Add front-matter.')
// .option('-i, --images', 'Download images in local directory.')
// .action(workflow.processSingle);

// Parse the real command line and dispatch to the matching command action.
// eslint-disable-next-line no-undef
program.parse(process.argv);
152 changes: 78 additions & 74 deletions lib/converter.js
Original file line number Diff line number Diff line change
@@ -1,74 +1,78 @@
const TurndownService = require('turndown');
const path = require('path');
const url = require('url');
const utils = require('./utils');

// Options handed to the TurndownService constructor below; they control the
// markdown flavour that is emitted (see the Turndown README for each key).
const config = {
headingStyle: "atx",
hr: "---",
bulletListMarker: "*",
codeBlockStyle: "fenced",
fence: "```",
emDelimiter: "_",
strongDelimiter: "**",
linkStyle: "inlined",
linkReferenceStyle: "full"
};

// Single shared converter instance; the custom "figure" rule is added to it.
const td = new TurndownService(config);
// Module-level state shared between `convert` and the figure rule:
// `images` is replaced with a fresh array by `convert` and appended to by the
// figure rule whenever it rewrites an image link to a local path.
let images = [];
// Set by `convert`; the figure rule rewrites image links only when this is
// strictly `true`.
let downloadImages = false;

// parsing figure and figcaption for markdown
td.addRule('figure', {
  filter: 'figure',
  /**
   * Builds the markdown for one <figure> element.
   *
   * @param {string} content - Text Turndown passes in for the figure.
   * @returns {string} The image line (possibly rewritten to a local path and
   *   given the caption as alt text), a newline, then the caption.
   */
  replacement: function (content) {
    // This is a hack based on string parsing;
    // ugly and error prone.
    // Need to find a better way to do this!

    const lines = content.split('\n');
    const imageStr = "![](https://cdn-images";
    let element = lines.find((line) => line.includes(imageStr));
    // Position-based assumption of this hack: the fifth line is used as the
    // caption (presumably the figcaption text).
    const caption = lines[4];

    if (downloadImages === true && element) {
      // Drop the leading "![](" and the trailing ")" to recover the URL.
      const imgSrc = element.substring(4, element.length - 1);

      // This check is important as Medium renders embeds (YouTube, etc.) also as figures.
      if (utils.isUrl(imgSrc)) {
        const imgFileName = getImageName(imgSrc);
        const localImgPath = path.join('img', imgFileName);
        element = "![](" + localImgPath + ")";
        // Record the remote source so the caller can download it later.
        images.push({
          src: imgSrc,
          name: imgFileName
        });
      }
    }

    // Insert the caption between "![" and "](" so it becomes the alt text.
    // Guarded on `element`: figures without a cdn-images line (embeds) leave
    // it undefined, and slicing undefined used to throw a TypeError.
    if (element && caption) {
      element = [element.slice(0, 2), caption, element.slice(2)].join('');
    }

    return (element || '') + '\n' + (caption || '');
  }
});

/**
 * Converts an HTML string to markdown.
 *
 * Resets the module-level `images` list and `downloadImages` flag before
 * running Turndown, so the figure rule sees the settings of this call.
 *
 * @param {string} htmlStr - HTML to convert.
 * @param {boolean} downloadImagesFlag - When strictly true, the figure rule
 *   rewrites image links to local paths and records them.
 * @returns {{md: string, images: Array}} Markdown plus the images recorded
 *   while converting.
 */
const convert = function (htmlStr, downloadImagesFlag) {
  images = [];
  downloadImages = downloadImagesFlag;

  const md = td.turndown(htmlStr);
  return { md: md, images: images };
};

/**
 * Derives a local file name from an image URL.
 *
 * Takes the last path segment of the URL, replaces every character of its
 * base name that is not an ASCII letter or digit with "__", and keeps the
 * original extension (or ".jpg" when there is none).
 *
 * @param {string} imgSrc - Image URL.
 * @returns {string} Sanitised file name including extension.
 */
const getImageName = function (imgSrc) {
  const { pathname } = url.parse(imgSrc);
  const { name: baseName, ext: extension } = path.parse(path.basename(pathname));
  const safeName = baseName.replace(/[^a-zA-Z0-9]/g, '__');

  // If no extension, add .jpg.
  if (extension) {
    return safeName + extension;
  }
  return safeName + '.jpg';
};

module.exports = convert;
const TurndownService = require("turndown");
const path = require("path");
const url = require("url");
const utils = require("./utils");

// Options handed to the TurndownService constructor below; they control the
// markdown flavour that is emitted (see the Turndown README for each key).
const config = {
headingStyle: "atx",
hr: "---",
bulletListMarker: "*",
codeBlockStyle: "fenced",
fence: "```",
emDelimiter: "_",
strongDelimiter: "**",
linkStyle: "inlined",
linkReferenceStyle: "full",
};

// Single shared converter instance; the custom "figure" rule is added to it.
const td = new TurndownService(config);
// Module-level state shared between `convert` and the figure rule:
// `images` is replaced with a fresh array by `convert` and appended to by the
// figure rule whenever it rewrites an image link to a local path.
let images = [];
// Set by `convert`; the figure rule rewrites image links only when this is
// strictly `true`.
let downloadImages = false;
// Directory prefix the figure rule joins with each image file name;
// overwritten by `convert` on every call.
let imgPath = "img";

// Parsing figure and figcaption for markdown.
td.addRule("figure", {
  filter: "figure",
  /**
   * Builds the markdown for one <figure> element.
   *
   * @param {string} content - Text Turndown passes in for the figure.
   * @returns {string} The image line (possibly rewritten to a local path and
   *   given the caption as alt text), a newline, then the caption.
   */
  replacement: function (content) {
    // This is a hack based on string parsing;
    // ugly and error prone.
    // Need to find a better way to do this!

    const lines = content.split("\n");
    const imageStr = "![](https://cdn-images";
    let element = lines.find((line) => line.includes(imageStr));
    // Position-based assumption of this hack: the fifth line is used as the
    // caption (presumably the figcaption text).
    const caption = lines[4];

    if (downloadImages === true && element) {
      // Drop the leading "![](" and the trailing ")" to recover the URL.
      const imgSrc = element.substring(4, element.length - 1);

      // This check is important as Medium renders embeds (YouTube, etc.) also as figures.
      if (utils.isUrl(imgSrc)) {
        const imgFileName = getImageName(imgSrc);
        const localImgPath = path.join(imgPath, imgFileName);
        element = "![](" + localImgPath + ")";
        // Record the remote source so the caller can download it later.
        images.push({
          src: imgSrc,
          name: imgFileName,
        });
      }
    }

    // Insert the caption between "![" and "](" so it becomes the alt text.
    // Guarded on `element`: figures without a cdn-images line (embeds) leave
    // it undefined, and slicing undefined used to throw a TypeError.
    if (element && caption) {
      element = [element.slice(0, 2), caption, element.slice(2)].join("");
    }

    return (element || "") + "\n" + (caption || "");
  },
});

/**
 * Converts an HTML string to markdown.
 *
 * Resets the module-level state read by the figure rule (`downloadImages`,
 * `images`, `imgPath`) before running Turndown.
 *
 * @param {string} htmlStr - HTML to convert.
 * @param {boolean} downloadImagesFlag - When strictly true, the figure rule
 *   rewrites image links to local paths and records them.
 * @param {string} [imagesPath] - Directory prefix used in rewritten image
 *   links. Falls back to "img" when omitted.
 * @returns {{md: string, images: Array<{src: string, name: string}>}}
 *   Markdown plus the images recorded while converting.
 */
const convert = function (htmlStr, downloadImagesFlag, imagesPath) {
  downloadImages = downloadImagesFlag;
  images = [];
  // Callers that still pass only two arguments used to overwrite the "img"
  // default with undefined, which makes path.join() throw in the figure rule.
  imgPath = typeof imagesPath === "string" ? imagesPath : "img";
  return { md: td.turndown(htmlStr), images };
};

/**
 * Derives a local file name from an image URL.
 *
 * Takes the last path segment of the URL, replaces every character of its
 * base name that is not an ASCII letter or digit with "__", and keeps the
 * original extension (or ".jpg" when there is none).
 *
 * @param {string} imgSrc - Image URL.
 * @returns {string} Sanitised file name including extension.
 */
const getImageName = function (imgSrc) {
  const lastSegment = path.basename(url.parse(imgSrc).pathname);
  const pieces = path.parse(lastSegment);
  const sanitized = pieces.name.replace(/[^a-zA-Z0-9]/g, "__");
  // If no extension, add .jpg.
  const suffix = pieces.ext ? pieces.ext : ".jpg";
  return sanitized + suffix;
};

module.exports = convert;
18 changes: 10 additions & 8 deletions lib/downloader.js
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
const fs = require('fs');
const fetch = require('node-fetch');
const fs = require("fs");
const fetch = require("node-fetch");

/**
 * Downloads `url` and writes the response body to `localPath`.
 *
 * @param {string} url - Remote resource to fetch.
 * @param {string} localPath - Destination file path.
 * @returns {Promise<string>} Resolves with `localPath` once the file stream
 *   has closed, i.e. after the data has been written out.
 * @throws Rejects if the request fails or either stream emits an error.
 */
async function downloadImage(url, localPath) {
  // A single GET is enough; the earlier HEAD request's result was never used.
  // NOTE(review): the HTTP status is not checked, so an error page would be
  // saved as the "image" — confirm whether callers want a rejection instead.
  const response = await fetch(url);

  return new Promise((resolve, reject) => {
    const fileStream = fs.createWriteStream(localPath);

    // Pass a listener; calling resolve(localPath) inline settled the promise
    // immediately, before anything had been written to disk.
    fileStream.on("close", () => resolve(localPath));
    fileStream.on("error", reject);

    // pipe() does not forward source errors, so release the file handle and
    // surface the failure explicitly.
    response.body.on("error", (err) => {
      fileStream.destroy();
      reject(err);
    });

    response.body.pipe(fileStream);
  });
}

module.exports = downloadImage;
module.exports = downloadImage;
Loading

0 comments on commit 399efd8

Please sign in to comment.