Skip to content

Commit

Permalink
Fix No such file or directory: /tmp/mdkatex/... (#17)
Browse files Browse the repository at this point in the history
* fix concurrency issue

When multiple threads are running, they may both
clean up the cache directory at the same time, and
one thread will provoke FileNotFoundError exceptions
in the other thread.

Fix: make file write operations atomic

* no vendoring for pathlib2


---------

Co-authored-by: Manuel Barkhau <[email protected]>
  • Loading branch information
tovrstra and mbarkhau authored Jun 24, 2024
1 parent 794518a commit 3365450
Show file tree
Hide file tree
Showing 8 changed files with 85 additions and 174 deletions.
2 changes: 1 addition & 1 deletion Makefile.bootstrapit.make
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,7 @@ test:
--cov-report term \
--html=reports/pytest/index.html \
--junitxml reports/pytest.xml \
-k "$${PYTEST_FILTER}" \
-k "$${PYTEST_FILTER-$${FLTR}}" \
$(shell cd src/ && ls -1 */__init__.py | awk '{ sub(/\/__init__.py/, "", $$1); print "--cov "$$1 }') \
test/ src/;

Expand Down
5 changes: 4 additions & 1 deletion requirements/integration.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,15 @@ flake8-comprehensions
flake8-junit-report
pylint==2.12.1
pylint-ignore>=2020.1013
mypy

types-Markdown
# pylint doesn't support isort>=5 for now
# https://github.com/PyCQA/pylint/issues/3722
isort<5

mypy==1.4.1
mypy-extensions==1.0.0

# http://doc.pytest.org/en/latest/py27-py34-deprecation.html
# The pytest 4.6 series will be the last to support Python 2.7
# and 3.4, and is scheduled to be released by mid-2019.
Expand Down
4 changes: 2 additions & 2 deletions requirements/pypi.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
# Binary (non-pure) packages may also be listed here, but you
# should see if there is a conda package that suits your needs.

Markdown>=3.0<3.3;python_version<"3.6"
Markdown>=3.0,<3.3;python_version<"3.6"
Markdown>=3.0;python_version>="3.6"
pathlib2; python_version < "3.4"
typing;python_version<"3.5"
pathlib2
# setuptools is required for pkg_resources
setuptools
2 changes: 0 additions & 2 deletions requirements/vendor.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,3 @@
# packages, simply by not including the vendor/ directory in
# the PYTHONPATH. The version from the virtualenv will then
# be loaded instead.

pathlib2
6 changes: 3 additions & 3 deletions src/markdown_katex/extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def svg2img(html: str) -> str:
return html


def tex2html(tex: str, options: wrapper.Options = None) -> str:
def tex2html(tex: str, options: wrapper.MaybeOptions = None) -> str:
if options:
no_inline_svg = options.get("no_inline_svg", False)
else:
Expand All @@ -86,7 +86,7 @@ def tex2html(tex: str, options: wrapper.Options = None) -> str:
return result


def md_block2html(block_text: str, default_options: wrapper.Options = None) -> str:
def md_block2html(block_text: str, default_options: wrapper.MaybeOptions = None) -> str:
options: wrapper.Options = {'display-mode': True}

if default_options:
Expand All @@ -113,7 +113,7 @@ def _clean_inline_text(inline_text: str) -> str:
return inline_text


def md_inline2html(inline_text: str, default_options: wrapper.Options = None) -> str:
def md_inline2html(inline_text: str, default_options: wrapper.MaybeOptions = None) -> str:
options = default_options.copy() if default_options else {}
inline_text = _clean_inline_text(inline_text)
return tex2html(inline_text, options)
Expand Down
114 changes: 74 additions & 40 deletions src/markdown_katex/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,14 @@
import hashlib
import platform
import tempfile
import contextlib
import subprocess as sp

import pathlib2 as pl
try:
from pathlib import Path
except ImportError:
from pathlib2 import Path # type: ignore


SIG_NAME_BY_NUM = {
k: v
Expand All @@ -29,11 +34,11 @@
assert SIG_NAME_BY_NUM[15] == 'SIGTERM'


TMP_DIR = pl.Path(tempfile.gettempdir()) / "mdkatex"
CACHE_DIR = Path(tempfile.gettempdir()) / "mdkatex"

LIBDIR: pl.Path = pl.Path(__file__).parent
LIBDIR: Path = Path(__file__).parent
PKG_BIN_DIR = LIBDIR / "bin"
FALLBACK_BIN_DIR = pl.Path("/") / "usr" / "local" / "bin"
FALLBACK_BIN_DIR = Path("/") / "usr" / "local" / "bin"
FALLBACK_BIN_DIR = FALLBACK_BIN_DIR.expanduser()

CMD_NAME = "katex"
Expand All @@ -50,15 +55,23 @@
KATEX_OUTPUT_ENCODING = "UTF-8"

# local cache so we don't have to validate the command every time
TMP_LOCAL_CMD_CACHE = TMP_DIR / "local_katex_cmd.txt"
LOCAL_CMD_CACHE = CACHE_DIR / "local_katex_cmd.txt"


@contextlib.contextmanager
def _atomic_writable_path(final_path: Path):
    """Yield a temporary sibling path that is moved to final_path on success.

    The temporary path lives in the same directory as ``final_path`` and is
    named ``<final name>_tmp_<random hex nonce>``, so concurrent writers do
    not write to the same temporary file. The caller writes to the yielded
    path inside the ``with`` body. When the body completes without raising,
    the temporary file is moved over ``final_path`` in a single step.

    If the body raises, the (possibly partial) temporary file is removed,
    ``final_path`` is left untouched and the exception propagates.

    :param final_path: Destination path the finished file should end up at.
    :yields: The temporary path to write to.
    """
    nonce = hashlib.sha1(os.urandom(8)).hexdigest()
    tmp_path = final_path.parent / (final_path.name + "_tmp_" + nonce)

    try:
        yield tmp_path
    except BaseException:
        # The body failed: never publish a partial file, and don't leave
        # the temporary one lying around in the directory.
        try:
            tmp_path.unlink()
        except OSError:
            pass  # nothing was written, or somebody else removed it already
        raise

    try:
        # replace() overwrites an existing target on every platform, whereas
        # rename() raises FileExistsError on Windows if another writer has
        # already produced final_path.
        tmp_path.replace(final_path)
    except NotImplementedError:
        # NOTE(review): pathlib2 on Python 2 is believed to not implement
        # replace(); fall back to the previous behaviour there — confirm.
        tmp_path.rename(final_path)


def _get_env_paths() -> typ.Iterable[pl.Path]:
def _get_env_paths() -> typ.Iterable[Path]:
env_path = os.environ.get('PATH')
if env_path:
path_strs = env_path.split(os.pathsep)
for path_str in path_strs:
yield pl.Path(path_str)
yield Path(path_str)

# search in fallback bin dir regardless of path
if env_path is None or str(FALLBACK_BIN_DIR) not in env_path:
Expand All @@ -81,12 +94,12 @@ def _get_local_bin_candidates() -> typ.List[str]:


def _get_usr_parts() -> typ.Optional[typ.List[str]]:
if TMP_LOCAL_CMD_CACHE.exists():
with TMP_LOCAL_CMD_CACHE.open(mode="r", encoding="utf-8") as fobj:
local_cmd = typ.cast(str, fobj.read())
if LOCAL_CMD_CACHE.exists():
with LOCAL_CMD_CACHE.open(mode="r", encoding="utf-8") as fobj:
local_cmd: str = fobj.read()

local_cmd_parts = local_cmd.split("\n")
if pl.Path(local_cmd_parts[0]).exists():
if Path(local_cmd_parts[0]).exists():
return local_cmd_parts

for path in _get_env_paths():
Expand All @@ -108,16 +121,19 @@ def _get_usr_parts() -> typ.Optional[typ.List[str]]:
except OSError:
continue

TMP_DIR.mkdir(parents=True, exist_ok=True)
with TMP_LOCAL_CMD_CACHE.open(mode="w", encoding="utf-8") as fobj:
fobj.write("\n".join(local_cmd_parts))
CACHE_DIR.mkdir(parents=True, exist_ok=True)
local_cmd_data = "\n".join(local_cmd_parts).encode("utf-8")

with _atomic_writable_path(LOCAL_CMD_CACHE) as tmp_path:
with tmp_path.open(mode="wb") as fobj:
fobj.write(local_cmd_data)

return local_cmd_parts

return None


def _get_pkg_bin_path(osname: str = OSNAME, machine: str = MACHINE) -> pl.Path:
def _get_pkg_bin_path(osname: str = OSNAME, machine: str = MACHINE) -> Path:
if machine == 'AMD64':
machine = 'x86_64'
glob_expr = f"*_{machine}-{osname}*"
Expand Down Expand Up @@ -158,15 +174,16 @@ def read_output(buf: typ.Optional[typ.IO[bytes]]) -> str:
return b"".join(_iter_output_lines(buf)).decode("utf-8")


ArgValue = typ.Union[str, int, float, bool]
Options = typ.Dict[str, ArgValue]
ArgValue = typ.Union[str, int, float, bool]
Options = typ.Dict[str, ArgValue]
MaybeOptions = typ.Optional[Options]


class KatexError(Exception):
pass


def _iter_cmd_parts(options: Options = None) -> typ.Iterable[str]:
def _iter_cmd_parts(options: MaybeOptions = None) -> typ.Iterable[str]:
for cmd_part in get_bin_cmd():
yield cmd_part

Expand Down Expand Up @@ -194,14 +211,15 @@ def _cmd_digest(tex: str, cmd_parts: typ.List[str]) -> str:
return hasher.hexdigest()


def _write_tex2html(cmd_parts: typ.List[str], tex: str, tmp_output_file: pl.Path) -> None:
def _write_tex2html(cmd_parts: typ.List[str], tex: str, tmp_output_file: Path) -> None:
# pylint: disable=consider-using-with ; not supported on py27
tmp_input_file = TMP_DIR / tmp_output_file.name.replace(".html", ".tex")
tmp_input_file = CACHE_DIR / tmp_output_file.name.replace(".html", ".tex")
input_data = tex.encode(KATEX_INPUT_ENCODING)

TMP_DIR.mkdir(parents=True, exist_ok=True)
with tmp_input_file.open(mode="wb") as fobj:
fobj.write(input_data)
CACHE_DIR.mkdir(parents=True, exist_ok=True)
with _atomic_writable_path(tmp_input_file) as tmp_path:
with tmp_path.open(mode="wb") as fobj:
fobj.write(input_data)

cmd_parts.extend(["--input", str(tmp_input_file), "--output", str(tmp_output_file)])
proc = None
Expand Down Expand Up @@ -231,36 +249,52 @@ def _write_tex2html(cmd_parts: typ.List[str], tex: str, tmp_output_file: pl.Path
proc.stdout.close()
if proc.stderr is not None:
proc.stderr.close()
tmp_input_file.unlink()

try:
tmp_input_file.unlink()
except FileNotFoundError:
# A concurrent mdkatex process may have removed the
# input (.tex) file, but that's ok as we only care
# about the output file and one or the other process
# will have written that.
pass

def tex2html(tex: str, options: Options = None) -> str:
cmd_parts = list(_iter_cmd_parts(options))
digest = _cmd_digest(tex, cmd_parts)
tmp_filename = digest + ".html"
tmp_output_file = TMP_DIR / tmp_filename

def tex2html(tex: str, options: MaybeOptions = None) -> str:
cmd_parts = list(_iter_cmd_parts(options))
digest = _cmd_digest(tex, cmd_parts)
cache_filename = digest + ".html"
cache_output_file = CACHE_DIR / cache_filename

try:
if tmp_output_file.exists():
if cache_output_file.exists():
# give cached file a life extension (update mtime)
tmp_output_file.touch()
cache_output_file.touch()
else:
_write_tex2html(cmd_parts, tex, tmp_output_file)
with _atomic_writable_path(cache_output_file) as tmp_output_file:
_write_tex2html(cmd_parts, tex, tmp_output_file)

with tmp_output_file.open(mode="r", encoding=KATEX_OUTPUT_ENCODING) as fobj:
result = typ.cast(str, fobj.read())
with cache_output_file.open(mode="r", encoding=KATEX_OUTPUT_ENCODING) as fobj:
result: str = fobj.read()
return result.strip()
finally:
_cleanup_tmp_dir()
_cleanup_cache_dir()


def _cleanup_tmp_dir() -> None:
def _cleanup_cache_dir() -> None:
min_mtime = time.time() - 24 * 60 * 60
for fpath in TMP_DIR.iterdir():
if fpath.is_file():
for fpath in CACHE_DIR.iterdir():
try:
if not fpath.is_file():
continue

mtime = fpath.stat().st_mtime
if mtime < min_mtime:
fpath.unlink()
if mtime > min_mtime:
continue

fpath.unlink()
except FileNotFoundError:
pass # concurrent thread deleted file before we did


# NOTE: in order to not have to update the code
Expand Down
Loading

0 comments on commit 3365450

Please sign in to comment.