From 3365450e2d4b0b40dab9a63f93eb64fa1622f9f6 Mon Sep 17 00:00:00 2001 From: Toon Verstraelen Date: Mon, 24 Jun 2024 14:02:04 +0200 Subject: [PATCH] Fix No such file or directory: `/tmp/mdkatex/...` (#17) * fix concurrency issue When multiple threads are running, they may both cleanup the cache directory at the same time and one thread will provoke FileNotFound errors in the other thread. Fix: make file write operations atomic * no vendoring for pathlib2 --------- Co-authored-by: Manuel Barkhau --- Makefile.bootstrapit.make | 2 +- requirements/integration.txt | 5 +- requirements/pypi.txt | 4 +- requirements/vendor.txt | 2 - src/markdown_katex/extension.py | 6 +- src/markdown_katex/wrapper.py | 114 ++++++++++++++++++----------- stubs/pathlib2.pyi | 122 -------------------------------- test/test_mdkatex.py | 4 +- 8 files changed, 85 insertions(+), 174 deletions(-) delete mode 100644 stubs/pathlib2.pyi diff --git a/Makefile.bootstrapit.make b/Makefile.bootstrapit.make index 80bbb16..695234e 100644 --- a/Makefile.bootstrapit.make +++ b/Makefile.bootstrapit.make @@ -395,7 +395,7 @@ test: --cov-report term \ --html=reports/pytest/index.html \ --junitxml reports/pytest.xml \ - -k "$${PYTEST_FILTER}" \ + -k "$${PYTEST_FILTER-$${FLTR}}" \ $(shell cd src/ && ls -1 */__init__.py | awk '{ sub(/\/__init__.py/, "", $$1); print "--cov "$$1 }') \ test/ src/; diff --git a/requirements/integration.txt b/requirements/integration.txt index fcadf3b..b99a18c 100644 --- a/requirements/integration.txt +++ b/requirements/integration.txt @@ -20,12 +20,15 @@ flake8-comprehensions flake8-junit-report pylint==2.12.1 pylint-ignore>=2020.1013 -mypy + types-Markdown # pylint doesn't support isort>=5 for now # https://github.com/PyCQA/pylint/issues/3722 isort<5 +mypy==1.4.1 +mypy-extensions==1.0.0 + # http://doc.pytest.org/en/latest/py27-py34-deprecation.html # The pytest 4.6 series will be the last to support Python 2.7 # and 3.4, and is scheduled to be released by mid-2019. diff --git a/requirements/pypi.txt b/requirements/pypi.txt index 86ad203..9d010f6 100644 --- a/requirements/pypi.txt +++ b/requirements/pypi.txt @@ -7,9 +7,9 @@ # Binary (non-pure) packages may also be listed here, but you # should see if there is a conda package that suits your needs. -Markdown>=3.0<3.3;python_version<"3.6" +Markdown>=3.0,<3.3;python_version<"3.6" Markdown>=3.0;python_version>="3.6" +pathlib2; python_version < "3.4" typing;python_version<"3.5" -pathlib2 # setuptools is required for pkg_resources setuptools diff --git a/requirements/vendor.txt b/requirements/vendor.txt index 28ac40d..56ad1a1 100644 --- a/requirements/vendor.txt +++ b/requirements/vendor.txt @@ -21,5 +21,3 @@ # packages, simply by not including the vendor/ directory in # the PYTHONPATH. The version from the virtualenv will then # be loaded instead. - -pathlib2 diff --git a/src/markdown_katex/extension.py b/src/markdown_katex/extension.py index e6626ab..9d8abf3 100644 --- a/src/markdown_katex/extension.py +++ b/src/markdown_katex/extension.py @@ -69,7 +69,7 @@ def svg2img(html: str) -> str: return html -def tex2html(tex: str, options: wrapper.Options = None) -> str: +def tex2html(tex: str, options: wrapper.MaybeOptions = None) -> str: if options: no_inline_svg = options.get("no_inline_svg", False) else: @@ -86,7 +86,7 @@ def tex2html(tex: str, options: wrapper.Options = None) -> str: return result -def md_block2html(block_text: str, default_options: wrapper.Options = None) -> str: +def md_block2html(block_text: str, default_options: wrapper.MaybeOptions = None) -> str: options: wrapper.Options = {'display-mode': True} if default_options: @@ -113,7 +113,7 @@ def _clean_inline_text(inline_text: str) -> str: return inline_text -def md_inline2html(inline_text: str, default_options: wrapper.Options = None) -> str: +def md_inline2html(inline_text: str, default_options: wrapper.MaybeOptions = None) -> str: options = default_options.copy() if default_options else {} inline_text = _clean_inline_text(inline_text) return tex2html(inline_text, options) diff --git a/src/markdown_katex/wrapper.py b/src/markdown_katex/wrapper.py index b90b077..8a96983 100644 --- a/src/markdown_katex/wrapper.py +++ b/src/markdown_katex/wrapper.py @@ -16,9 +16,14 @@ import hashlib import platform import tempfile +import contextlib import subprocess as sp -import pathlib2 as pl +try: + from pathlib import Path +except ImportError: + from pathlib2 import Path # type: ignore + SIG_NAME_BY_NUM = { k: v @@ -29,11 +34,11 @@ assert SIG_NAME_BY_NUM[15] == 'SIGTERM' -TMP_DIR = pl.Path(tempfile.gettempdir()) / "mdkatex" +CACHE_DIR = Path(tempfile.gettempdir()) / "mdkatex" -LIBDIR: pl.Path = pl.Path(__file__).parent +LIBDIR: Path = Path(__file__).parent PKG_BIN_DIR = LIBDIR / "bin" -FALLBACK_BIN_DIR = pl.Path("/") / "usr" / "local" / "bin" +FALLBACK_BIN_DIR = Path("/") / "usr" / "local" / "bin" FALLBACK_BIN_DIR = FALLBACK_BIN_DIR.expanduser() CMD_NAME = "katex" @@ -50,15 +55,23 @@ KATEX_OUTPUT_ENCODING = "UTF-8" # local cache so we don't have to validate the command every time -TMP_LOCAL_CMD_CACHE = TMP_DIR / "local_katex_cmd.txt" +LOCAL_CMD_CACHE = CACHE_DIR / "local_katex_cmd.txt" + + +@contextlib.contextmanager +def _atomic_writable_path(final_path: Path): + nonce = hashlib.sha1(os.urandom(8)).hexdigest() + tmp_path = final_path.parent / (final_path.name + "_tmp_" + nonce) + yield tmp_path + tmp_path.rename(final_path) -def _get_env_paths() -> typ.Iterable[pl.Path]: +def _get_env_paths() -> typ.Iterable[Path]: env_path = os.environ.get('PATH') if env_path: path_strs = env_path.split(os.pathsep) for path_str in path_strs: - yield pl.Path(path_str) + yield Path(path_str) # search in fallback bin dir regardless of path if env_path is None or str(FALLBACK_BIN_DIR) not in env_path: @@ -81,12 +94,12 @@ def _get_local_bin_candidates() -> typ.List[str]: def _get_usr_parts() -> typ.Optional[typ.List[str]]: - if TMP_LOCAL_CMD_CACHE.exists(): - with TMP_LOCAL_CMD_CACHE.open(mode="r", encoding="utf-8") as fobj: - local_cmd = typ.cast(str, fobj.read()) + if LOCAL_CMD_CACHE.exists(): + with LOCAL_CMD_CACHE.open(mode="r", encoding="utf-8") as fobj: + local_cmd: str = fobj.read() local_cmd_parts = local_cmd.split("\n") - if pl.Path(local_cmd_parts[0]).exists(): + if Path(local_cmd_parts[0]).exists(): return local_cmd_parts for path in _get_env_paths(): @@ -108,16 +121,19 @@ def _get_usr_parts() -> typ.Optional[typ.List[str]]: except OSError: continue - TMP_DIR.mkdir(parents=True, exist_ok=True) - with TMP_LOCAL_CMD_CACHE.open(mode="w", encoding="utf-8") as fobj: - fobj.write("\n".join(local_cmd_parts)) + CACHE_DIR.mkdir(parents=True, exist_ok=True) + local_cmd_data = "\n".join(local_cmd_parts).encode("utf-8") + + with _atomic_writable_path(LOCAL_CMD_CACHE) as tmp_path: + with tmp_path.open(mode="wb") as fobj: + fobj.write(local_cmd_data) return local_cmd_parts return None -def _get_pkg_bin_path(osname: str = OSNAME, machine: str = MACHINE) -> pl.Path: +def _get_pkg_bin_path(osname: str = OSNAME, machine: str = MACHINE) -> Path: if machine == 'AMD64': machine = 'x86_64' glob_expr = f"*_{machine}-{osname}*" @@ -158,15 +174,16 @@ def read_output(buf: typ.Optional[typ.IO[bytes]]) -> str: return b"".join(_iter_output_lines(buf)).decode("utf-8") -ArgValue = typ.Union[str, int, float, bool] -Options = typ.Dict[str, ArgValue] +ArgValue = typ.Union[str, int, float, bool] +Options = typ.Dict[str, ArgValue] +MaybeOptions = typ.Optional[Options] class KatexError(Exception): pass -def _iter_cmd_parts(options: Options = None) -> typ.Iterable[str]: +def _iter_cmd_parts(options: MaybeOptions = None) -> typ.Iterable[str]: for cmd_part in get_bin_cmd(): yield cmd_part @@ -194,14 +211,15 @@ def _cmd_digest(tex: str, cmd_parts: typ.List[str]) -> str: return hasher.hexdigest() -def _write_tex2html(cmd_parts: typ.List[str], tex: str, tmp_output_file: pl.Path) -> None: +def _write_tex2html(cmd_parts: typ.List[str], tex: str, tmp_output_file: Path) -> None: # pylint: disable=consider-using-with ; not supported on py27 - tmp_input_file = TMP_DIR / tmp_output_file.name.replace(".html", ".tex") + tmp_input_file = CACHE_DIR / tmp_output_file.name.replace(".html", ".tex") input_data = tex.encode(KATEX_INPUT_ENCODING) - TMP_DIR.mkdir(parents=True, exist_ok=True) - with tmp_input_file.open(mode="wb") as fobj: - fobj.write(input_data) + CACHE_DIR.mkdir(parents=True, exist_ok=True) + with _atomic_writable_path(tmp_input_file) as tmp_path: + with tmp_path.open(mode="wb") as fobj: + fobj.write(input_data) cmd_parts.extend(["--input", str(tmp_input_file), "--output", str(tmp_output_file)]) proc = None @@ -231,36 +249,52 @@ def _write_tex2html(cmd_parts: typ.List[str], tex: str, tmp_output_file: pl.Path proc.stdout.close() if proc.stderr is not None: proc.stderr.close() - tmp_input_file.unlink() + try: + tmp_input_file.unlink() + except FileNotFoundError: + # A concurrent mdkatex process may have removed the + # input (.tex) file, but that's ok as we only care + # about the output file and one or the other process + # will have written that. + pass -def tex2html(tex: str, options: Options = None) -> str: - cmd_parts = list(_iter_cmd_parts(options)) - digest = _cmd_digest(tex, cmd_parts) - tmp_filename = digest + ".html" - tmp_output_file = TMP_DIR / tmp_filename + +def tex2html(tex: str, options: MaybeOptions = None) -> str: + cmd_parts = list(_iter_cmd_parts(options)) + digest = _cmd_digest(tex, cmd_parts) + cache_filename = digest + ".html" + cache_output_file = CACHE_DIR / cache_filename try: - if tmp_output_file.exists(): + if cache_output_file.exists(): # give cached file a life extension (update mtime) - tmp_output_file.touch() + cache_output_file.touch() else: - _write_tex2html(cmd_parts, tex, tmp_output_file) + with _atomic_writable_path(cache_output_file) as tmp_output_file: + _write_tex2html(cmd_parts, tex, tmp_output_file) - with tmp_output_file.open(mode="r", encoding=KATEX_OUTPUT_ENCODING) as fobj: - result = typ.cast(str, fobj.read()) + with cache_output_file.open(mode="r", encoding=KATEX_OUTPUT_ENCODING) as fobj: + result: str = fobj.read() return result.strip() finally: - _cleanup_tmp_dir() + _cleanup_cache_dir() -def _cleanup_tmp_dir() -> None: +def _cleanup_cache_dir() -> None: min_mtime = time.time() - 24 * 60 * 60 - for fpath in TMP_DIR.iterdir(): - if fpath.is_file(): + for fpath in CACHE_DIR.iterdir(): + try: + if not fpath.is_file(): + continue + mtime = fpath.stat().st_mtime - if mtime < min_mtime: - fpath.unlink() + if mtime > min_mtime: + continue + + fpath.unlink() + except FileNotFoundError: + pass # concurrent thread deleted file before we did # NOTE: in order to not have to update the code diff --git a/stubs/pathlib2.pyi b/stubs/pathlib2.pyi deleted file mode 100644 index 307979b..0000000 --- a/stubs/pathlib2.pyi +++ /dev/null @@ -1,122 +0,0 @@ -# Stubs for pathlib (Python 3.4) - -from typing import Any, Generator, IO, Optional, Sequence, Tuple, Type, TypeVar, Union, List -from types import TracebackType -import os -import sys - -_P = TypeVar('_P', bound='PurePath') - -if sys.version_info >= (3, 6): - _PurePathBase = os.PathLike[str] -else: - _PurePathBase = object - -class PurePath(_PurePathBase): - parts = ... # type: Tuple[str, ...] - drive = ... # type: str - root = ... # type: str - anchor = ... # type: str - name = ... # type: str - suffix = ... # type: str - suffixes = ... # type: List[str] - stem = ... # type: str - if sys.version_info < (3, 5): - def __init__(self, *pathsegments: str) -> None: ... - elif sys.version_info < (3, 6): - def __new__(cls: Type[_P], *args: Union[str, PurePath]) -> _P: ... - else: - def __new__(cls: Type[_P], *args: Union[str, os.PathLike[str]]) -> _P: ... - def __hash__(self) -> int: ... - def __lt__(self, other: PurePath) -> bool: ... - def __le__(self, other: PurePath) -> bool: ... - def __gt__(self, other: PurePath) -> bool: ... - def __ge__(self, other: PurePath) -> bool: ... - def __truediv__(self: _P, key: Union[str, PurePath]) -> _P: ... - def __bytes__(self) -> bytes: ... - def as_posix(self) -> str: ... - def as_uri(self) -> str: ... - def is_absolute(self) -> bool: ... - def is_reserved(self) -> bool: ... - def match(self, path_pattern: str) -> bool: ... - def relative_to(self: _P, *other: Union[str, PurePath]) -> _P: ... - def with_name(self: _P, name: str) -> _P: ... - def with_suffix(self: _P, suffix: str) -> _P: ... - def joinpath(self: _P, *other: Union[str, PurePath]) -> _P: ... - - @property - def parents(self: _P) -> Sequence[_P]: ... - @property - def parent(self: _P) -> _P: ... - -class PurePosixPath(PurePath): ... -class PureWindowsPath(PurePath): ... - -class Path(PurePath): - def __enter__(self) -> Path: ... - def __exit__(self, exc_type: Optional[Type[BaseException]], - exc_value: Optional[BaseException], - traceback: Optional[TracebackType]) -> Optional[bool]: ... - @classmethod - def cwd(cls: Type[_P]) -> _P: ... - def stat(self) -> os.stat_result: ... - def chmod(self, mode: int) -> None: ... - def exists(self) -> bool: ... - def glob(self, pattern: str) -> Generator[Path, None, None]: ... - def group(self) -> str: ... - def is_dir(self) -> bool: ... - def is_file(self) -> bool: ... - def is_symlink(self) -> bool: ... - def is_socket(self) -> bool: ... - def is_fifo(self) -> bool: ... - def is_block_device(self) -> bool: ... - def is_char_device(self) -> bool: ... - def iterdir(self) -> Generator[Path, None, None]: ... - def lchmod(self, mode: int) -> None: ... - def lstat(self) -> os.stat_result: ... - if sys.version_info < (3, 5): - def mkdir(self, mode: int = ..., - parents: bool = ...) -> None: ... - else: - def mkdir(self, mode: int = ..., parents: bool = ..., - exist_ok: bool = ...) -> None: ... - def open(self, mode: str = ..., buffering: int = ..., - encoding: Optional[str] = ..., errors: Optional[str] = ..., - newline: Optional[str] = ...) -> IO[Any]: ... - def owner(self) -> str: ... - def rename(self, target: Union[str, PurePath]) -> None: ... - def replace(self, target: Union[str, PurePath]) -> None: ... - if sys.version_info < (3, 6): - def resolve(self: _P) -> _P: ... - else: - def resolve(self: _P, strict: bool = ...) -> _P: ... - def rglob(self, pattern: str) -> Generator[Path, None, None]: ... - def rmdir(self) -> None: ... - def symlink_to(self, target: Union[str, Path], - target_is_directory: bool = ...) -> None: ... - def touch(self, mode: int = ..., exist_ok: bool = ...) -> None: ... - def unlink(self) -> None: ... - - if sys.version_info >= (3, 5): - @classmethod - def home(cls: Type[_P]) -> _P: ... - if sys.version_info < (3, 6): - def __new__(cls: Type[_P], *args: Union[str, PurePath], - **kwargs: Any) -> _P: ... - else: - def __new__(cls: Type[_P], *args: Union[str, os.PathLike[str]], - **kwargs: Any) -> _P: ... - - def absolute(self: _P) -> _P: ... - def expanduser(self: _P) -> _P: ... - def read_bytes(self) -> bytes: ... - def read_text(self, encoding: Optional[str] = ..., - errors: Optional[str] = ...) -> str: ... - def samefile(self, other_path: Union[str, bytes, int, Path]) -> bool: ... - def write_bytes(self, data: bytes) -> int: ... - def write_text(self, data: str, encoding: Optional[str] = ..., - errors: Optional[str] = ...) -> int: ... - - -class PosixPath(Path, PurePosixPath): ... -class WindowsPath(Path, PureWindowsPath): ... diff --git a/test/test_mdkatex.py b/test/test_mdkatex.py index c29349e..283721b 100644 --- a/test/test_mdkatex.py +++ b/test/test_mdkatex.py @@ -34,8 +34,6 @@ DATA_DIR = pl.Path(__file__).parent.parent / "fixture_data" DATA_DIR.mkdir(parents=True, exist_ok=True) -TMP_DIR = pl.Path(tempfile.gettempdir()) / "mdkatex" - BASIC_TEX_TXT = r""" f(x) = \int_{-\infty}^\infty \hat f(\xi)\,e^{2 \pi i \xi x} @@ -410,7 +408,7 @@ def test_html_output(): html = textwrap.dedent(html.lstrip("\n")) html = html.replace("{{result}}", result) - tmp_file = TMP_DIR / "test_output.html" + tmp_file = wrp.CACHE_DIR / "test_output.html" with tmp_file.open(mode="w", encoding="utf-8") as fobj: fobj.write(html)