Skip to content

Commit

Permalink
indexer: support custom hook in the config for extreme configuration
Browse files Browse the repository at this point in the history
also see #147
  • Loading branch information
karlicoss committed Nov 21, 2020
1 parent 08a06c3 commit feef7c3
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 31 deletions.
2 changes: 1 addition & 1 deletion src/promnesia/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from pathlib import Path
from .common import PathIsh, Visit, Source, last, Loc, Results
from .common import PathIsh, Visit, Source, last, Loc, Results, DbVisit, Context, Res


def root() -> Path:
Expand Down
55 changes: 29 additions & 26 deletions src/promnesia/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,49 +13,56 @@
from . import config
from . import server
from .misc import install_server
from .common import PathIsh, get_logger, get_tmpdir, DbVisit, Res
from .common import PathIsh, logger, get_tmpdir, DbVisit, Res
from .common import Source, appdirs, python3, get_system_tz
from .dump import visits_to_sqlite
from .extract import extract_visits, make_filter


def iter_all_visits() -> Iterator[Res[DbVisit]]:
    """Yield every visit (or error) produced by the configured sources.

    Reads the active config, makes sure the output directory exists, then walks
    ``cfg.sources``.  Sources that are themselves exceptions are yielded as-is.
    When the config defines a hook, each extracted result is passed through it
    and whatever the hook yields is emitted in place of the original result;
    an exception raised by the hook is yielded as an error instead of
    propagating.
    """
    cfg = config.get()

    output_dir = cfg.output_dir
    # not sure if belongs here??
    if not output_dir.exists():
        logger.warning("OUTPUT_DIR '%s' didn't exist, creating", output_dir)
        output_dir.mkdir(exist_ok=True, parents=True)

    hook = cfg.hook

    indexers = cfg.sources
    for idx in indexers:
        if isinstance(idx, Exception):
            # the source entry itself is an error: report it and move on
            yield idx
            continue
        # todo use this context? not sure where to attach...
        einfo = f'{getattr(idx.ff, "__module__", None)}:{getattr(idx.ff, "__name__", None)} {idx.args} {idx.kwargs}'
        for v in extract_visits(idx, src=idx.name):
            if hook is None:
                yield v
            else:
                # The hook is user-supplied code, so the broad catch is deliberate:
                # a crash in it becomes an error result rather than aborting the run.
                try:
                    yield from hook(v)
                except Exception as e:
                    yield e


def _do_index() -> Iterable[Exception]:
    """Dump all visits into the database and return the errors encountered.

    Errors come from two places: items yielded by ``iter_all_visits`` that are
    exceptions, and errors returned by ``visits_to_sqlite``.
    """
    # also keep & return errors for further display
    errors: List[Exception] = []

    def it() -> Iterator[Res[DbVisit]]:
        # Record errors on the fly while still passing every item on to the dump.
        for v in iter_all_visits():
            if isinstance(v, Exception):
                errors.append(v)
            yield v

    # Dump exactly once, and only through it(), so that errors are collected.
    dump_errors = visits_to_sqlite(it())
    for e in dump_errors:
        # We are not inside an except block here, so pass the exception
        # explicitly to get its traceback into the log.
        logger.error(e, exc_info=e)
        errors.append(e)
    return errors


def do_index(config_file: Path) -> None:
logger = get_logger()
config.load_from(config_file) # meh.. should be cleaner
try:
errors = list(_do_index())
Expand Down Expand Up @@ -94,7 +101,6 @@ def inner(*args, **kwargs):


def do_demo(*, index_as: str, params: Sequence[str], port: Optional[str], config_file: Optional[Path], name='demo'):
logger = get_logger()
from pprint import pprint
with TemporaryDirectory() as tdir:
outdir = Path(tdir)
Expand Down Expand Up @@ -155,7 +161,6 @@ def read_example_config() -> str:


def config_create(args) -> None:
logger = get_logger()
cfg = user_config_file()
cfgdir = cfg.parent
if cfgdir.exists():
Expand All @@ -169,7 +174,6 @@ def config_create(args) -> None:


def config_check(args) -> None:
logger = get_logger()
cfg = args.config
errors = list(_config_check(cfg))
if len(errors) == 0:
Expand All @@ -180,7 +184,6 @@ def config_check(args) -> None:


def _config_check(cfg: Path) -> Iterable[Exception]:
logger = get_logger()
from subprocess import run

logger.info('config: %s', cfg)
Expand Down
12 changes: 10 additions & 2 deletions src/promnesia/config.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
from pathlib import Path
import os
from typing import List, Optional, Union, NamedTuple, Iterable
from typing import List, Optional, Union, NamedTuple, Iterable, Callable
import importlib
import importlib.util
import warnings

from .common import PathIsh, get_tmpdir, appdirs, default_output_dir, default_cache_dir
from .common import Res, Source
from .common import Res, Source, DbVisit


# Signature of the optional config-level HOOK: it receives a single indexing
# result (Res[DbVisit]) and returns an iterable of results.
HookT = Callable[[Res[DbVisit]], Iterable[Res[DbVisit]]]

class Config(NamedTuple):
# TODO remove default from sources once migrated
SOURCES: List = []
Expand All @@ -18,6 +20,9 @@ class Config(NamedTuple):

CACHE_DIR: Optional[PathIsh] = ''
FILTERS: List[str] = []

HOOK: Optional[HookT] = None

#
# NOTE: INDEXERS is deprecated, use SOURCES instead
INDEXERS: List = []
Expand Down Expand Up @@ -73,6 +78,9 @@ def output_dir(self) -> Path:
else:
return default_output_dir()

@property
def hook(self) -> Optional[HookT]:
    """Return the ``HOOK`` attribute of this config as-is."""
    return self.HOOK

instance: Optional[Config] = None

Expand Down
4 changes: 2 additions & 2 deletions tests/config_tests.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pytest
from more_itertools import ilen
from typing import Union
from typing import Union, Iterable, List

from promnesia import Source

Expand Down Expand Up @@ -243,7 +243,7 @@ def with_config(cfg: Union[str, Config]):
C.reset()


def index(cfg: Config, check=True):
def index(cfg: Union[str, Config], check=True) -> List[Exception]:
from promnesia.__main__ import _do_index
with with_config(cfg):
errors = list(_do_index())
Expand Down
45 changes: 45 additions & 0 deletions tests/indexer_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,51 @@ def test_custom() -> None:
assert len(visits) == 5


def test_hook() -> None:
    """Check that a config-level ``HOOK`` can patch, drop, duplicate or crash on visits."""
    from promnesia.__main__ import iter_all_visits
    with with_config('''
from promnesia import Source
from promnesia.sources import demo
SOURCES = [
    Source(demo.index, count=7, name='somename'),
]
from typing import Iterable
from promnesia import DbVisit, Loc, Res
def HOOK(visit: Res[DbVisit]) -> Iterable[Res[DbVisit]]:
    if isinstance(visit, Exception):
        # errors have no norm_url to inspect, so pass them through intact
        yield visit
        return
    nurl = visit.norm_url
    if 'page1' in nurl:
        yield visit._replace(norm_url='patched.com')
    elif 'page2' in nurl:
        None.boom # deliberately crash
    elif 'page3' in nurl:
        # just don't yield anything! it will be omitted
        pass
    elif 'page4' in nurl:
        # can emit multiple!
        yield visit
        yield visit
    elif 'page6' in nurl:
        # patch locator
        yield visit._replace(locator=Loc.make(title='some custom timte', href='/can/replace/original/path'))
    else:
        yield visit
'''):
        # TODO hmm might be nice to allow in-place modifications...
        # page3 is dropped by the hook and page4 is emitted twice, hence 7 results
        [p0, p1, e2, p41, p42, p5, p6] = list(iter_all_visits())
        assert isinstance(p0, DbVisit)
        assert p0.norm_url == 'demo.com/page0.html'
        assert isinstance(p1, DbVisit)
        assert p1.norm_url == 'patched.com'
        # the crash inside the hook surfaces as an error result
        assert isinstance(e2, Exception)
        assert p41 == p42
        # page5 takes the hook's default branch and is yielded as-is
        assert isinstance(p5, DbVisit)
        assert isinstance(p6, DbVisit)
        assert p6.locator is not None


TESTDATA_CHROME_HISTORY = "/L/data/promnesia/testdata/chrome-history"

Expand Down

0 comments on commit feef7c3

Please sign in to comment.