Skip to content

Commit

Permalink
Merge pull request #100 from sparks-baird/umap-hdbscan-kwargs
Browse files Browse the repository at this point in the history
fix: change the default behavior of overriding kwarg dicts for UMAP and HDBSCAN
  • Loading branch information
sgbaird authored Aug 13, 2022
2 parents f2ab965 + 214cb34 commit f42a964
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 2 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/conda-build-pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ jobs:
- name: Install dependencies
shell: bash -l {0}
run: |
conda install git conda-build conda-verify anaconda-client conda-forge::grayskull conda-forge::conda-souschef conda-forge::flit conda-forge::coveralls conda-forge::conda-souschef sgbaird::pqdm sgbaird::elmd
conda install git pytest conda-build conda-verify anaconda-client conda-forge::grayskull conda-forge::conda-souschef conda-forge::flit conda-forge::coveralls conda-forge::conda-souschef sgbaird::pqdm sgbaird::elmd
pip install ElM2D==0.4.1
- name: Miniconda build
Expand Down
21 changes: 20 additions & 1 deletion mat_discover/mat_discover_.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,14 +281,18 @@ def __init__(
<https://umap-learn.readthedocs.io/en/latest/parameters.html>`_ and the
`UMAP API
<https://umap-learn.readthedocs.io/en/latest/api.html#umap.umap_.UMAP>`_. If
this contains `dens_lambda` or `n_neighbors` keys, the values in the passed dictionary will take precedence over the corresponding `Discover` kwargs.
this contains a `dens_lambda` key, the value of the `dens_lambda` kwarg passed
to the `Discover` class will take precedence.
hdbscan_kwargs: dict, optional
`hdbscan.HDBSCAN` kwargs that are passed directly into the HDBSCAN
clusterer. By default, None. See `Parameter Selection for HDBSCAN*
<https://hdbscan.readthedocs.io/en/latest/parameter_selection.html>`_ and
the `HDBSCAN API
<https://hdbscan.readthedocs.io/en/latest/api.html#hdbscan.hdbscan_.HDBSCAN>`_.
If a dictionary is passed without a ``min_cluster_size`` key, it defaults to
50. Likewise, a missing ``min_samples`` key defaults to 1 and a missing
``cluster_selection_epsilon`` key defaults to 0.63.
References
----------
Expand Down Expand Up @@ -409,6 +413,13 @@ def __init__(
cluster_selection_epsilon=0.63,
min_cluster_size=min_cluster_size,
)
else:
if "min_cluster_size" not in hdbscan_kwargs:
hdbscan_kwargs["min_cluster_size"] = 50
if "min_samples" not in hdbscan_kwargs:
hdbscan_kwargs["min_samples"] = 1
if "cluster_selection_epsilon" not in hdbscan_kwargs:
hdbscan_kwargs["cluster_selection_epsilon"] = 0.63
self.hdbscan_kwargs = hdbscan_kwargs

self.mapper = ElM2D(target=self.dist_device) # type: ignore
Expand Down Expand Up @@ -1183,6 +1194,10 @@ def umap_fit_cluster(self, dm, metric="precomputed", random_state=None):
self.umap_cluster_kwargs["random_state"] = random_state
if metric != "precomputed":
self.umap_cluster_kwargs["metric"] = metric
self.umap_cluster_kwargs["densmap"] = True
self.umap_cluster_kwargs["output_dens"] = True
self.umap_cluster_kwargs["dens_lambda"] = self.dens_lambda
self.umap_cluster_kwargs["low_memory"] = False
with self.Timer("fit-UMAP"):
umap_trans = umap.UMAP(**self.umap_cluster_kwargs).fit(dm)
return umap_trans
Expand Down Expand Up @@ -1215,6 +1230,10 @@ def umap_fit_vis(self, dm, random_state=None):
"""
if random_state is not None:
self.umap_vis_kwargs["random_state"] = random_state
self.umap_vis_kwargs["densmap"] = True
self.umap_vis_kwargs["output_dens"] = True
self.umap_vis_kwargs["dens_lambda"] = self.dens_lambda
self.umap_vis_kwargs["low_memory"] = False
with self.Timer("fit-vis-UMAP"):
std_trans = umap.UMAP(**self.umap_vis_kwargs).fit(dm)
return std_trans
Expand Down

0 comments on commit f42a964

Please sign in to comment.