
Implement Okapi BM25 variants in Gensim #3304

Merged: 9 commits, Sep 8, 2022
12 changes: 6 additions & 6 deletions docs/src/auto_examples/core/run_topics_and_transformations.ipynb

Large diffs are not rendered by default.

14 changes: 14 additions & 0 deletions docs/src/auto_examples/core/run_topics_and_transformations.py
@@ -188,6 +188,20 @@
#
# model = models.TfidfModel(corpus, normalize=True)
#
# * `Okapi Best Matching, Okapi BM25 <https://en.wikipedia.org/wiki/Okapi_BM25>`_
# expects a bag-of-words (integer values) training corpus during initialization.
# During transformation, it will take a vector and return another vector of the
# same dimensionality, except that features which were rare in the training corpus
# will have their value increased. It therefore converts integer-valued
# vectors into real-valued ones, while leaving the number of dimensions intact.
#
# Okapi BM25 is the standard ranking function used by search engines to estimate
# the relevance of documents to a given search query.
#
# .. sourcecode:: pycon
#
# model = models.OkapiBM25Model(corpus)
#
# * `Latent Semantic Indexing, LSI (or sometimes LSA) <http://en.wikipedia.org/wiki/Latent_semantic_indexing>`_
#    transforms documents from either bag-of-words or (preferably) TfIdf-weighted space into
# a latent space of a lower dimensionality. For the toy corpus above we used only
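The BM25 transformation documented above can be sketched without Gensim. The following is a minimal, self-contained illustration of BM25 weighting over a tokenized toy corpus; `bm25_weights` is a hypothetical helper written for this sketch (not part of Gensim's API), it uses the smoothed non-negative IDF and the common defaults ``k1=1.5``, ``b=0.75``, and the exact constants in Gensim's implementation may differ.

```python
import math
from collections import Counter

def bm25_weights(corpus, k1=1.5, b=0.75):
    """BM25 term weights for each document of a tokenized corpus.

    Sketch only, not Gensim's implementation. Uses the smoothed,
    non-negative IDF: ln(1 + (N - n + 0.5) / (n + 0.5)).
    """
    N = len(corpus)
    avgdl = sum(len(doc) for doc in corpus) / N
    # Document frequency of each term.
    df = Counter(term for doc in corpus for term in set(doc))
    idf = {t: math.log(1 + (N - n + 0.5) / (n + 0.5)) for t, n in df.items()}
    weights = []
    for doc in corpus:
        tf = Counter(doc)
        # Length normalization: documents longer than average are penalized via b.
        norm = k1 * (1 - b + b * len(doc) / avgdl)
        weights.append({t: idf[t] * f * (k1 + 1) / (f + norm) for t, f in tf.items()})
    return weights

corpus = [
    ["human", "interface", "computer"],
    ["survey", "user", "computer", "system", "response", "time"],
    ["eps", "user", "interface", "system"],
]
weights = bm25_weights(corpus)
# Each output vector has one real-valued entry per distinct input term, and
# "human" (rare in the corpus) outweighs "computer" (common) in the first document.
```

This mirrors the behaviour described in the tutorial text: integer bag-of-words counts go in, real-valued vectors of the same dimensionality come out, with rare terms boosted.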
@@ -1 +1 @@
-f49c3821bbacdeefdf3945d5dcb5ad01
+226db24f9e807e4bbd2a6ef280a75510
150 changes: 132 additions & 18 deletions docs/src/auto_examples/core/run_topics_and_transformations.rst

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions docs/src/auto_examples/core/sg_execution_times.rst
@@ -5,14 +5,14 @@

Computation times
=================
-**00:05.212** total execution time for **auto_examples_core** files:
+**00:01.658** total execution time for **auto_examples_core** files:

 +--------------------------------------------------------------------------------------------------------------+-----------+---------+
-| :ref:`sphx_glr_auto_examples_core_run_corpora_and_vector_spaces.py` (``run_corpora_and_vector_spaces.py``)   | 00:05.212 | 47.2 MB |
+| :ref:`sphx_glr_auto_examples_core_run_topics_and_transformations.py` (``run_topics_and_transformations.py``) | 00:01.658 | 58.1 MB |
 +--------------------------------------------------------------------------------------------------------------+-----------+---------+
 | :ref:`sphx_glr_auto_examples_core_run_core_concepts.py` (``run_core_concepts.py``)                           | 00:00.000 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------------------+-----------+---------+
-| :ref:`sphx_glr_auto_examples_core_run_similarity_queries.py` (``run_similarity_queries.py``)                 | 00:00.000 | 0.0 MB |
+| :ref:`sphx_glr_auto_examples_core_run_corpora_and_vector_spaces.py` (``run_corpora_and_vector_spaces.py``)   | 00:00.000 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------------------+-----------+---------+
-| :ref:`sphx_glr_auto_examples_core_run_topics_and_transformations.py` (``run_topics_and_transformations.py``) | 00:00.000 | 0.0 MB |
+| :ref:`sphx_glr_auto_examples_core_run_similarity_queries.py` (``run_similarity_queries.py``)                 | 00:00.000 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------------------+-----------+---------+
4 changes: 2 additions & 2 deletions docs/src/auto_examples/index.rst
@@ -220,7 +220,7 @@ Learning-oriented lessons that introduce a particular gensim feature, e.g. a mod

.. raw:: html

    <div class="sphx-glr-thumbcontainer" tooltip="Demonstrates using Gensim&#x27;s implementation of the WMD.">
    <div class="sphx-glr-thumbcontainer" tooltip="Demonstrates using Gensim&#x27;s implementation of the SCM.">

.. only:: html

@@ -237,7 +237,7 @@ Learning-oriented lessons that introduce a particular gensim feature, e.g. a mod

.. raw:: html

    <div class="sphx-glr-thumbcontainer" tooltip="Demonstrates using Gensim&#x27;s implementation of the SCM.">
    <div class="sphx-glr-thumbcontainer" tooltip="Demonstrates using Gensim&#x27;s implementation of the WMD.">

.. only:: html

14 changes: 14 additions & 0 deletions docs/src/gallery/core/run_topics_and_transformations.py
@@ -188,6 +188,20 @@
#
# model = models.TfidfModel(corpus, normalize=True)
#
# * `Okapi Best Matching, Okapi BM25 <https://en.wikipedia.org/wiki/Okapi_BM25>`_
# expects a bag-of-words (integer values) training corpus during initialization.
# During transformation, it will take a vector and return another vector of the
# same dimensionality, except that features which were rare in the training corpus
# will have their value increased. It therefore converts integer-valued
# vectors into real-valued ones, while leaving the number of dimensions intact.
#
# Okapi BM25 is the standard ranking function used by search engines to estimate
# the relevance of documents to a given search query.
#
# .. sourcecode:: pycon
#
# model = models.OkapiBM25Model(corpus)
#
# * `Latent Semantic Indexing, LSI (or sometimes LSA) <http://en.wikipedia.org/wiki/Latent_semantic_indexing>`_
#    transforms documents from either bag-of-words or (preferably) TfIdf-weighted space into
# a latent space of a lower dimensionality. For the toy corpus above we used only
1 change: 1 addition & 0 deletions gensim/models/__init__.py
@@ -9,6 +9,7 @@
from .ldamodel import LdaModel # noqa:F401
from .lsimodel import LsiModel # noqa:F401
from .tfidfmodel import TfidfModel # noqa:F401
from .bm25model import OkapiBM25Model, LuceneBM25Model, AtireBM25Model # noqa:F401
from .rpmodel import RpModel # noqa:F401
from .logentropy_model import LogEntropyModel # noqa:F401
from .word2vec import Word2Vec, FAST_VERSION # noqa:F401
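The three classes imported above correspond to different BM25 flavours, and a key difference between them is how IDF is computed. The snippet below contrasts the classic Okapi IDF, which goes negative for terms appearing in more than half of the documents, with Lucene's smoothed variant, which stays non-negative. The formulas follow the standard BM25 literature; whether Gensim's classes use exactly these constants is an assumption here, not verified against its source.

```python
import math

def okapi_idf(N, n):
    # Classic Okapi BM25 IDF: ln((N - n + 0.5) / (n + 0.5)).
    # Negative when the term appears in more than half of the N documents.
    return math.log((N - n + 0.5) / (n + 0.5))

def lucene_idf(N, n):
    # Smoothed IDF as popularized by Lucene: ln(1 + (N - n + 0.5) / (n + 0.5)).
    # Always non-negative, so matching a very common term never hurts a document.
    return math.log(1 + (N - n + 0.5) / (n + 0.5))

N = 10                       # collection size
common, rare = 9, 1          # document frequencies of two terms
print(okapi_idf(N, common))  # negative
print(lucene_idf(N, common)) # small but positive
print(okapi_idf(N, rare))    # large and positive
```

This difference is the practical reason multiple variants exist: the classic formula can rank a document lower for containing a common query term at all, which the smoothed variants avoid.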