piskvorky · piskvorky · Apr 29, 2023 · Mar 14, 2023
diff --git a/docs/notebooks/Word2Vec_FastText_Comparison.ipynb b/docs/notebooks/Word2Vec_FastText_Comparison.ipynb
@@ -40,7 +40,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "--2019-05-12 19:40:14--  http://mattmahoney.net/dc/enwik9.zip\n",
+      "--2019-05-12 19:40:14--  https://mattmahoney.net/dc/enwik9.zip\n",
       "Resolving mattmahoney.net (mattmahoney.net)... 67.195.197.75\n",
       "Connecting to mattmahoney.net (mattmahoney.net)|67.195.197.75|:80... connected.\n",
       "HTTP request sent, awaiting response... 200 OK\n",
@@ -51,7 +51,7 @@
       "\n",
       "2019-05-12 19:50:17 (247 KB/s) - Connection closed at byte 152553031. Retrying.\n",
       "\n",
-      "--2019-05-12 19:50:18--  (try: 2)  http://mattmahoney.net/dc/enwik9.zip\n",
+      "--2019-05-12 19:50:18--  (try: 2)  https://mattmahoney.net/dc/enwik9.zip\n",
       "Connecting to mattmahoney.net (mattmahoney.net)|67.195.197.75|:80... connected.\n",
       "HTTP request sent, awaiting response... 206 Partial Content\n",
       "Length: 322592222 (308M), 170039191 (162M) remaining [application/zip]\n",
@@ -83,11 +83,11 @@
     "# download the text8 corpus (a 100 MB sample of cleaned wikipedia text)\n",
     "import os.path\n",
     "if not os.path.isfile('text8'):\n",
-    "    !wget -c http://mattmahoney.net/dc/text8.zip\n",
+    "    !wget -c https://mattmahoney.net/dc/text8.zip\n",
     "    !unzip text8.zip\n",
     "# download and preprocess the text9 corpus\n",
     "if not os.path.isfile('text9'):\n",
-    "  !wget -c http://mattmahoney.net/dc/enwik9.zip\n",
+    "  !wget -c https://mattmahoney.net/dc/enwik9.zip\n",
     "  !unzip enwik9.zip\n",
     "  !perl {FT_HOME}wikifil.pl enwik9 > text9"
    ]

diff --git a/docs/notebooks/Wordrank_comparisons.ipynb b/docs/notebooks/Wordrank_comparisons.ipynb
@@ -62,7 +62,7 @@
     "# download the text8 corpus (a 100 MB sample of preprocessed wikipedia text)\n",
     "import os.path\n",
     "if not os.path.isfile('text8'):\n",
-    "    !wget -c http://mattmahoney.net/dc/text8.zip\n",
+    "    !wget -c https://mattmahoney.net/dc/text8.zip\n",
     "    !unzip text8.zip"
    ]
   },

diff --git a/docs/notebooks/downloader_api_tutorial.ipynb b/docs/notebooks/downloader_api_tutorial.ipynb
@@ -328,7 +328,7 @@
       "            \"parts\": 3\n",
       "        }, \n",
       "        \"text8\": {\n",
-      "            \"source\": \"http://mattmahoney.net/dc/text8.zip\", \n",
+      "            \"source\": \"https://mattmahoney.net/dc/text8.zip\", \n",
       "            \"checksum\": \"68799af40b6bda07dfa47a32612e5364\", \n",
       "            \"parts\": 1, \n",
       "            \"description\": \"Cleaned small sample from wikipedia\", \n",

diff --git a/docs/notebooks/nmslibtutorial.ipynb b/docs/notebooks/nmslibtutorial.ipynb
@@ -87,7 +87,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "--2019-06-27 13:48:42--  http://mattmahoney.net/dc/text8.zip\n",
+      "--2019-06-27 13:48:42--  https://mattmahoney.net/dc/text8.zip\n",
       "Resolving mattmahoney.net... 67.195.197.75\n",
       "Connecting to mattmahoney.net|67.195.197.75|:80... connected.\n",
       "HTTP request sent, awaiting response... 200 OK\n",
@@ -106,7 +106,7 @@
    "source": [
     "import os.path\n",
     "if not os.path.isfile('text8'):\n",
-    "    !wget -c http://mattmahoney.net/dc/text8.zip\n",
+    "    !wget -c https://mattmahoney.net/dc/text8.zip\n",
     "    !unzip text8.zip"
    ]
   },

diff --git a/docs/src/auto_examples/howtos/run_downloader_api.rst b/docs/src/auto_examples/howtos/run_downloader_api.rst
@@ -335,7 +335,7 @@ Here's how to list all resources available in gensim-data:
                 "checksum": "68799af40b6bda07dfa47a32612e5364",
                 "file_name": "text8.gz",
                 "read_more": [
-                    "http://mattmahoney.net/dc/textdata.html"
+                    "https://mattmahoney.net/dc/textdata.html"
                 ],
                 "parts": 1
             },

diff --git a/gensim/downloader.py b/gensim/downloader.py
@@ -260,7 +260,7 @@ def info(name=None, show_only_latest=True, name_only=False):
          u'description': u'Cleaned small sample from wikipedia',
          u'file_name': u'text8.gz',
          u'parts': 1,
-         u'source': u'http://mattmahoney.net/dc/text8.zip'}
+         u'source': u'https://mattmahoney.net/dc/text8.zip'}
         >>>
         >>> api.info()  # retrieve information about all available datasets and models
 

diff --git a/gensim/models/word2vec.py b/gensim/models/word2vec.py
@@ -2041,7 +2041,7 @@ def __iter__(self):
 
 class Text8Corpus:
     def __init__(self, fname, max_sentence_length=MAX_WORDS_IN_BATCH):
-        """Iterate over sentences from the "text8" corpus, unzipped from http://mattmahoney.net/dc/text8.zip."""
+        """Iterate over sentences from the "text8" corpus, unzipped from https://mattmahoney.net/dc/text8.zip."""
         self.fname = fname
         self.max_sentence_length = max_sentence_length
 

diff --git a/gensim/scripts/benchmark.py b/gensim/scripts/benchmark.py
@@ -30,6 +30,6 @@
         print(globals()['__doc__'] % locals())
         sys.exit(1)
 
-    corpus = Text8Corpus(sys.argv[1])  # text8/text9 format from http://mattmahoney.net/dc/textdata.html
+    corpus = Text8Corpus(sys.argv[1])  # text8/text9 format from https://mattmahoney.net/dc/textdata.html
     cls = FastText
     cls(corpus, workers=12, epochs=1).save(f'/tmp/{cls.__name__}.gensim{__version__}')