Skip to content
This repository has been archived by the owner on Nov 16, 2023. It is now read-only.

Commit

Permalink
Moved urls to module constants for pretrained embedding utils.
Browse files Browse the repository at this point in the history
  • Loading branch information
AbhiramE committed May 7, 2019
1 parent 90a1209 commit 65b76ff
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 9 deletions.
4 changes: 4 additions & 0 deletions utils_nlp/pretrained_embeddings/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,6 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

+ WORD2VEC_URL = 'https://s3.amazonaws.com/dl4j-distribution/GoogleNews-vectors-negative300.bin.gz'
+ FASTTEXT_EN_URL = 'https://dl.fbaipublicfiles.com/fasttext/vectors-wiki/wiki.en.zip'
+ GLOVE_URL = 'http://nlp.stanford.edu/data/glove.840B.300d.zip'
6 changes: 4 additions & 2 deletions utils_nlp/pretrained_embeddings/fasttext.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from gensim.models.fasttext import load_facebook_model

from utils_nlp.dataset.url_utils import maybe_download
+ from utils_nlp.pretrained_embeddings import FASTTEXT_EN_URL


def _extract_fasttext_vectors(zip_path, dest_path="."):
Expand Down Expand Up @@ -44,8 +45,9 @@ def _download_fasttext_vectors(download_dir, file_name="wiki.en.zip"):
str: file_path to the downloaded vectors.
"""

- url = "https://dl.fbaipublicfiles.com/fasttext/vectors-wiki/wiki.en.zip"
- return maybe_download(url, filename=file_name, work_directory=download_dir)
+ return maybe_download(
+ FASTTEXT_EN_URL, filename=file_name, work_directory=download_dir
+ )


def _maybe_download_and_extract(dest_path, file_name):
Expand Down
4 changes: 2 additions & 2 deletions utils_nlp/pretrained_embeddings/glove.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from gensim.test.utils import get_tmpfile

from utils_nlp.dataset.url_utils import maybe_download
+ from utils_nlp.pretrained_embeddings import GLOVE_URL


def _extract_glove_vectors(zip_path, dest_path="."):
Expand Down Expand Up @@ -46,8 +47,7 @@ def _download_glove_vectors(download_dir, file_name="glove.840B.300d.zip"):
str: file_path to the downloaded vectors.
"""

- url = "http://nlp.stanford.edu/data/glove.840B.300d.zip"
- return maybe_download(url, filename=file_name, work_directory=download_dir)
+ return maybe_download(GLOVE_URL, filename=file_name, work_directory=download_dir)


def _maybe_download_and_extract(dest_path, file_name):
Expand Down
7 changes: 2 additions & 5 deletions utils_nlp/pretrained_embeddings/word2vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from gensim.models.keyedvectors import KeyedVectors

from utils_nlp.dataset.url_utils import maybe_download
+ from utils_nlp.pretrained_embeddings import WORD2VEC_URL


def _extract_word2vec_vectors(zip_path, dest_filepath):
Expand Down Expand Up @@ -44,11 +45,7 @@ def _download_word2vec_vectors(
str: file_path to the downloaded vectors.
"""

- url = (
- "https://s3.amazonaws.com/dl4j-distribution/GoogleNews-vectors-negative300"
- ".bin.gz "
- )
- return maybe_download(url, filename=file_name, work_directory=download_dir)
+ return maybe_download(WORD2VEC_URL, filename=file_name, work_directory=download_dir)


def _maybe_download_and_extract(dest_path, file_name):
Expand Down

0 comments on commit 65b76ff

Please sign in to comment.