piskvorky · menshikh-iv · Feb 16, 2018 · Dec 27, 2017 · Jan 15, 2018 · Jan 15, 2018
diff --git a/gensim/models/keyedvectors.py b/gensim/models/keyedvectors.py
@@ -147,9 +147,10 @@ def save_word2vec_format(self, fname, fvocab=None, binary=False, total_vec=None)
             for word, vocab in sorted(iteritems(self.vocab), key=lambda item: -item[1].count):
                 row = self.syn0[vocab.index]
                 if binary:
+                    row = row.astype(REAL)
                     fout.write(utils.to_utf8(word) + b" " + row.tostring())
                 else:
-                    fout.write(utils.to_utf8("%s %s\n" % (word, ' '.join("%f" % val for val in row))))
+                    fout.write(utils.to_utf8("%s %s\n" % (word, ' '.join(repr(val) for val in row))))
 
     @classmethod
     def load_word2vec_format(cls, fname, fvocab=None, binary=False, encoding='utf8', unicode_errors='strict',
@@ -233,7 +234,7 @@ def add_word(word, weights):
                         if ch != b'\n':  # ignore newlines in front of words (some binary files have)
                             word.append(ch)
                     word = utils.to_unicode(b''.join(word), encoding=encoding, errors=unicode_errors)
-                    weights = fromstring(fin.read(binary_len), dtype=REAL)
+                    weights = fromstring(fin.read(binary_len), dtype=REAL).astype(datatype)
                     add_word(word, weights)
             else:
                 for line_no in xrange(vocab_size):
@@ -243,7 +244,7 @@ def add_word(word, weights):
                     parts = utils.to_unicode(line.rstrip(), encoding=encoding, errors=unicode_errors).split(" ")
                     if len(parts) != vector_size + 1:
                         raise ValueError("invalid vector on line %s (is this really the text format?)" % line_no)
-                    word, weights = parts[0], [REAL(x) for x in parts[1:]]
+                    word, weights = parts[0], [datatype(x) for x in parts[1:]]
                     add_word(word, weights)
         if result.syn0.shape[0] != len(result.vocab):
             logger.info(

diff --git a/gensim/test/test_data/test.kv.txt b/gensim/test/test_data/test.kv.txt
@@ -0,0 +1,3 @@
+2 2
+kangaroo.n.01 -0.0007369244245224787 -8.269973595356034e-05
+horse.n.01 -0.0008546282343595379 0.0007694142576316829
diff --git a/gensim/test/test_datatype.py b/gensim/test/test_datatype.py
@@ -0,0 +1,30 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html
+
+"""
+Automated tests for the `datatype` argument of `KeyedVectors.load_word2vec_format`.
+"""
+
+import logging
+import unittest
+
+import numpy as np
+
+from gensim.test.utils import datapath
+from gensim.models.keyedvectors import KeyedVectors
+
+
+class TestDataType(unittest.TestCase):
+    def test_text(self):
+        path = datapath('test.kv.txt')
+        kv = KeyedVectors.load_word2vec_format(path, binary=False,
+                                               datatype=np.float64)
+        self.assertAlmostEqual(kv['horse.n.01'][0], -0.0008546282343595379)
+        self.assertEqual(kv['horse.n.01'][0].dtype, np.float64)
+
+
+if __name__ == '__main__':
+    logging.root.setLevel(logging.WARNING)
+    unittest.main()