piskvorky · menshikh-iv · Feb 16, 2018 · Dec 27, 2017 · Jan 15, 2018 · Jan 15, 2018
diff --git a/gensim/models/utils_any2vec.py b/gensim/models/utils_any2vec.py
@@ -109,9 +109,10 @@ def _save_word2vec_format(fname, vocab, vectors, fvocab=None, binary=False, tota
             for word, vocab_ in sorted(iteritems(vocab), key=lambda item: -item[1].count):
                 row = vectors[vocab_.index]
                 if binary:
+                    row = row.astype(REAL)
                     fout.write(utils.to_utf8(word) + b" " + row.tostring())
                 else:
-                    fout.write(utils.to_utf8("%s %s\n" % (word, ' '.join("%f" % val for val in row))))
+                    fout.write(utils.to_utf8("%s %s\n" % (word, ' '.join(repr(val) for val in row))))
 
 
 def _load_word2vec_format(cls, fname, fvocab=None, binary=False, encoding='utf8', unicode_errors='strict',
@@ -205,7 +206,7 @@ def add_word(word, weights):
                     if ch != b'\n':  # ignore newlines in front of words (some binary files have)
                         word.append(ch)
                 word = utils.to_unicode(b''.join(word), encoding=encoding, errors=unicode_errors)
-                weights = fromstring(fin.read(binary_len), dtype=REAL)
+                weights = fromstring(fin.read(binary_len), dtype=REAL).astype(datatype)
                 add_word(word, weights)
         else:
             for line_no in xrange(vocab_size):
@@ -215,7 +216,7 @@ def add_word(word, weights):
                 parts = utils.to_unicode(line.rstrip(), encoding=encoding, errors=unicode_errors).split(" ")
                 if len(parts) != vector_size + 1:
                     raise ValueError("invalid vector on line %s (is this really the text format?)" % line_no)
-                word, weights = parts[0], [REAL(x) for x in parts[1:]]
+                word, weights = parts[0], [datatype(x) for x in parts[1:]]
                 add_word(word, weights)
     if result.vectors.shape[0] != len(result.vocab):
         logger.info(

diff --git a/gensim/test/test_data/high_precision.kv.bin b/gensim/test/test_data/high_precision.kv.bin
diff --git a/gensim/test/test_data/high_precision.kv.txt b/gensim/test/test_data/high_precision.kv.txt
@@ -0,0 +1,3 @@
+2 2
+kangaroo.n.01 -0.0007369244245224787 -8.269973595356034e-05
+horse.n.01 -0.0008546282343595379 0.0007694142576316829
diff --git a/gensim/test/test_datatype.py b/gensim/test/test_datatype.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html
+
+"""
+Automated tests for the `datatype` argument of `KeyedVectors.load_word2vec_format`.
+"""
+
+import logging
+import unittest
+
+import numpy as np
+
+from gensim.test.utils import datapath
+from gensim.models.keyedvectors import KeyedVectors
+
+
+class TestDataType(unittest.TestCase):
+    def load_model(self, datatype):
+        path = datapath('high_precision.kv.txt')
+        kv = KeyedVectors.load_word2vec_format(path, binary=False,
+                                               datatype=datatype)
+        return kv
+
+    def test_high_precision(self):
+        kv = self.load_model(np.float64)
+        self.assertAlmostEqual(kv['horse.n.01'][0], -0.0008546282343595379)
+        self.assertEqual(kv['horse.n.01'][0].dtype, np.float64)
+
+    def test_medium_precision(self):
+        kv = self.load_model(np.float32)
+        self.assertAlmostEqual(kv['horse.n.01'][0], -0.00085462822)
+        self.assertEqual(kv['horse.n.01'][0].dtype, np.float32)
+
+    def test_low_precision(self):
+        kv = self.load_model(np.float16)
+        self.assertAlmostEqual(kv['horse.n.01'][0], -0.00085449)
+        self.assertEqual(kv['horse.n.01'][0].dtype, np.float16)
+
+    def test_type_conversion(self):
+        path = datapath('high_precision.kv.txt')
+        binary_path = datapath('high_precision.kv.bin')
+        model1 = KeyedVectors.load_word2vec_format(path, datatype=np.float16)
+        model1.save_word2vec_format(binary_path, binary=True)
+        model2 = KeyedVectors.load_word2vec_format(binary_path, datatype=np.float64, binary=True)
+        self.assertAlmostEqual(model1["horse.n.01"][0], np.float16(model2["horse.n.01"][0]))
+        self.assertEqual(model1["horse.n.01"][0].dtype, np.float16)
+        self.assertEqual(model2["horse.n.01"][0].dtype, np.float64)
+
+
+if __name__ == '__main__':
+    logging.root.setLevel(logging.WARNING)
+    unittest.main()