Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Export weight and enc api #3302

Merged
merged 2 commits into from
Sep 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions TrainingExtensions/torch/src/python/aimet_torch/quantsim.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
import torch
import onnx
from packaging import version # pylint: disable=wrong-import-order
from safetensors.numpy import save_file as save_safetensor_file

import aimet_common.libpymo as libpymo
from aimet_common import quantsim
Expand Down Expand Up @@ -648,6 +649,26 @@ def export_onnx_model_and_encodings(path: str, filename_prefix: str, original_mo
excluded_layer_names, propagate_encodings,
quantizer_args=quantizer_args)

def export_weights_to_safetensors(self, path: str, filename_prefix: str):
    """
    Exports the updated weights in the safetensors format

    :param path: Directory in which to save the file
    :param filename_prefix: Filename to use for the saved file; '.safetensors'
        is appended automatically
    """

    def to_numpy(tensor):
        # detach() is safe (and effectively a no-op) for tensors that do not
        # require grad, so a single code path covers both cases.
        return tensor.detach().cpu().numpy()

    # Strip the quantization wrappers so only the underlying model's
    # (quantization-updated) parameters are serialized.
    unwrapped_model = QuantizationSimModel.get_original_model(self.model)
    data = {name: to_numpy(tensor) for name, tensor in unwrapped_model.state_dict().items()}

    # Preserve mixed-precision metadata when the sim model carries it;
    # safetensors expects a (possibly empty) dict here.
    metadata = getattr(self.model, 'mpp_meta', {})

    file_path = os.path.join(path, filename_prefix + '.safetensors')
    save_safetensor_file(data, file_path, metadata)

def save_encodings_to_json(self, path: str, filename_prefix: str):
"""
Save encodings in the model to json.
Expand Down
13 changes: 13 additions & 0 deletions TrainingExtensions/torch/test/python/test_quantizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -5212,6 +5212,19 @@ def forward(self, inp):
closest_wrapper = qsim._get_closest_producer_wrapper(qsim.connected_graph.ordered_ops[1], module_to_quant_wrapper)
assert closest_wrapper == qsim.model.permute


def test_export_to_safetensors():
    """Exporting weights must produce <path>/<prefix>.safetensors on disk."""
    torch.manual_seed(0)
    model = SmallMnistNoDropoutWithPassThrough()
    model.eval()
    dummy_data = torch.randn(1, 1, 32, 32)
    sim = QuantizationSimModel(model, dummy_data)
    sim.compute_encodings(lambda m, itr: m(dummy_data), None)
    with tempfile.TemporaryDirectory() as temp_dir:
        sim.export_weights_to_safetensors(temp_dir, 'sim_export')
        # The exporter appends the '.safetensors' suffix itself
        assert os.path.exists(os.path.join(temp_dir, 'sim_export.safetensors'))


@pytest.mark.cuda
@pytest.mark.parametrize('input_dims', (2, 3, 4))
def test_fused_qdq_linear(input_dims):
Expand Down
Loading