From f2ea3609c4bd1d48b1dddff0c9c0c44649cfd02c Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com>
Date: Thu, 15 Feb 2024 17:34:36 -0700
Subject: [PATCH] Fix dreambooth data sampler issue (#8400) (#8413)

* Turn on drop last

* Some neva fixes

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Signed-off-by: yaoyu-33
Co-authored-by: yaoyu-33 <54727607+yaoyu-33@users.noreply.github.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Signed-off-by: ataghibakhsh
---
 .../multimodal/models/text_to_image/dreambooth/dreambooth.py | 2 +-
 nemo/collections/multimodal/parts/utils.py                   | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/nemo/collections/multimodal/models/text_to_image/dreambooth/dreambooth.py b/nemo/collections/multimodal/models/text_to_image/dreambooth/dreambooth.py
index ce82da9bd171..704f8b39371a 100644
--- a/nemo/collections/multimodal/models/text_to_image/dreambooth/dreambooth.py
+++ b/nemo/collections/multimodal/models/text_to_image/dreambooth/dreambooth.py
@@ -487,7 +487,7 @@ def setup_training_data(self, cfg):
             global_batch_size=self.cfg.global_batch_size,
             data_parallel_rank=parallel_state.get_data_parallel_rank(),
             data_parallel_size=parallel_state.get_data_parallel_world_size(),
-            drop_last=False,
+            drop_last=True,
         )

         self._train_dl = torch.utils.data.DataLoader(
diff --git a/nemo/collections/multimodal/parts/utils.py b/nemo/collections/multimodal/parts/utils.py
index c82e0cd37140..4d4e952db0ce 100644
--- a/nemo/collections/multimodal/parts/utils.py
+++ b/nemo/collections/multimodal/parts/utils.py
@@ -22,9 +22,11 @@
 from pytorch_lightning.plugins.environments import TorchElasticEnvironment
 from transformers import CLIPImageProcessor

+from nemo.collections.nlp.modules.common.megatron.megatron_init import fake_initialize_model_parallel
 from nemo.collections.nlp.parts.nlp_overrides import NLPDDPStrategy, NLPSaveRestoreConnector
 from nemo.collections.nlp.parts.peft_config import PEFT_CONFIG_MAP
 from nemo.utils import AppState, logging
+from nemo.utils.model_utils import inject_model_parallel_rank

 try:
     from megatron.core import dist_checkpointing
@@ -361,6 +363,7 @@ def create_neva_model_and_processor(cfg):
         neva_cfg.activations_checkpoint_method = None
         neva_cfg.precision = trainer.precision
         neva_cfg.mm_cfg.llm.from_pretrained = cfg.get('base_model_file', None)
+        neva_cfg.apply_rope_fusion = False
         # neva_cfg.mm_cfg.vision_encoder.from_pretrained = None

         model = MegatronNevaModel.restore_from(
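
Note (reviewer addition, not part of the patch): the one-character change in
dreambooth.py, drop_last=False -> drop_last=True, makes the training sampler
discard the final incomplete global batch instead of emitting it. The sketch
below is a minimal plain-PyTorch illustration of the behavior being toggled;
it does not use NeMo's MegatronPretrainingSampler, and the dataset and batch
sizes are made up for the example.

    import torch
    from torch.utils.data import DataLoader, TensorDataset

    # 10 samples with batch_size=4: 10 % 4 != 0, so the last batch is ragged.
    dataset = TensorDataset(torch.arange(10, dtype=torch.float32))

    for drop_last in (False, True):
        loader = DataLoader(dataset, batch_size=4, drop_last=drop_last)
        sizes = [batch[0].shape[0] for batch in loader]
        print(f"drop_last={drop_last}: batch sizes = {sizes}")

    # drop_last=False: batch sizes = [4, 4, 2]  <- ragged final batch
    # drop_last=True:  batch sizes = [4, 4]     <- every batch full-sized

In Megatron-style data-parallel training, each rank generally assumes every
global batch holds exactly global_batch_size samples; a short final batch
violates that assumption, which is presumably the sampler issue that #8400
reports, so dropping the remainder is the conventional fix.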