From 78c7194612c132d3f92839b595e50fc7ee09793f Mon Sep 17 00:00:00 2001
From: Alex <116374290+aaalexlit@users.noreply.github.com>
Date: Sun, 21 May 2023 16:04:04 +0200
Subject: [PATCH 1/6] Update modeling_open_llama.py

Fix typo in `use_memorry_efficient_attention` parameter name
---
 src/transformers/models/open_llama/modeling_open_llama.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/transformers/models/open_llama/modeling_open_llama.py b/src/transformers/models/open_llama/modeling_open_llama.py
index a88ba62056d454..7e4f374099f8f3 100644
--- a/src/transformers/models/open_llama/modeling_open_llama.py
+++ b/src/transformers/models/open_llama/modeling_open_llama.py
@@ -40,7 +40,7 @@
 except ImportError:
     xops = None
     logger.warn(
-        "Xformers is not installed correctly. If you want to use memorry_efficient_attention to accelerate training use the following command to install Xformers\npip install xformers."
+        "Xformers is not installed correctly. If you want to use memory_efficient_attention to accelerate training use the following command to install Xformers\npip install xformers."
     )


@@ -226,7 +226,7 @@ def forward(

         past_key_value = (key_states, value_states) if use_cache else None

-        if self.config.use_memorry_efficient_attention and xops is not None and self.training:
+        if self.config.use_memory_efficient_attention and xops is not None and self.training:
             attn_weights = None
             query_states = query_states.transpose(1, 2)
             key_states = key_states.transpose(1, 2)
@@ -564,7 +564,7 @@ def forward(
         if self.embed_layer_norm:
             inputs_embeds = self.embed_layer_norm(inputs_embeds)
         # embed positions
-        if self.config.use_memorry_efficient_attention and self.training:
+        if self.config.use_memory_efficient_attention and self.training:
             attention_mask = None
         elif attention_mask is None:
             attention_mask = torch.ones(

From e9779820cd96ea45216f7c03582d438a1ea99970 Mon Sep 17 00:00:00 2001
From: Alex <116374290+aaalexlit@users.noreply.github.com>
Date: Sun, 21 May 2023 16:05:03 +0200
Subject: [PATCH 2/6] Update configuration_open_llama.py

Fix typo in `use_memorry_efficient_attention` parameter name
---
 .../models/open_llama/configuration_open_llama.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/transformers/models/open_llama/configuration_open_llama.py b/src/transformers/models/open_llama/configuration_open_llama.py
index c202082b553631..27f460e224c304 100644
--- a/src/transformers/models/open_llama/configuration_open_llama.py
+++ b/src/transformers/models/open_llama/configuration_open_llama.py
@@ -99,7 +99,7 @@ def __init__(
         bos_token_id=1,
         eos_token_id=2,
         tie_word_embeddings=False,
-        use_memorry_efficient_attention=True,
+        use_memory_efficient_attention=True,
         hidden_dropout_prob=0.1,
         attention_dropout_prob=0.1,
         use_stable_embedding=True,
@@ -116,7 +116,7 @@ def __init__(
         self.initializer_range = initializer_range
         self.rms_norm_eps = rms_norm_eps
         self.use_cache = use_cache
-        self.use_memorry_efficient_attention = use_memorry_efficient_attention
+        self.use_memory_efficient_attention = use_memory_efficient_attention
         self.hidden_dropout_prob = hidden_dropout_prob
         self.attention_dropout_prob = attention_dropout_prob
         self.use_stable_embedding = use_stable_embedding

From e7ea522770b7376e7b99ed03d22051d2d59e3e34 Mon Sep 17 00:00:00 2001
From: Alex <116374290+aaalexlit@users.noreply.github.com>
Date: Mon, 22 May 2023 21:22:14 +0200
Subject: [PATCH 3/6] Update configuration_open_llama.py

Take care of backwards compatibility ensuring that the previous parameter name
is taken into account if used
---
 src/transformers/models/open_llama/configuration_open_llama.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/transformers/models/open_llama/configuration_open_llama.py b/src/transformers/models/open_llama/configuration_open_llama.py
index 27f460e224c304..0b4eaed8ded89f 100644
--- a/src/transformers/models/open_llama/configuration_open_llama.py
+++ b/src/transformers/models/open_llama/configuration_open_llama.py
@@ -116,7 +116,7 @@ def __init__(
         self.initializer_range = initializer_range
         self.rms_norm_eps = rms_norm_eps
         self.use_cache = use_cache
-        self.use_memory_efficient_attention = use_memory_efficient_attention
+        self.use_memory_efficient_attention = kwargs['use_memorry_efficient_attention'] if 'use_memorry_efficient_attention' in kwargs else use_memory_efficient_attention
         self.hidden_dropout_prob = hidden_dropout_prob
         self.attention_dropout_prob = attention_dropout_prob
         self.use_stable_embedding = use_stable_embedding

From 3ddd4c5b739abb3deb738e2e0062645e3ff9ff3c Mon Sep 17 00:00:00 2001
From: Alex <116374290+aaalexlit@users.noreply.github.com>
Date: Mon, 22 May 2023 21:35:09 +0200
Subject: [PATCH 4/6] Update configuration_open_llama.py

format to adjust the line length
---
 src/transformers/models/open_llama/configuration_open_llama.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/transformers/models/open_llama/configuration_open_llama.py b/src/transformers/models/open_llama/configuration_open_llama.py
index 0b4eaed8ded89f..c1ee8fb72cea7c 100644
--- a/src/transformers/models/open_llama/configuration_open_llama.py
+++ b/src/transformers/models/open_llama/configuration_open_llama.py
@@ -116,7 +116,8 @@ def __init__(
         self.initializer_range = initializer_range
         self.rms_norm_eps = rms_norm_eps
         self.use_cache = use_cache
-        self.use_memory_efficient_attention = kwargs['use_memorry_efficient_attention'] if 'use_memorry_efficient_attention' in kwargs else use_memory_efficient_attention
+        self.use_memory_efficient_attention = kwargs['use_memorry_efficient_attention'] \
+            if 'use_memorry_efficient_attention' in kwargs else use_memory_efficient_attention
         self.hidden_dropout_prob = hidden_dropout_prob
         self.attention_dropout_prob = attention_dropout_prob
         self.use_stable_embedding = use_stable_embedding

From 2f616fb8ef7bb150199945cd0de8bdd95408a8aa Mon Sep 17 00:00:00 2001
From: Alex <116374290+aaalexlit@users.noreply.github.com>
Date: Tue, 23 May 2023 07:02:42 +0200
Subject: [PATCH 5/6] Update configuration_open_llama.py

proper code formatting using `make fixup`
---
 .../models/open_llama/configuration_open_llama.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/transformers/models/open_llama/configuration_open_llama.py b/src/transformers/models/open_llama/configuration_open_llama.py
index c1ee8fb72cea7c..ec41496baf4cca 100644
--- a/src/transformers/models/open_llama/configuration_open_llama.py
+++ b/src/transformers/models/open_llama/configuration_open_llama.py
@@ -116,8 +116,10 @@ def __init__(
         self.initializer_range = initializer_range
         self.rms_norm_eps = rms_norm_eps
         self.use_cache = use_cache
-        self.use_memory_efficient_attention = kwargs['use_memorry_efficient_attention'] \
-            if 'use_memorry_efficient_attention' in kwargs else use_memory_efficient_attention
+        if "use_memorry_efficient_attention" in kwargs:
+            self.use_memory_efficient_attention = kwargs["use_memorry_efficient_attention"]
+        else:
+            self.use_memory_efficient_attention = use_memory_efficient_attention
         self.hidden_dropout_prob = hidden_dropout_prob
         self.attention_dropout_prob = attention_dropout_prob
         self.use_stable_embedding = use_stable_embedding

From 33738224439c2adaa943a4731a4d1798898e5238 Mon Sep 17 00:00:00 2001
From: Alex <116374290+aaalexlit@users.noreply.github.com>
Date: Tue, 23 May 2023 13:34:29 +0200
Subject: [PATCH 6/6] Update configuration_open_llama.py

pop the argument not to let it be set later down the line
---
 .../models/open_llama/configuration_open_llama.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/transformers/models/open_llama/configuration_open_llama.py b/src/transformers/models/open_llama/configuration_open_llama.py
index ec41496baf4cca..cbde4d67d498a7 100644
--- a/src/transformers/models/open_llama/configuration_open_llama.py
+++ b/src/transformers/models/open_llama/configuration_open_llama.py
@@ -116,10 +116,9 @@ def __init__(
         self.initializer_range = initializer_range
         self.rms_norm_eps = rms_norm_eps
         self.use_cache = use_cache
-        if "use_memorry_efficient_attention" in kwargs:
-            self.use_memory_efficient_attention = kwargs["use_memorry_efficient_attention"]
-        else:
-            self.use_memory_efficient_attention = use_memory_efficient_attention
+        self.use_memory_efficient_attention = kwargs.pop(
+            "use_memorry_efficient_attention", use_memory_efficient_attention
+        )
         self.hidden_dropout_prob = hidden_dropout_prob
         self.attention_dropout_prob = attention_dropout_prob
         self.use_stable_embedding = use_stable_embedding
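
Not part of the patch series: a minimal, illustrative usage sketch of the behaviour the final patch implements. The corrected `use_memory_efficient_attention` argument is the explicit keyword in `__init__`, while the old misspelled `use_memorry_efficient_attention` kwarg is still honoured because it is popped from `kwargs` before the parent constructor runs. It assumes a transformers build that includes these patches and exposes `OpenLlamaConfig` at the top level of the package.

# Illustrative only; assumes a transformers version containing these patches.
from transformers import OpenLlamaConfig

# Corrected spelling: the explicit keyword argument in __init__.
config = OpenLlamaConfig(use_memory_efficient_attention=False)
assert config.use_memory_efficient_attention is False

# Legacy spelling: lands in **kwargs and is popped inside __init__, so the
# attribute is still set from it and the stale key is not re-applied later.
legacy = OpenLlamaConfig(use_memorry_efficient_attention=False)
assert legacy.use_memory_efficient_attention is False

Popping the key (rather than only reading it) matters because `PretrainedConfig.__init__` sets any remaining kwargs as attributes, which would otherwise recreate the misspelled attribute alongside the corrected one.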