From 814de8fac7456bd2ce50d1847505da829761bfdc Mon Sep 17 00:00:00 2001 From: Matt Date: Wed, 24 May 2023 17:03:24 +0100 Subject: [PATCH] Overhaul TF serving signatures + dummy inputs (#23234) * Let's try autodetecting serving sigs * Don't clobber existing sigs * Change shapes for multiplechoice models * Make default dummy inputs smarter too * Fix missing f-string * Let's YOLO a serving output too * Read __class__.__name__ properly * Don't just pass naked lists in there and expect it to be okay * Code cleanup * Update default serving sig * Clearer error messages * Further updates to the default serving output * make fixup * Update the serving output a bit more * Cleanups and renames, raise errors appropriately when we can't infer inputs * More renames * we're building in a functional context again, yolo * import DUMMY_INPUTS from the right place * import DUMMY_INPUTS from the right place * Support cross-attention in the dummies * Support cross-attention in the dummies * Complete removal of dummy/serving overrides in BERT * Complete removal of dummy/serving overrides in RoBERTa * Obliterate lots and lots of serving sig and dummy overrides * merge type hint changes * Fix for token_type_ids with vocab_size 1 * Add missing property decorator * Fix T5 and hopefully some models that take conv inputs * More signature pruning * Fix T5's signature * Fix Wav2Vec2 signature * Fix LongformerForMultipleChoice input signature * Fix BLIP and LED * Better default serving output error handling * Fix BART dummies * Fix dummies for cross-attention, esp encoder-decoder models * Fix visionencoderdecoder signature * Fix BLIP serving output * Small tweak to BART dummies * Cleanup the ugly parameter inspection line that I used in a few places * committed a breakpoint again * Move the text_dims check * Remove blip_text serving_output * Add decoder_input_ids to the default input sig * Remove all the manual overrides for encoder-decoder model signatures * Tweak longformer/led input sigs * Tweak default serving output * output.keys() -> output * make fixup --- src/transformers/modeling_tf_utils.py | 131 ++++++++--- .../models/albert/modeling_tf_albert.py | 85 ------- .../models/bart/modeling_tf_bart.py | 40 +--- .../models/bert/modeling_tf_bert.py | 126 ----------- .../blenderbot/modeling_tf_blenderbot.py | 29 --- .../modeling_tf_blenderbot_small.py | 29 --- .../models/blip/modeling_tf_blip.py | 212 ++---------------- .../models/blip/modeling_tf_blip_text.py | 68 +----- .../models/camembert/modeling_tf_camembert.py | 129 ----------- .../models/clip/modeling_tf_clip.py | 107 +-------- .../models/convbert/modeling_tf_convbert.py | 64 ------ .../models/convnext/modeling_tf_convnext.py | 51 +---- .../models/ctrl/modeling_tf_ctrl.py | 23 -- .../models/cvt/modeling_tf_cvt.py | 41 +--- .../data2vec/modeling_tf_data2vec_vision.py | 55 +---- .../models/deberta/modeling_tf_deberta.py | 32 --- .../deberta_v2/modeling_tf_deberta_v2.py | 32 --- .../models/deit/modeling_tf_deit.py | 73 +----- .../distilbert/modeling_tf_distilbert.py | 80 ------- .../models/dpr/modeling_tf_dpr.py | 37 --- .../models/electra/modeling_tf_electra.py | 108 +-------- .../modeling_tf_encoder_decoder.py | 41 ---- .../models/esm/modeling_tf_esm.py | 93 -------- .../models/flaubert/modeling_tf_flaubert.py | 58 ----- .../models/funnel/modeling_tf_funnel.py | 27 +-- .../models/gpt2/modeling_tf_gpt2.py | 110 +-------- .../models/gptj/modeling_tf_gptj.py | 61 ----- .../models/groupvit/modeling_tf_groupvit.py | 117 +--------- 
.../models/hubert/modeling_tf_hubert.py | 40 +--- .../models/layoutlm/modeling_tf_layoutlm.py | 47 ---- .../layoutlmv3/modeling_tf_layoutlmv3.py | 70 +----- .../models/led/modeling_tf_led.py | 31 +-- .../longformer/modeling_tf_longformer.py | 116 +--------- .../models/lxmert/modeling_tf_lxmert.py | 102 +++------ .../models/marian/modeling_tf_marian.py | 29 --- .../models/mbart/modeling_tf_mbart.py | 29 --- .../mobilebert/modeling_tf_mobilebert.py | 94 +------- .../models/mobilevit/modeling_tf_mobilevit.py | 49 ---- .../models/mpnet/modeling_tf_mpnet.py | 85 ------- .../models/openai/modeling_tf_openai.py | 61 +---- .../models/opt/modeling_tf_opt.py | 24 -- .../models/pegasus/modeling_tf_pegasus.py | 29 --- .../models/regnet/modeling_tf_regnet.py | 45 +--- .../models/rembert/modeling_tf_rembert.py | 111 --------- .../models/resnet/modeling_tf_resnet.py | 36 +-- .../models/roberta/modeling_tf_roberta.py | 129 ----------- .../modeling_tf_roberta_prelayernorm.py | 129 ----------- .../models/roformer/modeling_tf_roformer.py | 70 ------ .../models/segformer/modeling_tf_segformer.py | 52 +---- .../modeling_tf_speech_to_text.py | 48 ++-- .../models/swin/modeling_tf_swin.py | 51 ----- src/transformers/models/t5/modeling_tf_t5.py | 71 ------ .../models/tapas/modeling_tf_tapas.py | 53 +---- .../transfo_xl/modeling_tf_transfo_xl.py | 42 ---- .../modeling_tf_vision_encoder_decoder.py | 42 ++-- .../models/vit/modeling_tf_vit.py | 51 +---- .../models/vit_mae/modeling_tf_vit_mae.py | 56 +---- .../models/wav2vec2/modeling_tf_wav2vec2.py | 74 +----- .../models/whisper/modeling_tf_whisper.py | 19 +- .../models/xglm/modeling_tf_xglm.py | 61 ----- .../models/xlm/modeling_tf_xlm.py | 58 ----- .../xlm_roberta/modeling_tf_xlm_roberta.py | 129 ----------- .../models/xlnet/modeling_tf_xlnet.py | 77 ------- ...tf_{{cookiecutter.lowercase_modelname}}.py | 145 ------------ tests/test_modeling_tf_common.py | 5 +- 65 files changed, 275 insertions(+), 4144 deletions(-) diff --git a/src/transformers/modeling_tf_utils.py b/src/transformers/modeling_tf_utils.py index c2b0485b5f4c38..bac575e249df47 100644 --- a/src/transformers/modeling_tf_utils.py +++ b/src/transformers/modeling_tf_utils.py @@ -42,7 +42,6 @@ from .generation import GenerationConfig, TFGenerationMixin from .tf_utils import expand_1d, load_attributes_from_hdf5_group, save_attributes_to_hdf5_group, shape_list from .utils import ( - DUMMY_INPUTS, SAFE_WEIGHTS_INDEX_NAME, SAFE_WEIGHTS_NAME, TF2_WEIGHTS_INDEX_NAME, @@ -1114,9 +1113,25 @@ def dummy_inputs(self) -> Dict[str, tf.Tensor]: Returns: `Dict[str, tf.Tensor]`: The dummy inputs. """ - return { - "input_ids": tf.constant(DUMMY_INPUTS, dtype=tf.int32), - } + dummies = {} + sig = self._prune_signature(self.input_signature) + for key, spec in sig.items(): + # 3 is the most correct arbitrary size. 
I will not be taking questions + dummies[key] = tf.ones(shape=[dim if dim is not None else 3 for dim in spec.shape], dtype=spec.dtype) + if key == "token_type_ids": + # Some models have token_type_ids but with a vocab_size of 1 + dummies[key] = tf.zeros_like(dummies[key]) + if self.config.add_cross_attention and "encoder_hidden_states" in inspect.signature(self.call).parameters: + if "encoder_hidden_states" not in dummies: + if self.main_input_name == "input_ids": + dummies["encoder_hidden_states"] = tf.ones( + shape=(3, 3, self.config.hidden_size), dtype=tf.float32, name="encoder_hidden_states" + ) + else: + raise NotImplementedError( + "Model has cross-attention but we couldn't infer the shape for the encoder hidden states. Please manually override dummy_inputs!" + ) + return dummies @property def framework(self) -> str: @@ -1137,6 +1152,10 @@ def __init__(self, config, *inputs, **kwargs): self.config = config self.name_or_path = config.name_or_path self.generation_config = GenerationConfig.from_model_config(config) if self.can_generate() else None + if not hasattr(self, "serving"): # Don't overwrite existing serving signatures + self.serving = tf.function( + self.eager_serving, input_signature=[self._prune_signature(self.input_signature)] + ) # Set the serving spec quickly to ensure that Keras doesn't use the specific dummy input shapes as the spec self._set_save_spec(self.serving.input_signature[0]) @@ -1201,36 +1220,82 @@ def eager_serving(self, inputs): return self.serving_output(output) - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), - "token_type_ids": tf.TensorSpec((None, None), tf.int32, name="token_type_ids"), - } - ] - ) - def serving(self, inputs): + @property + def input_signature(self) -> Dict[str, tf.TensorSpec]: """ - Method used for serving the model. - - Args: - inputs (`Dict[str, tf.Tensor]`): - The input of the saved model as a dictionary of tensors. + This property should return a dict mapping input names to tf.TensorSpec objects, representing the expected + shape and dtype for model inputs. It is used for both serving and for generating the dummy inputs used to build + the model. """ - output = self.call(inputs) + model_inputs = list(inspect.signature(self.call).parameters) + sig = {} + if "input_ids" in model_inputs: + if self.__class__.__name__.endswith("ForMultipleChoice"): + text_dims = 3 + else: + text_dims = 2 + for input_name in ( + "input_ids", + "attention_mask", + "token_type_ids", + "decoder_input_ids", + "decoder_attention_mask", + ): + if input_name in model_inputs: + sig[input_name] = tf.TensorSpec([None] * text_dims, tf.int32, name=input_name) + if "pixel_values" in model_inputs: + pixel_values_shape = [None, None, None, None] + if hasattr(self.config, "vision_config"): + vision_config = self.config.vision_config + else: + vision_config = self.config + if hasattr(vision_config, "num_channels"): + pixel_values_shape[1] = vision_config.num_channels + else: + raise NotImplementedError( + "Could not infer number of channels from config, please override input_signature to specify input shapes." 
+ ) + if hasattr(vision_config, "image_size"): + pixel_values_shape[2] = pixel_values_shape[3] = vision_config.image_size + elif hasattr(vision_config, "input_size"): + pixel_values_shape[2] = pixel_values_shape[3] = vision_config.input_size + else: + raise NotImplementedError( + "Could not infer input image shape from config, please override input_signature to specify input shapes." + ) + sig["pixel_values"] = tf.TensorSpec(pixel_values_shape, tf.float32, name="pixel_values") + if "input_features" in model_inputs: + raise NotImplementedError("Audio models need a manually defined input_signature") + return sig - return self.serving_output(output) + def _prune_signature(self, signature): + """Keeps only the keys of a given input signature that are valid for this model.""" + model_inputs = list(inspect.signature(self.call).parameters) + return {key: val for key, val in signature.items() if key in model_inputs} def serving_output(self, output): """ - Prepare the output of the saved model. Each model must implement this function. - - Args: - output ([`TFBaseModelOutput`]): - The output returned by the model. - """ - raise NotImplementedError + Prepare the output of the saved model. Can be overridden if specific serving modifications are required. + """ + if not isinstance(output, ModelOutput): + return output + for key in output: + if key.endswith("hidden_states") and not getattr(self.config, "output_hidden_states", False): + output[key] = None + elif key.endswith("attentions") and not getattr(self.config, "output_attentions", False): + output[key] = None + elif key == "past_key_values" and not getattr(self.config, "use_cache", False): + output[key] = None + elif key == "cross_attentions" and not ( + getattr(self.config, "output_attentions", False) and getattr(self.config, "add_cross_attention", False) + ): + output[key] = None + if isinstance(output[key], (tuple, list)): + try: + output[key] = tf.convert_to_tensor(output[key]) + except (ValueError, tf.errors.InvalidArgumentError): + pass # Layers may not have the same dimensions + return output def can_generate(self) -> bool: """ @@ -1384,7 +1449,7 @@ def prepare_tf_dataset( if not isinstance(dataset, datasets.Dataset): raise TypeError("Dataset argument should be a datasets.Dataset!") - model_inputs = list(dict(inspect.signature(self.call).parameters).keys()) + model_inputs = list(inspect.signature(self.call).parameters) model_labels = find_labels(self.__class__) if "cols_to_retain" in list(inspect.signature(dataset._get_output_signature).parameters.keys()): output_signature, _ = dataset._get_output_signature( @@ -1496,7 +1561,7 @@ def compute_loss(self, *args, **kwargs): return self.hf_compute_loss(*args, **kwargs) def get_label_to_output_name_mapping(self): - arg_names = list(dict(inspect.signature(self.call).parameters).keys()) + arg_names = list(inspect.signature(self.call).parameters) if self._label_to_output_map is not None: return self._label_to_output_map elif "start_positions" in arg_names: @@ -1519,7 +1584,7 @@ def train_step(self, data): """ # We hardcode the most common renamings; models with weirder names can set `self._label_to_output_map` - arg_names = list(dict(inspect.signature(self.call).parameters).keys()) + arg_names = list(inspect.signature(self.call).parameters) label_kwargs = find_labels(self.__class__) label_to_output = self.get_label_to_output_name_mapping() output_to_label = {val: key for key, val in label_to_output.items()} @@ -1626,7 +1691,7 @@ def test_step(self, data): that they are available to the model during 
the forward pass. """ # We hardcode the most common renamings; models with weirder names can set `self._label_to_output_map` - arg_names = list(dict(inspect.signature(self.call).parameters).keys()) + arg_names = list(inspect.signature(self.call).parameters) label_kwargs = find_labels(self.__class__) label_to_output = self.get_label_to_output_name_mapping() output_to_label = {val: key for key, val in label_to_output.items()} @@ -1645,7 +1710,7 @@ def test_step(self, data): # When using a dummy loss, we ensure that separate labels are copied to the correct model arguments, # if those keys are not already present in the input dict if self._using_dummy_loss and y is not None: - arg_names = list(dict(inspect.signature(self.call).parameters).keys()) + arg_names = list(inspect.signature(self.call).parameters) # If y is a tensor and the model only has one label-like input, map y to that input if len(label_kwargs) == 1 and isinstance(y, tf.Tensor): if isinstance(x, tf.Tensor): diff --git a/src/transformers/models/albert/modeling_tf_albert.py b/src/transformers/models/albert/modeling_tf_albert.py index 57e2414e720d67..ad35b6182a4e21 100644 --- a/src/transformers/models/albert/modeling_tf_albert.py +++ b/src/transformers/models/albert/modeling_tf_albert.py @@ -49,7 +49,6 @@ ) from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( - MULTIPLE_CHOICE_DUMMY_INPUTS, ModelOutput, add_code_sample_docstrings, add_start_docstrings, @@ -826,17 +825,6 @@ def call( return outputs - def serving_output(self, output: TFBaseModelOutputWithPooling) -> TFBaseModelOutputWithPooling: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFBaseModelOutputWithPooling( - last_hidden_state=output.last_hidden_state, - pooler_output=output.pooler_output, - hidden_states=hs, - attentions=attns, - ) - @add_start_docstrings( """ @@ -933,17 +921,6 @@ def call( attentions=outputs.attentions, ) - def serving_output(self, output: TFAlbertForPreTrainingOutput) -> TFAlbertForPreTrainingOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFAlbertForPreTrainingOutput( - prediction_logits=output.prediction_logits, - sop_logits=output.sop_logits, - hidden_states=hs, - attentions=attns, - ) - class TFAlbertSOPHead(tf.keras.layers.Layer): def __init__(self, config: AlbertConfig, **kwargs): @@ -1058,13 +1035,6 @@ def call( attentions=outputs.attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForMaskedLM.serving_output - def serving_output(self, output: TFMaskedLMOutput) -> TFMaskedLMOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFMaskedLMOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1147,13 +1117,6 @@ def call( attentions=outputs.attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForSequenceClassification.serving_output - def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = 
tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFSequenceClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1237,13 +1200,6 @@ def call( attentions=outputs.attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForTokenClassification.serving_output - def serving_output(self, output: TFTokenClassifierOutput) -> TFTokenClassifierOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFTokenClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1339,15 +1295,6 @@ def call( attentions=outputs.attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForQuestionAnswering.serving_output - def serving_output(self, output: TFQuestionAnsweringModelOutput) -> TFQuestionAnsweringModelOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFQuestionAnsweringModelOutput( - start_logits=output.start_logits, end_logits=output.end_logits, hidden_states=hs, attentions=attns - ) - @add_start_docstrings( """ @@ -1370,16 +1317,6 @@ def __init__(self, config: AlbertConfig, *inputs, **kwargs): units=1, kernel_initializer=get_initializer(config.initializer_range), name="classifier" ) - @property - def dummy_inputs(self): - """ - Dummy inputs to build the network. - - Returns: - tf.Tensor with dummy inputs - """ - return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS, dtype=tf.int32)} - @unpack_inputs @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) @add_code_sample_docstrings( @@ -1457,25 +1394,3 @@ def call( hidden_states=outputs.hidden_states, attentions=outputs.attentions, ) - - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"), - "token_type_ids": tf.TensorSpec((None, None, None), tf.int32, name="token_type_ids"), - } - ] - ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForMultipleChoice.serving - def serving(self, inputs: Dict[str, tf.Tensor]) -> TFMultipleChoiceModelOutput: - output = self.call(input_ids=inputs) - - return self.serving_output(output) - - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForMultipleChoice.serving_output - def serving_output(self, output: TFMultipleChoiceModelOutput) -> TFMultipleChoiceModelOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFMultipleChoiceModelOutput(logits=output.logits, hidden_states=hs, attentions=attns) diff --git a/src/transformers/models/bart/modeling_tf_bart.py b/src/transformers/models/bart/modeling_tf_bart.py index 5690e022adaac2..e2555381f4bdd3 100644 --- a/src/transformers/models/bart/modeling_tf_bart.py +++ b/src/transformers/models/bart/modeling_tf_bart.py @@ -34,7 +34,6 @@ # Public API from ...modeling_tf_utils import ( - DUMMY_INPUTS, TFCausalLanguageModelingLoss, TFModelInputType, TFPreTrainedModel, @@ -487,31 +486,14 @@ class 
TFBartPretrainedModel(TFPreTrainedModel): @property def dummy_inputs(self): - pad_token = 1 - input_ids = tf.convert_to_tensor(DUMMY_INPUTS, dtype=tf.int32) - decoder_input_ids = tf.convert_to_tensor(DUMMY_INPUTS, dtype=tf.int32) - dummy_inputs = { - "decoder_input_ids": decoder_input_ids, - "attention_mask": tf.cast(input_ids != pad_token, tf.int32), - "input_ids": input_ids, - } + dummy_inputs = super().dummy_inputs + # Dummy inputs should not contain the default val of 1 + # as this is the padding token and some assertions check it + dummy_inputs["input_ids"] = dummy_inputs["input_ids"] * 2 + if "decoder_input_ids" in dummy_inputs: + dummy_inputs["decoder_input_ids"] = dummy_inputs["decoder_input_ids"] * 2 return dummy_inputs - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), - "decoder_input_ids": tf.TensorSpec((None, None), tf.int32, name="decoder_input_ids"), - "decoder_attention_mask": tf.TensorSpec((None, None), tf.int32, name="decoder_attention_mask"), - } - ] - ) - def serving(self, inputs): - output = self.call(inputs) - - return self.serving_output(output) - BART_START_DOCSTRING = r""" This model inherits from [`TFPreTrainedModel`]. Check the superclass documentation for the generic methods the @@ -1461,16 +1443,6 @@ def prepare_decoder_input_ids_from_labels(self, labels: tf.Tensor): BART_START_DOCSTRING, ) class TFBartForSequenceClassification(TFBartPretrainedModel, TFSequenceClassificationLoss): - @property - def dummy_inputs(self): - pad_token = self.config.pad_token_id - input_ids = tf.constant([[0, 6, 10, 4, 2], [0, 8, 12, 2, pad_token]]) - dummy_inputs = { - "attention_mask": tf.cast(tf.math.not_equal(input_ids, (pad_token)), dtype=tf.int32), - "input_ids": input_ids, - } - return dummy_inputs - def __init__(self, config: BartConfig, load_weight_prefix=None, *inputs, **kwargs): super().__init__(config, *inputs, **kwargs) self.model = TFBartMainLayer(config, load_weight_prefix=load_weight_prefix, name="model") diff --git a/src/transformers/models/bert/modeling_tf_bert.py b/src/transformers/models/bert/modeling_tf_bert.py index df78d03a0074b6..fd0a07b415f4f2 100644 --- a/src/transformers/models/bert/modeling_tf_bert.py +++ b/src/transformers/models/bert/modeling_tf_bert.py @@ -54,8 +54,6 @@ ) from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( - DUMMY_INPUTS, - MULTIPLE_CHOICE_DUMMY_INPUTS, ModelOutput, add_code_sample_docstrings, add_start_docstrings, @@ -903,24 +901,6 @@ class TFBertPreTrainedModel(TFPreTrainedModel): config_class = BertConfig base_model_prefix = "bert" - @property - def dummy_inputs(self): - """ - Dummy inputs to build the network. - - Returns: - `Dict[str, tf.Tensor]`: The dummy inputs. 
- """ - dummy = {"input_ids": tf.constant(DUMMY_INPUTS, dtype=tf.int32)} - # Add `encoder_hidden_states` to make the cross-attention layers' weights initialized - if self.config.add_cross_attention: - batch_size, seq_len = tf.constant(DUMMY_INPUTS).shape - shape = (batch_size, seq_len) + (self.config.hidden_size,) - h = tf.random.uniform(shape=shape) - dummy["encoder_hidden_states"] = h - - return dummy - @dataclass class TFBertForPreTrainingOutput(ModelOutput): @@ -1123,26 +1103,6 @@ def call( ) return outputs - def serving_output( - self, output: TFBaseModelOutputWithPoolingAndCrossAttentions - ) -> TFBaseModelOutputWithPoolingAndCrossAttentions: - output_cache = self.config.use_cache and self.config.is_decoder - pkv = tf.convert_to_tensor(output.past_key_values) if output_cache else None - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - cross_attns = tf.convert_to_tensor(output.cross_attentions) if output.cross_attentions is not None else None - if not (self.config.output_attentions and self.config.add_cross_attention): - cross_attns = None - - return TFBaseModelOutputWithPoolingAndCrossAttentions( - last_hidden_state=output.last_hidden_state, - pooler_output=output.pooler_output, - past_key_values=pkv, - hidden_states=hs, - attentions=attns, - cross_attentions=cross_attns, - ) - @add_start_docstrings( """ @@ -1255,17 +1215,6 @@ def call( attentions=outputs.attentions, ) - def serving_output(self, output: TFBertForPreTrainingOutput) -> TFBertForPreTrainingOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFBertForPreTrainingOutput( - prediction_logits=output.prediction_logits, - seq_relationship_logits=output.seq_relationship_logits, - hidden_states=hs, - attentions=attns, - ) - @add_start_docstrings("""Bert Model with a `language modeling` head on top.""", BERT_START_DOCSTRING) class TFBertForMaskedLM(TFBertPreTrainedModel, TFMaskedLanguageModelingLoss): @@ -1352,12 +1301,6 @@ def call( attentions=outputs.attentions, ) - def serving_output(self, output: TFMaskedLMOutput) -> TFMaskedLMOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFMaskedLMOutput(logits=output.logits, hidden_states=hs, attentions=attns) - class TFBertLMHeadModel(TFBertPreTrainedModel, TFCausalLanguageModelingLoss): # names with a '.' 
represents the authorized unexpected/missing layers when a TF model is loaded from a PT model @@ -1483,19 +1426,6 @@ def call( cross_attentions=outputs.cross_attentions, ) - def serving_output(self, output: TFCausalLMOutputWithCrossAttentions) -> TFCausalLMOutputWithCrossAttentions: - output_cache = self.config.use_cache and self.config.is_decoder - pkv = tf.convert_to_tensor(output.past_key_values) if output_cache else None - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - cross_attns = tf.convert_to_tensor(output.cross_attentions) if output.cross_attentions is not None else None - if not (self.config.output_attentions and self.config.add_cross_attention): - cross_attns = None - - return TFCausalLMOutputWithCrossAttentions( - logits=output.logits, past_key_values=pkv, hidden_states=hs, attentions=attns, cross_attentions=cross_attns - ) - @add_start_docstrings( """Bert Model with a `next sentence prediction (classification)` head on top.""", @@ -1578,12 +1508,6 @@ def call( attentions=outputs.attentions, ) - def serving_output(self, output: TFNextSentencePredictorOutput) -> TFNextSentencePredictorOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFNextSentencePredictorOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1670,12 +1594,6 @@ def call( attentions=outputs.attentions, ) - def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFSequenceClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1698,16 +1616,6 @@ def __init__(self, config: BertConfig, *inputs, **kwargs): units=1, kernel_initializer=get_initializer(config.initializer_range), name="classifier" ) - @property - def dummy_inputs(self) -> Dict[str, tf.Tensor]: - """ - Dummy inputs to build the network. 
- - Returns: - tf.Tensor with dummy inputs - """ - return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS, dtype=tf.int32)} - @unpack_inputs @add_start_docstrings_to_model_forward(BERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) @add_code_sample_docstrings( @@ -1785,26 +1693,6 @@ def call( attentions=outputs.attentions, ) - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"), - "token_type_ids": tf.TensorSpec((None, None, None), tf.int32, name="token_type_ids"), - } - ] - ) - def serving(self, inputs: Dict[str, tf.Tensor]) -> TFMultipleChoiceModelOutput: - output = self.call(input_ids=inputs) - - return self.serving_output(output) - - def serving_output(self, output: TFMultipleChoiceModelOutput) -> TFMultipleChoiceModelOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFMultipleChoiceModelOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1895,12 +1783,6 @@ def call( attentions=outputs.attentions, ) - def serving_output(self, output: TFTokenClassifierOutput) -> TFTokenClassifierOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFTokenClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -2002,11 +1884,3 @@ def call( hidden_states=outputs.hidden_states, attentions=outputs.attentions, ) - - def serving_output(self, output: TFQuestionAnsweringModelOutput) -> TFQuestionAnsweringModelOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFQuestionAnsweringModelOutput( - start_logits=output.start_logits, end_logits=output.end_logits, hidden_states=hs, attentions=attns - ) diff --git a/src/transformers/models/blenderbot/modeling_tf_blenderbot.py b/src/transformers/models/blenderbot/modeling_tf_blenderbot.py index 66f00d89f897d7..d0e74550370505 100644 --- a/src/transformers/models/blenderbot/modeling_tf_blenderbot.py +++ b/src/transformers/models/blenderbot/modeling_tf_blenderbot.py @@ -34,7 +34,6 @@ # Public API from ...modeling_tf_utils import ( - DUMMY_INPUTS, TFCausalLanguageModelingLoss, TFPreTrainedModel, keras_serializable, @@ -464,34 +463,6 @@ class TFBlenderbotPreTrainedModel(TFPreTrainedModel): config_class = BlenderbotConfig base_model_prefix = "model" - @property - def dummy_inputs(self): - pad_token = 1 - input_ids = tf.convert_to_tensor(DUMMY_INPUTS, dtype=tf.int32) - decoder_input_ids = tf.convert_to_tensor(DUMMY_INPUTS, dtype=tf.int32) - dummy_inputs = { - "decoder_input_ids": decoder_input_ids, - "attention_mask": tf.cast(input_ids != pad_token, tf.int32), - "input_ids": input_ids, - } - return dummy_inputs - - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), - "decoder_input_ids": tf.TensorSpec((None, None), tf.int32, name="decoder_input_ids"), - "decoder_attention_mask": tf.TensorSpec((None, None), 
tf.int32, name="decoder_attention_mask"), - } - ] - ) - # Copied from transformers.models.bart.modeling_tf_bart.TFBartPretrainedModel.serving - def serving(self, inputs): - output = self.call(inputs) - - return self.serving_output(output) - BLENDERBOT_START_DOCSTRING = r""" This model inherits from [`TFPreTrainedModel`]. Check the superclass documentation for the generic methods the diff --git a/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py b/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py index 541024470d10f5..2e8d2e11cae798 100644 --- a/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py +++ b/src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py @@ -33,7 +33,6 @@ # Public API from ...modeling_tf_utils import ( - DUMMY_INPUTS, TFCausalLanguageModelingLoss, TFPreTrainedModel, keras_serializable, @@ -464,34 +463,6 @@ class TFBlenderbotSmallPreTrainedModel(TFPreTrainedModel): config_class = BlenderbotSmallConfig base_model_prefix = "model" - @property - def dummy_inputs(self): - pad_token = 1 - input_ids = tf.convert_to_tensor(DUMMY_INPUTS, dtype=tf.int32) - decoder_input_ids = tf.convert_to_tensor(DUMMY_INPUTS, dtype=tf.int32) - dummy_inputs = { - "decoder_input_ids": decoder_input_ids, - "attention_mask": tf.cast(input_ids != pad_token, tf.int32), - "input_ids": input_ids, - } - return dummy_inputs - - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), - "decoder_input_ids": tf.TensorSpec((None, None), tf.int32, name="decoder_input_ids"), - "decoder_attention_mask": tf.TensorSpec((None, None), tf.int32, name="decoder_attention_mask"), - } - ] - ) - # Copied from transformers.models.bart.modeling_tf_bart.TFBartPretrainedModel.serving - def serving(self, inputs): - output = self.call(inputs) - - return self.serving_output(output) - BLENDERBOT_SMALL_START_DOCSTRING = r""" This model inherits from [`TFPreTrainedModel`]. Check the superclass documentation for the generic methods the diff --git a/src/transformers/models/blip/modeling_tf_blip.py b/src/transformers/models/blip/modeling_tf_blip.py index 95269e4351d994..428151ea9a3c0a 100644 --- a/src/transformers/models/blip/modeling_tf_blip.py +++ b/src/transformers/models/blip/modeling_tf_blip.py @@ -17,13 +17,12 @@ from __future__ import annotations from dataclasses import dataclass -from typing import Any, Dict, Optional, Tuple, Union +from typing import Any, Optional, Tuple, Union import tensorflow as tf from ...modeling_tf_outputs import TFBaseModelOutput, TFBaseModelOutputWithPooling from ...modeling_tf_utils import ( - DUMMY_INPUTS, TFPreTrainedModel, get_initializer, get_tf_activation, @@ -648,38 +647,6 @@ def __init__(self, config: BlipVisionConfig, *args, **kwargs): self.encoder = TFBlipEncoder(config, name="encoder") self.post_layernorm = tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name="post_layernorm") - @property - def dummy_inputs(self) -> Dict[str, tf.Tensor]: - """ - Dummy inputs to build the network. - - Returns: - `Dict[str, tf.Tensor]`: The dummy inputs. 
- """ - VISION_DUMMY_INPUTS = tf.random.uniform( - shape=(len(DUMMY_INPUTS), 3, self.config.image_size, self.config.image_size), dtype=tf.float32 - ) - return {"pixel_values": VISION_DUMMY_INPUTS} - - @tf.function( - input_signature=[ - { - "pixel_values": tf.TensorSpec((None, None, None, None), tf.float32, name="pixel_values"), - } - ] - ) - def serving(self, inputs: Dict[str, tf.Tensor]) -> TFBaseModelOutputWithPooling: - """ - Method used for serving the model. - - Args: - inputs (`Dict[str, tf.Tensor]`): - The input of the saved model as a dictionary of tensors. - """ - output = self.call(inputs) - - return self.serving_output(output) - def serving_output(self, output: TFBaseModelOutputWithPooling) -> TFBaseModelOutputWithPooling: hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None @@ -881,44 +848,6 @@ def __init__(self, config: BlipConfig, *inputs, **kwargs): self.blip = TFBlipMainLayer(config, name="blip") - @property - def dummy_inputs(self) -> Dict[str, tf.Tensor]: - """ - Dummy inputs to build the network. - - Returns: - `Dict[str, tf.Tensor]`: The dummy inputs. - """ - VISION_DUMMY_INPUTS = tf.random.uniform( - shape=(len(DUMMY_INPUTS), 3, self.config.vision_config.image_size, self.config.vision_config.image_size), - dtype=tf.float32, - ) - return { - "input_ids": tf.constant(DUMMY_INPUTS, dtype=tf.int32), - "pixel_values": VISION_DUMMY_INPUTS, - } - - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), - "pixel_values": tf.TensorSpec((None, None, None, None), tf.float32, name="pixel_values"), - "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), - } - ] - ) - def serving(self, inputs: Dict[str, tf.Tensor]) -> TFBlipOutput: - """ - Method used for serving the model. - - Args: - inputs (`Dict[str, tf.Tensor]`): - The input of the saved model as a dictionary of tensors. - """ - output = self.call(inputs) - - return self.serving_output(output) - def serving_output(self, output: TFBlipOutput) -> TFBlipOutput: return TFBlipOutput( logits_per_image=output.logits_per_image, @@ -1082,48 +1011,6 @@ def __init__(self, config: BlipConfig, *args, **kwargs): def get_input_embeddings(self) -> tf.keras.layers.Layer: return self.vision_model.embeddings.patch_embedding - @property - def dummy_inputs(self): - input_ids = tf.constant(DUMMY_INPUTS, dtype=tf.int32) - VISION_DUMMY_INPUTS = tf.random.uniform( - shape=(len(DUMMY_INPUTS), 3, self.config.vision_config.image_size, self.config.vision_config.image_size), - dtype=tf.float32, - ) - return {"input_ids": input_ids, "pixel_values": VISION_DUMMY_INPUTS} - - @tf.function( - input_signature=[ - { - "pixel_values": tf.TensorSpec((None, None, None, None), tf.float32, name="pixel_values"), - "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), - } - ] - ) - def serving(self, inputs: Dict[str, tf.Tensor]) -> TFBaseModelOutputWithPooling: - """ - Method used for serving the model. - - Args: - inputs (`Dict[str, tf.Tensor]`): - The input of the saved model as a dictionary of tensors. 
- """ - output = self.call(inputs) - - return self.serving_output(output) - - def serving_output( - self, output: TFBlipForConditionalGenerationModelOutput - ) -> TFBlipForConditionalGenerationModelOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFBlipForConditionalGenerationModelOutput( - last_hidden_state=output.last_hidden_state, - image_embeds=output.image_embeds, - hidden_states=hs, - attentions=attns, - ) - @unpack_inputs @add_start_docstrings_to_model_forward(BLIP_VISION_INPUTS_DOCSTRING) @replace_return_docstrings(output_type=TFBlipForConditionalGenerationModelOutput, config_class=BlipConfig) @@ -1297,46 +1184,30 @@ def __init__(self, config: BlipConfig, *args, **kwargs): def get_input_embeddings(self) -> tf.keras.layers.Layer: return self.vision_model.embeddings.patch_embedding - @property - def dummy_inputs(self): - input_ids = tf.constant(DUMMY_INPUTS, dtype=tf.int32) - VISION_DUMMY_INPUTS = tf.random.uniform( - shape=(len(DUMMY_INPUTS), 3, self.config.vision_config.image_size, self.config.vision_config.image_size), - dtype=tf.float32, - ) - return {"input_ids": input_ids, "pixel_values": VISION_DUMMY_INPUTS, "decoder_input_ids": input_ids} - - @tf.function( - input_signature=[ - { - "pixel_values": tf.TensorSpec((None, None, None, None), tf.float32, name="pixel_values"), - "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), - } - ] - ) - def serving(self, inputs: Dict[str, tf.Tensor]) -> TFBaseModelOutputWithPooling: - """ - Method used for serving the model. - - Args: - inputs (`Dict[str, tf.Tensor]`): - The input of the saved model as a dictionary of tensors. - """ - output = self.call(inputs) + # Adapted from transformers.models.t5.modeling_tf_t5.TFT5PreTrainedModel._shift_right + def _shift_right(self, input_ids): + decoder_start_token_id = self.decoder_start_token_id + pad_token_id = self.decoder_pad_token_id - return self.serving_output(output) + if decoder_start_token_id is None or pad_token_id is None: + raise ValueError("decoder_start_token_id and pad_token_id must be defined!") - def serving_output(self, output: TFBlipTextVisionModelOutput) -> TFBlipTextVisionModelOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None + start_tokens = tf.fill((shape_list(input_ids)[0], 1), decoder_start_token_id) + start_tokens = tf.cast(start_tokens, input_ids.dtype) # Ensure compatible dtypes for concatenation + shifted_input_ids = tf.concat([start_tokens, input_ids[:, :-1]], -1) - return TFBlipTextVisionModelOutput( - image_embeds=output.image_embeds, - last_hidden_state=output.last_hidden_state, - hidden_states=hs, - attentions=attns, + # replace possible -100 values in labels by `pad_token_id` + shifted_input_ids = tf.where( + shifted_input_ids == -100, + tf.cast(tf.fill(shape_list(shifted_input_ids), pad_token_id), shifted_input_ids.dtype), + shifted_input_ids, ) + # "Verify that `labels` has only positive values and -100" + tf.debugging.assert_greater_equal(shifted_input_ids, tf.constant(0, dtype=shifted_input_ids.dtype)) + + return shifted_input_ids + @unpack_inputs @add_start_docstrings_to_model_forward(BLIP_VISION_INPUTS_DOCSTRING) @replace_return_docstrings(output_type=TFBlipTextVisionModelOutput, config_class=BlipVisionConfig) @@ -1389,7 +1260,7 @@ def call( 
```""" if labels is None and decoder_input_ids is None: raise ValueError( - "Either `decoder_input_ids` or `labels` should be passed when calling `forward` with" + "Either `decoder_input_ids` or `labels` should be passed when calling" " `TFBlipForQuestionAnswering`. if you are training the model make sure that `labels` is passed, if you" " are using the model for inference make sure that `decoder_input_ids` is passed or call `generate`" ) @@ -1579,47 +1450,6 @@ def __init__(self, config: BlipConfig, *args, **kwargs): def get_input_embeddings(self) -> tf.keras.layers.Layer: return self.vision_model.embeddings.patch_embedding - @property - def dummy_inputs(self): - input_ids = tf.constant(DUMMY_INPUTS, dtype=tf.int32) - VISION_DUMMY_INPUTS = tf.random.uniform( - shape=(len(DUMMY_INPUTS), 3, self.config.vision_config.image_size, self.config.vision_config.image_size), - dtype=tf.float32, - ) - return {"input_ids": input_ids, "pixel_values": VISION_DUMMY_INPUTS} - - @tf.function( - input_signature=[ - { - "pixel_values": tf.TensorSpec((None, None, None, None), tf.float32, name="pixel_values"), - "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), - } - ] - ) - def serving(self, inputs: Dict[str, tf.Tensor]) -> TFBaseModelOutputWithPooling: - """ - Method used for serving the model. - - Args: - inputs (`Dict[str, tf.Tensor]`): - The input of the saved model as a dictionary of tensors. - """ - output = self.call(inputs) - - return self.serving_output(output) - - def serving_output(self, output: TFBlipImageTextMatchingModelOutput) -> TFBlipImageTextMatchingModelOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFBlipImageTextMatchingModelOutput( - itm_score=output.itm_score, - last_hidden_state=hs, - hidden_states=output.hidden_states, - attentions=attns, - question_embeds=output.question_embeds, - ) - @unpack_inputs @add_start_docstrings_to_model_forward(BLIP_VISION_INPUTS_DOCSTRING) @replace_return_docstrings(output_type=TFBlipImageTextMatchingModelOutput, config_class=BlipVisionConfig) diff --git a/src/transformers/models/blip/modeling_tf_blip_text.py b/src/transformers/models/blip/modeling_tf_blip_text.py index bff81223375ccd..19ebdac62e22fa 100644 --- a/src/transformers/models/blip/modeling_tf_blip_text.py +++ b/src/transformers/models/blip/modeling_tf_blip_text.py @@ -17,7 +17,7 @@ from __future__ import annotations import math -from typing import Dict, Optional, Tuple +from typing import Optional, Tuple import tensorflow as tf @@ -27,7 +27,6 @@ TFCausalLMOutputWithCrossAttentions, ) from ...modeling_tf_utils import ( - DUMMY_INPUTS, TFPreTrainedModel, get_initializer, get_tf_activation, @@ -593,31 +592,6 @@ def __init__(self, config, add_pooling_layer=True, name=None, **kwargs): self.encoder = TFBlipTextEncoder(config, name="encoder") self.pooler = TFBlipTextPooler(config, name="pooler") if add_pooling_layer else None - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), - } - ] - ) - def serving(self, inputs: Dict[str, tf.Tensor]) -> TFBaseModelOutputWithPoolingAndCrossAttentions: - output = self.call(inputs) - return self.serving_output(output) - - def serving_output( - self, output: TFBaseModelOutputWithPoolingAndCrossAttentions - ) -> 
TFBaseModelOutputWithPoolingAndCrossAttentions: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFBaseModelOutputWithPoolingAndCrossAttentions( - last_hidden_state=output.last_hidden_state, - pooler_output=output.pooler_output, - hidden_states=hs, - attentions=attns, - ) - def get_input_embeddings(self): return self.embeddings.word_embeddings @@ -844,46 +818,6 @@ def get_output_embeddings(self): def set_output_embeddings(self, new_embeddings): self.cls.predictions.decoder = new_embeddings - @property - def dummy_inputs(self) -> Dict[str, tf.Tensor]: - """ - Dummy inputs to build the network. - - Returns: - `Dict[str, tf.Tensor]`: The dummy inputs. - """ - return {"input_ids": tf.constant(DUMMY_INPUTS, dtype=tf.int32)} - - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), - } - ] - ) - def serving(self, inputs: Dict[str, tf.Tensor]) -> TFCausalLMOutputWithCrossAttentions: - """ - Method used for serving the model. - - Args: - inputs (`Dict[str, tf.Tensor]`): - The input of the saved model as a dictionary of tensors. - """ - output = self.call(inputs) - - return self.serving_output(output) - - def serving_output(self, output: TFCausalLMOutputWithCrossAttentions) -> TFCausalLMOutputWithCrossAttentions: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFCausalLMOutputWithCrossAttentions( - logits=output.logits, - cross_attentions=output.cross_attentions, - hidden_states=hs, - attentions=attns, - ) - @add_start_docstrings_to_model_forward(BLIP_TEXT_INPUTS_DOCSTRING) @unpack_inputs def call( diff --git a/src/transformers/models/camembert/modeling_tf_camembert.py b/src/transformers/models/camembert/modeling_tf_camembert.py index 980462f4be7c25..8def74a5b3045e 100644 --- a/src/transformers/models/camembert/modeling_tf_camembert.py +++ b/src/transformers/models/camembert/modeling_tf_camembert.py @@ -51,8 +51,6 @@ ) from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( - DUMMY_INPUTS, - MULTIPLE_CHOICE_DUMMY_INPUTS, add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward, @@ -873,38 +871,6 @@ class TFCamembertPreTrainedModel(TFPreTrainedModel): config_class = CamembertConfig base_model_prefix = "roberta" - @property - # Copied from transformers.models.bert.modeling_tf_bert.TFBertPreTrainedModel.dummy_inputs - def dummy_inputs(self): - """ - Dummy inputs to build the network. - - Returns: - `Dict[str, tf.Tensor]`: The dummy inputs. 
- """ - dummy = {"input_ids": tf.constant(DUMMY_INPUTS, dtype=tf.int32)} - # Add `encoder_hidden_states` to make the cross-attention layers' weights initialized - if self.config.add_cross_attention: - batch_size, seq_len = tf.constant(DUMMY_INPUTS).shape - shape = (batch_size, seq_len) + (self.config.hidden_size,) - h = tf.random.uniform(shape=shape) - dummy["encoder_hidden_states"] = h - - return dummy - - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), - } - ] - ) - def serving(self, inputs): - output = self.call(inputs) - - return self.serving_output(output) - @add_start_docstrings( "The bare CamemBERT Model transformer outputting raw hidden-states without any specific head on top.", @@ -979,27 +945,6 @@ def call( return outputs - # Copied from transformers.models.bert.modeling_tf_bert.TFBertModel.serving_output - def serving_output( - self, output: TFBaseModelOutputWithPoolingAndCrossAttentions - ) -> TFBaseModelOutputWithPoolingAndCrossAttentions: - output_cache = self.config.use_cache and self.config.is_decoder - pkv = tf.convert_to_tensor(output.past_key_values) if output_cache else None - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - cross_attns = tf.convert_to_tensor(output.cross_attentions) if output.cross_attentions is not None else None - if not (self.config.output_attentions and self.config.add_cross_attention): - cross_attns = None - - return TFBaseModelOutputWithPoolingAndCrossAttentions( - last_hidden_state=output.last_hidden_state, - pooler_output=output.pooler_output, - past_key_values=pkv, - hidden_states=hs, - attentions=attns, - cross_attentions=cross_attns, - ) - # Copied from transformers.models.roberta.modeling_tf_roberta.TFRobertaLMHead with Roberta->Camembert class TFCamembertLMHead(tf.keras.layers.Layer): @@ -1135,13 +1080,6 @@ def call( attentions=outputs.attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForMaskedLM.serving_output - def serving_output(self, output: TFMaskedLMOutput) -> TFMaskedLMOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFMaskedLMOutput(logits=output.logits, hidden_states=hs, attentions=attns) - # Copied from transformers.models.roberta.modeling_tf_roberta.TFRobertaClassificationHead class TFCamembertClassificationHead(tf.keras.layers.Layer): @@ -1248,13 +1186,6 @@ def call( attentions=outputs.attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForSequenceClassification.serving_output - def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFSequenceClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1339,13 +1270,6 @@ def call( attentions=outputs.attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForTokenClassification.serving_output - def serving_output(self, output: TFTokenClassifierOutput) -> TFTokenClassifierOutput: - hs = 
tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFTokenClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1369,16 +1293,6 @@ def __init__(self, config, *inputs, **kwargs): 1, kernel_initializer=get_initializer(config.initializer_range), name="classifier" ) - @property - def dummy_inputs(self): - """ - Dummy inputs to build the network. - - Returns: - tf.Tensor with dummy inputs - """ - return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS, dtype=tf.int32)} - @unpack_inputs @add_start_docstrings_to_model_forward( CAMEMBERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length") @@ -1449,26 +1363,6 @@ def call( attentions=outputs.attentions, ) - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"), - } - ] - ) - def serving(self, inputs): - output = self.call(inputs) - - return self.serving_output(output) - - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForMultipleChoice.serving_output - def serving_output(self, output: TFMultipleChoiceModelOutput) -> TFMultipleChoiceModelOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFMultipleChoiceModelOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1562,15 +1456,6 @@ def call( attentions=outputs.attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForQuestionAnswering.serving_output - def serving_output(self, output: TFQuestionAnsweringModelOutput) -> TFQuestionAnsweringModelOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFQuestionAnsweringModelOutput( - start_logits=output.start_logits, end_logits=output.end_logits, hidden_states=hs, attentions=attns - ) - @add_start_docstrings( """CamemBERT Model with a `language modeling` head on top for CLM fine-tuning.""", CAMEMBERT_START_DOCSTRING @@ -1696,17 +1581,3 @@ def call( attentions=outputs.attentions, cross_attentions=outputs.cross_attentions, ) - - # Copied from transformers.models.bert.modeling_tf_bert.TFBertLMHeadModel.serving_output - def serving_output(self, output: TFCausalLMOutputWithCrossAttentions) -> TFCausalLMOutputWithCrossAttentions: - output_cache = self.config.use_cache and self.config.is_decoder - pkv = tf.convert_to_tensor(output.past_key_values) if output_cache else None - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - cross_attns = tf.convert_to_tensor(output.cross_attentions) if output.cross_attentions is not None else None - if not (self.config.output_attentions and self.config.add_cross_attention): - cross_attns = None - - return TFCausalLMOutputWithCrossAttentions( - logits=output.logits, past_key_values=pkv, hidden_states=hs, attentions=attns, cross_attentions=cross_attns - ) diff --git a/src/transformers/models/clip/modeling_tf_clip.py 
b/src/transformers/models/clip/modeling_tf_clip.py index 9b7976f4136626..778f1ed2c92e4a 100644 --- a/src/transformers/models/clip/modeling_tf_clip.py +++ b/src/transformers/models/clip/modeling_tf_clip.py @@ -19,7 +19,7 @@ import math from dataclasses import dataclass -from typing import Any, Dict, Optional, Tuple, Union +from typing import Any, Optional, Tuple, Union import numpy as np import tensorflow as tf @@ -29,7 +29,6 @@ # Public API from ...modeling_tf_utils import ( - DUMMY_INPUTS, TFModelInputType, TFPreTrainedModel, get_initializer, @@ -1090,29 +1089,6 @@ def call( return outputs - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), - } - ] - ) - def serving(self, inputs: Dict[str, tf.Tensor]) -> TFBaseModelOutputWithPooling: - output = self.call(inputs) - return self.serving_output(output) - - def serving_output(self, output: TFBaseModelOutputWithPooling) -> TFBaseModelOutputWithPooling: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFBaseModelOutputWithPooling( - last_hidden_state=output.last_hidden_state, - pooler_output=output.pooler_output, - hidden_states=hs, - attentions=attns, - ) - class TFCLIPVisionModel(TFCLIPPreTrainedModel): config_class = CLIPVisionConfig @@ -1123,38 +1099,6 @@ def __init__(self, config: CLIPVisionConfig, *inputs, **kwargs): self.clip = TFCLIPVisionMainLayer(config, name="clip") - @property - def dummy_inputs(self) -> Dict[str, tf.Tensor]: - """ - Dummy inputs to build the network. - - Returns: - `Dict[str, tf.Tensor]`: The dummy inputs. - """ - VISION_DUMMY_INPUTS = tf.random.uniform( - shape=(len(DUMMY_INPUTS), 3, self.config.image_size, self.config.image_size), dtype=tf.float32 - ) - return {"pixel_values": VISION_DUMMY_INPUTS} - - @tf.function( - input_signature=[ - { - "pixel_values": tf.TensorSpec((None, None, None, None), tf.float32, name="pixel_values"), - } - ] - ) - def serving(self, inputs: Dict[str, tf.Tensor]) -> TFBaseModelOutputWithPooling: - """ - Method used for serving the model. - - Args: - inputs (`Dict[str, tf.Tensor]`): - The input of the saved model as a dictionary of tensors. - """ - output = self.call(inputs) - - return self.serving_output(output) - @unpack_inputs @add_start_docstrings_to_model_forward(CLIP_VISION_INPUTS_DOCSTRING) @replace_return_docstrings(output_type=TFBaseModelOutputWithPooling, config_class=CLIPVisionConfig) @@ -1199,17 +1143,6 @@ def call( return outputs - def serving_output(self, output: TFBaseModelOutputWithPooling) -> TFBaseModelOutputWithPooling: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFBaseModelOutputWithPooling( - last_hidden_state=output.last_hidden_state, - pooler_output=output.pooler_output, - hidden_states=hs, - attentions=attns, - ) - @add_start_docstrings(CLIP_START_DOCSTRING) class TFCLIPModel(TFCLIPPreTrainedModel): @@ -1220,44 +1153,6 @@ def __init__(self, config: CLIPConfig, *inputs, **kwargs): self.clip = TFCLIPMainLayer(config, name="clip") - @property - def dummy_inputs(self) -> Dict[str, tf.Tensor]: - """ - Dummy inputs to build the network. - - Returns: - `Dict[str, tf.Tensor]`: The dummy inputs. 
- """ - VISION_DUMMY_INPUTS = tf.random.uniform( - shape=(len(DUMMY_INPUTS), 3, self.config.vision_config.image_size, self.config.vision_config.image_size), - dtype=tf.float32, - ) - return { - "input_ids": tf.constant(DUMMY_INPUTS, dtype=tf.int32), - "pixel_values": VISION_DUMMY_INPUTS, - } - - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), - "pixel_values": tf.TensorSpec((None, None, None, None), tf.float32, name="pixel_values"), - "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), - } - ] - ) - def serving(self, inputs: Dict[str, tf.Tensor]) -> TFCLIPOutput: - """ - Method used for serving the model. - - Args: - inputs (`Dict[str, tf.Tensor]`): - The input of the saved model as a dictionary of tensors. - """ - output = self.call(inputs) - - return self.serving_output(output) - @unpack_inputs @add_start_docstrings_to_model_forward(CLIP_TEXT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) def get_text_features( diff --git a/src/transformers/models/convbert/modeling_tf_convbert.py b/src/transformers/models/convbert/modeling_tf_convbert.py index 19c2d700dc064a..9b2bf2383bb740 100644 --- a/src/transformers/models/convbert/modeling_tf_convbert.py +++ b/src/transformers/models/convbert/modeling_tf_convbert.py @@ -46,7 +46,6 @@ ) from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( - MULTIPLE_CHOICE_DUMMY_INPUTS, add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward, @@ -770,12 +769,6 @@ def call( return outputs - def serving_output(self, output): - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFBaseModelOutput(last_hidden_state=output.last_hidden_state, hidden_states=hs, attentions=attns) - class TFConvBertMaskedLMHead(tf.keras.layers.Layer): def __init__(self, config, input_embeddings, **kwargs): @@ -907,13 +900,6 @@ def call( attentions=generator_hidden_states.attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForMaskedLM.serving_output - def serving_output(self, output): - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFMaskedLMOutput(logits=output.logits, hidden_states=hs, attentions=attns) - class TFConvBertClassificationHead(tf.keras.layers.Layer): """Head for sentence-level classification tasks.""" @@ -1012,12 +998,6 @@ def call( attentions=outputs.attentions, ) - def serving_output(self, output): - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFSequenceClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1038,16 +1018,6 @@ def __init__(self, config, *inputs, **kwargs): 1, kernel_initializer=get_initializer(config.initializer_range), name="classifier" ) - @property - def dummy_inputs(self): - """ - Dummy inputs to build the network. 
- - Returns: - tf.Tensor with dummy inputs - """ - return {"input_ids": tf.convert_to_tensor(MULTIPLE_CHOICE_DUMMY_INPUTS, dtype=tf.int32)} - @unpack_inputs @add_start_docstrings_to_model_forward( CONVBERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length") @@ -1121,26 +1091,6 @@ def call( attentions=outputs.attentions, ) - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"), - "token_type_ids": tf.TensorSpec((None, None, None), tf.int32, name="token_type_ids"), - } - ] - ) - def serving(self, inputs): - output = self.call(inputs) - - return self.serving_output(output) - - def serving_output(self, output): - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFMultipleChoiceModelOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1216,12 +1166,6 @@ def call( attentions=outputs.attentions, ) - def serving_output(self, output): - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFTokenClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1307,11 +1251,3 @@ def call( hidden_states=outputs.hidden_states, attentions=outputs.attentions, ) - - def serving_output(self, output): - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFQuestionAnsweringModelOutput( - start_logits=output.start_logits, end_logits=output.end_logits, hidden_states=hs, attentions=attns - ) diff --git a/src/transformers/models/convnext/modeling_tf_convnext.py b/src/transformers/models/convnext/modeling_tf_convnext.py index f258abe24cbb87..23a77a928ecc5a 100644 --- a/src/transformers/models/convnext/modeling_tf_convnext.py +++ b/src/transformers/models/convnext/modeling_tf_convnext.py @@ -17,7 +17,7 @@ from __future__ import annotations -from typing import Dict, Optional, Tuple, Union +from typing import Optional, Tuple, Union import numpy as np import tensorflow as tf @@ -351,43 +351,6 @@ class TFConvNextPreTrainedModel(TFPreTrainedModel): base_model_prefix = "convnext" main_input_name = "pixel_values" - @property - def dummy_inputs(self) -> Dict[str, tf.Tensor]: - """ - Dummy inputs to build the network. - - Returns: - `Dict[str, tf.Tensor]`: The dummy inputs. - """ - VISION_DUMMY_INPUTS = tf.random.uniform( - shape=( - 3, - self.config.num_channels, - self.config.image_size, - self.config.image_size, - ), - dtype=tf.float32, - ) - return {"pixel_values": tf.constant(VISION_DUMMY_INPUTS)} - - @tf.function( - input_signature=[ - { - "pixel_values": tf.TensorSpec((None, None, None, None), tf.float32, name="pixel_values"), - } - ] - ) - def serving(self, inputs): - """ - Method used for serving the model. - - Args: - inputs (`Dict[str, tf.Tensor]`): - The input of the saved model as a dictionary of tensors. - """ - output = self.call(inputs) - return self.serving_output(output) - CONVNEXT_START_DOCSTRING = r""" This model inherits from [`TFPreTrainedModel`]. 
Check the superclass documentation for the generic methods the @@ -509,14 +472,6 @@ def call( hidden_states=outputs.hidden_states, ) - def serving_output(self, output: TFBaseModelOutputWithPooling) -> TFBaseModelOutputWithPooling: - # hidden_states not converted to Tensor with tf.convert_to_tensor as they are all of different dimensions - return TFBaseModelOutputWithPooling( - last_hidden_state=output.last_hidden_state, - pooler_output=output.pooler_output, - hidden_states=output.hidden_states, - ) - @add_start_docstrings( """ @@ -609,7 +564,3 @@ def call( logits=logits, hidden_states=outputs.hidden_states, ) - - def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput: - # hidden_states not converted to Tensor with tf.convert_to_tensor as they are all of different dimensions - return TFSequenceClassifierOutput(logits=output.logits, hidden_states=output.hidden_states) diff --git a/src/transformers/models/ctrl/modeling_tf_ctrl.py b/src/transformers/models/ctrl/modeling_tf_ctrl.py index cddfd4a9e35274..4dd9e73925070e 100644 --- a/src/transformers/models/ctrl/modeling_tf_ctrl.py +++ b/src/transformers/models/ctrl/modeling_tf_ctrl.py @@ -564,15 +564,6 @@ def call( ) return outputs - def serving_output(self, output): - pkv = tf.convert_to_tensor(output.past_key_values) if self.config.use_cache else None - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFBaseModelOutputWithPast( - last_hidden_state=output.last_hidden_state, past_key_values=pkv, hidden_states=hs, attentions=attns - ) - class TFCTRLLMHead(tf.keras.layers.Layer): def __init__(self, config, input_embeddings, **kwargs): @@ -705,13 +696,6 @@ def call( attentions=transformer_outputs.attentions, ) - def serving_output(self, output): - pkv = tf.convert_to_tensor(output.past_key_values) if self.config.use_cache else None - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFCausalLMOutputWithPast(logits=output.logits, past_key_values=pkv, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -839,10 +823,3 @@ def call( hidden_states=transformer_outputs.hidden_states, attentions=transformer_outputs.attentions, ) - - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForSequenceClassification.serving_output - def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFSequenceClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) diff --git a/src/transformers/models/cvt/modeling_tf_cvt.py b/src/transformers/models/cvt/modeling_tf_cvt.py index 3c80f53bfaf208..80e15a196f8590 100644 --- a/src/transformers/models/cvt/modeling_tf_cvt.py +++ b/src/transformers/models/cvt/modeling_tf_cvt.py @@ -19,7 +19,7 @@ import collections.abc from dataclasses import dataclass -from typing import Dict, Optional, Tuple, Union +from typing import Optional, Tuple, Union import tensorflow as tf @@ -707,35 +707,6 @@ class TFCvtPreTrainedModel(TFPreTrainedModel): base_model_prefix = "cvt" main_input_name = "pixel_values" - @property - def dummy_inputs(self) -> 
Dict[str, tf.Tensor]: - """ - Dummy inputs to build the network. - - Returns: - `Dict[str, tf.Tensor]`: The dummy inputs. - """ - VISION_DUMMY_INPUTS = tf.random.uniform(shape=(3, self.config.num_channels, 224, 224), dtype=tf.float32) - return {"pixel_values": tf.constant(VISION_DUMMY_INPUTS)} - - @tf.function( - input_signature=[ - { - "pixel_values": tf.TensorSpec((None, None, None, None), tf.float32, name="pixel_values"), - } - ] - ) - def serving(self, inputs): - """ - Method used for serving the model. - - Args: - inputs (`Dict[str, tf.Tensor]`): - The input of the saved model as a dictionary of tensors. - """ - output = self.call(inputs) - return self.serving_output(output) - TFCVT_START_DOCSTRING = r""" @@ -844,13 +815,6 @@ def call( hidden_states=outputs.hidden_states, ) - def serving_output(self, output: TFBaseModelOutputWithCLSToken) -> TFBaseModelOutputWithCLSToken: - return TFBaseModelOutputWithCLSToken( - last_hidden_state=output.last_hidden_state, - cls_token_value=output.cls_token_value, - hidden_states=output.hidden_states, - ) - @add_start_docstrings( """ @@ -945,6 +909,3 @@ def call( return ((loss,) + output) if loss is not None else output return TFImageClassifierOutputWithNoAttention(loss=loss, logits=logits, hidden_states=outputs.hidden_states) - - def serving_output(self, output: TFImageClassifierOutputWithNoAttention) -> TFImageClassifierOutputWithNoAttention: - return TFImageClassifierOutputWithNoAttention(logits=output.logits, hidden_states=output.hidden_states) diff --git a/src/transformers/models/data2vec/modeling_tf_data2vec_vision.py b/src/transformers/models/data2vec/modeling_tf_data2vec_vision.py index 1085d6e48d6cb0..8ebb8c68ff8d99 100644 --- a/src/transformers/models/data2vec/modeling_tf_data2vec_vision.py +++ b/src/transformers/models/data2vec/modeling_tf_data2vec_vision.py @@ -20,7 +20,7 @@ import collections.abc import math from dataclasses import dataclass -from typing import Dict, List, Optional, Tuple, Union +from typing import List, Optional, Tuple, Union import numpy as np import tensorflow as tf @@ -774,36 +774,6 @@ class TFData2VecVisionPreTrainedModel(TFPreTrainedModel): main_input_name = "pixel_values" _keys_to_ignore_on_load_unexpected = [r"relative_position_index"] - @property - def dummy_inputs(self) -> Dict[str, tf.Tensor]: - """ - Dummy inputs to build the network. Returns: - `Dict[str, tf.Tensor]`: The dummy inputs. - """ - VISION_DUMMY_INPUTS = tf.random.uniform( - shape=(3, self.config.num_channels, self.config.image_size, self.config.image_size), - dtype=tf.float32, - ) - return {"pixel_values": tf.constant(VISION_DUMMY_INPUTS)} - - @tf.function( - input_signature=[ - { - "pixel_values": tf.TensorSpec((None, None, None, None), tf.float32, name="pixel_values"), - } - ] - ) - def serving(self, inputs): - """ - Method used for serving the model. - - Args: - inputs (`Dict[str, tf.Tensor]`): - The input of the saved model as a dictionary of tensors. - """ - output = self.call(inputs) - return self.serving_output(output) - DATA2VEC_VISION_START_DOCSTRING = r""" This model inherits from [`TFPreTrainedModel`]. 
Check the superclass documentation for the generic methods the @@ -926,17 +896,6 @@ def call( return outputs - def serving_output(self, output: TFData2VecVisionModelOutputWithPooling) -> TFData2VecVisionModelOutputWithPooling: - hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFData2VecVisionModelOutputWithPooling( - last_hidden_state=output.last_hidden_state, - pooler_output=output.pooler_output, - hidden_states=hidden_states, - attentions=attentions, - ) - @add_start_docstrings( """ @@ -1009,12 +968,6 @@ def call( attentions=outputs.attentions, ) - def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput: - hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFSequenceClassifierOutput(logits=output.logits, hidden_states=hidden_states, attentions=attentions) - class TFData2VecVisionConvModule(tf.keras.layers.Layer): """ @@ -1475,9 +1428,3 @@ def reshape_features(x): hidden_states=outputs.hidden_states if output_hidden_states else None, attentions=outputs.attentions, ) - - def serving_output(self, output: TFSemanticSegmenterOutput) -> TFSemanticSegmenterOutput: - hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFSemanticSegmenterOutput(logits=output.logits, hidden_states=hidden_states, attentions=attentions) diff --git a/src/transformers/models/deberta/modeling_tf_deberta.py b/src/transformers/models/deberta/modeling_tf_deberta.py index 7a045426185e23..57e6ea8b1e9b07 100644 --- a/src/transformers/models/deberta/modeling_tf_deberta.py +++ b/src/transformers/models/deberta/modeling_tf_deberta.py @@ -1118,12 +1118,6 @@ def call( return outputs - def serving_output(self, output: TFBaseModelOutput) -> TFBaseModelOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFBaseModelOutput(last_hidden_state=output.last_hidden_state, hidden_states=hs, attentions=attns) - @add_start_docstrings("""DeBERTa Model with a `language modeling` head on top.""", DEBERTA_START_DOCSTRING) class TFDebertaForMaskedLM(TFDebertaPreTrainedModel, TFMaskedLanguageModelingLoss): @@ -1194,12 +1188,6 @@ def call( attentions=outputs.attentions, ) - def serving_output(self, output: TFMaskedLMOutput) -> TFMaskedLMOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFMaskedLMOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1281,12 +1269,6 @@ def call( attentions=outputs.attentions, ) - def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFSequenceClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) - 
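Every hunk in this stretch deletes the same hand-written `serving_output` boilerplate from another model class. As a point of reference only, the shared default these overrides collapse into can be pictured along the lines of the sketch below; `DummyConfig`, `DummyOutput` and the function name are stand-ins invented for illustration, not transformers internals, and the actual base-class implementation is not shown in this patch excerpt.

import dataclasses
from typing import Optional, Tuple

import tensorflow as tf


@dataclasses.dataclass
class DummyConfig:
    output_hidden_states: bool = False
    output_attentions: bool = False


@dataclasses.dataclass
class DummyOutput:
    logits: tf.Tensor
    hidden_states: Optional[Tuple[tf.Tensor, ...]] = None
    attentions: Optional[Tuple[tf.Tensor, ...]] = None


def default_serving_output(config: DummyConfig, output: DummyOutput) -> DummyOutput:
    # Stack tuple-valued fields into single tensors only when the config asks for
    # them -- the same conversion every deleted per-model override did by hand.
    hs = tf.convert_to_tensor(output.hidden_states) if config.output_hidden_states else None
    attns = tf.convert_to_tensor(output.attentions) if config.output_attentions else None
    return dataclasses.replace(output, hidden_states=hs, attentions=attns)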
@add_start_docstrings( """ @@ -1358,12 +1340,6 @@ def call( attentions=outputs.attentions, ) - def serving_output(self, output: TFTokenClassifierOutput) -> TFTokenClassifierOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFTokenClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1448,11 +1424,3 @@ def call( hidden_states=outputs.hidden_states, attentions=outputs.attentions, ) - - def serving_output(self, output: TFQuestionAnsweringModelOutput) -> TFQuestionAnsweringModelOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFQuestionAnsweringModelOutput( - start_logits=output.start_logits, end_logits=output.end_logits, hidden_states=hs, attentions=attns - ) diff --git a/src/transformers/models/deberta_v2/modeling_tf_deberta_v2.py b/src/transformers/models/deberta_v2/modeling_tf_deberta_v2.py index 82b0a30c5a500b..1075cc855a020b 100644 --- a/src/transformers/models/deberta_v2/modeling_tf_deberta_v2.py +++ b/src/transformers/models/deberta_v2/modeling_tf_deberta_v2.py @@ -1212,12 +1212,6 @@ def call( return outputs - def serving_output(self, output: TFBaseModelOutput) -> TFBaseModelOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFBaseModelOutput(last_hidden_state=output.last_hidden_state, hidden_states=hs, attentions=attns) - @add_start_docstrings("""DeBERTa Model with a `language modeling` head on top.""", DEBERTA_START_DOCSTRING) # Copied from transformers.models.deberta.modeling_tf_deberta.TFDebertaForMaskedLM with Deberta->DebertaV2 @@ -1289,12 +1283,6 @@ def call( attentions=outputs.attentions, ) - def serving_output(self, output: TFMaskedLMOutput) -> TFMaskedLMOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFMaskedLMOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1377,12 +1365,6 @@ def call( attentions=outputs.attentions, ) - def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFSequenceClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1455,12 +1437,6 @@ def call( attentions=outputs.attentions, ) - def serving_output(self, output: TFTokenClassifierOutput) -> TFTokenClassifierOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFTokenClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1546,11 +1522,3 @@ def call( hidden_states=outputs.hidden_states, attentions=outputs.attentions, ) - - def serving_output(self, output: TFQuestionAnsweringModelOutput) -> 
TFQuestionAnsweringModelOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFQuestionAnsweringModelOutput( - start_logits=output.start_logits, end_logits=output.end_logits, hidden_states=hs, attentions=attns - ) diff --git a/src/transformers/models/deit/modeling_tf_deit.py b/src/transformers/models/deit/modeling_tf_deit.py index 131939f5bcfa9b..efd25788b0330b 100644 --- a/src/transformers/models/deit/modeling_tf_deit.py +++ b/src/transformers/models/deit/modeling_tf_deit.py @@ -20,7 +20,7 @@ import collections.abc import math from dataclasses import dataclass -from typing import Dict, Optional, Tuple, Union +from typing import Optional, Tuple, Union import tensorflow as tf @@ -568,38 +568,6 @@ class TFDeiTPreTrainedModel(TFPreTrainedModel): base_model_prefix = "deit" main_input_name = "pixel_values" - @property - def dummy_inputs(self) -> Dict[str, tf.Tensor]: - """ - Dummy inputs to build the network. - - Returns: - `Dict[str, tf.Tensor]`: The dummy inputs. - """ - VISION_DUMMY_INPUTS = tf.random.uniform( - shape=(3, self.config.num_channels, self.config.image_size, self.config.image_size), dtype=tf.float32 - ) - return {"pixel_values": tf.constant(VISION_DUMMY_INPUTS)} - - @tf.function( - input_signature=[ - { - "pixel_values": tf.TensorSpec((None, None, None, None), tf.float32, name="pixel_values"), - } - ] - ) - def serving(self, inputs): - """ - Method used for serving the model. - - Args: - inputs (`Dict[str, tf.Tensor]`): - The input of the saved model as a dictionary of tensors. - """ - output = self.call(inputs) - - return self.serving_output(output) - DEIT_START_DOCSTRING = r""" This model is a TensorFlow @@ -679,17 +647,6 @@ def call( ) return outputs - def serving_output(self, output: TFBaseModelOutputWithPooling) -> TFBaseModelOutputWithPooling: - hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFBaseModelOutputWithPooling( - last_hidden_state=output.last_hidden_state, - pooler_output=output.pooler_output, - hidden_states=hidden_states, - attentions=attentions, - ) - # Copied from transformers.models.vit.modeling_tf_vit.TFViTPooler with ViT->DeiT class TFDeiTPooler(tf.keras.layers.Layer): @@ -865,14 +822,6 @@ def call( attentions=outputs.attentions, ) - def serving_output(self, output: TFMaskedImageModelingOutput) -> TFMaskedImageModelingOutput: - hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFMaskedImageModelingOutput( - reconstruction=output.reconstruction, hidden_states=hidden_states, attentions=attentions - ) - @add_start_docstrings( """ @@ -970,12 +919,6 @@ def call( attentions=outputs.attentions, ) - def serving_output(self, output: TFImageClassifierOutput) -> TFImageClassifierOutput: - hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFImageClassifierOutput(logits=output.logits, hidden_states=hidden_states, attentions=attentions) - @add_start_docstrings( """ @@ -1055,17 +998,3 @@ def call( hidden_states=outputs.hidden_states, 
attentions=outputs.attentions, ) - - def serving_output( - self, output: TFDeiTForImageClassificationWithTeacherOutput - ) -> TFDeiTForImageClassificationWithTeacherOutput: - hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFDeiTForImageClassificationWithTeacherOutput( - logits=output.logits, - cls_logits=output.cls_logits, - distillation_logits=output.distillation_logits, - hidden_states=hidden_states, - attentions=attentions, - ) diff --git a/src/transformers/models/distilbert/modeling_tf_distilbert.py b/src/transformers/models/distilbert/modeling_tf_distilbert.py index 85a98c2a77fb65..6b0e1b0f3febcf 100644 --- a/src/transformers/models/distilbert/modeling_tf_distilbert.py +++ b/src/transformers/models/distilbert/modeling_tf_distilbert.py @@ -48,7 +48,6 @@ ) from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( - MULTIPLE_CHOICE_DUMMY_INPUTS, add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward, @@ -424,19 +423,6 @@ class TFDistilBertPreTrainedModel(TFPreTrainedModel): config_class = DistilBertConfig base_model_prefix = "distilbert" - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), - } - ] - ) - def serving(self, inputs): - output = self.call(inputs) - - return self.serving_output(output) - DISTILBERT_START_DOCSTRING = r""" @@ -562,12 +548,6 @@ def call( ) return outputs - def serving_output(self, output): - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFBaseModelOutput(last_hidden_state=output.last_hidden_state, hidden_states=hs, attentions=attns) - class TFDistilBertLMHead(tf.keras.layers.Layer): def __init__(self, config, input_embeddings, **kwargs): @@ -687,13 +667,6 @@ def call( attentions=distilbert_output.attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForMaskedLM.serving_output - def serving_output(self, output: TFMaskedLMOutput) -> TFMaskedLMOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFMaskedLMOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -773,13 +746,6 @@ def call( attentions=distilbert_output.attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForSequenceClassification.serving_output - def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFSequenceClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -848,13 +814,6 @@ def call( attentions=outputs.attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForTokenClassification.serving_output - def serving_output(self, output: TFTokenClassifierOutput) -> TFTokenClassifierOutput: - hs = 
tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFTokenClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -879,16 +838,6 @@ def __init__(self, config, *inputs, **kwargs): 1, kernel_initializer=get_initializer(config.initializer_range), name="classifier" ) - @property - def dummy_inputs(self): - """ - Dummy inputs to build the network. - - Returns: - tf.Tensor with dummy inputs - """ - return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS, dtype=tf.int32)} - @unpack_inputs @add_start_docstrings_to_model_forward( DISTILBERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length") @@ -959,26 +908,6 @@ def call( attentions=distilbert_output.attentions, ) - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"), - } - ] - ) - def serving(self, inputs): - output = self.call(inputs) - - return self.serving_output(output) - - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForMultipleChoice.serving_output - def serving_output(self, output: TFMultipleChoiceModelOutput) -> TFMultipleChoiceModelOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFMultipleChoiceModelOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1062,12 +991,3 @@ def call( hidden_states=distilbert_output.hidden_states, attentions=distilbert_output.attentions, ) - - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForQuestionAnswering.serving_output - def serving_output(self, output: TFQuestionAnsweringModelOutput) -> TFQuestionAnsweringModelOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFQuestionAnsweringModelOutput( - start_logits=output.start_logits, end_logits=output.end_logits, hidden_states=hs, attentions=attns - ) diff --git a/src/transformers/models/dpr/modeling_tf_dpr.py b/src/transformers/models/dpr/modeling_tf_dpr.py index 008e6a39fdc90f..759e22c8c71cf8 100644 --- a/src/transformers/models/dpr/modeling_tf_dpr.py +++ b/src/transformers/models/dpr/modeling_tf_dpr.py @@ -372,19 +372,6 @@ class TFDPRPretrainedReader(TFPreTrainedModel): config_class = DPRConfig base_model_prefix = "reader" - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), - } - ] - ) - def serving(self, inputs): - output = self.call(inputs) - - return self.serving_output(output) - ############### # Actual Models @@ -612,12 +599,6 @@ def call( pooler_output=outputs.pooler_output, hidden_states=outputs.hidden_states, attentions=outputs.attentions ) - def serving_output(self, output): - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFDPRContextEncoderOutput(pooler_output=output.pooler_output, hidden_states=hs, 
attentions=attns) - @add_start_docstrings( "The bare DPRQuestionEncoder transformer outputting pooler outputs as question representations.", @@ -698,12 +679,6 @@ def call( pooler_output=outputs.pooler_output, hidden_states=outputs.hidden_states, attentions=outputs.attentions ) - def serving_output(self, output): - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFDPRQuestionEncoderOutput(pooler_output=output.pooler_output, hidden_states=hs, attentions=attns) - @add_start_docstrings( "The bare DPRReader transformer outputting span predictions.", @@ -777,15 +752,3 @@ def call( return_dict=return_dict, training=training, ) - - def serving_output(self, output): - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFDPRReaderOutput( - start_logits=output.start_logits, - end_logits=output.end_logits, - relevance_logits=output.relevance_logits, - hidden_states=hs, - attentions=attns, - ) diff --git a/src/transformers/models/electra/modeling_tf_electra.py b/src/transformers/models/electra/modeling_tf_electra.py index 7602d43cc0ca64..41c64eed369d6a 100644 --- a/src/transformers/models/electra/modeling_tf_electra.py +++ b/src/transformers/models/electra/modeling_tf_electra.py @@ -20,7 +20,7 @@ import math import warnings from dataclasses import dataclass -from typing import Dict, Optional, Tuple, Union +from typing import Optional, Tuple, Union import numpy as np import tensorflow as tf @@ -49,8 +49,6 @@ ) from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( - DUMMY_INPUTS, - MULTIPLE_CHOICE_DUMMY_INPUTS, ModelOutput, add_code_sample_docstrings, add_start_docstrings, @@ -596,25 +594,6 @@ class TFElectraPreTrainedModel(TFPreTrainedModel): _keys_to_ignore_on_load_unexpected = [r"generator_lm_head.weight"] _keys_to_ignore_on_load_missing = [r"dropout"] - @property - # Copied from transformers.models.bert.modeling_tf_bert.TFBertPreTrainedModel.dummy_inputs - def dummy_inputs(self): - """ - Dummy inputs to build the network. - - Returns: - `Dict[str, tf.Tensor]`: The dummy inputs. 
- """ - dummy = {"input_ids": tf.constant(DUMMY_INPUTS, dtype=tf.int32)} - # Add `encoder_hidden_states` to make the cross-attention layers' weights initialized - if self.config.add_cross_attention: - batch_size, seq_len = tf.constant(DUMMY_INPUTS).shape - shape = (batch_size, seq_len) + (self.config.hidden_size,) - h = tf.random.uniform(shape=shape) - dummy["encoder_hidden_states"] = h - - return dummy - @keras_serializable class TFElectraMainLayer(tf.keras.layers.Layer): @@ -998,23 +977,6 @@ def call( return outputs - def serving_output(self, output): - output_cache = self.config.use_cache and self.config.is_decoder - pkv = tf.convert_to_tensor(output.past_key_values) if output_cache else None - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - cross_attns = tf.convert_to_tensor(output.cross_attentions) if output.cross_attentions is not None else None - if not (self.config.output_attentions and self.config.add_cross_attention): - cross_attns = None - - return TFBaseModelOutputWithPastAndCrossAttentions( - last_hidden_state=output.last_hidden_state, - past_key_values=pkv, - hidden_states=hs, - attentions=attns, - cross_attentions=cross_attns, - ) - @add_start_docstrings( """ @@ -1087,12 +1049,6 @@ def call( attentions=discriminator_hidden_states.attentions, ) - def serving_output(self, output): - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFElectraForPreTrainingOutput(logits=output.logits, hidden_states=hs, attentions=attns) - class TFElectraMaskedLMHead(tf.keras.layers.Layer): def __init__(self, config, input_embeddings, **kwargs): @@ -1221,13 +1177,6 @@ def call( attentions=generator_hidden_states.attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForMaskedLM.serving_output - def serving_output(self, output: TFMaskedLMOutput) -> TFMaskedLMOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFMaskedLMOutput(logits=output.logits, hidden_states=hs, attentions=attns) - class TFElectraClassificationHead(tf.keras.layers.Layer): """Head for sentence-level classification tasks.""" @@ -1329,13 +1278,6 @@ def call( attentions=outputs.attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForSequenceClassification.serving_output - def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFSequenceClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1356,16 +1298,6 @@ def __init__(self, config, *inputs, **kwargs): 1, kernel_initializer=get_initializer(config.initializer_range), name="classifier" ) - @property - def dummy_inputs(self): - """ - Dummy inputs to build the network. 
- - Returns: - tf.Tensor with dummy inputs - """ - return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS, dtype=tf.int32)} - @unpack_inputs @add_start_docstrings_to_model_forward(ELECTRA_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) @add_code_sample_docstrings( @@ -1438,28 +1370,6 @@ def call( attentions=outputs.attentions, ) - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"), - "token_type_ids": tf.TensorSpec((None, None, None), tf.int32, name="token_type_ids"), - } - ] - ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForMultipleChoice.serving - def serving(self, inputs: Dict[str, tf.Tensor]): - output = self.call(input_ids=inputs) - - return self.serving_output(output) - - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForMultipleChoice.serving_output - def serving_output(self, output: TFMultipleChoiceModelOutput) -> TFMultipleChoiceModelOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFMultipleChoiceModelOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1538,13 +1448,6 @@ def call( attentions=discriminator_hidden_states.attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForTokenClassification.serving_output - def serving_output(self, output: TFTokenClassifierOutput) -> TFTokenClassifierOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFTokenClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1638,12 +1541,3 @@ def call( hidden_states=discriminator_hidden_states.hidden_states, attentions=discriminator_hidden_states.attentions, ) - - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForQuestionAnswering.serving_output - def serving_output(self, output: TFQuestionAnsweringModelOutput) -> TFQuestionAnsweringModelOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFQuestionAnsweringModelOutput( - start_logits=output.start_logits, end_logits=output.end_logits, hidden_states=hs, attentions=attns - ) diff --git a/src/transformers/models/encoder_decoder/modeling_tf_encoder_decoder.py b/src/transformers/models/encoder_decoder/modeling_tf_encoder_decoder.py index f5cd5e445aac37..19fc47546b0f75 100644 --- a/src/transformers/models/encoder_decoder/modeling_tf_encoder_decoder.py +++ b/src/transformers/models/encoder_decoder/modeling_tf_encoder_decoder.py @@ -36,7 +36,6 @@ ) from ...tf_utils import shape_list from ...utils import ( - DUMMY_INPUTS, ModelOutput, add_start_docstrings, add_start_docstrings_to_model_forward, @@ -276,19 +275,6 @@ def __init__( "following discussion on GitHub: https://github.com/huggingface/transformers/issues/23350" ) - @property - def dummy_inputs(self): - """ - Dummy inputs to build the network. - - Returns: - `Dict[str, tf.Tensor]`: The dummy inputs. - """ - # Add `decoder_input_ids` because `self.decoder` requires it. 
- input_ids = tf.constant(DUMMY_INPUTS, dtype=tf.int32) - dummy = {"input_ids": input_ids, "decoder_input_ids": input_ids} - return dummy - def get_encoder(self): return self.encoder @@ -642,33 +628,6 @@ def call( encoder_attentions=encoder_outputs.attentions, ) - def serving_output(self, output): - pkv = tf.tuple(output.past_key_values)[1] if self.config.decoder.use_cache else None - dec_hs = ( - tf.convert_to_tensor(output.decoder_hidden_states) if self.config.decoder.output_hidden_states else None - ) - dec_attns = tf.convert_to_tensor(output.decoder_attentions) if self.config.decoder.output_attentions else None - enc_hs = ( - tf.convert_to_tensor(output.encoder_hidden_states) if self.config.encoder.output_hidden_states else None - ) - enc_attns = tf.convert_to_tensor(output.encoder_attentions) if self.config.encoder.output_attentions else None - cross_attns = ( - tf.convert_to_tensor(output.cross_attentions) - if self.config.decoder.output_attentions and output.cross_attentions is not None - else None - ) - - return TFSeq2SeqLMOutput( - logits=output.logits, - past_key_values=pkv, - decoder_hidden_states=dec_hs, - decoder_attentions=dec_attns, - encoder_last_hidden_state=output.encoder_last_hidden_state, - encoder_hidden_states=enc_hs, - encoder_attentions=enc_attns, - cross_attentions=cross_attns, - ) - def prepare_inputs_for_generation( self, input_ids, past_key_values=None, attention_mask=None, use_cache=None, encoder_outputs=None, **kwargs ): diff --git a/src/transformers/models/esm/modeling_tf_esm.py b/src/transformers/models/esm/modeling_tf_esm.py index df4ea54f83bc1a..126473ee529ae9 100644 --- a/src/transformers/models/esm/modeling_tf_esm.py +++ b/src/transformers/models/esm/modeling_tf_esm.py @@ -1038,39 +1038,6 @@ def call( ) return outputs - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), - } - ] - ) - def serving(self, inputs): - output = self.call(inputs) - - return self.serving_output(output) - - def serving_output( - self, output: TFBaseModelOutputWithPoolingAndCrossAttentions - ) -> TFBaseModelOutputWithPoolingAndCrossAttentions: - output_cache = self.config.use_cache and self.config.is_decoder - pkv = tf.convert_to_tensor(output.past_key_values) if output_cache else None - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - cross_attns = tf.convert_to_tensor(output.cross_attentions) if output.cross_attentions is not None else None - if not (self.config.output_attentions and self.config.add_cross_attention): - cross_attns = None - - return TFBaseModelOutputWithPoolingAndCrossAttentions( - last_hidden_state=output.last_hidden_state, - pooler_output=output.pooler_output, - past_key_values=pkv, - hidden_states=hs, - attentions=attns, - cross_attentions=cross_attns, - ) - def predict_contacts(self, tokens, attention_mask): return self.esm.predict_contacts(tokens, attention_mask) @@ -1170,26 +1137,6 @@ def call( attentions=outputs.attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForMaskedLM.serving_output - def serving_output(self, output: TFMaskedLMOutput) -> TFMaskedLMOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - 
return TFMaskedLMOutput(logits=output.logits, hidden_states=hs, attentions=attns) - - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), - } - ] - ) - def serving(self, inputs): - output = self.call(inputs) - - return self.serving_output(output) - def predict_contacts(self, tokens, attention_mask): return self.esm.predict_contacts(tokens, attention_mask) @@ -1310,26 +1257,6 @@ def call( attentions=outputs.attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForSequenceClassification.serving_output - def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFSequenceClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) - - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"), - } - ] - ) - def serving(self, inputs): - output = self.call(inputs) - - return self.serving_output(output) - @add_start_docstrings( """ @@ -1406,26 +1333,6 @@ def call( attentions=outputs.attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForTokenClassification.serving_output - def serving_output(self, output: TFTokenClassifierOutput) -> TFTokenClassifierOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFTokenClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) - - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), - } - ] - ) - def serving(self, inputs): - output = self.call(inputs) - - return self.serving_output(output) - class TFEsmClassificationHead(Layer): """Head for sentence-level classification tasks.""" diff --git a/src/transformers/models/flaubert/modeling_tf_flaubert.py b/src/transformers/models/flaubert/modeling_tf_flaubert.py index 7f93caebb00005..068119d35f1709 100644 --- a/src/transformers/models/flaubert/modeling_tf_flaubert.py +++ b/src/transformers/models/flaubert/modeling_tf_flaubert.py @@ -290,13 +290,6 @@ def call( return outputs - # Copied from transformers.models.distilbert.modeling_tf_distilbert.TFDistilBertModel.serving_output - def serving_output(self, output): - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFBaseModelOutput(last_hidden_state=output.last_hidden_state, hidden_states=hs, attentions=attns) - # Copied from transformers.models.xlm.modeling_tf_xlm.TFXLMMultiHeadAttention with XLM->Flaubert class TFFlaubertMultiHeadAttention(tf.keras.layers.Layer): @@ -845,12 +838,6 @@ def call( logits=outputs, hidden_states=transformer_outputs.hidden_states, attentions=transformer_outputs.attentions ) - def serving_output(self, output): - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = 
tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFFlaubertWithLMHeadModelOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -930,13 +917,6 @@ def call( attentions=transformer_outputs.attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForSequenceClassification.serving_output - def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFSequenceClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1029,15 +1009,6 @@ def call( attentions=transformer_outputs.attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForQuestionAnswering.serving_output - def serving_output(self, output: TFQuestionAnsweringModelOutput) -> TFQuestionAnsweringModelOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFQuestionAnsweringModelOutput( - start_logits=output.start_logits, end_logits=output.end_logits, hidden_states=hs, attentions=attns - ) - @add_start_docstrings( """ @@ -1119,13 +1090,6 @@ def call( attentions=transformer_outputs.attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForTokenClassification.serving_output - def serving_output(self, output: TFTokenClassifierOutput) -> TFTokenClassifierOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFTokenClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1247,25 +1211,3 @@ def call( hidden_states=transformer_outputs.hidden_states, attentions=transformer_outputs.attentions, ) - - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"), - "token_type_ids": tf.TensorSpec((None, None, None), tf.int32, name="token_type_ids"), - } - ] - ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForMultipleChoice.serving - def serving(self, inputs: Dict[str, tf.Tensor]): - output = self.call(input_ids=inputs) - - return self.serving_output(output) - - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForMultipleChoice.serving_output - def serving_output(self, output: TFMultipleChoiceModelOutput) -> TFMultipleChoiceModelOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFMultipleChoiceModelOutput(logits=output.logits, hidden_states=hs, attentions=attns) diff --git a/src/transformers/models/funnel/modeling_tf_funnel.py b/src/transformers/models/funnel/modeling_tf_funnel.py index fa077d612d21de..9c472674cf6505 100644 --- a/src/transformers/models/funnel/modeling_tf_funnel.py +++ b/src/transformers/models/funnel/modeling_tf_funnel.py @@ -19,7 +19,7 @@ import warnings from dataclasses import dataclass -from typing import Dict, Optional, 
Tuple, Union +from typing import Optional, Tuple, Union import numpy as np import tensorflow as tf @@ -47,7 +47,6 @@ ) from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( - MULTIPLE_CHOICE_DUMMY_INPUTS, ModelOutput, add_code_sample_docstrings, add_start_docstrings, @@ -1425,16 +1424,6 @@ def __init__(self, config: FunnelConfig, *inputs, **kwargs) -> None: self.funnel = TFFunnelBaseLayer(config, name="funnel") self.classifier = TFFunnelClassificationHead(config, 1, name="classifier") - @property - def dummy_inputs(self): - """ - Dummy inputs to build the network. - - Returns: - tf.Tensor with dummy inputs - """ - return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS, dtype=tf.int32)} - @unpack_inputs @add_start_docstrings_to_model_forward(FUNNEL_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) @add_code_sample_docstrings( @@ -1504,20 +1493,6 @@ def call( attentions=outputs.attentions, ) - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.float32, name="attention_mask"), - "token_type_ids": tf.TensorSpec((None, None), tf.int32, name="token_type_ids"), - } - ] - ) - def serving(self, inputs: Dict[str, tf.Tensor]) -> TFMultipleChoiceModelOutput: - output = self.call(input_ids=inputs) - - return self.serving_output(output=output) - def serving_output(self, output: TFMultipleChoiceModelOutput) -> TFMultipleChoiceModelOutput: # hidden_states and attentions not converted to Tensor with tf.convert_to_tensor as they are all of # different dimensions diff --git a/src/transformers/models/gpt2/modeling_tf_gpt2.py b/src/transformers/models/gpt2/modeling_tf_gpt2.py index 6b7476b71bbabe..ab6bc07947cce7 100644 --- a/src/transformers/models/gpt2/modeling_tf_gpt2.py +++ b/src/transformers/models/gpt2/modeling_tf_gpt2.py @@ -42,7 +42,6 @@ ) from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( - DUMMY_INPUTS, ModelOutput, add_code_sample_docstrings, add_start_docstrings, @@ -522,37 +521,6 @@ class TFGPT2PreTrainedModel(TFPreTrainedModel): # names with a '.' represents the authorized unexpected/missing layers when a TF model is loaded from a PT model _keys_to_ignore_on_load_unexpected = [r"h.\d+.attn.bias", r"h.\d+.crossattention.bias"] - @property - def dummy_inputs(self): - """ - Dummy inputs to build the network. - - Returns: - `Dict[str, tf.Tensor]`: The dummy inputs. 
- """ - dummy = {"input_ids": tf.constant(DUMMY_INPUTS, dtype=tf.int32)} - # Add `encoder_hidden_states` to make the cross-attention layers' weights initialized - if self.config.add_cross_attention: - batch_size, seq_len = tf.constant(DUMMY_INPUTS).shape - shape = (batch_size, seq_len) + (self.config.hidden_size,) - h = tf.random.uniform(shape=shape) - dummy["encoder_hidden_states"] = h - - return dummy - - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), - } - ] - ) - def serving(self, inputs): - output = self.call(inputs) - - return self.serving_output(output) - @dataclass class TFGPT2DoubleHeadsModelOutput(ModelOutput): @@ -773,26 +741,6 @@ def call( return outputs - def serving_output(self, output): - pkv = tf.convert_to_tensor(output.past_key_values) if self.config.use_cache else None - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - cross_attns = ( - tf.convert_to_tensor(output.cross_attentions) - if self.config.output_attentions - and self.config.add_cross_attention - and output.cross_attentions is not None - else None - ) - - return TFBaseModelOutputWithPastAndCrossAttentions( - last_hidden_state=output.last_hidden_state, - past_key_values=pkv, - hidden_states=hs, - attentions=attns, - cross_attentions=cross_attns, - ) - @add_start_docstrings( """ @@ -925,22 +873,6 @@ def call( cross_attentions=transformer_outputs.cross_attentions, ) - def serving_output(self, output): - pkv = tf.convert_to_tensor(output.past_key_values) if self.config.use_cache else None - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - cross_attns = ( - tf.convert_to_tensor(output.cross_attentions) - if self.config.output_attentions - and self.config.add_cross_attention - and output.cross_attentions is not None - else None - ) - - return TFCausalLMOutputWithCrossAttentions( - logits=output.logits, past_key_values=pkv, hidden_states=hs, attentions=attns, cross_attentions=cross_attns - ) - @add_start_docstrings( """ @@ -1062,32 +994,13 @@ def call( attentions=transformer_outputs.attentions, ) - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"), - "mc_token_ids": tf.TensorSpec((None, None), tf.int32, name="mc_token_ids"), - } - ] - ) - def serving(self, inputs): - output = self.call(inputs) - - return self.serving_output(output) - - def serving_output(self, output): - pkv = tf.convert_to_tensor(output.past_key_values) if self.config.use_cache else None - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFGPT2DoubleHeadsModelOutput( - logits=output.logits, - mc_logits=output.mc_logits, - past_key_values=pkv, - hidden_states=hs, - attentions=attns, - ) + @property + def input_signature(self): + return { + "input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"), + "attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"), + "mc_token_ids": tf.TensorSpec((None, 
None), tf.int32, name="mc_token_ids"), + } @add_start_docstrings( @@ -1210,12 +1123,3 @@ def call( hidden_states=transformer_outputs.hidden_states, attentions=transformer_outputs.attentions, ) - - def serving_output(self, output): - pkv = tf.convert_to_tensor(output.past_key_values) if self.config.use_cache else None - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFSequenceClassifierOutputWithPast( - logits=output.logits, past_key_values=pkv, hidden_states=hs, attentions=attns - ) diff --git a/src/transformers/models/gptj/modeling_tf_gptj.py b/src/transformers/models/gptj/modeling_tf_gptj.py index 09e4330eb18229..bbcdf3bd240ada 100644 --- a/src/transformers/models/gptj/modeling_tf_gptj.py +++ b/src/transformers/models/gptj/modeling_tf_gptj.py @@ -23,7 +23,6 @@ from ...activations_tf import get_tf_activation from ...file_utils import ( - DUMMY_INPUTS, add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward, @@ -513,30 +512,6 @@ class TFGPTJPreTrainedModel(TFPreTrainedModel): # names with a '.' represents the authorized unexpected/missing layers when a TF model is loaded from a PT model _keys_to_ignore_on_load_unexpected = [r"h.\d+.attn.bias"] - @property - def dummy_inputs(self): - """ - Dummy inputs to build the network. - - Returns: - `Dict[str, tf.Tensor]`: The dummy inputs. - """ - dummy = {"input_ids": tf.constant(DUMMY_INPUTS, dtype=tf.int32)} - return dummy - - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), - } - ] - ) - def serving(self, inputs): - output = self.call(inputs) - - return self.serving_output(output) - GPTJ_START_DOCSTRING = r""" @@ -697,18 +672,6 @@ def call( return outputs - def serving_output(self, output): - pkv = tf.convert_to_tensor(output.past_key_values) if self.config.use_cache else None - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFBaseModelOutputWithPast( - last_hidden_state=output.last_hidden_state, - past_key_values=pkv, - hidden_states=hs, - attentions=attns, - ) - @add_start_docstrings( """ @@ -821,13 +784,6 @@ def call( attentions=transformer_outputs.attentions, ) - def serving_output(self, output): - pkv = tf.convert_to_tensor(output.past_key_values) if self.config.use_cache else None - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFCausalLMOutputWithPast(logits=output.logits, past_key_values=pkv, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -952,15 +908,6 @@ def call( attentions=transformer_outputs.attentions, ) - def serving_output(self, output): - pkv = tf.convert_to_tensor(output.past_key_values) if self.config.use_cache else None - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFSequenceClassifierOutputWithPast( - logits=output.logits, past_key_values=pkv, hidden_states=hs, attentions=attns - ) - @add_start_docstrings( """ @@ -1051,11 +998,3 @@ def 
call( hidden_states=transformer_outputs.hidden_states, attentions=transformer_outputs.attentions, ) - - def serving_output(self, output: TFQuestionAnsweringModelOutput) -> TFQuestionAnsweringModelOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFQuestionAnsweringModelOutput( - start_logits=output.start_logits, end_logits=output.end_logits, hidden_states=hs, attentions=attns - ) diff --git a/src/transformers/models/groupvit/modeling_tf_groupvit.py b/src/transformers/models/groupvit/modeling_tf_groupvit.py index 71888472098078..5c989356a5de61 100644 --- a/src/transformers/models/groupvit/modeling_tf_groupvit.py +++ b/src/transformers/models/groupvit/modeling_tf_groupvit.py @@ -20,7 +20,7 @@ import collections.abc import math from dataclasses import dataclass -from typing import Any, Dict, Optional, Tuple, Union +from typing import Any, Optional, Tuple, Union import numpy as np import tensorflow as tf @@ -28,7 +28,6 @@ from ...activations_tf import get_tf_activation from ...modeling_tf_outputs import TFBaseModelOutput, TFBaseModelOutputWithPooling from ...modeling_tf_utils import ( - DUMMY_INPUTS, TFModelInputType, TFPreTrainedModel, get_initializer, @@ -1608,30 +1607,6 @@ def __init__(self, config: GroupViTTextConfig, *inputs, **kwargs): self.groupvit = TFGroupViTTextMainLayer(config, name="groupvit") - @property - def dummy_inputs(self) -> Dict[str, tf.Tensor]: - """ - Dummy inputs to build the network. - - Returns: - `Dict[str, tf.Tensor]`: The dummy inputs. - """ - return { - "input_ids": tf.constant(DUMMY_INPUTS, dtype=tf.int32), - } - - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), - } - ] - ) - def serving(self, inputs: Dict[str, tf.Tensor]) -> TFBaseModelOutputWithPooling: - output = self.call(inputs) - return self.serving_output(output) - @unpack_inputs @add_start_docstrings_to_model_forward(GROUPVIT_TEXT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @replace_return_docstrings(output_type=TFBaseModelOutputWithPooling, config_class=GroupViTTextConfig) @@ -1675,17 +1650,6 @@ def call( return outputs - def serving_output(self, output: TFBaseModelOutputWithPooling) -> TFBaseModelOutputWithPooling: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFBaseModelOutputWithPooling( - last_hidden_state=output.last_hidden_state, - pooler_output=output.pooler_output, - hidden_states=hs, - attentions=attns, - ) - class TFGroupViTVisionModel(TFGroupViTPreTrainedModel): config_class = GroupViTVisionConfig @@ -1696,38 +1660,6 @@ def __init__(self, config: GroupViTVisionConfig, *inputs, **kwargs): self.groupvit = TFGroupViTVisionMainLayer(config, name="groupvit") - @property - def dummy_inputs(self) -> Dict[str, tf.Tensor]: - """ - Dummy inputs to build the network. - - Returns: - `Dict[str, tf.Tensor]`: The dummy inputs. 
- """ - VISION_DUMMY_INPUTS = tf.random.uniform( - shape=(len(DUMMY_INPUTS), 3, self.config.image_size, self.config.image_size), dtype=tf.float32 - ) - return {"pixel_values": VISION_DUMMY_INPUTS} - - @tf.function( - input_signature=[ - { - "pixel_values": tf.TensorSpec((None, None, None, None), tf.float32, name="pixel_values"), - } - ] - ) - def serving(self, inputs: Dict[str, tf.Tensor]) -> TFBaseModelOutputWithPooling: - """ - Method used for serving the model. - - Args: - inputs (`Dict[str, tf.Tensor]`): - The input of the saved model as a dictionary of tensors. - """ - output = self.call(inputs) - - return self.serving_output(output) - @unpack_inputs @add_start_docstrings_to_model_forward(GROUPVIT_VISION_INPUTS_DOCSTRING) @replace_return_docstrings(output_type=TFBaseModelOutputWithPooling, config_class=GroupViTVisionConfig) @@ -1772,15 +1704,6 @@ def call( return outputs - def serving_output(self, output: TFBaseModelOutputWithPooling) -> TFBaseModelOutputWithPooling: - # hidden_states and attentions not converted to Tensor with tf.convert_to_tensor as they are all of different dimensions - return TFBaseModelOutputWithPooling( - last_hidden_state=output.last_hidden_state, - pooler_output=output.pooler_output, - hidden_states=output.hidden_states, - attentions=output.attentions, - ) - @add_start_docstrings(GROUPVIT_START_DOCSTRING) class TFGroupViTModel(TFGroupViTPreTrainedModel): @@ -1791,44 +1714,6 @@ def __init__(self, config: GroupViTConfig, *inputs, **kwargs): self.groupvit = TFGroupViTMainLayer(config, name="groupvit") - @property - def dummy_inputs(self) -> Dict[str, tf.Tensor]: - """ - Dummy inputs to build the network. - - Returns: - `Dict[str, tf.Tensor]`: The dummy inputs. - """ - VISION_DUMMY_INPUTS = tf.random.uniform( - shape=(len(DUMMY_INPUTS), 3, self.config.vision_config.image_size, self.config.vision_config.image_size), - dtype=tf.float32, - ) - return { - "input_ids": tf.constant(DUMMY_INPUTS, dtype=tf.int32), - "pixel_values": VISION_DUMMY_INPUTS, - } - - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), - "pixel_values": tf.TensorSpec((None, None, None, None), tf.float64, name="pixel_values"), - "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), - } - ] - ) - def serving(self, inputs: Dict[str, tf.Tensor]) -> TFGroupViTModelOutput: - """ - Method used for serving the model. - - Args: - inputs (`Dict[str, tf.Tensor]`): - The input of the saved model as a dictionary of tensors. 
- """ - output = self.call(inputs) - - return self.serving_output(output) - @unpack_inputs @add_start_docstrings_to_model_forward(GROUPVIT_TEXT_INPUTS_DOCSTRING.format("batch_size, sequence_length")) def get_text_features( diff --git a/src/transformers/models/hubert/modeling_tf_hubert.py b/src/transformers/models/hubert/modeling_tf_hubert.py index fd1f17edfb3aaf..c237616bf2a42c 100644 --- a/src/transformers/models/hubert/modeling_tf_hubert.py +++ b/src/transformers/models/hubert/modeling_tf_hubert.py @@ -17,7 +17,7 @@ from __future__ import annotations import warnings -from typing import Any, Dict, Optional, Tuple, Union +from typing import Any, Optional, Tuple, Union import numpy as np import tensorflow as tf @@ -1157,14 +1157,12 @@ class TFHubertPreTrainedModel(TFPreTrainedModel): main_input_name = "input_values" @property - def dummy_inputs(self) -> Dict[str, tf.Tensor]: - pad_token = 0.0 - input_values = tf.convert_to_tensor(np.random.rand(1, 16000), tf.float32) - dummy_inputs = { - "input_values": input_values, - "attention_mask": tf.cast(tf.not_equal(input_values, pad_token), tf.float32), + def input_signature(self): + return { + "input_values": tf.TensorSpec((None, 16000), tf.float32, name="input_values"), + "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), + "token_type_ids": tf.TensorSpec((None, None), tf.int32, name="token_type_ids"), } - return dummy_inputs def __init__(self, config, *inputs, **kwargs): super().__init__(config, *inputs, **kwargs) @@ -1173,20 +1171,6 @@ def __init__(self, config, *inputs, **kwargs): "to train/fine-tine this model, you need a GPU or a TPU" ) - @tf.function( - input_signature=[ - { - "input_values": tf.TensorSpec((None, None), tf.float32, name="input_values"), - "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), - "token_type_ids": tf.TensorSpec((None, None), tf.int32, name="token_type_ids"), - } - ] - ) - def serving(self, inputs): - output = self.call(input_values=inputs, training=False) - - return self.serving_output(output) - HUBERT_START_DOCSTRING = r""" @@ -1359,13 +1343,6 @@ def call( return outputs - def serving_output(self, output): - hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - return TFBaseModelOutput( - last_hidden_state=output.last_hidden_state, hidden_states=hidden_states, attentions=attentions - ) - @add_start_docstrings( """TFHubert Model with a `language modeling` head on top for Connectionist Temporal Classification (CTC).""", @@ -1518,8 +1495,3 @@ def call( hidden_states=outputs.hidden_states, attentions=outputs.attentions, ) - - def serving_output(self, output: TFCausalLMOutput) -> TFCausalLMOutput: - hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - return TFCausalLMOutput(logits=output.logits, hidden_states=hidden_states, attentions=attentions) diff --git a/src/transformers/models/layoutlm/modeling_tf_layoutlm.py b/src/transformers/models/layoutlm/modeling_tf_layoutlm.py index 67128e0c13383d..c756609468598c 100644 --- a/src/transformers/models/layoutlm/modeling_tf_layoutlm.py +++ b/src/transformers/models/layoutlm/modeling_tf_layoutlm.py @@ -986,27 +986,6 @@ def call( return outputs - # Copied from 
transformers.models.bert.modeling_tf_bert.TFBertModel.serving_output - def serving_output( - self, output: TFBaseModelOutputWithPoolingAndCrossAttentions - ) -> TFBaseModelOutputWithPoolingAndCrossAttentions: - output_cache = self.config.use_cache and self.config.is_decoder - pkv = tf.convert_to_tensor(output.past_key_values) if output_cache else None - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - cross_attns = tf.convert_to_tensor(output.cross_attentions) if output.cross_attentions is not None else None - if not (self.config.output_attentions and self.config.add_cross_attention): - cross_attns = None - - return TFBaseModelOutputWithPoolingAndCrossAttentions( - last_hidden_state=output.last_hidden_state, - pooler_output=output.pooler_output, - past_key_values=pkv, - hidden_states=hs, - attentions=attns, - cross_attentions=cross_attns, - ) - @add_start_docstrings("""LayoutLM Model with a `language modeling` head on top.""", LAYOUTLM_START_DOCSTRING) class TFLayoutLMForMaskedLM(TFLayoutLMPreTrainedModel, TFMaskedLanguageModelingLoss): @@ -1128,12 +1107,6 @@ def call( attentions=outputs.attentions, ) - def serving_output(self, output: TFMaskedLMOutput) -> TFMaskedLMOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFMaskedLMOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1252,12 +1225,6 @@ def call( attentions=outputs.attentions, ) - def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFSequenceClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1380,12 +1347,6 @@ def call( attentions=outputs.attentions, ) - def serving_output(self, output: TFTokenClassifierOutput) -> TFTokenClassifierOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFTokenClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1524,11 +1485,3 @@ def call( hidden_states=outputs.hidden_states, attentions=outputs.attentions, ) - - def serving_output(self, output: TFTokenClassifierOutput) -> TFTokenClassifierOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFQuestionAnsweringModelOutput( - start_logits=output.start_logits, end_logits=output.end_logits, hidden_states=hs, attentions=attns - ) diff --git a/src/transformers/models/layoutlmv3/modeling_tf_layoutlmv3.py b/src/transformers/models/layoutlmv3/modeling_tf_layoutlmv3.py index 67377c5baf8a1c..feba69eafc2a71 100644 --- a/src/transformers/models/layoutlmv3/modeling_tf_layoutlmv3.py +++ b/src/transformers/models/layoutlmv3/modeling_tf_layoutlmv3.py @@ -19,7 +19,7 @@ import collections import math -from typing import Dict, List, Optional, Tuple, Union +from typing import List, 
Optional, Tuple, Union import tensorflow as tf @@ -980,37 +980,10 @@ class TFLayoutLMv3PreTrainedModel(TFPreTrainedModel): base_model_prefix = "layoutlmv3" @property - def dummy_inputs(self) -> Dict[str, tf.Tensor]: - size = self.config.input_size - image_shape = (2, self.config.num_channels, size, size) - pixel_values = tf.random.uniform(shape=image_shape, minval=-1, maxval=1) - return { - "input_ids": tf.constant(_DUMMY_INPUT_IDS, dtype=tf.int32), - "bbox": tf.constant(_DUMMY_BBOX, dtype=tf.int32), - "pixel_values": pixel_values, - } - - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), - "bbox": tf.TensorSpec((None, None, 4), tf.int32, name="bbox"), - "pixel_values": tf.TensorSpec((None, None, None, None), tf.float32, name="pixel_values"), - "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), - } - ] - ) - def serving(self, inputs): - """ - Method used for serving the model. - - Args: - inputs (`Dict[str, tf.Tensor]`): - The input of the saved model as a dictionary of tensors. - """ - output = self.call(inputs) - - return self.serving_output(output) + def input_signature(self): + sig = super().input_signature + sig["bbox"] = tf.TensorSpec((None, None, 4), tf.int32, name="bbox") + return sig LAYOUTLMV3_START_DOCSTRING = r""" @@ -1207,16 +1180,6 @@ def call( return outputs - def serving_output(self, output: TFBaseModelOutput) -> TFBaseModelOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFBaseModelOutput( - last_hidden_state=output.last_hidden_state, - hidden_states=hs, - attentions=attns, - ) - class TFLayoutLMv3ClassificationHead(tf.keras.layers.Layer): """ @@ -1354,13 +1317,6 @@ def call( attentions=outputs.attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForSequenceClassification.serving_output - def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFSequenceClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1484,13 +1440,6 @@ def call( attentions=outputs.attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForTokenClassification.serving_output - def serving_output(self, output: TFTokenClassifierOutput) -> TFTokenClassifierOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFTokenClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1618,12 +1567,3 @@ def call( hidden_states=outputs.hidden_states, attentions=outputs.attentions, ) - - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForQuestionAnswering.serving_output - def serving_output(self, output: TFQuestionAnsweringModelOutput) -> TFQuestionAnsweringModelOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFQuestionAnsweringModelOutput( - 
start_logits=output.start_logits, end_logits=output.end_logits, hidden_states=hs, attentions=attns - ) diff --git a/src/transformers/models/led/modeling_tf_led.py b/src/transformers/models/led/modeling_tf_led.py index 4e815da33d9e03..6e962ea4934e91 100644 --- a/src/transformers/models/led/modeling_tf_led.py +++ b/src/transformers/models/led/modeling_tf_led.py @@ -1323,33 +1323,10 @@ class TFLEDPreTrainedModel(TFPreTrainedModel): base_model_prefix = "led" @property - def dummy_inputs(self): - input_ids = tf.convert_to_tensor([[7, 6, 0, 0, 1], [1, 2, 3, 0, 0]], dtype=tf.int32) - # make sure global layers are initialized - attention_mask = tf.convert_to_tensor([[1, 1, 0, 0, 1], [1, 1, 1, 0, 0]], dtype=tf.int32) - global_attention_mask = tf.convert_to_tensor([[0, 0, 0, 0, 1], [0, 0, 1, 0, 0]], dtype=tf.int32) - dummy_inputs = { - "input_ids": input_ids, - "attention_mask": attention_mask, - "global_attention_mask": global_attention_mask, - "decoder_input_ids": input_ids, - } - return dummy_inputs - - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), - "decoder_input_ids": tf.TensorSpec((None, None), tf.int32, name="decoder_input_ids"), - "decoder_attention_mask": tf.TensorSpec((None, None), tf.int32, name="decoder_attention_mask"), - } - ] - ) - def serving(self, inputs): - output = self.call(inputs) - - return self.serving_output(output) + def input_signature(self): + sig = super().input_signature + sig["global_attention_mask"] = tf.TensorSpec((None, None), tf.int32, name="global_attention_mask") + return sig @dataclass diff --git a/src/transformers/models/longformer/modeling_tf_longformer.py b/src/transformers/models/longformer/modeling_tf_longformer.py index b5adb2c803e9fd..60cee2a83e89b3 100644 --- a/src/transformers/models/longformer/modeling_tf_longformer.py +++ b/src/transformers/models/longformer/modeling_tf_longformer.py @@ -39,7 +39,6 @@ ) from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( - MULTIPLE_CHOICE_DUMMY_INPUTS, ModelOutput, add_code_sample_docstrings, add_start_docstrings, @@ -1874,31 +1873,10 @@ class TFLongformerPreTrainedModel(TFPreTrainedModel): base_model_prefix = "longformer" @property - def dummy_inputs(self): - input_ids = tf.convert_to_tensor([[7, 6, 0, 0, 1], [1, 2, 3, 0, 0], [0, 0, 0, 4, 5]], dtype=tf.int32) - # make sure global layers are initialized - attention_mask = tf.convert_to_tensor([[1, 1, 0, 0, 1], [1, 1, 1, 0, 0], [1, 0, 0, 1, 1]], dtype=tf.int32) - global_attention_mask = tf.convert_to_tensor( - [[0, 0, 0, 0, 1], [0, 0, 1, 0, 0], [0, 0, 0, 0, 1]], dtype=tf.int32 - ) - return { - "input_ids": input_ids, - "attention_mask": attention_mask, - "global_attention_mask": global_attention_mask, - } - - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), - } - ] - ) - def serving(self, inputs): - output = self.call(inputs) - - return self.serving_output(output) + def input_signature(self): + sig = super().input_signature + sig["global_attention_mask"] = tf.TensorSpec((None, None), tf.int32, name="global_attention_mask") + return sig LONGFORMER_START_DOCSTRING = r""" @@ -2069,19 +2047,6 @@ def call( return outputs - def serving_output(self, output): - hs = tf.convert_to_tensor(output.hidden_states) if 
self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - g_attns = tf.convert_to_tensor(output.global_attentions) if self.config.output_attentions else None - - return TFLongformerBaseModelOutputWithPooling( - last_hidden_state=output.last_hidden_state, - pooler_output=output.pooler_output, - hidden_states=hs, - attentions=attns, - global_attentions=g_attns, - ) - @add_start_docstrings( """Longformer Model with a `language modeling` head on top.""", @@ -2166,15 +2131,6 @@ def call( global_attentions=outputs.global_attentions, ) - def serving_output(self, output): - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - g_attns = tf.convert_to_tensor(output.global_attentions) if self.config.output_attentions else None - - return TFLongformerMaskedLMOutput( - logits=output.logits, hidden_states=hs, attentions=attns, global_attentions=g_attns - ) - @add_start_docstrings( """ @@ -2305,19 +2261,6 @@ def call( global_attentions=outputs.global_attentions, ) - def serving_output(self, output): - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - g_attns = tf.convert_to_tensor(output.global_attentions) if self.config.output_attentions else None - - return TFLongformerQuestionAnsweringModelOutput( - start_logits=output.start_logits, - end_logits=output.end_logits, - hidden_states=hs, - attentions=attns, - global_attentions=g_attns, - ) - class TFLongformerClassificationHead(tf.keras.layers.Layer): """Head for sentence-level classification tasks.""" @@ -2446,15 +2389,6 @@ def call( global_attentions=outputs.global_attentions, ) - def serving_output(self, output): - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - g_attns = tf.convert_to_tensor(output.global_attentions) if self.config.output_attentions else None - - return TFLongformerSequenceClassifierOutput( - logits=output.logits, hidden_states=hs, attentions=attns, global_attentions=g_attns - ) - @add_start_docstrings( """ @@ -2477,11 +2411,12 @@ def __init__(self, config, *inputs, **kwargs): ) @property - def dummy_inputs(self): - input_ids = tf.convert_to_tensor(MULTIPLE_CHOICE_DUMMY_INPUTS, dtype=tf.int32) - # make sure global layers are initialized - global_attention_mask = tf.convert_to_tensor([[[0, 0, 0, 1], [0, 0, 0, 1]]] * 2, dtype=tf.int32) - return {"input_ids": input_ids, "global_attention_mask": global_attention_mask} + def input_signature(self): + return { + "input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"), + "attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"), + "global_attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="global_attention_mask"), + } @unpack_inputs @add_start_docstrings_to_model_forward( @@ -2568,28 +2503,6 @@ def call( global_attentions=outputs.global_attentions, ) - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"), - } - ] - ) - def serving(self, inputs): - output = self.call(inputs) - - return 
self.serving_output(output) - - def serving_output(self, output): - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - g_attns = tf.convert_to_tensor(output.global_attentions) if self.config.output_attentions else None - - return TFLongformerMultipleChoiceModelOutput( - logits=output.logits, hidden_states=hs, attentions=attns, global_attentions=g_attns - ) - @add_start_docstrings( """ @@ -2669,12 +2582,3 @@ def call( attentions=outputs.attentions, global_attentions=outputs.global_attentions, ) - - def serving_output(self, output): - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - g_attns = tf.convert_to_tensor(output.global_attentions) if self.config.output_attentions else None - - return TFLongformerTokenClassifierOutput( - logits=output.logits, hidden_states=hs, attentions=attns, global_attentions=g_attns - ) diff --git a/src/transformers/models/lxmert/modeling_tf_lxmert.py b/src/transformers/models/lxmert/modeling_tf_lxmert.py index 59bc1859140596..0b54702d761d59 100644 --- a/src/transformers/models/lxmert/modeling_tf_lxmert.py +++ b/src/transformers/models/lxmert/modeling_tf_lxmert.py @@ -636,26 +636,6 @@ def call( class TFLxmertMainLayer(tf.keras.layers.Layer): config_class = LxmertConfig - @property - def dummy_inputs(self): - """ - Dummy inputs to build the network. - - Returns: - tf.Tensor with dummy inputs - """ - batch_size = 2 - num_visual_features = 10 - input_ids = tf.constant([[3, 5, 6], [2, 3, 4]], dtype=tf.int32) - visual_feats = tf.random.uniform((batch_size, num_visual_features, self.config.visual_feat_dim)) - visual_pos = tf.random.uniform((batch_size, num_visual_features, 4)) - - return { - "input_ids": input_ids, - "visual_feats": visual_feats, - "visual_pos": visual_pos, - } - def __init__(self, config, **kwargs): super().__init__(**kwargs) @@ -802,25 +782,35 @@ class TFLxmertPreTrainedModel(TFPreTrainedModel): base_model_prefix = "lxmert" @property - def dummy_inputs(self) -> Dict[str, tf.Tensor]: - return getattr(self, self.base_model_prefix).dummy_inputs - - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), - "visual_feats": tf.TensorSpec((None, None, None), tf.float32, name="visual_feats"), - "visual_pos": tf.TensorSpec((None, None, None), tf.float32, name="visual_pos"), - "visual_attention_mask": tf.TensorSpec((None, None), tf.int32, name="visual_attention_mask"), - "token_type_ids": tf.TensorSpec((None, None), tf.int32, name="token_type_ids"), - } - ] - ) - def serving(self, inputs): - output = self.call(inputs) + def dummy_inputs(self): + """ + Dummy inputs to build the network. 
- return self.serving_output(output) + Returns: + tf.Tensor with dummy inputs + """ + batch_size = 2 + num_visual_features = 10 + input_ids = tf.constant([[3, 5, 6], [2, 3, 4]], dtype=tf.int32) + visual_feats = tf.random.uniform((batch_size, num_visual_features, self.config.visual_feat_dim)) + visual_pos = tf.random.uniform((batch_size, num_visual_features, 4)) + + return { + "input_ids": input_ids, + "visual_feats": visual_feats, + "visual_pos": visual_pos, + } + + @property + def input_signature(self): + return { + "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), + "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), + "visual_feats": tf.TensorSpec((None, None, self.config.visual_feat_dim), tf.float32, name="visual_feats"), + "visual_pos": tf.TensorSpec((None, None, 4), tf.float32, name="visual_pos"), + "visual_attention_mask": tf.TensorSpec((None, None), tf.int32, name="visual_attention_mask"), + "token_type_ids": tf.TensorSpec((None, None), tf.int32, name="token_type_ids"), + } LXMERT_START_DOCSTRING = r""" @@ -976,24 +966,6 @@ def call( return outputs - def serving_output(self, output): - l_hs = tf.convert_to_tensor(output.language_hidden_states) if self.config.output_hidden_states else None - v_hs = tf.convert_to_tensor(output.vision_hidden_states) if self.config.output_hidden_states else None - l_attns = tf.convert_to_tensor(output.language_attentions) if self.config.output_attentions else None - v_attns = tf.convert_to_tensor(output.vision_attentions) if self.config.output_attentions else None - c_enc_attns = tf.convert_to_tensor(output.cross_encoder_attentions) if self.config.output_attentions else None - - return TFLxmertModelOutput( - pooled_output=output.pooled_output, - language_output=output.language_output, - vision_output=output.vision_output, - language_hidden_states=l_hs, - vision_hidden_states=v_hs, - language_attentions=l_attns, - vision_attentions=v_attns, - cross_encoder_attentions=c_enc_attns, - ) - class TFLxmertPooler(tf.keras.layers.Layer): def __init__(self, config, **kwargs): @@ -1415,21 +1387,3 @@ def call( vision_attentions=lxmert_output.vision_attentions, cross_encoder_attentions=lxmert_output.cross_encoder_attentions, ) - - def serving_output(self, output): - l_hs = tf.convert_to_tensor(output.language_hidden_states) if self.config.output_hidden_states else None - v_hs = tf.convert_to_tensor(output.vision_hidden_states) if self.config.output_hidden_states else None - l_attns = tf.convert_to_tensor(output.language_attentions) if self.config.output_attentions else None - v_attns = tf.convert_to_tensor(output.vision_attentions) if self.config.output_attentions else None - c_enc_attns = tf.convert_to_tensor(output.cross_encoder_attentions) if self.config.output_attentions else None - - return TFLxmertForPreTrainingOutput( - prediction_logits=output.prediction_logits, - cross_relationship_score=output.cross_relationship_score, - question_answering_score=output.question_answering_score, - language_hidden_states=l_hs, - vision_hidden_states=v_hs, - language_attentions=l_attns, - vision_attentions=v_attns, - cross_encoder_attentions=c_enc_attns, - ) diff --git a/src/transformers/models/marian/modeling_tf_marian.py b/src/transformers/models/marian/modeling_tf_marian.py index 208e9b8335d71f..9632ddeaac8f43 100644 --- a/src/transformers/models/marian/modeling_tf_marian.py +++ b/src/transformers/models/marian/modeling_tf_marian.py @@ -33,7 +33,6 @@ # Public API from ...modeling_tf_utils import ( - DUMMY_INPUTS, 
TFCausalLanguageModelingLoss, TFPreTrainedModel, keras_serializable, @@ -501,34 +500,6 @@ class TFMarianPreTrainedModel(TFPreTrainedModel): config_class = MarianConfig base_model_prefix = "model" - @property - def dummy_inputs(self): - pad_token = 1 - input_ids = tf.cast(tf.convert_to_tensor(DUMMY_INPUTS), tf.int32) - decoder_input_ids = tf.cast(tf.convert_to_tensor(DUMMY_INPUTS), tf.int32) - dummy_inputs = { - "decoder_input_ids": decoder_input_ids, - "attention_mask": tf.cast(input_ids != pad_token, tf.int32), - "input_ids": input_ids, - } - return dummy_inputs - - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), - "decoder_input_ids": tf.TensorSpec((None, None), tf.int32, name="decoder_input_ids"), - "decoder_attention_mask": tf.TensorSpec((None, None), tf.int32, name="decoder_attention_mask"), - } - ] - ) - # Copied from transformers.models.bart.modeling_tf_bart.TFBartPretrainedModel.serving - def serving(self, inputs): - output = self.call(inputs) - - return self.serving_output(output) - MARIAN_START_DOCSTRING = r""" This model inherits from [`TFPreTrainedModel`]. Check the superclass documentation for the generic methods the diff --git a/src/transformers/models/mbart/modeling_tf_mbart.py b/src/transformers/models/mbart/modeling_tf_mbart.py index 293c564141b362..b0e2d141f4fa3b 100644 --- a/src/transformers/models/mbart/modeling_tf_mbart.py +++ b/src/transformers/models/mbart/modeling_tf_mbart.py @@ -32,7 +32,6 @@ # Public API from ...modeling_tf_utils import ( - DUMMY_INPUTS, TFCausalLanguageModelingLoss, TFModelInputType, TFPreTrainedModel, @@ -468,34 +467,6 @@ class TFMBartPreTrainedModel(TFPreTrainedModel): config_class = MBartConfig base_model_prefix = "model" - @property - def dummy_inputs(self): - pad_token = 1 - input_ids = tf.cast(tf.convert_to_tensor(DUMMY_INPUTS), tf.int32) - decoder_input_ids = tf.cast(tf.convert_to_tensor(DUMMY_INPUTS), tf.int32) - dummy_inputs = { - "decoder_input_ids": decoder_input_ids, - "attention_mask": tf.cast(input_ids != pad_token, tf.int32), - "input_ids": input_ids, - } - return dummy_inputs - - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), - "decoder_input_ids": tf.TensorSpec((None, None), tf.int32, name="decoder_input_ids"), - "decoder_attention_mask": tf.TensorSpec((None, None), tf.int32, name="decoder_attention_mask"), - } - ] - ) - # Copied from transformers.models.bart.modeling_tf_bart.TFBartPretrainedModel.serving - def serving(self, inputs): - output = self.call(inputs) - - return self.serving_output(output) - MBART_START_DOCSTRING = r""" This model inherits from [`TFPreTrainedModel`]. 
Check the superclass documentation for the generic methods the diff --git a/src/transformers/models/mobilebert/modeling_tf_mobilebert.py b/src/transformers/models/mobilebert/modeling_tf_mobilebert.py index eddb339074a322..c454a8b35db13d 100644 --- a/src/transformers/models/mobilebert/modeling_tf_mobilebert.py +++ b/src/transformers/models/mobilebert/modeling_tf_mobilebert.py @@ -20,7 +20,7 @@ import warnings from dataclasses import dataclass -from typing import Dict, Optional, Tuple, Union +from typing import Optional, Tuple, Union import numpy as np import tensorflow as tf @@ -51,7 +51,6 @@ ) from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( - MULTIPLE_CHOICE_DUMMY_INPUTS, ModelOutput, add_code_sample_docstrings, add_start_docstrings, @@ -998,17 +997,6 @@ def call( return outputs - def serving_output(self, output: TFBaseModelOutputWithPooling) -> TFBaseModelOutputWithPooling: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFBaseModelOutputWithPooling( - last_hidden_state=output.last_hidden_state, - pooler_output=output.pooler_output, - hidden_states=hs, - attentions=attns, - ) - @add_start_docstrings( """ @@ -1099,17 +1087,6 @@ def call( attentions=outputs.attentions, ) - def serving_output(self, output): - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFMobileBertForPreTrainingOutput( - prediction_logits=output.prediction_logits, - seq_relationship_logits=output.seq_relationship_logits, - hidden_states=hs, - attentions=attns, - ) - @add_start_docstrings("""MobileBert Model with a `language modeling` head on top.""", MOBILEBERT_START_DOCSTRING) class TFMobileBertForMaskedLM(TFMobileBertPreTrainedModel, TFMaskedLanguageModelingLoss): @@ -1190,13 +1167,6 @@ def call( attentions=outputs.attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForMaskedLM.serving_output - def serving_output(self, output: TFMaskedLMOutput) -> TFMaskedLMOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFMaskedLMOutput(logits=output.logits, hidden_states=hs, attentions=attns) - class TFMobileBertOnlyNSPHead(tf.keras.layers.Layer): def __init__(self, config, **kwargs): @@ -1289,13 +1259,6 @@ def call( attentions=outputs.attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForNextSentencePrediction.serving_output - def serving_output(self, output: TFNextSentencePredictorOutput) -> TFNextSentencePredictorOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFNextSentencePredictorOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1386,13 +1349,6 @@ def call( attentions=outputs.attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForSequenceClassification.serving_output - def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput: - hs = tf.convert_to_tensor(output.hidden_states) if 
self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFSequenceClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1492,15 +1448,6 @@ def call( attentions=outputs.attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForQuestionAnswering.serving_output - def serving_output(self, output: TFQuestionAnsweringModelOutput) -> TFQuestionAnsweringModelOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFQuestionAnsweringModelOutput( - start_logits=output.start_logits, end_logits=output.end_logits, hidden_states=hs, attentions=attns - ) - @add_start_docstrings( """ @@ -1528,16 +1475,6 @@ def __init__(self, config, *inputs, **kwargs): 1, kernel_initializer=get_initializer(config.initializer_range), name="classifier" ) - @property - def dummy_inputs(self): - """ - Dummy inputs to build the network. - - Returns: - tf.Tensor with dummy inputs - """ - return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS, dtype=tf.int32)} - @unpack_inputs @add_start_docstrings_to_model_forward( MOBILEBERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length") @@ -1612,28 +1549,6 @@ def call( attentions=outputs.attentions, ) - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"), - "token_type_ids": tf.TensorSpec((None, None, None), tf.int32, name="token_type_ids"), - } - ] - ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForMultipleChoice.serving - def serving(self, inputs: Dict[str, tf.Tensor]): - output = self.call(input_ids=inputs) - - return self.serving_output(output) - - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForMultipleChoice.serving_output - def serving_output(self, output: TFMultipleChoiceModelOutput) -> TFMultipleChoiceModelOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFMultipleChoiceModelOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1722,10 +1637,3 @@ def call( hidden_states=outputs.hidden_states, attentions=outputs.attentions, ) - - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForTokenClassification.serving_output - def serving_output(self, output: TFTokenClassifierOutput) -> TFTokenClassifierOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFTokenClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) diff --git a/src/transformers/models/mobilevit/modeling_tf_mobilevit.py b/src/transformers/models/mobilevit/modeling_tf_mobilevit.py index 879c642800fe38..4d48ce72725c1e 100644 --- a/src/transformers/models/mobilevit/modeling_tf_mobilevit.py +++ b/src/transformers/models/mobilevit/modeling_tf_mobilevit.py @@ -735,38 +735,6 @@ class TFMobileViTPreTrainedModel(TFPreTrainedModel): base_model_prefix = "mobilevit" main_input_name = "pixel_values" - @property - def 
dummy_inputs(self) -> Dict[str, tf.Tensor]: - """ - Dummy inputs to build the network. - - Returns: - `Dict[str, tf.Tensor]`: The dummy inputs. - """ - VISION_DUMMY_INPUTS = tf.random.uniform( - shape=(3, self.config.num_channels, self.config.image_size, self.config.image_size), - dtype=tf.float32, - ) - return {"pixel_values": tf.constant(VISION_DUMMY_INPUTS)} - - @tf.function( - input_signature=[ - { - "pixel_values": tf.TensorSpec((None, None, None, None), tf.float32, name="pixel_values"), - } - ] - ) - def serving(self, inputs): - """ - Method used for serving the model. - - Args: - inputs (`Dict[str, tf.Tensor]`): - The input of the saved model as a dictionary of tensors. - """ - output = self.call(inputs) - return self.serving_output(output) - MOBILEVIT_START_DOCSTRING = r""" This model inherits from [`TFPreTrainedModel`]. Check the superclass documentation for the generic methods the @@ -856,14 +824,6 @@ def call( output = self.mobilevit(pixel_values, output_hidden_states, return_dict, training=training) return output - def serving_output(self, output: TFBaseModelOutputWithPooling) -> TFBaseModelOutputWithPooling: - # hidden_states not converted to Tensor with tf.convert_to_tensor as they are all of different dimensions - return TFBaseModelOutputWithPooling( - last_hidden_state=output.last_hidden_state, - pooler_output=output.pooler_output, - hidden_states=output.hidden_states, - ) - @add_start_docstrings( """ @@ -924,10 +884,6 @@ def call( return TFImageClassifierOutputWithNoAttention(loss=loss, logits=logits, hidden_states=outputs.hidden_states) - def serving_output(self, output: TFImageClassifierOutputWithNoAttention) -> TFImageClassifierOutputWithNoAttention: - # hidden_states and attention not converted to Tensor with tf.convert_to_tensor as they are all of different dimensions - return TFImageClassifierOutputWithNoAttention(logits=output.logits, hidden_states=output.hidden_states) - class TFMobileViTASPPPooling(tf.keras.layers.Layer): def __init__(self, config: MobileViTConfig, out_channels: int, **kwargs) -> None: @@ -1157,8 +1113,3 @@ def call( logits=logits, hidden_states=outputs.hidden_states if output_hidden_states else None, ) - - def serving_output( - self, output: TFSemanticSegmenterOutputWithNoAttention - ) -> TFSemanticSegmenterOutputWithNoAttention: - return TFSemanticSegmenterOutputWithNoAttention(logits=output.logits, hidden_states=output.hidden_states) diff --git a/src/transformers/models/mpnet/modeling_tf_mpnet.py b/src/transformers/models/mpnet/modeling_tf_mpnet.py index 2f4178d6cfc9a0..2982899340d203 100644 --- a/src/transformers/models/mpnet/modeling_tf_mpnet.py +++ b/src/transformers/models/mpnet/modeling_tf_mpnet.py @@ -49,7 +49,6 @@ ) from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( - MULTIPLE_CHOICE_DUMMY_INPUTS, add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward, @@ -77,19 +76,6 @@ class TFMPNetPreTrainedModel(TFPreTrainedModel): config_class = MPNetConfig base_model_prefix = "mpnet" - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), - } - ] - ) - def serving(self, inputs): - output = self.call(inputs) - - return self.serving_output(output) - class TFMPNetEmbeddings(tf.keras.layers.Layer): """Construct the embeddings from word, position embeddings.""" @@ -707,17 +693,6 @@ def call( ) return outputs - def 
serving_output(self, output: TFBaseModelOutputWithPooling) -> TFBaseModelOutputWithPooling: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFBaseModelOutputWithPooling( - last_hidden_state=output.last_hidden_state, - pooler_output=output.pooler_output, - hidden_states=hs, - attentions=attns, - ) - class TFMPNetLMHead(tf.keras.layers.Layer): """MPNet head for masked and permuted language modeling""" @@ -841,13 +816,6 @@ def call( attentions=outputs.attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForMaskedLM.serving_output - def serving_output(self, output: TFMaskedLMOutput) -> TFMaskedLMOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFMaskedLMOutput(logits=output.logits, hidden_states=hs, attentions=attns) - class TFMPNetClassificationHead(tf.keras.layers.Layer): """Head for sentence-level classification tasks.""" @@ -945,13 +913,6 @@ def call( attentions=outputs.attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForSequenceClassification.serving_output - def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFSequenceClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -970,16 +931,6 @@ def __init__(self, config, *inputs, **kwargs): 1, kernel_initializer=get_initializer(config.initializer_range), name="classifier" ) - @property - def dummy_inputs(self): - """ - Dummy inputs to build the network. 
- - Returns: - tf.Tensor with dummy inputs - """ - return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS, dtype=tf.int32)} - @unpack_inputs @add_start_docstrings_to_model_forward(MPNET_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) @add_code_sample_docstrings( @@ -1048,26 +999,6 @@ def call( attentions=outputs.attentions, ) - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"), - } - ] - ) - def serving(self, inputs): - output = self.call(inputs) - - return self.serving_output(output) - - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForMultipleChoice.serving_output - def serving_output(self, output: TFMultipleChoiceModelOutput) -> TFMultipleChoiceModelOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFMultipleChoiceModelOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1142,13 +1073,6 @@ def call( attentions=outputs.attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForTokenClassification.serving_output - def serving_output(self, output: TFTokenClassifierOutput) -> TFTokenClassifierOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFTokenClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1235,12 +1159,3 @@ def call( hidden_states=outputs.hidden_states, attentions=outputs.attentions, ) - - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForQuestionAnswering.serving_output - def serving_output(self, output: TFQuestionAnsweringModelOutput) -> TFQuestionAnsweringModelOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFQuestionAnsweringModelOutput( - start_logits=output.start_logits, end_logits=output.end_logits, hidden_states=hs, attentions=attns - ) diff --git a/src/transformers/models/openai/modeling_tf_openai.py b/src/transformers/models/openai/modeling_tf_openai.py index 3f8967241946f4..70b7f6c05efb3d 100644 --- a/src/transformers/models/openai/modeling_tf_openai.py +++ b/src/transformers/models/openai/modeling_tf_openai.py @@ -357,19 +357,6 @@ class TFOpenAIGPTPreTrainedModel(TFPreTrainedModel): config_class = OpenAIGPTConfig base_model_prefix = "transformer" - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), - } - ] - ) - def serving(self, inputs): - output = self.call(inputs) - - return self.serving_output(output) - @dataclass class TFOpenAIGPTDoubleHeadsModelOutput(ModelOutput): @@ -541,13 +528,6 @@ def call( ) return outputs - # Copied from transformers.models.distilbert.modeling_tf_distilbert.TFDistilBertModel.serving_output - def serving_output(self, output): - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else 
None - - return TFBaseModelOutput(last_hidden_state=output.last_hidden_state, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -630,12 +610,6 @@ def call( attentions=transformer_outputs.attentions, ) - def serving_output(self, output: TFCausalLMOutput) -> TFCausalLMOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFCausalLMOutput(logits=output.logits, hidden_states=hs, attentions=attns) - def prepare_inputs_for_generation(self, inputs, **kwargs): return {"input_ids": inputs} @@ -752,27 +726,13 @@ def call( attentions=transformer_outputs.attentions, ) - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"), - "mc_token_ids": tf.TensorSpec((None, None), tf.int32, name="token_type_ids"), - } - ] - ) - def serving(self, inputs): - output = self.call(inputs) - - return self.serving_output(output) - - def serving_output(self, output): - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFOpenAIGPTDoubleHeadsModelOutput( - logits=output.logits, mc_logits=output.mc_logits, hidden_states=hs, attentions=attns - ) + @property + def input_signature(self): + return { + "input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"), + "attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"), + "mc_token_ids": tf.TensorSpec((None, None), tf.int32, name="token_type_ids"), + } @add_start_docstrings( @@ -894,10 +854,3 @@ def call( hidden_states=transformer_outputs.hidden_states, attentions=transformer_outputs.attentions, ) - - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForSequenceClassification.serving_output - def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFSequenceClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) diff --git a/src/transformers/models/opt/modeling_tf_opt.py b/src/transformers/models/opt/modeling_tf_opt.py index 227e56fdef5577..5f7dd22369b87d 100644 --- a/src/transformers/models/opt/modeling_tf_opt.py +++ b/src/transformers/models/opt/modeling_tf_opt.py @@ -27,7 +27,6 @@ # Public API from ...modeling_tf_utils import ( - DUMMY_INPUTS, TFCausalLanguageModelingLoss, TFModelInputType, TFPreTrainedModel, @@ -413,29 +412,6 @@ class TFOPTPreTrainedModel(TFPreTrainedModel): config_class = OPTConfig base_model_prefix = "model" - @property - def dummy_inputs(self): - pad_token = 1 - input_ids = tf.convert_to_tensor(DUMMY_INPUTS, dtype=tf.int32) - dummy_inputs = { - "attention_mask": tf.cast(input_ids != pad_token, tf.int32), - "input_ids": input_ids, - } - return dummy_inputs - - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), - } - ] - ) - def serving(self, inputs): - output = self.call(inputs) - - return self.serving_output(output) - OPT_INPUTS_DOCSTRING = r""" Args: 
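Note on the recurring pattern in the hunks above and below: each model deletes its hand-rolled `dummy_inputs`, `serving`, and `serving_output` overrides and only keeps an `input_signature` property when its inputs differ from the default. The following is a minimal sketch of that convention, not part of this patch; `TFMyModel` and its extra `global_attention_mask` entry are illustrative only, and the sketch assumes the updated base `TFPreTrainedModel` in modeling_tf_utils.py derives both the dummy inputs and the exported serving function from this property.

import tensorflow as tf

from transformers import TFPreTrainedModel


class TFMyModel(TFPreTrainedModel):
    # Hypothetical model class used only to illustrate the new convention;
    # the config/layer wiring a real model needs is omitted.

    @property
    def input_signature(self):
        # Start from the signature the base class builds automatically and add
        # the one extra tensor this architecture needs, mirroring the LED and
        # Longformer hunks in this diff that append `global_attention_mask`.
        sig = super().input_signature
        sig["global_attention_mask"] = tf.TensorSpec((None, None), tf.int32, name="global_attention_mask")
        return sig

With a property like this in place, `model.dummy_inputs` and the saved-model `serving` signature are expected to be derived from the same dict, which is why the per-model serving boilerplate removed throughout this diff is no longer needed.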
diff --git a/src/transformers/models/pegasus/modeling_tf_pegasus.py b/src/transformers/models/pegasus/modeling_tf_pegasus.py index 7de1542ebe4737..15c87b938bfafe 100644 --- a/src/transformers/models/pegasus/modeling_tf_pegasus.py +++ b/src/transformers/models/pegasus/modeling_tf_pegasus.py @@ -33,7 +33,6 @@ # Public API from ...modeling_tf_utils import ( - DUMMY_INPUTS, TFCausalLanguageModelingLoss, TFModelInputType, TFPreTrainedModel, @@ -503,34 +502,6 @@ class TFPegasusPreTrainedModel(TFPreTrainedModel): config_class = PegasusConfig base_model_prefix = "model" - @property - def dummy_inputs(self): - pad_token = 1 - input_ids = tf.convert_to_tensor(DUMMY_INPUTS, dtype=tf.int32) - decoder_input_ids = tf.convert_to_tensor(DUMMY_INPUTS, dtype=tf.int32) - dummy_inputs = { - "decoder_input_ids": decoder_input_ids, - "attention_mask": tf.cast(input_ids != pad_token, tf.int32), - "input_ids": input_ids, - } - return dummy_inputs - - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), - "decoder_input_ids": tf.TensorSpec((None, None), tf.int32, name="decoder_input_ids"), - "decoder_attention_mask": tf.TensorSpec((None, None), tf.int32, name="decoder_attention_mask"), - } - ] - ) - # Copied from transformers.models.bart.modeling_tf_bart.TFBartPretrainedModel.serving - def serving(self, inputs): - output = self.call(inputs) - - return self.serving_output(output) - PEGASUS_START_DOCSTRING = r""" This model inherits from [`TFPreTrainedModel`]. Check the superclass documentation for the generic methods the diff --git a/src/transformers/models/regnet/modeling_tf_regnet.py b/src/transformers/models/regnet/modeling_tf_regnet.py index 2c3a1ac42e5063..254d49a9f1efd5 100644 --- a/src/transformers/models/regnet/modeling_tf_regnet.py +++ b/src/transformers/models/regnet/modeling_tf_regnet.py @@ -14,7 +14,7 @@ # limitations under the License. """ TensorFlow RegNet model.""" -from typing import Dict, Optional, Tuple, Union +from typing import Optional, Tuple, Union import tensorflow as tf @@ -345,33 +345,8 @@ class TFRegNetPreTrainedModel(TFPreTrainedModel): main_input_name = "pixel_values" @property - def dummy_inputs(self) -> Dict[str, tf.Tensor]: - """ - Dummy inputs to build the network. - - Returns: - `Dict[str, tf.Tensor]`: The dummy inputs. - """ - VISION_DUMMY_INPUTS = tf.random.uniform(shape=(3, self.config.num_channels, 224, 224), dtype=tf.float32) - return {"pixel_values": tf.constant(VISION_DUMMY_INPUTS)} - - @tf.function( - input_signature=[ - { - "pixel_values": tf.TensorSpec((None, None, None, None), tf.float32, name="pixel_values"), - } - ] - ) - def serving(self, inputs): - """ - Method used for serving the model. - - Args: - inputs (`Dict[str, tf.Tensor]`): - The input of the saved model as a dictionary of tensors. 
- """ - output = self.call(inputs) - return self.serving_output(output) + def input_signature(self): + return {"pixel_values": tf.TensorSpec(shape=(None, self.config.num_channels, 224, 224), dtype=tf.float32)} REGNET_START_DOCSTRING = r""" @@ -443,16 +418,6 @@ def call( hidden_states=outputs.hidden_states, ) - def serving_output( - self, output: TFBaseModelOutputWithPoolingAndNoAttention - ) -> TFBaseModelOutputWithPoolingAndNoAttention: - # hidden_states not converted to Tensor with tf.convert_to_tensor as they are all of different dimensions - return TFBaseModelOutputWithPoolingAndNoAttention( - last_hidden_state=output.last_hidden_state, - pooler_output=output.pooler_output, - hidden_states=output.hidden_states, - ) - @add_start_docstrings( """ @@ -514,7 +479,3 @@ def call( return ((loss,) + output) if loss is not None else output return TFSequenceClassifierOutput(loss=loss, logits=logits, hidden_states=outputs.hidden_states) - - def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput: - # hidden_states not converted to Tensor with tf.convert_to_tensor as they are all of different dimensions - return TFSequenceClassifierOutput(logits=output.logits, hidden_states=output.hidden_states) diff --git a/src/transformers/models/rembert/modeling_tf_rembert.py b/src/transformers/models/rembert/modeling_tf_rembert.py index 097bd977a4a14c..1595fd8118debd 100644 --- a/src/transformers/models/rembert/modeling_tf_rembert.py +++ b/src/transformers/models/rembert/modeling_tf_rembert.py @@ -49,8 +49,6 @@ ) from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( - DUMMY_INPUTS, - MULTIPLE_CHOICE_DUMMY_INPUTS, add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward, @@ -812,24 +810,6 @@ class TFRemBertPreTrainedModel(TFPreTrainedModel): config_class = RemBertConfig base_model_prefix = "rembert" - @property - def dummy_inputs(self): - """ - Dummy inputs to build the network. - - Returns: - `Dict[str, tf.Tensor]`: The dummy inputs. 
- """ - dummy = {"input_ids": tf.constant(DUMMY_INPUTS, dtype=tf.int32)} - # Add `encoder_hidden_states` to make the cross-attention layers' weights initialized - if self.config.add_cross_attention: - batch_size, seq_len = tf.constant(DUMMY_INPUTS).shape - shape = (batch_size, seq_len) + (self.config.hidden_size,) - h = tf.random.uniform(shape=shape) - dummy["encoder_hidden_states"] = h - - return dummy - REMBERT_START_DOCSTRING = r""" @@ -1002,27 +982,6 @@ def call( return outputs - # Copied from transformers.models.bert.modeling_tf_bert.TFBertModel.serving_output - def serving_output( - self, output: TFBaseModelOutputWithPoolingAndCrossAttentions - ) -> TFBaseModelOutputWithPoolingAndCrossAttentions: - output_cache = self.config.use_cache and self.config.is_decoder - pkv = tf.convert_to_tensor(output.past_key_values) if output_cache else None - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - cross_attns = tf.convert_to_tensor(output.cross_attentions) if output.cross_attentions is not None else None - if not (self.config.output_attentions and self.config.add_cross_attention): - cross_attns = None - - return TFBaseModelOutputWithPoolingAndCrossAttentions( - last_hidden_state=output.last_hidden_state, - pooler_output=output.pooler_output, - past_key_values=pkv, - hidden_states=hs, - attentions=attns, - cross_attentions=cross_attns, - ) - @add_start_docstrings("""RemBERT Model with a `language modeling` head on top.""", REMBERT_START_DOCSTRING) class TFRemBertForMaskedLM(TFRemBertPreTrainedModel, TFMaskedLanguageModelingLoss): @@ -1095,12 +1054,6 @@ def call( attentions=outputs.attentions, ) - def serving_output(self, output: TFMaskedLMOutput) -> TFMaskedLMOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFMaskedLMOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """RemBERT Model with a `language modeling` head on top for CLM fine-tuning.""", REMBERT_START_DOCSTRING @@ -1217,20 +1170,6 @@ def call( cross_attentions=outputs.cross_attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertLMHeadModel.serving_output - def serving_output(self, output: TFCausalLMOutputWithCrossAttentions) -> TFCausalLMOutputWithCrossAttentions: - output_cache = self.config.use_cache and self.config.is_decoder - pkv = tf.convert_to_tensor(output.past_key_values) if output_cache else None - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - cross_attns = tf.convert_to_tensor(output.cross_attentions) if output.cross_attentions is not None else None - if not (self.config.output_attentions and self.config.add_cross_attention): - cross_attns = None - - return TFCausalLMOutputWithCrossAttentions( - logits=output.logits, past_key_values=pkv, hidden_states=hs, attentions=attns, cross_attentions=cross_attns - ) - @add_start_docstrings( """ @@ -1307,12 +1246,6 @@ def call( attentions=outputs.attentions, ) - def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = 
tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFSequenceClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1331,16 +1264,6 @@ def __init__(self, config: RemBertConfig, *inputs, **kwargs): units=1, kernel_initializer=get_initializer(config.initializer_range), name="classifier" ) - @property - def dummy_inputs(self) -> Dict[str, tf.Tensor]: - """ - Dummy inputs to build the network. - - Returns: - tf.Tensor with dummy inputs - """ - return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS, dtype=tf.int32)} - @unpack_inputs @add_start_docstrings_to_model_forward(REMBERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) @add_code_sample_docstrings( @@ -1419,26 +1342,6 @@ def call( attentions=outputs.attentions, ) - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"), - "token_type_ids": tf.TensorSpec((None, None, None), tf.int32, name="token_type_ids"), - } - ] - ) - def serving(self, inputs: Dict[str, tf.Tensor]) -> TFMultipleChoiceModelOutput: - output = self.call(input_ids=inputs) - - return self.serving_output(output) - - def serving_output(self, output: TFMultipleChoiceModelOutput) -> TFMultipleChoiceModelOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFMultipleChoiceModelOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1512,12 +1415,6 @@ def call( attentions=outputs.attentions, ) - def serving_output(self, output: TFTokenClassifierOutput) -> TFTokenClassifierOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFTokenClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1604,11 +1501,3 @@ def call( hidden_states=outputs.hidden_states, attentions=outputs.attentions, ) - - def serving_output(self, output: TFQuestionAnsweringModelOutput) -> TFQuestionAnsweringModelOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFQuestionAnsweringModelOutput( - start_logits=output.start_logits, end_logits=output.end_logits, hidden_states=hs, attentions=attns - ) diff --git a/src/transformers/models/resnet/modeling_tf_resnet.py b/src/transformers/models/resnet/modeling_tf_resnet.py index bb6035adf2df64..4ff1b119d42820 100644 --- a/src/transformers/models/resnet/modeling_tf_resnet.py +++ b/src/transformers/models/resnet/modeling_tf_resnet.py @@ -14,7 +14,7 @@ # limitations under the License. """ TensorFlow ResNet model.""" -from typing import Dict, Optional, Tuple, Union +from typing import Optional, Tuple, Union import tensorflow as tf @@ -276,24 +276,8 @@ class TFResNetPreTrainedModel(TFPreTrainedModel): main_input_name = "pixel_values" @property - def dummy_inputs(self) -> Dict[str, tf.Tensor]: - """ - Dummy inputs to build the network. Returns: - `Dict[str, tf.Tensor]`: The dummy inputs. 
- """ - VISION_DUMMY_INPUTS = tf.random.uniform(shape=(3, self.config.num_channels, 224, 224), dtype=tf.float32) - return {"pixel_values": tf.constant(VISION_DUMMY_INPUTS)} - - @tf.function( - input_signature=[ - { - "pixel_values": tf.TensorSpec((None, None, None, None), tf.float32, name="pixel_values"), - } - ] - ) - def serving(self, inputs): - output = self.call(inputs) - return self.serving_output(output) + def input_signature(self): + return {"pixel_values": tf.TensorSpec(shape=(None, self.config.num_channels, 224, 224), dtype=tf.float32)} RESNET_START_DOCSTRING = r""" @@ -419,16 +403,6 @@ def call( ) return resnet_outputs - def serving_output( - self, output: TFBaseModelOutputWithPoolingAndNoAttention - ) -> TFBaseModelOutputWithPoolingAndNoAttention: - # hidden_states not converted to Tensor with tf.convert_to_tensor as they are all of different dimensions - return TFBaseModelOutputWithPoolingAndNoAttention( - last_hidden_state=output.last_hidden_state, - pooler_output=output.pooler_output, - hidden_states=output.hidden_states, - ) - @add_start_docstrings( """ @@ -492,7 +466,3 @@ def call( return (loss,) + output if loss is not None else output return TFImageClassifierOutputWithNoAttention(loss=loss, logits=logits, hidden_states=outputs.hidden_states) - - def serving_output(self, output: TFImageClassifierOutputWithNoAttention) -> TFImageClassifierOutputWithNoAttention: - # hidden_states not converted to Tensor with tf.convert_to_tensor as they are all of different dimensions - return TFImageClassifierOutputWithNoAttention(logits=output.logits, hidden_states=output.hidden_states) diff --git a/src/transformers/models/roberta/modeling_tf_roberta.py b/src/transformers/models/roberta/modeling_tf_roberta.py index 585c4d31ad0d0e..9b6c491d2761e6 100644 --- a/src/transformers/models/roberta/modeling_tf_roberta.py +++ b/src/transformers/models/roberta/modeling_tf_roberta.py @@ -51,8 +51,6 @@ ) from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( - DUMMY_INPUTS, - MULTIPLE_CHOICE_DUMMY_INPUTS, add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward, @@ -777,38 +775,6 @@ class TFRobertaPreTrainedModel(TFPreTrainedModel): config_class = RobertaConfig base_model_prefix = "roberta" - @property - # Copied from transformers.models.bert.modeling_tf_bert.TFBertPreTrainedModel.dummy_inputs - def dummy_inputs(self): - """ - Dummy inputs to build the network. - - Returns: - `Dict[str, tf.Tensor]`: The dummy inputs. 
- """ - dummy = {"input_ids": tf.constant(DUMMY_INPUTS, dtype=tf.int32)} - # Add `encoder_hidden_states` to make the cross-attention layers' weights initialized - if self.config.add_cross_attention: - batch_size, seq_len = tf.constant(DUMMY_INPUTS).shape - shape = (batch_size, seq_len) + (self.config.hidden_size,) - h = tf.random.uniform(shape=shape) - dummy["encoder_hidden_states"] = h - - return dummy - - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), - } - ] - ) - def serving(self, inputs): - output = self.call(inputs) - - return self.serving_output(output) - ROBERTA_START_DOCSTRING = r""" @@ -980,27 +946,6 @@ def call( return outputs - # Copied from transformers.models.bert.modeling_tf_bert.TFBertModel.serving_output - def serving_output( - self, output: TFBaseModelOutputWithPoolingAndCrossAttentions - ) -> TFBaseModelOutputWithPoolingAndCrossAttentions: - output_cache = self.config.use_cache and self.config.is_decoder - pkv = tf.convert_to_tensor(output.past_key_values) if output_cache else None - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - cross_attns = tf.convert_to_tensor(output.cross_attentions) if output.cross_attentions is not None else None - if not (self.config.output_attentions and self.config.add_cross_attention): - cross_attns = None - - return TFBaseModelOutputWithPoolingAndCrossAttentions( - last_hidden_state=output.last_hidden_state, - pooler_output=output.pooler_output, - past_key_values=pkv, - hidden_states=hs, - attentions=attns, - cross_attentions=cross_attns, - ) - class TFRobertaLMHead(tf.keras.layers.Layer): """Roberta Head for masked language modeling.""" @@ -1131,13 +1076,6 @@ def call( attentions=outputs.attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForMaskedLM.serving_output - def serving_output(self, output: TFMaskedLMOutput) -> TFMaskedLMOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFMaskedLMOutput(logits=output.logits, hidden_states=hs, attentions=attns) - class TFRobertaForCausalLM(TFRobertaPreTrainedModel, TFCausalLanguageModelingLoss): # names with a '.' 
represents the authorized unexpected/missing layers when a TF model is loaded from a PT model @@ -1260,20 +1198,6 @@ def call( cross_attentions=outputs.cross_attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertLMHeadModel.serving_output - def serving_output(self, output: TFCausalLMOutputWithCrossAttentions) -> TFCausalLMOutputWithCrossAttentions: - output_cache = self.config.use_cache and self.config.is_decoder - pkv = tf.convert_to_tensor(output.past_key_values) if output_cache else None - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - cross_attns = tf.convert_to_tensor(output.cross_attentions) if output.cross_attentions is not None else None - if not (self.config.output_attentions and self.config.add_cross_attention): - cross_attns = None - - return TFCausalLMOutputWithCrossAttentions( - logits=output.logits, past_key_values=pkv, hidden_states=hs, attentions=attns, cross_attentions=cross_attns - ) - class TFRobertaClassificationHead(tf.keras.layers.Layer): """Head for sentence-level classification tasks.""" @@ -1378,13 +1302,6 @@ def call( attentions=outputs.attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForSequenceClassification.serving_output - def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFSequenceClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1407,16 +1324,6 @@ def __init__(self, config, *inputs, **kwargs): 1, kernel_initializer=get_initializer(config.initializer_range), name="classifier" ) - @property - def dummy_inputs(self): - """ - Dummy inputs to build the network. 
- - Returns: - tf.Tensor with dummy inputs - """ - return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS, dtype=tf.int32)} - @unpack_inputs @add_start_docstrings_to_model_forward(ROBERTA_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) @add_code_sample_docstrings( @@ -1485,26 +1392,6 @@ def call( attentions=outputs.attentions, ) - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"), - } - ] - ) - def serving(self, inputs): - output = self.call(inputs) - - return self.serving_output(output) - - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForMultipleChoice.serving_output - def serving_output(self, output: TFMultipleChoiceModelOutput) -> TFMultipleChoiceModelOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFMultipleChoiceModelOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1588,13 +1475,6 @@ def call( attentions=outputs.attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForTokenClassification.serving_output - def serving_output(self, output: TFTokenClassifierOutput) -> TFTokenClassifierOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFTokenClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1686,12 +1566,3 @@ def call( hidden_states=outputs.hidden_states, attentions=outputs.attentions, ) - - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForQuestionAnswering.serving_output - def serving_output(self, output: TFQuestionAnsweringModelOutput) -> TFQuestionAnsweringModelOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFQuestionAnsweringModelOutput( - start_logits=output.start_logits, end_logits=output.end_logits, hidden_states=hs, attentions=attns - ) diff --git a/src/transformers/models/roberta_prelayernorm/modeling_tf_roberta_prelayernorm.py b/src/transformers/models/roberta_prelayernorm/modeling_tf_roberta_prelayernorm.py index 80a834ad585437..2f98a5f5d0cff4 100644 --- a/src/transformers/models/roberta_prelayernorm/modeling_tf_roberta_prelayernorm.py +++ b/src/transformers/models/roberta_prelayernorm/modeling_tf_roberta_prelayernorm.py @@ -51,8 +51,6 @@ ) from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( - DUMMY_INPUTS, - MULTIPLE_CHOICE_DUMMY_INPUTS, add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward, @@ -778,38 +776,6 @@ class TFRobertaPreLayerNormPreTrainedModel(TFPreTrainedModel): config_class = RobertaPreLayerNormConfig base_model_prefix = "roberta_prelayernorm" - @property - # Copied from transformers.models.bert.modeling_tf_bert.TFBertPreTrainedModel.dummy_inputs - def dummy_inputs(self): - """ - Dummy inputs to build the network. - - Returns: - `Dict[str, tf.Tensor]`: The dummy inputs. 
- """ - dummy = {"input_ids": tf.constant(DUMMY_INPUTS, dtype=tf.int32)} - # Add `encoder_hidden_states` to make the cross-attention layers' weights initialized - if self.config.add_cross_attention: - batch_size, seq_len = tf.constant(DUMMY_INPUTS).shape - shape = (batch_size, seq_len) + (self.config.hidden_size,) - h = tf.random.uniform(shape=shape) - dummy["encoder_hidden_states"] = h - - return dummy - - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), - } - ] - ) - def serving(self, inputs): - output = self.call(inputs) - - return self.serving_output(output) - ROBERTA_PRELAYERNORM_START_DOCSTRING = r""" @@ -982,27 +948,6 @@ def call( return outputs - # Copied from transformers.models.bert.modeling_tf_bert.TFBertModel.serving_output - def serving_output( - self, output: TFBaseModelOutputWithPoolingAndCrossAttentions - ) -> TFBaseModelOutputWithPoolingAndCrossAttentions: - output_cache = self.config.use_cache and self.config.is_decoder - pkv = tf.convert_to_tensor(output.past_key_values) if output_cache else None - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - cross_attns = tf.convert_to_tensor(output.cross_attentions) if output.cross_attentions is not None else None - if not (self.config.output_attentions and self.config.add_cross_attention): - cross_attns = None - - return TFBaseModelOutputWithPoolingAndCrossAttentions( - last_hidden_state=output.last_hidden_state, - pooler_output=output.pooler_output, - past_key_values=pkv, - hidden_states=hs, - attentions=attns, - cross_attentions=cross_attns, - ) - # Copied from transformers.models.roberta.modeling_tf_roberta.TFRobertaLMHead with Roberta->RobertaPreLayerNorm class TFRobertaPreLayerNormLMHead(tf.keras.layers.Layer): @@ -1140,13 +1085,6 @@ def call( attentions=outputs.attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForMaskedLM.serving_output - def serving_output(self, output: TFMaskedLMOutput) -> TFMaskedLMOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFMaskedLMOutput(logits=output.logits, hidden_states=hs, attentions=attns) - # Copied from transformers.models.roberta.modeling_tf_roberta.TFRobertaForCausalLM with ROBERTA->ROBERTA_PRELAYERNORM,Roberta->RobertaPreLayerNorm,roberta->roberta_prelayernorm class TFRobertaPreLayerNormForCausalLM(TFRobertaPreLayerNormPreTrainedModel, TFCausalLanguageModelingLoss): @@ -1276,20 +1214,6 @@ def call( cross_attentions=outputs.cross_attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertLMHeadModel.serving_output - def serving_output(self, output: TFCausalLMOutputWithCrossAttentions) -> TFCausalLMOutputWithCrossAttentions: - output_cache = self.config.use_cache and self.config.is_decoder - pkv = tf.convert_to_tensor(output.past_key_values) if output_cache else None - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - cross_attns = tf.convert_to_tensor(output.cross_attentions) if output.cross_attentions is not None else None - if not (self.config.output_attentions and 
self.config.add_cross_attention): - cross_attns = None - - return TFCausalLMOutputWithCrossAttentions( - logits=output.logits, past_key_values=pkv, hidden_states=hs, attentions=attns, cross_attentions=cross_attns - ) - # Copied from transformers.models.roberta.modeling_tf_roberta.TFRobertaClassificationHead with Roberta->RobertaPreLayerNorm class TFRobertaPreLayerNormClassificationHead(tf.keras.layers.Layer): @@ -1398,13 +1322,6 @@ def call( attentions=outputs.attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForSequenceClassification.serving_output - def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFSequenceClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1428,16 +1345,6 @@ def __init__(self, config, *inputs, **kwargs): 1, kernel_initializer=get_initializer(config.initializer_range), name="classifier" ) - @property - def dummy_inputs(self): - """ - Dummy inputs to build the network. - - Returns: - tf.Tensor with dummy inputs - """ - return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS, dtype=tf.int32)} - @unpack_inputs @add_start_docstrings_to_model_forward( ROBERTA_PRELAYERNORM_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length") @@ -1508,26 +1415,6 @@ def call( attentions=outputs.attentions, ) - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"), - } - ] - ) - def serving(self, inputs): - output = self.call(inputs) - - return self.serving_output(output) - - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForMultipleChoice.serving_output - def serving_output(self, output: TFMultipleChoiceModelOutput) -> TFMultipleChoiceModelOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFMultipleChoiceModelOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1612,13 +1499,6 @@ def call( attentions=outputs.attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForTokenClassification.serving_output - def serving_output(self, output: TFTokenClassifierOutput) -> TFTokenClassifierOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFTokenClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1711,12 +1591,3 @@ def call( hidden_states=outputs.hidden_states, attentions=outputs.attentions, ) - - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForQuestionAnswering.serving_output - def serving_output(self, output: TFQuestionAnsweringModelOutput) -> TFQuestionAnsweringModelOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFQuestionAnsweringModelOutput( - start_logits=output.start_logits, 
end_logits=output.end_logits, hidden_states=hs, attentions=attns - ) diff --git a/src/transformers/models/roformer/modeling_tf_roformer.py b/src/transformers/models/roformer/modeling_tf_roformer.py index 50b57571461da7..f6067f9237f45e 100644 --- a/src/transformers/models/roformer/modeling_tf_roformer.py +++ b/src/transformers/models/roformer/modeling_tf_roformer.py @@ -50,7 +50,6 @@ ) from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( - MULTIPLE_CHOICE_DUMMY_INPUTS, add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward, @@ -835,12 +834,6 @@ def call( return outputs - def serving_output(self, output: TFBaseModelOutput) -> TFBaseModelOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFBaseModelOutput(last_hidden_state=output.last_hidden_state, hidden_states=hs, attentions=attns) - @add_start_docstrings("""RoFormer Model with a `language modeling` head on top.""", ROFORMER_START_DOCSTRING) class TFRoFormerForMaskedLM(TFRoFormerPreTrainedModel, TFMaskedLanguageModelingLoss): @@ -911,12 +904,6 @@ def call( attentions=outputs.attentions, ) - def serving_output(self, output: TFMaskedLMOutput) -> TFMaskedLMOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFMaskedLMOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """RoFormer Model with a `language modeling` head on top for CLM fine-tuning.""", ROFORMER_START_DOCSTRING @@ -990,12 +977,6 @@ def call( attentions=outputs.attentions, ) - def serving_output(self, output: TFCausalLMOutput) -> TFCausalLMOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFCausalLMOutput(logits=output.logits, hidden_states=hs, attentions=attns) - class TFRoFormerClassificationHead(tf.keras.layers.Layer): """Head for sentence-level classification tasks.""" @@ -1094,12 +1075,6 @@ def call( attentions=outputs.attentions, ) - def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFSequenceClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1118,17 +1093,6 @@ def __init__(self, config: RoFormerConfig, *inputs, **kwargs): units=1, kernel_initializer=get_initializer(config.initializer_range), name="classifier" ) - @property - def dummy_inputs(self) -> Dict[str, tf.Tensor]: - """ - Dummy inputs to build the network. 
- - - Returns: - tf.Tensor with dummy inputs - """ - return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS, dtype=tf.int32)} - @unpack_inputs @add_start_docstrings_to_model_forward( ROFORMER_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length") @@ -1203,26 +1167,6 @@ def call( attentions=outputs.attentions, ) - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"), - "token_type_ids": tf.TensorSpec((None, None, None), tf.int32, name="token_type_ids"), - } - ] - ) - def serving(self, inputs: Dict[str, tf.Tensor]) -> TFMultipleChoiceModelOutput: - output = self.call(input_ids=inputs) - - return self.serving_output(output) - - def serving_output(self, output: TFMultipleChoiceModelOutput) -> TFMultipleChoiceModelOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFMultipleChoiceModelOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1294,12 +1238,6 @@ def call( attentions=outputs.attentions, ) - def serving_output(self, output: TFTokenClassifierOutput) -> TFTokenClassifierOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFTokenClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1383,11 +1321,3 @@ def call( hidden_states=outputs.hidden_states, attentions=outputs.attentions, ) - - def serving_output(self, output: TFQuestionAnsweringModelOutput) -> TFQuestionAnsweringModelOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFQuestionAnsweringModelOutput( - start_logits=output.start_logits, end_logits=output.end_logits, hidden_states=hs, attentions=attns - ) diff --git a/src/transformers/models/segformer/modeling_tf_segformer.py b/src/transformers/models/segformer/modeling_tf_segformer.py index 47b7ce8e8c5c05..b3090135afc290 100644 --- a/src/transformers/models/segformer/modeling_tf_segformer.py +++ b/src/transformers/models/segformer/modeling_tf_segformer.py @@ -18,7 +18,7 @@ from __future__ import annotations import math -from typing import Dict, Optional, Tuple, Union +from typing import Optional, Tuple, Union import tensorflow as tf @@ -521,34 +521,8 @@ class TFSegformerPreTrainedModel(TFPreTrainedModel): main_input_name = "pixel_values" @property - def dummy_inputs(self) -> Dict[str, tf.Tensor]: - """ - Dummy inputs to build the network. - - Returns: - `Dict[str, tf.Tensor]`: The dummy inputs. - """ - VISION_DUMMY_INPUTS = tf.random.uniform(shape=(3, self.config.num_channels, 512, 512), dtype=tf.float32) - return {"pixel_values": tf.constant(VISION_DUMMY_INPUTS)} - - @tf.function( - input_signature=[ - { - "pixel_values": tf.TensorSpec((None, None, None, None), tf.float32, name="pixel_values"), - } - ] - ) - def serving(self, inputs): - """ - Method used for serving the model. - - Args: - inputs (`Dict[str, tf.Tensor]`): - The input of the saved model as a dictionary of tensors. 
- """ - output = self.call(inputs) - - return self.serving_output(output) + def input_signature(self): + return {"pixel_values": tf.TensorSpec(shape=(None, self.config.num_channels, 512, 512), dtype=tf.float32)} SEGFORMER_START_DOCSTRING = r""" @@ -631,14 +605,6 @@ def call( ) return outputs - def serving_output(self, output: TFBaseModelOutput) -> TFBaseModelOutput: - # hidden_states and attention not converted to Tensor with tf.convert_to_tensor as they are all of different dimensions - return TFBaseModelOutput( - last_hidden_state=output.last_hidden_state, - hidden_states=output.hidden_states, - attentions=output.attentions, - ) - @add_start_docstrings( """ @@ -702,12 +668,6 @@ def call( loss=loss, logits=logits, hidden_states=outputs.hidden_states, attentions=outputs.attentions ) - def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput: - # hidden_states and attention not converted to Tensor with tf.convert_to_tensor as they are all of different dimensions - return TFSequenceClassifierOutput( - logits=output.logits, hidden_states=output.hidden_states, attentions=output.attentions - ) - class TFSegformerMLP(tf.keras.layers.Layer): """ @@ -892,9 +852,3 @@ def call( hidden_states=outputs.hidden_states if output_hidden_states else None, attentions=outputs.attentions, ) - - def serving_output(self, output: TFSemanticSegmenterOutput) -> TFSemanticSegmenterOutput: - # hidden_states and attention not converted to Tensor with tf.convert_to_tensor as they are all of different dimensions - return TFSemanticSegmenterOutput( - logits=output.logits, hidden_states=output.hidden_states, attentions=output.attentions - ) diff --git a/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py b/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py index 3651506894c7f5..59caabffab9c16 100755 --- a/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py +++ b/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py @@ -18,7 +18,7 @@ from __future__ import annotations import random -from typing import Dict, Optional, Tuple, Union +from typing import Optional, Tuple, Union import numpy as np import tensorflow as tf @@ -563,26 +563,6 @@ class TFSpeech2TextPreTrainedModel(TFPreTrainedModel): base_model_prefix = "model" main_input_name = "input_features" - # Overwritten property due to different expected input shape and type - @property - def dummy_inputs(self) -> Dict[str, tf.Tensor]: - """ - Dummy inputs to build the network. - - Returns: - `Dict[str, tf.Tensor]`: The dummy inputs. 
- """ - return { - self.main_input_name: tf.random.uniform( - [ - 1, - random.randint(1, self.config.max_source_positions), # time - self.config.input_feat_per_channel * self.config.input_channels, # input channels - ] - ), - "decoder_input_ids": tf.constant([[2, 3]], dtype=tf.int32), - } - def _get_feat_extract_output_lengths(self, input_lengths: tf.Tensor): """ Computes the output length of the convolutional layers @@ -592,20 +572,18 @@ def _get_feat_extract_output_lengths(self, input_lengths: tf.Tensor): return input_lengths - @tf.function( - input_signature=[ - { - "input_features": tf.TensorSpec((None, None, None), tf.float32, name="input_features"), - "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), - "decoder_input_ids": tf.TensorSpec((None, None), tf.int32, name="decoder_input_ids"), - "decoder_attention_mask": tf.TensorSpec((None, None), tf.int32, name="decoder_attention_mask"), - } - ] - ) - def serving(self, inputs): - output = self.call(inputs) - - return self.serving_output(output) + @property + def input_signature(self): + return { + "input_features": tf.TensorSpec( + (None, None, self.config.input_feat_per_channel * self.config.input_channels), + tf.float32, + name="input_features", + ), + "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), + "decoder_input_ids": tf.TensorSpec((None, None), tf.int32, name="decoder_input_ids"), + "decoder_attention_mask": tf.TensorSpec((None, None), tf.int32, name="decoder_attention_mask"), + } SPEECH_TO_TEXT_START_DOCSTRING = r""" diff --git a/src/transformers/models/swin/modeling_tf_swin.py b/src/transformers/models/swin/modeling_tf_swin.py index f75bf230c0ad37..02ec39edb0fe14 100644 --- a/src/transformers/models/swin/modeling_tf_swin.py +++ b/src/transformers/models/swin/modeling_tf_swin.py @@ -957,29 +957,6 @@ def _set_gradient_checkpointing(self, module, value=False) -> None: if isinstance(module, TFSwinEncoder): module.gradient_checkpointing = value - @property - def dummy_inputs(self) -> Dict[str, tf.Tensor]: - """ - Dummy inputs to build the network. Returns: - `Dict[str, tf.Tensor]`: The dummy inputs. 
- """ - VISION_DUMMY_INPUTS = tf.random.uniform( - shape=(3, self.config.num_channels, self.config.image_size, self.config.image_size), - dtype=tf.float32, - ) - return {"pixel_values": tf.constant(VISION_DUMMY_INPUTS)} - - @tf.function( - input_signature=[ - { - "pixel_values": tf.TensorSpec((None, None, None, None), tf.float32, name="pixel_values"), - } - ] - ) - def serving(self, inputs): - output = self.call(inputs) - return self.serving_output(output) - SWIN_START_DOCSTRING = r""" This model is a Tensorflow @@ -1245,16 +1222,6 @@ def call( return swin_outputs - def serving_output(self, output: TFSwinModelOutput) -> TFSwinModelOutput: - # hidden_states and attentions not converted to Tensor with tf.convert_to_tensor as they are all of different dimensions - return TFSwinModelOutput( - last_hidden_state=output.last_hidden_state, - pooler_output=output.pooler_output, - hidden_states=output.hidden_states, - attentions=output.attentions, - reshaped_hidden_states=output.reshaped_hidden_states, - ) - class TFSwinPixelShuffle(tf.keras.layers.Layer): """TF layer implementation of torch.nn.PixelShuffle""" @@ -1410,15 +1377,6 @@ def call( reshaped_hidden_states=outputs.reshaped_hidden_states, ) - def serving_output(self, output: TFSwinMaskedImageModelingOutput) -> TFSwinMaskedImageModelingOutput: - # hidden_states and attentions not converted to Tensor with tf.convert_to_tensor as they are all of different dimensions - return TFSwinMaskedImageModelingOutput( - reconstruction=output.reconstruction, - hidden_states=output.hidden_states, - attentions=output.attentions, - reshaped_hidden_states=output.reshaped_hidden_states, - ) - @add_start_docstrings( """ @@ -1493,12 +1451,3 @@ def call( attentions=outputs.attentions, reshaped_hidden_states=outputs.reshaped_hidden_states, ) - - def serving_output(self, output: TFSwinImageClassifierOutput) -> TFSwinImageClassifierOutput: - # hidden_states and attentions not converted to Tensor with tf.convert_to_tensor as they are all of different dimensions - return TFSwinImageClassifierOutput( - logits=output.logits, - hidden_states=output.hidden_states, - attentions=output.attentions, - reshaped_hidden_states=output.reshaped_hidden_states, - ) diff --git a/src/transformers/models/t5/modeling_tf_t5.py b/src/transformers/models/t5/modeling_tf_t5.py index 012f0c41b01735..daef8bfb7fddc9 100644 --- a/src/transformers/models/t5/modeling_tf_t5.py +++ b/src/transformers/models/t5/modeling_tf_t5.py @@ -45,8 +45,6 @@ ) from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( - DUMMY_INPUTS, - DUMMY_MASK, ContextManagers, add_start_docstrings, add_start_docstrings_to_model_forward, @@ -867,32 +865,6 @@ class TFT5PreTrainedModel(TFPreTrainedModel): # names with a '.' 
represents the authorized unexpected/missing layers when a TF model is loaded from a PT model _keys_to_ignore_on_load_unexpected = [r"decoder\Wblock[\W_0]+layer[\W_1]+EncDecAttention\Wrelative_attention_bias"] - @property - def dummy_inputs(self): - inputs = tf.constant(DUMMY_INPUTS, dtype=tf.int32) - input_mask = tf.constant(DUMMY_MASK, dtype=tf.int32) - dummy_inputs = { - "input_ids": inputs, - "decoder_input_ids": inputs, - "decoder_attention_mask": input_mask, - } - return dummy_inputs - - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), - "decoder_input_ids": tf.TensorSpec((None, None), tf.int32, name="decoder_input_ids"), - "decoder_attention_mask": tf.TensorSpec((None, None), tf.int32, name="decoder_attention_mask"), - } - ] - ) - def serving(self, inputs): - output = self.call(inputs) - - return self.serving_output(output) - def get_input_embeddings(self): return self.shared @@ -1249,25 +1221,6 @@ def call( encoder_attentions=encoder_outputs.attentions, ) - def serving_output(self, output): - pkv = tf.convert_to_tensor(output.past_key_values[1:]) if self.config.use_cache else None - dec_hs = tf.convert_to_tensor(output.decoder_hidden_states) if self.config.output_hidden_states else None - dec_attns = tf.convert_to_tensor(output.decoder_attentions) if self.config.output_attentions else None - cross_attns = tf.convert_to_tensor(output.cross_attentions) if self.config.output_attentions else None - enc_hs = tf.convert_to_tensor(output.encoder_hidden_states) if self.config.output_hidden_states else None - enc_attns = tf.convert_to_tensor(output.encoder_attentions) if self.config.output_attentions else None - - return TFSeq2SeqModelOutput( - last_hidden_state=output.last_hidden_state, - past_key_values=pkv, - decoder_hidden_states=dec_hs, - decoder_attentions=dec_attns, - encoder_last_hidden_state=output.encoder_last_hidden_state, - cross_attentions=cross_attns, - encoder_hidden_states=enc_hs, - encoder_attentions=enc_attns, - ) - @add_start_docstrings("""T5 Model with a `language modeling` head on top.""", T5_START_DOCSTRING) class TFT5ForConditionalGeneration(TFT5PreTrainedModel, TFCausalLanguageModelingLoss): @@ -1539,10 +1492,6 @@ def __init__(self, config, *inputs, **kwargs): encoder_config.use_cache = False self.encoder = TFT5MainLayer(encoder_config, self.shared, name="encoder") - @property - def dummy_inputs(self): - return {"input_ids": tf.constant(DUMMY_INPUTS, dtype=tf.int32)} - def get_encoder(self): return self.encoder @@ -1600,23 +1549,3 @@ def call( hidden_states=encoder_outputs.hidden_states, attentions=encoder_outputs.attentions, ) - - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), - } - ] - ) - def serving(self, inputs): - output = self.call(inputs) - - return self.serving_output(output) - - # Copied from transformers.models.distilbert.modeling_tf_distilbert.TFDistilBertModel.serving_output - def serving_output(self, output): - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFBaseModelOutput(last_hidden_state=output.last_hidden_state, hidden_states=hs, attentions=attns) diff --git a/src/transformers/models/tapas/modeling_tf_tapas.py 
b/src/transformers/models/tapas/modeling_tf_tapas.py index b17fddc32720d0..62e77a6678deec 100644 --- a/src/transformers/models/tapas/modeling_tf_tapas.py +++ b/src/transformers/models/tapas/modeling_tf_tapas.py @@ -862,18 +862,13 @@ class TFTapasPreTrainedModel(TFPreTrainedModel): config_class = TapasConfig base_model_prefix = "tapas" - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.float32, name="attention_mask"), - "token_type_ids": tf.TensorSpec((None, None, None), tf.int32, name="token_type_ids"), - } - ] - ) - def serving(self, inputs): - output = self.call(inputs) - return self.serving_output(output) + @property + def input_signature(self): + return { + "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), + "attention_mask": tf.TensorSpec((None, None), tf.float32, name="attention_mask"), + "token_type_ids": tf.TensorSpec((None, None, 7), tf.int32, name="token_type_ids"), + } TAPAS_START_DOCSTRING = r""" @@ -1038,17 +1033,6 @@ def call( return outputs - def serving_output(self, output: TFBaseModelOutputWithPooling) -> TFBaseModelOutputWithPooling: - hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFBaseModelOutputWithPooling( - last_hidden_state=output.last_hidden_state, - pooler_output=output.pooler_output, - hidden_states=hidden_states, - attentions=attentions, - ) - @add_start_docstrings("""Tapas Model with a `language modeling` head on top.""", TAPAS_START_DOCSTRING) class TFTapasForMaskedLM(TFTapasPreTrainedModel, TFMaskedLanguageModelingLoss): @@ -1145,12 +1129,6 @@ def call( attentions=outputs.attentions, ) - def serving_output(self, output: TFMaskedLMOutput) -> TFMaskedLMOutput: - hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFMaskedLMOutput(logits=output.logits, hidden_states=hidden_states, attentions=attentions) - class TFTapasComputeTokenLogits(tf.keras.layers.Layer): def __init__(self, config: TapasConfig, **kwargs): @@ -1574,17 +1552,6 @@ def call( attentions=outputs.attentions, ) - def serving_output(self, output: TFTableQuestionAnsweringOutput) -> TFTableQuestionAnsweringOutput: - hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFTableQuestionAnsweringOutput( - logits=output.logits, - logits_aggregation=output.logits_aggregation, - hidden_states=hidden_states, - attentions=attentions, - ) - @add_start_docstrings( """ @@ -1687,12 +1654,6 @@ def call( attentions=outputs.attentions, ) - def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput: - hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFSequenceClassifierOutput(logits=output.logits, hidden_states=hidden_states, attentions=attentions) - """ TAPAS utilities.""" diff --git a/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py b/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py index 
decf18b8a7a0bf..2ef67426f87cdb 100644 --- a/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py +++ b/src/transformers/models/transfo_xl/modeling_tf_transfo_xl.py @@ -684,18 +684,6 @@ class TFTransfoXLPreTrainedModel(TFPreTrainedModel): config_class = TransfoXLConfig base_model_prefix = "transformer" - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), - } - ] - ) - def serving(self, inputs): - output = self.call(inputs) - - return self.serving_output(output) - @dataclass class TFTransfoXLModelOutput(ModelOutput): @@ -916,17 +904,6 @@ def call( return outputs - def serving_output(self, output): - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFTransfoXLModelOutput( - last_hidden_state=output.last_hidden_state, - mems=tf.convert_to_tensor(output.mems), - hidden_states=hs, - attentions=attns, - ) - @add_start_docstrings( """ @@ -1015,17 +992,6 @@ def call( attentions=transformer_outputs.attentions, ) - def serving_output(self, output): - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFTransfoXLLMHeadModelOutput( - prediction_scores=output.prediction_scores, - mems=tf.convert_to_tensor(output.mems), - hidden_states=hs, - attentions=attns, - ) - def prepare_inputs_for_generation(self, input_ids, past_key_values=None, **model_kwargs): inputs = {} @@ -1157,11 +1123,3 @@ def call( hidden_states=transformer_outputs.hidden_states, attentions=transformer_outputs.attentions, ) - - def serving_output(self, output): - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFTransfoXLSequenceClassifierOutputWithPast( - logits=output.logits, mems=tf.convert_to_tensor(output.mems), hidden_states=hs, attentions=attns - ) diff --git a/src/transformers/models/vision_encoder_decoder/modeling_tf_vision_encoder_decoder.py b/src/transformers/models/vision_encoder_decoder/modeling_tf_vision_encoder_decoder.py index ad39a0ae82bafa..9667c529b56445 100644 --- a/src/transformers/models/vision_encoder_decoder/modeling_tf_vision_encoder_decoder.py +++ b/src/transformers/models/vision_encoder_decoder/modeling_tf_vision_encoder_decoder.py @@ -29,7 +29,6 @@ from ...modeling_tf_utils import TFCausalLanguageModelingLoss, TFPreTrainedModel, get_initializer, unpack_inputs from ...tf_utils import shape_list from ...utils import ( - DUMMY_INPUTS, ModelOutput, add_start_docstrings, add_start_docstrings_to_model_forward, @@ -254,29 +253,26 @@ def __init__( ) @property - def dummy_inputs(self): - """ - Dummy inputs to build the network. - - Returns: - `Dict[str, tf.Tensor]`: The dummy inputs. 
- """ - decoder_input_ids = tf.constant(DUMMY_INPUTS, dtype=tf.int32) - batch_size, seq_len = decoder_input_ids.shape - - VISION_DUMMY_INPUTS = tf.random.uniform( - shape=( - batch_size, - self.config.encoder.num_channels, - self.config.encoder.image_size, - self.config.encoder.image_size, + def input_signature(self): + vision_config = self.config.encoder + if hasattr(vision_config, "vision_config"): + vision_config = vision_config.vision_config + if hasattr(vision_config, "image_size"): + image_size = vision_config.image_size + else: + image_size = vision_config.input_size + return { + "pixel_values": tf.TensorSpec( + shape=( + None, + vision_config.num_channels, + image_size, + image_size, + ), + dtype=tf.float32, ), - dtype=tf.float32, - ) - pixel_values = tf.constant(VISION_DUMMY_INPUTS) - # Add `decoder_input_ids` because `self.decoder` requires it. - dummy = {"pixel_values": pixel_values, "decoder_input_ids": decoder_input_ids} - return dummy + "decoder_input_ids": tf.TensorSpec(shape=(None, None), dtype=tf.int32, name="decoder_input_ids"), + } def get_encoder(self): return self.encoder diff --git a/src/transformers/models/vit/modeling_tf_vit.py b/src/transformers/models/vit/modeling_tf_vit.py index 6a07719c916c06..727db8dfc6c081 100644 --- a/src/transformers/models/vit/modeling_tf_vit.py +++ b/src/transformers/models/vit/modeling_tf_vit.py @@ -19,7 +19,7 @@ import collections.abc import math -from typing import Dict, Optional, Tuple, Union +from typing import Optional, Tuple, Union import numpy as np import tensorflow as tf @@ -550,38 +550,6 @@ class TFViTPreTrainedModel(TFPreTrainedModel): base_model_prefix = "vit" main_input_name = "pixel_values" - @property - def dummy_inputs(self) -> Dict[str, tf.Tensor]: - """ - Dummy inputs to build the network. - - Returns: - `Dict[str, tf.Tensor]`: The dummy inputs. - """ - VISION_DUMMY_INPUTS = tf.random.uniform( - shape=(3, self.config.num_channels, self.config.image_size, self.config.image_size), dtype=tf.float32 - ) - return {"pixel_values": tf.constant(VISION_DUMMY_INPUTS)} - - @tf.function( - input_signature=[ - { - "pixel_values": tf.TensorSpec((None, None, None, None), tf.float32, name="pixel_values"), - } - ] - ) - def serving(self, inputs): - """ - Method used for serving the model. - - Args: - inputs (`Dict[str, tf.Tensor]`): - The input of the saved model as a dictionary of tensors. 
- """ - output = self.call(inputs) - - return self.serving_output(output) - VIT_START_DOCSTRING = r""" @@ -697,17 +665,6 @@ def call( return outputs - def serving_output(self, output: TFBaseModelOutputWithPooling) -> TFBaseModelOutputWithPooling: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFBaseModelOutputWithPooling( - last_hidden_state=output.last_hidden_state, - pooler_output=output.pooler_output, - hidden_states=hs, - attentions=attns, - ) - class TFViTPooler(tf.keras.layers.Layer): def __init__(self, config: ViTConfig, **kwargs): @@ -807,9 +764,3 @@ def call( hidden_states=outputs.hidden_states, attentions=outputs.attentions, ) - - def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFSequenceClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) diff --git a/src/transformers/models/vit_mae/modeling_tf_vit_mae.py b/src/transformers/models/vit_mae/modeling_tf_vit_mae.py index 5f5c1a6830d619..e7d7770bcf26d7 100644 --- a/src/transformers/models/vit_mae/modeling_tf_vit_mae.py +++ b/src/transformers/models/vit_mae/modeling_tf_vit_mae.py @@ -21,7 +21,7 @@ import math from copy import deepcopy from dataclasses import dataclass -from typing import Dict, Optional, Tuple, Union +from typing import Optional, Tuple, Union import numpy as np import tensorflow as tf @@ -698,36 +698,6 @@ class TFViTMAEPreTrainedModel(TFPreTrainedModel): base_model_prefix = "vit" main_input_name = "pixel_values" - @property - def dummy_inputs(self) -> Dict[str, tf.Tensor]: - """ - Dummy inputs to build the network. Returns: - `Dict[str, tf.Tensor]`: The dummy inputs. - """ - VISION_DUMMY_INPUTS = tf.random.uniform( - shape=(3, self.config.num_channels, self.config.image_size, self.config.image_size), - dtype=tf.float32, - ) - return {"pixel_values": tf.constant(VISION_DUMMY_INPUTS)} - - @tf.function( - input_signature=[ - { - "pixel_values": tf.TensorSpec((None, None, None, None), tf.float32, name="pixel_values"), - } - ] - ) - def serving(self, inputs): - """ - Method used for serving the model. - - Args: - inputs (`Dict[str, tf.Tensor]`): - The input of the saved model as a dictionary of tensors. - """ - output = self.call(inputs) - return self.serving_output(output) - VIT_MAE_START_DOCSTRING = r""" This model inherits from [`TFPreTrainedModel`]. 
Check the superclass documentation for the generic methods the @@ -859,18 +829,6 @@ def call( return outputs - def serving_output(self, output: TFViTMAEModelOutput) -> TFViTMAEModelOutput: - hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFViTMAEModelOutput( - last_hidden_state=output.last_hidden_state, - mask=output.mask, - ids_restore=output.ids_restore, - hidden_states=hidden_states, - attentions=attentions, - ) - class TFViTMAEDecoder(tf.keras.layers.Layer): def __init__(self, config, num_patches, **kwargs): @@ -1173,15 +1131,3 @@ def call( hidden_states=outputs.hidden_states, attentions=outputs.attentions, ) - - def serving_output(self, output: TFViTMAEForPreTrainingOutput) -> TFViTMAEForPreTrainingOutput: - hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFViTMAEForPreTrainingOutput( - logits=output.logits, - mask=output.mask, - ids_restore=output.ids_restore, - hidden_states=hidden_states, - attentions=attentions, - ) diff --git a/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py b/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py index 3ee16127b323b4..39e1539e70a787 100644 --- a/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py +++ b/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py @@ -19,7 +19,7 @@ import warnings from dataclasses import dataclass -from typing import Any, Dict, Optional, Tuple, Union +from typing import Any, Optional, Tuple, Union import numpy as np import tensorflow as tf @@ -1185,14 +1185,18 @@ class TFWav2Vec2PreTrainedModel(TFPreTrainedModel): main_input_name = "input_values" @property - def dummy_inputs(self) -> Dict[str, tf.Tensor]: - pad_token = 0.0 - input_values = tf.convert_to_tensor(np.random.rand(1, 16000), tf.float32) - dummy_inputs = { - "input_values": input_values, - "attention_mask": tf.cast(tf.not_equal(input_values, pad_token), tf.float32), + def input_signature(self): + return { + "input_values": tf.TensorSpec((None, None), tf.float32, name="input_values"), + "attention_mask": tf.TensorSpec((None, None), tf.float32, name="attention_mask"), + } + + @property + def dummy_inputs(self): + return { + "input_values": tf.random.uniform(shape=(1, 16000), dtype=tf.float32), + "attention_mask": tf.ones(shape=(1, 16000), dtype=tf.float32), } - return dummy_inputs def __init__(self, config, *inputs, **kwargs): super().__init__(config, *inputs, **kwargs) @@ -1201,20 +1205,6 @@ def __init__(self, config, *inputs, **kwargs): "to train/fine-tine this model, you need a GPU or a TPU" ) - @tf.function( - input_signature=[ - { - "input_values": tf.TensorSpec((None, None), tf.float32, name="input_values"), - "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), - "token_type_ids": tf.TensorSpec((None, None), tf.int32, name="token_type_ids"), - } - ] - ) - def serving(self, inputs): - output = self.call(input_values=inputs, training=False) - - return self.serving_output(output) - def _get_feat_extract_output_lengths(self, input_lengths, add_adapter=None): """ Computes the output length of the convolutional layers @@ -1427,17 +1417,6 @@ def call( return outputs - def serving_output(self, output): - hidden_states = tf.convert_to_tensor(output.hidden_states) if 
self.config.output_hidden_states else None - attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFWav2Vec2BaseModelOutput( - last_hidden_state=output.last_hidden_state, - extract_features=output.extract_features, - hidden_states=hidden_states, - attentions=attentions, - ) - @add_start_docstrings( """TFWav2Vec2 Model with a `language modeling` head on top for Connectionist Temporal Classification (CTC).""", @@ -1591,11 +1570,6 @@ def call( attentions=outputs.attentions, ) - def serving_output(self, output: TFCausalLMOutput) -> TFCausalLMOutput: - hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - return TFCausalLMOutput(logits=output.logits, hidden_states=hidden_states, attentions=attentions) - class TFWav2Vec2ForSequenceClassification(TFWav2Vec2PreTrainedModel): def __init__(self, config): @@ -1693,27 +1667,3 @@ def call( hidden_states=outputs.hidden_states, attentions=outputs.attentions, ) - - def serving_output(self, output): - hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFSequenceClassifierOutput( - logits=output.logits, - hidden_states=hidden_states, - attentions=attentions, - ) - - @tf.function( - input_signature=[ - { - "input_values": tf.TensorSpec((None, None), tf.float32, name="input_values"), - "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), - "token_type_ids": tf.TensorSpec((None, None), tf.int32, name="token_type_ids"), - } - ] - ) - def serving(self, inputs): - output = self.call(input_values=inputs) - - return self.serving_output(output) diff --git a/src/transformers/models/whisper/modeling_tf_whisper.py b/src/transformers/models/whisper/modeling_tf_whisper.py index 11168df3f9ca7c..b8cd87f67ef03a 100644 --- a/src/transformers/models/whisper/modeling_tf_whisper.py +++ b/src/transformers/models/whisper/modeling_tf_whisper.py @@ -486,18 +486,13 @@ def dummy_inputs(self) -> Dict[str, tf.Tensor]: "decoder_input_ids": tf.constant([[2, 3]], dtype=tf.int32), } - @tf.function( - input_signature=[ - { - "input_features": tf.TensorSpec((None, None, None), tf.float32, name="input_features"), - "decoder_input_ids": tf.TensorSpec((None, None), tf.int32, name="decoder_input_ids"), - "decoder_attention_mask": tf.TensorSpec((None, None), tf.int32, name="decoder_attention_mask"), - } - ] - ) - def serving(self, inputs): - output = self.call(inputs) - return self.serving_output(output) + @property + def input_signature(self): + return { + "input_features": tf.TensorSpec((None, self.config.num_mel_bins, None), tf.float32, name="input_features"), + "decoder_input_ids": tf.TensorSpec((None, None), tf.int32, name="decoder_input_ids"), + "decoder_attention_mask": tf.TensorSpec((None, None), tf.int32, name="decoder_attention_mask"), + } WHISPER_START_DOCSTRING = r""" diff --git a/src/transformers/models/xglm/modeling_tf_xglm.py b/src/transformers/models/xglm/modeling_tf_xglm.py index 236720ae49df47..6cc9db021cf9ac 100644 --- a/src/transformers/models/xglm/modeling_tf_xglm.py +++ b/src/transformers/models/xglm/modeling_tf_xglm.py @@ -28,7 +28,6 @@ # Public API from ...file_utils import ( - DUMMY_INPUTS, add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward, @@ 
-620,29 +619,6 @@ class TFXGLMPreTrainedModel(TFPreTrainedModel): config_class = XGLMConfig base_model_prefix = "model" - @property - def dummy_inputs(self): - pad_token = 1 - input_ids = tf.cast(tf.convert_to_tensor(DUMMY_INPUTS), tf.int32) - dummy_inputs = { - "input_ids": input_ids, - "attention_mask": tf.cast(input_ids != pad_token, tf.int32), - } - return dummy_inputs - - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), - } - ] - ) - def serving(self, inputs): - output = self.call(inputs) - - return self.serving_output(output) - XGLM_START_DOCSTRING = r""" This model inherits from [`TFPreTrainedModel`]. Check the superclass documentation for the generic methods the @@ -821,24 +797,6 @@ def call( return outputs - def serving_output(self, output): - pkv = tf.convert_to_tensor(output.past_key_values) if self.config.use_cache else None - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - cross_attns = ( - tf.convert_to_tensor(output.cross_attentions) - if self.config.output_attentions and self.config.add_cross_attention - else None - ) - - return TFBaseModelOutputWithPastAndCrossAttentions( - last_hidden_state=output.hidden_states, - past_key_values=pkv, - hidden_states=hs, - attentions=attns, - cross_attentions=cross_attns, - ) - @add_start_docstrings( """ @@ -971,22 +929,3 @@ def call( attentions=outputs.attentions, cross_attentions=outputs.cross_attentions, ) - - def serving_output(self, output): - pkv = tf.convert_to_tensor(output.past_key_values) if self.config.use_cache else None - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - cross_attns = ( - tf.convert_to_tensor(output.cross_attentions) - if self.config.output_attentions and self.config.add_cross_attention - else None - ) - - return TFCausalLMOutputWithCrossAttentions( - loss=output.loss, - logits=output.logits, - past_key_values=pkv, - hidden_states=hs, - attentions=attns, - cross_attentions=cross_attns, - ) diff --git a/src/transformers/models/xlm/modeling_tf_xlm.py b/src/transformers/models/xlm/modeling_tf_xlm.py index 1815b27c859529..80a214280cb6f3 100644 --- a/src/transformers/models/xlm/modeling_tf_xlm.py +++ b/src/transformers/models/xlm/modeling_tf_xlm.py @@ -732,13 +732,6 @@ def call( return outputs - # Copied from transformers.models.distilbert.modeling_tf_distilbert.TFDistilBertModel.serving_output - def serving_output(self, output): - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFBaseModelOutput(last_hidden_state=output.last_hidden_state, hidden_states=hs, attentions=attns) - class TFXLMPredLayer(tf.keras.layers.Layer): """ @@ -876,12 +869,6 @@ def call( logits=outputs, hidden_states=transformer_outputs.hidden_states, attentions=transformer_outputs.attentions ) - def serving_output(self, output): - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFXLMWithLMHeadModelOutput(logits=output.logits, 
hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -960,13 +947,6 @@ def call( attentions=transformer_outputs.attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForSequenceClassification.serving_output - def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFSequenceClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1086,28 +1066,6 @@ def call( attentions=transformer_outputs.attentions, ) - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"), - "token_type_ids": tf.TensorSpec((None, None, None), tf.int32, name="token_type_ids"), - } - ] - ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForMultipleChoice.serving - def serving(self, inputs: Dict[str, tf.Tensor]): - output = self.call(input_ids=inputs) - - return self.serving_output(output) - - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForMultipleChoice.serving_output - def serving_output(self, output: TFMultipleChoiceModelOutput) -> TFMultipleChoiceModelOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFMultipleChoiceModelOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1188,13 +1146,6 @@ def call( attentions=transformer_outputs.attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForTokenClassification.serving_output - def serving_output(self, output: TFTokenClassifierOutput) -> TFTokenClassifierOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFTokenClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1285,12 +1236,3 @@ def call( hidden_states=transformer_outputs.hidden_states, attentions=transformer_outputs.attentions, ) - - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForQuestionAnswering.serving_output - def serving_output(self, output: TFQuestionAnsweringModelOutput) -> TFQuestionAnsweringModelOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFQuestionAnsweringModelOutput( - start_logits=output.start_logits, end_logits=output.end_logits, hidden_states=hs, attentions=attns - ) diff --git a/src/transformers/models/xlm_roberta/modeling_tf_xlm_roberta.py b/src/transformers/models/xlm_roberta/modeling_tf_xlm_roberta.py index ae2bae7d7aa773..65f3be9e2f277f 100644 --- a/src/transformers/models/xlm_roberta/modeling_tf_xlm_roberta.py +++ b/src/transformers/models/xlm_roberta/modeling_tf_xlm_roberta.py @@ -51,8 +51,6 @@ ) from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( - DUMMY_INPUTS, - MULTIPLE_CHOICE_DUMMY_INPUTS, add_code_sample_docstrings, 
add_start_docstrings, add_start_docstrings_to_model_forward, @@ -868,38 +866,6 @@ class TFXLMRobertaPreTrainedModel(TFPreTrainedModel): config_class = XLMRobertaConfig base_model_prefix = "roberta" - @property - # Copied from transformers.models.bert.modeling_tf_bert.TFBertPreTrainedModel.dummy_inputs - def dummy_inputs(self): - """ - Dummy inputs to build the network. - - Returns: - `Dict[str, tf.Tensor]`: The dummy inputs. - """ - dummy = {"input_ids": tf.constant(DUMMY_INPUTS, dtype=tf.int32)} - # Add `encoder_hidden_states` to make the cross-attention layers' weights initialized - if self.config.add_cross_attention: - batch_size, seq_len = tf.constant(DUMMY_INPUTS).shape - shape = (batch_size, seq_len) + (self.config.hidden_size,) - h = tf.random.uniform(shape=shape) - dummy["encoder_hidden_states"] = h - - return dummy - - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), - } - ] - ) - def serving(self, inputs): - output = self.call(inputs) - - return self.serving_output(output) - @add_start_docstrings( "The bare XLM RoBERTa Model transformer outputting raw hidden-states without any specific head on top.", @@ -974,27 +940,6 @@ def call( return outputs - # Copied from transformers.models.bert.modeling_tf_bert.TFBertModel.serving_output - def serving_output( - self, output: TFBaseModelOutputWithPoolingAndCrossAttentions - ) -> TFBaseModelOutputWithPoolingAndCrossAttentions: - output_cache = self.config.use_cache and self.config.is_decoder - pkv = tf.convert_to_tensor(output.past_key_values) if output_cache else None - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - cross_attns = tf.convert_to_tensor(output.cross_attentions) if output.cross_attentions is not None else None - if not (self.config.output_attentions and self.config.add_cross_attention): - cross_attns = None - - return TFBaseModelOutputWithPoolingAndCrossAttentions( - last_hidden_state=output.last_hidden_state, - pooler_output=output.pooler_output, - past_key_values=pkv, - hidden_states=hs, - attentions=attns, - cross_attentions=cross_attns, - ) - # Copied from transformers.models.roberta.modeling_tf_roberta.TFRobertaLMHead with Roberta->XLMRoberta class TFXLMRobertaLMHead(tf.keras.layers.Layer): @@ -1127,13 +1072,6 @@ def call( attentions=outputs.attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForMaskedLM.serving_output - def serving_output(self, output: TFMaskedLMOutput) -> TFMaskedLMOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFMaskedLMOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( "XLM-RoBERTa Model with a `language modeling` head on top for CLM fine-tuning.", @@ -1261,20 +1199,6 @@ def call( cross_attentions=outputs.cross_attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertLMHeadModel.serving_output - def serving_output(self, output: TFCausalLMOutputWithCrossAttentions) -> TFCausalLMOutputWithCrossAttentions: - output_cache = self.config.use_cache and self.config.is_decoder - pkv = tf.convert_to_tensor(output.past_key_values) if output_cache else None - hs = 
tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - cross_attns = tf.convert_to_tensor(output.cross_attentions) if output.cross_attentions is not None else None - if not (self.config.output_attentions and self.config.add_cross_attention): - cross_attns = None - - return TFCausalLMOutputWithCrossAttentions( - logits=output.logits, past_key_values=pkv, hidden_states=hs, attentions=attns, cross_attentions=cross_attns - ) - # Copied from transformers.models.roberta.modeling_tf_roberta.TFRobertaClassificationHead with Roberta->XLMRoberta class TFXLMRobertaClassificationHead(tf.keras.layers.Layer): @@ -1381,13 +1305,6 @@ def call( attentions=outputs.attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForSequenceClassification.serving_output - def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFSequenceClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1411,16 +1328,6 @@ def __init__(self, config, *inputs, **kwargs): 1, kernel_initializer=get_initializer(config.initializer_range), name="classifier" ) - @property - def dummy_inputs(self): - """ - Dummy inputs to build the network. - - Returns: - tf.Tensor with dummy inputs - """ - return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS, dtype=tf.int32)} - @unpack_inputs @add_start_docstrings_to_model_forward( XLM_ROBERTA_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length") @@ -1491,26 +1398,6 @@ def call( attentions=outputs.attentions, ) - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"), - } - ] - ) - def serving(self, inputs): - output = self.call(inputs) - - return self.serving_output(output) - - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForMultipleChoice.serving_output - def serving_output(self, output: TFMultipleChoiceModelOutput) -> TFMultipleChoiceModelOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFMultipleChoiceModelOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1595,13 +1482,6 @@ def call( attentions=outputs.attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForTokenClassification.serving_output - def serving_output(self, output: TFTokenClassifierOutput) -> TFTokenClassifierOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFTokenClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1694,12 +1574,3 @@ def call( hidden_states=outputs.hidden_states, attentions=outputs.attentions, ) - - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForQuestionAnswering.serving_output - def serving_output(self, output: TFQuestionAnsweringModelOutput) -> 
TFQuestionAnsweringModelOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFQuestionAnsweringModelOutput( - start_logits=output.start_logits, end_logits=output.end_logits, hidden_states=hs, attentions=attns - ) diff --git a/src/transformers/models/xlnet/modeling_tf_xlnet.py b/src/transformers/models/xlnet/modeling_tf_xlnet.py index 1d8a6692c09094..c5f3805ec98747 100644 --- a/src/transformers/models/xlnet/modeling_tf_xlnet.py +++ b/src/transformers/models/xlnet/modeling_tf_xlnet.py @@ -44,7 +44,6 @@ ) from ...tf_utils import check_embeddings_within_bounds, shape_list, stable_softmax from ...utils import ( - MULTIPLE_CHOICE_DUMMY_INPUTS, ModelOutput, add_code_sample_docstrings, add_start_docstrings, @@ -1177,15 +1176,6 @@ def call( return outputs - def serving_output(self, output): - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - mems = tf.convert_to_tensor(output.mems) if output.mems is not None else None - - return TFXLNetModelOutput( - last_hidden_state=output.last_hidden_state, mems=mems, hidden_states=hs, attentions=attns - ) - @add_start_docstrings( """ @@ -1345,13 +1335,6 @@ def call( attentions=transformer_outputs.attentions, ) - def serving_output(self, output): - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - mems = tf.convert_to_tensor(output.mems) if output.mems is not None else None - - return TFXLNetLMHeadModelOutput(logits=output.logits, mems=mems, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1439,15 +1422,6 @@ def call( attentions=transformer_outputs.attentions, ) - def serving_output(self, output): - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - mems = tf.convert_to_tensor(output.mems) if output.mems is not None else None - - return TFXLNetForSequenceClassificationOutput( - logits=output.logits, mems=mems, hidden_states=hs, attentions=attns - ) - @add_start_docstrings( """ @@ -1468,16 +1442,6 @@ def __init__(self, config, *inputs, **kwargs): 1, kernel_initializer=get_initializer(config.initializer_range), name="logits_proj" ) - @property - def dummy_inputs(self): - """ - Dummy inputs to build the network. 
- - Returns: - tf.Tensor with dummy inputs - """ - return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS, dtype=tf.int32)} - @unpack_inputs @add_start_docstrings_to_model_forward(XLNET_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) @add_code_sample_docstrings( @@ -1559,27 +1523,6 @@ def call( attentions=transformer_outputs.attentions, ) - @tf.function( - input_signature=[ - { - "input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"), - "token_type_ids": tf.TensorSpec((None, None, None), tf.int32, name="token_type_ids"), - } - ] - ) - def serving(self, inputs): - output = self.call(inputs) - - return self.serving_output(output) - - def serving_output(self, output): - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - mems = tf.convert_to_tensor(output.mems) if output.mems is not None else None - - return TFXLNetForMultipleChoiceOutput(logits=output.logits, mems=mems, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1660,13 +1603,6 @@ def call( attentions=transformer_outputs.attentions, ) - def serving_output(self, output): - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - mems = tf.convert_to_tensor(output.mems) if output.mems is not None else None - - return TFXLNetForTokenClassificationOutput(logits=output.logits, mems=mems, hidden_states=hs, attentions=attns) - @add_start_docstrings( """ @@ -1760,16 +1696,3 @@ def call( hidden_states=transformer_outputs.hidden_states, attentions=transformer_outputs.attentions, ) - - def serving_output(self, output): - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - mems = tf.convert_to_tensor(output.mems) if output.mems is not None else None - - return TFXLNetForQuestionAnsweringSimpleOutput( - start_logits=output.start_logits, - end_logits=output.end_logits, - mems=mems, - hidden_states=hs, - attentions=attns, - ) diff --git a/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_tf_{{cookiecutter.lowercase_modelname}}.py b/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_tf_{{cookiecutter.lowercase_modelname}}.py index 80e2d8ed1e0978..a0da6fc492f68e 100644 --- a/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_tf_{{cookiecutter.lowercase_modelname}}.py +++ b/templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_tf_{{cookiecutter.lowercase_modelname}}.py @@ -803,23 +803,6 @@ class TF{{cookiecutter.camelcase_modelname}}PreTrainedModel(TFPreTrainedModel): config_class = {{cookiecutter.camelcase_modelname}}Config base_model_prefix = "{{cookiecutter.lowercase_modelname}}" - @property - def dummy_inputs(self): - """ - Dummy inputs to build the network. - - Returns: - `Dict[str, tf.Tensor]`: The dummy inputs. 
- """ - dummy = {"input_ids": tf.constant(DUMMY_INPUTS, dtype=tf.int64)} - # Add `encoder_hidden_states` to make the cross-attention layers' weights initialized - if self.config.add_cross_attention: - batch_size, seq_len = tf.constant(DUMMY_INPUTS).shape - shape = (batch_size, seq_len) + (self.config.hidden_size,) - h = tf.random.uniform(shape=shape) - dummy["encoder_hidden_states"] = h - - return dummy {{cookiecutter.uppercase_modelname}}_START_DOCSTRING = r""" @@ -991,24 +974,6 @@ def call( return outputs - def serving_output( - self, output: TFBaseModelOutputWithPastAndCrossAttentions - ) -> TFBaseModelOutputWithPastAndCrossAttentions: - output_cache = self.config.use_cache and self.config.is_decoder - pkv = tf.convert_to_tensor(output.past_key_values) if output_cache else None - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - cross_attns = tf.convert_to_tensor(output.cross_attentions) if output.cross_attentions is not None else None - if not (self.config.output_attentions and self.config.add_cross_attention): - cross_attns = None - - return TFBaseModelOutputWithPastAndCrossAttentions( - last_hidden_state=output.last_hidden_state, - past_key_values=pkv, - hidden_states=hs, - attentions=attns, - cross_attentions=cross_attns, - ) @add_start_docstrings("""{{cookiecutter.modelname}} Model with a `language modeling` head on top. """, {{cookiecutter.uppercase_modelname}}_START_DOCSTRING) @@ -1084,13 +1049,6 @@ def call( attentions=outputs.attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForMaskedLM.serving_output - def serving_output(self, output: TFMaskedLMOutput) -> TFMaskedLMOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFMaskedLMOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """{{cookiecutter.modelname}} Model with a `language modeling` head on top for CLM fine-tuning. 
""", {{cookiecutter.uppercase_modelname}}_START_DOCSTRING @@ -1206,19 +1164,6 @@ def call( cross_attentions=outputs.cross_attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertLMHeadModel.serving_output - def serving_output(self, output: TFCausalLMOutputWithCrossAttentions) -> TFCausalLMOutputWithCrossAttentions: - output_cache = self.config.use_cache and self.config.is_decoder - pkv = tf.convert_to_tensor(output.past_key_values) if output_cache else None - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - cross_attns = tf.convert_to_tensor(output.cross_attentions) if output.cross_attentions is not None else None - if not (self.config.output_attentions and self.config.add_cross_attention): - cross_attns = None - - return TFCausalLMOutputWithCrossAttentions( - logits=output.logits, past_key_values=pkv, hidden_states=hs, attentions=attns, cross_attentions=cross_attns - ) class TF{{cookiecutter.camelcase_modelname}}ClassificationHead(tf.keras.layers.Layer): @@ -1318,13 +1263,6 @@ def call( attentions=outputs.attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForSequenceClassification.serving_output - def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFSequenceClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """{{cookiecutter.modelname}} Model with a multiple choice classification head on top (a linear layer on top of @@ -1343,16 +1281,6 @@ def __init__(self, config: {{cookiecutter.camelcase_modelname}}Config, *inputs, units=1, kernel_initializer=get_initializer(config.initializer_range), name="classifier" ) - @property - def dummy_inputs(self) -> Dict[str, tf.Tensor]: - """ - Dummy inputs to build the network. 
- - Returns: - tf.Tensor with dummy inputs - """ - return {"input_ids": tf.constant(MULTIPLE_CHOICE_DUMMY_INPUTS, dtype=tf.int64)} - @unpack_inputs @add_start_docstrings_to_model_forward({{cookiecutter.uppercase_modelname}}_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length")) @add_code_sample_docstrings( @@ -1441,24 +1369,6 @@ def call( attentions=outputs.attentions, ) - @tf.function(input_signature=[{ - "input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"), - "attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"), - "token_type_ids": tf.TensorSpec((None, None, None), tf.int32, name="token_type_ids"), - }]) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForMultipleChoice.serving - def serving(self, inputs: Dict[str, tf.Tensor]) -> TFMultipleChoiceModelOutput: - output = self.call(input_ids=inputs) - - return self.serving_output(output) - - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForMultipleChoice.serving_output - def serving_output(self, output: TFMultipleChoiceModelOutput) -> TFMultipleChoiceModelOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFMultipleChoiceModelOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """{{cookiecutter.modelname}} Model with a token classification head on top (a linear layer on top of @@ -1532,13 +1442,6 @@ def call( attentions=outputs.attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForTokenClassification.serving_output - def serving_output(self, output: TFTokenClassifierOutput) -> TFTokenClassifierOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFTokenClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) - @add_start_docstrings( """{{cookiecutter.modelname}} Model with a span classification head on top for extractive question-answering tasks like SQuAD (a linear @@ -1625,14 +1528,6 @@ def call( attentions=outputs.attentions, ) - # Copied from transformers.models.bert.modeling_tf_bert.TFBertForQuestionAnswering.serving_output - def serving_output(self, output: TFQuestionAnsweringModelOutput) -> TFQuestionAnsweringModelOutput: - hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None - attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None - - return TFQuestionAnsweringModelOutput( - start_logits=output.start_logits, end_logits=output.end_logits, hidden_states=hs, attentions=attns - ) {% else %} import random @@ -2777,26 +2672,6 @@ def call( return outputs - # Copied from transformers.models.bart.modeling_tf_bart.TFBartModel.serving_output - def serving_output(self, output): - pkv = tf.tuple(output.past_key_values)[1] if self.config.use_cache else None - dec_hs = tf.convert_to_tensor(output.decoder_hidden_states) if self.config.output_hidden_states else None - dec_attns = tf.convert_to_tensor(output.decoder_attentions) if self.config.output_attentions else None - cross_attns = tf.convert_to_tensor(output.cross_attentions) if self.config.output_attentions else None - enc_hs = tf.convert_to_tensor(output.encoder_hidden_states) if self.config.output_hidden_states else None - enc_attns 
= tf.convert_to_tensor(output.encoder_attentions) if self.config.output_attentions else None - - return TFSeq2SeqModelOutput( - last_hidden_state=output.last_hidden_state, - past_key_values=pkv, - decoder_hidden_states=dec_hs, - decoder_attentions=dec_attns, - cross_attentions=cross_attns, - encoder_last_hidden_state=output.encoder_last_hidden_state, - encoder_hidden_states=enc_hs, - encoder_attentions=enc_attns, - ) - # Copied from transformers.models.bart.modeling_tf_bart.BiasLayer class BiasLayer(tf.keras.layers.Layer): @@ -2944,26 +2819,6 @@ def call( encoder_attentions=outputs.encoder_attentions, # 2 of e out ) - # Copied from transformers.models.bart.modeling_tf_bart.TFBartForConditionalGeneration.serving_output - def serving_output(self, output): - pkv = tf.tuple(output.past_key_values)[1] if self.config.use_cache else None - dec_hs = tf.convert_to_tensor(output.decoder_hidden_states) if self.config.output_hidden_states else None - dec_attns = tf.convert_to_tensor(output.decoder_attentions) if self.config.output_attentions else None - cross_attns = tf.convert_to_tensor(output.cross_attentions) if self.config.output_attentions else None - enc_hs = tf.convert_to_tensor(output.encoder_hidden_states) if self.config.output_hidden_states else None - enc_attns = tf.convert_to_tensor(output.encoder_attentions) if self.config.output_attentions else None - - return TFSeq2SeqLMOutput( - logits=output.logits, - past_key_values=pkv, - decoder_hidden_states=dec_hs, - decoder_attentions=dec_attns, - cross_attentions=cross_attns, - encoder_last_hidden_state=output.encoder_last_hidden_state, - encoder_hidden_states=enc_hs, - encoder_attentions=enc_attns, - ) - def prepare_inputs_for_generation( self, decoder_input_ids, diff --git a/tests/test_modeling_tf_common.py b/tests/test_modeling_tf_common.py index 02d5077e233cd0..69363686837b8a 100644 --- a/tests/test_modeling_tf_common.py +++ b/tests/test_modeling_tf_common.py @@ -1677,7 +1677,10 @@ def test_int_support(self): # After testing that the model accepts all int inputs, confirm that its dummies are int32 for key, tensor in model.dummy_inputs.items(): - self.assertTrue(isinstance(tensor, tf.Tensor), "Dummy inputs should be tf.Tensor!") + self.assertTrue( + isinstance(tensor, tf.Tensor) or tf.keras.backend.is_keras_tensor(tensor), + "Dummy inputs should be tf.Tensor!", + ) if tensor.dtype.is_integer: self.assertTrue(tensor.dtype == tf.int32, "Integer dummy inputs should be tf.int32!")
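After this change, a TF model that cannot use the generic defaults only has to describe its inputs; the base class derives dummy inputs and the serving signature from `input_signature`. A minimal sketch of that override for a hypothetical float-input model (the class, config, and shapes below are illustrative only, not part of this patch):

    import tensorflow as tf
    from transformers import PretrainedConfig, TFPreTrainedModel

    class MyAudioConfig(PretrainedConfig):
        # Hypothetical config, for illustration only
        model_type = "my-audio-model"

    class TFMyAudioModel(TFPreTrainedModel):
        config_class = MyAudioConfig
        main_input_name = "input_values"

        @property
        def input_signature(self):
            # Float features rather than int token ids, so the generic
            # ("input_ids", tf.int32) default would be wrong for this model.
            return {
                "input_values": tf.TensorSpec((None, None), tf.float32, name="input_values"),
                "attention_mask": tf.TensorSpec((None, None), tf.float32, name="attention_mask"),
            }

No `serving`, `serving_output`, or `dummy_inputs` override is needed for the common case; in the hunks above, Whisper only overrides `input_signature`, and Wav2Vec2 additionally overrides `dummy_inputs` to supply a realistic 16000-sample waveform.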