Skip to content

Commit

Permalink
Merge pull request PaddlePaddle#56 from smallv0221/yxp0301
Browse files Browse the repository at this point in the history
Fix tokenizer_util padding
  • Loading branch information
guoshengCS authored Mar 2, 2021
2 parents 054c95e + f32192c commit f4d4eac
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions paddlenlp/transformers/tokenizer_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -766,9 +766,9 @@ def get_input_ids(text):
encoded_inputs["attention_mask"] = [1] * len(encoded_inputs[
"input_ids"]) + [0] * difference
if return_token_type_ids:
# pad token_type_ids on the right with pad_token_type_id
encoded_inputs["token_type_ids"] = (
encoded_inputs["token_type_ids"] + [0] * difference)
encoded_inputs["token_type_ids"] +
[self.pad_token_type_id] * difference)
if return_special_tokens_mask:
encoded_inputs["special_tokens_mask"] = encoded_inputs[
"special_tokens_mask"] + [1] * difference
Expand All @@ -780,9 +780,9 @@ def get_input_ids(text):
1
] * len(encoded_inputs["input_ids"])
if return_token_type_ids:
# pad token_type_ids on the left with pad_token_type_id
encoded_inputs["token_type_ids"] = (
[0] * difference + encoded_inputs["token_type_ids"])
[self.pad_token_type_id] * difference +
encoded_inputs["token_type_ids"])
if return_special_tokens_mask:
encoded_inputs["special_tokens_mask"] = [
1
Expand Down

0 comments on commit f4d4eac

Please sign in to comment.