Skip to content

Commit

Permalink
fix the simdjson issue on gcc
Browse files (browse the repository at this point in the history)
  • Loading branch information
wenbingl committed Jan 29, 2024
1 parent b994688 commit 5e96ecd
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 6 deletions.
4 changes: 2 additions & 2 deletions tfmtok/bpe_encoder.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ class BpeEncoder {
}

auto ewsuffix = model_node.at_key("end_of_word_suffix");
if (ewsuffix.is_string()) {
if (ewsuffix.is_string()) {
end_of_word_suffix_ = model_node.at_key("end_of_word_suffix").get_c_str().value();
}

Expand Down Expand Up @@ -155,7 +155,7 @@ class BpeEncoder {
}
}

uint32_t GetTokenId(const std::string& key) const{
uint32_t GetTokenId(const std::string& key) const {
auto it = vocab_map_.find(key);
if (it != end(vocab_map_)) {
return it->second;
Expand Down
9 changes: 5 additions & 4 deletions tfmtok/token_bpe.cc
Original file line number Diff line number Diff line change
Expand Up @@ -130,9 +130,10 @@ void BPETokenizer::LoadPredefinedTokens(const TokenConfig& config) {
}

TfmStatus BPETokenizer::DecodeExtraArgs(const simdjson::dom::element& root) {
const simdjson::dom::element& decoder_obj = root.at_key("decoder");
if (decoder_obj.is_null()) {
return {kTfmErrorInvalidFile, "Cannot find the decoder key in the the tokenizer.json"};
simdjson::dom::element decoder_obj;
auto error = root.at_key("decoder").get(decoder_obj);
if (error != simdjson::SUCCESS && error != simdjson::NO_SUCH_FIELD) {
return {kTfmErrorInvalidFile, "Cannot parse the decoder section in the the tokenizer.json"};
}
TryToGetJson(decoder_obj, "add_prefix_space", decode_extra_args_.add_prefix_space);
return TfmStatus::OK();
Expand Down Expand Up @@ -377,7 +378,7 @@ TfmStatus BPETokenizer::Decode(const span<tfmTokenId_t const>& ids, std::string&
if (byte_decoder_.count(wchr) == 0 && wchr <= 0xFF) {
// std::cout << "Error: cannot find the byte_decoder_ for " << (uint32_t)(unsigned char)wchr << std::endl;
decoded_token.push_back(gsl::narrow<unsigned char>(wchr));
}else {
} else {
unsigned char uchr = byte_decoder_.at(wchr);
decoded_token.push_back(uchr);
}
Expand Down

0 comments on commit 5e96ecd

Please sign in to comment.