Skip to content

Commit

Permalink
add cut with tag functions
Browse files Browse the repository at this point in the history
  • Loading branch information
zhujun1980 committed May 30, 2018
1 parent b3f177d commit ef6eecb
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 0 deletions.
34 changes: 34 additions & 0 deletions lib/jieba.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ extern "C" {
#include "jieba.h"
}

#include <cstring>
#include "cppjieba/Jieba.hpp"
#include "cppjieba/KeywordExtractor.hpp"

Expand Down Expand Up @@ -41,6 +42,39 @@ CJiebaWord* Cut(Jieba handle, const char* sentence, size_t len) {
return res;
}

// Cuts `sentence` (exactly `len` bytes) with the cppjieba instance behind
// `handle` and returns every word paired with the tag that Tag() assigned.
//
// The result is ONE heap buffer of variable-length records laid back to back:
//
//   [CJiebaWordWithTag header][tag bytes][NUL] [header][tag bytes][NUL] ... [header]
//
// - `word` points INTO the caller's `sentence` buffer (nothing is copied), so
//   `sentence` must stay alive for as long as the result is used.
// - `len` is the byte length of that word; `word` is not NUL-terminated.
// - `tag` is stored inline after the header and is NUL-terminated.
// - The list ends with a bare header whose `word` is NULL and `len` is 0.
//
// To step to the next record a reader advances by
// sizeof(CJiebaWordWithTag) + strlen(tag) + 1 bytes.
//
// Returns NULL if the buffer cannot be allocated. Release the result with
// FreeWordTag().
//
// NOTE(review): records are not padded, so every record after the first may
// start at an address that is not aligned for CJiebaWordWithTag. The stride is
// kept as-is because existing readers presumably walk the buffer with the same
// arithmetic -- confirm before changing the layout.
CJiebaWordWithTag* CutWithTag(Jieba handle, const char* sentence, size_t len) {
  cppjieba::Jieba* jieba = (cppjieba::Jieba*)handle;
  vector<pair<string, string> > tag_words;
  jieba->Tag(string(sentence, len), tag_words);

  // Start with the terminating header, then add one header plus the
  // NUL-terminated tag for every word.
  size_t buf_size = sizeof(CJiebaWordWithTag);
  for (size_t i = 0; i < tag_words.size(); i++) {
    buf_size += sizeof(CJiebaWordWithTag) + tag_words[i].second.size() + 1;
  }

  // calloc zero-fills the buffer, which supplies the NUL after each tag.
  // Bail out on allocation failure instead of writing through a null pointer.
  CJiebaWordWithTag* res = static_cast<CJiebaWordWithTag*>(calloc(1, buf_size));
  if (res == NULL) {
    return NULL;
  }

  char* ptr = reinterpret_cast<char*>(res);
  size_t offset = 0;  // byte position of the current word inside `sentence`
  for (size_t i = 0; i < tag_words.size(); i++) {
    const string& word = tag_words[i].first;
    const string& tag = tag_words[i].second;

    CJiebaWordWithTag* current = reinterpret_cast<CJiebaWordWithTag*>(ptr);
    // Words are assumed to be consecutive slices of `sentence`, so a running
    // offset is enough to locate each one without copying it.
    current->word = sentence + offset;
    current->len = word.size();
    memcpy(current->tag, tag.data(), tag.size());

    ptr += sizeof(CJiebaWordWithTag) + tag.size() + 1;
    offset += word.size();
  }

  // Explicit end-of-list marker (already zeroed, spelled out for readers).
  CJiebaWordWithTag* last = reinterpret_cast<CJiebaWordWithTag*>(ptr);
  last->word = NULL;
  last->len = 0;
  return res;
}

// Releases a buffer previously returned by CutWithTag().
// The whole record list lives in a single allocation, so one free() suffices;
// passing NULL is harmless because free(NULL) is a no-op.
void FreeWordTag(CJiebaWordWithTag* words) {
  void* buffer = static_cast<void*>(words);
  free(buffer);
}

CJiebaWord*
CutWithoutTagName(Jieba handle, const char* sentence, size_t len, const char* tagname)
{
Expand Down
10 changes: 10 additions & 0 deletions lib/jieba.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,22 @@ typedef struct {
size_t len;
} CJiebaWord;

/*
 * One record of the list returned by CutWithTag().
 *
 * Records are variable-length: this fixed header is followed directly by the
 * tag text, and the next record starts right after the tag's NUL terminator.
 * The list is terminated by a record whose `word` is NULL and `len` is 0.
 */
typedef struct {
/* Start of the word. CutWithTag() sets this to a position inside the
 * caller's sentence buffer, so it is not NUL-terminated; use `len`. */
const char* word;
/* Length of the word in bytes. */
size_t len;
/* NUL-terminated tag text stored inline after the header. Declared as a
 * zero-length array so it adds nothing to sizeof(CJiebaWordWithTag). */
char tag[0];
} CJiebaWordWithTag;

/* Cuts the `len`-byte `sentence` using `handle` and returns a CJiebaWord list.
 * NOTE(review): the implementation is not visible here; presumably the result
 * is released with FreeWords() -- confirm. */
CJiebaWord* Cut(Jieba handle, const char* sentence, size_t len);

/* NOTE(review): implementation not visible here; judging by the name it cuts
 * the sentence and leaves out words carrying the given tag name -- confirm. */
CJiebaWord* CutWithoutTagName(Jieba, const char*, size_t, const char*);

/* Releases a CJiebaWord list. NOTE(review): presumably the counterpart of
 * Cut() / CutWithoutTagName() -- confirm against the implementation. */
void FreeWords(CJiebaWord* words);

/* Cuts the sentence (pointer + byte length) and returns the words together
 * with their tags as one heap buffer of packed CJiebaWordWithTag records,
 * terminated by a record whose `word` is NULL and `len` is 0. Each `word`
 * points into the caller's sentence buffer, so that buffer must outlive the
 * result. Release the result with FreeWordTag(). */
CJiebaWordWithTag* CutWithTag(Jieba, const char*, size_t);

/* Frees a buffer returned by CutWithTag(). */
void FreeWordTag(CJiebaWordWithTag* words);

/* NOTE(review): implementation not visible here; presumably adds `word` to
 * the user dictionary of `handle` and reports success -- confirm. */
bool JiebaInsertUserWord(Jieba handle, const char* word);

typedef void* Extractor;
Expand Down

0 comments on commit ef6eecb

Please sign in to comment.