-
Notifications
You must be signed in to change notification settings - Fork 0
/
Makefile
119 lines (87 loc) · 3.5 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# platform
# Kernel name as printed by `uname`, captured once while the Makefile is parsed.
UNAME := $(shell uname)

# cc code
SRC = src
BUILD = build
# INSTALL_SP is assigned further down. These flag variables are expanded lazily
# (when a recipe uses them), so the forward reference resolves correctly.
CXXFLAGS += -std=c++17 -Wall -Werror -O2 -DNDEBUG
CXXFLAGS += -I${INSTALL_SP}/include
LDFLAGS += -L${INSTALL_SP}/lib -lsentencepiece
LDFLAGS += -Wl,-rpath,${INSTALL_SP}/lib

# sentencepiece related
# Submodule checkout, out-of-tree CMake build directory, and install prefix.
SRC_SP = third_party/sentencepiece
BUILD_SP = build_sentencepiece
INSTALL_SP = install_sentencepiece

# demo related
INSTALL_DEMO_MODEL = install_demo_model
# Environment prefix for running the installed sentencepiece tools: points both
# DYLD_LIBRARY_PATH and LD_LIBRARY_PATH at the local install's lib directory.
LIB_PATH_SET = DYLD_LIBRARY_PATH=${INSTALL_SP}/lib LD_LIBRARY_PATH=${INSTALL_SP}/lib

# mistral related
INSTALL_MISTRAL = install_mistral

# clang-format
# The patterns are quoted so the shell hands them to `find` verbatim instead of
# expanding them against whatever happens to be in the current directory.
# Double quotes are used because FMT embeds this text inside a single-quoted
# `sh -c` script.
CLANG_EXTS = -iname "*.h" -o -iname "*.c" -o -name "*.cc"
CLANG_FMT = clang-format -i --style=file
# Usage: ${FMT} <folder>...
# The trailing `sh` becomes $0 of the inline script, so the folders appended by
# the caller arrive as "$@" (written $$@ to get past Make's own expansion).
FMT = sh -c 'find "$$@" ${CLANG_EXTS} | xargs ${CLANG_FMT}' sh
FMT_FOLDERS = src
# ==================================================================================================
# actions
# ---------
# bootstrap
# These two targets are aliases rather than files; declaring them phony keeps a
# stray file of the same name from masking them.
.PHONY: bootstrap bootstrap_sentencepiece

# Run every bootstrap step: sentencepiece, the demo model and the mistral tokenizer.
bootstrap: bootstrap_sentencepiece bootstrap_demo_model bootstrap_mistral

bootstrap_sentencepiece: ${INSTALL_SP}/lib/libsentencepiece.a

# Fetch the submodules, configure sentencepiece out-of-tree in BUILD_SP, build
# it, and install it into INSTALL_SP.
# The whole chain is one shell command because it depends on the `cd`; the
# `../` prefixes assume BUILD_SP sits directly under the top-level directory.
# The build goes through `cmake --build` so it works with whichever generator
# CMake selected, instead of assuming a generated Makefile.
${INSTALL_SP}/lib/libsentencepiece.a:
	git submodule update --init && \
	mkdir -p ${BUILD_SP} && \
	cd ${BUILD_SP} && \
	cmake ../${SRC_SP} && \
	cmake --build . --parallel && \
	cmake --install . --prefix ../${INSTALL_SP}
# bootstrap_demo_model is an alias, not a file.
.PHONY: bootstrap_demo_model

# Make sure the demo model exists, then encode "Hello" with it using the
# installed spm_encode and print the resulting pieces.
bootstrap_demo_model: bootstrap_sentencepiece ${INSTALL_DEMO_MODEL}/shakespeare.model
	echo "Hello" | ${LIB_PATH_SET} ${INSTALL_SP}/bin/spm_encode --model=${INSTALL_DEMO_MODEL}/shakespeare.model --output_format=piece

# Download the training text.
# wget creates its -O file even when the transfer fails, so the download goes
# to a temporary name and is renamed only on success; a failed download can
# then never leave behind a data.txt that looks up to date.
${INSTALL_DEMO_MODEL}/data.txt:
	mkdir -p $(@D)
	wget https://raw.githubusercontent.com/brunoklein99/deep-learning-notes/master/shakespeare.txt -O $@.tmp || { rm -f $@.tmp; exit 1; }
	mv -f $@.tmp $@

# Train the BPE model (vocabulary size 8000) from the downloaded text.
# The recipe runs ${INSTALL_SP}/bin/spm_train, so sentencepiece must be
# installed first. The order-only prerequisite enforces that even under
# `make -j`, without forcing a retrain whenever the library is rebuilt.
${INSTALL_DEMO_MODEL}/shakespeare.model: ${INSTALL_DEMO_MODEL}/data.txt | ${INSTALL_SP}/lib/libsentencepiece.a
	${LIB_PATH_SET} ${INSTALL_SP}/bin/spm_train --input=${INSTALL_DEMO_MODEL}/data.txt --model_prefix=${INSTALL_DEMO_MODEL}/shakespeare --vocab_size=8000 --model_type=bpe
# bootstrap_mistral is an alias, not a file.
.PHONY: bootstrap_mistral
bootstrap_mistral: ${INSTALL_MISTRAL}/tokenizer.model

# Download the tokenizer model.
# wget creates its -O file even when the transfer fails, so the download goes
# to a temporary name and is renamed only on success; a failed download can
# then never leave behind a tokenizer.model that looks up to date.
${INSTALL_MISTRAL}/tokenizer.model:
	mkdir -p $(@D)
	wget https://github.com/stevenchen-db/steven-jianwei/releases/download/v0.0.1/tokenizer.model -O $@.tmp || { rm -f $@.tmp; exit 1; }
	mv -f $@.tmp $@
# Check for python3 (installing it and pip through apt-get when it is missing),
# then pip-install the torch and transformers packages.
# Every line is prefixed with `@`, so only the echoed messages and the tools'
# own output are shown.
.PHONY: bootstrap_python
bootstrap_python:
	@echo "Checking for Python3..."
	@if ! command -v python3 >/dev/null 2>&1; then \
		echo "Python3 not found. Installing..."; \
		sudo apt-get update; \
		sudo apt-get install -y python3 python3-pip; \
	fi
	@echo "Python3 is installed."
	@echo "Installing torch..."
	@python3 -m pip install torch
	@echo "torch is installed."
	@python3 -m pip install transformers
	@echo "transformers is installed"
# ---------
# cc
# None of the targets in this section name a file they create.
.PHONY: compile encoder_demo encoder_mistral
.PHONY: run_encoder_demo run_encoder_mistral run_encoder_mistral_py

# Build both encoder binaries.
compile: encoder_demo encoder_mistral

# Short aliases for the binaries under BUILD.
encoder_demo: ${BUILD}/encoder_demo
encoder_mistral: ${BUILD}/encoder_mistral

# Build (when needed) and then execute the corresponding binary.
run_encoder_demo: ${BUILD}/encoder_demo
	${BUILD}/encoder_demo

run_encoder_mistral: ${BUILD}/encoder_mistral
	${BUILD}/encoder_mistral

# Run the Python tokenizer script with python3.
run_encoder_mistral_py:
	python3 ${SRC}/mistral_tokenize.py
# Both encoder binaries are compiled from the same source file; they differ only
# in the model path baked in through -DMODEL_FILE. The path is supplied per
# target and stays lazily expanded, exactly as when it was written inline.
${BUILD}/encoder_demo: encoder_model_file = ${INSTALL_DEMO_MODEL}/shakespeare.model
${BUILD}/encoder_mistral: encoder_model_file = ${INSTALL_MISTRAL}/tokenizer.model

# BUILD is an order-only prerequisite: the directory has to exist, but its
# timestamp never triggers a recompile.
${BUILD}/encoder_demo ${BUILD}/encoder_mistral: ${SRC}/encoder_main.cc | ${BUILD}
	${CXX} -o $@ ${CXXFLAGS} -DMODEL_FILE='"${encoder_model_file}"' $< ${LDFLAGS}

# Create the output directory on demand.
${BUILD}:
	mkdir -p $@
# ------
# format
# Neither target produces a file of its own name; declaring them phony keeps
# them working even if a file called `fmt` or `clean` appears in the directory.
.PHONY: fmt clean

# Run clang-format in place over the sources found under FMT_FOLDERS.
fmt:
	${FMT} ${FMT_FOLDERS}

# Remove the compiler output and the sentencepiece CMake build tree. The
# install_* directories are left untouched.
clean:
	rm -rf ${BUILD} ${BUILD_SP}