Making make install work better by default. (huggingface#2004)

# What does this PR do?

Makes `make install` a much saner default for setting up local dev environments.

Fixes # (issue)

## Before submitting
- [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case).
- [ ] Did you read the [contributor guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests), Pull Request section?
- [ ] Was this discussed/approved via a GitHub issue or the [forum](https://discuss.huggingface.co/)? Please add a link to it if that's the case.
- [ ] Did you make sure to update the documentation with your changes? Here are the [documentation guidelines](https://github.com/huggingface/transformers/tree/main/docs), and [here are tips on formatting docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation).
- [ ] Did you write any new necessary tests?

## Who can review?
Anyone in the community is free to review the PR once the tests have passed. Feel free to tag members/contributors who may be interested in your PR.
yuanwu2017 · Sep 24, 2024 · ed89135 · ed89135
1 parent 648dd7b
commit ed89135
Show file tree

Hide file tree

Showing 9 changed files with 347 additions and 299 deletions.
diff --git a/Cargo.toml b/Cargo.toml
@@ -20,6 +20,10 @@ tokenizers = { version = "0.19.1", features = ["http"] }
 hf-hub = { version = "0.3.1", features = ["tokio"] }
 
 [profile.release]
+incremental = true
+
+[profile.release-binary]
+inherits = "release"
 debug = 1
 incremental = true
 lto = "fat"

diff --git a/Makefile b/Makefile
@@ -25,6 +25,10 @@ router-dev:
 rust-tests: install-router install-launcher
 	cargo test
 
+install-integration-tests:
+	cd integration-tests && pip install -r requirements.txt
+	cd clients/python && pip install .
+
 integration-tests: install-integration-tests
 	pytest -s -vv -m "not private" integration-tests
 

diff --git a/router/client/build.rs b/router/client/build.rs
@@ -13,7 +13,11 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
         .out_dir("src/v2/pb")
         .include_file("mod.rs")
         .compile_with_config(config, &["../../proto/generate.proto"], &["../../proto"])
-        .unwrap_or_else(|e| panic!("protobuf compilation failed: {e}"));
+        .map_err(|e| match e.kind(){
+            std::io::ErrorKind::NotFound => {panic!("`protoc` not found, install libprotoc")},
+            std::io::ErrorKind::Other => {panic!("`protoc` version unsupported, upgrade protoc: https://github.com/protocolbuffers/protobuf/releases")},
+            e => {e}
+        }).unwrap_or_else(|e| panic!("protobuf compilation failed: {e}"));
 
     fs::create_dir_all("src/v3/pb").unwrap_or(());
     let mut config = prost_build::Config::new();

diff --git a/server/Makefile b/server/Makefile
@@ -10,18 +10,26 @@ unit-tests:
 
 gen-server:
 	# Compile protos
-	pip install grpcio-tools==1.51.1 mypy-protobuf==3.4.0 'types-protobuf>=3.20.4' --no-cache-dir
+	pip install grpcio-tools==1.62.2 mypy-protobuf==3.6.0 'types-protobuf' --no-cache-dir
 	mkdir text_generation_server/pb || true
 	python -m grpc_tools.protoc -I../proto/v3 --python_out=text_generation_server/pb \
 		--grpc_python_out=text_generation_server/pb --mypy_out=text_generation_server/pb ../proto/v3/generate.proto
 	find text_generation_server/pb/ -type f -name "*.py" -print0 -exec sed -i -e 's/^\(import.*pb2\)/from . \1/g' {} \;
 	touch text_generation_server/pb/__init__.py
 
-install: gen-server
+install-server: gen-server
 	pip install pip --upgrade
 	pip install -r requirements_cuda.txt
 	pip install -e ".[bnb, accelerate, quantize, peft, outlines]"
 
+
+install: install-cuda
+	echo "Installed server"
+
+install-cuda: install-server install-flash-attention-v2-cuda install-vllm-cuda install-flash-attention
+
+install-rocm: install-server install-flash-attention-v2-rocm  install-vllm-rocm
+
 run-dev:
 	SAFETENSORS_FAST_GPU=1 python -m torch.distributed.run --nproc_per_node=2 text_generation_server/cli.py serve bigscience/bloom-560m --sharded
 

diff --git a/server/Makefile-flash-att b/server/Makefile-flash-att
@@ -1,16 +1,14 @@
 flash_att_commit := 3a9bfd076f98746c73362328958dbc68d145fbec
 
-flash-attention:
-    # Clone flash attention
-	pip install -U packaging ninja  --no-cache-dir
-	git clone https://github.com/HazyResearch/flash-attention.git
-
-build-flash-attention: flash-attention
-	cd flash-attention && git fetch && git checkout $(flash_att_commit)
-	cd flash-attention && python setup.py build
-	cd flash-attention/csrc/rotary && python setup.py build
-	cd flash-attention/csrc/layer_norm && python setup.py build
+build-flash-attention:
+	if [ ! -d 'flash-attention' ]; then \
+		pip install -U packaging ninja  --no-cache-dir && \
+		git clone https://github.com/HazyResearch/flash-attention.git && \
+		cd flash-attention && git fetch && git checkout $(flash_att_commit) && \
+		MAX_JOBS=8 python setup.py build && cd csrc/layer_norm && python setup.py build && cd ../rotary && python setup.py build; \
+	fi
 
 install-flash-attention: build-flash-attention
-	pip uninstall flash_attn rotary_emb dropout_layer_norm -y || true
-	cd flash-attention && python setup.py install && cd csrc/layer_norm && python setup.py install && cd ../rotary && python setup.py install
+	if [ -d 'flash-attention' ]; then \
+		cd flash-attention && python setup.py install && cd csrc/layer_norm && python setup.py install && cd ../rotary && python setup.py install; \
+	fi
diff --git a/server/Makefile-flash-att-v2 b/server/Makefile-flash-att-v2
@@ -1,29 +1,24 @@
-flash_att_v2_commit_cuda := v2.5.8
+flash_att_v2_commit_cuda := v2.5.9.post1
 flash_att_v2_commit_rocm := 2554f490101742ccdc56620a938f847f61754be6
 
+build-flash-attention-v2-cuda:
+	pip install -U packaging wheel
+	pip install flash-attn==$(flash_att_v2_commit_cuda)
 
-flash-attention-v2-cuda:
-  # Clone flash attention
-	pip install -U packaging ninja  --no-cache-dir
-	git clone https://github.com/Dao-AILab/flash-attention.git flash-attention-v2
+install-flash-attention-v2-cuda:
+	pip install -U packaging wheel
+	pip install flash-attn==$(flash_att_v2_commit_cuda)
 
-build-flash-attention-v2-cuda: flash-attention-v2-cuda
-	cd flash-attention-v2 && git fetch && git checkout $(flash_att_v2_commit_cuda)
-	cd flash-attention-v2 && git submodule update --init --recursive
-	cd flash-attention-v2 && python setup.py build
-
-install-flash-attention-v2-cuda: build-flash-attention-v2-cuda
-	cd flash-attention-v2 && git submodule update --init --recursive && python setup.py install
-
-flash-attention-v2-rocm:
-  # Clone flash attention
-	pip install -U packaging ninja  --no-cache-dir
-	git clone https://github.com/ROCm/flash-attention.git flash-attention-v2
-
-build-flash-attention-v2-rocm: flash-attention-v2-rocm
-	cd flash-attention-v2 && git fetch && git checkout $(flash_att_v2_commit_rocm)
-	cd flash-attention-v2 && git submodule update --init --recursive
-	cd flash-attention-v2 && GPU_ARCHS="gfx90a;gfx942" PYTORCH_ROCM_ARCH="gfx90a;gfx942" python setup.py build
+build-flash-attention-v2-rocm:
+	if [ ! -d 'flash-attention-v2' ]; then \
+		pip install -U packaging ninja  --no-cache-dir && \
+		git clone https://github.com/ROCm/flash-attention.git flash-attention-v2 && \
+		cd flash-attention-v2 && git fetch && git checkout $(flash_att_v2_commit_rocm) && \
+		git submodule update --init --recursive && GPU_ARCHS="gfx90a;gfx942" PYTORCH_ROCM_ARCH="gfx90a;gfx942" python setup.py build; \
+	fi
 
 install-flash-attention-v2-rocm: build-flash-attention-v2-rocm
-	cd flash-attention-v2 && git submodule update --init --recursive && python setup.py install
+	if [ -d 'flash-attention-v2' ]; then \
+		cd flash-attention-v2 &&  \
+		GPU_ARCHS="gfx90a;gfx942" PYTORCH_ROCM_ARCH="gfx90a;gfx942" python setup.py install; \
+	fi
diff --git a/server/Makefile-vllm b/server/Makefile-vllm
@@ -1,25 +1,26 @@
-vllm-cuda:
-    # Clone vllm
-	pip install -U ninja packaging --no-cache-dir
-	git clone https://github.com/Narsil/vllm.git vllm
-
-build-vllm-cuda: vllm-cuda
-	cd vllm && git fetch && git checkout b5dfc61db88a81069e45b44f7cc99bd9e62a60fa
-	cd vllm && python setup.py build
-
+build-vllm-cuda:
+	if [ ! -d 'vllm' ]; then \
+		pip install -U ninja packaging --no-cache-dir && \
+		git clone https://github.com/Narsil/vllm.git vllm  &&\
+		cd vllm  && \
+		git fetch && git checkout b5dfc61db88a81069e45b44f7cc99bd9e62a60fa &&\
+		python setup.py build; \
+	fi
 install-vllm-cuda: build-vllm-cuda
-	pip uninstall vllm -y || true
-	cd vllm && python setup.py install
-
-vllm-rocm:
-    # Clone vllm
-	pip install -U ninja packaging --no-cache-dir
-	git clone https://github.com/fxmarty/rocm-vllm.git vllm
+	if [ -d 'vllm' ]; then \
+		cd vllm && pip install -e .; \
+	fi
 
-build-vllm-rocm: vllm-rocm
-	cd vllm && git fetch && git checkout ca6913b3c2ffacdcb7d15e914dc34adbc6c89479
-	cd vllm && PYTORCH_ROCM_ARCH="gfx90a;gfx942" python setup.py install
+build-vllm-rocm:
+	if [ ! -d 'vllm' ]; then \
+		pip install -U ninja packaging --no-cache-dir && \
+		git clone https://github.com/fxmarty/rocm-vllm.git vllm && \
+		cd vllm && git fetch && git checkout ca6913b3c2ffacdcb7d15e914dc34adbc6c89479 &&  \
+		PYTORCH_ROCM_ARCH="gfx90a;gfx942" python setup.py build; \
+	fi
 
 install-vllm-rocm: build-vllm-rocm
-	pip uninstall vllm -y || true
-	cd vllm && python setup.py install
+	if [ -d 'vllm' ]; then \
+		cd vllm && \
+		PYTORCH_ROCM_ARCH="gfx90a;gfx942" pip install -e .; \
+	fi