Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Bugfix]: serialize config instances by value when using --trust-remote-code #6751

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion vllm/executor/ray_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

class RayWorkerWrapper(WorkerWrapperBase):
"""Ray wrapper for vllm.worker.Worker, allowing Worker to be
lazliy initialized after Ray sets CUDA_VISIBLE_DEVICES."""
lazily initialized after Ray sets CUDA_VISIBLE_DEVICES."""

def __init__(self, *args, **kwargs) -> None:
super().__init__(*args, **kwargs)
Expand Down
22 changes: 22 additions & 0 deletions vllm/transformers_utils/config.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import contextlib
from typing import Dict, Optional, Type

import ray
from transformers import GenerationConfig, PretrainedConfig

from vllm.envs import VLLM_USE_MODELSCOPE
Expand Down Expand Up @@ -70,6 +71,27 @@ def get_config(model: str,
logger.info("Updating %s from %r to %r", key,
getattr(config, key, None), value)
config.update({key: value})

if trust_remote_code:
# With trust_remote_code, the config is typically an instance of a
# custom class imported from the HF modules cache.
#
# The class will not be importable in Ray workers by default (and won't
# exist at all on other nodes), which breaks serialization of the
# config. Here we tell the serialization library used by Ray to pass
# instances of these generated classes by value instead of by reference
    # (e.g. the class definition is serialized along with its data).
#
# See: https://github.com/cloudpipe/cloudpickle?tab=readme-ov-file#overriding-pickles-serialization-mechanism-for-importable-constructs
try:
import transformers_modules
ray.cloudpickle.register_pickle_by_value(transformers_modules)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can use import cloudpickle rather than ray.cloudpickle


    # Ignore the ImportError raised when trust_remote_code was set but no
    # remote code was actually downloaded (so the transformers_modules
    # package does not exist).
except ImportError:
pass

return config


Expand Down
8 changes: 0 additions & 8 deletions vllm/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -766,14 +766,6 @@ def flatten_2d_lists(lists: List[List[T]]) -> List[T]:
return [item for sublist in lists for item in sublist]


def init_cached_hf_modules() -> None:
    """Initialize the Hugging Face dynamic-modules cache on demand.

    The import is deferred to the call site so that merely importing this
    utilities module does not pull in ``transformers`` (and, transitively,
    torch) before the caller is ready for it.
    """
    from transformers.dynamic_module_utils import init_hf_modules

    init_hf_modules()


@lru_cache(maxsize=None)
def find_library(lib_name: str) -> str:
"""
Expand Down
5 changes: 0 additions & 5 deletions vllm/worker/cpu_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,11 +153,6 @@ def __init__(
if self.is_driver_worker:
assert self.rank == 0, "The driver worker must have rank 0."

if self.model_config.trust_remote_code:
# note: lazy import to avoid importing torch before initializing
from vllm.utils import init_cached_hf_modules
init_cached_hf_modules()

# Setup OpenMP threads affinity.
omp_cpuids = envs.VLLM_CPU_OMP_THREADS_BIND
if omp_cpuids == "all":
Expand Down
4 changes: 0 additions & 4 deletions vllm/worker/neuron_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,6 @@ def __init__(
self.scheduler_config = scheduler_config
self.device_config = device_config
self.cache_config = cache_config
if self.model_config.trust_remote_code:
# note: lazy import to avoid importing torch before initializing
from vllm.utils import init_cached_hf_modules
init_cached_hf_modules()

self.model_runner: NeuronModelRunner = NeuronModelRunner(
model_config, parallel_config, scheduler_config, device_config)
Expand Down
5 changes: 0 additions & 5 deletions vllm/worker/openvino_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,11 +168,6 @@ def __init__(
if self.is_driver_worker:
assert self.rank == 0, "The driver worker must have rank 0."

if self.model_config.trust_remote_code:
# note: lazy import to avoid importing torch before initializing
from vllm.utils import init_cached_hf_modules

init_cached_hf_modules()
self.model_runner = OpenVINOModelRunner(
model_config,
parallel_config,
Expand Down
4 changes: 0 additions & 4 deletions vllm/worker/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,6 @@ def __init__(
if parallel_config and is_driver_worker:
assert rank % parallel_config.tensor_parallel_size == 0, \
"Driver worker should be rank 0 of tensor parallel group."
if self.model_config.trust_remote_code:
# note: lazy import to avoid importing torch before initializing
from vllm.utils import init_cached_hf_modules
init_cached_hf_modules()
self.multimodal_config = multimodal_config

# Return hidden states from target model if the draft model is an
Expand Down
4 changes: 0 additions & 4 deletions vllm/worker/worker_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,10 +336,6 @@ def __init__(
self.worker_class_name = worker_class_name
self.worker_class_fn = worker_class_fn
self.worker: Optional[WorkerBase] = None
if trust_remote_code:
# note: lazy import to avoid importing torch before initializing
from vllm.utils import init_cached_hf_modules
init_cached_hf_modules()

@staticmethod
def update_environment_variables(envs: Dict[str, str]) -> None:
Expand Down
Loading