diff --git a/nni/experiment/config/convert.py b/nni/experiment/config/convert.py index b07bf3acb0..254e4a498d 100644 --- a/nni/experiment/config/convert.py +++ b/nni/experiment/config/convert.py @@ -16,7 +16,9 @@ def to_v2(v1) -> ExperimentConfig: v1 = copy.deepcopy(v1) platform = v1.pop('trainingServicePlatform') - assert platform in ['local', 'remote', 'openpai', 'aml'] + assert platform in ['local', 'remote', 'pai', 'aml'] + if platform == 'pai': + platform = 'openpai' v2 = ExperimentConfig(platform) _drop_field(v1, 'authorName') @@ -88,7 +90,7 @@ def to_v2(v1) -> ExperimentConfig: if 'memoryMB' in v1_trial: ts.trial_memory_size = str(v1_trial.pop('memoryMB')) + 'mb' _move_field(v1_trial, ts, 'image', 'docker_image') - _deprecate(v1_trial, v2, 'virtualCluster') + _move_field(v1_trial, ts, 'virtualCluster', 'virtual_cluster') _move_field(v1_trial, ts, 'paiStorageConfigName', 'storage_config_name') _move_field(v1_trial, ts, 'paiConfigPath', 'openpaiConfigFile') diff --git a/nni/experiment/config/openpai.py b/nni/experiment/config/openpai.py index 66eecadac7..e941530f84 100644 --- a/nni/experiment/config/openpai.py +++ b/nni/experiment/config/openpai.py @@ -21,6 +21,7 @@ class OpenpaiConfig(TrainingServiceConfig): trial_memory_size: str storage_config_name: str docker_image: str = 'msranni/nni:latest' + virtual_cluster: Optional[str] local_storage_mount_point: PathLike container_storage_mount_point: str reuse_mode: bool = True diff --git a/ts/nni_manager/common/experimentConfig.ts b/ts/nni_manager/common/experimentConfig.ts index 7713bb94d4..6a3467722e 100644 --- a/ts/nni_manager/common/experimentConfig.ts +++ b/ts/nni_manager/common/experimentConfig.ts @@ -58,6 +58,7 @@ export interface OpenpaiConfig extends TrainingServiceConfig { containerStorageMountPoint: string; reuseMode: boolean; openpaiConfig?: object; + virtualCluster?: string; } /* AML */ @@ -198,7 +199,7 @@ export function toSeconds(time: string): number { throw new Error(`Bad time string "${time}"`); } -const sizeUnits = { tb: 1024 * 1024, gb: 1024 * 1024, mb: 1, kb: 1 / 1024 }; +const sizeUnits = { tb: 1024 * 1024, gb: 1024, mb: 1, kb: 1 / 1024 }; export function toMegaBytes(size: string): number { for (const [unit, factor] of Object.entries(sizeUnits)) { diff --git a/ts/nni_manager/training_service/reusable/environments/openPaiEnvironmentService.ts b/ts/nni_manager/training_service/reusable/environments/openPaiEnvironmentService.ts index 222b0b9e53..de913f0e65 100644 --- a/ts/nni_manager/training_service/reusable/environments/openPaiEnvironmentService.ts +++ b/ts/nni_manager/training_service/reusable/environments/openPaiEnvironmentService.ts @@ -5,6 +5,7 @@ import * as yaml from 'js-yaml'; import * as request from 'request'; +import { Container, Scope } from 'typescript-ioc'; import { Deferred } from 'ts-deferred'; import * as component from '../../../common/component'; import { ExperimentConfig, OpenpaiConfig, flattenConfig, toMegaBytes } from '../../../common/experimentConfig'; @@ -15,6 +16,7 @@ import { NNIPAITrialConfig } from '../../pai/paiConfig'; import { EnvironmentInformation, EnvironmentService } from '../environment'; import { SharedStorageService } from '../sharedStorage'; import { MountedStorageService } from '../storages/mountedStorageService'; +import { StorageService } from '../storageService'; interface FlattenOpenpaiConfig extends ExperimentConfig, OpenpaiConfig { } @@ -38,9 +40,10 @@ export class OpenPaiEnvironmentService extends EnvironmentService { this.config = flattenConfig(config, 'openpai'); this.paiToken = this.config.token; this.protocol = this.config.host.toLowerCase().startsWith('https://') ? 'https' : 'http'; - - // FIXME: only support MountedStorageService - const storageService = new MountedStorageService(); + Container.bind(StorageService) + .to(MountedStorageService) + .scope(Scope.Singleton); + const storageService = component.get(StorageService) const remoteRoot = storageService.joinPath(this.config.localStorageMountPoint, this.experimentId); storageService.initialize(this.config.localStorageMountPoint, remoteRoot); } @@ -286,7 +289,7 @@ export class OpenPaiEnvironmentService extends EnvironmentService { taskRetryCount: 0, dockerImage: 'docker_image_0', resourcePerInstance: { - gpu: this.config.trialGpuNumber, + gpu: this.config.trialGpuNumber === undefined? 0: this.config.trialGpuNumber, cpu: this.config.trialCpuNumber, memoryMB: toMegaBytes(this.config.trialMemorySize) }, @@ -304,9 +307,9 @@ export class OpenPaiEnvironmentService extends EnvironmentService { submitFrom: 'submit-job-v2' } } - if (this.config.deprecated && this.config.deprecated.virtualCluster) { + if (this.config.virtualCluster) { nniJobConfig.defaults = { - virtualCluster: this.config.deprecated.virtualCluster + virtualCluster: this.config.virtualCluster } } }