Skip to content

Commit

Permalink
Changed the gzip option from "-g" to "-z". "-g" and "--gpu_indices" are now for …
Browse files Browse the repository at this point in the history
…specifying GPU indices
  • Loading branch information
justanhduc committed Apr 9, 2021
1 parent 263fb2e commit 088787e
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 23 deletions.
46 changes: 27 additions & 19 deletions client.c
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ void c_new_job() {
m.u.newjob.wait_enqueuing = command_line.wait_enqueuing;
m.u.newjob.num_slots = command_line.num_slots;
m.u.newjob.gpus = command_line.gpus;
m.u.newjob.wait_free_gpus = command_line.wait_free_gpus;

/* Send the message */
send_msg(server_socket, &m);
Expand Down Expand Up @@ -147,29 +148,36 @@ int c_wait_server_commands() {
c_send_runjob_ok(0, -1);
} else {
if (command_line.gpus) {
int numFree;
int *freeList = getFreeGpuList(&numFree);
if ((command_line.gpus > numFree)) {
result.errorlevel = -1;
result.user_ms = 0.;
result.system_ms = 0.;
result.real_ms = 0.;
result.skipped = 1;
c_send_runjob_ok(0, -1);
} else {
if (command_line.gpu_nums) {
char tmp[50];
strcpy(tmp, "CUDA_VISIBLE_DEVICES=");
shuffle(freeList, numFree);
for (int i = 0; i < command_line.gpus; i++) {
char tmp2[5];
sprintf(tmp2, "%d", freeList[i]);
strcat(tmp, tmp2);
if (i < command_line.gpus - 1)
strcat(tmp, ",");
}
strcat(tmp, command_line.gpu_nums);
putenv(tmp);
} else {
int numFree;
int *freeList = getFreeGpuList(&numFree);
if ((command_line.gpus > numFree)) {
result.errorlevel = -1;
result.user_ms = 0.;
result.system_ms = 0.;
result.real_ms = 0.;
result.skipped = 1;
c_send_runjob_ok(0, -1);
} else {
char tmp[50];
strcpy(tmp, "CUDA_VISIBLE_DEVICES=");
shuffle(freeList, numFree);
for (int i = 0; i < command_line.gpus; i++) {
char tmp2[5];
sprintf(tmp2, "%d", freeList[i]);
strcat(tmp, tmp2);
if (i < command_line.gpus - 1)
strcat(tmp, ",");
}
putenv(tmp);
}
free(freeList);
}
free(freeList);
} else {
putenv("CUDA_VISIBLE_DEVICES=-1");
}
Expand Down
4 changes: 3 additions & 1 deletion jobs.c
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,8 @@ int s_newjob(int s, struct Msg *m) {
p->state = (p->gpus) ? ALLOCATING : QUEUED;
else
p->state = HOLDING_CLIENT;
p->wait_free_gpus = m->u.newjob.wait_free_gpus;

p->num_slots = m->u.newjob.num_slots;
p->store_output = m->u.newjob.store_output;
p->should_keep_finished = m->u.newjob.should_keep_finished;
Expand Down Expand Up @@ -638,7 +640,7 @@ int next_run_job() {
p = firstjob;
while (p != 0) {
if (p->state == QUEUED || p->state == ALLOCATING) {
if (p->gpus) {
if (p->gpus && p->wait_free_gpus) {
/* GPU mem takes some time to be allocated,
* so two consecutive jobs can use the same GPU,
* so we need to spare some time between two GPU jobs.
Expand Down
15 changes: 12 additions & 3 deletions main.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ static void default_command_line() {
command_line.num_slots = 1;
command_line.require_elevel = 0;
command_line.gpus = 0;
command_line.wait_free_gpus = 1;
}

void get_command(int index, int argc, char **argv) {
Expand Down Expand Up @@ -91,6 +92,7 @@ static struct option longOptions[] = {
{"count_running", no_argument, NULL, 'R'},
{"last_queue_id", no_argument, NULL, 'q'},
{"gpus", required_argument, NULL, 'G'},
{"gpu_indices", required_argument, NULL, 'g'},
{"set_gpu_wait", required_argument, NULL, 0},
{"get_gpu_wait", no_argument, NULL, 0},
{"full_cmd", optional_argument, NULL, 'F'},
Expand All @@ -104,7 +106,7 @@ void parse_opts(int argc, char **argv) {

/* Parse options */
while (1) {
c = getopt_long(argc, argv, ":RTVhKgClnfmBEr:a:F:t:c:o:p:w:k:u:s:U:qi:N:L:dS:D:G:W:",
c = getopt_long(argc, argv, ":RTVhKzClnfmBEr:a:F:t:c:o:p:w:k:u:s:U:qi:N:L:dS:D:G:W:g:",
longOptions, &optionIdx);

if (c == -1)
Expand Down Expand Up @@ -167,7 +169,7 @@ void parse_opts(int argc, char **argv) {
case 'L':
command_line.label = optarg;
break;
case 'g':
case 'z':
command_line.gzip = 1;
break;
case 'f':
Expand All @@ -182,6 +184,12 @@ void parse_opts(int argc, char **argv) {
else
command_line.gpus = 1;
break;
case 'g':
command_line.gpu_nums = optarg;
int *foo = (int*) malloc(strlen(optarg) * sizeof(int));;
command_line.gpus = strtok_int(optarg, ",", foo);
command_line.wait_free_gpus = 0;
break;
case 't':
command_line.request = c_TAIL;
command_line.jobid = atoi(optarg);
Expand Down Expand Up @@ -442,14 +450,15 @@ static void print_help(const char *cmd) {
printf("Options adding jobs:\n");
printf(" -n don't store the output of the command.\n");
printf(" -E Keep stderr apart, in a name like the output file, but adding '.e'.\n");
printf(" -g gzip the stored output (if not -n).\n");
printf(" -z gzip the stored output (if not -n).\n");
printf(" -f don't fork into background.\n");
printf(" -m send the output by e-mail (uses sendmail).\n");
printf(" -d the job will be run after the last job ends.\n");
printf(" -D <id,...> the job will be run after the job of given IDs ends.\n");
printf(" -W <id,...> the job will be run after the job of given IDs ends well (exit code 0).\n");
printf(" -L <lab> name this task with a label, to be distinguished on listing.\n");
printf(" -N <num> number of slots required by the job (1 default).\n");
printf(" -g <id,...> the job will be on these GPU indices without checking whether they are free.\n");
}

static void print_version() {
Expand Down
4 changes: 4 additions & 0 deletions main.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,8 @@ struct CommandLine {
int num_slots; /* Slots for the job to use. Default 1 */
int require_elevel; /* whether requires error level of dependencies or not */
int gpus;
char *gpu_nums;
int wait_free_gpus;
int gpu_wait_time;
};

Expand Down Expand Up @@ -145,6 +147,7 @@ struct Msg {
int wait_enqueuing;
int num_slots;
int gpus;
int wait_free_gpus;
} newjob;
struct {
int ofilename_size;
Expand Down Expand Up @@ -205,6 +208,7 @@ struct Job {
struct Procinfo info;
int num_slots;
int gpus;
int wait_free_gpus;
};

enum ExitCodes {
Expand Down

0 comments on commit 088787e

Please sign in to comment.