Skip to content

Commit

Permalink
refactored codes
Browse files Browse the repository at this point in the history
  • Loading branch information
justanhduc committed Nov 4, 2021
1 parent 0cf6dc0 commit 347f18a
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 26 deletions.
52 changes: 38 additions & 14 deletions gpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,41 @@

#include <stdlib.h>
#include <nvml.h>
#include <string.h>

#include "main.h"

int * getGpuList(int *num, int unoccupied) {
int *used_gpus;
int num_total_gpus;

/*
 * Initialize the global GPU bookkeeping.
 *
 * Queries NVML for the number of devices, stores it in num_total_gpus and
 * allocates used_gpus with one zeroed int per device
 * (0 = not in use, 1 = in use).
 *
 * On success NVML is left initialized (this path does not call
 * nvmlShutdown). On any failure the problem is reported through error();
 * if error() returns, num_total_gpus is left at / reset to a value that
 * matches the size of used_gpus so callers never index a NULL table.
 */
void initGPU() {
    unsigned int nDevices;
    nvmlReturn_t result;

    result = nvmlInit();
    if (NVML_SUCCESS != result) {
        error("Failed to initialize NVML: %s", nvmlErrorString(result));
        /* Nothing was initialized, so there is nothing to query or shut down. */
        return;
    }

    result = nvmlDeviceGetCount_v2(&nDevices);
    if (NVML_SUCCESS != result) {
        error("Failed to get device count: %s", nvmlErrorString(result));
        goto Error;
    }

    num_total_gpus = (int) nDevices;
    /* calloc zero-fills the table and guards the count * size multiplication. */
    used_gpus = calloc(nDevices, sizeof *used_gpus);
    if (used_gpus == NULL && nDevices > 0) {
        /* Keep the count consistent with the (missing) table. */
        num_total_gpus = 0;
        error("Failed to allocate the GPU usage table for %u GPUs", nDevices);
        goto Error;
    }
    return;

Error:
    result = nvmlShutdown();
    if (NVML_SUCCESS != result)
        error("Failed to shutdown NVML: %s", nvmlErrorString(result));
}

int * getGpuList(int *num) {
int * gpuList;
unsigned int nDevices;
int i, j = 0, count = 0;
int i, count = 0;
nvmlReturn_t result;

result = nvmlInit();
Expand All @@ -31,20 +59,16 @@ int * getGpuList(int *num, int unoccupied) {
goto Error;
}

if (unoccupied) {
result = nvmlDeviceGetMemoryInfo(dev, &mem);
if (result != 0) {
error("Failed to get GPU memory for GPU %d: %s", i, nvmlErrorString(result));
goto Error;
}

if (mem.free < .1 * mem.total)
continue;
result = nvmlDeviceGetMemoryInfo(dev, &mem);
if (result != 0) {
error("Failed to get GPU memory for GPU %d: %s", i, nvmlErrorString(result));
goto Error;
}

gpuList[j] = i;
count++;
j++;
if (mem.free < .9 * mem.total)
continue;

gpuList[count++] = i;
}
*num = count;
result = nvmlShutdown();
Expand Down
2 changes: 1 addition & 1 deletion jobs.c
Original file line number Diff line number Diff line change
Expand Up @@ -660,7 +660,7 @@ int next_run_job() {

/* Query GPUs */
int numFree;
int *freeGpuList = getGpuList(&numFree, 1);
int *freeGpuList = getGpuList(&numFree);

/* Look for a runnable task */
p = firstjob;
Expand Down
4 changes: 3 additions & 1 deletion main.h
Original file line number Diff line number Diff line change
Expand Up @@ -457,4 +457,6 @@ char *get_environment();
int tail_file(const char *fname, int last_lines);

/* gpu.c */
int *getGpuList(int *num, int unoccupied);
int *getGpuList(int *num);

void initGPU();
12 changes: 2 additions & 10 deletions server.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,7 @@ enum Break {
/* Prototypes */
static void server_loop(int ls);

static enum Break
client_read(int index);
static enum Break client_read(int index);

static void end_server(int ls);

Expand Down Expand Up @@ -71,13 +70,6 @@ static int max_descriptors;
extern int max_jobs;

int *used_gpus;
int num_total_gpus;

/*
 * Set up the server-side GPU usage table.
 *
 * Asks getGpuList() for the device count (second argument 0), stores it in
 * num_total_gpus and allocates used_gpus with one zeroed int per device
 * (0 = not in use, 1 = in use). If the allocation fails, num_total_gpus is
 * reset to 0 so the count never exceeds the size of the table.
 */
static void initialize_gpus() {
    /* Only the count written through the pointer is used; the returned list
     * is discarded. NOTE(review): if getGpuList() returns heap memory, it is
     * leaked here - confirm against its definition. */
    getGpuList(&num_total_gpus, 0);

    /* calloc zero-fills the table, replacing the unchecked malloc + memset. */
    used_gpus = calloc((size_t) num_total_gpus, sizeof *used_gpus);
    if (used_gpus == NULL)
        num_total_gpus = 0;
}

static void s_send_version(int s) {
struct Msg m;
Expand Down Expand Up @@ -212,7 +204,7 @@ void server_main(int notify_fd, char *_path) {

notify_parent(notify_fd);

initialize_gpus();
initGPU();

server_loop(ls);
}
Expand Down

0 comments on commit 347f18a

Please sign in to comment.