From 2bd17bfa4de47a7a13fd65c1035311b7fc161904 Mon Sep 17 00:00:00 2001 From: Mark Saroufim Date: Thu, 19 May 2022 01:04:13 +0000 Subject: [PATCH 01/24] data/benchmarks --- benchmarks/README.md | 34 ++++++++++ benchmarks/requirements.txt | 4 ++ benchmarks/run.py | 28 ++++++++ benchmarks/run_benchmark.py | 124 ++++++++++++++++++++++++++++++++++++ 4 files changed, 190 insertions(+) create mode 100644 benchmarks/README.md create mode 100644 benchmarks/requirements.txt create mode 100644 benchmarks/run.py create mode 100644 benchmarks/run_benchmark.py diff --git a/benchmarks/README.md b/benchmarks/README.md new file mode 100644 index 000000000..6e60e38af --- /dev/null +++ b/benchmarks/README.md @@ -0,0 +1,34 @@ +# Install dependencies + +``` +pip3 install --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cu113 +python setup.py develop +``` + +# Usage instructions + + +``` +usage: run_benchmark.py [-h] [--dataset DATASET] [--model_name MODEL_NAME] [--batch_size BATCH_SIZE] + [--num_epochs NUM_EPOCHS] [--report_location REPORT_LOCATION] + [--num_workers NUM_WORKERS] [--shuffle] [--dataloaderv DATALOADERV] +``` + +## Available metrics +* [x] Total time +* [x] Time per batch +* [x] Time per epoch +* [x] Precision over time +* [x] CPU Load +* [x] GPU Load +* [x] Memory usage + +## Additional profiling + +``` +pip install scalene +pip install torch-tb-profiler +``` + + +`scalene run_benchmark.py` \ No newline at end of file diff --git a/benchmarks/requirements.txt b/benchmarks/requirements.txt new file mode 100644 index 000000000..2c59965bf --- /dev/null +++ b/benchmarks/requirements.txt @@ -0,0 +1,4 @@ +pip3 install --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cu113 +pip install git+https://github.com/pytorch/data@main +pip install scalene +pip install torch-tb-profiler \ No newline at end of file diff --git a/benchmarks/run.py b/benchmarks/run.py new file mode 100644 index 000000000..2aa2d3909 --- /dev/null +++ b/benchmarks/run.py @@ -0,0 +1,28 @@ +import argparse +from torchvision.prototype.datasets import load +from torch.utils.data import DataLoader + + +parser = argparse.ArgumentParser() +parser.add_argument("--dataset", type=str, default="gtsrb", help="The name of the dataset") +parser.add_argument("--model_name", type=str, default="resnext50_32x4d", help="The name of the model") +parser.add_argument("--batch_size", type=int, default=32, help="") +parser.add_argument("--report_location", type=str, default="./report.md", help="The location where the generated report will be stored") + +args = parser.parse_args() +dataset = args.dataset +batch_size = args.batch_size + +# setup data pipe +dp = load("gtsrb", split="train") +print(f"batch size {batch_size}") +print(f"Dataset name {dp}") +print(f"Dataset length {len(dp)}") + +# Setup data loader +# Shuffle won't work in distributed +dl = DataLoader(dataset=dp, batch_size=batch_size, shuffle=True) + +# Training loop +for elem in dl: + print(i) \ No newline at end of file diff --git a/benchmarks/run_benchmark.py b/benchmarks/run_benchmark.py new file mode 100644 index 000000000..13c2a2e09 --- /dev/null +++ b/benchmarks/run_benchmark.py @@ -0,0 +1,124 @@ +import argparse +import torchvision +import torch +try: + import transformers +except: + pass +from torchvision.prototype.datasets import load +import torch.nn.functional as F +from torchvision import transforms +import time +from statistics import mean +import torch.optim as optim + + + +parser = 
argparse.ArgumentParser() +parser.add_argument("--dataset", type=str, default="gtsrb", help="The name of the dataset") +parser.add_argument("--model_name", type=str, default="resnext50_32x4d", help="The name of the model") +parser.add_argument("--batch_size", type=int, default=1, help="") +parser.add_argument("--num_epochs", type=int, default=2) +parser.add_argument("--report_location", type=str, default="./report.md", help="The location where the generated report will be stored") +parser.add_argument("--num_workers", type=int, default=1, help="Number of dataloader workers") +parser.add_argument("--shuffle", action="store_true") +parser.add_argument("--dataloaderv", type=int, default=1) + +args = parser.parse_args() +dataset = args.dataset +model_name = args.model_name +batch_size = args.batch_size +num_epochs = args.num_epochs +report_location = args.report_location +num_workers = args.num_workers +shuffle = args.shuffle +dataloaderv = args.dataloaderv + +if dataloaderv == 1: + from torch.utils.data import DataLoader +elif dataloaderv == 2: + from torch.utils.data.dataloader_experimental import DataLoader2 as DataLoader +else: + raise(f"dataloaderv{dataloaderv} is not a valid option") + +# Util function for multiprocessing +def init_fn(worker_id): + info = torch.utils.data.get_worker_info() + num_workers = info.num_workers + datapipe = info.dataset + torch.utils.data.graph_settings.apply_sharding(datapipe, num_workers, worker_id) + +# Download model +model_map = { + "resnext50_32x4d": torchvision.models.resnext50_32x4d, + "mobilenet_v3_large" : torchvision.models.mobilenet_v3_large, + "transformerencoder" : torch.nn.TransformerEncoder, + # "bert-base" : transformers.BertModel, + +} + +model = model_map[model_name]() + +# setup data pipe +dp = load(dataset, split="train") +print(f"batch size {batch_size}") +print(f"Dataset name {dp}") +print(f"Dataset length {len(dp)}") + +# Datapipe format +print(f"data format is {next(iter(dp))}") + +# Setup data loader +if num_workers == 1: + dl = DataLoader(dataset=dp, batch_size=batch_size, shuffle=shuffle) + +# Shuffle won't work in distributed yet +else: + dl = DataLoader(dataset=dp, batch_size=batch_size, shuffle=True, num_workers=num_workers, worker_init_fn=init_fn, multiprocessing_context="spawn") + + +# TODO: Add measurements time per batch, per epoch and total time here + +total_start = time.time() +per_epoch_durations = [] +batch_durations = [] + +criterion = torch.nn.CrossEntropyLoss() +optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9) +for epoch in range(num_epochs): + epoch_start = time.time() + running_loss = 0 + for i, elem in enumerate(dl): + batch_start = time.time() + # Should image preprocessing be done online or offline? + # This is all image specific, need to refactor this out or create a training loop per model/dataset combo + input_image = torch.unsqueeze(elem["image"], 0) + input_image = transforms.Resize(size=(96,98))(input_image) + input_image = input_image.reshape(64,3,7,7) / 255 + + labels = elem["label"] + optimizer.zero_grad() + + outputs = model(input_image) + + # TODO: ValueError: Expected input batch_size (64) to match target batch_size (1). 
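+        # The reshape above forces a batch dimension of 64 onto the input while
+        # `labels` still comes from a single sample, so the criterion call below
+        # sees 64 predictions against 1 target, which is the mismatch in the TODO.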
+ loss = criterion(outputs,labels) + loss.backward() + optimizer.step() + running_loss += loss.item() + if i % 2000 == 1999: # print every 2000 mini-batches + print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}') + running_loss = 0.0 + + batch_end = time.time() + batch_duration = batch_end - batch_start + batch_durations.append(batch_duration) + epoch_end = time.time() + epoch_duration = epoch_end - epoch_start + per_epoch_durations.append(epoch_duration) +total_end = time.time() +total_duration = total_end - total_start + +print(f"Total duration is {total_duration}") +print(f"Per epoch duration {mean(per_epoch_durations)}") +print(f"Per batch duration {mean(batch_durations)}") \ No newline at end of file From 4e8f41cbf8b781609fcdfa0c6477751e80f65a4c Mon Sep 17 00:00:00 2001 From: Mark Saroufim Date: Thu, 19 May 2022 01:06:23 +0000 Subject: [PATCH 02/24] removed extra files --- benchmarks/requirements.txt | 4 ---- benchmarks/run.py | 28 ---------------------------- 2 files changed, 32 deletions(-) delete mode 100644 benchmarks/requirements.txt delete mode 100644 benchmarks/run.py diff --git a/benchmarks/requirements.txt b/benchmarks/requirements.txt deleted file mode 100644 index 2c59965bf..000000000 --- a/benchmarks/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -pip3 install --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cu113 -pip install git+https://github.com/pytorch/data@main -pip install scalene -pip install torch-tb-profiler \ No newline at end of file diff --git a/benchmarks/run.py b/benchmarks/run.py deleted file mode 100644 index 2aa2d3909..000000000 --- a/benchmarks/run.py +++ /dev/null @@ -1,28 +0,0 @@ -import argparse -from torchvision.prototype.datasets import load -from torch.utils.data import DataLoader - - -parser = argparse.ArgumentParser() -parser.add_argument("--dataset", type=str, default="gtsrb", help="The name of the dataset") -parser.add_argument("--model_name", type=str, default="resnext50_32x4d", help="The name of the model") -parser.add_argument("--batch_size", type=int, default=32, help="") -parser.add_argument("--report_location", type=str, default="./report.md", help="The location where the generated report will be stored") - -args = parser.parse_args() -dataset = args.dataset -batch_size = args.batch_size - -# setup data pipe -dp = load("gtsrb", split="train") -print(f"batch size {batch_size}") -print(f"Dataset name {dp}") -print(f"Dataset length {len(dp)}") - -# Setup data loader -# Shuffle won't work in distributed -dl = DataLoader(dataset=dp, batch_size=batch_size, shuffle=True) - -# Training loop -for elem in dl: - print(i) \ No newline at end of file From 0818eb6afcaba9149c84e7c0b90b669158cc7dc8 Mon Sep 17 00:00:00 2001 From: Mark Saroufim Date: Thu, 19 May 2022 01:45:06 +0000 Subject: [PATCH 03/24] nananannana --- benchmarks/run_benchmark.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/benchmarks/run_benchmark.py b/benchmarks/run_benchmark.py index 13c2a2e09..cbdb6780b 100644 --- a/benchmarks/run_benchmark.py +++ b/benchmarks/run_benchmark.py @@ -97,18 +97,21 @@ def init_fn(worker_id): input_image = input_image.reshape(64,3,7,7) / 255 labels = elem["label"] + + # TODO: remove this is wrong + labels = labels.repeat(64) optimizer.zero_grad() outputs = model(input_image) - - # TODO: ValueError: Expected input batch_size (64) to match target batch_size (1). + + # ValueError: Expected input batch_size (64) to match target batch_size (1). 
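+        # Repeating the single label 64 times makes the target shape match the 64
+        # reshaped inputs so the loss can be computed, but every pseudo-sample now
+        # trains against the same class, which is why the TODO above flags it as wrong.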
loss = criterion(outputs,labels) loss.backward() optimizer.step() running_loss += loss.item() - if i % 2000 == 1999: # print every 2000 mini-batches - print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}') - running_loss = 0.0 + # if i % 2000 == 1999: # print every 2000 mini-batches + print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}') + running_loss = 0.0 batch_end = time.time() batch_duration = batch_end - batch_start From 590a20bae4facc96e8cc33cc27c07f38947df300 Mon Sep 17 00:00:00 2001 From: Mark Saroufim Date: Thu, 19 May 2022 01:49:45 +0000 Subject: [PATCH 04/24] added gpu support --- benchmarks/run_benchmark.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/benchmarks/run_benchmark.py b/benchmarks/run_benchmark.py index cbdb6780b..d8bafd755 100644 --- a/benchmarks/run_benchmark.py +++ b/benchmarks/run_benchmark.py @@ -18,6 +18,7 @@ parser.add_argument("--dataset", type=str, default="gtsrb", help="The name of the dataset") parser.add_argument("--model_name", type=str, default="resnext50_32x4d", help="The name of the model") parser.add_argument("--batch_size", type=int, default=1, help="") +parser.add_argument("--device", type=str, default="cuda:0", help="Options are are cpu or cuda:0") parser.add_argument("--num_epochs", type=int, default=2) parser.add_argument("--report_location", type=str, default="./report.md", help="The location where the generated report will be stored") parser.add_argument("--num_workers", type=int, default=1, help="Number of dataloader workers") @@ -57,7 +58,7 @@ def init_fn(worker_id): } -model = model_map[model_name]() +model = model_map[model_name]().to(torch.device("cuda:0")) # setup data pipe dp = load(dataset, split="train") @@ -92,11 +93,11 @@ def init_fn(worker_id): batch_start = time.time() # Should image preprocessing be done online or offline? 
# This is all image specific, need to refactor this out or create a training loop per model/dataset combo - input_image = torch.unsqueeze(elem["image"], 0) + input_image = torch.unsqueeze(elem["image"], 0).to(torch.device("cuda:0")) input_image = transforms.Resize(size=(96,98))(input_image) input_image = input_image.reshape(64,3,7,7) / 255 - labels = elem["label"] + labels = elem["label"].to(torch.device("cuda:0")) # TODO: remove this is wrong labels = labels.repeat(64) From 411167bbca3b800b3f54d37674e8751e40a80e29 Mon Sep 17 00:00:00 2001 From: Mark Saroufim Date: Thu, 19 May 2022 18:22:26 +0000 Subject: [PATCH 05/24] [skip ci] enable profiler --- benchmarks/run_benchmark.py | 91 ++++++++++++++++++++++--------------- 1 file changed, 54 insertions(+), 37 deletions(-) diff --git a/benchmarks/run_benchmark.py b/benchmarks/run_benchmark.py index d8bafd755..c373afb8d 100644 --- a/benchmarks/run_benchmark.py +++ b/benchmarks/run_benchmark.py @@ -11,9 +11,10 @@ import time from statistics import mean import torch.optim as optim +from torch.profiler import profile, record_function, ProfilerActivity - +## Arg parsing parser = argparse.ArgumentParser() parser.add_argument("--dataset", type=str, default="gtsrb", help="The name of the dataset") parser.add_argument("--model_name", type=str, default="resnext50_32x4d", help="The name of the model") @@ -49,6 +50,12 @@ def init_fn(worker_id): datapipe = info.dataset torch.utils.data.graph_settings.apply_sharding(datapipe, num_workers, worker_id) +def trace_handler(p): + output = p.key_averages().table(sort_by="self_cuda_time_total", row_limit=10) + print(output) + p.export_chrome_trace("/tmp/trace_" + str(p.step_num) + ".json") + + # Download model model_map = { "resnext50_32x4d": torchvision.models.resnext50_32x4d, @@ -86,42 +93,52 @@ def init_fn(worker_id): criterion = torch.nn.CrossEntropyLoss() optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9) -for epoch in range(num_epochs): - epoch_start = time.time() - running_loss = 0 - for i, elem in enumerate(dl): - batch_start = time.time() - # Should image preprocessing be done online or offline? - # This is all image specific, need to refactor this out or create a training loop per model/dataset combo - input_image = torch.unsqueeze(elem["image"], 0).to(torch.device("cuda:0")) - input_image = transforms.Resize(size=(96,98))(input_image) - input_image = input_image.reshape(64,3,7,7) / 255 - - labels = elem["label"].to(torch.device("cuda:0")) - - # TODO: remove this is wrong - labels = labels.repeat(64) - optimizer.zero_grad() - - outputs = model(input_image) - - # ValueError: Expected input batch_size (64) to match target batch_size (1). 
- loss = criterion(outputs,labels) - loss.backward() - optimizer.step() - running_loss += loss.item() - # if i % 2000 == 1999: # print every 2000 mini-batches - print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}') - running_loss = 0.0 - - batch_end = time.time() - batch_duration = batch_end - batch_start - batch_durations.append(batch_duration) - epoch_end = time.time() - epoch_duration = epoch_end - epoch_start - per_epoch_durations.append(epoch_duration) -total_end = time.time() -total_duration = total_end - total_start + +with profile( + activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], + schedule=torch.profiler.schedule( + wait=1, + warmup=1, + active=2), + on_trace_ready=trace_handler +) as p: + + for epoch in range(num_epochs): + epoch_start = time.time() + running_loss = 0 + for i, elem in enumerate(dl): + batch_start = time.time() + # Should image preprocessing be done online or offline? + # This is all image specific, need to refactor this out or create a training loop per model/dataset combo + input_image = torch.unsqueeze(elem["image"], 0).to(torch.device("cuda:0")) + input_image = transforms.Resize(size=(96,98))(input_image) + input_image = input_image.reshape(64,3,7,7) / 255 + + labels = elem["label"].to(torch.device("cuda:0")) + + # TODO: remove this is wrong + labels = labels.repeat(64) + optimizer.zero_grad() + + outputs = model(input_image) + + # ValueError: Expected input batch_size (64) to match target batch_size (1). + loss = criterion(outputs,labels) + loss.backward() + optimizer.step() + running_loss += loss.item() + # if i % 2000 == 1999: # print every 2000 mini-batches + print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}') + running_loss = 0.0 + + batch_end = time.time() + batch_duration = batch_end - batch_start + batch_durations.append(batch_duration) + epoch_end = time.time() + epoch_duration = epoch_end - epoch_start + per_epoch_durations.append(epoch_duration) + total_end = time.time() + total_duration = total_end - total_start print(f"Total duration is {total_duration}") print(f"Per epoch duration {mean(per_epoch_durations)}") From 4fffddf50ac0b9864c16ed31e02eaa5b20962672 Mon Sep 17 00:00:00 2001 From: Mark Saroufim Date: Wed, 25 May 2022 19:57:28 +0000 Subject: [PATCH 06/24] fixed preprocessing pipeline --- benchmarks/run_benchmark.py | 64 ++++++++++++++++++++++--------------- 1 file changed, 38 insertions(+), 26 deletions(-) diff --git a/benchmarks/run_benchmark.py b/benchmarks/run_benchmark.py index c373afb8d..b638104a4 100644 --- a/benchmarks/run_benchmark.py +++ b/benchmarks/run_benchmark.py @@ -1,10 +1,9 @@ import argparse +import sys import torchvision import torch -try: - import transformers -except: - pass +import transformers + from torchvision.prototype.datasets import load import torch.nn.functional as F from torchvision import transforms @@ -13,7 +12,6 @@ import torch.optim as optim from torch.profiler import profile, record_function, ProfilerActivity - ## Arg parsing parser = argparse.ArgumentParser() parser.add_argument("--dataset", type=str, default="gtsrb", help="The name of the dataset") @@ -30,6 +28,7 @@ dataset = args.dataset model_name = args.model_name batch_size = args.batch_size +device = args.device num_epochs = args.num_epochs report_location = args.report_location num_workers = args.num_workers @@ -61,20 +60,45 @@ def trace_handler(p): "resnext50_32x4d": torchvision.models.resnext50_32x4d, "mobilenet_v3_large" : torchvision.models.mobilenet_v3_large, "transformerencoder" : 
torch.nn.TransformerEncoder, - # "bert-base" : transformers.BertModel, + "bert-base" : transformers.BertModel, } -model = model_map[model_name]().to(torch.device("cuda:0")) +model = model_map[model_name]().to(torch.device(device)) # setup data pipe -dp = load(dataset, split="train") +if model_name in ["resnext50_32x4d", "mobilenet_v3_large"]: + dp = load(dataset, split="train") + +else: + print(f"{model} not supported yet") + print(f"batch size {batch_size}") print(f"Dataset name {dp}") print(f"Dataset length {len(dp)}") # Datapipe format -print(f"data format is {next(iter(dp))}") +print(f"data format before preprocessing is {next(iter(dp))}") + +if dataset == "gtsrb": + def transform(img): + t= transforms.Compose([ + transforms.ToPILImage(), + transforms.Resize(size=(96,98)), + # transforms.reshape(64,3,7,7), + transforms.ToTensor()] + ) + return t(img) + + dp = dp.map(lambda sample : {"image" : sample["image"], "label" : sample["label"]}) + dp = dp.map(lambda sample : transform(sample.decode()), input_col="image") + dp = dp.map(lambda sample : sample.to_categories(), input_col="label") + dp = dp.batch(batch_size) + +# dp_batches.map(lambda batch : {"images" : [sample["image"]]}) + +# Datapipe format after preprocessing +print(f"data format after preprocessing is \n {next(iter(dp))}\n") # Setup data loader if num_workers == 1: @@ -85,8 +109,6 @@ def trace_handler(p): dl = DataLoader(dataset=dp, batch_size=batch_size, shuffle=True, num_workers=num_workers, worker_init_fn=init_fn, multiprocessing_context="spawn") -# TODO: Add measurements time per batch, per epoch and total time here - total_start = time.time() per_epoch_durations = [] batch_durations = [] @@ -108,27 +130,17 @@ def trace_handler(p): running_loss = 0 for i, elem in enumerate(dl): batch_start = time.time() - # Should image preprocessing be done online or offline? - # This is all image specific, need to refactor this out or create a training loop per model/dataset combo - input_image = torch.unsqueeze(elem["image"], 0).to(torch.device("cuda:0")) - input_image = transforms.Resize(size=(96,98))(input_image) - input_image = input_image.reshape(64,3,7,7) / 255 - - labels = elem["label"].to(torch.device("cuda:0")) - # TODO: remove this is wrong - labels = labels.repeat(64) + labels = elem["label"].to(torch.device("cuda:0")) optimizer.zero_grad() - - outputs = model(input_image) - - # ValueError: Expected input batch_size (64) to match target batch_size (1). 
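+            # With a schedule of wait=1/warmup=1/active=2, the profiler only moves
+            # through its phases when p.step() is called once per iteration; this loop
+            # does not call it yet, so trace_handler never fires (a later commit in
+            # this series adds p.step() inside the loop).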
+ outputs = model(elem["image"]) loss = criterion(outputs,labels) loss.backward() optimizer.step() + running_loss += loss.item() - # if i % 2000 == 1999: # print every 2000 mini-batches - print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}') + if i % 200 == 1999: # print every 2000 mini-batches + print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}') running_loss = 0.0 batch_end = time.time() From 07739b21d306b00d18f648cded250d006057567b Mon Sep 17 00:00:00 2001 From: Mark Saroufim Date: Wed, 25 May 2022 19:57:44 +0000 Subject: [PATCH 07/24] [skip ci] From fea3322bf40c62e4b8fc24cb6f0437eae72f6aed Mon Sep 17 00:00:00 2001 From: Mark Saroufim Date: Wed, 25 May 2022 20:07:02 +0000 Subject: [PATCH 08/24] update --- benchmarks/run_benchmark.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/benchmarks/run_benchmark.py b/benchmarks/run_benchmark.py index b638104a4..66973896e 100644 --- a/benchmarks/run_benchmark.py +++ b/benchmarks/run_benchmark.py @@ -88,15 +88,20 @@ def transform(img): # transforms.reshape(64,3,7,7), transforms.ToTensor()] ) - return t(img) + return t(img).to(torch.device(device)) + # Filter out bounding box and path to image dp = dp.map(lambda sample : {"image" : sample["image"], "label" : sample["label"]}) + + # Apply image preprocessing dp = dp.map(lambda sample : transform(sample.decode()), input_col="image") dp = dp.map(lambda sample : sample.to_categories(), input_col="label") + + # TODO: Missing a collation + + # Batch dp = dp.batch(batch_size) -# dp_batches.map(lambda batch : {"images" : [sample["image"]]}) - # Datapipe format after preprocessing print(f"data format after preprocessing is \n {next(iter(dp))}\n") @@ -131,7 +136,7 @@ def transform(img): for i, elem in enumerate(dl): batch_start = time.time() - labels = elem["label"].to(torch.device("cuda:0")) + labels = elem["label"].to(torch.device(device)) optimizer.zero_grad() outputs = model(elem["image"]) loss = criterion(outputs,labels) From aec3a6fcfdf768c264c5b03e70da65f7a1092471 Mon Sep 17 00:00:00 2001 From: Mark Saroufim Date: Wed, 25 May 2022 21:10:15 +0000 Subject: [PATCH 09/24] [skip ci] lunch --- benchmarks/run_benchmark.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/benchmarks/run_benchmark.py b/benchmarks/run_benchmark.py index 66973896e..a9adaee47 100644 --- a/benchmarks/run_benchmark.py +++ b/benchmarks/run_benchmark.py @@ -90,12 +90,18 @@ def transform(img): ) return t(img).to(torch.device(device)) + def str_to_list(str): + l = [] + for char in str: + l.append(int(char)) + return l + # Filter out bounding box and path to image dp = dp.map(lambda sample : {"image" : sample["image"], "label" : sample["label"]}) # Apply image preprocessing dp = dp.map(lambda sample : transform(sample.decode()), input_col="image") - dp = dp.map(lambda sample : sample.to_categories(), input_col="label") + dp = dp.map(lambda sample : torch.tensor(str_to_list(sample.to_categories())).to(torch.device(device)), input_col="label") # TODO: Missing a collation @@ -135,10 +141,10 @@ def transform(img): running_loss = 0 for i, elem in enumerate(dl): batch_start = time.time() - - labels = elem["label"].to(torch.device(device)) + # print(f"elem is {elem}") + labels = elem[0]["label"] optimizer.zero_grad() - outputs = model(elem["image"]) + outputs = model(elem[0]["image"]) loss = criterion(outputs,labels) loss.backward() optimizer.step() From 621ca0b941339c05f7ef226a03fca04c57a87e32 Mon Sep 17 00:00:00 2001 From: Mark Saroufim Date: Wed, 
25 May 2022 22:18:45 +0000 Subject: [PATCH 10/24] [skip ci] it runs --- benchmarks/run_benchmark.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/benchmarks/run_benchmark.py b/benchmarks/run_benchmark.py index a9adaee47..40153dce2 100644 --- a/benchmarks/run_benchmark.py +++ b/benchmarks/run_benchmark.py @@ -78,7 +78,7 @@ def trace_handler(p): print(f"Dataset length {len(dp)}") # Datapipe format -print(f"data format before preprocessing is {next(iter(dp))}") +# print(f"data format before preprocessing is {next(iter(dp))}") if dataset == "gtsrb": def transform(img): @@ -109,7 +109,7 @@ def str_to_list(str): dp = dp.batch(batch_size) # Datapipe format after preprocessing -print(f"data format after preprocessing is \n {next(iter(dp))}\n") +# print(f"data format after preprocessing is \n {next(iter(dp))}\n") # Setup data loader if num_workers == 1: @@ -141,8 +141,8 @@ def str_to_list(str): running_loss = 0 for i, elem in enumerate(dl): batch_start = time.time() - # print(f"elem is {elem}") - labels = elem[0]["label"] + + labels = torch.argmax(elem[0]["label"], dim=1) optimizer.zero_grad() outputs = model(elem[0]["image"]) loss = criterion(outputs,labels) From 1c06bae6d851175c28543e216aa5b66d1dd562ef Mon Sep 17 00:00:00 2001 From: Mark Saroufim Date: Wed, 25 May 2022 22:55:08 +0000 Subject: [PATCH 11/24] [skip ci] fix profile settings --- benchmarks/run_benchmark.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/benchmarks/run_benchmark.py b/benchmarks/run_benchmark.py index 40153dce2..b6a9a49a6 100644 --- a/benchmarks/run_benchmark.py +++ b/benchmarks/run_benchmark.py @@ -18,7 +18,7 @@ parser.add_argument("--model_name", type=str, default="resnext50_32x4d", help="The name of the model") parser.add_argument("--batch_size", type=int, default=1, help="") parser.add_argument("--device", type=str, default="cuda:0", help="Options are are cpu or cuda:0") -parser.add_argument("--num_epochs", type=int, default=2) +parser.add_argument("--num_epochs", type=int, default=1) parser.add_argument("--report_location", type=str, default="./report.md", help="The location where the generated report will be stored") parser.add_argument("--num_workers", type=int, default=1, help="Number of dataloader workers") parser.add_argument("--shuffle", action="store_true") @@ -49,10 +49,10 @@ def init_fn(worker_id): datapipe = info.dataset torch.utils.data.graph_settings.apply_sharding(datapipe, num_workers, worker_id) -def trace_handler(p): - output = p.key_averages().table(sort_by="self_cuda_time_total", row_limit=10) - print(output) - p.export_chrome_trace("/tmp/trace_" + str(p.step_num) + ".json") +# def trace_handler(p): +# output = p.key_averages().table(sort_by="self_cuda_time_total", row_limit=10) +# print(output) +# p.export_chrome_trace("/tmp/trace_" + str(p.step_num) + ".json") # Download model @@ -129,11 +129,12 @@ def str_to_list(str): with profile( activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], - schedule=torch.profiler.schedule( - wait=1, - warmup=1, - active=2), - on_trace_ready=trace_handler + on_trace_ready=torch.profiler.tensorboard_trace_handler, + record_shapes=True, + profile_memory=True, + with_flops=True, + with_stack=True, + with_modules=True ) as p: for epoch in range(num_epochs): @@ -150,8 +151,8 @@ def str_to_list(str): optimizer.step() running_loss += loss.item() - if i % 200 == 1999: # print every 2000 mini-batches - print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}') + # if i % 200 == 1999: # 
print every 2000 mini-batches + print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.10f}') running_loss = 0.0 batch_end = time.time() From 9001b005c2c2106cc2fb345c4a9fedd2ce0da1b7 Mon Sep 17 00:00:00 2001 From: Mark Saroufim Date: Wed, 25 May 2022 23:35:17 +0000 Subject: [PATCH 12/24] [skip ci] added logging and fixed profiler --- benchmarks/run_benchmark.py | 41 +++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/benchmarks/run_benchmark.py b/benchmarks/run_benchmark.py index b6a9a49a6..2c2da5a6a 100644 --- a/benchmarks/run_benchmark.py +++ b/benchmarks/run_benchmark.py @@ -1,16 +1,20 @@ import argparse import sys +import logging + import torchvision import torch import transformers - from torchvision.prototype.datasets import load import torch.nn.functional as F from torchvision import transforms import time from statistics import mean import torch.optim as optim -from torch.profiler import profile, record_function, ProfilerActivity +import torch.profiler + +logging.basicConfig(filename='example.log', level=logging.DEBUG) + ## Arg parsing parser = argparse.ArgumentParser() @@ -49,12 +53,6 @@ def init_fn(worker_id): datapipe = info.dataset torch.utils.data.graph_settings.apply_sharding(datapipe, num_workers, worker_id) -# def trace_handler(p): -# output = p.key_averages().table(sort_by="self_cuda_time_total", row_limit=10) -# print(output) -# p.export_chrome_trace("/tmp/trace_" + str(p.step_num) + ".json") - - # Download model model_map = { "resnext50_32x4d": torchvision.models.resnext50_32x4d, @@ -78,7 +76,7 @@ def init_fn(worker_id): print(f"Dataset length {len(dp)}") # Datapipe format -# print(f"data format before preprocessing is {next(iter(dp))}") +logging.debug(f"data format before preprocessing is {next(iter(dp))}") if dataset == "gtsrb": def transform(img): @@ -103,13 +101,11 @@ def str_to_list(str): dp = dp.map(lambda sample : transform(sample.decode()), input_col="image") dp = dp.map(lambda sample : torch.tensor(str_to_list(sample.to_categories())).to(torch.device(device)), input_col="label") - # TODO: Missing a collation - # Batch dp = dp.batch(batch_size) # Datapipe format after preprocessing -# print(f"data format after preprocessing is \n {next(iter(dp))}\n") +logging.debug(f"data format after preprocessing is \n {next(iter(dp))}\n") # Setup data loader if num_workers == 1: @@ -127,9 +123,10 @@ def str_to_list(str): criterion = torch.nn.CrossEntropyLoss() optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9) -with profile( - activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], - on_trace_ready=torch.profiler.tensorboard_trace_handler, +with torch.profiler.profile( + activities=[torch.profiler.ProfilerActivity.CPU, torch.profiler.ProfilerActivity.CUDA], + on_trace_ready=torch.profiler.tensorboard_trace_handler('./result', worker_name='datapipe0'), + schedule=torch.profiler.schedule(wait=1,warmup=1,active=2), record_shapes=True, profile_memory=True, with_flops=True, @@ -141,6 +138,8 @@ def str_to_list(str): epoch_start = time.time() running_loss = 0 for i, elem in enumerate(dl): + p.step() + batch_start = time.time() labels = torch.argmax(elem[0]["label"], dim=1) @@ -158,12 +157,18 @@ def str_to_list(str): batch_end = time.time() batch_duration = batch_end - batch_start batch_durations.append(batch_duration) + p.step() + p.step() epoch_end = time.time() epoch_duration = epoch_end - epoch_start per_epoch_durations.append(epoch_duration) total_end = time.time() total_duration = total_end - 
total_start -print(f"Total duration is {total_duration}") -print(f"Per epoch duration {mean(per_epoch_durations)}") -print(f"Per batch duration {mean(batch_durations)}") \ No newline at end of file +# TODO: Make this output some human readable markdown file +def create_report(per_epoch_durations, batch_durations, total_duration): + print(f"Total duration is {total_duration}") + print(f"Per epoch duration {mean(per_epoch_durations)}") + print(f"Per batch duration {mean(batch_durations)}") + +create_report(per_epoch_durations, batch_durations, total_duration) \ No newline at end of file From 95d48aedd444dfc69b2532be8303a750e9cb9ff8 Mon Sep 17 00:00:00 2001 From: Mark Saroufim Date: Thu, 26 May 2022 00:24:54 +0000 Subject: [PATCH 13/24] update --- benchmarks/README.md | 15 +++++++++------ benchmarks/run_benchmark.py | 2 +- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/benchmarks/README.md b/benchmarks/README.md index 6e60e38af..ad5ab431e 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -1,7 +1,7 @@ # Install dependencies ``` -pip3 install --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cu113 +pip3 install --pre torch torchvision torchaudio torchtext --extra-index-url https://download.pytorch.org/whl/nightly/cu113 python setup.py develop ``` @@ -9,9 +9,8 @@ python setup.py develop ``` -usage: run_benchmark.py [-h] [--dataset DATASET] [--model_name MODEL_NAME] [--batch_size BATCH_SIZE] - [--num_epochs NUM_EPOCHS] [--report_location REPORT_LOCATION] - [--num_workers NUM_WORKERS] [--shuffle] [--dataloaderv DATALOADERV] +usage: run_benchmark.py [-h] [--dataset DATASET] [--model_name MODEL_NAME] [--batch_size BATCH_SIZE] [--device DEVICE] [--num_epochs NUM_EPOCHS] + [--report_location REPORT_LOCATION] [--num_workers NUM_WORKERS] [--shuffle] [--dataloaderv DATALOADERV] ``` ## Available metrics @@ -22,13 +21,17 @@ usage: run_benchmark.py [-h] [--dataset DATASET] [--model_name MODEL_NAME] [--ba * [x] CPU Load * [x] GPU Load * [x] Memory usage +* [x] PyTorch profiler ## Additional profiling ``` pip install scalene -pip install torch-tb-profiler ``` +`scalene run_benchmark.py` -`scalene run_benchmark.py` \ No newline at end of file +## Other benchmarks in the wild +* https://github.com/pytorch/kineto/blob/main/tb_plugin/examples/datapipe_example.py +* https://github.com/pytorch/text/tree/main/test/datasets +* https://github.com/pytorch/vision/tree/main/torchvision/prototype/datasets \ No newline at end of file diff --git a/benchmarks/run_benchmark.py b/benchmarks/run_benchmark.py index 2c2da5a6a..aaa941efe 100644 --- a/benchmarks/run_benchmark.py +++ b/benchmarks/run_benchmark.py @@ -82,7 +82,7 @@ def init_fn(worker_id): def transform(img): t= transforms.Compose([ transforms.ToPILImage(), - transforms.Resize(size=(96,98)), + transforms.Resize(size=(100,100)), # transforms.reshape(64,3,7,7), transforms.ToTensor()] ) From 4b8da09bf516d093ee55cf8fd8bc388438ae5df0 Mon Sep 17 00:00:00 2001 From: Mark Saroufim Date: Thu, 26 May 2022 00:25:37 +0000 Subject: [PATCH 14/24] [skip ci] push --- benchmarks/run_benchmark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/run_benchmark.py b/benchmarks/run_benchmark.py index aaa941efe..e943c64dc 100644 --- a/benchmarks/run_benchmark.py +++ b/benchmarks/run_benchmark.py @@ -83,7 +83,7 @@ def transform(img): t= transforms.Compose([ transforms.ToPILImage(), transforms.Resize(size=(100,100)), - # transforms.reshape(64,3,7,7), + # transforms.reshape(64,3,7,7), TODO: 
Figure out collation transforms.ToTensor()] ) return t(img).to(torch.device(device)) From 58101fe0a313f9fb0263dbcb2d3fcb053ff9328d Mon Sep 17 00:00:00 2001 From: Mark Saroufim Date: Tue, 31 May 2022 22:47:50 +0000 Subject: [PATCH 15/24] [skip ci] added nvidia smi and lscpu --- benchmarks/run_benchmark.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/benchmarks/run_benchmark.py b/benchmarks/run_benchmark.py index e943c64dc..cbe207973 100644 --- a/benchmarks/run_benchmark.py +++ b/benchmarks/run_benchmark.py @@ -1,6 +1,7 @@ import argparse import sys import logging +import subprocess import torchvision import torch @@ -71,6 +72,13 @@ def init_fn(worker_id): else: print(f"{model} not supported yet") +if device.startswith("cuda"): + nvidiasmi = subprocess.check_output("nvidia-smi", shell=True, text=True) + print(nvidiasmi) + +lscpu = subprocess.check_output("lscpu", shell=True, text=True) +print(lscpu) + print(f"batch size {batch_size}") print(f"Dataset name {dp}") print(f"Dataset length {len(dp)}") @@ -138,8 +146,6 @@ def str_to_list(str): epoch_start = time.time() running_loss = 0 for i, elem in enumerate(dl): - p.step() - batch_start = time.time() labels = torch.argmax(elem[0]["label"], dim=1) @@ -158,10 +164,11 @@ def str_to_list(str): batch_duration = batch_end - batch_start batch_durations.append(batch_duration) p.step() - p.step() + epoch_end = time.time() epoch_duration = epoch_end - epoch_start per_epoch_durations.append(epoch_duration) + total_end = time.time() total_duration = total_end - total_start From 18d6a2e52762dccaae11ecb2231cf041eee906b1 Mon Sep 17 00:00:00 2001 From: Mark Saroufim Date: Wed, 1 Jun 2022 00:16:10 +0000 Subject: [PATCH 16/24] [ski ci] refactor --- benchmarks/__init__.py | 0 benchmarks/args.py | 26 +++++++++ benchmarks/datasets.py | 29 ++++++++++ benchmarks/report.py | 7 +++ benchmarks/run_benchmark.py | 106 +++++++----------------------------- benchmarks/trainers.py | 30 ++++++++++ benchmarks/utils.py | 6 ++ 7 files changed, 117 insertions(+), 87 deletions(-) create mode 100644 benchmarks/__init__.py create mode 100644 benchmarks/args.py create mode 100644 benchmarks/datasets.py create mode 100644 benchmarks/report.py create mode 100644 benchmarks/trainers.py create mode 100644 benchmarks/utils.py diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/args.py b/benchmarks/args.py new file mode 100644 index 000000000..a399f6572 --- /dev/null +++ b/benchmarks/args.py @@ -0,0 +1,26 @@ +import argparse + +## Arg parsing +def arg_parser(): + parser = argparse.ArgumentParser() + parser.add_argument("--dataset", type=str, default="gtsrb", help="The name of the dataset") + parser.add_argument("--model_name", type=str, default="resnext50_32x4d", help="The name of the model") + parser.add_argument("--batch_size", type=int, default=1, help="") + parser.add_argument("--device", type=str, default="cuda:0", help="Options are are cpu or cuda:0") + parser.add_argument("--num_epochs", type=int, default=1) + parser.add_argument("--report_location", type=str, default="./report.md", help="The location where the generated report will be stored") + parser.add_argument("--num_workers", type=int, default=1, help="Number of dataloader workers") + parser.add_argument("--shuffle", action="store_true") + parser.add_argument("--dataloaderv", type=int, default=1) + + args = parser.parse_args() + dataset = args.dataset + model_name = args.model_name + batch_size = args.batch_size 
+ device = args.device + num_epochs = args.num_epochs + report_location = args.report_location + num_workers = args.num_workers + shuffle = args.shuffle + dataloaderv = args.dataloaderv + return dataset,model_name,batch_size,device,num_epochs,num_workers,shuffle,dataloaderv \ No newline at end of file diff --git a/benchmarks/datasets.py b/benchmarks/datasets.py new file mode 100644 index 000000000..1ea50bc5b --- /dev/null +++ b/benchmarks/datasets.py @@ -0,0 +1,29 @@ +from torchvision import transforms +import torch + +def prepare_gtsrb(batch_size, device, dp): + def transform(img): + t= transforms.Compose([ + transforms.ToPILImage(), + transforms.Resize(size=(100,100)), + # transforms.reshape(64,3,7,7), TODO: Figure out collation + transforms.ToTensor()] + ) + return t(img).to(torch.device(device)) + + def str_to_list(str): + l = [] + for char in str: + l.append(int(char)) + return l + + # Filter out bounding box and path to image + dp = dp.map(lambda sample : {"image" : sample["image"], "label" : sample["label"]}) + + # Apply image preprocessing + dp = dp.map(lambda sample : transform(sample.decode()), input_col="image") + dp = dp.map(lambda sample : torch.tensor(str_to_list(sample.to_categories())).to(torch.device(device)), input_col="label") + + # Batch + dp = dp.batch(batch_size) + return dp \ No newline at end of file diff --git a/benchmarks/report.py b/benchmarks/report.py new file mode 100644 index 000000000..a5bbf8d91 --- /dev/null +++ b/benchmarks/report.py @@ -0,0 +1,7 @@ +from statistics import mean + + +def create_report(per_epoch_durations, batch_durations, total_duration): + print(f"Total duration is {total_duration}") + print(f"Per epoch duration {mean(per_epoch_durations)}") + print(f"Per batch duration {mean(batch_durations)}") diff --git a/benchmarks/run_benchmark.py b/benchmarks/run_benchmark.py index cbe207973..cd62dec67 100644 --- a/benchmarks/run_benchmark.py +++ b/benchmarks/run_benchmark.py @@ -1,4 +1,3 @@ -import argparse import sys import logging import subprocess @@ -10,35 +9,20 @@ import torch.nn.functional as F from torchvision import transforms import time -from statistics import mean import torch.optim as optim import torch.profiler +# Relative imports +from args import arg_parser +from utils import init_fn +from datasets import prepare_gtsrb +from trainers import train +from report import create_report + logging.basicConfig(filename='example.log', level=logging.DEBUG) -## Arg parsing -parser = argparse.ArgumentParser() -parser.add_argument("--dataset", type=str, default="gtsrb", help="The name of the dataset") -parser.add_argument("--model_name", type=str, default="resnext50_32x4d", help="The name of the model") -parser.add_argument("--batch_size", type=int, default=1, help="") -parser.add_argument("--device", type=str, default="cuda:0", help="Options are are cpu or cuda:0") -parser.add_argument("--num_epochs", type=int, default=1) -parser.add_argument("--report_location", type=str, default="./report.md", help="The location where the generated report will be stored") -parser.add_argument("--num_workers", type=int, default=1, help="Number of dataloader workers") -parser.add_argument("--shuffle", action="store_true") -parser.add_argument("--dataloaderv", type=int, default=1) - -args = parser.parse_args() -dataset = args.dataset -model_name = args.model_name -batch_size = args.batch_size -device = args.device -num_epochs = args.num_epochs -report_location = args.report_location -num_workers = args.num_workers -shuffle = args.shuffle -dataloaderv = 
args.dataloaderv +dataset, model_name, batch_size, device, num_epochs, num_workers, shuffle, dataloaderv = arg_parser() if dataloaderv == 1: from torch.utils.data import DataLoader @@ -47,12 +31,7 @@ else: raise(f"dataloaderv{dataloaderv} is not a valid option") -# Util function for multiprocessing -def init_fn(worker_id): - info = torch.utils.data.get_worker_info() - num_workers = info.num_workers - datapipe = info.dataset - torch.utils.data.graph_settings.apply_sharding(datapipe, num_workers, worker_id) + # Download model model_map = { @@ -87,30 +66,7 @@ def init_fn(worker_id): logging.debug(f"data format before preprocessing is {next(iter(dp))}") if dataset == "gtsrb": - def transform(img): - t= transforms.Compose([ - transforms.ToPILImage(), - transforms.Resize(size=(100,100)), - # transforms.reshape(64,3,7,7), TODO: Figure out collation - transforms.ToTensor()] - ) - return t(img).to(torch.device(device)) - - def str_to_list(str): - l = [] - for char in str: - l.append(int(char)) - return l - - # Filter out bounding box and path to image - dp = dp.map(lambda sample : {"image" : sample["image"], "label" : sample["label"]}) - - # Apply image preprocessing - dp = dp.map(lambda sample : transform(sample.decode()), input_col="image") - dp = dp.map(lambda sample : torch.tensor(str_to_list(sample.to_categories())).to(torch.device(device)), input_col="label") - - # Batch - dp = dp.batch(batch_size) + dp = prepare_gtsrb(batch_size, device, dp) # Datapipe format after preprocessing logging.debug(f"data format after preprocessing is \n {next(iter(dp))}\n") @@ -123,13 +79,16 @@ def str_to_list(str): else: dl = DataLoader(dataset=dp, batch_size=batch_size, shuffle=True, num_workers=num_workers, worker_init_fn=init_fn, multiprocessing_context="spawn") +criterion = torch.nn.CrossEntropyLoss() +optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9) + + total_start = time.time() per_epoch_durations = [] batch_durations = [] -criterion = torch.nn.CrossEntropyLoss() -optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9) + with torch.profiler.profile( activities=[torch.profiler.ProfilerActivity.CPU, torch.profiler.ProfilerActivity.CUDA], @@ -142,40 +101,13 @@ def str_to_list(str): with_modules=True ) as p: - for epoch in range(num_epochs): - epoch_start = time.time() - running_loss = 0 - for i, elem in enumerate(dl): - batch_start = time.time() - - labels = torch.argmax(elem[0]["label"], dim=1) - optimizer.zero_grad() - outputs = model(elem[0]["image"]) - loss = criterion(outputs,labels) - loss.backward() - optimizer.step() - - running_loss += loss.item() - # if i % 200 == 1999: # print every 2000 mini-batches - print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.10f}') - running_loss = 0.0 - - batch_end = time.time() - batch_duration = batch_end - batch_start - batch_durations.append(batch_duration) - p.step() - - epoch_end = time.time() - epoch_duration = epoch_end - epoch_start - per_epoch_durations.append(epoch_duration) + train(num_epochs, model, dl, per_epoch_durations, batch_durations, criterion, optimizer, p) total_end = time.time() total_duration = total_end - total_start # TODO: Make this output some human readable markdown file -def create_report(per_epoch_durations, batch_durations, total_duration): - print(f"Total duration is {total_duration}") - print(f"Per epoch duration {mean(per_epoch_durations)}") - print(f"Per batch duration {mean(batch_durations)}") -create_report(per_epoch_durations, batch_durations, total_duration) \ No newline at end of 
file +create_report(per_epoch_durations, batch_durations, total_duration) + + diff --git a/benchmarks/trainers.py b/benchmarks/trainers.py new file mode 100644 index 000000000..674faf7c4 --- /dev/null +++ b/benchmarks/trainers.py @@ -0,0 +1,30 @@ +import time +import torch + +def train(num_epochs, model, dl, per_epoch_durations, batch_durations, criterion, optimizer, p): + for epoch in range(num_epochs): + epoch_start = time.time() + running_loss = 0 + for i, elem in enumerate(dl): + batch_start = time.time() + + labels = torch.argmax(elem[0]["label"], dim=1) + optimizer.zero_grad() + outputs = model(elem[0]["image"]) + loss = criterion(outputs,labels) + loss.backward() + optimizer.step() + + running_loss += loss.item() + # if i % 200 == 1999: # print every 2000 mini-batches + print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.10f}') + running_loss = 0.0 + + batch_end = time.time() + batch_duration = batch_end - batch_start + batch_durations.append(batch_duration) + p.step() + + epoch_end = time.time() + epoch_duration = epoch_end - epoch_start + per_epoch_durations.append(epoch_duration) \ No newline at end of file diff --git a/benchmarks/utils.py b/benchmarks/utils.py new file mode 100644 index 000000000..f149b3ce1 --- /dev/null +++ b/benchmarks/utils.py @@ -0,0 +1,6 @@ +# Util function for multiprocessing +def init_fn(worker_id): + info = torch.utils.data.get_worker_info() + num_workers = info.num_workers + datapipe = info.dataset + torch.utils.data.graph_settings.apply_sharding(datapipe, num_workers, worker_id) \ No newline at end of file From e6638c7dce93ec0649bf4655d81a6dc99be787ca Mon Sep 17 00:00:00 2001 From: Mark Saroufim Date: Wed, 1 Jun 2022 00:27:03 +0000 Subject: [PATCH 17/24] [ski ci] refactor --- benchmarks/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/benchmarks/utils.py b/benchmarks/utils.py index f149b3ce1..d53ec796a 100644 --- a/benchmarks/utils.py +++ b/benchmarks/utils.py @@ -1,4 +1,6 @@ # Util function for multiprocessing +import torch + def init_fn(worker_id): info = torch.utils.data.get_worker_info() num_workers = info.num_workers From 6cae2a3559b7623a0aded8f3de19825ac11a2a92 Mon Sep 17 00:00:00 2001 From: Mark Saroufim Date: Wed, 1 Jun 2022 00:46:45 +0000 Subject: [PATCH 18/24] [skip ci] collation works --- benchmarks/README.md | 1 + benchmarks/datasets.py | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/README.md b/benchmarks/README.md index ad5ab431e..a098fd2a9 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -14,6 +14,7 @@ usage: run_benchmark.py [-h] [--dataset DATASET] [--model_name MODEL_NAME] [--ba ``` ## Available metrics +* [ ] PyTorch profiler * [x] Total time * [x] Time per batch * [x] Time per epoch diff --git a/benchmarks/datasets.py b/benchmarks/datasets.py index 1ea50bc5b..7b6fb78d7 100644 --- a/benchmarks/datasets.py +++ b/benchmarks/datasets.py @@ -6,7 +6,6 @@ def transform(img): t= transforms.Compose([ transforms.ToPILImage(), transforms.Resize(size=(100,100)), - # transforms.reshape(64,3,7,7), TODO: Figure out collation transforms.ToTensor()] ) return t(img).to(torch.device(device)) From cfa49aa161403402d6b9b6a68fe1427ae2c340ec Mon Sep 17 00:00:00 2001 From: Mark Saroufim Date: Wed, 1 Jun 2022 01:51:32 +0000 Subject: [PATCH 19/24] [skip ci] push --- benchmarks/args.py | 5 +++- benchmarks/datasets.py | 32 ++++++++++----------- benchmarks/report.py | 1 - benchmarks/run_benchmark.py | 55 ++++++++++++++++++++----------------- benchmarks/trainers.py | 14 ++++++---- requirements.txt 
| 1 + 6 files changed, 60 insertions(+), 48 deletions(-) diff --git a/benchmarks/args.py b/benchmarks/args.py index a399f6572..6576a78d4 100644 --- a/benchmarks/args.py +++ b/benchmarks/args.py @@ -4,6 +4,7 @@ def arg_parser(): parser = argparse.ArgumentParser() parser.add_argument("--dataset", type=str, default="gtsrb", help="The name of the dataset") + parser.add_argument("--ispipe", action="store_true", help="is datapipe or dataset?") parser.add_argument("--model_name", type=str, default="resnext50_32x4d", help="The name of the model") parser.add_argument("--batch_size", type=int, default=1, help="") parser.add_argument("--device", type=str, default="cuda:0", help="Options are are cpu or cuda:0") @@ -14,7 +15,9 @@ def arg_parser(): parser.add_argument("--dataloaderv", type=int, default=1) args = parser.parse_args() + print(args) dataset = args.dataset + ispipe = args.ispipe model_name = args.model_name batch_size = args.batch_size device = args.device @@ -23,4 +26,4 @@ def arg_parser(): num_workers = args.num_workers shuffle = args.shuffle dataloaderv = args.dataloaderv - return dataset,model_name,batch_size,device,num_epochs,num_workers,shuffle,dataloaderv \ No newline at end of file + return dataset, ispipe, model_name,batch_size,device,num_epochs,num_workers,shuffle,dataloaderv \ No newline at end of file diff --git a/benchmarks/datasets.py b/benchmarks/datasets.py index 7b6fb78d7..8dc93cc18 100644 --- a/benchmarks/datasets.py +++ b/benchmarks/datasets.py @@ -1,28 +1,28 @@ -from torchvision import transforms +from torchvision import transforms, datasets import torch -def prepare_gtsrb(batch_size, device, dp): - def transform(img): - t= transforms.Compose([ - transforms.ToPILImage(), - transforms.Resize(size=(100,100)), - transforms.ToTensor()] - ) - return t(img).to(torch.device(device)) +def transform(img): + t= transforms.Compose([ + transforms.ToPILImage(), + transforms.Resize(size=(100,100)), + transforms.ToTensor()] + ) + return t(img) - def str_to_list(str): - l = [] - for char in str: - l.append(int(char)) - return l +def str_to_list(str): + l = [] + for char in str: + l.append(int(char)) + return l +def prepare_gtsrb_datapipe(batch_size, device, dp): # Filter out bounding box and path to image dp = dp.map(lambda sample : {"image" : sample["image"], "label" : sample["label"]}) # Apply image preprocessing - dp = dp.map(lambda sample : transform(sample.decode()), input_col="image") + dp = dp.map(lambda sample : transform(sample.decode().to(torch.device(device))), input_col="image") dp = dp.map(lambda sample : torch.tensor(str_to_list(sample.to_categories())).to(torch.device(device)), input_col="label") # Batch dp = dp.batch(batch_size) - return dp \ No newline at end of file + return dp \ No newline at end of file diff --git a/benchmarks/report.py b/benchmarks/report.py index a5bbf8d91..5d2378e69 100644 --- a/benchmarks/report.py +++ b/benchmarks/report.py @@ -1,6 +1,5 @@ from statistics import mean - def create_report(per_epoch_durations, batch_durations, total_duration): print(f"Total duration is {total_duration}") print(f"Per epoch duration {mean(per_epoch_durations)}") diff --git a/benchmarks/run_benchmark.py b/benchmarks/run_benchmark.py index cd62dec67..eeece0c2c 100644 --- a/benchmarks/run_benchmark.py +++ b/benchmarks/run_benchmark.py @@ -5,7 +5,7 @@ import torchvision import torch import transformers -from torchvision.prototype.datasets import load +from torchvision.prototype.datasets import load as loadpipe import torch.nn.functional as F from torchvision import 
transforms import time @@ -14,15 +14,24 @@ # Relative imports from args import arg_parser +from benchmarks.datasets import prepare_gtsrb_dataset from utils import init_fn -from datasets import prepare_gtsrb +from datasets import prepare_gtsrb_datapipe from trainers import train from report import create_report logging.basicConfig(filename='example.log', level=logging.DEBUG) -dataset, model_name, batch_size, device, num_epochs, num_workers, shuffle, dataloaderv = arg_parser() +dataset, ispipe, model_name, batch_size, device, num_epochs, num_workers, shuffle, dataloaderv = arg_parser() + +if device.startswith("cuda"): + nvidiasmi = subprocess.check_output("nvidia-smi", shell=True, text=True) + logging.debug(nvidiasmi) + +lscpu = subprocess.check_output("lscpu", shell=True, text=True) +logging.debug(lscpu) + if dataloaderv == 1: from torch.utils.data import DataLoader @@ -31,8 +40,6 @@ else: raise(f"dataloaderv{dataloaderv} is not a valid option") - - # Download model model_map = { "resnext50_32x4d": torchvision.models.resnext50_32x4d, @@ -45,39 +52,38 @@ model = model_map[model_name]().to(torch.device(device)) # setup data pipe -if model_name in ["resnext50_32x4d", "mobilenet_v3_large"]: - dp = load(dataset, split="train") +if dataset == "gtsrb": + if ispipe: + dp = loadpipe(dataset, split="train") + logging.debug(f"data format before preprocessing is {next(iter(dp))}") -else: - print(f"{model} not supported yet") + dp = prepare_gtsrb_datapipe(batch_size, device, dp) + logging.debug(f"data format after preprocessing is \n {next(iter(dp))}\n") -if device.startswith("cuda"): - nvidiasmi = subprocess.check_output("nvidia-smi", shell=True, text=True) - print(nvidiasmi) + else: + # No further preprocessing needed this returns a tuple of Images and labels as ints + # Do I need to do batching and collation manually? 
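+        # A map-style dataset like this lets DataLoader handle batching and default
+        # collation, but GTSRB yields (PIL image, int) pairs of varying sizes, so a
+        # transform is still needed before default_collate can stack a batch, e.g.
+        # transform=transforms.Compose([transforms.Resize((100, 100)), transforms.ToTensor()])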
+ ds = torchvision.datasets.GTSRB(root=".",split="train", download=True) + + +else: + print(f"{dataset} not supported yet") -lscpu = subprocess.check_output("lscpu", shell=True, text=True) -print(lscpu) print(f"batch size {batch_size}") print(f"Dataset name {dp}") print(f"Dataset length {len(dp)}") -# Datapipe format -logging.debug(f"data format before preprocessing is {next(iter(dp))}") +# Setup data loader -if dataset == "gtsrb": - dp = prepare_gtsrb(batch_size, device, dp) - -# Datapipe format after preprocessing -logging.debug(f"data format after preprocessing is \n {next(iter(dp))}\n") +data = dp if dp else ds -# Setup data loader if num_workers == 1: - dl = DataLoader(dataset=dp, batch_size=batch_size, shuffle=shuffle) + dl = DataLoader(dataset=data, batch_size=batch_size, shuffle=shuffle) # Shuffle won't work in distributed yet else: - dl = DataLoader(dataset=dp, batch_size=batch_size, shuffle=True, num_workers=num_workers, worker_init_fn=init_fn, multiprocessing_context="spawn") + dl = DataLoader(dataset=data, batch_size=batch_size, shuffle=True, num_workers=num_workers, worker_init_fn=init_fn, multiprocessing_context="spawn") criterion = torch.nn.CrossEntropyLoss() optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9) @@ -107,7 +113,6 @@ total_duration = total_end - total_start # TODO: Make this output some human readable markdown file - create_report(per_epoch_durations, batch_durations, total_duration) diff --git a/benchmarks/trainers.py b/benchmarks/trainers.py index 674faf7c4..5f5a33f4e 100644 --- a/benchmarks/trainers.py +++ b/benchmarks/trainers.py @@ -8,10 +8,7 @@ def train(num_epochs, model, dl, per_epoch_durations, batch_durations, criterion for i, elem in enumerate(dl): batch_start = time.time() - labels = torch.argmax(elem[0]["label"], dim=1) - optimizer.zero_grad() - outputs = model(elem[0]["image"]) - loss = criterion(outputs,labels) + loss = process(model, criterion, optimizer, elem) loss.backward() optimizer.step() @@ -27,4 +24,11 @@ def train(num_epochs, model, dl, per_epoch_durations, batch_durations, criterion epoch_end = time.time() epoch_duration = epoch_end - epoch_start - per_epoch_durations.append(epoch_duration) \ No newline at end of file + per_epoch_durations.append(epoch_duration) + +def process(model, criterion, optimizer, elem): + labels = torch.argmax(elem[0]["label"], dim=1) + optimizer.zero_grad() + outputs = model(elem[0]["image"]) + loss = criterion(outputs,labels) + return loss \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 14a4b8fa8..f509913dc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ urllib3 >= 1.25 requests +portalocker >= 2.0.0 \ No newline at end of file From eed5203324bf4b70ebf4f6e48a589c56dfa047e8 Mon Sep 17 00:00:00 2001 From: Mark Saroufim Date: Tue, 28 Jun 2022 15:47:45 -0700 Subject: [PATCH 20/24] [skip ci] push --- benchmarks/README.md | 2 +- benchmarks/cloud/README.md | 33 +++++++++++++++ benchmarks/cloud/ec2.yml | 84 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 118 insertions(+), 1 deletion(-) create mode 100644 benchmarks/cloud/README.md create mode 100644 benchmarks/cloud/ec2.yml diff --git a/benchmarks/README.md b/benchmarks/README.md index a098fd2a9..b9ff7136d 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -14,7 +14,7 @@ usage: run_benchmark.py [-h] [--dataset DATASET] [--model_name MODEL_NAME] [--ba ``` ## Available metrics -* [ ] PyTorch profiler +* [ ] PyTorch profiler - won't be possible until H2 2022 * [x] Total time 
* [x] Time per batch * [x] Time per epoch diff --git a/benchmarks/cloud/README.md b/benchmarks/cloud/README.md new file mode 100644 index 000000000..59d8bbbc9 --- /dev/null +++ b/benchmarks/cloud/README.md @@ -0,0 +1,33 @@ +This folder contains templates that are useful for cloud setups + +Idea would be to provision a machine by configuring it in a YAML file and then running a benchmark script on it automatically. This is critical both for ad hoc benchmarking that are reproducible but also including real world benchmarks in a release. + +We've provided some useful `yml` templates for you to get started + +https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/using-cfn-cli-creating-stack.html + +## Setup aws cli +`aws configure` and enter your credentials + +## Setup stack (machine configuration) + +```sh + aws cloudformation create-stack \ + --stack-name myteststack \ + --template-body ec2.yml \ +``` + +## Ssh into machine and run job +``` +ssh elastic_ip +git clone https://github.com/pytorch/data +cd data/benchmarks +python run_benchmark.py +``` + +Visually inspect logs + +## Shut down stack + +`aws cloudformation delete-stack --stack-name myteststack` + diff --git a/benchmarks/cloud/ec2.yml b/benchmarks/cloud/ec2.yml new file mode 100644 index 000000000..8b6537cc1 --- /dev/null +++ b/benchmarks/cloud/ec2.yml @@ -0,0 +1,84 @@ +# This script sets up an Ec2 instance with elastic IP and a disk volume +Parameters: + InstanceTypeParameter: + Type: String + Default: c5n.large + AllowedValues: + - c5n.large + - p2.2xlarge + - p3.2xlarge + - p3.8xlarge + Description: Instance type CPU, GPU + DiskSize: + Type: Number + Default: 100 + Description: Disk size in GB + DiskType: + Type: String + Default: gp2 + AllowedValues: + - gp2 + - gp3 + - io1 + - io2 + - sc1 + - st1 + - standard + Description: Enter Disk type SSD, HDD + +Resources: + MyInstance: + Type: AWS::EC2::Instance + Properties: + AvailabilityZone: us-west-2a + ImageId: ami-0306d46d05aaf8663 # Deep Learning AMI + InstanceType: + Ref: InstanceTypeParameter + SecurityGroups: + - !Ref SSHSecurityGroup + + # Elastic IP so I can easily ssh into the machine + MyEIP: + Type: AWS::EC2::EIP + Properties: + InstanceId: !Ref MyInstance + + # Open security group for SSH + SSHSecurityGroup: + Type: AWS::EC2::SecurityGroup + Properties: + GroupDescription: Enable SSH access via port 22 + SSHSecurityGroupIngress: + - CidrIp: 0.0.0.0/0 + FromPort: 22 + IpProtocol: tcp + ToPort: 22 + + + NewVolume: + Type: AWS::EC2::Volume + Properties: + Size: + Ref: DiskSize + VolumeType: + Ref: DiskType + AvailabilityZone: !GetAtt MyInstance.AvailabilityZone + Tags: + - Key: MyTag + Value: TagValue + DeletionPolicy: Snapshot + + MountPoint: + Type: AWS::EC2::VolumeAttachment + Properties: + InstanceId: !Ref MyInstance + VolumeId: !Ref NewVolume + Device: /dev/sdh + +# # Volume +# SSD: +# Type: AWS::EC2::VolumeAttachment +# Properties: +# InstanceId: !Ref MyInstance + +# HDD: \ No newline at end of file From cf54e201ea66af78e51794099fa7abf6ca1db7a9 Mon Sep 17 00:00:00 2001 From: Mark Saroufim Date: Tue, 28 Jun 2022 16:00:39 -0700 Subject: [PATCH 21/24] [skip ci] push --- benchmarks/cloud/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/benchmarks/cloud/README.md b/benchmarks/cloud/README.md index 59d8bbbc9..14aab3963 100644 --- a/benchmarks/cloud/README.md +++ b/benchmarks/cloud/README.md @@ -13,8 +13,9 @@ https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/using-cfn-cli-cre ```sh aws cloudformation create-stack \ - 
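The stack setup documented above can also be driven from Python instead of the AWS CLI. A minimal boto3 sketch, assuming the template sits next to the script and the region matches the `us-west-2a` zone hard-coded in `ec2.yml`; note that the CLI form needs `--template-body file://ec2.yml` for a local template, so the template text is read from disk here as well. The stack name and parameter values are examples drawn from the template's allowed values:

```python
import boto3

cfn = boto3.client("cloudformation", region_name="us-west-2")

# CloudFormation expects the raw template text when the file is local.
with open("ec2.yml") as f:
    template_body = f.read()

cfn.create_stack(
    StackName="myteststack",
    TemplateBody=template_body,
    Parameters=[
        {"ParameterKey": "InstanceTypeParameter", "ParameterValue": "p3.2xlarge"},
        {"ParameterKey": "DiskType", "ParameterValue": "gp3"},
    ],
)

# Block until the instance, EIP and volume are up, then run the benchmark over SSH.
cfn.get_waiter("stack_create_complete").wait(StackName="myteststack")
```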
--stack-name myteststack \ + --stack-name torchdatabenchmark \ --template-body ec2.yml \ + --parameters ParameterKey=InstanceTypeParameter,ParameterValue=p3.2xlarge ParameterKey=DiskType,ParameterValue=gp3 ``` ## Ssh into machine and run job From 8ace4ff972e5463afa8fd2c6d46968137b977511 Mon Sep 17 00:00:00 2001 From: Mark Saroufim Date: Tue, 28 Jun 2022 16:12:41 -0700 Subject: [PATCH 22/24] [skip ci] update --- benchmarks/cloud/ec2.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/benchmarks/cloud/ec2.yml b/benchmarks/cloud/ec2.yml index 8b6537cc1..fc894506a 100644 --- a/benchmarks/cloud/ec2.yml +++ b/benchmarks/cloud/ec2.yml @@ -48,11 +48,11 @@ Resources: Type: AWS::EC2::SecurityGroup Properties: GroupDescription: Enable SSH access via port 22 - SSHSecurityGroupIngress: + SecurityGroupIngress: - CidrIp: 0.0.0.0/0 - FromPort: 22 - IpProtocol: tcp - ToPort: 22 + FromPort: 22 + IpProtocol: tcp + ToPort: 22 NewVolume: From 493323965b37599ef32f89eab02048e4af6a0929 Mon Sep 17 00:00:00 2001 From: Mark Saroufim Date: Thu, 21 Jul 2022 00:20:13 +0000 Subject: [PATCH 23/24] git commit --- benchmarks/README.md | 36 ++++++++++++------------- benchmarks/args.py | 51 ++++++++++++++++++------------------ benchmarks/report.py | 52 ++++++++++++++++++++++++++++++++++--- benchmarks/requirements.txt | 9 +++++++ benchmarks/run_benchmark.py | 44 +++++++++++++++---------------- benchmarks/utils.py | 8 ------ 6 files changed, 120 insertions(+), 80 deletions(-) create mode 100644 benchmarks/requirements.txt delete mode 100644 benchmarks/utils.py diff --git a/benchmarks/README.md b/benchmarks/README.md index b9ff7136d..0c2cfc077 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -1,38 +1,34 @@ # Install dependencies ``` -pip3 install --pre torch torchvision torchaudio torchtext --extra-index-url https://download.pytorch.org/whl/nightly/cu113 +pip install -r benchmarks/requirements.txt python setup.py develop ``` # Usage instructions - ``` -usage: run_benchmark.py [-h] [--dataset DATASET] [--model_name MODEL_NAME] [--batch_size BATCH_SIZE] [--device DEVICE] [--num_epochs NUM_EPOCHS] +usage: run_benchmark.py [-h] [--dataset DATASET] [--model_name MODEL_NAME] [--batch_size BATCH_SIZE] [--device DEVICE] [--num_epochs NUM_EPOCHS] [--report_location REPORT_LOCATION] [--num_workers NUM_WORKERS] [--shuffle] [--dataloaderv DATALOADERV] ``` ## Available metrics -* [ ] PyTorch profiler - won't be possible until H2 2022 -* [x] Total time -* [x] Time per batch -* [x] Time per epoch -* [x] Precision over time -* [x] CPU Load -* [x] GPU Load -* [x] Memory usage -* [x] PyTorch profiler -## Additional profiling +- [x] Total time +- [x] Time per batch +- [x] Time per epoch +- [x] Precision over time +- [x] CPU Load +- [x] GPU Load +- [x] Memory usage -``` -pip install scalene -``` -`scalene run_benchmark.py` +## Additional profiling +The PyTorch profiler doesn't work quite well with `torchdata` for now https://github.com/pytorch/kineto/issues/609 but +there are other good options like `py-spy` or `scalene` which could be used like so `profiler_name run_benchmark.py` ## Other benchmarks in the wild -* https://github.com/pytorch/kineto/blob/main/tb_plugin/examples/datapipe_example.py -* https://github.com/pytorch/text/tree/main/test/datasets -* https://github.com/pytorch/vision/tree/main/torchvision/prototype/datasets \ No newline at end of file + +- https://github.com/pytorch/kineto/blob/main/tb_plugin/examples/datapipe_example.py +- https://github.com/pytorch/text/tree/main/test/datasets +- 
https://github.com/pytorch/vision/tree/main/torchvision/prototype/datasets diff --git a/benchmarks/args.py b/benchmarks/args.py index 6576a78d4..98414e244 100644 --- a/benchmarks/args.py +++ b/benchmarks/args.py @@ -1,29 +1,30 @@ -import argparse +from dataclasses import dataclass, fields +from enum import Enum + +from simple_parsing import ArgumentParser + + +@dataclass(frozen=True) +class BenchmarkConfig: + dataset: str = "gtsrb" # TODO: Integrate with HF datasets + model_name: str = "resnext50_32x4d" # TODO: torchvision models supported only + batch_size: int = 1 + device: str = "cuda:0" # Options are cpu or cuda:0 + num_epochs: int = 1 + report_location: str = "report.csv" + num_wokers: int = 1 + shuffle: bool = True + dataloader_version: int = 1 # Options are 1 or 2 + ## Arg parsing def arg_parser(): - parser = argparse.ArgumentParser() - parser.add_argument("--dataset", type=str, default="gtsrb", help="The name of the dataset") - parser.add_argument("--ispipe", action="store_true", help="is datapipe or dataset?") - parser.add_argument("--model_name", type=str, default="resnext50_32x4d", help="The name of the model") - parser.add_argument("--batch_size", type=int, default=1, help="") - parser.add_argument("--device", type=str, default="cuda:0", help="Options are are cpu or cuda:0") - parser.add_argument("--num_epochs", type=int, default=1) - parser.add_argument("--report_location", type=str, default="./report.md", help="The location where the generated report will be stored") - parser.add_argument("--num_workers", type=int, default=1, help="Number of dataloader workers") - parser.add_argument("--shuffle", action="store_true") - parser.add_argument("--dataloaderv", type=int, default=1) - + parser = ArgumentParser() + parser.add_arguments(BenchmarkConfig, dest="options") args = parser.parse_args() - print(args) - dataset = args.dataset - ispipe = args.ispipe - model_name = args.model_name - batch_size = args.batch_size - device = args.device - num_epochs = args.num_epochs - report_location = args.report_location - num_workers = args.num_workers - shuffle = args.shuffle - dataloaderv = args.dataloaderv - return dataset, ispipe, model_name,batch_size,device,num_epochs,num_workers,shuffle,dataloaderv \ No newline at end of file + benchmark_config = args.options + return benchmark_config + + +if __name__ == "__main__": + arg_parser() diff --git a/benchmarks/report.py b/benchmarks/report.py index 5d2378e69..b72f3d207 100644 --- a/benchmarks/report.py +++ b/benchmarks/report.py @@ -1,6 +1,50 @@ +import csv +from abc import ABC, abstractclassmethod +from dataclasses import dataclass, fields from statistics import mean +from typing import Dict, list, tuple -def create_report(per_epoch_durations, batch_durations, total_duration): - print(f"Total duration is {total_duration}") - print(f"Per epoch duration {mean(per_epoch_durations)}") - print(f"Per batch duration {mean(batch_durations)}") +import numpy as np + +duration = int + + +@dataclass +class MetricCache: + epoch_durations: list[duration] + batch_durations: list[duration] + total_duration: int = 0 + + +class MetricExporter(ABC): + @abstractclassmethod + def export(self, metric_cache: MetricCache) -> None: + return NotImplementedError + + def calculate_percentiles(self, metric_cache: MetricCache) -> Dict[str, float]: + output = {} + for field in fields(metric_cache): + duration_list = getattr(metric_cache, field.name) + percentiles = [ + np.percentile(duration_list, 0.5), + np.percentile(duration_list, 0.9), + np.percentile(duration_list, 
0.99), + ] + output[field.name] = percentiles + return output + + +class StdOutReport(MetricExporter): + def export(self, metric_cache): + percentiles_dict = metric_cache.calculate_percentiles() + for field, percentiles in percentiles_dict.items: + print(f"{field} duration is {percentiles}") + + +class CSVReport(MetricExporter): + def export(self, metric_cache: MetricCache, filepath: str): + percentiles_dict = metric_cache.calculate_percentiles() + with open(filepath, "w") as file: + writer = csv.writer(file) + for field, percentiles in percentiles_dict.items: + writer.writerow(field + percentiles) diff --git a/benchmarks/requirements.txt b/benchmarks/requirements.txt new file mode 100644 index 000000000..cfc16922a --- /dev/null +++ b/benchmarks/requirements.txt @@ -0,0 +1,9 @@ +--extra-index-url https://download.pytorch.org/whl/nightly/cu113 +simple-parsing +dill +numpy +torch +torchvision +torchaudio +torchtext +transformers diff --git a/benchmarks/run_benchmark.py b/benchmarks/run_benchmark.py index eeece0c2c..41c318c56 100644 --- a/benchmarks/run_benchmark.py +++ b/benchmarks/run_benchmark.py @@ -1,26 +1,27 @@ -import sys import logging import subprocess +import sys +import time -import torchvision import torch -import transformers -from torchvision.prototype.datasets import load as loadpipe import torch.nn.functional as F -from torchvision import transforms -import time import torch.optim as optim import torch.profiler +import torchvision +import transformers + # Relative imports from args import arg_parser from benchmarks.datasets import prepare_gtsrb_dataset -from utils import init_fn from datasets import prepare_gtsrb_datapipe -from trainers import train from report import create_report +from torchvision import transforms +from torchvision.prototype.datasets import load as loadpipe +from trainers import train +from utils import init_fn -logging.basicConfig(filename='example.log', level=logging.DEBUG) +logging.basicConfig(filename="example.log", level=logging.DEBUG) dataset, ispipe, model_name, batch_size, device, num_epochs, num_workers, shuffle, dataloaderv = arg_parser() @@ -38,15 +39,14 @@ elif dataloaderv == 2: from torch.utils.data.dataloader_experimental import DataLoader2 as DataLoader else: - raise(f"dataloaderv{dataloaderv} is not a valid option") + raise (f"dataloaderv{dataloaderv} is not a valid option") # Download model model_map = { "resnext50_32x4d": torchvision.models.resnext50_32x4d, - "mobilenet_v3_large" : torchvision.models.mobilenet_v3_large, - "transformerencoder" : torch.nn.TransformerEncoder, - "bert-base" : transformers.BertModel, - + "mobilenet_v3_large": torchvision.models.mobilenet_v3_large, + "transformerencoder": torch.nn.TransformerEncoder, + "bert-base": transformers.BertModel, } model = model_map[model_name]().to(torch.device(device)) @@ -63,7 +63,7 @@ else: # No further preprocessing needed this returns a tuple of Images and labels as ints # Do I need to do batching and collation manually? 
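One thing to watch in the `report.py` added above: `np.percentile` takes `q` on a 0-100 scale, so passing 0.5, 0.9 and 0.99 returns values near the minimum of each series rather than the median, p90 and p99; the exporters also iterate `percentiles_dict.items` without calling it, and lowercase `list`/`tuple` cannot be imported from `typing`. A corrected minimal sketch of the cache and CSV export (the scalar total duration is left out, and the percentile levels and CSV layout are assumptions):

```python
import csv
from dataclasses import dataclass, field, fields
from typing import Dict, List

import numpy as np


@dataclass
class MetricCache:
    epoch_durations: List[float] = field(default_factory=list)
    batch_durations: List[float] = field(default_factory=list)


def calculate_percentiles(cache: MetricCache) -> Dict[str, List[float]]:
    # np.percentile takes q on a 0-100 scale, so p50/p90/p99 are 50, 90, 99.
    return {
        f.name: [float(np.percentile(getattr(cache, f.name), q)) for q in (50, 90, 99)]
        for f in fields(cache)
    }


def export_csv(cache: MetricCache, filepath: str) -> None:
    percentiles = calculate_percentiles(cache)
    with open(filepath, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["metric", "p50", "p90", "p99"])
        for name, values in percentiles.items():  # .items() must be called
            writer.writerow([name, *values])


# Example usage with fake timings.
cache = MetricCache(epoch_durations=[1.2, 1.4, 1.1], batch_durations=[0.05, 0.07, 0.06])
export_csv(cache, "report.csv")
```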
- ds = torchvision.datasets.GTSRB(root=".",split="train", download=True) + ds = torchvision.datasets.GTSRB(root=".", split="train", download=True) else: @@ -83,28 +83,28 @@ # Shuffle won't work in distributed yet else: - dl = DataLoader(dataset=data, batch_size=batch_size, shuffle=True, num_workers=num_workers, worker_init_fn=init_fn, multiprocessing_context="spawn") + dl = DataLoader( + dataset=data, batch_size=batch_size, shuffle=True, num_workers=num_workers, multiprocessing_context="spawn" + ) criterion = torch.nn.CrossEntropyLoss() optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9) - total_start = time.time() per_epoch_durations = [] batch_durations = [] - with torch.profiler.profile( activities=[torch.profiler.ProfilerActivity.CPU, torch.profiler.ProfilerActivity.CUDA], - on_trace_ready=torch.profiler.tensorboard_trace_handler('./result', worker_name='datapipe0'), - schedule=torch.profiler.schedule(wait=1,warmup=1,active=2), + on_trace_ready=torch.profiler.tensorboard_trace_handler("./result", worker_name="datapipe0"), + schedule=torch.profiler.schedule(wait=1, warmup=1, active=2), record_shapes=True, profile_memory=True, with_flops=True, with_stack=True, - with_modules=True + with_modules=True, ) as p: train(num_epochs, model, dl, per_epoch_durations, batch_durations, criterion, optimizer, p) @@ -114,5 +114,3 @@ # TODO: Make this output some human readable markdown file create_report(per_epoch_durations, batch_durations, total_duration) - - diff --git a/benchmarks/utils.py b/benchmarks/utils.py deleted file mode 100644 index d53ec796a..000000000 --- a/benchmarks/utils.py +++ /dev/null @@ -1,8 +0,0 @@ -# Util function for multiprocessing -import torch - -def init_fn(worker_id): - info = torch.utils.data.get_worker_info() - num_workers = info.num_workers - datapipe = info.dataset - torch.utils.data.graph_settings.apply_sharding(datapipe, num_workers, worker_id) \ No newline at end of file From a09826d5b6691a37cb6f3978ec2fc4caece758cf Mon Sep 17 00:00:00 2001 From: Mark Saroufim Date: Wed, 20 Jul 2022 19:28:25 -0700 Subject: [PATCH 24/24] update --- benchmarks/datasets.py | 54 ++++++++++++++++++++++--------------- benchmarks/report.py | 2 ++ benchmarks/run_benchmark.py | 16 ++++++----- 3 files changed, 44 insertions(+), 28 deletions(-) diff --git a/benchmarks/datasets.py b/benchmarks/datasets.py index 8dc93cc18..3ff794422 100644 --- a/benchmarks/datasets.py +++ b/benchmarks/datasets.py @@ -1,28 +1,40 @@ from torchvision import transforms, datasets import torch +from abc import ABC, abstractmethod -def transform(img): - t= transforms.Compose([ - transforms.ToPILImage(), - transforms.Resize(size=(100,100)), - transforms.ToTensor()] - ) - return t(img) +class DataPipeReadyBenchmark(ABC): + @abstractmethod + def prepare_pipe(self, params): + return NotImplementedError -def str_to_list(str): - l = [] - for char in str: - l.append(int(char)) - return l +class GTSRBReadyBenchmark(DataPipeReadyBenchmark): + def transform(img): + t= transforms.Compose([ + transforms.ToPILImage(), + transforms.Resize(size=(100,100)), + transforms.ToTensor()] + ) + return t(img) -def prepare_gtsrb_datapipe(batch_size, device, dp): - # Filter out bounding box and path to image - dp = dp.map(lambda sample : {"image" : sample["image"], "label" : sample["label"]}) + def str_to_list(str): + l = [] + for char in str: + l.append(int(char)) + return l - # Apply image preprocessing - dp = dp.map(lambda sample : transform(sample.decode().to(torch.device(device))), input_col="image") - dp = 
dp.map(lambda sample : torch.tensor(str_to_list(sample.to_categories())).to(torch.device(device)), input_col="label") + def prepare_pipe(self, params): + batch_size, device, dp = params + # Filter out bounding box and path to image + dp = dp.map(lambda sample : {"image" : sample["image"], "label" : sample["label"]}) - # Batch - dp = dp.batch(batch_size) - return dp \ No newline at end of file + # Apply image preprocessing + dp = dp.map(lambda sample : transform(sample.decode().to(torch.device(device))), input_col="image") + dp = dp.map(lambda sample : torch.tensor(str_to_list(sample.to_categories())).to(torch.device(device)), input_col="label") + + # Batch + dp = dp.batch(batch_size) + return dp + +class HuggingFaceReadyBenchmark(DataPipeReadyBenchmark): + def prepare(self, dataset_name): + return NotImplementedError \ No newline at end of file diff --git a/benchmarks/report.py b/benchmarks/report.py index b72f3d207..7fa4e7a28 100644 --- a/benchmarks/report.py +++ b/benchmarks/report.py @@ -35,6 +35,7 @@ def calculate_percentiles(self, metric_cache: MetricCache) -> Dict[str, float]: class StdOutReport(MetricExporter): + @staticmethod def export(self, metric_cache): percentiles_dict = metric_cache.calculate_percentiles() for field, percentiles in percentiles_dict.items: @@ -42,6 +43,7 @@ def export(self, metric_cache): class CSVReport(MetricExporter): + @staticmethod def export(self, metric_cache: MetricCache, filepath: str): percentiles_dict = metric_cache.calculate_percentiles() with open(filepath, "w") as file: diff --git a/benchmarks/run_benchmark.py b/benchmarks/run_benchmark.py index 41c318c56..cebc5f4f8 100644 --- a/benchmarks/run_benchmark.py +++ b/benchmarks/run_benchmark.py @@ -15,16 +15,15 @@ from args import arg_parser from benchmarks.datasets import prepare_gtsrb_dataset from datasets import prepare_gtsrb_datapipe -from report import create_report from torchvision import transforms from torchvision.prototype.datasets import load as loadpipe from trainers import train -from utils import init_fn +from report import MetricCache, CSVReport logging.basicConfig(filename="example.log", level=logging.DEBUG) -dataset, ispipe, model_name, batch_size, device, num_epochs, num_workers, shuffle, dataloaderv = arg_parser() +dataset, ispipe, model_name, batch_size, device, num_epochs, report_location, num_workers, shuffle, dataloaderv = arg_parser() if device.startswith("cuda"): nvidiasmi = subprocess.check_output("nvidia-smi", shell=True, text=True) @@ -52,6 +51,7 @@ model = model_map[model_name]().to(torch.device(device)) # setup data pipe +# TODO: How about we just make this work with any HF dataset if dataset == "gtsrb": if ispipe: dp = loadpipe(dataset, split="train") @@ -70,9 +70,9 @@ print(f"{dataset} not supported yet") -print(f"batch size {batch_size}") -print(f"Dataset name {dp}") -print(f"Dataset length {len(dp)}") +logging.info(f"batch size {batch_size}") +logging.info(f"Dataset name {dp}") +logging.info(f"Dataset length {len(dp)}") # Setup data loader @@ -107,10 +107,12 @@ with_modules=True, ) as p: + # TODO: Double check if this actually modifies the metrics in calling code train(num_epochs, model, dl, per_epoch_durations, batch_durations, criterion, optimizer, p) total_end = time.time() total_duration = total_end - total_start + metric_cache = MetricCache(batch_durations, per_epoch_durations, total_duration) # TODO: Make this output some human readable markdown file -create_report(per_epoch_durations, batch_durations, total_duration) +CSVReport(metric_cache, 
report_location).export() \ No newline at end of file
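A note on how the new `args.py` is consumed: `arg_parser()` now returns a single frozen `BenchmarkConfig`, so the tuple unpacking in `run_benchmark.py` (`dataset, ispipe, model_name, ... = arg_parser()`) raises a `TypeError`, the config has no `ispipe` field, and the worker-count field is spelled `num_wokers`. The closing `CSVReport(metric_cache, report_location).export()` call likewise does not match the exporter as declared, whose `export` is a `@staticmethod` that still lists `self` and expects the cache and file path as arguments (see the corrected exporter sketch earlier). A minimal sketch of attribute-style usage consistent with the dataclass; the normalized `num_workers` spelling is an assumption about the intent:

```python
from dataclasses import dataclass

from simple_parsing import ArgumentParser


@dataclass(frozen=True)
class BenchmarkConfig:
    dataset: str = "gtsrb"
    model_name: str = "resnext50_32x4d"
    batch_size: int = 1
    device: str = "cuda:0"
    num_epochs: int = 1
    report_location: str = "report.csv"
    num_workers: int = 1
    shuffle: bool = True
    dataloader_version: int = 1


def arg_parser() -> BenchmarkConfig:
    parser = ArgumentParser()
    parser.add_arguments(BenchmarkConfig, dest="options")
    return parser.parse_args().options


if __name__ == "__main__":
    config = arg_parser()
    # A frozen dataclass is not iterable, so it cannot be unpacked into a tuple
    # of nine names; fields are read by attribute instead.
    print(config.dataset, config.batch_size, config.num_workers)
```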
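Similarly, `GTSRBReadyBenchmark.transform` and `str_to_list` are declared without `self` or `@staticmethod` yet called as bare names inside `prepare_pipe`, which fails at runtime. A sketch that only fixes the method binding and otherwise keeps the patch's decode and device handling as-is; it still assumes the torchvision prototype GTSRB datapipe, with `decode()` on the image value and `to_categories()` on the label value:

```python
import torch
from torchvision import transforms


class GTSRBReadyBenchmark:
    @staticmethod
    def transform(img):
        t = transforms.Compose(
            [transforms.ToPILImage(), transforms.Resize(size=(100, 100)), transforms.ToTensor()]
        )
        return t(img)

    @staticmethod
    def str_to_list(s):
        return [int(ch) for ch in s]

    def prepare_pipe(self, params):
        batch_size, device, dp = params
        # Keep only the image and label, dropping bounding boxes and file paths.
        dp = dp.map(lambda sample: {"image": sample["image"], "label": sample["label"]})
        # Decode, move and resize the image, and turn the label category string into a
        # tensor, exactly as in the patch, but through the now-bound static helpers.
        dp = dp.map(lambda img: self.transform(img.decode().to(torch.device(device))), input_col="image")
        dp = dp.map(
            lambda label: torch.tensor(self.str_to_list(label.to_categories())).to(torch.device(device)),
            input_col="label",
        )
        return dp.batch(batch_size)
```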