Skip to content

Commit

Permalink
[software] Parametrize apps with hjson file
Browse files Browse the repository at this point in the history
  • Loading branch information
mbertuletti committed Sep 25, 2024
1 parent f10c95f commit d4091de
Show file tree
Hide file tree
Showing 23 changed files with 535 additions and 833 deletions.
2 changes: 1 addition & 1 deletion software/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,5 @@ runtime/arch.ld

# Generated data files
data.h
apps/*/*/data*.h
data/data*.h
data/__pyc*
4 changes: 2 additions & 2 deletions software/apps/baremetal/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ RUNTIME_DIR := $(abspath $(SOFTWARE_DIR)/runtime)
include $(RUNTIME_DIR)/runtime.mk

APPS := $(patsubst $(APPS_DIR)/%/main.c,%,$(shell find $(APPS_DIR) -name "main.c"))
DATA := $(patsubst %.args,%.h,$(shell find $(APPS_DIR) -name "data.args"))
BINARIES := $(addprefix $(BIN_DIR)/,$(APPS))
ALL := $(APPS)

Expand All @@ -33,7 +32,7 @@ all_llvm: $(ALL_LLVM)
$(APPS): % : $(BIN_DIR)/% $(APPS_DIR)/Makefile $(shell find $(RUNTIME_DIR)/**.{S,c,h,ld} -type f)

.PHONY: $(BINARIES)
$(BINARIES): $(BIN_DIR)/%: %/main.c.o $(RUNTIME) $(LINKER_SCRIPT) $(DATA) update_opcodes
$(BINARIES): $(BIN_DIR)/%: %/main.c.o $(RUNTIME) $(LINKER_SCRIPT) data_%.h update_opcodes
mkdir -p $(dir $@)
$(RISCV_CC) -Iinclude -o $@ $< $(RUNTIME) $(RISCV_LDFLAGS) -T$(RUNTIME_DIR)/link.ld
$(RISCV_OBJDUMP) $(RISCV_OBJDUMP_FLAGS) -D $@ > $@.dump
Expand All @@ -49,5 +48,6 @@ clean:
rm -vf $(addsuffix /main.c.o,$(APPS))
rm -vf $(RUNTIME)
rm -vf $(LINKER_SCRIPT)
rm -vf $(wildcard $(DATA_DIR)/data_*.h)

.INTERMEDIATE: $(addsuffix /main.c.o,$(APPS))
113 changes: 19 additions & 94 deletions software/apps/baremetal/axpy_i32/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,125 +5,50 @@
// Author: Yichao Zhang, ETH Zurich

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include "baremetal/mempool_axpy_i32p.h"
/* Mempool runtime libraries */
#include "builtins_v2.h"
#include "dma.h"
#include "encoding.h"
#include "printf.h"
#include "runtime.h"
#include "synchronization.h"
#include <stdlib.h>

#if NUM_CORES > 32
#define size_M 64
#define size_N 64
#else
#define size_M (NUM_CORES)
#define size_N (NUM_CORES)
#endif

#define ALPHA 2

#if NUM_CORES > 32
int32_t data_x[size_M * size_N]
__attribute__((aligned(64 * 1024), section(".l1")));
int32_t data_y[size_M * size_N]
__attribute__((aligned(64 * 1024), section(".l1")));
int32_t data_y_copy[size_M * size_N]
__attribute__((aligned(64 * 1024), section(".l1")));
#else
int32_t data_x[size_M * size_N] __attribute__((aligned(32), section(".l1")));
int32_t data_y[size_M * size_N] __attribute__((aligned(32), section(".l1")));
int32_t data_y_copy[size_M * size_N]
__attribute__((aligned(32), section(".l1")));
#endif
#include "baremetal/mempool_axpy_i32p.h"
#include "baremetal/mempool_checks.h"
#include "data_axpy_i32.h"

int32_t l1_X[array_N]
__attribute__((aligned(NUM_CORES * sizeof(uint32_t)), section(".l1")));
int32_t l1_Y[array_N]
__attribute__((aligned(NUM_CORES * sizeof(uint32_t)), section(".l1")));
int volatile error __attribute__((section(".l1")));

// Fill this core's share of a row-major num_rows x num_columns matrix with
// the affine pattern  matrix[i][j] = a * i + b * j + c.
//
// The shorter dimension is cut into `split` equal chunks and the core takes
// chunk (core_id % split). Along the longer dimension the core starts at
// index (core_id / split) and advances by (num_cores / split).
//
// NOTE(review): num_cores / split is an integer division, so the stride is 0
// whenever num_cores < split and the outer loop would never advance. Confirm
// that callers always run with at least `split` cores.
void init_matrix(int32_t *matrix, uint32_t num_rows, uint32_t num_columns,
                 int32_t a, int32_t b, int32_t c, uint32_t core_id,
                 uint32_t num_cores) {
  // Number of chunks the shorter dimension is divided into
  uint32_t const split = 8;
  uint32_t const lane = core_id % split;
  uint32_t const first = core_id / split;
  uint32_t const stride = num_cores / split;

  if (num_columns > num_rows) {
    // More columns than rows: fixed chunk of rows, strided walk over columns
    uint32_t const chunk = num_rows / split;
    uint32_t const row_lo = chunk * lane;
    uint32_t const row_hi = chunk * (lane + 1);
    uint32_t col = first;
    while (col < num_columns) {
      for (uint32_t row = row_lo; row < row_hi; ++row) {
        int32_t const value = a * (int32_t)row + b * (int32_t)col + c;
        matrix[row * num_columns + col] = value;
      }
      col += stride;
    }
    return;
  }

  // Otherwise: fixed chunk of columns, strided walk over rows
  uint32_t const chunk = num_columns / split;
  uint32_t const col_lo = chunk * lane;
  uint32_t const col_hi = chunk * (lane + 1);
  uint32_t row = first;
  while (row < num_rows) {
    for (uint32_t col = col_lo; col < col_hi; ++col) {
      int32_t const value = a * (int32_t)row + b * (int32_t)col + c;
      matrix[row * num_columns + col] = value;
    }
    row += stride;
  }
}

// Check an AXPY result element by element.
//
// For each of the first `elements` entries, matrix_Y must equal
// matrix_X * alpha + matrix_Y_COPY, where matrix_Y_COPY is compared as the
// pre-update value of Y. Returns 1 at the first mismatch, 0 if all entries
// match (including when `elements` is 0).
int verify_axpy(int32_t *matrix_X, int32_t *matrix_Y, int32_t *matrix_Y_COPY,
                int32_t alpha, uint32_t elements) {
  uint32_t idx = 0;
  while (idx < elements) {
    if (matrix_X[idx] * alpha + matrix_Y_COPY[idx] != matrix_Y[idx]) {
      return 1;
    }
    ++idx;
  }
  return 0;
}

int main() {

uint32_t const core_id = mempool_get_core_id();
uint32_t const num_cores = mempool_get_core_count();
uint32_t const total_elements = size_M * size_N;

// Seed for create element matrix
int32_t const A_a = 1;
int32_t const A_b = 1;
int32_t const A_c = -32;
int32_t const B_a = 2;
int32_t const B_b = 1;
int32_t const B_c = 16;

// Initialize synchronization variables
mempool_barrier_init(core_id);

// Initialize data
if (core_id == 0) {
printf("Initialize %3d cores\n", num_cores);
dma_memcpy_blocking(l1_X, l2_X, array_N * sizeof(int32_t));
dma_memcpy_blocking(l1_Y, l2_Y, array_N * sizeof(int32_t));
error = 0;
}

// init_elements;
init_matrix(data_x, size_M, size_N, A_a, A_b, A_c, core_id, num_cores);
init_matrix(data_y, size_M, size_N, B_a, B_b, B_c, core_id, num_cores);
init_matrix(data_y_copy, size_M, size_N, B_a, B_b, B_c, core_id, num_cores);
mempool_barrier(num_cores);

// start kernel testing
// Benchmark
mempool_start_benchmark();
calc_axpy_unloop_x4_localbank(data_x, data_y, ALPHA, total_elements, core_id,
num_cores);
calc_axpy_unloop_x4_localbank(l1_X, l1_Y, ALPHA, array_N, core_id, num_cores);
mempool_barrier(num_cores);
mempool_stop_benchmark();
// end kernel testing

// Verify results
if (core_id == 0) {
printf("START CHECKING RESULTS\n");
if (verify_axpy(data_x, data_y, data_y_copy, ALPHA, total_elements)) {
printf("RESULTS ERROR\n");
error = 1;
} else {
printf("RESULTS CORRECT\n");
}
}
mempool_check_q32(l1_Y, l2_Z, array_N, 0, 0);
mempool_barrier(num_cores);

return error;
return 0;
}
1 change: 0 additions & 1 deletion software/apps/baremetal/cfft_radix2_q16/data.args

This file was deleted.

1 change: 1 addition & 0 deletions software/apps/baremetal/cfft_radix2_q16/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "synchronization.h"

#include "data_cfft_radix2_q16.h"
#define N_BANKS (NUM_CORES * BANKING_FACTOR)

/* CFFT mempool libraries */
#include "baremetal/mempool_cfft_q16_bitreversal.h"
Expand Down
1 change: 0 additions & 1 deletion software/apps/baremetal/cfft_radix4_q16/data.args

This file was deleted.

2 changes: 2 additions & 0 deletions software/apps/baremetal/cfft_radix4_q16/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@

/* CFFT data libraries */
#include "data_cfft_radix4_q16.h"
#define N_BANKS (NUM_CORES * BANKING_FACTOR)
#define MAX_COL (N_BANKS / (N_CSAMPLES / 4))

/* CHOOSE ONE */
//#define SINGLE // Single core FFT.
Expand Down
3 changes: 0 additions & 3 deletions software/apps/baremetal/chest_q16/data.args

This file was deleted.

8 changes: 5 additions & 3 deletions software/apps/baremetal/matmul_f16/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,10 @@ int main() {

// Initialize Matrices 1
if (core_id == 0) {
dma_memcpy_blocking(matrix_a, A, (matrix_M * matrix_N) * sizeof(int16_t));
dma_memcpy_blocking(matrix_b, B, (matrix_N * matrix_P) * sizeof(int16_t));
dma_memcpy_blocking(matrix_a, l2_A,
(matrix_M * matrix_N) * sizeof(int16_t));
dma_memcpy_blocking(matrix_b, l2_B,
(matrix_N * matrix_P) * sizeof(int16_t));
}
mempool_barrier(num_cores);

Expand All @@ -59,7 +61,7 @@ int main() {
mempool_stop_benchmark();
#endif

mempool_check_f16(matrix_c, C, matrix_M * matrix_P, 0.5f, 0);
mempool_check_f16(matrix_c, l2_C, matrix_M * matrix_P, 0.5f, 0);
mempool_barrier(num_cores);
return 0;
}
9 changes: 5 additions & 4 deletions software/apps/baremetal/matmul_f32/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,14 @@ int main() {
uint32_t num_cores = mempool_get_core_count();
mempool_barrier_init(core_id);

// Initialize Matrices
// Initialize data
if (core_id == 0) {
dma_memcpy_blocking(matrix_a, A, matrix_M * matrix_N * sizeof(int32_t));
dma_memcpy_blocking(matrix_b, B, matrix_N * matrix_P * sizeof(int32_t));
dma_memcpy_blocking(matrix_a, l2_A, matrix_M * matrix_N * sizeof(int32_t));
dma_memcpy_blocking(matrix_b, l2_B, matrix_N * matrix_P * sizeof(int32_t));
}
mempool_barrier(num_cores);

// Benchmark
#if defined(SINGLE)
if (core_id == 0) {
// Execute function to test.
Expand All @@ -57,7 +58,7 @@ int main() {
mempool_stop_benchmark();
#endif

mempool_check_f32(matrix_c, C, matrix_M * matrix_P, 0.01f, 0);
mempool_check_f32(matrix_c, l2_C, matrix_M * matrix_P, 0.01f, 0);
mempool_barrier(num_cores);
return 0;
}
Loading

0 comments on commit d4091de

Please sign in to comment.