Skip to content

Commit

Permalink
Instead of a compiler-specific alignment hint, use the C++ standard aligned alloc
Browse files Browse the repository at this point in the history
  • Loading branch information
ThrudPrimrose committed Nov 9, 2024
1 parent 1554421 commit b6e807c
Showing 1 changed file with 6 additions and 6 deletions.
12 changes: 6 additions & 6 deletions dace/codegen/targets/cpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -494,7 +494,7 @@ def allocate_array(self, sdfg: SDFG, cfg: ControlFlowRegion, dfg: StateSubgraphV
if not declared:
declaration_stream.write(f'{nodedesc.dtype.ctype} *{name};\n', cfg, state_id, node)
allocation_stream.write(
"%s = new %s DACE_ALIGN(64)[%s];\n" % (alloc_name, nodedesc.dtype.ctype, cpp.sym2cpp(arrsize)), cfg,
"%s = (%s*)std::aligned_alloc(64, %s * sizeof(%s));\n" % (alloc_name, nodedesc.dtype.ctype, cpp.sym2cpp(arrsize), nodedesc.dtype.ctype), cfg,
state_id, node)
define_var(name, DefinedType.Pointer, ctypedef)

Expand All @@ -512,15 +512,15 @@ def allocate_array(self, sdfg: SDFG, cfg: ControlFlowRegion, dfg: StateSubgraphV
raise NotImplementedError('Start offset unsupported for registers')
if node.setzero:
declaration_stream.write(
"%s %s[%s] DACE_ALIGN(64) = {0};\n" % (nodedesc.dtype.ctype, name, cpp.sym2cpp(arrsize)),
"%s alignas(64) %s[%s]{0};\n" % (nodedesc.dtype.ctype, name, cpp.sym2cpp(arrsize)),
cfg,
state_id,
node,
)
define_var(name, DefinedType.Pointer, ctypedef)
return
declaration_stream.write(
"%s %s[%s] DACE_ALIGN(64);\n" % (nodedesc.dtype.ctype, name, cpp.sym2cpp(arrsize)),
"%s alignas(64) %s[%s];\n" % (nodedesc.dtype.ctype, name, cpp.sym2cpp(arrsize)),
cfg,
state_id,
node,
Expand All @@ -544,7 +544,7 @@ def allocate_array(self, sdfg: SDFG, cfg: ControlFlowRegion, dfg: StateSubgraphV
"""
#pragma omp parallel
{{
{name} = new {ctype} DACE_ALIGN(64)[{arrsize}];""".format(ctype=nodedesc.dtype.ctype,
{name} = ({ctype}*)std::aligned_alloc(64, {arrsize} * sizeof({ctype}));""".format(ctype=nodedesc.dtype.ctype,
name=alloc_name,
arrsize=cpp.sym2cpp(arrsize)),
cfg,
Expand Down Expand Up @@ -581,13 +581,13 @@ def deallocate_array(self, sdfg: SDFG, cfg: ControlFlowRegion, dfg: StateSubgrap
return
elif (nodedesc.storage == dtypes.StorageType.CPU_Heap
or (nodedesc.storage == dtypes.StorageType.Register and symbolic.issymbolic(arrsize, sdfg.constants))):
callsite_stream.write("delete[] %s;\n" % alloc_name, cfg, state_id, node)
callsite_stream.write("std::free(%s);\n" % alloc_name, cfg, state_id, node)
elif nodedesc.storage is dtypes.StorageType.CPU_ThreadLocal:
# Deallocate in each OpenMP thread
callsite_stream.write(
"""#pragma omp parallel
{{
delete[] {name};
std::free({name});
}}""".format(name=alloc_name),
cfg,
state_id,
Expand Down

0 comments on commit b6e807c

Please sign in to comment.