Skip to content
This repository has been archived by the owner on Aug 30, 2024. It is now read-only.

Commit

Permalink
save
Browse files Browse the repository at this point in the history
  • Loading branch information
sunjiweiswift committed Aug 27, 2024
1 parent 9bab674 commit 4f481df
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 9 deletions.
18 changes: 9 additions & 9 deletions include/subgroup/tile/impl/load_xe.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,8 +118,8 @@ tile_load(tile_t& tile, payload_t& payload) {
static constexpr uint32_t max_load_width_in_elem =
load_store_attr::max_load_width_in_bytes / sizeof(dtype);

// static constexpr uint32_t max_trans_load_height_in_elem =
// load_store_attr::max_trans_load_height_in_elem;
// static constexpr uint32_t max_trans_load_height_in_elem =
// load_store_attr::max_trans_load_height_in_elem;
static constexpr uint32_t max_load_height_in_elem =
load_store_attr::max_load_height_in_elem;

Expand Down Expand Up @@ -206,6 +206,11 @@ tile_load(tile_t& tile, payload_t& payload) {
#pragma unroll
for (uint32_t ii = 0; ii < block_size_y / ld_blk_size_y; ++ii) {
constexpr uint32_t load_elems = ld_blk_size_y * block_size_x * arr_len;
uint32_t address_offset_x =
(mem_transpose ? (offset_y + ii * ld_blk_size_y) : offset_x) /
scale_factor;
uint32_t address_offset_y =
mem_transpose ? offset_x : (offset_y + ii * ld_blk_size_y);
reg_tmp.xetla_format<native_type_t<load_dtype>>() = xetla_load_global<
native_type_t<load_dtype>,
(trans ? ld_blk_size_y : block_size_x) / scale_factor,
Expand All @@ -222,13 +227,8 @@ tile_load(tile_t& tile, payload_t& payload) {
payload.surface_width,
payload.surface_height,
payload.surface_pitch,
payload.offset_x +
(mem_transpose ? (offset_y / (int)scale_factor +
ii * ld_blk_size_y / (int)scale_factor)
: (offset_x / scale_factor)),

payload.offset_y +
(mem_transpose ? offset_x : (offset_y + ii * ld_blk_size_y)));
payload.offset_x + address_offset_x,
payload.offset_y + address_offset_y);

if constexpr (reg_transpose && trans) {
reg_blk.xetla_select<load_elems, 1>(ii * load_elems)
Expand Down
5 changes: 5 additions & 0 deletions include/subgroup/tile/impl/payload_xe.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,14 @@ struct mem_payload_t<
!(std::is_same_v<dtype_, int4x2> || std::is_same_v<dtype_, int4x8>);

// Transformed and Transposed cannot both be true at the same time.
// If Transformed is true, then sizeof(T) must be 1 or 2 bytes
// (bytes or words).
static constexpr bool mem_transform = (sizeof(dtype) <= 2) && !trans &&
(register_layout == reg_layout::vnni_tiled ||
register_layout == reg_layout::vnni_tiled_col_major);

// If Transposed is true, then sizeof(T) must be 4 or 8 bytes
// (dwords or qwords).
static constexpr bool mem_transpose_dtype_less4bytes =
(sizeof(dtype) < 4) && trans;

Expand Down

0 comments on commit 4f481df

Please sign in to comment.