Skip to content

Commit

Permalink
Merge pull request #270 from SChernykh/evo
Browse files Browse the repository at this point in the history
GCN optimized code for RandomX
  • Loading branch information
xmrig committed Aug 14, 2019
2 parents 4d47950 + a50c9f2 commit 85339fa
Show file tree
Hide file tree
Showing 20 changed files with 3,825 additions and 113 deletions.
34 changes: 26 additions & 8 deletions src/amd/GpuContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,18 +70,27 @@ struct GpuContext
freeMem(0),
globalMem(0),
computeUnits(0),
Nonce(0),
rx_variant(xmrig::VARIANT_AUTO),
rx_dataset(nullptr),
rx_scratchpads(nullptr),
rx_hashes(nullptr),
rx_entropy(nullptr),
rx_vm_states(nullptr),
rx_rounding(nullptr)
Nonce(0)
#ifdef XMRIG_ALGO_RANDOMX
, gcnAsm(1)
, AsmProgram(nullptr)
, rx_variant(xmrig::VARIANT_AUTO)
, rx_dataset(nullptr)
, rx_scratchpads(nullptr)
, rx_hashes(nullptr)
, rx_entropy(nullptr)
, rx_vm_states(nullptr)
, rx_registers(nullptr)
, rx_intermediate_programs(nullptr)
, rx_programs(nullptr)
, rx_rounding(nullptr)
#endif
{
memset(Kernels, 0, sizeof(Kernels));
#ifdef XMRIG_ALGO_RANDOMX
memset(rx_dataset_seedhash, 0, sizeof(rx_dataset_seedhash));
memset(rx_kernels, 0, sizeof(rx_kernels));
#endif
}

/*Input vars*/
Expand Down Expand Up @@ -116,18 +125,27 @@ struct GpuContext
cl_uint computeUnits;
xmrig::String board;
xmrig::String name;
uint32_t gcn_version;

uint32_t Nonce;

#ifdef XMRIG_ALGO_RANDOMX
int gcnAsm;
cl_program AsmProgram;

uint8_t rx_dataset_seedhash[32];
xmrig::Variant rx_variant;
cl_mem rx_dataset;
cl_mem rx_scratchpads;
cl_mem rx_hashes;
cl_mem rx_entropy;
cl_mem rx_vm_states;
cl_mem rx_registers;
cl_mem rx_intermediate_programs;
cl_mem rx_programs;
cl_mem rx_rounding;
cl_kernel rx_kernels[32];
#endif
};


Expand Down
2 changes: 1 addition & 1 deletion src/amd/OclCache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ void OclCache::getOptions(xmrig::Algo algo, xmrig::Variant, const GpuContext* ct
workSize = 8;
}

snprintf(options, options_size, "-DWORKERS_PER_HASH=%u", workSize);
snprintf(options, options_size, "-DWORKERS_PER_HASH=%u -DGCN_VERSION=%u", workSize, ctx->gcn_version);
}
else
# endif
Expand Down
403 changes: 319 additions & 84 deletions src/amd/OclGPU.cpp

Large diffs are not rendered by default.

11 changes: 3 additions & 8 deletions src/amd/opencl/RandomX/aes.cl
Original file line number Diff line number Diff line change
Expand Up @@ -558,27 +558,23 @@ uint get_byte32(uint a, uint start_bit) { return (a >> start_bit) & 0xFF; }
#define fillAes_name fillAes1Rx4_scratchpad
#define outputSize RANDOMX_SCRATCHPAD_L3
#define outputSize0 (outputSize + 64)
#define strided SCRATCHPAD_STRIDED
#define unroll_factor 8
#define num_rounds 1
#include "fillAes1Rx4.cl"
#undef num_rounds
#undef unroll_factor
#undef strided
#undef outputSize
#undef outputSize0
#undef fillAes_name

#define fillAes_name fillAes4Rx4_entropy
#define outputSize ENTROPY_SIZE
#define outputSize0 outputSize
#define strided 0
#define unroll_factor 2
#define num_rounds 4
#include "fillAes1Rx4.cl"
#undef num_rounds
#undef unroll_factor
#undef strided
#undef outputSize
#undef outputSize0
#undef fillAes_name
Expand All @@ -590,9 +586,8 @@ __kernel void hashAes1Rx4(__global const void* input, __global void* hash, uint
{
__local uint T[2048];

const uint stride_size = batch_size * 4;
const uint global_index = get_global_id(0);
if (global_index >= stride_size)
if (global_index >= batch_size * 4)
return;

const uint idx = global_index / 4;
Expand All @@ -608,15 +603,15 @@ __kernel void hashAes1Rx4(__global const void* input, __global void* hash, uint
const uint s1 = ((sub & 1) == 0) ? 8 : 24;
const uint s3 = ((sub & 1) == 0) ? 24 : 8;

__global const uint4* p = SCRATCHPAD_STRIDED ? (((__global uint4*) input) + idx * 4 + sub) : (((__global uint4*) input) + idx * ((inputSize + 64) / sizeof(uint4)) + sub);
__global const uint4* p = ((__global uint4*) input) + idx * ((inputSize + 64) / sizeof(uint4)) + sub;

__local const uint* const t0 = ((sub & 1) == 0) ? T : (T + 1024);
__local const uint* const t1 = ((sub & 1) == 0) ? (T + 256) : (T + 1792);
__local const uint* const t2 = ((sub & 1) == 0) ? (T + 512) : (T + 1536);
__local const uint* const t3 = ((sub & 1) == 0) ? (T + 768) : (T + 1280);

#pragma unroll(8)
for (uint i = 0; i < inputSize / sizeof(uint4); i += 4, p += SCRATCHPAD_STRIDED ? stride_size : 4)
for (uint i = 0; i < inputSize / sizeof(uint4); i += 4, p += 4)
{
uint k[4], y[4];
*(uint4*)(k) = *p;
Expand Down
8 changes: 4 additions & 4 deletions src/amd/opencl/RandomX/fillAes1Rx4.cl
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,8 @@ __kernel void fillAes_name(__global void* state, __global void* out, uint batch_
{
__local uint T[2048];

const uint stride_size = batch_size * 4;
const uint global_index = get_global_id(0);
if (global_index >= stride_size)
if (global_index >= batch_size * 4)
return;

const uint idx = global_index / 4;
Expand Down Expand Up @@ -67,15 +66,15 @@ __kernel void fillAes_name(__global void* state, __global void* out, uint batch_
const uint s1 = (sub & 1) ? 8 : 24;
const uint s3 = (sub & 1) ? 24 : 8;

__global uint4* p = strided ? (((__global uint4*) out) + idx * 4 + sub) : (((__global uint4*) out) + idx * (outputSize0 / sizeof(uint4)) + sub);
__global uint4* p = ((__global uint4*) out) + idx * (outputSize0 / sizeof(uint4)) + sub;

const __local uint* const t0 = (sub & 1) ? T : (T + 1024);
const __local uint* const t1 = (sub & 1) ? (T + 256) : (T + 1792);
const __local uint* const t2 = (sub & 1) ? (T + 512) : (T + 1536);
const __local uint* const t3 = (sub & 1) ? (T + 768) : (T + 1280);

#pragma unroll(unroll_factor)
for (uint i = 0; i < outputSize / sizeof(uint4); i += 4, p += strided ? stride_size : 4)
for (uint i = 0; i < outputSize / sizeof(uint4); i += 4, p += 4)
{
uint y[4];

Expand Down Expand Up @@ -115,6 +114,7 @@ __kernel void fillAes_name(__global void* state, __global void* out, uint batch_
*p = *(uint4*)(x);
#endif
}

*(__global uint4*)(s) = *(uint4*)(x);
}
)==="
2 changes: 0 additions & 2 deletions src/amd/opencl/RandomX/randomx_constants_loki.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,6 @@ along with RandomX OpenCL. If not, see <http://www.gnu.org/licenses/>.

#define RANDOMX_PROGRAM_SIZE 320

#define SCRATCHPAD_STRIDED 0

#define HASH_SIZE 64
#define ENTROPY_SIZE (128 + RANDOMX_PROGRAM_SIZE * 8)
#define REGISTERS_SIZE 256
Expand Down
98 changes: 98 additions & 0 deletions src/amd/opencl/RandomX/randomx_constants_monero.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
R"===(
/*
Copyright (c) 2019 SChernykh
This file is part of RandomX OpenCL.
RandomX OpenCL is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
RandomX OpenCL is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with RandomX OpenCL. If not, see <http://www.gnu.org/licenses/>.
*/

//Dataset base size in bytes. Must be a power of 2.
#define RANDOMX_DATASET_BASE_SIZE 2147483648

//Dataset extra size. Must be divisible by 64.
#define RANDOMX_DATASET_EXTRA_SIZE 33554368

//Scratchpad L3 size in bytes. Must be a power of 2.
#define RANDOMX_SCRATCHPAD_L3 2097152

//Scratchpad L2 size in bytes. Must be a power of two and less than or equal to RANDOMX_SCRATCHPAD_L3.
#define RANDOMX_SCRATCHPAD_L2 262144

//Scratchpad L1 size in bytes. Must be a power of two (minimum 64) and less than or equal to RANDOMX_SCRATCHPAD_L2.
#define RANDOMX_SCRATCHPAD_L1 16384

//Jump condition mask size in bits.
#define RANDOMX_JUMP_BITS 8

//Jump condition mask offset in bits. The sum of RANDOMX_JUMP_BITS and RANDOMX_JUMP_OFFSET must not exceed 16.
#define RANDOMX_JUMP_OFFSET 8

//Integer instructions
#define RANDOMX_FREQ_IADD_RS 25
#define RANDOMX_FREQ_IADD_M 7
#define RANDOMX_FREQ_ISUB_R 16
#define RANDOMX_FREQ_ISUB_M 7
#define RANDOMX_FREQ_IMUL_R 16
#define RANDOMX_FREQ_IMUL_M 4
#define RANDOMX_FREQ_IMULH_R 4
#define RANDOMX_FREQ_IMULH_M 1
#define RANDOMX_FREQ_ISMULH_R 4
#define RANDOMX_FREQ_ISMULH_M 1
#define RANDOMX_FREQ_IMUL_RCP 8
#define RANDOMX_FREQ_INEG_R 2
#define RANDOMX_FREQ_IXOR_R 15
#define RANDOMX_FREQ_IXOR_M 5
#define RANDOMX_FREQ_IROR_R 8
#define RANDOMX_FREQ_IROL_R 2
#define RANDOMX_FREQ_ISWAP_R 4

//Floating point instructions
#define RANDOMX_FREQ_FSWAP_R 4
#define RANDOMX_FREQ_FADD_R 16
#define RANDOMX_FREQ_FADD_M 5
#define RANDOMX_FREQ_FSUB_R 16
#define RANDOMX_FREQ_FSUB_M 5
#define RANDOMX_FREQ_FSCAL_R 6
#define RANDOMX_FREQ_FMUL_R 32
#define RANDOMX_FREQ_FDIV_M 4
#define RANDOMX_FREQ_FSQRT_R 6

//Control instructions
#define RANDOMX_FREQ_CBRANCH 16
#define RANDOMX_FREQ_CFROUND 1

//Store instruction
#define RANDOMX_FREQ_ISTORE 16

//No-op instruction
#define RANDOMX_FREQ_NOP 0

#define RANDOMX_DATASET_ITEM_SIZE 64

#define RANDOMX_PROGRAM_SIZE 256

#define HASH_SIZE 64
#define ENTROPY_SIZE (128 + RANDOMX_PROGRAM_SIZE * 8)
#define REGISTERS_SIZE 256
#define IMM_BUF_SIZE (RANDOMX_PROGRAM_SIZE * 4 - REGISTERS_SIZE)
#define IMM_INDEX_COUNT ((IMM_BUF_SIZE / 4) - 2)
#define VM_STATE_SIZE (REGISTERS_SIZE + IMM_BUF_SIZE + RANDOMX_PROGRAM_SIZE * 4)
#define ROUNDING_MODE (RANDOMX_FREQ_CFROUND ? -1 : 0)

// Scratchpad L1/L2/L3 bits
#define LOC_L1 (32 - 14)
#define LOC_L2 (32 - 18)
#define LOC_L3 (32 - 21)
)==="
2 changes: 0 additions & 2 deletions src/amd/opencl/RandomX/randomx_constants_wow.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,6 @@ along with RandomX OpenCL. If not, see <http://www.gnu.org/licenses/>.

#define RANDOMX_PROGRAM_SIZE 256

#define SCRATCHPAD_STRIDED 0

#define HASH_SIZE 64
#define ENTROPY_SIZE (128 + RANDOMX_PROGRAM_SIZE * 8)
#define REGISTERS_SIZE 256
Expand Down
Loading

0 comments on commit 85339fa

Please sign in to comment.