From deab7ff2debba9ed5360a02fcf21c58a30c227a4 Mon Sep 17 00:00:00 2001 From: SChernykh Date: Tue, 13 Aug 2019 20:53:52 +0200 Subject: [PATCH 1/3] Added optimized code for RandomX --- src/amd/GpuContext.h | 34 +- src/amd/OclCache.cpp | 2 +- src/amd/OclGPU.cpp | 402 ++++- src/amd/opencl/RandomX/aes.cl | 11 +- src/amd/opencl/RandomX/fillAes1Rx4.cl | 8 +- .../opencl/RandomX/randomx_constants_loki.h | 2 - .../opencl/RandomX/randomx_constants_monero.h | 98 ++ .../opencl/RandomX/randomx_constants_wow.h | 2 - src/amd/opencl/RandomX/randomx_jit.cl | 1510 +++++++++++++++++ src/amd/opencl/RandomX/randomx_run_gfx803.asm | 712 ++++++++ src/amd/opencl/RandomX/randomx_run_gfx803.h | 218 +++ src/amd/opencl/RandomX/randomx_run_gfx900.asm | 688 ++++++++ src/amd/opencl/RandomX/randomx_run_gfx900.h | 215 +++ src/base/net/Pool.cpp | 1 + src/common/crypto/Algorithm.cpp | 2 + src/common/xmrig.h | 1 + src/crypto/CryptoNight.cpp | 12 +- src/workers/OclThread.cpp | 13 + src/workers/OclThread.h | 3 + src/workers/Workers.cpp | 2 + 20 files changed, 3823 insertions(+), 113 deletions(-) create mode 100644 src/amd/opencl/RandomX/randomx_constants_monero.h create mode 100644 src/amd/opencl/RandomX/randomx_jit.cl create mode 100644 src/amd/opencl/RandomX/randomx_run_gfx803.asm create mode 100644 src/amd/opencl/RandomX/randomx_run_gfx803.h create mode 100644 src/amd/opencl/RandomX/randomx_run_gfx900.asm create mode 100644 src/amd/opencl/RandomX/randomx_run_gfx900.h diff --git a/src/amd/GpuContext.h b/src/amd/GpuContext.h index aaa2fb97..f1addfb2 100644 --- a/src/amd/GpuContext.h +++ b/src/amd/GpuContext.h @@ -70,18 +70,27 @@ struct GpuContext freeMem(0), globalMem(0), computeUnits(0), - Nonce(0), - rx_variant(xmrig::VARIANT_AUTO), - rx_dataset(nullptr), - rx_scratchpads(nullptr), - rx_hashes(nullptr), - rx_entropy(nullptr), - rx_vm_states(nullptr), - rx_rounding(nullptr) + Nonce(0) +#ifdef XMRIG_ALGO_RANDOMX + , gcnAsm(1) + , AsmProgram(nullptr) + , rx_variant(xmrig::VARIANT_AUTO) + , 
rx_dataset(nullptr) + , rx_scratchpads(nullptr) + , rx_hashes(nullptr) + , rx_entropy(nullptr) + , rx_vm_states(nullptr) + , rx_registers(nullptr) + , rx_intermediate_programs(nullptr) + , rx_programs(nullptr) + , rx_rounding(nullptr) +#endif { memset(Kernels, 0, sizeof(Kernels)); +#ifdef XMRIG_ALGO_RANDOMX memset(rx_dataset_seedhash, 0, sizeof(rx_dataset_seedhash)); memset(rx_kernels, 0, sizeof(rx_kernels)); +#endif } /*Input vars*/ @@ -116,9 +125,14 @@ struct GpuContext cl_uint computeUnits; xmrig::String board; xmrig::String name; + uint32_t gcn_version; uint32_t Nonce; +#ifdef XMRIG_ALGO_RANDOMX + int gcnAsm; + cl_program AsmProgram; + uint8_t rx_dataset_seedhash[32]; xmrig::Variant rx_variant; cl_mem rx_dataset; @@ -126,8 +140,12 @@ struct GpuContext cl_mem rx_hashes; cl_mem rx_entropy; cl_mem rx_vm_states; + cl_mem rx_registers; + cl_mem rx_intermediate_programs; + cl_mem rx_programs; cl_mem rx_rounding; cl_kernel rx_kernels[32]; +#endif }; diff --git a/src/amd/OclCache.cpp b/src/amd/OclCache.cpp index cdced26a..6e4eda0d 100644 --- a/src/amd/OclCache.cpp +++ b/src/amd/OclCache.cpp @@ -84,7 +84,7 @@ void OclCache::getOptions(xmrig::Algo algo, xmrig::Variant, const GpuContext* ct workSize = 8; } - snprintf(options, options_size, "-DWORKERS_PER_HASH=%u", workSize); + snprintf(options, options_size, "-DWORKERS_PER_HASH=%u -DGCN_VERSION=%u", workSize, ctx->gcn_version); } else # endif diff --git a/src/amd/OclGPU.cpp b/src/amd/OclGPU.cpp index 63823501..7f4c3b41 100644 --- a/src/amd/OclGPU.cpp +++ b/src/amd/OclGPU.cpp @@ -39,6 +39,10 @@ #include "amd/OclGPU.h" #include "amd/OclLib.h" #include "amd/OclCryptonightR_gen.h" +#ifdef XMRIG_ALGO_RANDOMX +#include "amd/opencl/RandomX/randomx_run_gfx803.h" +#include "amd/opencl/RandomX/randomx_run_gfx900.h" +#endif #include "common/log/Log.h" #include "common/utils/timestamp.h" #include "core/Config.h" @@ -192,6 +196,10 @@ size_t InitOpenCLGpu(int index, cl_context opencl_ctx, GpuContext* ctx, const ch printGPU(index, ctx, 
config); + xmrig::String device_name = ctx->name; + std::for_each(device_name.data(), device_name.data() + device_name.size(), [](char& c) { c = static_cast(std::toupper(c)); }); + ctx->gcn_version = (device_name == "GFX900") ? 14 : 12; + cl_int ret; ctx->CommandQueues = OclLib::createCommandQueue(opencl_ctx, ctx->DeviceID, &ret); if (ret != CL_SUCCESS) { @@ -232,10 +240,31 @@ size_t InitOpenCLGpu(int index, cl_context opencl_ctx, GpuContext* ctx, const ch return OCL_ERR_API; } - ctx->rx_vm_states = OclLib::createBuffer(opencl_ctx, CL_MEM_READ_WRITE, 2560 * g_thd, nullptr, &ret); - if (ret != CL_SUCCESS) { - LOG_ERR("Error %s when calling clCreateBuffer to create RandomX VM states buffer.", err_to_str(ret)); - return OCL_ERR_API; + if (ctx->gcnAsm) { + ctx->rx_registers = OclLib::createBuffer(opencl_ctx, CL_MEM_READ_WRITE, 256 * g_thd, nullptr, &ret); + if (ret != CL_SUCCESS) { + LOG_ERR("Error %s when calling clCreateBuffer to create RandomX JIT registers buffer.", err_to_str(ret)); + return OCL_ERR_API; + } + + ctx->rx_intermediate_programs = OclLib::createBuffer(opencl_ctx, CL_MEM_READ_WRITE, 5120 * g_thd, nullptr, &ret); + if (ret != CL_SUCCESS) { + LOG_ERR("Error %s when calling clCreateBuffer to create RandomX JIT intermediate programs buffer.", err_to_str(ret)); + return OCL_ERR_API; + } + + ctx->rx_programs = OclLib::createBuffer(opencl_ctx, CL_MEM_READ_WRITE, 10048 * g_thd, nullptr, &ret); + if (ret != CL_SUCCESS) { + LOG_ERR("Error %s when calling clCreateBuffer to create RandomX JIT programs buffer.", err_to_str(ret)); + return OCL_ERR_API; + } + } + else { + ctx->rx_vm_states = OclLib::createBuffer(opencl_ctx, CL_MEM_READ_WRITE, 2560 * g_thd, nullptr, &ret); + if (ret != CL_SUCCESS) { + LOG_ERR("Error %s when calling clCreateBuffer to create RandomX VM states buffer.", err_to_str(ret)); + return OCL_ERR_API; + } } ctx->rx_rounding = OclLib::createBuffer(opencl_ctx, CL_MEM_READ_WRITE, sizeof(uint32_t) * g_thd, nullptr, &ret); @@ -305,7 +334,9 @@ size_t 
InitOpenCLGpu(int index, cl_context opencl_ctx, GpuContext* ctx, const ch const char* KernelNamesRX[] = { "fillAes1Rx4_scratchpad", "fillAes4Rx4_entropy", "hashAes1Rx4", "blake2b_initial_hash", "blake2b_hash_registers_32", "blake2b_hash_registers_64", - "init_vm", "execute_vm", "find_shares", + ctx->gcnAsm ? "" : "init_vm", ctx->gcnAsm ? "" : "execute_vm", "find_shares", + ctx->gcnAsm ? "randomx_jit" : "", + "", // reserved for randomx_run binary kernel nullptr }; @@ -319,6 +350,52 @@ size_t InitOpenCLGpu(int index, cl_context opencl_ctx, GpuContext* ctx, const ch } } + if (ctx->gcnAsm) { + // Adrenaline drivers on Windows and amdgpu-pro drivers on Linux use ELF header's flags (offset 0x30) to store internal device ID + // Read it from compiled OpenCL code and substitute this ID into pre-compiled binary to make sure the driver accepts it + uint32_t elf_header_flags = 0; + const uint32_t elf_header_flags_offset = 0x30; + + size_t bin_size; + if (OclLib::getProgramInfo(ctx->Program, CL_PROGRAM_BINARY_SIZES, sizeof(bin_size), &bin_size) != CL_SUCCESS) { + return OCL_ERR_API; + } + + std::vector binary_data(bin_size); + char* tmp[1] = { binary_data.data() }; + if (OclLib::getProgramInfo(ctx->Program, CL_PROGRAM_BINARIES, sizeof(char*), tmp) != CL_SUCCESS) { + return false; + } + + if (bin_size >= elf_header_flags_offset + sizeof(uint32_t)) { + elf_header_flags = *(uint32_t*)(binary_data.data() + elf_header_flags_offset); + } + + size_t len = (ctx->gcn_version == 14) ? randomx_run_gfx900_bin_size : randomx_run_gfx803_bin_size; + unsigned char* binary = (ctx->gcn_version == 14) ? 
randomx_run_gfx900_bin : randomx_run_gfx803_bin; + + // Set correct internal device ID in the pre-compiled binary + if (elf_header_flags) { + *(uint32_t*)(binary + elf_header_flags_offset) = elf_header_flags; + } + + cl_int status; + ctx->AsmProgram = OclLib::createProgramWithBinary(ctx->opencl_ctx, 1, &ctx->DeviceID, &len, (const unsigned char**) &binary, &status, &ret); + if (ret != CL_SUCCESS) { + return OCL_ERR_API; + } + + ret = OclLib::buildProgram(ctx->AsmProgram, 1, &ctx->DeviceID); + if (ret != CL_SUCCESS) { + return OCL_ERR_API; + } + + ctx->rx_kernels[10] = OclLib::createKernel(ctx->AsmProgram, "randomx_run", &ret); + if (ret != CL_SUCCESS) { + return OCL_ERR_API; + } + } + // fillAes1Rx4_scratchpad if ((ret = OclLib::setKernelArg(ctx->rx_kernels[0], 0, sizeof(cl_mem), &ctx->rx_hashes)) != CL_SUCCESS) { LOG_ERR(kSetKernelArgErr, err_to_str(ret), 0, 0); @@ -369,7 +446,7 @@ size_t InitOpenCLGpu(int index, cl_context opencl_ctx, GpuContext* ctx, const ch return OCL_ERR_API; } - if ((ret = OclLib::setKernelArg(ctx->rx_kernels[2], 1, sizeof(cl_mem), &ctx->rx_vm_states)) != CL_SUCCESS) { + if ((ret = OclLib::setKernelArg(ctx->rx_kernels[2], 1, sizeof(cl_mem), ctx->gcnAsm ? &ctx->rx_registers : &ctx->rx_vm_states)) != CL_SUCCESS) { LOG_ERR(kSetKernelArgErr, err_to_str(ret), 2, 1); return OCL_ERR_API; } @@ -380,7 +457,12 @@ size_t InitOpenCLGpu(int index, cl_context opencl_ctx, GpuContext* ctx, const ch return OCL_ERR_API; } - const uint32_t hashStrideBytes = (config->algorithm().variant() == xmrig::VARIANT_RX_LOKI) ? RandomX_LokiConfig.ProgramSize * 8 : RandomX_MoneroConfig.ProgramSize * 8; + uint32_t hashStrideBytes; + if (ctx->gcnAsm) + hashStrideBytes = 256; + else + hashStrideBytes = (config->algorithm().variant() == xmrig::VARIANT_RX_LOKI) ? 
RandomX_LokiConfig.ProgramSize * 8 : RandomX_MoneroConfig.ProgramSize * 8; + if ((ret = OclLib::setKernelArg(ctx->rx_kernels[2], 3, sizeof(uint32_t), &hashStrideBytes)) != CL_SUCCESS) { LOG_ERR(kSetKernelArgErr, err_to_str(ret), 2, 3); return OCL_ERR_API; @@ -411,7 +493,7 @@ size_t InitOpenCLGpu(int index, cl_context opencl_ctx, GpuContext* ctx, const ch return OCL_ERR_API; } - if ((ret = OclLib::setKernelArg(ctx->rx_kernels[4], 1, sizeof(cl_mem), &ctx->rx_vm_states)) != CL_SUCCESS) { + if ((ret = OclLib::setKernelArg(ctx->rx_kernels[4], 1, sizeof(cl_mem), ctx->gcnAsm ? &ctx->rx_registers : &ctx->rx_vm_states)) != CL_SUCCESS) { LOG_ERR(kSetKernelArgErr, err_to_str(ret), 4, 1); return OCL_ERR_API; } @@ -427,7 +509,7 @@ size_t InitOpenCLGpu(int index, cl_context opencl_ctx, GpuContext* ctx, const ch return OCL_ERR_API; } - if ((ret = OclLib::setKernelArg(ctx->rx_kernels[5], 1, sizeof(cl_mem), &ctx->rx_vm_states)) != CL_SUCCESS) { + if ((ret = OclLib::setKernelArg(ctx->rx_kernels[5], 1, sizeof(cl_mem), ctx->gcnAsm ? 
&ctx->rx_registers : &ctx->rx_vm_states)) != CL_SUCCESS) { LOG_ERR(kSetKernelArgErr, err_to_str(ret), 5, 1); return OCL_ERR_API; } @@ -437,51 +519,53 @@ size_t InitOpenCLGpu(int index, cl_context opencl_ctx, GpuContext* ctx, const ch return OCL_ERR_API; } - // init_vm - if ((ret = OclLib::setKernelArg(ctx->rx_kernels[6], 0, sizeof(cl_mem), &ctx->rx_entropy)) != CL_SUCCESS) { - LOG_ERR(kSetKernelArgErr, err_to_str(ret), 6, 0); - return OCL_ERR_API; - } + if (!ctx->gcnAsm) { + // init_vm + if ((ret = OclLib::setKernelArg(ctx->rx_kernels[6], 0, sizeof(cl_mem), &ctx->rx_entropy)) != CL_SUCCESS) { + LOG_ERR(kSetKernelArgErr, err_to_str(ret), 6, 0); + return OCL_ERR_API; + } - if ((ret = OclLib::setKernelArg(ctx->rx_kernels[6], 1, sizeof(cl_mem), &ctx->rx_vm_states)) != CL_SUCCESS) { - LOG_ERR(kSetKernelArgErr, err_to_str(ret), 6, 1); - return OCL_ERR_API; - } + if ((ret = OclLib::setKernelArg(ctx->rx_kernels[6], 1, sizeof(cl_mem), &ctx->rx_vm_states)) != CL_SUCCESS) { + LOG_ERR(kSetKernelArgErr, err_to_str(ret), 6, 1); + return OCL_ERR_API; + } - if ((ret = OclLib::setKernelArg(ctx->rx_kernels[6], 2, sizeof(cl_mem), &ctx->rx_rounding)) != CL_SUCCESS) { - LOG_ERR(kSetKernelArgErr, err_to_str(ret), 6, 2); - return OCL_ERR_API; - } + if ((ret = OclLib::setKernelArg(ctx->rx_kernels[6], 2, sizeof(cl_mem), &ctx->rx_rounding)) != CL_SUCCESS) { + LOG_ERR(kSetKernelArgErr, err_to_str(ret), 6, 2); + return OCL_ERR_API; + } - // iteration is set in RXRunJob() + // iteration is set in RXRunJob() - // execute_vm - if ((ret = OclLib::setKernelArg(ctx->rx_kernels[7], 0, sizeof(cl_mem), &ctx->rx_vm_states)) != CL_SUCCESS) { - LOG_ERR(kSetKernelArgErr, err_to_str(ret), 7, 0); - return OCL_ERR_API; - } + // execute_vm + if ((ret = OclLib::setKernelArg(ctx->rx_kernels[7], 0, sizeof(cl_mem), &ctx->rx_vm_states)) != CL_SUCCESS) { + LOG_ERR(kSetKernelArgErr, err_to_str(ret), 7, 0); + return OCL_ERR_API; + } - if ((ret = OclLib::setKernelArg(ctx->rx_kernels[7], 1, sizeof(cl_mem), 
&ctx->rx_rounding)) != CL_SUCCESS) { - LOG_ERR(kSetKernelArgErr, err_to_str(ret), 7, 1); - return OCL_ERR_API; - } + if ((ret = OclLib::setKernelArg(ctx->rx_kernels[7], 1, sizeof(cl_mem), &ctx->rx_rounding)) != CL_SUCCESS) { + LOG_ERR(kSetKernelArgErr, err_to_str(ret), 7, 1); + return OCL_ERR_API; + } - if ((ret = OclLib::setKernelArg(ctx->rx_kernels[7], 2, sizeof(cl_mem), &ctx->rx_scratchpads)) != CL_SUCCESS) { - LOG_ERR(kSetKernelArgErr, err_to_str(ret), 7, 2); - return OCL_ERR_API; - } + if ((ret = OclLib::setKernelArg(ctx->rx_kernels[7], 2, sizeof(cl_mem), &ctx->rx_scratchpads)) != CL_SUCCESS) { + LOG_ERR(kSetKernelArgErr, err_to_str(ret), 7, 2); + return OCL_ERR_API; + } - if ((ret = OclLib::setKernelArg(ctx->rx_kernels[7], 3, sizeof(cl_mem), &ctx->rx_dataset)) != CL_SUCCESS) { - LOG_ERR(kSetKernelArgErr, err_to_str(ret), 7, 3); - return OCL_ERR_API; - } + if ((ret = OclLib::setKernelArg(ctx->rx_kernels[7], 3, sizeof(cl_mem), &ctx->rx_dataset)) != CL_SUCCESS) { + LOG_ERR(kSetKernelArgErr, err_to_str(ret), 7, 3); + return OCL_ERR_API; + } - if ((ret = OclLib::setKernelArg(ctx->rx_kernels[7], 4, sizeof(uint32_t), &batch_size)) != CL_SUCCESS) { - LOG_ERR(kSetKernelArgErr, err_to_str(ret), 7, 4); - return OCL_ERR_API; - } + if ((ret = OclLib::setKernelArg(ctx->rx_kernels[7], 4, sizeof(uint32_t), &batch_size)) != CL_SUCCESS) { + LOG_ERR(kSetKernelArgErr, err_to_str(ret), 7, 4); + return OCL_ERR_API; + } - // num_iterations, first, last are set in RXRunJob() + // num_iterations, first, last are set in RXRunJob() + } // find_shares if ((ret = OclLib::setKernelArg(ctx->rx_kernels[8], 0, sizeof(cl_mem), &ctx->rx_hashes)) != CL_SUCCESS) { @@ -496,6 +580,111 @@ size_t InitOpenCLGpu(int index, cl_context opencl_ctx, GpuContext* ctx, const ch LOG_ERR(kSetKernelArgErr, err_to_str(ret), 8, 3); return OCL_ERR_API; } + + if (ctx->gcnAsm) { + // randomx_jit + if ((ret = OclLib::setKernelArg(ctx->rx_kernels[9], 0, sizeof(cl_mem), &ctx->rx_entropy)) != CL_SUCCESS) { + 
LOG_ERR(kSetKernelArgErr, err_to_str(ret), 9, 0); + return OCL_ERR_API; + } + + if ((ret = OclLib::setKernelArg(ctx->rx_kernels[9], 1, sizeof(cl_mem), &ctx->rx_registers)) != CL_SUCCESS) { + LOG_ERR(kSetKernelArgErr, err_to_str(ret), 9, 1); + return OCL_ERR_API; + } + + if ((ret = OclLib::setKernelArg(ctx->rx_kernels[9], 2, sizeof(cl_mem), &ctx->rx_intermediate_programs)) != CL_SUCCESS) { + LOG_ERR(kSetKernelArgErr, err_to_str(ret), 9, 2); + return OCL_ERR_API; + } + + if ((ret = OclLib::setKernelArg(ctx->rx_kernels[9], 3, sizeof(cl_mem), &ctx->rx_programs)) != CL_SUCCESS) { + LOG_ERR(kSetKernelArgErr, err_to_str(ret), 9, 3); + return OCL_ERR_API; + } + + if ((ret = OclLib::setKernelArg(ctx->rx_kernels[9], 4, sizeof(uint32_t), &batch_size)) != CL_SUCCESS) { + LOG_ERR(kSetKernelArgErr, err_to_str(ret), 9, 4); + return OCL_ERR_API; + } + + if ((ret = OclLib::setKernelArg(ctx->rx_kernels[9], 5, sizeof(cl_mem), &ctx->rx_rounding)) != CL_SUCCESS) { + LOG_ERR(kSetKernelArgErr, err_to_str(ret), 9, 5); + return OCL_ERR_API; + } + + // iteration is set in RXRunJob() + + // randomx_run + if ((ret = OclLib::setKernelArg(ctx->rx_kernels[10], 0, sizeof(cl_mem), &ctx->rx_dataset)) != CL_SUCCESS) { + LOG_ERR(kSetKernelArgErr, err_to_str(ret), 10, 0); + return OCL_ERR_API; + } + + if ((ret = OclLib::setKernelArg(ctx->rx_kernels[10], 1, sizeof(cl_mem), &ctx->rx_scratchpads)) != CL_SUCCESS) { + LOG_ERR(kSetKernelArgErr, err_to_str(ret), 10, 1); + return OCL_ERR_API; + } + + if ((ret = OclLib::setKernelArg(ctx->rx_kernels[10], 2, sizeof(cl_mem), &ctx->rx_registers)) != CL_SUCCESS) { + LOG_ERR(kSetKernelArgErr, err_to_str(ret), 10, 2); + return OCL_ERR_API; + } + + if ((ret = OclLib::setKernelArg(ctx->rx_kernels[10], 3, sizeof(cl_mem), &ctx->rx_rounding)) != CL_SUCCESS) { + LOG_ERR(kSetKernelArgErr, err_to_str(ret), 10, 3); + return OCL_ERR_API; + } + + if ((ret = OclLib::setKernelArg(ctx->rx_kernels[10], 4, sizeof(cl_mem), &ctx->rx_programs)) != CL_SUCCESS) { + 
LOG_ERR(kSetKernelArgErr, err_to_str(ret), 10, 4); + return OCL_ERR_API; + } + + if ((ret = OclLib::setKernelArg(ctx->rx_kernels[10], 5, sizeof(uint32_t), &batch_size)) != CL_SUCCESS) { + LOG_ERR(kSetKernelArgErr, err_to_str(ret), 10, 5); + return OCL_ERR_API; + } + + auto PowerOf2 = [](size_t N) + { + uint32_t result = 0; + while (N > 1) + { + ++result; + N >>= 1; + } + return result; + }; + + const RandomX_ConfigurationBase* rx_conf; + switch (config->algorithm().variant()) + { + case xmrig::VARIANT_RX_LOKI: + rx_conf = &RandomX_LokiConfig; + break; + + case xmrig::VARIANT_RX_WOW: + rx_conf = &RandomX_WowneroConfig; + break; + + default: + rx_conf = &RandomX_MoneroConfig; + break; + } + + const uint32_t rx_parameters = + (PowerOf2(rx_conf->ScratchpadL1_Size) << 0) | + (PowerOf2(rx_conf->ScratchpadL2_Size) << 5) | + (PowerOf2(rx_conf->ScratchpadL3_Size) << 10) | + (PowerOf2(rx_conf->ProgramIterations) << 15); + ; + + if ((ret = OclLib::setKernelArg(ctx->rx_kernels[10], 6, sizeof(uint32_t), &rx_parameters)) != CL_SUCCESS) { + LOG_ERR(kSetKernelArgErr, err_to_str(ret), 10, 6); + return OCL_ERR_API; + } + } } else #endif @@ -744,6 +933,9 @@ size_t InitOpenCL(const std::vector &contexts, xmrig::Config *conf const char* randomx_constants_loki_h = #include "./opencl/RandomX/randomx_constants_loki.h" ; + const char* randomx_constants_monero_h = + #include "./opencl/RandomX/randomx_constants_monero.h" + ; const char* aesCL = #include "./opencl/RandomX/aes.cl" ; @@ -759,6 +951,9 @@ size_t InitOpenCL(const std::vector &contexts, xmrig::Config *conf const char* randomx_vmCL = #include "./opencl/RandomX/randomx_vm.cl" ; + const char* randomx_jitCL = + #include "./opencl/RandomX/randomx_jit.cl" + ; switch (config->algorithm().variant()) { @@ -769,11 +964,17 @@ size_t InitOpenCL(const std::vector &contexts, xmrig::Config *conf case xmrig::VARIANT_RX_LOKI: source_code.append(randomx_constants_loki_h); break; + + case xmrig::VARIANT_RX_0: + default: + 
source_code.append(randomx_constants_monero_h); + break; } source_code.append(std::regex_replace(aesCL, std::regex("#include \"fillAes1Rx4.cl\""), fillAes1Rx4CL)); source_code.append(std::regex_replace(blake2bCL, std::regex("#include \"blake2b_double_block.cl\""), blake2b_double_blockCL)); source_code.append(randomx_vmCL); + source_code.append(randomx_jitCL); } else #endif @@ -1213,6 +1414,8 @@ size_t RXRunJob(GpuContext *ctx, cl_uint *HashOutput, xmrig::Variant variant) size_t globalWorkSize4 = g_intensity * 4; size_t globalWorkSize8 = g_intensity * 8; size_t globalWorkSize16 = g_intensity * 16; + size_t globalWorkSize32 = g_intensity * 32; + size_t globalWorkSize64 = g_intensity * 64; size_t localWorkSize = 64; size_t localWorkSize32 = 32; size_t localWorkSize16 = 16; @@ -1247,58 +1450,83 @@ size_t RXRunJob(GpuContext *ctx, cl_uint *HashOutput, xmrig::Variant variant) return OCL_ERR_API; } - if ((ret = OclLib::setKernelArg(ctx->rx_kernels[6], 3, sizeof(uint32_t), &i)) != CL_SUCCESS) { - LOG_ERR(kSetKernelArgErr, err_to_str(ret), 6, 3); - return OCL_ERR_API; - } + if (!ctx->gcnAsm) { + // init_vm + if ((ret = OclLib::setKernelArg(ctx->rx_kernels[6], 3, sizeof(uint32_t), &i)) != CL_SUCCESS) { + LOG_ERR(kSetKernelArgErr, err_to_str(ret), 6, 3); + return OCL_ERR_API; + } - // init_vm - if ((ret = OclLib::enqueueNDRangeKernel(ctx->CommandQueues, ctx->rx_kernels[6], 1, nullptr, &globalWorkSize8, &localWorkSize32, 0, nullptr, nullptr)) != CL_SUCCESS) { - LOG_ERR("Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), 6); - return OCL_ERR_API; - } + if ((ret = OclLib::enqueueNDRangeKernel(ctx->CommandQueues, ctx->rx_kernels[6], 1, nullptr, &globalWorkSize8, &localWorkSize32, 0, nullptr, nullptr)) != CL_SUCCESS) { + LOG_ERR("Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), 6); + return OCL_ERR_API; + } - // execute_vm - uint32_t num_iterations = RandomX_CurrentConfig.ProgramIterations >> bfactor; - uint32_t first = 1; 
- uint32_t last = 0; + // execute_vm + uint32_t num_iterations = RandomX_CurrentConfig.ProgramIterations >> bfactor; + uint32_t first = 1; + uint32_t last = 0; - if ((ret = OclLib::setKernelArg(ctx->rx_kernels[7], 5, sizeof(uint32_t), &num_iterations)) != CL_SUCCESS) { - LOG_ERR(kSetKernelArgErr, err_to_str(ret), 7, 5); - return OCL_ERR_API; - } + if ((ret = OclLib::setKernelArg(ctx->rx_kernels[7], 5, sizeof(uint32_t), &num_iterations)) != CL_SUCCESS) { + LOG_ERR(kSetKernelArgErr, err_to_str(ret), 7, 5); + return OCL_ERR_API; + } - if ((ret = OclLib::setKernelArg(ctx->rx_kernels[7], 6, sizeof(uint32_t), &first)) != CL_SUCCESS) { - LOG_ERR(kSetKernelArgErr, err_to_str(ret), 7, 6); - return OCL_ERR_API; - } + if ((ret = OclLib::setKernelArg(ctx->rx_kernels[7], 6, sizeof(uint32_t), &first)) != CL_SUCCESS) { + LOG_ERR(kSetKernelArgErr, err_to_str(ret), 7, 6); + return OCL_ERR_API; + } - if ((ret = OclLib::setKernelArg(ctx->rx_kernels[7], 7, sizeof(uint32_t), &last)) != CL_SUCCESS) { - LOG_ERR(kSetKernelArgErr, err_to_str(ret), 7, 7); - return OCL_ERR_API; - } + if ((ret = OclLib::setKernelArg(ctx->rx_kernels[7], 7, sizeof(uint32_t), &last)) != CL_SUCCESS) { + LOG_ERR(kSetKernelArgErr, err_to_str(ret), 7, 7); + return OCL_ERR_API; + } + + for (int j = 0, n = 1 << bfactor; j < n; ++j) { + if (j == n - 1) { + last = 1; + if ((ret = OclLib::setKernelArg(ctx->rx_kernels[7], 7, sizeof(uint32_t), &last)) != CL_SUCCESS) { + LOG_ERR(kSetKernelArgErr, err_to_str(ret), 7, 7); + return OCL_ERR_API; + } + } - for (int j = 0, n = 1 << bfactor; j < n; ++j) { - if (j == n - 1) { - last = 1; - if ((ret = OclLib::setKernelArg(ctx->rx_kernels[7], 7, sizeof(uint32_t), &last)) != CL_SUCCESS) { - LOG_ERR(kSetKernelArgErr, err_to_str(ret), 7, 7); + // execute_vm + if ((ret = OclLib::enqueueNDRangeKernel(ctx->CommandQueues, ctx->rx_kernels[7], 1, nullptr, (ctx->workSize == 16) ? &globalWorkSize16 : &globalWorkSize8, (ctx->workSize == 16) ? 
&localWorkSize32 : &localWorkSize16, 0, nullptr, nullptr)) != CL_SUCCESS) { + LOG_ERR("Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), 7); return OCL_ERR_API; } + + if (j == 0) { + first = 0; + if ((ret = OclLib::setKernelArg(ctx->rx_kernels[7], 6, sizeof(uint32_t), &first)) != CL_SUCCESS) { + LOG_ERR(kSetKernelArgErr, err_to_str(ret), 7, 6); + return OCL_ERR_API; + } + } + } + } + else { + // randomx_jit + if ((ret = OclLib::setKernelArg(ctx->rx_kernels[9], 6, sizeof(uint32_t), &i)) != CL_SUCCESS) { + LOG_ERR(kSetKernelArgErr, err_to_str(ret), 9, 6); + return OCL_ERR_API; } - // execute_vm - if ((ret = OclLib::enqueueNDRangeKernel(ctx->CommandQueues, ctx->rx_kernels[7], 1, nullptr, (ctx->workSize == 16) ? &globalWorkSize16 : &globalWorkSize8, (ctx->workSize == 16) ? &localWorkSize32 : &localWorkSize16, 0, nullptr, nullptr)) != CL_SUCCESS) { - LOG_ERR("Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), 7); + if ((ret = OclLib::enqueueNDRangeKernel(ctx->CommandQueues, ctx->rx_kernels[9], 1, nullptr, &globalWorkSize32, &localWorkSize, 0, nullptr, nullptr)) != CL_SUCCESS) { + LOG_ERR("Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), 9); return OCL_ERR_API; } - if (j == 0) { - first = 0; - if ((ret = OclLib::setKernelArg(ctx->rx_kernels[7], 6, sizeof(uint32_t), &first)) != CL_SUCCESS) { - LOG_ERR(kSetKernelArgErr, err_to_str(ret), 7, 6); - return OCL_ERR_API; - } + if ((ret = OclLib::finish(ctx->CommandQueues)) != CL_SUCCESS) { + LOG_ERR("Error %s when calling clFinish.", err_to_str(ret)); + return OCL_ERR_API; + } + + // randomx_run + if ((ret = OclLib::enqueueNDRangeKernel(ctx->CommandQueues, ctx->rx_kernels[10], 1, nullptr, &globalWorkSize64, &localWorkSize, 0, nullptr, nullptr)) != CL_SUCCESS) { + LOG_ERR("Error %s when calling clEnqueueNDRangeKernel for kernel %d.", err_to_str(ret), 10); + return OCL_ERR_API; } } @@ -1369,11 +1597,16 @@ void ReleaseOpenCl(GpuContext* ctx) } } 
+#ifdef XMRIG_ALGO_RANDOMX + if (ctx->AsmProgram) OclLib::releaseProgram(ctx->AsmProgram); if (ctx->rx_dataset) OclLib::releaseMemObject(ctx->rx_dataset); if (ctx->rx_scratchpads) OclLib::releaseMemObject(ctx->rx_scratchpads); if (ctx->rx_hashes) OclLib::releaseMemObject(ctx->rx_hashes); if (ctx->rx_entropy) OclLib::releaseMemObject(ctx->rx_entropy); if (ctx->rx_vm_states) OclLib::releaseMemObject(ctx->rx_vm_states); + if (ctx->rx_registers) OclLib::releaseMemObject(ctx->rx_registers); + if (ctx->rx_intermediate_programs) OclLib::releaseMemObject(ctx->rx_intermediate_programs); + if (ctx->rx_programs) OclLib::releaseMemObject(ctx->rx_programs); if (ctx->rx_rounding) OclLib::releaseMemObject(ctx->rx_rounding); kernel_count = sizeof(ctx->rx_kernels) / sizeof(ctx->rx_kernels[0]); @@ -1382,6 +1615,7 @@ void ReleaseOpenCl(GpuContext* ctx) OclLib::releaseKernel(ctx->rx_kernels[k]); } } +#endif OclLib::releaseCommandQueue(ctx->CommandQueues); } diff --git a/src/amd/opencl/RandomX/aes.cl b/src/amd/opencl/RandomX/aes.cl index 36ee37b4..bb49f552 100644 --- a/src/amd/opencl/RandomX/aes.cl +++ b/src/amd/opencl/RandomX/aes.cl @@ -558,13 +558,11 @@ uint get_byte32(uint a, uint start_bit) { return (a >> start_bit) & 0xFF; } #define fillAes_name fillAes1Rx4_scratchpad #define outputSize RANDOMX_SCRATCHPAD_L3 #define outputSize0 (outputSize + 64) -#define strided SCRATCHPAD_STRIDED #define unroll_factor 8 #define num_rounds 1 #include "fillAes1Rx4.cl" #undef num_rounds #undef unroll_factor -#undef strided #undef outputSize #undef outputSize0 #undef fillAes_name @@ -572,13 +570,11 @@ uint get_byte32(uint a, uint start_bit) { return (a >> start_bit) & 0xFF; } #define fillAes_name fillAes4Rx4_entropy #define outputSize ENTROPY_SIZE #define outputSize0 outputSize -#define strided 0 #define unroll_factor 2 #define num_rounds 4 #include "fillAes1Rx4.cl" #undef num_rounds #undef unroll_factor -#undef strided #undef outputSize #undef outputSize0 #undef fillAes_name @@ -590,9 +586,8 @@ 
__kernel void hashAes1Rx4(__global const void* input, __global void* hash, uint { __local uint T[2048]; - const uint stride_size = batch_size * 4; const uint global_index = get_global_id(0); - if (global_index >= stride_size) + if (global_index >= batch_size * 4) return; const uint idx = global_index / 4; @@ -608,7 +603,7 @@ __kernel void hashAes1Rx4(__global const void* input, __global void* hash, uint const uint s1 = ((sub & 1) == 0) ? 8 : 24; const uint s3 = ((sub & 1) == 0) ? 24 : 8; - __global const uint4* p = SCRATCHPAD_STRIDED ? (((__global uint4*) input) + idx * 4 + sub) : (((__global uint4*) input) + idx * ((inputSize + 64) / sizeof(uint4)) + sub); + __global const uint4* p = ((__global uint4*) input) + idx * ((inputSize + 64) / sizeof(uint4)) + sub; __local const uint* const t0 = ((sub & 1) == 0) ? T : (T + 1024); __local const uint* const t1 = ((sub & 1) == 0) ? (T + 256) : (T + 1792); @@ -616,7 +611,7 @@ __kernel void hashAes1Rx4(__global const void* input, __global void* hash, uint __local const uint* const t3 = ((sub & 1) == 0) ? (T + 768) : (T + 1280); #pragma unroll(8) - for (uint i = 0; i < inputSize / sizeof(uint4); i += 4, p += SCRATCHPAD_STRIDED ? stride_size : 4) + for (uint i = 0; i < inputSize / sizeof(uint4); i += 4, p += 4) { uint k[4], y[4]; *(uint4*)(k) = *p; diff --git a/src/amd/opencl/RandomX/fillAes1Rx4.cl b/src/amd/opencl/RandomX/fillAes1Rx4.cl index f06992ca..a30742f5 100644 --- a/src/amd/opencl/RandomX/fillAes1Rx4.cl +++ b/src/amd/opencl/RandomX/fillAes1Rx4.cl @@ -23,9 +23,8 @@ __kernel void fillAes_name(__global void* state, __global void* out, uint batch_ { __local uint T[2048]; - const uint stride_size = batch_size * 4; const uint global_index = get_global_id(0); - if (global_index >= stride_size) + if (global_index >= batch_size * 4) return; const uint idx = global_index / 4; @@ -67,7 +66,7 @@ __kernel void fillAes_name(__global void* state, __global void* out, uint batch_ const uint s1 = (sub & 1) ? 
8 : 24; const uint s3 = (sub & 1) ? 24 : 8; - __global uint4* p = strided ? (((__global uint4*) out) + idx * 4 + sub) : (((__global uint4*) out) + idx * (outputSize0 / sizeof(uint4)) + sub); + __global uint4* p = ((__global uint4*) out) + idx * (outputSize0 / sizeof(uint4)) + sub; const __local uint* const t0 = (sub & 1) ? T : (T + 1024); const __local uint* const t1 = (sub & 1) ? (T + 256) : (T + 1792); @@ -75,7 +74,7 @@ __kernel void fillAes_name(__global void* state, __global void* out, uint batch_ const __local uint* const t3 = (sub & 1) ? (T + 768) : (T + 1280); #pragma unroll(unroll_factor) - for (uint i = 0; i < outputSize / sizeof(uint4); i += 4, p += strided ? stride_size : 4) + for (uint i = 0; i < outputSize / sizeof(uint4); i += 4, p += 4) { uint y[4]; @@ -115,6 +114,7 @@ __kernel void fillAes_name(__global void* state, __global void* out, uint batch_ *p = *(uint4*)(x); #endif } + *(__global uint4*)(s) = *(uint4*)(x); } )===" diff --git a/src/amd/opencl/RandomX/randomx_constants_loki.h b/src/amd/opencl/RandomX/randomx_constants_loki.h index c56381c9..065da96c 100644 --- a/src/amd/opencl/RandomX/randomx_constants_loki.h +++ b/src/amd/opencl/RandomX/randomx_constants_loki.h @@ -83,8 +83,6 @@ along with RandomX OpenCL. If not, see . #define RANDOMX_PROGRAM_SIZE 320 -#define SCRATCHPAD_STRIDED 0 - #define HASH_SIZE 64 #define ENTROPY_SIZE (128 + RANDOMX_PROGRAM_SIZE * 8) #define REGISTERS_SIZE 256 diff --git a/src/amd/opencl/RandomX/randomx_constants_monero.h b/src/amd/opencl/RandomX/randomx_constants_monero.h new file mode 100644 index 00000000..8cb78883 --- /dev/null +++ b/src/amd/opencl/RandomX/randomx_constants_monero.h @@ -0,0 +1,98 @@ +R"===( +/* +Copyright (c) 2019 SChernykh + +This file is part of RandomX OpenCL. 
+ +RandomX OpenCL is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +RandomX OpenCL is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with RandomX OpenCL. If not, see <http://www.gnu.org/licenses/>. +*/ + +//Dataset base size in bytes. Must be a power of 2. +#define RANDOMX_DATASET_BASE_SIZE 2147483648 + +//Dataset extra size. Must be divisible by 64. +#define RANDOMX_DATASET_EXTRA_SIZE 33554368 + +//Scratchpad L3 size in bytes. Must be a power of 2. +#define RANDOMX_SCRATCHPAD_L3 2097152 + +//Scratchpad L2 size in bytes. Must be a power of two and less than or equal to RANDOMX_SCRATCHPAD_L3. +#define RANDOMX_SCRATCHPAD_L2 262144 + +//Scratchpad L1 size in bytes. Must be a power of two (minimum 64) and less than or equal to RANDOMX_SCRATCHPAD_L2. +#define RANDOMX_SCRATCHPAD_L1 16384 + +//Jump condition mask size in bits. +#define RANDOMX_JUMP_BITS 8 + +//Jump condition mask offset in bits. The sum of RANDOMX_JUMP_BITS and RANDOMX_JUMP_OFFSET must not exceed 16.
+#define RANDOMX_JUMP_OFFSET 8 + +//Integer instructions +#define RANDOMX_FREQ_IADD_RS 25 +#define RANDOMX_FREQ_IADD_M 7 +#define RANDOMX_FREQ_ISUB_R 16 +#define RANDOMX_FREQ_ISUB_M 7 +#define RANDOMX_FREQ_IMUL_R 16 +#define RANDOMX_FREQ_IMUL_M 4 +#define RANDOMX_FREQ_IMULH_R 4 +#define RANDOMX_FREQ_IMULH_M 1 +#define RANDOMX_FREQ_ISMULH_R 4 +#define RANDOMX_FREQ_ISMULH_M 1 +#define RANDOMX_FREQ_IMUL_RCP 8 +#define RANDOMX_FREQ_INEG_R 2 +#define RANDOMX_FREQ_IXOR_R 15 +#define RANDOMX_FREQ_IXOR_M 5 +#define RANDOMX_FREQ_IROR_R 8 +#define RANDOMX_FREQ_IROL_R 2 +#define RANDOMX_FREQ_ISWAP_R 4 + +//Floating point instructions +#define RANDOMX_FREQ_FSWAP_R 4 +#define RANDOMX_FREQ_FADD_R 16 +#define RANDOMX_FREQ_FADD_M 5 +#define RANDOMX_FREQ_FSUB_R 16 +#define RANDOMX_FREQ_FSUB_M 5 +#define RANDOMX_FREQ_FSCAL_R 6 +#define RANDOMX_FREQ_FMUL_R 32 +#define RANDOMX_FREQ_FDIV_M 4 +#define RANDOMX_FREQ_FSQRT_R 6 + +//Control instructions +#define RANDOMX_FREQ_CBRANCH 16 +#define RANDOMX_FREQ_CFROUND 1 + +//Store instruction +#define RANDOMX_FREQ_ISTORE 16 + +//No-op instruction +#define RANDOMX_FREQ_NOP 0 + +#define RANDOMX_DATASET_ITEM_SIZE 64 + +#define RANDOMX_PROGRAM_SIZE 256 + +#define HASH_SIZE 64 +#define ENTROPY_SIZE (128 + RANDOMX_PROGRAM_SIZE * 8) +#define REGISTERS_SIZE 256 +#define IMM_BUF_SIZE (RANDOMX_PROGRAM_SIZE * 4 - REGISTERS_SIZE) +#define IMM_INDEX_COUNT ((IMM_BUF_SIZE / 4) - 2) +#define VM_STATE_SIZE (REGISTERS_SIZE + IMM_BUF_SIZE + RANDOMX_PROGRAM_SIZE * 4) +#define ROUNDING_MODE (RANDOMX_FREQ_CFROUND ? -1 : 0) + +// Scratchpad L1/L2/L3 bits +#define LOC_L1 (32 - 14) +#define LOC_L2 (32 - 18) +#define LOC_L3 (32 - 21) +)===" diff --git a/src/amd/opencl/RandomX/randomx_constants_wow.h b/src/amd/opencl/RandomX/randomx_constants_wow.h index 27cbae94..60f10671 100644 --- a/src/amd/opencl/RandomX/randomx_constants_wow.h +++ b/src/amd/opencl/RandomX/randomx_constants_wow.h @@ -83,8 +83,6 @@ along with RandomX OpenCL. If not, see <http://www.gnu.org/licenses/>.
#define RANDOMX_PROGRAM_SIZE 256 -#define SCRATCHPAD_STRIDED 0 - #define HASH_SIZE 64 #define ENTROPY_SIZE (128 + RANDOMX_PROGRAM_SIZE * 8) #define REGISTERS_SIZE 256 diff --git a/src/amd/opencl/RandomX/randomx_jit.cl b/src/amd/opencl/RandomX/randomx_jit.cl new file mode 100644 index 00000000..59d5697d --- /dev/null +++ b/src/amd/opencl/RandomX/randomx_jit.cl @@ -0,0 +1,1510 @@ +R"===( +/* +Copyright (c) 2019 SChernykh +Portions Copyright (c) 2018-2019 tevador + +This file is part of RandomX OpenCL. + +RandomX OpenCL is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +RandomX OpenCL is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with RandomX OpenCL. If not, see <http://www.gnu.org/licenses/>.
+*/ + +#define INITIAL_HASH_SIZE 64 +#define INTERMEDIATE_PROGRAM_SIZE (RANDOMX_PROGRAM_SIZE * 16) +#define COMPILED_PROGRAM_SIZE 10048 +#define NUM_VGPR_REGISTERS 128 + +#define mantissaSize 52 +#define exponentSize 11 +#define mantissaMask ((1UL << mantissaSize) - 1) +#define exponentMask ((1UL << exponentSize) - 1) +#define exponentBias 1023 + +#define dynamicExponentBits 4 +#define staticExponentBits 4 +#define constExponentBits 0x300 +#define dynamicMantissaMask ((1UL << (mantissaSize + dynamicExponentBits)) - 1) + +#define ScratchpadL1Mask_reg 38 +#define ScratchpadL2Mask_reg 39 +#define ScratchpadL3Mask_reg 50 + +#define ScratchpadL3Mask (RANDOMX_SCRATCHPAD_L3 - 8) + +#define RANDOMX_JUMP_BITS 8 +#define RANDOMX_JUMP_OFFSET 8 + +__global uint* jit_scratchpad_calc_address(__global uint* p, uint src, uint imm32, uint mask_reg, uint batch_size) +{ + // s_add_i32 s14, s(16 + src * 2), imm32 + *(p++) = 0x810eff10u | (src << 1); + *(p++) = imm32; + + // v_and_b32 v28, s14, mask_reg + *(p++) = 0x2638000eu | (mask_reg << 9); + + return p; +} + +__global uint* jit_scratchpad_calc_fixed_address(__global uint* p, uint imm32, uint batch_size) +{ + // v_mov_b32 v28, imm32 + *(p++) = 0x7e3802ffu; + *(p++) = imm32; + + return p; +} + +__global uint* jit_scratchpad_load(__global uint* p, uint vgpr_index) +{ + // v28 = offset + +#if GCN_VERSION >= 14 + // global_load_dwordx2 v[vgpr_index:vgpr_index+1], v28, s[0:1] + *(p++) = 0xdc548000u; + *(p++) = 0x0000001cu | (vgpr_index << 24); +#else + *(p++) = 0x32543902u; // v_add_u32 v42, vcc, v2, v28 + *(p++) = 0xd11c6a2bu; // v_addc_u32 v43, vcc, v3, 0, vcc + *(p++) = 0x01a90103u; + *(p++) = 0xdc540000u; // flat_load_dwordx2 v[vgpr_index:vgpr_index+1], v[42:43] + *(p++) = 0x0000002au | (vgpr_index << 24); +#endif + + return p; +} + +__global uint* jit_scratchpad_load2(__global uint* p, uint vgpr_index, int vmcnt) +{ + // s_waitcnt vmcnt(N) + if (vmcnt >= 0) + *(p++) = 0xbf8c0f70u | (vmcnt & 15) | ((vmcnt >> 4) << 14); + + // 
v_readlane_b32 s14, vgpr_index, 0 + *(p++) = 0xd289000eu; + *(p++) = 0x00010100u | vgpr_index; + + // v_readlane_b32 s15, vgpr_index + 1, 0 + *(p++) = 0xd289000fu; + *(p++) = 0x00010100u | (vgpr_index + 1); + + return p; +} + +__global uint* jit_scratchpad_calc_address_fp(__global uint* p, uint src, uint imm32, uint mask_reg, uint batch_size) +{ + // s_add_i32 s14, s(16 + src * 2), imm32 + *(p++) = 0x810eff10u | (src << 1); + *(p++) = imm32; + + // v_and_b32 v28, s14, mask_reg + *(p++) = 0x2638000eu | (mask_reg << 9); + +#if GCN_VERSION >= 14 + // v_add_u32 v28, v28, v44 + *(p++) = 0x6838591cu; +#else + // v_add_u32 v28, vcc, v28, v44 + *(p++) = 0x3238591cu; +#endif + + return p; +} + +__global uint* jit_scratchpad_load_fp(__global uint* p, uint vgpr_index) +{ + // v28 = offset + +#if GCN_VERSION >= 14 + // global_load_dword v(vgpr_index), v28, s[0:1] + *(p++) = 0xdc508000u; + *(p++) = 0x0000001cu | (vgpr_index << 24); +#else + *(p++) = 0x32543902u; // v_add_u32 v42, vcc, v2, v28 + *(p++) = 0xd11c6a2bu; // v_addc_u32 v43, vcc, v3, 0, vcc + *(p++) = 0x01a90103u; + *(p++) = 0xdc500000u; // flat_load_dword v(vgpr_index), v[42:43] + *(p++) = 0x0000002au | (vgpr_index << 24); +#endif + + return p; +} + +__global uint* jit_scratchpad_load2_fp(__global uint* p, uint vgpr_index, int vmcnt) +{ + // s_waitcnt vmcnt(N) + if (vmcnt >= 0) + *(p++) = 0xbf8c0f70u | (vmcnt & 15) | ((vmcnt >> 4) << 14); + + // v_cvt_f64_i32 v[28:29], vgpr_index + *(p++) = 0x7e380900u | vgpr_index; + + return p; +} + +)===" +R"===( + +__global uint* jit_emit_instruction(__global uint* p, __global uint* last_branch_target, const uint2 inst, int prefetch_vgpr_index, int vmcnt, uint batch_size) +{ + uint opcode = inst.x & 0xFF; + const uint dst = (inst.x >> 8) & 7; + const uint src = (inst.x >> 16) & 7; + const uint mod = inst.x >> 24; + + if (opcode < RANDOMX_FREQ_IADD_RS) + { + const uint shift = (mod >> 2) % 4; + if (shift > 0) // p = 3/4 + { + // s_lshl_b64 s[14:15], s[(16 + src * 2):(17 + src * 
2)], shift + *(p++) = 0x8e8e8010u | (src << 1) | (shift << 8); + + // s_add_u32 s(16 + dst * 2), s(16 + dst * 2), s14 + *(p++) = 0x80100e10u | (dst << 1) | (dst << 17); + + // s_addc_u32 s(17 + dst * 2), s(17 + dst * 2), s15 + *(p++) = 0x82110f11u | (dst << 1) | (dst << 17); + } + else // p = 1/4 + { + // s_add_u32 s(16 + dst * 2), s(16 + dst * 2), s(16 + src * 2) + *(p++) = 0x80101010u | (dst << 1) | (dst << 17) | (src << 9); + + // s_addc_u32 s(17 + dst * 2), s(17 + dst * 2), s(17 + src * 2) + *(p++) = 0x82111111u | (dst << 1) | (dst << 17) | (src << 9); + } + + if (dst == 5) // p = 1/8 + { + // s_add_u32 s(16 + dst * 2), s(16 + dst * 2), imm32 + *(p++) = 0x8010ff10u | (dst << 1) | (dst << 17); + *(p++) = inst.y; + + // s_addc_u32 s(17 + dst * 2), s(17 + dst * 2), ((inst.y < 0) ? -1 : 0) + *(p++) = 0x82110011u | (dst << 1) | (dst << 17) | (((as_int(inst.y) < 0) ? 0xc1 : 0x80) << 8); + } + + // 12*3/4 + 8*1/4 + 12/8 = 12.5 bytes on average + return p; + } + opcode -= RANDOMX_FREQ_IADD_RS; + + if (opcode < RANDOMX_FREQ_IADD_M) + { + if (prefetch_vgpr_index >= 0) + { + if (src != dst) // p = 7/8 + p = jit_scratchpad_calc_address(p, src, inst.y, (mod % 4) ? ScratchpadL1Mask_reg : ScratchpadL2Mask_reg, batch_size); + else // p = 1/8 + p = jit_scratchpad_calc_fixed_address(p, inst.y & ScratchpadL3Mask, batch_size); + + p = jit_scratchpad_load(p, prefetch_vgpr_index ? prefetch_vgpr_index : 28); + } + + if (prefetch_vgpr_index <= 0) + { + p = jit_scratchpad_load2(p, prefetch_vgpr_index ? -prefetch_vgpr_index : 28, prefetch_vgpr_index ? 
vmcnt : 0); + + // s_add_u32 s(16 + dst * 2), s(16 + dst * 2), s14 + *(p++) = 0x80100e10u | (dst << 1) | (dst << 17); + + // s_addc_u32 s(17 + dst * 2), s(17 + dst * 2), s15 + *(p++) = 0x82110f11u | (dst << 1) | (dst << 17); + } + + // (12*7/8 + 8*1/8 + 28) + 8 = 47.5 bytes on average + return p; + } + opcode -= RANDOMX_FREQ_IADD_M; + + if (opcode < RANDOMX_FREQ_ISUB_R) + { + if (src != dst) // p = 7/8 + { + // s_sub_u32 s(16 + dst * 2), s(16 + dst * 2), s(16 + src * 2) + *(p++) = 0x80901010u | (dst << 1) | (dst << 17) | (src << 9); + + // s_subb_u32 s(17 + dst * 2), s(17 + dst * 2), s(17 + src * 2) + *(p++) = 0x82911111u | (dst << 1) | (dst << 17) | (src << 9); + } + else // p = 1/8 + { + // s_sub_u32 s(16 + dst * 2), s(16 + dst * 2), imm32 + *(p++) = 0x8090ff10u | (dst << 1) | (dst << 17); + *(p++) = inst.y; + + // s_subb_u32 s(17 + dst * 2), s(17 + dst * 2), ((inst.y < 0) ? -1 : 0) + *(p++) = 0x82910011u | (dst << 1) | (dst << 17) | (((as_int(inst.y) < 0) ? 0xc1 : 0x80) << 8); + } + + // 8*7/8 + 12/8 = 8.5 bytes on average + return p; + } + opcode -= RANDOMX_FREQ_ISUB_R; + + if (opcode < RANDOMX_FREQ_ISUB_M) + { + if (prefetch_vgpr_index >= 0) + { + if (src != dst) // p = 7/8 + p = jit_scratchpad_calc_address(p, src, inst.y, (mod % 4) ? ScratchpadL1Mask_reg : ScratchpadL2Mask_reg, batch_size); + else // p = 1/8 + p = jit_scratchpad_calc_fixed_address(p, inst.y & ScratchpadL3Mask, batch_size); + + p = jit_scratchpad_load(p, prefetch_vgpr_index ? prefetch_vgpr_index : 28); + } + + if (prefetch_vgpr_index <= 0) + { + p = jit_scratchpad_load2(p, prefetch_vgpr_index ? -prefetch_vgpr_index : 28, prefetch_vgpr_index ? 
vmcnt : 0); + + // s_sub_u32 s(16 + dst * 2), s(16 + dst * 2), s14 + *(p++) = 0x80900e10u | (dst << 1) | (dst << 17); + + // s_subb_u32 s(17 + dst * 2), s(17 + dst * 2), s15 + *(p++) = 0x82910f11u | (dst << 1) | (dst << 17); + } + + // (12*7/8 + 8*1/8 + 28) + 8 = 47.5 bytes on average + return p; + } + opcode -= RANDOMX_FREQ_ISUB_M; + + if (opcode < RANDOMX_FREQ_IMUL_R) + { + if (src != dst) // p = 7/8 + { +#if GCN_VERSION >= 14 + // s_mul_hi_u32 s15, s(16 + dst * 2), s(16 + src * 2) + *(p++) = 0x960f1010u | (dst << 1) | (src << 9); +#else + // v_mov_b32 v28, s(16 + dst * 2) + *(p++) = 0x7e380210u | (dst << 1); + // v_mul_hi_u32 v28, v28, s(16 + src * 2) + *(p++) = 0xd286001cu; + *(p++) = 0x0000211cu + (src << 10); + // v_readlane_b32 s15, v28, 0 + *(p++) = 0xd289000fu; + *(p++) = 0x0001011cu; +#endif + + // s_mul_i32 s14, s(16 + dst * 2), s(17 + src * 2) + *(p++) = 0x920e1110u | (dst << 1) | (src << 9); + + // s_add_u32 s15, s15, s14 + *(p++) = 0x800f0e0fu; + + // s_mul_i32 s14, s(17 + dst * 2), s(16 + src * 2) + *(p++) = 0x920e1011u | (dst << 1) | (src << 9); + + // s_add_u32 s(17 + dst * 2), s15, s14 + *(p++) = 0x80110e0fu | (dst << 17); + + // s_mul_i32 s(16 + dst * 2), s(16 + dst * 2), s(16 + src * 2) + *(p++) = 0x92101010u | (dst << 1) | (dst << 17) | (src << 9); + } + else // p = 1/8 + { +#if GCN_VERSION >= 14 + // s_mul_hi_u32 s15, s(16 + dst * 2), imm32 + *(p++) = 0x960fff10u | (dst << 1); + *(p++) = inst.y; +#else + // v_mov_b32 v28, imm32 + *(p++) = 0x7e3802ffu; + *(p++) = inst.y; + // v_mul_hi_u32 v28, v28, s(16 + dst * 2) + *(p++) = 0xd286001cu; + *(p++) = 0x0000211cu + (dst << 10); + // v_readlane_b32 s15, v28, 0 + *(p++) = 0xd289000fu; + *(p++) = 0x0001011cu; +#endif + + if (as_int(inst.y) < 0) // p = 1/2 + { + // s_sub_u32 s15, s15, s(16 + dst * 2) + *(p++) = 0x808f100fu | (dst << 9); + } + + // s_mul_i32 s14, s(17 + dst * 2), imm32 + *(p++) = 0x920eff11u | (dst << 1); + *(p++) = inst.y; + + // s_add_u32 s(17 + dst * 2), s15, s14 + *(p++) = 
0x80110e0fu | (dst << 17); + + // s_mul_i32 s(16 + dst * 2), s(16 + dst * 2), imm32 + *(p++) = 0x9210ff10u | (dst << 1) | (dst << 17); + *(p++) = inst.y; + } + + // 24*7/8 + 28*1/8 + 4*1/16 = 24.75 bytes on average + return p; + } + opcode -= RANDOMX_FREQ_IMUL_R; + + if (opcode < RANDOMX_FREQ_IMUL_M) + { + if (prefetch_vgpr_index >= 0) + { + if (src != dst) // p = 7/8 + p = jit_scratchpad_calc_address(p, src, inst.y, (mod % 4) ? ScratchpadL1Mask_reg : ScratchpadL2Mask_reg, batch_size); + else // p = 1/8 + p = jit_scratchpad_calc_fixed_address(p, inst.y & ScratchpadL3Mask, batch_size); + + p = jit_scratchpad_load(p, prefetch_vgpr_index ? prefetch_vgpr_index : 28); + } + + if (prefetch_vgpr_index <= 0) + { + p = jit_scratchpad_load2(p, prefetch_vgpr_index ? -prefetch_vgpr_index : 28, prefetch_vgpr_index ? vmcnt : 0); + +#if GCN_VERSION >= 14 + // s_mul_hi_u32 s33, s(16 + dst * 2), s14 + *(p++) = 0x96210e10u | (dst << 1); +#else + // v_mov_b32 v28, s(16 + dst * 2) + *(p++) = 0x7e380210u | (dst << 1); + // v_mul_hi_u32 v28, v28, s14 + *(p++) = 0xd286001cu; + *(p++) = 0x00001d1cu; + // v_readlane_b32 s33, v28, 0 + *(p++) = 0xd2890021u; + *(p++) = 0x0001011cu; +#endif + + // s_mul_i32 s32, s(16 + dst * 2), s15 + *(p++) = 0x92200f10u | (dst << 1); + + // s_add_u32 s33, s33, s32 + *(p++) = 0x80212021u; + + // s_mul_i32 s32, s(17 + dst * 2), s14 + *(p++) = 0x92200e11u | (dst << 1); + + // s_add_u32 s(17 + dst * 2), s33, s32 + *(p++) = 0x80112021u | (dst << 17); + + // s_mul_i32 s(16 + dst * 2), s(16 + dst * 2), s14 + *(p++) = 0x92100e10u | (dst << 1) | (dst << 17); + } + + // (12*7/8 + 8*1/8 + 28) + 24 = 63.5 bytes on average + return p; + } + opcode -= RANDOMX_FREQ_IMUL_M; + + if (opcode < RANDOMX_FREQ_IMULH_R) + { + *(p++) = 0xbe8e0110u | (dst << 1); // s_mov_b64 s[14:15], s[16 + dst * 2:17 + dst * 2] + *(p++) = 0xbea60110u | (src << 1); // s_mov_b64 s[38:39], s[16 + src * 2:17 + src * 2] + *(p++) = 0xbebc1e3au; // s_swappc_b64 s[60:61], s[58:59] + *(p++) = 0xbe90010eu | 
(dst << 17); // s_mov_b64 s[16 + dst * 2:17 + dst * 2], s[14:15] + + // 16 bytes + return p; + } + opcode -= RANDOMX_FREQ_IMULH_R; + + if (opcode < RANDOMX_FREQ_IMULH_M) + { + if (prefetch_vgpr_index >= 0) + { + if (src != dst) // p = 7/8 + p = jit_scratchpad_calc_address(p, src, inst.y, (mod % 4) ? ScratchpadL1Mask_reg : ScratchpadL2Mask_reg, batch_size); + else // p = 1/8 + p = jit_scratchpad_calc_fixed_address(p, inst.y & ScratchpadL3Mask, batch_size); + + p = jit_scratchpad_load(p, prefetch_vgpr_index ? prefetch_vgpr_index : 28); + } + + if (prefetch_vgpr_index <= 0) + { + p = jit_scratchpad_load2(p, prefetch_vgpr_index ? -prefetch_vgpr_index : 28, prefetch_vgpr_index ? vmcnt : 0); + + *(p++) = 0xbea60110u | (dst << 1); // s_mov_b64 s[38:39], s[16 + dst * 2:17 + dst * 2] (memory operand is already in s[14:15]) + *(p++) = 0xbebc1e3au; // s_swappc_b64 s[60:61], s[58:59] + *(p++) = 0xbe90010eu | (dst << 17); // s_mov_b64 s[16 + dst * 2:17 + dst * 2], s[14:15] + } + + // (12*7/8 + 8*1/8 + 28) + 12 = 51.5 bytes on average + return p; + } + opcode -= RANDOMX_FREQ_IMULH_M; + + if (opcode < RANDOMX_FREQ_ISMULH_R) + { + *(p++) = 0xbe8e0110u | (dst << 1); // s_mov_b64 s[14:15], s[16 + dst * 2:17 + dst * 2] + *(p++) = 0xbea60110u | (src << 1); // s_mov_b64 s[38:39], s[16 + src * 2:17 + src * 2] + *(p++) = 0xbebc1e38u; // s_swappc_b64 s[60:61], s[56:57] + *(p++) = 0xbe90010eu | (dst << 17); // s_mov_b64 s[16 + dst * 2:17 + dst * 2], s[14:15] + + // 16 bytes + return p; + } + opcode -= RANDOMX_FREQ_ISMULH_R; + + if (opcode < RANDOMX_FREQ_ISMULH_M) + { + if (prefetch_vgpr_index >= 0) + { + if (src != dst) // p = 7/8 + p = jit_scratchpad_calc_address(p, src, inst.y, (mod % 4) ? ScratchpadL1Mask_reg : ScratchpadL2Mask_reg, batch_size); + else // p = 1/8 + p = jit_scratchpad_calc_fixed_address(p, inst.y & ScratchpadL3Mask, batch_size); + + p = jit_scratchpad_load(p, prefetch_vgpr_index ? prefetch_vgpr_index : 28); + } + + if (prefetch_vgpr_index <= 0) + { + p = jit_scratchpad_load2(p, prefetch_vgpr_index ?
-prefetch_vgpr_index : 28, prefetch_vgpr_index ? vmcnt : 0); + + *(p++) = 0xbea60110u | (dst << 1); // s_mov_b64 s[38:39], s[16 + dst * 2:17 + dst * 2] + *(p++) = 0xbebc1e38u; // s_swappc_b64 s[60:61], s[56:57] + *(p++) = 0xbe90010eu | (dst << 17); // s_mov_b64 s[16 + dst * 2:17 + dst * 2], s[14:15] + } + + // (12*7/8 + 8*1/8 + 28) + 12 = 51.5 bytes on average + return p; + } + opcode -= RANDOMX_FREQ_ISMULH_M; + + if (opcode < RANDOMX_FREQ_IMUL_RCP) + { + if (inst.y & (inst.y - 1)) + { + const uint2 rcp_value = as_uint2(imul_rcp_value(inst.y)); + + *(p++) = 0xbea000ffu; // s_mov_b32 s32, imm32 + *(p++) = rcp_value.x; +#if GCN_VERSION >= 14 + *(p++) = 0x960f2010u | (dst << 1); // s_mul_hi_u32 s15, s(16 + dst * 2), s32 +#else + // v_mov_b32 v28, s32 + *(p++) = 0x7e380220u; + // v_mul_hi_u32 v28, v28, s(16 + dst * 2) + *(p++) = 0xd286001cu; + *(p++) = 0x0000211cu + (dst << 10); + // v_readlane_b32 s15, v28, 0 + *(p++) = 0xd289000fu; + *(p++) = 0x0001011cu; +#endif + *(p++) = 0x920eff10u | (dst << 1); // s_mul_i32 s14, s(16 + dst * 2), imm32 + *(p++) = rcp_value.y; + *(p++) = 0x800f0e0fu; // s_add_u32 s15, s15, s14 + *(p++) = 0x920e2011u | (dst << 1); // s_mul_i32 s14, s(17 + dst * 2), s32 + *(p++) = 0x80110e0fu | (dst << 17); // s_add_u32 s(17 + dst * 2), s15, s14 + *(p++) = 0x92102010u | (dst << 1) | (dst << 17);// s_mul_i32 s(16 + dst * 2), s(16 + dst * 2), s32 + } + + // 36 bytes + return p; + } + opcode -= RANDOMX_FREQ_IMUL_RCP; + + if (opcode < RANDOMX_FREQ_INEG_R) + { + *(p++) = 0x80901080u | (dst << 9) | (dst << 17); // s_sub_u32 s(16 + dst * 2), 0, s(16 + dst * 2) + *(p++) = 0x82911180u | (dst << 9) | (dst << 17); // s_subb_u32 s(17 + dst * 2), 0, s(17 + dst * 2) + + // 8 bytes + return p; + } + opcode -= RANDOMX_FREQ_INEG_R; + + if (opcode < RANDOMX_FREQ_IXOR_R) + { + if (src != dst) // p = 7/8 + { + // s_xor_b64 s[16 + dst * 2:17 + dst * 2], s[16 + dst * 2:17 + dst * 2], s[16 + src * 2:17 + src * 2] + *(p++) = 0x88901010u | (dst << 1) | (dst << 17) | (src << 
9); + } + else // p = 1/8 + { + if (as_int(inst.y) < 0) // p = 1/2 + { + // s_mov_b32 s62, imm32 + *(p++) = 0xbebe00ffu; + *(p++) = inst.y; + + // s_xor_b64 s[16 + dst * 2:17 + dst * 2], s[16 + dst * 2:17 + dst * 2], s[62:63] + *(p++) = 0x88903e10u | (dst << 1) | (dst << 17); + } + else + { + // s_xor_b32 s(16 + dst * 2), s(16 + dst * 2), imm32 + *(p++) = 0x8810ff10u | (dst << 1) | (dst << 17); + *(p++) = inst.y; + } + } + + // 4*7/8 + 12/16 + 8/16 = 4.75 bytes on average + return p; + } + opcode -= RANDOMX_FREQ_IXOR_R; + + if (opcode < RANDOMX_FREQ_IXOR_M) + { + if (prefetch_vgpr_index >= 0) + { + if (src != dst) // p = 7/8 + p = jit_scratchpad_calc_address(p, src, inst.y, (mod % 4) ? ScratchpadL1Mask_reg : ScratchpadL2Mask_reg, batch_size); + else // p = 1/8 + p = jit_scratchpad_calc_fixed_address(p, inst.y & ScratchpadL3Mask, batch_size); + + p = jit_scratchpad_load(p, prefetch_vgpr_index ? prefetch_vgpr_index : 28); + } + + if (prefetch_vgpr_index <= 0) + { + p = jit_scratchpad_load2(p, prefetch_vgpr_index ? -prefetch_vgpr_index : 28, prefetch_vgpr_index ? 
vmcnt : 0); + + // s_xor_b64 s[16 + dst * 2:17 + dst * 2], s[16 + dst * 2:17 + dst * 2], s[14:15] + *(p++) = 0x88900e10u | (dst << 1) | (dst << 17); + } + + // (12*7/8 + 8*1/8 + 28) + 4 = 43.5 bytes on average + return p; + } + opcode -= RANDOMX_FREQ_IXOR_M; + + if (opcode < RANDOMX_FREQ_IROR_R + RANDOMX_FREQ_IROL_R) + { + if (src != dst) // p = 7/8 + { + if (opcode < RANDOMX_FREQ_IROR_R) + { + // s_lshr_b64 s[32:33], s[16 + dst * 2:17 + dst * 2], s(16 + src * 2) + *(p++) = 0x8fa01010u | (dst << 1) | (src << 9); + + // s_sub_u32 s15, 64, s(16 + src * 2) + *(p++) = 0x808f10c0u | (src << 9); + + // s_lshl_b64 s[34:35], s[16 + dst * 2:17 + dst * 2], s15 + *(p++) = 0x8ea20f10u | (dst << 1); + } + else + { + // s_lshl_b64 s[32:33], s[16 + dst * 2:17 + dst * 2], s(16 + src * 2) + *(p++) = 0x8ea01010u | (dst << 1) | (src << 9); + + // s_sub_u32 s15, 64, s(16 + src * 2) + *(p++) = 0x808f10c0u | (src << 9); + + // s_lshr_b64 s[34:35], s[16 + dst * 2:17 + dst * 2], s15 + *(p++) = 0x8fa20f10u | (dst << 1); + } + } + else // p = 1/8 + { + const uint shift = ((opcode < RANDOMX_FREQ_IROR_R) ? 
inst.y : -inst.y) & 63; + + // s_lshr_b64 s[32:33], s[16 + dst * 2:17 + dst * 2], shift + *(p++) = 0x8fa08010u | (dst << 1) | (shift << 8); + + // s_lshl_b64 s[34:35], s[16 + dst * 2:17 + dst * 2], 64 - shift + *(p++) = 0x8ea28010u | (dst << 1) | ((64 - shift) << 8); + } + + // s_or_b64 s[16 + dst * 2:17 + dst * 2], s[32:33], s[34:35] + *(p++) = 0x87902220u | (dst << 17); + + // 12*7/8 + 8/8 + 4 = 15.5 bytes on average + return p; + } + opcode -= RANDOMX_FREQ_IROR_R + RANDOMX_FREQ_IROL_R; + + if (opcode < RANDOMX_FREQ_ISWAP_R) + { + if (src != dst) + { + *(p++) = 0xbea00110u | (dst << 1); // s_mov_b64 s[32:33], s[16 + dst * 2:17 + dst * 2] + *(p++) = 0xbe900110u | (src << 1) | (dst << 17);// s_mov_b64 s[16 + dst * 2:17 + dst * 2], s[16 + src * 2:17 + src * 2] + *(p++) = 0xbe900120u | (src << 17); // s_mov_b64 s[16 + src * 2:17 + src * 2], s[32:33] + } + + // 12*7/8 = 10.5 bytes on average + return p; + } + opcode -= RANDOMX_FREQ_ISWAP_R; + + if (opcode < RANDOMX_FREQ_FSWAP_R) + { + // ds_swizzle_b32 v(60 + dst * 2), v(60 + dst * 2) offset:0x8001 + *(p++) = 0xd87a8001u; + *(p++) = 0x3c00003cu + (dst << 1) + (dst << 25); + + // ds_swizzle_b32 v(61 + dst * 2), v(61 + dst * 2) offset:0x8001 + *(p++) = 0xd87a8001u; + *(p++) = 0x3d00003du + (dst << 1) + (dst << 25); + + // s_waitcnt lgkmcnt(0) + *(p++) = 0xbf8cc07fu; + + // 20 bytes + return p; + } + opcode -= RANDOMX_FREQ_FSWAP_R; + + if (opcode < RANDOMX_FREQ_FADD_R) + { + // v_add_f64 v[60 + dst * 2:61 + dst * 2], v[60 + dst * 2:61 + dst * 2], v[52 + src * 2:53 + src * 2] + *(p++) = 0xd280003cu + ((dst & 3) << 1); + *(p++) = 0x0002693cu + ((dst & 3) << 1) + ((src & 3) << 10); + + // 8 bytes + return p; + } + opcode -= RANDOMX_FREQ_FADD_R; + +)===" +R"===( + + if (opcode < RANDOMX_FREQ_FADD_M) + { + if (prefetch_vgpr_index >= 0) + { + p = jit_scratchpad_calc_address_fp(p, src, inst.y, (mod % 4) ? ScratchpadL1Mask_reg : ScratchpadL2Mask_reg, batch_size); + p = jit_scratchpad_load_fp(p, prefetch_vgpr_index ?
prefetch_vgpr_index : 28); + } + + if (prefetch_vgpr_index <= 0) + { + p = jit_scratchpad_load2_fp(p, prefetch_vgpr_index ? -prefetch_vgpr_index : 28, prefetch_vgpr_index ? vmcnt : 0); + + // v_add_f64 v[60 + dst * 2:61 + dst * 2], v[60 + dst * 2:61 + dst * 2], v[28:29] + *(p++) = 0xd280003cu + ((dst & 3) << 1); + *(p++) = 0x0002393cu + ((dst & 3) << 1); + } + + // 32 + 8 = 40 bytes + return p; + } + opcode -= RANDOMX_FREQ_FADD_M; + + if (opcode < RANDOMX_FREQ_FSUB_R) + { + // v_add_f64 v[60 + dst * 2:61 + dst * 2], v[60 + dst * 2:61 + dst * 2], -v[52 + src * 2:53 + src * 2] + *(p++) = 0xd280003cu + ((dst & 3) << 1); + *(p++) = 0x4002693cu + ((dst & 3) << 1) + ((src & 3) << 10); + + // 8 bytes + return p; + } + opcode -= RANDOMX_FREQ_FSUB_R; + + if (opcode < RANDOMX_FREQ_FSUB_M) + { + if (prefetch_vgpr_index >= 0) + { + p = jit_scratchpad_calc_address_fp(p, src, inst.y, (mod % 4) ? ScratchpadL1Mask_reg : ScratchpadL2Mask_reg, batch_size); + p = jit_scratchpad_load_fp(p, prefetch_vgpr_index ? prefetch_vgpr_index : 28); + } + + if (prefetch_vgpr_index <= 0) + { + p = jit_scratchpad_load2_fp(p, prefetch_vgpr_index ? -prefetch_vgpr_index : 28, prefetch_vgpr_index ? 
vmcnt : 0); + + // v_add_f64 v[60 + dst * 2:61 + dst * 2], v[60 + dst * 2:61 + dst * 2], -v[28:29] + *(p++) = 0xd280003cu + ((dst & 3) << 1); + *(p++) = 0x4002393cu + ((dst & 3) << 1); + } + + // 32 + 8 = 40 bytes + return p; + } + opcode -= RANDOMX_FREQ_FSUB_M; + + if (opcode < RANDOMX_FREQ_FSCAL_R) + { + // v_xor_b32 v(61 + dst * 2), v(61 + dst * 2), v51 + *(p++) = 0x2a7a673du + ((dst & 3) << 1) + ((dst & 3) << 18); + + // 4 bytes + return p; + } + opcode -= RANDOMX_FREQ_FSCAL_R; + + if (opcode < RANDOMX_FREQ_FMUL_R) + { + // v_mul_f64 v[68 + dst * 2:69 + dst * 2], v[68 + dst * 2:69 + dst * 2], v[52 + src * 2:53 + src * 2] + *(p++) = 0xd2810044u + ((dst & 3) << 1); + *(p++) = 0x00026944u + ((dst & 3) << 1) + ((src & 3) << 10); + + // 8 bytes + return p; + } + opcode -= RANDOMX_FREQ_FMUL_R; + + if (opcode < RANDOMX_FREQ_FDIV_M) + { + if (prefetch_vgpr_index >= 0) + { + p = jit_scratchpad_calc_address_fp(p, src, inst.y, (mod % 4) ? ScratchpadL1Mask_reg : ScratchpadL2Mask_reg, batch_size); + p = jit_scratchpad_load_fp(p, prefetch_vgpr_index ? prefetch_vgpr_index : 28); + } + + if (prefetch_vgpr_index <= 0) + { + p = jit_scratchpad_load2_fp(p, prefetch_vgpr_index ? -prefetch_vgpr_index : 28, prefetch_vgpr_index ? vmcnt : 0); + + // s_swappc_b64 s[60:61], s[48 + dst * 2:49 + dst * 2] + *(p++) = 0xbebc1e30u + ((dst & 3) << 1); + } + + // 32 + 4 = 36 bytes + return p; + } + opcode -= RANDOMX_FREQ_FDIV_M; + + if (opcode < RANDOMX_FREQ_FSQRT_R) + { + // s_swappc_b64 s[60:61], s[40 + dst * 2:41 + dst * 2] + *(p++) = 0xbebc1e28u + ((dst & 3) << 1); + + // 4 bytes + return p; + } + opcode -= RANDOMX_FREQ_FSQRT_R; + + if (opcode < RANDOMX_FREQ_CBRANCH) + { + const int shift = (mod >> 4) + RANDOMX_JUMP_OFFSET; + uint imm = inst.y | (1u << shift); + imm &= ~(1u << (shift - 1)); + + // s_add_u32 s(16 + dst * 2), s(16 + dst * 2), imm32 + *(p++) = 0x8010ff10 | (dst << 1) | (dst << 17); + *(p++) = imm; + + // s_addc_u32 s(17 + dst * 2), s(17 + dst * 2), ((imm < 0) ? 
-1 : 0) + *(p++) = 0x82110011u | (dst << 1) | (dst << 17) | (((as_int(imm) < 0) ? 0xc1 : 0x80) << 8); + + const uint conditionMaskReg = 70 + (mod >> 4); + + // s_and_b32 s14, s(16 + dst * 2), conditionMaskReg + *(p++) = 0x860e0010u | (dst << 1) | (conditionMaskReg << 8); + + // s_cbranch_scc0 target + const int delta = ((last_branch_target - p) - 1); + *(p++) = 0xbf840000u | (delta & 0xFFFF); + + // 20 bytes + return p; + } + opcode -= RANDOMX_FREQ_CBRANCH; + + if (opcode < RANDOMX_FREQ_CFROUND) + { + const uint shift = inst.y & 63; + if (shift == 63) + { + *(p++) = 0x8e0e8110u | (src << 1); // s_lshl_b32 s14, s(16 + src * 2), 1 + *(p++) = 0x8f0f9f11u | (src << 1); // s_lshr_b32 s15, s(17 + src * 2), 31 + *(p++) = 0x870e0f0eu; // s_or_b32 s14, s14, s15 + *(p++) = 0x860e830eu; // s_and_b32 s14, s14, 3 + } + else + { + // s_bfe_u64 s[14:15], s[16 + src * 2:17 + src * 2], (shift,width=2) + *(p++) = 0x938eff10u | (src << 1); + *(p++) = shift | (2 << 16); + } + + // s_brev_b32 s14, s14 + *(p++) = 0xbe8e080eu; + + // s_lshr_b32 s66, s14, 30 + *(p++) = 0x8f429e0eu; + + // s_setreg_b32 hwreg(mode, 2, 2), s66 + *(p++) = 0xb9420881u; + + // 20 bytes + return p; + } + opcode -= RANDOMX_FREQ_CFROUND; + + if (opcode < RANDOMX_FREQ_ISTORE) + { + const uint mask = ((mod >> 4) < 14) ? ((mod % 4) ?
ScratchpadL1Mask_reg : ScratchpadL2Mask_reg) : ScratchpadL3Mask_reg; + p = jit_scratchpad_calc_address(p, dst, inst.y, mask, batch_size); + + const uint vgpr_id = 48; + *(p++) = 0x7e000210u | (src << 1) | (vgpr_id << 17); // v_mov_b32 vgpr_id, s(16 + src * 2) + *(p++) = 0x7e020211u | (src << 1) | (vgpr_id << 17); // v_mov_b32 vgpr_id + 1, s(17 + src * 2) + + // v28 = offset + +#if GCN_VERSION >= 14 + // global_store_dwordx2 v28, v[vgpr_id:vgpr_id + 1], s[0:1] + *(p++) = 0xdc748000u; + *(p++) = 0x0000001cu | (vgpr_id << 8); +#else + // v_add_u32 v28, vcc, v28, v2 + *(p++) = 0x3238051cu; + // v_addc_u32 v29, vcc, 0, v3, vcc + *(p++) = 0x383a0680u; + // flat_store_dwordx2 v[28:29], v[vgpr_id:vgpr_id + 1] + *(p++) = 0xdc740000u; + *(p++) = 0x0000001cu | (vgpr_id << 8); +#endif + + // 28 bytes + return p; + } + opcode -= RANDOMX_FREQ_ISTORE; + + return p; +} + +int jit_prefetch_read( + __global uint2* p0, + const int prefetch_data_count, + const uint i, + const uint src, + const uint dst, + const uint2 inst, + const uint srcAvailableAt, + const uint scratchpadAvailableAt, + const uint scratchpadHighAvailableAt, + const int lastBranchTarget, + const int lastBranch) +{ + uint2 t; + t.x = (src == dst) ? (((inst.y & ScratchpadL3Mask) >= RANDOMX_SCRATCHPAD_L2) ? 
scratchpadHighAvailableAt : scratchpadAvailableAt) : max(scratchpadAvailableAt, srcAvailableAt); + t.y = i; + + const int t1 = t.x; + + if ((lastBranchTarget <= t1) && (t1 <= lastBranch)) + { + // Don't move prefetch inside previous branch scope + t.x = lastBranch + 1; + } + else if ((lastBranchTarget > lastBranch) && (t1 < lastBranchTarget)) + { + // Don't move prefetch outside current branch scope + t.x = lastBranchTarget; + } + + p0[prefetch_data_count] = t; + return prefetch_data_count + 1; +} + +)===" +R"===( + +__global uint* generate_jit_code(__global uint2* e, __global uint2* p0, __global uint* p, uint batch_size) +{ + int prefetch_data_count; + + #pragma unroll(1) + for (int pass = 0; pass < 2; ++pass) + { +#if RANDOMX_PROGRAM_SIZE > 256 + int registerLastChanged[8] = { -1, -1, -1, -1, -1, -1, -1, -1 }; +#else + ulong registerLastChanged = 0; + uint registerWasChanged = 0; +#endif + + uint scratchpadAvailableAt = 0; + uint scratchpadHighAvailableAt = 0; + + int lastBranchTarget = -1; + int lastBranch = -1; + +#if RANDOMX_PROGRAM_SIZE > 256 + int registerLastChangedAtBranchTarget[8] = { -1, -1, -1, -1, -1, -1, -1, -1 }; +#else + ulong registerLastChangedAtBranchTarget = 0; + uint registerWasChangedAtBranchTarget = 0; +#endif + uint scratchpadAvailableAtBranchTarget = 0; + uint scratchpadHighAvailableAtBranchTarget = 0; + + prefetch_data_count = 0; + + #pragma unroll(1) + for (uint i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) + { + // Clean flags + if (pass == 0) + e[i].x &= ~(0xf8u << 8); + + uint2 inst = e[i]; + uint opcode = inst.x & 0xFF; + const uint dst = (inst.x >> 8) & 7; + const uint src = (inst.x >> 16) & 7; + const uint mod = inst.x >> 24; + + if (pass == 1) + { + // Branch target + if (inst.x & (0x20 << 8)) + { + lastBranchTarget = i; +#if RANDOMX_PROGRAM_SIZE > 256 + #pragma unroll + for (int j = 0; j < 8; ++j) + registerLastChangedAtBranchTarget[j] = registerLastChanged[j]; +#else + registerLastChangedAtBranchTarget = registerLastChanged; + 
registerWasChangedAtBranchTarget = registerWasChanged; +#endif + scratchpadAvailableAtBranchTarget = scratchpadAvailableAt; + scratchpadHighAvailableAtBranchTarget = scratchpadHighAvailableAt; + } + + // Branch + if (inst.x & (0x40 << 8)) + lastBranch = i; + } + +#if RANDOMX_PROGRAM_SIZE > 256 + const uint srcAvailableAt = registerLastChanged[src] + 1; + const uint dstAvailableAt = registerLastChanged[dst] + 1; +#else + const uint srcAvailableAt = (registerWasChanged & (1u << src)) ? (((registerLastChanged >> (src * 8)) & 0xFF) + 1) : 0; + const uint dstAvailableAt = (registerWasChanged & (1u << dst)) ? (((registerLastChanged >> (dst * 8)) & 0xFF) + 1) : 0; +#endif + + if (opcode < RANDOMX_FREQ_IADD_RS) + { +#if RANDOMX_PROGRAM_SIZE > 256 + registerLastChanged[dst] = i; +#else + registerLastChanged = (registerLastChanged & ~(0xFFul << (dst * 8))) | ((ulong)(i) << (dst * 8)); + registerWasChanged |= 1u << dst; +#endif + continue; + } + opcode -= RANDOMX_FREQ_IADD_RS; + + if (opcode < RANDOMX_FREQ_IADD_M) + { +#if RANDOMX_PROGRAM_SIZE > 256 + registerLastChanged[dst] = i; +#else + registerLastChanged = (registerLastChanged & ~(0xFFul << (dst * 8))) | ((ulong)(i) << (dst * 8)); + registerWasChanged |= 1u << dst; +#endif + if (pass == 1) + prefetch_data_count = jit_prefetch_read(p0, prefetch_data_count, i, src, dst, inst, srcAvailableAt, scratchpadAvailableAt, scratchpadHighAvailableAt, lastBranchTarget, lastBranch); + continue; + } + opcode -= RANDOMX_FREQ_IADD_M; + + if (opcode < RANDOMX_FREQ_ISUB_R) + { +#if RANDOMX_PROGRAM_SIZE > 256 + registerLastChanged[dst] = i; +#else + registerLastChanged = (registerLastChanged & ~(0xFFul << (dst * 8))) | ((ulong)(i) << (dst * 8)); + registerWasChanged |= 1u << dst; +#endif + continue; + } + opcode -= RANDOMX_FREQ_ISUB_R; + + if (opcode < RANDOMX_FREQ_ISUB_M) + { +#if RANDOMX_PROGRAM_SIZE > 256 + registerLastChanged[dst] = i; +#else + registerLastChanged = (registerLastChanged & ~(0xFFul << (dst * 8))) | ((ulong)(i) << (dst * 
8)); + registerWasChanged |= 1u << dst; +#endif + if (pass == 1) + prefetch_data_count = jit_prefetch_read(p0, prefetch_data_count, i, src, dst, inst, srcAvailableAt, scratchpadAvailableAt, scratchpadHighAvailableAt, lastBranchTarget, lastBranch); + continue; + } + opcode -= RANDOMX_FREQ_ISUB_M; + + if (opcode < RANDOMX_FREQ_IMUL_R) + { +#if RANDOMX_PROGRAM_SIZE > 256 + registerLastChanged[dst] = i; +#else + registerLastChanged = (registerLastChanged & ~(0xFFul << (dst * 8))) | ((ulong)(i) << (dst * 8)); + registerWasChanged |= 1u << dst; +#endif + continue; + } + opcode -= RANDOMX_FREQ_IMUL_R; + + if (opcode < RANDOMX_FREQ_IMUL_M) + { +#if RANDOMX_PROGRAM_SIZE > 256 + registerLastChanged[dst] = i; +#else + registerLastChanged = (registerLastChanged & ~(0xFFul << (dst * 8))) | ((ulong)(i) << (dst * 8)); + registerWasChanged |= 1u << dst; +#endif + if (pass == 1) + prefetch_data_count = jit_prefetch_read(p0, prefetch_data_count, i, src, dst, inst, srcAvailableAt, scratchpadAvailableAt, scratchpadHighAvailableAt, lastBranchTarget, lastBranch); + continue; + } + opcode -= RANDOMX_FREQ_IMUL_M; + + if (opcode < RANDOMX_FREQ_IMULH_R) + { +#if RANDOMX_PROGRAM_SIZE > 256 + registerLastChanged[dst] = i; +#else + registerLastChanged = (registerLastChanged & ~(0xFFul << (dst * 8))) | ((ulong)(i) << (dst * 8)); + registerWasChanged |= 1u << dst; +#endif + continue; + } + opcode -= RANDOMX_FREQ_IMULH_R; + + if (opcode < RANDOMX_FREQ_IMULH_M) + { +#if RANDOMX_PROGRAM_SIZE > 256 + registerLastChanged[dst] = i; +#else + registerLastChanged = (registerLastChanged & ~(0xFFul << (dst * 8))) | ((ulong)(i) << (dst * 8)); + registerWasChanged |= 1u << dst; +#endif + if (pass == 1) + prefetch_data_count = jit_prefetch_read(p0, prefetch_data_count, i, src, dst, inst, srcAvailableAt, scratchpadAvailableAt, scratchpadHighAvailableAt, lastBranchTarget, lastBranch); + continue; + } + opcode -= RANDOMX_FREQ_IMULH_M; + + if (opcode < RANDOMX_FREQ_ISMULH_R) + { +#if RANDOMX_PROGRAM_SIZE > 256 + 
registerLastChanged[dst] = i; +#else + registerLastChanged = (registerLastChanged & ~(0xFFul << (dst * 8))) | ((ulong)(i) << (dst * 8)); + registerWasChanged |= 1u << dst; +#endif + continue; + } + opcode -= RANDOMX_FREQ_ISMULH_R; + + if (opcode < RANDOMX_FREQ_ISMULH_M) + { +#if RANDOMX_PROGRAM_SIZE > 256 + registerLastChanged[dst] = i; +#else + registerLastChanged = (registerLastChanged & ~(0xFFul << (dst * 8))) | ((ulong)(i) << (dst * 8)); + registerWasChanged |= 1u << dst; +#endif + if (pass == 1) + prefetch_data_count = jit_prefetch_read(p0, prefetch_data_count, i, src, dst, inst, srcAvailableAt, scratchpadAvailableAt, scratchpadHighAvailableAt, lastBranchTarget, lastBranch); + continue; + } + opcode -= RANDOMX_FREQ_ISMULH_M; + + if (opcode < RANDOMX_FREQ_IMUL_RCP) + { + if (inst.y & (inst.y - 1)) + { +#if RANDOMX_PROGRAM_SIZE > 256 + registerLastChanged[dst] = i; +#else + registerLastChanged = (registerLastChanged & ~(0xFFul << (dst * 8))) | ((ulong)(i) << (dst * 8)); + registerWasChanged |= 1u << dst; +#endif + } + continue; + } + opcode -= RANDOMX_FREQ_IMUL_RCP; + + if (opcode < RANDOMX_FREQ_INEG_R + RANDOMX_FREQ_IXOR_R) + { +#if RANDOMX_PROGRAM_SIZE > 256 + registerLastChanged[dst] = i; +#else + registerLastChanged = (registerLastChanged & ~(0xFFul << (dst * 8))) | ((ulong)(i) << (dst * 8)); + registerWasChanged |= 1u << dst; +#endif + continue; + } + opcode -= RANDOMX_FREQ_INEG_R + RANDOMX_FREQ_IXOR_R; + + if (opcode < RANDOMX_FREQ_IXOR_M) + { +#if RANDOMX_PROGRAM_SIZE > 256 + registerLastChanged[dst] = i; +#else + registerLastChanged = (registerLastChanged & ~(0xFFul << (dst * 8))) | ((ulong)(i) << (dst * 8)); + registerWasChanged |= 1u << dst; +#endif + if (pass == 1) + prefetch_data_count = jit_prefetch_read(p0, prefetch_data_count, i, src, dst, inst, srcAvailableAt, scratchpadAvailableAt, scratchpadHighAvailableAt, lastBranchTarget, lastBranch); + continue; + } + opcode -= RANDOMX_FREQ_IXOR_M; + + if (opcode < RANDOMX_FREQ_IROR_R + RANDOMX_FREQ_IROL_R) 
+ { +#if RANDOMX_PROGRAM_SIZE > 256 + registerLastChanged[dst] = i; +#else + registerLastChanged = (registerLastChanged & ~(0xFFul << (dst * 8))) | ((ulong)(i) << (dst * 8)); + registerWasChanged |= 1u << dst; +#endif + continue; + } + opcode -= RANDOMX_FREQ_IROR_R + RANDOMX_FREQ_IROL_R; + + if (opcode < RANDOMX_FREQ_ISWAP_R) + { + if (src != dst) + { +#if RANDOMX_PROGRAM_SIZE > 256 + registerLastChanged[dst] = i; + registerLastChanged[src] = i; +#else + registerLastChanged = (registerLastChanged & ~(0xFFul << (dst * 8))) | ((ulong)(i) << (dst * 8)); + registerLastChanged = (registerLastChanged & ~(0xFFul << (src * 8))) | ((ulong)(i) << (src * 8)); + registerWasChanged |= (1u << dst) | (1u << src); +#endif + } + continue; + } + opcode -= RANDOMX_FREQ_ISWAP_R; + + if (opcode < RANDOMX_FREQ_FSWAP_R + RANDOMX_FREQ_FADD_R) + { + continue; + } + opcode -= RANDOMX_FREQ_FSWAP_R + RANDOMX_FREQ_FADD_R; + + if (opcode < RANDOMX_FREQ_FADD_M) + { + if (pass == 1) + prefetch_data_count = jit_prefetch_read(p0, prefetch_data_count, i, src, 0xFF, inst, srcAvailableAt, scratchpadAvailableAt, scratchpadHighAvailableAt, lastBranchTarget, lastBranch); + continue; + } + opcode -= RANDOMX_FREQ_FADD_M; + + if (opcode < RANDOMX_FREQ_FSUB_R) + { + continue; + } + opcode -= RANDOMX_FREQ_FSUB_R; + + if (opcode < RANDOMX_FREQ_FSUB_M) + { + if (pass == 1) + prefetch_data_count = jit_prefetch_read(p0, prefetch_data_count, i, src, 0xFF, inst, srcAvailableAt, scratchpadAvailableAt, scratchpadHighAvailableAt, lastBranchTarget, lastBranch); + continue; + } + opcode -= RANDOMX_FREQ_FSUB_M; + + if (opcode < RANDOMX_FREQ_FSCAL_R + RANDOMX_FREQ_FMUL_R) + { + continue; + } + opcode -= RANDOMX_FREQ_FSCAL_R + RANDOMX_FREQ_FMUL_R; + + if (opcode < RANDOMX_FREQ_FDIV_M) + { + if (pass == 1) + prefetch_data_count = jit_prefetch_read(p0, prefetch_data_count, i, src, 0xFF, inst, srcAvailableAt, scratchpadAvailableAt, scratchpadHighAvailableAt, lastBranchTarget, lastBranch); + continue; + } + opcode -= 
RANDOMX_FREQ_FDIV_M; + + if (opcode < RANDOMX_FREQ_FSQRT_R) + { + continue; + } + opcode -= RANDOMX_FREQ_FSQRT_R; + + if (opcode < RANDOMX_FREQ_CBRANCH) + { + if (pass == 0) + { + // Workaround for a bug in AMD 18.6.1 driver + volatile uint dstAvailableAt2 = dstAvailableAt; + + // Mark branch target + e[dstAvailableAt2].x |= (0x20 << 8); + + // Mark branch + e[i].x |= (0x40 << 8); + + // Set all registers as changed at this instruction as per RandomX specification +#if RANDOMX_PROGRAM_SIZE > 256 + #pragma unroll + for (int j = 0; j < 8; ++j) + registerLastChanged[j] = i; +#else + uint t = i | (i << 8); + t = t | (t << 16); + registerLastChanged = t; + registerLastChanged = registerLastChanged | (registerLastChanged << 32); + registerWasChanged = 0xFF; +#endif + } + else + { + // Update only registers which really changed inside this branch +#if RANDOMX_PROGRAM_SIZE > 256 + registerLastChanged[dst] = i; +#else + registerLastChanged = (registerLastChanged & ~(0xFFul << (dst * 8))) | ((ulong)(i) << (dst * 8)); + registerWasChanged |= 1u << dst; +#endif + + for (int reg = 0; reg < 8; ++reg) + { +#if RANDOMX_PROGRAM_SIZE > 256 + const uint availableAtBranchTarget = registerLastChangedAtBranchTarget[reg] + 1; + const uint availableAt = registerLastChanged[reg] + 1; + if (availableAt != availableAtBranchTarget) + { + registerLastChanged[reg] = i; + } +#else + const uint availableAtBranchTarget = (registerWasChangedAtBranchTarget & (1u << reg)) ? (((registerLastChangedAtBranchTarget >> (reg * 8)) & 0xFF) + 1) : 0; + const uint availableAt = (registerWasChanged & (1u << reg)) ? 
(((registerLastChanged >> (reg * 8)) & 0xFF) + 1) : 0; + if (availableAt != availableAtBranchTarget) + { + registerLastChanged = (registerLastChanged & ~(0xFFul << (reg * 8))) | ((ulong)(i) << (reg * 8)); + registerWasChanged |= 1u << reg; + } +#endif + } + + if (scratchpadAvailableAtBranchTarget != scratchpadAvailableAt) + scratchpadAvailableAt = i + 1; + + if (scratchpadHighAvailableAtBranchTarget != scratchpadHighAvailableAt) + scratchpadHighAvailableAt = i + 1; + } + continue; + } + opcode -= RANDOMX_FREQ_CBRANCH; + + if (opcode < RANDOMX_FREQ_CFROUND) + { + continue; + } + opcode -= RANDOMX_FREQ_CFROUND; + + if (opcode < RANDOMX_FREQ_ISTORE) + { + if (pass == 0) + { + // Mark ISTORE + e[i].x = inst.x | (0x80 << 8); + } + else + { + scratchpadAvailableAt = i + 1; + if ((mod >> 4) >= 14) + scratchpadHighAvailableAt = i + 1; + } + continue; + } + opcode -= RANDOMX_FREQ_ISTORE; + } + } + + // Sort p0 + uint prev = p0[0].x; + #pragma unroll(1) + for (int j = 1; j < prefetch_data_count; ++j) + { + uint2 cur = p0[j]; + if (cur.x >= prev) + { + prev = cur.x; + continue; + } + + int j1 = j - 1; + do { + p0[j1 + 1] = p0[j1]; + --j1; + } while ((j1 >= 0) && (p0[j1].x >= cur.x)); + p0[j1 + 1] = cur; + } + p0[prefetch_data_count].x = RANDOMX_PROGRAM_SIZE; + + __global int* prefecth_vgprs_stack = (__global int*)(p0 + prefetch_data_count + 1); + + // v86 - v127 will be used for global memory loads + enum { num_prefetch_vgprs = 21 }; + + #pragma unroll + for (int i = 0; i < num_prefetch_vgprs; ++i) + prefecth_vgprs_stack[i] = NUM_VGPR_REGISTERS - 2 - i * 2; + + __global int* prefetched_vgprs = prefecth_vgprs_stack + num_prefetch_vgprs; + + #pragma unroll(8) + for (int i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) + prefetched_vgprs[i] = 0; + + int k = 0; + uint2 prefetch_data = p0[0]; + int mem_counter = 0; + int s_waitcnt_value = 63; + int num_prefetch_vgprs_available = num_prefetch_vgprs; + + __global uint* last_branch_target = p; + + const uint size_limit = (COMPILED_PROGRAM_SIZE 
- 200) / sizeof(uint); + __global uint* start_p = p; + + #pragma unroll(1) + for (int i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) + { + const uint2 inst = e[i]; + + if (inst.x & (0x20 << 8)) + last_branch_target = p; + + bool done = false; + do { + uint2 jit_inst; + int jit_prefetch_vgpr_index; + int jit_vmcnt; + + if (!done && (prefetch_data.x == i) && (num_prefetch_vgprs_available > 0)) + { + ++mem_counter; + const int vgpr_id = prefecth_vgprs_stack[--num_prefetch_vgprs_available]; + prefetched_vgprs[prefetch_data.y] = vgpr_id | (mem_counter << 16); + + jit_inst = e[prefetch_data.y]; + jit_prefetch_vgpr_index = vgpr_id; + jit_vmcnt = mem_counter; + + s_waitcnt_value = 63; + + ++k; + prefetch_data = p0[k]; + } + else + { + const int prefetched_vgprs_data = prefetched_vgprs[i]; + const int vgpr_id = prefetched_vgprs_data & 0xFFFF; + const int prev_mem_counter = prefetched_vgprs_data >> 16; + if (vgpr_id) + prefecth_vgprs_stack[num_prefetch_vgprs_available++] = vgpr_id; + + if (inst.x & (0x80 << 8)) + { + ++mem_counter; + s_waitcnt_value = 63; + } + + const int vmcnt = mem_counter - prev_mem_counter; + + jit_inst = inst; + jit_prefetch_vgpr_index = -vgpr_id; + jit_vmcnt = (vmcnt < s_waitcnt_value) ? vmcnt : -1; + + if (vmcnt < s_waitcnt_value) + s_waitcnt_value = vmcnt; + + done = true; + } + + p = jit_emit_instruction(p, last_branch_target, jit_inst, jit_prefetch_vgpr_index, jit_vmcnt, batch_size); + if (p - start_p > size_limit) + { + // Code size limit exceeded!!! 
+ // Jump back to randomx_run kernel + *(p++) = 0xbe801d0cu; // s_setpc_b64 s[12:13] + return p; + } + } while (!done); + } + + // Jump back to randomx_run kernel + *(p++) = 0xbe801d0cu; // s_setpc_b64 s[12:13] + return p; +} + +)===" +R"===( +// randomx_jit: work item 0 of every 32 compiles one VM program (entropy + 128 bytes -> programs) and writes that VM's initial register file; the other 31 work items return immediately +__attribute__((reqd_work_group_size(64, 1, 1))) +__kernel void randomx_jit(__global ulong* entropy, __global ulong* registers, __global uint2* intermediate_programs, __global uint* programs, uint batch_size, __global uint32_t* rounding, uint32_t iteration) +{ + const uint global_index = get_global_id(0) / 32; + const uint sub = get_global_id(0) % 32; + + if (sub != 0) + return; + + __global uint2* e = (__global uint2*)(entropy + global_index * (ENTROPY_SIZE / sizeof(ulong)) + (128 / sizeof(ulong))); + __global uint2* p0 = intermediate_programs + global_index * (INTERMEDIATE_PROGRAM_SIZE / sizeof(uint2)); + __global uint* p = programs + global_index * (COMPILED_PROGRAM_SIZE / sizeof(uint)); + + generate_jit_code(e, p0, p, batch_size); + + if (iteration == 0) + rounding[global_index] = 0; + + __global ulong* R = registers + global_index * 32; // 32 ulongs of register file per VM + entropy += global_index * (ENTROPY_SIZE / sizeof(ulong)); + + // Group R registers: R[0..7] start at zero + R[0] = 0; + R[1] = 0; + R[2] = 0; + R[3] = 0; + R[4] = 0; + R[5] = 0; + R[6] = 0; + R[7] = 0; + + // Group A registers: 8 doubles stored at R[24..31], built from entropy[0..7] + __global double* A = (__global double*)(R + 24); + A[0] = getSmallPositiveFloatBits(entropy[0]); + A[1] = getSmallPositiveFloatBits(entropy[1]); + A[2] = getSmallPositiveFloatBits(entropy[2]); + A[3] = getSmallPositiveFloatBits(entropy[3]); + A[4] = getSmallPositiveFloatBits(entropy[4]); + A[5] = getSmallPositiveFloatBits(entropy[5]); + A[6] = getSmallPositiveFloatBits(entropy[6]); + A[7] = getSmallPositiveFloatBits(entropy[7]); + + // ma, mx: uint slots 0 and 1 of R[16] (ma is masked with CacheLineAlignMask) + ((__global uint*)(R + 16))[0] = entropy[8] & CacheLineAlignMask; + ((__global uint*)(R + 16))[1] = entropy[10]; + + // address registers: bits 0..3 of entropy[12] pick one register out of each pair (0/1, 2/3, 4/5, 6/7); stored as four uints starting at R[17] + uint addressRegisters = entropy[12]; + ((__global uint*)(R + 17))[0] = 0 + (addressRegisters & 1); +
addressRegisters >>= 1; + ((__global uint*)(R + 17))[1] = 2 + (addressRegisters & 1); + addressRegisters >>= 1; + ((__global uint*)(R + 17))[2] = 4 + (addressRegisters & 1); + addressRegisters >>= 1; + ((__global uint*)(R + 17))[3] = 6 + (addressRegisters & 1); + + // dataset offset in bytes (entropy[13] masked with DatasetExtraItems, times CacheLineSize), first uint of R[19] + ((__global uint*)(R + 19))[0] = (entropy[13] & DatasetExtraItems) * CacheLineSize; + + // eMask: R[20..21] built from entropy[14..15] by getFloatMask + R[20] = getFloatMask(entropy[14]); + R[21] = getFloatMask(entropy[15]); +} + +)===" diff --git a/src/amd/opencl/RandomX/randomx_run_gfx803.asm b/src/amd/opencl/RandomX/randomx_run_gfx803.asm new file mode 100644 index 00000000..e28db51e --- /dev/null +++ b/src/amd/opencl/RandomX/randomx_run_gfx803.asm @@ -0,0 +1,712 @@ +/* +Copyright (c) 2019 SChernykh + +This file is part of RandomX OpenCL. + +RandomX OpenCL is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +RandomX OpenCL is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with RandomX OpenCL. If not, see <http://www.gnu.org/licenses/>.
+*/ + +.amdcl2 +.gpu GFX803 +.64bit +.arch_minor 0 +.arch_stepping 0 +.driver_version 203603 +.kernel randomx_run + .config + .dims x + .cws 64, 1, 1 + .sgprsnum 96 + # 6 waves per SIMD: 37-40 VGPRs + # 5 waves per SIMD: 41-48 VGPRs + # 4 waves per SIMD: 49-64 VGPRs + # 3 waves per SIMD: 65-84 VGPRs + # 2 waves per SIMD: 85-128 VGPRs + # 1 wave per SIMD: 129-256 VGPRs + .vgprsnum 128 + .localsize 256 + .floatmode 0xc0 + .pgmrsrc1 0x00ac035f + .pgmrsrc2 0x0000008c + .dx10clamp + .ieeemode + .useargs + .priority 0 + .arg _.global_offset_0, "size_t", long + .arg _.global_offset_1, "size_t", long + .arg _.global_offset_2, "size_t", long + .arg _.printf_buffer, "size_t", void*, global, , rdonly + .arg _.vqueue_pointer, "size_t", long + .arg _.aqlwrap_pointer, "size_t", long + .arg dataset, "uchar*", uchar*, global, const, rdonly + .arg scratchpad, "uchar*", uchar*, global, + .arg registers, "ulong*", ulong*, global, + .arg rounding_modes, "uint*", uint*, global, + .arg programs, "uint*", uint*, global, + .arg batch_size, "uint", uint + .arg rx_parameters, "uint", uint + .text + s_mov_b32 m0, 0x10000 + s_dcache_wb + s_waitcnt vmcnt(0) & lgkmcnt(0) + s_icache_inv + s_branch begin + + # pgmrsrc2 = 0x0000008c, bits 1:5 = 6, so first 6 SGPRs (s0-s5) contain user data + # s6 contains group id (copied to s8 below) + # v0 contains local id +begin: + s_mov_b32 s8, s6 + v_lshlrev_b32 v1, 6, s8 + v_add_u32 v1, vcc, v1, v0 + s_load_dwordx2 s[0:1], s[4:5], 0x0 # arg _.global_offset_0 + s_load_dwordx2 s[2:3], s[4:5], 0x40 # arg registers + s_load_dwordx2 s[64:65], s[4:5], 0x48 # arg rounding_modes + s_waitcnt lgkmcnt(0) + + # load rounding mode + s_lshl_b32 s16, s8, 2 + s_add_u32 s64, s64, s16 + s_addc_u32 s65, s65, 0 + v_mov_b32 v8, s64 + v_mov_b32 v9, s65 + flat_load_dword v8, v[8:9] + s_waitcnt vmcnt(0) + v_readlane_b32 s66, v8, 0 + s_setreg_b32 hwreg(mode, 2, 2), s66 + s_mov_b32 s67, 0 + + # used in FSQRT_R to check for "positive normal value" (v_cmpx_class_f64) + s_mov_b32 s68, 256 + s_mov_b32 s69, 0 + + v_add_u32 v1, vcc, s0, v1 + v_lshrrev_b32 v2, 6, v1 +
v_lshlrev_b32 v3, 5, v2 + v_and_b32 v1, 63, v1 + v_mov_b32 v4, 0 + v_lshlrev_b64 v[3:4], 3, v[3:4] + v_lshlrev_b32 v5, 4, v1 + v_add_u32 v3, vcc, s2, v3 + v_mov_b32 v6, s3 + v_addc_u32 v4, vcc, v6, v4, vcc + v_lshlrev_b32 v41, 2, v1 + v_add_u32 v6, vcc, v3, v41 + v_addc_u32 v7, vcc, v4, 0, vcc + flat_load_dword v6, v[6:7] + v_mov_b32 v0, 0 + s_waitcnt vmcnt(0) + ds_write_b32 v41, v6 + s_waitcnt lgkmcnt(0) + s_mov_b64 s[0:1], exec + v_cmpx_le_u32 s[2:3], v1, 7 + s_cbranch_execz program_end + + # rx_parameters (packed bit fields, decoded below) + s_load_dword s20, s[4:5], 0x5c + s_waitcnt lgkmcnt(0) + + # Scratchpad L1 size = 1 << (bits 0-4 of rx_parameters) + s_bfe_u32 s21, s20, 0x050000 + s_lshl_b32 s21, 1, s21 + + # Scratchpad L2 size = 1 << (bits 5-9 of rx_parameters) + s_bfe_u32 s22, s20, 0x050005 + s_lshl_b32 s22, 1, s22 + + # Scratchpad L3 size = 1 << (bits 10-14 of rx_parameters) + s_bfe_u32 s23, s20, 0x05000A + s_lshl_b32 s23, 1, s23 + + # program iterations = 1 << (bits 15-18 of rx_parameters) + s_bfe_u32 s24, s20, 0x04000F + s_lshl_b32 s24, 1, s24 + + # Base address for scratchpads + s_add_u32 s2, s23, 64 + v_mul_hi_u32 v20, v2, s2 + v_mul_lo_u32 v2, v2, s2 + + # v41, v44 = 0 + v_mov_b32 v41, 0 + v_mov_b32 v44, 0 + + ds_read_b32 v6, v0 offset:152 + v_cmp_lt_u32 s[2:3], v1, 4 + ds_read2_b64 v[34:37], v0 offset0:18 offset1:16 + ds_read_b64 v[11:12], v0 offset:136 + s_movk_i32 s9, 0x0 + s_mov_b64 s[6:7], exec + s_andn2_b64 exec, s[6:7], s[2:3] + ds_read_b64 v[13:14], v0 offset:160 + s_andn2_b64 exec, s[6:7], exec + v_mov_b32 v13, 0 + v_mov_b32 v14, 0 + s_mov_b64 exec, s[6:7] + + # compiled program size (10048 bytes): s[6:7] = group id * 10048, added to the programs pointer further down + s_mov_b64 s[6:7], s[8:9] + s_mulk_i32 s6, 10048 + + v_add_u32 v5, vcc, v0, v5 + v_add_u32 v5, vcc, v5, 64 + s_mov_b64 s[8:9], exec + s_andn2_b64 exec, s[8:9], s[2:3] + ds_read_b64 v[15:16], v0 offset:168 + s_andn2_b64 exec, s[8:9], exec + v_mov_b32 v15, 0 + v_mov_b32 v16, 0 + s_mov_b64 exec, s[8:9] + s_load_dwordx4 s[8:11], s[4:5], 0x30 # args dataset, scratchpad + + # batch_size + s_load_dword s16, s[4:5], 0x58 + + s_load_dwordx2 s[4:5], s[4:5], 0x50 # arg programs + v_lshlrev_b32 v1, 3, v1 + v_add_u32 v17, vcc, v0, v1 + s_waitcnt lgkmcnt(0) + v_add_u32 v2, vcc, s10, v2 +
v_mov_b32 v18, s11 + v_addc_u32 v18, vcc, v18, v20, vcc + v_mov_b32 v19, 0xffffff + v_add_u32 v6, vcc, s8, v6 + v_mov_b32 v20, s9 + v_addc_u32 v20, vcc, v20, 0, vcc + ds_read_b64 v[21:22], v17 + s_add_u32 s4, s4, s6 + s_addc_u32 s5, s5, s7 + v_cndmask_b32 v19, v19, -1, s[2:3] + v_lshlrev_b32 v8, 3, v35 + v_lshlrev_b32 v7, 3, v34 + v_lshlrev_b32 v12, 3, v12 + v_lshlrev_b32 v10, 3, v11 + v_add_u32 v8, vcc, v8, v0 + v_add_u32 v7, vcc, v7, v0 + v_add_u32 v12, vcc, v12, v0 + v_add_u32 v0, vcc, v10, v0 + v_mov_b32 v10, v36 + v_mov_b32 v23, v37 + + # loop counter = program iterations - 1 (main_loop exits when it reaches 0) + s_sub_u32 s2, s24, 1 + + # batch_size + s_mov_b32 s3, s16 + + # Scratchpad address masks (size - 8) for L1, L2, L3 + v_sub_u32 v38, vcc, s21, 8 + v_sub_u32 v39, vcc, s22, 8 + v_sub_u32 v50, vcc, s23, 8 + + # mask for FSCAL_R + v_mov_b32 v51, 0x80F00000 + + # swap v3 and v18 + v_mov_b32 v52, v3 + v_mov_b32 v3, v18 + v_mov_b32 v18, v52 + + # load scratchpad base address + v_readlane_b32 s0, v2, 0 + v_readlane_b32 s1, v3, 0 + + # save current execution mask + s_mov_b64 s[36:37], exec + + # v41 = 0 on lane 0, set it to 8 on lane 1 + # v44 = 0 on lane 0, set it to 4 on lane 1 + s_mov_b64 exec, 2 + v_mov_b32 v41, 8 + v_mov_b32 v44, 4 + + # load group A registers + # Read low 8 bytes into lane 0 and high 8 bytes into lane 1 + s_mov_b64 exec, 3 + ds_read2_b64 v[52:55], v41 offset0:24 offset1:26 + ds_read2_b64 v[56:59], v41 offset0:28 offset1:30 + + # xmantissaMask + v_mov_b32 v77, (1 << 24) - 1 + + # xexponentMask + ds_read_b64 v[78:79], v41 offset:160 + + # Restore execution mask + s_mov_b64 exec, s[36:37] + + # sign mask (used in FSQRT_R) + v_mov_b32 v82, 0x80000000 + + # High 32 bits of "1.0" constant (used in FDIV_M) + v_mov_b32 v83, (1023 << 20) + + # Used to multiply FP64 values by 0.5 + v_mov_b32 v84, (1 << 20) + + s_getpc_b64 s[14:15] +cur_addr: + + # get addresses of FSQRT_R subroutines + s_add_u32 s40, s14, fsqrt_r_sub0 - cur_addr + s_addc_u32 s41, s15, 0 + s_add_u32 s42, s14, fsqrt_r_sub1 - cur_addr + s_addc_u32 s43, s15, 0 +
s_add_u32 s44, s14, fsqrt_r_sub2 - cur_addr + s_addc_u32 s45, s15, 0 + s_add_u32 s46, s14, fsqrt_r_sub3 - cur_addr + s_addc_u32 s47, s15, 0 + + # get addresses of FDIV_M subroutines + s_add_u32 s48, s14, fdiv_m_sub0 - cur_addr + s_addc_u32 s49, s15, 0 + s_add_u32 s50, s14, fdiv_m_sub1 - cur_addr + s_addc_u32 s51, s15, 0 + s_add_u32 s52, s14, fdiv_m_sub2 - cur_addr + s_addc_u32 s53, s15, 0 + s_add_u32 s54, s14, fdiv_m_sub3 - cur_addr + s_addc_u32 s55, s15, 0 + + # get address for ISMULH_R subroutine + s_add_u32 s56, s14, ismulh_r_sub - cur_addr + s_addc_u32 s57, s15, 0 + + # get address for IMULH_R subroutine + s_add_u32 s58, s14, imulh_r_sub - cur_addr + s_addc_u32 s59, s15, 0 + + # used in IXOR_R instruction + s_mov_b32 s63, -1 + + # used in CBRANCH instruction: 8-bit wide masks at bit offsets 8..23 + s_mov_b32 s70, (0xFF << 8) + s_mov_b32 s71, (0xFF << 9) + s_mov_b32 s72, (0xFF << 10) + s_mov_b32 s73, (0xFF << 11) + s_mov_b32 s74, (0xFF << 12) + s_mov_b32 s75, (0xFF << 13) + s_mov_b32 s76, (0xFF << 14) + s_mov_b32 s77, (0xFF << 15) + s_mov_b32 s78, (0xFF << 16) + s_mov_b32 s79, (0xFF << 17) + s_mov_b32 s80, (0xFF << 18) + s_mov_b32 s81, (0xFF << 19) + s_mov_b32 s82, (0xFF << 20) + s_mov_b32 s83, (0xFF << 21) + s_mov_b32 s84, (0xFF << 22) + s_mov_b32 s85, (0xFF << 23) + + # ScratchpadL3Mask64 = Scratchpad L3 size - 64 + s_sub_u32 s86, s23, 64 + +main_loop: + # const uint2 spMix = as_uint2(R[readReg0] ^ R[readReg1]); + ds_read_b64 v[24:25], v0 + ds_read_b64 v[26:27], v12 + s_waitcnt lgkmcnt(0) + v_xor_b32 v25, v27, v25 + v_xor_b32 v24, v26, v24 + + # spAddr1 ^= spMix.y; + # spAddr0 ^= spMix.x; + v_xor_b32 v10, v25, v10 + v_xor_b32 v23, v24, v23 + + # spAddr1 &= ScratchpadL3Mask64; + # spAddr0 &= ScratchpadL3Mask64; + v_and_b32 v10, s86, v10 + v_and_b32 v23, s86, v23 + + # Offset for scratchpads + # offset1 = spAddr1 + sub * 8 + # offset0 = spAddr0 + sub * 8 + v_add_u32 v10, vcc, v10, v1 + v_add_u32 v23, vcc, v23, v1 + + # __global ulong* p1 = (__global ulong*)(scratchpad + offset1); + # __global ulong* p0 = (__global
ulong*)(scratchpad + offset0); + v_add_u32 v26, vcc, v2, v10 + v_addc_u32 v27, vcc, v3, 0, vcc + v_add_u32 v23, vcc, v2, v23 + v_addc_u32 v24, vcc, v3, 0, vcc + + # load from spAddr1 + flat_load_dwordx2 v[28:29], v[26:27] + + # load from spAddr0 + flat_load_dwordx2 v[30:31], v[23:24] + s_waitcnt vmcnt(1) + + v_cvt_f64_i32 v[32:33], v28 + v_cvt_f64_i32 v[28:29], v29 + s_waitcnt vmcnt(0) + + # R[sub] ^= *p0; + v_xor_b32 v34, v21, v30 + v_xor_b32 v35, v22, v31 + + v_add_u32 v22, vcc, v6, v36 + v_addc_u32 v25, vcc, v20, 0, vcc + v_or_b32 v30, v32, v13 + v_and_b32 v31, v33, v19 + v_or_b32 v31, v31, v14 + v_or_b32 v28, v28, v15 + v_and_b32 v29, v29, v19 + v_or_b32 v29, v29, v16 + v_add_u32 v21, vcc, v22, v1 + v_addc_u32 v22, vcc, v25, 0, vcc + ds_write2_b64 v5, v[30:31], v[28:29] offset1:1 + s_waitcnt lgkmcnt(0) + + # Program 0 + + # load group F,E registers + # Read low 8 bytes into lane 0 and high 8 bytes into lane 1 + s_mov_b64 exec, 3 + ds_read2_b64 v[60:63], v41 offset0:8 offset1:10 + ds_read2_b64 v[64:67], v41 offset0:12 offset1:14 + ds_read2_b64 v[68:71], v41 offset0:16 offset1:18 + ds_read2_b64 v[72:75], v41 offset0:20 offset1:22 + + # load VM integer registers: lanes 0-7 of v[34:35] -> s[16:31] + v_readlane_b32 s16, v34, 0 + v_readlane_b32 s17, v35, 0 + v_readlane_b32 s18, v34, 1 + v_readlane_b32 s19, v35, 1 + v_readlane_b32 s20, v34, 2 + v_readlane_b32 s21, v35, 2 + v_readlane_b32 s22, v34, 3 + v_readlane_b32 s23, v35, 3 + v_readlane_b32 s24, v34, 4 + v_readlane_b32 s25, v35, 4 + v_readlane_b32 s26, v34, 5 + v_readlane_b32 s27, v35, 5 + v_readlane_b32 s28, v34, 6 + v_readlane_b32 s29, v35, 6 + v_readlane_b32 s30, v34, 7 + v_readlane_b32 s31, v35, 7 + + s_waitcnt lgkmcnt(0) + + # call JIT code at s[4:5]; the return address is saved in s[12:13] + s_swappc_b64 s[12:13], s[4:5] + + # store VM integer registers: s[16:31] -> lanes 0-7 of v[28:29] + v_writelane_b32 v28, s16, 0 + v_writelane_b32 v29, s17, 0 + v_writelane_b32 v28, s18, 1 + v_writelane_b32 v29, s19, 1 + v_writelane_b32 v28, s20, 2 + v_writelane_b32 v29, s21, 2 + v_writelane_b32 v28, s22, 3 + v_writelane_b32 v29, s23, 3 +
v_writelane_b32 v28, s24, 4 + v_writelane_b32 v29, s25, 4 + v_writelane_b32 v28, s26, 5 + v_writelane_b32 v29, s27, 5 + v_writelane_b32 v28, s28, 6 + v_writelane_b32 v29, s29, 6 + v_writelane_b32 v28, s30, 7 + v_writelane_b32 v29, s31, 7 + + # Write out group F,E registers + # Write low 8 bytes from lane 0 and high 8 bytes from lane 1 + ds_write2_b64 v41, v[60:61], v[62:63] offset0:8 offset1:10 + ds_write2_b64 v41, v[64:65], v[66:67] offset0:12 offset1:14 + ds_write2_b64 v41, v[68:69], v[70:71] offset0:16 offset1:18 + ds_write2_b64 v41, v[72:73], v[74:75] offset0:20 offset1:22 + + # Restore execution mask + s_mov_b64 exec, s[36:37] + + # Write out VM integer registers + ds_write_b64 v17, v[28:29] + + flat_load_dwordx2 v[21:22], v[21:22] + s_waitcnt vmcnt(0) & lgkmcnt(0) + v_xor_b32 v21, v28, v21 + v_xor_b32 v22, v29, v22 + ds_read_b32 v28, v7 + ds_read_b32 v29, v8 + ds_write_b64 v17, v[21:22] + s_waitcnt lgkmcnt(1) + ds_read2_b64 v[30:33], v17 offset0:8 offset1:16 + v_xor_b32 v10, v28, v37 + s_waitcnt lgkmcnt(0) + v_xor_b32 v30, v32, v30 + v_xor_b32 v31, v33, v31 + v_xor_b32 v10, v10, v29 + flat_store_dwordx2 v[26:27], v[21:22] + v_and_b32 v10, 0x7fffffc0, v10 + flat_store_dwordx2 v[23:24], v[30:31] + s_cmp_eq_u32 s2, 0 # loop counter s2 exhausted? + s_cbranch_scc1 main_loop_end + s_sub_i32 s2, s2, 1 + v_mov_b32 v37, v36 + v_mov_b32 v23, 0 + v_mov_b32 v36, v10 + v_mov_b32 v10, 0 + s_branch main_loop +main_loop_end: + + v_add_u32 v0, vcc, v18, v1 + v_addc_u32 v1, vcc, v4, 0, vcc + flat_store_dwordx2 v[0:1], v[21:22] + v_add_u32 v0, vcc, v0, 64 + v_addc_u32 v1, vcc, v1, 0, vcc + flat_store_dwordx2 v[0:1], v[30:31] + v_add_u32 v0, vcc, v0, 64 + v_addc_u32 v1, vcc, v1, 0, vcc + flat_store_dwordx2 v[0:1], v[32:33] + + # store rounding mode (s66) back to the address in s[64:65] it was loaded from + v_mov_b32 v0, s64 + v_mov_b32 v1, s65 + v_mov_b32 v2, s66 + flat_store_dword v[0:1], v2 + +program_end: + s_endpgm + +fsqrt_r_sub0: + s_setreg_b32 hwreg(mode, 2, 2), s67 # s67 = 0: MODE bits 2-3 cleared for the intermediate steps + v_rsq_f64 v[28:29], v[68:69] + + # Improve initial approximation (can be skipped) +
#v_mul_f64 v[42:43], v[28:29], v[68:69] + #v_mul_f64 v[48:49], v[28:29], -0.5 + #v_fma_f64 v[48:49], v[48:49], v[42:43], 0.5 + #v_fma_f64 v[28:29], v[28:29], v[48:49], v[28:29] + + v_mul_f64 v[42:43], v[28:29], v[68:69] + v_mov_b32 v48, v28 + v_sub_u32 v49, vcc, v29, v84 + v_mov_b32 v46, v28 + v_xor_b32 v47, v49, v82 + v_fma_f64 v[46:47], v[46:47], v[42:43], 0.5 + v_fma_f64 v[42:43], v[42:43], v[46:47], v[42:43] + v_fma_f64 v[48:49], v[48:49], v[46:47], v[48:49] + v_fma_f64 v[46:47], -v[42:43], v[42:43], v[68:69] + s_setreg_b32 hwreg(mode, 2, 2), s66 # back to the VM rounding mode (s66) for the last step + v_fma_f64 v[42:43], v[46:47], v[48:49], v[42:43] + v_cmpx_class_f64 s[14:15], v[68:69], s[68:69] # keep only lanes whose input is a positive normal value (class mask 256) + v_mov_b32 v68, v42 + v_mov_b32 v69, v43 + s_mov_b64 exec, 3 + s_setpc_b64 s[60:61] + +fsqrt_r_sub1: + s_setreg_b32 hwreg(mode, 2, 2), s67 # s67 = 0: MODE bits 2-3 cleared for the intermediate steps + v_rsq_f64 v[28:29], v[70:71] + + # Improve initial approximation (can be skipped) + #v_mul_f64 v[42:43], v[28:29], v[70:71] + #v_mul_f64 v[48:49], v[28:29], -0.5 + #v_fma_f64 v[48:49], v[48:49], v[42:43], 0.5 + #v_fma_f64 v[28:29], v[28:29], v[48:49], v[28:29] + + v_mul_f64 v[42:43], v[28:29], v[70:71] + v_mov_b32 v48, v28 + v_sub_u32 v49, vcc, v29, v84 + v_mov_b32 v46, v28 + v_xor_b32 v47, v49, v82 + v_fma_f64 v[46:47], v[46:47], v[42:43], 0.5 + v_fma_f64 v[42:43], v[42:43], v[46:47], v[42:43] + v_fma_f64 v[48:49], v[48:49], v[46:47], v[48:49] + v_fma_f64 v[46:47], -v[42:43], v[42:43], v[70:71] + s_setreg_b32 hwreg(mode, 2, 2), s66 + v_fma_f64 v[42:43], v[46:47], v[48:49], v[42:43] + v_cmpx_class_f64 s[14:15], v[70:71], s[68:69] + v_mov_b32 v70, v42 + v_mov_b32 v71, v43 + s_mov_b64 exec, 3 + s_setpc_b64 s[60:61] + +fsqrt_r_sub2: + s_setreg_b32 hwreg(mode, 2, 2), s67 + v_rsq_f64 v[28:29], v[72:73] + + # Improve initial approximation (can be skipped) + #v_mul_f64 v[42:43], v[28:29], v[72:73] + #v_mul_f64 v[48:49], v[28:29], -0.5 + #v_fma_f64 v[48:49], v[48:49], v[42:43], 0.5 + #v_fma_f64 v[28:29], v[28:29], v[48:49], v[28:29] + + v_mul_f64 v[42:43], v[28:29], v[72:73] + v_mov_b32
v48, v28 + v_sub_u32 v49, vcc, v29, v84 + v_mov_b32 v46, v28 + v_xor_b32 v47, v49, v82 + v_fma_f64 v[46:47], v[46:47], v[42:43], 0.5 + v_fma_f64 v[42:43], v[42:43], v[46:47], v[42:43] + v_fma_f64 v[48:49], v[48:49], v[46:47], v[48:49] + v_fma_f64 v[46:47], -v[42:43], v[42:43], v[72:73] + s_setreg_b32 hwreg(mode, 2, 2), s66 + v_fma_f64 v[42:43], v[46:47], v[48:49], v[42:43] + v_cmpx_class_f64 s[14:15], v[72:73], s[68:69] + v_mov_b32 v72, v42 + v_mov_b32 v73, v43 + s_mov_b64 exec, 3 + s_setpc_b64 s[60:61] + +fsqrt_r_sub3: + s_setreg_b32 hwreg(mode, 2, 2), s67 + v_rsq_f64 v[28:29], v[74:75] + + # Improve initial approximation (can be skipped) + #v_mul_f64 v[42:43], v[28:29], v[74:75] + #v_mul_f64 v[48:49], v[28:29], -0.5 + #v_fma_f64 v[48:49], v[48:49], v[42:43], 0.5 + #v_fma_f64 v[28:29], v[28:29], v[48:49], v[28:29] + + v_mul_f64 v[42:43], v[28:29], v[74:75] + v_mov_b32 v48, v28 + v_sub_u32 v49, vcc, v29, v84 + v_mov_b32 v46, v28 + v_xor_b32 v47, v49, v82 + v_fma_f64 v[46:47], v[46:47], v[42:43], 0.5 + v_fma_f64 v[42:43], v[42:43], v[46:47], v[42:43] + v_fma_f64 v[48:49], v[48:49], v[46:47], v[48:49] + v_fma_f64 v[46:47], -v[42:43], v[42:43], v[74:75] + s_setreg_b32 hwreg(mode, 2, 2), s66 + v_fma_f64 v[42:43], v[46:47], v[48:49], v[42:43] + v_cmpx_class_f64 s[14:15], v[74:75], s[68:69] + v_mov_b32 v74, v42 + v_mov_b32 v75, v43 + s_mov_b64 exec, 3 + s_setpc_b64 s[60:61] + +fdiv_m_sub0: + v_or_b32 v28, v28, v78 # divisor v[28:29]: apply xexponentMask (v[78:79]) and xmantissaMask (v77) + v_and_b32 v29, v29, v77 + v_or_b32 v29, v29, v79 + s_setreg_b32 hwreg(mode, 2, 2), s67 + v_rcp_f64 v[48:49], v[28:29] + v_fma_f64 v[80:81], -v[28:29], v[48:49], 1.0 + v_fma_f64 v[48:49], v[48:49], v[80:81], v[48:49] + v_mul_f64 v[80:81], v[68:69], v[48:49] + v_fma_f64 v[42:43], -v[28:29], v[80:81], v[68:69] + s_setreg_b32 hwreg(mode, 2, 2), s66 + v_fma_f64 v[42:43], v[42:43], v[48:49], v[80:81] + v_div_fixup_f64 v[80:81], v[42:43], v[28:29], v[68:69] + v_cmpx_eq_f64 s[14:15], v[68:69], v[28:29] # lanes where dividend == divisor: result is forced to 1.0 (v83 = high dword of 1.0) + v_mov_b32 v80, 0 + v_mov_b32 v81, v83 + s_mov_b64 exec, 3 +
v_mov_b32 v68, v80 + v_mov_b32 v69, v81 + s_setpc_b64 s[60:61] + +fdiv_m_sub1: + v_or_b32 v28, v28, v78 + v_and_b32 v29, v29, v77 + v_or_b32 v29, v29, v79 + s_setreg_b32 hwreg(mode, 2, 2), s67 + v_rcp_f64 v[48:49], v[28:29] + v_fma_f64 v[80:81], -v[28:29], v[48:49], 1.0 + v_fma_f64 v[48:49], v[48:49], v[80:81], v[48:49] + v_mul_f64 v[80:81], v[70:71], v[48:49] + v_fma_f64 v[42:43], -v[28:29], v[80:81], v[70:71] + s_setreg_b32 hwreg(mode, 2, 2), s66 + v_fma_f64 v[42:43], v[42:43], v[48:49], v[80:81] + v_div_fixup_f64 v[80:81], v[42:43], v[28:29], v[70:71] + v_cmpx_eq_f64 s[14:15], v[70:71], v[28:29] + v_mov_b32 v80, 0 + v_mov_b32 v81, v83 + s_mov_b64 exec, 3 + v_mov_b32 v70, v80 + v_mov_b32 v71, v81 + s_setpc_b64 s[60:61] + +fdiv_m_sub2: + v_or_b32 v28, v28, v78 + v_and_b32 v29, v29, v77 + v_or_b32 v29, v29, v79 + s_setreg_b32 hwreg(mode, 2, 2), s67 + v_rcp_f64 v[48:49], v[28:29] + v_fma_f64 v[80:81], -v[28:29], v[48:49], 1.0 + v_fma_f64 v[48:49], v[48:49], v[80:81], v[48:49] + v_mul_f64 v[80:81], v[72:73], v[48:49] + v_fma_f64 v[42:43], -v[28:29], v[80:81], v[72:73] + s_setreg_b32 hwreg(mode, 2, 2), s66 + v_fma_f64 v[42:43], v[42:43], v[48:49], v[80:81] + v_div_fixup_f64 v[80:81], v[42:43], v[28:29], v[72:73] + v_cmpx_eq_f64 s[14:15], v[72:73], v[28:29] + v_mov_b32 v80, 0 + v_mov_b32 v81, v83 + s_mov_b64 exec, 3 + v_mov_b32 v72, v80 + v_mov_b32 v73, v81 + s_setpc_b64 s[60:61] + +fdiv_m_sub3: + v_or_b32 v28, v28, v78 + v_and_b32 v29, v29, v77 + v_or_b32 v29, v29, v79 + s_setreg_b32 hwreg(mode, 2, 2), s67 + v_rcp_f64 v[48:49], v[28:29] + v_fma_f64 v[80:81], -v[28:29], v[48:49], 1.0 + v_fma_f64 v[48:49], v[48:49], v[80:81], v[48:49] + v_mul_f64 v[80:81], v[74:75], v[48:49] + v_fma_f64 v[42:43], -v[28:29], v[80:81], v[74:75] + s_setreg_b32 hwreg(mode, 2, 2), s66 + v_fma_f64 v[42:43], v[42:43], v[48:49], v[80:81] + v_div_fixup_f64 v[80:81], v[42:43], v[28:29], v[74:75] + v_cmpx_eq_f64 s[14:15], v[74:75], v[28:29] + v_mov_b32 v80, 0 + v_mov_b32 v81, v83 + s_mov_b64 exec,
3 + v_mov_b32 v74, v80 + v_mov_b32 v75, v81 + s_setpc_b64 s[60:61] + +ismulh_r_sub: + s_mov_b64 exec, 1 # lane 0 only; signed high 64 bits of s[14:15] * s[38:39], result in s[14:15] + v_mov_b32 v45, s14 + v_mul_hi_u32 v40, s38, v45 + v_mov_b32 v47, s15 + v_mad_u64_u32 v[42:43], s[32:33], s38, v47, v[40:41] + v_mov_b32 v40, v42 + v_mad_u64_u32 v[45:46], s[32:33], s39, v45, v[40:41] + v_mad_u64_u32 v[42:43], s[32:33], s39, v47, v[43:44] + v_add_u32 v42, vcc, v42, v46 + v_addc_u32 v43, vcc, 0, v43, vcc + v_readlane_b32 s32, v42, 0 + v_readlane_b32 s33, v43, 0 + s_cmp_lt_i32 s15, 0 + s_cselect_b64 s[34:35], s[38:39], 0 + s_sub_u32 s32, s32, s34 + s_subb_u32 s33, s33, s35 + s_cmp_lt_i32 s39, 0 + s_cselect_b64 s[34:35], s[14:15], 0 + s_sub_u32 s14, s32, s34 + s_subb_u32 s15, s33, s35 + s_mov_b64 exec, 3 + s_setpc_b64 s[60:61] + +imulh_r_sub: + s_mov_b64 exec, 1 # lane 0 only; unsigned high 64 bits of s[14:15] * s[38:39], result in s[14:15] + v_mov_b32 v45, s38 + v_mul_hi_u32 v40, s14, v45 + v_mov_b32 v47, s39 + v_mad_u64_u32 v[42:43], s[32:33], s14, v47, v[40:41] + v_mov_b32 v40, v42 + v_mad_u64_u32 v[45:46], s[32:33], s15, v45, v[40:41] + v_mad_u64_u32 v[42:43], s[32:33], s15, v47, v[43:44] + v_add_u32 v42, vcc, v42, v46 + v_addc_u32 v43, vcc, 0, v43, vcc + v_readlane_b32 s14, v42, 0 + v_readlane_b32 s15, v43, 0 + s_mov_b64 exec, 3 + s_setpc_b64 s[60:61] diff --git a/src/amd/opencl/RandomX/randomx_run_gfx803.h b/src/amd/opencl/RandomX/randomx_run_gfx803.h new file mode 100644 index 00000000..78d292fb --- /dev/null +++ b/src/amd/opencl/RandomX/randomx_run_gfx803.h @@ -0,0 +1,218 @@ +/* +This file was auto-generated from randomx_run_gfx803.asm: + +clrxasm randomx_run_gfx803.asm -o randomx_run_gfx803.bin +bin2h -c randomx_run_gfx803_bin < randomx_run_gfx803.bin > randomx_run_gfx803.h + +clrxasm can be downloaded here: https://github.com/CLRX/CLRX-mirror/releases +bin2h can be downloaded here: http://www.deadnode.org/sw/bin2h/ +*/ + +static unsigned char randomx_run_gfx803_bin[]={
+0x7f,0x45,0x4c,0x46,0x02,0x01,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x02,0x00,0x5b,0xaf,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xe8,0x17,0x00,0x00,0x00,0x00,0x00,0x00,0x0e,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x00,0x00,0x40,0x00,0x07,0x00,0x01,0x00,0x00 +,0x2e,0x73,0x68,0x73,0x74,0x72,0x74,0x61,0x62,0x00,0x2e,0x73,0x74,0x72,0x74,0x61,0x62,0x00,0x2e,0x73,0x79,0x6d,0x74,0x61,0x62,0x00,0x2e,0x63,0x6f,0x6d,0x6d,0x65 +,0x6e,0x74,0x00,0x2e,0x72,0x6f,0x64,0x61,0x74,0x61,0x00,0x2e,0x74,0x65,0x78,0x74,0x00,0x00,0x5f,0x5f,0x4f,0x70,0x65,0x6e,0x43,0x4c,0x5f,0x26,0x5f,0x5f,0x4f,0x70 +,0x65,0x6e,0x43,0x4c,0x5f,0x72,0x61,0x6e,0x64,0x6f,0x6d,0x78,0x5f,0x72,0x75,0x6e,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x5f,0x6d,0x65,0x74,0x61,0x64,0x61,0x74,0x61 +,0x00,0x61,0x63,0x6c,0x5f,0x76,0x65,0x72,0x73,0x69,0x6f,0x6e,0x5f,0x73,0x74,0x72,0x69,0x6e,0x67,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x05,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x3b +,0x07,0x00,0x00,0x00,0x00,0x00,0x00,0x30,0x00,0x00,0x00,0x01,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x27,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x41 +,0x4d,0x44,0x2d,0x43,0x4f,0x4d,0x50,0x2d,0x4c,0x49,0x42,0x2d,0x76,0x30,0x2e,0x38,0x20,0x28,0x30,0x2e,0x30,0x2e,0x53,0x43,0x5f,0x42,0x55,0x49,0x4c,0x44,0x5f,0x4e +,0x55,0x4d,0x42,0x45,0x52,0x29,0x10,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x3b,0x07,0x00,0x00,0x00,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x68,0x00 +,0x00,0x00,0x24,0x00,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x40,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 
+,0x00,0x00,0x00,0x00,0x00,0x00,0x15,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x07,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x0d,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x06,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x5f,0x5f,0x4f,0x70,0x65,0x6e,0x43,0x4c,0x5f,0x64 +,0x75,0x6d,0x6d,0x79,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x00,0x67,0x65,0x6e,0x65,0x72,0x69,0x63,0x00,0x00,0x58,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00 +,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x05,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x58,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x06,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x10,0x00,0x00 +,0x00,0x05,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00 
+,0x00,0x00,0x00,0x00,0x00,0x58,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x05,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x58,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x0f,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x30,0x00,0x00,0x00,0x07,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x04,0x00,0x00 +,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x05,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x58,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00 +,0x00,0x01,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x05,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x58,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x06,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x50,0x00,0x00 +,0x00,0x05,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x58,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x07,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 
+,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x60,0x00,0x00,0x00,0x07,0x00,0x00,0x00,0x01,0x00,0x00 +,0x00,0x06,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x05,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x58,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x0a,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x70,0x00,0x00,0x00,0x07,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x04,0x00,0x00 +,0x00,0x03,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x05,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x58,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x09,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00 +,0x00,0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x07,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x05,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x58,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x0e,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x05,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x90,0x00,0x00 +,0x00,0x07,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x05,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x58,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x05,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0xa0,0x00,0x00,0x00,0x07,0x00,0x00,0x00,0x04,0x00,0x00 
+,0x00,0x08,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x05,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x58,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x0a,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0xb0,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x58,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x0d,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00 +,0x00,0x01,0x00,0x00,0x00,0xc0,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x5f,0x2e,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x5f,0x30,0x00,0x73,0x69,0x7a,0x65,0x5f,0x74,0x00,0x5f,0x2e +,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x5f,0x31,0x00,0x73,0x69,0x7a,0x65,0x5f,0x74,0x00,0x5f,0x2e,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f +,0x6f,0x66,0x66,0x73,0x65,0x74,0x5f,0x32,0x00,0x73,0x69,0x7a,0x65,0x5f,0x74,0x00,0x5f,0x2e,0x70,0x72,0x69,0x6e,0x74,0x66,0x5f,0x62,0x75,0x66,0x66,0x65,0x72,0x00 
+,0x73,0x69,0x7a,0x65,0x5f,0x74,0x00,0x5f,0x2e,0x76,0x71,0x75,0x65,0x75,0x65,0x5f,0x70,0x6f,0x69,0x6e,0x74,0x65,0x72,0x00,0x73,0x69,0x7a,0x65,0x5f,0x74,0x00,0x5f +,0x2e,0x61,0x71,0x6c,0x77,0x72,0x61,0x70,0x5f,0x70,0x6f,0x69,0x6e,0x74,0x65,0x72,0x00,0x73,0x69,0x7a,0x65,0x5f,0x74,0x00,0x64,0x61,0x74,0x61,0x73,0x65,0x74,0x00 +,0x75,0x63,0x68,0x61,0x72,0x2a,0x00,0x73,0x63,0x72,0x61,0x74,0x63,0x68,0x70,0x61,0x64,0x00,0x75,0x63,0x68,0x61,0x72,0x2a,0x00,0x72,0x65,0x67,0x69,0x73,0x74,0x65 +,0x72,0x73,0x00,0x75,0x6c,0x6f,0x6e,0x67,0x2a,0x00,0x72,0x6f,0x75,0x6e,0x64,0x69,0x6e,0x67,0x5f,0x6d,0x6f,0x64,0x65,0x73,0x00,0x75,0x69,0x6e,0x74,0x2a,0x00,0x70 +,0x72,0x6f,0x67,0x72,0x61,0x6d,0x73,0x00,0x75,0x69,0x6e,0x74,0x2a,0x00,0x62,0x61,0x74,0x63,0x68,0x5f,0x73,0x69,0x7a,0x65,0x00,0x75,0x69,0x6e,0x74,0x00,0x72,0x78 +,0x5f,0x70,0x61,0x72,0x61,0x6d,0x65,0x74,0x65,0x72,0x73,0x00,0x75,0x69,0x6e,0x74,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x7f,0x45,0x4c,0x46,0x02,0x01,0x01,0x40,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0xe0,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x40,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x0e,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x40,0x00,0x38,0x00,0x01,0x00,0x40,0x00,0x06,0x00,0x01 +,0x00,0x03,0x00,0x00,0x60,0x05,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0xb4,0x0b,0x00,0x00,0x00,0x00,0x00,0x00,0xb4,0x0b,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x2e,0x73,0x68,0x73,0x74,0x72 +,0x74,0x61,0x62,0x00,0x2e,0x73,0x74,0x72,0x74,0x61,0x62,0x00,0x2e,0x6e,0x6f,0x74,0x65,0x00,0x2e,0x68,0x73,0x61,0x74,0x65,0x78,0x74,0x00,0x2e,0x73,0x79,0x6d,0x74 
+,0x61,0x62,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x26,0x5f,0x5f,0x4f,0x70,0x65,0x6e,0x43,0x4c,0x5f,0x72,0x61,0x6e,0x64,0x6f,0x6d,0x78,0x5f,0x72,0x75,0x6e,0x5f +,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x00,0x5f,0x5f,0x68,0x73,0x61,0x5f,0x73,0x65,0x63,0x74,0x69,0x6f,0x6e,0x2e,0x68,0x73,0x61,0x74,0x65,0x78,0x74,0x00,0x00,0x00,0x00 +,0x00,0x04,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x41,0x4d,0x44,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x0c,0x00,0x00 +,0x00,0x02,0x00,0x00,0x00,0x41,0x4d,0x44,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x01,0x01,0x00,0x04,0x00,0x00,0x00,0x1a,0x00,0x00,0x00,0x03,0x00,0x00 +,0x00,0x41,0x4d,0x44,0x00,0x04,0x00,0x07,0x00,0x08,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x41,0x4d,0x44,0x00,0x41,0x4d,0x44,0x47,0x50,0x55,0x00 +,0x00,0x04,0x00,0x00,0x00,0x29,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x41,0x4d,0x44,0x00,0x19,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x41,0x4d,0x44 +,0x20,0x48,0x53,0x41,0x20,0x52,0x75,0x6e,0x74,0x69,0x6d,0x65,0x20,0x46,0x69,0x6e,0x61,0x6c,0x69,0x7a,0x65,0x72,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x04,0x00,0x00 +,0x00,0x1a,0x00,0x00,0x00,0x05,0x00,0x00,0x00,0x41,0x4d,0x44,0x00,0x16,0x00,0x2d,0x68,0x73,0x61,0x5f,0x63,0x61,0x6c,0x6c,0x5f,0x63,0x6f,0x6e,0x76,0x65,0x6e,0x74 +,0x69,0x6f,0x6e,0x3d,0x30,0x00,0x74,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 
+,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x5f,0x03,0xac,0x00,0x8c,0x00,0x00,0x00,0x09,0x00,0x0a,0x00,0x00,0x00,0x00 +,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x60,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x62,0x00,0x80,0x00,0x80,0x00,0x00,0x00,0x60,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x04,0x04,0x04,0x06,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0xff,0x00,0xfc,0xbe,0x00,0x00,0x01,0x00,0x00,0x00,0x84,0xc0,0x00,0x00,0x00,0x00,0x70,0x00,0x8c,0xbf,0x00,0x00,0x93,0xbf,0x00,0x00,0x82,0xbf,0x06,0x00,0x88 +,0xbe,0x01,0x00,0x12,0xd1,0x86,0x10,0x00,0x00,0x01,0x01,0x02,0x32,0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00,0x82,0x00,0x06,0xc0,0x40,0x00,0x00,0x00,0x02,0x10,0x06 +,0xc0,0x48,0x00,0x00,0x00,0x7f,0x00,0x8c,0xbf,0x08,0x82,0x10,0x8e,0x40,0x10,0x40,0x80,0x41,0x80,0x41,0x82,0x40,0x02,0x10,0x7e,0x41,0x02,0x12,0x7e,0x00,0x00,0x50 +,0xdc,0x08,0x00,0x00,0x08,0x70,0x0f,0x8c,0xbf,0x42,0x00,0x89,0xd2,0x08,0x01,0x01,0x00,0x81,0x08,0x42,0xb9,0x80,0x00,0xc3,0xbe,0xff,0x00,0xc4,0xbe,0x00,0x01,0x00 +,0x00,0x80,0x00,0xc5,0xbe,0x00,0x02,0x02,0x32,0x86,0x02,0x04,0x20,0x85,0x04,0x06,0x24,0xbf,0x02,0x02,0x26,0x80,0x02,0x08,0x7e,0x03,0x00,0x8f,0xd2,0x83,0x06,0x02 
+,0x00,0x84,0x02,0x0a,0x24,0x02,0x06,0x06,0x32,0x03,0x02,0x0c,0x7e,0x06,0x09,0x08,0x38,0x82,0x02,0x52,0x24,0x03,0x53,0x0c,0x32,0x07,0x6a,0x1c,0xd1,0x04,0x01,0xa9 +,0x01,0x00,0x00,0x50,0xdc,0x06,0x00,0x00,0x06,0x80,0x02,0x00,0x7e,0x70,0x0f,0x8c,0xbf,0x00,0x00,0x1a,0xd8,0x29,0x06,0x00,0x00,0x7f,0x00,0x8c,0xbf,0x7e,0x01,0x80 +,0xbe,0x02,0x00,0xdb,0xd0,0x01,0x0f,0x01,0x00,0x75,0x01,0x88,0xbf,0x02,0x05,0x02,0xc0,0x5c,0x00,0x00,0x00,0x7f,0x00,0x8c,0xbf,0x14,0xff,0x95,0x92,0x00,0x00,0x05 +,0x00,0x81,0x15,0x15,0x8e,0x14,0xff,0x96,0x92,0x05,0x00,0x05,0x00,0x81,0x16,0x16,0x8e,0x14,0xff,0x97,0x92,0x0a,0x00,0x05,0x00,0x81,0x17,0x17,0x8e,0x14,0xff,0x98 +,0x92,0x0f,0x00,0x04,0x00,0x81,0x18,0x18,0x8e,0x17,0xc0,0x02,0x80,0x14,0x00,0x86,0xd2,0x02,0x05,0x00,0x00,0x02,0x00,0x85,0xd2,0x02,0x05,0x00,0x00,0x80,0x02,0x52 +,0x7e,0x80,0x02,0x58,0x7e,0x98,0x00,0x6c,0xd8,0x00,0x00,0x00,0x06,0x02,0x00,0xc9,0xd0,0x01,0x09,0x01,0x00,0x12,0x10,0xee,0xd8,0x00,0x00,0x00,0x22,0x88,0x00,0xec +,0xd8,0x00,0x00,0x00,0x0b,0x00,0x00,0x09,0xb0,0x7e,0x01,0x86,0xbe,0x06,0x02,0xfe,0x89,0xa0,0x00,0xec,0xd8,0x00,0x00,0x00,0x0d,0x06,0x7e,0xfe,0x89,0x80,0x02,0x1a +,0x7e,0x80,0x02,0x1c,0x7e,0x06,0x01,0xfe,0xbe,0x08,0x01,0x86,0xbe,0x40,0x27,0x86,0xb7,0x00,0x0b,0x0a,0x32,0x05,0x6a,0x19,0xd1,0x05,0x81,0x01,0x00,0x7e,0x01,0x88 +,0xbe,0x08,0x02,0xfe,0x89,0xa8,0x00,0xec,0xd8,0x00,0x00,0x00,0x0f,0x08,0x7e,0xfe,0x89,0x80,0x02,0x1e,0x7e,0x80,0x02,0x20,0x7e,0x08,0x01,0xfe,0xbe,0x02,0x02,0x0a +,0xc0,0x30,0x00,0x00,0x00,0x02,0x04,0x02,0xc0,0x58,0x00,0x00,0x00,0x02,0x01,0x06,0xc0,0x50,0x00,0x00,0x00,0x83,0x02,0x02,0x24,0x00,0x03,0x22,0x32,0x7f,0x00,0x8c +,0xbf,0x0a,0x04,0x04,0x32,0x0b,0x02,0x24,0x7e,0x12,0x29,0x24,0x38,0xff,0x02,0x26,0x7e,0xff,0xff,0xff,0x00,0x08,0x0c,0x0c,0x32,0x09,0x02,0x28,0x7e,0x14,0x6a,0x1c +,0xd1,0x14,0x01,0xa9,0x01,0x00,0x00,0xec,0xd8,0x11,0x00,0x00,0x15,0x04,0x06,0x04,0x80,0x05,0x07,0x05,0x82,0x13,0x00,0x00,0xd1,0x13,0x83,0x09,0x00,0x83,0x46,0x10 
+,0x24,0x83,0x44,0x0e,0x24,0x83,0x18,0x18,0x24,0x83,0x16,0x14,0x24,0x08,0x01,0x10,0x32,0x07,0x01,0x0e,0x32,0x0c,0x01,0x18,0x32,0x0a,0x01,0x00,0x32,0x24,0x03,0x14 +,0x7e,0x25,0x03,0x2e,0x7e,0x18,0x81,0x82,0x80,0x10,0x00,0x83,0xbe,0x26,0x6a,0x1a,0xd1,0x15,0x10,0x01,0x00,0x27,0x6a,0x1a,0xd1,0x16,0x10,0x01,0x00,0x32,0x6a,0x1a +,0xd1,0x17,0x10,0x01,0x00,0xff,0x02,0x66,0x7e,0x00,0x00,0xf0,0x80,0x03,0x03,0x68,0x7e,0x12,0x03,0x06,0x7e,0x34,0x03,0x24,0x7e,0x00,0x00,0x89,0xd2,0x02,0x01,0x01 +,0x00,0x01,0x00,0x89,0xd2,0x03,0x01,0x01,0x00,0x7e,0x01,0xa4,0xbe,0x82,0x01,0xfe,0xbe,0x88,0x02,0x52,0x7e,0x84,0x02,0x58,0x7e,0x83,0x01,0xfe,0xbe,0x18,0x1a,0xee +,0xd8,0x29,0x00,0x00,0x34,0x1c,0x1e,0xee,0xd8,0x29,0x00,0x00,0x38,0xff,0x02,0x9a,0x7e,0xff,0xff,0xff,0x00,0xa0,0x00,0xec,0xd8,0x29,0x00,0x00,0x4e,0x24,0x01,0xfe +,0xbe,0xff,0x02,0xa4,0x7e,0x00,0x00,0x00,0x80,0xff,0x02,0xa6,0x7e,0x00,0x00,0xf0,0x3f,0xff,0x02,0xa8,0x7e,0x00,0x00,0x10,0x00,0x00,0x1c,0x8e,0xbe,0x0e,0xff,0x28 +,0x80,0xe8,0x03,0x00,0x00,0x0f,0x80,0x29,0x82,0x0e,0xff,0x2a,0x80,0x4c,0x04,0x00,0x00,0x0f,0x80,0x2b,0x82,0x0e,0xff,0x2c,0x80,0xb0,0x04,0x00,0x00,0x0f,0x80,0x2d +,0x82,0x0e,0xff,0x2e,0x80,0x14,0x05,0x00,0x00,0x0f,0x80,0x2f,0x82,0x0e,0xff,0x30,0x80,0x78,0x05,0x00,0x00,0x0f,0x80,0x31,0x82,0x0e,0xff,0x32,0x80,0xe0,0x05,0x00 +,0x00,0x0f,0x80,0x33,0x82,0x0e,0xff,0x34,0x80,0x48,0x06,0x00,0x00,0x0f,0x80,0x35,0x82,0x0e,0xff,0x36,0x80,0xb0,0x06,0x00,0x00,0x0f,0x80,0x37,0x82,0x0e,0xff,0x38 +,0x80,0x18,0x07,0x00,0x00,0x0f,0x80,0x39,0x82,0x0e,0xff,0x3a,0x80,0x88,0x07,0x00,0x00,0x0f,0x80,0x3b,0x82,0xc1,0x00,0xbf,0xbe,0xff,0x00,0xc6,0xbe,0x00,0xff,0x00 +,0x00,0xff,0x00,0xc7,0xbe,0x00,0xfe,0x01,0x00,0xff,0x00,0xc8,0xbe,0x00,0xfc,0x03,0x00,0xff,0x00,0xc9,0xbe,0x00,0xf8,0x07,0x00,0xff,0x00,0xca,0xbe,0x00,0xf0,0x0f +,0x00,0xff,0x00,0xcb,0xbe,0x00,0xe0,0x1f,0x00,0xff,0x00,0xcc,0xbe,0x00,0xc0,0x3f,0x00,0xff,0x00,0xcd,0xbe,0x00,0x80,0x7f,0x00,0xff,0x00,0xce,0xbe,0x00,0x00,0xff 
+,0x00,0xff,0x00,0xcf,0xbe,0x00,0x00,0xfe,0x01,0xff,0x00,0xd0,0xbe,0x00,0x00,0xfc,0x03,0xff,0x00,0xd1,0xbe,0x00,0x00,0xf8,0x07,0xff,0x00,0xd2,0xbe,0x00,0x00,0xf0 +,0x0f,0xff,0x00,0xd3,0xbe,0x00,0x00,0xe0,0x1f,0xff,0x00,0xd4,0xbe,0x00,0x00,0xc0,0x3f,0xff,0x00,0xd5,0xbe,0x00,0x00,0x80,0x7f,0x17,0xc0,0xd6,0x80,0x00,0x00,0xec +,0xd8,0x00,0x00,0x00,0x18,0x00,0x00,0xec,0xd8,0x0c,0x00,0x00,0x1a,0x7f,0x00,0x8c,0xbf,0x1b,0x33,0x32,0x2a,0x1a,0x31,0x30,0x2a,0x19,0x15,0x14,0x2a,0x18,0x2f,0x2e +,0x2a,0x56,0x14,0x14,0x26,0x56,0x2e,0x2e,0x26,0x0a,0x03,0x14,0x32,0x17,0x03,0x2e,0x32,0x02,0x15,0x34,0x32,0x1b,0x6a,0x1c,0xd1,0x03,0x01,0xa9,0x01,0x02,0x2f,0x2e +,0x32,0x18,0x6a,0x1c,0xd1,0x03,0x01,0xa9,0x01,0x00,0x00,0x54,0xdc,0x1a,0x00,0x00,0x1c,0x00,0x00,0x54,0xdc,0x17,0x00,0x00,0x1e,0x71,0x0f,0x8c,0xbf,0x1c,0x09,0x40 +,0x7e,0x1d,0x09,0x38,0x7e,0x70,0x0f,0x8c,0xbf,0x15,0x3d,0x44,0x2a,0x16,0x3f,0x46,0x2a,0x06,0x49,0x2c,0x32,0x19,0x6a,0x1c,0xd1,0x14,0x01,0xa9,0x01,0x20,0x1b,0x3c +,0x28,0x21,0x27,0x3e,0x26,0x1f,0x1d,0x3e,0x28,0x1c,0x1f,0x38,0x28,0x1d,0x27,0x3a,0x26,0x1d,0x21,0x3a,0x28,0x16,0x03,0x2a,0x32,0x16,0x6a,0x1c,0xd1,0x19,0x01,0xa9 +,0x01,0x00,0x01,0x9c,0xd8,0x05,0x1e,0x1c,0x00,0x7f,0x00,0x8c,0xbf,0x83,0x01,0xfe,0xbe,0x08,0x0a,0xee,0xd8,0x29,0x00,0x00,0x3c,0x0c,0x0e,0xee,0xd8,0x29,0x00,0x00 +,0x40,0x10,0x12,0xee,0xd8,0x29,0x00,0x00,0x44,0x14,0x16,0xee,0xd8,0x29,0x00,0x00,0x48,0x10,0x00,0x89,0xd2,0x22,0x01,0x01,0x00,0x11,0x00,0x89,0xd2,0x23,0x01,0x01 +,0x00,0x12,0x00,0x89,0xd2,0x22,0x03,0x01,0x00,0x13,0x00,0x89,0xd2,0x23,0x03,0x01,0x00,0x14,0x00,0x89,0xd2,0x22,0x05,0x01,0x00,0x15,0x00,0x89,0xd2,0x23,0x05,0x01 +,0x00,0x16,0x00,0x89,0xd2,0x22,0x07,0x01,0x00,0x17,0x00,0x89,0xd2,0x23,0x07,0x01,0x00,0x18,0x00,0x89,0xd2,0x22,0x09,0x01,0x00,0x19,0x00,0x89,0xd2,0x23,0x09,0x01 +,0x00,0x1a,0x00,0x89,0xd2,0x22,0x0b,0x01,0x00,0x1b,0x00,0x89,0xd2,0x23,0x0b,0x01,0x00,0x1c,0x00,0x89,0xd2,0x22,0x0d,0x01,0x00,0x1d,0x00,0x89,0xd2,0x23,0x0d,0x01 
+,0x00,0x1e,0x00,0x89,0xd2,0x22,0x0f,0x01,0x00,0x1f,0x00,0x89,0xd2,0x23,0x0f,0x01,0x00,0x7f,0x00,0x8c,0xbf,0x04,0x1e,0x8c,0xbe,0x1c,0x00,0x8a,0xd2,0x10,0x00,0x01 +,0x00,0x1d,0x00,0x8a,0xd2,0x11,0x00,0x01,0x00,0x1c,0x00,0x8a,0xd2,0x12,0x02,0x01,0x00,0x1d,0x00,0x8a,0xd2,0x13,0x02,0x01,0x00,0x1c,0x00,0x8a,0xd2,0x14,0x04,0x01 +,0x00,0x1d,0x00,0x8a,0xd2,0x15,0x04,0x01,0x00,0x1c,0x00,0x8a,0xd2,0x16,0x06,0x01,0x00,0x1d,0x00,0x8a,0xd2,0x17,0x06,0x01,0x00,0x1c,0x00,0x8a,0xd2,0x18,0x08,0x01 +,0x00,0x1d,0x00,0x8a,0xd2,0x19,0x08,0x01,0x00,0x1c,0x00,0x8a,0xd2,0x1a,0x0a,0x01,0x00,0x1d,0x00,0x8a,0xd2,0x1b,0x0a,0x01,0x00,0x1c,0x00,0x8a,0xd2,0x1c,0x0c,0x01 +,0x00,0x1d,0x00,0x8a,0xd2,0x1d,0x0c,0x01,0x00,0x1c,0x00,0x8a,0xd2,0x1e,0x0e,0x01,0x00,0x1d,0x00,0x8a,0xd2,0x1f,0x0e,0x01,0x00,0x08,0x0a,0x9c,0xd8,0x29,0x3c,0x3e +,0x00,0x0c,0x0e,0x9c,0xd8,0x29,0x40,0x42,0x00,0x10,0x12,0x9c,0xd8,0x29,0x44,0x46,0x00,0x14,0x16,0x9c,0xd8,0x29,0x48,0x4a,0x00,0x24,0x01,0xfe,0xbe,0x00,0x00,0x9a +,0xd8,0x11,0x1c,0x00,0x00,0x00,0x00,0x54,0xdc,0x15,0x00,0x00,0x15,0x70,0x00,0x8c,0xbf,0x1c,0x2b,0x2a,0x2a,0x1d,0x2d,0x2c,0x2a,0x00,0x00,0x6c,0xd8,0x07,0x00,0x00 +,0x1c,0x00,0x00,0x6c,0xd8,0x08,0x00,0x00,0x1d,0x00,0x00,0x9a,0xd8,0x11,0x15,0x00,0x00,0x7f,0x01,0x8c,0xbf,0x08,0x10,0xee,0xd8,0x11,0x00,0x00,0x1e,0x1c,0x4b,0x14 +,0x2a,0x7f,0x00,0x8c,0xbf,0x20,0x3d,0x3c,0x2a,0x21,0x3f,0x3e,0x2a,0x0a,0x3b,0x14,0x2a,0x00,0x00,0x74,0xdc,0x1a,0x15,0x00,0x00,0xff,0x14,0x14,0x26,0xc0,0xff,0xff +,0x7f,0x00,0x00,0x74,0xdc,0x17,0x1e,0x00,0x00,0x02,0x80,0x06,0xbf,0x06,0x00,0x85,0xbf,0x02,0x81,0x82,0x81,0x24,0x03,0x4a,0x7e,0x80,0x02,0x2e,0x7e,0x0a,0x03,0x48 +,0x7e,0x80,0x02,0x14,0x7e,0x5d,0xff,0x82,0xbf,0x12,0x03,0x00,0x32,0x01,0x6a,0x1c,0xd1,0x04,0x01,0xa9,0x01,0x00,0x00,0x74,0xdc,0x00,0x15,0x00,0x00,0x00,0x6a,0x19 +,0xd1,0x00,0x81,0x01,0x00,0x01,0x6a,0x1c,0xd1,0x01,0x01,0xa9,0x01,0x00,0x00,0x74,0xdc,0x00,0x1e,0x00,0x00,0x00,0x6a,0x19,0xd1,0x00,0x81,0x01,0x00,0x01,0x6a,0x1c 
+,0xd1,0x01,0x01,0xa9,0x01,0x00,0x00,0x74,0xdc,0x00,0x20,0x00,0x00,0x40,0x02,0x00,0x7e,0x41,0x02,0x02,0x7e,0x42,0x02,0x04,0x7e,0x00,0x00,0x70,0xdc,0x00,0x02,0x00 +,0x00,0x00,0x00,0x81,0xbf,0x81,0x08,0x43,0xb9,0x44,0x4d,0x38,0x7e,0x2a,0x00,0x81,0xd2,0x1c,0x89,0x02,0x00,0x1c,0x03,0x60,0x7e,0x1d,0xa9,0x62,0x34,0x1c,0x03,0x5c +,0x7e,0x31,0xa5,0x5e,0x2a,0x2e,0x00,0xcc,0xd1,0x2e,0x55,0xc2,0x03,0x2a,0x00,0xcc,0xd1,0x2a,0x5d,0xaa,0x04,0x30,0x00,0xcc,0xd1,0x30,0x5d,0xc2,0x04,0x2e,0x00,0xcc +,0xd1,0x2a,0x55,0x12,0x25,0x81,0x08,0x42,0xb9,0x2a,0x00,0xcc,0xd1,0x2e,0x61,0xaa,0x04,0x0e,0x00,0x13,0xd0,0x44,0x89,0x00,0x00,0x2a,0x03,0x88,0x7e,0x2b,0x03,0x8a +,0x7e,0x83,0x01,0xfe,0xbe,0x3c,0x1d,0x80,0xbe,0x81,0x08,0x43,0xb9,0x46,0x4d,0x38,0x7e,0x2a,0x00,0x81,0xd2,0x1c,0x8d,0x02,0x00,0x1c,0x03,0x60,0x7e,0x1d,0xa9,0x62 +,0x34,0x1c,0x03,0x5c,0x7e,0x31,0xa5,0x5e,0x2a,0x2e,0x00,0xcc,0xd1,0x2e,0x55,0xc2,0x03,0x2a,0x00,0xcc,0xd1,0x2a,0x5d,0xaa,0x04,0x30,0x00,0xcc,0xd1,0x30,0x5d,0xc2 +,0x04,0x2e,0x00,0xcc,0xd1,0x2a,0x55,0x1a,0x25,0x81,0x08,0x42,0xb9,0x2a,0x00,0xcc,0xd1,0x2e,0x61,0xaa,0x04,0x0e,0x00,0x13,0xd0,0x46,0x89,0x00,0x00,0x2a,0x03,0x8c +,0x7e,0x2b,0x03,0x8e,0x7e,0x83,0x01,0xfe,0xbe,0x3c,0x1d,0x80,0xbe,0x81,0x08,0x43,0xb9,0x48,0x4d,0x38,0x7e,0x2a,0x00,0x81,0xd2,0x1c,0x91,0x02,0x00,0x1c,0x03,0x60 +,0x7e,0x1d,0xa9,0x62,0x34,0x1c,0x03,0x5c,0x7e,0x31,0xa5,0x5e,0x2a,0x2e,0x00,0xcc,0xd1,0x2e,0x55,0xc2,0x03,0x2a,0x00,0xcc,0xd1,0x2a,0x5d,0xaa,0x04,0x30,0x00,0xcc +,0xd1,0x30,0x5d,0xc2,0x04,0x2e,0x00,0xcc,0xd1,0x2a,0x55,0x22,0x25,0x81,0x08,0x42,0xb9,0x2a,0x00,0xcc,0xd1,0x2e,0x61,0xaa,0x04,0x0e,0x00,0x13,0xd0,0x48,0x89,0x00 +,0x00,0x2a,0x03,0x90,0x7e,0x2b,0x03,0x92,0x7e,0x83,0x01,0xfe,0xbe,0x3c,0x1d,0x80,0xbe,0x81,0x08,0x43,0xb9,0x4a,0x4d,0x38,0x7e,0x2a,0x00,0x81,0xd2,0x1c,0x95,0x02 +,0x00,0x1c,0x03,0x60,0x7e,0x1d,0xa9,0x62,0x34,0x1c,0x03,0x5c,0x7e,0x31,0xa5,0x5e,0x2a,0x2e,0x00,0xcc,0xd1,0x2e,0x55,0xc2,0x03,0x2a,0x00,0xcc,0xd1,0x2a,0x5d,0xaa 
+,0x04,0x30,0x00,0xcc,0xd1,0x30,0x5d,0xc2,0x04,0x2e,0x00,0xcc,0xd1,0x2a,0x55,0x2a,0x25,0x81,0x08,0x42,0xb9,0x2a,0x00,0xcc,0xd1,0x2e,0x61,0xaa,0x04,0x0e,0x00,0x13 +,0xd0,0x4a,0x89,0x00,0x00,0x2a,0x03,0x94,0x7e,0x2b,0x03,0x96,0x7e,0x83,0x01,0xfe,0xbe,0x3c,0x1d,0x80,0xbe,0x1c,0x9d,0x38,0x28,0x1d,0x9b,0x3a,0x26,0x1d,0x9f,0x3a +,0x28,0x81,0x08,0x43,0xb9,0x1c,0x4b,0x60,0x7e,0x50,0x00,0xcc,0xd1,0x1c,0x61,0xca,0x23,0x30,0x00,0xcc,0xd1,0x30,0xa1,0xc2,0x04,0x50,0x00,0x81,0xd2,0x44,0x61,0x02 +,0x00,0x2a,0x00,0xcc,0xd1,0x1c,0xa1,0x12,0x25,0x81,0x08,0x42,0xb9,0x2a,0x00,0xcc,0xd1,0x2a,0x61,0x42,0x05,0x50,0x00,0xdf,0xd1,0x2a,0x39,0x12,0x05,0x0e,0x00,0x72 +,0xd0,0x44,0x39,0x02,0x00,0x80,0x02,0xa0,0x7e,0x53,0x03,0xa2,0x7e,0x83,0x01,0xfe,0xbe,0x50,0x03,0x88,0x7e,0x51,0x03,0x8a,0x7e,0x3c,0x1d,0x80,0xbe,0x1c,0x9d,0x38 +,0x28,0x1d,0x9b,0x3a,0x26,0x1d,0x9f,0x3a,0x28,0x81,0x08,0x43,0xb9,0x1c,0x4b,0x60,0x7e,0x50,0x00,0xcc,0xd1,0x1c,0x61,0xca,0x23,0x30,0x00,0xcc,0xd1,0x30,0xa1,0xc2 +,0x04,0x50,0x00,0x81,0xd2,0x46,0x61,0x02,0x00,0x2a,0x00,0xcc,0xd1,0x1c,0xa1,0x1a,0x25,0x81,0x08,0x42,0xb9,0x2a,0x00,0xcc,0xd1,0x2a,0x61,0x42,0x05,0x50,0x00,0xdf +,0xd1,0x2a,0x39,0x1a,0x05,0x0e,0x00,0x72,0xd0,0x46,0x39,0x02,0x00,0x80,0x02,0xa0,0x7e,0x53,0x03,0xa2,0x7e,0x83,0x01,0xfe,0xbe,0x50,0x03,0x8c,0x7e,0x51,0x03,0x8e +,0x7e,0x3c,0x1d,0x80,0xbe,0x1c,0x9d,0x38,0x28,0x1d,0x9b,0x3a,0x26,0x1d,0x9f,0x3a,0x28,0x81,0x08,0x43,0xb9,0x1c,0x4b,0x60,0x7e,0x50,0x00,0xcc,0xd1,0x1c,0x61,0xca +,0x23,0x30,0x00,0xcc,0xd1,0x30,0xa1,0xc2,0x04,0x50,0x00,0x81,0xd2,0x48,0x61,0x02,0x00,0x2a,0x00,0xcc,0xd1,0x1c,0xa1,0x22,0x25,0x81,0x08,0x42,0xb9,0x2a,0x00,0xcc +,0xd1,0x2a,0x61,0x42,0x05,0x50,0x00,0xdf,0xd1,0x2a,0x39,0x22,0x05,0x0e,0x00,0x72,0xd0,0x48,0x39,0x02,0x00,0x80,0x02,0xa0,0x7e,0x53,0x03,0xa2,0x7e,0x83,0x01,0xfe +,0xbe,0x50,0x03,0x90,0x7e,0x51,0x03,0x92,0x7e,0x3c,0x1d,0x80,0xbe,0x1c,0x9d,0x38,0x28,0x1d,0x9b,0x3a,0x26,0x1d,0x9f,0x3a,0x28,0x81,0x08,0x43,0xb9,0x1c,0x4b,0x60 
+,0x7e,0x50,0x00,0xcc,0xd1,0x1c,0x61,0xca,0x23,0x30,0x00,0xcc,0xd1,0x30,0xa1,0xc2,0x04,0x50,0x00,0x81,0xd2,0x4a,0x61,0x02,0x00,0x2a,0x00,0xcc,0xd1,0x1c,0xa1,0x2a +,0x25,0x81,0x08,0x42,0xb9,0x2a,0x00,0xcc,0xd1,0x2a,0x61,0x42,0x05,0x50,0x00,0xdf,0xd1,0x2a,0x39,0x2a,0x05,0x0e,0x00,0x72,0xd0,0x4a,0x39,0x02,0x00,0x80,0x02,0xa0 +,0x7e,0x53,0x03,0xa2,0x7e,0x83,0x01,0xfe,0xbe,0x50,0x03,0x94,0x7e,0x51,0x03,0x96,0x7e,0x3c,0x1d,0x80,0xbe,0x81,0x01,0xfe,0xbe,0x0e,0x02,0x5a,0x7e,0x28,0x00,0x86 +,0xd2,0x26,0x5a,0x02,0x00,0x0f,0x02,0x5e,0x7e,0x2a,0x20,0xe8,0xd1,0x26,0x5e,0xa2,0x04,0x2a,0x03,0x50,0x7e,0x2d,0x20,0xe8,0xd1,0x27,0x5a,0xa2,0x04,0x2a,0x20,0xe8 +,0xd1,0x27,0x5e,0xae,0x04,0x2a,0x5d,0x54,0x32,0x80,0x56,0x56,0x38,0x20,0x00,0x89,0xd2,0x2a,0x01,0x01,0x00,0x21,0x00,0x89,0xd2,0x2b,0x01,0x01,0x00,0x0f,0x80,0x04 +,0xbf,0x26,0x80,0xa2,0x85,0x20,0x22,0xa0,0x80,0x21,0x23,0xa1,0x82,0x27,0x80,0x04,0xbf,0x0e,0x80,0xa2,0x85,0x20,0x22,0x8e,0x80,0x21,0x23,0x8f,0x82,0x83,0x01,0xfe +,0xbe,0x3c,0x1d,0x80,0xbe,0x81,0x01,0xfe,0xbe,0x26,0x02,0x5a,0x7e,0x28,0x00,0x86,0xd2,0x0e,0x5a,0x02,0x00,0x27,0x02,0x5e,0x7e,0x2a,0x20,0xe8,0xd1,0x0e,0x5e,0xa2 +,0x04,0x2a,0x03,0x50,0x7e,0x2d,0x20,0xe8,0xd1,0x0f,0x5a,0xa2,0x04,0x2a,0x20,0xe8,0xd1,0x0f,0x5e,0xae,0x04,0x2a,0x5d,0x54,0x32,0x80,0x56,0x56,0x38,0x0e,0x00,0x89 +,0xd2,0x2a,0x01,0x01,0x00,0x0f,0x00,0x89,0xd2,0x2b,0x01,0x01,0x00,0x83,0x01,0xfe,0xbe,0x3c,0x1d,0x80,0xbe,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x1a,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0xb4,0x0b,0x00,0x00,0x00,0x00,0x00,0x00,0x1e,0x00,0x00,0x00,0x03,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 
+,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x01,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x78,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x2a,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x0b,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xa8,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x34,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x13,0x00,0x00,0x00,0x07,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xe0,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0xc8,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x19,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x07,0x00,0xc0,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x00,0x00 +,0x00,0xb4,0x0b,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x22,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xb8,0x0d,0x00,0x00,0x00,0x00,0x00 +,0x00,0x48,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 
+,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x40 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x32,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x72 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x43,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x13,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xb8 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x48,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x18 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x1b,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x27,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x24,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x27 +,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x3b,0x07,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x2c,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x62 
+,0x08,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0x0f,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00}; +static const int randomx_run_gfx803_bin_size=6568; diff --git a/src/amd/opencl/RandomX/randomx_run_gfx900.asm b/src/amd/opencl/RandomX/randomx_run_gfx900.asm new file mode 100644 index 00000000..b67dcc02 --- /dev/null +++ b/src/amd/opencl/RandomX/randomx_run_gfx900.asm @@ -0,0 +1,688 @@ +/* +Copyright (c) 2019 SChernykh + +This file is part of RandomX OpenCL. + +RandomX OpenCL is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +RandomX OpenCL is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with RandomX OpenCL. If not, see <http://www.gnu.org/licenses/>.
+*/ + +.amdcl2 +.gpu GFX900 +.64bit +.arch_minor 0 +.arch_stepping 0 +.driver_version 223600 +.kernel randomx_run + .config + .dims x + .cws 64, 1, 1 + .sgprsnum 96 + # 6 waves per SIMD: 37-40 VGPRs + # 5 waves per SIMD: 41-48 VGPRs + # 4 waves per SIMD: 49-64 VGPRs + # 3 waves per SIMD: 65-84 VGPRs + # 2 waves per SIMD: 85-128 VGPRs + # 1 wave per SIMD: 129-256 VGPRs + .vgprsnum 128 + .localsize 256 + .floatmode 0xc0 + .pgmrsrc1 0x00ac035f + .pgmrsrc2 0x00000090 + .dx10clamp + .ieeemode + .useargs + .priority 0 + .arg _.global_offset_0, "size_t", long + .arg _.global_offset_1, "size_t", long + .arg _.global_offset_2, "size_t", long + .arg _.printf_buffer, "size_t", void*, global, , rdonly + .arg _.vqueue_pointer, "size_t", long + .arg _.aqlwrap_pointer, "size_t", long + .arg dataset, "uchar*", uchar*, global, const, rdonly + .arg scratchpad, "uchar*", uchar*, global, + .arg registers, "ulong*", ulong*, global, + .arg rounding_modes, "uint*", uint*, global, + .arg programs, "uint*", uint*, global, + .arg batch_size, "uint", uint + .arg rx_parameters, "uint", uint + .text + s_mov_b32 m0, 0x10000 + s_dcache_wb + s_waitcnt vmcnt(0) & lgkmcnt(0) + s_icache_inv + s_branch begin + + # pgmrsrc2 = 0x00000090, bits 1:5 = 8, so first 8 SGPRs (s0-s7) contain user data + # s8 contains group id + # v0 contains local id +begin: + v_lshl_add_u32 v1, s8, 6, v0 + s_load_dwordx2 s[0:1], s[4:5], 0x0 + s_load_dwordx2 s[2:3], s[4:5], 0x40 + s_load_dwordx2 s[64:65], s[4:5], 0x48 + s_waitcnt lgkmcnt(0) + + # load rounding mode + s_lshl_b32 s16, s8, 2 + s_add_u32 s64, s64, s16 + s_addc_u32 s65, s65, 0 + v_mov_b32 v8, 0 + global_load_dword v8, v8, s[64:65] + s_waitcnt vmcnt(0) + v_readlane_b32 s66, v8, 0 + s_setreg_b32 hwreg(mode, 2, 2), s66 + s_mov_b32 s67, 0 + + # used in FSQRT_R to check for "positive normal value" (v_cmpx_class_f64) + s_mov_b32 s68, 256 + s_mov_b32 s69, 0 + + v_add_u32 v1, s0, v1 + v_lshrrev_b32 v2, 6, v1 + v_lshlrev_b32 v3, 5, v2 + v_and_b32 v1, 63, v1 + v_mov_b32 v4, 
0 + v_lshlrev_b64 v[3:4], 3, v[3:4] + v_lshlrev_b32 v5, 4, v1 + v_add_co_u32 v3, vcc, s2, v3 + v_mov_b32 v6, s3 + v_addc_co_u32 v4, vcc, v6, v4, vcc + v_lshlrev_b32 v41, 2, v1 + v_add_co_u32 v6, vcc, v3, v41 + v_addc_co_u32 v7, vcc, v4, 0, vcc + global_load_dword v6, v[6:7], off + v_mov_b32 v0, 0 + s_waitcnt vmcnt(0) + ds_write_b32 v41, v6 + s_waitcnt lgkmcnt(0) + s_mov_b64 s[0:1], exec + v_cmpx_le_u32 s[2:3], v1, 7 + s_cbranch_execz program_end + + # rx_parameters + s_load_dword s20, s[4:5], 0x5c + s_waitcnt lgkmcnt(0) + + # Scratchpad L1 size + s_bfe_u32 s21, s20, 0x050000 + s_lshl_b32 s21, 1, s21 + + # Scratchpad L2 size + s_bfe_u32 s22, s20, 0x050005 + s_lshl_b32 s22, 1, s22 + + # Scratchpad L3 size + s_bfe_u32 s23, s20, 0x05000A + s_lshl_b32 s23, 1, s23 + + # program iterations + s_bfe_u32 s24, s20, 0x04000F + s_lshl_b32 s24, 1, s24 + + # Base address for scratchpads + s_add_u32 s2, s23, 64 + v_mul_hi_u32 v20, v2, s2 + v_mul_lo_u32 v2, v2, s2 + + # v41, v44 = 0 + v_mov_b32 v41, 0 + v_mov_b32 v44, 0 + + ds_read_b32 v6, v0 offset:152 + v_cmp_lt_u32 s[2:3], v1, 4 + ds_read2_b64 v[34:37], v0 offset0:18 offset1:16 + ds_read_b64 v[11:12], v0 offset:136 + s_movk_i32 s9, 0x0 + s_mov_b64 s[6:7], exec + s_andn2_b64 exec, s[6:7], s[2:3] + ds_read_b64 v[13:14], v0 offset:160 + s_andn2_b64 exec, s[6:7], exec + v_mov_b32 v13, 0 + v_mov_b32 v14, 0 + s_mov_b64 exec, s[6:7] + + # compiled program size + s_mov_b64 s[6:7], s[8:9] + s_mulk_i32 s6, 10048 + + v_add3_u32 v5, v0, v5, 64 + s_mov_b64 s[8:9], exec + s_andn2_b64 exec, s[8:9], s[2:3] + ds_read_b64 v[15:16], v0 offset:168 + s_andn2_b64 exec, s[8:9], exec + v_mov_b32 v15, 0 + v_mov_b32 v16, 0 + s_mov_b64 exec, s[8:9] + s_load_dwordx4 s[8:11], s[4:5], 0x30 + + # batch_size + s_load_dword s16, s[4:5], 0x58 + + s_load_dwordx2 s[4:5], s[4:5], 0x50 + v_lshlrev_b32 v1, 3, v1 + v_add_u32 v17, v0, v1 + s_waitcnt lgkmcnt(0) + v_add_co_u32 v2, vcc, s10, v2 + v_mov_b32 v18, s11 + v_addc_co_u32 v18, vcc, v18, v20, vcc + v_mov_b32 v19, 
0xffffff + v_add_co_u32 v6, vcc, s8, v6 + v_mov_b32 v20, s9 + v_addc_co_u32 v20, vcc, v20, 0, vcc + ds_read_b64 v[21:22], v17 + s_add_u32 s4, s4, s6 + s_addc_u32 s5, s5, s7 + v_cndmask_b32 v19, v19, -1, s[2:3] + v_lshl_add_u32 v8, v35, 3, v0 + v_lshl_add_u32 v7, v34, 3, v0 + v_lshl_add_u32 v12, v12, 3, v0 + v_lshl_add_u32 v0, v11, 3, v0 + v_mov_b32 v10, v36 + v_mov_b32 v23, v37 + + # loop counter + s_sub_u32 s2, s24, 1 + + # batch_size + s_mov_b32 s3, s16 + + # Scratchpad masks for scratchpads + v_sub_u32 v38, s21, 8 + v_sub_u32 v39, s22, 8 + v_sub_u32 v50, s23, 8 + + # mask for FSCAL_R + v_mov_b32 v51, 0x80F00000 + + # load scratchpad base address + v_readlane_b32 s0, v2, 0 + v_readlane_b32 s1, v18, 0 + + # save current execution mask + s_mov_b64 s[36:37], exec + + # v41 = 0 on lane 0, set it to 8 on lane 1 + # v44 = 0 on lane 0, set it to 4 on lane 1 + s_mov_b64 exec, 2 + v_mov_b32 v41, 8 + v_mov_b32 v44, 4 + + # load group A registers + # Read low 8 bytes into lane 0 and high 8 bytes into lane 1 + s_mov_b64 exec, 3 + ds_read2_b64 v[52:55], v41 offset0:24 offset1:26 + ds_read2_b64 v[56:59], v41 offset0:28 offset1:30 + + # xmantissaMask + v_mov_b32 v77, (1 << 24) - 1 + + # xexponentMask + ds_read_b64 v[78:79], v41 offset:160 + + # Restore execution mask + s_mov_b64 exec, s[36:37] + + # sign mask (used in FSQRT_R) + v_mov_b32 v82, 0x80000000 + + # High 32 bits of "1.0" constant (used in FDIV_M) + v_mov_b32 v83, (1023 << 20) + + # Used to multiply FP64 values by 0.5 + v_mov_b32 v84, (1 << 20) + + s_getpc_b64 s[14:15] +cur_addr: + + # get addresses of FSQRT_R subroutines + s_add_u32 s40, s14, fsqrt_r_sub0 - cur_addr + s_addc_u32 s41, s15, 0 + s_add_u32 s42, s14, fsqrt_r_sub1 - cur_addr + s_addc_u32 s43, s15, 0 + s_add_u32 s44, s14, fsqrt_r_sub2 - cur_addr + s_addc_u32 s45, s15, 0 + s_add_u32 s46, s14, fsqrt_r_sub3 - cur_addr + s_addc_u32 s47, s15, 0 + + # get addresses of FDIV_M subroutines + s_add_u32 s48, s14, fdiv_m_sub0 - cur_addr + s_addc_u32 s49, s15, 0 +
s_add_u32 s50, s14, fdiv_m_sub1 - cur_addr + s_addc_u32 s51, s15, 0 + s_add_u32 s52, s14, fdiv_m_sub2 - cur_addr + s_addc_u32 s53, s15, 0 + s_add_u32 s54, s14, fdiv_m_sub3 - cur_addr + s_addc_u32 s55, s15, 0 + + # get address for ISMULH_R subroutine + s_add_u32 s56, s14, ismulh_r_sub - cur_addr + s_addc_u32 s57, s15, 0 + + # get address for IMULH_R subroutine + s_add_u32 s58, s14, imulh_r_sub - cur_addr + s_addc_u32 s59, s15, 0 + + # used in IXOR_R instruction + s_mov_b32 s63, -1 + + # used in CBRANCH instruction + s_mov_b32 s70, (0xFF << 8) + s_mov_b32 s71, (0xFF << 9) + s_mov_b32 s72, (0xFF << 10) + s_mov_b32 s73, (0xFF << 11) + s_mov_b32 s74, (0xFF << 12) + s_mov_b32 s75, (0xFF << 13) + s_mov_b32 s76, (0xFF << 14) + s_mov_b32 s77, (0xFF << 15) + s_mov_b32 s78, (0xFF << 16) + s_mov_b32 s79, (0xFF << 17) + s_mov_b32 s80, (0xFF << 18) + s_mov_b32 s81, (0xFF << 19) + s_mov_b32 s82, (0xFF << 20) + s_mov_b32 s83, (0xFF << 21) + s_mov_b32 s84, (0xFF << 22) + s_mov_b32 s85, (0xFF << 23) + + # ScratchpadL3Mask64 + s_sub_u32 s86, s23, 64 + +main_loop: + # const uint2 spMix = as_uint2(R[readReg0] ^ R[readReg1]); + ds_read_b64 v[24:25], v0 + ds_read_b64 v[26:27], v12 + s_waitcnt lgkmcnt(0) + v_xor_b32 v25, v27, v25 + v_xor_b32 v24, v26, v24 + + # spAddr1 ^= spMix.y; + # spAddr0 ^= spMix.x; + v_xor_b32 v10, v25, v10 + v_xor_b32 v23, v24, v23 + + # spAddr1 &= ScratchpadL3Mask64; + # spAddr0 &= ScratchpadL3Mask64; + v_and_b32 v10, s86, v10 + v_and_b32 v23, s86, v23 + + # Offset for scratchpads + # offset1 = spAddr1 + sub * 8 + # offset0 = spAddr0 + sub * 8 + v_add_u32 v10, v10, v1 + v_add_u32 v23, v23, v1 + + # __global ulong* p1 = (__global ulong*)(scratchpad + offset1); + # __global ulong* p0 = (__global ulong*)(scratchpad + offset0); + v_add_co_u32 v26, vcc, v2, v10 + v_addc_co_u32 v27, vcc, v18, 0, vcc + v_add_co_u32 v23, vcc, v2, v23 + v_addc_co_u32 v24, vcc, v18, 0, vcc + + # load from spAddr1 + global_load_dwordx2 v[28:29], v[26:27], off + + # load from spAddr0 + 
global_load_dwordx2 v[30:31], v[23:24], off + s_waitcnt vmcnt(1) + + v_cvt_f64_i32 v[32:33], v28 + v_cvt_f64_i32 v[28:29], v29 + s_waitcnt vmcnt(0) + + # R[sub] ^= *p0; + v_xor_b32 v34, v21, v30 + v_xor_b32 v35, v22, v31 + + v_add_co_u32 v22, vcc, v6, v36 + v_addc_co_u32 v25, vcc, v20, 0, vcc + v_or_b32 v30, v32, v13 + v_and_or_b32 v31, v33, v19, v14 + v_or_b32 v28, v28, v15 + v_and_or_b32 v29, v29, v19, v16 + v_add_co_u32 v21, vcc, v22, v1 + v_addc_co_u32 v22, vcc, v25, 0, vcc + ds_write2_b64 v5, v[30:31], v[28:29] offset1:1 + s_waitcnt lgkmcnt(0) + + # Program 0 + + # load group F,E registers + # Read low 8 bytes into lane 0 and high 8 bytes into lane 1 + s_mov_b64 exec, 3 + ds_read2_b64 v[60:63], v41 offset0:8 offset1:10 + ds_read2_b64 v[64:67], v41 offset0:12 offset1:14 + ds_read2_b64 v[68:71], v41 offset0:16 offset1:18 + ds_read2_b64 v[72:75], v41 offset0:20 offset1:22 + + # load VM integer registers + v_readlane_b32 s16, v34, 0 + v_readlane_b32 s17, v35, 0 + v_readlane_b32 s18, v34, 1 + v_readlane_b32 s19, v35, 1 + v_readlane_b32 s20, v34, 2 + v_readlane_b32 s21, v35, 2 + v_readlane_b32 s22, v34, 3 + v_readlane_b32 s23, v35, 3 + v_readlane_b32 s24, v34, 4 + v_readlane_b32 s25, v35, 4 + v_readlane_b32 s26, v34, 5 + v_readlane_b32 s27, v35, 5 + v_readlane_b32 s28, v34, 6 + v_readlane_b32 s29, v35, 6 + v_readlane_b32 s30, v34, 7 + v_readlane_b32 s31, v35, 7 + + s_waitcnt lgkmcnt(0) + + # call JIT code + s_swappc_b64 s[12:13], s[4:5] + + # store VM integer registers + v_writelane_b32 v28, s16, 0 + v_writelane_b32 v29, s17, 0 + v_writelane_b32 v28, s18, 1 + v_writelane_b32 v29, s19, 1 + v_writelane_b32 v28, s20, 2 + v_writelane_b32 v29, s21, 2 + v_writelane_b32 v28, s22, 3 + v_writelane_b32 v29, s23, 3 + v_writelane_b32 v28, s24, 4 + v_writelane_b32 v29, s25, 4 + v_writelane_b32 v28, s26, 5 + v_writelane_b32 v29, s27, 5 + v_writelane_b32 v28, s28, 6 + v_writelane_b32 v29, s29, 6 + v_writelane_b32 v28, s30, 7 + v_writelane_b32 v29, s31, 7 + + # Write out group F,E 
registers + # Write low 8 bytes from lane 0 and high 8 bytes from lane 1 + ds_write2_b64 v41, v[60:61], v[62:63] offset0:8 offset1:10 + ds_write2_b64 v41, v[64:65], v[66:67] offset0:12 offset1:14 + ds_write2_b64 v41, v[68:69], v[70:71] offset0:16 offset1:18 + ds_write2_b64 v41, v[72:73], v[74:75] offset0:20 offset1:22 + + # Restore execution mask + s_mov_b64 exec, s[36:37] + + # Write out VM integer registers + ds_write_b64 v17, v[28:29] + + global_load_dwordx2 v[21:22], v[21:22], off + s_waitcnt vmcnt(0) & lgkmcnt(0) + v_xor_b32 v21, v28, v21 + v_xor_b32 v22, v29, v22 + ds_read_b32 v28, v7 + ds_read_b32 v29, v8 + ds_write_b64 v17, v[21:22] + s_waitcnt lgkmcnt(1) + ds_read2_b64 v[30:33], v17 offset0:8 offset1:16 + v_xor_b32 v10, v28, v37 + s_waitcnt lgkmcnt(0) + v_xor_b32 v30, v32, v30 + v_xor_b32 v31, v33, v31 + v_xor_b32 v10, v10, v29 + global_store_dwordx2 v[26:27], v[21:22], off + v_and_b32 v10, 0x7fffffc0, v10 + global_store_dwordx2 v[23:24], v[30:31], off + s_cmp_eq_u32 s2, 0 + s_cbranch_scc1 main_loop_end + s_sub_i32 s2, s2, 1 + v_mov_b32 v37, v36 + v_mov_b32 v23, 0 + v_mov_b32 v36, v10 + v_mov_b32 v10, 0 + s_branch main_loop +main_loop_end: + + v_add_co_u32 v0, vcc, v3, v1 + v_addc_co_u32 v1, vcc, v4, 0, vcc + global_store_dwordx2 v[0:1], v[21:22], off + global_store_dwordx2 v[0:1], v[30:31], off inst_offset:64 + global_store_dwordx2 v[0:1], v[32:33], off inst_offset:128 + + # store rounding mode + v_mov_b32 v0, 0 + v_mov_b32 v1, s66 + global_store_dword v0, v1, s[64:65] + +program_end: + s_endpgm + +fsqrt_r_sub0: + s_setreg_b32 hwreg(mode, 2, 2), s67 + v_rsq_f64 v[28:29], v[68:69] + + # Improve initial approximation (can be skipped) + #v_mul_f64 v[42:43], v[28:29], v[68:69] + #v_mul_f64 v[48:49], v[28:29], -0.5 + #v_fma_f64 v[48:49], v[48:49], v[42:43], 0.5 + #v_fma_f64 v[28:29], v[28:29], v[48:49], v[28:29] + + v_mul_f64 v[42:43], v[28:29], v[68:69] + v_mov_b32 v48, v28 + v_sub_u32 v49, v29, v84 + v_mov_b32 v46, v28 + v_xor_b32 v47, v49, v82 + v_fma_f64 
v[46:47], v[46:47], v[42:43], 0.5 + v_fma_f64 v[42:43], v[42:43], v[46:47], v[42:43] + v_fma_f64 v[48:49], v[48:49], v[46:47], v[48:49] + v_fma_f64 v[46:47], -v[42:43], v[42:43], v[68:69] + s_setreg_b32 hwreg(mode, 2, 2), s66 + v_fma_f64 v[42:43], v[46:47], v[48:49], v[42:43] + v_cmpx_class_f64 s[14:15], v[68:69], s[68:69] + v_mov_b32 v68, v42 + v_mov_b32 v69, v43 + s_mov_b64 exec, 3 + s_setpc_b64 s[60:61] + +fsqrt_r_sub1: + s_setreg_b32 hwreg(mode, 2, 2), s67 + v_rsq_f64 v[28:29], v[70:71] + + # Improve initial approximation (can be skipped) + #v_mul_f64 v[42:43], v[28:29], v[70:71] + #v_mul_f64 v[48:49], v[28:29], -0.5 + #v_fma_f64 v[48:49], v[48:49], v[42:43], 0.5 + #v_fma_f64 v[28:29], v[28:29], v[48:49], v[28:29] + + v_mul_f64 v[42:43], v[28:29], v[70:71] + v_mov_b32 v48, v28 + v_sub_u32 v49, v29, v84 + v_mov_b32 v46, v28 + v_xor_b32 v47, v49, v82 + v_fma_f64 v[46:47], v[46:47], v[42:43], 0.5 + v_fma_f64 v[42:43], v[42:43], v[46:47], v[42:43] + v_fma_f64 v[48:49], v[48:49], v[46:47], v[48:49] + v_fma_f64 v[46:47], -v[42:43], v[42:43], v[70:71] + s_setreg_b32 hwreg(mode, 2, 2), s66 + v_fma_f64 v[42:43], v[46:47], v[48:49], v[42:43] + v_cmpx_class_f64 s[14:15], v[70:71], s[68:69] + v_mov_b32 v70, v42 + v_mov_b32 v71, v43 + s_mov_b64 exec, 3 + s_setpc_b64 s[60:61] + +fsqrt_r_sub2: + s_setreg_b32 hwreg(mode, 2, 2), s67 + v_rsq_f64 v[28:29], v[72:73] + + # Improve initial approximation (can be skipped) + #v_mul_f64 v[42:43], v[28:29], v[72:73] + #v_mul_f64 v[48:49], v[28:29], -0.5 + #v_fma_f64 v[48:49], v[48:49], v[42:43], 0.5 + #v_fma_f64 v[28:29], v[28:29], v[48:49], v[28:29] + + v_mul_f64 v[42:43], v[28:29], v[72:73] + v_mov_b32 v48, v28 + v_sub_u32 v49, v29, v84 + v_mov_b32 v46, v28 + v_xor_b32 v47, v49, v82 + v_fma_f64 v[46:47], v[46:47], v[42:43], 0.5 + v_fma_f64 v[42:43], v[42:43], v[46:47], v[42:43] + v_fma_f64 v[48:49], v[48:49], v[46:47], v[48:49] + v_fma_f64 v[46:47], -v[42:43], v[42:43], v[72:73] + s_setreg_b32 hwreg(mode, 2, 2), s66 + v_fma_f64 
v[42:43], v[46:47], v[48:49], v[42:43] + v_cmpx_class_f64 s[14:15], v[72:73], s[68:69] + v_mov_b32 v72, v42 + v_mov_b32 v73, v43 + s_mov_b64 exec, 3 + s_setpc_b64 s[60:61] + +fsqrt_r_sub3: + s_setreg_b32 hwreg(mode, 2, 2), s67 + v_rsq_f64 v[28:29], v[74:75] + + # Improve initial approximation (can be skipped) + #v_mul_f64 v[42:43], v[28:29], v[74:75] + #v_mul_f64 v[48:49], v[28:29], -0.5 + #v_fma_f64 v[48:49], v[48:49], v[42:43], 0.5 + #v_fma_f64 v[28:29], v[28:29], v[48:49], v[28:29] + + v_mul_f64 v[42:43], v[28:29], v[74:75] + v_mov_b32 v48, v28 + v_sub_u32 v49, v29, v84 + v_mov_b32 v46, v28 + v_xor_b32 v47, v49, v82 + v_fma_f64 v[46:47], v[46:47], v[42:43], 0.5 + v_fma_f64 v[42:43], v[42:43], v[46:47], v[42:43] + v_fma_f64 v[48:49], v[48:49], v[46:47], v[48:49] + v_fma_f64 v[46:47], -v[42:43], v[42:43], v[74:75] + s_setreg_b32 hwreg(mode, 2, 2), s66 + v_fma_f64 v[42:43], v[46:47], v[48:49], v[42:43] + v_cmpx_class_f64 s[14:15], v[74:75], s[68:69] + v_mov_b32 v74, v42 + v_mov_b32 v75, v43 + s_mov_b64 exec, 3 + s_setpc_b64 s[60:61] + +fdiv_m_sub0: + v_or_b32 v28, v28, v78 + v_and_or_b32 v29, v29, v77, v79 + s_setreg_b32 hwreg(mode, 2, 2), s67 + v_rcp_f64 v[48:49], v[28:29] + v_fma_f64 v[80:81], -v[28:29], v[48:49], 1.0 + v_fma_f64 v[48:49], v[48:49], v[80:81], v[48:49] + v_mul_f64 v[80:81], v[68:69], v[48:49] + v_fma_f64 v[42:43], -v[28:29], v[80:81], v[68:69] + s_setreg_b32 hwreg(mode, 2, 2), s66 + v_fma_f64 v[42:43], v[42:43], v[48:49], v[80:81] + v_div_fixup_f64 v[80:81], v[42:43], v[28:29], v[68:69] + v_cmpx_eq_f64 s[14:15], v[68:69], v[28:29] + v_mov_b32 v80, 0 + v_mov_b32 v81, v83 + s_mov_b64 exec, 3 + v_mov_b32 v68, v80 + v_mov_b32 v69, v81 + s_setpc_b64 s[60:61] + +fdiv_m_sub1: + v_or_b32 v28, v28, v78 + v_and_or_b32 v29, v29, v77, v79 + s_setreg_b32 hwreg(mode, 2, 2), s67 + v_rcp_f64 v[48:49], v[28:29] + v_fma_f64 v[80:81], -v[28:29], v[48:49], 1.0 + v_fma_f64 v[48:49], v[48:49], v[80:81], v[48:49] + v_mul_f64 v[80:81], v[70:71], v[48:49] + v_fma_f64 
v[42:43], -v[28:29], v[80:81], v[70:71] + s_setreg_b32 hwreg(mode, 2, 2), s66 + v_fma_f64 v[42:43], v[42:43], v[48:49], v[80:81] + v_div_fixup_f64 v[80:81], v[42:43], v[28:29], v[70:71] + v_cmpx_eq_f64 s[14:15], v[70:71], v[28:29] + v_mov_b32 v80, 0 + v_mov_b32 v81, v83 + s_mov_b64 exec, 3 + v_mov_b32 v70, v80 + v_mov_b32 v71, v81 + s_setpc_b64 s[60:61] + +fdiv_m_sub2: + v_or_b32 v28, v28, v78 + v_and_or_b32 v29, v29, v77, v79 + s_setreg_b32 hwreg(mode, 2, 2), s67 + v_rcp_f64 v[48:49], v[28:29] + v_fma_f64 v[80:81], -v[28:29], v[48:49], 1.0 + v_fma_f64 v[48:49], v[48:49], v[80:81], v[48:49] + v_mul_f64 v[80:81], v[72:73], v[48:49] + v_fma_f64 v[42:43], -v[28:29], v[80:81], v[72:73] + s_setreg_b32 hwreg(mode, 2, 2), s66 + v_fma_f64 v[42:43], v[42:43], v[48:49], v[80:81] + v_div_fixup_f64 v[80:81], v[42:43], v[28:29], v[72:73] + v_cmpx_eq_f64 s[14:15], v[72:73], v[28:29] + v_mov_b32 v80, 0 + v_mov_b32 v81, v83 + s_mov_b64 exec, 3 + v_mov_b32 v72, v80 + v_mov_b32 v73, v81 + s_setpc_b64 s[60:61] + +fdiv_m_sub3: + v_or_b32 v28, v28, v78 + v_and_or_b32 v29, v29, v77, v79 + s_setreg_b32 hwreg(mode, 2, 2), s67 + v_rcp_f64 v[48:49], v[28:29] + v_fma_f64 v[80:81], -v[28:29], v[48:49], 1.0 + v_fma_f64 v[48:49], v[48:49], v[80:81], v[48:49] + v_mul_f64 v[80:81], v[74:75], v[48:49] + v_fma_f64 v[42:43], -v[28:29], v[80:81], v[74:75] + s_setreg_b32 hwreg(mode, 2, 2), s66 + v_fma_f64 v[42:43], v[42:43], v[48:49], v[80:81] + v_div_fixup_f64 v[80:81], v[42:43], v[28:29], v[74:75] + v_cmpx_eq_f64 s[14:15], v[74:75], v[28:29] + v_mov_b32 v80, 0 + v_mov_b32 v81, v83 + s_mov_b64 exec, 3 + v_mov_b32 v74, v80 + v_mov_b32 v75, v81 + s_setpc_b64 s[60:61] + +ismulh_r_sub: + s_mov_b64 exec, 1 + v_mov_b32 v45, s14 + v_mul_hi_u32 v40, s38, v45 + v_mov_b32 v47, s15 + v_mad_u64_u32 v[42:43], s[32:33], s38, v47, v[40:41] + v_mov_b32 v40, v42 + v_mad_u64_u32 v[45:46], s[32:33], s39, v45, v[40:41] + v_mad_u64_u32 v[42:43], s[32:33], s39, v47, v[43:44] + v_add_co_u32 v42, vcc, v42, v46 + 
v_addc_co_u32 v43, vcc, 0, v43, vcc + v_readlane_b32 s32, v42, 0 + v_readlane_b32 s33, v43, 0 + s_cmp_lt_i32 s15, 0 + s_cselect_b64 s[34:35], s[38:39], 0 + s_sub_u32 s32, s32, s34 + s_subb_u32 s33, s33, s35 + s_cmp_lt_i32 s39, 0 + s_cselect_b64 s[34:35], s[14:15], 0 + s_sub_u32 s14, s32, s34 + s_subb_u32 s15, s33, s35 + s_mov_b64 exec, 3 + s_setpc_b64 s[60:61] + +imulh_r_sub: + s_mov_b64 exec, 1 + v_mov_b32 v45, s38 + v_mul_hi_u32 v40, s14, v45 + v_mov_b32 v47, s39 + v_mad_u64_u32 v[42:43], s[32:33], s14, v47, v[40:41] + v_mov_b32 v40, v42 + v_mad_u64_u32 v[45:46], s[32:33], s15, v45, v[40:41] + v_mad_u64_u32 v[42:43], s[32:33], s15, v47, v[43:44] + v_add_co_u32 v42, vcc, v42, v46 + v_addc_co_u32 v43, vcc, 0, v43, vcc + v_readlane_b32 s14, v42, 0 + v_readlane_b32 s15, v43, 0 + s_mov_b64 exec, 3 + s_setpc_b64 s[60:61] diff --git a/src/amd/opencl/RandomX/randomx_run_gfx900.h b/src/amd/opencl/RandomX/randomx_run_gfx900.h new file mode 100644 index 00000000..77ca6cca --- /dev/null +++ b/src/amd/opencl/RandomX/randomx_run_gfx900.h @@ -0,0 +1,215 @@ +/* +This file was auto-generated from randomx_run_gfx900.asm: + +clrxasm randomx_run_gfx900.asm -o randomx_run_gfx900.bin +bin2h -c randomx_run_gfx900_bin < randomx_run_gfx900.bin > randomx_run_gfx900.h + +clrxasm can be downloaded here: https://github.com/CLRX/CLRX-mirror/releases +bin2h can be downloaded here: http://www.deadnode.org/sw/bin2h/ +*/ + +static unsigned char randomx_run_gfx900_bin[]={ +0x7f,0x45,0x4c,0x46,0x02,0x01,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x02,0x00,0x5b,0xaf,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xa0,0x17,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x00,0x00,0x40,0x00,0x07,0x00,0x01,0x00,0x00 +,0x2e,0x73,0x68,0x73,0x74,0x72,0x74,0x61,0x62,0x00,0x2e,0x73,0x74,0x72,0x74,0x61,0x62,0x00,0x2e,0x73,0x79,0x6d,0x74,0x61,0x62,0x00,0x2e,0x63,0x6f,0x6d,0x6d,0x65 
+,0x6e,0x74,0x00,0x2e,0x72,0x6f,0x64,0x61,0x74,0x61,0x00,0x2e,0x74,0x65,0x78,0x74,0x00,0x00,0x5f,0x5f,0x4f,0x70,0x65,0x6e,0x43,0x4c,0x5f,0x26,0x5f,0x5f,0x4f,0x70 +,0x65,0x6e,0x43,0x4c,0x5f,0x72,0x61,0x6e,0x64,0x6f,0x6d,0x78,0x5f,0x72,0x75,0x6e,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x5f,0x6d,0x65,0x74,0x61,0x64,0x61,0x74,0x61 +,0x00,0x61,0x63,0x6c,0x5f,0x76,0x65,0x72,0x73,0x69,0x6f,0x6e,0x5f,0x73,0x74,0x72,0x69,0x6e,0x67,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x05,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x38 +,0x07,0x00,0x00,0x00,0x00,0x00,0x00,0x30,0x00,0x00,0x00,0x01,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x27,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x41 +,0x4d,0x44,0x2d,0x43,0x4f,0x4d,0x50,0x2d,0x4c,0x49,0x42,0x2d,0x76,0x30,0x2e,0x38,0x20,0x28,0x30,0x2e,0x30,0x2e,0x53,0x43,0x5f,0x42,0x55,0x49,0x4c,0x44,0x5f,0x4e +,0x55,0x4d,0x42,0x45,0x52,0x29,0x10,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x38,0x07,0x00,0x00,0x00,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x68,0x00 +,0x00,0x00,0x24,0x00,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x40,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x15,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x0d,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 
+,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x06,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x5f,0x5f,0x4f,0x70,0x65,0x6e,0x43,0x4c,0x5f,0x64 +,0x75,0x6d,0x6d,0x79,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x00,0x47,0x46,0x58,0x39,0x00,0x00,0x58,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x06,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x05,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x04,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x58,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x05,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x58,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x05,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x58,0x00,0x00,0x00,0x00,0x00 
+,0x00,0x00,0x0f,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x30,0x00,0x00,0x00,0x07,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x01,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x05,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x58,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x06,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00 +,0x00,0x00,0x40,0x00,0x00,0x00,0x05,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x04,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x58,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x11,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x50,0x00,0x00,0x00,0x05,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x58,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x07,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x60,0x00,0x00,0x00,0x07,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x06,0x00 +,0x00,0x00,0x04,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x05,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x58,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x0a,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 
+,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x70,0x00,0x00,0x00,0x07,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x03,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x05,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x58,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x06,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00 +,0x00,0x00,0x80,0x00,0x00,0x00,0x07,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x09,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x05,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x58,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x0e,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x05,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x90,0x00,0x00,0x00,0x07,0x00 +,0x00,0x00,0x04,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x05,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x58,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x05,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0xa0,0x00,0x00,0x00,0x07,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x08,0x00 +,0x00,0x00,0x04,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x05,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x58,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x0a,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0xb0,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 
+,0x00,0x00,0x00,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x58,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x0d,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00 +,0x00,0x00,0xc0,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x04,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x5f,0x2e,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x5f,0x30,0x00,0x73,0x69,0x7a,0x65,0x5f,0x74,0x00,0x5f,0x2e,0x67,0x6c,0x6f +,0x62,0x61,0x6c,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x5f,0x31,0x00,0x73,0x69,0x7a,0x65,0x5f,0x74,0x00,0x5f,0x2e,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x5f,0x6f,0x66,0x66 +,0x73,0x65,0x74,0x5f,0x32,0x00,0x73,0x69,0x7a,0x65,0x5f,0x74,0x00,0x5f,0x2e,0x70,0x72,0x69,0x6e,0x74,0x66,0x5f,0x62,0x75,0x66,0x66,0x65,0x72,0x00,0x73,0x69,0x7a +,0x65,0x5f,0x74,0x00,0x5f,0x2e,0x76,0x71,0x75,0x65,0x75,0x65,0x5f,0x70,0x6f,0x69,0x6e,0x74,0x65,0x72,0x00,0x73,0x69,0x7a,0x65,0x5f,0x74,0x00,0x5f,0x2e,0x61,0x71 +,0x6c,0x77,0x72,0x61,0x70,0x5f,0x70,0x6f,0x69,0x6e,0x74,0x65,0x72,0x00,0x73,0x69,0x7a,0x65,0x5f,0x74,0x00,0x64,0x61,0x74,0x61,0x73,0x65,0x74,0x00,0x75,0x63,0x68 +,0x61,0x72,0x2a,0x00,0x73,0x63,0x72,0x61,0x74,0x63,0x68,0x70,0x61,0x64,0x00,0x75,0x63,0x68,0x61,0x72,0x2a,0x00,0x72,0x65,0x67,0x69,0x73,0x74,0x65,0x72,0x73,0x00 
+,0x75,0x6c,0x6f,0x6e,0x67,0x2a,0x00,0x72,0x6f,0x75,0x6e,0x64,0x69,0x6e,0x67,0x5f,0x6d,0x6f,0x64,0x65,0x73,0x00,0x75,0x69,0x6e,0x74,0x2a,0x00,0x70,0x72,0x6f,0x67 +,0x72,0x61,0x6d,0x73,0x00,0x75,0x69,0x6e,0x74,0x2a,0x00,0x62,0x61,0x74,0x63,0x68,0x5f,0x73,0x69,0x7a,0x65,0x00,0x75,0x69,0x6e,0x74,0x00,0x72,0x78,0x5f,0x70,0x61 +,0x72,0x61,0x6d,0x65,0x74,0x65,0x72,0x73,0x00,0x75,0x69,0x6e,0x74,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x7f,0x45 +,0x4c,0x46,0x02,0x01,0x01,0x40,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0xe0,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x40,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0xc0,0x0d,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x40,0x00,0x38,0x00,0x01,0x00,0x40,0x00,0x06,0x00,0x01,0x00,0x03,0x00 +,0x00,0x60,0x05,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x74,0x0b +,0x00,0x00,0x00,0x00,0x00,0x00,0x74,0x0b,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x2e,0x73,0x68,0x73,0x74,0x72,0x74,0x61,0x62 +,0x00,0x2e,0x73,0x74,0x72,0x74,0x61,0x62,0x00,0x2e,0x6e,0x6f,0x74,0x65,0x00,0x2e,0x68,0x73,0x61,0x74,0x65,0x78,0x74,0x00,0x2e,0x73,0x79,0x6d,0x74,0x61,0x62,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x26,0x5f,0x5f,0x4f,0x70,0x65,0x6e,0x43,0x4c,0x5f,0x72,0x61,0x6e,0x64,0x6f,0x6d,0x78,0x5f,0x72,0x75,0x6e,0x5f,0x6b,0x65,0x72 +,0x6e,0x65,0x6c,0x00,0x5f,0x5f,0x68,0x73,0x61,0x5f,0x73,0x65,0x63,0x74,0x69,0x6f,0x6e,0x2e,0x68,0x73,0x61,0x74,0x65,0x78,0x74,0x00,0x00,0x00,0x00,0x00,0x04,0x00 +,0x00,0x00,0x08,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x41,0x4d,0x44,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x0c,0x00,0x00,0x00,0x02,0x00 
+,0x00,0x00,0x41,0x4d,0x44,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x01,0x01,0x00,0x04,0x00,0x00,0x00,0x1a,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x41,0x4d +,0x44,0x00,0x04,0x00,0x07,0x00,0x09,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x41,0x4d,0x44,0x00,0x41,0x4d,0x44,0x47,0x50,0x55,0x00,0x00,0x04,0x00 +,0x00,0x00,0x29,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x41,0x4d,0x44,0x00,0x19,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x41,0x4d,0x44,0x20,0x48,0x53 +,0x41,0x20,0x52,0x75,0x6e,0x74,0x69,0x6d,0x65,0x20,0x46,0x69,0x6e,0x61,0x6c,0x69,0x7a,0x65,0x72,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x04,0x00,0x00,0x00,0x1a,0x00 +,0x00,0x00,0x05,0x00,0x00,0x00,0x41,0x4d,0x44,0x00,0x16,0x00,0x2d,0x68,0x73,0x61,0x5f,0x63,0x61,0x6c,0x6c,0x5f,0x63,0x6f,0x6e,0x76,0x65,0x6e,0x74,0x69,0x6f,0x6e +,0x3d,0x30,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00 +,0x00,0x00,0x01,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x5f,0x03,0xac,0x00,0x90,0x00,0x00,0x00,0x29,0x00,0x0a,0x00,0x00,0x00,0x00,0x00,0x00,0x01 +,0x00,0x00,0x00,0x00,0x00,0x00,0x60,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x62,0x00,0x80,0x00,0x80,0x00,0x00,0x00,0x60,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x04,0x04,0x04,0x06,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 
+,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x00 +,0xfc,0xbe,0x00,0x00,0x01,0x00,0x00,0x00,0x84,0xc0,0x00,0x00,0x00,0x00,0x70,0x00,0x8c,0xbf,0x00,0x00,0x93,0xbf,0x00,0x00,0x82,0xbf,0x01,0x00,0xfd,0xd1,0x08,0x0c +,0x01,0x04,0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00,0x82,0x00,0x06,0xc0,0x40,0x00,0x00,0x00,0x02,0x10,0x06,0xc0,0x48,0x00,0x00,0x00,0x7f,0xc0,0x8c,0xbf,0x08,0x82 +,0x10,0x8e,0x40,0x10,0x40,0x80,0x41,0x80,0x41,0x82,0x80,0x02,0x10,0x7e,0x00,0x80,0x50,0xdc,0x08,0x00,0x40,0x08,0x70,0x0f,0x8c,0xbf,0x42,0x00,0x89,0xd2,0x08,0x01 +,0x01,0x00,0x81,0x08,0x42,0xb9,0x80,0x00,0xc3,0xbe,0xff,0x00,0xc4,0xbe,0x00,0x01,0x00,0x00,0x80,0x00,0xc5,0xbe,0x00,0x02,0x02,0x68,0x86,0x02,0x04,0x20,0x85,0x04 +,0x06,0x24,0xbf,0x02,0x02,0x26,0x80,0x02,0x08,0x7e,0x03,0x00,0x8f,0xd2,0x83,0x06,0x02,0x00,0x84,0x02,0x0a,0x24,0x02,0x06,0x06,0x32,0x03,0x02,0x0c,0x7e,0x06,0x09 +,0x08,0x38,0x82,0x02,0x52,0x24,0x03,0x53,0x0c,0x32,0x07,0x6a,0x1c,0xd1,0x04,0x01,0xa9,0x01,0x00,0x80,0x50,0xdc,0x06,0x00,0x7f,0x06,0x80,0x02,0x00,0x7e,0x70,0x0f +,0x8c,0xbf,0x00,0x00,0x1a,0xd8,0x29,0x06,0x00,0x00,0x7f,0xc0,0x8c,0xbf,0x7e,0x01,0x80,0xbe,0x02,0x00,0xdb,0xd0,0x01,0x0f,0x01,0x00,0x68,0x01,0x88,0xbf,0x02,0x05 +,0x02,0xc0,0x5c,0x00,0x00,0x00,0x7f,0xc0,0x8c,0xbf,0x14,0xff,0x95,0x92,0x00,0x00,0x05,0x00,0x81,0x15,0x15,0x8e,0x14,0xff,0x96,0x92,0x05,0x00,0x05,0x00,0x81,0x16 
+,0x16,0x8e,0x14,0xff,0x97,0x92,0x0a,0x00,0x05,0x00,0x81,0x17,0x17,0x8e,0x14,0xff,0x98,0x92,0x0f,0x00,0x04,0x00,0x81,0x18,0x18,0x8e,0x17,0xc0,0x02,0x80,0x14,0x00 +,0x86,0xd2,0x02,0x05,0x00,0x00,0x02,0x00,0x85,0xd2,0x02,0x05,0x00,0x00,0x80,0x02,0x52,0x7e,0x80,0x02,0x58,0x7e,0x98,0x00,0x6c,0xd8,0x00,0x00,0x00,0x06,0x02,0x00 +,0xc9,0xd0,0x01,0x09,0x01,0x00,0x12,0x10,0xee,0xd8,0x00,0x00,0x00,0x22,0x88,0x00,0xec,0xd8,0x00,0x00,0x00,0x0b,0x00,0x00,0x09,0xb0,0x7e,0x01,0x86,0xbe,0x06,0x02 +,0xfe,0x89,0xa0,0x00,0xec,0xd8,0x00,0x00,0x00,0x0d,0x06,0x7e,0xfe,0x89,0x80,0x02,0x1a,0x7e,0x80,0x02,0x1c,0x7e,0x06,0x01,0xfe,0xbe,0x08,0x01,0x86,0xbe,0x40,0x27 +,0x86,0xb7,0x05,0x00,0xff,0xd1,0x00,0x0b,0x02,0x03,0x7e,0x01,0x88,0xbe,0x08,0x02,0xfe,0x89,0xa8,0x00,0xec,0xd8,0x00,0x00,0x00,0x0f,0x08,0x7e,0xfe,0x89,0x80,0x02 +,0x1e,0x7e,0x80,0x02,0x20,0x7e,0x08,0x01,0xfe,0xbe,0x02,0x02,0x0a,0xc0,0x30,0x00,0x00,0x00,0x02,0x04,0x02,0xc0,0x58,0x00,0x00,0x00,0x02,0x01,0x06,0xc0,0x50,0x00 +,0x00,0x00,0x83,0x02,0x02,0x24,0x00,0x03,0x22,0x68,0x7f,0xc0,0x8c,0xbf,0x0a,0x04,0x04,0x32,0x0b,0x02,0x24,0x7e,0x12,0x29,0x24,0x38,0xff,0x02,0x26,0x7e,0xff,0xff +,0xff,0x00,0x08,0x0c,0x0c,0x32,0x09,0x02,0x28,0x7e,0x14,0x6a,0x1c,0xd1,0x14,0x01,0xa9,0x01,0x00,0x00,0xec,0xd8,0x11,0x00,0x00,0x15,0x04,0x06,0x04,0x80,0x05,0x07 +,0x05,0x82,0x13,0x00,0x00,0xd1,0x13,0x83,0x09,0x00,0x08,0x00,0xfd,0xd1,0x23,0x07,0x01,0x04,0x07,0x00,0xfd,0xd1,0x22,0x07,0x01,0x04,0x0c,0x00,0xfd,0xd1,0x0c,0x07 +,0x01,0x04,0x00,0x00,0xfd,0xd1,0x0b,0x07,0x01,0x04,0x24,0x03,0x14,0x7e,0x25,0x03,0x2e,0x7e,0x18,0x81,0x82,0x80,0x10,0x00,0x83,0xbe,0x26,0x00,0x35,0xd1,0x15,0x10 +,0x01,0x00,0x27,0x00,0x35,0xd1,0x16,0x10,0x01,0x00,0x32,0x00,0x35,0xd1,0x17,0x10,0x01,0x00,0xff,0x02,0x66,0x7e,0x00,0x00,0xf0,0x80,0x00,0x00,0x89,0xd2,0x02,0x01 +,0x01,0x00,0x01,0x00,0x89,0xd2,0x12,0x01,0x01,0x00,0x7e,0x01,0xa4,0xbe,0x82,0x01,0xfe,0xbe,0x88,0x02,0x52,0x7e,0x84,0x02,0x58,0x7e,0x83,0x01,0xfe,0xbe,0x18,0x1a 
+,0xee,0xd8,0x29,0x00,0x00,0x34,0x1c,0x1e,0xee,0xd8,0x29,0x00,0x00,0x38,0xff,0x02,0x9a,0x7e,0xff,0xff,0xff,0x00,0xa0,0x00,0xec,0xd8,0x29,0x00,0x00,0x4e,0x24,0x01 +,0xfe,0xbe,0xff,0x02,0xa4,0x7e,0x00,0x00,0x00,0x80,0xff,0x02,0xa6,0x7e,0x00,0x00,0xf0,0x3f,0xff,0x02,0xa8,0x7e,0x00,0x00,0x10,0x00,0x00,0x1c,0x8e,0xbe,0x0e,0xff +,0x28,0x80,0xc4,0x03,0x00,0x00,0x0f,0x80,0x29,0x82,0x0e,0xff,0x2a,0x80,0x28,0x04,0x00,0x00,0x0f,0x80,0x2b,0x82,0x0e,0xff,0x2c,0x80,0x8c,0x04,0x00,0x00,0x0f,0x80 +,0x2d,0x82,0x0e,0xff,0x2e,0x80,0xf0,0x04,0x00,0x00,0x0f,0x80,0x2f,0x82,0x0e,0xff,0x30,0x80,0x54,0x05,0x00,0x00,0x0f,0x80,0x31,0x82,0x0e,0xff,0x32,0x80,0xbc,0x05 +,0x00,0x00,0x0f,0x80,0x33,0x82,0x0e,0xff,0x34,0x80,0x24,0x06,0x00,0x00,0x0f,0x80,0x35,0x82,0x0e,0xff,0x36,0x80,0x8c,0x06,0x00,0x00,0x0f,0x80,0x37,0x82,0x0e,0xff +,0x38,0x80,0xf4,0x06,0x00,0x00,0x0f,0x80,0x39,0x82,0x0e,0xff,0x3a,0x80,0x64,0x07,0x00,0x00,0x0f,0x80,0x3b,0x82,0xc1,0x00,0xbf,0xbe,0xff,0x00,0xc6,0xbe,0x00,0xff +,0x00,0x00,0xff,0x00,0xc7,0xbe,0x00,0xfe,0x01,0x00,0xff,0x00,0xc8,0xbe,0x00,0xfc,0x03,0x00,0xff,0x00,0xc9,0xbe,0x00,0xf8,0x07,0x00,0xff,0x00,0xca,0xbe,0x00,0xf0 +,0x0f,0x00,0xff,0x00,0xcb,0xbe,0x00,0xe0,0x1f,0x00,0xff,0x00,0xcc,0xbe,0x00,0xc0,0x3f,0x00,0xff,0x00,0xcd,0xbe,0x00,0x80,0x7f,0x00,0xff,0x00,0xce,0xbe,0x00,0x00 +,0xff,0x00,0xff,0x00,0xcf,0xbe,0x00,0x00,0xfe,0x01,0xff,0x00,0xd0,0xbe,0x00,0x00,0xfc,0x03,0xff,0x00,0xd1,0xbe,0x00,0x00,0xf8,0x07,0xff,0x00,0xd2,0xbe,0x00,0x00 +,0xf0,0x0f,0xff,0x00,0xd3,0xbe,0x00,0x00,0xe0,0x1f,0xff,0x00,0xd4,0xbe,0x00,0x00,0xc0,0x3f,0xff,0x00,0xd5,0xbe,0x00,0x00,0x80,0x7f,0x17,0xc0,0xd6,0x80,0x00,0x00 +,0xec,0xd8,0x00,0x00,0x00,0x18,0x00,0x00,0xec,0xd8,0x0c,0x00,0x00,0x1a,0x7f,0xc0,0x8c,0xbf,0x1b,0x33,0x32,0x2a,0x1a,0x31,0x30,0x2a,0x19,0x15,0x14,0x2a,0x18,0x2f +,0x2e,0x2a,0x56,0x14,0x14,0x26,0x56,0x2e,0x2e,0x26,0x0a,0x03,0x14,0x68,0x17,0x03,0x2e,0x68,0x02,0x15,0x34,0x32,0x1b,0x6a,0x1c,0xd1,0x12,0x01,0xa9,0x01,0x02,0x2f 
+,0x2e,0x32,0x18,0x6a,0x1c,0xd1,0x12,0x01,0xa9,0x01,0x00,0x80,0x54,0xdc,0x1a,0x00,0x7f,0x1c,0x00,0x80,0x54,0xdc,0x17,0x00,0x7f,0x1e,0x71,0x0f,0x8c,0xbf,0x1c,0x09 +,0x40,0x7e,0x1d,0x09,0x38,0x7e,0x70,0x0f,0x8c,0xbf,0x15,0x3d,0x44,0x2a,0x16,0x3f,0x46,0x2a,0x06,0x49,0x2c,0x32,0x19,0x6a,0x1c,0xd1,0x14,0x01,0xa9,0x01,0x20,0x1b +,0x3c,0x28,0x1f,0x00,0x01,0xd2,0x21,0x27,0x3a,0x04,0x1c,0x1f,0x38,0x28,0x1d,0x00,0x01,0xd2,0x1d,0x27,0x42,0x04,0x16,0x03,0x2a,0x32,0x16,0x6a,0x1c,0xd1,0x19,0x01 +,0xa9,0x01,0x00,0x01,0x9c,0xd8,0x05,0x1e,0x1c,0x00,0x7f,0xc0,0x8c,0xbf,0x83,0x01,0xfe,0xbe,0x08,0x0a,0xee,0xd8,0x29,0x00,0x00,0x3c,0x0c,0x0e,0xee,0xd8,0x29,0x00 +,0x00,0x40,0x10,0x12,0xee,0xd8,0x29,0x00,0x00,0x44,0x14,0x16,0xee,0xd8,0x29,0x00,0x00,0x48,0x10,0x00,0x89,0xd2,0x22,0x01,0x01,0x00,0x11,0x00,0x89,0xd2,0x23,0x01 +,0x01,0x00,0x12,0x00,0x89,0xd2,0x22,0x03,0x01,0x00,0x13,0x00,0x89,0xd2,0x23,0x03,0x01,0x00,0x14,0x00,0x89,0xd2,0x22,0x05,0x01,0x00,0x15,0x00,0x89,0xd2,0x23,0x05 +,0x01,0x00,0x16,0x00,0x89,0xd2,0x22,0x07,0x01,0x00,0x17,0x00,0x89,0xd2,0x23,0x07,0x01,0x00,0x18,0x00,0x89,0xd2,0x22,0x09,0x01,0x00,0x19,0x00,0x89,0xd2,0x23,0x09 +,0x01,0x00,0x1a,0x00,0x89,0xd2,0x22,0x0b,0x01,0x00,0x1b,0x00,0x89,0xd2,0x23,0x0b,0x01,0x00,0x1c,0x00,0x89,0xd2,0x22,0x0d,0x01,0x00,0x1d,0x00,0x89,0xd2,0x23,0x0d +,0x01,0x00,0x1e,0x00,0x89,0xd2,0x22,0x0f,0x01,0x00,0x1f,0x00,0x89,0xd2,0x23,0x0f,0x01,0x00,0x7f,0xc0,0x8c,0xbf,0x04,0x1e,0x8c,0xbe,0x1c,0x00,0x8a,0xd2,0x10,0x00 +,0x01,0x00,0x1d,0x00,0x8a,0xd2,0x11,0x00,0x01,0x00,0x1c,0x00,0x8a,0xd2,0x12,0x02,0x01,0x00,0x1d,0x00,0x8a,0xd2,0x13,0x02,0x01,0x00,0x1c,0x00,0x8a,0xd2,0x14,0x04 +,0x01,0x00,0x1d,0x00,0x8a,0xd2,0x15,0x04,0x01,0x00,0x1c,0x00,0x8a,0xd2,0x16,0x06,0x01,0x00,0x1d,0x00,0x8a,0xd2,0x17,0x06,0x01,0x00,0x1c,0x00,0x8a,0xd2,0x18,0x08 +,0x01,0x00,0x1d,0x00,0x8a,0xd2,0x19,0x08,0x01,0x00,0x1c,0x00,0x8a,0xd2,0x1a,0x0a,0x01,0x00,0x1d,0x00,0x8a,0xd2,0x1b,0x0a,0x01,0x00,0x1c,0x00,0x8a,0xd2,0x1c,0x0c 
+,0x01,0x00,0x1d,0x00,0x8a,0xd2,0x1d,0x0c,0x01,0x00,0x1c,0x00,0x8a,0xd2,0x1e,0x0e,0x01,0x00,0x1d,0x00,0x8a,0xd2,0x1f,0x0e,0x01,0x00,0x08,0x0a,0x9c,0xd8,0x29,0x3c +,0x3e,0x00,0x0c,0x0e,0x9c,0xd8,0x29,0x40,0x42,0x00,0x10,0x12,0x9c,0xd8,0x29,0x44,0x46,0x00,0x14,0x16,0x9c,0xd8,0x29,0x48,0x4a,0x00,0x24,0x01,0xfe,0xbe,0x00,0x00 +,0x9a,0xd8,0x11,0x1c,0x00,0x00,0x00,0x80,0x54,0xdc,0x15,0x00,0x7f,0x15,0x70,0x00,0x8c,0xbf,0x1c,0x2b,0x2a,0x2a,0x1d,0x2d,0x2c,0x2a,0x00,0x00,0x6c,0xd8,0x07,0x00 +,0x00,0x1c,0x00,0x00,0x6c,0xd8,0x08,0x00,0x00,0x1d,0x00,0x00,0x9a,0xd8,0x11,0x15,0x00,0x00,0x7f,0xc1,0x8c,0xbf,0x08,0x10,0xee,0xd8,0x11,0x00,0x00,0x1e,0x1c,0x4b +,0x14,0x2a,0x7f,0xc0,0x8c,0xbf,0x20,0x3d,0x3c,0x2a,0x21,0x3f,0x3e,0x2a,0x0a,0x3b,0x14,0x2a,0x00,0x80,0x74,0xdc,0x1a,0x15,0x7f,0x00,0xff,0x14,0x14,0x26,0xc0,0xff +,0xff,0x7f,0x00,0x80,0x74,0xdc,0x17,0x1e,0x7f,0x00,0x02,0x80,0x06,0xbf,0x06,0x00,0x85,0xbf,0x02,0x81,0x82,0x81,0x24,0x03,0x4a,0x7e,0x80,0x02,0x2e,0x7e,0x0a,0x03 +,0x48,0x7e,0x80,0x02,0x14,0x7e,0x5d,0xff,0x82,0xbf,0x03,0x03,0x00,0x32,0x01,0x6a,0x1c,0xd1,0x04,0x01,0xa9,0x01,0x00,0x80,0x74,0xdc,0x00,0x15,0x7f,0x00,0x40,0x80 +,0x74,0xdc,0x00,0x1e,0x7f,0x00,0x80,0x80,0x74,0xdc,0x00,0x20,0x7f,0x00,0x80,0x02,0x00,0x7e,0x42,0x02,0x02,0x7e,0x00,0x80,0x70,0xdc,0x00,0x01,0x40,0x00,0x00,0x00 +,0x81,0xbf,0x81,0x08,0x43,0xb9,0x44,0x4d,0x38,0x7e,0x2a,0x00,0x81,0xd2,0x1c,0x89,0x02,0x00,0x1c,0x03,0x60,0x7e,0x1d,0xa9,0x62,0x6a,0x1c,0x03,0x5c,0x7e,0x31,0xa5 +,0x5e,0x2a,0x2e,0x00,0xcc,0xd1,0x2e,0x55,0xc2,0x03,0x2a,0x00,0xcc,0xd1,0x2a,0x5d,0xaa,0x04,0x30,0x00,0xcc,0xd1,0x30,0x5d,0xc2,0x04,0x2e,0x00,0xcc,0xd1,0x2a,0x55 +,0x12,0x25,0x81,0x08,0x42,0xb9,0x2a,0x00,0xcc,0xd1,0x2e,0x61,0xaa,0x04,0x0e,0x00,0x13,0xd0,0x44,0x89,0x00,0x00,0x2a,0x03,0x88,0x7e,0x2b,0x03,0x8a,0x7e,0x83,0x01 +,0xfe,0xbe,0x3c,0x1d,0x80,0xbe,0x81,0x08,0x43,0xb9,0x46,0x4d,0x38,0x7e,0x2a,0x00,0x81,0xd2,0x1c,0x8d,0x02,0x00,0x1c,0x03,0x60,0x7e,0x1d,0xa9,0x62,0x6a,0x1c,0x03 
+,0x5c,0x7e,0x31,0xa5,0x5e,0x2a,0x2e,0x00,0xcc,0xd1,0x2e,0x55,0xc2,0x03,0x2a,0x00,0xcc,0xd1,0x2a,0x5d,0xaa,0x04,0x30,0x00,0xcc,0xd1,0x30,0x5d,0xc2,0x04,0x2e,0x00 +,0xcc,0xd1,0x2a,0x55,0x1a,0x25,0x81,0x08,0x42,0xb9,0x2a,0x00,0xcc,0xd1,0x2e,0x61,0xaa,0x04,0x0e,0x00,0x13,0xd0,0x46,0x89,0x00,0x00,0x2a,0x03,0x8c,0x7e,0x2b,0x03 +,0x8e,0x7e,0x83,0x01,0xfe,0xbe,0x3c,0x1d,0x80,0xbe,0x81,0x08,0x43,0xb9,0x48,0x4d,0x38,0x7e,0x2a,0x00,0x81,0xd2,0x1c,0x91,0x02,0x00,0x1c,0x03,0x60,0x7e,0x1d,0xa9 +,0x62,0x6a,0x1c,0x03,0x5c,0x7e,0x31,0xa5,0x5e,0x2a,0x2e,0x00,0xcc,0xd1,0x2e,0x55,0xc2,0x03,0x2a,0x00,0xcc,0xd1,0x2a,0x5d,0xaa,0x04,0x30,0x00,0xcc,0xd1,0x30,0x5d +,0xc2,0x04,0x2e,0x00,0xcc,0xd1,0x2a,0x55,0x22,0x25,0x81,0x08,0x42,0xb9,0x2a,0x00,0xcc,0xd1,0x2e,0x61,0xaa,0x04,0x0e,0x00,0x13,0xd0,0x48,0x89,0x00,0x00,0x2a,0x03 +,0x90,0x7e,0x2b,0x03,0x92,0x7e,0x83,0x01,0xfe,0xbe,0x3c,0x1d,0x80,0xbe,0x81,0x08,0x43,0xb9,0x4a,0x4d,0x38,0x7e,0x2a,0x00,0x81,0xd2,0x1c,0x95,0x02,0x00,0x1c,0x03 +,0x60,0x7e,0x1d,0xa9,0x62,0x6a,0x1c,0x03,0x5c,0x7e,0x31,0xa5,0x5e,0x2a,0x2e,0x00,0xcc,0xd1,0x2e,0x55,0xc2,0x03,0x2a,0x00,0xcc,0xd1,0x2a,0x5d,0xaa,0x04,0x30,0x00 +,0xcc,0xd1,0x30,0x5d,0xc2,0x04,0x2e,0x00,0xcc,0xd1,0x2a,0x55,0x2a,0x25,0x81,0x08,0x42,0xb9,0x2a,0x00,0xcc,0xd1,0x2e,0x61,0xaa,0x04,0x0e,0x00,0x13,0xd0,0x4a,0x89 +,0x00,0x00,0x2a,0x03,0x94,0x7e,0x2b,0x03,0x96,0x7e,0x83,0x01,0xfe,0xbe,0x3c,0x1d,0x80,0xbe,0x1c,0x9d,0x38,0x28,0x1d,0x00,0x01,0xd2,0x1d,0x9b,0x3e,0x05,0x81,0x08 +,0x43,0xb9,0x1c,0x4b,0x60,0x7e,0x50,0x00,0xcc,0xd1,0x1c,0x61,0xca,0x23,0x30,0x00,0xcc,0xd1,0x30,0xa1,0xc2,0x04,0x50,0x00,0x81,0xd2,0x44,0x61,0x02,0x00,0x2a,0x00 +,0xcc,0xd1,0x1c,0xa1,0x12,0x25,0x81,0x08,0x42,0xb9,0x2a,0x00,0xcc,0xd1,0x2a,0x61,0x42,0x05,0x50,0x00,0xdf,0xd1,0x2a,0x39,0x12,0x05,0x0e,0x00,0x72,0xd0,0x44,0x39 +,0x02,0x00,0x80,0x02,0xa0,0x7e,0x53,0x03,0xa2,0x7e,0x83,0x01,0xfe,0xbe,0x50,0x03,0x88,0x7e,0x51,0x03,0x8a,0x7e,0x3c,0x1d,0x80,0xbe,0x1c,0x9d,0x38,0x28,0x1d,0x00 
+,0x01,0xd2,0x1d,0x9b,0x3e,0x05,0x81,0x08,0x43,0xb9,0x1c,0x4b,0x60,0x7e,0x50,0x00,0xcc,0xd1,0x1c,0x61,0xca,0x23,0x30,0x00,0xcc,0xd1,0x30,0xa1,0xc2,0x04,0x50,0x00 +,0x81,0xd2,0x46,0x61,0x02,0x00,0x2a,0x00,0xcc,0xd1,0x1c,0xa1,0x1a,0x25,0x81,0x08,0x42,0xb9,0x2a,0x00,0xcc,0xd1,0x2a,0x61,0x42,0x05,0x50,0x00,0xdf,0xd1,0x2a,0x39 +,0x1a,0x05,0x0e,0x00,0x72,0xd0,0x46,0x39,0x02,0x00,0x80,0x02,0xa0,0x7e,0x53,0x03,0xa2,0x7e,0x83,0x01,0xfe,0xbe,0x50,0x03,0x8c,0x7e,0x51,0x03,0x8e,0x7e,0x3c,0x1d +,0x80,0xbe,0x1c,0x9d,0x38,0x28,0x1d,0x00,0x01,0xd2,0x1d,0x9b,0x3e,0x05,0x81,0x08,0x43,0xb9,0x1c,0x4b,0x60,0x7e,0x50,0x00,0xcc,0xd1,0x1c,0x61,0xca,0x23,0x30,0x00 +,0xcc,0xd1,0x30,0xa1,0xc2,0x04,0x50,0x00,0x81,0xd2,0x48,0x61,0x02,0x00,0x2a,0x00,0xcc,0xd1,0x1c,0xa1,0x22,0x25,0x81,0x08,0x42,0xb9,0x2a,0x00,0xcc,0xd1,0x2a,0x61 +,0x42,0x05,0x50,0x00,0xdf,0xd1,0x2a,0x39,0x22,0x05,0x0e,0x00,0x72,0xd0,0x48,0x39,0x02,0x00,0x80,0x02,0xa0,0x7e,0x53,0x03,0xa2,0x7e,0x83,0x01,0xfe,0xbe,0x50,0x03 +,0x90,0x7e,0x51,0x03,0x92,0x7e,0x3c,0x1d,0x80,0xbe,0x1c,0x9d,0x38,0x28,0x1d,0x00,0x01,0xd2,0x1d,0x9b,0x3e,0x05,0x81,0x08,0x43,0xb9,0x1c,0x4b,0x60,0x7e,0x50,0x00 +,0xcc,0xd1,0x1c,0x61,0xca,0x23,0x30,0x00,0xcc,0xd1,0x30,0xa1,0xc2,0x04,0x50,0x00,0x81,0xd2,0x4a,0x61,0x02,0x00,0x2a,0x00,0xcc,0xd1,0x1c,0xa1,0x2a,0x25,0x81,0x08 +,0x42,0xb9,0x2a,0x00,0xcc,0xd1,0x2a,0x61,0x42,0x05,0x50,0x00,0xdf,0xd1,0x2a,0x39,0x2a,0x05,0x0e,0x00,0x72,0xd0,0x4a,0x39,0x02,0x00,0x80,0x02,0xa0,0x7e,0x53,0x03 +,0xa2,0x7e,0x83,0x01,0xfe,0xbe,0x50,0x03,0x94,0x7e,0x51,0x03,0x96,0x7e,0x3c,0x1d,0x80,0xbe,0x81,0x01,0xfe,0xbe,0x0e,0x02,0x5a,0x7e,0x28,0x00,0x86,0xd2,0x26,0x5a +,0x02,0x00,0x0f,0x02,0x5e,0x7e,0x2a,0x20,0xe8,0xd1,0x26,0x5e,0xa2,0x04,0x2a,0x03,0x50,0x7e,0x2d,0x20,0xe8,0xd1,0x27,0x5a,0xa2,0x04,0x2a,0x20,0xe8,0xd1,0x27,0x5e +,0xae,0x04,0x2a,0x5d,0x54,0x32,0x80,0x56,0x56,0x38,0x20,0x00,0x89,0xd2,0x2a,0x01,0x01,0x00,0x21,0x00,0x89,0xd2,0x2b,0x01,0x01,0x00,0x0f,0x80,0x04,0xbf,0x26,0x80 
+,0xa2,0x85,0x20,0x22,0xa0,0x80,0x21,0x23,0xa1,0x82,0x27,0x80,0x04,0xbf,0x0e,0x80,0xa2,0x85,0x20,0x22,0x8e,0x80,0x21,0x23,0x8f,0x82,0x83,0x01,0xfe,0xbe,0x3c,0x1d +,0x80,0xbe,0x81,0x01,0xfe,0xbe,0x26,0x02,0x5a,0x7e,0x28,0x00,0x86,0xd2,0x0e,0x5a,0x02,0x00,0x27,0x02,0x5e,0x7e,0x2a,0x20,0xe8,0xd1,0x0e,0x5e,0xa2,0x04,0x2a,0x03 +,0x50,0x7e,0x2d,0x20,0xe8,0xd1,0x0f,0x5a,0xa2,0x04,0x2a,0x20,0xe8,0xd1,0x0f,0x5e,0xae,0x04,0x2a,0x5d,0x54,0x32,0x80,0x56,0x56,0x38,0x0e,0x00,0x89,0xd2,0x2a,0x01 +,0x01,0x00,0x0f,0x00,0x89,0xd2,0x2b,0x01,0x01,0x00,0x83,0x01,0xfe,0xbe,0x3c,0x1d,0x80,0xbe,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x1a,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x74,0x0b +,0x00,0x00,0x00,0x00,0x00,0x00,0x1e,0x00,0x00,0x00,0x03,0x00,0x04,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00 +,0x00,0x00,0x03,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x78,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x2a,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x0b,0x00 +,0x00,0x00,0x03,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xa8,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x34,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x13,0x00 
+,0x00,0x00,0x07,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xe0,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xc8,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x19,0x00 +,0x00,0x00,0x01,0x00,0x00,0x00,0x07,0x00,0xc0,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x74,0x0b +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x22,0x00 +,0x00,0x00,0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x78,0x0d,0x00,0x00,0x00,0x00,0x00,0x00,0x48,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01 +,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x32 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x0b +,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x20,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x72,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x43 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x13 
+,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xb8,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x48 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x08,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x18,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x1b +,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x27 +,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x24 +,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x27,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x38 +,0x07,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x2c +,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x06,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x5f,0x08,0x00,0x00,0x00,0x00,0x00,0x00,0x40 +,0x0f,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00}; +static const int randomx_run_gfx900_bin_size=6496; diff --git a/src/base/net/Pool.cpp b/src/base/net/Pool.cpp index 9184dffe..8071d4bc 100644 --- a/src/base/net/Pool.cpp +++ b/src/base/net/Pool.cpp @@ -503,6 +503,7 @@ void xmrig::Pool::rebuild() # ifdef XMRIG_ALGO_RANDOMX addVariant(VARIANT_RX_WOW); addVariant(VARIANT_RX_LOKI); + addVariant(VARIANT_RX_0); # endif addVariant(VARIANT_AUTO); # endif diff --git a/src/common/crypto/Algorithm.cpp b/src/common/crypto/Algorithm.cpp index 3d72be76..6befc88b 100644 --- a/src/common/crypto/Algorithm.cpp +++ b/src/common/crypto/Algorithm.cpp @@ -72,6 +72,7 @@ static AlgoData const algorithms[] = { { "randomx/wow", "rx/wow", xmrig::RANDOM_X, 
xmrig::VARIANT_RX_WOW }, { "randomx/loki", "rx/loki", xmrig::RANDOM_X, xmrig::VARIANT_RX_LOKI }, + { "randomx/0", "rx/0", xmrig::RANDOM_X, xmrig::VARIANT_RX_0 }, # ifndef XMRIG_NO_AEON { "cryptonight-lite", "cn-lite", xmrig::CRYPTONIGHT_LITE, xmrig::VARIANT_AUTO }, @@ -144,6 +145,7 @@ static const char *variants[] = { "double", "rx/wow", "rx/loki", + "rx/0", }; diff --git a/src/common/xmrig.h b/src/common/xmrig.h index cbfe330e..6bf29c8a 100644 --- a/src/common/xmrig.h +++ b/src/common/xmrig.h @@ -82,6 +82,7 @@ enum Variant { VARIANT_DOUBLE = 16, // CryptoNight variant 2 with double iterations (X-CASH) VARIANT_RX_WOW = 17, // RandomX (Wownero) VARIANT_RX_LOKI = 18, // RandomX (Loki) + VARIANT_RX_0 = 19, // RandomX (Monero) VARIANT_MAX }; diff --git a/src/crypto/CryptoNight.cpp b/src/crypto/CryptoNight.cpp index 408d21f6..27b4b8d6 100644 --- a/src/crypto/CryptoNight.cpp +++ b/src/crypto/CryptoNight.cpp @@ -245,6 +245,7 @@ CryptoNight::cn_hash_fun CryptoNight::fn(xmrig::Algo algorithm, xmrig::AlgoVerif nullptr, nullptr, // VARIANT_RX_WOW nullptr, nullptr, // VARIANT_RX_LOKI + nullptr, nullptr, // VARIANT_RX_0 # ifndef XMRIG_NO_AEON cryptonight_single_hash, @@ -270,6 +271,7 @@ CryptoNight::cn_hash_fun CryptoNight::fn(xmrig::Algo algorithm, xmrig::AlgoVerif nullptr, nullptr, // VARIANT_DOUBLE nullptr, nullptr, // VARIANT_RX_WOW nullptr, nullptr, // VARIANT_RX_LOKI + nullptr, nullptr, // VARIANT_RX_0 # else nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, @@ -280,7 +282,7 @@ CryptoNight::cn_hash_fun CryptoNight::fn(xmrig::Algo algorithm, xmrig::AlgoVerif nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, - nullptr, nullptr, + nullptr, nullptr, nullptr, nullptr, # endif # ifndef XMRIG_NO_SUMO @@ -311,6 +313,7 @@ CryptoNight::cn_hash_fun CryptoNight::fn(xmrig::Algo algorithm, xmrig::AlgoVerif nullptr, nullptr, // VARIANT_DOUBLE nullptr, nullptr, // VARIANT_RX_WOW nullptr, nullptr, // 
VARIANT_RX_LOKI + nullptr, nullptr, // VARIANT_RX_0 # else nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, @@ -321,7 +324,7 @@ CryptoNight::cn_hash_fun CryptoNight::fn(xmrig::Algo algorithm, xmrig::AlgoVerif nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, - nullptr, nullptr, + nullptr, nullptr, nullptr, nullptr, # endif # ifndef XMRIG_NO_CN_PICO nullptr, nullptr, // VARIANT_0 @@ -350,6 +353,7 @@ CryptoNight::cn_hash_fun CryptoNight::fn(xmrig::Algo algorithm, xmrig::AlgoVerif nullptr, nullptr, // VARIANT_DOUBLE nullptr, nullptr, // VARIANT_RX_WOW nullptr, nullptr, // VARIANT_RX_LOKI + nullptr, nullptr, // VARIANT_RX_0 # else nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, @@ -360,7 +364,7 @@ CryptoNight::cn_hash_fun CryptoNight::fn(xmrig::Algo algorithm, xmrig::AlgoVerif nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, - nullptr, nullptr, + nullptr, nullptr, nullptr, nullptr, # endif nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, @@ -371,7 +375,7 @@ CryptoNight::cn_hash_fun CryptoNight::fn(xmrig::Algo algorithm, xmrig::AlgoVerif nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, - nullptr, nullptr, + nullptr, nullptr, nullptr, nullptr, }; static_assert((VARIANT_MAX * 2 * ALGO_MAX) == sizeof(func_table) / sizeof(func_table[0]), "func_table size mismatch"); diff --git a/src/workers/OclThread.cpp b/src/workers/OclThread.cpp index 543e4769..51ca0943 100644 --- a/src/workers/OclThread.cpp +++ b/src/workers/OclThread.cpp @@ -44,6 +44,9 @@ static const char *kIntensity = "intensity"; static const char *kMemChunk = "mem_chunk"; static const char *kStridedIndex = "strided_index"; static const char *kUnroll = "unroll"; +#ifdef XMRIG_ALGO_RANDOMX +static const char *kGCNAsm = "gcn_asm"; +#endif static const char *kWorksize = 
"worksize"; static const char *kBFactor = "bfactor"; @@ -69,6 +72,9 @@ xmrig::OclThread::OclThread(const rapidjson::Value &object) : setAffinity(Json::getInt64(object, kAffineToCpu, -1)); setMemChunk(Json::getInt(object, kMemChunk, m_ctx->memChunk)); setUnrollFactor(Json::getInt(object, kUnroll, m_ctx->unrollFactor)); +#ifdef XMRIG_ALGO_RANDOMX + setGCNAsm(Json::getInt(object, kGCNAsm, m_ctx->gcnAsm)); +#endif setCompMode(Json::getBool(object, kCompMode, true)); const rapidjson::Value &stridedIndex = object[kStridedIndex]; @@ -190,6 +196,13 @@ void xmrig::OclThread::setUnrollFactor(int unrollFactor) m_ctx->unrollFactor = unrollFactor > 128 ? 128 : unrollFactor; } +#ifdef XMRIG_ALGO_RANDOMX +void xmrig::OclThread::setGCNAsm(int gcnAsm) +{ + m_ctx->gcnAsm = gcnAsm; +} +#endif + void xmrig::OclThread::setWorksize(size_t worksize) { diff --git a/src/workers/OclThread.h b/src/workers/OclThread.h index 265f10cc..707be02e 100644 --- a/src/workers/OclThread.h +++ b/src/workers/OclThread.h @@ -69,6 +69,9 @@ class OclThread : public xmrig::IThread void setStridedIndex(int stridedIndex); void setThreadsCountByGPU(size_t threads); void setUnrollFactor(int unrollFactor); +#ifdef XMRIG_ALGO_RANDOMX + void setGCNAsm(int gcnAsm); +#endif void setWorksize(size_t worksize); void setBFactor(size_t bfactor); diff --git a/src/workers/Workers.cpp b/src/workers/Workers.cpp index 97b66780..9dfca4ab 100644 --- a/src/workers/Workers.cpp +++ b/src/workers/Workers.cpp @@ -363,6 +363,7 @@ void Workers::onResult(uv_async_t *handle) case xmrig::VARIANT_RX_LOKI: randomx_apply_config(RandomX_LokiConfig); break; + case xmrig::VARIANT_RX_0: default: randomx_apply_config(RandomX_MoneroConfig); break; @@ -479,6 +480,7 @@ randomx_dataset* Workers::getDataset(const uint8_t* seed_hash, xmrig::Variant va case xmrig::VARIANT_RX_LOKI: randomx_apply_config(RandomX_LokiConfig); break; + case xmrig::VARIANT_RX_0: default: randomx_apply_config(RandomX_MoneroConfig); break; From 
cfb83ab67e7c2d0724d6059682d314f87e0fb179 Mon Sep 17 00:00:00 2001 From: SChernykh Date: Tue, 13 Aug 2019 21:15:54 +0200 Subject: [PATCH 2/3] Added "rx/test" for testing RandomX for Monero --- src/common/crypto/Algorithm.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/common/crypto/Algorithm.cpp b/src/common/crypto/Algorithm.cpp index 6befc88b..f1893bd0 100644 --- a/src/common/crypto/Algorithm.cpp +++ b/src/common/crypto/Algorithm.cpp @@ -73,6 +73,7 @@ static AlgoData const algorithms[] = { { "randomx/wow", "rx/wow", xmrig::RANDOM_X, xmrig::VARIANT_RX_WOW }, { "randomx/loki", "rx/loki", xmrig::RANDOM_X, xmrig::VARIANT_RX_LOKI }, { "randomx/0", "rx/0", xmrig::RANDOM_X, xmrig::VARIANT_RX_0 }, + { "randomx/test", "rx/test", xmrig::RANDOM_X, xmrig::VARIANT_RX_0 }, # ifndef XMRIG_NO_AEON { "cryptonight-lite", "cn-lite", xmrig::CRYPTONIGHT_LITE, xmrig::VARIANT_AUTO }, From a50c9f276a68e5cd2fc8e8dd5bb78da079669b2d Mon Sep 17 00:00:00 2001 From: SChernykh Date: Tue, 13 Aug 2019 21:30:47 +0200 Subject: [PATCH 3/3] Fixed compilation error --- src/amd/OclGPU.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/amd/OclGPU.cpp b/src/amd/OclGPU.cpp index 7f4c3b41..02c43c51 100644 --- a/src/amd/OclGPU.cpp +++ b/src/amd/OclGPU.cpp @@ -32,6 +32,7 @@ #include #include #include +#include #include "amd/OclCache.h" @@ -197,7 +198,7 @@ size_t InitOpenCLGpu(int index, cl_context opencl_ctx, GpuContext* ctx, const ch printGPU(index, ctx, config); xmrig::String device_name = ctx->name; - std::for_each(device_name.data(), device_name.data() + device_name.size(), [](char& c) { c = static_cast(std::toupper(c)); }); + std::for_each(device_name.data(), device_name.data() + device_name.size(), [](char& c) { c = static_cast(toupper(c)); }); ctx->gcn_version = (device_name == "GFX900") ? 14 : 12; cl_int ret;