From 19d4f8233de560796e3d56f7c06c393027f26cce Mon Sep 17 00:00:00 2001 From: Qin Jiajia Date: Mon, 2 Aug 2021 09:43:49 +0800 Subject: [PATCH] [webgl] Use uniforms for depthwise conv2d/reshape Bug #5205 --- tfjs-backend-webgl/src/backend_webgl.ts | 3 +- tfjs-backend-webgl/src/conv_gpu_depthwise.ts | 29 ++++---- .../src/conv_packed_gpu_depthwise.ts | 73 +++++++++---------- tfjs-backend-webgl/src/decode_matrix_gpu.ts | 2 +- .../src/decode_matrix_packed_gpu.ts | 2 +- .../src/kernel_utils/reshape.ts | 3 +- .../src/kernels/DepthwiseConv2dNative.ts | 8 +- .../src/kernels/FusedDepthwiseConv2D.ts | 10 ++- tfjs-backend-webgl/src/reshape_packed_gpu.ts | 22 ++++-- tfjs-backend-webgl/src/shader_compiler.ts | 4 +- .../src/shader_compiler_util.ts | 31 +++++++- 11 files changed, 116 insertions(+), 71 deletions(-) diff --git a/tfjs-backend-webgl/src/backend_webgl.ts b/tfjs-backend-webgl/src/backend_webgl.ts index b34a6230807..7d90453227e 100644 --- a/tfjs-backend-webgl/src/backend_webgl.ts +++ b/tfjs-backend-webgl/src/backend_webgl.ts @@ -733,8 +733,9 @@ export class MathBackendWebGL extends KernelBackend { const program = new ReshapePackedProgram(afterShapeAs3D, input3DShape); const preventEagerUnpackingOfOutput = true; + const customValues = [input3DShape]; const output = this.runWebGLProgram( - program, [input3D], input.dtype, null /* customUniformValues */, + program, [input3D], input.dtype, customValues, preventEagerUnpackingOfOutput); return {dataId: output.dataId, shape: afterShape, dtype: output.dtype}; } diff --git a/tfjs-backend-webgl/src/conv_gpu_depthwise.ts b/tfjs-backend-webgl/src/conv_gpu_depthwise.ts index cf4f65f19ea..a4d13539d39 100644 --- a/tfjs-backend-webgl/src/conv_gpu_depthwise.ts +++ b/tfjs-backend-webgl/src/conv_gpu_depthwise.ts @@ -16,27 +16,27 @@ */ import {backend_util} from '@tensorflow/tfjs-core'; -import {GPGPUProgram} from './gpgpu_math'; +import {GPGPUProgram, useShapeUniforms} from './gpgpu_math'; export class DepthwiseConv2DProgram implements 
GPGPUProgram { variableNames = ['x', 'W']; outputShape: number[]; userCode: string; + enableShapeUniforms: boolean; + customUniforms = [ + {name: 'pads', type: 'ivec2' as const }, + {name: 'strides', type: 'ivec2' as const }, + {name: 'dilations', type: 'ivec2' as const }, + {name: 'inDims', type: 'ivec2' as const }, + ]; constructor( convInfo: backend_util.Conv2DInfo, addBias = false, activation: string = null, hasPreluActivation = false, hasLeakyReluAlpha = false) { this.outputShape = convInfo.outShape; + this.enableShapeUniforms = useShapeUniforms(this.outputShape.length); - const xNumRows = convInfo.inHeight; - const xNumCols = convInfo.inWidth; - const padTop = convInfo.padInfo.top; - const padLeft = convInfo.padInfo.left; - const strideHeight = convInfo.strideHeight; - const strideWidth = convInfo.strideWidth; - const dilationHeight = convInfo.dilationHeight; - const dilationWidth = convInfo.dilationWidth; const filterHeight = convInfo.filterHeight; const filterWidth = convInfo.filterWidth; const channelMul = convInfo.outChannels / convInfo.inChannels; @@ -79,9 +79,6 @@ export class DepthwiseConv2DProgram implements GPGPUProgram { this.userCode = ` ${activationSnippet} - const ivec2 strides = ivec2(${strideHeight}, ${strideWidth}); - const ivec2 pads = ivec2(${padTop}, ${padLeft}); - void main() { ivec4 coords = getOutputCoords(); int batch = coords.x; @@ -98,16 +95,16 @@ export class DepthwiseConv2DProgram implements GPGPUProgram { float dotProd = 0.0; // TO DO(dsmilkov): Flatten the two for loops and vec4 the operations. 
for (int wR = 0; wR < ${filterHeight}; wR++) { - int xR = xRCorner + wR * ${dilationHeight}; + int xR = xRCorner + wR * dilations[0]; - if (xR < 0 || xR >= ${xNumRows}) { + if (xR < 0 || xR >= inDims[0]) { continue; } for (int wC = 0; wC < ${filterWidth}; wC++) { - int xC = xCCorner + wC * ${dilationWidth}; + int xC = xCCorner + wC * dilations[1]; - if (xC < 0 || xC >= ${xNumCols}) { + if (xC < 0 || xC >= inDims[1]) { continue; } diff --git a/tfjs-backend-webgl/src/conv_packed_gpu_depthwise.ts b/tfjs-backend-webgl/src/conv_packed_gpu_depthwise.ts index 661001963d0..2739312a891 100644 --- a/tfjs-backend-webgl/src/conv_packed_gpu_depthwise.ts +++ b/tfjs-backend-webgl/src/conv_packed_gpu_depthwise.ts @@ -17,7 +17,7 @@ import {backend_util, util} from '@tensorflow/tfjs-core'; -import {GPGPUProgram} from './gpgpu_math'; +import {GPGPUProgram, useShapeUniforms} from './gpgpu_math'; export class DepthwiseConvPacked2DProgram implements GPGPUProgram { variableNames = ['x', 'W']; @@ -25,20 +25,23 @@ export class DepthwiseConvPacked2DProgram implements GPGPUProgram { packedOutput = true; outputShape: number[]; userCode: string; + enableShapeUniforms: boolean; + customUniforms = [ + {name: 'pads', type: 'ivec2' as const }, + {name: 'strides', type: 'ivec2' as const }, + {name: 'dilations', type: 'ivec2' as const }, + {name: 'inDims', type: 'ivec2' as const }, + ]; constructor( convInfo: backend_util.Conv2DInfo, addBias = false, activation: string = null, hasPreluActivation = false, hasLeakyReluAlpha = false) { this.outputShape = convInfo.outShape; + this.enableShapeUniforms = useShapeUniforms(this.outputShape.length); const channelMul = convInfo.outChannels / convInfo.inChannels; - const xNumRows = convInfo.inHeight; - const xNumCols = convInfo.inWidth; - const padTop = convInfo.padInfo.top; const padLeft = convInfo.padInfo.left; - const strideHeight = convInfo.strideHeight; const strideWidth = convInfo.strideWidth; - const dilationHeight = convInfo.dilationHeight; const 
dilationWidth = convInfo.dilationWidth; const filterHeight = convInfo.filterHeight; const filterWidth = convInfo.filterWidth; @@ -75,8 +78,8 @@ export class DepthwiseConvPacked2DProgram implements GPGPUProgram { xC${c} = vec4(0.0);`; } mainLoop += ` - xR = xRCorner + ${r * dilationHeight}; - if (xR >=0 && xR < ${xNumRows}) { + xR = xRCorner + ${r} * dilations[0]; + if (xR >=0 && xR < inDims[0]) { `; for (let texelC = 0; texelC < (texelsAcross + 1) / 2; texelC++) { @@ -102,13 +105,13 @@ export class DepthwiseConvPacked2DProgram implements GPGPUProgram { mainLoop += ` xCOffset = xC + 1; - if (xCOffset >= 0 && xCOffset < ${xNumCols} && xTexelC${ + if (xCOffset >= 0 && xCOffset < inDims[1] && xTexelC${ colIndex}Ready == 0) { xTexelC${colIndex} = getX(batch, xR, xCOffset, d1); // Need to manually clear unused channels in case // we're reading from recycled texture. - if (xCOffset + 1 >= ${xNumCols}) { + if (xCOffset + 1 >= inDims[1]) { xTexelC${colIndex}.zw = vec2(0.0); } xTexelC${colIndex}Ready = 1; @@ -125,12 +128,12 @@ export class DepthwiseConvPacked2DProgram implements GPGPUProgram { mainLoop += ` xCOffset = xC + 1 - 2; - if (xCOffset >= 0 && xCOffset < ${xNumCols}) { + if (xCOffset >= 0 && xCOffset < inDims[1]) { previous = getX(batch, xR, xCOffset, d1); // Need to manually clear unused channels in case // we're reading from recycled texture. - if (xCOffset + 1 >= ${xNumCols}) { + if (xCOffset + 1 >= inDims[1]) { previous.zw = vec2(0.0); } @@ -143,10 +146,9 @@ export class DepthwiseConvPacked2DProgram implements GPGPUProgram { } else { // Padding is even, so xRC corresponds to a single texel. 
mainLoop += ` - if (xC >= 0 && xC < ${xNumCols} && xTexelC${ - colIndex}Ready == 0) { + if (xC >= 0 && xC < inDims[1] && xTexelC${colIndex}Ready == 0) { xTexelC${colIndex} = getX(batch, xR, xC, d1); - if (xC + 1 >= ${xNumCols}) { + if (xC + 1 >= inDims[1]) { xTexelC${colIndex}.zw = vec2(0.0); } xTexelC${colIndex}Ready = 1; @@ -170,15 +172,15 @@ export class DepthwiseConvPacked2DProgram implements GPGPUProgram { if ((dilationWidth % 2 === 0 && padLeft % 2 === 1) || (dilationWidth % 2 !== 0 && padLeft % 2 !== 1)) { mainLoop += ` - xCOffset = xC + ${padLeft % 2} + ${nextTexelOffset}; + xCOffset = xC + pads[1] % 2 + ${nextTexelOffset}; - if (xCOffset >= 0 && xCOffset < ${xNumCols} && xTexelC${ + if (xCOffset >= 0 && xCOffset < inDims[1] && xTexelC${ colIndex + 1}Ready == 0) { xTexelC${colIndex + 1} = getX(batch, xR, xCOffset, d1); // Need to manually clear unused channels in case // we're reading from recycled texture. - if (xCOffset + 1 >= ${xNumCols}) { + if (xCOffset + 1 >= inDims[1]) { xTexelC${colIndex + 1}.zw = vec2(0.0); } xTexelC${colIndex + 1}Ready = 1; @@ -190,7 +192,7 @@ export class DepthwiseConvPacked2DProgram implements GPGPUProgram { if (dilationWidth > 1) { mainLoop += ` xCOffset -= 2; - if (xCOffset >= 0 && xCOffset < ${xNumCols} && xTexelC${ + if (xCOffset >= 0 && xCOffset < inDims[1] && xTexelC${ colIndex}Ready == 0) { xTexelC${colIndex} = getX(batch, xR, xCOffset, d1); xTexelC${colIndex}Ready = 1; @@ -214,10 +216,10 @@ export class DepthwiseConvPacked2DProgram implements GPGPUProgram { mainLoop += ` xCOffset = xC + ${nextTexelOffset}; - if (xCOffset >= 0 && xCOffset < ${xNumCols} && xTexelC${ + if (xCOffset >= 0 && xCOffset < inDims[1] && xTexelC${ colIndex + 1}Ready == 0) { xTexelC${colIndex + 1} = getX(batch, xR, xCOffset, d1); - if (xCOffset + 1 >= ${xNumCols}) { + if (xCOffset + 1 >= inDims[1]) { xTexelC${colIndex + 1}.zw = vec2(0.0); } xTexelC${colIndex + 1}Ready = 1; @@ -240,24 +242,24 @@ export class DepthwiseConvPacked2DProgram implements 
GPGPUProgram { // final` initialized below. if (padLeft % 2 === 1) { mainLoop += ` - xCOffset = xC + 1 - ${strideWidth}; - if(xCOffset >= 0 && xCOffset < ${xNumCols} && xTexelC${ + xCOffset = xC + 1 - strides[1]; + if(xCOffset >= 0 && xCOffset < inDims[1] && xTexelC${ colIndex}Ready == 0) { xTexelC${colIndex} = getX(batch, xR, xCOffset, d1); // Need to manually clear unused channels in case // we're reading from recycled texture. - if (xCOffset + 1 >= ${xNumCols}) { + if (xCOffset + 1 >= inDims[1]) { xTexelC${colIndex}.zw = vec2(0.0); } xTexelC${colIndex}Ready = 1; } - if(xC + 1 >= 0 && xC + 1 < ${xNumCols} && xTexelC${ + if(xC + 1 >= 0 && xC + 1 < inDims[1] && xTexelC${ colIndex + 1}Ready == 0) { xTexelC${colIndex + 1} = getX(batch, xR, xC + 1, d1); // Need to manually clear unused channels in case // we're reading from recycled texture. - if (xC + 2 >= ${xNumCols}) { + if (xC + 2 >= inDims[1]) { xTexelC${colIndex + 1}.zw = vec2(0.0); } xTexelC${colIndex + 1}Ready = 1; @@ -270,8 +272,8 @@ export class DepthwiseConvPacked2DProgram implements GPGPUProgram { if (c + 1 < filterWidth) { mainLoop += ` final = vec4(0.0); - xCOffset = xC + 1 + ${strideWidth}; - if(xCOffset >= 0 && xCOffset < ${xNumCols}) { + xCOffset = xC + 1 + strides[1]; + if(xCOffset >= 0 && xCOffset < inDims[1]) { final = getX(batch, xR, xCOffset, d1); } xC${colIndex + 1} = vec4(xTexelC${colIndex + 1}.xy, final.xy); @@ -279,20 +281,19 @@ export class DepthwiseConvPacked2DProgram implements GPGPUProgram { } } else { mainLoop += ` - if(xC >= 0 && xC < ${xNumCols} && xTexelC${ - colIndex}Ready == 0) { + if(xC >= 0 && xC < inDims[1] && xTexelC${colIndex}Ready == 0) { xTexelC${colIndex} = getX(batch, xR, xC, d1); - if (xC + 1 >= ${xNumCols}) { + if (xC + 1 >= inDims[1]) { xTexelC${colIndex}.zw = vec2(0.0); } xTexelC${colIndex}Ready = 1; } - xCOffset = xC + ${strideWidth}; - if(xCOffset >= 0 && xCOffset < ${xNumCols} && xTexelC${ + xCOffset = xC + strides[1]; + if(xCOffset >= 0 && xCOffset < inDims[1] && 
xTexelC${ colIndex + 1}Ready == 0) { xTexelC${colIndex + 1} = getX(batch, xR, xCOffset, d1); - if (xCOffset + 1 >= ${xNumCols}) { + if (xCOffset + 1 >= inDims[1]) { xTexelC${colIndex + 1}.zw = vec2(0.); } xTexelC${colIndex + 1}Ready = 1; @@ -371,11 +372,7 @@ export class DepthwiseConvPacked2DProgram implements GPGPUProgram { this.userCode = ` ${activationSnippet} - const ivec2 strides = ivec2(${strideHeight}, ${strideWidth}); - const ivec2 pads = ivec2(${padTop}, ${padLeft}); - void main() { - ivec4 coords = getOutputCoords(); int batch = coords.x; ivec2 xRCCorner = coords.yz * strides - pads; diff --git a/tfjs-backend-webgl/src/decode_matrix_gpu.ts b/tfjs-backend-webgl/src/decode_matrix_gpu.ts index c9c50a45d06..1738126917a 100644 --- a/tfjs-backend-webgl/src/decode_matrix_gpu.ts +++ b/tfjs-backend-webgl/src/decode_matrix_gpu.ts @@ -39,7 +39,7 @@ export class DecodeMatrixProgram implements GPGPUProgram { ivec3 outCoordsFromFlatIndex(int index) { ${ this.enableShapeUniforms ? - shader_util.getLogicalCoordinatesFromFlatIndexByUniform( + shader_util.getOutputLogicalCoordinatesFromFlatIndexByUniform( ['r', 'c', 'd'], outputShape) : shader_util.getLogicalCoordinatesFromFlatIndex( ['r', 'c', 'd'], outputShape)} diff --git a/tfjs-backend-webgl/src/decode_matrix_packed_gpu.ts b/tfjs-backend-webgl/src/decode_matrix_packed_gpu.ts index cbceee1b97e..c3888024ffd 100644 --- a/tfjs-backend-webgl/src/decode_matrix_packed_gpu.ts +++ b/tfjs-backend-webgl/src/decode_matrix_packed_gpu.ts @@ -39,7 +39,7 @@ export class DecodeMatrixPackedProgram implements GPGPUProgram { ivec3 outCoordsFromFlatIndex(int index) { ${ this.enableShapeUniforms ? 
- shader_util.getLogicalCoordinatesFromFlatIndexByUniform( + shader_util.getOutputLogicalCoordinatesFromFlatIndexByUniform( ['r', 'c', 'd'], outputShape) : shader_util.getLogicalCoordinatesFromFlatIndex( ['r', 'c', 'd'], outputShape)} diff --git a/tfjs-backend-webgl/src/kernel_utils/reshape.ts b/tfjs-backend-webgl/src/kernel_utils/reshape.ts index b3ffdedb0a8..1f3ae73ef2d 100644 --- a/tfjs-backend-webgl/src/kernel_utils/reshape.ts +++ b/tfjs-backend-webgl/src/kernel_utils/reshape.ts @@ -38,8 +38,9 @@ export function packedReshape( const program = new ReshapePackedProgram(afterShapeAs3D, input3DShape); const preventEagerUnpackingOfOutput = true; + const customValues = [input3DShape]; const output = backend.runWebGLProgram( - program, [input3D], input.dtype, null /* customSetup */, + program, [input3D], input.dtype, customValues, preventEagerUnpackingOfOutput); return {dataId: output.dataId, shape: afterShape, dtype: output.dtype}; } diff --git a/tfjs-backend-webgl/src/kernels/DepthwiseConv2dNative.ts b/tfjs-backend-webgl/src/kernels/DepthwiseConv2dNative.ts index 123b5634cb4..87f12fff53c 100644 --- a/tfjs-backend-webgl/src/kernels/DepthwiseConv2dNative.ts +++ b/tfjs-backend-webgl/src/kernels/DepthwiseConv2dNative.ts @@ -52,7 +52,13 @@ export function depthwiseConv2dNative(args: { } else { program = new DepthwiseConv2DProgram(convInfo); } - return backend.runWebGLProgram(program, [x, filter], 'float32'); + const customValues = [ + [convInfo.padInfo.top, convInfo.padInfo.left], + [convInfo.strideHeight, convInfo.strideWidth], + [convInfo.dilationHeight, convInfo.dilationWidth], + [convInfo.inHeight, convInfo.inWidth] + ]; + return backend.runWebGLProgram(program, [x, filter], 'float32', customValues); } export const depthwiseConv2dNativeConfig: KernelConfig = { diff --git a/tfjs-backend-webgl/src/kernels/FusedDepthwiseConv2D.ts b/tfjs-backend-webgl/src/kernels/FusedDepthwiseConv2D.ts index 136872d3cb9..147987dff26 100644 --- 
a/tfjs-backend-webgl/src/kernels/FusedDepthwiseConv2D.ts +++ b/tfjs-backend-webgl/src/kernels/FusedDepthwiseConv2D.ts @@ -85,8 +85,14 @@ export function fusedDepthwiseConv2D(args: { convInfo, hasBias, fusedActivation, hasPreluActivationWeights, hasLeakyreluAlpha); } - - const result = backend.runWebGLProgram(program, programInputs, 'float32'); + const customValues = [ + [convInfo.padInfo.top, convInfo.padInfo.left], + [convInfo.strideHeight, convInfo.strideWidth], + [convInfo.dilationHeight, convInfo.dilationWidth], + [convInfo.inHeight, convInfo.inWidth] + ]; + const result = + backend.runWebGLProgram(program, programInputs, 'float32', customValues); intermediates.forEach(t => backend.disposeIntermediateTensorInfo(t)); diff --git a/tfjs-backend-webgl/src/reshape_packed_gpu.ts b/tfjs-backend-webgl/src/reshape_packed_gpu.ts index 90c3e098c04..29cae9cb094 100644 --- a/tfjs-backend-webgl/src/reshape_packed_gpu.ts +++ b/tfjs-backend-webgl/src/reshape_packed_gpu.ts @@ -15,7 +15,7 @@ * ============================================================================= */ -import {GPGPUProgram} from './gpgpu_math'; +import {GPGPUProgram, useShapeUniforms} from './gpgpu_math'; import * as shader_util from './shader_compiler_util'; export class ReshapePackedProgram implements GPGPUProgram { @@ -24,11 +24,14 @@ export class ReshapePackedProgram implements GPGPUProgram { packedOutput = true; outputShape: number[]; userCode: string; + enableShapeUniforms: boolean; + customUniforms = [{name: 'inputShape', type: 'ivec3' as const }]; constructor(outputShape: [number, number, number], inputShape: [ number, number, number ]) { this.outputShape = outputShape; + this.enableShapeUniforms = useShapeUniforms(this.outputShape.length); let mainLoop = ``; for (let i = 0; i < 4; i++) { @@ -55,8 +58,10 @@ export class ReshapePackedProgram implements GPGPUProgram { } this.userCode = ` - ${getReshapedInputCoords(inputShape)} - ${shader_util.getFlatIndexFrom3D(outputShape)} + 
${getReshapedInputCoords(inputShape, this.enableShapeUniforms)} + ${ + this.enableShapeUniforms ? shader_util.getFlatIndexFrom3DOutput() : + shader_util.getFlatIndexFrom3D(outputShape)} void main() { ivec3 rc = getOutputCoords(); @@ -64,8 +69,8 @@ export class ReshapePackedProgram implements GPGPUProgram { vec4 result = vec4(0.); ivec3 thisRC; - int rows = ${outputShape[1]}; - int cols = ${outputShape[2]}; + int rows = ${this.enableShapeUniforms ? 'outShape[1]' : outputShape[1]}; + int cols = ${this.enableShapeUniforms ? 'outShape[2]' : outputShape[2]}; ${mainLoop} @@ -75,8 +80,11 @@ export class ReshapePackedProgram implements GPGPUProgram { } } -function getReshapedInputCoords(shape: [number, number, number]): string { - const coordsFromIndexSnippet = +function getReshapedInputCoords( + shape: [number, number, number], enableShapeUniforms: boolean): string { + const coordsFromIndexSnippet = enableShapeUniforms ? + shader_util.getLogicalCoordinatesFromFlatIndexByUniform( + ['r', 'c', 'd'], 'inputShape') : shader_util.getLogicalCoordinatesFromFlatIndex(['r', 'c', 'd'], shape); return ` diff --git a/tfjs-backend-webgl/src/shader_compiler.ts b/tfjs-backend-webgl/src/shader_compiler.ts index cc943d2c1cb..4ceec248634 100644 --- a/tfjs-backend-webgl/src/shader_compiler.ts +++ b/tfjs-backend-webgl/src/shader_compiler.ts @@ -571,7 +571,7 @@ function getOutput3DCoords( enableShapeUniforms: boolean): string { if (enableShapeUniforms) { const coordsFromIndexSnippet = - shader_util.getLogicalCoordinatesFromFlatIndexByUniform( + shader_util.getOutputLogicalCoordinatesFromFlatIndexByUniform( ['r', 'c', 'd'], shape); return ` @@ -670,7 +670,7 @@ function getOutput4DCoords( enableShapeUniforms: boolean): string { if (enableShapeUniforms) { const coordsFromIndexSnippet = - shader_util.getLogicalCoordinatesFromFlatIndexByUniform( + shader_util.getOutputLogicalCoordinatesFromFlatIndexByUniform( ['r', 'c', 'd', 'd2'], shape); return ` diff --git 
a/tfjs-backend-webgl/src/shader_compiler_util.ts b/tfjs-backend-webgl/src/shader_compiler_util.ts
index 295dd1064b5..7e744c73a31 100644
--- a/tfjs-backend-webgl/src/shader_compiler_util.ts
+++ b/tfjs-backend-webgl/src/shader_compiler_util.ts
@@ -36,7 +36,7 @@ export function getLogicalCoordinatesFromFlatIndex(
       .join('');
 }
 
-export function getLogicalCoordinatesFromFlatIndexByUniform(
+export function getOutputLogicalCoordinatesFromFlatIndexByUniform(
     coords: string[], shape: number[], index = 'index'): string {
   const strides = util.computeStrides(shape);
   return strides
@@ -51,6 +51,52 @@ export function getLogicalCoordinatesFromFlatIndexByUniform(
       .join('');
 }
 
+/**
+ * Builds GLSL expressions for the row-major strides of a shape that lives in
+ * a uniform. `variableName` is the uniform's name and `indicesArr` lists the
+ * components of it to use, e.g. ([0, 1, 2], 'inputShape') yields
+ * ['(inputShape[2] * inputShape[1])', 'inputShape[2]'].
+ */
+function symbolicallyComputeStrides(
+    indicesArr: number[], variableName: string): string[] {
+  const numCoords = indicesArr.length;
+  if (numCoords < 2) {
+    // Fewer than two dimensions have no strides; also avoids new Array(-1).
+    return [];
+  }
+  const shape = indicesArr.map(d => `${variableName}[${d}]`);
+  const strides = new Array<string>(numCoords - 1);
+  strides[numCoords - 2] = shape[numCoords - 1];
+  for (let i = numCoords - 3; i >= 0; --i) {
+    strides[i] = `(${strides[i + 1]} * ${shape[i + 1]})`;
+  }
+
+  return strides;
+}
+
+/**
+ * Emits GLSL statements that split the flat index held in `index` into the
+ * variables named by `coords`, reading the shape from the uniform
+ * `variableName`. All but the last step subtract the consumed part from
+ * `index` in place, so `index` must name a mutable GLSL int.
+ */
+export function getLogicalCoordinatesFromFlatIndexByUniform(
+    coords: string[], variableName: string, index = 'index'): string {
+  const indicesArray = coords.map((_, i) => i);
+  const strides = symbolicallyComputeStrides(indicesArray, variableName);
+  return strides
+      .map((stride, i) => {
+        const line1 = `int ${coords[i]} = ${index} / ${stride}`;
+        // Use the caller-supplied index name here too; a hard-coded `index`
+        // would reference the wrong (or an undeclared) GLSL variable.
+        const line2 = i === strides.length - 1 ?
+            `int ${coords[i + 1]} = ${index} - ${coords[i]} * ${stride}` :
+            `${index} -= ${coords[i]} * ${stride}`;
+        return `${line1}; ${line2};`;
+      })
+      .join('');
+}
+
 function buildVec(x: string[]): string {
   if (x.length === 1) {
     return `${x[0]}`;