Skip to content

Commit

Permalink
[webgl] Use uniforms for depthwise conv2d/reshape
Browse files Browse the repository at this point in the history
  • Loading branch information
qjia7 committed Aug 3, 2021
1 parent 6bfe297 commit 19d4f82
Show file tree
Hide file tree
Showing 11 changed files with 116 additions and 71 deletions.
3 changes: 2 additions & 1 deletion tfjs-backend-webgl/src/backend_webgl.ts
Original file line number Diff line number Diff line change
Expand Up @@ -733,8 +733,9 @@ export class MathBackendWebGL extends KernelBackend {

const program = new ReshapePackedProgram(afterShapeAs3D, input3DShape);
const preventEagerUnpackingOfOutput = true;
const customValues = [input3DShape];
const output = this.runWebGLProgram(
program, [input3D], input.dtype, null /* customUniformValues */,
program, [input3D], input.dtype, customValues,
preventEagerUnpackingOfOutput);
return {dataId: output.dataId, shape: afterShape, dtype: output.dtype};
}
Expand Down
29 changes: 13 additions & 16 deletions tfjs-backend-webgl/src/conv_gpu_depthwise.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,27 +16,27 @@
*/

import {backend_util} from '@tensorflow/tfjs-core';
import {GPGPUProgram} from './gpgpu_math';
import {GPGPUProgram, useShapeUniforms} from './gpgpu_math';

export class DepthwiseConv2DProgram implements GPGPUProgram {
variableNames = ['x', 'W'];
outputShape: number[];
userCode: string;
enableShapeUniforms: boolean;
customUniforms = [
{name: 'pads', type: 'ivec2' as const },
{name: 'strides', type: 'ivec2' as const },
{name: 'dilations', type: 'ivec2' as const },
{name: 'inDims', type: 'ivec2' as const },
];

constructor(
convInfo: backend_util.Conv2DInfo, addBias = false,
activation: string = null, hasPreluActivation = false,
hasLeakyReluAlpha = false) {
this.outputShape = convInfo.outShape;
this.enableShapeUniforms = useShapeUniforms(this.outputShape.length);

const xNumRows = convInfo.inHeight;
const xNumCols = convInfo.inWidth;
const padTop = convInfo.padInfo.top;
const padLeft = convInfo.padInfo.left;
const strideHeight = convInfo.strideHeight;
const strideWidth = convInfo.strideWidth;
const dilationHeight = convInfo.dilationHeight;
const dilationWidth = convInfo.dilationWidth;
const filterHeight = convInfo.filterHeight;
const filterWidth = convInfo.filterWidth;
const channelMul = convInfo.outChannels / convInfo.inChannels;
Expand Down Expand Up @@ -79,9 +79,6 @@ export class DepthwiseConv2DProgram implements GPGPUProgram {
this.userCode = `
${activationSnippet}
const ivec2 strides = ivec2(${strideHeight}, ${strideWidth});
const ivec2 pads = ivec2(${padTop}, ${padLeft});
void main() {
ivec4 coords = getOutputCoords();
int batch = coords.x;
Expand All @@ -98,16 +95,16 @@ export class DepthwiseConv2DProgram implements GPGPUProgram {
float dotProd = 0.0;
// TO DO(dsmilkov): Flatten the two for loops and vec4 the operations.
for (int wR = 0; wR < ${filterHeight}; wR++) {
int xR = xRCorner + wR * ${dilationHeight};
int xR = xRCorner + wR * dilations[0];
if (xR < 0 || xR >= ${xNumRows}) {
if (xR < 0 || xR >= inDims[0]) {
continue;
}
for (int wC = 0; wC < ${filterWidth}; wC++) {
int xC = xCCorner + wC * ${dilationWidth};
int xC = xCCorner + wC * dilations[1];
if (xC < 0 || xC >= ${xNumCols}) {
if (xC < 0 || xC >= inDims[1]) {
continue;
}
Expand Down
73 changes: 35 additions & 38 deletions tfjs-backend-webgl/src/conv_packed_gpu_depthwise.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,28 +17,31 @@

import {backend_util, util} from '@tensorflow/tfjs-core';

import {GPGPUProgram} from './gpgpu_math';
import {GPGPUProgram, useShapeUniforms} from './gpgpu_math';

export class DepthwiseConvPacked2DProgram implements GPGPUProgram {
variableNames = ['x', 'W'];
packedInputs = true;
packedOutput = true;
outputShape: number[];
userCode: string;
enableShapeUniforms: boolean;
customUniforms = [
{name: 'pads', type: 'ivec2' as const },
{name: 'strides', type: 'ivec2' as const },
{name: 'dilations', type: 'ivec2' as const },
{name: 'inDims', type: 'ivec2' as const },
];

constructor(
convInfo: backend_util.Conv2DInfo, addBias = false,
activation: string = null, hasPreluActivation = false,
hasLeakyReluAlpha = false) {
this.outputShape = convInfo.outShape;
this.enableShapeUniforms = useShapeUniforms(this.outputShape.length);
const channelMul = convInfo.outChannels / convInfo.inChannels;
const xNumRows = convInfo.inHeight;
const xNumCols = convInfo.inWidth;
const padTop = convInfo.padInfo.top;
const padLeft = convInfo.padInfo.left;
const strideHeight = convInfo.strideHeight;
const strideWidth = convInfo.strideWidth;
const dilationHeight = convInfo.dilationHeight;
const dilationWidth = convInfo.dilationWidth;
const filterHeight = convInfo.filterHeight;
const filterWidth = convInfo.filterWidth;
Expand Down Expand Up @@ -75,8 +78,8 @@ export class DepthwiseConvPacked2DProgram implements GPGPUProgram {
xC${c} = vec4(0.0);`;
}
mainLoop += `
xR = xRCorner + ${r * dilationHeight};
if (xR >=0 && xR < ${xNumRows}) {
xR = xRCorner + ${r} * dilations[0];
if (xR >=0 && xR < inDims[0]) {
`;

for (let texelC = 0; texelC < (texelsAcross + 1) / 2; texelC++) {
Expand All @@ -102,13 +105,13 @@ export class DepthwiseConvPacked2DProgram implements GPGPUProgram {

mainLoop += `
xCOffset = xC + 1;
if (xCOffset >= 0 && xCOffset < ${xNumCols} && xTexelC${
if (xCOffset >= 0 && xCOffset < inDims[1] && xTexelC${
colIndex}Ready == 0) {
xTexelC${colIndex} = getX(batch, xR, xCOffset, d1);
// Need to manually clear unused channels in case
// we're reading from recycled texture.
if (xCOffset + 1 >= ${xNumCols}) {
if (xCOffset + 1 >= inDims[1]) {
xTexelC${colIndex}.zw = vec2(0.0);
}
xTexelC${colIndex}Ready = 1;
Expand All @@ -125,12 +128,12 @@ export class DepthwiseConvPacked2DProgram implements GPGPUProgram {
mainLoop += `
xCOffset = xC + 1 - 2;
if (xCOffset >= 0 && xCOffset < ${xNumCols}) {
if (xCOffset >= 0 && xCOffset < inDims[1]) {
previous = getX(batch, xR, xCOffset, d1);
// Need to manually clear unused channels in case
// we're reading from recycled texture.
if (xCOffset + 1 >= ${xNumCols}) {
if (xCOffset + 1 >= inDims[1]) {
previous.zw = vec2(0.0);
}
Expand All @@ -143,10 +146,9 @@ export class DepthwiseConvPacked2DProgram implements GPGPUProgram {
} else {
// Padding is even, so xRC corresponds to a single texel.
mainLoop += `
if (xC >= 0 && xC < ${xNumCols} && xTexelC${
colIndex}Ready == 0) {
if (xC >= 0 && xC < inDims[1] && xTexelC${colIndex}Ready == 0) {
xTexelC${colIndex} = getX(batch, xR, xC, d1);
if (xC + 1 >= ${xNumCols}) {
if (xC + 1 >= inDims[1]) {
xTexelC${colIndex}.zw = vec2(0.0);
}
xTexelC${colIndex}Ready = 1;
Expand All @@ -170,15 +172,15 @@ export class DepthwiseConvPacked2DProgram implements GPGPUProgram {
if ((dilationWidth % 2 === 0 && padLeft % 2 === 1) ||
(dilationWidth % 2 !== 0 && padLeft % 2 !== 1)) {
mainLoop += `
xCOffset = xC + ${padLeft % 2} + ${nextTexelOffset};
xCOffset = xC + pads[1] % 2 + ${nextTexelOffset};
if (xCOffset >= 0 && xCOffset < ${xNumCols} && xTexelC${
if (xCOffset >= 0 && xCOffset < inDims[1] && xTexelC${
colIndex + 1}Ready == 0) {
xTexelC${colIndex + 1} = getX(batch, xR, xCOffset, d1);
// Need to manually clear unused channels in case
// we're reading from recycled texture.
if (xCOffset + 1 >= ${xNumCols}) {
if (xCOffset + 1 >= inDims[1]) {
xTexelC${colIndex + 1}.zw = vec2(0.0);
}
xTexelC${colIndex + 1}Ready = 1;
Expand All @@ -190,7 +192,7 @@ export class DepthwiseConvPacked2DProgram implements GPGPUProgram {
if (dilationWidth > 1) {
mainLoop += `
xCOffset -= 2;
if (xCOffset >= 0 && xCOffset < ${xNumCols} && xTexelC${
if (xCOffset >= 0 && xCOffset < inDims[1] && xTexelC${
colIndex}Ready == 0) {
xTexelC${colIndex} = getX(batch, xR, xCOffset, d1);
xTexelC${colIndex}Ready = 1;
Expand All @@ -214,10 +216,10 @@ export class DepthwiseConvPacked2DProgram implements GPGPUProgram {
mainLoop += `
xCOffset = xC + ${nextTexelOffset};
if (xCOffset >= 0 && xCOffset < ${xNumCols} && xTexelC${
if (xCOffset >= 0 && xCOffset < inDims[1] && xTexelC${
colIndex + 1}Ready == 0) {
xTexelC${colIndex + 1} = getX(batch, xR, xCOffset, d1);
if (xCOffset + 1 >= ${xNumCols}) {
if (xCOffset + 1 >= inDims[1]) {
xTexelC${colIndex + 1}.zw = vec2(0.0);
}
xTexelC${colIndex + 1}Ready = 1;
Expand All @@ -240,24 +242,24 @@ export class DepthwiseConvPacked2DProgram implements GPGPUProgram {
// final` initialized below.
if (padLeft % 2 === 1) {
mainLoop += `
xCOffset = xC + 1 - ${strideWidth};
if(xCOffset >= 0 && xCOffset < ${xNumCols} && xTexelC${
xCOffset = xC + 1 - strides[1];
if(xCOffset >= 0 && xCOffset < inDims[1] && xTexelC${
colIndex}Ready == 0) {
xTexelC${colIndex} = getX(batch, xR, xCOffset, d1);
// Need to manually clear unused channels in case
// we're reading from recycled texture.
if (xCOffset + 1 >= ${xNumCols}) {
if (xCOffset + 1 >= inDims[1]) {
xTexelC${colIndex}.zw = vec2(0.0);
}
xTexelC${colIndex}Ready = 1;
}
if(xC + 1 >= 0 && xC + 1 < ${xNumCols} && xTexelC${
if(xC + 1 >= 0 && xC + 1 < inDims[1] && xTexelC${
colIndex + 1}Ready == 0) {
xTexelC${colIndex + 1} = getX(batch, xR, xC + 1, d1);
// Need to manually clear unused channels in case
// we're reading from recycled texture.
if (xC + 2 >= ${xNumCols}) {
if (xC + 2 >= inDims[1]) {
xTexelC${colIndex + 1}.zw = vec2(0.0);
}
xTexelC${colIndex + 1}Ready = 1;
Expand All @@ -270,29 +272,28 @@ export class DepthwiseConvPacked2DProgram implements GPGPUProgram {
if (c + 1 < filterWidth) {
mainLoop += `
final = vec4(0.0);
xCOffset = xC + 1 + ${strideWidth};
if(xCOffset >= 0 && xCOffset < ${xNumCols}) {
xCOffset = xC + 1 + strides[1];
if(xCOffset >= 0 && xCOffset < inDims[1]) {
final = getX(batch, xR, xCOffset, d1);
}
xC${colIndex + 1} = vec4(xTexelC${colIndex + 1}.xy, final.xy);
`;
}
} else {
mainLoop += `
if(xC >= 0 && xC < ${xNumCols} && xTexelC${
colIndex}Ready == 0) {
if(xC >= 0 && xC < inDims[1] && xTexelC${colIndex}Ready == 0) {
xTexelC${colIndex} = getX(batch, xR, xC, d1);
if (xC + 1 >= ${xNumCols}) {
if (xC + 1 >= inDims[1]) {
xTexelC${colIndex}.zw = vec2(0.0);
}
xTexelC${colIndex}Ready = 1;
}
xCOffset = xC + ${strideWidth};
if(xCOffset >= 0 && xCOffset < ${xNumCols} && xTexelC${
xCOffset = xC + strides[1];
if(xCOffset >= 0 && xCOffset < inDims[1] && xTexelC${
colIndex + 1}Ready == 0) {
xTexelC${colIndex + 1} = getX(batch, xR, xCOffset, d1);
if (xCOffset + 1 >= ${xNumCols}) {
if (xCOffset + 1 >= inDims[1]) {
xTexelC${colIndex + 1}.zw = vec2(0.);
}
xTexelC${colIndex + 1}Ready = 1;
Expand Down Expand Up @@ -371,11 +372,7 @@ export class DepthwiseConvPacked2DProgram implements GPGPUProgram {
this.userCode = `
${activationSnippet}
const ivec2 strides = ivec2(${strideHeight}, ${strideWidth});
const ivec2 pads = ivec2(${padTop}, ${padLeft});
void main() {
ivec4 coords = getOutputCoords();
int batch = coords.x;
ivec2 xRCCorner = coords.yz * strides - pads;
Expand Down
2 changes: 1 addition & 1 deletion tfjs-backend-webgl/src/decode_matrix_gpu.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ export class DecodeMatrixProgram implements GPGPUProgram {
ivec3 outCoordsFromFlatIndex(int index) {
${
this.enableShapeUniforms ?
shader_util.getLogicalCoordinatesFromFlatIndexByUniform(
shader_util.getOutputLogicalCoordinatesFromFlatIndexByUniform(
['r', 'c', 'd'], outputShape) :
shader_util.getLogicalCoordinatesFromFlatIndex(
['r', 'c', 'd'], outputShape)}
Expand Down
2 changes: 1 addition & 1 deletion tfjs-backend-webgl/src/decode_matrix_packed_gpu.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ export class DecodeMatrixPackedProgram implements GPGPUProgram {
ivec3 outCoordsFromFlatIndex(int index) {
${
this.enableShapeUniforms ?
shader_util.getLogicalCoordinatesFromFlatIndexByUniform(
shader_util.getOutputLogicalCoordinatesFromFlatIndexByUniform(
['r', 'c', 'd'], outputShape) :
shader_util.getLogicalCoordinatesFromFlatIndex(
['r', 'c', 'd'], outputShape)}
Expand Down
3 changes: 2 additions & 1 deletion tfjs-backend-webgl/src/kernel_utils/reshape.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,9 @@ export function packedReshape(

const program = new ReshapePackedProgram(afterShapeAs3D, input3DShape);
const preventEagerUnpackingOfOutput = true;
const customValues = [input3DShape];
const output = backend.runWebGLProgram(
program, [input3D], input.dtype, null /* customSetup */,
program, [input3D], input.dtype, customValues,
preventEagerUnpackingOfOutput);
return {dataId: output.dataId, shape: afterShape, dtype: output.dtype};
}
8 changes: 7 additions & 1 deletion tfjs-backend-webgl/src/kernels/DepthwiseConv2dNative.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,13 @@ export function depthwiseConv2dNative(args: {
} else {
program = new DepthwiseConv2DProgram(convInfo);
}
return backend.runWebGLProgram(program, [x, filter], 'float32');
const customValues = [
[convInfo.padInfo.top, convInfo.padInfo.left],
[convInfo.strideHeight, convInfo.strideWidth],
[convInfo.dilationHeight, convInfo.dilationWidth],
[convInfo.inHeight, convInfo.inWidth]
];
return backend.runWebGLProgram(program, [x, filter], 'float32', customValues);
}

export const depthwiseConv2dNativeConfig: KernelConfig = {
Expand Down
10 changes: 8 additions & 2 deletions tfjs-backend-webgl/src/kernels/FusedDepthwiseConv2D.ts
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,14 @@ export function fusedDepthwiseConv2D(args: {
convInfo, hasBias, fusedActivation, hasPreluActivationWeights,
hasLeakyreluAlpha);
}

const result = backend.runWebGLProgram(program, programInputs, 'float32');
const customValues = [
[convInfo.padInfo.top, convInfo.padInfo.left],
[convInfo.strideHeight, convInfo.strideWidth],
[convInfo.dilationHeight, convInfo.dilationWidth],
[convInfo.inHeight, convInfo.inWidth]
];
const result =
backend.runWebGLProgram(program, programInputs, 'float32', customValues);

intermediates.forEach(t => backend.disposeIntermediateTensorInfo(t));

Expand Down
Loading

0 comments on commit 19d4f82

Please sign in to comment.