/**
Contains common neural network operations.

These operations are currently only implemented for the CUDA backend.

Authors: Henry Gouk
*/
module dopt.core.ops.nnet;

import dopt.core.ops;
import dopt.core.types;

import std.array;
import std.functional;
import std.variant;

package
{
    ///Registers the verification and type-judgement functions for each neural network operation.
    void initialize()
    {
        registerOperation("convolution", OpDef(toDelegate(&verifyConvolution), toDelegate(&judgeConvolution)));
        registerOperation("maxpool", OpDef(toDelegate(&verifyMaxpool), toDelegate(&judgeMaxpool)));
        registerOperation("convolutionFeaturesGrad", OpDef(toDelegate(&verifyConvolutionFeaturesGrad),
            toDelegate(&judgeConvolutionFeaturesGrad)));
        registerOperation("convolutionFiltersGrad", OpDef(toDelegate(&verifyConvolutionFiltersGrad),
            toDelegate(&judgeConvolutionFiltersGrad)));
        registerOperation("maxpoolGrad", OpDef(toDelegate(&verifyMaxpoolGrad), toDelegate(&judgeMaxpoolGrad)));
        registerOperation("softmax", OpDef(toDelegate(&verifySoftmax), toDelegate(&judgeSoftmax)));
        registerOperation("softmaxGrad", OpDef(toDelegate(&verifySoftmaxGrad), toDelegate(&judgeSoftmaxGrad)));
        registerOperation("relu", OpDef(toDelegate(&verifyRelu), toDelegate(&judgeRelu)));
        registerOperation("reluGrad", OpDef(toDelegate(&verifyReluGrad), toDelegate(&judgeReluGrad)));
        registerOperation("addBias", OpDef(toDelegate(&verifyAddBias), toDelegate(&judgeAddBias)));
        registerOperation("addBiasGrad", OpDef(toDelegate(&verifyAddBiasGrad), toDelegate(&judgeAddBiasGrad)));
        registerOperation("batchNormTrain", OpDef(toDelegate(&verifyBatchNormTrain), toDelegate(&judgeBatchNormTrain)));
        registerOperation("batchNormGrad", OpDef(toDelegate(&verifyBatchNormGrad), toDelegate(&judgeBatchNormGrad)));
        registerOperation("batchNormInference", OpDef(toDelegate(&verifyBatchNormInference),
            toDelegate(&judgeBatchNormInference)));
    }
}

private
{
    /**
    Checks that a convolution operation has two rank-4 dependencies (images and filters) with matching
    element types and input channel counts, and well-formed "padding"/"stride" attributes.
    */
    bool verifyConvolution(Operation op)
    {
        if(op.deps.length != 2)
        {
            return false;
        }

        auto imgs = op.deps[0].outputType;
        auto filters = op.deps[1].outputType;

        if(imgs.rank != 4 || filters.rank != 4)
        {
            return false;
        }

        if(imgs.elementType != filters.elementType)
        {
            return false;
        }

        //The number of input channels in the feature maps must match the filters
        if(imgs.shape[1] != filters.shape[1])
        {
            return false;
        }

        //judgeConvolution unconditionally reads these attributes, so reject malformed ops here instead of
        //letting them crash during type judgement. This mirrors the attribute checks in verifyMaxpool.
        if(op.attributes["padding"].peek!(size_t[]) is null || op.attributes["padding"].get!(size_t[]).length != 2)
        {
            return false;
        }

        if(op.attributes["stride"].peek!(size_t[]) is null || op.attributes["stride"].get!(size_t[]).length != 2)
        {
            return false;
        }

        return true;
    }

    ///Computes the output type of a convolution: [batchSize, outputChannels, newHeight, newWidth].
    TensorType judgeConvolution(Operation op)
    {
        auto imgs = op.deps[0];
        auto filters = op.deps[1];

        auto padding = op.attributes["padding"].get!(size_t[]);
        auto stride = op.attributes["stride"].get!(size_t[]);

        auto batchSize = imgs.outputType.shape[0];
        auto outputChannels = filters.outputType.shape[0];

        //Standard convolution output size: zero padding on both sides, floor division by the stride
        auto newHeight = (imgs.outputType.shape[2] + 2 * padding[0] - filters.outputType.shape[2]) / stride[0] + 1;
        auto newWidth = (imgs.outputType.shape[3] + 2 * padding[1] - filters.outputType.shape[3]) / stride[1] + 1;

        auto shape = [batchSize, outputChannels, newHeight, newWidth];

        return TensorType(imgs.outputType.elementType, shape);
    }

    ///Checks that a maxpool operation has a single rank-4 dependency and a two-element "dims" attribute.
    bool verifyMaxpool(Operation op)
    {
        return op.deps.length == 1
            && op.deps[0].outputType.rank == 4
            && op.attributes["dims"].peek!(size_t[]) !is null
            && op.attributes["dims"].get!(size_t[]).length == 2;
    }

    ///The spatial dimensions are divided (flooring) by the pool dimensions; batch and channels are unchanged.
    TensorType judgeMaxpool(Operation op)
    {
        auto poolDims = op.attributes["dims"].get!(size_t[]);
        size_t[] shape = new size_t[4];
        shape[0] = op.deps[0].shape[0];
        shape[1] = op.deps[0].shape[1];
        shape[2] = op.deps[0].shape[2] / poolDims[0];
        shape[3] = op.deps[0].shape[3] / poolDims[1];

        return TensorType(op.deps[0].outputType.elementType, shape);
    }

    ///No structural verification is currently performed for this operation.
    bool verifyConvolutionFeaturesGrad(Operation op)
    {
        return true;
    }

    ///The output type is given by the "featuresShape" attribute, with the parent gradient's element type.
    TensorType judgeConvolutionFeaturesGrad(Operation op)
    {
        auto parentGrad = op.deps[0];
        auto dims = op.attributes["featuresShape"].get!(size_t[]);

        size_t[] shape = new size_t[4];
        shape[] = dims[];

        return TensorType(parentGrad.outputType.elementType, shape);
    }

    ///No structural verification is currently performed for this operation.
    bool verifyConvolutionFiltersGrad(Operation op)
    {
        return true;
    }

    ///The output type is given by the "filtersShape" attribute, with the parent gradient's element type.
    TensorType judgeConvolutionFiltersGrad(Operation op)
    {
        auto parentGrad = op.deps[0];
        auto dims = op.attributes["filtersShape"].get!(size_t[]);

        size_t[] shape = new size_t[4];
        shape[] = dims[];

        return TensorType(parentGrad.outputType.elementType, shape);
    }

    ///No structural verification is currently performed for this operation.
    bool verifyMaxpoolGrad(Operation op)
    {
        return true;
    }

    ///The output type is given by the "featuresShape" attribute, with the parent gradient's element type.
    TensorType judgeMaxpoolGrad(Operation op)
    {
        auto parentGrad = op.deps[0];
        auto dims = op.attributes["featuresShape"].get!(size_t[]);

        size_t[] shape = new size_t[4];
        shape[] = dims[];

        return TensorType(parentGrad.outputType.elementType, shape);
    }

    ///Softmax takes a single dependency.
    bool verifySoftmax(Operation op)
    {
        return op.deps.length == 1;
    }

    ///Softmax preserves the type and shape of its input.
    TensorType judgeSoftmax(Operation op)
    {
        return TensorType(op.deps[0].elementType, op.deps[0].shape);
    }

    ///softmaxGrad takes the parent gradient and the softmax operation being differentiated.
    bool verifySoftmaxGrad(Operation op)
    {
        return op.deps.length == 2;
    }

    ///The gradient has the same type and shape as the softmax output (dependency 1).
    TensorType judgeSoftmaxGrad(Operation op)
    {
        return TensorType(op.deps[1].elementType, op.deps[1].shape);
    }

    ///ReLU takes a single dependency.
    bool verifyRelu(Operation op)
    {
        return op.deps.length == 1;
    }

    ///ReLU preserves the type and shape of its input.
    TensorType judgeRelu(Operation op)
    {
        return TensorType(op.deps[0].elementType, op.deps[0].shape);
    }

    ///reluGrad takes the parent gradient, the ReLU operation, and the ReLU operation's input.
    bool verifyReluGrad(Operation op)
    {
        return op.deps.length == 3;
    }

    ///The gradient has the same type and shape as the ReLU output (dependency 1).
    TensorType judgeReluGrad(Operation op)
    {
        return TensorType(op.deps[1].elementType, op.deps[1].shape);
    }

    ///No structural verification is currently performed for this operation.
    bool verifyAddBias(Operation op)
    {
        return true;
    }

    ///addBias preserves the output type of the input tensor (dependency 0).
    TensorType judgeAddBias(Operation op)
    {
        return op.deps[0].outputType;
    }

    ///No structural verification is currently performed for this operation.
    bool verifyAddBiasGrad(Operation op)
    {
        return true;
    }

    ///The bias gradient has one element per channel of the parent gradient (dimension 1).
    TensorType judgeAddBiasGrad(Operation op)
    {
        return TensorType(op.deps[0].elementType, [op.deps[0].shape[1]]);
    }

    ///No structural verification is currently performed for this operation.
    bool verifyBatchNormTrain(Operation op)
    {
        return true;
    }

    //The flat output packs the forward-propagated values followed by two per-channel vectors
    //(shape[1] elements each) — see the slicing performed in the public batchNormTrain function.
    TensorType judgeBatchNormTrain(Operation op)
    {
        return TensorType(op.deps[0].elementType, [op.deps[0].volume + 2 * op.deps[0].shape[1]]);
    }

    ///No structural verification is currently performed for this operation.
    bool verifyBatchNormGrad(Operation op)
    {
        return true;
    }

    ///The flat output packs the gradients w.r.t. the first three dependencies, concatenated by volume.
    TensorType judgeBatchNormGrad(Operation op)
    {
        return TensorType(op.deps[0].elementType, [op.deps[0].volume + op.deps[1].volume + op.deps[2].volume]);
    }

    ///No structural verification is currently performed for this operation.
    bool verifyBatchNormInference(Operation op)
    {
        return true;
    }

    ///Batch norm inference preserves the output type of the input tensor (dependency 0).
    TensorType judgeBatchNormInference(Operation op)
    {
        return op.deps[0].outputType;
    }
}

public
{
    /**
    Creates a convolution operation that performs the computation required to implement a convolutional layer.

    Currently this operation only implements 2D convolutions.

    Params:
        features = A tensor containing a batch of input feature maps.
        filters = A tensor containing the filters that will be convolved with the feature maps.
        padding = The amount of zero padding applied to the height and width of each feature map.
        stride = The convolution stride in the height and width dimensions.

    Returns:
        An operation representing convolutions of input imgs with some kernels.
261 */ 262 Operation convolution(Operation features, Operation filters, size_t[] padding = [0, 0], size_t[] stride = [1, 1], 263 string mod = __MODULE__, size_t line = __LINE__) 264 { 265 return createOperation("convolution", [features, filters], 266 ["padding": Variant(padding), "stride": Variant(stride)], mod, line); 267 } 268 269 /// 270 unittest 271 { 272 import dopt.core : evaluate; 273 274 auto features = float32([1, 1, 3, 5], [ 275 1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 276 1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 277 1.0f, 1.0f, 1.0f, 0.0f, 0.0f 278 ]); 279 280 auto filters = float32([1, 1, 1, 2], [ 281 -1.0f, 1.0f 282 ]); 283 284 auto result = convolution(features, filters); 285 286 auto edges = result.evaluate().get!float; 287 288 assert(edges == [ 289 0.0f, 0.0f, 1.0f, 0.0f, 290 0.0f, 0.0f, 1.0f, 0.0f, 291 0.0f, 0.0f, 1.0f, 0.0f 292 ]); 293 } 294 295 /** 296 Creates a transposed convolution operation (also known, incorrectly, as deconvolution). 297 298 Params: 299 features = The feature maps. 300 filters = The filters to be convolved with the feature maps. 301 302 Returns: 303 The operation. 304 */ 305 Operation convolutionTranspose(Operation features, Operation filters, size_t[] padding = [0, 0], 306 size_t[] stride = [1, 1], string mod = __MODULE__, size_t line = __LINE__) 307 { 308 auto outShape = features.shape.dup; 309 outShape[2 .. $] -= 1; 310 outShape[2 .. $] *= stride[]; 311 outShape[2 .. $] += filters.shape[2 .. $] - 2 * padding[]; 312 outShape[1] = filters.shape[1]; 313 314 return convolutionFeaturesGrad(features, filters, outShape, padding, stride, mod, line); 315 } 316 317 /** 318 Creates a max pool operation that performs the computation required to implement a max pooling layer. 319 320 Params: 321 features = A tensor containing a batch of input feature maps. 322 dims = An array of pool dims. 323 324 Returns: 325 An operation representing a max pool computation. 
326 */ 327 Operation maxpool(Operation features, size_t[] dims, string mod = __MODULE__, size_t line = __LINE__) 328 { 329 return createOperation("maxpool", [features], ["dims": Variant(dims)], mod, line); 330 } 331 332 /// 333 unittest 334 { 335 import dopt.core : evaluate; 336 337 auto features = float32([1, 1, 4, 4], [ 338 1.0f, 2.0f, 4.0f, 3.0f, 339 5.0f, 3.0f, 2.0f, 2.0f, 340 0.1f, -4.0f, 3.0f, 2.0f, 341 0.0f, 0.0f, 2.0f, 2.0f 342 ]); 343 344 auto result = features.maxpool([2,2]); 345 346 auto pooledFeatures = result.evaluate().get!float; 347 348 assert(pooledFeatures == [ 349 5.0f, 4.0f, 350 0.1f, 3.0f 351 ]); 352 } 353 354 /** 355 Creates an operation representing the derivative of a convolution operation with respect to the feature maps. 356 357 Params: 358 parentGrad = Gradient of some functions w.r.t. the convolution operation. 359 filters = The filters of the convolution operation. 360 featuresShape = The shape of the features fed into the convolution operations. 361 362 Returns: 363 The gradient. 364 */ 365 Operation convolutionFeaturesGrad(Operation parentGrad, Operation filters, size_t[] featuresShape, 366 size_t[] padding, size_t[] stride, string mod = __MODULE__, size_t line = __LINE__) 367 { 368 return createOperation("convolutionFeaturesGrad", [parentGrad, filters], 369 ["featuresShape": Variant(featuresShape), "padding": Variant(padding), "stride": Variant(stride)], 370 mod, line); 371 } 372 373 /** 374 Creates an operation representing the derivative of a convolution operation with respect to the filters. 375 376 Params: 377 parentGrad = Gradient of some functions w.r.t. the convolution operation. 378 features = The features provided to the convolution operation. 379 filtersShape = The shape of the filters provided to the convolution operation. 380 381 Returns: 382 The gradient. 
383 */ 384 Operation convolutionFiltersGrad(Operation parentGrad, Operation features, size_t[] filtersShape, 385 size_t[] padding, size_t[] stride, string mod = __MODULE__, size_t line = __LINE__) 386 { 387 return createOperation("convolutionFiltersGrad", [parentGrad, features], 388 ["filtersShape": Variant(filtersShape), "padding": Variant(padding), "stride": Variant(stride)], 389 mod, line); 390 } 391 392 /** 393 Creates an operation representing the derivative of a maxpool operation with respect to the feature maps. 394 395 Params: 396 parentGrad = Gradient of some function w.r.t. the maxpool operation. 397 op = The operation being differentiated. 398 399 Returns: 400 The gradient. 401 */ 402 Operation maxpoolGrad(Operation parentGrad, Operation op, string mod = __MODULE__, 403 size_t line = __LINE__) 404 { 405 return createOperation("maxpoolGrad", [parentGrad, op, op.deps[0]], 406 ["featuresShape": Variant(op.deps[0].outputType.shape), "dims": op.attributes["dims"]], mod, line); 407 } 408 409 /** 410 Creates an operation representing the computation required for a softmax layer. 411 412 Params: 413 inputs = The inputs to the softmax function. 414 415 Returns: 416 The operation. 417 */ 418 Operation softmax(Operation inputs, string mod = __MODULE__, size_t line = __LINE__) 419 { 420 return createOperation("softmax", [inputs], null, mod, line); 421 } 422 423 /// 424 unittest 425 { 426 import std.math : approxEqual; 427 import dopt.core : evaluate; 428 429 auto y = float32([1, 5], [1.0f, 2.0f, 3.0f, 1.0f, 2.0f]).softmax(); 430 431 assert(approxEqual( 432 y.evaluate().get!float, 433 [0.0674508, 0.18335, 0.498398, 0.0674508, 0.18335] 434 )); 435 } 436 437 /** 438 Creates an operation representing the gradient of the softmax function. 
439 */ 440 Operation softmaxGrad(Operation parentGrad, Operation op, string mod = __MODULE__, 441 size_t line = __LINE__) 442 { 443 return createOperation("softmaxGrad", [parentGrad, op], null, mod, line); 444 } 445 446 /** 447 Creates an operation representing the computation required for a ReLU layer. 448 449 Params: 450 inputs = The inputs to the ReLU function. 451 452 Returns: 453 The operation. 454 */ 455 Operation relu(Operation inputs, string mod = __MODULE__, size_t line = __LINE__) 456 { 457 return createOperation("relu", [inputs], null, mod, line); 458 } 459 460 Operation reluGrad(Operation parentGrad, Operation op, string mod = __MODULE__, 461 size_t line = __LINE__) 462 { 463 return createOperation("reluGrad", [parentGrad, op, op.deps[0]], null, mod, line); 464 } 465 466 Operation addBias(Operation input, Operation bias, string mod = __MODULE__, size_t line = __LINE__) 467 { 468 return createOperation("addBias", [input, bias], null, mod, line); 469 } 470 471 Operation addBiasGrad(Operation parentGrad, string mod = __MODULE__, size_t line = __LINE__) 472 { 473 return createOperation("addBiasGrad", [parentGrad], null, mod, line); 474 } 475 476 Operation[] batchNormTrain(Operation input, Operation scale, Operation bias, Operation mean, Operation var, 477 double momentum, string mod = __MODULE__, size_t line = __LINE__) 478 { 479 auto bnop = createOperation("batchNormTrain", [input, scale, bias, mean, var], ["momentum" : Variant(momentum)] 480 , mod, line); 481 482 //bnop has the running mean/variance packed after the actual forward prop value 483 484 return [ 485 bnop.slice([0], [input.volume]).reshape(input.shape), 486 bnop.slice([input.volume], [input.volume + input.shape[1]]), 487 bnop.slice([input.volume + input.shape[1]], [input.volume + 2 * input.shape[1]]) 488 ]; 489 } 490 491 Operation batchNormGrad(Operation parentGrad, Operation input, Operation scale, string mod = __MODULE__, 492 size_t line = __LINE__) 493 { 494 return 
createOperation("batchNormGrad", [parentGrad, input, scale], null, mod, line); 495 } 496 497 Operation batchNormInference(Operation input, Operation scale, Operation bias, Operation mean, Operation var, 498 string mod = __MODULE__, size_t line = __LINE__) 499 { 500 return createOperation("batchNormInference", [input, scale, bias, mean, var], null, mod, line); 501 } 502 }