/**
Contains common neural network operations.

These operations are currently only implemented for the CUDA backend.

Authors: Henry Gouk
*/
module dopt.core.ops.nnet;

import dopt.core.ops;
import dopt.core.types;

import std.array;
import std.functional;
import std.variant;

package
{
    void initialize()
    {
        registerOperation("convolution", OpDef(toDelegate(&verifyConvolution), toDelegate(&judgeConvolution)));
        registerOperation("maxpool", OpDef(toDelegate(&verifyMaxpool), toDelegate(&judgeMaxpool)));
        registerOperation("convolutionFeaturesGrad", OpDef(toDelegate(&verifyConvolutionFeaturesGrad),
            toDelegate(&judgeConvolutionFeaturesGrad)));
        registerOperation("convolutionFiltersGrad", OpDef(toDelegate(&verifyConvolutionFiltersGrad),
            toDelegate(&judgeConvolutionFiltersGrad)));
        registerOperation("maxpoolGrad", OpDef(toDelegate(&verifyMaxpoolGrad), toDelegate(&judgeMaxpoolGrad)));
        registerOperation("softmax", OpDef(toDelegate(&verifySoftmax), toDelegate(&judgeSoftmax)));
        registerOperation("softmaxGrad", OpDef(toDelegate(&verifySoftmaxGrad), toDelegate(&judgeSoftmaxGrad)));
        registerOperation("relu", OpDef(toDelegate(&verifyRelu), toDelegate(&judgeRelu)));
        registerOperation("reluGrad", OpDef(toDelegate(&verifyReluGrad), toDelegate(&judgeReluGrad)));
        registerOperation("addBias", OpDef(toDelegate(&verifyAddBias), toDelegate(&judgeAddBias)));
        registerOperation("addBiasGrad", OpDef(toDelegate(&verifyAddBiasGrad), toDelegate(&judgeAddBiasGrad)));
        registerOperation("batchNormTrain", OpDef(toDelegate(&verifyBatchNormTrain), toDelegate(&judgeBatchNormTrain)));
        registerOperation("batchNormGrad", OpDef(toDelegate(&verifyBatchNormGrad), toDelegate(&judgeBatchNormGrad)));
        registerOperation("batchNormInference", OpDef(toDelegate(&verifyBatchNormInference),
            toDelegate(&judgeBatchNormInference)));
    }
}

private
{
    bool verifyConvolution(Operation op)
    {
        if(op.deps.length != 2)
        {
            return false;
        }

        auto imgs = op.deps[0].outputType;
        auto filters = op.deps[1].outputType;

        if(imgs.rank != 4 || filters.rank != 4)
        {
            return false;
        }

        if(imgs.elementType != filters.elementType)
        {
            return false;
        }

        if(imgs.shape[1] != filters.shape[1])
        {
            return false;
        }

        return true;
    }

    TensorType judgeConvolution(Operation op)
    {
        auto imgs = op.deps[0];
        auto filters = op.deps[1];

        auto padding = op.attributes["padding"].get!(size_t[]);
        auto stride = op.attributes["stride"].get!(size_t[]);

        auto batchSize = imgs.outputType.shape[0];
        auto outputChannels = filters.outputType.shape[0];
        auto newHeight = (imgs.outputType.shape[2] + 2 * padding[0] - filters.outputType.shape[2]) / stride[0] + 1;
        auto newWidth = (imgs.outputType.shape[3] + 2 * padding[1] - filters.outputType.shape[3]) / stride[1] + 1;

        auto shape = [batchSize, outputChannels, newHeight, newWidth];

        return TensorType(imgs.outputType.elementType, shape);
    }

    bool verifyMaxpool(Operation op)
    {
        return op.deps.length == 1
            && op.deps[0].outputType.rank == 4
            && op.attributes["dims"].peek!(size_t[]) !is null
            && op.attributes["dims"].get!(size_t[]).length == 2;
    }

    TensorType judgeMaxpool(Operation op)
    {
        auto poolDims = op.attributes["dims"].get!(size_t[]);
        size_t[] shape = new size_t[4];
        shape[0] = op.deps[0].shape[0];
        shape[1] = op.deps[0].shape[1];
        shape[2] = op.deps[0].shape[2] / poolDims[0];
        shape[3] = op.deps[0].shape[3] / poolDims[1];

        return TensorType(op.deps[0].outputType.elementType, shape);
    }
    bool verifyConvolutionFeaturesGrad(Operation op)
    {
        return true;
    }

    TensorType judgeConvolutionFeaturesGrad(Operation op)
    {
        auto parentGrad = op.deps[0];
        auto dims = op.attributes["featuresShape"].get!(size_t[]);

        size_t[] shape = new size_t[4];
        shape[] = dims[];

        return TensorType(parentGrad.outputType.elementType, shape);
    }

    bool verifyConvolutionFiltersGrad(Operation op)
    {
        return true;
    }

    TensorType judgeConvolutionFiltersGrad(Operation op)
    {
        auto parentGrad = op.deps[0];
        auto dims = op.attributes["filtersShape"].get!(size_t[]);

        size_t[] shape = new size_t[4];
        shape[] = dims[];

        return TensorType(parentGrad.outputType.elementType, shape);
    }

    bool verifyMaxpoolGrad(Operation op)
    {
        return true;
    }

    TensorType judgeMaxpoolGrad(Operation op)
    {
        auto parentGrad = op.deps[0];
        auto dims = op.attributes["featuresShape"].get!(size_t[]);

        size_t[] shape = new size_t[4];
        shape[] = dims[];

        return TensorType(parentGrad.outputType.elementType, shape);
    }

    bool verifySoftmax(Operation op)
    {
        return op.deps.length == 1;
    }

    TensorType judgeSoftmax(Operation op)
    {
        return TensorType(op.deps[0].elementType, op.deps[0].shape);
    }

    bool verifySoftmaxGrad(Operation op)
    {
        return op.deps.length == 2;
    }

    TensorType judgeSoftmaxGrad(Operation op)
    {
        return TensorType(op.deps[1].elementType, op.deps[1].shape);
    }

    bool verifyRelu(Operation op)
    {
        return op.deps.length == 1;
    }

    TensorType judgeRelu(Operation op)
    {
        return TensorType(op.deps[0].elementType, op.deps[0].shape);
    }

    bool verifyReluGrad(Operation op)
    {
        return op.deps.length == 3;
    }

    TensorType judgeReluGrad(Operation op)
    {
        return TensorType(op.deps[1].elementType, op.deps[1].shape);
    }

    bool verifyAddBias(Operation op)
    {
        return true;
    }

    TensorType judgeAddBias(Operation op)
    {
        return op.deps[0].outputType;
    }

    bool verifyAddBiasGrad(Operation op)
    {
        return true;
    }

    TensorType judgeAddBiasGrad(Operation op)
    {
        return TensorType(op.deps[0].elementType, [op.deps[0].shape[1]]);
    }

    bool verifyBatchNormTrain(Operation op)
    {
        return true;
    }

    TensorType judgeBatchNormTrain(Operation op)
    {
        return TensorType(op.deps[0].elementType, [op.deps[0].volume + 2 * op.deps[0].shape[1]]);
    }

    bool verifyBatchNormGrad(Operation op)
    {
        return true;
    }

    TensorType judgeBatchNormGrad(Operation op)
    {
        return TensorType(op.deps[0].elementType, [op.deps[0].volume + op.deps[1].volume + op.deps[2].volume]);
    }

    bool verifyBatchNormInference(Operation op)
    {
        return true;
    }

    TensorType judgeBatchNormInference(Operation op)
    {
        return op.deps[0].outputType;
    }
}

public
{
    /**
    Creates a convolution operation that performs the computation required to implement a convolutional layer.

    Currently this operation only implements 2D convolutions.

    Params:
        features = A tensor containing a batch of input feature maps.
        filters = A tensor containing the filters that will be convolved with the feature maps.
        padding = The vertical and horizontal zero padding applied to each feature map.
        stride = The vertical and horizontal stride of the convolution.

    Returns:
        An operation representing the convolution of the input feature maps with the given filters.
    */
    Operation convolution(Operation features, Operation filters, size_t[] padding = [0, 0], size_t[] stride = [1, 1],
        string mod = __MODULE__, size_t line = __LINE__)
    {
        return createOperation("convolution", [features, filters],
            ["padding": Variant(padding), "stride": Variant(stride)], mod, line);
    }

    ///
    unittest
    {
        import dopt.core : evaluate;

        auto features = float32([1, 1, 3, 5], [
            1.0f, 1.0f, 1.0f, 0.0f, 0.0f,
            1.0f, 1.0f, 1.0f, 0.0f, 0.0f,
            1.0f, 1.0f, 1.0f, 0.0f, 0.0f
        ]);

        auto filters = float32([1, 1, 1, 2], [
            -1.0f, 1.0f
        ]);

        auto result = convolution(features, filters);

        auto edges = result.evaluate().as!float;

        assert(edges == [
            0.0f, 0.0f, 1.0f, 0.0f,
            0.0f, 0.0f, 1.0f, 0.0f,
            0.0f, 0.0f, 1.0f, 0.0f
        ]);
    }
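    // Illustrative shape sketch (not one of the original examples): with 2x2 filters, one pixel of zero
    // padding, and a stride of two, a 3x3 feature map yields (3 + 2*1 - 2) / 2 + 1 = 2 outputs in each
    // spatial dimension, per judgeConvolution. This only builds the graph, so no backend is needed.
    unittest
    {
        auto x = float32([1, 1, 3, 3], [
            1.0f, 2.0f, 3.0f,
            4.0f, 5.0f, 6.0f,
            7.0f, 8.0f, 9.0f
        ]);

        auto w = float32([1, 1, 2, 2], [
            1.0f, 0.0f,
            0.0f, 1.0f
        ]);

        auto y = convolution(x, w, [1, 1], [2, 2]);

        assert(y.shape == [1, 1, 2, 2]);
    }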
    /**
    Creates a transposed convolution operation (also known, incorrectly, as deconvolution).

    Params:
        features = The feature maps.
        filters = The filters to be convolved with the feature maps.

    Returns:
        The operation.
    */
    Operation convolutionTranspose(Operation features, Operation filters, size_t[] padding = [0, 0],
        size_t[] stride = [1, 1], string mod = __MODULE__, size_t line = __LINE__)
    {
        // The output is the feature map shape that the corresponding forward convolution (with the same
        // filters, stride, and padding) would have consumed: (in - 1) * stride + filter - 2 * padding
        // in each spatial dimension.
        auto outShape = features.shape.dup;
        outShape[2 .. $] -= 1;
        outShape[2 .. $] *= stride[];
        outShape[2 .. $] += filters.shape[2 .. $] - 2 * padding[];

        return convolutionFeaturesGrad(features, filters, outShape, padding, stride, mod, line);
    }
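    // Illustrative shape sketch (not one of the original examples): with 2x2 filters, stride 2, and no
    // padding, a 2x2 feature map is upsampled to (2 - 1) * 2 + 2 = 4 in each spatial dimension. This
    // only builds the graph, so no backend is needed; the tensor values here are arbitrary.
    unittest
    {
        auto features = float32([1, 1, 2, 2], [
            1.0f, 2.0f,
            3.0f, 4.0f
        ]);

        auto filters = float32([1, 1, 2, 2], [
            1.0f, 1.0f,
            1.0f, 1.0f
        ]);

        auto upsampled = convolutionTranspose(features, filters, [0, 0], [2, 2]);

        assert(upsampled.shape == [1, 1, 4, 4]);
    }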
    /**
    Creates a max pool operation that performs the computation required to implement a max pooling layer.

    Params:
        features = A tensor containing a batch of input feature maps.
        dims = The height and width of the pooling window.

    Returns:
        An operation representing a max pool computation.
    */
    Operation maxpool(Operation features, size_t[] dims, string mod = __MODULE__, size_t line = __LINE__)
    {
        return createOperation("maxpool", [features], ["dims": Variant(dims)], mod, line);
    }

    ///
    unittest
    {
        import dopt.core : evaluate;

        auto features = float32([1, 1, 4, 4], [
            1.0f, 2.0f, 4.0f, 3.0f,
            5.0f, 3.0f, 2.0f, 2.0f,
            0.1f, -4.0f, 3.0f, 2.0f,
            0.0f, 0.0f, 2.0f, 2.0f
        ]);

        auto result = features.maxpool([2, 2]);

        auto pooledFeatures = result.evaluate().as!float;

        assert(pooledFeatures == [
            5.0f, 4.0f,
            0.1f, 3.0f
        ]);
    }

    /**
    Creates an operation representing the derivative of a convolution operation with respect to the feature maps.

    Params:
        parentGrad = Gradient of some function w.r.t. the convolution operation.
        filters = The filters of the convolution operation.
        featuresShape = The shape of the features fed into the convolution operation.
        padding = The padding used by the convolution operation.
        stride = The stride used by the convolution operation.

    Returns:
        The gradient.
    */
    Operation convolutionFeaturesGrad(Operation parentGrad, Operation filters, size_t[] featuresShape,
        size_t[] padding, size_t[] stride, string mod = __MODULE__, size_t line = __LINE__)
    {
        return createOperation("convolutionFeaturesGrad", [parentGrad, filters],
            ["featuresShape": Variant(featuresShape), "padding": Variant(padding), "stride": Variant(stride)],
            mod, line);
    }

    /**
    Creates an operation representing the derivative of a convolution operation with respect to the filters.

    Params:
        parentGrad = Gradient of some function w.r.t. the convolution operation.
        features = The features provided to the convolution operation.
        filtersShape = The shape of the filters provided to the convolution operation.
        padding = The padding used by the convolution operation.
        stride = The stride used by the convolution operation.

    Returns:
        The gradient.
    */
    Operation convolutionFiltersGrad(Operation parentGrad, Operation features, size_t[] filtersShape,
        size_t[] padding, size_t[] stride, string mod = __MODULE__, size_t line = __LINE__)
    {
        return createOperation("convolutionFiltersGrad", [parentGrad, features],
            ["filtersShape": Variant(filtersShape), "padding": Variant(padding), "stride": Variant(stride)],
            mod, line);
    }

    /**
    Creates an operation representing the derivative of a maxpool operation with respect to the feature maps.

    Params:
        parentGrad = Gradient of some function w.r.t. the maxpool operation.
        op = The maxpool operation being differentiated.

    Returns:
        The gradient.
    */
    Operation maxpoolGrad(Operation parentGrad, Operation op, string mod = __MODULE__,
        size_t line = __LINE__)
    {
        return createOperation("maxpoolGrad", [parentGrad, op, op.deps[0]],
            ["featuresShape": Variant(op.deps[0].outputType.shape), "dims": op.attributes["dims"]], mod, line);
    }

    /**
    Creates an operation representing the computation required for a softmax layer.

    Params:
        inputs = The inputs to the softmax function.

    Returns:
        The operation.
    */
    Operation softmax(Operation inputs, string mod = __MODULE__, size_t line = __LINE__)
    {
        return createOperation("softmax", [inputs], null, mod, line);
    }

    ///
    unittest
    {
        import std.math : approxEqual;
        import dopt.core : evaluate;

        auto y = float32([1, 5], [1.0f, 2.0f, 3.0f, 1.0f, 2.0f]).softmax();

        assert(approxEqual(
            y.evaluate().as!float,
            [0.0674508, 0.18335, 0.498398, 0.0674508, 0.18335]
        ));
    }

    /**
    Creates an operation representing the gradient of the softmax function.

    Params:
        parentGrad = Gradient of some function w.r.t. the softmax operation.
        op = The softmax operation being differentiated.

    Returns:
        The gradient.
    */
    Operation softmaxGrad(Operation parentGrad, Operation op, string mod = __MODULE__,
        size_t line = __LINE__)
    {
        return createOperation("softmaxGrad", [parentGrad, op], null, mod, line);
    }

    /**
    Creates an operation representing the computation required for a ReLU layer.

    Params:
        inputs = The inputs to the ReLU function.

    Returns:
        The operation.
    */
    Operation relu(Operation inputs, string mod = __MODULE__, size_t line = __LINE__)
    {
        return createOperation("relu", [inputs], null, mod, line);
    }
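    // Illustrative example (not one of the original examples): ReLU clamps negative activations to zero.
    // Like the convolution and maxpool examples above, evaluating this assumes a working CUDA backend.
    unittest
    {
        import dopt.core : evaluate;

        auto x = float32([1, 1, 2, 2], [
            -1.0f, 2.0f,
            -3.0f, 4.0f
        ]);

        auto y = x.relu();

        assert(y.evaluate().as!float == [0.0f, 2.0f, 0.0f, 4.0f]);
    }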
    /**
    Creates an operation representing the gradient of the ReLU function.

    Params:
        parentGrad = Gradient of some function w.r.t. the ReLU operation.
        op = The ReLU operation being differentiated.

    Returns:
        The gradient.
    */
    Operation reluGrad(Operation parentGrad, Operation op, string mod = __MODULE__,
        size_t line = __LINE__)
    {
        return createOperation("reluGrad", [parentGrad, op, op.deps[0]], null, mod, line);
    }

    /**
    Creates an operation that adds a bias term to each channel of a batch of feature maps.

    Params:
        input = The input feature maps.
        bias = The bias terms, one for each channel (dimension 1) of input.

    Returns:
        The operation.
    */
    Operation addBias(Operation input, Operation bias, string mod = __MODULE__, size_t line = __LINE__)
    {
        return createOperation("addBias", [input, bias], null, mod, line);
    }

    /**
    Creates an operation representing the gradient of addBias with respect to the bias terms.

    Params:
        parentGrad = Gradient of some function w.r.t. the addBias operation.

    Returns:
        The gradient, containing one element per channel of parentGrad.
    */
    Operation addBiasGrad(Operation parentGrad, string mod = __MODULE__, size_t line = __LINE__)
    {
        return createOperation("addBiasGrad", [parentGrad], null, mod, line);
    }

    /**
    Creates the operations required for the training pass of a batch normalisation layer.

    Params:
        input = The input feature maps.
        scale = The per-channel scale parameters.
        bias = The per-channel bias parameters.
        mean = The running mean estimate.
        var = The running variance estimate.
        momentum = The momentum used when updating the running mean and variance estimates.

    Returns:
        An array containing the batch normalised output, followed by the running mean and variance
        estimates.
    */
    Operation[] batchNormTrain(Operation input, Operation scale, Operation bias, Operation mean, Operation var,
        double momentum, string mod = __MODULE__, size_t line = __LINE__)
    {
        auto bnop = createOperation("batchNormTrain", [input, scale, bias, mean, var],
            ["momentum": Variant(momentum)], mod, line);

        // bnop has the running mean/variance packed after the actual forward prop value
        return [
            bnop.slice([0], [input.volume]).reshape(input.shape),
            bnop.slice([input.volume], [input.volume + input.shape[1]]),
            bnop.slice([input.volume + input.shape[1]], [input.volume + 2 * input.shape[1]])
        ];
    }

    /**
    Creates an operation representing the gradient of the batch normalisation operation.

    Params:
        parentGrad = Gradient of some function w.r.t. the batch normalisation operation.
        input = The input to the batch normalisation operation.
        scale = The scale parameters provided to the batch normalisation operation.

    Returns:
        The gradients, packed into a single rank-1 tensor.
    */
    Operation batchNormGrad(Operation parentGrad, Operation input, Operation scale, string mod = __MODULE__,
        size_t line = __LINE__)
    {
        return createOperation("batchNormGrad", [parentGrad, input, scale], null, mod, line);
    }

    /**
    Creates an operation representing the inference pass of a batch normalisation layer.

    Params:
        input = The input feature maps.
        scale = The per-channel scale parameters.
        bias = The per-channel bias parameters.
        mean = The running mean estimate computed during training.
        var = The running variance estimate computed during training.

    Returns:
        The operation.
    */
    Operation batchNormInference(Operation input, Operation scale, Operation bias, Operation mean, Operation var,
        string mod = __MODULE__, size_t line = __LINE__)
    {
        return createOperation("batchNormInference", [input, scale, bias, mean, var], null, mod, line);
    }
}
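// Illustrative shape sketch (not one of the original examples): batchNormTrain packs the forward output
// and the running statistics into one buffer, and the public function slices them back apart, so the
// result should be the normalised maps plus one running mean and one running variance per channel. The
// [1, C, 1, 1] shapes used for the per-channel parameters below follow the usual cuDNN spatial batch
// normalisation convention and are an assumption rather than something verifyBatchNormTrain enforces.
// This only builds the graph, so no backend is needed.
unittest
{
    auto x = float32([2, 3, 1, 1], [
        1.0f, 2.0f, 3.0f,
        4.0f, 5.0f, 6.0f
    ]);

    auto scale = float32([1, 3, 1, 1], [1.0f, 1.0f, 1.0f]);
    auto bias = float32([1, 3, 1, 1], [0.0f, 0.0f, 0.0f]);
    auto mean = float32([1, 3, 1, 1], [0.0f, 0.0f, 0.0f]);
    auto var = float32([1, 3, 1, 1], [1.0f, 1.0f, 1.0f]);

    auto res = batchNormTrain(x, scale, bias, mean, var, 0.9);

    assert(res.length == 3);
    assert(res[0].shape == x.shape);
    assert(res[1].shape == [3]);
    assert(res[2].shape == [3]);
}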