/**
Contains common neural network operations.

These operations are currently only implemented for the CUDA backend.

Authors: Henry Gouk
*/
module dopt.core.ops.nnet;

import dopt.core.ops;
import dopt.core.types;

import std.array;
import std.functional;
import std.variant;

package
{
    /**
    Registers the verifier/type-judge pair for each NNet operation with the
    operation registry.
    */
    void initialize()
    {
        registerOperation("convolution", OpDef(toDelegate(&verifyConvolution), toDelegate(&judgeConvolution)));
        registerOperation("maxpool", OpDef(toDelegate(&verifyMaxpool), toDelegate(&judgeMaxpool)));
        registerOperation("convolutionFeaturesGrad", OpDef(toDelegate(&verifyConvolutionFeaturesGrad),
            toDelegate(&judgeConvolutionFeaturesGrad)));
        registerOperation("convolutionFiltersGrad", OpDef(toDelegate(&verifyConvolutionFiltersGrad),
            toDelegate(&judgeConvolutionFiltersGrad)));
        registerOperation("maxpoolGrad", OpDef(toDelegate(&verifyMaxpoolGrad), toDelegate(&judgeMaxpoolGrad)));
        registerOperation("softmax", OpDef(toDelegate(&verifySoftmax), toDelegate(&judgeSoftmax)));
        registerOperation("softmaxGrad", OpDef(toDelegate(&verifySoftmaxGrad), toDelegate(&judgeSoftmaxGrad)));
        registerOperation("addBias", OpDef(toDelegate(&verifyAddBias), toDelegate(&judgeAddBias)));
        registerOperation("addBiasGrad", OpDef(toDelegate(&verifyAddBiasGrad), toDelegate(&judgeAddBiasGrad)));
        registerOperation("batchNormTrain", OpDef(toDelegate(&verifyBatchNormTrain), toDelegate(&judgeBatchNormTrain)));
        registerOperation("batchNormGrad", OpDef(toDelegate(&verifyBatchNormGrad), toDelegate(&judgeBatchNormGrad)));
    }
}

private
{
    // Checks that a convolution has exactly two rank-4 dependencies (images and
    // filters) with matching element types and matching input-channel counts.
    bool verifyConvolution(Operation op)
    {
        if(op.deps.length != 2)
        {
            return false;
        }

        auto imgs = op.deps[0].outputType;
        auto filters = op.deps[1].outputType;

        if(imgs.rank != 4 || filters.rank != 4)
        {
            return false;
        }

        if(imgs.elementType != filters.elementType)
        {
            return false;
        }

        // Input channels of the images must equal the filters' channel dimension.
        if(imgs.shape[1] != filters.shape[1])
        {
            return false;
        }

        return true;
    }

    // Computes the output type of a 2D convolution: [batch, outChannels, H', W']
    // where H' and W' follow the standard (in + 2*pad - kernel) / stride + 1 rule.
    TensorType judgeConvolution(Operation op)
    {
        auto imgs = op.deps[0];
        auto filters = op.deps[1];

        auto padding = op.attributes["padding"].get!(size_t[]);
        auto stride = op.attributes["stride"].get!(size_t[]);

        auto batchSize = imgs.outputType.shape[0];
        auto outputChannels = filters.outputType.shape[0];
        auto newHeight = (imgs.outputType.shape[2] + 2 * padding[0] - filters.outputType.shape[2]) / stride[0] + 1;
        auto newWidth = (imgs.outputType.shape[3] + 2 * padding[1] - filters.outputType.shape[3]) / stride[1] + 1;

        auto shape = [batchSize, outputChannels, newHeight, newWidth];

        return TensorType(imgs.outputType.elementType, shape);
    }

    // A maxpool must have a single rank-4 dependency and a 2-element "dims"
    // attribute of type size_t[].
    bool verifyMaxpool(Operation op)
    {
        return op.deps.length == 1
            && op.deps[0].outputType.rank == 4
            && op.attributes["dims"].peek!(size_t[]) !is null
            && op.attributes["dims"].get!(size_t[]).length == 2;
    }

    // Output shape keeps batch/channels and divides the spatial dims by the pool
    // window size (integer division — any remainder rows/columns are dropped).
    TensorType judgeMaxpool(Operation op)
    {
        auto poolDims = op.attributes["dims"].get!(size_t[]);
        size_t[] shape = new size_t[4];
        shape[0] = op.deps[0].shape[0];
        shape[1] = op.deps[0].shape[1];
        shape[2] = op.deps[0].shape[2] / poolDims[0];
        shape[3] = op.deps[0].shape[3] / poolDims[1];

        return TensorType(op.deps[0].outputType.elementType, shape);
    }

    // No structural validation is performed for the gradient ops; they are
    // constructed internally with known-good attributes.
    bool verifyConvolutionFeaturesGrad(Operation op)
    {
        return true;
    }

    // The features gradient has the same shape as the original feature maps,
    // carried in the "featuresShape" attribute.
    TensorType judgeConvolutionFeaturesGrad(Operation op)
    {
        auto parentGrad = op.deps[0];
        auto dims = op.attributes["featuresShape"].get!(size_t[]);

        size_t[] shape = new size_t[4];
        shape[] = dims[];

        return TensorType(parentGrad.outputType.elementType, shape);
    }

    bool verifyConvolutionFiltersGrad(Operation op)
    {
        return true;
    }

    // The filters gradient has the same shape as the original filters, carried
    // in the "filtersShape" attribute.
    TensorType judgeConvolutionFiltersGrad(Operation op)
    {
        auto parentGrad = op.deps[0];
        auto dims = op.attributes["filtersShape"].get!(size_t[]);

        size_t[] shape = new size_t[4];
        shape[] = dims[];

        return TensorType(parentGrad.outputType.elementType, shape);
    }

    bool verifyMaxpoolGrad(Operation op)
    {
        return true;
    }

    // The maxpool gradient has the same shape as the pooled input, carried in
    // the "featuresShape" attribute.
    TensorType judgeMaxpoolGrad(Operation op)
    {
        auto parentGrad = op.deps[0];
        auto dims = op.attributes["featuresShape"].get!(size_t[]);

        size_t[] shape = new size_t[4];
        shape[] = dims[];

        return TensorType(parentGrad.outputType.elementType, shape);
    }

    bool verifySoftmax(Operation op)
    {
        return op.deps.length == 1;
    }

    // Softmax preserves the input's element type and shape.
    TensorType judgeSoftmax(Operation op)
    {
        return TensorType(op.deps[0].elementType, op.deps[0].shape);
    }

    bool verifySoftmaxGrad(Operation op)
    {
        return op.deps.length == 2;
    }

    // The gradient has the type/shape of the softmax output (deps[1]).
    TensorType judgeSoftmaxGrad(Operation op)
    {
        return TensorType(op.deps[1].elementType, op.deps[1].shape);
    }

    bool verifyAddBias(Operation op)
    {
        return true;
    }

    // Adding a bias does not change the input's type.
    TensorType judgeAddBias(Operation op)
    {
        return op.deps[0].outputType;
    }

    bool verifyAddBiasGrad(Operation op)
    {
        return true;
    }

    // The bias gradient is a vector with one element per channel (shape[1] of
    // the parent gradient).
    TensorType judgeAddBiasGrad(Operation op)
    {
        return TensorType(op.deps[0].elementType, [op.deps[0].shape[1]]);
    }

    bool verifyBatchNormTrain(Operation op)
    {
        return true;
    }

    // Batch normalisation preserves the input's type.
    TensorType judgeBatchNormTrain(Operation op)
    {
        return op.deps[0].outputType;
    }

    bool verifyBatchNormGrad(Operation op)
    {
        return true;
    }

    // The result is a single flat vector large enough to pack the gradients of
    // all three dependencies (input, scale, bias) back to back.
    TensorType judgeBatchNormGrad(Operation op)
    {
        return TensorType(op.deps[0].elementType, [op.deps[0].volume + op.deps[1].volume + op.deps[2].volume]);
    }
}

public
{
    /**
    Creates a convolution operation that performs the computation required to implement a convolutional layer.

    Currently this operation only implements 2D convolutions.

    Params:
        features = A tensor containing a batch of input feature maps.
        filters = A tensor containing the filters that will be convolved with the feature maps.
        padding = The padding applied to the height and width dimensions.
        stride = The stride used in the height and width dimensions.

    Returns:
        An operation representing convolutions of input imgs with some kernels.
    */
    Operation convolution(Operation features, Operation filters, size_t[] padding = [0, 0], size_t[] stride = [1, 1],
        string mod = __MODULE__, size_t line = __LINE__)
    {
        return createOperation("convolution", [features, filters],
            ["padding": Variant(padding), "stride": Variant(stride)], mod, line);
    }

    ///
    unittest
    {
        import dopt.core.cuda : evaluateCUDA;

        auto features = float32([1, 1, 3, 5], [
            1.0f, 1.0f, 1.0f, 0.0f, 0.0f,
            1.0f, 1.0f, 1.0f, 0.0f, 0.0f,
            1.0f, 1.0f, 1.0f, 0.0f, 0.0f
        ]);

        auto filters = float32([1, 1, 1, 2], [
            -1.0f, 1.0f
        ]);

        auto result = convolution(features, filters);

        auto edges = result.evaluateCUDA().as!float;

        assert(edges == [
            0.0f, 0.0f, 1.0f, 0.0f,
            0.0f, 0.0f, 1.0f, 0.0f,
            0.0f, 0.0f, 1.0f, 0.0f
        ]);
    }

    /**
    Creates a transposed convolution operation (also known, incorrectly, as deconvolution).

    Params:
        features = The feature maps.
        filters = The filters to be convolved with the feature maps.
        padding = The padding applied to the height and width dimensions.
        stride = The stride used in the height and width dimensions.

    Returns:
        The operation.
    */
    Operation convolutionTranspose(Operation features, Operation filters, size_t[] padding = [0, 0],
        size_t[] stride = [1, 1], string mod = __MODULE__, size_t line = __LINE__)
    {
        // Invert the forward convolution's shape rule to recover the input size:
        // out = (in - 1) * stride + kernel - 2 * padding.
        auto outShape = features.shape.dup;
        outShape[2 .. $] -= 1;
        outShape[2 .. $] *= stride[];
        outShape[2 .. $] += filters.shape[2 .. $] - 2 * padding[];

        // A transposed convolution is the gradient of a convolution w.r.t. its
        // features, so it is implemented in terms of convolutionFeaturesGrad.
        return convolutionFeaturesGrad(features, filters, outShape, padding, stride, mod, line);
    }

    /**
    Creates a max pool operation that performs the computation required to implement a max pooling layer.

    Params:
        features = A tensor containing a batch of input feature maps.
        dims = An array of pool dims.

    Returns:
        An operation representing a max pool computation.
    */
    Operation maxpool(Operation features, size_t[] dims, string mod = __MODULE__, size_t line = __LINE__)
    {
        return createOperation("maxpool", [features], ["dims": Variant(dims)], mod, line);
    }

    ///
    unittest
    {
        import dopt.core.cuda : evaluateCUDA;

        auto features = float32([1, 1, 4, 4], [
            1.0f, 2.0f, 4.0f, 3.0f,
            5.0f, 3.0f, 2.0f, 2.0f,
            0.1f, -4.0f, 3.0f, 2.0f,
            0.0f, 0.0f, 2.0f, 2.0f
        ]);

        auto result = features.maxpool([2,2]);

        auto pooledFeatures = result.evaluateCUDA().as!float;

        assert(pooledFeatures == [
            5.0f, 4.0f,
            0.1f, 3.0f
        ]);
    }

    /**
    Creates an operation representing the derivative of a convolution operation with respect to the feature maps.

    Params:
        parentGrad = Gradient of some functions w.r.t. the convolution operation.
        filters = The filters of the convolution operation.
        featuresShape = The shape of the features fed into the convolution operations.
        padding = The padding used by the forward convolution.
        stride = The stride used by the forward convolution.

    Returns:
        The gradient.
    */
    Operation convolutionFeaturesGrad(Operation parentGrad, Operation filters, size_t[] featuresShape,
        size_t[] padding, size_t[] stride, string mod = __MODULE__, size_t line = __LINE__)
    {
        return createOperation("convolutionFeaturesGrad", [parentGrad, filters],
            ["featuresShape": Variant(featuresShape), "padding": Variant(padding), "stride": Variant(stride)],
            mod, line);
    }

    /**
    Creates an operation representing the derivative of a convolution operation with respect to the filters.

    Params:
        parentGrad = Gradient of some functions w.r.t. the convolution operation.
        features = The features provided to the convolution operation.
        filtersShape = The shape of the filters provided to the convolution operation.
        padding = The padding used by the forward convolution.
        stride = The stride used by the forward convolution.

    Returns:
        The gradient.
    */
    Operation convolutionFiltersGrad(Operation parentGrad, Operation features, size_t[] filtersShape,
        size_t[] padding, size_t[] stride, string mod = __MODULE__, size_t line = __LINE__)
    {
        return createOperation("convolutionFiltersGrad", [parentGrad, features],
            ["filtersShape": Variant(filtersShape), "padding": Variant(padding), "stride": Variant(stride)],
            mod, line);
    }

    /**
    Creates an operation representing the derivative of a maxpool operation with respect to the feature maps.

    Params:
        parentGrad = Gradient of some function w.r.t. the maxpool operation.
        op = The operation being differentiated.

    Returns:
        The gradient.
    */
    Operation maxpoolGrad(Operation parentGrad, Operation op, string mod = __MODULE__,
        size_t line = __LINE__)
    {
        // The gradient needs the pooled output, the forward op, and the original
        // features (op.deps[0]) to recover which element was the max in each window.
        return createOperation("maxpoolGrad", [parentGrad, op, op.deps[0]],
            ["featuresShape": Variant(op.deps[0].outputType.shape), "dims": op.attributes["dims"]], mod, line);
    }

    /**
    Creates an operation representing the computation required for a softmax layer.

    Params:
        inputs = The inputs to the softmax function.

    Returns:
        The operation.
    */
    Operation softmax(Operation inputs, string mod = __MODULE__, size_t line = __LINE__)
    {
        return createOperation("softmax", [inputs], null, mod, line);
    }

    ///
    unittest
    {
        import std.math : approxEqual;
        import dopt.core.cuda : evaluateCUDA;

        auto y = float32([1, 5], [1.0f, 2.0f, 3.0f, 1.0f, 2.0f]).softmax();

        assert(approxEqual(
            y.evaluateCUDA().as!float,
            [0.0674508, 0.18335, 0.498398, 0.0674508, 0.18335]
        ));
    }

    /**
    Creates an operation representing the gradient of the softmax function.

    Params:
        parentGrad = Gradient of some function w.r.t. the softmax operation.
        op = The softmax operation being differentiated.

    Returns:
        The gradient.
    */
    Operation softmaxGrad(Operation parentGrad, Operation op, string mod = __MODULE__,
        size_t line = __LINE__)
    {
        return createOperation("softmaxGrad", [parentGrad, op], null, mod, line);
    }

    /**
    Creates an operation that adds a bias tensor to a batch of feature maps.

    Params:
        input = A tensor containing the feature maps.
        bias = The bias tensor to be added.

    Returns:
        The operation.
    */
    Operation addBias(Operation input, Operation bias, string mod = __MODULE__, size_t line = __LINE__)
    {
        return createOperation("addBias", [input, bias], null, mod, line);
    }

    /**
    Creates an operation representing the gradient of addBias with respect to the bias.

    Params:
        parentGrad = Gradient of some function w.r.t. the addBias operation.

    Returns:
        The gradient — a vector with one element per channel of parentGrad.
    */
    Operation addBiasGrad(Operation parentGrad, string mod = __MODULE__, size_t line = __LINE__)
    {
        return createOperation("addBiasGrad", [parentGrad], null, mod, line);
    }

    /**
    Creates an operation representing batch normalisation in training mode.

    Params:
        input = The tensor to be normalised.
        scale = The scale (gamma) parameter.
        bias = The bias (beta) parameter.

    Returns:
        The operation, with the same type as input.
    */
    Operation batchNormTrain(Operation input, Operation scale, Operation bias, string mod = __MODULE__,
        size_t line = __LINE__)
    {
        return createOperation("batchNormTrain", [input, scale, bias], null, mod, line);
    }

    /**
    Creates an operation representing the gradient of batch normalisation.

    Params:
        parentGrad = Gradient of some function w.r.t. the batchNormTrain operation.
        input = The input provided to the batchNormTrain operation.
        scale = The scale parameter provided to the batchNormTrain operation.

    Returns:
        The gradient. NOTE(review): per judgeBatchNormGrad this is a single flat
        vector sized input.volume + scale.volume + bias.volume — presumably the
        backend packs the input/scale/bias gradients back to back; confirm with
        the CUDA implementation.
    */
    Operation batchNormGrad(Operation parentGrad, Operation input, Operation scale, string mod = __MODULE__,
        size_t line = __LINE__)
    {
        return createOperation("batchNormGrad", [parentGrad, input, scale], null, mod, line);
    }
}