/**
    Contains common neural network operations.

    These operations are currently only implemented for the CUDA backend.

    Authors: Henry Gouk
*/
module dopt.core.ops.nnet;

import dopt.core.ops;
import dopt.core.types;

import std.array;
import std.functional;
import std.variant;

package
{
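    // Register each nnet operation's verification function and output type inference ("judge") function.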
    void initialize()
    {
        registerOperation("convolution", OpDef(toDelegate(&verifyConvolution), toDelegate(&judgeConvolution)));
        registerOperation("maxpool", OpDef(toDelegate(&verifyMaxpool), toDelegate(&judgeMaxpool)));
        registerOperation("convolutionFeaturesGrad", OpDef(toDelegate(&verifyConvolutionFeaturesGrad),
            toDelegate(&judgeConvolutionFeaturesGrad)));
        registerOperation("convolutionFiltersGrad", OpDef(toDelegate(&verifyConvolutionFiltersGrad),
            toDelegate(&judgeConvolutionFiltersGrad)));
        registerOperation("maxpoolGrad", OpDef(toDelegate(&verifyMaxpoolGrad), toDelegate(&judgeMaxpoolGrad)));
        registerOperation("softmax", OpDef(toDelegate(&verifySoftmax), toDelegate(&judgeSoftmax)));
        registerOperation("softmaxGrad", OpDef(toDelegate(&verifySoftmaxGrad), toDelegate(&judgeSoftmaxGrad)));
        registerOperation("relu", OpDef(toDelegate(&verifyRelu), toDelegate(&judgeRelu)));
        registerOperation("reluGrad", OpDef(toDelegate(&verifyReluGrad), toDelegate(&judgeReluGrad)));
        registerOperation("addBias", OpDef(toDelegate(&verifyAddBias), toDelegate(&judgeAddBias)));
        registerOperation("addBiasGrad", OpDef(toDelegate(&verifyAddBiasGrad), toDelegate(&judgeAddBiasGrad)));
        registerOperation("batchNormTrain", OpDef(toDelegate(&verifyBatchNormTrain), toDelegate(&judgeBatchNormTrain)));
        registerOperation("batchNormGrad", OpDef(toDelegate(&verifyBatchNormGrad), toDelegate(&judgeBatchNormGrad)));
        registerOperation("batchNormInference", OpDef(toDelegate(&verifyBatchNormInference),
            toDelegate(&judgeBatchNormInference)));
    }
}

private
{
    bool verifyConvolution(Operation op)
    {
        if(op.deps.length != 2)
        {
            return false;
        }

        auto imgs = op.deps[0].outputType;
        auto filters = op.deps[1].outputType;

        if(imgs.rank != 4 || filters.rank != 4)
        {
            return false;
        }

        if(imgs.elementType != filters.elementType)
        {
            return false;
        }

        if(imgs.shape[1] != filters.shape[1])
        {
            return false;
        }

        return true;
    }

    TensorType judgeConvolution(Operation op)
    {
        auto imgs = op.deps[0];
        auto filters = op.deps[1];

        auto padding = op.attributes["padding"].get!(size_t[]);
        auto stride = op.attributes["stride"].get!(size_t[]);

        auto batchSize = imgs.outputType.shape[0];
        auto outputChannels = filters.outputType.shape[0];
        auto newHeight = (imgs.outputType.shape[2] + 2 * padding[0] - filters.outputType.shape[2]) / stride[0] + 1;
        auto newWidth = (imgs.outputType.shape[3] + 2 * padding[1] - filters.outputType.shape[3]) / stride[1] + 1;

        auto shape = [batchSize, outputChannels, newHeight, newWidth];

        return TensorType(imgs.outputType.elementType, shape);
    }

    bool verifyMaxpool(Operation op)
    {
        return op.deps.length == 1
            && op.deps[0].outputType.rank == 4
            && op.attributes["dims"].peek!(size_t[]) !is null
            && op.attributes["dims"].get!(size_t[]).length == 2;
    }

    TensorType judgeMaxpool(Operation op)
    {
        auto poolDims = op.attributes["dims"].get!(size_t[]);
        size_t[] shape = new size_t[4];
        shape[0] = op.deps[0].shape[0];
        shape[1] = op.deps[0].shape[1];
        shape[2] = op.deps[0].shape[2] / poolDims[0];
        shape[3] = op.deps[0].shape[3] / poolDims[1];

        return TensorType(op.deps[0].outputType.elementType, shape);
    }

    bool verifyConvolutionFeaturesGrad(Operation op)
    {
        return true;
    }

    TensorType judgeConvolutionFeaturesGrad(Operation op)
    {
        auto parentGrad = op.deps[0];
        auto dims = op.attributes["featuresShape"].get!(size_t[]);

        size_t[] shape = new size_t[4];
        shape[] = dims[];

        return TensorType(parentGrad.outputType.elementType, shape);
    }

    bool verifyConvolutionFiltersGrad(Operation op)
    {
        return true;
    }

    TensorType judgeConvolutionFiltersGrad(Operation op)
    {
        auto parentGrad = op.deps[0];
        auto dims = op.attributes["filtersShape"].get!(size_t[]);

        size_t[] shape = new size_t[4];
        shape[] = dims[];

        return TensorType(parentGrad.outputType.elementType, shape);
    }

    bool verifyMaxpoolGrad(Operation op)
    {
        return true;
    }

    TensorType judgeMaxpoolGrad(Operation op)
    {
        auto parentGrad = op.deps[0];
        auto dims = op.attributes["featuresShape"].get!(size_t[]);

        size_t[] shape = new size_t[4];
        shape[] = dims[];

        return TensorType(parentGrad.outputType.elementType, shape);
    }

    bool verifySoftmax(Operation op)
    {
        return op.deps.length == 1;
    }

    TensorType judgeSoftmax(Operation op)
    {
        return TensorType(op.deps[0].elementType, op.deps[0].shape);
    }

    bool verifySoftmaxGrad(Operation op)
    {
        return op.deps.length == 2;
    }

    TensorType judgeSoftmaxGrad(Operation op)
    {
        return TensorType(op.deps[1].elementType, op.deps[1].shape);
    }

    bool verifyRelu(Operation op)
    {
        return op.deps.length == 1;
    }

    TensorType judgeRelu(Operation op)
    {
        return TensorType(op.deps[0].elementType, op.deps[0].shape);
    }

    bool verifyReluGrad(Operation op)
    {
        return op.deps.length == 3;
    }

    TensorType judgeReluGrad(Operation op)
    {
        return TensorType(op.deps[1].elementType, op.deps[1].shape);
    }

    bool verifyAddBias(Operation op)
    {
        return true;
    }

    TensorType judgeAddBias(Operation op)
    {
        return op.deps[0].outputType;
    }

    bool verifyAddBiasGrad(Operation op)
    {
        return true;
    }

    TensorType judgeAddBiasGrad(Operation op)
    {
        return TensorType(op.deps[0].elementType, [op.deps[0].shape[1]]);
    }

    bool verifyBatchNormTrain(Operation op)
    {
        return true;
    }

    TensorType judgeBatchNormTrain(Operation op)
    {
        return TensorType(op.deps[0].elementType, [op.deps[0].volume + 2 * op.deps[0].shape[1]]);
    }

    bool verifyBatchNormGrad(Operation op)
    {
        return true;
    }

    TensorType judgeBatchNormGrad(Operation op)
    {
        return TensorType(op.deps[0].elementType, [op.deps[0].volume + op.deps[1].volume + op.deps[2].volume]);
    }

    bool verifyBatchNormInference(Operation op)
    {
        return true;
    }

    TensorType judgeBatchNormInference(Operation op)
    {
        return op.deps[0].outputType;
    }
}

public
{
    /**
        Creates a convolution operation that performs the computation required to implement a convolutional layer.

        Currently this operation only implements 2D convolutions.

        Params:
            features = A tensor containing a batch of input feature maps.
            filters = A tensor containing the filters that will be convolved with the feature maps.
            padding = The amount of zero padding applied to each spatial dimension of the feature maps.
            stride = The convolution stride in each spatial dimension.

        Returns:
            An operation representing the convolution of the input feature maps with the given filters.
    */
    Operation convolution(Operation features, Operation filters, size_t[] padding = [0, 0], size_t[] stride = [1, 1],
        string mod = __MODULE__, size_t line = __LINE__)
    {
        return createOperation("convolution", [features, filters],
            ["padding": Variant(padding), "stride": Variant(stride)], mod, line);
    }

    ///
    unittest
    {
        import dopt.core : evaluate;

        auto features = float32([1, 1, 3, 5], [
            1.0f, 1.0f, 1.0f, 0.0f, 0.0f,
            1.0f, 1.0f, 1.0f, 0.0f, 0.0f,
            1.0f, 1.0f, 1.0f, 0.0f, 0.0f
        ]);

        auto filters = float32([1, 1, 1, 2], [
            -1.0f, 1.0f
        ]);

        auto result = convolution(features, filters);

        auto edges = result.evaluate().as!float;

        assert(edges == [
            0.0f, 0.0f, 1.0f, 0.0f,
            0.0f, 0.0f, 1.0f, 0.0f,
            0.0f, 0.0f, 1.0f, 0.0f
        ]);
    }

    /**
        Creates a transposed convolution operation (also known, incorrectly, as deconvolution).

        Params:
            features = The feature maps.
            filters = The filters to be convolved with the feature maps.
            padding = The amount of zero padding in each spatial dimension.
            stride = The stride in each spatial dimension.

        Returns:
            The operation.
    */
    Operation convolutionTranspose(Operation features, Operation filters, size_t[] padding = [0, 0],
        size_t[] stride = [1, 1], string mod = __MODULE__, size_t line = __LINE__)
    {
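        // Each output spatial dimension is (inputDim - 1) * stride + filterDim - 2 * padding.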
        auto outShape = features.shape.dup;
        outShape[2 .. $] -= 1;
        outShape[2 .. $] *= stride[];
        outShape[2 .. $] += filters.shape[2 .. $] - 2 * padding[];

        return convolutionFeaturesGrad(features, filters, outShape, padding, stride, mod, line);
    }
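
    /// Shape-only sketch: upsampling a 2x2 feature map with a 3x3 filter and unit stride declares a 4x4 output.
    /// Nothing is evaluated here, so only the inferred output type is exercised.
    unittest
    {
        auto features = float32([1, 1, 2, 2], [1.0f, 2.0f, 3.0f, 4.0f]);
        auto filters = float32([1, 1, 3, 3], new float[9]);

        auto result = convolutionTranspose(features, filters);

        // Each spatial dimension becomes (2 - 1) * 1 + 3 - 2 * 0 == 4.
        assert(result.shape == [1, 1, 4, 4]);
    }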

    /**
        Creates a max pool operation that performs the computation required to implement a max pooling layer.

        Params:
            features = A tensor containing a batch of input feature maps.
            dims = The height and width of the pooling window.

        Returns:
            An operation representing a max pool computation.
    */
    Operation maxpool(Operation features, size_t[] dims, string mod = __MODULE__, size_t line = __LINE__)
    {
        return createOperation("maxpool", [features], ["dims": Variant(dims)], mod, line);
    }

    ///
    unittest
    {
        import dopt.core : evaluate;

        auto features = float32([1, 1, 4, 4], [
            1.0f, 2.0f, 4.0f, 3.0f,
            5.0f, 3.0f, 2.0f, 2.0f,
            0.1f, -4.0f, 3.0f, 2.0f,
            0.0f, 0.0f, 2.0f, 2.0f
        ]);

        auto result = features.maxpool([2,2]);

        auto pooledFeatures = result.evaluate().as!float;

        assert(pooledFeatures == [
            5.0f, 4.0f,
            0.1f, 3.0f
        ]);
    }

    /**
        Creates an operation representing the derivative of a convolution operation with respect to the feature maps.

        Params:
            parentGrad = Gradient of some function w.r.t. the convolution operation.
            filters = The filters of the convolution operation.
            featuresShape = The shape of the features fed into the convolution operation.
            padding = The padding used by the convolution operation.
            stride = The stride used by the convolution operation.

        Returns:
            The gradient.
    */
    Operation convolutionFeaturesGrad(Operation parentGrad, Operation filters, size_t[] featuresShape,
        size_t[] padding, size_t[] stride, string mod = __MODULE__, size_t line = __LINE__)
    {
        return createOperation("convolutionFeaturesGrad", [parentGrad, filters],
            ["featuresShape": Variant(featuresShape), "padding": Variant(padding), "stride": Variant(stride)],
            mod, line);
    }

    /**
        Creates an operation representing the derivative of a convolution operation with respect to the filters.

        Params:
            parentGrad = Gradient of some function w.r.t. the convolution operation.
            features = The features provided to the convolution operation.
            filtersShape = The shape of the filters provided to the convolution operation.
            padding = The padding used by the convolution operation.
            stride = The stride used by the convolution operation.

        Returns:
            The gradient.
    */
    Operation convolutionFiltersGrad(Operation parentGrad, Operation features, size_t[] filtersShape,
        size_t[] padding, size_t[] stride, string mod = __MODULE__, size_t line = __LINE__)
    {
        return createOperation("convolutionFiltersGrad", [parentGrad, features],
            ["filtersShape": Variant(filtersShape), "padding": Variant(padding), "stride": Variant(stride)],
            mod, line);
    }

    /**
        Creates an operation representing the derivative of a maxpool operation with respect to the feature maps.

        Params:
            parentGrad = Gradient of some function w.r.t. the maxpool operation.
            op = The maxpool operation being differentiated.

        Returns:
            The gradient.
    */
    Operation maxpoolGrad(Operation parentGrad, Operation op, string mod = __MODULE__,
        size_t line = __LINE__)
    {
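        // The original input (op.deps[0]) is included as a dependency so the backend can determine which input
        // elements were selected by the max pooling operation.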
        return createOperation("maxpoolGrad", [parentGrad, op, op.deps[0]],
            ["featuresShape": Variant(op.deps[0].outputType.shape), "dims": op.attributes["dims"]], mod, line);
    }

    /**
        Creates an operation representing the computation required for a softmax layer.

        Params:
            inputs = The inputs to the softmax function.

        Returns:
            The operation.
    */
    Operation softmax(Operation inputs, string mod = __MODULE__, size_t line = __LINE__)
    {
        return createOperation("softmax", [inputs], null, mod, line);
    }

    ///
    unittest
    {
        import std.math : approxEqual;
        import dopt.core : evaluate;

        auto y = float32([1, 5], [1.0f, 2.0f, 3.0f, 1.0f, 2.0f]).softmax();

        assert(approxEqual(
            y.evaluate().as!float,
            [0.0674508, 0.18335, 0.498398, 0.0674508, 0.18335]
        ));
    }

    /**
        Creates an operation representing the gradient of the softmax function.

        Params:
            parentGrad = Gradient of some function w.r.t. the softmax operation.
            op = The softmax operation being differentiated.

        Returns:
            The gradient.
    */
    Operation softmaxGrad(Operation parentGrad, Operation op, string mod = __MODULE__,
        size_t line = __LINE__)
    {
        return createOperation("softmaxGrad", [parentGrad, op], null, mod, line);
    }

    /**
        Creates an operation representing the computation required for a ReLU layer.

        Params:
            inputs = The inputs to the ReLU function.

        Returns:
            The operation.
    */
    Operation relu(Operation inputs, string mod = __MODULE__, size_t line = __LINE__)
    {
        return createOperation("relu", [inputs], null, mod, line);
    }
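
    /// An illustrative example (values chosen arbitrarily): ReLU zeroes negative activations and passes the rest
    /// through unchanged.
    unittest
    {
        import dopt.core : evaluate;

        auto x = float32([1, 1, 2, 2], [
            -1.0f, 2.0f,
            0.0f, -3.0f
        ]);

        auto y = x.relu();

        assert(y.evaluate().as!float == [
            0.0f, 2.0f,
            0.0f, 0.0f
        ]);
    }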

    /**
        Creates an operation representing the gradient of the ReLU function.

        Params:
            parentGrad = Gradient of some function w.r.t. the ReLU operation.
            op = The ReLU operation being differentiated.

        Returns:
            The gradient.
    */
    Operation reluGrad(Operation parentGrad, Operation op, string mod = __MODULE__,
        size_t line = __LINE__)
    {
        return createOperation("reluGrad", [parentGrad, op, op.deps[0]], null, mod, line);
    }

    /**
        Creates an operation that adds a bias to a batch of feature maps.

        Params:
            input = A tensor containing a batch of feature maps.
            bias = A rank-1 tensor containing the bias value for each channel.

        Returns:
            The operation.
    */
    Operation addBias(Operation input, Operation bias, string mod = __MODULE__, size_t line = __LINE__)
    {
        return createOperation("addBias", [input, bias], null, mod, line);
    }
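
    /// Shape-only sketch: the bias is assumed to hold one value per channel and to be broadcast over the batch and
    /// spatial dimensions, so the result has the same type and shape as the input.
    unittest
    {
        auto input = float32([1, 2, 1, 2], new float[4]);
        auto bias = float32([2], new float[2]);

        auto result = input.addBias(bias);

        assert(result.shape == input.shape);
    }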

    /**
        Creates an operation representing the derivative of an addBias operation with respect to the bias.

        Params:
            parentGrad = Gradient of some function w.r.t. the addBias operation.

        Returns:
            The gradient, containing one element per channel.
    */
    Operation addBiasGrad(Operation parentGrad, string mod = __MODULE__, size_t line = __LINE__)
    {
        return createOperation("addBiasGrad", [parentGrad], null, mod, line);
    }

    /**
        Creates the operations required for the forward pass of batch normalisation in training mode.

        Params:
            input = A tensor containing a batch of feature maps.
            scale = The scale (gamma) parameter.
            bias = The shift (beta) parameter.
            mean = The current running mean.
            var = The current running variance.
            momentum = The momentum used when updating the running statistics.

        Returns:
            An array containing the normalised activations, the updated running mean, and the updated running
            variance.
    */
    Operation[] batchNormTrain(Operation input, Operation scale, Operation bias, Operation mean, Operation var,
        double momentum, string mod = __MODULE__, size_t line = __LINE__)
    {
        auto bnop = createOperation("batchNormTrain", [input, scale, bias, mean, var],
            ["momentum": Variant(momentum)], mod, line);

        // bnop packs the updated running mean and variance after the forward propagation values.

        return [
            bnop.slice([0], [input.volume]).reshape(input.shape),
            bnop.slice([input.volume], [input.volume + input.shape[1]]),
            bnop.slice([input.volume + input.shape[1]], [input.volume + 2 * input.shape[1]])
        ];
    }
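
    /// Usage sketch: batchNormTrain unpacks the fused result into the normalised activations plus the updated
    /// running mean and variance. The shapes chosen for scale, bias, mean, and var are an assumption; the operation
    /// itself does not verify them, and nothing is evaluated here.
    unittest
    {
        auto input = float32([2, 3, 1, 1], new float[6]);
        auto scale = float32([3], new float[3]);
        auto bias = float32([3], new float[3]);
        auto mean = float32([3], new float[3]);
        auto var = float32([3], new float[3]);

        auto res = batchNormTrain(input, scale, bias, mean, var, 0.9);

        // res[0] is the normalised input; res[1] and res[2] are the running mean and variance, one value per channel.
        assert(res[0].shape == input.shape);
        assert(res[1].volume == input.shape[1]);
        assert(res[2].volume == input.shape[1]);
    }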

    /**
        Creates an operation representing the gradient of the batch normalisation operation.

        Params:
            parentGrad = Gradient of some function w.r.t. the batch normalisation operation.
            input = The input provided to the batch normalisation operation.
            scale = The scale parameter provided to the batch normalisation operation.

        Returns:
            The gradients, packed into a single rank-1 tensor.
    */
    Operation batchNormGrad(Operation parentGrad, Operation input, Operation scale, string mod = __MODULE__,
        size_t line = __LINE__)
    {
        return createOperation("batchNormGrad", [parentGrad, input, scale], null, mod, line);
    }

    /**
        Creates an operation representing batch normalisation in inference mode, using fixed running statistics.

        Params:
            input = A tensor containing a batch of feature maps.
            scale = The scale (gamma) parameter.
            bias = The shift (beta) parameter.
            mean = The running mean to normalise with.
            var = The running variance to normalise with.

        Returns:
            An operation producing the normalised activations, with the same type and shape as the input.
    */
    Operation batchNormInference(Operation input, Operation scale, Operation bias, Operation mean, Operation var,
        string mod = __MODULE__, size_t line = __LINE__)
    {
        return createOperation("batchNormInference", [input, scale, bias, mean, var], null, mod, line);
    }
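
    /// Shape-only sketch: in inference mode the output has the same type and shape as the input. The shapes used for
    /// the statistics are an assumption; the operation itself does not verify them.
    unittest
    {
        auto input = float32([2, 3, 1, 1], new float[6]);
        auto scale = float32([3], new float[3]);
        auto bias = float32([3], new float[3]);
        auto mean = float32([3], new float[3]);
        auto var = float32([3], new float[3]);

        assert(batchNormInference(input, scale, bias, mean, var).shape == input.shape);
    }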
}