1 /**
2     Contains common neural network operations.
3 
4     These operations are currently only implemented for the CUDA backend.
5 
6     Authors: Henry Gouk
7 */
8 module dopt.core.ops.nnet;
9 
10 import dopt.core.ops;
11 import dopt.core.types;
12 
13 import std.array;
14 import std.functional;
15 import std.variant;
16 
17 package
18 {
19     void initialize()
20     {
21         registerOperation("convolution", OpDef(toDelegate(&verifyConvolution), toDelegate(&judgeConvolution)));
22         registerOperation("maxpool", OpDef(toDelegate(&verifyMaxpool), toDelegate(&judgeMaxpool)));
23         registerOperation("convolutionFeaturesGrad", OpDef(toDelegate(&verifyConvolutionFeaturesGrad),
24             toDelegate(&judgeConvolutionFeaturesGrad)));
25         registerOperation("convolutionFiltersGrad", OpDef(toDelegate(&verifyConvolutionFiltersGrad),
26             toDelegate(&judgeConvolutionFiltersGrad)));
27         registerOperation("maxpoolGrad", OpDef(toDelegate(&verifyMaxpoolGrad), toDelegate(&judgeMaxpoolGrad)));
28         registerOperation("softmax", OpDef(toDelegate(&verifySoftmax), toDelegate(&judgeSoftmax)));
29         registerOperation("softmaxGrad", OpDef(toDelegate(&verifySoftmaxGrad), toDelegate(&judgeSoftmaxGrad)));
30         registerOperation("relu", OpDef(toDelegate(&verifyRelu), toDelegate(&judgeRelu)));
31         registerOperation("reluGrad", OpDef(toDelegate(&verifyReluGrad), toDelegate(&judgeReluGrad)));
32         registerOperation("addBias", OpDef(toDelegate(&verifyAddBias), toDelegate(&judgeAddBias)));
33         registerOperation("addBiasGrad", OpDef(toDelegate(&verifyAddBiasGrad), toDelegate(&judgeAddBiasGrad)));
34         registerOperation("batchNormTrain", OpDef(toDelegate(&verifyBatchNormTrain), toDelegate(&judgeBatchNormTrain)));
35         registerOperation("batchNormGrad", OpDef(toDelegate(&verifyBatchNormGrad), toDelegate(&judgeBatchNormGrad)));
36         registerOperation("batchNormInference", OpDef(toDelegate(&verifyBatchNormInference),
37             toDelegate(&judgeBatchNormInference)));
38     }
39 }
40 
41 private
42 {
43     bool verifyConvolution(Operation op)
44     {
45         if(op.deps.length != 2)
46         {
47             return false;
48         }
49 
50         auto imgs = op.deps[0].outputType;
51         auto filters = op.deps[1].outputType;
52 
53         if(imgs.rank != 4 || filters.rank != 4)
54         {
55             return false;
56         }
57 
58         if(imgs.elementType != filters.elementType)
59         {
60             return false;
61         }
62 
63         if(imgs.shape[1] != filters.shape[1])
64         {
65             return false;
66         }
67 
68         return true;
69     }
70 
71     TensorType judgeConvolution(Operation op)
72     {
73         auto imgs = op.deps[0];
74         auto filters = op.deps[1];
75 
76         auto padding = op.attributes["padding"].get!(size_t[]);
77         auto stride = op.attributes["stride"].get!(size_t[]);
78 
79         auto batchSize = imgs.outputType.shape[0];
80         auto outputChannels = filters.outputType.shape[0];
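        //Standard 2D convolution output size: out = (in + 2 * padding - filter) / stride + 1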
81         auto newHeight = (imgs.outputType.shape[2] + 2 * padding[0] - filters.outputType.shape[2]) / stride[0] + 1;
82         auto newWidth = (imgs.outputType.shape[3] + 2 * padding[1] - filters.outputType.shape[3]) / stride[1] + 1;
83 
84         auto shape = [batchSize, outputChannels, newHeight, newWidth];
85 
86         return TensorType(imgs.outputType.elementType, shape);
87     }
88 
89     bool verifyMaxpool(Operation op)
90     {
91         return op.deps.length == 1
92             && op.deps[0].outputType.rank == 4
93             && op.attributes["dims"].peek!(size_t[]) !is null
94             && op.attributes["dims"].get!(size_t[]).length == 2;
95     }
96 
97     TensorType judgeMaxpool(Operation op)
98     {
99         auto poolDims = op.attributes["dims"].get!(size_t[]);
100         size_t[] shape = new size_t[4];
101         shape[0] = op.deps[0].shape[0];
102         shape[1] = op.deps[0].shape[1];
103         shape[2] = op.deps[0].shape[2] / poolDims[0];
104         shape[3] = op.deps[0].shape[3] / poolDims[1];
105 
106         return TensorType(op.deps[0].outputType.elementType, shape);
107     }
108 
109     bool verifyConvolutionFeaturesGrad(Operation op)
110     {
111         return true;
112     }
113 
114     TensorType judgeConvolutionFeaturesGrad(Operation op)
115     {
116         auto parentGrad = op.deps[0];
117         auto dims = op.attributes["featuresShape"].get!(size_t[]);
118 
119         size_t[] shape = new size_t[4];
120         shape[] = dims[];
121 
122         return TensorType(parentGrad.outputType.elementType, shape);
123     }
124 
125     bool verifyConvolutionFiltersGrad(Operation op)
126     {
127         return true;
128     }
129 
130     TensorType judgeConvolutionFiltersGrad(Operation op)
131     {
132         auto parentGrad = op.deps[0];
133         auto dims = op.attributes["filtersShape"].get!(size_t[]);
134 
135         size_t[] shape = new size_t[4];
136         shape[] = dims[];
137 
138         return TensorType(parentGrad.outputType.elementType, shape);
139     }
140 
141     bool verifyMaxpoolGrad(Operation op)
142     {
143         return true;
144     }
145 
146     TensorType judgeMaxpoolGrad(Operation op)
147     {
148         auto parentGrad = op.deps[0];
149         auto dims = op.attributes["featuresShape"].get!(size_t[]);
150 
151         size_t[] shape = new size_t[4];
152         shape[] = dims[];
153 
154         return TensorType(parentGrad.outputType.elementType, shape);
155     }
156 
157     bool verifySoftmax(Operation op)
158     {
159         return op.deps.length == 1;
160     }
161 
162     TensorType judgeSoftmax(Operation op)
163     {
164         return TensorType(op.deps[0].elementType, op.deps[0].shape);
165     }
166 
167     bool verifySoftmaxGrad(Operation op)
168     {
169         return op.deps.length == 2;
170     }
171 
172     TensorType judgeSoftmaxGrad(Operation op)
173     {
174         return TensorType(op.deps[1].elementType, op.deps[1].shape);
175     }
176 
177     bool verifyRelu(Operation op)
178     {
179         return op.deps.length == 1;
180     }
181 
182     TensorType judgeRelu(Operation op)
183     {
184         return TensorType(op.deps[0].elementType, op.deps[0].shape);
185     }
186 
187     bool verifyReluGrad(Operation op)
188     {
189         return op.deps.length == 3;
190     }
191 
192     TensorType judgeReluGrad(Operation op)
193     {
194         return TensorType(op.deps[1].elementType, op.deps[1].shape);
195     }
196 
197     bool verifyAddBias(Operation op)
198     {
199         return true;
200     }
201 
202     TensorType judgeAddBias(Operation op)
203     {
204         return op.deps[0].outputType;
205     }
206 
207     bool verifyAddBiasGrad(Operation op)
208     {
209         return true;
210     }
211 
212     TensorType judgeAddBiasGrad(Operation op)
213     {
214         return TensorType(op.deps[0].elementType, [op.deps[0].shape[1]]);
215     }
216 
217     bool verifyBatchNormTrain(Operation op)
218     {
219         return true;
220     }
221 
222     TensorType judgeBatchNormTrain(Operation op)
223     {
224         return TensorType(op.deps[0].elementType, [op.deps[0].volume + 2 * op.deps[0].shape[1]]);
225     }
226 
227     bool verifyBatchNormGrad(Operation op)
228     {
229         return true;
230     }
231 
232     TensorType judgeBatchNormGrad(Operation op)
233     {
234         return TensorType(op.deps[0].elementType, [op.deps[0].volume + op.deps[1].volume + op.deps[2].volume]);
235     }
236 
237     bool verifyBatchNormInference(Operation op)
238     {
239         return true;
240     }
241 
242     TensorType judgeBatchNormInference(Operation op)
243     {
244         return op.deps[0].outputType;
245     }
246 }
247 
248 public
249 {
250     /**
251         Creates a convolution operation that performs the computation required to implement a convolutional layer.
252 
253         Currently this operation only implements 2D convolutions.
254 
        Params:
            features = A tensor containing a batch of input feature maps.
            filters = A tensor containing the filters that will be convolved with the feature maps.
            padding = The vertical and horizontal padding applied to the feature maps.
            stride = The vertical and horizontal stride of the convolution.

        Returns:
            An operation representing the convolution of the input feature maps with the given filters.
261     */
262     Operation convolution(Operation features, Operation filters, size_t[] padding = [0, 0], size_t[] stride = [1, 1],
263         string mod = __MODULE__, size_t line = __LINE__)
264     {
265         return createOperation("convolution", [features, filters],
266             ["padding": Variant(padding), "stride": Variant(stride)], mod, line);
267     }
268 
269     ///
270     unittest
271     {
272         import dopt.core : evaluate;
273 
274         auto features = float32([1, 1, 3, 5], [
275             1.0f, 1.0f, 1.0f, 0.0f, 0.0f,
276             1.0f, 1.0f, 1.0f, 0.0f, 0.0f,
277             1.0f, 1.0f, 1.0f, 0.0f, 0.0f
278         ]);
279 
280         auto filters = float32([1, 1, 1, 2], [
281             -1.0f, 1.0f
282         ]);
283 
284         auto result = convolution(features, filters);
285 
286         auto edges = result.evaluate().get!float;
287 
288         assert(edges == [
289             0.0f, 0.0f, 1.0f, 0.0f,
290             0.0f, 0.0f, 1.0f, 0.0f,
291             0.0f, 0.0f, 1.0f, 0.0f
292         ]);
293     }
294 
295     /**
296         Creates a transposed convolution operation (also known, incorrectly, as deconvolution).
297 
        Params:
            features = The feature maps.
            filters = The filters to be convolved with the feature maps.
            padding = The padding of the corresponding forward convolution.
            stride = The stride of the corresponding forward convolution.

        Returns:
            The operation.
304     */
305     Operation convolutionTranspose(Operation features, Operation filters, size_t[] padding = [0, 0],
306         size_t[] stride = [1, 1], string mod = __MODULE__, size_t line = __LINE__)
307     {
308         auto outShape = features.shape.dup;
309         outShape[2 .. $] -= 1;
310         outShape[2 .. $] *= stride[];
311         outShape[2 .. $] += filters.shape[2 .. $] - 2 * padding[];
312         outShape[1] = filters.shape[1];
313 
314         return convolutionFeaturesGrad(features, filters, outShape, padding, stride, mod, line);
315     }
316 
317     /**
318         Creates a max pool operation that performs the computation required to implement a max pooling layer.
319 
320         Params:
321             features = A tensor containing a batch of input feature maps.
            dims = The height and width of the pooling window.
323 
324         Returns:
325             An operation representing a max pool computation.
326     */
327     Operation maxpool(Operation features, size_t[] dims, string mod = __MODULE__, size_t line = __LINE__)
328     {
329         return createOperation("maxpool", [features], ["dims": Variant(dims)], mod, line);
330     }
331 
332     ///
333     unittest
334     {
335         import dopt.core : evaluate;
336 
337         auto features = float32([1, 1, 4, 4], [
338             1.0f, 2.0f, 4.0f, 3.0f,
339             5.0f, 3.0f, 2.0f, 2.0f,
340             0.1f, -4.0f, 3.0f, 2.0f,
341             0.0f, 0.0f, 2.0f, 2.0f
342         ]);
343 
344         auto result = features.maxpool([2,2]);
345 
346         auto pooledFeatures = result.evaluate().get!float;
347 
348         assert(pooledFeatures == [
349             5.0f, 4.0f,
350             0.1f, 3.0f
351         ]);
352     }
353 
354     /**
355         Creates an operation representing the derivative of a convolution operation with respect to the feature maps.
356 
        Params:
            parentGrad = Gradient of some function w.r.t. the convolution operation.
            filters = The filters of the convolution operation.
            featuresShape = The shape of the features fed into the convolution operation.
            padding = The padding used by the convolution operation.
            stride = The stride used by the convolution operation.
361         
362         Returns:
363             The gradient.
364     */
365     Operation convolutionFeaturesGrad(Operation parentGrad, Operation filters, size_t[] featuresShape,
366         size_t[] padding, size_t[] stride, string mod = __MODULE__, size_t line = __LINE__)
367     {
368         return createOperation("convolutionFeaturesGrad", [parentGrad, filters],
369             ["featuresShape": Variant(featuresShape), "padding": Variant(padding), "stride": Variant(stride)],
370             mod, line);
371     }
372 
373     /**
374         Creates an operation representing the derivative of a convolution operation with respect to the filters.
375 
        Params:
            parentGrad = Gradient of some function w.r.t. the convolution operation.
            features = The features provided to the convolution operation.
            filtersShape = The shape of the filters provided to the convolution operation.
            padding = The padding used by the convolution operation.
            stride = The stride used by the convolution operation.
380         
381         Returns:
382             The gradient.
383     */
384     Operation convolutionFiltersGrad(Operation parentGrad, Operation features, size_t[] filtersShape,
385         size_t[] padding, size_t[] stride, string mod = __MODULE__, size_t line = __LINE__)
386     {
387         return createOperation("convolutionFiltersGrad", [parentGrad, features],
388             ["filtersShape": Variant(filtersShape), "padding": Variant(padding), "stride": Variant(stride)],
389             mod, line);
390     }
391 
392     /**
393         Creates an operation representing the derivative of a maxpool operation with respect to the feature maps.
394 
395         Params:
396             parentGrad = Gradient of some function w.r.t. the maxpool operation.
            op = The maxpool operation being differentiated.
398 
399         Returns:
400             The gradient.
401     */
402     Operation maxpoolGrad(Operation parentGrad, Operation op, string mod = __MODULE__,
403         size_t line = __LINE__)
404     {
405         return createOperation("maxpoolGrad", [parentGrad, op, op.deps[0]],
406             ["featuresShape": Variant(op.deps[0].outputType.shape), "dims": op.attributes["dims"]], mod, line);
407     }
408 
409     /**
410         Creates an operation representing the computation required for a softmax layer.
411 
412         Params:
413             inputs = The inputs to the softmax function.
414         
415         Returns:
416             The operation.
417     */
418     Operation softmax(Operation inputs, string mod = __MODULE__, size_t line = __LINE__)
419     {
420         return createOperation("softmax", [inputs], null, mod, line);
421     }
422 
423     ///
424     unittest
425     {
426         import std.math : approxEqual;
427         import dopt.core : evaluate;
428 
429         auto y = float32([1, 5], [1.0f, 2.0f, 3.0f, 1.0f, 2.0f]).softmax();
430 
431         assert(approxEqual(
432             y.evaluate().get!float,
433             [0.0674508, 0.18335, 0.498398, 0.0674508, 0.18335]
434         ));
435     }
436 
437     /**
        Creates an operation representing the gradient of the softmax function.

        Params:
            parentGrad = Gradient of some function w.r.t. the softmax operation.
            op = The softmax operation being differentiated.

        Returns:
            The gradient.
439     */
440     Operation softmaxGrad(Operation parentGrad, Operation op, string mod = __MODULE__,
441         size_t line = __LINE__)
442     {
443         return createOperation("softmaxGrad", [parentGrad, op], null, mod, line);
444     }
445 
446     /**
447         Creates an operation representing the computation required for a ReLU layer.
448 
449         Params:
450             inputs = The inputs to the ReLU function.
451         
452         Returns:
453             The operation.
454     */
455     Operation relu(Operation inputs, string mod = __MODULE__, size_t line = __LINE__)
456     {
457         return createOperation("relu", [inputs], null, mod, line);
458     }
459 
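    /**
        Creates an operation representing the gradient of the ReLU function.

        Params:
            parentGrad = Gradient of some function w.r.t. the ReLU operation.
            op = The ReLU operation being differentiated.

        Returns:
            The gradient.
    */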
460     Operation reluGrad(Operation parentGrad, Operation op, string mod = __MODULE__,
461         size_t line = __LINE__)
462     {
463         return createOperation("reluGrad", [parentGrad, op, op.deps[0]], null, mod, line);
464     }
465 
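    /**
        Creates an operation that adds a bias to each channel of a batch of feature maps.

        Params:
            input = The input feature maps.
            bias = A rank-1 tensor containing one bias value per channel of the input.

        Returns:
            The operation.
    */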
466     Operation addBias(Operation input, Operation bias, string mod = __MODULE__, size_t line = __LINE__)
467     {
468         return createOperation("addBias", [input, bias], null, mod, line);
469     }
470 
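    /**
        Creates an operation representing the gradient of the addBias operation with respect to the bias.

        Params:
            parentGrad = Gradient of some function w.r.t. the addBias operation.

        Returns:
            The bias gradient, with one element per channel of parentGrad.
    */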
471     Operation addBiasGrad(Operation parentGrad, string mod = __MODULE__, size_t line = __LINE__)
472     {
473         return createOperation("addBiasGrad", [parentGrad], null, mod, line);
474     }
475 
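    /**
        Creates the operations required to perform batch normalisation during training.

        Params:
            input = The input feature maps.
            scale = The per-channel scale parameter.
            bias = The per-channel bias parameter.
            mean = The current running mean.
            var = The current running variance.
            momentum = The momentum used when updating the running statistics.

        Returns:
            An array containing the normalised output, the updated running mean, and the updated running variance.
    */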
476     Operation[] batchNormTrain(Operation input, Operation scale, Operation bias, Operation mean, Operation var,
477         double momentum, string mod = __MODULE__, size_t line = __LINE__)
478     {
479         auto bnop = createOperation("batchNormTrain", [input, scale, bias, mean, var], ["momentum" : Variant(momentum)]
480             , mod, line);
481         
482         //bnop has the running mean/variance packed after the actual forward prop value
483 
484         return [
485             bnop.slice([0], [input.volume]).reshape(input.shape),
486             bnop.slice([input.volume], [input.volume + input.shape[1]]),
487             bnop.slice([input.volume + input.shape[1]], [input.volume + 2 * input.shape[1]])
488         ];
489     }
490 
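    /**
        Creates an operation representing the gradient of the batch normalisation operation.

        Params:
            parentGrad = Gradient of some function w.r.t. the batch normalisation operation.
            input = The input provided to the batch normalisation operation.
            scale = The scale parameter of the batch normalisation operation.

        Returns:
            The gradient.
    */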
491     Operation batchNormGrad(Operation parentGrad, Operation input, Operation scale, string mod = __MODULE__,
492         size_t line = __LINE__)
493     {
494         return createOperation("batchNormGrad", [parentGrad, input, scale], null, mod, line);
495     }
496 
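    /**
        Creates an operation that performs batch normalisation at inference time, using previously computed statistics.

        Params:
            input = The input feature maps.
            scale = The per-channel scale parameter.
            bias = The per-channel bias parameter.
            mean = The running mean computed during training.
            var = The running variance computed during training.

        Returns:
            The normalised output, which has the same shape as input.
    */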
497     Operation batchNormInference(Operation input, Operation scale, Operation bias, Operation mean, Operation var,
498         string mod = __MODULE__, size_t line = __LINE__)
499     {
500         return createOperation("batchNormInference", [input, scale, bias, mean, var], null, mod, line);
501     }
502 }