1 /**
2     Contains common neural network operations.
3 
4     These operations are currently only implemented for the CUDA backend.
5 
6     Authors: Henry Gouk
7 */
8 module dopt.core.ops.nnet;
9 
10 import dopt.core.ops;
11 import dopt.core.types;
12 
13 import std.array;
14 import std.functional;
15 import std.variant;
16 
package
{
    //Registers every nnet operation (forward passes and their gradients) with the
    //operation registry. Each OpDef pairs a verifier, which checks that an Operation's
    //dependencies and attributes are well formed, with a judge, which computes the
    //operation's output TensorType.
    void initialize()
    {
        registerOperation("convolution", OpDef(toDelegate(&verifyConvolution), toDelegate(&judgeConvolution)));
        registerOperation("maxpool", OpDef(toDelegate(&verifyMaxpool), toDelegate(&judgeMaxpool)));
        registerOperation("convolutionFeaturesGrad", OpDef(toDelegate(&verifyConvolutionFeaturesGrad),
            toDelegate(&judgeConvolutionFeaturesGrad)));
        registerOperation("convolutionFiltersGrad", OpDef(toDelegate(&verifyConvolutionFiltersGrad),
            toDelegate(&judgeConvolutionFiltersGrad)));
        registerOperation("maxpoolGrad", OpDef(toDelegate(&verifyMaxpoolGrad), toDelegate(&judgeMaxpoolGrad)));
        registerOperation("softmax", OpDef(toDelegate(&verifySoftmax), toDelegate(&judgeSoftmax)));
        registerOperation("softmaxGrad", OpDef(toDelegate(&verifySoftmaxGrad), toDelegate(&judgeSoftmaxGrad)));
        registerOperation("addBias", OpDef(toDelegate(&verifyAddBias), toDelegate(&judgeAddBias)));
        registerOperation("addBiasGrad", OpDef(toDelegate(&verifyAddBiasGrad), toDelegate(&judgeAddBiasGrad)));
        registerOperation("batchNormTrain", OpDef(toDelegate(&verifyBatchNormTrain), toDelegate(&judgeBatchNormTrain)));
        registerOperation("batchNormGrad", OpDef(toDelegate(&verifyBatchNormGrad), toDelegate(&judgeBatchNormGrad)));
    }
}
36 
37 private
38 {
39     bool verifyConvolution(Operation op)
40     {
41         if(op.deps.length != 2)
42         {
43             return false;
44         }
45 
46         auto imgs = op.deps[0].outputType;
47         auto filters = op.deps[1].outputType;
48 
49         if(imgs.rank != 4 || filters.rank != 4)
50         {
51             return false;
52         }
53 
54         if(imgs.elementType != filters.elementType)
55         {
56             return false;
57         }
58 
59         if(imgs.shape[1] != filters.shape[1])
60         {
61             return false;
62         }
63 
64         return true;
65     }
66 
67     TensorType judgeConvolution(Operation op)
68     {
69         auto imgs = op.deps[0];
70         auto filters = op.deps[1];
71 
72         auto padding = op.attributes["padding"].get!(size_t[]);
73         auto stride = op.attributes["stride"].get!(size_t[]);
74 
75         auto batchSize = imgs.outputType.shape[0];
76         auto outputChannels = filters.outputType.shape[0];
77         auto newHeight = (imgs.outputType.shape[2] + 2 * padding[0] - filters.outputType.shape[2]) / stride[0] + 1;
78         auto newWidth = (imgs.outputType.shape[3] + 2 * padding[1] - filters.outputType.shape[3]) / stride[1] + 1;
79 
80         auto shape = [batchSize, outputChannels, newHeight, newWidth];
81 
82         return TensorType(imgs.outputType.elementType, shape);
83     }
84 
85     bool verifyMaxpool(Operation op)
86     {
87         return op.deps.length == 1
88             && op.deps[0].outputType.rank == 4
89             && op.attributes["dims"].peek!(size_t[]) !is null
90             && op.attributes["dims"].get!(size_t[]).length == 2;
91     }
92 
93     TensorType judgeMaxpool(Operation op)
94     {
95         auto poolDims = op.attributes["dims"].get!(size_t[]);
96         size_t[] shape = new size_t[4];
97         shape[0] = op.deps[0].shape[0];
98         shape[1] = op.deps[0].shape[1];
99         shape[2] = op.deps[0].shape[2] / poolDims[0];
100         shape[3] = op.deps[0].shape[3] / poolDims[1];
101 
102         return TensorType(op.deps[0].outputType.elementType, shape);
103     }
104 
105     bool verifyConvolutionFeaturesGrad(Operation op)
106     {
107         return true;
108     }
109 
110     TensorType judgeConvolutionFeaturesGrad(Operation op)
111     {
112         auto parentGrad = op.deps[0];
113         auto dims = op.attributes["featuresShape"].get!(size_t[]);
114 
115         size_t[] shape = new size_t[4];
116         shape[] = dims[];
117 
118         return TensorType(parentGrad.outputType.elementType, shape);
119     }
120 
121     bool verifyConvolutionFiltersGrad(Operation op)
122     {
123         return true;
124     }
125 
126     TensorType judgeConvolutionFiltersGrad(Operation op)
127     {
128         auto parentGrad = op.deps[0];
129         auto dims = op.attributes["filtersShape"].get!(size_t[]);
130 
131         size_t[] shape = new size_t[4];
132         shape[] = dims[];
133 
134         return TensorType(parentGrad.outputType.elementType, shape);
135     }
136 
137     bool verifyMaxpoolGrad(Operation op)
138     {
139         return true;
140     }
141 
142     TensorType judgeMaxpoolGrad(Operation op)
143     {
144         auto parentGrad = op.deps[0];
145         auto dims = op.attributes["featuresShape"].get!(size_t[]);
146 
147         size_t[] shape = new size_t[4];
148         shape[] = dims[];
149 
150         return TensorType(parentGrad.outputType.elementType, shape);
151     }
152 
153     bool verifySoftmax(Operation op)
154     {
155         return op.deps.length == 1;
156     }
157 
158     TensorType judgeSoftmax(Operation op)
159     {
160         return TensorType(op.deps[0].elementType, op.deps[0].shape);
161     }
162 
163     bool verifySoftmaxGrad(Operation op)
164     {
165         return op.deps.length == 2;
166     }
167 
168     TensorType judgeSoftmaxGrad(Operation op)
169     {
170         return TensorType(op.deps[1].elementType, op.deps[1].shape);
171     }
172 
173     bool verifyAddBias(Operation op)
174     {
175         return true;
176     }
177 
178     TensorType judgeAddBias(Operation op)
179     {
180         return op.deps[0].outputType;
181     }
182 
183     bool verifyAddBiasGrad(Operation op)
184     {
185         return true;
186     }
187 
188     TensorType judgeAddBiasGrad(Operation op)
189     {
190         return TensorType(op.deps[0].elementType, [op.deps[0].shape[1]]);
191     }
192 
193     bool verifyBatchNormTrain(Operation op)
194     {
195         return true;
196     }
197 
198     TensorType judgeBatchNormTrain(Operation op)
199     {
200         return op.deps[0].outputType;
201     }
202 
203     bool verifyBatchNormGrad(Operation op)
204     {
205         return true;
206     }
207 
208     TensorType judgeBatchNormGrad(Operation op)
209     {
210         return TensorType(op.deps[0].elementType, [op.deps[0].volume + op.deps[1].volume + op.deps[2].volume]);
211     }
212 }
213 
214 public
215 {
216     /**
217         Creates a convolution operation that performs the computation required to implement a convolutional layer.
218 
219         Currently this operation only implements 2D convolutions.
220 
221         Params:
222             features = A tensor containing a batch of input feature maps.
223             filters = A tensor containing the filters that will be convolved with the feature maps.
224         
225         Returns:
226             An operation representing convolutions of input imgs with some kernels.
227     */
228     Operation convolution(Operation features, Operation filters, size_t[] padding = [0, 0], size_t[] stride = [1, 1],
229         string mod = __MODULE__, size_t line = __LINE__)
230     {
231         return createOperation("convolution", [features, filters],
232             ["padding": Variant(padding), "stride": Variant(stride)], mod, line);
233     }
234 
235     ///
236     unittest
237     {
238         import dopt.core.cuda : evaluateCUDA;
239 
240         auto features = float32([1, 1, 3, 5], [
241             1.0f, 1.0f, 1.0f, 0.0f, 0.0f,
242             1.0f, 1.0f, 1.0f, 0.0f, 0.0f,
243             1.0f, 1.0f, 1.0f, 0.0f, 0.0f
244         ]);
245 
246         auto filters = float32([1, 1, 1, 2], [
247             -1.0f, 1.0f
248         ]);
249 
250         auto result = convolution(features, filters);
251 
252         auto edges = result.evaluateCUDA().as!float;
253 
254         assert(edges == [
255             0.0f, 0.0f, 1.0f, 0.0f,
256             0.0f, 0.0f, 1.0f, 0.0f,
257             0.0f, 0.0f, 1.0f, 0.0f
258         ]);
259     }
260 
261     /**
262         Creates a transposed convolution operation (also known, incorrectly, as deconvolution).
263 
264         Params:
265             features = The feature maps.
266             filters = The filters to be convolved with the feature maps.
267         
268         Returns:
269             The operation.
270     */
271     Operation convolutionTranspose(Operation features, Operation filters, size_t[] padding = [0, 0],
272         size_t[] stride = [1, 1], string mod = __MODULE__, size_t line = __LINE__)
273     {
274         auto outShape = features.shape.dup;
275         outShape[2 .. $] -= 1;
276         outShape[2 .. $] *= stride[];
277         outShape[2 .. $] += filters.shape[2 .. $] - 2 * padding[];
278 
279         return convolutionFeaturesGrad(features, filters, outShape, padding, stride, mod, line);
280     }
281 
282     /**
283         Creates a max pool operation that performs the computation required to implement a max pooling layer.
284 
285         Params:
286             features = A tensor containing a batch of input feature maps.
287             dims = An array of pool dims.
288 
289         Returns:
290             An operation representing a max pool computation.
291     */
292     Operation maxpool(Operation features, size_t[] dims, string mod = __MODULE__, size_t line = __LINE__)
293     {
294         return createOperation("maxpool", [features], ["dims": Variant(dims)], mod, line);
295     }
296 
297     ///
298     unittest
299     {
300         import dopt.core.cuda : evaluateCUDA;
301 
302         auto features = float32([1, 1, 4, 4], [
303             1.0f, 2.0f, 4.0f, 3.0f,
304             5.0f, 3.0f, 2.0f, 2.0f,
305             0.1f, -4.0f, 3.0f, 2.0f,
306             0.0f, 0.0f, 2.0f, 2.0f
307         ]);
308 
309         auto result = features.maxpool([2,2]);
310 
311         auto pooledFeatures = result.evaluateCUDA().as!float;
312 
313         assert(pooledFeatures == [
314             5.0f, 4.0f,
315             0.1f, 3.0f
316         ]);
317     }
318 
319     /**
320         Creates an operation representing the derivative of a convolution operation with respect to the feature maps.
321 
322         Params:
323             parentGrad = Gradient of some functions w.r.t. the convolution operation.
324             filters = The filters of the convolution operation.
325             featuresShape = The shape of the features fed into the convolution operations.
326         
327         Returns:
328             The gradient.
329     */
330     Operation convolutionFeaturesGrad(Operation parentGrad, Operation filters, size_t[] featuresShape,
331         size_t[] padding, size_t[] stride, string mod = __MODULE__, size_t line = __LINE__)
332     {
333         return createOperation("convolutionFeaturesGrad", [parentGrad, filters],
334             ["featuresShape": Variant(featuresShape), "padding": Variant(padding), "stride": Variant(stride)],
335             mod, line);
336     }
337 
338     /**
339         Creates an operation representing the derivative of a convolution operation with respect to the filters.
340 
341         Params:
342             parentGrad = Gradient of some functions w.r.t. the convolution operation.
343             features = The features provided to the convolution operation.
344             filtersShape = The shape of the filters provided to the convolution operation.
345         
346         Returns:
347             The gradient.
348     */
349     Operation convolutionFiltersGrad(Operation parentGrad, Operation features, size_t[] filtersShape,
350         size_t[] padding, size_t[] stride, string mod = __MODULE__, size_t line = __LINE__)
351     {
352         return createOperation("convolutionFiltersGrad", [parentGrad, features],
353             ["filtersShape": Variant(filtersShape), "padding": Variant(padding), "stride": Variant(stride)],
354             mod, line);
355     }
356 
357     /**
358         Creates an operation representing the derivative of a maxpool operation with respect to the feature maps.
359 
360         Params:
361             parentGrad = Gradient of some function w.r.t. the maxpool operation.
362             op = The operation being differentiated.
363 
364         Returns:
365             The gradient.
366     */
367     Operation maxpoolGrad(Operation parentGrad, Operation op, string mod = __MODULE__,
368         size_t line = __LINE__)
369     {
370         return createOperation("maxpoolGrad", [parentGrad, op, op.deps[0]],
371             ["featuresShape": Variant(op.deps[0].outputType.shape), "dims": op.attributes["dims"]], mod, line);
372     }
373 
374     /**
375         Creates an operation representing the computation required for a softmax layer.
376 
377         Params:
378             inputs = The inputs to the softmax function.
379         
380         Returns:
381             The operation.
382     */
383     Operation softmax(Operation inputs, string mod = __MODULE__, size_t line = __LINE__)
384     {
385         return createOperation("softmax", [inputs], null, mod, line);
386     }
387 
388     ///
389     unittest
390     {
391         import std.math : approxEqual;
392         import dopt.core.cpu : evaluateCUDA;
393 
394         auto y = float32([1, 5], [1.0f, 2.0f, 3.0f, 1.0f, 2.0f]).softmax();
395 
396         assert(approxEqual(
397             y.evaluateCUDA().as!float,
398             [0.0674508, 0.18335, 0.498398, 0.0674508, 0.18335]
399         ));
400     }
401 
402     /**
403         Creates an operation representing the gradient of the softmax function.
404     */
405     Operation softmaxGrad(Operation parentGrad, Operation op, string mod = __MODULE__,
406         size_t line = __LINE__)
407     {
408         return createOperation("softmaxGrad", [parentGrad, op], null, mod, line);
409     }
410 
411     Operation addBias(Operation input, Operation bias, string mod = __MODULE__, size_t line = __LINE__)
412     {
413         return createOperation("addBias", [input, bias], null, mod, line);
414     }
415 
416     Operation addBiasGrad(Operation parentGrad, string mod = __MODULE__, size_t line = __LINE__)
417     {
418         return createOperation("addBiasGrad", [parentGrad], null, mod, line);
419     }
420 
421     Operation batchNormTrain(Operation input, Operation scale, Operation bias, string mod = __MODULE__,
422         size_t line = __LINE__)
423     {
424         return createOperation("batchNormTrain", [input, scale, bias], null, mod, line);
425     }
426 
427     Operation batchNormGrad(Operation parentGrad, Operation input, Operation scale, string mod = __MODULE__,
428         size_t line = __LINE__)
429     {
430         return createOperation("batchNormGrad", [parentGrad, input, scale], null, mod, line);
431     }
432 }