1 /**
2     Contains an implementation of convolutional layers.
3     
4     Authors: Henry Gouk
5 */
6 module dopt.nnet.layers.conv;
7 
8 import dopt.core;
9 import dopt.nnet;
10 import dopt.nnet.util;
11 import dopt.online;
12 
/**
    Bundles the optional settings accepted by conv2D when it builds a convolutional $(D Layer).

    Every option is declared through $(D dynamicProperties) and backed by an underscore-prefixed field. Options
    that the constructor does not assign (filterProj and biasProj) are left at their type's initial value.
*/
class Conv2DOptions
{
    mixin(dynamicProperties(
        "size_t[]", "padding",
        "size_t[]", "stride",
        "ParamInitializer", "filterInit",
        "ParamInitializer", "biasInit",
        "Projection", "filterProj",
        "Projection", "biasProj",
        "float", "maxgain",
        "float", "weightDecay",
        "float", "spectralDecay",
        "bool", "useBias"
    ));

    /**
        Fills in the defaults: no padding, unit stride, He-Gaussian filter initialisation, zero-initialised
        biases that are enabled, no weight decay, no spectral decay, and an infinite maxgain.
    */
    this()
    {
        //Parameter initialisers
        _filterInit = heGaussianInit();
        _biasInit = constantInit(0.0f);
        _useBias = true;

        //Geometry of the convolution
        _padding = [0, 0];
        _stride = [1, 1];

        //Regularisation; conv2D only installs its gain-limiting projection when maxgain is not infinity
        _weightDecay = 0.0f;
        _spectralDecay = 0.0f;
        _maxgain = float.infinity;
    }
}
44 
///
unittest
{
    //Creates a Conv2DOptions object with every option explicitly set to its default value
    auto opts = new Conv2DOptions()
               .padding([0, 0])
               .stride([1, 1])
               .filterInit(heGaussianInit())
               .biasInit(constantInit(0.0f))
               .filterProj(null)
               .biasProj(null)
               .maxgain(float.infinity)
               .weightDecay(0.0f)
               .spectralDecay(0.0f)
               .useBias(true);

    //The fields can also be accessed again later
    assert(opts.padding == [0, 0]);
    assert(opts.stride == [1, 1]);
    assert(opts.filterProj is null);
    assert(opts.biasProj is null);
    assert(opts.maxgain == float.infinity);
    assert(opts.weightDecay == 0.0f);
    assert(opts.spectralDecay == 0.0f);
    assert(opts.useBias);
}
63 
/**
    Creates a convolutional layer typically found in a convnet used for image classification.

    The layer owns a filter bank of shape $(D [outputChannels, input.output.shape[1]] ~ filterDims) and, when
    $(D opts.useBias) is set, a bias vector of length $(D outputChannels). The same parameters are applied to both
    the inference output and the training output of $(D input).

    All options are read from $(D opts) once, when this function is called; mutating $(D opts) afterwards does not
    affect the returned layer.

    Params:
        input = The previous (i.e., input) layer.
        outputChannels = The number of feature maps that this layer should produce.
        filterDims = The size of the kernels that should be convolved with the inputs.
        opts = Additional options, with sensible defaults. Passing $(D null) is treated the same as passing a
               default constructed $(D Conv2DOptions).

    Returns:
        The new convolutional $(D Layer).
*/
Layer conv2D(Layer input, size_t outputChannels, size_t[] filterDims, Conv2DOptions opts = new Conv2DOptions())
{
    //An explicit null would otherwise be dereferenced on the very next statement
    if(opts is null)
    {
        opts = new Conv2DOptions();
    }

    //Snapshot every option up front so that nothing below reads from opts lazily
    auto padding = opts.padding;
    auto stride = opts.stride;
    auto filterInit = opts.filterInit;
    auto biasInit = opts.biasInit;
    auto userFilterProj = opts.filterProj;
    auto filterProj = userFilterProj;
    auto biasProj = opts.biasProj;
    auto weightDecay = opts.weightDecay;
    auto useBias = opts.useBias;
    auto spectralDecay = opts.spectralDecay;
    auto maxgain = opts.maxgain;

    auto x = input.output;
    auto xTr = input.trainOutput;

    //x.shape[1] is presumably the channel axis of the input -- TODO confirm against the convolution operation
    auto filters = float32([outputChannels, x.shape[1]] ~ filterDims);
    filterInit(filters);

    //Adds two optional loss terms, treating null as "no term"
    Operation safeAdd(Operation op1, Operation op2)
    {
        if(op1 is null && op2 is null)
        {
            return null;
        }
        else if(op1 is null)
        {
            return op2;
        }
        else if(op2 is null)
        {
            return op1;
        }
        else
        {
            return op1 + op2;
        }
    }

    //The filter loss stays null unless at least one of the regularisers has a non-zero coefficient
    Operation filterLoss;
    filterLoss = safeAdd(filterLoss, (weightDecay == 0.0f) ? null : (weightDecay * sum(filters * filters)));
    filterLoss = safeAdd(
        filterLoss,
        (spectralDecay == 0.0f) ? null : spectralDecay * spectralNorm(filters, padding, stride)
    );

    auto y = x.convolution(filters, padding, stride);
    auto yTr = xTr.convolution(filters, padding, stride);

    //Flatten the training-path input and (pre-bias) output to one row per leading-dimension entry
    auto before = xTr.reshape([xTr.shape[0], xTr.volume / xTr.shape[0]]);
    auto after = yTr.reshape([yTr.shape[0], yTr.volume / yTr.shape[0]]);

    /*
        Projection installed when a finite maxgain is requested. The gain is the largest ratio, over the rows of
        before/after, of the output L2 norm to the input L2 norm. The new weights are divided by
        max(1, gain / maxgain), and any projection supplied by the caller is applied to the rescaled weights.

        NOTE(review): the gain is measured on `after`, which is built from `filters` rather than from newWeights --
        confirm this is the intended approximation.
    */
    Operation maxGainProj(Operation newWeights)
    {
        //The small constant keeps the ratio finite when a row is all zeros
        auto beforeNorms = sum(before * before, [1]) + 1e-8;
        auto afterNorms = sum(after * after, [1]) + 1e-8;
        auto mg = maxElement(sqrt(afterNorms / beforeNorms));

        auto scaled = newWeights * (1.0f / max(float32Constant([], [1.0f]), mg / maxgain));

        return (userFilterProj is null) ? scaled : userFilterProj(scaled);
    }

    if(maxgain != float.infinity)
    {
        //maxGainProj chains to userFilterProj itself, so replacing filterProj here does not drop the user's projection
        filterProj = &maxGainProj;
    }

    Parameter[] params = [
            Parameter(filters, filterLoss, filterProj)
        ];

    if(useBias)
    {
        auto biases = float32([outputChannels]);
        biasInit(biases);

        y = y.addBias(biases);
        yTr = yTr.addBias(biases);

        //Biases carry no loss term of their own
        params ~= Parameter(biases, null, biasProj);
    }

    return new Layer([input], y, yTr, params);
}
167 
/**
    Builds an estimate of the squared spectral norm of the filter bank, for use as a regularisation term.

    The filters are reshaped into a matrix with one row per entry of the leading dimension. A random starting vector
    is multiplied by (filters^T * filters) numIts times, normalised to unit length, and the squared length of its
    image under the reshaped filter matrix is returned.

    Note that this function computes the incorrect norm used by Yoshida and Miyato (2017): the computation works on
    the reshaped filter matrix only, and the padding and stride arguments are accepted but never used.

    Yoshida, Y., & Miyato, T. (2017). Spectral Norm Regularization for Improving the Generalizability of Deep Learning.
    arXiv preprint arXiv:1705.10941.

    Params:
        filters = The filter bank to measure.
        padding = Unused.
        stride = Unused.
        numIts = The number of power iteration steps to unroll into the graph.

    Returns:
        An operation computing the squared norm estimate.
*/
private Operation spectralNorm(Operation filters, size_t[] padding, size_t[] stride, size_t numIts = 1)
{
    filters = filters.reshape([filters.shape[0], filters.volume / filters.shape[0]]);

    //The transpose does not change between iterations, so build it once
    auto filtersT = filters.transpose([1, 0]);

    //Random starting vector; presumably uniform on [-1, 1) if uniformSample draws from [0, 1) -- TODO confirm
    auto x = uniformSample([filters.shape[1], 1]) * 2.0f - 1.0f;

    //Range foreach keeps the counter in the same unsigned type as numIts
    foreach(_; 0 .. numIts)
    {
        x = matmul(filtersT, matmul(filters, x));
    }

    auto v = x / sqrt(sum(x * x));
    auto y = matmul(filters, v);

    return sum(y * y);
}