/**
    Contains an implementation of convolutional layers.

    Authors: Henry Gouk
*/
module dopt.nnet.layers.conv;

import dopt.core;
import dopt.nnet;
import dopt.nnet.util;
import dopt.online;

/**
    Encapsulates the additional options for a $(D Layer) created with conv2D.
*/
class Conv2DOptions
{
    this()
    {
        _useBias = true;
        _filterInit = heGaussianInit();
        _biasInit = constantInit(0.0f);
        _padding = [0, 0];
        _stride = [1, 1];
        _weightDecay = 0.0f;
        _maxgain = float.infinity;
        _spectralDecay = 0.0f;
    }

    mixin(dynamicProperties(
        "size_t[]", "padding",
        "size_t[]", "stride",
        "ParamInitializer", "filterInit",
        "ParamInitializer", "biasInit",
        "Projection", "filterProj",
        "Projection", "biasProj",
        "float", "maxgain",
        "float", "weightDecay",
        "float", "spectralDecay",
        "bool", "useBias"
    ));
}

///
unittest
{
    //Creates a Conv2DOptions object with the default parameter values
    auto opts = new Conv2DOptions()
               .padding([0, 0])
               .stride([1, 1])
               .filterInit(heGaussianInit())
               .biasInit(constantInit(0.0f))
               .filterProj(null)
               .biasProj(null)
               .weightDecay(0.0f)
               .useBias(true);

    //The fields can also be accessed again later
    assert(opts.padding == [0, 0]);
    assert(opts.stride == [1, 1]);
}

/**
    Creates a convolutional layer typically found in a convnet used for image classification.

    Params:
        input = The previous (i.e., input) layer.
        outputChannels = The number of feature maps that this layer should produce.
        filterDims = The size of the kernels that should be convolved with the inputs.
        opts = Additional options, with sensible defaults.

    Returns:
        The new convolutional $(D Layer).
*/
Layer conv2D(Layer input, size_t outputChannels, size_t[] filterDims, Conv2DOptions opts = new Conv2DOptions())
{
    auto padding = opts.padding;
    auto stride = opts.stride;
    auto filterInit = opts.filterInit;
    auto biasInit = opts.biasInit;
    auto filterProj = opts.filterProj;
    auto biasProj = opts.biasProj;
    auto weightDecay = opts.weightDecay;
    auto useBias = opts.useBias;
    auto spectralDecay = opts.spectralDecay;

    auto x = input.output;
    auto xTr = input.trainOutput;

    //The filters have shape [outputChannels, inputChannels] ~ filterDims
    auto filters = float32([outputChannels, x.shape[1]] ~ filterDims);
    filterInit(filters);

    //Adds two optional loss terms, where a null Operation means "no term"
    Operation safeAdd(Operation op1, Operation op2)
    {
        if(op1 is null && op2 is null)
        {
            return null;
        }
        else if(op1 is null)
        {
            return op2;
        }
        else if(op2 is null)
        {
            return op1;
        }
        else
        {
            return op1 + op2;
        }
    }

    //Construct the regularisation loss for the filters: L2 weight decay and/or spectral decay
    Operation filterLoss;
    filterLoss = safeAdd(filterLoss, (weightDecay == 0.0f) ? null : (weightDecay * sum(filters * filters)));
    filterLoss = safeAdd(
        filterLoss,
        (spectralDecay == 0.0f) ? null : spectralDecay * spectralNorm(filters, padding, stride)
    );

    auto y = x.convolution(filters, padding, stride);
    auto yTr = xTr.convolution(filters, padding, stride);

    //Flatten the pre- and post-convolution training activations so their norms can be compared
    auto before = xTr.reshape([xTr.shape[0], xTr.volume / xTr.shape[0]]);
    auto after = yTr.reshape([yTr.shape[0], yTr.volume / yTr.shape[0]]);

    Operation maxGainProj(Operation newWeights)
    {
        //Estimate the layer's gain as the largest ratio between output and input norms in the batch
        auto beforeNorms = sum(before * before, [1]) + 1e-8;
        auto afterNorms = sum(after * after, [1]) + 1e-8;
        auto mg = maxElement(sqrt(afterNorms / beforeNorms));

        //Rescale the weights so the gain does not exceed opts.maxgain, then apply any user-supplied projection
        if(opts.filterProj is null)
        {
            return newWeights * (1.0f / max(float32Constant([], [1.0f]), mg / opts.maxgain));
        }
        else
        {
            return opts._filterProj(newWeights * (1.0f / max(float32Constant([], [1.0f]), mg / opts.maxgain)));
        }
    }

    if(opts.maxgain != float.infinity)
    {
        filterProj = &maxGainProj;
    }

    Parameter[] params = [
        Parameter(filters, filterLoss, filterProj)
    ];

    if(useBias)
    {
        auto biases = float32([outputChannels]);
        biasInit(biases);

        y = y.addBias(biases);
        yTr = yTr.addBias(biases);

        params ~= Parameter(biases, null, biasProj);
    }

    return new Layer([input], y, yTr, params);
}
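/*
    A minimal usage sketch for conv2D. This assumes the dataSource helper from
    dopt.nnet for wrapping an input Operation in a Layer, and the shapes are
    purely illustrative.
*/
unittest
{
    //A batch of 16 single-channel 28x28 images
    auto features = float32([16, 1, 28, 28]);

    //Convolve with 8 filters of size 3x3, using 1x1 zero padding so that the
    //spatial dimensions are preserved
    auto layer = dataSource(features)
               .conv2D(8, [3, 3], new Conv2DOptions().padding([1, 1]));

    assert(layer.output.shape == [16, 8, 28, 28]);
}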
/**
    Note that this function computes the approximation used by Yoshida and Miyato (2017), which is not the true
    spectral norm of the convolution operator: the filter tensor is flattened into a matrix, and the padding and
    stride arguments are ignored. The return value is an estimate of the squared spectral norm of that matrix,
    obtained via power iteration.

    Yoshida, Y., & Miyato, T. (2017). Spectral Norm Regularization for Improving the Generalizability of Deep
    Learning. arXiv preprint arXiv:1705.10941.
*/
private Operation spectralNorm(Operation filters, size_t[] padding, size_t[] stride, size_t numIts = 1)
{
    //Flatten the filter tensor into a matrix with one row per output channel
    filters = filters.reshape([filters.shape[0], filters.volume / filters.shape[0]]);

    //Start power iteration from a random vector drawn from U(-1, 1)
    auto x = uniformSample([filters.shape[1], 1]) * 2.0f - 1.0f;

    //Repeatedly apply W^T W so that x converges towards the dominant right singular vector
    foreach(i; 0 .. numIts)
    {
        x = matmul(filters.transpose([1, 0]), matmul(filters, x));
    }

    //Normalise, then return ||W v||^2, an estimate of the squared spectral norm
    auto v = x / sqrt(sum(x * x));
    auto y = matmul(filters, v);

    return sum(y * y);
}
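/*
    An illustrative sketch of the power iteration scheme that spectralNorm builds
    as a dopt graph, written with plain arrays so the arithmetic is visible. This
    is not part of the dopt API: repeatedly applying W^T W to a vector and then
    measuring ||W v||^2 for the normalised result v estimates the squared spectral
    norm of W.
*/
unittest
{
    import std.math : sqrt;

    //W = diag(3, 1), whose spectral norm (largest singular value) is 3
    float[2][2] w = [[3.0f, 0.0f], [0.0f, 1.0f]];
    float[2] x = [1.0f, 1.0f];

    //x <- (W^T W)^k x, mirroring the numIts loop in spectralNorm
    foreach(k; 0 .. 10)
    {
        float[2] tmp = [0.0f, 0.0f];

        foreach(i; 0 .. 2)
        {
            foreach(j; 0 .. 2)
            {
                tmp[i] += w[i][j] * x[j];
            }
        }

        x = [0.0f, 0.0f];

        foreach(i; 0 .. 2)
        {
            foreach(j; 0 .. 2)
            {
                x[i] += w[j][i] * tmp[j];
            }
        }
    }

    //v = x / ||x||, then ||W v||^2 approximates the squared spectral norm, 9
    auto norm = sqrt(x[0] * x[0] + x[1] * x[1]);
    float sigmaSq = 0.0f;

    foreach(i; 0 .. 2)
    {
        float yi = 0.0f;

        foreach(j; 0 .. 2)
        {
            yi += w[i][j] * (x[j] / norm);
        }

        sigmaSq += yi * yi;
    }

    assert(sigmaSq > 8.99f && sigmaSq < 9.01f);
}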