module dopt.nnet.models.vgg;

import std.math : isNaN;

import dopt.core;
import dopt.nnet;
import dopt.nnet.util;
import dopt.nnet.models.maybe;
import dopt.online : Projection;

/**
    Optional regularisation settings for the VGG models.

    At most one of the maxgain and Lipschitz weight constraints may be enabled at a
    time; this is checked by $(D verify).
*/
class VGGOptions
{
    this()
    {
        _dropout = false;
        _batchnorm = false;
        _maxgainNorm = float.nan;
        _lipschitzNorm = float.nan;
        _maxNorm = float.infinity;
        _spectralDecay = 0.0f;
    }

    /// Checks that the selected options are mutually compatible.
    void verify()
    {
        import std.exception : enforce;

        int regCtr;

        if(!isNaN(_maxgainNorm))
        {
            regCtr++;

            enforce(_maxgainNorm == 2.0f, "Only a maxgainNorm of 2 is currently supported.");
        }

        if(!isNaN(_lipschitzNorm))
        {
            regCtr++;
        }

        enforce(regCtr <= 1, "VGG models currently only support using one of maxgain and the Lipschitz constraint.");
    }

    mixin(dynamicProperties(
        "bool", "dropout",
        "bool", "batchnorm",
        "float", "maxgainNorm",
        "float", "lipschitzNorm",
        "float", "maxNorm",
        "float", "spectralDecay"
    ));
}

/**
    Creates a VGG-16 network: a 13-layer convolutional feature extractor followed by
    a fully connected top with the given hidden layer sizes.
*/
Layer vgg16(Operation features, size_t[] denseLayerSizes = [4096, 4096], VGGOptions opts = new VGGOptions())
{
    // Positive entries are 3x3 conv output channels; -1 marks a 2x2 max pool.
    auto sizes = [64, 64, -1, 128, 128, -1, 256, 256, 256, -1, 512, 512, 512, -1, 512, 512, 512, -1];

    opts.verify();

    return makeExtractor(features, sizes, opts)
          .makeTop(denseLayerSizes, opts);
}

/**
    Creates a VGG-19 network: a 16-layer convolutional feature extractor followed by
    a fully connected top with the given hidden layer sizes.
*/
Layer vgg19(Operation features, size_t[] denseLayerSizes = [4096, 4096], VGGOptions opts = new VGGOptions())
{
    auto sizes = [64, 64, -1, 128, 128, -1, 256, 256, 256, 256, -1, 512, 512, 512, 512, -1, 512, 512, 512, 512, -1];

    opts.verify();

    return makeExtractor(features, sizes, opts)
          .makeTop(denseLayerSizes, opts);
}

/**
    Creates a VGG-style network with a custom extractor configuration. Each positive
    entry of $(D extractorSizes) is the number of output channels of a 3x3
    convolutional layer, and -1 denotes a 2x2 max-pooling layer.
*/
Layer vgg(Operation features, int[] extractorSizes, size_t[] denseLayerSizes = [4096, 4096],
    VGGOptions opts = new VGGOptions())
{
    opts.verify();

    return makeExtractor(features, extractorSizes, opts)
          .makeTop(denseLayerSizes, opts);
}

private
{
    /// Builds the convolutional part of the network.
    Layer makeExtractor(Operation features, int[] sizes, VGGOptions opts)
    {
        auto layers = dataSource(features);
        int poolCtr;

        float drop = opts.dropout ? 0.2f : 0.0f;
        bool bn = opts.batchnorm;
        float bnlip = float.infinity;
        float maxgain = opts.maxNorm;

        // When the Lipschitz constraint is enabled, it also bounds the batch norm layers.
        if(!isNaN(opts.lipschitzNorm))
        {
            bnlip = opts.maxNorm;
        }

        // The maxgain constraint is only active when a maxgain norm has been set.
        if(isNaN(opts.maxgainNorm))
        {
            maxgain = float.infinity;
        }

        foreach(s; sizes)
        {
            if(s == -1)
            {
                layers = layers.maxPool([2, 2]);
                poolCtr++;
            }
            else
            {
                Projection projFunc;

                if(!isNaN(opts.lipschitzNorm))
                {
                    projFunc = projConvParams(
                        float32Constant(opts.maxNorm),
                        layers.trainOutput.shape[2 .. $],
                        [1, 1],
                        [1, 1],
                        opts.lipschitzNorm
                    );
                }

                // Dropout is skipped until after the first pooling layer, so the
                // input images themselves are never dropped out.
                layers = layers
                        .maybeDropout(poolCtr == 0 ? 0.0f : drop)
                        .conv2D(s, [3, 3], new Conv2DOptions()
                            .padding([1, 1])
                            .maxgain(maxgain)
                            .filterProj(projFunc)
                            .spectralDecay(opts.spectralDecay))
                        .maybeBatchNorm(bn, new BatchNormOptions()
                            .maxgain(maxgain)
                            .lipschitz(bnlip))
                        .relu();
            }
        }

        return layers;
    }

    /// Builds the fully connected top: dropout -> dense -> ReLU for each layer size.
    Layer makeTop(Layer input, size_t[] sizes, VGGOptions opts)
    {
        float drop = opts.dropout ? 0.5f : 0.0f;

        Projection projFunc;
        float maxgain = opts.maxNorm;

        if(!isNaN(opts.lipschitzNorm))
        {
            projFunc = projMatrix(float32Constant(opts.maxNorm), opts.lipschitzNorm);
        }

        if(isNaN(opts.maxgainNorm))
        {
            maxgain = float.infinity;
        }

        foreach(s; sizes)
        {
            input = input
                   .maybeDropout(drop)
                   .dense(s, new DenseOptions()
                       .maxgain(maxgain)
                       .weightProj(projFunc)
                       .spectralDecay(opts.spectralDecay))
                   .relu();
        }

        return input;
    }
}
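
/+
    A minimal usage sketch, not part of the original module. It assumes dopt.core's
    float32 function for creating a variable Operation from a shape (as used in the
    dopt example programs); the batch size and 224x224 input resolution are
    illustrative only, and the returned Layer would normally be fed into a loss and
    optimiser elsewhere.
+/
unittest
{
    // Placeholder for a batch of 128 RGB images at the canonical VGG resolution.
    auto features = float32([128, 3, 224, 224]);

    // VGGOptions uses the same chained setters as Conv2DOptions/DenseOptions above.
    auto opts = new VGGOptions()
               .dropout(true)
               .batchnorm(true);

    // Construct the VGG-19 graph; Operations such as trainOutput (used above in
    // makeExtractor) hang off the returned Layer.
    auto net = vgg19(features, [4096, 4096], opts);
}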