1 module dopt.nnet.models.vgg;
2 
3 import std.math : isNaN;
4 
5 import dopt.core;
6 import dopt.nnet;
7 import dopt.nnet.util;
8 import dopt.nnet.models.maybe;
9 import dopt.online : Projection;
10 
/**
    Bundle of settings consumed by the VGG model builders in this module.

    The backing fields (`_dropout`, `_batchnorm`, ...) and the accessors used
    elsewhere in this module (`opts.dropout`, `opts.maxNorm`, ...) are not
    declared by hand; they are presumably generated by the `dynamicProperties`
    mixin at the bottom of the class (NOTE(review): confirm against
    dopt.nnet.util).
*/
class VGGOptions
{
    /**
        Sets every option to its default: dropout and batch norm disabled,
        both regulariser norms unset (NaN), an infinite max norm and no
        spectral decay.
    */
    this()
    {
        // Boolean switches.
        _dropout = false;
        _batchnorm = false;

        // NaN is the "not configured" marker that verify() and the builders test for.
        _maxgainNorm = float.nan;
        _lipschitzNorm = float.nan;

        _maxNorm = float.infinity;
        _spectralDecay = 0.0f;
    }

    /**
        Checks that the selected combination of options is supported.

        Throws:
            An exception (via `std.exception.enforce`) if `maxgainNorm` is set
            to anything other than 2, or if both `maxgainNorm` and
            `lipschitzNorm` are set at the same time.
    */
    void verify()
    {
        import std.exception : enforce;

        bool maxgainRequested = !isNaN(_maxgainNorm);
        bool lipschitzRequested = !isNaN(_lipschitzNorm);

        // The value of the maxgain norm is validated before the mutual-exclusion check.
        if(maxgainRequested)
        {
            enforce(_maxgainNorm == 2.0f, "Only a maxgainNorm of 2 is currently supported.");
        }

        enforce(!(maxgainRequested && lipschitzRequested),
            "VGG models currently only support using one of maxgain and the lipschitz constraint");
    }

    mixin(dynamicProperties(
        "bool", "dropout",
        "bool", "batchnorm",
        "float", "maxgainNorm",
        "float", "lipschitzNorm",
        "float", "maxNorm",
        "float", "spectralDecay"
    ));
}
53 
/**
    Builds a VGG16-style network on top of `features`.

    Params:
        features = Operation that feeds data into the network.
        denseLayerSizes = Width of each dense layer stacked after the convolutional extractor.
        opts = Options controlling dropout, batch norm and the norm-based regularisers.

    Returns:
        The final layer of the assembled network.

    Throws:
        Whatever `opts.verify()` throws when the options are inconsistent.
*/
Layer vgg16(Operation features, size_t[] denseLayerSizes = [4096, 4096], VGGOptions opts = new VGGOptions())
{
    opts.verify();

    // Positive entries are conv layer widths; -1 marks a 2x2 max pooling step.
    int[] extractorLayout = [
        64, 64, -1,
        128, 128, -1,
        256, 256, 256, -1,
        512, 512, 512, -1,
        512, 512, 512, -1
    ];

    auto extractor = makeExtractor(features, extractorLayout, opts);

    return makeTop(extractor, denseLayerSizes, opts);
}
63 
/**
    Builds a VGG19-style network on top of `features`.

    Params:
        features = Operation that feeds data into the network.
        denseLayerSizes = Width of each dense layer stacked after the convolutional extractor.
        opts = Options controlling dropout, batch norm and the norm-based regularisers.

    Returns:
        The final layer of the assembled network.

    Throws:
        Whatever `opts.verify()` throws when the options are inconsistent.
*/
Layer vgg19(Operation features, size_t[] denseLayerSizes = [4096, 4096], VGGOptions opts = new VGGOptions())
{
    opts.verify();

    // Positive entries are conv layer widths; -1 marks a 2x2 max pooling step.
    int[] extractorLayout = [
        64, 64, -1,
        128, 128, -1,
        256, 256, 256, 256, -1,
        512, 512, 512, 512, -1,
        512, 512, 512, 512, -1
    ];

    auto extractor = makeExtractor(features, extractorLayout, opts);

    return makeTop(extractor, denseLayerSizes, opts);
}
73 
/**
    Builds a VGG-style network with a caller-supplied convolutional layout.

    Params:
        features = Operation that feeds data into the network.
        extractorSizes = Layout of the convolutional part: each positive entry is the
                         width of a 3x3 conv layer, and -1 inserts a 2x2 max pooling step.
        denseLayerSizes = Width of each dense layer stacked after the extractor.
        opts = Options controlling dropout, batch norm and the norm-based regularisers.

    Returns:
        The final layer of the assembled network.

    Throws:
        Whatever `opts.verify()` throws when the options are inconsistent.
*/
Layer vgg(Operation features, int[] extractorSizes, size_t[] denseLayerSizes = [4096, 4096],
    VGGOptions opts = new VGGOptions())
{
    opts.verify();

    auto extractor = makeExtractor(features, extractorSizes, opts);

    return makeTop(extractor, denseLayerSizes, opts);
}
82 
83 private
84 {
85     Layer makeExtractor(Operation features, int[] sizes, VGGOptions opts)
86     {
87         auto layers = dataSource(features);
88         int poolCtr;
89 
90         float drop = opts.dropout ? 0.2f : 0.0f;
91         bool bn = opts.batchnorm;
92         float bnlip = float.infinity;
93         float maxgain = opts.maxNorm;
94 
95         if(!isNaN(opts.lipschitzNorm))
96         {
97             bnlip = opts.maxNorm;
98         }
99         
100         if(isNaN(opts.maxgainNorm))
101         {
102             maxgain = float.infinity;
103         }
104 
105         foreach(s; sizes)
106         {
107             if(s == -1)
108             {
109                 layers = layers.maxPool([2, 2]);
110                 poolCtr++;
111             }
112             else
113             {
114                 Projection projFunc;
115 
116                 if(!isNaN(opts.lipschitzNorm))
117                 {
118                     projFunc = projConvParams(
119                         float32Constant(opts.maxNorm),
120                         layers.trainOutput.shape[2 .. $],
121                         [1, 1],
122                         [1, 1],
123                         opts.lipschitzNorm
124                     );
125                 }
126 
127                 layers = layers
128                         .maybeDropout(poolCtr == 0 ? 0.0f : drop)
129                         .conv2D(s, [3, 3], new Conv2DOptions()
130                                               .padding([1, 1])
131                                               .maxgain(maxgain)
132                                               .filterProj(projFunc)
133                                               .spectralDecay(opts.spectralDecay))
134                         .maybeBatchNorm(bn, new BatchNormOptions()
135                                                .maxgain(maxgain)
136                                                .lipschitz(bnlip))
137                         .relu();
138             }
139         }
140 
141         return layers;
142     }
143 
144     Layer makeTop(Layer input, size_t[] sizes, VGGOptions opts)
145     {
146         float drop = opts.dropout ? 0.5f : 0.0f;
147 
148         Projection projFunc;
149         float maxgain = opts.maxNorm;
150 
151         if(!isNaN(opts.lipschitzNorm))
152         {
153             projFunc = projMatrix(float32Constant(opts.maxNorm), opts.lipschitzNorm);
154         }
155         
156         if(isNaN(opts.maxgainNorm))
157         {
158             maxgain = float.infinity;
159         }
160 
161         foreach(i, s; sizes)
162         {
163             input = input
164                    .maybeDropout(drop)
165                    .dense(s, new DenseOptions()
166                                 .maxgain(maxgain)
167                                 .weightProj(projFunc)
168                                 .spectralDecay(opts.spectralDecay))
169                    .relu();
170         }
171 
172         return input;
173     }
174 }