1 /**
2     Contains an implementation of the regularisation techniques presented in Gouk et al. (2018).
3 
4     Gouk, H., Frank, E., Pfahringer, B., & Cree, M. (2018). Regularisation of Neural Networks by Enforcing Lipschitz
5     Continuity. arXiv preprint arXiv:1804.04368.
6 
7     Authors: Henry Gouk
8 */
9 module dopt.nnet.lipschitz;
10 
13 import dopt.core;
14 import dopt.online;
15 
16 /**
17     Returns a projection function that can be used to constrain a matrix norm.
18 
19     The operator norm induced by the vector p-norm is used.
20 
21     Params:
22         maxnorm = A scalar value indicating the maximum allowable operator norm.
23         p = The vector p-norm that will induce the operator norm.
24     
25     Returns:
26         A projection function that can be used with the online optimisation algorithms.
27 */
28 Projection projMatrix(Operation maxnorm, float p = 2)
29 {
30     Operation proj(Operation param)
31     {
32         auto norm = matrixNorm(param, p);
33 
34         return maxNorm(param, norm, maxnorm);
35     }
36 
37     return &proj;
38 }
39 
40 /**
41     Computes the induced operator norm corresponding to the vector p-norm.
42 */
43 Operation matrixNorm(Operation param, float p)
44 {
45     import std.exception : enforce;
46 
47     enforce(param.rank == 2, "This function only operates on matrices");
48 
49     if(p == 1.0f)
50     {
56         /*
57             The matrix norm induced by the L1 vector norm is max Sum_i abs(a_ij), which is the maximum absolute column
58             sum. We are doing a row sum here because the weight matrices in dense and convolutional layers are
59             transposed before being multiplied with the input features.
60         */
61 
62         return param.abs().sum([1]).maxElement();
63     }
    else if(p == 2.0f)
    {
        /*
            The matrix norm induced by the L2 vector norm is the spectral norm, i.e., the largest
            singular value of the matrix. It is estimated here with a couple of iterations of the
            power method applied to param^T * param, starting from a random vector.
        */
        auto x = uniformSample([param.shape[1], 1]) * 2.0f - 1.0f;

        for(int i = 0; i < 2; i++)
        {
            x = matmul(param.transpose([1, 0]), matmul(param, x));
        }

        //Normalise the approximate leading singular vector and use it to estimate the norm
        auto v = x / sqrt(sum(x * x));
        auto y = matmul(param, v);

        return sqrt(sum(y * y));
    }
78     else if(p == float.infinity)
79     {
80         /*
81             The matrix norm induced by the L_infty vector norm is max Sum_j abs(a_ij), which is the maximum absolute
82             row sum. We are doing a column sum here because the weight matrices in dense and convolutional layers are
83             transposed before being multiplied with the input features.
84         */
85 
86         return param.abs().sum([0]).maxElement();
87     }
88     else
89     {
90         import std.conv : to;
91 
92         throw new Exception("Cannot compute matrix norm for p=" ~ p.to!string);
93     }
94 }
95 
/**
    Returns a projection function that can be used to constrain the operator norm of the linear
    map implemented by a convolution layer.

    Params:
        maxnorm = A scalar value indicating the maximum allowable operator norm.
        inShape = The spatial dimensions (height and width) of the input feature maps.
        stride = The stride of the convolution.
        padding = The padding of the convolution.
        p = The vector p-norm that will induce the operator norm.

    Returns:
        A projection function that can be used with the online optimisation algorithms.
*/
Projection projConvParams(Operation maxnorm, size_t[] inShape, size_t[] stride, size_t[] padding, float p = 2.0f)
97 {
98     Operation proj(Operation param)
99     {
100         auto norm = convParamsNorm(param, inShape, stride, padding, p);
101 
102         return maxNorm(param, norm, maxnorm);
103     }
104 
105     return &proj;
106 }
107 
/**
    Computes the operator norm of the linear map implemented by a convolution layer.

    Params:
        param = The rank-4 kernel of the convolution.
        inShape = The spatial dimensions (height and width) of the input feature maps.
        stride = The stride of the convolution.
        padding = The padding of the convolution.
        p = The vector p-norm that induces the operator norm.
        n = The number of power method iterations performed when p = 2.

    Returns:
        A scalar operation containing the operator norm.
*/
Operation convParamsNorm(Operation param, size_t[] inShape, size_t[] stride, size_t[] padding, float p = 2.0f,
    size_t n = 2)
110 {
    if(p == 2.0f)
    {
        /*
            Estimate the spectral norm of the convolution with the power method, using the
            transposed convolution to apply the adjoint of the linear map defined by the kernel.
        */
        auto x = uniformSample([cast(size_t)1, param.shape[1]] ~ inShape) * 2.0f - 1.0f;

        foreach(i; 0 .. n)
        {
            x = x
                .convolution(param, padding, stride)
                .convolutionTranspose(param, padding, stride);
        }
121 
122         auto v = x / sqrt(sum(x * x));
123         auto y = convolution(v, param, padding, stride);
124 
125         return sqrt(sum(y * y));
126     }
    else if(p == 1.0f || p == float.infinity)
    {
        /*
            For p = 1 and p = infinity the operator norm of the convolution coincides with the
            corresponding operator norm of the flattened kernel matrix, so the computation can be
            delegated to matrixNorm. Note that this equivalence does not hold for p = 2.
        */
        if(param.rank != 2)
        {
            param = param.reshape([param.shape[0], param.volume / param.shape[0]]);
        }

        return matrixNorm(param, p);
    }
136     }
137     else
138     {
139         import std.conv : to;
140 
141         throw new Exception("Cannot compute convolution params norm for p=" ~ p.to!string);
142     }
143 }
144 
//The power method yields a (stochastic) lower bound on the true spectral norm, which is itself
//bounded above by the sum of the absolute kernel values: 45 for this kernel.
unittest
{
    auto k = float32([1, 1, 3, 3], [
        1, 2, 3, 4, 5, 6, 7, 8, 9
    ]);

    auto norm = convParamsNorm(k, [200, 200], [1, 1], [1, 1], 2.0f).evaluate().get!float[0];

    assert(norm > 0.0f && norm <= 45.0f);
}
156 
157 /**
158     Performs a projection of param such that the new norm will be less than or equal to maxval.
159 */
160 Operation maxNorm(Operation param, Operation norm, Operation maxval)
161 {
162     return param * (1.0f / max(float32([], [1.0f]), norm / maxval));
163 }