/**
    Provides useful tools for constructing neural networks.

    Currently, only directed acyclic graphs are supported.

    Authors: Henry Gouk
*/
module dopt.nnet.networks;

import std.algorithm;
import std.array;

import dopt;

/**
    Encapsulates the details of a network with a directed acyclic graph structure.

    This class does not provide facilities to actually train the network. That can be accomplished with the
    $(D dopt.online) package.
*/
class DAGNetwork
{
    public
    {
        /**
            Constructs a DAGNetwork with the given inputs and outputs.

            Params:
                inputs = The inputs to the network. This will usually contain a single $(D Operation) representing
                         a batch of feature vectors.
                outputs = The output layers (i.e., predictions) of the network.
        */
        this(Operation[] inputs, Layer[] outputs)
        {
            mInputs = inputs.dup;
            mOutputs = outputs.map!(x => x.output).array();
            mTrainOutputs = outputs.map!(x => x.trainOutput).array();

            //Gather the parameter information from every layer in the graph.
            auto layers = topologicalSort(outputs);
            auto paramsinfo = layers.map!(x => x.params).joiner().array();
            mParams = paramsinfo.map!(x => x.symbol).array();

            foreach(p; paramsinfo)
            {
                //Accumulate the per-parameter loss terms into a single Operation.
                if(p.loss !is null)
                {
                    if(mParameterLoss is null)
                    {
                        mParameterLoss = p.loss;
                    }
                    else
                    {
                        mParameterLoss = mParameterLoss + p.loss;
                    }
                }

                if(p.projection !is null)
                {
                    mParameterProj[p.symbol] = p.projection;
                }
            }

            if(mParameterLoss is null)
            {
                //Fall back to a zero-valued constant. This prevents an annoying-to-debug segfault in user
                //code when there are no parameter loss terms.
                mParameterLoss = float32([], [0.0f]);
            }
        }

        /**
            The inputs provided when the $(D DAGNetwork) was constructed.
        */
        Operation[] inputs()
        {
            return mInputs.dup;
        }

        /**
            The $(D Operation) objects produced by the output layers provided during construction.
        */
        Operation[] outputs()
        {
            return mOutputs.dup;
        }

        /**
            The training-specific $(D Operation) objects produced by the output layers provided during
            construction.

            These should be used when creating the network optimiser.
        */
        Operation[] trainOutputs()
        {
            return mTrainOutputs.dup;
        }

        /**
            The sum of all the parameter loss terms.

            This will include, for example, any L2 weight decay terms.
        */
        Operation paramLoss()
        {
            return mParameterLoss;
        }

        /**
            An associative array of projection operations that should be applied to parameters during
            optimisation.
        */
        Projection[Operation] paramProj()
        {
            return mParameterProj;
        }

        /**
            An array of all the $(D Operation) nodes in the graph that represent network parameters.
        */
        Operation[] params()
        {
            return mParams.dup;
        }
    }

    private
    {
        Operation[] mInputs;
        Operation[] mOutputs;
        Operation[] mTrainOutputs;
        Operation[] mParams;
        Operation mParameterLoss;
        Projection[Operation] mParameterProj;
    }
}
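
/*
    The sketch below illustrates how a DAGNetwork is typically assembled and where its accessors fit into
    the training workflow. The layer helpers used here (dataSource, dense, softmax) and the crossEntropy
    loss are assumptions about the surrounding dopt.nnet API rather than confirmed signatures, so the
    snippet is guarded with version(none) instead of being compiled as part of the module.
*/
version(none) unittest
{
    //A batch of 100 ten-dimensional feature vectors, and the corresponding one-hot labels (5 classes).
    auto features = float32([100, 10]);
    auto labels = float32([100, 5]);

    //Hypothetical layer pipeline; consult dopt.nnet for the real helper names.
    auto preds = dataSource(features)
                .dense(32)
                .dense(5)
                .softmax();

    auto network = new DAGNetwork([features], [preds]);

    //trainOutputs (not outputs) should be used when constructing the training loss, and paramLoss adds
    //any regularisation terms (e.g., L2 weight decay) contributed by the layers.
    auto loss = crossEntropy(network.trainOutputs[0], labels) + network.paramLoss;

    //paramProj supplies any projection operations that the dopt.online optimiser should apply to the
    //parameters after each update.
    auto proj = network.paramProj;
}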