1 /**
2     This module enables operation graphs to be evaluated using CPU kernels.
3 
4     Authors: Henry Gouk
5 */
6 module dopt.cpu;
7 
8 import std.exception;
9 
10 import dopt.core;
11 
/*
    Module constructor: initialises each CPU kernel submodule and then installs the CPU backend as the default
    evaluator, compiler and buffer allocators.
*/
shared static this()
{
    import std.functional : toDelegate;

    import dopt.cpu.basic;
    import dopt.cpu.math;
    import dopt.cpu.nnet;
    import dopt.cpu.random;

    //Every submodule exposes its own initialize(), so each call has to be fully qualified
    dopt.cpu.basic.initialize();
    dopt.cpu.math.initialize();
    dopt.cpu.nnet.initialize();
    dopt.cpu.random.initialize();

    //Route the library-wide defaults to the CPU implementations in this module
    defaultEvaluator = toDelegate(&evaluateCPU);
    defaultCompiler = (Operation[] outputs) => new CPUPlan(outputs);
    defaultVarAllocator = (size_t byteCount) => new CPUBuffer(byteCount);
    defaultArgAllocator = (size_t byteCount) => new CPUBuffer(byteCount);
}
30 
/**
    Common interface for all CPU kernels.

    Kernels are stored by operation type name and are invoked by evaluateCPU for each operation that has to be
    computed.
*/
interface CPUKernel
{
    /**
        Computes the value of a single operation.

        Params:
            op = The operation being evaluated.
            inputs = The values of the dependencies of op, in the same order as they appear in op.deps.
            output = A preallocated buffer that receives the result. evaluateCPU sizes it as the volume of the
                     output type multiplied by the size of its element type.
    */
    void execute(Operation op, const(void[])[] inputs, void[] output);
}
38 
/**
    Adapter that turns a plain delegate into a CPUKernel.

    Every call to execute is forwarded, unchanged, to the delegate supplied at construction.
*/
class CPUKernelDelegate : CPUKernel
{
    private
    {
        //Signature shared by the constructor argument and the stored delegate
        alias KernelFunc = void delegate(Operation, const(void[])[], void[]);

        KernelFunc mKernel;
    }

    public
    {
        /**
            Params:
                kern = The delegate that performs the actual computation.
        */
        this(KernelFunc kern)
        {
            this.mKernel = kern;
        }

        /**
            Forwards the arguments to the wrapped delegate.
        */
        void execute(Operation op, const(void[])[] inputs, void[] output)
        {
            this.mKernel(op, inputs, output);
        }
    }
}
62 
/**
    Associates a kernel with an operation type so that evaluateCPU can execute operations of that type.

    Params:
        opName = The name of the operation.
        kernel = The kernel that will be used to execute operations named opName.

    Throws:
        An $(D Exception) if a kernel has already been registered under opName.
*/
void registerCPUKernel(string opName, CPUKernel kernel)
{
    const alreadyRegistered = (opName in mKernels) !is null;

    enforce(!alreadyRegistered, "A CPUKernel is already registered for the operation '" ~ opName ~ "'");

    mKernels[opName] = kernel;
}
79 
/**
    Removes the kernel registered for the given operation, if there is one.

    Calling this for an operation that has no registered kernel does nothing.

    Params:
        opName = The name of the operation whose kernel should be removed.
*/
void deregisterCPUKernel(string opName)
{
    if(opName in mKernels)
    {
        mKernels.remove(opName);
    }
}
90 
/**
    Provides a list of operations that can be evaluated on the CPU.

    This is every operation for which a CPUKernel has been registered, followed by the operations that evaluateCPU
    handles itself when no kernel is registered for them ("constant", "variable" and "reshape"). Each name appears
    at most once, even if a kernel has been explicitly registered for one of the built-in operations.

    Returns:
        A newly allocated array of operation names.
*/
string[] listAllCPUOperations()
{
    //.keys already returns a freshly allocated array, so no additional copy is required
    string[] names = mKernels.keys;

    foreach(builtin; ["constant", "variable", "reshape"])
    {
        //Skip built-ins that have an explicit kernel, as they are already in the list
        if((builtin in mKernels) is null)
        {
            names ~= builtin;
        }
    }

    return names;
}
101 
/**
    A DeviceBuffer whose contents are stored in an ordinary array in host memory.
*/
class CPUBuffer : DeviceBuffer
{
    public
    {
        /**
            Allocates a new buffer.

            Params:
                len = The size of the buffer, in bytes.
        */
        this(size_t len)
        {
            mBuffer = new ubyte[len];
        }

        /**
            Creates a buffer containing a copy of the given data.

            Params:
                buf = The data to copy. The new buffer does not alias buf.
        */
        this(void[] buf)
        {
            mBuffer = buf.dup;
        }

        /**
            Returns:
                The size of this buffer, in bytes.
        */
        override size_t numBytes() const
        {
            return mBuffer.length;
        }

        /**
            Copies the contents of this buffer into buf.

            Params:
                buf = The destination. Must be exactly numBytes bytes long.

            Throws:
                An $(D Exception) if the length of buf differs from the size of this buffer.
        */
        override void get(void[] buf) const
        {
            //Check explicitly rather than relying on the runtime array copy check, which can be compiled out
            enforce(buf.length == mBuffer.length,
                "The destination array must be the same size as the CPUBuffer being read");

            buf[] = mBuffer[];
        }

        /**
            Overwrites the contents of this buffer with the contents of buf.

            Params:
                buf = The source data. Must be exactly numBytes bytes long.

            Throws:
                An $(D Exception) if the length of buf differs from the size of this buffer.
        */
        override void set(const void[] buf)
        {
            //Check explicitly rather than relying on the runtime array copy check, which can be compiled out
            enforce(buf.length == mBuffer.length,
                "The source array must be the same size as the CPUBuffer being written");

            mBuffer[] = buf[];
        }

        /**
            Provides direct access to the underlying storage.

            Returns:
                A slice that aliases the memory of this buffer; writes through it modify the buffer.
        */
        ubyte[] raw()
        {
            return cast(ubyte[])mBuffer;
        }
    }

    private
    {
        void[] mBuffer;
    }
}
142 
/**
    A Plan that computes its outputs by calling evaluateCPU.
*/
class CPUPlan : Plan
{
    public
    {
        /**
            Params:
                outputs = The operations this plan should compute; passed straight to the Plan constructor.
        */
        this(Operation[] outputs)
        {
            super(outputs);
        }
    }

    protected
    {
        /**
            Evaluates the outputs of the plan and copies each result into the matching element of rets.

            Params:
                args = The variable assignments forwarded to evaluateCPU.
                rets = The buffers that receive the results. If the number of buffers and the number of results
                       differ, only the leading pairs are written.
        */
        override void executeImpl(DeviceBuffer[Operation] args, DeviceBuffer[] rets)
        {
            auto computed = evaluateCPU(mOutputs, args);

            //Stop at the shorter of the two arrays
            const pairCount = computed.length < rets.length ? computed.length : rets.length;

            foreach(idx; 0 .. pairCount)
            {
                rets[idx].set(computed[idx]);
            }
        }
    }
}
168 
/**
    Evaluates several nodes from the operation graph using the CPU.

    If the elements of $(D ops) have common dependencies, then each dependency is evaluated only once. For this
    reason it is recommended that this overload is used when multiple nodes should be evaluated.

    Params:
        ops = The nodes of the operation graph that values should be computed for.
        args = A set of variable assignments. Operations that appear as keys are not computed; the supplied value
               is used instead.

    Returns:
        An array of $(D DeviceBuffer) objects, each containing the value of the corresponding element in $(D ops).

    Throws:
        An $(D Exception) if an operation that has to be computed has no registered CPU kernel.
*/
DeviceBuffer[] evaluateCPU(Operation[] ops, DeviceBuffer[Operation] args = null)
{
    import std.algorithm : filter;
    import std.array : array;

    //Toposort the operations by dependency, leaving out anything whose value was supplied by the caller.
    //A direct associative array lookup avoids building and scanning the key array for every operation.
    Operation[] sortedOps = topologicalSort(ops)
                                  .filter!(x => (x in args) is null)
                                  .array();

    //Count the number of references to each operation
    int[Operation] refCounts;

    //The requested outputs hold a reference of their own, so they are never released early
    foreach(o; ops)
    {
        refCounts[o]++;
    }

    foreach(o; sortedOps)
    {
        foreach(d; o.deps)
        {
            refCounts[d]++;
        }
    }

    //Start executing the operations
    ubyte[][Operation] results;

    foreach(k, v; args)
    {
        results[k] = v.get!ubyte();
    }

    foreach(o; sortedOps)
    {
        //Check for some easy optimizations
        if(o.opType == "variable" && !("variable" in mKernels))
        {
            results[o] = o.value.get!ubyte;
            continue;
        }
        else if(o.opType == "constant" && !("constant" in mKernels))
        {
            results[o] = o.value.get!ubyte;
            continue;
        }
        else if(o.opType == "reshape" && !("reshape" in mKernels))
        {
            //A reshape does not touch the data, so the result can simply alias the input
            auto src = o.deps[0];
            results[o] = results[src];

            //The reshape has consumed its reference to the input. Dropping the entry only removes this
            //pointer; the data remains reachable through results[o] for as long as that is needed.
            if(--refCounts[src] == 0)
            {
                results[src] = null;
            }

            continue;
        }

        //Find the kernel before allocating anything, so a missing kernel fails early
        auto kern = mKernels.get(o.opType, null);

        if(kern is null)
        {
            throw new Exception("No CPU kernel registered for operation " ~ o.opType);
        }

        //Allocate a buffer for the output of this operation
        auto output = new ubyte[o.outputType.volume * o.outputType.elementType.sizeOf()];
        results[o] = output;

        //Get the input buffers
        ubyte[][] inputs;

        foreach(d; o.deps)
        {
            inputs ~= results[d];
            refCounts[d]--;
        }

        //Execute the operation
        kern.execute(o, cast(const(void[])[]) inputs, cast(void[])output);

        foreach(d; o.deps)
        {
            //Remove the pointer to this buffer if we don't need it anymore
            //This will allow the GC to collect it at some point, if required
            if(refCounts[d] == 0)
            {
                results[d] = null;
            }
        }
    }

    DeviceBuffer[] returnVals = new DeviceBuffer[ops.length];

    foreach(i, o; ops)
    {
        //CPUBuffer copies the data, so the returned buffers do not alias the intermediate results
        returnVals[i] = new CPUBuffer(results[o]);
    }

    return returnVals;
}
281 
private
{
    //Registry of CPU kernels, keyed by operation type name. Written by registerCPUKernel and
    //deregisterCPUKernel, and read by evaluateCPU and listAllCPUOperations.
    //NOTE(review): this is declared without shared/__gshared, which makes it thread-local in D, whereas the
    //module constructor is a `shared static this()` that runs once per process. Presumably the initialize()
    //calls there populate this table; if so, other threads would see an empty registry - TODO confirm.
    CPUKernel[string] mKernels;
}