/**
    This module enables operation graphs to be evaluated using CPU kernels.

    Authors: Henry Gouk
*/
module dopt.cpu;

import std.exception;

import dopt.core;

/*
    Initializes the CPU kernel sub-modules and installs the CPU backend as the default evaluator, compiler and
    buffer allocator.
*/
shared static this()
{
    import dopt.cpu.basic;
    import dopt.cpu.math;
    import dopt.cpu.nnet;
    import dopt.cpu.random;

    dopt.cpu.basic.initialize();
    dopt.cpu.math.initialize();
    dopt.cpu.nnet.initialize();
    dopt.cpu.random.initialize();

    import std.functional : toDelegate;
    defaultEvaluator = toDelegate(&evaluateCPU);
    defaultCompiler = (Operation[] ops) { return new CPUPlan(ops); };
    defaultVarAllocator = (size_t numBytes) { return new CPUBuffer(numBytes); };
    defaultArgAllocator = (size_t numBytes) { return new CPUBuffer(numBytes); };
}

/**
    Common interface for all CPU kernels.
*/
interface CPUKernel
{
    /**
        Executes the kernel.

        Params:
            op = The operation being evaluated.
            inputs = One buffer per dependency of op, in the same order as the dependencies.
            output = The buffer that the result of op should be written to.
    */
    void execute(Operation op, const(void[])[] inputs, void[] output);
}

/**
    Convenience class that allows one to wrap a delegate and implement CPUKernel.
*/
class CPUKernelDelegate : CPUKernel
{
    public
    {
        /**
            Params:
                kern = The delegate that every call to execute is forwarded to.
        */
        this(void delegate(Operation, const(void[])[], void[]) kern)
        {
            mKernel = kern;
        }

        /// Forwards all arguments, unchanged, to the wrapped delegate.
        void execute(Operation op, const(void[])[] inputs, void[] output)
        {
            mKernel(op, inputs, output);
        }
    }

    private
    {
        void delegate(Operation op, const(void[])[], void[]) mKernel;
    }
}

/**
    Registers a kernel for the specified operation.

    Params:
        opName = The name of the operation.
        kernel = A kernel that can execute operations of the type specified by opName.

    Throws:
        If there is already a kernel registered for the operation.
*/
void registerCPUKernel(string opName, CPUKernel kernel)
{
    enforce((opName in mKernels) is null,
        "A CPUKernel is already registered for the operation '" ~ opName ~ "'");

    mKernels[opName] = kernel;
}

/**
    Deregisters the kernel associated with the specified operation.

    Nothing happens if no kernel is registered under opName.

    Params:
        opName = The name of the operation that should have its kernel deregistered.
*/
void deregisterCPUKernel(string opName)
{
    mKernels.remove(opName);
}

/**
    Provides a list of operations for which a CPUKernel has been registered.

    The operations "constant", "variable" and "reshape" are always included, because evaluateCPU can evaluate
    them without a registered kernel. Each name appears exactly once.

    Returns:
        An array of operation names.
*/
string[] listAllCPUOperations()
{
    //The keys property already returns a freshly allocated array, so it can be appended to directly
    string[] names = mKernels.keys;

    foreach(builtin; ["constant", "variable", "reshape"])
    {
        //Avoid listing a built-in twice when a kernel has been registered under the same name
        if((builtin in mKernels) is null)
        {
            names ~= builtin;
        }
    }

    return names;
}

/**
    A DeviceBuffer that is backed by an array in host memory.
*/
class CPUBuffer : DeviceBuffer
{
    public
    {
        /**
            Allocates a new buffer.

            Params:
                len = The size of the buffer, in bytes.
        */
        this(size_t len)
        {
            mBuffer = new ubyte[len];
        }

        /**
            Constructs a buffer holding a copy of an existing array.

            Params:
                buf = The data to copy. The buffer does not keep a reference to this array.
        */
        this(void[] buf)
        {
            mBuffer = buf.dup;
        }

        /// The size of this buffer, in bytes.
        override size_t numBytes() const
        {
            return mBuffer.length;
        }

        /**
            Copies the contents of this buffer into buf.

            Params:
                buf = The destination array. This is a slice copy, so its length must equal numBytes.
        */
        override void get(void[] buf) const
        {
            buf[] = mBuffer[];
        }

        /**
            Overwrites the contents of this buffer with the contents of buf.

            Params:
                buf = The source array. This is a slice copy, so its length must equal numBytes.
        */
        override void set(const void[] buf)
        {
            mBuffer[] = buf[];
        }

        /**
            Provides direct access to the underlying storage.

            Returns:
                The internal array, viewed as bytes. No copy is made, so writes are visible through this buffer.
        */
        ubyte[] raw()
        {
            return cast(ubyte[])mBuffer;
        }
    }

    private
    {
        void[] mBuffer;
    }
}

/**
    A Plan that executes its outputs by calling evaluateCPU.
*/
class CPUPlan : Plan
{
    public
    {
        /**
            Params:
                outputs = The operations this plan should compute. These are passed straight to the Plan constructor.
        */
        this(Operation[] outputs)
        {
            super(outputs);
        }
    }

    protected
    {
        /**
            Evaluates mOutputs on the CPU and stores the results in the supplied buffers.

            Params:
                args = A set of variable assignments, forwarded to evaluateCPU.
                rets = The buffers that receive the results. Results and buffers are paired up by position.
        */
        override void executeImpl(DeviceBuffer[Operation] args, DeviceBuffer[] rets)
        {
            auto tmpRets = evaluateCPU(mOutputs, args);

            import std.range : zip;

            foreach(t, r; zip(tmpRets, rets))
            {
                r.set(t);
            }
        }
    }
}

/**
    Evaluates several nodes from the operation graph using the CPU.

    If the elements of $(D ops) have common dependencies, then each dependency is evaluated only once. For this
    reason it is recommended that this overload is used when multiple nodes should be evaluated.

    Nodes of type "variable", "constant" and "reshape" are handled directly when no kernel has been registered
    for them; every other node requires a registered CPUKernel.

    Params:
        ops = The nodes of the operation graph that values should be computed for.
        args = A set of variable assignments. Nodes that appear as keys are not evaluated; the supplied value is
               used instead.

    Returns:
        An array of $(D DeviceBuffer) objects, each containing the value of the corresponding element in $(D ops).

    Throws:
        An $(D Exception) if a node is encountered for which no CPU kernel is available.
*/
DeviceBuffer[] evaluateCPU(Operation[] ops, DeviceBuffer[Operation] args = null)
{
    import std.algorithm : filter;
    import std.array : array;

    //Toposort the operations by dependency, skipping any node whose value has been supplied by the caller
    Operation[] sortedOps = topologicalSort(ops)
                           .filter!(x => (x in args) is null)
                           .array();

    //Count the number of references to each operation. Requested outputs get an extra reference so that they
    //are never released before being returned.
    int[Operation] refCounts;

    foreach(o; ops)
    {
        refCounts[o]++;
    }

    foreach(o; sortedOps)
    {
        foreach(d; o.deps)
        {
            refCounts[d]++;
        }
    }

    //Start executing the operations
    ubyte[][Operation] results;

    foreach(k, v; args)
    {
        results[k] = v.get!ubyte();
    }

    //Gives up the references that consumer holds on its dependencies
    void releaseDeps(Operation consumer)
    {
        foreach(d; consumer.deps)
        {
            refCounts[d]--;
        }

        foreach(d; consumer.deps)
        {
            //Remove the pointer to this buffer if we don't need it anymore
            //This will allow the GC to collect it at some point, if required
            if(refCounts[d] == 0)
            {
                results[d] = null;
            }
        }
    }

    foreach(o; sortedOps)
    {
        auto registered = o.opType in mKernels;

        //Check for some easy optimizations. These only apply when nothing has been registered for the operation.
        if(registered is null)
        {
            if(o.opType == "variable" || o.opType == "constant")
            {
                results[o] = o.value.get!ubyte;
                continue;
            }
            else if(o.opType == "reshape")
            {
                //A reshape does not move any data, so just alias the buffer of the dependency. The alias keeps
                //the memory alive even after the entry for the dependency has been released.
                results[o] = results[o.deps[0]];
                releaseDeps(o);
                continue;
            }
        }

        //Make sure the operation can be executed before allocating anything for it
        if(registered is null || *registered is null)
        {
            throw new Exception("No CPU kernel registered for operation " ~ o.opType);
        }

        auto kern = *registered;

        //Allocate a buffer for the output of this operation
        auto output = new ubyte[o.outputType.volume * o.outputType.elementType.sizeOf()];
        results[o] = output;

        //Get the input buffers
        ubyte[][] inputs;

        foreach(d; o.deps)
        {
            inputs ~= results[d];
        }

        //Execute the operation
        kern.execute(o, cast(const(void[])[]) inputs, cast(void[])output);

        releaseDeps(o);
    }

    DeviceBuffer[] returnVals = new DeviceBuffer[ops.length];

    foreach(i, o; ops)
    {
        //CPUBuffer copies the data, so the returned buffers do not alias each other or the arguments
        returnVals[i] = new CPUBuffer(results[o]);
    }

    return returnVals;
}

private
{
    //Maps operation names to the kernel registered for them.
    //NOTE(review): this is a thread-local module variable, whereas the shared static constructor above runs only
    //once per process. If the initialize() calls are what populate this table (presumably via
    //registerCPUKernel), threads other than the main thread will see it empty. Confirm whether evaluation
    //from other threads is meant to be supported.
    CPUKernel[string] mKernels;
}