1 #include <clpeak.h>
2 
runComputeShort(cl::CommandQueue & queue,cl::Program & prog,device_info_t & devInfo)3 int clPeak::runComputeShort(cl::CommandQueue &queue, cl::Program &prog, device_info_t &devInfo)
4 {
5   float timed, gflops;
6   cl_uint workPerWI;
7   cl::NDRange globalSize, localSize;
8   cl_short A = 4;
9   uint iters = devInfo.computeIters;
10 
11   if (!isComputeShort)
12     return 0;
13 
14   try
15   {
16     log->print(NEWLINE TAB TAB "Integer short (16bit) compute (GIOPS)" NEWLINE);
17     log->xmlOpenTag("integer_compute_short");
18     log->xmlAppendAttribs("unit", "giops");
19 
20     cl::Context ctx = queue.getInfo<CL_QUEUE_CONTEXT>();
21 
22     uint64_t globalWIs = (devInfo.numCUs) * (devInfo.computeWgsPerCU) * (devInfo.maxWGSize);
23     uint64_t t = std::min((globalWIs * sizeof(cl_short)), devInfo.maxAllocSize) / sizeof(cl_short);
24     globalWIs = roundToMultipleOf(t, devInfo.maxWGSize);
25 
26     cl::Buffer outputBuf = cl::Buffer(ctx, CL_MEM_WRITE_ONLY, (globalWIs * sizeof(cl_short)));
27 
28     globalSize = globalWIs;
29     localSize = devInfo.maxWGSize;
30 
31     cl::Kernel kernel_v1(prog, "compute_short_v1");
32     kernel_v1.setArg(0, outputBuf), kernel_v1.setArg(1, A);
33 
34     cl::Kernel kernel_v2(prog, "compute_short_v2");
35     kernel_v2.setArg(0, outputBuf), kernel_v2.setArg(1, A);
36 
37     cl::Kernel kernel_v4(prog, "compute_short_v4");
38     kernel_v4.setArg(0, outputBuf), kernel_v4.setArg(1, A);
39 
40     cl::Kernel kernel_v8(prog, "compute_short_v8");
41     kernel_v8.setArg(0, outputBuf), kernel_v8.setArg(1, A);
42 
43     cl::Kernel kernel_v16(prog, "compute_short_v16");
44     kernel_v16.setArg(0, outputBuf), kernel_v16.setArg(1, A);
45 
46     ///////////////////////////////////////////////////////////////////////////
47     // Vector width 1
48     if (!forceTest || strcmp(specifiedTestName, "short") == 0)
49     {
50       log->print(TAB TAB TAB "short   : ");
51 
52       workPerWI = 2048; // Indicates integer operations executed per work-item
53 
54       timed = run_kernel(queue, kernel_v1, globalSize, localSize, iters);
55 
56       gflops = (static_cast<float>(globalWIs) * static_cast<float>(workPerWI)) / timed / 1e3f;
57 
58       log->print(gflops);
59       log->print(NEWLINE);
60       log->xmlRecord("short", gflops);
61     }
62     ///////////////////////////////////////////////////////////////////////////
63 
64     // Vector width 2
65     if (!forceTest || strcmp(specifiedTestName, "short2") == 0)
66     {
67       log->print(TAB TAB TAB "short2  : ");
68 
69       workPerWI = 2048;
70 
71       timed = run_kernel(queue, kernel_v2, globalSize, localSize, iters);
72 
73       gflops = (static_cast<float>(globalWIs) * static_cast<float>(workPerWI)) / timed / 1e3f;
74 
75       log->print(gflops);
76       log->print(NEWLINE);
77       log->xmlRecord("short2", gflops);
78     }
79     ///////////////////////////////////////////////////////////////////////////
80 
81     // Vector width 4
82     if (!forceTest || strcmp(specifiedTestName, "short4") == 0)
83     {
84       log->print(TAB TAB TAB "short4  : ");
85 
86       workPerWI = 2048;
87 
88       timed = run_kernel(queue, kernel_v4, globalSize, localSize, iters);
89 
90       gflops = (static_cast<float>(globalWIs) * static_cast<float>(workPerWI)) / timed / 1e3f;
91 
92       log->print(gflops);
93       log->print(NEWLINE);
94       log->xmlRecord("short4", gflops);
95     }
96     ///////////////////////////////////////////////////////////////////////////
97 
98     // Vector width 8
99     if (!forceTest || strcmp(specifiedTestName, "short8") == 0)
100     {
101       log->print(TAB TAB TAB "short8  : ");
102 
103       workPerWI = 2048;
104 
105       timed = run_kernel(queue, kernel_v8, globalSize, localSize, iters);
106 
107       gflops = (static_cast<float>(globalWIs) * static_cast<float>(workPerWI)) / timed / 1e3f;
108 
109       log->print(gflops);
110       log->print(NEWLINE);
111       log->xmlRecord("short8", gflops);
112     }
113     ///////////////////////////////////////////////////////////////////////////
114 
115     // Vector width 16
116     if (!forceTest || strcmp(specifiedTestName, "short16") == 0)
117     {
118       log->print(TAB TAB TAB "short16 : ");
119 
120       workPerWI = 2048;
121 
122       timed = run_kernel(queue, kernel_v16, globalSize, localSize, iters);
123 
124       gflops = (static_cast<float>(globalWIs) * static_cast<float>(workPerWI)) / timed / 1e3f;
125 
126       log->print(gflops);
127       log->print(NEWLINE);
128       log->xmlRecord("short16", gflops);
129     }
130     ///////////////////////////////////////////////////////////////////////////
131     log->xmlCloseTag(); // integer_compute
132   }
133   catch (cl::Error &error)
134   {
135     stringstream ss;
136     ss << error.what() << " (" << error.err() << ")" NEWLINE
137        << TAB TAB TAB "Tests skipped" NEWLINE;
138     log->print(ss.str());
139     return -1;
140   }
141 
142   return 0;
143 }
144