OSDN Git Service

CUDA
[eos/hostdependX86LINUX64.git] / util / X86LINUX64 / cuda-6.5 / extras / CUPTI / sample / activity_trace_async / activity_trace_async.cpp
1 /*
2  * Copyright 2011-2014 NVIDIA Corporation. All rights reserved
3  *
4  * Sample CUPTI app to print a trace of CUDA API and GPU activity
5  * using asynchronous handling of activity buffers.
6  *
7  */
8
9 #include <stdio.h>
10 #include <cuda.h>
11 #include <cupti.h>
12
// Evaluate a CUPTI call exactly once and abort the process if it did not
// return CUPTI_SUCCESS. On failure the source location, the text of the
// call expression and the CUPTI result string are written to stderr before
// exit(-1) is invoked.
//
// The argument is parenthesized so that any expression can be passed, and
// the error string falls back to "<unknown>" when cuptiGetResultString()
// itself fails, so fprintf() never reads an unset pointer.
#define CUPTI_CALL(call)                                                \
  do {                                                                  \
    CUptiResult _status = (call);                                       \
    if (_status != CUPTI_SUCCESS) {                                     \
      const char *errstr = NULL;                                        \
      if (cuptiGetResultString(_status, &errstr) != CUPTI_SUCCESS ||    \
          errstr == NULL) {                                             \
        errstr = "<unknown>";                                           \
      }                                                                 \
      fprintf(stderr, "%s:%d: error: function %s failed with error %s.\n", \
              __FILE__, __LINE__, #call, errstr);                       \
      exit(-1);                                                         \
    }                                                                   \
  } while (0)
24
// Size in bytes of each activity buffer handed to CUPTI.
#define BUF_SIZE (32 * 1024)
// Byte alignment applied to activity buffers.
#define ALIGN_SIZE (8)
// Yield `buffer` advanced to the next address whose low bits, masked with
// (align - 1), are zero; an already aligned `buffer` is returned unchanged.
// Both arguments are evaluated more than once.
#define ALIGN_BUFFER(buffer, align)                                            \
  ((((uintptr_t) (buffer) & ((align)-1)) == 0)                                 \
     ? (buffer)                                                                \
     : ((buffer) + ((align) - ((uintptr_t) (buffer) & ((align)-1)))))
29
// Timestamp at trace initialization time. Used to normalize other
// timestamps.
32 static uint64_t startTimestamp;
33
34 static const char *
35 getMemcpyKindString(CUpti_ActivityMemcpyKind kind)
36 {
37   switch (kind) {
38   case CUPTI_ACTIVITY_MEMCPY_KIND_HTOD:
39     return "HtoD";
40   case CUPTI_ACTIVITY_MEMCPY_KIND_DTOH:
41     return "DtoH";
42   case CUPTI_ACTIVITY_MEMCPY_KIND_HTOA:
43     return "HtoA";
44   case CUPTI_ACTIVITY_MEMCPY_KIND_ATOH:
45     return "AtoH";
46   case CUPTI_ACTIVITY_MEMCPY_KIND_ATOA:
47     return "AtoA";
48   case CUPTI_ACTIVITY_MEMCPY_KIND_ATOD:
49     return "AtoD";
50   case CUPTI_ACTIVITY_MEMCPY_KIND_DTOA:
51     return "DtoA";
52   case CUPTI_ACTIVITY_MEMCPY_KIND_DTOD:
53     return "DtoD";
54   case CUPTI_ACTIVITY_MEMCPY_KIND_HTOH:
55     return "HtoH";
56   default:
57     break;
58   }
59
60   return "<unknown>";
61 }
62
63 const char *
64 getActivityOverheadKindString(CUpti_ActivityOverheadKind kind)
65 {
66   switch (kind) {
67   case CUPTI_ACTIVITY_OVERHEAD_DRIVER_COMPILER:
68     return "COMPILER";
69   case CUPTI_ACTIVITY_OVERHEAD_CUPTI_BUFFER_FLUSH:
70     return "BUFFER_FLUSH";
71   case CUPTI_ACTIVITY_OVERHEAD_CUPTI_INSTRUMENTATION:
72     return "INSTRUMENTATION";
73   case CUPTI_ACTIVITY_OVERHEAD_CUPTI_RESOURCE:
74     return "RESOURCE";
75   default:
76     break;
77   }
78
79   return "<unknown>";
80 }
81
82 const char *
83 getActivityObjectKindString(CUpti_ActivityObjectKind kind)
84 {
85   switch (kind) {
86   case CUPTI_ACTIVITY_OBJECT_PROCESS:
87     return "PROCESS";
88   case CUPTI_ACTIVITY_OBJECT_THREAD:
89     return "THREAD";
90   case CUPTI_ACTIVITY_OBJECT_DEVICE:
91     return "DEVICE";
92   case CUPTI_ACTIVITY_OBJECT_CONTEXT:
93     return "CONTEXT";
94   case CUPTI_ACTIVITY_OBJECT_STREAM:
95     return "STREAM";
96   default:
97     break;
98   }
99
100   return "<unknown>";
101 }
102
103 uint32_t
104 getActivityObjectKindId(CUpti_ActivityObjectKind kind, CUpti_ActivityObjectKindId *id)
105 {
106   switch (kind) {
107   case CUPTI_ACTIVITY_OBJECT_PROCESS:
108     return id->pt.processId;
109   case CUPTI_ACTIVITY_OBJECT_THREAD:
110     return id->pt.threadId;
111   case CUPTI_ACTIVITY_OBJECT_DEVICE:
112     return id->dcs.deviceId;
113   case CUPTI_ACTIVITY_OBJECT_CONTEXT:
114     return id->dcs.contextId;
115   case CUPTI_ACTIVITY_OBJECT_STREAM:
116     return id->dcs.streamId;
117   default:
118     break;
119   }
120
121   return 0xffffffff;
122 }
123
124 static const char *
125 getComputeApiKindString(CUpti_ActivityComputeApiKind kind)
126 {
127   switch (kind) {
128   case CUPTI_ACTIVITY_COMPUTE_API_CUDA:
129     return "CUDA";
130   case CUPTI_ACTIVITY_COMPUTE_API_CUDA_MPS:
131     return "CUDA_MPS";
132   default:
133     break;
134   }
135
136   return "<unknown>";
137 }
138
139 static void
140 printActivity(CUpti_Activity *record)
141 {
142   switch (record->kind)
143   {
144   case CUPTI_ACTIVITY_KIND_DEVICE:
145     {
146       CUpti_ActivityDevice *device = (CUpti_ActivityDevice *) record;
147       printf("DEVICE %s (%u), capability %u.%u, global memory (bandwidth %u GB/s, size %u MB), "
148              "multiprocessors %u, clock %u MHz\n",
149              device->name, device->id,
150              device->computeCapabilityMajor, device->computeCapabilityMinor,
151              (unsigned int) (device->globalMemoryBandwidth / 1024 / 1024),
152              (unsigned int) (device->globalMemorySize / 1024 / 1024),
153              device->numMultiprocessors, (unsigned int) (device->coreClockRate / 1000));
154       break;
155     }
156   case CUPTI_ACTIVITY_KIND_DEVICE_ATTRIBUTE:
157     {
158       CUpti_ActivityDeviceAttribute *attribute = (CUpti_ActivityDeviceAttribute *)record;
159       printf("DEVICE_ATTRIBUTE %u, device %u, value=0x%llx\n",
160              attribute->attribute.cupti, attribute->deviceId, (unsigned long long)attribute->value.vUint64);
161       break;
162     }
163   case CUPTI_ACTIVITY_KIND_CONTEXT:
164     {
165       CUpti_ActivityContext *context = (CUpti_ActivityContext *) record;
166       printf("CONTEXT %u, device %u, compute API %s, NULL stream %d\n",
167              context->contextId, context->deviceId,
168              getComputeApiKindString((CUpti_ActivityComputeApiKind) context->computeApiKind),
169              (int) context->nullStreamId);
170       break;
171     }
172   case CUPTI_ACTIVITY_KIND_MEMCPY:
173     {
174       CUpti_ActivityMemcpy *memcpy = (CUpti_ActivityMemcpy *) record;
175       printf("MEMCPY %s [ %llu - %llu ] device %u, context %u, stream %u, correlation %u/r%u\n",
176              getMemcpyKindString((CUpti_ActivityMemcpyKind) memcpy->copyKind),
177              (unsigned long long) (memcpy->start - startTimestamp),
178              (unsigned long long) (memcpy->end - startTimestamp),
179              memcpy->deviceId, memcpy->contextId, memcpy->streamId,
180              memcpy->correlationId, memcpy->runtimeCorrelationId);
181       break;
182     }
183   case CUPTI_ACTIVITY_KIND_MEMSET:
184     {
185       CUpti_ActivityMemset *memset = (CUpti_ActivityMemset *) record;
186       printf("MEMSET value=%u [ %llu - %llu ] device %u, context %u, stream %u, correlation %u/r%u\n",
187              memset->value,
188              (unsigned long long) (memset->start - startTimestamp),
189              (unsigned long long) (memset->end - startTimestamp),
190              memset->deviceId, memset->contextId, memset->streamId,
191              memset->correlationId, memset->runtimeCorrelationId);
192       break;
193     }
194   case CUPTI_ACTIVITY_KIND_KERNEL:
195   case CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL:
196     {
197       const char* kindString = (record->kind == CUPTI_ACTIVITY_KIND_KERNEL) ? "KERNEL" : "CONC KERNEL";
198       CUpti_ActivityKernel2 *kernel = (CUpti_ActivityKernel2 *) record;
199       printf("%s \"%s\" [ %llu - %llu ] device %u, context %u, stream %u, correlation %u\n",
200              kindString,
201              kernel->name,
202              (unsigned long long) (kernel->start - startTimestamp),
203              (unsigned long long) (kernel->end - startTimestamp),
204              kernel->deviceId, kernel->contextId, kernel->streamId,
205              kernel->correlationId);
206       printf("    grid [%u,%u,%u], block [%u,%u,%u], shared memory (static %u, dynamic %u)\n",
207              kernel->gridX, kernel->gridY, kernel->gridZ,
208              kernel->blockX, kernel->blockY, kernel->blockZ,
209              kernel->staticSharedMemory, kernel->dynamicSharedMemory);
210       break;
211     }
212   case CUPTI_ACTIVITY_KIND_DRIVER:
213     {
214       CUpti_ActivityAPI *api = (CUpti_ActivityAPI *) record;
215       printf("DRIVER cbid=%u [ %llu - %llu ] process %u, thread %u, correlation %u\n",
216              api->cbid,
217              (unsigned long long) (api->start - startTimestamp),
218              (unsigned long long) (api->end - startTimestamp),
219              api->processId, api->threadId, api->correlationId);
220       break;
221     }
222   case CUPTI_ACTIVITY_KIND_RUNTIME:
223     {
224       CUpti_ActivityAPI *api = (CUpti_ActivityAPI *) record;
225       printf("RUNTIME cbid=%u [ %llu - %llu ] process %u, thread %u, correlation %u\n",
226              api->cbid,
227              (unsigned long long) (api->start - startTimestamp),
228              (unsigned long long) (api->end - startTimestamp),
229              api->processId, api->threadId, api->correlationId);
230       break;
231     }
232   case CUPTI_ACTIVITY_KIND_NAME:
233     {
234       CUpti_ActivityName *name = (CUpti_ActivityName *) record;
235       switch (name->objectKind)
236       {
237       case CUPTI_ACTIVITY_OBJECT_CONTEXT:
238         printf("NAME  %s %u %s id %u, name %s\n",
239                getActivityObjectKindString(name->objectKind),
240                getActivityObjectKindId(name->objectKind, &name->objectId),
241                getActivityObjectKindString(CUPTI_ACTIVITY_OBJECT_DEVICE),
242                getActivityObjectKindId(CUPTI_ACTIVITY_OBJECT_DEVICE, &name->objectId),
243                name->name);
244         break;
245       case CUPTI_ACTIVITY_OBJECT_STREAM:
246         printf("NAME %s %u %s %u %s id %u, name %s\n",
247                getActivityObjectKindString(name->objectKind),
248                getActivityObjectKindId(name->objectKind, &name->objectId),
249                getActivityObjectKindString(CUPTI_ACTIVITY_OBJECT_CONTEXT),
250                getActivityObjectKindId(CUPTI_ACTIVITY_OBJECT_CONTEXT, &name->objectId),
251                getActivityObjectKindString(CUPTI_ACTIVITY_OBJECT_DEVICE),
252                getActivityObjectKindId(CUPTI_ACTIVITY_OBJECT_DEVICE, &name->objectId),
253                name->name);
254         break;
255       default:
256         printf("NAME %s id %u, name %s\n",
257                getActivityObjectKindString(name->objectKind),
258                getActivityObjectKindId(name->objectKind, &name->objectId),
259                name->name);
260         break;
261       }
262       break;
263     }
264   case CUPTI_ACTIVITY_KIND_MARKER:
265     {
266       CUpti_ActivityMarker *marker = (CUpti_ActivityMarker *) record;
267       printf("MARKER id %u [ %llu ], name %s\n",
268              marker->id, (unsigned long long) marker->timestamp, marker->name);
269       break;
270     }
271   case CUPTI_ACTIVITY_KIND_MARKER_DATA:
272     {
273       CUpti_ActivityMarkerData *marker = (CUpti_ActivityMarkerData *) record;
274       printf("MARKER_DATA id %u, color 0x%x, category %u, payload %llu/%f\n",
275              marker->id, marker->color, marker->category,
276              (unsigned long long) marker->payload.metricValueUint64,
277              marker->payload.metricValueDouble);
278       break;
279     }
280   case CUPTI_ACTIVITY_KIND_OVERHEAD:
281     {
282       CUpti_ActivityOverhead *overhead = (CUpti_ActivityOverhead *) record;
283       printf("OVERHEAD %s [ %llu, %llu ] %s id %u\n",
284              getActivityOverheadKindString(overhead->overheadKind),
285              (unsigned long long) overhead->start - startTimestamp,
286              (unsigned long long) overhead->end - startTimestamp,
287              getActivityObjectKindString(overhead->objectKind),
288              getActivityObjectKindId(overhead->objectKind, &overhead->objectId));
289       break;
290     }
291   default:
292     printf("  <unknown>\n");
293     break;
294   }
295 }
296
297 void CUPTIAPI bufferRequested(uint8_t **buffer, size_t *size, size_t *maxNumRecords)
298 {
299   uint8_t *bfr = (uint8_t *) malloc(BUF_SIZE + ALIGN_SIZE);
300   if (bfr == NULL) {
301     printf("Error: out of memory\n");
302     exit(-1);
303   }
304
305   *size = BUF_SIZE;
306   *buffer = ALIGN_BUFFER(bfr, ALIGN_SIZE);
307   *maxNumRecords = 0;
308 }
309
310 void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize)
311 {
312   CUptiResult status;
313   CUpti_Activity *record = NULL;
314
315   if (validSize > 0) {
316     if (ctx == NULL) {
317       printf("==== Starting dump for global ====\n");
318     }
319     else {
320       printf("==== Starting dump for context %p, stream %u ====\n", ctx, streamId);
321     }
322
323     do {
324       status = cuptiActivityGetNextRecord(buffer, validSize, &record);
325       if (status == CUPTI_SUCCESS) {
326         printActivity(record);
327       }
328       else if (status == CUPTI_ERROR_MAX_LIMIT_REACHED)
329         break;
330       else {
331         CUPTI_CALL(status);
332       }
333     } while (1);
334
335     // report any records dropped from the queue
336     size_t dropped;
337     CUPTI_CALL(cuptiActivityGetNumDroppedRecords(ctx, streamId, &dropped));
338     if (dropped != 0) {
339       printf("Dropped %u activity records\n", (unsigned int) dropped);
340     }
341
342     if (ctx == NULL) {
343       printf("==== Finished dump for global ====\n");
344     }
345     else {
346       printf("==== Finished dump for context %p, stream %u ====\n", ctx, streamId);
347     }
348   }
349
350   free(buffer);
351 }
352
353 void
354 initTrace()
355 {
356   size_t attrValue = 0, attrValueSize = sizeof(size_t);
357   // Device activity record is created when CUDA initializes, so we
358   // want to enable it before cuInit() or any CUDA runtime call.
359   CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_DEVICE));
360   // Enable all other activity record kinds.
361   CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_CONTEXT));
362   CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_DRIVER));
363   CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_RUNTIME));
364   CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_MEMCPY));
365   CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_MEMSET));
366   CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_NAME));
367   CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_MARKER));
368   CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_KERNEL));
369   CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_OVERHEAD));
370
371   // Register callbacks for buffer requests and for buffers completed by CUPTI.
372   CUPTI_CALL(cuptiActivityRegisterCallbacks(bufferRequested, bufferCompleted));
373
374   // Get and set activity attributes.
375   // Attributes can be set by the CUPTI client to change behavior of the activity API.
376   // Some attributes require to be set before any CUDA context is created to be effective,
377   // e.g. to be applied to all device buffer allocations (see documentation).
378   CUPTI_CALL(cuptiActivityGetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE, &attrValueSize, &attrValue));
379   printf("%s = %llu\n", "CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE", (long long unsigned)attrValue);
380   attrValue *= 2;
381   CUPTI_CALL(cuptiActivitySetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE, &attrValueSize, &attrValue));
382
383   CUPTI_CALL(cuptiActivityGetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT, &attrValueSize, &attrValue));
384   printf("%s = %llu\n", "CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT", (long long unsigned)attrValue);
385   attrValue *= 2;
386   CUPTI_CALL(cuptiActivitySetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT, &attrValueSize, &attrValue));
387
388   CUPTI_CALL(cuptiGetTimestamp(&startTimestamp));
389 }