OSDN Git Service

CUDA
[eos/hostdependX86LINUX64.git] / util / X86LINUX64 / cuda-6.5 / include / cuviddec.h
1 /*
2  * Copyright 1993-2008 NVIDIA Corporation.  All rights reserved.
3  *
4  * NOTICE TO USER:   
5  *
6  * This source code is subject to NVIDIA ownership rights under U.S. and 
7  * international Copyright laws.  Users and possessors of this source code 
8  * are hereby granted a nonexclusive, royalty-free license to use this code 
9  * in individual and commercial software.
10  *
11  * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE 
12  * CODE FOR ANY PURPOSE.  IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR 
13  * IMPLIED WARRANTY OF ANY KIND.  NVIDIA DISCLAIMS ALL WARRANTIES WITH 
14  * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF 
15  * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
16  * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, 
17  * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS 
18  * OF USE, DATA OR PROFITS,  WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE 
19  * OR OTHER TORTIOUS ACTION,  ARISING OUT OF OR IN CONNECTION WITH THE USE 
20  * OR PERFORMANCE OF THIS SOURCE CODE.  
21  *
22  * U.S. Government End Users.   This source code is a "commercial item" as 
23  * that term is defined at  48 C.F.R. 2.101 (OCT 1995), consisting  of 
24  * "commercial computer  software"  and "commercial computer software 
25  * documentation" as such terms are  used in 48 C.F.R. 12.212 (SEPT 1995) 
26  * and is provided to the U.S. Government only as a commercial end item.  
27  * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through 
28  * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the 
29  * source code with only those rights set forth herein. 
30  *
31  * Any use of this source code in individual and commercial software must 
32  * include, in the user documentation and internal comments to the code,
33  * the above Disclaimer and U.S. Government End Users Notice.
34  */
35  
36 #if !defined(__CUDA_VIDEO_H__)
37 #define __CUDA_VIDEO_H__
38
39 #ifndef __cuda_cuda_h__
40 #include <cuda.h>
41 #endif // __cuda_cuda_h__
42
43 #if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
44 #if (CUDA_VERSION >= 3020) && (!defined(CUDA_FORCE_API_VERSION) || (CUDA_FORCE_API_VERSION >= 3020))
45 #define __CUVID_DEVPTR64
46 #endif
47 #endif
48
49 #if defined(__cplusplus)
50 extern "C" {
51 #endif /* __cplusplus */
52
53 typedef void *CUvideodecoder;                     // Opaque decoder handle: filled in by cuvidCreateDecoder(), consumed by the other cuvid* decoder calls
54 typedef struct _CUcontextlock_st *CUvideoctxlock; // Opaque context-lock handle: filled in by cuvidCtxLockCreate(), released with cuvidCtxLockDestroy()
55
56 typedef enum cudaVideoCodec_enum {  // Codec / raw-format identifiers; type of CUVIDDECODECREATEINFO::CodecType
57     cudaVideoCodec_MPEG1=0,         // 0
58     cudaVideoCodec_MPEG2,           // 1
59     cudaVideoCodec_MPEG4,           // 2
60     cudaVideoCodec_VC1,             // 3
61     cudaVideoCodec_H264,            // 4
62     cudaVideoCodec_JPEG,            // 5
63     cudaVideoCodec_H264_SVC,        // 6
64     cudaVideoCodec_H264_MVC,        // 7
65     cudaVideoCodec_HEVC,            // 8
66     cudaVideoCodec_NumCodecs,       // 9: count of the compressed-codec enumerators above (not itself a codec)
67     // Uncompressed YUV: each value packs four ASCII characters into one integer, first character in the most significant byte
68     cudaVideoCodec_YUV420 = (('I'<<24)|('Y'<<16)|('U'<<8)|('V')),   // 'IYUV': Y,U,V (4:2:0)
69     cudaVideoCodec_YV12   = (('Y'<<24)|('V'<<16)|('1'<<8)|('2')),   // 'YV12': Y,V,U (4:2:0)
70     cudaVideoCodec_NV12   = (('N'<<24)|('V'<<16)|('1'<<8)|('2')),   // 'NV12': Y,UV  (4:2:0)
71     cudaVideoCodec_YUYV   = (('Y'<<24)|('U'<<16)|('Y'<<8)|('V')),   // 'YUYV': YUYV/YUY2 (4:2:2)
72     cudaVideoCodec_UYVY   = (('U'<<24)|('Y'<<16)|('V'<<8)|('Y'))    // 'UYVY': UYVY (4:2:2)
73 } cudaVideoCodec;
74
75 typedef enum cudaVideoSurfaceFormat_enum {  // Output surface layout; type of CUVIDDECODECREATEINFO::OutputFormat
76     cudaVideoSurfaceFormat_NV12=0   // NV12 (currently the only supported output format)
77 } cudaVideoSurfaceFormat;
78
79 typedef enum cudaVideoDeinterlaceMode_enum {  // Deinterlacing choice; type of CUVIDDECODECREATEINFO::DeinterlaceMode
80     cudaVideoDeinterlaceMode_Weave=0,   // 0: Weave both fields (no deinterlacing)
81     cudaVideoDeinterlaceMode_Bob,       // 1: Drop one field
82     cudaVideoDeinterlaceMode_Adaptive   // 2: Adaptive deinterlacing
83 } cudaVideoDeinterlaceMode;
84
85 typedef enum cudaVideoChromaFormat_enum {  // Chroma subsampling of the coded stream; type of CUVIDDECODECREATEINFO::ChromaFormat
86     cudaVideoChromaFormat_Monochrome=0,    // 0
87     cudaVideoChromaFormat_420,             // 1 (the CUVIDDECODECREATEINFO comment states only 4:2:0 is currently supported)
88     cudaVideoChromaFormat_422,             // 2
89     cudaVideoChromaFormat_444              // 3
90 } cudaVideoChromaFormat;
91
92 typedef enum cudaVideoCreateFlags_enum {  // Values for CUVIDDECODECREATEINFO::ulCreationFlags; the non-zero values are distinct single bits
93     cudaVideoCreate_Default = 0x00,     // Default operation mode: use dedicated video engines
94     cudaVideoCreate_PreferCUDA = 0x01,  // Use a CUDA-based decoder if faster than dedicated engines (requires a valid vidLock object for multi-threading)
95     cudaVideoCreate_PreferDXVA = 0x02,  // Go through DXVA internally if possible (requires D3D9 interop)
96     cudaVideoCreate_PreferCUVID = 0x04  // Use dedicated video engines directly
97 } cudaVideoCreateFlags;
98
99
100 typedef struct _CUVIDDECODECREATEINFO  // Decoder configuration; passed by pointer to cuvidCreateDecoder()
101 {
102     // Decoding
103     unsigned long ulWidth;          // Coded Sequence Width
104     unsigned long ulHeight;         // Coded Sequence Height
105     unsigned long ulNumDecodeSurfaces;  // Maximum number of internal decode surfaces
106     cudaVideoCodec CodecType;        // cudaVideoCodec_XXX
107     cudaVideoChromaFormat ChromaFormat; // cudaVideoChromaFormat_XXX (only 4:2:0 is currently supported)
108     unsigned long ulCreationFlags;  // Decoder creation flags (cudaVideoCreate_XXX values of cudaVideoCreateFlags)
109     unsigned long Reserved1[5];     // Reserved for future use - set to zero
110     struct {                        // area of the frame that should be displayed
111         short left;
112         short top;
113         short right;
114         short bottom;
115     } display_area;
116     // Output format
117     cudaVideoSurfaceFormat OutputFormat;       // cudaVideoSurfaceFormat_XXX
118     cudaVideoDeinterlaceMode DeinterlaceMode;  // cudaVideoDeinterlaceMode_XXX
119     unsigned long ulTargetWidth;    // Post-processed Output Width
120     unsigned long ulTargetHeight;   // Post-processed Output Height
121     unsigned long ulNumOutputSurfaces; // Maximum number of output surfaces simultaneously mapped
122     CUvideoctxlock vidLock;         // If non-NULL, context lock used for synchronizing ownership of the cuda context
123     struct {                        // target rectangle in the output frame (for aspect ratio conversion)
124         short left;
125         short top;
126         short right;
127         short bottom;
128     } target_rect;                  // if a null rectangle is specified, {0,0,ulTargetWidth,ulTargetHeight} will be used
129     unsigned long Reserved2[5];     // Reserved for future use - set to zero
130 } CUVIDDECODECREATEINFO;
131
132
133 ////////////////////////////////////////////////////////////////////////////////////////////////
134 //
135 // H.264 Picture Parameters
136 //
137
138 typedef struct _CUVIDH264DPBENTRY  // One reference-frame entry; CUVIDH264PICPARAMS::dpb is an array of 16 of these
139 {
140     int PicIdx;             // picture index of reference frame
141     int FrameIdx;           // frame_num(short-term) or LongTermFrameIdx(long-term)
142     int is_long_term;       // 0=short term reference, 1=long term reference
143     int not_existing;       // non-existing reference frame (corresponding PicIdx should be set to -1)
144     int used_for_reference; // 0=unused, 1=top_field, 2=bottom_field, 3=both_fields
145     int FieldOrderCnt[2];   // field order count of top and bottom fields
146 } CUVIDH264DPBENTRY;
147
148 typedef struct _CUVIDH264MVCEXT  // MVC extension data; the mvcext alternative of the anonymous SVC/MVC union in CUVIDH264PICPARAMS
149 {
150     int num_views_minus1;
151     int view_id;
152     unsigned char inter_view_flag;
153     unsigned char num_inter_view_refs_l0;
154     unsigned char num_inter_view_refs_l1;
155     unsigned char MVCReserved8Bits;   // reserved byte (presumably set to zero, like the other reserved fields in this header - confirm)
156     int InterViewRefsL0[16];          // presumably the first num_inter_view_refs_l0 entries are meaningful - confirm
157     int InterViewRefsL1[16];          // presumably the first num_inter_view_refs_l1 entries are meaningful - confirm
158 } CUVIDH264MVCEXT;
159
160 typedef struct _CUVIDH264SVCEXT  // SVC extension data; the svcext alternative of the anonymous SVC/MVC union in CUVIDH264PICPARAMS
161 {
162     unsigned char profile_idc;
163     unsigned char level_idc;
164     unsigned char DQId;
165     unsigned char DQIdMax;
166     unsigned char disable_inter_layer_deblocking_filter_idc;
167     unsigned char ref_layer_chroma_phase_y_plus1;
168     signed char   inter_layer_slice_alpha_c0_offset_div2;
169     signed char   inter_layer_slice_beta_offset_div2;
170     
171     unsigned short DPBEntryValidFlag;
172     unsigned char inter_layer_deblocking_filter_control_present_flag;
173     unsigned char extended_spatial_scalability_idc;
174     unsigned char adaptive_tcoeff_level_prediction_flag;
175     unsigned char slice_header_restriction_flag;
176     unsigned char chroma_phase_x_plus1_flag;
177     unsigned char chroma_phase_y_plus1;
178     
179     unsigned char tcoeff_level_prediction_flag;
180     unsigned char constrained_intra_resampling_flag;
181     unsigned char ref_layer_chroma_phase_x_plus1_flag;
182     unsigned char store_ref_base_pic_flag;
183     unsigned char Reserved8BitsA;   // reserved byte (presumably set to zero - confirm)
184     unsigned char Reserved8BitsB;   // reserved byte (presumably set to zero - confirm)
185     // For the 4 scaled_ref_layer_XX fields below,
186     // if (extended_spatial_scalability_idc == 1), SPS field, G.7.3.2.1.4, add prefix "seq_"
187     // if (extended_spatial_scalability_idc == 2), SLH field, G.7.3.3.4
188     short scaled_ref_layer_left_offset;
189     short scaled_ref_layer_top_offset;
190     short scaled_ref_layer_right_offset;
191     short scaled_ref_layer_bottom_offset;
192     unsigned short Reserved16Bits;  // reserved (presumably set to zero - confirm)
193     struct _CUVIDPICPARAMS *pNextLayer; // Points to the picparams for the next layer to be decoded. Linked list ends at the target layer.
194     int bRefBaseLayer;                 // whether to store ref base pic
195 } CUVIDH264SVCEXT;
196
197 typedef struct _CUVIDH264PICPARAMS  // H.264-specific picture parameters; the h264 member of CUVIDPICPARAMS::CodecSpecific
198 {
199     // SPS
200     int log2_max_frame_num_minus4;
201     int pic_order_cnt_type;
202     int log2_max_pic_order_cnt_lsb_minus4;
203     int delta_pic_order_always_zero_flag;
204     int frame_mbs_only_flag;
205     int direct_8x8_inference_flag;
206     int num_ref_frames;             // NOTE: shall meet level 4.1 restrictions
207     unsigned char residual_colour_transform_flag;
208     unsigned char bit_depth_luma_minus8;    // Must be 0 (only 8-bit supported)
209     unsigned char bit_depth_chroma_minus8;  // Must be 0 (only 8-bit supported)
210     unsigned char qpprime_y_zero_transform_bypass_flag;
211     // PPS
212     int entropy_coding_mode_flag;
213     int pic_order_present_flag;
214     int num_ref_idx_l0_active_minus1;
215     int num_ref_idx_l1_active_minus1;
216     int weighted_pred_flag;
217     int weighted_bipred_idc;
218     int pic_init_qp_minus26;
219     int deblocking_filter_control_present_flag;
220     int redundant_pic_cnt_present_flag;
221     int transform_8x8_mode_flag;
222     int MbaffFrameFlag;
223     int constrained_intra_pred_flag;
224     int chroma_qp_index_offset;
225     int second_chroma_qp_index_offset;
226     int ref_pic_flag;
227     int frame_num;
228     int CurrFieldOrderCnt[2];       // two entries; presumably top and bottom field, as in CUVIDH264DPBENTRY::FieldOrderCnt - confirm
229     // DPB
230     CUVIDH264DPBENTRY dpb[16];          // List of reference frames within the DPB
231     // Quantization Matrices (raster-order)
232     unsigned char WeightScale4x4[6][16];   // 6 lists of 16 coefficients
233     unsigned char WeightScale8x8[2][64];   // 2 lists of 64 coefficients
234     // FMO/ASO
235     unsigned char fmo_aso_enable;
236     unsigned char num_slice_groups_minus1;
237     unsigned char slice_group_map_type;
238     signed char pic_init_qs_minus26;
239     unsigned int slice_group_change_rate_minus1;
240     union                           // both alternatives share the same storage
241     {
242         unsigned long long slice_group_map_addr;   // 64-bit integer alternative
243         const unsigned char *pMb2SliceGroupMap;    // pointer alternative
244     } fmo;
245     unsigned int  Reserved[12];     // reserved (presumably set to zero - confirm)
246     // SVC/MVC
247     union                           // anonymous union: mvcext and svcext overlap and are accessed directly as members of this struct
248     {
249         CUVIDH264MVCEXT mvcext;
250         CUVIDH264SVCEXT svcext;
251     };
252 } CUVIDH264PICPARAMS;
253
254
255 ////////////////////////////////////////////////////////////////////////////////////////////////
256 //
257 // MPEG-2 Picture Parameters
258 //
259
260 typedef struct _CUVIDMPEG2PICPARAMS  // MPEG-2 picture parameters; the mpeg2 member of CUVIDPICPARAMS::CodecSpecific (also used for MPEG-1)
261 {
262     int ForwardRefIdx;          // Picture index of forward reference (P/B-frames)
263     int BackwardRefIdx;         // Picture index of backward reference (B-frames)
264     int picture_coding_type;
265     int full_pel_forward_vector;
266     int full_pel_backward_vector;
267     int f_code[2][2];
268     int intra_dc_precision;
269     int frame_pred_frame_dct;
270     int concealment_motion_vectors;
271     int q_scale_type;
272     int intra_vlc_format;
273     int alternate_scan;
274     int top_field_first;
275     // Quantization matrices (raster order)
276     unsigned char QuantMatrixIntra[64];   // 64 coefficients
277     unsigned char QuantMatrixInter[64];   // 64 coefficients
278 } CUVIDMPEG2PICPARAMS;
279
280 ////////////////////////////////////////////////////////////////////////////////////////////////
281 //
282 // MPEG-4 Picture Parameters
283 //
284
285 // MPEG-4 has VOP types instead of Picture types (presumably the values stored in CUVIDMPEG4PICPARAMS::vop_coding_type - confirm)
286 #define I_VOP 0
287 #define P_VOP 1
288 #define B_VOP 2
289 #define S_VOP 3
290
291 typedef struct _CUVIDMPEG4PICPARAMS  // MPEG-4 picture parameters; the mpeg4 member of CUVIDPICPARAMS::CodecSpecific
292 {
293     int ForwardRefIdx;          // Picture index of forward reference (P/B-frames)
294     int BackwardRefIdx;         // Picture index of backward reference (B-frames)
295     // VOL
296     int video_object_layer_width;
297     int video_object_layer_height;
298     int vop_time_increment_bitcount;
299     int top_field_first;
300     int resync_marker_disable;
301     int quant_type;
302     int quarter_sample;
303     int short_video_header;
304     int divx_flags;
305     // VOP
306     int vop_coding_type;        // presumably one of I_VOP, P_VOP, B_VOP, S_VOP - confirm
307     int vop_coded;
308     int vop_rounding_type;
309     int alternate_vertical_scan_flag;
310     int interlaced;
311     int vop_fcode_forward;
312     int vop_fcode_backward;
313     int trd[2];
314     int trb[2];
315     // Quantization matrices (raster order)
316     unsigned char QuantMatrixIntra[64];   // 64 coefficients
317     unsigned char QuantMatrixInter[64];   // 64 coefficients
318     int gmc_enabled;
319 } CUVIDMPEG4PICPARAMS;
320
321 ////////////////////////////////////////////////////////////////////////////////////////////////
322 //
323 // VC1 Picture Parameters
324 //
325
326 typedef struct _CUVIDVC1PICPARAMS  // VC1 picture parameters; the vc1 member of CUVIDPICPARAMS::CodecSpecific
327 {
328     int ForwardRefIdx;      // Picture index of forward reference (P/B-frames)
329     int BackwardRefIdx;     // Picture index of backward reference (B-frames)
330     int FrameWidth;         // Actual frame width
331     int FrameHeight;        // Actual frame height
332     // PICTURE (per-picture fields)
333     int intra_pic_flag;     // Set to 1 for I,BI frames
334     int ref_pic_flag;       // Set to 1 for I,P frames
335     int progressive_fcm;    // Progressive frame
336     // SEQUENCE (presumably copied from the VC1 sequence-level header - confirm)
337     int profile;
338     int postprocflag;
339     int pulldown;
340     int interlace;
341     int tfcntrflag;
342     int finterpflag;
343     int psf;
344     int multires;
345     int syncmarker;
346     int rangered;
347     int maxbframes;
348     // ENTRYPOINT (presumably copied from the VC1 entry-point header - confirm)
349     int panscan_flag;
350     int refdist_flag;
351     int extended_mv;
352     int dquant;
353     int vstransform;
354     int loopfilter;
355     int fastuvmc;
356     int overlap;
357     int quantizer;
358     int extended_dmv;
359     int range_mapy_flag;
360     int range_mapy;
361     int range_mapuv_flag;
362     int range_mapuv;
363     int rangeredfrm;    // range reduction state
364 } CUVIDVC1PICPARAMS;
365
366 ////////////////////////////////////////////////////////////////////////////////////////////////
367 //
368 // JPEG Picture Parameters
369 //
370
371 typedef struct _CUVIDJPEGPICPARAMS  // JPEG picture parameters; the jpeg member of CUVIDPICPARAMS::CodecSpecific
372 {
373     int Reserved;           // only member: this struct carries no JPEG-specific parameters
374 } CUVIDJPEGPICPARAMS;
375
376
377 ////////////////////////////////////////////////////////////////////////////////////////////////
378 //
379 // HEVC Picture Parameters
380 //
381
382 typedef struct _CUVIDHEVCPICPARAMS  // HEVC picture parameters; the hevc member of CUVIDPICPARAMS::CodecSpecific
383 {
384     // sps
385     int pic_width_in_luma_samples;
386     int pic_height_in_luma_samples;
387     unsigned char log2_min_luma_coding_block_size_minus3;
388     unsigned char log2_diff_max_min_luma_coding_block_size;
389     unsigned char log2_min_transform_block_size_minus2;
390     unsigned char log2_diff_max_min_transform_block_size;
391     unsigned char pcm_enabled_flag;
392     unsigned char log2_min_pcm_luma_coding_block_size_minus3;
393     unsigned char log2_diff_max_min_pcm_luma_coding_block_size;
394     unsigned char pcm_sample_bit_depth_luma_minus1;
395     
396     unsigned char pcm_sample_bit_depth_chroma_minus1;
397     unsigned char pcm_loop_filter_disabled_flag;
398     unsigned char strong_intra_smoothing_enabled_flag;
399     unsigned char max_transform_hierarchy_depth_intra;
400     unsigned char max_transform_hierarchy_depth_inter;
401     unsigned char amp_enabled_flag;
402     unsigned char separate_colour_plane_flag;
403     unsigned char log2_max_pic_order_cnt_lsb_minus4;
404
405     unsigned char num_short_term_ref_pic_sets;
406     unsigned char long_term_ref_pics_present_flag;
407     unsigned char num_long_term_ref_pics_sps;
408     unsigned char sps_temporal_mvp_enabled_flag;
409     unsigned char sample_adaptive_offset_enabled_flag;
410     unsigned char scaling_list_enable_flag;
411     unsigned char IrapPicFlag;
412     unsigned char IdrPicFlag;
413     unsigned char reserved1[16];    // reserved (presumably set to zero - confirm)
414     
415     // pps
416     unsigned char dependent_slice_segments_enabled_flag;
417     unsigned char slice_segment_header_extension_present_flag;
418     unsigned char sign_data_hiding_enabled_flag;
419     unsigned char cu_qp_delta_enabled_flag;
420     unsigned char diff_cu_qp_delta_depth;
421     signed char init_qp_minus26;    // signed
422     signed char pps_cb_qp_offset;   // signed
423     signed char pps_cr_qp_offset;   // signed
424     
425     unsigned char constrained_intra_pred_flag;
426     unsigned char weighted_pred_flag;
427     unsigned char weighted_bipred_flag;
428     unsigned char transform_skip_enabled_flag;
429     unsigned char transquant_bypass_enabled_flag;
430     unsigned char entropy_coding_sync_enabled_flag;
431     unsigned char log2_parallel_merge_level_minus2;
432     unsigned char num_extra_slice_header_bits;
433     
434     unsigned char loop_filter_across_tiles_enabled_flag;
435     unsigned char loop_filter_across_slices_enabled_flag;
436     unsigned char output_flag_present_flag;
437     unsigned char num_ref_idx_l0_default_active_minus1;
438     unsigned char num_ref_idx_l1_default_active_minus1;
439     unsigned char lists_modification_present_flag;
440     unsigned char cabac_init_present_flag;
441     unsigned char pps_slice_chroma_qp_offsets_present_flag;
442     
443     unsigned char deblocking_filter_override_enabled_flag;
444     unsigned char pps_deblocking_filter_disabled_flag;
445     signed char pps_beta_offset_div2;   // signed
446     signed char pps_tc_offset_div2;     // signed
447     unsigned char tiles_enabled_flag;
448     unsigned char uniform_spacing_flag;
449     unsigned char num_tile_columns_minus1;
450     unsigned char num_tile_rows_minus1;
451     
452     unsigned short column_width_minus1[21];   // room for 21 entries
453     unsigned short row_height_minus1[19];     // room for 19 entries
454     unsigned int reserved3[16];     // reserved (presumably set to zero - confirm)
455
456     // RefPicSets
457     int NumBitsForShortTermRPSInSlice;
458     int NumDeltaPocsOfRefRpsIdx;
459     int NumPocTotalCurr;
460     int NumPocStCurrBefore;
461     int NumPocStCurrAfter;
462     int NumPocLtCurr;
463     int CurrPicOrderCntVal;
464     int RefPicIdx[16];                  // [refpic] Indices of valid reference pictures (-1 if unused for reference)
465     int PicOrderCntVal[16];             // [refpic]
466     unsigned char IsLongTerm[16];       // [refpic] 0=not a long-term reference, 1=long-term reference
467     unsigned char RefPicSetStCurrBefore[8]; // [0..NumPocStCurrBefore-1] -> refpic (0..15)
468     unsigned char RefPicSetStCurrAfter[8];  // [0..NumPocStCurrAfter-1] -> refpic (0..15)
469     unsigned char RefPicSetLtCurr[8];       // [0..NumPocLtCurr-1] -> refpic (0..15)
470     unsigned int reserved4[16];     // reserved (presumably set to zero - confirm)
471
472     // scaling lists (diag order)
473     unsigned char ScalingList4x4[6][16];       // [matrixId][i]
474     unsigned char ScalingList8x8[6][64];       // [matrixId][i]
475     unsigned char ScalingList16x16[6][64];     // [matrixId][i] (64 entries per list, same as the 8x8 lists)
476     unsigned char ScalingList32x32[2][64];     // [matrixId][i] (64 entries per list, same as the 8x8 lists)
477     unsigned char ScalingListDCCoeff16x16[6];  // [matrixId]
478     unsigned char ScalingListDCCoeff32x32[2];  // [matrixId]
479 } CUVIDHEVCPICPARAMS;
480
481
482 ////////////////////////////////////////////////////////////////////////////////////////////////
483 //
484 // Picture Parameters for Decoding 
485 //
486
487 typedef struct _CUVIDPICPARAMS  // Per-picture decode parameters; passed by pointer to cuvidDecodePicture()
488 {
489     int PicWidthInMbs;      // Coded Frame Width (in macroblocks, going by the field name)
490     int FrameHeightInMbs;   // Coded Frame Height (in macroblocks, going by the field name)
491     int CurrPicIdx;         // Output index of the current picture
492     int field_pic_flag;     // 0=frame picture, 1=field picture
493     int bottom_field_flag;  // 0=top field, 1=bottom field (ignored if field_pic_flag=0)
494     int second_field;       // Second field of a complementary field pair
495     // Bitstream data
496     unsigned int nBitstreamDataLen;        // Number of bytes in bitstream data buffer
497     const unsigned char *pBitstreamData;   // Ptr to bitstream data for this picture (slice-layer)
498     unsigned int nNumSlices;               // Number of slices in this picture
499     const unsigned int *pSliceDataOffsets; // nNumSlices entries, contains offset of each slice within the bitstream data buffer
500     int ref_pic_flag;       // This picture is a reference picture
501     int intra_pic_flag;     // This picture is entirely intra coded
502     unsigned int Reserved[30];             // Reserved for future use
503     // Codec-specific data: all alternatives overlap; presumably selected by the codec the decoder was created with - confirm
504     union {
505         CUVIDMPEG2PICPARAMS mpeg2;          // Also used for MPEG-1
506         CUVIDH264PICPARAMS h264;
507         CUVIDVC1PICPARAMS vc1;
508         CUVIDMPEG4PICPARAMS mpeg4;
509         CUVIDJPEGPICPARAMS jpeg;
510         CUVIDHEVCPICPARAMS hevc;
511         unsigned int CodecReserved[1024];   // makes the union at least as large as 1024 unsigned ints, whichever codec struct is used
512     } CodecSpecific;
513 } CUVIDPICPARAMS;
514
515
516 ////////////////////////////////////////////////////////////////////////////////////////////////
517 //
518 // Post-processing
519 //
520
521 typedef struct _CUVIDPROCPARAMS  // Post-processing parameters; the pVPP argument of cuvidMapVideoFrame() / cuvidMapVideoFrame64()
522 {
523     int progressive_frame;  // Input is progressive (deinterlace_mode will be ignored)
524     int second_field;       // Output the second field (ignored if deinterlace mode is Weave)
525     int top_field_first;    // Input frame is top field first (1st field is top, 2nd field is bottom)
526     int unpaired_field;     // Input only contains one field (2nd field is invalid)
527     // The fields below are used for raw YUV input
528     unsigned int reserved_flags;        // Reserved for future use (set to zero)
529     unsigned int reserved_zero;         // Reserved (set to zero)
530     unsigned long long raw_input_dptr;  // Input CUdeviceptr for raw YUV extensions
531     unsigned int raw_input_pitch;       // pitch in bytes of raw YUV input (should be aligned appropriately)
532     unsigned int raw_input_format;      // Reserved for future use (set to zero)
533     unsigned long long raw_output_dptr; // Reserved for future use (set to zero)
534     unsigned int raw_output_pitch;      // Reserved for future use (set to zero)
535     unsigned int Reserved[48];          // reserved (presumably set to zero, like the reserved fields above - confirm)
536     void *Reserved3[3];                 // reserved pointer-sized slots (presumably set to NULL - confirm)
537 } CUVIDPROCPARAMS;
538
539 ////////////////////////////////////////////////////////////////////////////////////////////////
540 //
541 // In order to minimize decode latencies, there should always be at least 2 pictures in the decode
542 // queue at any time, in order to make sure that all decode engines are always busy.
543 //
544 // Overall data flow:
545 //  - cuvidCreateDecoder(...)
546 //  For each picture:
547 //  - cuvidDecodePicture(N)
548 //  - cuvidMapVideoFrame(N-4)
549 //  - do some processing in cuda
550 //  - cuvidUnmapVideoFrame(N-4)
551 //  - cuvidDecodePicture(N+1)
552 //  - cuvidMapVideoFrame(N-3)
553 //    ...
554 //  - cuvidDestroyDecoder(...)
555 //
556 // NOTE:
557 // - In the current version, the cuda context MUST be created from a D3D device, using cuD3D9CtxCreate function.
558 //   For multi-threaded operation, the D3D device must also be created with the D3DCREATE_MULTITHREADED flag.
559 // - There is a limit to how many pictures can be mapped simultaneously (ulNumOutputSurfaces)
560 // - cuvidDecodePicture may block the calling thread if there are too many pictures pending
561 //   in the decode queue
562 //
563 ////////////////////////////////////////////////////////////////////////////////////////////////
564
565 // Create/Destroy the decoder object. Both return a CUresult status code.
566 extern CUresult CUDAAPI cuvidCreateDecoder(CUvideodecoder *phDecoder, CUVIDDECODECREATEINFO *pdci);  // phDecoder: where the new handle is stored (output, judging by the pointer type); pdci: creation parameters
567 extern CUresult CUDAAPI cuvidDestroyDecoder(CUvideodecoder hDecoder);                                // hDecoder: decoder handle to destroy
568
569 // Decode a single picture (field or frame) described by pPicParams on decoder hDecoder; returns a CUresult status code
570 extern CUresult CUDAAPI cuvidDecodePicture(CUvideodecoder hDecoder, CUVIDPICPARAMS *pPicParams);
571
572 #if !defined(__CUVID_DEVPTR64) || defined(__CUVID_INTERNAL)  // 32-bit device-pointer prototypes: hidden when __CUVID_DEVPTR64 is defined without __CUVID_INTERNAL
573 // Post-process and map a video frame for use in cuda (picture nPicIdx; device pointer and pitch are written through pDevPtr and pPitch; pVPP holds the post-processing parameters)
574 extern CUresult CUDAAPI cuvidMapVideoFrame(CUvideodecoder hDecoder, int nPicIdx,
575                                            unsigned int *pDevPtr, unsigned int *pPitch,
576                                            CUVIDPROCPARAMS *pVPP);
577 // Unmap a previously mapped video frame (DevPtr: the device pointer obtained from the map call)
578 extern CUresult CUDAAPI cuvidUnmapVideoFrame(CUvideodecoder hDecoder, unsigned int DevPtr);
579 #endif
580
581 #if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)  // 64-bit device-pointer variants: declared only when one of these x86-64 macros is defined
582 extern CUresult CUDAAPI cuvidMapVideoFrame64(CUvideodecoder hDecoder, int nPicIdx, unsigned long long *pDevPtr,
583                                              unsigned int *pPitch, CUVIDPROCPARAMS *pVPP);   // same parameters as cuvidMapVideoFrame, but with a 64-bit device pointer
584 extern CUresult CUDAAPI cuvidUnmapVideoFrame64(CUvideodecoder hDecoder, unsigned long long DevPtr);
585 #if defined(__CUVID_DEVPTR64) && !defined(__CUVID_INTERNAL)  // redirect the unsuffixed names to the 64-bit entry points
586 #define cuvidMapVideoFrame      cuvidMapVideoFrame64
587 #define cuvidUnmapVideoFrame    cuvidUnmapVideoFrame64
588 #endif
589 #endif
590
591 // Get the pointer to the d3d9 surface that is the decode RT for picture nPicIdx; the pointer is stored through pSrcSurface
592 extern CUresult CUDAAPI cuvidGetVideoFrameSurface(CUvideodecoder hDecoder, int nPicIdx, void **pSrcSurface);
593
594 ////////////////////////////////////////////////////////////////////////////////////////////////
595 //
596 // Context-locking: to facilitate multi-threaded implementations, the following 4 functions
597 // provide a simple mutex-style host synchronization. If a non-NULL context lock (vidLock) is specified
598 // in CUVIDDECODECREATEINFO, the codec library will acquire the mutex associated with the given
599 // context before making any cuda calls.
600 // A multi-threaded application could create a lock associated with a context handle so that
601 // multiple threads can safely share the same cuda context:
602 //  - use cuCtxPopCurrent immediately after context creation in order to create a 'floating' context
603 //    that can be passed to cuvidCtxLockCreate.
604 //  - When using a floating context, all cuda calls should only be made within a cuvidCtxLock/cuvidCtxUnlock section.
605 //
606 // NOTE: This is a safer alternative to cuCtxPushCurrent and cuCtxPopCurrent, and is not related to video
607 // decoder in any way (implemented as a critical section associated with cuCtx{Push|Pop}Current calls).
608
609 extern CUresult CUDAAPI cuvidCtxLockCreate(CUvideoctxlock *pLock, CUcontext ctx);         // create a lock for context ctx; the handle is stored through pLock
610 extern CUresult CUDAAPI cuvidCtxLockDestroy(CUvideoctxlock lck);                          // destroy a lock handle
611 extern CUresult CUDAAPI cuvidCtxLock(CUvideoctxlock lck, unsigned int reserved_flags);    // acquire lck; CCtxAutoLock in this header passes 0 for reserved_flags
612 extern CUresult CUDAAPI cuvidCtxUnlock(CUvideoctxlock lck, unsigned int reserved_flags);  // release lck; CCtxAutoLock in this header passes 0 for reserved_flags
613
614 ////////////////////////////////////////////////////////////////////////////////////////////////
615
616 #if defined(__cplusplus)
617 }
618
619 // Auto-lock helper for C++ applications: calls cuvidCtxLock() on construction and cuvidCtxUnlock() on destruction
620 class CCtxAutoLock
621 {   // members declared before 'public:' are private (class default)
622     CUvideoctxlock m_ctx;                          // lock handle given to the constructor
623     CCtxAutoLock(const CCtxAutoLock &);            // private and never defined: a copy's destructor would call cuvidCtxUnlock() a second time
624     CCtxAutoLock &operator=(const CCtxAutoLock &); // private and never defined: assignment would leave two objects unlocking the same handle
625 public:
626     CCtxAutoLock(CUvideoctxlock ctx):m_ctx(ctx) { cuvidCtxLock(m_ctx,0); }  // acquire; the returned CUresult is not checked
627     ~CCtxAutoLock() { cuvidCtxUnlock(m_ctx,0); }                            // release; the returned CUresult is not checked
628 };
629 #endif /* __cplusplus */
630
631 #endif // __CUDA_VIDEO_H__
632