Line data Source code
1 : /*
2 : * GPAC - Multimedia Framework C SDK
3 : *
4 : * Authors: Jean Le Feuvre
5 : * Copyright (c) Telecom ParisTech 2017-2021
6 : * All rights reserved
7 : *
8 : * This file is part of GPAC / NVidia Hardware decoder filter
9 : *
10 : * GPAC is free software; you can redistribute it and/or modify
11 : * it under the terms of the GNU Lesser General Public License as published by
12 : * the Free Software Foundation; either version 2, or (at your option)
13 : * any later version.
14 : *
15 : * GPAC is distributed in the hope that it will be useful,
16 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 : * GNU Lesser General Public License for more details.
19 : *
20 : * You should have received a copy of the GNU Lesser General Public
21 : * License along with this library; see the file COPYING. If not, write to
22 : * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23 : *
24 : */
25 :
26 : #include <gpac/thread.h>
27 : #include <gpac/internal/media_dev.h>
28 : #include <gpac/constants.h>
29 : #include <gpac/filters.h>
30 :
31 : #if (!defined(GPAC_STATIC_BUILD) && (defined(WIN32) || defined(GPAC_CONFIG_LINUX) || defined(GPAC_CONFIG_DARWIN)) && !defined(GPAC_DISABLE_NVDEC))
32 :
33 : #include "dec_nvdec_sdk.h"
34 :
35 : //#define EMUL_NV_DLL
36 :
37 : #ifndef GPAC_DISABLE_3D
38 :
39 :
40 : #ifdef LOAD_GL_1_5
41 : GLDECL_EXTERN(glGenBuffers);
42 : GLDECL_EXTERN(glBindBuffer);
43 : GLDECL_EXTERN(glBufferData);
44 : #endif
45 :
46 : #endif
47 :
48 : typedef struct _nv_dec_inst NVDecInstance;
49 :
50 : typedef enum
51 : {
52 : NVDEC_COPY = 0,
53 : NVDEC_SINGLE,
54 : NVDEC_GL
55 : } NVDecFrameMode;
56 :
57 : typedef enum
58 : {
59 : NVDEC_CUVID = 0,
60 : NVDEC_CUDA,
61 : NVDEC_DXVA
62 : } NVDecVideoMode;
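 : //NVDecFrameMode matches the fmode option (copy, single-surface or OpenGL output) and
 : //NVDecVideoMode matches the vmode option (CUVID, CUDA or DXVA backend) declared in NVDecArgs below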
63 :
64 : typedef struct _nv_dec_ctx
65 : {
66 : u32 unload;
67 : NVDecFrameMode fmode;
68 : NVDecVideoMode vmode;
69 : u32 num_surfaces;
70 :
71 : GF_FilterPid *ipid, *opid;
72 : u32 codec_id;
73 : Bool use_gl_texture;
74 : u32 width, height, bpp_luma, bpp_chroma;
75 : cudaVideoCodec codec_type;
76 : cudaVideoChromaFormat chroma_fmt;
77 :
78 : u32 out_size, stride, pix_fmt, stride_uv, nb_planes, uv_height;
79 : u32 reload_decoder_state;
80 : Bool skip_next_frame;
81 : CUresult decode_error, dec_create_error;
82 : Bool frame_size_changed;
83 : Bool needs_resetup;
84 : unsigned long prefer_dec_mode;
85 :
86 : NVDecInstance *dec_inst;
87 :
88 : GF_List *frames;
89 : GF_List *frames_res;
90 : GF_List *src_packets;
91 :
92 : struct __nv_frame *pending_frame;
93 :
94 :
95 : u8 *xps_buf;
96 : u32 xps_buf_size;
97 : u32 nal_size_length;
98 : Bool inject_xps;
99 : u8 *nal_buffer;
100 : u32 nal_buffer_alloc;
101 :
102 : u8 *single_frame_data;
103 : u32 single_frame_data_alloc;
104 :
105 : #ifndef GPAC_DISABLE_3D
106 : Bool gl_provider_requested;
107 : GLint y_tx_id, uv_tx_id;
108 : GLint y_pbo_id, uv_pbo_id;
109 : #endif
110 : } NVDecCtx;
111 :
112 :
113 : struct _nv_dec_inst
114 : {
115 : u32 width, height, bpp_luma, bpp_chroma, stride;
116 : cudaVideoCodec codec_type;
117 : cudaVideoChromaFormat chroma_fmt;
118 : u32 id;
119 : u32 th_id;
120 :
121 : //allocated video parser and decoder
122 : CUvideoparser cu_parser;
123 : CUvideodecoder cu_decoder;
124 :
125 : //currently associated context, NULL if none
126 : NVDecCtx *ctx;
127 : };
128 :
129 :
130 : typedef struct __nv_frame
131 : {
132 : CUVIDPARSERDISPINFO frame_info;
133 : NVDecCtx *ctx;
134 : GF_FilterFrameInterface gframe;
135 : Bool y_mapped, uv_mapped;
136 : } NVDecFrame;
137 :
138 : static GF_List *global_unactive_decoders=NULL;
139 : static u32 global_nb_loaded_nvdec = 0;
140 : static u32 global_nb_loaded_decoders = 0;
141 : static GF_Mutex *global_inst_mutex = NULL;
142 : static CUcontext cuda_ctx = NULL;
143 : #ifndef EMUL_NV_DLL
144 : static Bool cuda_ctx_gl = GF_FALSE;
145 : static CUdevice cuda_dev = -1;
146 : #endif
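 : //all filter instances share one CUDA device/context; with unload=reuse, idle decoder instances are
 : //parked in global_unactive_decoders (guarded by global_inst_mutex) so that a new PID with identical
 : //settings can reattach to them instead of recreating a hardware decoder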
147 :
148 : //#define ENABLE_10BIT_OUTPUT
149 :
150 0 : static GF_Err nvdec_init_decoder(NVDecCtx *ctx)
151 : {
152 : CUresult res;
153 : CUVIDDECODECREATEINFO cuvid_info;
154 :
155 : assert(ctx->dec_inst);
156 :
157 : memset(&cuvid_info, 0, sizeof(CUVIDDECODECREATEINFO));
158 0 : cuvid_info.CodecType = ctx->codec_type;
159 0 : cuvid_info.ulWidth = ctx->width;
160 0 : cuvid_info.ulHeight = ctx->height;
161 0 : cuvid_info.ulNumDecodeSurfaces = ctx->num_surfaces;
162 0 : cuvid_info.ChromaFormat = ctx->chroma_fmt;
163 : cuvid_info.OutputFormat = cudaVideoSurfaceFormat_NV12;
164 : #ifdef ENABLE_10BIT_OUTPUT
165 : if (ctx->bpp_luma + ctx->bpp_chroma > 16)
166 : cuvid_info.OutputFormat = cudaVideoSurfaceFormat_P016;
167 : #endif
168 0 : cuvid_info.DeinterlaceMode = cudaVideoDeinterlaceMode_Adaptive;
169 0 : cuvid_info.bitDepthMinus8 = ctx->bpp_luma - 8;
170 0 : cuvid_info.ulTargetWidth = ctx->width;
171 0 : cuvid_info.ulTargetHeight = ctx->height;
172 : cuvid_info.display_area.left = 0;
173 0 : cuvid_info.display_area.right = ctx->width;
174 : cuvid_info.display_area.top = 0;
175 0 : cuvid_info.display_area.bottom = ctx->height;
176 :
177 0 : cuvid_info.ulNumOutputSurfaces = 1;
178 0 : cuvid_info.ulCreationFlags = ctx->prefer_dec_mode;
179 :
180 : // create the decoder
181 0 : res = cuvidCreateDecoder(&ctx->dec_inst->cu_decoder, &cuvid_info);
182 0 : if (res != CUDA_SUCCESS) {
183 0 : GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[NVDec] failed to create cuvid decoder %s\n", cudaGetErrorEnum(res) ) );
184 0 : ctx->dec_create_error = res;
185 0 : return GF_IO_ERR;
186 : }
187 0 : global_nb_loaded_decoders++;
188 : assert(global_nb_loaded_decoders);
189 0 : ctx->dec_inst->id = global_nb_loaded_decoders;
190 0 : ctx->dec_inst->th_id = gf_th_id();
191 0 : GF_LOG(GF_LOG_INFO, GF_LOG_CODEC, ("[NVDec] decoder instance %d created (%dx%d) - %d total decoders loaded\n", ctx->dec_inst->id, ctx->width, ctx->height, global_nb_loaded_decoders) );
192 : return GF_OK;
193 : }
194 :
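 : //returns GF_TRUE when an inactive decoder with identical settings could be reattached (no need to
 : //recreate the cuvid decoder), GF_FALSE otherwise with ctx->dec_inst allocated but not yet configured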
195 0 : Bool load_inactive_dec(NVDecCtx *ctx)
196 : {
197 : u32 i, count;
198 : //look for an inactive decoder with the same settings
199 0 : if (global_unactive_decoders) {
200 :
201 0 : gf_mx_p(global_inst_mutex);
202 0 : count = gf_list_count(global_unactive_decoders);
203 0 : for (i=0; i<count; i++) {
204 0 : NVDecInstance *inst = gf_list_get(global_unactive_decoders, i);
205 0 : if ((inst->width==ctx->width) && (inst->height==ctx->height) && (inst->bpp_luma == ctx->bpp_luma )
206 0 : && (inst->bpp_chroma == ctx->bpp_chroma ) && (inst->codec_type == ctx->codec_type) && (inst->chroma_fmt == ctx->chroma_fmt )
207 : ) {
208 :
209 0 : gf_list_rem(global_unactive_decoders, i);
210 0 : ctx->dec_inst = inst;
211 0 : inst->ctx = ctx;
212 0 : gf_mx_v(global_inst_mutex);
213 0 : return GF_TRUE;
214 : }
215 : }
216 0 : if (ctx->dec_inst && !ctx->dec_inst->cu_decoder) {
217 0 : ctx->dec_inst->ctx = ctx;
218 0 : gf_mx_v(global_inst_mutex);
219 0 : return GF_FALSE;
220 : }
221 0 : if (ctx->dec_inst) {
222 : NVDecInstance *inst = ctx->dec_inst;
223 0 : if ((inst->width==ctx->width) && (inst->height==ctx->height) && (inst->bpp_luma == ctx->bpp_luma )
224 0 : && (inst->bpp_chroma == ctx->bpp_chroma ) && (inst->codec_type == ctx->codec_type) && (inst->chroma_fmt == ctx->chroma_fmt )
225 : ) {
226 : ctx->dec_inst = inst;
227 0 : inst->ctx = ctx;
228 0 : gf_mx_v(global_inst_mutex);
229 0 : return GF_TRUE;
230 : }
231 : } else {
232 0 : ctx->dec_inst = gf_list_pop_back(global_unactive_decoders);
233 : }
234 0 : gf_mx_v(global_inst_mutex);
235 : }
236 0 : if (!ctx->dec_inst) {
237 0 : GF_SAFEALLOC(ctx->dec_inst, NVDecInstance);
238 0 : if (!ctx->dec_inst)
239 : return GF_FALSE;
240 : }
241 0 : ctx->dec_inst->ctx = ctx;
242 0 : return GF_FALSE;
243 : }
244 :
245 0 : static void nvdec_destroy_decoder(NVDecInstance *inst)
246 : {
247 0 : if (inst->cu_decoder) {
248 0 : cuvidDestroyDecoder(inst->cu_decoder);
249 0 : inst->cu_decoder = NULL;
250 0 : global_nb_loaded_decoders--;
251 0 : GF_LOG(GF_LOG_INFO, GF_LOG_CODEC, ("[NVDec] decoder instance %d destruction - %d decoders still loaded\n", inst->id, global_nb_loaded_decoders ) );
252 : }
253 0 : }
254 :
255 0 : static void update_pix_fmt(NVDecCtx *ctx, Bool use_10bits)
256 : {
257 0 : switch (ctx->chroma_fmt) {
258 0 : case cudaVideoChromaFormat_420:
259 0 : ctx->pix_fmt = use_10bits ? GF_PIXEL_NV12_10 : GF_PIXEL_NV12;
260 0 : break;
261 0 : case cudaVideoChromaFormat_422:
262 0 : ctx->pix_fmt = use_10bits ? GF_PIXEL_YUV422_10 : GF_PIXEL_YUV422;
263 0 : break;
264 0 : case cudaVideoChromaFormat_444:
265 0 : ctx->pix_fmt = use_10bits ? GF_PIXEL_YUV444_10 : GF_PIXEL_YUV444;
266 0 : break;
267 0 : default:
268 0 : ctx->pix_fmt = 0;
269 0 : return;
270 : }
271 0 : gf_pixel_get_size_info(ctx->pix_fmt, ctx->width, ctx->height, &ctx->out_size, &ctx->stride, &ctx->stride_uv, &ctx->nb_planes, &ctx->uv_height);
272 : }
273 :
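 : //CUVID parser callback, invoked before decoding starts and on any format change: updates the output
 : //PID properties and (re)creates the cuvid decoder when the new sequence does not match the current one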
274 0 : static int CUDAAPI HandleVideoSequence(void *pUserData, CUVIDEOFORMAT *pFormat)
275 : {
276 : Bool use_10bits=GF_FALSE;
277 : Bool skip_output_resize=GF_FALSE;
278 : NVDecInstance *inst= (NVDecInstance *)pUserData;
279 0 : NVDecCtx *ctx = inst->ctx;
280 :
281 : u32 w, h;
282 0 : w = pFormat->coded_width;
283 0 : h = pFormat->coded_height;
284 :
285 0 : if (pFormat->display_area.right && (pFormat->display_area.right<(s32)w)) w = pFormat->display_area.right;
286 0 : if (pFormat->display_area.bottom && (pFormat->display_area.bottom<(s32)h)) h = pFormat->display_area.bottom;
287 :
288 0 : GF_LOG(GF_LOG_INFO, GF_LOG_CODEC, ("[NVDec] Decoder instance %d Video sequence change detected - new setup %u x %u, %u bpp\n", inst->id, pFormat->coded_width, pFormat->coded_height, pFormat->bit_depth_luma_minus8 + 8) );
289 : //no change in config
290 0 : if ((ctx->width == w)
291 0 : && (ctx->height == h)
292 0 : && (ctx->bpp_luma == 8 + pFormat->bit_depth_luma_minus8)
293 0 : && (ctx->bpp_chroma == 8 + pFormat->bit_depth_chroma_minus8)
294 0 : && (ctx->codec_type == pFormat->codec)
295 0 : && (ctx->chroma_fmt == pFormat->chroma_format)
296 : ) {
297 0 : if (ctx->dec_inst && ctx->dec_inst->cu_decoder)
298 : return 1;
299 : skip_output_resize = GF_TRUE;
300 : }
301 :
302 : //ENABLE_10BIT_OUTPUT is left undefined since 10-bit output falls back to software decoding!
303 : #ifdef ENABLE_10BIT_OUTPUT
304 : if (ctx->bpp_luma + ctx->bpp_chroma > 16) use_10bits = GF_TRUE;
305 : #endif
306 :
307 0 : ctx->width = w;
308 0 : ctx->height = h;
309 0 : ctx->bpp_luma = 8 + pFormat->bit_depth_luma_minus8;
310 0 : ctx->bpp_chroma = 8 + pFormat->bit_depth_chroma_minus8;
311 0 : ctx->codec_type = pFormat->codec;
312 0 : ctx->chroma_fmt = pFormat->chroma_format;
313 0 : ctx->stride = pFormat->coded_width;
314 :
315 : //if load_inactive_dec returns TRUE, we are reusing an existing decoder with the same config, no need to recreate one
316 0 : if (load_inactive_dec(ctx)) {
317 0 : GF_LOG(GF_LOG_INFO, GF_LOG_CODEC, ("[NVDec] reusing inactive decoder %dx%d - %d total decoders loaded\n", ctx->width, ctx->height, global_nb_loaded_decoders) );
318 0 : ctx->stride = ctx->dec_inst->stride;
319 : //initial config, need to trigger output resize
320 0 : if (!ctx->out_size) ctx->reload_decoder_state = 1;
321 :
322 0 : update_pix_fmt(ctx, use_10bits);
323 0 : return GF_OK;
324 : }
325 0 : if (!ctx->dec_inst) return GF_OUT_OF_MEM;
326 : //if we have an existing decoder but with a different config, let's reload
327 0 : nvdec_destroy_decoder(ctx->dec_inst);
328 :
329 0 : ctx->dec_inst->width = ctx->width;
330 0 : ctx->dec_inst->height = ctx->height;
331 0 : ctx->dec_inst->bpp_luma = ctx->bpp_luma;
332 0 : ctx->dec_inst->bpp_chroma = ctx->bpp_chroma;
333 0 : ctx->dec_inst->codec_type = ctx->codec_type;
334 0 : ctx->dec_inst->chroma_fmt = ctx->chroma_fmt;
335 0 : ctx->dec_inst->ctx = ctx;
336 0 : ctx->stride = use_10bits ? 2*ctx->width : ctx->width;
337 :
338 0 : update_pix_fmt(ctx, use_10bits);
339 :
340 0 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_WIDTH, &PROP_UINT(ctx->width));
341 0 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_HEIGHT, &PROP_UINT(ctx->height));
342 0 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_STRIDE, &PROP_UINT(ctx->stride));
343 0 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_BIT_DEPTH_Y, &PROP_UINT(use_10bits ? ctx->bpp_luma : 8));
344 0 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_BIT_DEPTH_UV, &PROP_UINT(use_10bits ? ctx->bpp_chroma : 8));
345 0 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_PIXFMT, &PROP_UINT(ctx->pix_fmt));
346 :
347 :
348 : assert(ctx->out_size);
349 : assert(ctx->stride);
350 0 : ctx->dec_inst->stride = ctx->stride;
351 :
352 0 : if (! ctx->dec_inst->cu_decoder) {
353 0 : nvdec_init_decoder(ctx);
354 0 : if (!skip_output_resize) {
355 0 : ctx->reload_decoder_state = 1;
356 : }
357 : } else {
358 0 : ctx->reload_decoder_state = 2;
359 : }
360 : return 1;
361 : }
362 :
363 0 : static int CUDAAPI HandlePictureDecode(void *pUserData, CUVIDPICPARAMS *pPicParams)
364 : {
365 : NVDecInstance *inst = (NVDecInstance *)pUserData;
366 0 : inst->ctx->decode_error = cuvidDecodePicture(inst->cu_decoder, pPicParams);
367 :
368 0 : if (inst->ctx->decode_error != CUDA_SUCCESS) {
369 0 : GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[NVDec] decoder instance %d failed to decode picture %s\n", inst->id, cudaGetErrorEnum(inst->ctx->decode_error) ) );
370 : return GF_IO_ERR;
371 : }
372 0 : GF_LOG(GF_LOG_DEBUG, GF_LOG_CODEC, ("[NVDec] decoded picture %u OK\n", pPicParams->CurrPicIdx ) );
373 :
374 : return 1;
375 : }
376 :
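 : //CUVID parser callback invoked in display order: the picture is queued in ctx->frames, kept sorted by timestamp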
377 0 : static int CUDAAPI HandlePictureDisplay(void *pUserData, CUVIDPARSERDISPINFO *pPicParams)
378 : {
379 : u32 i, count;
380 : NVDecFrame *f;
381 : NVDecInstance *inst = (NVDecInstance *)pUserData;
382 0 : NVDecCtx *ctx = (NVDecCtx *)inst->ctx;
383 0 : GF_LOG(GF_LOG_DEBUG, GF_LOG_CODEC, ("[NVDec] picture %u CTS "LLU" ready for display, queuing it\n", pPicParams->picture_index, pPicParams->timestamp) );
384 :
385 0 : f = gf_list_pop_back(ctx->frames_res);
386 0 : if (!f) {
387 0 : GF_SAFEALLOC(f, NVDecFrame);
388 0 : if (!f) return 0;
389 : }
390 0 : f->frame_info = *pPicParams;
391 0 : f->frame_info.timestamp = pPicParams->timestamp;
392 0 : f->ctx = ctx;
393 0 : count = gf_list_count(ctx->frames);
394 0 : for (i=0; i<count; i++) {
395 0 : NVDecFrame *af = gf_list_get(ctx->frames, i);
396 0 : if (af->frame_info.timestamp > f->frame_info.timestamp) {
397 0 : gf_list_insert(ctx->frames, f, i);
398 0 : return 1;
399 : }
400 : }
401 0 : gf_list_add(ctx->frames, f);
402 0 : return 1;
403 : }
404 :
405 0 : static void nvdec_store_paramlist(GF_BitStream *bs, GF_List *psl)
406 : {
407 : u32 i, count;
408 0 : count = gf_list_count(psl);
409 0 : for (i=0; i<count; i++) {
410 0 : GF_NALUFFParam *slc = gf_list_get(psl, i);
411 0 : gf_bs_write_u32(bs, 1);
412 0 : gf_bs_write_data(bs, slc->data, slc->size);
413 : }
414 0 : }
415 :
416 0 : static void nvdec_store_xps(NVDecCtx *ctx, GF_AVCConfig *avc_cfg, GF_HEVCConfig *hevc_cfg)
417 : {
418 : u32 i, count;
419 0 : GF_BitStream *bs = gf_bs_new(NULL, 0, GF_BITSTREAM_WRITE);
420 0 : if (avc_cfg) {
421 0 : ctx->nal_size_length = avc_cfg->nal_unit_size;
422 0 : nvdec_store_paramlist(bs, avc_cfg->sequenceParameterSets);
423 0 : nvdec_store_paramlist(bs, avc_cfg->sequenceParameterSetExtensions);
424 0 : nvdec_store_paramlist(bs, avc_cfg->pictureParameterSets);
425 0 : gf_odf_avc_cfg_del(avc_cfg);
426 0 : } else if (hevc_cfg) {
427 0 : ctx->nal_size_length = hevc_cfg->nal_unit_size;
428 0 : count = gf_list_count(hevc_cfg->param_array);
429 0 : for (i=0; i<count; i++) {
430 0 : GF_NALUFFParamArray *pa = gf_list_get(hevc_cfg->param_array, i);
431 0 : nvdec_store_paramlist(bs, pa->nalus);
432 : }
433 0 : gf_odf_hevc_cfg_del(hevc_cfg);
434 : }
435 0 : if (ctx->xps_buf) gf_free(ctx->xps_buf);
436 0 : ctx->xps_buf = NULL;
437 0 : ctx->xps_buf_size = 0;
438 0 : gf_bs_get_content(bs, &ctx->xps_buf, &ctx->xps_buf_size);
439 0 : gf_bs_del(bs);
440 0 : ctx->inject_xps = GF_TRUE;
441 0 : }
442 :
443 :
444 0 : static GF_Err nvdec_configure_stream(GF_Filter *filter, NVDecCtx *ctx)
445 : {
446 : #ifndef EMUL_NV_DLL
447 : CUresult res;
448 : #endif
449 : GF_HEVCConfig *hevc_cfg = NULL;
450 : GF_AVCConfig *avc_cfg = NULL;
451 0 : const GF_PropertyValue *dcd = gf_filter_pid_get_property(ctx->ipid, GF_PROP_PID_DECODER_CONFIG);
452 : CUVIDPARSERPARAMS oVideoParserParameters;
453 :
454 0 : switch (ctx->codec_id) {
455 0 : case GF_CODECID_MPEG1:
456 0 : ctx->codec_type = cudaVideoCodec_MPEG1;
457 0 : break;
458 0 : case GF_CODECID_MPEG2_SIMPLE:
459 : case GF_CODECID_MPEG2_MAIN:
460 : case GF_CODECID_MPEG2_SNR:
461 : case GF_CODECID_MPEG2_SPATIAL:
462 : case GF_CODECID_MPEG2_HIGH:
463 : case GF_CODECID_MPEG2_422:
464 0 : ctx->codec_type = cudaVideoCodec_MPEG2;
465 0 : break;
466 0 : case GF_CODECID_MPEG4_PART2:
467 0 : ctx->codec_type = cudaVideoCodec_MPEG4;
468 0 : break;
469 0 : case GF_CODECID_AVC:
470 0 : if (!dcd) return GF_EOS;
471 0 : ctx->codec_type = cudaVideoCodec_H264;
472 0 : avc_cfg = gf_odf_avc_cfg_read(dcd->value.data.ptr, dcd->value.data.size);
473 0 : if (!avc_cfg) return GF_NON_COMPLIANT_BITSTREAM;
474 : break;
475 0 : case GF_CODECID_HEVC:
476 0 : if (!dcd) return GF_EOS;
477 0 : ctx->codec_type = cudaVideoCodec_HEVC;
478 0 : hevc_cfg = gf_odf_hevc_cfg_read(dcd->value.data.ptr, dcd->value.data.size, GF_FALSE);
479 0 : if (!hevc_cfg) return GF_NON_COMPLIANT_BITSTREAM;
480 : break;
481 : }
482 :
483 : //create a video parser and a video decoder
484 : memset(&oVideoParserParameters, 0, sizeof(CUVIDPARSERPARAMS));
485 0 : ctx->needs_resetup = GF_FALSE;
486 0 : ctx->nal_size_length = 0;
487 :
488 : //this destroys avc_cfg / hevc_cfg
489 0 : if (avc_cfg || hevc_cfg)
490 0 : nvdec_store_xps(ctx, avc_cfg, hevc_cfg);
491 :
492 0 : if (load_inactive_dec(ctx)) {
493 0 : GF_LOG(GF_LOG_INFO, GF_LOG_CODEC, ("[NVDec] reusing inactive decoder %dx%d - %d total decoders loaded\n", ctx->width, ctx->height, global_nb_loaded_decoders ) );
494 0 : ctx->stride = ctx->dec_inst->stride;
495 : }
496 0 : if (!ctx->dec_inst) {
497 : return GF_OUT_OF_MEM;
498 : }
499 0 : ctx->decode_error = CUDA_SUCCESS;
500 :
501 0 : oVideoParserParameters.CodecType = ctx->codec_type;
502 0 : oVideoParserParameters.ulMaxNumDecodeSurfaces = ctx->num_surfaces;
503 0 : oVideoParserParameters.ulMaxDisplayDelay = 4;
504 0 : oVideoParserParameters.ulClockRate = 1000;
505 0 : oVideoParserParameters.pExtVideoInfo = NULL;
506 0 : oVideoParserParameters.pfnSequenceCallback = HandleVideoSequence; // Called before decoding frames and/or whenever there is a format change
507 0 : oVideoParserParameters.pfnDecodePicture = HandlePictureDecode; // Called when a picture is ready to be decoded (decode order)
508 0 : oVideoParserParameters.pfnDisplayPicture = HandlePictureDisplay; // Called whenever a picture is ready to be displayed (display order)
509 0 : oVideoParserParameters.pUserData = ctx->dec_inst;
510 :
511 : #ifndef EMUL_NV_DLL
512 0 : res = cuCtxPushCurrent(cuda_ctx);
513 0 : if (res != CUDA_SUCCESS) {
514 0 : GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[NVDec] failed to push CUDA CTX %s\n", cudaGetErrorEnum(res) ) );
515 : }
516 0 : res = cuvidCreateVideoParser(&ctx->dec_inst->cu_parser, &oVideoParserParameters);
517 0 : cuCtxPopCurrent(NULL);
518 :
519 0 : if (res != CUDA_SUCCESS) {
520 0 : GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[NVDec] failed to create CUVID parser %s\n", cudaGetErrorEnum(res) ) );
521 : return GF_PROFILE_NOT_SUPPORTED;
522 : }
523 : #endif
524 :
525 0 : GF_LOG(GF_LOG_DEBUG, GF_LOG_CODEC, ("[NVDec] video parser init OK\n") );
526 :
527 0 : switch (ctx->codec_type) {
528 0 : case cudaVideoCodec_MPEG1:
529 0 : gf_filter_set_name(filter, ctx->use_gl_texture ? "NVidia HWGL MPEG-1" : "NVidia HW MPEG-1");
530 0 : break;
531 0 : case cudaVideoCodec_MPEG2:
532 0 : gf_filter_set_name(filter, ctx->use_gl_texture ? "NVidia HWGL MPEG-2" : "NVidia HW MPEG-2");
533 0 : break;
534 0 : case cudaVideoCodec_MPEG4:
535 0 : gf_filter_set_name(filter, ctx->use_gl_texture ? "NVidia HWGL MPEG-4 part2" : "NVidia HW MPEG-4 part2");
536 0 : break;
537 0 : case cudaVideoCodec_H264:
538 0 : gf_filter_set_name(filter, ctx->use_gl_texture ? "NVidia HWGL AVC|H264" : "NVidia HW AVC|H264");
539 0 : break;
540 0 : case cudaVideoCodec_HEVC:
541 0 : gf_filter_set_name(filter, ctx->use_gl_texture ? "NVidia HWGL HEVC" : "NVidia HW HEVC");
542 0 : break;
543 0 : case cudaVideoCodec_VC1:
544 0 : gf_filter_set_name(filter, ctx->use_gl_texture ? "NVidia HWGL VC1" : "NVidia HW VC1");
545 0 : break;
546 : default:
547 : break;
548 : }
549 : return GF_OK;
550 : }
551 :
552 0 : static GF_Err nvdec_configure_pid(GF_Filter *filter, GF_FilterPid *pid, Bool is_remove)
553 : {
554 : const GF_PropertyValue *prop;
555 : #ifndef EMUL_NV_DLL
556 : CUresult res;
557 : #endif
558 0 : NVDecCtx *ctx = (NVDecCtx *) gf_filter_get_udta(filter);
559 :
560 0 : if (is_remove) {
561 0 : if (ctx->opid) {
562 0 : gf_filter_pid_remove(ctx->opid);
563 0 : ctx->opid = NULL;
564 : }
565 0 : ctx->ipid = NULL;
566 :
567 0 : if (ctx->unload == 2) {
568 0 : global_nb_loaded_nvdec--;
569 0 : if (ctx->dec_inst) {
570 : assert(global_unactive_decoders);
571 0 : gf_mx_p(global_inst_mutex);
572 0 : ctx->dec_inst->ctx = NULL;
573 0 : gf_list_add(global_unactive_decoders, ctx->dec_inst);
574 0 : ctx->dec_inst = NULL;
575 0 : gf_mx_v(global_inst_mutex);
576 : }
577 : }
578 : }
579 :
580 :
581 0 : if (ctx->ipid && (ctx->ipid != pid)) return GF_REQUIRES_NEW_INSTANCE;
582 :
583 0 : if (! gf_filter_pid_check_caps(pid))
584 : return GF_NOT_SUPPORTED;
585 0 : ctx->ipid = pid;
586 0 : ctx->use_gl_texture = (ctx->fmode == NVDEC_GL) ? GF_TRUE : GF_FALSE;
587 :
588 : #ifndef GPAC_DISABLE_3D
589 0 : if (ctx->use_gl_texture && (ctx->fmode==NVDEC_GL) && !ctx->gl_provider_requested) {
590 0 : GF_Err e = gf_filter_request_opengl(filter);
591 0 : if (e) {
592 0 : GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[NVDec] failed to request an openGL provider (error %s), will not use OpenGL output\n", gf_error_to_string(e) ));
593 0 : ctx->use_gl_texture = GF_FALSE;
594 : }
595 0 : ctx->gl_provider_requested = GF_TRUE;
596 : }
597 : #endif
598 :
599 0 : prop = gf_filter_pid_get_property(pid, GF_PROP_PID_CODECID);
600 0 : if (!prop) return GF_NOT_SUPPORTED;
601 0 : ctx->codec_id = prop->value.uint;
602 :
603 0 : switch (ctx->codec_id) {
604 : case GF_CODECID_MPEG1:
605 : case GF_CODECID_MPEG2_SIMPLE:
606 : case GF_CODECID_MPEG2_MAIN:
607 : case GF_CODECID_MPEG2_SNR:
608 : case GF_CODECID_MPEG2_SPATIAL:
609 : case GF_CODECID_MPEG2_HIGH:
610 : case GF_CODECID_MPEG2_422:
611 : case GF_CODECID_MPEG4_PART2:
612 : case GF_CODECID_AVC:
613 : case GF_CODECID_HEVC:
614 : break;
615 : default:
616 : return GF_NOT_SUPPORTED;
617 : }
618 :
619 0 : if (!ctx->opid)
620 0 : ctx->opid = gf_filter_pid_new(filter);
621 :
622 0 : gf_filter_pid_copy_properties(ctx->opid, ctx->ipid);
623 0 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_DECODER_CONFIG, NULL);
624 0 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_DECODER_CONFIG_ENHANCEMENT, NULL);
625 0 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_CODECID, &PROP_UINT(GF_CODECID_RAW) );
626 0 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_UNFRAMED, NULL);
627 :
628 0 : gf_filter_pid_set_framing_mode(ctx->ipid, GF_TRUE);
629 :
630 0 : switch (ctx->codec_id) {
631 0 : case GF_CODECID_AVC:
632 : case GF_CODECID_HEVC:
633 0 : prop = gf_filter_pid_get_property(pid, GF_PROP_PID_DECODER_CONFIG);
634 : //not ready yet
635 0 : if (!prop) return GF_OK;
636 : break;
637 : }
638 :
639 : #ifdef GPAC_DISABLE_3D
640 : ctx->use_gl_texture = GF_FALSE;
641 : #endif
642 :
643 0 : if (! cuda_ctx) {
644 : #ifndef EMUL_NV_DLL
645 : int major, minor;
646 : char deviceName[256];
647 0 : res = cuDeviceGet(&cuda_dev, 0);
648 0 : if (res != CUDA_SUCCESS) {
649 0 : GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[NVDec] failed to init cuda device %s\n", cudaGetErrorEnum(res) ) );
650 0 : return GF_IO_ERR;
651 : }
652 :
653 0 : cuDeviceComputeCapability(&major, &minor, cuda_dev);
654 0 : cuDeviceGetName(deviceName, 256, cuda_dev);
655 :
656 0 : GF_LOG(GF_LOG_INFO, GF_LOG_CODEC, ("[NVDec] GPU Device %s (idx 0) has SM %d.%d compute capability\n", deviceName, major, minor));
657 :
658 0 : if (ctx->use_gl_texture) {
659 : #ifndef GPAC_DISABLE_3D
660 0 : res = cuGLCtxCreate(&cuda_ctx, CU_CTX_BLOCKING_SYNC, cuda_dev);
661 :
662 : #ifdef LOAD_GL_1_5
663 : GET_GLFUN(glGenBuffers);
664 : GET_GLFUN(glBindBuffer);
665 : GET_GLFUN(glBufferData);
666 : #endif
667 0 : cuda_ctx_gl = GF_TRUE;
668 :
669 : #endif
670 : } else {
671 0 : res = cuCtxCreate(&cuda_ctx, CU_CTX_BLOCKING_SYNC, cuda_dev);
672 0 : cuda_ctx_gl = GF_FALSE;
673 : }
674 0 : if (res != CUDA_SUCCESS) {
675 0 : if (ctx->use_gl_texture) {
676 0 : cuda_ctx_gl = GF_FALSE;
677 0 : GF_LOG(GF_LOG_WARNING, GF_LOG_CODEC, ("[NVDec] Couldn't initialize cuda OpenGL context (error %s), retrying without OpenGL support\n", cudaGetErrorEnum(res) ) );
678 0 : res = cuCtxCreate(&cuda_ctx, CU_CTX_BLOCKING_SYNC, cuda_dev);
679 0 : if (res != CUDA_SUCCESS) {
680 0 : GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[NVDec] failed to init cuda context %s\n", cudaGetErrorEnum(res) ) );
681 : } else {
682 0 : ctx->use_gl_texture = GF_FALSE;
683 : }
684 : } else {
685 0 : GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[NVDec] failed to init cuda context %s\n", cudaGetErrorEnum(res) ) );
686 : }
687 :
688 0 : if (res != CUDA_SUCCESS) {
689 : return GF_IO_ERR;
690 : }
691 : }
692 : #endif
693 :
694 : }
695 :
696 0 : if (ctx->vmode == NVDEC_DXVA)
697 0 : ctx->prefer_dec_mode = cudaVideoCreate_PreferDXVA;
698 0 : else if (ctx->vmode == NVDEC_CUDA)
699 0 : ctx->prefer_dec_mode = cudaVideoCreate_PreferCUDA;
700 : else
701 0 : ctx->prefer_dec_mode = cudaVideoCreate_PreferCUVID;
702 :
703 0 : if (ctx->unload == 2) {
704 0 : global_nb_loaded_nvdec++;
705 0 : if (!global_inst_mutex ) global_inst_mutex = gf_mx_new("NVDecGlobal");
706 0 : gf_mx_p(global_inst_mutex);
707 0 : if (!global_unactive_decoders) global_unactive_decoders = gf_list_new();
708 0 : gf_mx_v(global_inst_mutex);
709 : }
710 :
711 :
712 0 : ctx->needs_resetup = GF_TRUE;
713 :
714 0 : return GF_OK;
715 : }
716 :
717 0 : static Bool nvdec_process_event(GF_Filter *filter, const GF_FilterEvent *evt)
718 : {
719 0 : NVDecCtx *ctx = (NVDecCtx *)gf_filter_get_udta(filter);
720 0 : if (evt->base.type == GF_FEVT_PLAY) {
721 0 : while (gf_list_count(ctx->frames)) {
722 0 : NVDecFrame *f = gf_list_pop_back(ctx->frames);
723 0 : gf_list_add(ctx->frames_res, f);
724 : }
725 : }
726 0 : return GF_FALSE;
727 : }
728 :
729 :
730 : #if 0
731 : case GF_CODEC_ABORT:
732 : while (gf_list_count(ctx->frames)) {
733 : NVDecFrame *f = (NVDecFrame *) gf_list_pop_back(ctx->frames);
734 : memset(f, 0, sizeof(NVDecFrame));
735 : gf_list_add(ctx->frames_res, f);
736 : }
737 : if (ctx->unload == 2) {
738 : if (ctx->dec_inst) {
739 : assert(global_unactive_decoders);
740 : gf_mx_p(global_inst_mutex);
741 : if (ctx->decode_error) {
742 : GF_LOG(GF_LOG_INFO, GF_LOG_CODEC, ("[NVDec] deactivating decoder %dx%d and destroying instance\n", ctx->width, ctx->height ) );
743 : nvdec_destroy_decoder(ctx->dec_inst);
744 : } else {
745 : GF_LOG(GF_LOG_INFO, GF_LOG_CODEC, ("[NVDec] deactivating decoder %dx%d\n", ctx->width, ctx->height ) );
746 : }
747 : ctx->dec_inst->ctx = NULL;
748 : gf_list_add(global_unactive_decoders, ctx->dec_inst);
749 : ctx->dec_inst = NULL;
750 : gf_mx_v(global_inst_mutex);
751 : }
752 : ctx->needs_resetup = GF_TRUE;
753 : ctx->dec_create_error = CUDA_SUCCESS;
754 : } else if (ctx->unload == 1) {
755 : if (ctx->dec_inst) {
756 : nvdec_destroy_decoder(ctx->dec_inst);
757 : }
758 : ctx->needs_resetup = GF_TRUE;
759 : ctx->dec_create_error = CUDA_SUCCESS;
760 : }
761 : return GF_OK;
762 : }
763 : #endif
764 :
765 : static GF_Err nvdec_send_hw_frame(NVDecCtx *ctx);
766 :
767 0 : static void nvdec_reset_pcks(NVDecCtx *ctx)
768 : {
769 0 : while (gf_list_count(ctx->src_packets)) {
770 0 : GF_FilterPacket *pck = gf_list_pop_back(ctx->src_packets);
771 0 : gf_filter_pck_unref(pck);
772 : }
773 0 : }
774 :
775 0 : static void nvdec_merge_pck_props(NVDecCtx *ctx, NVDecFrame *f, GF_FilterPacket *dst_pck)
776 : {
777 : u32 i, count;
778 : GF_FilterPacket *src_pck = NULL;
779 0 : count = gf_list_count(ctx->src_packets);
780 0 : for (i = 0; i<count; i++) {
781 0 : src_pck = gf_list_get(ctx->src_packets, i);
782 0 : if (gf_filter_pck_get_cts(src_pck) == f->frame_info.timestamp) {
783 0 : gf_filter_pck_merge_properties(src_pck, dst_pck);
784 0 : gf_list_rem(ctx->src_packets, i);
785 0 : gf_filter_pck_unref(src_pck);
786 : return;
787 : }
788 : }
789 : //not found !
790 0 : gf_filter_pck_set_cts(dst_pck, f->frame_info.timestamp);
791 0 : if (!gf_filter_pck_get_interlaced(dst_pck) && !f->frame_info.progressive_frame) {
792 0 : gf_filter_pck_set_interlaced(dst_pck, f->frame_info.top_field_first ? 1 : 2);
793 : }
794 : }
795 :
796 0 : static GF_Err nvdec_process(GF_Filter *filter)
797 : {
798 : NVDecFrame *f;
799 0 : CUdeviceptr map_mem = 0;
800 : CUVIDPROCPARAMS params;
801 0 : unsigned int pitch = 0;
802 : GF_Err e;
803 : u32 pck_size;
804 : const u8 *data;
805 : u8 *output;
806 : GF_FilterPacket *ipck, *dst_pck;
807 : CUVIDSOURCEDATAPACKET cu_pkt;
808 : CUresult res;
809 0 : NVDecCtx *ctx = (NVDecCtx *) gf_filter_get_udta(filter);
810 :
811 0 : ipck = gf_filter_pid_get_packet(ctx->ipid);
812 :
813 0 : if (ctx->needs_resetup) {
814 0 : e = nvdec_configure_stream(filter, ctx);
815 0 : if (e<0) return e;
816 : //not ready
817 0 : if (e==GF_EOS) return GF_OK;
818 : }
819 :
820 : memset(&cu_pkt, 0, sizeof(CUVIDSOURCEDATAPACKET));
821 0 : cu_pkt.flags = CUVID_PKT_TIMESTAMP;
822 0 : pck_size = 0;
823 : data = NULL;
824 0 : if (!ipck) {
825 0 : if (!gf_filter_pid_is_eos(ctx->ipid))
826 : return GF_OK;
827 :
828 0 : cu_pkt.flags |= CUVID_PKT_ENDOFSTREAM;
829 0 : ctx->skip_next_frame = GF_FALSE;
830 : } else {
831 0 : data = gf_filter_pck_get_data(ipck, &pck_size);
832 : }
833 :
834 0 : if (ctx->dec_create_error) {
835 0 : if (ipck) gf_filter_pid_drop_packet(ctx->ipid);
836 0 : else if (gf_filter_pid_is_eos(ctx->ipid)) {
837 0 : gf_filter_pid_set_eos(ctx->opid);
838 0 : return GF_EOS;
839 : }
840 : return GF_IO_ERR;
841 : }
842 :
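 : //AVC/HEVC input uses length-prefixed NAL units (nal_size_length bytes per size field): rewrite the
 : //payload to Annex B start codes and re-inject the stored parameter sets at SAPs, as expected by the parser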
843 0 : if (data && ctx->nal_size_length) {
844 0 : GF_BitStream *bs = gf_bs_new(ctx->nal_buffer, ctx->nal_buffer_alloc, GF_BITSTREAM_WRITE_DYN);
845 0 : if (!bs) return GF_OUT_OF_MEM;
846 :
847 0 : if (gf_filter_pck_get_sap(ipck)) ctx->inject_xps = GF_TRUE;
848 :
849 0 : if (ctx->inject_xps) {
850 0 : ctx->inject_xps = GF_FALSE;
851 0 : gf_bs_write_data(bs, ctx->xps_buf, ctx->xps_buf_size);
852 : }
853 :
854 0 : while (pck_size) {
855 : u32 i, nal_size = 0;
856 0 : for (i = 0; i<ctx->nal_size_length; i++) {
857 0 : nal_size = (nal_size << 8) + ((u8)data[i]);
858 : }
859 0 : data += ctx->nal_size_length;
860 :
861 0 : if (pck_size < nal_size + ctx->nal_size_length) break;
862 :
863 0 : gf_bs_write_u32(bs, 1);
864 0 : gf_bs_write_data(bs, data, nal_size);
865 0 : data += nal_size;
866 0 : pck_size -= nal_size + ctx->nal_size_length;
867 : }
868 :
869 0 : gf_bs_get_content_no_truncate(bs, &ctx->nal_buffer, &pck_size, &ctx->nal_buffer_alloc);
870 0 : gf_bs_del(bs);
871 0 : data = ctx->nal_buffer;
872 : }
873 :
874 0 : cu_pkt.payload_size = pck_size;
875 0 : cu_pkt.payload = data;
876 0 : if (ipck) cu_pkt.timestamp = gf_filter_pck_get_cts(ipck);
877 :
878 : #ifndef EMUL_NV_DLL
879 0 : res = cuCtxPushCurrent(cuda_ctx);
880 0 : if (res != CUDA_SUCCESS) {
881 0 : GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[NVDec] failed to push CUDA CTX %s\n", cudaGetErrorEnum(res) ) );
882 : }
883 0 : if (ctx->skip_next_frame) {
884 0 : ctx->skip_next_frame = GF_FALSE;
885 : } else {
886 0 : res = cuvidParseVideoData(ctx->dec_inst->cu_parser, &cu_pkt);
887 0 : if (res != CUDA_SUCCESS) {
888 0 : GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[NVDec] decoder instance %d failed to parse data %s\n", ctx->dec_inst->id, cudaGetErrorEnum(res) ) );
889 : }
890 : }
891 : #endif //EMUL_NV_DLL
892 :
893 : //queue reference to source packet props
894 0 : if (ipck) {
895 0 : gf_filter_pck_ref_props(&ipck);
896 0 : gf_list_add(ctx->src_packets, ipck);
897 : }
898 :
899 0 : if (ctx->reload_decoder_state) {
900 0 : if (ctx->reload_decoder_state==2) {
901 0 : nvdec_destroy_decoder(ctx->dec_inst);
902 : } else {
903 0 : ctx->skip_next_frame = GF_TRUE;
904 : }
905 :
906 0 : ctx->reload_decoder_state = 0;
907 0 : if (!ctx->out_size || !ctx->pix_fmt) {
908 0 : cuCtxPopCurrent(NULL);
909 0 : return GF_NOT_SUPPORTED;
910 : }
911 :
912 : //need to setup decoder
913 0 : if (! ctx->dec_inst->cu_decoder) {
914 0 : nvdec_init_decoder(ctx);
915 : }
916 0 : cuCtxPopCurrent(NULL);
917 0 : return GF_OK;
918 : }
919 : //drop packet
920 0 : if (ipck)
921 0 : gf_filter_pid_drop_packet(ctx->ipid);
922 :
923 0 : f = gf_list_pop_front(ctx->frames);
924 0 : if (!f) {
925 : #ifndef EMUL_NV_DLL
926 0 : cuCtxPopCurrent(NULL);
927 : #endif
928 0 : if (!ipck && gf_filter_pid_is_eos(ctx->ipid)) {
929 0 : nvdec_reset_pcks(ctx);
930 0 : gf_filter_pid_set_eos(ctx->opid);
931 0 : return GF_EOS;
932 : }
933 : return GF_OK;
934 : }
935 0 : if (ctx->use_gl_texture || (ctx->fmode==NVDEC_SINGLE) ) {
936 : assert(!ctx->pending_frame);
937 0 : ctx->pending_frame = f;
938 0 : return nvdec_send_hw_frame(ctx);
939 : }
940 :
941 : assert(ctx->out_size);
942 0 : dst_pck = gf_filter_pck_new_alloc(ctx->opid, ctx->out_size, &output);
943 0 : if (!dst_pck) return GF_OUT_OF_MEM;
944 :
945 : memset(&params, 0, sizeof(params));
946 0 : params.progressive_frame = f->frame_info.progressive_frame;
947 0 : params.top_field_first = f->frame_info.top_field_first;
948 :
949 0 : nvdec_merge_pck_props(ctx, f, dst_pck);
950 : e = GF_OK;
951 0 : if (gf_filter_pck_get_seek_flag(dst_pck)) {
952 0 : gf_filter_pck_discard(dst_pck);
953 : } else {
954 0 : res = cuvidMapVideoFrame(ctx->dec_inst->cu_decoder, f->frame_info.picture_index, &map_mem, &pitch, &params);
955 0 : if (res == CUDA_SUCCESS) {
956 : CUDA_MEMCPY2D mcpi;
957 : memset(&mcpi, 0, sizeof(CUDA_MEMCPY2D));
958 0 : mcpi.srcMemoryType = CU_MEMORYTYPE_DEVICE;
959 0 : mcpi.srcDevice = map_mem;
960 0 : mcpi.srcPitch = pitch;
961 :
962 0 : mcpi.dstMemoryType = CU_MEMORYTYPE_HOST;
963 0 : mcpi.dstHost = output;
964 0 : mcpi.dstPitch = ctx->stride;
965 0 : mcpi.WidthInBytes = MIN(pitch, ctx->stride);
966 0 : mcpi.Height = ctx->height;
967 :
968 0 : res = cuMemcpy2D(&mcpi);
969 0 : if (res != CUDA_SUCCESS) {
970 0 : GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[NVDec] failed to copy Y video plane from GPU to host mem %s\n", cudaGetErrorEnum(res)));
971 : e = GF_IO_ERR;
972 : } else {
973 :
974 0 : mcpi.srcDevice = map_mem + ctx->height * pitch;
975 0 : mcpi.dstHost = output + ctx->stride * ctx->height;
976 0 : mcpi.dstPitch = ctx->stride_uv;
977 0 : mcpi.WidthInBytes = MIN(pitch, ctx->stride);
978 0 : mcpi.Height = ctx->uv_height;
979 :
980 0 : res = cuMemcpy2D(&mcpi);
981 0 : if (res != CUDA_SUCCESS) {
982 0 : GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[NVDec] failed to copy UV video plane from GPU to host mem %s\n", cudaGetErrorEnum(res)));
983 : e = GF_IO_ERR;
984 : }
985 : }
986 0 : cuvidUnmapVideoFrame(ctx->dec_inst->cu_decoder, map_mem);
987 :
988 0 : gf_filter_pck_send(dst_pck);
989 :
990 : } else {
991 0 : GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[NVDec] failed to map video frame data %s\n", cudaGetErrorEnum(res)));
992 : e = GF_IO_ERR;
993 0 : gf_filter_pck_discard(dst_pck);
994 : }
995 : }
996 0 : cuCtxPopCurrent(NULL);
997 :
998 : memset(f, 0, sizeof(NVDecFrame));
999 0 : gf_list_add(ctx->frames_res, f);
1000 :
1001 0 : return e;
1002 : }
1003 :
1004 0 : void nvframe_release(GF_Filter *filter, GF_FilterPid *pid, GF_FilterPacket *pck)
1005 : {
1006 0 : GF_FilterFrameInterface *frame = gf_filter_pck_get_frame_interface(pck);
1007 0 : NVDecFrame *f = (NVDecFrame*) frame->user_data;
1008 0 : NVDecCtx *ctx = (NVDecCtx *)f->ctx;
1009 :
1010 : memset(f, 0, sizeof(NVDecFrame));
1011 0 : gf_list_add(ctx->frames_res, f);
1012 0 : }
1013 :
1014 : #ifndef GPAC_DISABLE_3D
1015 :
1016 : /*forward declaration of the GPAC matrix type used for the texture coordinate matrix*/
1017 : typedef struct __matrix GF_NVCodecMatrix;
1018 :
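 : //OpenGL output: the decoded surface is copied device-to-device into a CUDA-registered PBO, then
 : //uploaded to the GL texture with glTexSubImage2D (Y plane as LUMINANCE, interleaved UV as
 : //LUMINANCE_ALPHA at half resolution)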
1019 0 : GF_Err nvframe_get_gl_texture(GF_FilterFrameInterface *frame, u32 plane_idx, u32 *gl_tex_format, u32 *gl_tex_id, GF_NVCodecMatrix * texcoordmatrix)
1020 : {
1021 : CUDA_MEMCPY2D mcpi;
1022 : CUVIDPROCPARAMS params;
1023 : CUresult res;
1024 : GF_Err e = GF_OK;
1025 : CUdeviceptr tx_data, vid_data;
1026 : size_t tx_pitch;
1027 : u32 vid_pitch;
1028 : u32 pbo_id, tx_id, gl_fmt, gl_btype = GL_UNSIGNED_BYTE;
1029 0 : NVDecFrame *f = (NVDecFrame *)frame->user_data;
1030 0 : NVDecCtx *ctx = (NVDecCtx *)f->ctx;
1031 :
1032 0 : if (plane_idx>1) return GF_BAD_PARAM;
1033 :
1034 0 : res = cuCtxPushCurrent(cuda_ctx);
1035 0 : if (res != CUDA_SUCCESS) {
1036 0 : GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[NVDec] failed to push CUDA CTX %s\n", cudaGetErrorEnum(res) ) );
1037 : }
1038 :
1039 0 : if (! *gl_tex_id && ! plane_idx && ctx->y_tx_id ) {
1040 0 : cuGLUnregisterBufferObject(ctx->y_pbo_id);
1041 0 : ctx->y_pbo_id = 0;
1042 0 : ctx->y_tx_id = 0;
1043 : }
1044 0 : if (! *gl_tex_id && plane_idx && ctx->uv_tx_id ) {
1045 0 : cuGLUnregisterBufferObject(ctx->uv_pbo_id);
1046 0 : ctx->uv_pbo_id = 0;
1047 0 : ctx->uv_tx_id = 0;
1048 : }
1049 :
1050 : #ifdef ENABLE_10BIT_OUTPUT
1051 : if ((ctx->bpp_luma>8) || (ctx->bpp_chroma>8)) {
1052 : gl_btype = GL_UNSIGNED_SHORT;
1053 : }
1054 : #endif
1055 0 : if (!plane_idx) {
1056 0 : if (!ctx->y_pbo_id) {
1057 0 : glGenBuffers(1, &ctx->y_pbo_id);
1058 0 : glGenTextures(1, &ctx->y_tx_id);
1059 :
1060 0 : glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, ctx->y_pbo_id);
1061 0 : glBufferData(GL_PIXEL_UNPACK_BUFFER_ARB, ctx->stride * ctx->height, NULL, GL_STREAM_DRAW_ARB);
1062 0 : glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
1063 :
1064 0 : cuGLRegisterBufferObject(ctx->y_pbo_id);
1065 :
1066 0 : glBindTexture(GL_TEXTURE_2D, ctx->y_tx_id);
1067 0 : glTexImage2D(GL_TEXTURE_2D, 0, GL_LUMINANCE, ctx->width, ctx->height, 0, GL_LUMINANCE, gl_btype, NULL);
1068 0 : glBindTexture(GL_TEXTURE_2D, 0);
1069 :
1070 0 : f->y_mapped = GF_FALSE;
1071 : }
1072 0 : *gl_tex_format = GL_TEXTURE_2D;
1073 0 : *gl_tex_id = tx_id = ctx->y_tx_id;
1074 0 : if (f->y_mapped) {
1075 0 : cuCtxPopCurrent(NULL);
1076 0 : return GF_OK;
1077 : }
1078 0 : f->y_mapped = GF_TRUE;
1079 0 : pbo_id = ctx->y_pbo_id;
1080 : gl_fmt = GL_LUMINANCE;
1081 : } else {
1082 0 : if (!ctx->uv_pbo_id) {
1083 0 : glGenBuffers(1, &ctx->uv_pbo_id);
1084 0 : glGenTextures(1, &ctx->uv_tx_id);
1085 :
1086 0 : glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, ctx->uv_pbo_id);
1087 0 : glBufferData(GL_PIXEL_UNPACK_BUFFER_ARB, ctx->stride * ctx->height / 2, NULL, GL_STREAM_DRAW_ARB);
1088 0 : glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
1089 :
1090 0 : cuGLRegisterBufferObject(ctx->uv_pbo_id);
1091 :
1092 0 : glBindTexture(GL_TEXTURE_2D, ctx->uv_tx_id);
1093 0 : glTexImage2D(GL_TEXTURE_2D, 0, GL_LUMINANCE_ALPHA, ctx->width/2, ctx->height/2, 0, GL_LUMINANCE_ALPHA, gl_btype, NULL);
1094 0 : glBindTexture(GL_TEXTURE_2D, 0);
1095 0 : f->uv_mapped = GF_FALSE;
1096 : }
1097 0 : *gl_tex_format = GL_TEXTURE_2D;
1098 0 : *gl_tex_id = tx_id = ctx->uv_tx_id;
1099 0 : if (f->uv_mapped) {
1100 0 : cuCtxPopCurrent(NULL);
1101 0 : return GF_OK;
1102 : }
1103 0 : f->uv_mapped = GF_TRUE;
1104 0 : pbo_id = ctx->uv_pbo_id;
1105 : gl_fmt = GL_LUMINANCE_ALPHA;
1106 : }
1107 :
1108 0 : res = cuGLMapBufferObject(&tx_data, &tx_pitch, pbo_id);
1109 0 : if (res != CUDA_SUCCESS) {
1110 0 : GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[NVDec] failed to map GL texture data %s\n", cudaGetErrorEnum(res) ) );
1111 : return GF_IO_ERR;
1112 : }
1113 : assert(tx_pitch != 0);
1114 :
1115 : memset(&params, 0, sizeof(params));
1116 0 : params.progressive_frame = f->frame_info.progressive_frame;
1117 : //params.second_field = 0;
1118 0 : params.top_field_first = f->frame_info.top_field_first;
1119 0 : res = cuvidMapVideoFrame(ctx->dec_inst->cu_decoder, f->frame_info.picture_index, &vid_data, &vid_pitch, &params);
1120 :
1121 0 : if (res != CUDA_SUCCESS) {
1122 0 : GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[NVDec] failed to map decoded picture data %s\n", cudaGetErrorEnum(res) ) );
1123 : return GF_IO_ERR;
1124 : }
1125 : assert(vid_pitch != 0);
1126 :
1127 : memset(&mcpi, 0, sizeof(CUDA_MEMCPY2D));
1128 0 : mcpi.srcMemoryType = CU_MEMORYTYPE_DEVICE;
1129 0 : if (plane_idx) {
1130 0 : mcpi.srcDevice = vid_data + ctx->height * vid_pitch;
1131 0 : tx_pitch *= 2; //2 bytes per pixel
1132 : } else {
1133 0 : mcpi.srcDevice = vid_data;
1134 : }
1135 0 : mcpi.srcPitch = vid_pitch;
1136 :
1137 0 : mcpi.dstMemoryType = CU_MEMORYTYPE_DEVICE;
1138 0 : mcpi.dstDevice = tx_data;
1139 0 : mcpi.dstPitch = tx_pitch / ctx->height;
1140 :
1141 0 : mcpi.WidthInBytes = MIN(mcpi.dstPitch, vid_pitch);
1142 0 : mcpi.Height = ctx->height;
1143 0 : if (plane_idx) mcpi.Height /= 2;
1144 :
1145 0 : res = cuMemcpy2D(&mcpi);
1146 0 : if (res != CUDA_SUCCESS) {
1147 0 : GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[NVDec] failed to copy Y video plane from GPU to host mem %s\n", cudaGetErrorEnum(res) ) );
1148 : e = GF_IO_ERR;
1149 : }
1150 :
1151 0 : cuvidUnmapVideoFrame(ctx->dec_inst->cu_decoder, vid_data);
1152 0 : cuGLUnmapBufferObject(pbo_id);
1153 :
1154 :
1155 0 : cuCtxPopCurrent(NULL);
1156 :
1157 : /*bind PBO to texture and call glTexSubImage2D only after PBO transfer is queued, otherwise we'll have a one frame delay*/
1158 0 : glBindTexture(GL_TEXTURE_2D, tx_id);
1159 0 : glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, pbo_id);
1160 :
1161 : #ifdef ENABLE_10BIT_OUTPUT
1162 : if (ctx->bpp_chroma+ctx->bpp_luma>16) {
1163 : Float a, b;
1164 : #error "FIX NVDEC GL color mapping in 10 bit"
1165 : a = 65535.0f / (65472.0f - 63.0f);
1166 : b = -63.0f * a / 65535.0f;
1167 :
1168 : glPixelStorei(GL_UNPACK_ALIGNMENT, 2);
1169 : //glPixelStorei(GL_UNPACK_SWAP_BYTES, 1);
1170 : //glPixelStorei(GL_UNPACK_LSB_FIRST, 1);
1171 : //we use 10 bits but GL will normalise using 16 bits, so we need to multiply the normalised result by 2^6
1172 : //glPixelTransferf(GL_RED_BIAS, 0.00096317f);
1173 : //glPixelTransferf(GL_RED_SCALE, 0.000015288f);
1174 :
1175 : glPixelTransferf(GL_RED_SCALE, a);
1176 : glPixelTransferf(GL_RED_BIAS, b);
1177 :
1178 : if (plane_idx) {
1179 : glPixelTransferf(GL_ALPHA_SCALE, a);
1180 : glPixelTransferf(GL_ALPHA_BIAS, b);
1181 : }
1182 : }
1183 : #endif
1184 0 : if (!plane_idx) {
1185 0 : glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, ctx->width, ctx->height, gl_fmt , gl_btype, NULL);
1186 : } else {
1187 0 : glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, ctx->width/2, ctx->height/2, gl_fmt , gl_btype, NULL);
1188 : }
1189 :
1190 0 : glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
1191 0 : glBindTexture(GL_TEXTURE_2D, 0);
1192 :
1193 0 : return e;
1194 : }
1195 :
1196 : #endif
1197 :
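 : //single-surface output (fmode=single): the picture is mapped and copied into ctx->single_frame_data
 : //only when a plane is first requested, so all dispatched frames share the same host buffer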
1198 0 : GF_Err nvframe_get_frame(GF_FilterFrameInterface *frame, u32 plane_idx, const u8 **outPlane, u32 *outStride)
1199 : {
1200 0 : unsigned int pitch = 0;
1201 : GF_Err e = GF_OK;
1202 0 : NVDecFrame *f = (NVDecFrame *)frame->user_data;
1203 0 : NVDecCtx *ctx = (NVDecCtx *)f->ctx;
1204 :
1205 0 : if (plane_idx>=ctx->nb_planes) return GF_BAD_PARAM;
1206 :
1207 : e = GF_OK;
1208 0 : if (!f->y_mapped) {
1209 : CUVIDPROCPARAMS params;
1210 0 : CUdeviceptr map_mem = 0;
1211 : CUresult res;
1212 :
1213 0 : if (ctx->out_size > ctx->single_frame_data_alloc) {
1214 0 : ctx->single_frame_data_alloc = ctx->out_size;
1215 0 : ctx->single_frame_data = gf_realloc(ctx->single_frame_data, ctx->out_size);
1216 : }
1217 0 : f->y_mapped = GF_TRUE;
1218 :
1219 0 : res = cuCtxPushCurrent(cuda_ctx);
1220 0 : if (res != CUDA_SUCCESS) {
1221 0 : GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[NVDec] failed to push CUDA CTX %s\n", cudaGetErrorEnum(res)));
1222 0 : return GF_IO_ERR;
1223 : }
1224 :
1225 : memset(&params, 0, sizeof(params));
1226 0 : params.progressive_frame = f->frame_info.progressive_frame;
1227 0 : params.top_field_first = f->frame_info.top_field_first;
1228 :
1229 0 : res = cuvidMapVideoFrame(ctx->dec_inst->cu_decoder, f->frame_info.picture_index, &map_mem, &pitch, &params);
1230 0 : if (res == CUDA_SUCCESS) {
1231 : CUDA_MEMCPY2D mcpi;
1232 : memset(&mcpi, 0, sizeof(CUDA_MEMCPY2D));
1233 0 : mcpi.srcMemoryType = CU_MEMORYTYPE_DEVICE;
1234 0 : mcpi.srcDevice = map_mem;
1235 0 : mcpi.srcPitch = pitch;
1236 :
1237 0 : mcpi.dstMemoryType = CU_MEMORYTYPE_HOST;
1238 0 : mcpi.dstHost = ctx->single_frame_data;
1239 0 : mcpi.dstPitch = ctx->stride;
1240 0 : mcpi.WidthInBytes = MIN(pitch, ctx->stride);
1241 0 : mcpi.Height = ctx->height;
1242 :
1243 0 : res = cuMemcpy2D(&mcpi);
1244 0 : if (res != CUDA_SUCCESS) {
1245 0 : GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[NVDec] failed to copy Y video plane from GPU to host mem %s\n", cudaGetErrorEnum(res)));
1246 : e = GF_IO_ERR;
1247 : }
1248 : else {
1249 :
1250 0 : mcpi.srcDevice = map_mem + ctx->height * pitch;
1251 0 : mcpi.dstHost = ctx->single_frame_data + ctx->stride * ctx->height;
1252 0 : mcpi.dstPitch = ctx->stride_uv;
1253 0 : mcpi.Height = ctx->uv_height;
1254 :
1255 0 : res = cuMemcpy2D(&mcpi);
1256 0 : if (res != CUDA_SUCCESS) {
1257 0 : GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[NVDec] failed to copy UV video plane from GPU to host mem %s\n", cudaGetErrorEnum(res)));
1258 : e = GF_IO_ERR;
1259 : }
1260 : }
1261 0 : cuvidUnmapVideoFrame(ctx->dec_inst->cu_decoder, map_mem);
1262 : }
1263 : else {
1264 0 : GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[NVDec] failed to map video frame %s\n", cudaGetErrorEnum(res)));
1265 : e = GF_IO_ERR;
1266 : }
1267 0 : cuCtxPopCurrent(NULL);
1268 : }
1269 0 : if (e) return e;
1270 :
1271 0 : switch (plane_idx) {
1272 0 : case 0:
1273 0 : *outPlane = ctx->single_frame_data;
1274 0 : *outStride = ctx->stride;
1275 0 : break;
1276 0 : case 1:
1277 0 : *outPlane = ctx->single_frame_data + ctx->stride * ctx->height;
1278 0 : *outStride = ctx->stride_uv;
1279 0 : break;
1280 0 : case 2:
1281 0 : *outPlane = ctx->single_frame_data + ctx->stride * ctx->height + ctx->stride_uv * ctx->uv_height;
1282 0 : *outStride = ctx->stride_uv;
1283 0 : break;
1284 : default:
1285 : return GF_BAD_PARAM;
1286 : }
1287 : return GF_OK;
1288 : }
1289 :
1290 :
1291 0 : GF_Err nvdec_send_hw_frame(NVDecCtx *ctx)
1292 : {
1293 : GF_FilterPacket *dst_pck;
1294 : NVDecFrame *f;
1295 :
1296 0 : if (!ctx->pending_frame) return GF_BAD_PARAM;
1297 : f = ctx->pending_frame;
1298 0 : ctx->pending_frame = NULL;
1299 :
1300 0 : f->gframe.user_data = f;
1301 0 : f->gframe.get_plane = nvframe_get_frame;
1302 : #ifndef GPAC_DISABLE_3D
1303 0 : f->gframe.get_gl_texture = nvframe_get_gl_texture;
1304 : #endif
1305 :
1306 0 : if (ctx->frame_size_changed) {
1307 0 : ctx->frame_size_changed = GF_FALSE;
1308 0 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_WIDTH, &PROP_UINT(ctx->width));
1309 0 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_HEIGHT, &PROP_UINT(ctx->height));
1310 0 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_STRIDE, &PROP_UINT(ctx->stride));
1311 0 : gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_PIXFMT, &PROP_UINT(ctx->pix_fmt));
1312 : }
1313 :
1314 :
1315 0 : if (!gf_list_count(ctx->frames) && ctx->needs_resetup)
1316 0 : f->gframe.flags = GF_FRAME_IFCE_BLOCKING;
1317 :
1318 0 : dst_pck = gf_filter_pck_new_frame_interface(ctx->opid, &f->gframe, nvframe_release);
1319 0 : if (!dst_pck) return GF_OUT_OF_MEM;
1320 :
1321 0 : nvdec_merge_pck_props(ctx, f, dst_pck);
1322 0 : if (gf_filter_pck_get_seek_flag(dst_pck)) {
1323 0 : gf_filter_pck_discard(dst_pck);
1324 : memset(f, 0, sizeof(NVDecFrame));
1325 0 : gf_list_add(ctx->frames_res, f);
1326 : } else {
1327 0 : gf_filter_pck_send(dst_pck);
1328 : }
1329 :
1330 : return GF_OK;
1331 : }
1332 :
1333 :
1334 :
1335 :
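 : //lazy, refcounted probing of the CUDA/CUVID libraries: cuvid_load_state is 0 before probing,
 : //1 when the SDK or device is unavailable, 2 when decoding is usable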
1336 : static u32 cuvid_load_state = 0;
1337 : static u32 nb_cuvid_inst=0;
1338 2413 : static void init_cuda_sdk()
1339 : {
1340 2413 : if (!cuvid_load_state) {
1341 : #ifdef EMUL_NV_DLL
1342 : cuvid_load_state = 2;
1343 : nb_cuvid_inst++;
1344 : #else
1345 : CUresult res;
1346 : int device_count;
1347 2276 : res = cuInit(0, __CUDA_API_VERSION);
1348 2276 : nb_cuvid_inst++;
1349 2276 : cuvid_load_state = 1;
1350 2276 : if (res == CUDA_ERROR_SHARED_OBJECT_INIT_FAILED) {
1351 2276 : GF_LOG(GF_LOG_DEBUG, GF_LOG_CODEC, ("[NVDec] cuda lib not found on system\n") );
1352 0 : } else if (res != CUDA_SUCCESS) {
1353 0 : GF_LOG(GF_LOG_INFO, GF_LOG_CODEC, ("[NVDec] failed to init cuda %s\n", cudaGetErrorEnum(res) ) );
1354 : } else {
1355 0 : res = cuDeviceGetCount(&device_count);
1356 0 : if (res != CUDA_SUCCESS) {
1357 0 : GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[NVDec] failed to query cuda/nvidia cards %s\n", cudaGetErrorEnum(res) ) );
1358 : } else {
1359 0 : if (! device_count) {
1360 0 : GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[NVDec] no device found\n" ) );
1361 : } else {
1362 0 : cuvid_load_state = 2;
1363 : }
1364 : }
1365 : }
1366 : #endif
1367 :
1368 : } else {
1369 137 : nb_cuvid_inst++;
1370 : }
1371 2413 : }
1372 :
1373 0 : static GF_Err nvdec_initialize(GF_Filter *filter)
1374 : {
1375 0 : NVDecCtx *ctx = gf_filter_get_udta(filter);
1376 :
1377 0 : ctx->frames = gf_list_new();
1378 0 : ctx->frames_res = gf_list_new();
1379 0 : ctx->src_packets = gf_list_new();
1380 0 : return GF_OK;
1381 : }
1382 :
1383 0 : static void nvdec_finalize(GF_Filter *filter)
1384 : {
1385 0 : NVDecCtx *ctx = gf_filter_get_udta(filter);
1386 :
1387 0 : nvdec_reset_pcks(ctx);
1388 0 : gf_list_del(ctx->src_packets);
1389 :
1390 0 : if (!global_nb_loaded_nvdec && global_unactive_decoders) {
1391 0 : while (gf_list_count(global_unactive_decoders)) {
1392 0 : NVDecInstance *inst = gf_list_pop_back(global_unactive_decoders);
1393 0 : nvdec_destroy_decoder(inst);
1394 0 : if (inst->cu_parser) cuvidDestroyVideoParser(inst->cu_parser);
1395 0 : gf_free(inst);
1396 : }
1397 0 : gf_list_del(global_unactive_decoders);
1398 :
1399 0 : gf_mx_del(global_inst_mutex);
1400 : }
1401 :
1402 0 : if (ctx->dec_inst) {
1403 0 : nvdec_destroy_decoder(ctx->dec_inst);
1404 0 : if (ctx->dec_inst->cu_parser) cuvidDestroyVideoParser(ctx->dec_inst->cu_parser);
1405 0 : gf_free(ctx->dec_inst);
1406 : }
1407 :
1408 :
1409 : assert(nb_cuvid_inst);
1410 0 : nb_cuvid_inst--;
1411 0 : if (!nb_cuvid_inst) {
1412 0 : if (cuda_ctx) cuCtxDestroy(cuda_ctx);
1413 0 : cuda_ctx = NULL;
1414 0 : cuUninit();
1415 0 : cuvid_load_state = 0;
1416 : }
1417 0 : while (gf_list_count(ctx->frames)) {
1418 0 : NVDecFrame *f = (NVDecFrame *) gf_list_pop_back(ctx->frames);
1419 0 : gf_free(f);
1420 : }
1421 0 : gf_list_del(ctx->frames);
1422 0 : while (gf_list_count(ctx->frames_res)) {
1423 0 : NVDecFrame *f = (NVDecFrame *) gf_list_pop_back(ctx->frames_res);
1424 0 : gf_free(f);
1425 : }
1426 0 : gf_list_del(ctx->frames_res);
1427 :
1428 0 : if (ctx->single_frame_data) gf_free(ctx->single_frame_data);
1429 0 : if (ctx->xps_buf) gf_free(ctx->xps_buf);
1430 0 : if (ctx->nal_buffer) gf_free(ctx->nal_buffer);
1431 0 : }
1432 :
1433 :
1434 : static const GF_FilterCapability NVDecCaps[] =
1435 : {
1436 : CAP_UINT(GF_CAPS_INPUT, GF_PROP_PID_STREAM_TYPE, GF_STREAM_VISUAL),
1437 : CAP_BOOL(GF_CAPS_INPUT_EXCLUDED, GF_PROP_PID_UNFRAMED, GF_TRUE),
1438 : CAP_UINT(GF_CAPS_INPUT, GF_PROP_PID_CODECID, GF_CODECID_MPEG4_PART2),
1439 : CAP_UINT(GF_CAPS_INPUT, GF_PROP_PID_CODECID, GF_CODECID_MPEG2_MAIN),
1440 : CAP_UINT(GF_CAPS_INPUT, GF_PROP_PID_CODECID, GF_CODECID_MPEG2_SNR),
1441 : CAP_UINT(GF_CAPS_INPUT, GF_PROP_PID_CODECID, GF_CODECID_MPEG2_SPATIAL),
1442 : CAP_UINT(GF_CAPS_INPUT, GF_PROP_PID_CODECID, GF_CODECID_MPEG2_HIGH),
1443 : CAP_UINT(GF_CAPS_INPUT, GF_PROP_PID_CODECID, GF_CODECID_MPEG2_422),
1444 : CAP_UINT(GF_CAPS_INPUT, GF_PROP_PID_CODECID, GF_CODECID_HEVC),
1445 : CAP_UINT(GF_CAPS_INPUT, GF_PROP_PID_CODECID, GF_CODECID_AVC),
1446 : CAP_BOOL(GF_CAPS_INPUT_EXCLUDED,GF_PROP_PID_TILE_BASE, GF_TRUE),
1447 : //CAP_UINT(GF_CAPS_INPUT, GF_PROP_PID_CODECID, GF_CODECID_VC1),
1448 : CAP_UINT(GF_CAPS_OUTPUT_STATIC, GF_PROP_PID_STREAM_TYPE, GF_STREAM_VISUAL),
1449 : CAP_UINT(GF_CAPS_OUTPUT_STATIC, GF_PROP_PID_CODECID, GF_CODECID_RAW)
1450 : };
1451 :
1452 : #define OFFS(_n) #_n, offsetof(NVDecCtx, _n)
1453 :
1454 : static const GF_FilterArgs NVDecArgs[] =
1455 : {
1456 : { OFFS(num_surfaces), "number of hardware surfaces to allocate", GF_PROP_UINT, "20", NULL, GF_FS_ARG_HINT_ADVANCED },
1457 : { OFFS(unload), "decoder unload mode\n"
1458 : "- no: keep inactive decoder alive\n"
1459 : "- destroy: destroy inactive decoder\n"
1460 : "- reuse: detach decoder from inactive PIDs and reattach to active ones", GF_PROP_UINT, "no", "no|destroy|reuse", GF_FS_ARG_HINT_EXPERT },
1461 : { OFFS(vmode), "video decoder backend\n"
1462 : "- cuvid: use dedicated video engines directly\n"
1463 : "- cuda: use a CUDA-based decoder if faster than dedicated engines\n"
1464 : "- dxva: go through DXVA internally if possible (requires D3D9 interop)", GF_PROP_UINT, "cuvid", "cuvid|cuda|dxva", GF_FS_ARG_HINT_ADVANCED },
1465 : { OFFS(fmode), "frame output mode\n"
1466 : "- copy: each frame is copied and dispatched\n"
1467 : "- single: frame data is only retrieved when used, single memory space for all frames (not safe if multiple consumers)\n"
1468 : "- gl: frame data is mapped to an OpenGL texture"
1469 : , GF_PROP_UINT, "gl", "copy|single|gl", 0 },
1470 :
1471 : { 0 }
1472 : };
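 : //typical usage sketch (assuming a GPAC build with this filter enabled and a CUVID-capable GPU),
 : //forcing the decoder in a playback chain with frame-copy output:
 : //  gpac -i source.mp4 nvdec:fmode=copy vout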
1473 :
1474 : GF_FilterRegister NVDecRegister = {
1475 : .name = "nvdec",
1476 : GF_FS_SET_DESCRIPTION("NVidia decoder")
1477 : GF_FS_SET_HELP("This filter decodes MPEG-2, MPEG-4 Part 2, AVC|H264 and HEVC streams through the NVidia hardware decoder. It allows GPU frame dispatch or direct frame copy.")
1478 : .private_size = sizeof(NVDecCtx),
1479 : SETCAPS(NVDecCaps),
1480 : .flags = GF_FS_REG_CONFIGURE_MAIN_THREAD,
1481 : .initialize = nvdec_initialize,
1482 : .finalize = nvdec_finalize,
1483 : .args = NVDecArgs,
1484 : .configure_pid = nvdec_configure_pid,
1485 : .process = nvdec_process,
1486 : .process_event = nvdec_process_event
1487 : };
1488 :
1489 :
1490 2877 : const GF_FilterRegister *nvdec_register(GF_FilterSession *session)
1491 : {
1492 : //check whether nvdec is globally blacklisted - if so, do not try to load the CUDA SDK, which may be time consuming on some devices
1493 2877 : const char *blacklist = gf_opts_get_key("core", "blacklist");
1494 2877 : if (blacklist && strstr(blacklist, "nvdec"))
1495 : return NULL;
1496 :
1497 2413 : init_cuda_sdk();
1498 : //do not register if no SDK
1499 2413 : if (cuvid_load_state != 2) {
1500 : // man/md doc generation is requested, register the filter anyway so it can be documented
1501 2413 : if (!gf_opts_get_bool("temp", "gendoc"))
1502 : return NULL;
1503 14 : NVDecRegister.version = "! Warning: CUVID SDK NOT AVAILABLE ON THIS SYSTEM !";
1504 : }
1505 :
1506 : return &NVDecRegister;
1507 : }
1508 :
1509 : #else
1510 :
1511 : const GF_FilterRegister *nvdec_register(GF_FilterSession *session)
1512 : {
1513 : return NULL;
1514 : }
1515 : #endif // (!defined(GPAC_STATIC_BUILD) && (defined(WIN32) || defined(GPAC_CONFIG_LINUX) || defined(GPAC_CONFIG_DARWIN)) && !defined(GPAC_DISABLE_NVDEC))