LCOV - coverage.info - filters/dec

LCOV - code coverage report

Current view:	top level - filters - dec_nvdec.c (source / functions)		Hit	Total	Coverage
Test:	coverage.info	Lines:	16	688	2.3 %
Date:	2021-04-29 23:48:07	Functions:	2	23	8.7 %

          Line data    Source code

       1             : /*
       2             :  *                      GPAC - Multimedia Framework C SDK
       3             :  *
       4             :  *                      Authors: Jean Le Feuvre
       5             :  *                      Copyright (c) Telecom ParisTech 2017-2021
       6             :  *                                      All rights reserved
       7             :  *
       8             :  *  This file is part of GPAC / NVidia Hardware decoder filter
       9             :  *
      10             :  *  GPAC is free software; you can redistribute it and/or modify
      11             :  *  it under the terms of the GNU Lesser General Public License as published by
      12             :  *  the Free Software Foundation; either version 2, or (at your option)
      13             :  *  any later version.
      14             :  *
      15             :  *  GPAC is distributed in the hope that it will be useful,
      16             :  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
      17             :  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      18             :  *  GNU Lesser General Public License for more details.
      19             :  *
      20             :  *  You should have received a copy of the GNU Lesser General Public
      21             :  *  License along with this library; see the file COPYING.  If not, write to
      22             :  *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
      23             :  *
      24             :  */
      25             : 
      26             : #include <gpac/thread.h>
      27             : #include <gpac/internal/media_dev.h>
      28             : #include <gpac/constants.h>
      29             : #include <gpac/filters.h>
      30             : 
      31             : #if (!defined(GPAC_STATIC_BUILD) && (defined(WIN32) || defined(GPAC_CONFIG_LINUX) || defined(GPAC_CONFIG_DARWIN)) && !defined(GPAC_DISABLE_NVDEC))
      32             : 
      33             : #include "dec_nvdec_sdk.h"
      34             : 
      35             : //#define EMUL_NV_DLL
      36             : 
      37             : #ifndef GPAC_DISABLE_3D
      38             : 
      39             : 
      40             : #ifdef LOAD_GL_1_5
      41             : GLDECL_EXTERN(glGenBuffers);
      42             : GLDECL_EXTERN(glBindBuffer);
      43             : GLDECL_EXTERN(glBufferData);
      44             : #endif
      45             : 
      46             : #endif
      47             : 
      48             : typedef struct _nv_dec_inst NVDecInstance;
      49             : 
      50             : typedef enum
      51             : {
      52             :         NVDEC_COPY = 0,
      53             :         NVDEC_SINGLE,
      54             :         NVDEC_GL
      55             : } NVDecFrameMode ;
      56             : 
      57             : typedef enum
      58             : {
      59             :         NVDEC_CUVID = 0,
      60             :         NVDEC_CUDA,
      61             :         NVDEC_DXVA
      62             : } NVDecVideoMode;
      63             : 
      64             : typedef struct _nv_dec_ctx
      65             : {
      66             :         u32 unload;
      67             :         NVDecFrameMode fmode;
      68             :         NVDecVideoMode vmode;
      69             :         u32 num_surfaces;
      70             : 
      71             :         GF_FilterPid *ipid, *opid;
      72             :         u32 codec_id;
      73             :         Bool use_gl_texture;
      74             :         u32 width, height, bpp_luma, bpp_chroma;
      75             :         cudaVideoCodec codec_type;
      76             :         cudaVideoChromaFormat chroma_fmt;
      77             : 
      78             :         u32 out_size, stride, pix_fmt, stride_uv, nb_planes, uv_height;
      79             :         u32 reload_decoder_state;
      80             :         Bool skip_next_frame;
      81             :         CUresult decode_error, dec_create_error;
      82             :         Bool frame_size_changed;
      83             :         Bool needs_resetup;
      84             :         unsigned long prefer_dec_mode;
      85             : 
      86             :         NVDecInstance *dec_inst;
      87             : 
      88             :         GF_List *frames;
      89             :         GF_List *frames_res;
      90             :         GF_List *src_packets;
      91             : 
      92             :         struct __nv_frame *pending_frame;
      93             : 
      94             : 
      95             :         u8 *xps_buf;
      96             :         u32 xps_buf_size;
      97             :         u32 nal_size_length;
      98             :         Bool inject_xps;
      99             :         u8 *nal_buffer;
     100             :         u32 nal_buffer_alloc;
     101             : 
     102             :         u8 *single_frame_data;
     103             :         u32 single_frame_data_alloc;
     104             : 
     105             : #ifndef GPAC_DISABLE_3D
     106             :         Bool gl_provider_requested;
     107             :         GLint y_tx_id, uv_tx_id;
     108             :         GLint y_pbo_id, uv_pbo_id;
     109             : #endif
     110             : } NVDecCtx;
     111             : 
     112             : 
     113             : struct _nv_dec_inst
     114             : {
     115             :         u32 width, height, bpp_luma, bpp_chroma, stride;
     116             :         cudaVideoCodec codec_type;
     117             :         cudaVideoChromaFormat chroma_fmt;
     118             :         u32 id;
     119             :         u32 th_id;
     120             : 
     121             :         //allocated video parser and decoder
     122             :         CUvideoparser cu_parser;
     123             :         CUvideodecoder cu_decoder;
     124             : 
     125             :         //current associated context, 0 is none
     126             :         NVDecCtx *ctx;
     127             : };
     128             : 
     129             : 
     130             : typedef struct __nv_frame
     131             : {
     132             :         CUVIDPARSERDISPINFO frame_info;
     133             :         NVDecCtx *ctx;
     134             :         GF_FilterFrameInterface gframe;
     135             :         Bool y_mapped, uv_mapped;
     136             : } NVDecFrame;
     137             : 
     138             : static GF_List *global_unactive_decoders=NULL;
     139             : static u32 global_nb_loaded_nvdec = 0;
     140             : static u32 global_nb_loaded_decoders = 0;
     141             : static GF_Mutex *global_inst_mutex = NULL;
     142             : static CUcontext cuda_ctx = NULL;
     143             : #ifndef EMUL_NV_DLL
     144             : static Bool cuda_ctx_gl = GF_FALSE;
     145             : static CUdevice  cuda_dev = -1;
     146             : #endif
     147             : 
     148             : //#define ENABLE_10BIT_OUTPUT
     149             : 
     150           0 : static GF_Err nvdec_init_decoder(NVDecCtx *ctx)
     151             : {
     152             :         CUresult res;
     153             :         CUVIDDECODECREATEINFO cuvid_info;
     154             : 
     155             :         assert(ctx->dec_inst);
     156             : 
     157             :         memset(&cuvid_info, 0, sizeof(CUVIDDECODECREATEINFO));
     158           0 :         cuvid_info.CodecType = ctx->codec_type;
     159           0 :         cuvid_info.ulWidth = ctx->width;
     160           0 :         cuvid_info.ulHeight = ctx->height;
     161           0 :         cuvid_info.ulNumDecodeSurfaces = ctx->num_surfaces;
     162           0 :         cuvid_info.ChromaFormat = ctx->chroma_fmt;
     163             :         cuvid_info.OutputFormat = cudaVideoSurfaceFormat_NV12;
     164             : #ifdef ENABLE_10BIT_OUTPUT
     165             :         if (ctx->bpp_luma + ctx->bpp_chroma > 16)
     166             :                 cuvid_info.OutputFormat = cudaVideoSurfaceFormat_P016;
     167             : #endif
     168           0 :     cuvid_info.DeinterlaceMode = cudaVideoDeinterlaceMode_Adaptive;
     169           0 :         cuvid_info.bitDepthMinus8 = ctx->bpp_luma - 8;
     170           0 :         cuvid_info.ulTargetWidth = ctx->width;
     171           0 :         cuvid_info.ulTargetHeight = ctx->height;
     172             :     cuvid_info.display_area.left = 0;
     173           0 :     cuvid_info.display_area.right = ctx->width;
     174             :     cuvid_info.display_area.top = 0;
     175           0 :         cuvid_info.display_area.bottom = ctx->height;
     176             : 
     177           0 :     cuvid_info.ulNumOutputSurfaces = 1;
     178           0 :         cuvid_info.ulCreationFlags = ctx->prefer_dec_mode;
     179             : 
     180             :     // create the decoder
     181           0 :         res = cuvidCreateDecoder(&ctx->dec_inst->cu_decoder, &cuvid_info);
     182           0 :         if (res != CUDA_SUCCESS) {
     183           0 :                 GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[NVDec] failed to create cuvid decoder %s\n", cudaGetErrorEnum(res) ) );
     184           0 :                 ctx->dec_create_error = res;
     185           0 :                 return GF_IO_ERR;
     186             :         }
     187           0 :         global_nb_loaded_decoders++;
     188             :         assert(global_nb_loaded_decoders);
     189           0 :         ctx->dec_inst->id = global_nb_loaded_decoders;
     190           0 :         ctx->dec_inst->th_id = gf_th_id();
     191           0 :         GF_LOG(GF_LOG_INFO, GF_LOG_CODEC, ("[NVDec] decoder instance %d created (%dx%d) - %d total decoders loaded\n", ctx->dec_inst->id, ctx->width, ctx->height, global_nb_loaded_decoders) );
     192             :         return GF_OK;
     193             : }
     194             : 
     195           0 : Bool load_inactive_dec(NVDecCtx *ctx)
     196             : {
     197             :         u32 i, count;
     198             :         //look for unactive decoder with same settings
     199           0 :         if (global_unactive_decoders) {
     200             : 
     201           0 :                 gf_mx_p(global_inst_mutex);
     202           0 :                 count = gf_list_count(global_unactive_decoders);
     203           0 :                 for (i=0; i<count; i++) {
     204           0 :                         NVDecInstance *inst = gf_list_get(global_unactive_decoders, i);
     205           0 :                         if ((inst->width==ctx->width) && (inst->height==ctx->height) && (inst->bpp_luma == ctx->bpp_luma )
     206           0 :                                 && (inst->bpp_chroma == ctx->bpp_chroma ) && (inst->codec_type == ctx->codec_type) && (inst->chroma_fmt == ctx->chroma_fmt )
     207             :                                 ) {
     208             : 
     209           0 :                                         gf_list_rem(global_unactive_decoders, i);
     210           0 :                                         ctx->dec_inst = inst;
     211           0 :                                         inst->ctx = ctx;
     212           0 :                                         gf_mx_v(global_inst_mutex);
     213           0 :                                         return GF_TRUE;
     214             :                         }
     215             :                 }
     216           0 :                 if (ctx->dec_inst && !ctx->dec_inst->cu_decoder) {
     217           0 :                         ctx->dec_inst->ctx = ctx;
     218           0 :                         gf_mx_v(global_inst_mutex);
     219           0 :                         return GF_FALSE;
     220             :                 }
     221           0 :                 if (ctx->dec_inst) {
     222             :                         NVDecInstance *inst = ctx->dec_inst;
     223           0 :                         if ((inst->width==ctx->width) && (inst->height==ctx->height) && (inst->bpp_luma == ctx->bpp_luma )
     224           0 :                                 && (inst->bpp_chroma == ctx->bpp_chroma ) && (inst->codec_type == ctx->codec_type) && (inst->chroma_fmt == ctx->chroma_fmt )
     225             :                                 ) {
     226             :                                 ctx->dec_inst = inst;
     227           0 :                                 inst->ctx = ctx;
     228           0 :                                 gf_mx_v(global_inst_mutex);
     229           0 :                                 return GF_TRUE;
     230             :                         }
     231             :                 } else {
     232           0 :                         ctx->dec_inst = gf_list_pop_back(global_unactive_decoders);
     233             :                 }
     234           0 :                 gf_mx_v(global_inst_mutex);
     235             :         }
     236           0 :         if (!ctx->dec_inst) {
     237           0 :                 GF_SAFEALLOC(ctx->dec_inst, NVDecInstance);
     238           0 :                 if (!ctx->dec_inst)
     239             :                         return GF_FALSE;
     240             :         }
     241           0 :         ctx->dec_inst->ctx = ctx;
     242           0 :         return GF_FALSE;
     243             : }
     244             : 
     245           0 : static void nvdec_destroy_decoder(NVDecInstance *inst)
     246             : {
     247           0 :         if (inst->cu_decoder) {
     248           0 :                 cuvidDestroyDecoder(inst->cu_decoder);
     249           0 :                 inst->cu_decoder = NULL;
     250           0 :                 global_nb_loaded_decoders--;
     251           0 :                 GF_LOG(GF_LOG_INFO, GF_LOG_CODEC, ("[NVDec] decoder instance %d destruction - %d decoders still loaded\n", inst->id, global_nb_loaded_decoders ) );
     252             :         }
     253           0 : }
     254             : 
     255           0 : static void update_pix_fmt(NVDecCtx *ctx, Bool use_10bits)
     256             : {
     257           0 :         switch (ctx->chroma_fmt) {
     258           0 :         case cudaVideoChromaFormat_420:
     259           0 :                 ctx->pix_fmt = use_10bits ? GF_PIXEL_NV12_10 : GF_PIXEL_NV12;
     260           0 :                 break;
     261           0 :         case cudaVideoChromaFormat_422:
     262           0 :                 ctx->pix_fmt = use_10bits  ? GF_PIXEL_YUV422_10 : GF_PIXEL_YUV422;
     263           0 :                 break;
     264           0 :         case cudaVideoChromaFormat_444:
     265           0 :                 ctx->pix_fmt = use_10bits  ? GF_PIXEL_YUV444_10 : GF_PIXEL_YUV444;
     266           0 :                 break;
     267           0 :         default:
     268           0 :                 ctx->pix_fmt = 0;
     269           0 :                 return;
     270             :         }
     271           0 :         gf_pixel_get_size_info(ctx->pix_fmt, ctx->width, ctx->height, &ctx->out_size, &ctx->stride, &ctx->stride_uv, &ctx->nb_planes, &ctx->uv_height);
     272             : }
     273             : 
     274           0 : static int CUDAAPI HandleVideoSequence(void *pUserData, CUVIDEOFORMAT *pFormat)
     275             : {
     276             :         Bool use_10bits=GF_FALSE;
     277             :         Bool skip_output_resize=GF_FALSE;
     278             :         NVDecInstance *inst= (NVDecInstance *)pUserData;
     279           0 :         NVDecCtx *ctx = inst->ctx;
     280             : 
     281             :         u32 w, h;
     282           0 :         w = pFormat->coded_width;
     283           0 :         h = pFormat->coded_height;
     284             : 
     285           0 :         if (pFormat->display_area.right && (pFormat->display_area.right<(s32)w)) w = pFormat->display_area.right;
     286           0 :         if (pFormat->display_area.bottom && (pFormat->display_area.bottom<(s32)h)) h = pFormat->display_area.bottom;
     287             : 
     288           0 :         GF_LOG(GF_LOG_INFO, GF_LOG_CODEC, ("[NVDec] Decoder instance %d Video sequence change detected - new setup %u x %u, %u bpp\n", inst->id, pFormat->coded_width, pFormat->coded_height, pFormat->bit_depth_luma_minus8 + 8) );
     289             :         //no change in config
     290           0 :         if ((ctx->width == w)
     291           0 :                 && (ctx->height == h)
     292           0 :                 && (ctx->bpp_luma == 8 + pFormat->bit_depth_luma_minus8)
     293           0 :                 && (ctx->bpp_chroma == 8 + pFormat->bit_depth_chroma_minus8)
     294           0 :                 && (ctx->codec_type == pFormat->codec)
     295           0 :                 && (ctx->chroma_fmt == pFormat->chroma_format)
     296             :         ) {
     297           0 :                 if (ctx->dec_inst && ctx->dec_inst->cu_decoder)
     298             :                         return 1;
     299             :                 skip_output_resize = GF_TRUE;
     300             :         }
     301             : 
     302             :         //commented out since this falls back to soft decoding !
     303             : #ifdef ENABLE_10BIT_OUTPUT
     304             :         if (ctx->bpp_luma + ctx->bpp_chroma > 16)  use_10bits = GF_TRUE;
     305             : #endif
     306             : 
     307           0 :         ctx->width = w;
     308           0 :         ctx->height = h;
     309           0 :         ctx->bpp_luma = 8 + pFormat->bit_depth_luma_minus8;
     310           0 :         ctx->bpp_chroma = 8 + pFormat->bit_depth_chroma_minus8;
     311           0 :         ctx->codec_type = pFormat->codec;
     312           0 :         ctx->chroma_fmt = pFormat->chroma_format;
     313           0 :         ctx->stride = pFormat->coded_width;
     314             : 
     315             :         //if load_inatcive returns TRUE, we are reusing an existing decoder with the same config, no need to recreate one
     316           0 :         if (load_inactive_dec(ctx)) {
     317           0 :                 GF_LOG(GF_LOG_INFO, GF_LOG_CODEC, ("[NVDec] reusing inactive decoder %dx%d - %d total decoders loaded\n", ctx->width, ctx->height, global_nb_loaded_decoders) );
     318           0 :                 ctx->stride = ctx->dec_inst->stride;
     319             :                 //initial config, need to trigger output resize
     320           0 :                 if (!ctx->out_size) ctx->reload_decoder_state = 1;
     321             : 
     322           0 :                 update_pix_fmt(ctx, use_10bits);
     323           0 :                 return GF_OK;
     324             :         }
     325           0 :         if (!ctx->dec_inst) return GF_OUT_OF_MEM;
     326             :         //if we have an existing decoder but with a different config, let's reload
     327           0 :         nvdec_destroy_decoder(ctx->dec_inst);
     328             : 
     329           0 :         ctx->dec_inst->width = ctx->width;
     330           0 :         ctx->dec_inst->height = ctx->height;
     331           0 :         ctx->dec_inst->bpp_luma = ctx->bpp_luma;
     332           0 :         ctx->dec_inst->bpp_chroma = ctx->bpp_chroma;
     333           0 :         ctx->dec_inst->codec_type = ctx->codec_type;
     334           0 :         ctx->dec_inst->chroma_fmt = ctx->chroma_fmt;
     335           0 :         ctx->dec_inst->ctx = ctx;
     336           0 :         ctx->stride = use_10bits ? 2*ctx->width : ctx->width;
     337             : 
     338           0 :         update_pix_fmt(ctx, use_10bits);
     339             : 
     340           0 :         gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_WIDTH, &PROP_UINT(ctx->width));
     341           0 :         gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_HEIGHT, &PROP_UINT(ctx->height));
     342           0 :         gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_STRIDE, &PROP_UINT(ctx->stride));
     343           0 :         gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_BIT_DEPTH_Y, &PROP_UINT(use_10bits ? ctx->bpp_luma : 8));
     344           0 :         gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_BIT_DEPTH_UV, &PROP_UINT(use_10bits ? ctx->bpp_chroma : 8));
     345           0 :         gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_PIXFMT, &PROP_UINT(ctx->pix_fmt));
     346             : 
     347             : 
     348             :         assert(ctx->out_size);
     349             :         assert(ctx->stride);
     350           0 :         ctx->dec_inst->stride = ctx->stride;
     351             : 
     352           0 :         if (! ctx->dec_inst->cu_decoder) {
     353           0 :                 nvdec_init_decoder(ctx);
     354           0 :                 if (!skip_output_resize) {
     355           0 :                         ctx->reload_decoder_state = 1;
     356             :                 }
     357             :         } else {
     358           0 :                 ctx->reload_decoder_state = 2;
     359             :         }
     360             :         return 1;
     361             : }
     362             : 
     363           0 : static int CUDAAPI HandlePictureDecode(void *pUserData, CUVIDPICPARAMS *pPicParams)
     364             : {
     365             :         NVDecInstance *inst = (NVDecInstance *)pUserData;
     366           0 :         inst->ctx->decode_error = cuvidDecodePicture(inst->cu_decoder, pPicParams);
     367             : 
     368           0 :         if (inst->ctx->decode_error != CUDA_SUCCESS) {
     369           0 :                 GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[NVDec] decoder instance %d failed to decode picture %s\n", inst->id, cudaGetErrorEnum(inst->ctx->decode_error) ) );
     370             :                 return GF_IO_ERR;
     371             :         }
     372           0 :         GF_LOG(GF_LOG_DEBUG, GF_LOG_CODEC, ("[NVDec] decoded picture %u OK\n", pPicParams->CurrPicIdx ) );
     373             : 
     374             :         return 1;
     375             : }
     376             : 
     377           0 : static int CUDAAPI HandlePictureDisplay(void *pUserData, CUVIDPARSERDISPINFO *pPicParams)
     378             : {
     379             :         u32 i, count;
     380             :         NVDecFrame *f;
     381             :         NVDecInstance *inst = (NVDecInstance *)pUserData;
     382           0 :         NVDecCtx *ctx = (NVDecCtx *)inst->ctx;
     383           0 :         GF_LOG(GF_LOG_DEBUG, GF_LOG_CODEC, ("[NVDec] picture %u CTS "LLU" ready for display, queuing it\n", pPicParams->picture_index, pPicParams->timestamp) );
     384             : 
     385           0 :         f = gf_list_pop_back(ctx->frames_res);
     386           0 :         if (!f) {
     387           0 :                 GF_SAFEALLOC(f, NVDecFrame);
     388           0 :                 if (!f) return 0;
     389             :         }
     390           0 :         f->frame_info = *pPicParams;
     391           0 :         f->frame_info.timestamp = pPicParams->timestamp;
     392           0 :         f->ctx = ctx;
     393           0 :         count = gf_list_count(ctx->frames);
     394           0 :         for (i=0; i<count; i++) {
     395           0 :                 NVDecFrame *af = gf_list_get(ctx->frames, i);
     396           0 :                 if (af->frame_info.timestamp > f->frame_info.timestamp) {
     397           0 :                         gf_list_insert(ctx->frames, f, i);
     398           0 :                         return 1;
     399             :                 }
     400             :         }
     401           0 :         gf_list_add(ctx->frames, f);
     402           0 :         return 1;
     403             : }
     404             : 
     405           0 : static void nvdec_store_paramlist(GF_BitStream *bs, GF_List *psl)
     406             : {
     407             :         u32 i, count;
     408           0 :         count = gf_list_count(psl);
     409           0 :         for (i=0; i<count; i++) {
     410           0 :                 GF_NALUFFParam *slc = gf_list_get(psl, i);
     411           0 :                 gf_bs_write_u32(bs, 1);
     412           0 :                 gf_bs_write_data(bs, slc->data, slc->size);
     413             :         }
     414           0 : }
     415             : 
     416           0 : static void nvdec_store_xps(NVDecCtx *ctx, GF_AVCConfig *avc_cfg, GF_HEVCConfig *hevc_cfg)
     417             : {
     418             :         u32 i, count;
     419           0 :         GF_BitStream *bs = gf_bs_new(NULL, 0, GF_BITSTREAM_WRITE);
     420           0 :         if (avc_cfg) {
     421           0 :                 ctx->nal_size_length = avc_cfg->nal_unit_size;
     422           0 :                 nvdec_store_paramlist(bs, avc_cfg->sequenceParameterSets);
     423           0 :                 nvdec_store_paramlist(bs, avc_cfg->sequenceParameterSetExtensions);
     424           0 :                 nvdec_store_paramlist(bs, avc_cfg->pictureParameterSets);
     425           0 :                 gf_odf_avc_cfg_del(avc_cfg);
     426           0 :         } else if (hevc_cfg) {
     427           0 :                 ctx->nal_size_length = hevc_cfg->nal_unit_size;
     428           0 :                 count = gf_list_count(hevc_cfg->param_array);
     429           0 :                 for (i=0; i<count; i++) {
     430           0 :                         GF_NALUFFParamArray *pa = gf_list_get(hevc_cfg->param_array, i);
     431           0 :                         nvdec_store_paramlist(bs, pa->nalus);
     432             :                 }
     433           0 :                 gf_odf_hevc_cfg_del(hevc_cfg);
     434             :         }
     435           0 :         if (ctx->xps_buf) gf_free(ctx->xps_buf);
     436           0 :         ctx->xps_buf = NULL;
     437           0 :         ctx->xps_buf_size = 0;
     438           0 :         gf_bs_get_content(bs, &ctx->xps_buf, &ctx->xps_buf_size);
     439           0 :         gf_bs_del(bs);
     440           0 :         ctx->inject_xps = GF_TRUE;
     441           0 : }
     442             : 
     443             : 
     444           0 : static GF_Err nvdec_configure_stream(GF_Filter *filter, NVDecCtx *ctx)
     445             : {
     446             : #ifndef EMUL_NV_DLL
     447             :         CUresult res;
     448             : #endif
     449             :         GF_HEVCConfig *hevc_cfg = NULL;
     450             :         GF_AVCConfig *avc_cfg = NULL;
     451           0 :         const GF_PropertyValue *dcd = gf_filter_pid_get_property(ctx->ipid, GF_PROP_PID_DECODER_CONFIG);
     452             :     CUVIDPARSERPARAMS oVideoParserParameters;
     453             : 
     454           0 :         switch (ctx->codec_id) {
     455           0 :         case GF_CODECID_MPEG1:
     456           0 :                 ctx->codec_type = cudaVideoCodec_MPEG1;
     457           0 :                 break;
     458           0 :         case GF_CODECID_MPEG2_SIMPLE:
     459             :         case GF_CODECID_MPEG2_MAIN:
     460             :         case GF_CODECID_MPEG2_SNR:
     461             :         case GF_CODECID_MPEG2_SPATIAL:
     462             :         case GF_CODECID_MPEG2_HIGH:
     463             :         case GF_CODECID_MPEG2_422:
     464           0 :                 ctx->codec_type = cudaVideoCodec_MPEG2;
     465           0 :                 break;
     466           0 :         case GF_CODECID_MPEG4_PART2:
     467           0 :                 ctx->codec_type = cudaVideoCodec_MPEG4;
     468           0 :                 break;
     469           0 :         case GF_CODECID_AVC:
     470           0 :                 if (!dcd) return GF_EOS;
     471           0 :                 ctx->codec_type = cudaVideoCodec_H264;
     472           0 :                 avc_cfg = gf_odf_avc_cfg_read(dcd->value.data.ptr, dcd->value.data.size);
     473           0 :                 if (!avc_cfg) return GF_NON_COMPLIANT_BITSTREAM;
     474             :                 break;
     475           0 :         case GF_CODECID_HEVC:
     476           0 :                 if (!dcd) return GF_EOS;
     477           0 :                 ctx->codec_type = cudaVideoCodec_HEVC;
     478           0 :                 hevc_cfg = gf_odf_hevc_cfg_read(dcd->value.data.ptr, dcd->value.data.size, GF_FALSE);
     479           0 :                 if (!hevc_cfg) return GF_NON_COMPLIANT_BITSTREAM;
     480             :                 break;
     481             :         }
     482             : 
     483             :         //create a video parser and a video decoder
     484             :     memset(&oVideoParserParameters, 0, sizeof(CUVIDPARSERPARAMS));
     485           0 :         ctx->needs_resetup = GF_FALSE;
     486           0 :         ctx->nal_size_length = 0;
     487             : 
     488             :         //this destroys avc_cfg / hevc_cfg
     489           0 :         if (avc_cfg || hevc_cfg)
     490           0 :                 nvdec_store_xps(ctx, avc_cfg, hevc_cfg);
     491             : 
     492           0 :         if (load_inactive_dec(ctx)) {
     493           0 :                 GF_LOG(GF_LOG_INFO, GF_LOG_CODEC, ("[NVDec] reusing inactive decoder %dx%d - %d total decoders loaded\n", ctx->width, ctx->height, global_nb_loaded_decoders ) );
     494           0 :                 ctx->stride = ctx->dec_inst->stride;
     495             :         }
     496           0 :         if (!ctx->dec_inst) {
     497             :                 return GF_OUT_OF_MEM;
     498             :         }
     499           0 :         ctx->decode_error = CUDA_SUCCESS;
     500             : 
     501           0 :         oVideoParserParameters.CodecType = ctx->codec_type;
     502           0 :     oVideoParserParameters.ulMaxNumDecodeSurfaces = ctx->num_surfaces;
     503           0 :     oVideoParserParameters.ulMaxDisplayDelay = 4;
     504           0 :         oVideoParserParameters.ulClockRate = 1000;
     505           0 :     oVideoParserParameters.pExtVideoInfo = NULL;
     506           0 :     oVideoParserParameters.pfnSequenceCallback = HandleVideoSequence;    // Called before decoding frames and/or whenever there is a format change
     507           0 :     oVideoParserParameters.pfnDecodePicture = HandlePictureDecode;    // Called when a picture is ready to be decoded (decode order)
     508           0 :     oVideoParserParameters.pfnDisplayPicture = HandlePictureDisplay;   // Called whenever a picture is ready to be displayed (display order)
     509           0 :         oVideoParserParameters.pUserData = ctx->dec_inst;
     510             : 
     511             : #ifndef EMUL_NV_DLL
     512           0 :     res = cuCtxPushCurrent(cuda_ctx);
     513           0 :         if (res != CUDA_SUCCESS) {
     514           0 :                 GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[NVDec] failed to push CUDA CTX %s\n", cudaGetErrorEnum(res) ) );
     515             :         }
     516           0 :         res = cuvidCreateVideoParser(&ctx->dec_inst->cu_parser, &oVideoParserParameters);
     517           0 :         cuCtxPopCurrent(NULL);
     518             : 
     519           0 :         if (res != CUDA_SUCCESS) {
     520           0 :                 GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[NVDec] failed to create CUVID parserCTX %s\n", cudaGetErrorEnum(res) ) );
     521             :                 return GF_PROFILE_NOT_SUPPORTED;
     522             :         }
     523             : #endif
     524             : 
     525           0 :         GF_LOG(GF_LOG_DEBUG, GF_LOG_CODEC, ("[NVDec] video parser init OK\n") );
     526             : 
     527           0 :         switch (ctx->codec_type) {
     528           0 :         case cudaVideoCodec_MPEG1:
     529           0 :                 gf_filter_set_name(filter, ctx->use_gl_texture ? "NVidia HWGL MPEG-1" : "NVidia HW MPEG-1");
     530           0 :                 break;
     531           0 :         case cudaVideoCodec_MPEG2:
     532           0 :                 gf_filter_set_name(filter, ctx->use_gl_texture ? "NVidia HWGLMPEG-2" : "NVidia HW MPEG-2");
     533           0 :                 break;
     534           0 :         case cudaVideoCodec_MPEG4:
     535           0 :                 gf_filter_set_name(filter, ctx->use_gl_texture ? "NVidia HWGL MPEG-4 part2" : "NVidia HW MPEG-4 part2");
     536           0 :                 break;
     537           0 :         case cudaVideoCodec_H264:
     538           0 :                 gf_filter_set_name(filter, ctx->use_gl_texture ? "NVidia HWGL AVC|H264" : "NVidia HW AVC|H264");
     539           0 :                 break;
     540           0 :         case cudaVideoCodec_HEVC:
     541           0 :                 gf_filter_set_name(filter, ctx->use_gl_texture ? "NVidia HWGL HEVC" : "NVidia HW HEVC");
     542           0 :                 break;
     543           0 :         case cudaVideoCodec_VC1:
     544           0 :                 gf_filter_set_name(filter, ctx->use_gl_texture ? "NVidia HWGL VC1" : "NVidia HW VC1");
     545           0 :                 break;
     546             :         default:
     547             :                 break;
     548             :         }
     549             :         return GF_OK;
     550             : }
     551             : 
     552           0 : static GF_Err nvdec_configure_pid(GF_Filter *filter, GF_FilterPid *pid, Bool is_remove)
     553             : {
     554             :         const GF_PropertyValue *prop;
     555             : #ifndef EMUL_NV_DLL
     556             :         CUresult res;
     557             : #endif
     558           0 :         NVDecCtx *ctx = (NVDecCtx *) gf_filter_get_udta(filter);
     559             : 
     560           0 :         if (is_remove) {
     561           0 :                 if (ctx->opid) {
     562           0 :                         gf_filter_pid_remove(ctx->opid);
     563           0 :                         ctx->opid = NULL;
     564             :                 }
     565           0 :                 ctx->ipid = NULL;
     566             : 
     567           0 :                 if (ctx->unload == 2) {
     568           0 :                         global_nb_loaded_nvdec--;
     569           0 :                         if (ctx->dec_inst) {
     570             :                                 assert(global_unactive_decoders);
     571           0 :                                 gf_mx_p(global_inst_mutex);
     572           0 :                                 ctx->dec_inst->ctx = NULL;
     573           0 :                                 gf_list_add(global_unactive_decoders, ctx->dec_inst);
     574           0 :                                 ctx->dec_inst = NULL;
     575           0 :                                 gf_mx_v(global_inst_mutex);
     576             :                         }
     577             :                 }
     578             :         }
     579             : 
     580             : 
     581           0 :         if (ctx->ipid && (ctx->ipid != pid)) return GF_REQUIRES_NEW_INSTANCE;
     582             : 
     583           0 :         if (! gf_filter_pid_check_caps(pid))
     584             :                 return GF_NOT_SUPPORTED;
     585           0 :         ctx->ipid = pid;
     586           0 :         ctx->use_gl_texture = (ctx->fmode == NVDEC_GL) ? GF_TRUE : GF_FALSE;
     587             : 
     588             : #ifndef GPAC_DISABLE_3D
     589           0 :         if (ctx->use_gl_texture && (ctx->fmode==NVDEC_GL) && !ctx->gl_provider_requested) {
     590           0 :                 GF_Err e = gf_filter_request_opengl(filter);
     591           0 :                 if (e) {
     592           0 :                         GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[NVDec] failed to request an openGL provider (error %s), will not use OpenGL output\n", gf_error_to_string(e) ));
     593           0 :                         ctx->use_gl_texture = GF_FALSE;
     594             :                 }
     595           0 :                 ctx->gl_provider_requested = GF_TRUE;
     596             :         }
     597             : #endif
     598             : 
     599           0 :         prop = gf_filter_pid_get_property(pid, GF_PROP_PID_CODECID);
     600           0 :         if (!prop) return GF_NOT_SUPPORTED;
     601           0 :         ctx->codec_id = prop->value.uint;
     602             : 
     603           0 :         switch (ctx->codec_id) {
     604             :         case GF_CODECID_MPEG1:
     605             :         case GF_CODECID_MPEG2_SIMPLE:
     606             :         case GF_CODECID_MPEG2_MAIN:
     607             :         case GF_CODECID_MPEG2_SNR:
     608             :         case GF_CODECID_MPEG2_SPATIAL:
     609             :         case GF_CODECID_MPEG2_HIGH:
     610             :         case GF_CODECID_MPEG2_422:
     611             :         case GF_CODECID_MPEG4_PART2:
     612             :         case GF_CODECID_AVC:
     613             :         case GF_CODECID_HEVC:
     614             :                 break;
     615             :         default:
     616             :                 return GF_NOT_SUPPORTED;
     617             :         }
     618             : 
     619           0 :         if (!ctx->opid)
     620           0 :                 ctx->opid = gf_filter_pid_new(filter);
     621             : 
     622           0 :         gf_filter_pid_copy_properties(ctx->opid, ctx->ipid);
     623           0 :         gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_DECODER_CONFIG, NULL);
     624           0 :         gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_DECODER_CONFIG_ENHANCEMENT, NULL);
     625           0 :         gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_CODECID, &PROP_UINT(GF_CODECID_RAW) );
     626           0 :         gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_UNFRAMED, NULL);
     627             : 
     628           0 :         gf_filter_pid_set_framing_mode(ctx->ipid, GF_TRUE);
     629             : 
     630           0 :         switch (ctx->codec_id) {
     631           0 :         case GF_CODECID_AVC:
     632             :         case GF_CODECID_HEVC:
     633           0 :                 prop = gf_filter_pid_get_property(pid, GF_PROP_PID_DECODER_CONFIG);
     634             :                 //not ready yet
     635           0 :                 if (!prop) return GF_OK;
     636             :                 break;
     637             :         }
     638             : 
     639             : #ifdef GPAC_DISABLE_3D
     640             :         ctx->use_gl_texture = GF_FALSE;
     641             : #endif
     642             : 
     643           0 :         if (! cuda_ctx) {
     644             : #ifndef EMUL_NV_DLL
     645             :             int major, minor;
     646             :             char deviceName[256];
     647           0 :                 res = cuDeviceGet(&cuda_dev, 0);
     648           0 :                 if (res != CUDA_SUCCESS) {
     649           0 :                         GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[NVDec] failed to init cuda device %s\n", cudaGetErrorEnum(res) ) );
     650           0 :                         return GF_IO_ERR;
     651             :                 }
     652             : 
     653           0 :                 cuDeviceComputeCapability(&major, &minor, cuda_dev);
     654           0 :                 cuDeviceGetName(deviceName, 256, cuda_dev);
     655             : 
     656           0 :                 GF_LOG(GF_LOG_INFO, GF_LOG_CODEC, ("[NVDec] GPU Device %s (idx 0) has SM %d.%d compute capability\n", deviceName, major, minor));
     657             : 
     658           0 :                 if (ctx->use_gl_texture) {
     659             : #ifndef GPAC_DISABLE_3D
     660           0 :                         res = cuGLCtxCreate(&cuda_ctx, CU_CTX_BLOCKING_SYNC, cuda_dev);
     661             : 
     662             : #ifdef LOAD_GL_1_5
     663             :                         GET_GLFUN(glGenBuffers);
     664             :                         GET_GLFUN(glBindBuffer);
     665             :                         GET_GLFUN(glBufferData);
     666             : #endif
     667           0 :                         cuda_ctx_gl = GF_TRUE;
     668             : 
     669             : #endif
     670             :                 } else {
     671           0 :                         res = cuCtxCreate(&cuda_ctx, CU_CTX_BLOCKING_SYNC, cuda_dev);
     672           0 :                         cuda_ctx_gl = GF_FALSE;
     673             :                 }
     674           0 :                 if (res != CUDA_SUCCESS) {
     675           0 :                         if (ctx->use_gl_texture) {
     676           0 :                                 cuda_ctx_gl = GF_FALSE;
     677           0 :                                 GF_LOG(GF_LOG_WARNING, GF_LOG_CODEC, ("[NVDec] Couldn't initialize cuda OpenGL context (error %s), retrying without OpenGL support\n", cudaGetErrorEnum(res) ) );
     678           0 :                                 res = cuCtxCreate(&cuda_ctx, CU_CTX_BLOCKING_SYNC, cuda_dev);
     679           0 :                                 if (res != CUDA_SUCCESS) {
     680           0 :                                         GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[NVDec] failed to init cuda context %s\n", cudaGetErrorEnum(res) ) );
     681             :                                 } else {
     682           0 :                                         ctx->use_gl_texture = GF_FALSE;
     683             :                                 }
     684             :                         } else {
     685           0 :                                 GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[NVDec] failed to init cuda context %s\n", cudaGetErrorEnum(res) ) );
     686             :                         }
     687             : 
     688           0 :                         if (res != CUDA_SUCCESS) {
     689             :                                 return GF_IO_ERR;
     690             :                         }
     691             :                 }
     692             : #endif
     693             : 
     694             :         }
     695             : 
     696           0 :         if (ctx->vmode == NVDEC_DXVA)
     697           0 :                 ctx->prefer_dec_mode = cudaVideoCreate_PreferDXVA;
     698           0 :         else if (ctx->vmode == NVDEC_CUDA)
     699           0 :                 ctx->prefer_dec_mode = cudaVideoCreate_PreferCUDA;
     700             :         else
     701           0 :                 ctx->prefer_dec_mode = cudaVideoCreate_PreferCUVID;
     702             : 
     703           0 :         if (ctx->unload == 2) {
     704           0 :                 global_nb_loaded_nvdec++;
     705           0 :                 if (!global_inst_mutex ) global_inst_mutex  = gf_mx_new("NVDecGlobal");
     706           0 :                 gf_mx_p(global_inst_mutex);
     707           0 :                 if (!global_unactive_decoders) global_unactive_decoders = gf_list_new();
     708           0 :                 gf_mx_v(global_inst_mutex);
     709             :         }
     710             : 
     711             : 
     712           0 :         ctx->needs_resetup = GF_TRUE;
     713             : 
     714           0 :         return GF_OK;
     715             : }
     716             : 
     717           0 : static Bool nvdec_process_event(GF_Filter *filter, const GF_FilterEvent *evt)
     718             : {
     719           0 :         NVDecCtx *ctx = (NVDecCtx *)gf_filter_get_udta(filter);
     720           0 :         if (evt->base.type == GF_FEVT_PLAY) {
     721           0 :                 while (gf_list_count(ctx->frames)) {
     722           0 :                         NVDecFrame *f = gf_list_pop_back(ctx->frames);
     723           0 :                         gf_list_add(ctx->frames_res, f);
     724             :                 }
     725             :         }
     726           0 :         return GF_FALSE;
     727             : }
     728             : 
     729             : 
     730             : #if 0
     731             :         case GF_CODEC_ABORT:
     732             :                 while (gf_list_count(ctx->frames)) {
     733             :                         NVDecFrame *f = (NVDecFrame *) gf_list_pop_back(ctx->frames);
     734             :                         memset(f, 0, sizeof(NVDecFrame));
     735             :                         gf_list_add(ctx->frames_res, f);
     736             :                 }
     737             :                 if (ctx->unload == 2) {
     738             :                         if (ctx->dec_inst) {
     739             :                                 assert(global_unactive_decoders);
     740             :                                 gf_mx_p(global_inst_mutex);
     741             :                                 if (ctx->decode_error) {
     742             :                                         GF_LOG(GF_LOG_INFO, GF_LOG_CODEC, ("[NVDec] deactivating decoder %dx%d and destroying instance\n", ctx->width, ctx->height ) );
     743             :                                         nvdec_destroy_decoder(ctx->dec_inst);
     744             :                                 } else {
     745             :                                         GF_LOG(GF_LOG_INFO, GF_LOG_CODEC, ("[NVDec] deactivating decoder %dx%d\n", ctx->width, ctx->height ) );
     746             :                                 }
     747             :                                 ctx->dec_inst->ctx = NULL;
     748             :                                 gf_list_add(global_unactive_decoders, ctx->dec_inst);
     749             :                                 ctx->dec_inst = NULL;
     750             :                                 gf_mx_v(global_inst_mutex);
     751             :                         }
     752             :                         ctx->needs_resetup = GF_TRUE;
     753             :                         ctx->dec_create_error = CUDA_SUCCESS;
     754             :                 } else if (ctx->unload == 1) {
     755             :                         if (ctx->dec_inst) {
     756             :                                 nvdec_destroy_decoder(ctx->dec_inst);
     757             :                         }
     758             :                         ctx->needs_resetup = GF_TRUE;
     759             :                         ctx->dec_create_error = CUDA_SUCCESS;
     760             :                 }
     761             :                 return GF_OK;
     762             :         }
     763             : #endif
     764             : 
     765             : static GF_Err nvdec_send_hw_frame(NVDecCtx *ctx);
     766             : 
     767           0 : static void nvdec_reset_pcks(NVDecCtx *ctx)
     768             : {
     769           0 :         while (gf_list_count(ctx->src_packets)) {
     770           0 :                 GF_FilterPacket *pck = gf_list_pop_back(ctx->src_packets);
     771           0 :                 gf_filter_pck_unref(pck);
     772             :         }
     773           0 : }
     774             : 
     775           0 : static void nvdec_merge_pck_props(NVDecCtx *ctx, NVDecFrame *f,  GF_FilterPacket *dst_pck)
     776             : {
     777             :         u32 i, count;
     778             :         GF_FilterPacket *src_pck = NULL;
     779           0 :         count = gf_list_count(ctx->src_packets);
     780           0 :         for (i = 0; i<count; i++) {
     781           0 :                 src_pck = gf_list_get(ctx->src_packets, i);
     782           0 :                 if (gf_filter_pck_get_cts(src_pck) == f->frame_info.timestamp) {
     783           0 :                         gf_filter_pck_merge_properties(src_pck, dst_pck);
     784           0 :                         gf_list_rem(ctx->src_packets, i);
     785           0 :                         gf_filter_pck_unref(src_pck);
     786             :                         return;
     787             :                 }
     788             :         }
     789             :         //not found !
     790           0 :         gf_filter_pck_set_cts(dst_pck, f->frame_info.timestamp);
     791           0 :         if (!gf_filter_pck_get_interlaced(dst_pck) && !f->frame_info.progressive_frame) {
     792           0 :                 gf_filter_pck_set_interlaced(dst_pck, f->frame_info.top_field_first ? 1 : 2);
     793             :         }
     794             : }
     795             : 
     796           0 : static GF_Err nvdec_process(GF_Filter *filter)
     797             : {
     798             :         NVDecFrame *f;
     799           0 :     CUdeviceptr map_mem = 0;
     800             :         CUVIDPROCPARAMS params;
     801           0 :         unsigned int pitch = 0;
     802             :         GF_Err e;
     803             :         u32 pck_size;
     804             :         const u8 *data;
     805             :         u8 *output;
     806             :         GF_FilterPacket *ipck, *dst_pck;
     807             :     CUVIDSOURCEDATAPACKET cu_pkt;
     808             :         CUresult res;
     809           0 :         NVDecCtx *ctx = (NVDecCtx *) gf_filter_get_udta(filter);
     810             : 
     811           0 :         ipck = gf_filter_pid_get_packet(ctx->ipid);
     812             : 
     813           0 :         if (ctx->needs_resetup) {
     814           0 :                 e = nvdec_configure_stream(filter, ctx);
     815           0 :                 if (e<0) return e;
     816             :                 //not ready
     817           0 :                 if (e==GF_EOS) return GF_OK;
     818             :         }
     819             : 
     820             :         memset(&cu_pkt, 0, sizeof(CUVIDSOURCEDATAPACKET));
     821           0 :         cu_pkt.flags = CUVID_PKT_TIMESTAMP;
     822           0 :         pck_size = 0;
     823             :         data = NULL;
     824           0 :         if (!ipck) {
     825           0 :                 if (!gf_filter_pid_is_eos(ctx->ipid))
     826             :                         return GF_OK;
     827             : 
     828           0 :                 cu_pkt.flags |= CUVID_PKT_ENDOFSTREAM;
     829           0 :                 ctx->skip_next_frame = GF_FALSE;
     830             :         } else {
     831           0 :                 data = gf_filter_pck_get_data(ipck, &pck_size);
     832             :         }
     833             : 
     834           0 :         if (ctx->dec_create_error) {
     835           0 :                 if (ipck) gf_filter_pid_drop_packet(ctx->ipid);
     836           0 :                 else if (gf_filter_pid_is_eos(ctx->ipid)) {
     837           0 :                         gf_filter_pid_set_eos(ctx->opid);
     838           0 :                         return GF_EOS;
     839             :                 }
     840             :                 return GF_IO_ERR;
     841             :         }
     842             : 
     843           0 :         if (data && ctx->nal_size_length) {
     844           0 :                 GF_BitStream *bs = gf_bs_new(ctx->nal_buffer, ctx->nal_buffer_alloc, GF_BITSTREAM_WRITE_DYN);
     845           0 :                 if (!bs) return GF_OUT_OF_MEM;
     846             : 
     847           0 :                 if (gf_filter_pck_get_sap(ipck)) ctx->inject_xps = GF_TRUE;
     848             : 
     849           0 :                 if (ctx->inject_xps) {
     850           0 :                         ctx->inject_xps = GF_FALSE;
     851           0 :                         gf_bs_write_data(bs, ctx->xps_buf, ctx->xps_buf_size);
     852             :                 }
     853             : 
     854           0 :                 while (pck_size) {
     855             :                         u32 i, nal_size = 0;
     856           0 :                         for (i = 0; i<ctx->nal_size_length; i++) {
     857           0 :                                 nal_size = (nal_size << 8) + ((u8)data[i]);
     858             :                         }
     859           0 :                         data += ctx->nal_size_length;
     860             : 
     861           0 :                         if (pck_size < nal_size + ctx->nal_size_length) break;
     862             : 
     863           0 :                         gf_bs_write_u32(bs, 1);
     864           0 :                         gf_bs_write_data(bs, data, nal_size);
     865           0 :                         data += nal_size;
     866           0 :                         pck_size -= nal_size + ctx->nal_size_length;
     867             :                 }
     868             : 
     869           0 :                 gf_bs_get_content_no_truncate(bs, &ctx->nal_buffer, &pck_size, &ctx->nal_buffer_alloc);
     870           0 :                 gf_bs_del(bs);
     871           0 :                 data = ctx->nal_buffer;
     872             :         }
     873             : 
     874           0 :         cu_pkt.payload_size = pck_size;
     875           0 :         cu_pkt.payload = data;
     876           0 :         if (ipck) cu_pkt.timestamp = gf_filter_pck_get_cts(ipck);
     877             : 
     878             : #ifndef EMUL_NV_DLL
     879           0 :     res = cuCtxPushCurrent(cuda_ctx);
     880           0 :         if (res != CUDA_SUCCESS) {
     881           0 :                 GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[NVDec] failed to push CUDA CTX %s\n", cudaGetErrorEnum(res) ) );
     882             :         }
     883           0 :         if (ctx->skip_next_frame) {
     884           0 :                 ctx->skip_next_frame = GF_FALSE;
     885             :         } else {
     886           0 :                 res = cuvidParseVideoData(ctx->dec_inst->cu_parser, &cu_pkt);
     887           0 :                 if (res != CUDA_SUCCESS) {
     888           0 :                         GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[NVDec] decoder instance %d failed to parse data %s\n", ctx->dec_inst->id, cudaGetErrorEnum(res) ) );
     889             :                 }
     890             :         }
     891             : #endif //EMUL_NV_DLL
     892             : 
     893             :         //queue reference to source packet props
     894           0 :         if (ipck) {
     895           0 :                 gf_filter_pck_ref_props(&ipck);
     896           0 :                 gf_list_add(ctx->src_packets, ipck);
     897             :         }
     898             : 
     899           0 :         if (ctx->reload_decoder_state) {
     900           0 :                 if (ctx->reload_decoder_state==2) {
     901           0 :                         nvdec_destroy_decoder(ctx->dec_inst);
     902             :                 } else {
     903           0 :                         ctx->skip_next_frame = GF_TRUE;
     904             :                 }
     905             : 
     906           0 :                 ctx->reload_decoder_state = 0;
     907           0 :                 if (!ctx->out_size || !ctx->pix_fmt) {
     908           0 :                         cuCtxPopCurrent(NULL);
     909           0 :                         return GF_NOT_SUPPORTED;
     910             :                 }
     911             : 
     912             :                 //need to setup decoder
     913           0 :                 if (! ctx->dec_inst->cu_decoder) {
     914           0 :                         nvdec_init_decoder(ctx);
     915             :                 }
     916           0 :                 cuCtxPopCurrent(NULL);
     917           0 :                 return GF_OK;
     918             :         }
     919             :         //drop packet
     920           0 :         if (ipck)
     921           0 :                 gf_filter_pid_drop_packet(ctx->ipid);
     922             : 
     923           0 :         f = gf_list_pop_front(ctx->frames);
     924           0 :         if (!f) {
     925             : #ifndef EMUL_NV_DLL
     926           0 :                 cuCtxPopCurrent(NULL);
     927             : #endif
     928           0 :                 if (!ipck && gf_filter_pid_is_eos(ctx->ipid)) {
     929           0 :                         nvdec_reset_pcks(ctx);
     930           0 :                         gf_filter_pid_set_eos(ctx->opid);
     931           0 :                         return GF_EOS;
     932             :                 }
     933             :                 return GF_OK;
     934             :         }
     935           0 :         if (ctx->use_gl_texture || (ctx->fmode==NVDEC_SINGLE) ) {
     936             :                 assert(!ctx->pending_frame);
     937           0 :                 ctx->pending_frame = f;
     938           0 :                 return nvdec_send_hw_frame(ctx);
     939             :         }
     940             : 
     941             :         assert(ctx->out_size);
     942           0 :         dst_pck = gf_filter_pck_new_alloc(ctx->opid, ctx->out_size, &output);
     943           0 :         if (!dst_pck) return GF_OUT_OF_MEM;
     944             : 
     945             :         memset(&params, 0, sizeof(params));
     946           0 :         params.progressive_frame = f->frame_info.progressive_frame;
     947           0 :         params.top_field_first = f->frame_info.top_field_first;
     948             : 
     949           0 :         nvdec_merge_pck_props(ctx, f, dst_pck);
     950             :         e = GF_OK;
     951           0 :         if (gf_filter_pck_get_seek_flag(dst_pck)) {
     952           0 :                 gf_filter_pck_discard(dst_pck);
     953             :         } else {
     954           0 :                 res = cuvidMapVideoFrame(ctx->dec_inst->cu_decoder, f->frame_info.picture_index, &map_mem, &pitch, &params);
     955           0 :                 if (res == CUDA_SUCCESS) {
     956             :                         CUDA_MEMCPY2D mcpi;
     957             :                         memset(&mcpi, 0, sizeof(CUDA_MEMCPY2D));
     958           0 :                         mcpi.srcMemoryType = CU_MEMORYTYPE_DEVICE;
     959           0 :                         mcpi.srcDevice = map_mem;
     960           0 :                         mcpi.srcPitch = pitch;
     961             : 
     962           0 :                         mcpi.dstMemoryType = CU_MEMORYTYPE_HOST;
     963           0 :                         mcpi.dstHost = output;
     964           0 :                         mcpi.dstPitch = ctx->stride;
     965           0 :                         mcpi.WidthInBytes = MIN(pitch, ctx->stride);
     966           0 :                         mcpi.Height = ctx->height;
     967             : 
     968           0 :                         res = cuMemcpy2D(&mcpi);
     969           0 :                         if (res != CUDA_SUCCESS) {
     970           0 :                                 GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[NVDec] failed to copy Y video plane from GPU to host mem %s\n", cudaGetErrorEnum(res)));
     971             :                                 e = GF_IO_ERR;
     972             :                         } else {
     973             : 
     974           0 :                                 mcpi.srcDevice = map_mem + ctx->height * pitch;
     975           0 :                                 mcpi.dstHost = output + ctx->stride * ctx->height;
     976           0 :                                 mcpi.dstPitch = ctx->stride_uv;
     977           0 :                                 mcpi.WidthInBytes = MIN(pitch, ctx->stride);
     978           0 :                                 mcpi.Height = ctx->uv_height;
     979             : 
     980           0 :                                 res = cuMemcpy2D(&mcpi);
     981           0 :                                 if (res != CUDA_SUCCESS) {
     982           0 :                                         GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[NVDec] failed to copy UV video plane from GPU to host mem %s\n", cudaGetErrorEnum(res)));
     983             :                                         e = GF_IO_ERR;
     984             :                                 }
     985             :                         }
     986           0 :                         cuvidUnmapVideoFrame(ctx->dec_inst->cu_decoder, map_mem);
     987             : 
     988           0 :                         gf_filter_pck_send(dst_pck);
     989             : 
     990             :                 } else {
     991           0 :                         GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[NVDec] failed to map video frame data %s\n", cudaGetErrorEnum(res)));
     992             :                         e = GF_IO_ERR;
     993           0 :                         gf_filter_pck_discard(dst_pck);
     994             :                 }
     995             :         }
     996           0 :         cuCtxPopCurrent(NULL);
     997             : 
     998             :         memset(f, 0, sizeof(NVDecFrame));
     999           0 :         gf_list_add(ctx->frames_res, f);
    1000             : 
    1001           0 :         return e;
    1002             : }
    1003             : 
    1004           0 : void nvframe_release(GF_Filter *filter, GF_FilterPid *pid, GF_FilterPacket *pck)
    1005             : {
    1006           0 :         GF_FilterFrameInterface *frame = gf_filter_pck_get_frame_interface(pck);
    1007           0 :         NVDecFrame *f = (NVDecFrame*) frame->user_data;
    1008           0 :         NVDecCtx *ctx = (NVDecCtx *)f->ctx;
    1009             : 
    1010             :         memset(f, 0, sizeof(NVDecFrame));
    1011           0 :         gf_list_add(ctx->frames_res, f);
    1012           0 : }
    1013             : 
    1014             : #ifndef GPAC_DISABLE_3D
    1015             : 
    1016             : /*Define codec matrix*/
    1017             : typedef struct __matrix GF_NVCodecMatrix;
    1018             : 
    1019           0 : GF_Err nvframe_get_gl_texture(GF_FilterFrameInterface *frame, u32 plane_idx, u32 *gl_tex_format, u32 *gl_tex_id, GF_NVCodecMatrix * texcoordmatrix)
    1020             : {
    1021             :         CUDA_MEMCPY2D mcpi;
    1022             :         CUVIDPROCPARAMS params;
    1023             :         CUresult res;
    1024             :         GF_Err e = GF_OK;
    1025             :         CUdeviceptr tx_data, vid_data;
    1026             :         size_t tx_pitch;
    1027             :         u32 vid_pitch;
    1028             :         u32 pbo_id, tx_id, gl_fmt, gl_btype = GL_UNSIGNED_BYTE;
    1029           0 :         NVDecFrame *f = (NVDecFrame *)frame->user_data;
    1030           0 :         NVDecCtx *ctx = (NVDecCtx *)f->ctx;
    1031             : 
    1032           0 :         if (plane_idx>1) return GF_BAD_PARAM;
    1033             : 
    1034           0 :         res = cuCtxPushCurrent(cuda_ctx);
    1035           0 :         if (res != CUDA_SUCCESS) {
    1036           0 :                 GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[NVDec] failed to push CUDA CTX %s\n", cudaGetErrorEnum(res) ) );
    1037             :         }
    1038             : 
    1039           0 :         if (! *gl_tex_id && ! plane_idx && ctx->y_tx_id ) {
    1040           0 :                 cuGLUnregisterBufferObject(ctx->y_pbo_id);
    1041           0 :                 ctx->y_pbo_id = 0;
    1042           0 :                 ctx->y_tx_id = 0;
    1043             :         }
    1044           0 :         if (! *gl_tex_id && plane_idx && ctx->uv_tx_id ) {
    1045           0 :                 cuGLUnregisterBufferObject(ctx->uv_pbo_id);
    1046           0 :                 ctx->uv_pbo_id = 0;
    1047           0 :                 ctx->uv_tx_id = 0;
    1048             :         }
    1049             : 
    1050             : #ifdef ENABLE_10BIT_OUTPUT
    1051             :         if ((ctx->bpp_luma>8) || (ctx->bpp_chroma>8)) {
    1052             :                 gl_btype = GL_UNSIGNED_SHORT;
    1053             :         }
    1054             : #endif
    1055           0 :         if (!plane_idx) {
    1056           0 :                 if (!ctx->y_pbo_id) {
    1057           0 :                         glGenBuffers(1, &ctx->y_pbo_id);
    1058           0 :                         glGenTextures(1, &ctx->y_tx_id);
    1059             : 
    1060           0 :                 glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, ctx->y_pbo_id);
    1061           0 :                         glBufferData(GL_PIXEL_UNPACK_BUFFER_ARB, ctx->stride * ctx->height, NULL, GL_STREAM_DRAW_ARB);
    1062           0 :                         glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
    1063             : 
    1064           0 :                         cuGLRegisterBufferObject(ctx->y_pbo_id);
    1065             : 
    1066           0 :                         glBindTexture(GL_TEXTURE_2D, ctx->y_tx_id);
    1067           0 :                         glTexImage2D(GL_TEXTURE_2D, 0, GL_LUMINANCE, ctx->width, ctx->height, 0, GL_LUMINANCE, gl_btype, NULL);
    1068           0 :                         glBindTexture(GL_TEXTURE_2D, 0);
    1069             : 
    1070           0 :                         f->y_mapped = GF_FALSE;
    1071             :                 }
    1072           0 :                 *gl_tex_format = GL_TEXTURE_2D;
    1073           0 :                 *gl_tex_id = tx_id = ctx->y_tx_id;
    1074           0 :                 if (f->y_mapped) {
    1075           0 :                         cuCtxPopCurrent(NULL);
    1076           0 :                         return GF_OK;
    1077             :                 }
    1078           0 :                 f->y_mapped = GF_TRUE;
    1079           0 :                 pbo_id = ctx->y_pbo_id;
    1080             :                 gl_fmt = GL_LUMINANCE;
    1081             :         } else {
    1082           0 :                 if (!ctx->uv_pbo_id) {
    1083           0 :                         glGenBuffers(1, &ctx->uv_pbo_id);
    1084           0 :                         glGenTextures(1, &ctx->uv_tx_id);
    1085             : 
    1086           0 :                 glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, ctx->uv_pbo_id);
    1087           0 :                         glBufferData(GL_PIXEL_UNPACK_BUFFER_ARB, ctx->stride * ctx->height / 2, NULL, GL_STREAM_DRAW_ARB);
    1088           0 :                         glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
    1089             : 
    1090           0 :                         cuGLRegisterBufferObject(ctx->uv_pbo_id);
    1091             : 
    1092           0 :                     glBindTexture(GL_TEXTURE_2D, ctx->uv_tx_id);
    1093           0 :                         glTexImage2D(GL_TEXTURE_2D, 0, GL_LUMINANCE_ALPHA, ctx->width/2, ctx->height/2, 0, GL_LUMINANCE_ALPHA, gl_btype, NULL);
    1094           0 :                         glBindTexture(GL_TEXTURE_2D, 0);
    1095           0 :                         f->uv_mapped = GF_FALSE;
    1096             :                 }
    1097           0 :                 *gl_tex_format = GL_TEXTURE_2D;
    1098           0 :                 *gl_tex_id = tx_id = ctx->uv_tx_id;
    1099           0 :                 if (f->uv_mapped) {
    1100           0 :                         cuCtxPopCurrent(NULL);
    1101           0 :                         return GF_OK;
    1102             :                 }
    1103           0 :                 f->uv_mapped = GF_TRUE;
    1104           0 :                 pbo_id = ctx->uv_pbo_id;
    1105             :                 gl_fmt = GL_LUMINANCE_ALPHA;
    1106             :         }
    1107             : 
    1108           0 :         cuGLMapBufferObject(&tx_data, &tx_pitch, pbo_id);
    1109           0 :         if (res != CUDA_SUCCESS) {
    1110           0 :                 GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[NVDec] failed to map GL texture data %s\n", cudaGetErrorEnum(res) ) );
    1111             :                 return GF_IO_ERR;
    1112             :         }
    1113             :         assert(tx_pitch != 0);
    1114             : 
    1115             :         memset(&params, 0, sizeof(params));
    1116           0 :         params.progressive_frame = f->frame_info.progressive_frame;
    1117             :         //params.second_field = 0;
    1118           0 :         params.top_field_first = f->frame_info.top_field_first;
    1119           0 :         res = cuvidMapVideoFrame(ctx->dec_inst->cu_decoder, f->frame_info.picture_index, &vid_data, &vid_pitch, &params);
    1120             : 
    1121           0 :         if (res != CUDA_SUCCESS) {
    1122           0 :                 GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[NVDec] failed to map decoded picture data %s\n", cudaGetErrorEnum(res) ) );
    1123             :                 return GF_IO_ERR;
    1124             :         }
    1125             :         assert(vid_pitch != 0);
    1126             : 
    1127             :         memset(&mcpi, 0, sizeof(CUDA_MEMCPY2D));
    1128           0 :         mcpi.srcMemoryType = CU_MEMORYTYPE_DEVICE;
    1129           0 :         if (plane_idx) {
    1130           0 :                 mcpi.srcDevice = vid_data + ctx->height * vid_pitch;
    1131           0 :                 tx_pitch *= 2; //2 bytes per pixel
    1132             :         } else {
    1133           0 :                 mcpi.srcDevice = vid_data;
    1134             :         }
    1135           0 :         mcpi.srcPitch = vid_pitch;
    1136             : 
    1137           0 :         mcpi.dstMemoryType = CU_MEMORYTYPE_DEVICE;
    1138           0 :         mcpi.dstDevice = tx_data;
    1139           0 :         mcpi.dstPitch = tx_pitch / ctx->height;
    1140             : 
    1141           0 :         mcpi.WidthInBytes = MIN(mcpi.dstPitch, vid_pitch);
    1142           0 :         mcpi.Height = ctx->height;
    1143           0 :         if (plane_idx) mcpi.Height /= 2;
    1144             : 
    1145           0 :         res = cuMemcpy2D(&mcpi);
    1146           0 :         if (res != CUDA_SUCCESS) {
    1147           0 :                 GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[NVDec] failed to copy Y video plane from GPU to host mem %s\n", cudaGetErrorEnum(res) ) );
    1148             :                 e = GF_IO_ERR;
    1149             :         }
    1150             : 
    1151           0 :         cuvidUnmapVideoFrame(ctx->dec_inst->cu_decoder, vid_data);
    1152           0 :         cuGLUnmapBufferObject(pbo_id);
    1153             : 
    1154             : 
    1155           0 :         cuCtxPopCurrent(NULL);
    1156             : 
    1157             :         /*bind PBO to texture and call glTexSubImage2D only after PBO transfer is queued, otherwise we'll have a one frame delay*/
    1158           0 :         glBindTexture(GL_TEXTURE_2D, tx_id);
    1159           0 :         glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, pbo_id);
    1160             : 
    1161             : #ifdef ENABLE_10BIT_OUTPUT
    1162             :         if (ctx->bpp_chroma+ctx->bpp_luma>16) {
    1163             :                 Float a, b;
    1164             : #error "FIX NVDEC GL color mapping in 10 bit"
    1165             :                 a = 65535.0f / (65472.0f - 63.0f);
    1166             :                 b = -63.0f * a / 65535.0f;
    1167             : 
    1168             :                 glPixelStorei(GL_UNPACK_ALIGNMENT, 2);
    1169             :                 //glPixelStorei(GL_UNPACK_SWAP_BYTES, 1);
    1170             :                 //glPixelStorei(GL_UNPACK_LSB_FIRST, 1);
    1171             :                 //we use 10 bits but GL will normalise using 16 bits, so we need to multiply the nomralized result by 2^6
    1172             :                 //glPixelTransferf(GL_RED_BIAS, 0.00096317f);
    1173             :                 //glPixelTransferf(GL_RED_SCALE, 0.000015288f);
    1174             : 
    1175             :                 glPixelTransferf(GL_RED_SCALE, a);
    1176             :                 glPixelTransferf(GL_RED_BIAS, b);
    1177             : 
    1178             :                 if (plane_idx) {
    1179             :                         glPixelTransferf(GL_ALPHA_SCALE, a);
    1180             :                         glPixelTransferf(GL_ALPHA_BIAS, b);
    1181             :                 }
    1182             :         }
    1183             : #endif
    1184           0 :         if (!plane_idx) {
    1185           0 :                 glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, ctx->width, ctx->height, gl_fmt , gl_btype, NULL);
    1186             :         } else {
    1187           0 :                 glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, ctx->width/2, ctx->height/2, gl_fmt , gl_btype, NULL);
    1188             :         }
    1189             : 
    1190           0 :         glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
    1191           0 :         glBindTexture(GL_TEXTURE_2D, 0);
    1192             : 
    1193           0 :         return e;
    1194             : }
    1195             : 
    1196             : #endif
    1197             : 
    1198           0 : GF_Err nvframe_get_frame(GF_FilterFrameInterface *frame, u32 plane_idx, const u8 **outPlane, u32 *outStride)
    1199             : {
    1200           0 :         unsigned int pitch = 0;
    1201             :         GF_Err e = GF_OK;
    1202           0 :         NVDecFrame *f = (NVDecFrame *)frame->user_data;
    1203           0 :         NVDecCtx *ctx = (NVDecCtx *)f->ctx;
    1204             : 
    1205           0 :         if (plane_idx>=ctx->nb_planes) return GF_BAD_PARAM;
    1206             : 
    1207             :         e = GF_OK;
    1208           0 :         if (!f->y_mapped) {
    1209             :                 CUVIDPROCPARAMS params;
    1210           0 :                 CUdeviceptr map_mem = 0;
    1211             :                 CUresult res;
    1212             : 
    1213           0 :                 if (ctx->out_size > ctx->single_frame_data_alloc) {
    1214           0 :                         ctx->single_frame_data_alloc = ctx->out_size;
    1215           0 :                         ctx->single_frame_data = gf_realloc(ctx->single_frame_data, ctx->out_size);
    1216             :                 }
    1217           0 :                 f->y_mapped = GF_TRUE;
    1218             : 
    1219           0 :                 res = cuCtxPushCurrent(cuda_ctx);
    1220           0 :                 if (res != CUDA_SUCCESS) {
    1221           0 :                         GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[NVDec] failed to push CUDA CTX %s\n", cudaGetErrorEnum(res)));
    1222           0 :                         return GF_IO_ERR;
    1223             :                 }
    1224             : 
    1225             :                 memset(&params, 0, sizeof(params));
    1226           0 :                 params.progressive_frame = f->frame_info.progressive_frame;
    1227           0 :                 params.top_field_first = f->frame_info.top_field_first;
    1228             : 
    1229           0 :                 res = cuvidMapVideoFrame(ctx->dec_inst->cu_decoder, f->frame_info.picture_index, &map_mem, &pitch, &params);
    1230           0 :                 if (res == CUDA_SUCCESS) {
    1231             :                         CUDA_MEMCPY2D mcpi;
    1232             :                         memset(&mcpi, 0, sizeof(CUDA_MEMCPY2D));
    1233           0 :                         mcpi.srcMemoryType = CU_MEMORYTYPE_DEVICE;
    1234           0 :                         mcpi.srcDevice = map_mem;
    1235           0 :                         mcpi.srcPitch = pitch;
    1236             : 
    1237           0 :                         mcpi.dstMemoryType = CU_MEMORYTYPE_HOST;
    1238           0 :                         mcpi.dstHost = ctx->single_frame_data;
    1239           0 :                         mcpi.dstPitch = ctx->stride;
    1240           0 :                         mcpi.WidthInBytes = MIN(pitch, ctx->stride);
    1241           0 :                         mcpi.Height = ctx->height;
    1242             : 
    1243           0 :                         res = cuMemcpy2D(&mcpi);
    1244           0 :                         if (res != CUDA_SUCCESS) {
    1245           0 :                                 GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[NVDec] failed to copy Y video plane from GPU to host mem %s\n", cudaGetErrorEnum(res)));
    1246             :                                 e = GF_IO_ERR;
    1247             :                         }
    1248             :                         else {
    1249             : 
    1250           0 :                                 mcpi.srcDevice = map_mem + ctx->height * pitch;
    1251           0 :                                 mcpi.dstHost = ctx->single_frame_data + ctx->stride * ctx->height;
    1252           0 :                                 mcpi.dstPitch = ctx->stride_uv;
    1253           0 :                                 mcpi.Height = ctx->uv_height;
    1254             : 
    1255           0 :                                 res = cuMemcpy2D(&mcpi);
    1256           0 :                                 if (res != CUDA_SUCCESS) {
    1257           0 :                                         GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[NVDec] failed to copy UV video plane from GPU to host mem %s\n", cudaGetErrorEnum(res)));
    1258             :                                         e = GF_IO_ERR;
    1259             :                                 }
    1260             :                         }
    1261           0 :                         cuvidUnmapVideoFrame(ctx->dec_inst->cu_decoder, map_mem);
    1262             :                 }
    1263             :                 else {
    1264           0 :                         GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[NVDec] failed to map video frame %s\n", cudaGetErrorEnum(res)));
    1265             :                         e = GF_IO_ERR;
    1266             :                 }
    1267           0 :                 cuCtxPopCurrent(NULL);
    1268             :         }
    1269           0 :         if (e) return e;
    1270             : 
    1271           0 :         switch (plane_idx) {
    1272           0 :         case 0:
    1273           0 :                 *outPlane = ctx->single_frame_data;
    1274           0 :                 *outStride = ctx->stride;
    1275           0 :                 break;
    1276           0 :         case 1:
    1277           0 :                 *outPlane = ctx->single_frame_data + ctx->stride * ctx->height;
    1278           0 :                 *outStride = ctx->stride_uv;
    1279           0 :                 break;
    1280           0 :         case 2:
    1281           0 :                 *outPlane = ctx->single_frame_data + ctx->stride * ctx->height + ctx->stride_uv * ctx->uv_height;
    1282           0 :                 *outStride = ctx->stride_uv;
    1283           0 :                 break;
    1284             :         default:
    1285             :                 return GF_BAD_PARAM;
    1286             :         }
    1287             :         return GF_OK;
    1288             : }
    1289             : 
    1290             : 
    1291           0 : GF_Err nvdec_send_hw_frame(NVDecCtx *ctx)
    1292             : {
    1293             :         GF_FilterPacket *dst_pck;
    1294             :         NVDecFrame *f;
    1295             : 
    1296           0 :         if (!ctx->pending_frame) return GF_BAD_PARAM;
    1297             :         f = ctx->pending_frame;
    1298           0 :         ctx->pending_frame = NULL;
    1299             : 
    1300           0 :         f->gframe.user_data = f;
    1301           0 :         f->gframe.get_plane = nvframe_get_frame;
    1302             : #ifndef GPAC_DISABLE_3D
    1303           0 :         f->gframe.get_gl_texture = nvframe_get_gl_texture;
    1304             : #endif
    1305             : 
    1306           0 :         if (ctx->frame_size_changed) {
    1307           0 :                 ctx->frame_size_changed = GF_FALSE;
    1308           0 :                 gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_WIDTH, &PROP_UINT(ctx->width));
    1309           0 :                 gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_HEIGHT, &PROP_UINT(ctx->height));
    1310           0 :                 gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_STRIDE, &PROP_UINT(ctx->stride));
    1311           0 :                 gf_filter_pid_set_property(ctx->opid, GF_PROP_PID_PIXFMT, &PROP_UINT(ctx->pix_fmt));
    1312             :         }
    1313             : 
    1314             : 
    1315           0 :         if (!gf_list_count(ctx->frames) && ctx->needs_resetup)
    1316           0 :                 f->gframe.flags = GF_FRAME_IFCE_BLOCKING;
    1317             : 
    1318           0 :         dst_pck = gf_filter_pck_new_frame_interface(ctx->opid, &f->gframe, nvframe_release);
    1319           0 :         if (!dst_pck) return GF_OUT_OF_MEM;
    1320             :         
    1321           0 :         nvdec_merge_pck_props(ctx, f, dst_pck);
    1322           0 :         if (gf_filter_pck_get_seek_flag(dst_pck)) {
    1323           0 :                 gf_filter_pck_discard(dst_pck);
    1324             :                 memset(f, 0, sizeof(NVDecFrame));
    1325           0 :                 gf_list_add(ctx->frames_res, f);
    1326             :         } else {
    1327           0 :                 gf_filter_pck_send(dst_pck);
    1328             :         }
    1329             : 
    1330             :         return GF_OK;
    1331             : }
    1332             : 
    1333             : 
    1334             : 
    1335             : 
    1336             : static u32 cuvid_load_state = 0;
    1337             : static u32 nb_cuvid_inst=0;
    1338        2413 : static void init_cuda_sdk()
    1339             : {
    1340        2413 :         if (!cuvid_load_state) {
    1341             : #ifdef EMUL_NV_DLL
    1342             :         cuvid_load_state = 2;
    1343             :         nb_cuvid_inst++;
    1344             : #else
    1345             :                 CUresult res;
    1346             :                 int device_count;
    1347        2276 :             res = cuInit(0, __CUDA_API_VERSION);
    1348        2276 :                 nb_cuvid_inst++;
    1349        2276 :                 cuvid_load_state = 1;
    1350        2276 :                 if (res == CUDA_ERROR_SHARED_OBJECT_INIT_FAILED) {
    1351        2276 :                         GF_LOG(GF_LOG_DEBUG, GF_LOG_CODEC, ("[NVDec] cuda lib not found on system\n") );
    1352           0 :                 } else if (res != CUDA_SUCCESS) {
    1353           0 :                         GF_LOG(GF_LOG_INFO, GF_LOG_CODEC, ("[NVDec] failed to init cuda %s\n", cudaGetErrorEnum(res) ) );
    1354             :                 } else {
    1355           0 :                         res = cuDeviceGetCount(&device_count);
    1356           0 :                         if (res != CUDA_SUCCESS) {
    1357           0 :                                 GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[NVDec] failed to query cuda/nvidia cards %s\n", cudaGetErrorEnum(res) ) );
    1358             :                         } else {
    1359           0 :                                 if (! device_count) {
    1360           0 :                                         GF_LOG(GF_LOG_ERROR, GF_LOG_CODEC, ("[NVDec] no device found\n" ) );
    1361             :                                 } else {
    1362           0 :                                         cuvid_load_state = 2;
    1363             :                                 }
    1364             :                         }
    1365             :                 }
    1366             : #endif
    1367             : 
    1368             :         } else {
    1369         137 :                 nb_cuvid_inst++;
    1370             :         }
    1371        2413 : }
    1372             : 
    1373           0 : static GF_Err nvdec_initialize(GF_Filter *filter)
    1374             : {
    1375           0 :         NVDecCtx *ctx = gf_filter_get_udta(filter);
    1376             : 
    1377           0 :         ctx->frames = gf_list_new();
    1378           0 :         ctx->frames_res = gf_list_new();
    1379           0 :         ctx->src_packets = gf_list_new();
    1380           0 :         return GF_OK;
    1381             : }
    1382             : 
    1383           0 : static void nvdec_finalize(GF_Filter *filter)
    1384             : {
    1385           0 :         NVDecCtx *ctx = gf_filter_get_udta(filter);
    1386             : 
    1387           0 :         nvdec_reset_pcks(ctx);
    1388           0 :         gf_list_del(ctx->src_packets);
    1389             : 
    1390           0 :         if (!global_nb_loaded_nvdec && global_unactive_decoders) {
    1391           0 :                 while (gf_list_count(global_unactive_decoders)) {
    1392           0 :                         NVDecInstance *inst = gf_list_pop_back(global_unactive_decoders);
    1393           0 :                         nvdec_destroy_decoder(inst);
    1394           0 :                         if (inst->cu_parser) cuvidDestroyVideoParser(inst->cu_parser);
    1395           0 :                         gf_free(inst);
    1396             :                 }
    1397           0 :                 gf_list_del(global_unactive_decoders);
    1398             : 
    1399           0 :                 gf_mx_del(global_inst_mutex);
    1400             :         }
    1401             : 
    1402           0 :         if (ctx->dec_inst) {
    1403           0 :                 nvdec_destroy_decoder(ctx->dec_inst);
    1404           0 :                 if (ctx->dec_inst->cu_parser) cuvidDestroyVideoParser(ctx->dec_inst->cu_parser);
    1405           0 :                 gf_free(ctx->dec_inst);
    1406             :         }
    1407             : 
    1408             : 
    1409             :         assert(nb_cuvid_inst);
    1410           0 :         nb_cuvid_inst--;
    1411           0 :         if (!nb_cuvid_inst) {
    1412           0 :                 if (cuda_ctx) cuCtxDestroy(cuda_ctx);
    1413           0 :                 cuda_ctx = NULL;
    1414           0 :                 cuUninit();
    1415           0 :                 cuvid_load_state = 0;
    1416             :         }
    1417           0 :         while (gf_list_count(ctx->frames)) {
    1418           0 :                 NVDecFrame *f = (NVDecFrame *) gf_list_pop_back(ctx->frames);
    1419           0 :                 gf_free(f);
    1420             :         }
    1421           0 :         gf_list_del(ctx->frames);
    1422           0 :         while (gf_list_count(ctx->frames_res)) {
    1423           0 :                 NVDecFrame *f = (NVDecFrame *) gf_list_pop_back(ctx->frames_res);
    1424           0 :                 gf_free(f);
    1425             :         }
    1426           0 :         gf_list_del(ctx->frames_res);
    1427             : 
    1428           0 :         if (ctx->single_frame_data) gf_free(ctx->single_frame_data);
    1429           0 :         if (ctx->xps_buf) gf_free(ctx->xps_buf);
    1430           0 :         if (ctx->nal_buffer) gf_free(ctx->nal_buffer);
    1431           0 : }
    1432             : 
    1433             : 
    /*
     * Capability table of the filter, attached to NVDecRegister through SETCAPS.
     * Inputs: visual streams whose codec ID is MPEG-4 Part 2, one of the listed MPEG-2
     * profiles, HEVC or AVC; PIDs flagged as unframed or as tile base are excluded.
     * Outputs: raw visual streams.
     * The VC1 entry is commented out.
     */
    1434             : static const GF_FilterCapability NVDecCaps[] =
    1435             : {
    1436             :         CAP_UINT(GF_CAPS_INPUT, GF_PROP_PID_STREAM_TYPE, GF_STREAM_VISUAL),
    1437             :         CAP_BOOL(GF_CAPS_INPUT_EXCLUDED, GF_PROP_PID_UNFRAMED, GF_TRUE),
    1438             :         CAP_UINT(GF_CAPS_INPUT, GF_PROP_PID_CODECID, GF_CODECID_MPEG4_PART2),
    1439             :         CAP_UINT(GF_CAPS_INPUT, GF_PROP_PID_CODECID, GF_CODECID_MPEG2_MAIN),
    1440             :         CAP_UINT(GF_CAPS_INPUT, GF_PROP_PID_CODECID, GF_CODECID_MPEG2_SNR),
    1441             :         CAP_UINT(GF_CAPS_INPUT, GF_PROP_PID_CODECID, GF_CODECID_MPEG2_SPATIAL),
    1442             :         CAP_UINT(GF_CAPS_INPUT, GF_PROP_PID_CODECID, GF_CODECID_MPEG2_HIGH),
    1443             :         CAP_UINT(GF_CAPS_INPUT, GF_PROP_PID_CODECID, GF_CODECID_MPEG2_422),
    1444             :         CAP_UINT(GF_CAPS_INPUT, GF_PROP_PID_CODECID, GF_CODECID_HEVC),
    1445             :         CAP_UINT(GF_CAPS_INPUT, GF_PROP_PID_CODECID, GF_CODECID_AVC),
    1446             :         CAP_BOOL(GF_CAPS_INPUT_EXCLUDED,GF_PROP_PID_TILE_BASE, GF_TRUE),
    1447             :         //CAP_UINT(GF_CAPS_INPUT, GF_PROP_PID_CODECID, GF_CODECID_VC1),
    1448             :         CAP_UINT(GF_CAPS_OUTPUT_STATIC, GF_PROP_PID_STREAM_TYPE, GF_STREAM_VISUAL),
    1449             :         CAP_UINT(GF_CAPS_OUTPUT_STATIC, GF_PROP_PID_CODECID, GF_CODECID_RAW)
    1450             : };
    1451             : 
    /*OFFS(_n) expands to two initializers: the option name as a string, then the offset of the field of the same name in NVDecCtx*/
    1452             : #define OFFS(_n)        #_n, offsetof(NVDecCtx, _n)
    1453             : 
    /*
     * Options of the filter, attached to NVDecRegister through .args.
     * Each entry gives, after OFFS(): the help text, the property type, the default value as a
     * string, the list of accepted values (NULL when not an enumeration) and a hint flag
     * - presumably in the field order of GF_FilterArgs; confirm against its declaration.
     * The table ends with a zeroed entry.
     */
    1454             : static const GF_FilterArgs NVDecArgs[] =
    1455             : {
    1456             :         { OFFS(num_surfaces), "number of hardware surfaces to allocate", GF_PROP_UINT, "20", NULL, GF_FS_ARG_HINT_ADVANCED },
    1457             :         { OFFS(unload), "decoder unload mode\n"
    1458             :                 "- no: keep inactive decoder alive\n"
    1459             :                 "- destroy: destroy inactive decoder\n"
    1460             :                 "- reuse: detach decoder from inactive PIDs and reattach to active ones", GF_PROP_UINT, "no", "no|destroy|reuse", GF_FS_ARG_HINT_EXPERT },
    1461             :         { OFFS(vmode), "video decoder backend\n"
    1462             :                 "- cuvid: use dedicated video engines directly\n"
    1463             :                 "- cuda: use a CUDA-based decoder if faster than dedicated engines\n"
    1464             :                 "- dxva: go through DXVA internally if possible (requires D3D9 interop)", GF_PROP_UINT, "cuvid", "cuvid|cuda|dxva", GF_FS_ARG_HINT_ADVANCED },
    1465             :         { OFFS(fmode), "frame output mode\n"
    1466             :                 "- copy: each frame is copied and dispatched\n"
    1467             :                 "- single: frame data is only retrieved when used, single memory space for all frames (not safe if multiple consumers)\n"
    1468             :                 "- gl: frame data is mapped to an OpenGL texture"
    1469             :         , GF_PROP_UINT, "gl", "copy|single|gl", 0 },
    1470             : 
    1471             :         { 0 }
    1472             : };
    1473             : 
    1474             : GF_FilterRegister NVDecRegister = {
    1475             :         .name = "nvdec",
    1476             :         GF_FS_SET_DESCRIPTION("NVidia decoder")
    1477             :         GF_FS_SET_HELP("This filter decodes MPEG-2, MPEG-4 Part 2, AVC|H264 and HEVC streams through NVideia decoder. It allows GPU frame dispatch or direct frame copy.")
    1478             :         .private_size = sizeof(NVDecCtx),
    1479             :         SETCAPS(NVDecCaps),
    1480             :         .flags = GF_FS_REG_CONFIGURE_MAIN_THREAD,
    1481             :         .initialize = nvdec_initialize,
    1482             :         .finalize = nvdec_finalize,
    1483             :         .args = NVDecArgs,
    1484             :         .configure_pid = nvdec_configure_pid,
    1485             :         .process = nvdec_process,
    1486             :         .process_event = nvdec_process_event
    1487             : };
    1488             : 
    1489             : 
    1490        2877 : const GF_FilterRegister *nvdec_register(GF_FilterSession *session)
    1491             : {
    1492             :         //check if nvdec is not globally blacklisted - if so, do not try to load CUDA SDK which may be time consuming on some devices
    1493        2877 :         const char *blacklist = gf_opts_get_key("core", "blacklist");
    1494        2877 :         if (blacklist && strstr(blacklist, "nvdec"))
    1495             :                 return NULL;
    1496             : 
    1497        2413 :         init_cuda_sdk();
    1498             :         //do not register if no SDK
    1499        2413 :         if (cuvid_load_state != 2) {
    1500             :                 // this is man / md generation, load filter
    1501        2413 :                 if (!gf_opts_get_bool("temp", "gendoc"))
    1502             :                         return NULL;
    1503          14 :                 NVDecRegister.version = "! Warning: CUVID SDK NOT AVAILABLE ON THIS SYSTEM !";
    1504             :         }
    1505             : 
    1506             :         return &NVDecRegister;
    1507             : }
    1508             : 
    1509             : #else
    1510             : 
    /*
     * Variant compiled in the #else branch, i.e. when the condition quoted on the closing
     * #endif does not hold (static build, unsupported platform or GPAC_DISABLE_NVDEC):
     * no filter is registered.
     */
    1511             : const GF_FilterRegister *nvdec_register(GF_FilterSession *session)
    1512             : {
    1513             :         return NULL;
    1514             : }
    1515             : #endif // (!defined(GPAC_STATIC_BUILD) && (defined(WIN32) || defined(GPAC_CONFIG_LINUX) || defined(GPAC_CONFIG_DARWIN)) && !defined(GPAC_DISABLE_NVDEC))

Generated by: LCOV version 1.13