From c674e8377393bbaa2587648cbd23c760120b343b Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Wed, 15 Oct 2025 13:40:32 +0100 Subject: [PATCH 1/2] Rename default interface into FFmpeg --- src/torchcodec/_core/BetaCudaDeviceInterface.cpp | 9 ++++----- src/torchcodec/_core/DeviceInterface.h | 4 ++-- src/torchcodec/_core/StreamOptions.h | 4 ++-- src/torchcodec/_core/custom_ops.cpp | 8 ++++---- src/torchcodec/_core/ops.py | 4 ++-- src/torchcodec/decoders/_video_decoder.py | 3 --- test/test_decoders.py | 6 +++--- test/utils.py | 2 +- 8 files changed, 18 insertions(+), 22 deletions(-) diff --git a/src/torchcodec/_core/BetaCudaDeviceInterface.cpp b/src/torchcodec/_core/BetaCudaDeviceInterface.cpp index 78fa8d635..679df2f54 100644 --- a/src/torchcodec/_core/BetaCudaDeviceInterface.cpp +++ b/src/torchcodec/_core/BetaCudaDeviceInterface.cpp @@ -129,7 +129,7 @@ static UniqueCUvideodecoder createDecoder(CUVIDEOFORMAT* videoFormat) { // automatically converted to 8bits by NVDEC itself. That is, the raw frames // we get back from cuvidMapVideoFrame will already be in 8bit format. We // won't need to do the conversion ourselves, so that's a lot easier. - // In the default interface, we have to do the 10 -> 8bits conversion + // In the ffmpeg CUDA interface, we have to do the 10 -> 8bits conversion // ourselves later in convertAVFrameToFrameOutput(), because FFmpeg explicitly // requests 10 or 16bits output formats for >8-bit videos! // https://github.com/FFmpeg/FFmpeg/blob/e05f8acabff468c1382277c1f31fa8e9d90c3202/libavcodec/nvdec.c#L376-L403 @@ -480,8 +480,7 @@ int BetaCudaDeviceInterface::receiveFrame(UniqueAVFrame& avFrame) { procParams.top_field_first = dispInfo.top_field_first; procParams.unpaired_field = dispInfo.repeat_first_field < 0; // We set the NVDEC stream to the current stream. It will be waited upon by - // the NPP stream before any color conversion. Currently, that syncing logic - // is in the default interface. + // the NPP stream before any color conversion. 
// Re types: we get a cudaStream_t from PyTorch but it's interchangeable with // CUstream procParams.output_stream = reinterpret_cast( @@ -618,8 +617,8 @@ void BetaCudaDeviceInterface::convertAVFrameToFrameOutput( UniqueAVFrame& avFrame, FrameOutput& frameOutput, std::optional preAllocatedOutputTensor) { - // TODONVDEC P2: we may need to handle 10bit videos the same way the default - // interface does it with maybeConvertAVFrameToNV12OrRGB24(). + // TODONVDEC P2: we may need to handle 10bit videos the same way the CUDA + // ffmpeg interface does it with maybeConvertAVFrameToNV12OrRGB24(). TORCH_CHECK( avFrame->format == AV_PIX_FMT_CUDA, "Expected CUDA format frame from BETA CUDA interface"); diff --git a/src/torchcodec/_core/DeviceInterface.h b/src/torchcodec/_core/DeviceInterface.h index cac29e838..982f7e732 100644 --- a/src/torchcodec/_core/DeviceInterface.h +++ b/src/torchcodec/_core/DeviceInterface.h @@ -21,7 +21,7 @@ namespace facebook::torchcodec { // Key for device interface registration with device type + variant support struct DeviceInterfaceKey { torch::DeviceType deviceType; - std::string_view variant = "default"; // e.g., "default", "beta", etc. + std::string_view variant = "ffmpeg"; // e.g., "ffmpeg", "beta", etc. bool operator<(const DeviceInterfaceKey& other) const { if (deviceType != other.deviceType) { @@ -141,7 +141,7 @@ void validateDeviceInterface( std::unique_ptr createDeviceInterface( const torch::Device& device, - const std::string_view variant = "default"); + const std::string_view variant = "ffmpeg"); torch::Tensor rgbAVFrameToTensor(const UniqueAVFrame& avFrame); diff --git a/src/torchcodec/_core/StreamOptions.h b/src/torchcodec/_core/StreamOptions.h index 7728a676e..e5ab256e1 100644 --- a/src/torchcodec/_core/StreamOptions.h +++ b/src/torchcodec/_core/StreamOptions.h @@ -41,8 +41,8 @@ struct VideoStreamOptions { // By default we use CPU for decoding for both C++ and python users. 
torch::Device device = torch::kCPU; - // Device variant (e.g., "default", "beta", etc.) - std::string_view deviceVariant = "default"; + // Device variant (e.g., "ffmpeg", "beta", etc.) + std::string_view deviceVariant = "ffmpeg"; // Encoding options // TODO-VideoEncoder: Consider adding other optional fields here diff --git a/src/torchcodec/_core/custom_ops.cpp b/src/torchcodec/_core/custom_ops.cpp index 5ba98e2c1..f29f33395 100644 --- a/src/torchcodec/_core/custom_ops.cpp +++ b/src/torchcodec/_core/custom_ops.cpp @@ -43,9 +43,9 @@ TORCH_LIBRARY(torchcodec_ns, m) { m.def( "_create_from_file_like(int file_like_context, str? seek_mode=None) -> Tensor"); m.def( - "_add_video_stream(Tensor(a!) decoder, *, int? num_threads=None, str? dimension_order=None, int? stream_index=None, str device=\"cpu\", str device_variant=\"default\", str transform_specs=\"\", (Tensor, Tensor, Tensor)? custom_frame_mappings=None, str? color_conversion_library=None) -> ()"); + "_add_video_stream(Tensor(a!) decoder, *, int? num_threads=None, str? dimension_order=None, int? stream_index=None, str device=\"cpu\", str device_variant=\"ffmpeg\", str transform_specs=\"\", (Tensor, Tensor, Tensor)? custom_frame_mappings=None, str? color_conversion_library=None) -> ()"); m.def( - "add_video_stream(Tensor(a!) decoder, *, int? num_threads=None, str? dimension_order=None, int? stream_index=None, str device=\"cpu\", str device_variant=\"default\", str transform_specs=\"\", (Tensor, Tensor, Tensor)? custom_frame_mappings=None) -> ()"); + "add_video_stream(Tensor(a!) decoder, *, int? num_threads=None, str? dimension_order=None, int? stream_index=None, str device=\"cpu\", str device_variant=\"ffmpeg\", str transform_specs=\"\", (Tensor, Tensor, Tensor)? custom_frame_mappings=None) -> ()"); m.def( "add_audio_stream(Tensor(a!) decoder, *, int? stream_index=None, int? sample_rate=None, int? num_channels=None) -> ()"); m.def("seek_to_pts(Tensor(a!) 
decoder, float seconds) -> ()"); @@ -319,7 +319,7 @@ void _add_video_stream( std::optional dimension_order = std::nullopt, std::optional stream_index = std::nullopt, std::string_view device = "cpu", - std::string_view device_variant = "default", + std::string_view device_variant = "ffmpeg", std::string_view transform_specs = "", std::optional> custom_frame_mappings = std::nullopt, @@ -376,7 +376,7 @@ void add_video_stream( std::optional dimension_order = std::nullopt, std::optional stream_index = std::nullopt, std::string_view device = "cpu", - std::string_view device_variant = "default", + std::string_view device_variant = "ffmpeg", std::string_view transform_specs = "", const std::optional>& custom_frame_mappings = std::nullopt) { diff --git a/src/torchcodec/_core/ops.py b/src/torchcodec/_core/ops.py index 44dc89e2b..9ab1410e8 100644 --- a/src/torchcodec/_core/ops.py +++ b/src/torchcodec/_core/ops.py @@ -304,7 +304,7 @@ def _add_video_stream_abstract( dimension_order: Optional[str] = None, stream_index: Optional[int] = None, device: str = "cpu", - device_variant: str = "default", + device_variant: str = "ffmpeg", transform_specs: str = "", custom_frame_mappings: Optional[ tuple[torch.Tensor, torch.Tensor, torch.Tensor] @@ -322,7 +322,7 @@ def add_video_stream_abstract( dimension_order: Optional[str] = None, stream_index: Optional[int] = None, device: str = "cpu", - device_variant: str = "default", + device_variant: str = "ffmpeg", transform_specs: str = "", custom_frame_mappings: Optional[ tuple[torch.Tensor, torch.Tensor, torch.Tensor] diff --git a/src/torchcodec/decoders/_video_decoder.py b/src/torchcodec/decoders/_video_decoder.py index f22f5a3fc..53e369b01 100644 --- a/src/torchcodec/decoders/_video_decoder.py +++ b/src/torchcodec/decoders/_video_decoder.py @@ -145,9 +145,6 @@ def __init__( device = str(device) device_variant = _get_cuda_backend() - if device_variant == "ffmpeg": - # TODONVDEC P2 rename 'default' into 'ffmpeg' everywhere. 
- device_variant = "default" # Legacy support for device="cuda:0:beta" syntax # TODONVDEC P2: remove support for this everywhere. This will require diff --git a/test/test_decoders.py b/test/test_decoders.py index 300c953bf..f9c7d2ff6 100644 --- a/test/test_decoders.py +++ b/test/test_decoders.py @@ -1303,7 +1303,7 @@ def test_10bit_videos(self, device, asset): # RuntimeError: Codec configuration not supported on this GPU. # Codec: 4, chroma format: 1, bit depth: 10 # - # It works on the default interface because FFmpeg fallsback to the + # It works on the ffmpeg interface because FFmpeg falls back to the # CPU, while the BETA interface doesn't. pytest.skip("Asset not supported by NVDEC") @@ -1692,8 +1692,8 @@ def test_beta_cuda_interface_backwards(self, asset, seek_mode): @needs_cuda def test_beta_cuda_interface_small_h265(self): # Test to illustrate current difference in behavior between the BETA and - # the default interface: this video isn't supported by NVDEC, but in the - # default interface, FFMPEG fallsback to the CPU while we don't. + # the ffmpeg interface: this video isn't supported by NVDEC, but in the + # ffmpeg interface, FFmpeg falls back to the CPU while we don't.
VideoDecoder(H265_VIDEO.path, device="cuda").get_frame_at(0) with pytest.raises( diff --git a/test/utils.py b/test/utils.py index 7c91f307c..1f6ab770d 100644 --- a/test/utils.py +++ b/test/utils.py @@ -47,7 +47,7 @@ def unsplit_device_str(device_str: str) -> str: if device_str == "cuda:0:beta": return "cuda", "beta" else: - return device_str, "default" + return device_str, "ffmpeg" def get_ffmpeg_major_version(): From 2890524d77a9f0175786e9f563edbb0ca775f8c9 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Wed, 15 Oct 2025 14:29:32 +0100 Subject: [PATCH 2/2] fix --- src/torchcodec/_core/SingleStreamDecoder.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/torchcodec/_core/SingleStreamDecoder.h b/src/torchcodec/_core/SingleStreamDecoder.h index 48821ff09..cf24aa0c3 100644 --- a/src/torchcodec/_core/SingleStreamDecoder.h +++ b/src/torchcodec/_core/SingleStreamDecoder.h @@ -311,7 +311,7 @@ class SingleStreamDecoder { int streamIndex, AVMediaType mediaType, const torch::Device& device = torch::kCPU, - const std::string_view deviceVariant = "default", + const std::string_view deviceVariant = "ffmpeg", std::optional ffmpegThreadCount = std::nullopt); // Returns the "best" stream index for a given media type. The "best" is