Skip to content

Yt dlp plugin

YTDLPPlugin

A plugin for yt-dlp to generate URLs and corresponding titles from the given URL.

Methods:

Name Description
generate_urls

Generates URLs and corresponding titles from the given URL.

Source code in video_sampler/integrations/yt_dlp_plugin.py
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
class YTDLPPlugin:
    """
    A plugin for yt-dlp to generate URLs and corresponding titles from the given URL.

    Methods:
        generate_urls(url, extra_info_extract_opts=None, get_subs=False)
                -> Iterable[tuple[str, str, str | None]]:
            Yields (title, stream URL, subtitles) for every video found at the URL.
        get_subtitles_opts() -> dict:
            Returns the yt-dlp options that are merged in when subtitles are requested.
    """

    def __init__(self, ie_key: str = "Generic"):
        """
        Initialize the YTDLPPlugin instance.

        Args:
            ie_key (str): Stored on the instance and forwarded as the ``ie_key``
                keyword of ``extract_info`` (skipped for "ytsearch" URLs).
        """
        self.ie_key = ie_key
        # Base yt-dlp parameters; callers can override them per call.
        self.ydl_opts = {
            "format": best_video_only,
        }

    def generate_urls(
        self,
        url: str,
        extra_info_extract_opts: dict = None,
        get_subs: bool = False,
    ) -> Iterable[tuple[str, str, str | None]]:
        """Generate stream URLs (and optionally subtitles) for a given video URL.

        Args:
            url (str): The URL to extract from. If the extracted info contains
                "entries", one tuple is yielded per non-empty entry.
            extra_info_extract_opts (dict, optional): Additional yt-dlp parameters.
                They override the instance defaults; the dict is not modified.
            get_subs (bool): When True, the subtitle options from
                ``get_subtitles_opts`` are applied on top of all other options and
                the first requested subtitle track is fetched with ``download_sub``.

        Yields:
            tuple: (video title, URL of the first requested format, subtitles or None).
        """
        # Build a fresh parameter dict so the caller's dict is never mutated.
        # Precedence (lowest to highest): instance defaults, caller extras,
        # subtitle options.
        params = self.ydl_opts | (
            {} if extra_info_extract_opts is None else extra_info_extract_opts
        )
        if get_subs:
            params |= self.get_subtitles_opts()
        extr_args = {"ie_key": self.ie_key} if "ytsearch" not in url else {}

        def preproc_entry(info):
            req_format = info["requested_formats"][0]
            subs = None
            # "requested_subtitles" may be present but None/empty when no track
            # matched, so test the value rather than mere key presence.
            requested_subs = info.get("requested_subtitles") if get_subs else None
            if requested_subs:
                first_track = next(iter(requested_subs.values()))
                subs = download_sub(first_track["url"])
            return info["title"], req_format["url"], subs

        with YoutubeDL(params=params) as ydl:
            info = ydl.extract_info(url, download=False, **extr_args)
            if not info:
                # Nothing was extracted (extract_info can return None when
                # yt-dlp is configured to ignore errors).
                return
            if "entries" not in info:
                yield preproc_entry(info)
            else:
                for entry in info.get("entries") or []:
                    if not entry:
                        continue
                    yield preproc_entry(entry)

    def get_subtitles_opts(self) -> dict:
        """Return the yt-dlp parameters applied when subtitles are requested.

        The options enable regular and automatic subtitles for languages matching
        "en.*" and register the "FFmpegSubtitlesConvertor" postprocessor with
        format "srt" at the "before_dl" stage. They also set "format" to
        ``best_video_only``, which replaces any caller-supplied "format".
        """
        return {
            "postprocessors": [
                {
                    "format": "srt",
                    "key": "FFmpegSubtitlesConvertor",
                    "when": "before_dl",
                }
            ],
            "format": best_video_only,
            "subtitleslangs": ["en.*"],
            "writeautomaticsub": True,
            "writesubtitles": True,
        }

__init__(ie_key='Generic')

Initialize the YTDLPPlugin instance.

Source code in video_sampler/integrations/yt_dlp_plugin.py
81
82
83
84
85
86
87
88
def __init__(self, ie_key: str = "Generic"):
    """
    Set up the plugin with its extractor key and default yt-dlp options.

    Args:
        ie_key (str): Extractor key kept on the instance as ``self.ie_key``.
    """
    self.ie_key = ie_key
    # Default yt-dlp parameters: pick formats with the best_video_only selector.
    self.ydl_opts = dict(format=best_video_only)

generate_urls(url, extra_info_extract_opts=None, get_subs=False)

Generate URLs and download subtitles for a given video URL.

Parameters:

Name Type Description Default
url str

The URL of the video to extract the title, stream URL and (optionally) subtitles for.

required
extra_info_extract_opts dict

Additional options for extracting video information.

None

Yields:

Name Type Description
tuple Iterable[tuple[str, str, str | None]]

A tuple containing the video title, video format URL, and downloaded subtitles.

Source code in video_sampler/integrations/yt_dlp_plugin.py
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
def generate_urls(
    self,
    url: str,
    extra_info_extract_opts: dict = None,
    get_subs: bool = False,
) -> Iterable[tuple[str, str, str | None]]:
    """Generate stream URLs (and optionally subtitles) for a given video URL.

    Args:
        url (str): The URL to extract from. If the extracted info contains
            "entries", one tuple is yielded per non-empty entry.
        extra_info_extract_opts (dict, optional): Additional yt-dlp parameters.
            They override the instance defaults; the dict is not modified.
        get_subs (bool): When True, the subtitle options from
            ``get_subtitles_opts`` are applied on top of all other options and
            the first requested subtitle track is fetched with ``download_sub``.

    Yields:
        tuple: (video title, URL of the first requested format, subtitles or None).
    """
    # Build a fresh parameter dict so the caller's dict is never mutated.
    # Precedence (lowest to highest): instance defaults, caller extras,
    # subtitle options.
    params = self.ydl_opts | (
        {} if extra_info_extract_opts is None else extra_info_extract_opts
    )
    if get_subs:
        params |= self.get_subtitles_opts()
    extr_args = {"ie_key": self.ie_key} if "ytsearch" not in url else {}

    def preproc_entry(info):
        req_format = info["requested_formats"][0]
        subs = None
        # "requested_subtitles" may be present but None/empty when no track
        # matched, so test the value rather than mere key presence.
        requested_subs = info.get("requested_subtitles") if get_subs else None
        if requested_subs:
            first_track = next(iter(requested_subs.values()))
            subs = download_sub(first_track["url"])
        return info["title"], req_format["url"], subs

    with YoutubeDL(params=params) as ydl:
        info = ydl.extract_info(url, download=False, **extr_args)
        if not info:
            # Nothing was extracted (extract_info can return None when
            # yt-dlp is configured to ignore errors).
            return
        if "entries" not in info:
            yield preproc_entry(info)
        else:
            for entry in info.get("entries") or []:
                if not entry:
                    continue
                yield preproc_entry(entry)

best_video_best_audio(ctx)

Taken from the yt-dlp documentation as-is

Source code in video_sampler/integrations/yt_dlp_plugin.py
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def best_video_best_audio(ctx):
    """Select the best video and the best audio that won't result in an mkv.

    Taken from the yt-dlp documentation as-is.
    NOTE: This is just an example and does not handle all cases.

    Args:
        ctx: Mapping whose "formats" entry lists format dicts, worst to best.

    Yields:
        dict: A single merged-format description pairing the chosen video-only
        format with an audio-only format of a compatible extension.
    """
    # Input is ordered worst to best, so flip it to search best-first.
    candidates = ctx.get("formats")[::-1]

    def is_video_only(fmt):
        # acodec == "none" marks a stream without audio.
        return fmt["vcodec"] != "none" and fmt["acodec"] == "none"

    video = next(filter(is_video_only, candidates))

    # Audio container that pairs with the video container.
    wanted_audio_ext = {"mp4": "m4a", "webm": "webm"}[video["ext"]]

    def is_matching_audio(fmt):
        # vcodec == "none" marks a stream without video.
        return (
            fmt["acodec"] != "none"
            and fmt["vcodec"] == "none"
            and fmt["ext"] == wanted_audio_ext
        )

    audio = next(filter(is_matching_audio, candidates))

    # Minimum set of fields needed to describe a merged format.
    merged = {
        "format_id": "{}+{}".format(video["format_id"], audio["format_id"]),
        "ext": video["ext"],
        "requested_formats": [video, audio],
        # Protocols are joined with "+", one per requested format.
        "protocol": "{}+{}".format(video["protocol"], audio["protocol"]),
    }
    yield merged

best_video_only(ctx)

Just best video -- save bandwidth

Source code in video_sampler/integrations/yt_dlp_plugin.py
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
def best_video_only(ctx):
    """Just best video -- save bandwidth.

    Format selector that picks the highest-ranked format carrying a video
    stream. The chosen format may also contain audio; only its presence of
    video is checked.

    Args:
        ctx: Mapping whose "formats" entry lists format dicts, worst to best.

    Yields:
        dict: A single format description wrapping the chosen format, or
        nothing when "formats" is missing/empty or no format has video.
    """
    # formats are already sorted worst to best; tolerate a missing/None list
    formats = ctx.get("formats") or []

    # vcodec='none' means there is no video. A default is required here:
    # a bare next() raising StopIteration inside a generator surfaces as
    # RuntimeError (PEP 479) instead of "no format selected".
    best_video = next((f for f in reversed(formats) if f["vcodec"] != "none"), None)
    if best_video is None:
        return

    # These are the minimum required fields for a merged format
    yield {
        "format_id": f'{best_video["format_id"]}',
        "ext": best_video["ext"],
        "requested_formats": [best_video],
        # Must be + separated list of protocols
        "protocol": f'{best_video["protocol"]}',
    }

no_shorts(info, *, incomplete)

Filter out short videos

Source code in video_sampler/integrations/yt_dlp_plugin.py
64
65
66
67
68
def no_shorts(info, *, incomplete):
    """Reject entries whose URL points at a "/shorts" path.

    Args:
        info: Mapping describing the entry; only its "url" value is inspected.
        incomplete: Accepted for signature compatibility; not used.

    Returns:
        A rejection message when the URL contains "/shorts", otherwise None.
    """
    url = info.get("url", "")
    if url and "/shorts" in url:
        return "This is a short video"
    return None