From 1b71001149a6ede48369e66ea23e8df03724dd79 Mon Sep 17 00:00:00 2001 From: McSwindler Date: Sat, 20 Apr 2024 12:07:02 -0500 Subject: [PATCH 1/4] [watchertv] Add extractor --- yt_dlp/extractor/_extractors.py | 4 + yt_dlp/extractor/watchertv.py | 215 ++++++++++++++++++++++++++++++++ 2 files changed, 219 insertions(+) create mode 100644 yt_dlp/extractor/watchertv.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 42034275b..d42722be9 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -2306,6 +2306,10 @@ from .washingtonpost import ( WashingtonPostArticleIE, ) from .wat import WatIE +from .watchertv import ( + WatcherTVSeasonIE, + WatcherTVIE +) from .wdr import ( WDRIE, WDRPageIE, diff --git a/yt_dlp/extractor/watchertv.py b/yt_dlp/extractor/watchertv.py new file mode 100644 index 000000000..4d28576e5 --- /dev/null +++ b/yt_dlp/extractor/watchertv.py @@ -0,0 +1,215 @@ +import functools + +from .common import InfoExtractor +from .vimeo import VHXEmbedIE +from ..utils import ( + ExtractorError, + OnDemandPagedList, + clean_html, + extract_attributes, + get_element_by_class, + get_element_by_id, + get_elements_html_by_class, + int_or_none, + traverse_obj, + unified_strdate, + urlencode_postdata, +) + + +class WatcherTVIE(InfoExtractor): + _LOGIN_URL = 'https://www.watchertv.com/login' + _NETRC_MACHINE = 'watchertv' + + _VALID_URL = r'https?://(?:www\.)?watchertv\.com/(?:[^/]+/)*videos/(?P[^/]+)/?$' + _TESTS = [ + { + 'url': 'https://www.watchertv.com/ghost-files/season:2/videos/gf-201', + 'note': 'Episode in a series', + 'md5': '99c9aab2cb62157467b7ef5e37266e4e', + 'info_dict': { + 'id': '3129338', + 'display_id': 'gf-201', + 'ext': 'mp4', + 'title': 'The Death Row Poltergeists of Missouri State Penitentiary', + 'description': 'Where Curiosity Meets Comedy', + 'release_date': '20230825', + 'thumbnail': 'https://vhx.imgix.net/watcherentertainment/assets/92c02f39-2ed6-4b51-9e63-1a907b82e2bc.png', + 'series': 'Ghost Files', + 'season_number': 2, + 'season': 'Season 2', + 'episode_number': 1, + 'episode': 'The Death Row Poltergeists of Missouri State Penitentiary', + 'duration': 3853, + 'uploader_id': 'user80538407', + 'uploader_url': 'https://vimeo.com/user80538407', + 'uploader': 'OTT Videos' + }, + 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'] + }, + { + 'url': 'https://www.watchertv.com/road-files/season:1/videos/rf101', + 'note': 'Episode in a series (missing release_date)', + 'md5': '02f9aaafc8ad9bd1be366cf6a61a68d8', + 'info_dict': { + 'id': '3187312', + 'display_id': 'rf101', + 'ext': 'mp4', + 'title': 'Road Files: Haunted Hill House', + 'description': 'Where Curiosity Meets Comedy', + 'thumbnail': 'https://vhx.imgix.net/watcherentertainment/assets/7445f23c-a3e7-47fb-835a-d288273e2698.png', + 'series': 'Road Files', + 'season_number': 1, + 'season': 'Season 1', + 'episode_number': 1, + 'episode': 'Road Files: Haunted Hill House', + 'duration': 516, + 'uploader_id': 'user80538407', + 'uploader_url': 'https://vimeo.com/user80538407', + 'uploader': 'OTT Videos' + }, + 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'] + }, + { + 'url': 'https://www.watchertv.com/videos/welcome-beta-users', + 'note': 'Episode not in a series', + 'md5': 'fd1db805f9adc442c38d706bba21ad03', + 'info_dict': { + 'id': '3187107', + 'display_id': 'welcome-beta-users', + 'ext': 'mp4', + 'title': 'Welcome to Watcher!', + 'description': 'Where Curiosity Meets Comedy', + 'release_date': '20240419', + 'thumbnail': 'https://vhx.imgix.net/watcherentertainment/assets/fbb90dc8-ebb0-4597-9a83-95729e234030.jpg', + 'duration': 92, + 'uploader_id': 'user80538407', + 'uploader_url': 'https://vimeo.com/user80538407', + 'uploader': 'OTT Videos' + }, + 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'] + } + ] + + def _get_authenticity_token(self, display_id): + signin_page = self._download_webpage( + self._LOGIN_URL, display_id, note='Getting authenticity token') + return self._html_search_regex( + r'name=["\']authenticity_token["\'] value=["\'](.+?)["\']', + signin_page, 'authenticity_token') + + def _login(self, display_id): + username, password = self._get_login_info() + if not username: + return True + + response = self._download_webpage( + self._LOGIN_URL, display_id, note='Logging in', fatal=False, + data=urlencode_postdata({ + 'email': username, + 'password': password, + 'authenticity_token': self._get_authenticity_token(display_id), + 'utf8': True + })) + + user_has_subscription = self._search_regex( + r'user_has_subscription:\s*["\'](.+?)["\']', response, 'subscription status', default='none') + if user_has_subscription.lower() == 'true': + return + elif user_has_subscription.lower() == 'false': + return 'Account is not subscribed' + else: + return 'Incorrect username/password' + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = None + if self._get_cookies('https://www.watchertv.com').get('_session'): + webpage = self._download_webpage(url, display_id) + if not webpage or '
[^\/$&?#]+)(?:/?$|/season:(?P[0-9]+)/?$)' + _TESTS = [ + { + 'url': 'https://www.watchertv.com/ghost-files/season:1', + 'note': 'Multi-season series with the season in the url', + 'playlist_count': 8, + 'info_dict': { + 'id': 'ghost-files-season-1', + 'title': 'Ghost Files - Season 1' + } + }, + { + 'url': 'https://www.watchertv.com/are-you-scared', + 'note': 'Multi-season series with the season not in the url', + 'playlist_count': 3, + 'info_dict': { + 'id': 'are-you-scared-season-1', + 'title': 'Are You Scared - Season 1' + } + }, + { + 'url': 'https://www.watchertv.com/watcher-one-offs', + 'note': 'Single-season series', + 'playlist_count': 16, + 'info_dict': { + 'id': 'watcher-one-offs-season-1', + 'title': 'Watcher One Offs - Season 1' + } + } + ] + + def _fetch_page(self, url, season_id, page): + page += 1 + webpage = self._download_webpage( + f'{url}?page={page}', season_id, note=f'Downloading page {page}', expected_status={400}) + yield from [self.url_result(item_url, WatcherTVIE) for item_url in traverse_obj( + get_elements_html_by_class('browse-item-link', webpage), (..., {extract_attributes}, 'href'))] + + def _real_extract(self, url): + season_id = self._match_id(url) + season_num = self._match_valid_url(url).group('season') or 1 + season_title = season_id.replace('-', ' ').title() + + return self.playlist_result( + OnDemandPagedList(functools.partial(self._fetch_page, url, season_id), self._PAGE_SIZE), + f'{season_id}-season-{season_num}', f'{season_title} - Season {season_num}') From dd41cc4ade384e7de9961f4105c7b24c4cc2f98c Mon Sep 17 00:00:00 2001 From: McSwindler Date: Sun, 21 Apr 2024 08:51:10 -0500 Subject: [PATCH 2/4] [watchertv] update extractor to extend dropout instead of duplicating --- yt_dlp/extractor/dropout.py | 37 +++++------ yt_dlp/extractor/watchertv.py | 114 ++-------------------------------- 2 files changed, 24 insertions(+), 127 deletions(-) diff --git a/yt_dlp/extractor/dropout.py b/yt_dlp/extractor/dropout.py index 80ae6c126..b413da334 100644 --- a/yt_dlp/extractor/dropout.py +++ b/yt_dlp/extractor/dropout.py @@ -18,7 +18,7 @@ from ..utils import ( class DropoutIE(InfoExtractor): - _LOGIN_URL = 'https://www.dropout.tv/login' + _HOST = 'https://www.dropout.tv' _NETRC_MACHINE = 'dropout' _VALID_URL = r'https?://(?:www\.)?dropout\.tv/(?:[^/]+/)*videos/(?P[^/]+)/?$' @@ -26,7 +26,7 @@ class DropoutIE(InfoExtractor): { 'url': 'https://www.dropout.tv/game-changer/season:2/videos/yes-or-no', 'note': 'Episode in a series', - 'md5': '5e000fdfd8d8fa46ff40456f1c2af04a', + 'md5': 'fc55805bac60b1ce2ffdc35fb9c51195', 'info_dict': { 'id': '738153', 'display_id': 'yes-or-no', @@ -48,22 +48,22 @@ class DropoutIE(InfoExtractor): 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'] }, { - 'url': 'https://www.dropout.tv/dimension-20-fantasy-high/season:1/videos/episode-1', + 'url': 'https://www.dropout.tv/ch-shorts/season:1/videos/post-apocalyptic-dane-cook', 'note': 'Episode in a series (missing release_date)', - 'md5': '712caf7c191f1c47c8f1879520c2fa5c', + 'md5': 'f260b8d7d0fdbaceae713c9196dac07f', 'info_dict': { - 'id': '320562', - 'display_id': 'episode-1', + 'id': '449042', + 'display_id': 'post-apocalyptic-dane-cook', 'ext': 'mp4', - 'title': 'The Beginning Begins', - 'description': 'The cast introduces their PCs, including a neurotic elf, a goblin PI, and a corn-worshipping cleric.', - 'thumbnail': 'https://vhx.imgix.net/chuncensoredstaging/assets/4421ed0d-f630-4c88-9004-5251b2b8adfa.jpg', - 'series': 'Dimension 20: Fantasy High', + 'title': 'Post-Apocalyptic Dane Cook', + 'description': 'Dane Cook is back with his all new special. Don\'t worry, it\'s not the end of the world.', + 'thumbnail': 'https://vhx.imgix.net/chuncensoredstaging/assets/5b0678df-d9c3-4864-b811-24db03072f4a.jpg', + 'series': 'CH Shorts', 'season_number': 1, 'season': 'Season 1', 'episode_number': 1, - 'episode': 'The Beginning Begins', - 'duration': 6838, + 'episode': 'Post-Apocalyptic Dane Cook', + 'duration': 135, 'uploader_id': 'user80538407', 'uploader_url': 'https://vimeo.com/user80538407', 'uploader': 'OTT Videos' @@ -73,7 +73,7 @@ class DropoutIE(InfoExtractor): { 'url': 'https://www.dropout.tv/videos/misfits-magic-holiday-special', 'note': 'Episode not in a series', - 'md5': 'c30fa18999c5880d156339f13c953a26', + 'md5': '147e0607bd877a791665c0b7219b512c', 'info_dict': { 'id': '1915774', 'display_id': 'misfits-magic-holiday-special', @@ -93,7 +93,7 @@ class DropoutIE(InfoExtractor): def _get_authenticity_token(self, display_id): signin_page = self._download_webpage( - self._LOGIN_URL, display_id, note='Getting authenticity token') + f'{self._HOST}/login', display_id, note='Getting authenticity token') return self._html_search_regex( r'name=["\']authenticity_token["\'] value=["\'](.+?)["\']', signin_page, 'authenticity_token') @@ -104,7 +104,7 @@ class DropoutIE(InfoExtractor): return True response = self._download_webpage( - self._LOGIN_URL, display_id, note='Logging in', fatal=False, + f'{self._HOST}/login', display_id, note='Logging in', fatal=False, data=urlencode_postdata({ 'email': username, 'password': password, @@ -125,7 +125,7 @@ class DropoutIE(InfoExtractor): display_id = self._match_id(url) webpage = None - if self._get_cookies('https://www.dropout.tv').get('_session'): + if self._get_cookies(self._HOST).get('_session'): webpage = self._download_webpage(url, display_id) if not webpage or '
[^\/$&?#]+)(?:/?$|/season:(?P[0-9]+)/?$)' + _VIDEO_IE = DropoutIE _TESTS = [ { 'url': 'https://www.dropout.tv/dimension-20-fantasy-high/season:1', @@ -211,7 +212,7 @@ class DropoutSeasonIE(InfoExtractor): page += 1 webpage = self._download_webpage( f'{url}?page={page}', season_id, note=f'Downloading page {page}', expected_status={400}) - yield from [self.url_result(item_url, DropoutIE) for item_url in traverse_obj( + yield from [self.url_result(item_url, self._VIDEO_IE) for item_url in traverse_obj( get_elements_html_by_class('browse-item-link', webpage), (..., {extract_attributes}, 'href'))] def _real_extract(self, url): diff --git a/yt_dlp/extractor/watchertv.py b/yt_dlp/extractor/watchertv.py index 4d28576e5..9079e8e48 100644 --- a/yt_dlp/extractor/watchertv.py +++ b/yt_dlp/extractor/watchertv.py @@ -1,24 +1,8 @@ -import functools - -from .common import InfoExtractor -from .vimeo import VHXEmbedIE -from ..utils import ( - ExtractorError, - OnDemandPagedList, - clean_html, - extract_attributes, - get_element_by_class, - get_element_by_id, - get_elements_html_by_class, - int_or_none, - traverse_obj, - unified_strdate, - urlencode_postdata, -) +from .dropout import DropoutIE, DropoutSeasonIE -class WatcherTVIE(InfoExtractor): - _LOGIN_URL = 'https://www.watchertv.com/login' +class WatcherTVIE(DropoutIE): + _HOST = 'https://www.watchertv.com' _NETRC_MACHINE = 'watchertv' _VALID_URL = r'https?://(?:www\.)?watchertv\.com/(?:[^/]+/)*videos/(?P[^/]+)/?$' @@ -91,83 +75,11 @@ class WatcherTVIE(InfoExtractor): } ] - def _get_authenticity_token(self, display_id): - signin_page = self._download_webpage( - self._LOGIN_URL, display_id, note='Getting authenticity token') - return self._html_search_regex( - r'name=["\']authenticity_token["\'] value=["\'](.+?)["\']', - signin_page, 'authenticity_token') - def _login(self, display_id): - username, password = self._get_login_info() - if not username: - return True - - response = self._download_webpage( - self._LOGIN_URL, display_id, note='Logging in', fatal=False, - data=urlencode_postdata({ - 'email': username, - 'password': password, - 'authenticity_token': self._get_authenticity_token(display_id), - 'utf8': True - })) - - user_has_subscription = self._search_regex( - r'user_has_subscription:\s*["\'](.+?)["\']', response, 'subscription status', default='none') - if user_has_subscription.lower() == 'true': - return - elif user_has_subscription.lower() == 'false': - return 'Account is not subscribed' - else: - return 'Incorrect username/password' - - def _real_extract(self, url): - display_id = self._match_id(url) - - webpage = None - if self._get_cookies('https://www.watchertv.com').get('_session'): - webpage = self._download_webpage(url, display_id) - if not webpage or '
[^\/$&?#]+)(?:/?$|/season:(?P[0-9]+)/?$)' + _VIDEO_IE = WatcherTVIE _TESTS = [ { 'url': 'https://www.watchertv.com/ghost-files/season:1', @@ -197,19 +109,3 @@ class WatcherTVSeasonIE(InfoExtractor): } } ] - - def _fetch_page(self, url, season_id, page): - page += 1 - webpage = self._download_webpage( - f'{url}?page={page}', season_id, note=f'Downloading page {page}', expected_status={400}) - yield from [self.url_result(item_url, WatcherTVIE) for item_url in traverse_obj( - get_elements_html_by_class('browse-item-link', webpage), (..., {extract_attributes}, 'href'))] - - def _real_extract(self, url): - season_id = self._match_id(url) - season_num = self._match_valid_url(url).group('season') or 1 - season_title = season_id.replace('-', ' ').title() - - return self.playlist_result( - OnDemandPagedList(functools.partial(self._fetch_page, url, season_id), self._PAGE_SIZE), - f'{season_id}-season-{season_num}', f'{season_title} - Season {season_num}') From 123ac3301c371f10d13755714d7a15bdea9ae04a Mon Sep 17 00:00:00 2001 From: McSwindler Date: Tue, 23 Apr 2024 22:48:18 -0500 Subject: [PATCH 3/4] [watchertv] create DropoutBase IEs for Dropout and WatcherTV to extend --- yt_dlp/extractor/dropout.py | 187 ++++++++++++++++++---------------- yt_dlp/extractor/watchertv.py | 7 +- 2 files changed, 100 insertions(+), 94 deletions(-) diff --git a/yt_dlp/extractor/dropout.py b/yt_dlp/extractor/dropout.py index b413da334..6e4f8bdde 100644 --- a/yt_dlp/extractor/dropout.py +++ b/yt_dlp/extractor/dropout.py @@ -17,79 +17,8 @@ from ..utils import ( ) -class DropoutIE(InfoExtractor): - _HOST = 'https://www.dropout.tv' - _NETRC_MACHINE = 'dropout' - - _VALID_URL = r'https?://(?:www\.)?dropout\.tv/(?:[^/]+/)*videos/(?P[^/]+)/?$' - _TESTS = [ - { - 'url': 'https://www.dropout.tv/game-changer/season:2/videos/yes-or-no', - 'note': 'Episode in a series', - 'md5': 'fc55805bac60b1ce2ffdc35fb9c51195', - 'info_dict': { - 'id': '738153', - 'display_id': 'yes-or-no', - 'ext': 'mp4', - 'title': 'Yes or No', - 'description': 'Ally, Brennan, and Zac are asked a simple question, but is there a correct answer?', - 'release_date': '20200508', - 'thumbnail': 'https://vhx.imgix.net/chuncensoredstaging/assets/351e3f24-c4a3-459a-8b79-dc80f1e5b7fd.jpg', - 'series': 'Game Changer', - 'season_number': 2, - 'season': 'Season 2', - 'episode_number': 6, - 'episode': 'Yes or No', - 'duration': 1180, - 'uploader_id': 'user80538407', - 'uploader_url': 'https://vimeo.com/user80538407', - 'uploader': 'OTT Videos' - }, - 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'] - }, - { - 'url': 'https://www.dropout.tv/ch-shorts/season:1/videos/post-apocalyptic-dane-cook', - 'note': 'Episode in a series (missing release_date)', - 'md5': 'f260b8d7d0fdbaceae713c9196dac07f', - 'info_dict': { - 'id': '449042', - 'display_id': 'post-apocalyptic-dane-cook', - 'ext': 'mp4', - 'title': 'Post-Apocalyptic Dane Cook', - 'description': 'Dane Cook is back with his all new special. Don\'t worry, it\'s not the end of the world.', - 'thumbnail': 'https://vhx.imgix.net/chuncensoredstaging/assets/5b0678df-d9c3-4864-b811-24db03072f4a.jpg', - 'series': 'CH Shorts', - 'season_number': 1, - 'season': 'Season 1', - 'episode_number': 1, - 'episode': 'Post-Apocalyptic Dane Cook', - 'duration': 135, - 'uploader_id': 'user80538407', - 'uploader_url': 'https://vimeo.com/user80538407', - 'uploader': 'OTT Videos' - }, - 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'] - }, - { - 'url': 'https://www.dropout.tv/videos/misfits-magic-holiday-special', - 'note': 'Episode not in a series', - 'md5': '147e0607bd877a791665c0b7219b512c', - 'info_dict': { - 'id': '1915774', - 'display_id': 'misfits-magic-holiday-special', - 'ext': 'mp4', - 'title': 'Misfits & Magic Holiday Special', - 'description': 'The magical misfits spend Christmas break at Gowpenny, with an unwelcome visitor.', - 'release_date': '20211215', - 'thumbnail': 'https://vhx.imgix.net/chuncensoredstaging/assets/d91ea8a6-b250-42ed-907e-b30fb1c65176-8e24b8e5.jpg', - 'duration': 11698, - 'uploader_id': 'user80538407', - 'uploader_url': 'https://vimeo.com/user80538407', - 'uploader': 'OTT Videos' - }, - 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'] - } - ] +class DropoutBaseIE(InfoExtractor): + _HOST = None def _get_authenticity_token(self, display_id): signin_page = self._download_webpage( @@ -165,8 +94,102 @@ class DropoutIE(InfoExtractor): } -class DropoutSeasonIE(InfoExtractor): +class DropoutIE(DropoutBaseIE): + _HOST = 'https://www.dropout.tv' + _NETRC_MACHINE = 'dropout' + + _VALID_URL = r'https?://(?:www\.)?dropout\.tv/(?:[^/]+/)*videos/(?P[^/]+)/?$' + _TESTS = [ + { + 'url': 'https://www.dropout.tv/game-changer/season:2/videos/yes-or-no', + 'note': 'Episode in a series', + 'md5': 'fc55805bac60b1ce2ffdc35fb9c51195', + 'info_dict': { + 'id': '738153', + 'display_id': 'yes-or-no', + 'ext': 'mp4', + 'title': 'Yes or No', + 'description': 'Ally, Brennan, and Zac are asked a simple question, but is there a correct answer?', + 'release_date': '20200508', + 'thumbnail': 'https://vhx.imgix.net/chuncensoredstaging/assets/351e3f24-c4a3-459a-8b79-dc80f1e5b7fd.jpg', + 'series': 'Game Changer', + 'season_number': 2, + 'season': 'Season 2', + 'episode_number': 6, + 'episode': 'Yes or No', + 'duration': 1180, + 'uploader_id': 'user80538407', + 'uploader_url': 'https://vimeo.com/user80538407', + 'uploader': 'OTT Videos' + }, + 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'] + }, + { + 'url': 'https://www.dropout.tv/ch-shorts/season:1/videos/post-apocalyptic-dane-cook', + 'note': 'Episode in a series (missing release_date)', + 'md5': 'f260b8d7d0fdbaceae713c9196dac07f', + 'info_dict': { + 'id': '449042', + 'display_id': 'post-apocalyptic-dane-cook', + 'ext': 'mp4', + 'title': 'Post-Apocalyptic Dane Cook', + 'description': 'Dane Cook is back with his all new special. Don\'t worry, it\'s not the end of the world.', + 'thumbnail': 'https://vhx.imgix.net/chuncensoredstaging/assets/5b0678df-d9c3-4864-b811-24db03072f4a.jpg', + 'series': 'CH Shorts', + 'season_number': 1, + 'season': 'Season 1', + 'episode_number': 1, + 'episode': 'Post-Apocalyptic Dane Cook', + 'duration': 135, + 'uploader_id': 'user80538407', + 'uploader_url': 'https://vimeo.com/user80538407', + 'uploader': 'OTT Videos' + }, + 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'] + }, + { + 'url': 'https://www.dropout.tv/videos/misfits-magic-holiday-special', + 'note': 'Episode not in a series', + 'md5': '147e0607bd877a791665c0b7219b512c', + 'info_dict': { + 'id': '1915774', + 'display_id': 'misfits-magic-holiday-special', + 'ext': 'mp4', + 'title': 'Misfits & Magic Holiday Special', + 'description': 'The magical misfits spend Christmas break at Gowpenny, with an unwelcome visitor.', + 'release_date': '20211215', + 'thumbnail': 'https://vhx.imgix.net/chuncensoredstaging/assets/d91ea8a6-b250-42ed-907e-b30fb1c65176-8e24b8e5.jpg', + 'duration': 11698, + 'uploader_id': 'user80538407', + 'uploader_url': 'https://vimeo.com/user80538407', + 'uploader': 'OTT Videos' + }, + 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'] + } + ] + + +class DropoutSeasonBaseIE(InfoExtractor): _PAGE_SIZE = 24 + + def _fetch_page(self, url, season_id, page): + page += 1 + webpage = self._download_webpage( + f'{url}?page={page}', season_id, note=f'Downloading page {page}', expected_status={400}) + yield from [self.url_result(item_url, self._VIDEO_IE) for item_url in traverse_obj( + get_elements_html_by_class('browse-item-link', webpage), (..., {extract_attributes}, 'href'))] + + def _real_extract(self, url): + season_id = self._match_id(url) + season_num = self._match_valid_url(url).group('season') or 1 + season_title = season_id.replace('-', ' ').title() + + return self.playlist_result( + OnDemandPagedList(functools.partial(self._fetch_page, url, season_id), self._PAGE_SIZE), + f'{season_id}-season-{season_num}', f'{season_title} - Season {season_num}') + + +class DropoutSeasonIE(DropoutSeasonBaseIE): _VALID_URL = r'https?://(?:www\.)?dropout\.tv/(?P[^\/$&?#]+)(?:/?$|/season:(?P[0-9]+)/?$)' _VIDEO_IE = DropoutIE _TESTS = [ @@ -207,19 +230,3 @@ class DropoutSeasonIE(InfoExtractor): } } ] - - def _fetch_page(self, url, season_id, page): - page += 1 - webpage = self._download_webpage( - f'{url}?page={page}', season_id, note=f'Downloading page {page}', expected_status={400}) - yield from [self.url_result(item_url, self._VIDEO_IE) for item_url in traverse_obj( - get_elements_html_by_class('browse-item-link', webpage), (..., {extract_attributes}, 'href'))] - - def _real_extract(self, url): - season_id = self._match_id(url) - season_num = self._match_valid_url(url).group('season') or 1 - season_title = season_id.replace('-', ' ').title() - - return self.playlist_result( - OnDemandPagedList(functools.partial(self._fetch_page, url, season_id), self._PAGE_SIZE), - f'{season_id}-season-{season_num}', f'{season_title} - Season {season_num}') diff --git a/yt_dlp/extractor/watchertv.py b/yt_dlp/extractor/watchertv.py index 9079e8e48..a03100766 100644 --- a/yt_dlp/extractor/watchertv.py +++ b/yt_dlp/extractor/watchertv.py @@ -1,7 +1,7 @@ -from .dropout import DropoutIE, DropoutSeasonIE +from .dropout import DropoutBaseIE, DropoutSeasonBaseIE -class WatcherTVIE(DropoutIE): +class WatcherTVIE(DropoutBaseIE): _HOST = 'https://www.watchertv.com' _NETRC_MACHINE = 'watchertv' @@ -76,8 +76,7 @@ class WatcherTVIE(DropoutIE): ] -class WatcherTVSeasonIE(DropoutSeasonIE): - _PAGE_SIZE = 24 +class WatcherTVSeasonIE(DropoutSeasonBaseIE): _VALID_URL = r'https?://(?:www\.)?watchertv\.com/(?P[^\/$&?#]+)(?:/?$|/season:(?P[0-9]+)/?$)' _VIDEO_IE = WatcherTVIE _TESTS = [ From 31b11c339bec7dca12f3db955fee6d19d896de78 Mon Sep 17 00:00:00 2001 From: McSwindler Date: Wed, 24 Apr 2024 22:12:55 -0500 Subject: [PATCH 4/4] [watchertv] add comments for required fields Co-authored-by: pukkandan --- yt_dlp/extractor/dropout.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/dropout.py b/yt_dlp/extractor/dropout.py index 6e4f8bdde..e3cf8cc7b 100644 --- a/yt_dlp/extractor/dropout.py +++ b/yt_dlp/extractor/dropout.py @@ -18,7 +18,7 @@ from ..utils import ( class DropoutBaseIE(InfoExtractor): - _HOST = None + """Subclasses must define _HOST""" def _get_authenticity_token(self, display_id): signin_page = self._download_webpage( @@ -170,6 +170,7 @@ class DropoutIE(DropoutBaseIE): class DropoutSeasonBaseIE(InfoExtractor): + """Subclasses must define _VIDEO_IE""" _PAGE_SIZE = 24 def _fetch_page(self, url, season_id, page):