From 24870b8df041a8f797bbffd05672d90da2f34e3b Mon Sep 17 00:00:00 2001 From: Szpachlarz Date: Sat, 25 Mar 2023 22:24:29 +0100 Subject: [PATCH 1/3] [extractor/cda] Fix age restriction --- yt_dlp/extractor/cda.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/cda.py b/yt_dlp/extractor/cda.py index 1157114b2..a8e23fb30 100644 --- a/yt_dlp/extractor/cda.py +++ b/yt_dlp/extractor/cda.py @@ -16,7 +16,6 @@ from ..utils import ( merge_dicts, multipart_encode, parse_duration, - random_birthday, traverse_obj, try_call, try_get, @@ -90,11 +89,10 @@ class CDAIE(InfoExtractor): }] def _download_age_confirm_page(self, url, video_id, *args, **kwargs): - form_data = random_birthday('rok', 'miesiac', 'dzien') - form_data.update({'return': url, 'module': 'video', 'module_id': video_id}) + form_data = {'age_confirm': ''} data, content_type = multipart_encode(form_data) return self._download_webpage( - urljoin(url, '/a/validatebirth'), video_id, *args, + url, video_id, *args, data=data, headers={ 'Referer': url, 'Content-Type': content_type, @@ -209,7 +207,7 @@ class CDAIE(InfoExtractor): self.raise_geo_restricted() need_confirm_age = False - if self._html_search_regex(r'(]+action="[^"]*/a/validatebirth[^"]*")', + if self._html_search_regex(r'(]+name="[^"]*age_confirm[^"]*")', webpage, 'birthday validate form', default=None): webpage = self._download_age_confirm_page( url, video_id, note='Confirming age') From 3156c822f2ddd1cb4a96c066a4629e9b95324d3c Mon Sep 17 00:00:00 2001 From: Szpachlarz Date: Mon, 27 Mar 2023 00:04:08 +0200 Subject: [PATCH 2/3] [extractor/cda] Fix tests --- yt_dlp/extractor/cda.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/yt_dlp/extractor/cda.py b/yt_dlp/extractor/cda.py index a8e23fb30..74ac89155 100644 --- a/yt_dlp/extractor/cda.py +++ b/yt_dlp/extractor/cda.py @@ -62,26 +62,28 @@ class CDAIE(InfoExtractor): 'description': 'md5:60d76b71186dcce4e0ba6d4bbdb13e1a', 'thumbnail': 
r're:^https?://.*\.jpg$', 'uploader': 'crash404', - 'view_count': int, 'average_rating': float, 'duration': 137, 'age_limit': 0, + 'upload_date': '20160220', + 'timestamp': 1455968218, } }, { # Age-restricted - 'url': 'http://www.cda.pl/video/1273454c4', + 'url': 'https://www.cda.pl/video/12537327dd', 'info_dict': { - 'id': '1273454c4', + 'id': '12537327dd', 'ext': 'mp4', - 'title': 'Bronson (2008) napisy HD 1080p', - 'description': 'md5:1b6cb18508daf2dc4e0fa4db77fec24c', + 'title': 'Egzorcysta (Showmax) - s01e06 - Utopce', + 'description': 'md5:0b256b7ea8f4a3f19af842500eaf49c5', 'height': 1080, - 'uploader': 'boniek61', + 'uploader': 'pan-pingwin', 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 5554, + 'duration': 786.0, 'age_limit': 18, - 'view_count': int, 'average_rating': float, + 'timestamp': 1669314139, + 'upload_date': '20221124', }, }, { 'url': 'http://ebd.cda.pl/0x0/5749950c', From 7445a558d0c07047f4c4b5bd741db61ea26f9c54 Mon Sep 17 00:00:00 2001 From: Szpachlarz Date: Mon, 27 Mar 2023 00:09:08 +0200 Subject: [PATCH 3/3] [extractor/cda] Remove view_count from web extract --- yt_dlp/extractor/cda.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/yt_dlp/extractor/cda.py b/yt_dlp/extractor/cda.py index 74ac89155..152268d7b 100644 --- a/yt_dlp/extractor/cda.py +++ b/yt_dlp/extractor/cda.py @@ -222,9 +222,6 @@ class CDAIE(InfoExtractor): (?:<\1[^>]*>[^<]*|(?!)(?:.|\n))*? 
<(span|meta)[^>]+itemprop=(["\'])name\4[^>]*>(?P<uploader>[^<]+) ''', webpage, 'uploader', default=None, group='uploader') - view_count = self._search_regex( - r'Odsłony:(?:\s| )*([0-9]+)', webpage, - 'view_count', default=None) average_rating = self._search_regex( (r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P[0-9.]+)', r']+\bclass=["\']rating["\'][^>]*>(?P[0-9.]+)'), webpage, 'rating', fatal=False, @@ -235,7 +232,6 @@ class CDAIE(InfoExtractor): 'title': self._og_search_title(webpage), 'description': self._og_search_description(webpage), 'uploader': uploader, - 'view_count': int_or_none(view_count), 'average_rating': float_or_none(average_rating), 'thumbnail': self._og_search_thumbnail(webpage), 'formats': formats,