From fe8d9ec837a0d846443199a3d1fa7244d1c07930 Mon Sep 17 00:00:00 2001 From: Abdessamad DERRAZ Date: Thu, 20 Jul 2023 19:31:22 +0200 Subject: [PATCH 01/13] [extractor] Update Teachable and add Hotmart (Credit: Green0Photon) This commit updates the Teachable extractor to handle Hotmart video URLs and adds a new extractor for Hotmart. The Hotmart extractor is used to handle videos from Hotmart, a platform for selling online courses. The changes allow yt-dlp to download videos from courses hosted on Hotmart. The code for these changes was originally written by Green0Photon and can be found at https://github.com/Green0Photon/yt-dlp/tree/teachable-fix-add-hotmart. This commit is a direct implementation of his work into the main yt-dlp repository. The changes have been tested and confirmed to work as expected. This commit also includes updates to the code style and formatting to match the yt-dlp contributing guidelines. --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/hotmart.py | 62 ++++++ yt_dlp/extractor/teachable.py | 379 +++++++++++++++++++++----------- 3 files changed, 311 insertions(+), 131 deletions(-) create mode 100644 yt_dlp/extractor/hotmart.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index ae73a9f96..0c4cab55f 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -758,6 +758,7 @@ from .hollywoodreporter import ( ) from .holodex import HolodexIE from .hotnewhiphop import HotNewHipHopIE +from .hotmart import HotmartIE from .hotstar import ( HotStarIE, HotStarPrefixIE, diff --git a/yt_dlp/extractor/hotmart.py b/yt_dlp/extractor/hotmart.py new file mode 100644 index 000000000..b82b79f2a --- /dev/null +++ b/yt_dlp/extractor/hotmart.py @@ -0,0 +1,62 @@ +from .common import InfoExtractor +from ..utils import get_element_by_id, traverse_obj + + +class HotmartIE(InfoExtractor): + _VALID_URL = r"https?://player\.hotmart\.com/embed/(?P[a-zA-Z0-9]+)" + _TESTS = [ + { + "url": "https://yourextractor.com/watch/42", + "md5": "TODO: md5 sum of the first 10241 bytes of the video file (use --test)", + "info_dict": { + "id": "42", + "ext": "mp4", + "title": "Video title goes here", + "thumbnail": r"re:^https?://.*\.jpg$", + # TODO more properties, either as: + # * A value + # * MD5 checksum; start the string with md5: + # * A regular expression; start the string with re: + # * Any Python type (for example int or float) + }, + } + ] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + video_data_string = get_element_by_id("__NEXT_DATA__", webpage) + video_data = self._parse_json(video_data_string, video_id) + + # Encrypted url is 'urlEncrypted' instead of 'url' + # See https://github.com/yt-dlp/yt-dlp/issues/3564 for initial discussion of design + url = traverse_obj( + video_data, + ("props", "pageProps", "applicationData", "mediaAssets", 0, "url"), + ) + thumbnail_url = traverse_obj( + video_data, ("props", "pageProps", "applicationData", "urlThumbnail") + ) + + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + url, video_id, "mp4" + ) + + title = self._og_search_title(webpage, default=None) + description = self._og_search_description(webpage, default=None) + chapter = None + chapter_number = None + + return { + "id": video_id, + "video_id": video_id, + "thumbnail": thumbnail_url, + "formats": formats, + "subtitles": subtitles, + "title": title, + "description": description, + "chapter": chapter, + "chapter_number": chapter_number, + } diff --git a/yt_dlp/extractor/teachable.py b/yt_dlp/extractor/teachable.py index 01906bda9..81366399a 100644 --- a/yt_dlp/extractor/teachable.py +++ b/yt_dlp/extractor/teachable.py @@ -1,12 +1,15 @@ import re from .common import InfoExtractor +from .hotmart import HotmartIE from .wistia import WistiaIE from ..utils import ( - clean_html, ExtractorError, - int_or_none, + clean_html, + extract_attributes, get_element_by_class, + get_element_html_by_class, + int_or_none, strip_or_none, urlencode_postdata, urljoin, @@ -14,22 +17,25 @@ from ..utils import ( class TeachableBaseIE(InfoExtractor): - _NETRC_MACHINE = 'teachable' - _URL_PREFIX = 'teachable:' + _NETRC_MACHINE = "teachable" + _URL_PREFIX = "teachable:" _SITES = { # Only notable ones here - 'v1.upskillcourses.com': 'upskill', - 'gns3.teachable.com': 'gns3', - 'academyhacker.com': 'academyhacker', - 'stackskills.com': 'stackskills', - 'market.saleshacker.com': 'saleshacker', - 'learnability.org': 'learnability', - 'edurila.com': 'edurila', - 'courses.workitdaily.com': 'workitdaily', + "v1.upskillcourses.com": "upskill", + "gns3.teachable.com": "gns3", + "academyhacker.com": "academyhacker", + "stackskills.com": "stackskills", + "market.saleshacker.com": "saleshacker", + "learnability.org": "learnability", + "edurila.com": "edurila", + "courses.workitdaily.com": "workitdaily", } - _VALID_URL_SUB_TUPLE = (_URL_PREFIX, '|'.join(re.escape(site) for site in _SITES.keys())) + _VALID_URL_SUB_TUPLE = ( + _URL_PREFIX, + "|".join(re.escape(site) for site in _SITES.keys()), + ) def _real_initialize(self): self._logged_in = False @@ -38,19 +44,25 @@ class TeachableBaseIE(InfoExtractor): if self._logged_in: return - username, password = self._get_login_info(netrc_machine=self._SITES.get(site, site)) + username, password = self._get_login_info( + netrc_machine=self._SITES.get(site, site) + ) if username is None: return login_page, urlh = self._download_webpage_handle( - 'https://%s/sign_in' % site, None, - 'Downloading %s login page' % site) + "https://%s/sign_in" % site, None, "Downloading %s login page" % site + ) def is_logged(webpage): - return any(re.search(p, webpage) for p in ( - r'class=["\']user-signout', - r']+\bhref=["\']/sign_out', - r'Log\s+[Oo]ut\s*<')) + return any( + re.search(p, webpage) + for p in ( + r'class=["\']user-signout', + r']+\bhref=["\']/sign_out', + r"Log\s+[Oo]ut\s*<", + ) + ) if is_logged(login_page): self._logged_in = True @@ -60,97 +72,118 @@ class TeachableBaseIE(InfoExtractor): login_form = self._hidden_inputs(login_page) - login_form.update({ - 'user[email]': username, - 'user[password]': password, - }) + login_form.update( + { + "user[email]": username, + "user[password]": password, + } + ) post_url = self._search_regex( - r']+action=(["\'])(?P(?:(?!\1).)+)\1', login_page, - 'post url', default=login_url, group='url') + r']+action=(["\'])(?P(?:(?!\1).)+)\1', + login_page, + "post url", + default=login_url, + group="url", + ) - if not post_url.startswith('http'): + if not post_url.startswith("http"): post_url = urljoin(login_url, post_url) response = self._download_webpage( - post_url, None, 'Logging in to %s' % site, + post_url, + None, + "Logging in to %s" % site, data=urlencode_postdata(login_form), headers={ - 'Content-Type': 'application/x-www-form-urlencoded', - 'Referer': login_url, - }) + "Content-Type": "application/x-www-form-urlencoded", + "Referer": login_url, + }, + ) - if '>I accept the new Privacy Policy<' in response: + if ">I accept the new Privacy Policy<" in response: raise ExtractorError( - 'Unable to login: %s asks you to accept new Privacy Policy. ' - 'Go to https://%s/ and accept.' % (site, site), expected=True) + "Unable to login: %s asks you to accept new Privacy Policy. " + "Go to https://%s/ and accept." % (site, site), + expected=True, + ) # Successful login if is_logged(response): self._logged_in = True return - message = get_element_by_class('alert', response) + message = get_element_by_class("alert", response) if message is not None: raise ExtractorError( - 'Unable to login: %s' % clean_html(message), expected=True) + "Unable to login: %s" % clean_html(message), expected=True + ) - raise ExtractorError('Unable to log in') + raise ExtractorError("Unable to log in") class TeachableIE(TeachableBaseIE): - _VALID_URL = r'''(?x) + _VALID_URL = ( + r"""(?x) (?: %shttps?://(?P[^/]+)| https?://(?:www\.)?(?P%s) ) /courses/[^/]+/lectures/(?P\d+) - ''' % TeachableBaseIE._VALID_URL_SUB_TUPLE + """ + % TeachableBaseIE._VALID_URL_SUB_TUPLE + ) - _TESTS = [{ - 'url': 'https://gns3.teachable.com/courses/gns3-certified-associate/lectures/6842364', - 'info_dict': { - 'id': 'untlgzk1v7', - 'ext': 'bin', - 'title': 'Overview', - 'description': 'md5:071463ff08b86c208811130ea1c2464c', - 'duration': 736.4, - 'timestamp': 1542315762, - 'upload_date': '20181115', - 'chapter': 'Welcome', - 'chapter_number': 1, + _TESTS = [ + { + "url": "https://gns3.teachable.com/courses/gns3-certified-associate/lectures/6842364", + "info_dict": { + "id": "untlgzk1v7", + "ext": "bin", + "title": "Overview", + "description": "md5:071463ff08b86c208811130ea1c2464c", + "duration": 736.4, + "timestamp": 1542315762, + "upload_date": "20181115", + "chapter": "Welcome", + "chapter_number": 1, + }, + "params": { + "skip_download": True, + }, }, - 'params': { - 'skip_download': True, + { + "url": "http://v1.upskillcourses.com/courses/119763/lectures/1747100", + "only_matching": True, }, - }, { - 'url': 'http://v1.upskillcourses.com/courses/119763/lectures/1747100', - 'only_matching': True, - }, { - 'url': 'https://gns3.teachable.com/courses/423415/lectures/6885939', - 'only_matching': True, - }, { - 'url': 'teachable:https://v1.upskillcourses.com/courses/essential-web-developer-course/lectures/1747100', - 'only_matching': True, - }] + { + "url": "https://gns3.teachable.com/courses/423415/lectures/6885939", + "only_matching": True, + }, + { + "url": "teachable:https://v1.upskillcourses.com/courses/essential-web-developer-course/lectures/1747100", + "only_matching": True, + }, + ] @staticmethod def _is_teachable(webpage): - return 'teachableTracker.linker:autoLink' in webpage and re.search( + return "teachableTracker.linker:autoLink" in webpage and re.search( r']+href=["\']https?://(?:process\.fs|assets)\.teachablecdn\.com', - webpage) + webpage, + ) @classmethod def _extract_embed_urls(cls, url, webpage): if cls._is_teachable(webpage): - if re.match(r'https?://[^/]+/(?:courses|p)', url): - yield f'{cls._URL_PREFIX}{url}' + if re.match(r"https?://[^/]+/(?:courses|p)", url): + yield f"{cls._URL_PREFIX}{url}" raise cls.StopExtraction() def _real_extract(self, url): mobj = self._match_valid_url(url) - site = mobj.group('site') or mobj.group('site_t') - video_id = mobj.group('id') + site = mobj.group("site") or mobj.group("site_t") + video_id = mobj.group("id") self._login(site) @@ -160,17 +193,45 @@ class TeachableIE(TeachableBaseIE): webpage = self._download_webpage(url, video_id) + hotmart_container_element = get_element_html_by_class( + "hotmart_video_player", webpage + ) + if hotmart_container_element is not None: + hotmart_container_attributes = extract_attributes(hotmart_container_element) + attachment_id = hotmart_container_attributes["data-attachment-id"] + + hotmart_video_url_data = self._download_json( + f"https://{site}/api/v2/hotmart/private_video", + video_id, + query={"attachment_id": attachment_id}, + ) + + hotmart_url = ( + f'https://player.hotmart.com/embed/{hotmart_video_url_data["video_id"]}?' + f'signature={hotmart_video_url_data["signature"]}&' + f'token={hotmart_video_url_data["teachable_application_key"]}' + ) + + hotmart_urls = [hotmart_url] + else: + hotmart_urls = [] + wistia_urls = WistiaIE._extract_embed_urls(url, webpage) - if not wistia_urls: - if any(re.search(p, webpage) for p in ( + + if not wistia_urls and not hotmart_urls: + if any( + re.search(p, webpage) + for p in ( r'class=["\']lecture-contents-locked', - r'>\s*Lecture contents locked', + r">\s*Lecture contents locked", r'id=["\']lecture-locked', # https://academy.tailoredtutors.co.uk/courses/108779/lectures/1955313 r'class=["\'](?:inner-)?lesson-locked', - r'>LESSON LOCKED<')): - self.raise_login_required('Lecture contents locked') - raise ExtractorError('Unable to find video URL') + r">LESSON LOCKED<", + ) + ): + self.raise_login_required("Lecture contents locked") + raise ExtractorError("Unable to find video URL") title = self._og_search_title(webpage, default=None) @@ -178,15 +239,25 @@ class TeachableIE(TeachableBaseIE): chapter_number = None section_item = self._search_regex( r'(?s)(?P
  • ]+\bdata-lecture-id=["\']%s[^>]+>.+?
  • )' % video_id, - webpage, 'section item', default=None, group='li') + webpage, + "section item", + default=None, + group="li", + ) if section_item: - chapter_number = int_or_none(self._search_regex( - r'data-ss-position=["\'](\d+)', section_item, 'section id', - default=None)) + chapter_number = int_or_none( + self._search_regex( + r'data-ss-position=["\'](\d+)', + section_item, + "section id", + default=None, + ) + ) if chapter_number is not None: sections = [] for s in re.findall( - r'(?s)]+\bclass=["\']section-title[^>]+>(.+?)', webpage): + r'(?s)]+\bclass=["\']section-title[^>]+>(.+?)', webpage + ): section = strip_or_none(clean_html(s)) if not section: sections = [] @@ -195,59 +266,88 @@ class TeachableIE(TeachableBaseIE): if chapter_number <= len(sections): chapter = sections[chapter_number - 1] - entries = [{ - '_type': 'url_transparent', - 'url': wistia_url, - 'ie_key': WistiaIE.ie_key(), - 'title': title, - 'chapter': chapter, - 'chapter_number': chapter_number, - } for wistia_url in wistia_urls] + entries = [] + for wistia_url in wistia_urls: + entries.append( + { + "_type": "url_transparent", + "url": wistia_url, + "ie_key": WistiaIE.ie_key(), + "title": title, + "chapter": chapter, + "chapter_number": chapter_number, + } + ) + + for hotmart_url in hotmart_urls: + entries.append( + { + "_type": "url_transparent", + "url": hotmart_url, + "ie_key": HotmartIE.ie_key(), + "title": title, + "chapter": chapter, + "chapter_number": chapter_number, + } + ) return self.playlist_result(entries, video_id, title) class TeachableCourseIE(TeachableBaseIE): - _VALID_URL = r'''(?x) + _VALID_URL = ( + r"""(?x) (?: %shttps?://(?P[^/]+)| https?://(?:www\.)?(?P%s) ) /(?:courses|p)/(?:enrolled/)?(?P[^/?#&]+) - ''' % TeachableBaseIE._VALID_URL_SUB_TUPLE - _TESTS = [{ - 'url': 'http://v1.upskillcourses.com/courses/essential-web-developer-course/', - 'info_dict': { - 'id': 'essential-web-developer-course', - 'title': 'The Essential Web Developer Course (Free)', + """ + % TeachableBaseIE._VALID_URL_SUB_TUPLE + ) + _TESTS = [ + { + "url": "http://v1.upskillcourses.com/courses/essential-web-developer-course/", + "info_dict": { + "id": "essential-web-developer-course", + "title": "The Essential Web Developer Course (Free)", + }, + "playlist_count": 192, }, - 'playlist_count': 192, - }, { - 'url': 'http://v1.upskillcourses.com/courses/119763/', - 'only_matching': True, - }, { - 'url': 'http://v1.upskillcourses.com/courses/enrolled/119763', - 'only_matching': True, - }, { - 'url': 'https://gns3.teachable.com/courses/enrolled/423415', - 'only_matching': True, - }, { - 'url': 'teachable:https://learn.vrdev.school/p/gear-vr-developer-mini', - 'only_matching': True, - }, { - 'url': 'teachable:https://filmsimplified.com/p/davinci-resolve-15-crash-course', - 'only_matching': True, - }] + { + "url": "http://v1.upskillcourses.com/courses/119763/", + "only_matching": True, + }, + { + "url": "http://v1.upskillcourses.com/courses/enrolled/119763", + "only_matching": True, + }, + { + "url": "https://gns3.teachable.com/courses/enrolled/423415", + "only_matching": True, + }, + { + "url": "teachable:https://learn.vrdev.school/p/gear-vr-developer-mini", + "only_matching": True, + }, + { + "url": "teachable:https://filmsimplified.com/p/davinci-resolve-15-crash-course", + "only_matching": True, + }, + ] @classmethod def suitable(cls, url): - return False if TeachableIE.suitable(url) else super( - TeachableCourseIE, cls).suitable(url) + return ( + False + if TeachableIE.suitable(url) + else super(TeachableCourseIE, cls).suitable(url) + ) def _real_extract(self, url): mobj = self._match_valid_url(url) - site = mobj.group('site') or mobj.group('site_t') - course_id = mobj.group('id') + site = mobj.group("site") or mobj.group("site_t") + course_id = mobj.group("id") self._login(site) @@ -258,38 +358,55 @@ class TeachableCourseIE(TeachableBaseIE): webpage = self._download_webpage(url, course_id) - url_base = 'https://%s/' % site + url_base = "https://%s/" % site entries = [] for mobj in re.finditer( - r'(?s)(?P
  • ]+class=(["\'])(?:(?!\2).)*?section-item[^>]+>.+?
  • )', - webpage): - li = mobj.group('li') - if 'fa-youtube-play' not in li and not re.search(r'\d{1,2}:\d{2}', li): + r'(?s)(?P
  • ]+class=(["\'])(?:(?!\2).)*?section-item[^>]+>.+?
  • )', + webpage, + ): + li = mobj.group("li") + if "fa-youtube-play" not in li and not re.search(r"\d{1,2}:\d{2}", li): continue lecture_url = self._search_regex( - r']+href=(["\'])(?P(?:(?!\1).)+)\1', li, - 'lecture url', default=None, group='url') + r']+href=(["\'])(?P(?:(?!\1).)+)\1', + li, + "lecture url", + default=None, + group="url", + ) if not lecture_url: continue lecture_id = self._search_regex( - r'/lectures/(\d+)', lecture_url, 'lecture id', default=None) + r"/lectures/(\d+)", lecture_url, "lecture id", default=None + ) title = self._html_search_regex( - r']+class=["\']lecture-name[^>]+>([^<]+)', li, - 'title', default=None) + r']+class=["\']lecture-name[^>]+>([^<]+)', + li, + "title", + default=None, + ) entry_url = urljoin(url_base, lecture_url) if prefixed: entry_url = self._URL_PREFIX + entry_url entries.append( self.url_result( entry_url, - ie=TeachableIE.ie_key(), video_id=lecture_id, - video_title=clean_html(title))) + ie=TeachableIE.ie_key(), + video_id=lecture_id, + video_title=clean_html(title), + ) + ) course_title = self._html_search_regex( - (r'(?s)]+class=["\']course-image[^>]+>\s*(.+?)]+class=["\']course-title[^>]+>(.+?)]+class=["\']course-image[^>]+>\s*(.+?)]+class=["\']course-title[^>]+>(.+?) Date: Thu, 20 Jul 2023 20:06:48 +0200 Subject: [PATCH 02/13] Update Test URL in HotmartIE The test URL in the HotmartIE class has been updated to match the format expected by _VALID_URL. This resolves a test error where the test URL did not match the expected format. --- yt_dlp/extractor/hotmart.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/hotmart.py b/yt_dlp/extractor/hotmart.py index b82b79f2a..2add75e58 100644 --- a/yt_dlp/extractor/hotmart.py +++ b/yt_dlp/extractor/hotmart.py @@ -6,7 +6,7 @@ class HotmartIE(InfoExtractor): _VALID_URL = r"https?://player\.hotmart\.com/embed/(?P[a-zA-Z0-9]+)" _TESTS = [ { - "url": "https://yourextractor.com/watch/42", + "url": "https://player.hotmart.com/embed/Nq7rJ2mARA?signature=SjylayL7eOhQwSYJNxKWY1g2gqJ5WMz4l1VSuOGVv6qAJnDXFsK6fSXziJ_u12YHSfrT0SfHTYxqgGqZ4UBLLmo7ScIzDQR9JUs_sJbLVtp0g6ferIbEIJxjyXk2MO1suyhYtdIz8N5CaZ9mJ-kE2wBK3SlR1eYQVZ9EEmf4Y254wsB71JGJW8_blsKV31VWKWKaFqQ5XmaHuS9d5N5cVG2ZDetxgqEea2ULLUwD6U2i2jBpnOobbEiXga5KRO6r1zA0xOOzx_K3BxbrS1UpmTfKpk1Z3Pt35aoKtvG9wjhJpR-4n-0KKKr6fUq8vn-t6W9bqSnULAJb-biBsNhx2w==&token=aa2d356b-e2f0-45e8-9725-e0efc7b5d29c&autoplay=autoplay", "md5": "TODO: md5 sum of the first 10241 bytes of the video file (use --test)", "info_dict": { "id": "42", From 0dd98c888d9e65c94775f8e5d7c44db8900b5514 Mon Sep 17 00:00:00 2001 From: Abdessamad DERRAZ Date: Thu, 20 Jul 2023 23:14:06 +0200 Subject: [PATCH 03/13] Fix tests for teachable.py and hotmart.py This commit addresses the failing tests in teachable.py and hotmart.py. The changes ensure that the tests pass by correctly handling the temporary URLs generated by these scripts. Please note that the tests are dependent on the validity of these temporary URLs. Therefore, the tests will pass temporarily as long as the URLs are valid. In addition, this commit adheres to the style guide by wrapping the code at 79 characters. --- yt_dlp/extractor/hotmart.py | 53 +++++++++++------ yt_dlp/extractor/teachable.py | 103 ++++++++++++++++++++++------------ 2 files changed, 104 insertions(+), 52 deletions(-) diff --git a/yt_dlp/extractor/hotmart.py b/yt_dlp/extractor/hotmart.py index 2add75e58..43c54583f 100644 --- a/yt_dlp/extractor/hotmart.py +++ b/yt_dlp/extractor/hotmart.py @@ -1,23 +1,34 @@ from .common import InfoExtractor -from ..utils import get_element_by_id, traverse_obj +from ..utils import ( + get_element_by_id, + traverse_obj, +) class HotmartIE(InfoExtractor): _VALID_URL = r"https?://player\.hotmart\.com/embed/(?P[a-zA-Z0-9]+)" _TESTS = [ { - "url": "https://player.hotmart.com/embed/Nq7rJ2mARA?signature=SjylayL7eOhQwSYJNxKWY1g2gqJ5WMz4l1VSuOGVv6qAJnDXFsK6fSXziJ_u12YHSfrT0SfHTYxqgGqZ4UBLLmo7ScIzDQR9JUs_sJbLVtp0g6ferIbEIJxjyXk2MO1suyhYtdIz8N5CaZ9mJ-kE2wBK3SlR1eYQVZ9EEmf4Y254wsB71JGJW8_blsKV31VWKWKaFqQ5XmaHuS9d5N5cVG2ZDetxgqEea2ULLUwD6U2i2jBpnOobbEiXga5KRO6r1zA0xOOzx_K3BxbrS1UpmTfKpk1Z3Pt35aoKtvG9wjhJpR-4n-0KKKr6fUq8vn-t6W9bqSnULAJb-biBsNhx2w==&token=aa2d356b-e2f0-45e8-9725-e0efc7b5d29c&autoplay=autoplay", - "md5": "TODO: md5 sum of the first 10241 bytes of the video file (use --test)", + "url": ( + "https://player.hotmart.com/embed/DLNy9QQ4qr?signature=cSnA" + "r99eUZ0cne-ZMMJdjwwzV5hD4pLXVPO3urVQUF0XoWaG3MqF6jhfPFf7il" + "Eh6YdtJeirComlat6kF_ZFQMFf1iW-lmqXfsWdANDVYfh8-lqjKY02_Xxg" + "a0nwV3WwrYRkuQ7pnJZiueGkbSHvfixgNGzp12kNDqK1ynPojnVfIaijK2" + "NQV9A0oeG7icUW2K-C9KD0phuuhQmt5qS8u7FxRC7buQm5MoSKYGMi_ot2" + "FUSe2Mgx_S1TOYYNgi0FiTyUyixn884HouIIz8e_N4ceE8PF7x8mVK_IWH" + "gIGic2NhBo9aAo7m8TmP4FA5SNaQnEIPqY1G7SHmDoBvcXyA==&token=a" + "a2d356b-e2f0-45e8-9725-e0efc7b5d29c&autoplay=autoplay" + ), + "md5": "620b25017119475adbd6f7932294129d", "info_dict": { - "id": "42", + "id": "DLNy9QQ4qr", + "video_id": "DLNy9QQ4qr", "ext": "mp4", - "title": "Video title goes here", - "thumbnail": r"re:^https?://.*\.jpg$", - # TODO more properties, either as: - # * A value - # * MD5 checksum; start the string with md5: - # * A regular expression; start the string with re: - # * Any Python type (for example int or float) + "title": "Hotmart video #DLNy9QQ4qr", + "thumbnail": ( + r"re:https?://.*\.(?:jpg|jpeg|png|gif)\?token=exp=\d+~acl" + r"=.*~hmac=[a-f0-9]+$" + ), }, } ] @@ -30,21 +41,31 @@ class HotmartIE(InfoExtractor): video_data_string = get_element_by_id("__NEXT_DATA__", webpage) video_data = self._parse_json(video_data_string, video_id) - # Encrypted url is 'urlEncrypted' instead of 'url' - # See https://github.com/yt-dlp/yt-dlp/issues/3564 for initial discussion of design + # Extract the title from the video_data object + title = traverse_obj( + video_data, ("props", "pageProps", "applicationData", "mediaTitle") + ) + url = traverse_obj( video_data, - ("props", "pageProps", "applicationData", "mediaAssets", 0, "url"), + ( + "props", + "pageProps", + "applicationData", + "mediaAssets", + 0, + "urlEncrypted", + ), ) thumbnail_url = traverse_obj( - video_data, ("props", "pageProps", "applicationData", "urlThumbnail") + video_data, + ("props", "pageProps", "applicationData", "thumbnailUrl"), ) formats, subtitles = self._extract_m3u8_formats_and_subtitles( url, video_id, "mp4" ) - title = self._og_search_title(webpage, default=None) description = self._og_search_description(webpage, default=None) chapter = None chapter_number = None diff --git a/yt_dlp/extractor/teachable.py b/yt_dlp/extractor/teachable.py index 81366399a..dc2c792d6 100644 --- a/yt_dlp/extractor/teachable.py +++ b/yt_dlp/extractor/teachable.py @@ -4,9 +4,9 @@ from .common import InfoExtractor from .hotmart import HotmartIE from .wistia import WistiaIE from ..utils import ( - ExtractorError, clean_html, extract_attributes, + ExtractorError, get_element_by_class, get_element_html_by_class, int_or_none, @@ -51,7 +51,9 @@ class TeachableBaseIE(InfoExtractor): return login_page, urlh = self._download_webpage_handle( - "https://%s/sign_in" % site, None, "Downloading %s login page" % site + "https://%s/sign_in" % site, + None, + "Downloading %s login page" % site, ) def is_logged(webpage): @@ -123,45 +125,60 @@ class TeachableBaseIE(InfoExtractor): class TeachableIE(TeachableBaseIE): - _VALID_URL = ( - r"""(?x) + _VALID_URL = r"""(?x) (?: %shttps?://(?P[^/]+)| https?://(?:www\.)?(?P%s) ) /courses/[^/]+/lectures/(?P\d+) - """ - % TeachableBaseIE._VALID_URL_SUB_TUPLE - ) + """ % TeachableBaseIE._VALID_URL_SUB_TUPLE _TESTS = [ { - "url": "https://gns3.teachable.com/courses/gns3-certified-associate/lectures/6842364", + "url": ( + "https://gns3.teachable.com/courses/gns3-certified" + "-associate/lectures/6842364" + ), "info_dict": { - "id": "untlgzk1v7", - "ext": "bin", + "id": "Nq7vkXmXRA", + "video_id": "Nq7vkXmXRA", + "ext": "mp4", "title": "Overview", - "description": "md5:071463ff08b86c208811130ea1c2464c", - "duration": 736.4, - "timestamp": 1542315762, - "upload_date": "20181115", "chapter": "Welcome", "chapter_number": 1, + "webpage_url": ( + r"re:https://player.hotmart.com/embed/Nq7vkXmXRA" + r"\?signature=.+&token=.+" + ), + "width": 1920, + "height": 1080, + "thumbnail": ( + r"re:https?://.*\.(" + r"?:jpg|jpeg|webp)\?token=exp=\d+~acl=.*~hm" + r"ac=[a-f0-9]+$" + ), }, "params": { "skip_download": True, }, }, { - "url": "http://v1.upskillcourses.com/courses/119763/lectures/1747100", + "url": ( + "http://v1.upskillcourses.com/courses/119763/lectures/1747100" + ), "only_matching": True, }, { - "url": "https://gns3.teachable.com/courses/423415/lectures/6885939", + "url": ( + "https://gns3.teachable.com/courses/423415/lectures/6885939" + ), "only_matching": True, }, { - "url": "teachable:https://v1.upskillcourses.com/courses/essential-web-developer-course/lectures/1747100", + "url": ( + "teachable:https://v1.upskillcourses.com/courses/essential" + "-web-developer-course/lectures/1747100" + ), "only_matching": True, }, ] @@ -169,7 +186,8 @@ class TeachableIE(TeachableBaseIE): @staticmethod def _is_teachable(webpage): return "teachableTracker.linker:autoLink" in webpage and re.search( - r']+href=["\']https?://(?:process\.fs|assets)\.teachablecdn\.com', + r']+href=["\']https?://(' + r"?:process\.fs|assets)\.teachablecdn\.com", webpage, ) @@ -189,7 +207,7 @@ class TeachableIE(TeachableBaseIE): prefixed = url.startswith(self._URL_PREFIX) if prefixed: - url = url[len(self._URL_PREFIX):] + url = url[len(self._URL_PREFIX) :] webpage = self._download_webpage(url, video_id) @@ -197,7 +215,9 @@ class TeachableIE(TeachableBaseIE): "hotmart_video_player", webpage ) if hotmart_container_element is not None: - hotmart_container_attributes = extract_attributes(hotmart_container_element) + hotmart_container_attributes = extract_attributes( + hotmart_container_element + ) attachment_id = hotmart_container_attributes["data-attachment-id"] hotmart_video_url_data = self._download_json( @@ -207,9 +227,11 @@ class TeachableIE(TeachableBaseIE): ) hotmart_url = ( - f'https://player.hotmart.com/embed/{hotmart_video_url_data["video_id"]}?' - f'signature={hotmart_video_url_data["signature"]}&' - f'token={hotmart_video_url_data["teachable_application_key"]}' + "https://player.hotmart.com/embed/" + f"{hotmart_video_url_data [ 'video_id' ]}?" + f"signature={hotmart_video_url_data [ 'signature' ]}&" + "token=" + f"{hotmart_video_url_data [ 'teachable_application_key' ]}" ) hotmart_urls = [hotmart_url] @@ -225,7 +247,6 @@ class TeachableIE(TeachableBaseIE): r'class=["\']lecture-contents-locked', r">\s*Lecture contents locked", r'id=["\']lecture-locked', - # https://academy.tailoredtutors.co.uk/courses/108779/lectures/1955313 r'class=["\'](?:inner-)?lesson-locked', r">LESSON LOCKED<", ) @@ -238,7 +259,8 @@ class TeachableIE(TeachableBaseIE): chapter = None chapter_number = None section_item = self._search_regex( - r'(?s)(?P
  • ]+\bdata-lecture-id=["\']%s[^>]+>.+?
  • )' % video_id, + r'(?s)(?P
  • ]+\bdata-lecture-id=["\']%s[^>]+>.+?
  • )' + % video_id, webpage, "section item", default=None, @@ -256,7 +278,9 @@ class TeachableIE(TeachableBaseIE): if chapter_number is not None: sections = [] for s in re.findall( - r'(?s)]+\bclass=["\']section-title[^>]+>(.+?)', webpage + r'(?s)]+\bclass=["\']section-title[^>]+>(.+?)' + r'', + webpage, ): section = strip_or_none(clean_html(s)) if not section: @@ -295,19 +319,19 @@ class TeachableIE(TeachableBaseIE): class TeachableCourseIE(TeachableBaseIE): - _VALID_URL = ( - r"""(?x) + _VALID_URL = r"""(?x) (?: %shttps?://(?P[^/]+)| https?://(?:www\.)?(?P%s) ) /(?:courses|p)/(?:enrolled/)?(?P[^/?#&]+) - """ - % TeachableBaseIE._VALID_URL_SUB_TUPLE - ) + """ % TeachableBaseIE._VALID_URL_SUB_TUPLE _TESTS = [ { - "url": "http://v1.upskillcourses.com/courses/essential-web-developer-course/", + "url": ( + "http://v1.upskillcourses.com/courses/essential-web-developer-" + "course/" + ), "info_dict": { "id": "essential-web-developer-course", "title": "The Essential Web Developer Course (Free)", @@ -327,11 +351,16 @@ class TeachableCourseIE(TeachableBaseIE): "only_matching": True, }, { - "url": "teachable:https://learn.vrdev.school/p/gear-vr-developer-mini", + "url": ( + "teachable:https://learn.vrdev.school/p/gear-vr-developer-mini" + ), "only_matching": True, }, { - "url": "teachable:https://filmsimplified.com/p/davinci-resolve-15-crash-course", + "url": ( + "teachable:https://filmsimplified.com/p/davinci-resolve-15-cra" + "sh-course" + ), "only_matching": True, }, ] @@ -354,7 +383,7 @@ class TeachableCourseIE(TeachableBaseIE): prefixed = url.startswith(self._URL_PREFIX) if prefixed: prefix = self._URL_PREFIX - url = url[len(prefix):] + url = url[len(prefix) :] webpage = self._download_webpage(url, course_id) @@ -367,7 +396,9 @@ class TeachableCourseIE(TeachableBaseIE): webpage, ): li = mobj.group("li") - if "fa-youtube-play" not in li and not re.search(r"\d{1,2}:\d{2}", li): + if "fa-youtube-play" not in li and not re.search( + r"\d{1,2}:\d{2}", li + ): continue lecture_url = self._search_regex( r']+href=(["\'])(?P(?:(?!\1).)+)\1', From a997e972a595163f49eec197802b9d5335a6e3cd Mon Sep 17 00:00:00 2001 From: Abdessamad DERRAZ Date: Thu, 20 Jul 2023 23:19:07 +0200 Subject: [PATCH 04/13] Fix flake8 E203 errors in teachable.py This commit resolves the flake8 E203 errors in the teachable.py file. The errors were caused by unnecessary whitespace before colons on lines 210 and 386. The removal of these spaces ensures the code adheres to PEP 8 style guidelines, allowing flake8 to run without any errors. --- yt_dlp/extractor/teachable.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/teachable.py b/yt_dlp/extractor/teachable.py index dc2c792d6..686c306c8 100644 --- a/yt_dlp/extractor/teachable.py +++ b/yt_dlp/extractor/teachable.py @@ -207,7 +207,7 @@ class TeachableIE(TeachableBaseIE): prefixed = url.startswith(self._URL_PREFIX) if prefixed: - url = url[len(self._URL_PREFIX) :] + url = url[len(self._URL_PREFIX):] webpage = self._download_webpage(url, video_id) @@ -383,7 +383,7 @@ class TeachableCourseIE(TeachableBaseIE): prefixed = url.startswith(self._URL_PREFIX) if prefixed: prefix = self._URL_PREFIX - url = url[len(prefix) :] + url = url[len(prefix):] webpage = self._download_webpage(url, course_id) From f5fc4de7c90efaa126ac377c9c189a6505ce70ea Mon Sep 17 00:00:00 2001 From: Abdessamad DERRAZ Date: Fri, 21 Jul 2023 11:35:35 +0200 Subject: [PATCH 05/13] Revert unnecessary style changes caused by Black This commit reverts the unnecessary style changes that were introduced by the Black tool. While Black is useful for enforcing PEP 8 style guidelines, it had caused too many modifications in this case, leading to a loss of original code style and readability. This commit ensures that only the necessary style changes for PEP 8 compliance are kept, while the rest of the code remains as originally intended. --- yt_dlp/extractor/teachable.py | 394 +++++++++++++--------------------- 1 file changed, 147 insertions(+), 247 deletions(-) diff --git a/yt_dlp/extractor/teachable.py b/yt_dlp/extractor/teachable.py index 686c306c8..1c97d7071 100644 --- a/yt_dlp/extractor/teachable.py +++ b/yt_dlp/extractor/teachable.py @@ -17,25 +17,22 @@ from ..utils import ( class TeachableBaseIE(InfoExtractor): - _NETRC_MACHINE = "teachable" - _URL_PREFIX = "teachable:" + _NETRC_MACHINE = 'teachable' + _URL_PREFIX = 'teachable:' _SITES = { # Only notable ones here - "v1.upskillcourses.com": "upskill", - "gns3.teachable.com": "gns3", - "academyhacker.com": "academyhacker", - "stackskills.com": "stackskills", - "market.saleshacker.com": "saleshacker", - "learnability.org": "learnability", - "edurila.com": "edurila", - "courses.workitdaily.com": "workitdaily", + 'v1.upskillcourses.com': 'upskill', + 'gns3.teachable.com': 'gns3', + 'academyhacker.com': 'academyhacker', + 'stackskills.com': 'stackskills', + 'market.saleshacker.com': 'saleshacker', + 'learnability.org': 'learnability', + 'edurila.com': 'edurila', + 'courses.workitdaily.com': 'workitdaily', } - _VALID_URL_SUB_TUPLE = ( - _URL_PREFIX, - "|".join(re.escape(site) for site in _SITES.keys()), - ) + _VALID_URL_SUB_TUPLE = (_URL_PREFIX, '|'.join(re.escape(site) for site in _SITES.keys())) def _real_initialize(self): self._logged_in = False @@ -44,27 +41,19 @@ class TeachableBaseIE(InfoExtractor): if self._logged_in: return - username, password = self._get_login_info( - netrc_machine=self._SITES.get(site, site) - ) + username, password = self._get_login_info(netrc_machine=self._SITES.get(site, site)) if username is None: return login_page, urlh = self._download_webpage_handle( - "https://%s/sign_in" % site, - None, - "Downloading %s login page" % site, - ) + 'https://%s/sign_in' % site, None, + 'Downloading %s login page' % site) def is_logged(webpage): - return any( - re.search(p, webpage) - for p in ( - r'class=["\']user-signout', - r']+\bhref=["\']/sign_out', - r"Log\s+[Oo]ut\s*<", - ) - ) + return any(re.search(p, webpage) for p in ( + r'class=["\']user-signout', + r']+\bhref=["\']/sign_out', + r'Log\s+[Oo]ut\s*<')) if is_logged(login_page): self._logged_in = True @@ -74,134 +63,102 @@ class TeachableBaseIE(InfoExtractor): login_form = self._hidden_inputs(login_page) - login_form.update( - { - "user[email]": username, - "user[password]": password, - } - ) + login_form.update({ + 'user[email]': username, + 'user[password]': password, + }) post_url = self._search_regex( - r']+action=(["\'])(?P(?:(?!\1).)+)\1', - login_page, - "post url", - default=login_url, - group="url", - ) + r']+action=(["\'])(?P(?:(?!\1).)+)\1', login_page, + 'post url', default=login_url, group='url') - if not post_url.startswith("http"): + if not post_url.startswith('http'): post_url = urljoin(login_url, post_url) response = self._download_webpage( - post_url, - None, - "Logging in to %s" % site, + post_url, None, 'Logging in to %s' % site, data=urlencode_postdata(login_form), headers={ - "Content-Type": "application/x-www-form-urlencoded", - "Referer": login_url, - }, - ) + 'Content-Type': 'application/x-www-form-urlencoded', + 'Referer': login_url, + }) - if ">I accept the new Privacy Policy<" in response: + if '>I accept the new Privacy Policy<' in response: raise ExtractorError( - "Unable to login: %s asks you to accept new Privacy Policy. " - "Go to https://%s/ and accept." % (site, site), - expected=True, - ) + 'Unable to login: %s asks you to accept new Privacy Policy. ' + 'Go to https://%s/ and accept.' % (site, site), expected=True) # Successful login if is_logged(response): self._logged_in = True return - message = get_element_by_class("alert", response) + message = get_element_by_class('alert', response) if message is not None: raise ExtractorError( - "Unable to login: %s" % clean_html(message), expected=True - ) + 'Unable to login: %s' % clean_html(message), expected=True) - raise ExtractorError("Unable to log in") + raise ExtractorError('Unable to log in') class TeachableIE(TeachableBaseIE): - _VALID_URL = r"""(?x) + _VALID_URL = r'''(?x) (?: %shttps?://(?P[^/]+)| https?://(?:www\.)?(?P%s) ) /courses/[^/]+/lectures/(?P\d+) - """ % TeachableBaseIE._VALID_URL_SUB_TUPLE + ''' % TeachableBaseIE._VALID_URL_SUB_TUPLE _TESTS = [ { - "url": ( - "https://gns3.teachable.com/courses/gns3-certified" - "-associate/lectures/6842364" - ), - "info_dict": { - "id": "Nq7vkXmXRA", - "video_id": "Nq7vkXmXRA", - "ext": "mp4", - "title": "Overview", - "chapter": "Welcome", - "chapter_number": 1, - "webpage_url": ( - r"re:https://player.hotmart.com/embed/Nq7vkXmXRA" - r"\?signature=.+&token=.+" - ), - "width": 1920, - "height": 1080, - "thumbnail": ( - r"re:https?://.*\.(" - r"?:jpg|jpeg|webp)\?token=exp=\d+~acl=.*~hm" - r"ac=[a-f0-9]+$" - ), + 'url': 'https://gns3.teachable.com/courses/gns3-certified-associate/lectures/6842364', + 'info_dict': { + 'id': 'Nq7vkXmXRA', + 'video_id': 'Nq7vkXmXRA', + 'ext': 'mp4', + 'title': 'Overview', + 'chapter': 'Welcome', + 'chapter_number': 1, + 'webpage_url': r're:https://player.hotmart.com/embed/Nq7vkXmXRA\?signature=.+&token=.+', + 'width': 1920, + 'height': 1080, + 'thumbnail': r're:https?://.*\.(?:jpg|jpeg|webp)\?token=exp=\d+~acl=.*~hmac=[a-f0-9]+$', }, - "params": { - "skip_download": True, + 'params': { + 'skip_download': True, }, - }, - { - "url": ( - "http://v1.upskillcourses.com/courses/119763/lectures/1747100" - ), - "only_matching": True, - }, - { - "url": ( - "https://gns3.teachable.com/courses/423415/lectures/6885939" - ), - "only_matching": True, - }, - { - "url": ( - "teachable:https://v1.upskillcourses.com/courses/essential" - "-web-developer-course/lectures/1747100" - ), - "only_matching": True, - }, - ] + }, { + 'url': 'http://v1.upskillcourses.com/courses/119763/lectures/1747100', + 'only_matching': True, + }, { + 'url': 'http://v1.upskillcourses.com/courses/119763/lectures/1747100', + 'only_matching': True, + }, { + 'url': 'https://gns3.teachable.com/courses/423415/lectures/6885939', + 'only_matching': True, + }, { + 'url': 'teachable:https://v1.upskillcourses.com/courses/essential-web-developer-course/lectures/1747100', + 'only_matching': True, + }] @staticmethod def _is_teachable(webpage): - return "teachableTracker.linker:autoLink" in webpage and re.search( - r']+href=["\']https?://(' - r"?:process\.fs|assets)\.teachablecdn\.com", - webpage, - ) + return 'teachableTracker.linker:autoLink' in webpage and re.search( + r']+href=["\']https?://(?:process\.fs|assets)\.teachablecdn\.com', + webpage) @classmethod def _extract_embed_urls(cls, url, webpage): if cls._is_teachable(webpage): - if re.match(r"https?://[^/]+/(?:courses|p)", url): - yield f"{cls._URL_PREFIX}{url}" + if re.match(r'https?://[^/]+/(?:courses|p)', url): + yield f'{cls._URL_PREFIX}{url}' raise cls.StopExtraction() def _real_extract(self, url): mobj = self._match_valid_url(url) - site = mobj.group("site") or mobj.group("site_t") - video_id = mobj.group("id") + site = mobj.group('site') or mobj.group('site_t') + video_id = mobj.group('id') self._login(site) @@ -212,26 +169,24 @@ class TeachableIE(TeachableBaseIE): webpage = self._download_webpage(url, video_id) hotmart_container_element = get_element_html_by_class( - "hotmart_video_player", webpage + 'hotmart_video_player', webpage ) if hotmart_container_element is not None: - hotmart_container_attributes = extract_attributes( - hotmart_container_element - ) - attachment_id = hotmart_container_attributes["data-attachment-id"] + hotmart_container_attributes = extract_attributes(hotmart_container_element) + attachment_id = hotmart_container_attributes['data-attachment-id'] hotmart_video_url_data = self._download_json( - f"https://{site}/api/v2/hotmart/private_video", + f'https://{site}/api/v2/hotmart/private_video', video_id, - query={"attachment_id": attachment_id}, + query={'attachment_id': attachment_id}, ) hotmart_url = ( - "https://player.hotmart.com/embed/" - f"{hotmart_video_url_data [ 'video_id' ]}?" - f"signature={hotmart_video_url_data [ 'signature' ]}&" - "token=" - f"{hotmart_video_url_data [ 'teachable_application_key' ]}" + 'https://player.hotmart.com/embed/' + f'{hotmart_video_url_data ["video_id"]}?' + f'signature={hotmart_video_url_data ["signature"]}&' + 'token=' + f'{hotmart_video_url_data ["teachable_application_key"]}' ) hotmart_urls = [hotmart_url] @@ -239,49 +194,31 @@ class TeachableIE(TeachableBaseIE): hotmart_urls = [] wistia_urls = WistiaIE._extract_embed_urls(url, webpage) - if not wistia_urls and not hotmart_urls: - if any( - re.search(p, webpage) - for p in ( + if any(re.search(p, webpage) for p in ( r'class=["\']lecture-contents-locked', - r">\s*Lecture contents locked", + r'>\s*Lecture contents locked', r'id=["\']lecture-locked', r'class=["\'](?:inner-)?lesson-locked', - r">LESSON LOCKED<", - ) - ): - self.raise_login_required("Lecture contents locked") - raise ExtractorError("Unable to find video URL") + r'>LESSON LOCKED<')): + self.raise_login_required('Lecture contents locked') + raise ExtractorError('Unable to find video URL') title = self._og_search_title(webpage, default=None) chapter = None chapter_number = None section_item = self._search_regex( - r'(?s)(?P
  • ]+\bdata-lecture-id=["\']%s[^>]+>.+?
  • )' - % video_id, - webpage, - "section item", - default=None, - group="li", - ) + r'(?s)(?P
  • ]+\bdata-lecture-id=["\']%s[^>]+>.+?
  • )' % video_id, + webpage, 'section item', default=None, group='li') if section_item: - chapter_number = int_or_none( - self._search_regex( - r'data-ss-position=["\'](\d+)', - section_item, - "section id", - default=None, - ) - ) + chapter_number = int_or_none(self._search_regex( + r'data-ss-position=["\'](\d+)', section_item, 'section id', + default=None)) if chapter_number is not None: sections = [] for s in re.findall( - r'(?s)]+\bclass=["\']section-title[^>]+>(.+?)' - r'', - webpage, - ): + r'(?s)]+\bclass=["\']section-title[^>]+>(.+?)', webpage): section = strip_or_none(clean_html(s)) if not section: sections = [] @@ -294,24 +231,24 @@ class TeachableIE(TeachableBaseIE): for wistia_url in wistia_urls: entries.append( { - "_type": "url_transparent", - "url": wistia_url, - "ie_key": WistiaIE.ie_key(), - "title": title, - "chapter": chapter, - "chapter_number": chapter_number, + '_type': 'url_transparent', + 'url': wistia_url, + 'ie_key': WistiaIE.ie_key(), + 'title': title, + 'chapter': chapter, + 'chapter_number': chapter_number, } ) for hotmart_url in hotmart_urls: entries.append( { - "_type": "url_transparent", - "url": hotmart_url, - "ie_key": HotmartIE.ie_key(), - "title": title, - "chapter": chapter, - "chapter_number": chapter_number, + '_type': 'url_transparent', + 'url': hotmart_url, + 'ie_key': HotmartIE.ie_key(), + 'title': title, + 'chapter': chapter, + 'chapter_number': chapter_number, } ) @@ -319,64 +256,46 @@ class TeachableIE(TeachableBaseIE): class TeachableCourseIE(TeachableBaseIE): - _VALID_URL = r"""(?x) + _VALID_URL = r'''(?x) (?: %shttps?://(?P[^/]+)| https?://(?:www\.)?(?P%s) ) /(?:courses|p)/(?:enrolled/)?(?P[^/?#&]+) - """ % TeachableBaseIE._VALID_URL_SUB_TUPLE - _TESTS = [ - { - "url": ( - "http://v1.upskillcourses.com/courses/essential-web-developer-" - "course/" - ), - "info_dict": { - "id": "essential-web-developer-course", - "title": "The Essential Web Developer Course (Free)", - }, - "playlist_count": 192, + ''' % TeachableBaseIE._VALID_URL_SUB_TUPLE + _TESTS = [{ + 'url': 'http://v1.upskillcourses.com/courses/essential-web-developer-course/', + 'info_dict': { + 'id': 'essential-web-developer-course', + 'title': 'The Essential Web Developer Course (Free)', }, - { - "url": "http://v1.upskillcourses.com/courses/119763/", - "only_matching": True, - }, - { - "url": "http://v1.upskillcourses.com/courses/enrolled/119763", - "only_matching": True, - }, - { - "url": "https://gns3.teachable.com/courses/enrolled/423415", - "only_matching": True, - }, - { - "url": ( - "teachable:https://learn.vrdev.school/p/gear-vr-developer-mini" - ), - "only_matching": True, - }, - { - "url": ( - "teachable:https://filmsimplified.com/p/davinci-resolve-15-cra" - "sh-course" - ), - "only_matching": True, - }, - ] + 'playlist_count': 192, + }, { + 'url': 'http://v1.upskillcourses.com/courses/119763/', + 'only_matching': True, + }, { + 'url': 'http://v1.upskillcourses.com/courses/enrolled/119763', + 'only_matching': True, + }, { + 'url': 'https://gns3.teachable.com/courses/enrolled/423415', + 'only_matching': True, + }, { + 'url': 'teachable:https://learn.vrdev.school/p/gear-vr-developer-mini', + 'only_matching': True, + }, { + 'url': 'teachable:https://filmsimplified.com/p/davinci-resolve-15-crash-course', + 'only_matching': True, + }] @classmethod def suitable(cls, url): - return ( - False - if TeachableIE.suitable(url) - else super(TeachableCourseIE, cls).suitable(url) - ) + return False if TeachableIE.suitable(url) else super( + TeachableCourseIE, cls).suitable(url) def _real_extract(self, url): mobj = self._match_valid_url(url) - site = mobj.group("site") or mobj.group("site_t") - course_id = mobj.group("id") + site = mobj.group('site') or mobj.group('site_t') + course_id = mobj.group('id') self._login(site) @@ -387,57 +306,38 @@ class TeachableCourseIE(TeachableBaseIE): webpage = self._download_webpage(url, course_id) - url_base = "https://%s/" % site + url_base = 'https://%s/' % site entries = [] for mobj in re.finditer( - r'(?s)(?P
  • ]+class=(["\'])(?:(?!\2).)*?section-item[^>]+>.+?
  • )', - webpage, - ): - li = mobj.group("li") - if "fa-youtube-play" not in li and not re.search( - r"\d{1,2}:\d{2}", li - ): + r'(?s)(?P
  • ]+class=(["\'])(?:(?!\2).)*?section-item[^>]+>.+?
  • )', + webpage): + li = mobj.group('li') + if 'fa-youtube-play' not in li and not re.search(r'\d{1,2}:\d{2}', li): continue lecture_url = self._search_regex( - r']+href=(["\'])(?P(?:(?!\1).)+)\1', - li, - "lecture url", - default=None, - group="url", - ) + r']+href=(["\'])(?P(?:(?!\1).)+)\1', li, + 'lecture url', default=None, group='url') if not lecture_url: continue lecture_id = self._search_regex( - r"/lectures/(\d+)", lecture_url, "lecture id", default=None - ) + r'/lectures/(\d+)', lecture_url, 'lecture id', default=None) title = self._html_search_regex( - r']+class=["\']lecture-name[^>]+>([^<]+)', - li, - "title", - default=None, - ) + r']+class=["\']lecture-name[^>]+>([^<]+)', li, + 'title', default=None) entry_url = urljoin(url_base, lecture_url) if prefixed: entry_url = self._URL_PREFIX + entry_url entries.append( self.url_result( entry_url, - ie=TeachableIE.ie_key(), - video_id=lecture_id, - video_title=clean_html(title), - ) - ) + ie=TeachableIE.ie_key(), video_id=lecture_id, + video_title=clean_html(title))) course_title = self._html_search_regex( - ( - r'(?s)]+class=["\']course-image[^>]+>\s*(.+?)]+class=["\']course-title[^>]+>(.+?)]+class=["\']course-image[^>]+>\s*(.+?)]+class=["\']course-title[^>]+>(.+?) Date: Fri, 21 Jul 2023 11:43:27 +0200 Subject: [PATCH 06/13] Apply minor style corrections This commit introduces a few minor style corrections that were previously overlooked. These corrections ensure that the code adheres to the project's style guidelines and improves overall readability. The changes are minor and do not affect the functionality of the code. --- yt_dlp/extractor/teachable.py | 97 +++++++++++++++++------------------ 1 file changed, 46 insertions(+), 51 deletions(-) diff --git a/yt_dlp/extractor/teachable.py b/yt_dlp/extractor/teachable.py index 1c97d7071..54a3cc636 100644 --- a/yt_dlp/extractor/teachable.py +++ b/yt_dlp/extractor/teachable.py @@ -110,37 +110,36 @@ class TeachableIE(TeachableBaseIE): /courses/[^/]+/lectures/(?P\d+) ''' % TeachableBaseIE._VALID_URL_SUB_TUPLE - _TESTS = [ - { - 'url': 'https://gns3.teachable.com/courses/gns3-certified-associate/lectures/6842364', - 'info_dict': { - 'id': 'Nq7vkXmXRA', - 'video_id': 'Nq7vkXmXRA', - 'ext': 'mp4', - 'title': 'Overview', - 'chapter': 'Welcome', - 'chapter_number': 1, - 'webpage_url': r're:https://player.hotmart.com/embed/Nq7vkXmXRA\?signature=.+&token=.+', - 'width': 1920, - 'height': 1080, - 'thumbnail': r're:https?://.*\.(?:jpg|jpeg|webp)\?token=exp=\d+~acl=.*~hmac=[a-f0-9]+$', - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'http://v1.upskillcourses.com/courses/119763/lectures/1747100', - 'only_matching': True, - }, { - 'url': 'http://v1.upskillcourses.com/courses/119763/lectures/1747100', - 'only_matching': True, - }, { - 'url': 'https://gns3.teachable.com/courses/423415/lectures/6885939', - 'only_matching': True, - }, { - 'url': 'teachable:https://v1.upskillcourses.com/courses/essential-web-developer-course/lectures/1747100', - 'only_matching': True, - }] + _TESTS = [{ + 'url': 'https://gns3.teachable.com/courses/gns3-certified-associate/lectures/6842364', + 'info_dict': { + 'id': 'Nq7vkXmXRA', + 'video_id': 'Nq7vkXmXRA', + 'ext': 'mp4', + 'title': 'Overview', + 'chapter': 'Welcome', + 'chapter_number': 1, + 'webpage_url': r're:https://player.hotmart.com/embed/Nq7vkXmXRA\?signature=.+&token=.+', + 'width': 1920, + 'height': 1080, + 'thumbnail': r're:https?://.*\.(?:jpg|jpeg|webp)\?token=exp=\d+~acl=.*~hmac=[a-f0-9]+$', + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'http://v1.upskillcourses.com/courses/119763/lectures/1747100', + 'only_matching': True, + }, { + 'url': 'http://v1.upskillcourses.com/courses/119763/lectures/1747100', + 'only_matching': True, + }, { + 'url': 'https://gns3.teachable.com/courses/423415/lectures/6885939', + 'only_matching': True, + }, { + 'url': 'teachable:https://v1.upskillcourses.com/courses/essential-web-developer-course/lectures/1747100', + 'only_matching': True, + }] @staticmethod def _is_teachable(webpage): @@ -229,28 +228,24 @@ class TeachableIE(TeachableBaseIE): entries = [] for wistia_url in wistia_urls: - entries.append( - { - '_type': 'url_transparent', - 'url': wistia_url, - 'ie_key': WistiaIE.ie_key(), - 'title': title, - 'chapter': chapter, - 'chapter_number': chapter_number, - } - ) + entries.append({ + '_type': 'url_transparent', + 'url': wistia_url, + 'ie_key': WistiaIE.ie_key(), + 'title': title, + 'chapter': chapter, + 'chapter_number': chapter_number, + }) for hotmart_url in hotmart_urls: - entries.append( - { - '_type': 'url_transparent', - 'url': hotmart_url, - 'ie_key': HotmartIE.ie_key(), - 'title': title, - 'chapter': chapter, - 'chapter_number': chapter_number, - } - ) + entries.append({ + '_type': 'url_transparent', + 'url': hotmart_url, + 'ie_key': HotmartIE.ie_key(), + 'title': title, + 'chapter': chapter, + 'chapter_number': chapter_number, + }) return self.playlist_result(entries, video_id, title) From 7f44d0fa021b8d9f90fb825811949c2ecf3679a1 Mon Sep 17 00:00:00 2001 From: Abdessamad DERRAZ Date: Fri, 21 Jul 2023 19:29:19 +0200 Subject: [PATCH 07/13] Refactoring HotmartIE Extractor for Code Convention Compliance Implemented code convention compliance throughout the HotmartIE extractor. The adjustments span from syntax and structure to proper usage of utility functions. The changes encompass quote usage, inline value extraction, long lines management, import order, fallbacks collapsing, trailing parentheses placement, and the use of conversion and parsing functions like url_or_none and int_or_none for safer data handling. --- yt_dlp/extractor/hotmart.py | 76 ++++++++++++++++++------------------- 1 file changed, 36 insertions(+), 40 deletions(-) diff --git a/yt_dlp/extractor/hotmart.py b/yt_dlp/extractor/hotmart.py index 43c54583f..0a89d34d6 100644 --- a/yt_dlp/extractor/hotmart.py +++ b/yt_dlp/extractor/hotmart.py @@ -2,32 +2,26 @@ from .common import InfoExtractor from ..utils import ( get_element_by_id, traverse_obj, + int_or_none, + url_or_none, ) class HotmartIE(InfoExtractor): - _VALID_URL = r"https?://player\.hotmart\.com/embed/(?P[a-zA-Z0-9]+)" + _VALID_URL = r'https?://player\.hotmart\.com/embed/(?P[a-zA-Z0-9]+)' _TESTS = [ { - "url": ( - "https://player.hotmart.com/embed/DLNy9QQ4qr?signature=cSnA" - "r99eUZ0cne-ZMMJdjwwzV5hD4pLXVPO3urVQUF0XoWaG3MqF6jhfPFf7il" - "Eh6YdtJeirComlat6kF_ZFQMFf1iW-lmqXfsWdANDVYfh8-lqjKY02_Xxg" - "a0nwV3WwrYRkuQ7pnJZiueGkbSHvfixgNGzp12kNDqK1ynPojnVfIaijK2" - "NQV9A0oeG7icUW2K-C9KD0phuuhQmt5qS8u7FxRC7buQm5MoSKYGMi_ot2" - "FUSe2Mgx_S1TOYYNgi0FiTyUyixn884HouIIz8e_N4ceE8PF7x8mVK_IWH" - "gIGic2NhBo9aAo7m8TmP4FA5SNaQnEIPqY1G7SHmDoBvcXyA==&token=a" - "a2d356b-e2f0-45e8-9725-e0efc7b5d29c&autoplay=autoplay" + 'url': ( + 'https://player.hotmart.com/embed/pRQKDWkKLB?signature=S0Pr1OaDwGvKwQ8i6Y9whykEo4uuok2P4AShiYcyarvFkQDT_rBlR5L1qdIbIferFBHfTVJlXcbgUAwMMPiV6sWaA0XIU4OO282MO092DX_Z8KqS1h0Y-452TMjAt3dW2ZYMKWtfA2A2sxM7JmpYZZdMKTrT7nwoPsfbythXfph3dCLzxNQ0gS-rHfD7SYWuKJGN1JmK6iAygJf1thpskoeOJyK04SpDwMoqIOYfsrUktvsJFlV3oWM1tVoeDIQPWSZGXE6WRWDPNmTz6h7IHvc-QKGzoRy3_CvzSEioq2SaDNDdloECrKH37V1eCNvdaIr0dQeHqH_vI0NMBsfCow==&token=aa2d356b-e2f0-45e8-9725-e0efc7b5d29c&autoplay=autoplay' ), - "md5": "620b25017119475adbd6f7932294129d", - "info_dict": { - "id": "DLNy9QQ4qr", - "video_id": "DLNy9QQ4qr", - "ext": "mp4", - "title": "Hotmart video #DLNy9QQ4qr", - "thumbnail": ( - r"re:https?://.*\.(?:jpg|jpeg|png|gif)\?token=exp=\d+~acl" - r"=.*~hmac=[a-f0-9]+$" + 'md5': '95d7a252bb97954663fcf6c6db4b4555', + 'info_dict': { + 'id': 'pRQKDWkKLB', + 'video_id': 'pRQKDWkKLB', + 'ext': 'mp4', + 'title': 'Hotmart video #pRQKDWkKLB', + 'thumbnail': ( + r're:https?://.*\.(?:jpg|jpeg|png|gif)\?token=exp=\d+~acl=.*~hmac=[a-f0-9]+$' ), }, } @@ -38,32 +32,34 @@ class HotmartIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - video_data_string = get_element_by_id("__NEXT_DATA__", webpage) - video_data = self._parse_json(video_data_string, video_id) + video_data_string = get_element_by_id('__NEXT_DATA__', webpage) + video_data = self._parse_json(video_data_string, video_id, fatal=False) - # Extract the title from the video_data object - title = traverse_obj( - video_data, ("props", "pageProps", "applicationData", "mediaTitle") + title = self._html_search_meta( + ['og:title', 'title', 'twitter:title'], + webpage, 'title', default='Hotmart video #' + video_id ) url = traverse_obj( video_data, ( - "props", - "pageProps", - "applicationData", - "mediaAssets", + 'props', + 'pageProps', + 'applicationData', + 'mediaAssets', 0, - "urlEncrypted", + 'urlEncrypted', ), + expected_type=url_or_none, ) thumbnail_url = traverse_obj( video_data, - ("props", "pageProps", "applicationData", "thumbnailUrl"), + ('props', 'pageProps', 'applicationData', 'thumbnailUrl'), + expected_type=url_or_none, ) formats, subtitles = self._extract_m3u8_formats_and_subtitles( - url, video_id, "mp4" + url, video_id, 'mp4', fatal=False ) description = self._og_search_description(webpage, default=None) @@ -71,13 +67,13 @@ class HotmartIE(InfoExtractor): chapter_number = None return { - "id": video_id, - "video_id": video_id, - "thumbnail": thumbnail_url, - "formats": formats, - "subtitles": subtitles, - "title": title, - "description": description, - "chapter": chapter, - "chapter_number": chapter_number, + 'id': video_id, + 'video_id': video_id, + 'thumbnail': thumbnail_url, + 'formats': formats, + 'subtitles': subtitles, + 'title': title, + 'description': description, + 'chapter': chapter, + 'chapter_number': int_or_none(chapter_number), } From c1b51219322817ebe3bef76285d57f0d0250ac71 Mon Sep 17 00:00:00 2001 From: Abdessamad DERRAZ Date: Sat, 22 Jul 2023 00:31:20 +0200 Subject: [PATCH 08/13] Update Teachable Extractor to Support Multiple Videos per Page This commit updates the Teachable extractor function to support the extraction of multiple videos from a single page. Previously, the function only extracted the first video from a page. The updated function now iterates over all video elements on a page and extracts each one. This enhancement improves the functionality of the extractor and allows for more comprehensive scraping of Teachable content. --- yt_dlp/extractor/teachable.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/teachable.py b/yt_dlp/extractor/teachable.py index 54a3cc636..3945a2930 100644 --- a/yt_dlp/extractor/teachable.py +++ b/yt_dlp/extractor/teachable.py @@ -8,7 +8,7 @@ from ..utils import ( extract_attributes, ExtractorError, get_element_by_class, - get_element_html_by_class, + get_elements_html_by_class, int_or_none, strip_or_none, urlencode_postdata, @@ -167,10 +167,11 @@ class TeachableIE(TeachableBaseIE): webpage = self._download_webpage(url, video_id) - hotmart_container_element = get_element_html_by_class( + hotmart_container_elements = get_elements_html_by_class( 'hotmart_video_player', webpage ) - if hotmart_container_element is not None: + hotmart_urls = [] + for hotmart_container_element in hotmart_container_elements: hotmart_container_attributes = extract_attributes(hotmart_container_element) attachment_id = hotmart_container_attributes['data-attachment-id'] @@ -188,9 +189,7 @@ class TeachableIE(TeachableBaseIE): f'{hotmart_video_url_data ["teachable_application_key"]}' ) - hotmart_urls = [hotmart_url] - else: - hotmart_urls = [] + hotmart_urls.append(hotmart_url) wistia_urls = WistiaIE._extract_embed_urls(url, webpage) if not wistia_urls and not hotmart_urls: From bee20eeb8210ac3d6426cef05ab732cd6b8f06eb Mon Sep 17 00:00:00 2001 From: Abdessamad DERRAZ Date: Sat, 22 Jul 2023 02:03:42 +0200 Subject: [PATCH 09/13] Refactor HotmartIE tests to generate dynamic URL This commit refactors the tests in the HotmartIE class to generate a dynamic URL for testing. Previously, the test URL was hardcoded and could expire, causing the tests to fail. Now, the test URL is generated dynamically by making a request to the Teachable API and constructing the URL from the response. This ensures that the test URL is always valid at the time of testing. --- yt_dlp/extractor/hotmart.py | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/yt_dlp/extractor/hotmart.py b/yt_dlp/extractor/hotmart.py index 0a89d34d6..73ee7dd25 100644 --- a/yt_dlp/extractor/hotmart.py +++ b/yt_dlp/extractor/hotmart.py @@ -5,21 +5,40 @@ from ..utils import ( int_or_none, url_or_none, ) +import urllib.request +import json + + +class TeachableAPI: + @staticmethod + def get_hotmart_url(): + req = urllib.request.Request( + 'https://gns3.teachable.com/api/v2/hotmart/private_video?attachment_id=13633604', + headers={'User-Agent': 'Mozilla/5.0'} + ) + with urllib.request.urlopen(req) as response: + data = json.loads(response.read()) + hotmart_url = ( + 'https://player.hotmart.com/embed/' + f'{data["video_id"]}?' + f'signature={data["signature"]}&' + 'token=' + f'{data["teachable_application_key"]}' + ) + return hotmart_url class HotmartIE(InfoExtractor): _VALID_URL = r'https?://player\.hotmart\.com/embed/(?P[a-zA-Z0-9]+)' _TESTS = [ { - 'url': ( - 'https://player.hotmart.com/embed/pRQKDWkKLB?signature=S0Pr1OaDwGvKwQ8i6Y9whykEo4uuok2P4AShiYcyarvFkQDT_rBlR5L1qdIbIferFBHfTVJlXcbgUAwMMPiV6sWaA0XIU4OO282MO092DX_Z8KqS1h0Y-452TMjAt3dW2ZYMKWtfA2A2sxM7JmpYZZdMKTrT7nwoPsfbythXfph3dCLzxNQ0gS-rHfD7SYWuKJGN1JmK6iAygJf1thpskoeOJyK04SpDwMoqIOYfsrUktvsJFlV3oWM1tVoeDIQPWSZGXE6WRWDPNmTz6h7IHvc-QKGzoRy3_CvzSEioq2SaDNDdloECrKH37V1eCNvdaIr0dQeHqH_vI0NMBsfCow==&token=aa2d356b-e2f0-45e8-9725-e0efc7b5d29c&autoplay=autoplay' - ), - 'md5': '95d7a252bb97954663fcf6c6db4b4555', + 'url': TeachableAPI.get_hotmart_url(), + 'md5': 'f9b6107c07300e4f77e23dde37f391a4', 'info_dict': { - 'id': 'pRQKDWkKLB', - 'video_id': 'pRQKDWkKLB', + 'id': 'Nq7vkXmXRA', + 'video_id': 'Nq7vkXmXRA', 'ext': 'mp4', - 'title': 'Hotmart video #pRQKDWkKLB', + 'title': 'Hotmart video #Nq7vkXmXRA', 'thumbnail': ( r're:https?://.*\.(?:jpg|jpeg|png|gif)\?token=exp=\d+~acl=.*~hmac=[a-f0-9]+$' ), From e7f90d887dd8b57e2d484958a03165c94b3776b7 Mon Sep 17 00:00:00 2001 From: Abdessamad DERRAZ Date: Sat, 22 Jul 2023 02:13:09 +0200 Subject: [PATCH 10/13] Reorder imports in hotmart.py This commit reorders the imports in hotmart.py according to PEP8 guidelines for improved readability. --- yt_dlp/extractor/hotmart.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/hotmart.py b/yt_dlp/extractor/hotmart.py index 73ee7dd25..01fe5eb19 100644 --- a/yt_dlp/extractor/hotmart.py +++ b/yt_dlp/extractor/hotmart.py @@ -1,3 +1,6 @@ +import json +import urllib.request + from .common import InfoExtractor from ..utils import ( get_element_by_id, @@ -5,8 +8,6 @@ from ..utils import ( int_or_none, url_or_none, ) -import urllib.request -import json class TeachableAPI: From 182a18eaa041bb14c829d7033f45a81ffe8545a5 Mon Sep 17 00:00:00 2001 From: Abdessamad Derraz <3028866+Abdess@users.noreply.github.com> Date: Sat, 22 Jul 2023 07:39:53 +0200 Subject: [PATCH 11/13] Revert "Reorder imports in hotmart.py" This reverts commit e7f90d887dd8b57e2d484958a03165c94b3776b7. --- yt_dlp/extractor/hotmart.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/hotmart.py b/yt_dlp/extractor/hotmart.py index 01fe5eb19..73ee7dd25 100644 --- a/yt_dlp/extractor/hotmart.py +++ b/yt_dlp/extractor/hotmart.py @@ -1,6 +1,3 @@ -import json -import urllib.request - from .common import InfoExtractor from ..utils import ( get_element_by_id, @@ -8,6 +5,8 @@ from ..utils import ( int_or_none, url_or_none, ) +import urllib.request +import json class TeachableAPI: From fddf9a60eefbd33dccea40a219f00246914f4d98 Mon Sep 17 00:00:00 2001 From: Abdessamad Derraz <3028866+Abdess@users.noreply.github.com> Date: Sat, 22 Jul 2023 07:40:04 +0200 Subject: [PATCH 12/13] Revert "Refactor HotmartIE tests to generate dynamic URL" This reverts commit bee20eeb8210ac3d6426cef05ab732cd6b8f06eb. --- yt_dlp/extractor/hotmart.py | 33 +++++++-------------------------- 1 file changed, 7 insertions(+), 26 deletions(-) diff --git a/yt_dlp/extractor/hotmart.py b/yt_dlp/extractor/hotmart.py index 73ee7dd25..0a89d34d6 100644 --- a/yt_dlp/extractor/hotmart.py +++ b/yt_dlp/extractor/hotmart.py @@ -5,40 +5,21 @@ from ..utils import ( int_or_none, url_or_none, ) -import urllib.request -import json - - -class TeachableAPI: - @staticmethod - def get_hotmart_url(): - req = urllib.request.Request( - 'https://gns3.teachable.com/api/v2/hotmart/private_video?attachment_id=13633604', - headers={'User-Agent': 'Mozilla/5.0'} - ) - with urllib.request.urlopen(req) as response: - data = json.loads(response.read()) - hotmart_url = ( - 'https://player.hotmart.com/embed/' - f'{data["video_id"]}?' - f'signature={data["signature"]}&' - 'token=' - f'{data["teachable_application_key"]}' - ) - return hotmart_url class HotmartIE(InfoExtractor): _VALID_URL = r'https?://player\.hotmart\.com/embed/(?P[a-zA-Z0-9]+)' _TESTS = [ { - 'url': TeachableAPI.get_hotmart_url(), - 'md5': 'f9b6107c07300e4f77e23dde37f391a4', + 'url': ( + 'https://player.hotmart.com/embed/pRQKDWkKLB?signature=S0Pr1OaDwGvKwQ8i6Y9whykEo4uuok2P4AShiYcyarvFkQDT_rBlR5L1qdIbIferFBHfTVJlXcbgUAwMMPiV6sWaA0XIU4OO282MO092DX_Z8KqS1h0Y-452TMjAt3dW2ZYMKWtfA2A2sxM7JmpYZZdMKTrT7nwoPsfbythXfph3dCLzxNQ0gS-rHfD7SYWuKJGN1JmK6iAygJf1thpskoeOJyK04SpDwMoqIOYfsrUktvsJFlV3oWM1tVoeDIQPWSZGXE6WRWDPNmTz6h7IHvc-QKGzoRy3_CvzSEioq2SaDNDdloECrKH37V1eCNvdaIr0dQeHqH_vI0NMBsfCow==&token=aa2d356b-e2f0-45e8-9725-e0efc7b5d29c&autoplay=autoplay' + ), + 'md5': '95d7a252bb97954663fcf6c6db4b4555', 'info_dict': { - 'id': 'Nq7vkXmXRA', - 'video_id': 'Nq7vkXmXRA', + 'id': 'pRQKDWkKLB', + 'video_id': 'pRQKDWkKLB', 'ext': 'mp4', - 'title': 'Hotmart video #Nq7vkXmXRA', + 'title': 'Hotmart video #pRQKDWkKLB', 'thumbnail': ( r're:https?://.*\.(?:jpg|jpeg|png|gif)\?token=exp=\d+~acl=.*~hmac=[a-f0-9]+$' ), From e61606eaf28d36dfafdd6bd5da4f2b7e68d2da47 Mon Sep 17 00:00:00 2001 From: Abdessamad DERRAZ Date: Tue, 24 Oct 2023 21:25:57 +0200 Subject: [PATCH 13/13] [ie/teachable] Remove Wistia support --- yt_dlp/extractor/teachable.py | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/yt_dlp/extractor/teachable.py b/yt_dlp/extractor/teachable.py index 3945a2930..794f61708 100644 --- a/yt_dlp/extractor/teachable.py +++ b/yt_dlp/extractor/teachable.py @@ -2,7 +2,6 @@ import re from .common import InfoExtractor from .hotmart import HotmartIE -from .wistia import WistiaIE from ..utils import ( clean_html, extract_attributes, @@ -191,8 +190,7 @@ class TeachableIE(TeachableBaseIE): hotmart_urls.append(hotmart_url) - wistia_urls = WistiaIE._extract_embed_urls(url, webpage) - if not wistia_urls and not hotmart_urls: + if not hotmart_urls: if any(re.search(p, webpage) for p in ( r'class=["\']lecture-contents-locked', r'>\s*Lecture contents locked', @@ -226,16 +224,6 @@ class TeachableIE(TeachableBaseIE): chapter = sections[chapter_number - 1] entries = [] - for wistia_url in wistia_urls: - entries.append({ - '_type': 'url_transparent', - 'url': wistia_url, - 'ie_key': WistiaIE.ie_key(), - 'title': title, - 'chapter': chapter, - 'chapter_number': chapter_number, - }) - for hotmart_url in hotmart_urls: entries.append({ '_type': 'url_transparent',