Modified tele5 extractor

pull/9792/head
JerryZhouSirui 2024-04-26 13:48:19 -04:00
rodzic c9ce57d9bf
commit 7565fcb271
2 zmienionych plików z 95 dodań i 16 usunięć

Wyświetl plik

@ -503,6 +503,7 @@
- **gem.cbc.ca**: [*cbcgem*](## "netrc machine")
- **gem.cbc.ca:live**
- **gem.cbc.ca:playlist**
- **generic**: Generic downloader that works on some sites
- **Genius**
- **GeniusLyrics**
- **GetCourseRu**: [*getcourseru*](## "netrc machine")

Wyświetl plik

@ -1,17 +1,68 @@
import re
import requests
from .dplay import DPlayIE
from ..compat import compat_urlparse
from ..utils import (
ExtractorError,
extract_attributes,
)
def _generate_video_specific_cache_url(slug, parent_slug, environment='tele5'):
    """
    Build the loma-cms page URL for one video.

    The values are inserted into the URL verbatim; no escaping is applied.

    :param slug: The part of the url that identifies the video by title.
    :param parent_slug: The part of the url that identifies the parent directory.
    :param environment: Value of the ``environment`` query parameter.
        Defaults to ``'tele5'``, the value that used to be hard-coded.
    :return: The generated url.
    """
    return (
        'https://de-api.loma-cms.com/feloma/page/{0}/'
        '?environment={1}&parent_slug={2}&v=2'
    ).format(slug, environment, parent_slug)
def _do_cached_post(s: 'requests.Session',
                    referer: str,
                    url: str,
                    timeout: float = 30.0) -> dict:
    """
    POST to the ``https://tele5.de/cached`` endpoint and return its JSON body.

    The endpoint is likely connected to the new "loma-cms" API: the url to
    look up is sent as the ``path`` field of a JSON request body.

    :param s: The session used to send the request.
    :param referer: The referer url sent with the request.
    :param url: The url to retrieve the cached data for.
    :param timeout: Seconds to wait for the server before giving up, so a
        stalled connection cannot block the caller forever.
    :return: The decoded json from the response.
    :raises: Whatever ``raise_for_status()`` raises for an HTTP error status
        (``requests.HTTPError`` for a real requests session).
    """
    response = s.post(
        url='https://tele5.de/cached',
        headers={
            'Origin': 'https://tele5.de',
            # Referer is a mandatory key.
            'Referer': referer,
            # User-Agent is a mandatory key, it can be anything!
            'User-Agent': 'Youtube-DL',
        },
        json={'path': url},
        timeout=timeout,
    )
    response.raise_for_status()
    return response.json()
class Tele5IE(DPlayIE): # XXX: Do not subclass from concrete IE
_WORKING = False
_VALID_URL = r'https?://(?:www\.)?tele5\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_GEO_COUNTRIES = ['DE']
_TESTS = [{
'url': 'https://tele5.de/mediathek/sorority-babes-in-the-slimeball-bowl-o-rama',
'info_dict': {
'id': '5582852',
'title': 'Sorority Babes in the Slimeball Bowl-O-Rama',
'ext': 'mp4',
'series': 'Sorority Babes in the Slimeball Bowl-O-Rama',
'duration': 4779.88,
'description': 'md5:1d8d30ed3d221613861aaefa8d7e887e',
'timestamp': 1697839800,
'upload_date': '20231020',
'creator': 'Tele5',
'tags': [],
'thumbnail': 'https://eu1-prod-images.disco-api.com/2023/10/02/501fa839-d3ac-3c04-aa61-57f98802c532.jpeg',
},
}, {
'url': 'https://www.tele5.de/mediathek/filme-online/videos?vid=1549416',
'only_matching': True,
'info_dict': {
'id': '1549416',
'ext': 'mp4',
@ -26,6 +77,7 @@ class Tele5IE(DPlayIE): # XXX: Do not subclass from concrete IE
}, {
# jwplatform, nexx unavailable
'url': 'https://www.tele5.de/filme/ghoul-das-geheimnis-des-friedhofmonsters/',
'only_matching': True,
'info_dict': {
'id': 'WJuiOlUp',
'ext': 'mp4',
@ -40,6 +92,7 @@ class Tele5IE(DPlayIE): # XXX: Do not subclass from concrete IE
'skip': 'No longer available, redirects to Filme page',
}, {
'url': 'https://tele5.de/mediathek/angel-of-mine/',
'only_matching': True,
'info_dict': {
'id': '1252360',
'ext': 'mp4',
@ -72,18 +125,43 @@ class Tele5IE(DPlayIE): # XXX: Do not subclass from concrete IE
}]
def _real_extract(self, url):
    """
    Resolve a tele5 page url to video info through the "loma-cms" cache API.

    Steps:
      1. Split the url into environment, parent slug and slug.
      2. Fetch the page itself, then ask the cache endpoint for the site
         configuration and for the page description of this video.
      3. Hand the realm, endpoint host, country and video id found there to
         ``_get_disco_api_info``.

    :param url: The page url to extract.
    :return: Whatever ``_get_disco_api_info`` returns.
    :raises ExtractorError: If the url has an unexpected layout, an HTTP
        request fails, the API response lacks an expected field, or the
        disco API lookup itself fails.
    """
    m = re.search(
        r'https?://(?:www\.)?(?P<environment>[^.]+)\.de/(?P<parent_slug>[^/]+)/(?P<slug>[^/?#&]+)',
        url)
    if m is None:
        # Fail loudly instead of silently returning nothing.
        raise ExtractorError('Unsupported tele5 URL layout: %s' % url, expected=True)
    environment, parent_slug, slug = m.group('environment', 'parent_slug', 'slug')

    headers_for_origin = {'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/113.0'}
    try:
        # The context manager closes the session's connections when done.
        with requests.session() as s:
            # The page is requested first with a browser User-Agent;
            # presumably this primes the session for the cache endpoint
            # -- TODO confirm.
            r = s.get(url=url,
                      headers=headers_for_origin)
            r.raise_for_status()
            cached_base = _do_cached_post(
                s=s,
                referer=url,
                url='https://de-api.loma-cms.com/feloma/configurations/?environment={0}'.format(environment))
            cached_video_specific = _do_cached_post(
                s=s,
                referer=url,
                url=_generate_video_specific_cache_url(
                    slug=slug,
                    parent_slug=parent_slug))
    except requests.RequestException as err:
        raise ExtractorError('Unable to query the tele5 cache API: %s' % err) from err

    try:
        site_info = cached_base['data']['settings']['site']
        player_info = site_info['player']
        sonic_realm = player_info['sonicRealm']
        sonic_endpoint = compat_urlparse.urlparse(player_info['sonicEndpoint']).hostname
        country = site_info['info']['country']
        # The video id is read from the second block of the page description.
        video_id = cached_video_specific['data']['blocks'][1]['videoId']
    except (KeyError, IndexError, TypeError) as err:
        raise ExtractorError('Unexpected tele5 cache API response layout') from err

    try:
        # NOTE(review): assumes the inherited _get_disco_api_info accepts
        # ``api_version`` -- confirm against the DPlayIE definition.
        return self._get_disco_api_info(url=url,
                                        display_id=video_id,
                                        disco_host=sonic_endpoint,
                                        realm=sonic_realm,
                                        country=country,
                                        api_version=3,
                                        )
    except ExtractorError as e:
        if getattr(e, 'message', '') == 'Missing deviceId in context':
            self.report_drm(video_id)
        raise