kopia lustrzana https://github.com/yt-dlp/yt-dlp
Porównaj commity
21 Commity
aac430b2fb
...
68040cc455
Autor | SHA1 | Data |
---|---|---|
kylegustavo | 68040cc455 | |
Simon Sawicki | 64766459e3 | |
pukkandan | 706272edd2 | |
pukkandan | 221a3c6dba | |
Kyle Gonsalves | 8d78a0f118 | |
Kyle Gonsalves | 388bc9c97c | |
Kyle Gonsalves | fd43ff21e2 | |
Kyle Gonsalves | bb87bafce6 | |
Kyle Gonsalves | b0593ecfa4 | |
Kyle Gonsalves | ab1cfa399b | |
Kyle Gonsalves | 1d851a6751 | |
Kyle Gonsalves | d5b48c06e6 | |
Kyle Gonsalves | bdfeb4357b | |
Kyle Gonsalves | fba5c8f305 | |
bashonly | 89f535e265 | |
bashonly | ff38a011d5 | |
bashonly | 8056a3026e | |
Simon Sawicki | 3ee1194288 | |
bashonly | e3b42d8b1b | |
bashonly | c9ce57d9bf | |
bashonly | 02483bea1c |
|
@ -254,7 +254,7 @@ jobs:
|
|||
# We need to fuse our own universal2 wheels for curl_cffi
|
||||
python3 -m pip install -U --user delocate
|
||||
mkdir curl_cffi_whls curl_cffi_universal2
|
||||
python3 devscripts/install_deps.py --print -o --include curl_cffi > requirements.txt
|
||||
python3 devscripts/install_deps.py --print -o --include curl-cffi > requirements.txt
|
||||
for platform in "macosx_11_0_arm64" "macosx_11_0_x86_64"; do
|
||||
python3 -m pip download \
|
||||
--only-binary=:all: \
|
||||
|
@ -362,7 +362,7 @@ jobs:
|
|||
- name: Install Requirements
|
||||
run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds
|
||||
python devscripts/install_deps.py -o --include build
|
||||
python devscripts/install_deps.py --include py2exe --include curl_cffi
|
||||
python devscripts/install_deps.py --include py2exe --include curl-cffi
|
||||
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-5.8.0-py3-none-any.whl"
|
||||
|
||||
- name: Prepare
|
||||
|
|
|
@ -53,7 +53,7 @@ jobs:
|
|||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install test requirements
|
||||
run: python3 ./devscripts/install_deps.py --include dev --include curl_cffi
|
||||
run: python3 ./devscripts/install_deps.py --include dev --include curl-cffi
|
||||
- name: Run tests
|
||||
continue-on-error: False
|
||||
run: |
|
||||
|
|
|
@ -202,7 +202,7 @@ While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly
|
|||
The following provide support for impersonating browser requests. This may be required for some sites that employ TLS fingerprinting.
|
||||
|
||||
* [**curl_cffi**](https://github.com/yifeikong/curl_cffi) (recommended) - Python binding for [curl-impersonate](https://github.com/lwthiker/curl-impersonate). Provides impersonation targets for Chrome, Edge and Safari. Licensed under [MIT](https://github.com/yifeikong/curl_cffi/blob/main/LICENSE)
|
||||
* Can be installed with the `curl_cffi` group, e.g. `pip install yt-dlp[default,curl_cffi]`
|
||||
* Can be installed with the `curl-cffi` group, e.g. `pip install yt-dlp[default,curl-cffi]`
|
||||
* Currently only included in `yt-dlp.exe` and `yt-dlp_macos` builds
|
||||
|
||||
|
||||
|
|
|
@ -53,7 +53,7 @@ dependencies = [
|
|||
|
||||
[project.optional-dependencies]
|
||||
default = []
|
||||
curl_cffi = ["curl-cffi==0.5.10; implementation_name=='cpython'"]
|
||||
curl-cffi = ["curl-cffi==0.5.10; implementation_name=='cpython'"]
|
||||
secretstorage = [
|
||||
"cffi",
|
||||
"secretstorage",
|
||||
|
|
|
@ -1906,6 +1906,15 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
|||
expected_status=TEAPOT_RESPONSE_STATUS)
|
||||
self.assertEqual(content, TEAPOT_RESPONSE_BODY)
|
||||
|
||||
def test_search_nextjs_data(self):
|
||||
data = '<script id="__NEXT_DATA__" type="application/json">{"props":{}}</script>'
|
||||
self.assertEqual(self.ie._search_nextjs_data(data, None), {'props': {}})
|
||||
self.assertEqual(self.ie._search_nextjs_data('', None, fatal=False), {})
|
||||
self.assertEqual(self.ie._search_nextjs_data('', None, default=None), None)
|
||||
self.assertEqual(self.ie._search_nextjs_data('', None, default={}), {})
|
||||
with self.assertRaises(DeprecationWarning):
|
||||
self.assertEqual(self.ie._search_nextjs_data('', None, default='{}'), {})
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
|
@ -2059,7 +2059,22 @@ Line 1
|
|||
assert extract_basic_auth('http://user:pass@foo.bar') == ('http://foo.bar', 'Basic dXNlcjpwYXNz')
|
||||
|
||||
@unittest.skipUnless(compat_os_name == 'nt', 'Only relevant on Windows')
|
||||
def test_Popen_windows_escaping(self):
|
||||
def test_windows_escaping(self):
|
||||
tests = [
|
||||
'test"&',
|
||||
'%CMDCMDLINE:~-1%&',
|
||||
'a\nb',
|
||||
'"',
|
||||
'\\',
|
||||
'!',
|
||||
'^!',
|
||||
'a \\ b',
|
||||
'a \\" b',
|
||||
'a \\ b\\',
|
||||
# We replace \r with \n
|
||||
('a\r\ra', 'a\n\na'),
|
||||
]
|
||||
|
||||
def run_shell(args):
|
||||
stdout, stderr, error = Popen.run(
|
||||
args, text=True, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
|
@ -2067,15 +2082,18 @@ Line 1
|
|||
assert not error
|
||||
return stdout
|
||||
|
||||
# Test escaping
|
||||
assert run_shell(['echo', 'test"&']) == '"test""&"\n'
|
||||
assert run_shell(['echo', '%CMDCMDLINE:~-1%&']) == '"%CMDCMDLINE:~-1%&"\n'
|
||||
assert run_shell(['echo', 'a\nb']) == '"a"\n"b"\n'
|
||||
assert run_shell(['echo', '"']) == '""""\n'
|
||||
assert run_shell(['echo', '\\']) == '\\\n'
|
||||
# Test if delayed expansion is disabled
|
||||
assert run_shell(['echo', '^!']) == '"^!"\n'
|
||||
assert run_shell('echo "^!"') == '"^!"\n'
|
||||
for argument in tests:
|
||||
if isinstance(argument, str):
|
||||
expected = argument
|
||||
else:
|
||||
argument, expected = argument
|
||||
|
||||
args = [sys.executable, '-c', 'import sys; print(end=sys.argv[1])', argument, 'end']
|
||||
assert run_shell(args) == expected
|
||||
|
||||
escaped = shell_quote(argument, shell=True)
|
||||
args = f'{sys.executable} -c "import sys; print(end=sys.argv[1])" {escaped} end'
|
||||
assert run_shell(args) == expected
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
|
@ -105,7 +105,7 @@ class AsobiStageIE(InfoExtractor):
|
|||
video_type = {'archive': 'archives', 'player': 'broadcasts'}[type_]
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
event_data = traverse_obj(
|
||||
self._search_nextjs_data(webpage, video_id, default='{}'),
|
||||
self._search_nextjs_data(webpage, video_id, default={}),
|
||||
('props', 'pageProps', 'eventCMSData', {
|
||||
'title': ('event_name', {str}),
|
||||
'thumbnail': ('event_thumbnail_image', {url_or_none}),
|
||||
|
|
|
@ -623,6 +623,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
'info_dict': {
|
||||
'id': '3662a707-0af9-3149-963f-47bea720b460',
|
||||
'title': 'BUGGER',
|
||||
'description': r're:BUGGER The recent revelations by the whistleblower Edward Snowden were fascinating. .{211}\.{3}$',
|
||||
},
|
||||
'playlist_count': 18,
|
||||
}, {
|
||||
|
@ -631,14 +632,14 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
'info_dict': {
|
||||
'id': 'p02mprgb',
|
||||
'ext': 'mp4',
|
||||
'title': 'Aerial footage showed the site of the crash in the Alps - courtesy BFM TV',
|
||||
'description': 'md5:2868290467291b37feda7863f7a83f54',
|
||||
'title': 'Germanwings crash site aerial video',
|
||||
'description': r're:(?s)Aerial video showed the site where the Germanwings flight 4U 9525, .{156} BFM TV\.$',
|
||||
'duration': 47,
|
||||
'timestamp': 1427219242,
|
||||
'upload_date': '20150324',
|
||||
'thumbnail': 'https://ichef.bbci.co.uk/news/1024/media/images/81879000/jpg/_81879090_81879089.jpg',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
|
@ -656,12 +657,14 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
'skip': 'now SIMORGH_DATA with no video',
|
||||
}, {
|
||||
# single video embedded with data-playable containing XML playlists (regional section)
|
||||
'url': 'http://www.bbc.com/mundo/video_fotos/2015/06/150619_video_honduras_militares_hospitales_corrupcion_aw',
|
||||
'info_dict': {
|
||||
'id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
|
||||
'id': '39275083',
|
||||
'display_id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
|
||||
'ext': 'mp4',
|
||||
'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
|
||||
'description': 'md5:1525f17448c4ee262b64b8f0c9ce66c8',
|
||||
|
@ -670,7 +673,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
}, {
|
||||
# single video from video playlist embedded with vxp-playlist-data JSON
|
||||
'url': 'http://www.bbc.com/news/video_and_audio/must_see/33376376',
|
||||
|
@ -683,22 +686,20 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
}
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
}, {
|
||||
# single video story with digitalData
|
||||
# single video story with __PWA_PRELOADED_STATE__
|
||||
'url': 'http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret',
|
||||
'info_dict': {
|
||||
'id': 'p02q6gc4',
|
||||
'ext': 'flv',
|
||||
'title': 'Sri Lanka’s spicy secret',
|
||||
'description': 'As a new train line to Jaffna opens up the country’s north, travellers can experience a truly distinct slice of Tamil culture.',
|
||||
'timestamp': 1437674293,
|
||||
'upload_date': '20150723',
|
||||
'ext': 'mp4',
|
||||
'title': 'Tasting the spice of life in Jaffna',
|
||||
'description': r're:(?s)BBC Travel Show’s Henry Golding explores the city of Jaffna .{149} aftertaste\.$',
|
||||
'timestamp': 1437935638,
|
||||
'upload_date': '20150726',
|
||||
'duration': 255,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# single video story without digitalData
|
||||
'url': 'http://www.bbc.com/autos/story/20130513-hyundais-rock-star',
|
||||
|
@ -710,12 +711,10 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
'timestamp': 1415867444,
|
||||
'upload_date': '20141113',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
'skip': 'redirects to TopGear home page',
|
||||
}, {
|
||||
# single video embedded with Morph
|
||||
# TODO: replacement test page
|
||||
'url': 'http://www.bbc.co.uk/sport/live/olympics/36895975',
|
||||
'info_dict': {
|
||||
'id': 'p041vhd0',
|
||||
|
@ -726,27 +725,22 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
'uploader': 'BBC Sport',
|
||||
'uploader_id': 'bbc_sport',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Georestricted to UK',
|
||||
'skip': 'Video no longer in page',
|
||||
}, {
|
||||
# single video with playlist.sxml URL in playlist param
|
||||
# single video in __INITIAL_DATA__
|
||||
'url': 'http://www.bbc.com/sport/0/football/33653409',
|
||||
'info_dict': {
|
||||
'id': 'p02xycnp',
|
||||
'ext': 'mp4',
|
||||
'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
|
||||
'description': 'BBC Sport\'s David Ornstein has the latest transfer gossip, including rumours of a Manchester United return for Cristiano Ronaldo.',
|
||||
'title': 'Ronaldo to Man Utd, Arsenal to spend?',
|
||||
'description': r're:(?s)BBC Sport\'s David Ornstein rounds up the latest transfer reports, .{359} here\.$',
|
||||
'timestamp': 1437750175,
|
||||
'upload_date': '20150724',
|
||||
'thumbnail': r're:https://(?:[^/]+/)+/media/images/69320000/png/_69320754_mmgossipcolumnextraaugust18.png',
|
||||
'duration': 140,
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# article with multiple videos embedded with playlist.sxml in playlist param
|
||||
# article with multiple videos embedded with Morph.setPayload
|
||||
'url': 'http://www.bbc.com/sport/0/football/34475836',
|
||||
'info_dict': {
|
||||
'id': '34475836',
|
||||
|
@ -754,6 +748,21 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
'description': 'Fast-paced football, wit, wisdom and a ready smile - why Liverpool fans should come to love new boss Jurgen Klopp.',
|
||||
},
|
||||
'playlist_count': 3,
|
||||
}, {
|
||||
# Testing noplaylist
|
||||
'url': 'http://www.bbc.com/sport/0/football/34475836',
|
||||
'info_dict': {
|
||||
'id': 'p034ppnv',
|
||||
'ext': 'mp4',
|
||||
'title': 'All you need to know about Jurgen Klopp',
|
||||
'timestamp': 1444335081,
|
||||
'upload_date': '20151008',
|
||||
'duration': 122.0,
|
||||
'thumbnail': 'https://ichef.bbci.co.uk/onesport/cps/976/cpsprodpb/7542/production/_85981003_klopp.jpg',
|
||||
},
|
||||
'params': {
|
||||
'noplaylist': True,
|
||||
},
|
||||
}, {
|
||||
# school report article with single video
|
||||
'url': 'http://www.bbc.co.uk/schoolreport/35744779',
|
||||
|
@ -762,6 +771,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
'title': 'School which breaks down barriers in Jerusalem',
|
||||
},
|
||||
'playlist_count': 1,
|
||||
'skip': 'redirects to Young Reporter home page https://www.bbc.co.uk/news/topics/cg41ylwv43pt',
|
||||
}, {
|
||||
# single video with playlist URL from weather section
|
||||
'url': 'http://www.bbc.com/weather/features/33601775',
|
||||
|
@ -778,18 +788,20 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'timestamp': 1437785037,
|
||||
'upload_date': '20150725',
|
||||
'duration': 105,
|
||||
},
|
||||
}, {
|
||||
# video with window.__INITIAL_DATA__ and value as JSON string
|
||||
'url': 'https://www.bbc.com/news/av/world-europe-59468682',
|
||||
'info_dict': {
|
||||
'id': 'p0b71qth',
|
||||
'id': 'p0b779gc',
|
||||
'ext': 'mp4',
|
||||
'title': 'Why France is making this woman a national hero',
|
||||
'description': 'md5:7affdfab80e9c3a1f976230a1ff4d5e4',
|
||||
'description': r're:(?s)France is honouring the US-born 20th Century singer and activist Josephine .{291} Casseville$',
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'timestamp': 1638230731,
|
||||
'upload_date': '20211130',
|
||||
'duration': 125,
|
||||
},
|
||||
}, {
|
||||
# video with script id __NEXT_DATA__ and value as JSON string
|
||||
|
@ -830,6 +842,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
'uploader': 'Radio 3',
|
||||
'uploader_id': 'bbc_radio_three',
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
}, {
|
||||
'url': 'http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227',
|
||||
'info_dict': {
|
||||
|
@ -837,6 +850,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
'ext': 'mp4',
|
||||
'title': 'md5:2fabf12a726603193a2879a055f72514',
|
||||
'description': 'Learn English words and phrases from this story',
|
||||
'thumbnail': 'https://ichef.bbci.co.uk/images/ic/1200x675/p06pq9gk.jpg',
|
||||
},
|
||||
'add_ie': [BBCCoUkIE.ie_key()],
|
||||
}, {
|
||||
|
@ -849,24 +863,26 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
'alt_title': 'The downsides of positive thinking',
|
||||
'description': 'md5:fad74b31da60d83b8265954ee42d85b4',
|
||||
'duration': 235,
|
||||
'thumbnail': r're:https?://.+/p07c9dsr.jpg',
|
||||
'thumbnail': r're:https?://.+/p07c9dsr\.(?:jpg|webp|png)',
|
||||
'upload_date': '20190604',
|
||||
'categories': ['Psychology'],
|
||||
},
|
||||
}, {
|
||||
# BBC Sounds
|
||||
'url': 'https://www.bbc.co.uk/sounds/play/m001q78b',
|
||||
'url': 'https://www.bbc.co.uk/sounds/play/w3ct5rgx',
|
||||
'info_dict': {
|
||||
'id': 'm001q789',
|
||||
'id': 'p0hrw4nr',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Night Tracks Mix - Music for the darkling hour',
|
||||
'thumbnail': 'https://ichef.bbci.co.uk/images/ic/raw/p0c00hym.jpg',
|
||||
'chapters': 'count:8',
|
||||
'description': 'md5:815fb51cbdaa270040aab8145b3f1d67',
|
||||
'uploader': 'Radio 3',
|
||||
'duration': 1800,
|
||||
'uploader_id': 'bbc_radio_three',
|
||||
},
|
||||
'title': 'Are our coastlines being washed away?',
|
||||
'description': r're:(?s)Around the world, coastlines are constantly changing .{2153} Images\)$',
|
||||
'timestamp': 1713556800,
|
||||
'upload_date': '20240419',
|
||||
'duration': 1588,
|
||||
'thumbnail': 'https://ichef.bbci.co.uk/images/ic/raw/p0hrnxbl.jpg',
|
||||
'uploader': 'World Service',
|
||||
'uploader_id': 'bbc_world_service',
|
||||
'series': 'CrowdScience',
|
||||
}
|
||||
}, { # onion routes
|
||||
'url': 'https://www.bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd.onion/news/av/world-europe-63208576',
|
||||
'only_matching': True,
|
||||
|
@ -1021,8 +1037,8 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
webpage, 'group id', default=None)
|
||||
if group_id:
|
||||
return self.url_result(
|
||||
'https://www.bbc.co.uk/programmes/%s' % group_id,
|
||||
ie=BBCCoUkIE.ie_key())
|
||||
f'https://www.bbc.co.uk/programmes/{group_id}',
|
||||
ie=BBCCoUkIE)
|
||||
|
||||
# single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
|
||||
programme_id = self._search_regex(
|
||||
|
@ -1082,83 +1098,139 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
}
|
||||
|
||||
# Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975)
|
||||
# There are several setPayload calls may be present but the video
|
||||
# seems to be always related to the first one
|
||||
morph_payload = self._parse_json(
|
||||
self._search_regex(
|
||||
r'Morph\.setPayload\([^,]+,\s*({.+?})\);',
|
||||
webpage, 'morph payload', default='{}'),
|
||||
playlist_id, fatal=False)
|
||||
# Several setPayload calls may be present but the video(s)
|
||||
# should be in one that mentions leadMedia or videoData
|
||||
morph_payload = self._search_json(
|
||||
r'\bMorph\s*\.\s*setPayload\s*\([^,]+,', webpage, 'morph payload', playlist_id,
|
||||
contains_pattern=r'\{(?:(?!</script>)[\s\S])+?(?:"leadMedia"|\\"videoData\\")\s*:(?:(?!</script>)[\s\S])+\}',
|
||||
default={})
|
||||
if morph_payload:
|
||||
components = try_get(morph_payload, lambda x: x['body']['components'], list) or []
|
||||
for component in components:
|
||||
if not isinstance(component, dict):
|
||||
continue
|
||||
lead_media = try_get(component, lambda x: x['props']['leadMedia'], dict)
|
||||
if not lead_media:
|
||||
continue
|
||||
identifiers = lead_media.get('identifiers')
|
||||
if not identifiers or not isinstance(identifiers, dict):
|
||||
continue
|
||||
programme_id = identifiers.get('vpid') or identifiers.get('playablePid')
|
||||
for lead_media in traverse_obj(morph_payload, (
|
||||
'body', 'components', ..., 'props', 'leadMedia', {dict})):
|
||||
programme_id = traverse_obj(lead_media, ('identifiers', ('vpid', 'playablePid'), {str}, any))
|
||||
if not programme_id:
|
||||
continue
|
||||
title = lead_media.get('title') or self._og_search_title(webpage)
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
description = lead_media.get('summary')
|
||||
uploader = lead_media.get('masterBrand')
|
||||
uploader_id = lead_media.get('mid')
|
||||
duration = None
|
||||
duration_d = lead_media.get('duration')
|
||||
if isinstance(duration_d, dict):
|
||||
duration = parse_duration(dict_get(
|
||||
duration_d, ('rawDuration', 'formattedDuration', 'spokenDuration')))
|
||||
return {
|
||||
'id': programme_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'duration': duration,
|
||||
'uploader': uploader,
|
||||
'uploader_id': uploader_id,
|
||||
'title': lead_media.get('title') or self._og_search_title(webpage),
|
||||
**traverse_obj(lead_media, {
|
||||
'description': ('summary', {str}),
|
||||
'duration': ('duration', ('rawDuration', 'formattedDuration', 'spokenDuration'), {parse_duration}),
|
||||
'uploader': ('masterBrand', {str}),
|
||||
'uploader_id': ('mid', {str}),
|
||||
}),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
body = self._parse_json(traverse_obj(morph_payload, (
|
||||
'body', 'content', 'article', 'body')), playlist_id, fatal=False)
|
||||
for video_data in traverse_obj(body, (lambda _, v: v['videoData']['pid'], 'videoData')):
|
||||
if video_data.get('vpid'):
|
||||
video_id = video_data['vpid']
|
||||
formats, subtitles = self._download_media_selector(video_id)
|
||||
entry = {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
else:
|
||||
video_id = video_data['pid']
|
||||
entry = self.url_result(
|
||||
f'https://www.bbc.co.uk/programmes/{video_id}', BBCCoUkIE,
|
||||
video_id, url_transparent=True)
|
||||
entry.update({
|
||||
**traverse_obj(morph_payload, (
|
||||
'body', 'content', 'article', {
|
||||
'timestamp': ('dateTimeInfo', 'dateTime', {parse_iso8601}),
|
||||
}
|
||||
)),
|
||||
**traverse_obj(video_data, {
|
||||
'thumbnail': (('iChefImage', 'image'), {url_or_none}, any),
|
||||
'title': (('title', 'caption'), {str}, any),
|
||||
'duration': ('duration', {parse_duration}),
|
||||
}),
|
||||
})
|
||||
if video_data.get('isLead') and not self._yes_playlist(playlist_id, video_id):
|
||||
return entry
|
||||
entries.append(entry)
|
||||
if entries:
|
||||
playlist_title = traverse_obj(morph_payload, (
|
||||
'body', 'content', 'article', 'headline', {str})) or playlist_title
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
preload_state = self._parse_json(self._search_regex(
|
||||
r'window\.__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
|
||||
'preload state', default='{}'), playlist_id, fatal=False)
|
||||
if preload_state:
|
||||
current_programme = preload_state.get('programmes', {}).get('current') or {}
|
||||
programme_id = current_programme.get('id')
|
||||
if current_programme and programme_id and current_programme.get('type') == 'playable_item':
|
||||
title = current_programme.get('titles', {}).get('tertiary') or playlist_title
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
synopses = current_programme.get('synopses') or {}
|
||||
network = current_programme.get('network') or {}
|
||||
duration = int_or_none(
|
||||
current_programme.get('duration', {}).get('value'))
|
||||
thumbnail = None
|
||||
image_url = current_programme.get('image_url')
|
||||
if image_url:
|
||||
thumbnail = image_url.replace('{recipe}', 'raw')
|
||||
return {
|
||||
'id': programme_id,
|
||||
'title': title,
|
||||
'description': dict_get(synopses, ('long', 'medium', 'short')),
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'uploader': network.get('short_title'),
|
||||
'uploader_id': network.get('id'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'chapters': traverse_obj(preload_state, (
|
||||
'tracklist', 'tracks', lambda _, v: float_or_none(v['offset']['start']), {
|
||||
# various PRELOADED_STATE JSON
|
||||
preload_state = self._search_json(
|
||||
r'window\.__(?:PWA_)?PRELOADED_STATE__\s*=', webpage,
|
||||
'preload state', playlist_id, transform_source=js_to_json, default={})
|
||||
# PRELOADED_STATE with current programmme
|
||||
current_programme = traverse_obj(preload_state, ('programmes', 'current', {dict}))
|
||||
programme_id = traverse_obj(current_programme, ('id', {str}))
|
||||
if programme_id and current_programme.get('type') == 'playable_item':
|
||||
title = traverse_obj(current_programme, ('titles', ('tertiary', 'secondary'), {str}, any)) or playlist_title
|
||||
formats, subtitles = self._download_media_selector(programme_id)
|
||||
return {
|
||||
'id': programme_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
**traverse_obj(current_programme, {
|
||||
'description': ('synopses', ('long', 'medium', 'short'), {str}, any),
|
||||
'thumbnail': ('image_url', {lambda u: url_or_none(u.replace('{recipe}', 'raw'))}),
|
||||
'duration': ('duration', 'value', {int_or_none}),
|
||||
'uploader': ('network', 'short_title', {str}),
|
||||
'uploader_id': ('network', 'id', {str}),
|
||||
'timestamp': ((('availability', 'from'), ('release', 'date')), {parse_iso8601}, any),
|
||||
'series': ('titles', 'primary', {str}),
|
||||
}),
|
||||
'subtitles': subtitles,
|
||||
**traverse_obj(preload_state, {
|
||||
'chapters': (
|
||||
'tracklist', 'tracks', lambda _, v: float(v['offset']['start']), {
|
||||
'title': ('titles', {lambda x: join_nonempty(
|
||||
'primary', 'secondary', 'tertiary', delim=' - ', from_dict=x)}),
|
||||
'start_time': ('offset', 'start', {float_or_none}),
|
||||
'end_time': ('offset', 'end', {float_or_none}),
|
||||
})) or None,
|
||||
}
|
||||
)
|
||||
}),
|
||||
}
|
||||
|
||||
# PWA_PRELOADED_STATE with article video asset
|
||||
asset_id = traverse_obj(preload_state, (
|
||||
'entities', 'articles', lambda k, _: k.rsplit('/', 1)[-1] == playlist_id,
|
||||
'assetVideo', 0, {str}, any))
|
||||
if asset_id:
|
||||
video_id = traverse_obj(preload_state, ('entities', 'videos', asset_id, 'vpid', {str}))
|
||||
if video_id:
|
||||
article = traverse_obj(preload_state, (
|
||||
'entities', 'articles', lambda _, v: v['assetVideo'][0] == asset_id, any))
|
||||
|
||||
def image_url(image_id):
|
||||
return traverse_obj(preload_state, (
|
||||
'entities', 'images', image_id, 'url',
|
||||
{lambda u: url_or_none(u.replace('$recipe', 'raw'))}))
|
||||
|
||||
formats, subtitles = self._download_media_selector(video_id)
|
||||
return {
|
||||
'id': video_id,
|
||||
**traverse_obj(preload_state, ('entities', 'videos', asset_id, {
|
||||
'title': ('title', {str}),
|
||||
'description': (('synopsisLong', 'synopsisMedium', 'synopsisShort'), {str}, any),
|
||||
'thumbnail': (0, {image_url}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
})),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
**traverse_obj(article, {
|
||||
'timestamp': ('displayDate', {parse_iso8601}),
|
||||
}),
|
||||
}
|
||||
else:
|
||||
return self.url_result(
|
||||
f'https://www.bbc.co.uk/programmes/{asset_id}', BBCCoUkIE,
|
||||
asset_id, playlist_title, display_id=playlist_id,
|
||||
description=playlist_description)
|
||||
|
||||
bbc3_config = self._parse_json(
|
||||
self._search_regex(
|
||||
|
@ -1204,6 +1276,30 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
k_int_or_none = functools.partial(int_or_none, scale=1000)
|
||||
|
||||
def parse_model(model):
|
||||
"""Extract single video from model structure"""
|
||||
item_id = traverse_obj(model, ('versions', 0, 'versionId', {str}))
|
||||
if not item_id:
|
||||
return
|
||||
formats, subtitles = self._download_media_selector(item_id)
|
||||
return {
|
||||
'id': item_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
**traverse_obj(model, {
|
||||
'title': ('title', {str}),
|
||||
'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
|
||||
'description': ('synopses', ('long', 'medium', 'short'), {str}, any),
|
||||
'duration': ('versions', 0, 'duration', {int}),
|
||||
'timestamp': ('versions', 0, 'availableFrom', {k_int_or_none}),
|
||||
})
|
||||
}
|
||||
|
||||
def is_type(*types):
|
||||
return lambda _, v: v['type'] in types
|
||||
|
||||
initial_data = self._search_regex(
|
||||
r'window\.__INITIAL_DATA__\s*=\s*("{.+?}")\s*;', webpage,
|
||||
'quoted preload state', default=None)
|
||||
|
@ -1215,6 +1311,19 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
initial_data = self._parse_json(initial_data or '"{}"', playlist_id, fatal=False)
|
||||
initial_data = self._parse_json(initial_data, playlist_id, fatal=False)
|
||||
if initial_data:
|
||||
for video_data in traverse_obj(initial_data, (
|
||||
'stores', 'article', 'articleBodyContent', is_type('video'))):
|
||||
model = traverse_obj(video_data, (
|
||||
'model', 'blocks', is_type('aresMedia'),
|
||||
'model', 'blocks', is_type('aresMediaMetadata'),
|
||||
'model', {dict}, any))
|
||||
entry = parse_model(model)
|
||||
if entry:
|
||||
entries.append(entry)
|
||||
if entries:
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
def parse_media(media):
|
||||
if not media:
|
||||
return
|
||||
|
@ -1248,62 +1357,87 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
|||
'timestamp': item_time,
|
||||
'description': strip_or_none(item_desc),
|
||||
})
|
||||
for resp in (initial_data.get('data') or {}).values():
|
||||
name = resp.get('name')
|
||||
|
||||
for resp in traverse_obj(initial_data, ('data', lambda _, v: v['name'])):
|
||||
name = resp['name']
|
||||
if name == 'media-experience':
|
||||
parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
|
||||
elif name == 'article':
|
||||
for block in (try_get(resp,
|
||||
(lambda x: x['data']['blocks'],
|
||||
lambda x: x['data']['content']['model']['blocks'],),
|
||||
list) or []):
|
||||
if block.get('type') not in ['media', 'video']:
|
||||
continue
|
||||
parse_media(block.get('model'))
|
||||
for block in traverse_obj(resp, (
|
||||
'data', (None, ('content', 'model')), 'blocks',
|
||||
is_type('media', 'video'), 'model', {dict})):
|
||||
parse_media(block)
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
# extract from SIMORGH_DATA hydration JSON
|
||||
simorgh_data = self._search_json(
|
||||
r'window\s*\.\s*SIMORGH_DATA\s*=', webpage,
|
||||
'simorgh data', playlist_id, default={})
|
||||
if simorgh_data:
|
||||
done = False
|
||||
for video_data in traverse_obj(simorgh_data, (
|
||||
'pageData', 'content', 'model', 'blocks', is_type('video', 'legacyMedia'))):
|
||||
model = traverse_obj(video_data, (
|
||||
'model', 'blocks', is_type('aresMedia'),
|
||||
'model', 'blocks', is_type('aresMediaMetadata'),
|
||||
'model', {dict}, any))
|
||||
if video_data['type'] == 'video':
|
||||
entry = parse_model(model)
|
||||
else: # legacyMedia: no duration, subtitles
|
||||
block_id, entry = traverse_obj(model, ('blockId', {str})), None
|
||||
media_data = traverse_obj(simorgh_data, (
|
||||
'pageData', 'promo', 'media',
|
||||
{lambda x: x if x['id'] == block_id else None}))
|
||||
formats = traverse_obj(media_data, ('playlist', lambda _, v: url_or_none(v['url']), {
|
||||
'url': ('url', {url_or_none}),
|
||||
'ext': ('format', {str}),
|
||||
'tbr': ('bitrate', {k_int_or_none}),
|
||||
}))
|
||||
if formats:
|
||||
entry = {
|
||||
'id': block_id,
|
||||
'display_id': playlist_id,
|
||||
'formats': formats,
|
||||
**traverse_obj(simorgh_data, ('pageData', 'promo', {
|
||||
'description': ('summary', {str}),
|
||||
})),
|
||||
**traverse_obj(model, {
|
||||
'title': ('title', {str}),
|
||||
'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
|
||||
'description': ('synopses', ('long', 'medium', 'short'), {str}, any),
|
||||
'timestamp': ('firstPublished', {k_int_or_none}),
|
||||
}),
|
||||
}
|
||||
done = True
|
||||
if entry:
|
||||
entries.append(entry)
|
||||
if done:
|
||||
break
|
||||
if entries:
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
||||
def extract_all(pattern):
|
||||
return list(filter(None, map(
|
||||
lambda s: self._parse_json(s, playlist_id, fatal=False),
|
||||
re.findall(pattern, webpage))))
|
||||
|
||||
def parse_model(model):
|
||||
'''Extract single video from model structure'''
|
||||
item_id = traverse_obj(model, ('versions', 0, 'versionId', {str}))
|
||||
if not item_id:
|
||||
return
|
||||
formats, subtitles = self._download_media_selector(item_id)
|
||||
return {
|
||||
'id': item_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
**traverse_obj(model, {
|
||||
'title': ('title', {str}),
|
||||
'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
|
||||
'description': (
|
||||
'synopses', ('long', 'medium', 'short'), {str}, any),
|
||||
'duration': ('versions', 0, 'duration', {int}),
|
||||
'timestamp': ('versions', 0, 'availableFrom', {lambda x: int_or_none(x, scale=1000)}),
|
||||
})
|
||||
}
|
||||
|
||||
# US accessed article with single embedded video (e.g.
|
||||
# https://www.bbc.com/news/uk-68546268)
|
||||
next_data = traverse_obj(self._search_nextjs_data(webpage, playlist_id, default={}), (
|
||||
'props', 'pageProps', 'page'))
|
||||
next_data = traverse_obj(self._search_nextjs_data(webpage, playlist_id, default={}),
|
||||
('props', 'pageProps', 'page'))
|
||||
model = traverse_obj(next_data, (
|
||||
..., 'contents', lambda _, v: v['type'] == 'video',
|
||||
'model', 'blocks', lambda _, v: v['type'] == 'media',
|
||||
'model', 'blocks', lambda _, v: v['type'] == 'mediaMetadata',
|
||||
..., 'contents', is_type('video'),
|
||||
'model', 'blocks', is_type('media'),
|
||||
'model', 'blocks', is_type('mediaMetadata'),
|
||||
'model', {dict}, any))
|
||||
if model:
|
||||
entry = parse_model(model)
|
||||
if entry:
|
||||
if entry.get('timestamp') is None:
|
||||
if entry := parse_model(model):
|
||||
if not entry.get('timestamp'):
|
||||
entry['timestamp'] = traverse_obj(next_data, (
|
||||
..., 'contents', lambda _, v: v['type'] == 'timestamp',
|
||||
'model', 'timestamp', {functools.partial(int_or_none, scale=1000)}, any))
|
||||
..., 'contents', is_type('timestamp'),
|
||||
'model', 'timestamp', {k_int_or_none}, any))
|
||||
entries.append(entry)
|
||||
return self.playlist_result(
|
||||
entries, playlist_id, playlist_title, playlist_description)
|
||||
|
|
|
@ -1738,12 +1738,16 @@ class InfoExtractor:
|
|||
traverse_json_ld(json_ld)
|
||||
return filter_dict(info)
|
||||
|
||||
def _search_nextjs_data(self, webpage, video_id, *, transform_source=None, fatal=True, **kw):
|
||||
return self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>',
|
||||
webpage, 'next.js data', fatal=fatal, **kw),
|
||||
video_id, transform_source=transform_source, fatal=fatal)
|
||||
def _search_nextjs_data(self, webpage, video_id, *, fatal=True, default=NO_DEFAULT, **kw):
|
||||
if default == '{}':
|
||||
self._downloader.deprecation_warning('using `default=\'{}\'` is deprecated, use `default={}` instead')
|
||||
default = {}
|
||||
if default is not NO_DEFAULT:
|
||||
fatal = False
|
||||
|
||||
return self._search_json(
|
||||
r'<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>', webpage, 'next.js data',
|
||||
video_id, end_pattern='</script>', fatal=fatal, default=default, **kw)
|
||||
|
||||
def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)):
|
||||
"""Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
|
||||
|
|
|
@ -24,11 +24,15 @@ class CrunchyrollBaseIE(InfoExtractor):
|
|||
_BASE_URL = 'https://www.crunchyroll.com'
|
||||
_API_BASE = 'https://api.crunchyroll.com'
|
||||
_NETRC_MACHINE = 'crunchyroll'
|
||||
_REFRESH_TOKEN = None
|
||||
_AUTH_HEADERS = None
|
||||
_AUTH_EXPIRY = None
|
||||
_API_ENDPOINT = None
|
||||
_BASIC_AUTH = None
|
||||
_BASIC_AUTH = 'Basic ' + base64.b64encode(':'.join((
|
||||
't-kdgp2h8c3jub8fn0fq',
|
||||
'yfLDfMfrYvKXh4JXS1LEI2cCqu1v5Wan',
|
||||
)).encode()).decode()
|
||||
_IS_PREMIUM = None
|
||||
_CLIENT_ID = ('cr_web', 'noaihdevm_6iyg0a8l0q')
|
||||
_LOCALE_LOOKUP = {
|
||||
'ar': 'ar-SA',
|
||||
'de': 'de-DE',
|
||||
|
@ -43,69 +47,74 @@ class CrunchyrollBaseIE(InfoExtractor):
|
|||
'hi': 'hi-IN',
|
||||
}
|
||||
|
||||
@property
|
||||
def is_logged_in(self):
|
||||
return bool(self._get_cookies(self._BASE_URL).get('etp_rt'))
|
||||
def _set_auth_info(self, response):
|
||||
CrunchyrollBaseIE._IS_PREMIUM = 'cr_premium' in traverse_obj(response, ('access_token', {jwt_decode_hs256}, 'benefits', ...))
|
||||
CrunchyrollBaseIE._AUTH_HEADERS = {'Authorization': response['token_type'] + ' ' + response['access_token']}
|
||||
CrunchyrollBaseIE._AUTH_EXPIRY = time_seconds(seconds=traverse_obj(response, ('expires_in', {float_or_none}), default=300) - 10)
|
||||
|
||||
def _request_token(self, headers, data, note='Requesting token', errnote='Failed to request token'):
|
||||
try: # TODO: Add impersonation support here
|
||||
return self._download_json(
|
||||
f'{self._BASE_URL}/auth/v1/token', None, note=note, errnote=errnote,
|
||||
headers=headers, data=urlencode_postdata(data))
|
||||
except ExtractorError as error:
|
||||
if not isinstance(error.cause, HTTPError) or error.cause.status != 403:
|
||||
raise
|
||||
raise ExtractorError(
|
||||
'Request blocked by Cloudflare; navigate to Crunchyroll in your browser, '
|
||||
'then pass the fresh cookies (with --cookies-from-browser or --cookies) '
|
||||
'and your browser\'s User-Agent (with --user-agent)', expected=True)
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
if self.is_logged_in:
|
||||
if not CrunchyrollBaseIE._REFRESH_TOKEN:
|
||||
CrunchyrollBaseIE._REFRESH_TOKEN = self.cache.load(self._NETRC_MACHINE, username)
|
||||
if CrunchyrollBaseIE._REFRESH_TOKEN:
|
||||
return
|
||||
|
||||
upsell_response = self._download_json(
|
||||
f'{self._API_BASE}/get_upsell_data.0.json', None, 'Getting session id',
|
||||
query={
|
||||
'sess_id': 1,
|
||||
'device_id': 'whatvalueshouldbeforweb',
|
||||
'device_type': 'com.crunchyroll.static',
|
||||
'access_token': 'giKq5eY27ny3cqz',
|
||||
'referer': f'{self._BASE_URL}/welcome/login'
|
||||
})
|
||||
if upsell_response['code'] != 'ok':
|
||||
raise ExtractorError('Could not get session id')
|
||||
session_id = upsell_response['data']['session_id']
|
||||
|
||||
login_response = self._download_json(
|
||||
f'{self._API_BASE}/login.1.json', None, 'Logging in',
|
||||
data=urlencode_postdata({
|
||||
'account': username,
|
||||
'password': password,
|
||||
'session_id': session_id
|
||||
}))
|
||||
if login_response['code'] != 'ok':
|
||||
raise ExtractorError('Login failed. Server message: %s' % login_response['message'], expected=True)
|
||||
if not self.is_logged_in:
|
||||
raise ExtractorError('Login succeeded but did not set etp_rt cookie')
|
||||
|
||||
def _update_auth(self):
|
||||
if CrunchyrollBaseIE._AUTH_HEADERS and CrunchyrollBaseIE._AUTH_REFRESH > time_seconds():
|
||||
return
|
||||
|
||||
if not CrunchyrollBaseIE._BASIC_AUTH:
|
||||
cx_api_param = self._CLIENT_ID[self.is_logged_in]
|
||||
self.write_debug(f'Using cxApiParam={cx_api_param}')
|
||||
CrunchyrollBaseIE._BASIC_AUTH = 'Basic ' + base64.b64encode(f'{cx_api_param}:'.encode()).decode()
|
||||
|
||||
auth_headers = {'Authorization': CrunchyrollBaseIE._BASIC_AUTH}
|
||||
if self.is_logged_in:
|
||||
grant_type = 'etp_rt_cookie'
|
||||
else:
|
||||
grant_type = 'client_id'
|
||||
auth_headers['ETP-Anonymous-ID'] = uuid.uuid4()
|
||||
try:
|
||||
auth_response = self._download_json(
|
||||
f'{self._BASE_URL}/auth/v1/token', None, note=f'Authenticating with grant_type={grant_type}',
|
||||
headers=auth_headers, data=f'grant_type={grant_type}'.encode())
|
||||
login_response = self._request_token(
|
||||
headers={'Authorization': self._BASIC_AUTH}, data={
|
||||
'username': username,
|
||||
'password': password,
|
||||
'grant_type': 'password',
|
||||
'scope': 'offline_access',
|
||||
}, note='Logging in', errnote='Failed to log in')
|
||||
except ExtractorError as error:
|
||||
if isinstance(error.cause, HTTPError) and error.cause.status == 403:
|
||||
raise ExtractorError(
|
||||
'Request blocked by Cloudflare; navigate to Crunchyroll in your browser, '
|
||||
'then pass the fresh cookies (with --cookies-from-browser or --cookies) '
|
||||
'and your browser\'s User-Agent (with --user-agent)', expected=True)
|
||||
if isinstance(error.cause, HTTPError) and error.cause.status == 401:
|
||||
raise ExtractorError('Invalid username and/or password', expected=True)
|
||||
raise
|
||||
|
||||
CrunchyrollBaseIE._IS_PREMIUM = 'cr_premium' in traverse_obj(auth_response, ('access_token', {jwt_decode_hs256}, 'benefits', ...))
|
||||
CrunchyrollBaseIE._AUTH_HEADERS = {'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token']}
|
||||
CrunchyrollBaseIE._AUTH_REFRESH = time_seconds(seconds=traverse_obj(auth_response, ('expires_in', {float_or_none}), default=300) - 10)
|
||||
CrunchyrollBaseIE._REFRESH_TOKEN = login_response['refresh_token']
|
||||
self.cache.store(self._NETRC_MACHINE, username, CrunchyrollBaseIE._REFRESH_TOKEN)
|
||||
self._set_auth_info(login_response)
|
||||
|
||||
def _update_auth(self):
|
||||
if CrunchyrollBaseIE._AUTH_HEADERS and CrunchyrollBaseIE._AUTH_EXPIRY > time_seconds():
|
||||
return
|
||||
|
||||
auth_headers = {'Authorization': self._BASIC_AUTH}
|
||||
if CrunchyrollBaseIE._REFRESH_TOKEN:
|
||||
data = {
|
||||
'refresh_token': CrunchyrollBaseIE._REFRESH_TOKEN,
|
||||
'grant_type': 'refresh_token',
|
||||
'scope': 'offline_access',
|
||||
}
|
||||
else:
|
||||
data = {'grant_type': 'client_id'}
|
||||
auth_headers['ETP-Anonymous-ID'] = uuid.uuid4()
|
||||
try:
|
||||
auth_response = self._request_token(auth_headers, data)
|
||||
except ExtractorError as error:
|
||||
username, password = self._get_login_info()
|
||||
if not username or not isinstance(error.cause, HTTPError) or error.cause.status != 400:
|
||||
raise
|
||||
self.to_screen('Refresh token has expired. Re-logging in')
|
||||
CrunchyrollBaseIE._REFRESH_TOKEN = None
|
||||
self.cache.store(self._NETRC_MACHINE, username, None)
|
||||
self._perform_login(username, password)
|
||||
return
|
||||
|
||||
self._set_auth_info(auth_response)
|
||||
|
||||
def _locale_from_language(self, language):
|
||||
config_locale = self._configuration_arg('metadata', ie_key=CrunchyrollBetaIE, casesense=True)
|
||||
|
@ -168,7 +177,8 @@ class CrunchyrollBaseIE(InfoExtractor):
|
|||
self._update_auth()
|
||||
stream_response = self._download_json(
|
||||
f'https://cr-play-service.prd.crunchyrollsvc.com/v1/{identifier}/console/switch/play',
|
||||
display_id, note='Downloading stream info', headers=CrunchyrollBaseIE._AUTH_HEADERS)
|
||||
display_id, note='Downloading stream info', errnote='Failed to download stream info',
|
||||
headers=CrunchyrollBaseIE._AUTH_HEADERS)
|
||||
|
||||
available_formats = {'': ('', '', stream_response['url'])}
|
||||
for hardsub_lang, stream in traverse_obj(stream_response, ('hardSubs', {dict.items}, lambda _, v: v[1]['url'])):
|
||||
|
@ -383,9 +393,9 @@ class CrunchyrollBetaIE(CrunchyrollCmsBaseIE):
|
|||
|
||||
if not self._IS_PREMIUM and traverse_obj(response, (f'{object_type}_metadata', 'is_premium_only')):
|
||||
message = f'This {object_type} is for premium members only'
|
||||
if self.is_logged_in:
|
||||
if CrunchyrollBaseIE._REFRESH_TOKEN:
|
||||
raise ExtractorError(message, expected=True)
|
||||
self.raise_login_required(message)
|
||||
self.raise_login_required(message, method='password')
|
||||
|
||||
result['formats'], result['subtitles'] = self._extract_stream(internal_id)
|
||||
|
||||
|
@ -575,9 +585,9 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE):
|
|||
|
||||
if not self._IS_PREMIUM and response.get('isPremiumOnly'):
|
||||
message = f'This {response.get("type") or "media"} is for premium members only'
|
||||
if self.is_logged_in:
|
||||
if CrunchyrollBaseIE._REFRESH_TOKEN:
|
||||
raise ExtractorError(message, expected=True)
|
||||
self.raise_login_required(message)
|
||||
self.raise_login_required(message, method='password')
|
||||
|
||||
result = self._transform_music_response(response)
|
||||
result['formats'], _ = self._extract_stream(f'music/{internal_id}', internal_id)
|
||||
|
|
|
@ -560,7 +560,7 @@ class FacebookIE(InfoExtractor):
|
|||
js_data, lambda x: x['jsmods']['instances'], list) or [])
|
||||
|
||||
def extract_dash_manifest(video, formats):
|
||||
dash_manifest = video.get('dash_manifest')
|
||||
dash_manifest = traverse_obj(video, 'dash_manifest', 'playlist', expected_type=str)
|
||||
if dash_manifest:
|
||||
formats.extend(self._parse_mpd_formats(
|
||||
compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest)),
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
import itertools
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .vimeo import VimeoIE
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
KNOWN_EXTENSIONS,
|
||||
|
@ -14,7 +14,6 @@ from ..utils import (
|
|||
parse_iso8601,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
@ -199,6 +198,27 @@ class PatreonIE(PatreonBaseIE):
|
|||
'channel_id': '2147162',
|
||||
'uploader_url': 'https://www.patreon.com/yaboyroshi',
|
||||
},
|
||||
}, {
|
||||
# NSFW vimeo embed URL
|
||||
'url': 'https://www.patreon.com/posts/4k-spiderman-4k-96414599',
|
||||
'info_dict': {
|
||||
'id': '902250943',
|
||||
'ext': 'mp4',
|
||||
'title': '❤️(4K) Spiderman Girl Yeonhwa’s Gift ❤️(4K) 스파이더맨걸 연화의 선물',
|
||||
'description': '❤️(4K) Spiderman Girl Yeonhwa’s Gift \n❤️(4K) 스파이더맨걸 연화의 선물',
|
||||
'uploader': 'Npickyeonhwa',
|
||||
'uploader_id': '90574422',
|
||||
'uploader_url': 'https://www.patreon.com/Yeonhwa726',
|
||||
'channel_id': '10237902',
|
||||
'channel_url': 'https://www.patreon.com/Yeonhwa726',
|
||||
'duration': 70,
|
||||
'timestamp': 1705150153,
|
||||
'upload_date': '20240113',
|
||||
'comment_count': int,
|
||||
'like_count': int,
|
||||
'thumbnail': r're:^https?://.+',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -268,16 +288,19 @@ class PatreonIE(PatreonBaseIE):
|
|||
})
|
||||
|
||||
# handle Vimeo embeds
|
||||
if try_get(attributes, lambda x: x['embed']['provider']) == 'Vimeo':
|
||||
embed_html = try_get(attributes, lambda x: x['embed']['html'])
|
||||
v_url = url_or_none(compat_urllib_parse_unquote(
|
||||
self._search_regex(r'(https(?:%3A%2F%2F|://)player\.vimeo\.com.+app_id(?:=|%3D)+\d+)', embed_html, 'vimeo url', fatal=False)))
|
||||
if v_url:
|
||||
v_url = VimeoIE._smuggle_referrer(v_url, 'https://patreon.com')
|
||||
if self._request_webpage(v_url, video_id, 'Checking Vimeo embed URL', fatal=False, errnote=False):
|
||||
return self.url_result(v_url, VimeoIE, url_transparent=True, **info)
|
||||
if traverse_obj(attributes, ('embed', 'provider')) == 'Vimeo':
|
||||
v_url = urllib.parse.unquote(self._html_search_regex(
|
||||
r'(https(?:%3A%2F%2F|://)player\.vimeo\.com.+app_id(?:=|%3D)+\d+)',
|
||||
traverse_obj(attributes, ('embed', 'html', {str})), 'vimeo url', fatal=False) or '')
|
||||
if url_or_none(v_url) and self._request_webpage(
|
||||
v_url, video_id, 'Checking Vimeo embed URL',
|
||||
headers={'Referer': 'https://patreon.com/'},
|
||||
fatal=False, errnote=False):
|
||||
return self.url_result(
|
||||
VimeoIE._smuggle_referrer(v_url, 'https://patreon.com/'),
|
||||
VimeoIE, url_transparent=True, **info)
|
||||
|
||||
embed_url = try_get(attributes, lambda x: x['embed']['url'])
|
||||
embed_url = traverse_obj(attributes, ('embed', 'url', {url_or_none}))
|
||||
if embed_url and self._request_webpage(embed_url, video_id, 'Checking embed URL', fatal=False, errnote=False):
|
||||
return self.url_result(embed_url, **info)
|
||||
|
||||
|
|
|
@ -174,7 +174,7 @@ class TheaterComplexTownBaseIE(StacommuBaseIE):
|
|||
|
||||
|
||||
class TheaterComplexTownVODIE(TheaterComplexTownBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?theater-complex\.town/(?:en/)?videos/episodes/(?P<id>\w+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?theater-complex\.town/(?:(?:en|ja)/)?videos/episodes/(?P<id>\w+)'
|
||||
IE_NAME = 'theatercomplextown:vod'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.theater-complex.town/videos/episodes/hoxqidYNoAn7bP92DN6p78',
|
||||
|
@ -195,6 +195,9 @@ class TheaterComplexTownVODIE(TheaterComplexTownBaseIE):
|
|||
}, {
|
||||
'url': 'https://www.theater-complex.town/en/videos/episodes/6QT7XYwM9dJz5Gf9VB6K5y',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.theater-complex.town/ja/videos/episodes/hoxqidYNoAn7bP92DN6p78',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_API_PATH = 'videoEpisodes'
|
||||
|
@ -204,7 +207,7 @@ class TheaterComplexTownVODIE(TheaterComplexTownBaseIE):
|
|||
|
||||
|
||||
class TheaterComplexTownPPVIE(TheaterComplexTownBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?theater-complex\.town/(?:en/)?ppv/(?P<id>\w+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?theater-complex\.town/(?:(?:en|ja)/)?ppv/(?P<id>\w+)'
|
||||
IE_NAME = 'theatercomplextown:ppv'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.theater-complex.town/ppv/wytW3X7khrjJBUpKuV3jen',
|
||||
|
@ -223,6 +226,9 @@ class TheaterComplexTownPPVIE(TheaterComplexTownBaseIE):
|
|||
}, {
|
||||
'url': 'https://www.theater-complex.town/en/ppv/wytW3X7khrjJBUpKuV3jen',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.theater-complex.town/ja/ppv/qwUVmLmGEiZ3ZW6it9uGys',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_API_PATH = 'events'
|
||||
|
|
|
@ -41,7 +41,7 @@ class STVPlayerIE(InfoExtractor):
|
|||
ptype, video_id = self._match_valid_url(url).groups()
|
||||
|
||||
webpage = self._download_webpage(url, video_id, fatal=False) or ''
|
||||
props = self._search_nextjs_data(webpage, video_id, default='{}').get('props') or {}
|
||||
props = self._search_nextjs_data(webpage, video_id, default={}).get('props') or {}
|
||||
player_api_cache = try_get(
|
||||
props, lambda x: x['initialReduxState']['playerApiCache']) or {}
|
||||
|
||||
|
|
|
@ -776,7 +776,7 @@ class TikTokIE(TikTokBaseIE):
|
|||
status = traverse_obj(sigi_data, ('VideoPage', 'statusCode', {int})) or 0
|
||||
video_data = traverse_obj(sigi_data, ('ItemModule', video_id, {dict}))
|
||||
|
||||
elif next_data := self._search_nextjs_data(webpage, video_id, default='{}'):
|
||||
elif next_data := self._search_nextjs_data(webpage, video_id, default={}):
|
||||
self.write_debug('Found next.js data')
|
||||
status = traverse_obj(next_data, ('props', 'pageProps', 'statusCode', {int})) or 0
|
||||
video_data = traverse_obj(next_data, ('props', 'pageProps', 'itemInfo', 'itemStruct', {dict}))
|
||||
|
|
|
@ -147,7 +147,7 @@ class WrestleUniverseBaseIE(InfoExtractor):
|
|||
metadata = self._call_api(video_id, msg='metadata', query={'al': lang or 'ja'}, auth=False, fatal=False)
|
||||
if not metadata:
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
nextjs_data = self._search_nextjs_data(webpage, video_id)
|
||||
nextjs_data = self._search_nextjs_data(webpage, video_id, fatal=False)
|
||||
metadata = traverse_obj(nextjs_data, (
|
||||
'props', 'pageProps', *variadic(props_keys, (str, bytes, dict, set)), {dict})) or {}
|
||||
return metadata
|
||||
|
|
|
@ -1638,16 +1638,14 @@ def get_filesystem_encoding():
|
|||
return encoding if encoding is not None else 'utf-8'
|
||||
|
||||
|
||||
_WINDOWS_QUOTE_TRANS = str.maketrans({'"': '\\"', '\\': '\\\\'})
|
||||
_WINDOWS_QUOTE_TRANS = str.maketrans({'"': R'\"'})
|
||||
_CMD_QUOTE_TRANS = str.maketrans({
|
||||
# Keep quotes balanced by replacing them with `""` instead of `\\"`
|
||||
'"': '""',
|
||||
# Requires a variable `=` containing `"^\n\n"` (set in `utils.Popen`)
|
||||
# These require an env-variable `=` containing `"^\n\n"` (set in `utils.Popen`)
|
||||
# `=` should be unique since variables containing `=` cannot be set using cmd
|
||||
'\n': '%=%',
|
||||
# While we are only required to escape backslashes immediately before quotes,
|
||||
# we instead escape all of 'em anyways to be consistent
|
||||
'\\': '\\\\',
|
||||
'\r': '%=%',
|
||||
# Use zero length variable replacement so `%` doesn't get expanded
|
||||
# `cd` is always set as long as extensions are enabled (`/E:ON` in `utils.Popen`)
|
||||
'%': '%%cd:~,%',
|
||||
|
@ -1656,19 +1654,14 @@ _CMD_QUOTE_TRANS = str.maketrans({
|
|||
|
||||
def shell_quote(args, *, shell=False):
|
||||
args = list(variadic(args))
|
||||
if any(isinstance(item, bytes) for item in args):
|
||||
deprecation_warning('Passing bytes to utils.shell_quote is deprecated')
|
||||
encoding = get_filesystem_encoding()
|
||||
for index, item in enumerate(args):
|
||||
if isinstance(item, bytes):
|
||||
args[index] = item.decode(encoding)
|
||||
|
||||
if compat_os_name != 'nt':
|
||||
return shlex.join(args)
|
||||
|
||||
trans = _CMD_QUOTE_TRANS if shell else _WINDOWS_QUOTE_TRANS
|
||||
return ' '.join(
|
||||
s if re.fullmatch(r'[\w#$*\-+./:?@\\]+', s, re.ASCII) else s.translate(trans).join('""')
|
||||
s if re.fullmatch(r'[\w#$*\-+./:?@\\]+', s, re.ASCII)
|
||||
else re.sub(r'(\\+)("|$)', r'\1\1\2', s).translate(trans).join('""')
|
||||
for s in args)
|
||||
|
||||
|
||||
|
|
Ładowanie…
Reference in New Issue