Porównaj commity

...

9 Commity

Autor SHA1 Wiadomość Data
bashonly 6c7bfb5c7c
oops
Authored by: bashonly
2024-04-23 18:28:43 -05:00
bashonly 805f40a001
add tests
Authored by: bashonly
2024-04-23 18:22:58 -05:00
bashonly b77ccbc67b
new impl
Authored by: bashonly
2024-04-23 18:04:12 -05:00
bashonly d5d83395ab
revert to orig impl
Authored by: bashonly
2024-04-23 14:33:48 -05:00
bashonly f2f108b754
fix test
Authored by: bashonly
2024-04-22 23:56:28 -05:00
bashonly d66fa6a118
Merge branch 'yt-dlp:master' into feat/impersonate-response 2024-04-22 17:46:43 -05:00
bashonly 3b48105b8c
slight impl change
Authored by: bashonly
2024-04-22 17:41:20 -05:00
bashonly 89f535e265
[ci] Fix `curl-cffi` installation (Bugfix for 02483bea1c)
Authored by: bashonly
2024-04-22 20:36:01 +00:00
bashonly ff38a011d5
[ie/crunchyroll] Fix auth and remove cookies support (#9749)
Closes #9745
Authored by: bashonly
2024-04-21 22:41:40 +00:00
6 zmienionych plików z 117 dodań i 90 usunięć

Wyświetl plik

@ -53,7 +53,7 @@ jobs:
with:
python-version: ${{ matrix.python-version }}
- name: Install test requirements
run: python3 ./devscripts/install_deps.py --include dev --include curl_cffi
run: python3 ./devscripts/install_deps.py --include dev --include curl-cffi
- name: Run tests
continue-on-error: False
run: |

Wyświetl plik

@ -785,6 +785,25 @@ class TestHTTPImpersonateRequestHandler(TestRequestHandlerBase):
assert res.status == 200
assert std_headers['user-agent'].lower() not in res.read().decode().lower()
def test_response_extensions(self, handler):
with handler() as rh:
for target in rh.supported_targets:
request = Request(
f'http://127.0.0.1:{self.http_port}/gen_200', extensions={'impersonate': target})
res = validate_and_send(rh, request)
assert res.extensions['impersonate'] == rh._get_request_target(request)
def test_http_error_response_extensions(self, handler):
with handler() as rh:
for target in rh.supported_targets:
request = Request(
f'http://127.0.0.1:{self.http_port}/gen_404', extensions={'impersonate': target})
try:
validate_and_send(rh, request)
except HTTPError as e:
res = e.response
assert res.extensions['impersonate'] == rh._get_request_target(request)
class TestRequestHandlerMisc:
"""Misc generic tests for request handlers, not related to request or validation testing"""

Wyświetl plik

@ -24,11 +24,15 @@ class CrunchyrollBaseIE(InfoExtractor):
_BASE_URL = 'https://www.crunchyroll.com'
_API_BASE = 'https://api.crunchyroll.com'
_NETRC_MACHINE = 'crunchyroll'
_REFRESH_TOKEN = None
_AUTH_HEADERS = None
_AUTH_EXPIRY = None
_API_ENDPOINT = None
_BASIC_AUTH = None
_BASIC_AUTH = 'Basic ' + base64.b64encode(':'.join((
't-kdgp2h8c3jub8fn0fq',
'yfLDfMfrYvKXh4JXS1LEI2cCqu1v5Wan',
)).encode()).decode()
_IS_PREMIUM = None
_CLIENT_ID = ('cr_web', 'noaihdevm_6iyg0a8l0q')
_LOCALE_LOOKUP = {
'ar': 'ar-SA',
'de': 'de-DE',
@ -43,69 +47,74 @@ class CrunchyrollBaseIE(InfoExtractor):
'hi': 'hi-IN',
}
@property
def is_logged_in(self):
return bool(self._get_cookies(self._BASE_URL).get('etp_rt'))
def _set_auth_info(self, response):
CrunchyrollBaseIE._IS_PREMIUM = 'cr_premium' in traverse_obj(response, ('access_token', {jwt_decode_hs256}, 'benefits', ...))
CrunchyrollBaseIE._AUTH_HEADERS = {'Authorization': response['token_type'] + ' ' + response['access_token']}
CrunchyrollBaseIE._AUTH_EXPIRY = time_seconds(seconds=traverse_obj(response, ('expires_in', {float_or_none}), default=300) - 10)
def _request_token(self, headers, data, note='Requesting token', errnote='Failed to request token'):
try: # TODO: Add impersonation support here
return self._download_json(
f'{self._BASE_URL}/auth/v1/token', None, note=note, errnote=errnote,
headers=headers, data=urlencode_postdata(data))
except ExtractorError as error:
if not isinstance(error.cause, HTTPError) or error.cause.status != 403:
raise
raise ExtractorError(
'Request blocked by Cloudflare; navigate to Crunchyroll in your browser, '
'then pass the fresh cookies (with --cookies-from-browser or --cookies) '
'and your browser\'s User-Agent (with --user-agent)', expected=True)
def _perform_login(self, username, password):
if self.is_logged_in:
if not CrunchyrollBaseIE._REFRESH_TOKEN:
CrunchyrollBaseIE._REFRESH_TOKEN = self.cache.load(self._NETRC_MACHINE, username)
if CrunchyrollBaseIE._REFRESH_TOKEN:
return
upsell_response = self._download_json(
f'{self._API_BASE}/get_upsell_data.0.json', None, 'Getting session id',
query={
'sess_id': 1,
'device_id': 'whatvalueshouldbeforweb',
'device_type': 'com.crunchyroll.static',
'access_token': 'giKq5eY27ny3cqz',
'referer': f'{self._BASE_URL}/welcome/login'
})
if upsell_response['code'] != 'ok':
raise ExtractorError('Could not get session id')
session_id = upsell_response['data']['session_id']
login_response = self._download_json(
f'{self._API_BASE}/login.1.json', None, 'Logging in',
data=urlencode_postdata({
'account': username,
'password': password,
'session_id': session_id
}))
if login_response['code'] != 'ok':
raise ExtractorError('Login failed. Server message: %s' % login_response['message'], expected=True)
if not self.is_logged_in:
raise ExtractorError('Login succeeded but did not set etp_rt cookie')
def _update_auth(self):
if CrunchyrollBaseIE._AUTH_HEADERS and CrunchyrollBaseIE._AUTH_REFRESH > time_seconds():
return
if not CrunchyrollBaseIE._BASIC_AUTH:
cx_api_param = self._CLIENT_ID[self.is_logged_in]
self.write_debug(f'Using cxApiParam={cx_api_param}')
CrunchyrollBaseIE._BASIC_AUTH = 'Basic ' + base64.b64encode(f'{cx_api_param}:'.encode()).decode()
auth_headers = {'Authorization': CrunchyrollBaseIE._BASIC_AUTH}
if self.is_logged_in:
grant_type = 'etp_rt_cookie'
else:
grant_type = 'client_id'
auth_headers['ETP-Anonymous-ID'] = uuid.uuid4()
try:
auth_response = self._download_json(
f'{self._BASE_URL}/auth/v1/token', None, note=f'Authenticating with grant_type={grant_type}',
headers=auth_headers, data=f'grant_type={grant_type}'.encode())
login_response = self._request_token(
headers={'Authorization': self._BASIC_AUTH}, data={
'username': username,
'password': password,
'grant_type': 'password',
'scope': 'offline_access',
}, note='Logging in', errnote='Failed to log in')
except ExtractorError as error:
if isinstance(error.cause, HTTPError) and error.cause.status == 403:
raise ExtractorError(
'Request blocked by Cloudflare; navigate to Crunchyroll in your browser, '
'then pass the fresh cookies (with --cookies-from-browser or --cookies) '
'and your browser\'s User-Agent (with --user-agent)', expected=True)
if isinstance(error.cause, HTTPError) and error.cause.status == 401:
raise ExtractorError('Invalid username and/or password', expected=True)
raise
CrunchyrollBaseIE._IS_PREMIUM = 'cr_premium' in traverse_obj(auth_response, ('access_token', {jwt_decode_hs256}, 'benefits', ...))
CrunchyrollBaseIE._AUTH_HEADERS = {'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token']}
CrunchyrollBaseIE._AUTH_REFRESH = time_seconds(seconds=traverse_obj(auth_response, ('expires_in', {float_or_none}), default=300) - 10)
CrunchyrollBaseIE._REFRESH_TOKEN = login_response['refresh_token']
self.cache.store(self._NETRC_MACHINE, username, CrunchyrollBaseIE._REFRESH_TOKEN)
self._set_auth_info(login_response)
def _update_auth(self):
if CrunchyrollBaseIE._AUTH_HEADERS and CrunchyrollBaseIE._AUTH_EXPIRY > time_seconds():
return
auth_headers = {'Authorization': self._BASIC_AUTH}
if CrunchyrollBaseIE._REFRESH_TOKEN:
data = {
'refresh_token': CrunchyrollBaseIE._REFRESH_TOKEN,
'grant_type': 'refresh_token',
'scope': 'offline_access',
}
else:
data = {'grant_type': 'client_id'}
auth_headers['ETP-Anonymous-ID'] = uuid.uuid4()
try:
auth_response = self._request_token(auth_headers, data)
except ExtractorError as error:
username, password = self._get_login_info()
if not username or not isinstance(error.cause, HTTPError) or error.cause.status != 400:
raise
self.to_screen('Refresh token has expired. Re-logging in')
CrunchyrollBaseIE._REFRESH_TOKEN = None
self.cache.store(self._NETRC_MACHINE, username, None)
self._perform_login(username, password)
return
self._set_auth_info(auth_response)
def _locale_from_language(self, language):
config_locale = self._configuration_arg('metadata', ie_key=CrunchyrollBetaIE, casesense=True)
@ -168,7 +177,8 @@ class CrunchyrollBaseIE(InfoExtractor):
self._update_auth()
stream_response = self._download_json(
f'https://cr-play-service.prd.crunchyrollsvc.com/v1/{identifier}/console/switch/play',
display_id, note='Downloading stream info', headers=CrunchyrollBaseIE._AUTH_HEADERS)
display_id, note='Downloading stream info', errnote='Failed to download stream info',
headers=CrunchyrollBaseIE._AUTH_HEADERS)
available_formats = {'': ('', '', stream_response['url'])}
for hardsub_lang, stream in traverse_obj(stream_response, ('hardSubs', {dict.items}, lambda _, v: v[1]['url'])):
@ -383,9 +393,9 @@ class CrunchyrollBetaIE(CrunchyrollCmsBaseIE):
if not self._IS_PREMIUM and traverse_obj(response, (f'{object_type}_metadata', 'is_premium_only')):
message = f'This {object_type} is for premium members only'
if self.is_logged_in:
if CrunchyrollBaseIE._REFRESH_TOKEN:
raise ExtractorError(message, expected=True)
self.raise_login_required(message)
self.raise_login_required(message, method='password')
result['formats'], result['subtitles'] = self._extract_stream(internal_id)
@ -575,9 +585,9 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE):
if not self._IS_PREMIUM and response.get('isPremiumOnly'):
message = f'This {response.get("type") or "media"} is for premium members only'
if self.is_logged_in:
if CrunchyrollBaseIE._REFRESH_TOKEN:
raise ExtractorError(message, expected=True)
self.raise_login_required(message)
self.raise_login_required(message, method='password')
result = self._transform_music_response(response)
result['formats'], _ = self._extract_stream(f'music/{internal_id}', internal_id)

Wyświetl plik

@ -5,7 +5,13 @@ import math
import urllib.parse
from ._helper import InstanceStoreMixin, select_proxy
from .common import Features, Request, register_preference, register_rh
from .common import (
Features,
Request,
Response,
register_preference,
register_rh,
)
from .exceptions import (
CertificateVerifyError,
HTTPError,
@ -14,11 +20,7 @@ from .exceptions import (
SSLError,
TransportError,
)
from .impersonate import (
ImpersonateRequestHandler,
ImpersonateResponse,
ImpersonateTarget,
)
from .impersonate import ImpersonateRequestHandler, ImpersonateTarget
from ..dependencies import curl_cffi
from ..utils import int_or_none
@ -78,16 +80,15 @@ class CurlCFFIResponseReader(io.IOBase):
super().close()
class CurlCFFIResponseAdapter(ImpersonateResponse):
class CurlCFFIResponseAdapter(Response):
fp: CurlCFFIResponseReader
def __init__(self, response: curl_cffi.requests.Response, impersonate: ImpersonateTarget):
def __init__(self, response: curl_cffi.requests.Response):
super().__init__(
fp=CurlCFFIResponseReader(response),
headers=response.headers,
url=response.url,
status=response.status_code,
impersonate=impersonate)
status=response.status_code)
def read(self, amt=None):
try:
@ -131,6 +132,15 @@ class CurlCFFIRH(ImpersonateRequestHandler, InstanceStoreMixin):
extensions.pop('cookiejar', None)
extensions.pop('timeout', None)
def send(self, request: Request) -> Response:
try:
response = super().send(request)
except HTTPError as e:
e.response.extensions['impersonate'] = self._get_request_target(request)
raise
response.extensions['impersonate'] = self._get_request_target(request)
return response
def _send(self, request: Request):
max_redirects_exceeded = False
session: curl_cffi.requests.Session = self._get_instance(
@ -177,8 +187,6 @@ class CurlCFFIRH(ImpersonateRequestHandler, InstanceStoreMixin):
session.curl.setopt(CurlOpt.LOW_SPEED_LIMIT, 1) # 1 byte per second
session.curl.setopt(CurlOpt.LOW_SPEED_TIME, math.ceil(timeout))
impersonate_target = self._get_request_target(request)
try:
curl_response = session.request(
method=request.method,
@ -188,7 +196,8 @@ class CurlCFFIRH(ImpersonateRequestHandler, InstanceStoreMixin):
verify=self.verify,
max_redirects=5,
timeout=timeout,
impersonate=self._SUPPORTED_IMPERSONATE_TARGET_MAP.get(impersonate_target),
impersonate=self._SUPPORTED_IMPERSONATE_TARGET_MAP.get(
self._get_request_target(request)),
interface=self.source_address,
stream=True
)
@ -208,7 +217,7 @@ class CurlCFFIRH(ImpersonateRequestHandler, InstanceStoreMixin):
else:
raise TransportError(cause=e) from e
response = CurlCFFIResponseAdapter(curl_response, impersonate_target)
response = CurlCFFIResponseAdapter(curl_response)
if not 200 <= response.status < 300:
raise HTTPError(response, redirect_loop=max_redirects_exceeded)

Wyświetl plik

@ -517,6 +517,7 @@ class Response(io.IOBase):
self.reason = reason or HTTPStatus(status).phrase
except ValueError:
self.reason = None
self.extensions = {}
def readable(self):
return self.fp.readable()

Wyświetl plik

@ -5,7 +5,7 @@ from abc import ABC
from dataclasses import dataclass
from typing import Any
from .common import RequestHandler, Response, register_preference
from .common import RequestHandler, register_preference
from .exceptions import UnsupportedRequest
from ..compat.types import NoneType
from ..utils import classproperty, join_nonempty
@ -58,18 +58,6 @@ class ImpersonateTarget:
return cls(**mobj.groupdict())
class ImpersonateResponse(Response):
"""
Wrapper class for a Response to an impersonated Request.
Parameters:
@param impersonate: the ImpersonateTarget used in the originating Request.
"""
def __init__(self, *args, impersonate: ImpersonateTarget, **kwargs):
super().__init__(*args, **kwargs)
self.impersonate = impersonate
class ImpersonateRequestHandler(RequestHandler, ABC):
"""
Base class for request handlers that support browser impersonation.