From b44e0f8b984d250a09bdc11f626f83465b04e770 Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Fri, 29 Mar 2024 15:56:57 +1300 Subject: [PATCH 1/8] [test] Add http proxy tests --- test/conftest.py | 44 ++++- test/parameters.json | 1 + test/test_http_proxy.py | 377 ++++++++++++++++++++++++++++++++++++++++ test/test_networking.py | 106 ++++------- test/test_websockets.py | 26 +++ 5 files changed, 483 insertions(+), 71 deletions(-) create mode 100644 test/test_http_proxy.py diff --git a/test/conftest.py b/test/conftest.py index 2fbc269e1..859fc6ad3 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -1,4 +1,3 @@ -import functools import inspect import pytest @@ -10,7 +9,9 @@ from yt_dlp.utils._utils import _YDLLogger as FakeLogger @pytest.fixture def handler(request): - RH_KEY = request.param + RH_KEY = getattr(request, 'param', None) + if not RH_KEY: + return if inspect.isclass(RH_KEY) and issubclass(RH_KEY, RequestHandler): handler = RH_KEY elif RH_KEY in _REQUEST_HANDLERS: @@ -18,9 +19,46 @@ def handler(request): else: pytest.skip(f'{RH_KEY} request handler is not available') - return functools.partial(handler, logger=FakeLogger) + class HandlerWrapper(handler): + RH_KEY = handler.RH_KEY + def __init__(self, *args, **kwargs): + super().__init__(logger=FakeLogger, *args, **kwargs) + + return HandlerWrapper + + +@pytest.fixture(autouse=True) +def skip_handler(request, handler): + for marker in request.node.iter_markers('skip_handler'): + if marker.args[0] == handler.RH_KEY: + pytest.skip(marker.args[1] if len(marker.args) > 1 else '') + + +@pytest.fixture(autouse=True) +def skip_handler_if(request, handler): + for marker in request.node.iter_markers('skip_handler_if'): + if marker.args[0] == handler.RH_KEY and marker.args[1](request): + pytest.skip(marker.args[2] if len(marker.args) > 2 else '') + +@pytest.fixture(autouse=True) +def skip_handlers_if(request, handler): + for marker in request.node.iter_markers('skip_handlers_if'): + if handler and marker.args[0](request, handler): + pytest.skip(marker.args[1] if len(marker.args) > 1 else '') def validate_and_send(rh, req): rh.validate(req) return rh.send(req) + + +def pytest_configure(config): + config.addinivalue_line( + "markers", "skip_handler(handler): skip test for the given handler", + ) + config.addinivalue_line( + "markers", "skip_handler_if(handler): skip test for the given handler if condition is true" + ) + config.addinivalue_line( + "markers", "skip_handlers_if(handler): skip test for handlers when the condition is true" + ) diff --git a/test/parameters.json b/test/parameters.json index 8789ce14b..cb7a7439a 100644 --- a/test/parameters.json +++ b/test/parameters.json @@ -1,4 +1,5 @@ { + "debug_printtraffic": true, "check_formats": false, "consoletitle": false, "continuedl": true, diff --git a/test/test_http_proxy.py b/test/test_http_proxy.py new file mode 100644 index 000000000..69268b9c7 --- /dev/null +++ b/test/test_http_proxy.py @@ -0,0 +1,377 @@ +import abc +import base64 +import contextlib +import functools +import json +import os +import random +import ssl +import threading +from http.server import BaseHTTPRequestHandler +from socketserver import BaseRequestHandler, ThreadingTCPServer + +import pytest + +from test.helper import http_server_port, verify_address_availability +from test.test_networking import TEST_DIR +from test.test_socks import IPv6ThreadingTCPServer +from yt_dlp.dependencies import urllib3 +from yt_dlp.networking import Request +from yt_dlp.networking.exceptions import ProxyError, HTTPError, SSLError + + +class HTTPProxyAuthMixin: + + def proxy_auth_error(self): + self.send_response(407) + self.send_header('Proxy-Authenticate', 'Basic realm="test http proxy"') + self.end_headers() + return False + + def do_proxy_auth(self, username, password): + if username is None and password is None: + return True + + proxy_auth_header = self.headers.get('Proxy-Authorization', None) + if proxy_auth_header is None: + return self.proxy_auth_error() + + if not proxy_auth_header.startswith('Basic '): + return self.proxy_auth_error() + + auth = proxy_auth_header[6:] + + try: + auth_username, auth_password = base64.b64decode(auth).decode().split(':', 1) + except Exception: + return self.proxy_auth_error() + + if auth_username != (username or '') or auth_password != (password or ''): + return self.proxy_auth_error() + return True + + +class HTTPProxyHandler(BaseHTTPRequestHandler, HTTPProxyAuthMixin): + def __init__(self, *args, proxy_info=None, username=None, password=None, request_handler=None, **kwargs): + self.username = username + self.password = password + self.proxy_info = proxy_info + super().__init__(*args, **kwargs) + + def do_GET(self): + self.do_proxy_auth(self.username, self.password) + if self.path.endswith('/proxy_info'): + payload = json.dumps(self.proxy_info or { + 'client_address': self.client_address, + 'connect': False, + 'connect_host': None, + 'connect_port': None, + 'headers': dict(self.headers), + 'path': self.path, + 'proxy': ':'.join(str(y) for y in self.connection.getsockname()), + }) + self.send_response(200) + self.send_header('Content-Type', 'application/json; charset=utf-8') + self.send_header('Content-Length', str(len(payload))) + self.end_headers() + self.wfile.write(payload.encode()) + + +if urllib3: + import urllib3.util.ssltransport + + class SSLTransport(urllib3.util.ssltransport.SSLTransport): + """ + Modified version of urllib3 SSLTransport to support server side SSL + + This allows us to chain multiple TLS connections. + """ + def __init__(self, socket, ssl_context, server_hostname=None, suppress_ragged_eofs=True, server_side=False): + self.incoming = ssl.MemoryBIO() + self.outgoing = ssl.MemoryBIO() + + self.suppress_ragged_eofs = suppress_ragged_eofs + self.socket = socket + + self.sslobj = ssl_context.wrap_bio( + self.incoming, + self.outgoing, + server_hostname=server_hostname, + server_side=server_side + ) + self._ssl_io_loop(self.sslobj.do_handshake) + + @property + def _io_refs(self): + return self.socket._io_refs + + @_io_refs.setter + def _io_refs(self, value): + self.socket._io_refs = value +else: + SSLTransport = None + + +class HTTPSProxyHandler(HTTPProxyHandler): + def __init__(self, request, *args, **kwargs): + certfn = os.path.join(TEST_DIR, 'testcert.pem') + sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) + sslctx.load_cert_chain(certfn, None) + if isinstance(request, ssl.SSLSocket): + request = SSLTransport(request, ssl_context=sslctx, server_side=True) + else: + request = sslctx.wrap_socket(request, server_side=True) + super().__init__(request, *args, **kwargs) + + +class WebsocketsProxyHandler(BaseRequestHandler): + def __init__(self, *args, proxy_info=None, **kwargs): + self.proxy_info = proxy_info + super().__init__(*args, **kwargs) + + def handle(self): + import websockets.sync.server + protocol = websockets.ServerProtocol() + connection = websockets.sync.server.ServerConnection(socket=self.request, protocol=protocol, + close_timeout=0) + connection.handshake() + connection.send(json.dumps(self.proxy_info)) + connection.close() + + +class HTTPConnectProxyHandler(BaseHTTPRequestHandler, HTTPProxyAuthMixin): + protocol_version = 'HTTP/1.1' + default_request_version = 'HTTP/1.1' + + def __init__(self, *args, username=None, password=None, request_handler=None, **kwargs): + self.username = username + self.password = password + self.request_handler = request_handler + super().__init__(*args, **kwargs) + + def do_CONNECT(self): + self.do_proxy_auth(self.username, self.password) + self.send_response(200) + self.end_headers() + proxy_info = { + 'client_address': self.client_address, + 'connect': True, + 'connect_host': self.path.split(':')[0], + 'connect_port': int(self.path.split(':')[1]), + 'headers': dict(self.headers), + 'path': self.path, + 'proxy': ':'.join(str(y) for y in self.connection.getsockname()), + } + self.request_handler(self.request, self.client_address, self.server, proxy_info=proxy_info) + + +class HTTPSConnectProxyHandler(HTTPConnectProxyHandler): + def __init__(self, request, *args, **kwargs): + certfn = os.path.join(TEST_DIR, 'testcert.pem') + sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) + sslctx.load_cert_chain(certfn, None) + request = sslctx.wrap_socket(request, server_side=True) + super().__init__(request, *args, **kwargs) + + +@contextlib.contextmanager +def proxy_server(proxy_server_class, request_handler, bind_ip=None, **proxy_server_kwargs): + server = server_thread = None + try: + bind_address = bind_ip or '127.0.0.1' + server_type = ThreadingTCPServer if '.' in bind_address else IPv6ThreadingTCPServer + server = server_type( + (bind_address, 0), functools.partial(proxy_server_class, request_handler=request_handler, **proxy_server_kwargs)) + server_port = http_server_port(server) + server_thread = threading.Thread(target=server.serve_forever) + server_thread.daemon = True + server_thread.start() + if '.' not in bind_address: + yield f'[{bind_address}]:{server_port}' + else: + yield f'{bind_address}:{server_port}' + finally: + server.shutdown() + server.server_close() + server_thread.join(2.0) + + +class HTTPProxyTestContext(abc.ABC): + REQUEST_HANDLER_CLASS = None + REQUEST_PROTO = None + def http_server(self, server_class, *args, **kwargs): + return proxy_server(server_class, self.REQUEST_HANDLER_CLASS, *args, **kwargs) + + @abc.abstractmethod + def proxy_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs) -> dict: + """return a dict of proxy_info""" + + +class HTTPProxyHTTPTestContext(HTTPProxyTestContext): + # Standard HTTP Proxy for http requests + REQUEST_HANDLER_CLASS = HTTPProxyHandler + REQUEST_PROTO = 'http' + + def proxy_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs): + request = Request(f'http://{target_domain or "127.0.0.1"}:{target_port or "40000"}/proxy_info', **req_kwargs) + handler.validate(request) + return json.loads(handler.send(request).read().decode()) + + +class HTTPProxyHTTPSTestContext(HTTPProxyTestContext): + # HTTP Connect proxy, for https requests + REQUEST_HANDLER_CLASS = HTTPSProxyHandler + REQUEST_PROTO = 'https' + + def proxy_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs): + request = Request(f'https://{target_domain or "127.0.0.1"}:{target_port or "40000"}/proxy_info', **req_kwargs) + handler.validate(request) + return json.loads(handler.send(request).read().decode()) + + +class HTTPProxyWebsocketsTestContext(HTTPProxyTestContext): + REQUEST_HANDLER_CLASS = WebsocketsProxyHandler + REQUEST_PROTO = 'ws' + + def proxy_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs): + request = Request(f'ws://{target_domain or "127.0.0.1"}:{target_port or "40000"}', **req_kwargs) + handler.validate(request) + ws = handler.send(request) + ws.send('proxy_info') + proxy_info = ws.recv() + ws.close() + return json.loads(proxy_info) + +# todo: wss + + +CTX_MAP = { + 'http': HTTPProxyHTTPTestContext, + 'https': HTTPProxyHTTPSTestContext, + 'ws': HTTPProxyWebsocketsTestContext, +} + + +@pytest.fixture(scope='module') +def ctx(request): + return CTX_MAP[request.param]() + + +@pytest.mark.parametrize( + 'handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) +@pytest.mark.parametrize('ctx', ['http'], indirect=True) # pure http proxy can only support http +class TestHTTPProxy: + def test_http_no_auth(self, handler, ctx): + with ctx.http_server(HTTPProxyHandler) as server_address: + with handler(proxies={ctx.REQUEST_PROTO: f'http://{server_address}'}) as rh: + proxy_info = ctx.proxy_info_request(rh) + assert proxy_info['connect'] is False + assert 'Proxy-Authorization' not in proxy_info['headers'] + + def test_http_auth(self, handler, ctx): + with ctx.http_server(HTTPProxyHandler, username='test', password='test') as server_address: + with handler(proxies={ctx.REQUEST_PROTO: f'http://test:test@{server_address}'}) as rh: + proxy_info = ctx.proxy_info_request(rh) + assert 'Proxy-Authorization' in proxy_info['headers'] + + def test_http_bad_auth(self, handler, ctx): + with ctx.http_server(HTTPProxyHandler, username='test', password='test') as server_address: + with handler(proxies={ctx.REQUEST_PROTO: f'http://test:bad@{server_address}'}) as rh: + with pytest.raises(HTTPError) as exc_info: + ctx.proxy_info_request(rh) + assert exc_info.value.response.status == 407 + + def test_http_source_address(self, handler, ctx): + with ctx.http_server(HTTPProxyHandler) as server_address: + source_address = f'127.0.0.{random.randint(5, 255)}' + verify_address_availability(source_address) + with handler(proxies={ctx.REQUEST_PROTO: f'http://{server_address}'}, + source_address=source_address) as rh: + response = ctx.proxy_info_request(rh) + assert response['client_address'][0] == source_address + + @pytest.mark.skip_handler('Urllib', 'urllib does not support https proxies') + def test_https(self, handler, ctx): + with ctx.http_server(HTTPSProxyHandler) as server_address: + with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'https://{server_address}'}) as rh: + proxy_info = ctx.proxy_info_request(rh) + assert proxy_info['connect'] is False + assert 'Proxy-Authorization' not in proxy_info['headers'] + + @pytest.mark.skip_handler('Urllib', 'urllib does not support https proxies') + def test_https_verify_failed(self, handler, ctx): + with ctx.http_server(HTTPSProxyHandler) as server_address: + with handler(verify=True, proxies={ctx.REQUEST_PROTO: f'https://{server_address}'}) as rh: + # Accept SSLError as may not be feasible to tell if it is proxy or request error. + # note: if request proto also does ssl verification, this may also be the error of the request. + # Until we can support passing custom cacerts to handlers, we cannot properly test this for all cases. + with pytest.raises((ProxyError, SSLError)): + ctx.proxy_info_request(rh) + + def test_http_with_idn(self, handler, ctx): + with ctx.http_server(HTTPProxyHandler) as server_address: + with handler(proxies={ctx.REQUEST_PROTO: f'http://{server_address}'}) as rh: + proxy_info = ctx.proxy_info_request(rh, target_domain='中文.tw') + assert proxy_info['path'].startswith('http://xn--fiq228c.tw') + assert proxy_info['headers']['Host'].split(':', 1)[0] == 'xn--fiq228c.tw' + + +@pytest.mark.parametrize( + 'handler,ctx', [ + ('Requests', 'https'), + ('CurlCFFI', 'https'), + ], indirect=True) +class TestHTTPConnectProxy: + def test_http_connect_no_auth(self, handler, ctx): + with ctx.http_server(HTTPConnectProxyHandler) as server_address: + with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'http://{server_address}'}) as rh: + proxy_info = ctx.proxy_info_request(rh) + assert proxy_info['connect'] is True + assert 'Proxy-Authorization' not in proxy_info['headers'] + + def test_http_connect_auth(self, handler, ctx): + with ctx.http_server(HTTPConnectProxyHandler, username='test', password='test') as server_address: + with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'http://test:test@{server_address}'}) as rh: + proxy_info = ctx.proxy_info_request(rh) + assert 'Proxy-Authorization' in proxy_info['headers'] + + def test_http_connect_bad_auth(self, handler, ctx): + with ctx.http_server(HTTPConnectProxyHandler, username='test', password='test') as server_address: + with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'http://test:bad@{server_address}'}) as rh: + with pytest.raises(ProxyError): + ctx.proxy_info_request(rh) + + def test_http_connect_source_address(self, handler, ctx): + with ctx.http_server(HTTPConnectProxyHandler) as server_address: + source_address = f'127.0.0.{random.randint(5, 255)}' + verify_address_availability(source_address) + with handler(proxies={ctx.REQUEST_PROTO: f'http://{server_address}'}, + source_address=source_address, + verify=False) as rh: + response = ctx.proxy_info_request(rh) + assert response['client_address'][0] == source_address + + @pytest.mark.skipif(urllib3 is None, reason='requires urllib3 to test') + def test_https_connect_proxy(self, handler, ctx): + with ctx.http_server(HTTPSConnectProxyHandler) as server_address: + with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'https://{server_address}'}) as rh: + proxy_info = ctx.proxy_info_request(rh) + assert proxy_info['connect'] is True + assert 'Proxy-Authorization' not in proxy_info['headers'] + + @pytest.mark.skipif(urllib3 is None, reason='requires urllib3 to test') + def test_https_connect_verify_failed(self, handler, ctx): + with ctx.http_server(HTTPSConnectProxyHandler) as server_address: + with handler(verify=True, proxies={ctx.REQUEST_PROTO: f'https://{server_address}'}) as rh: + # Accept SSLError as may not be feasible to tell if it is proxy or request error. + # note: if request proto also does ssl verification, this may also be the error of the request. + # Until we can support passing custom cacerts to handlers, we cannot properly test this for all cases. + with pytest.raises((ProxyError, SSLError)): + ctx.proxy_info_request(rh) + + @pytest.mark.skipif(urllib3 is None, reason='requires urllib3 to test') + def test_https_connect_proxy_auth(self, handler, ctx): + with ctx.http_server(HTTPSConnectProxyHandler, username='test', password='test') as server_address: + with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'https://test:test@{server_address}'}) as rh: + proxy_info = ctx.proxy_info_request(rh) + assert 'Proxy-Authorization' in proxy_info['headers'] diff --git a/test/test_networking.py b/test/test_networking.py index b50f70d08..00fc542e7 100644 --- a/test/test_networking.py +++ b/test/test_networking.py @@ -6,6 +6,8 @@ import sys import pytest +from yt_dlp.networking.common import Features + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import gzip @@ -642,81 +644,49 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): assert res.read().decode().endswith('\n\n') assert res.read() == b'' - -class TestHTTPProxy(TestRequestHandlerBase): - # Note: this only tests http urls over non-CONNECT proxy - @classmethod - def setup_class(cls): - super().setup_class() - # HTTP Proxy server - cls.proxy = http.server.ThreadingHTTPServer( - ('127.0.0.1', 0), _build_proxy_handler('normal')) - cls.proxy_port = http_server_port(cls.proxy) - cls.proxy_thread = threading.Thread(target=cls.proxy.serve_forever) - cls.proxy_thread.daemon = True - cls.proxy_thread.start() - - # Geo proxy server - cls.geo_proxy = http.server.ThreadingHTTPServer( - ('127.0.0.1', 0), _build_proxy_handler('geo')) - cls.geo_port = http_server_port(cls.geo_proxy) - cls.geo_proxy_thread = threading.Thread(target=cls.geo_proxy.serve_forever) - cls.geo_proxy_thread.daemon = True - cls.geo_proxy_thread.start() - - @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) - def test_http_proxy(self, handler): - http_proxy = f'http://127.0.0.1:{self.proxy_port}' - geo_proxy = f'http://127.0.0.1:{self.geo_port}' - - # Test global http proxy - # Test per request http proxy - # Test per request http proxy disables proxy - url = 'http://foo.com/bar' - - # Global HTTP proxy - with handler(proxies={'http': http_proxy}) as rh: - res = validate_and_send(rh, Request(url)).read().decode() - assert res == f'normal: {url}' - - # Per request proxy overrides global - res = validate_and_send(rh, Request(url, proxies={'http': geo_proxy})).read().decode() - assert res == f'geo: {url}' - - # and setting to None disables all proxies for that request - real_url = f'http://127.0.0.1:{self.http_port}/headers' - res = validate_and_send( - rh, Request(real_url, proxies={'http': None})).read().decode() - assert res != f'normal: {real_url}' - assert 'Accept' in res + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) + def test_request_disable_proxy(self, handler): + for proxy_proto in handler._SUPPORTED_PROXY_SCHEMES or ['http']: + # Given the handler is configured with a proxy + with handler(proxies={'http': f'{proxy_proto}://10.255.255.255'}, timeout=5) as rh: + # When a proxy is explicitly set to None for the request + res = validate_and_send( + rh, Request(f'http://127.0.0.1:{self.http_port}/headers', proxies={'http': None})) + # Then no proxy should be used + res.close() + assert res.status == 200 @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) + @pytest.mark.skip_handlers_if( + lambda _, handler: Features.NO_PROXY not in handler._SUPPORTED_FEATURES, 'handler does not support NO_PROXY') def test_noproxy(self, handler): - with handler(proxies={'proxy': f'http://127.0.0.1:{self.proxy_port}'}) as rh: - # NO_PROXY - for no_proxy in (f'127.0.0.1:{self.http_port}', '127.0.0.1', 'localhost'): - nop_response = validate_and_send( - rh, Request(f'http://127.0.0.1:{self.http_port}/headers', proxies={'no': no_proxy})).read().decode( - 'utf-8') - assert 'Accept' in nop_response + for proxy_proto in handler._SUPPORTED_PROXY_SCHEMES or ['http']: + # Given the handler is configured with a proxy + with handler(proxies={'http': f'{proxy_proto}://10.255.255.255'}, timeout=5) as rh: + for no_proxy in (f'127.0.0.1:{self.http_port}', '127.0.0.1', 'localhost'): + # When request no proxy includes the request url host + nop_response = validate_and_send( + rh, Request(f'http://127.0.0.1:{self.http_port}/headers', proxies={'no': no_proxy})) + # Then the proxy should not be used + assert nop_response.status == 200 + nop_response.close() @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) + @pytest.mark.skip_handlers_if( + lambda _, handler: Features.ALL_PROXY not in handler._SUPPORTED_FEATURES, 'handler does not support ALL_PROXY') def test_allproxy(self, handler): - url = 'http://foo.com/bar' - with handler() as rh: - response = validate_and_send(rh, Request(url, proxies={'all': f'http://127.0.0.1:{self.proxy_port}'})).read().decode( - 'utf-8') - assert response == f'normal: {url}' + # This is a bit of a hacky test, but it should be enough to check whether the handler is using the proxy. + # 0.1s might not be enough of a timeout if proxy is not used in all cases, but should still get failures. + with handler(proxies={'all': f'http://10.255.255.255'}, timeout=0.1) as rh: + with pytest.raises(TransportError): + validate_and_send( + rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).close() - @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) - def test_http_proxy_with_idn(self, handler): - with handler(proxies={ - 'http': f'http://127.0.0.1:{self.proxy_port}', - }) as rh: - url = 'http://中文.tw/' - response = rh.send(Request(url)).read().decode() - # b'xn--fiq228c' is '中文'.encode('idna') - assert response == 'normal: http://xn--fiq228c.tw/' + with handler(timeout=0.1) as rh: + with pytest.raises(TransportError): + validate_and_send( + rh, Request( + f'http://127.0.0.1:{self.http_port}/headers', proxies={'all': f'http://10.255.255.255'})).close() class TestClientCertificate: diff --git a/test/test_websockets.py b/test/test_websockets.py index b294b0932..b8af05895 100644 --- a/test/test_websockets.py +++ b/test/test_websockets.py @@ -7,6 +7,7 @@ import sys import pytest from test.helper import verify_address_availability +from yt_dlp.networking.common import Features sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) @@ -303,6 +304,31 @@ class TestWebsSocketRequestHandlerConformance: ) as rh: ws_validate_and_send(rh, Request(self.mtls_wss_base_url)).close() + @pytest.mark.parametrize('handler', ['Websockets'], indirect=True) + def test_request_disable_proxy(self, handler): + for proxy_proto in handler._SUPPORTED_PROXY_SCHEMES or ['ws']: + # Given handler is configured with a proxy + with handler(proxies={'ws': f'{proxy_proto}://10.255.255.255'}, timeout=5) as rh: + # When a proxy is explicitly set to None for the request + ws = ws_validate_and_send(rh, Request(self.ws_base_url, proxies={'http': None})) + # Then no proxy should be used + assert ws.status == 101 + ws.close() + + @pytest.mark.parametrize('handler', ['Websockets'], indirect=True) + @pytest.mark.skip_handlers_if( + lambda _, handler: Features.NO_PROXY not in handler._SUPPORTED_FEATURES, 'handler does not support NO_PROXY') + def test_noproxy(self, handler): + for proxy_proto in handler._SUPPORTED_PROXY_SCHEMES or ['ws']: + # Given the handler is configured with a proxy + with handler(proxies={'ws': f'{proxy_proto}://10.255.255.255'}, timeout=5) as rh: + for no_proxy in (f'127.0.0.1:{self.ws_port}', '127.0.0.1', 'localhost'): + # When request no proxy includes the request url host + ws = ws_validate_and_send(rh, Request(self.ws_base_url, proxies={'no': no_proxy})) + # Then the proxy should not be used + assert ws.status == 101 + ws.close() + def create_fake_ws_connection(raised): import websockets.sync.client From e565e45a6fe947df190625d7d287eb8505ff3729 Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Sun, 31 Mar 2024 14:36:10 +1300 Subject: [PATCH 2/8] [rh:curl_cffi] Fix HTTPS proxy support --- yt_dlp/networking/_curlcffi.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/yt_dlp/networking/_curlcffi.py b/yt_dlp/networking/_curlcffi.py index 39d1f70fb..d2afce089 100644 --- a/yt_dlp/networking/_curlcffi.py +++ b/yt_dlp/networking/_curlcffi.py @@ -21,7 +21,7 @@ from .exceptions import ( TransportError, ) from .impersonate import ImpersonateRequestHandler, ImpersonateTarget -from ..dependencies import curl_cffi +from ..dependencies import curl_cffi, certifi from ..utils import int_or_none if curl_cffi is None: @@ -156,6 +156,13 @@ class CurlCFFIRH(ImpersonateRequestHandler, InstanceStoreMixin): # See: https://curl.se/libcurl/c/CURLOPT_HTTPPROXYTUNNEL.html session.curl.setopt(CurlOpt.HTTPPROXYTUNNEL, 1) + # curl_cffi does not currently set these for proxies + session.curl.setopt(CurlOpt.PROXY_CAINFO, certifi.where()) + + if not self.verify: + session.curl.setopt(CurlOpt.PROXY_SSL_VERIFYPEER, 0) + session.curl.setopt(CurlOpt.PROXY_SSL_VERIFYHOST, 0) + headers = self._get_impersonate_headers(request) if self._client_cert: @@ -203,7 +210,10 @@ class CurlCFFIRH(ImpersonateRequestHandler, InstanceStoreMixin): max_redirects_exceeded = True curl_response = e.response - elif e.code == CurlECode.PROXY: + elif ( + e.code == CurlECode.PROXY + or (e.code == CurlECode.RECV_ERROR and 'Received HTTP code 407 from proxy after CONNECT' in str(e)) + ): raise ProxyError(cause=e) from e else: raise TransportError(cause=e) from e From 14505063ec4376cf9c010e86ad287f5aae34d5d8 Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Sun, 31 Mar 2024 14:36:10 +1300 Subject: [PATCH 3/8] cleanup --- test/conftest.py | 8 +- test/helper.py | 5 ++ test/test_http_proxy.py | 35 +------- test/test_networking.py | 175 +++++++++++++++++++--------------------- test/test_websockets.py | 33 ++++---- 5 files changed, 109 insertions(+), 147 deletions(-) diff --git a/test/conftest.py b/test/conftest.py index 859fc6ad3..06d2bcddb 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -30,6 +30,7 @@ def handler(request): @pytest.fixture(autouse=True) def skip_handler(request, handler): + # usage: pytest.mark.skip_handler('my_handler', 'reason') for marker in request.node.iter_markers('skip_handler'): if marker.args[0] == handler.RH_KEY: pytest.skip(marker.args[1] if len(marker.args) > 1 else '') @@ -37,20 +38,19 @@ def skip_handler(request, handler): @pytest.fixture(autouse=True) def skip_handler_if(request, handler): + # usage: pytest.mark.skip_handler_if('my_handler', lambda request: True, 'reason') for marker in request.node.iter_markers('skip_handler_if'): if marker.args[0] == handler.RH_KEY and marker.args[1](request): pytest.skip(marker.args[2] if len(marker.args) > 2 else '') + @pytest.fixture(autouse=True) def skip_handlers_if(request, handler): + # usage: pytest.mark.skip_handlers_if(lambda request, handler: True, 'reason') for marker in request.node.iter_markers('skip_handlers_if'): if handler and marker.args[0](request, handler): pytest.skip(marker.args[1] if len(marker.args) > 1 else '') -def validate_and_send(rh, req): - rh.validate(req) - return rh.send(req) - def pytest_configure(config): config.addinivalue_line( diff --git a/test/helper.py b/test/helper.py index 7760fd8d7..e7473120d 100644 --- a/test/helper.py +++ b/test/helper.py @@ -338,3 +338,8 @@ def http_server_port(httpd): def verify_address_availability(address): if find_available_port(address) is None: pytest.skip(f'Unable to bind to source address {address} (address may not exist)') + + +def validate_and_send(rh, req): + rh.validate(req) + return rh.send(req) diff --git a/test/test_http_proxy.py b/test/test_http_proxy.py index 69268b9c7..97ceef9f9 100644 --- a/test/test_http_proxy.py +++ b/test/test_http_proxy.py @@ -17,7 +17,7 @@ from test.test_networking import TEST_DIR from test.test_socks import IPv6ThreadingTCPServer from yt_dlp.dependencies import urllib3 from yt_dlp.networking import Request -from yt_dlp.networking.exceptions import ProxyError, HTTPError, SSLError +from yt_dlp.networking.exceptions import HTTPError, ProxyError, SSLError class HTTPProxyAuthMixin: @@ -124,21 +124,6 @@ class HTTPSProxyHandler(HTTPProxyHandler): super().__init__(request, *args, **kwargs) -class WebsocketsProxyHandler(BaseRequestHandler): - def __init__(self, *args, proxy_info=None, **kwargs): - self.proxy_info = proxy_info - super().__init__(*args, **kwargs) - - def handle(self): - import websockets.sync.server - protocol = websockets.ServerProtocol() - connection = websockets.sync.server.ServerConnection(socket=self.request, protocol=protocol, - close_timeout=0) - connection.handshake() - connection.send(json.dumps(self.proxy_info)) - connection.close() - - class HTTPConnectProxyHandler(BaseHTTPRequestHandler, HTTPProxyAuthMixin): protocol_version = 'HTTP/1.1' default_request_version = 'HTTP/1.1' @@ -199,6 +184,7 @@ def proxy_server(proxy_server_class, request_handler, bind_ip=None, **proxy_serv class HTTPProxyTestContext(abc.ABC): REQUEST_HANDLER_CLASS = None REQUEST_PROTO = None + def http_server(self, server_class, *args, **kwargs): return proxy_server(server_class, self.REQUEST_HANDLER_CLASS, *args, **kwargs) @@ -229,26 +215,9 @@ class HTTPProxyHTTPSTestContext(HTTPProxyTestContext): return json.loads(handler.send(request).read().decode()) -class HTTPProxyWebsocketsTestContext(HTTPProxyTestContext): - REQUEST_HANDLER_CLASS = WebsocketsProxyHandler - REQUEST_PROTO = 'ws' - - def proxy_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs): - request = Request(f'ws://{target_domain or "127.0.0.1"}:{target_port or "40000"}', **req_kwargs) - handler.validate(request) - ws = handler.send(request) - ws.send('proxy_info') - proxy_info = ws.recv() - ws.close() - return json.loads(proxy_info) - -# todo: wss - - CTX_MAP = { 'http': HTTPProxyHTTPTestContext, 'https': HTTPProxyHTTPSTestContext, - 'ws': HTTPProxyWebsocketsTestContext, } diff --git a/test/test_networking.py b/test/test_networking.py index 00fc542e7..3ad2515fe 100644 --- a/test/test_networking.py +++ b/test/test_networking.py @@ -29,8 +29,12 @@ import zlib from email.message import Message from http.cookiejar import CookieJar -from test.conftest import validate_and_send -from test.helper import FakeYDL, http_server_port, verify_address_availability +from test.helper import ( + FakeYDL, + http_server_port, + validate_and_send, + verify_address_availability, +) from yt_dlp.cookies import YoutubeDLCookieJar from yt_dlp.dependencies import brotli, curl_cffi, requests, urllib3 from yt_dlp.networking import ( @@ -64,21 +68,6 @@ from yt_dlp.utils.networking import HTTPHeaderDict, std_headers TEST_DIR = os.path.dirname(os.path.abspath(__file__)) -def _build_proxy_handler(name): - class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler): - proxy_name = name - - def log_message(self, format, *args): - pass - - def do_GET(self): - self.send_response(200) - self.send_header('Content-Type', 'text/plain; charset=utf-8') - self.end_headers() - self.wfile.write(f'{self.proxy_name}: {self.path}'.encode()) - return HTTPTestRequestHandler - - class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler): protocol_version = 'HTTP/1.1' default_request_version = 'HTTP/1.1' @@ -319,8 +308,9 @@ class TestRequestHandlerBase: cls.https_server_thread.start() +@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) class TestHTTPRequestHandler(TestRequestHandlerBase): - @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) + def test_verify_cert(self, handler): with handler() as rh: with pytest.raises(CertificateVerifyError): @@ -331,7 +321,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): assert r.status == 200 r.close() - @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_ssl_error(self, handler): # HTTPS server with too old TLS version # XXX: is there a better way to test this than to create a new server? @@ -349,7 +338,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers')) assert not issubclass(exc_info.type, CertificateVerifyError) - @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_percent_encode(self, handler): with handler() as rh: # Unicode characters should be encoded with uppercase percent-encoding @@ -361,7 +349,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): assert res.status == 200 res.close() - @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) @pytest.mark.parametrize('path', [ '/a/b/./../../headers', '/redirect_dotsegments', @@ -377,15 +364,13 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): assert res.url == f'http://127.0.0.1:{self.http_port}/headers' res.close() - # Not supported by CurlCFFI (non-standard) - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.skip_handler('CurlCFFI', 'not supported by curl-cffi (non-standard)') def test_unicode_path_redirection(self, handler): with handler() as rh: r = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/302-non-ascii-redirect')) assert r.url == f'http://127.0.0.1:{self.http_port}/%E4%B8%AD%E6%96%87.html' r.close() - @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_raise_http_error(self, handler): with handler() as rh: for bad_status in (400, 500, 599, 302): @@ -395,7 +380,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): # Should not raise an error validate_and_send(rh, Request('http://127.0.0.1:%d/gen_200' % self.http_port)).close() - @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_response_url(self, handler): with handler() as rh: # Response url should be that of the last url in redirect chain @@ -407,7 +391,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): res2.close() # Covers some basic cases we expect some level of consistency between request handlers for - @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) @pytest.mark.parametrize('redirect_status,method,expected', [ # A 303 must either use GET or HEAD for subsequent request (303, 'POST', ('', 'GET', False)), @@ -449,7 +432,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): assert expected[1] == res.headers.get('method') assert expected[2] == ('content-length' in headers.decode().lower()) - @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_request_cookie_header(self, handler): # We should accept a Cookie header being passed as in normal headers and handle it appropriately. with handler() as rh: @@ -482,19 +464,16 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): assert b'cookie: test=ytdlp' not in data.lower() assert b'cookie: test=test3' in data.lower() - @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_redirect_loop(self, handler): with handler() as rh: with pytest.raises(HTTPError, match='redirect loop'): validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_loop')) - @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_incompleteread(self, handler): with handler(timeout=2) as rh: with pytest.raises(IncompleteRead, match='13 bytes read, 234221 more expected'): validate_and_send(rh, Request('http://127.0.0.1:%d/incompleteread' % self.http_port)).read() - @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_cookies(self, handler): cookiejar = YoutubeDLCookieJar() cookiejar.set_cookie(http.cookiejar.Cookie( @@ -511,7 +490,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions={'cookiejar': cookiejar})).read() assert b'cookie: test=ytdlp' in data.lower() - @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_headers(self, handler): with handler(headers=HTTPHeaderDict({'test1': 'test', 'test2': 'test2'})) as rh: @@ -527,7 +505,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): assert b'test2: test2' not in data assert b'test3: test3' in data - @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_read_timeout(self, handler): with handler() as rh: # Default timeout is 20 seconds, so this should go through @@ -543,7 +520,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): validate_and_send( rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1', extensions={'timeout': 4})) - @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_connect_timeout(self, handler): # nothing should be listening on this port connect_timeout_url = 'http://10.255.255.255' @@ -562,7 +538,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): rh, Request(connect_timeout_url, extensions={'timeout': 0.01})) assert 0.01 <= time.time() - now < 20 - @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_source_address(self, handler): source_address = f'127.0.0.{random.randint(5, 255)}' # on some systems these loopback addresses we need for testing may not be available @@ -574,13 +549,13 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): assert source_address == data # Not supported by CurlCFFI - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.skip_handler('CurlCFFI', 'not supported by curl-cffi') def test_gzip_trailing_garbage(self, handler): with handler() as rh: data = validate_and_send(rh, Request(f'http://localhost:{self.http_port}/trailing_garbage')).read().decode() assert data == '