diff --git a/README.md b/README.md index 6592a694..9504f717 100644 --- a/README.md +++ b/README.md @@ -315,8 +315,30 @@ ytt_api.fetch(video_id) Using the `WebshareProxyConfig` will default to using rotating residential proxies and requires no further configuration. +You can also limit the pool of IPs that you will be rotating through to those located in specific countries. By +choosing locations that are close to the machine that is running your code, you can reduce latency. Also, this +can be used to work around location-based restrictions. + +```python +ytt_api = YouTubeTranscriptApi( + proxy_config=WebshareProxyConfig( + proxy_username="", + proxy_password="", + filter_ip_locations=["de", "us"], + ) +) + +# Webshare will now only rotate through IPs located in Germany or the United States! +ytt_api.fetch(video_id) +``` + +You can find the +full list of available locations (and how many IPs are available in each location) +[here](https://www.webshare.io/features/proxy-locations?referral_code=w0xno53eb50g). + Note that [referral links are used here](https://www.webshare.io/?referral_code=w0xno53eb50g) and any purchases -made through these links will support this Open Source project, which is very much appreciated! 💖😊🙏💖 +made through these links will support this Open Source project (at no additional cost, of course!), which is very much +appreciated! 💖😊🙏💖 However, you are of course free to integrate your own proxy solution using the `GenericProxyConfig` class, if you prefer using another provider or want to implement your own solution, as covered by the following section. 
diff --git a/youtube_transcript_api/proxies.py b/youtube_transcript_api/proxies.py index b50a1825..0b868d84 100644 --- a/youtube_transcript_api/proxies.py +++ b/youtube_transcript_api/proxies.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from typing import TypedDict, Optional +from typing import TypedDict, Optional, List class InvalidProxyConfig(Exception): @@ -117,6 +117,7 @@ def __init__( self, proxy_username: str, proxy_password: str, + filter_ip_locations: Optional[List[str]] = None, retries_when_blocked: int = 10, domain_name: str = DEFAULT_DOMAIN_NAME, proxy_port: int = DEFAULT_PORT, @@ -133,6 +134,14 @@ def __init__( https://dashboard.webshare.io/proxy/settings :param proxy_password: "Proxy Password" found at https://dashboard.webshare.io/proxy/settings + :param filter_ip_locations: If you want to limit the pool of IPs that you will + be rotating through to those located in specific countries, you can provide + a list of location codes here. By choosing locations that are close to the + machine that is running this code, you can reduce latency. Also, this can + be used to work around location-based restrictions. + You can find the full list of available locations (and how many IPs are + available in each location) at + https://www.webshare.io/features/proxy-locations?referral_code=w0xno53eb50g :param retries_when_blocked: Define how many times we should retry if a request is blocked. 
When using rotating residential proxies with a large IP pool it makes sense to retry a couple of times when a blocked IP is encountered, @@ -143,12 +152,16 @@ def __init__( self.proxy_password = proxy_password self.domain_name = domain_name self.proxy_port = proxy_port + self._filter_ip_locations = filter_ip_locations or [] self._retries_when_blocked = retries_when_blocked @property def url(self) -> str: + location_codes = "".join( + f"-{location_code.upper()}" for location_code in self._filter_ip_locations + ) return ( - f"http://{self.proxy_username}-rotate:{self.proxy_password}" + f"http://{self.proxy_username}{location_codes}-rotate:{self.proxy_password}" f"@{self.domain_name}:{self.proxy_port}/" ) diff --git a/youtube_transcript_api/test/test_proxies.py b/youtube_transcript_api/test/test_proxies.py index d0ffe154..a50a5387 100644 --- a/youtube_transcript_api/test/test_proxies.py +++ b/youtube_transcript_api/test/test_proxies.py @@ -63,3 +63,31 @@ def test_to_requests_dict(self): "http": "http://user-rotate:password@p.webshare.io:80/", "https": "http://user-rotate:password@p.webshare.io:80/", } + + def test_to_requests_dict__with_location_filter(self): + proxy_config = WebshareProxyConfig( + proxy_username="user", + proxy_password="password", + filter_ip_locations=["us"], + ) + + request_dict = proxy_config.to_requests_dict() + + assert request_dict == { + "http": "http://user-US-rotate:password@p.webshare.io:80/", + "https": "http://user-US-rotate:password@p.webshare.io:80/", + } + + def test_to_requests_dict__with_multiple_location_filters(self): + proxy_config = WebshareProxyConfig( + proxy_username="user", + proxy_password="password", + filter_ip_locations=["de", "us"], + ) + + request_dict = proxy_config.to_requests_dict() + + assert request_dict == { + "http": "http://user-DE-US-rotate:password@p.webshare.io:80/", + "https": "http://user-DE-US-rotate:password@p.webshare.io:80/", + }