Skip to content

Commit 8bd53c4

Browse files
Made it possible to use --proxy-type when using check mode.
1 parent ea09958 commit 8bd53c4

6 files changed

Lines changed: 47 additions & 27 deletions

File tree

ProxyEater/Proxy.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -245,14 +245,25 @@ def check_all(self, timeout: int = 10, threads_no: int = 21, url: str = 'http://
245245
else:
246246
on_progress_callback = lambda proxy_list, progress: None
247247

248+
length = len(self)
249+
finished: int = 0 # The number of proxies that have been checked.
250+
248251
def check_proxy(proxy_: Proxy):
252+
"""
253+
This function is used for checking the status of a proxy.
254+
255+
:param proxy_: The proxy to check.
256+
:return:
257+
"""
258+
nonlocal finished
249259
proxy_.check_status(timeout, url)
250260
if (not proxy_.is_alive) and remove_dead:
251261
self.remove(proxy_)
262+
finished += 1
263+
on_progress_callback(self, finished / length * 99.99)
252264

253265
threads = []
254-
length = len(self)
255-
for i, proxy in enumerate(self.copy()):
266+
for proxy in self.copy():
256267
thread = threading.Thread(target=check_proxy, args=(proxy,))
257268
threads.append(thread)
258269
thread.start()
@@ -262,8 +273,8 @@ def check_proxy(proxy_: Proxy):
262273
threads.remove(thread)
263274
break
264275
time.sleep(0.1)
265-
on_progress_callback(self, i / length * 100)
266276

277+
# Wait for all threads to finish
267278
for thread in threads:
268279
thread.join()
269280

ProxyEater/Scraper.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33

44
from typing import Callable as _Callable
55

6-
import requests # This module is used to send requests to the server.
76
import pandas # This module is used to parse the html table.
7+
import requests # This module is used to send requests to the server.
88

99
from random_user_agent.user_agent import UserAgent # This module is used to generate random user agents.
1010

ProxyEater/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# ProxyEater
22
# CodeWriter21
33

4-
__version__ = "1.5.0"
4+
__version__ = "1.5.1"
55
__author__ = "CodeWriter21"
66
__email__ = "CodeWriter21@gmail.com"
77
__license__ = "Apache-2.0"

ProxyEater/__main__.py

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -40,19 +40,6 @@ def scrape(args):
4040
else:
4141
proxy = None
4242

43-
proxy_types = []
44-
# Parse the proxy type
45-
if args.proxy_type:
46-
proxy_types = [x.strip() for x in args.proxy_type.split(',')]
47-
if not proxy_types:
48-
proxy_types = ['http', 'https', 'socks4', 'socks5']
49-
try:
50-
proxy_types = [ProxyType.from_name(x) for x in proxy_types]
51-
except ValueError as e:
52-
logger.error(e)
53-
return
54-
logger.info(f'Using proxy types: {[proxy_type.name for proxy_type in proxy_types]}')
55-
5643
useragent = args.useragent
5744

5845
proxies = ProxyList()
@@ -86,7 +73,7 @@ def checking_callback(proxy_list: ProxyList, progress: float):
8673
collected_proxies_count = proxies_.count
8774
# Filter the proxies
8875
logger.info('Filtering the proxies...')
89-
proxies_ = proxies_.filter(type_=proxy_types)
76+
proxies_ = proxies_.filter(type_=args.proxy_types)
9077
if args.verbose:
9178
logger.info(f'{scraper.name}: Removed {collected_proxies_count - proxies_.count} proxies of wrong type.')
9279
collected_proxies_count = proxies_.count
@@ -154,6 +141,14 @@ def check(args):
154141
logger.error(f'The source format {args.source_format} is not valid.')
155142
return
156143

144+
if len(args.proxy_types) < 4:
145+
loaded_proxies_count = proxies.count
146+
# Filter the proxies
147+
logger.info('Filtering the proxies...')
148+
proxies = proxies.filter(type_=args.proxy_types)
149+
if args.verbose:
150+
logger.info(f'Removed {loaded_proxies_count - proxies.count} proxies of wrong type.')
151+
157152
logger.progress_bar = log21.ProgressBar(format_='Proxies: {count} {prefix}{bar}{suffix} {percentage}%', style='{',
158153
additional_variables={'count': 0})
159154

@@ -209,6 +204,7 @@ def main():
209204
parser.add_argument('--format', '-f', help='The format for saving the proxies in text file(default:'
210205
'"{scheme}://{ip}:{port}").',
211206
default='{scheme}://{ip}:{port}')
207+
parser.add_argument('--proxy-type', '-type', help=f'The type of the proxies(default:all).', default='')
212208
parser.add_argument('--include-status', '-is', help=f'Include the status of the proxies in the output file.',
213209
action='store_true')
214210
parser.add_argument('--threads', '-t', help=f'The number of threads to use for scraping(default:25).', type=int,
@@ -222,7 +218,6 @@ def main():
222218
version='%(prog)s ' + ProxyEater.__version__)
223219
scrap_arguments = parser.add_argument_group('Scrape', 'Scrape mode arguments')
224220
scrap_arguments.add_argument('--proxy', '-p', help=f'The proxy to use for scraping.')
225-
scrap_arguments.add_argument('--proxy-type', '-type', help=f'The type of the proxies(default:all).', default='')
226221
scrap_arguments.add_argument('--useragent', '-ua', help=f'The useragent of the requests(default:random).')
227222
scrap_arguments.add_argument('--include-geolocation', '-ig',
228223
help=f'Include the geolocation info of the proxies in the output file.',
@@ -273,6 +268,20 @@ def main():
273268
args.output = pathlib.Path('.') / f'proxies-{i}.{ext}'
274269
i += 1
275270

271+
proxy_types = []
272+
# Parse the proxy type
273+
if args.proxy_type:
274+
proxy_types = [x.strip() for x in args.proxy_type.split(',')]
275+
if not proxy_types:
276+
proxy_types = ['http', 'https', 'socks4', 'socks5']
277+
try:
278+
proxy_types = [ProxyType.from_name(x) for x in proxy_types]
279+
except ValueError as e:
280+
logger.error(e)
281+
return
282+
logger.info(f'Using proxy types: {[proxy_type.name for proxy_type in proxy_types]}')
283+
args.proxy_types = proxy_types
284+
276285
args.mode = args.mode.lower()
277286
if args.mode == 'scrape':
278287
scrape(args)

README.md

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
ProxyEater\[1.5.0\]
1+
ProxyEater\[1.5.1\]
22
===================
33

44
![version](https://img.shields.io/pypi/v/ProxyEater)
@@ -35,9 +35,9 @@ Usage
3535

3636
```
3737
usage: ProxyEater [-h] [--source SOURCE] [--output OUTPUT] [--file-format { text, json, csv }]
38-
[--format FORMAT] [--include-status] [--threads THREADS] [--timeout TIMEOUT]
39-
[--url URL] [--verbose] [--quiet] [--version] [--proxy PROXY] [--proxy-type
40-
PROXY_TYPE] [--useragent USERAGENT] [--include-geolocation] [--no-check]
38+
[--format FORMAT] [--proxy-type PROXY_TYPE] [--include-status] [--threads
39+
THREADS] [--timeout TIMEOUT] [--url URL] [--verbose] [--quiet] [--version]
40+
[--proxy PROXY] [--useragent USERAGENT] [--include-geolocation] [--no-check]
4141
[--source-format { text, json, csv }] [--default-type { http, https, socks4,
4242
socks5 }]
4343
mode
@@ -58,6 +58,8 @@ options:
5858
--format FORMAT, -f FORMAT
5959
The format for saving the proxies in text
6060
file(default:"{scheme}://{ip}:{port}").
61+
--proxy-type PROXY_TYPE, -type PROXY_TYPE
62+
The type of the proxies(default:all).
6163
--include-status, -is
6264
Include the status of the proxies in the output file.
6365
--threads THREADS, -t THREADS
@@ -78,8 +80,6 @@ Scrape:
7880
7981
--proxy PROXY, -p PROXY
8082
The proxy to use for scraping.
81-
--proxy-type PROXY_TYPE, -type PROXY_TYPE
82-
The type of the proxies(default:all).
8383
--useragent USERAGENT, -ua USERAGENT
8484
The useragent of the requests(default:random).
8585
--include-geolocation, -ig

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
setup(
99
name='ProxyEater',
10-
version='1.5.0',
10+
version='1.5.1',
1111
author='CodeWriter21',
1212
author_email='CodeWriter21@gmail.com',
1313
description='A Python Proxy Scraper for gathering fresh proxies.',

0 commit comments

Comments (0)