@@ -40,19 +40,6 @@ def scrape(args):
     else:
         proxy = None
 
-    proxy_types = []
-    # Parse the proxy type
-    if args.proxy_type:
-        proxy_types = [x.strip() for x in args.proxy_type.split(',')]
-    if not proxy_types:
-        proxy_types = ['http', 'https', 'socks4', 'socks5']
-    try:
-        proxy_types = [ProxyType.from_name(x) for x in proxy_types]
-    except ValueError as e:
-        logger.error(e)
-        return
-    logger.info(f'Using proxy types: {[proxy_type.name for proxy_type in proxy_types]}')
-
     useragent = args.useragent
 
     proxies = ProxyList()
@@ -86,7 +73,7 @@ def checking_callback(proxy_list: ProxyList, progress: float):
         collected_proxies_count = proxies_.count
         # Filter the proxies
         logger.info('Filtering the proxies...')
-        proxies_ = proxies_.filter(type_=proxy_types)
+        proxies_ = proxies_.filter(type_=args.proxy_types)
         if args.verbose:
             logger.info(f'{scraper.name}: Removed {collected_proxies_count - proxies_.count} proxies of wrong type.')
         collected_proxies_count = proxies_.count
@@ -154,6 +141,14 @@ def check(args):
         logger.error(f'The source format {args.source_format} is not valid.')
         return
 
+    if len(args.proxy_types) < 4:
+        loaded_proxies_count = proxies.count
+        # Filter the proxies
+        logger.info('Filtering the proxies...')
+        proxies = proxies.filter(type_=args.proxy_types)
+        if args.verbose:
+            logger.info(f'Removed {loaded_proxies_count - proxies.count} proxies of wrong type.')
+
     logger.progress_bar = log21.ProgressBar(format_='Proxies: {count} {prefix}{bar}{suffix} {percentage}%', style='{',
                                             additional_variables={'count': 0})
 
@@ -209,6 +204,7 @@ def main():
     parser.add_argument('--format', '-f', help='The format for saving the proxies in text file(default:'
                                                 '"{scheme}://{ip}:{port}").',
                         default='{scheme}://{ip}:{port}')
+    parser.add_argument('--proxy-type', '-type', help=f'The type of the proxies(default:all).', default='')
     parser.add_argument('--include-status', '-is', help=f'Include the status of the proxies in the output file.',
                         action='store_true')
     parser.add_argument('--threads', '-t', help=f'The number of threads to use for scraping(default:25).', type=int,
@@ -222,7 +218,6 @@ def main():
                         version='%(prog)s ' + ProxyEater.__version__)
     scrap_arguments = parser.add_argument_group('Scrape', 'Scrape mode arguments')
     scrap_arguments.add_argument('--proxy', '-p', help=f'The proxy to use for scraping.')
-    scrap_arguments.add_argument('--proxy-type', '-type', help=f'The type of the proxies(default:all).', default='')
     scrap_arguments.add_argument('--useragent', '-ua', help=f'The useragent of the requests(default:random).')
     scrap_arguments.add_argument('--include-geolocation', '-ig',
                                  help=f'Include the geolocation info of the proxies in the output file.',
@@ -273,6 +268,20 @@ def main():
             args.output = pathlib.Path('.') / f'proxies-{i}.{ext}'
             i += 1
 
+    proxy_types = []
+    # Parse the proxy type
+    if args.proxy_type:
+        proxy_types = [x.strip() for x in args.proxy_type.split(',')]
+    if not proxy_types:
+        proxy_types = ['http', 'https', 'socks4', 'socks5']
+    try:
+        proxy_types = [ProxyType.from_name(x) for x in proxy_types]
+    except ValueError as e:
+        logger.error(e)
+        return
+    logger.info(f'Using proxy types: {[proxy_type.name for proxy_type in proxy_types]}')
+    args.proxy_types = proxy_types
+
     args.mode = args.mode.lower()
     if args.mode == 'scrape':
         scrape(args)
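Note: the block moved into main() above is what populates args.proxy_types for both the scrape and check modes. As a rough, self-contained illustration of that parsing behaviour, here is a minimal sketch; the ProxyType enum and from_name helper below are stand-ins assumed for the example, not ProxyEater's actual definitions.

# Minimal sketch (assumed stand-ins, not ProxyEater's real ProxyType):
# parse a comma-separated --proxy-type value, defaulting to all four types.
import enum


class ProxyType(enum.Enum):
    HTTP = 'http'
    HTTPS = 'https'
    SOCKS4 = 'socks4'
    SOCKS5 = 'socks5'

    @classmethod
    def from_name(cls, name: str) -> 'ProxyType':
        try:
            return cls(name.strip().lower())
        except ValueError:
            raise ValueError(f'{name!r} is not a valid proxy type.') from None


def parse_proxy_types(raw: str) -> list:
    # An empty --proxy-type value means "all types".
    names = [x.strip() for x in raw.split(',')] if raw else []
    if not names:
        names = ['http', 'https', 'socks4', 'socks5']
    return [ProxyType.from_name(x) for x in names]


print(parse_proxy_types('http, socks5'))  # [ProxyType.HTTP, ProxyType.SOCKS5]
print(len(parse_proxy_types('')))         # 4 -> empty flag selects all types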