Made it possible to use --proxy-type when using check mode.
MPCodeWriter21 committed Jul 31, 2022
1 parent ea09958 commit 8bd53c4
Showing 6 changed files with 47 additions and 27 deletions.
17 changes: 14 additions & 3 deletions ProxyEater/Proxy.py
@@ -245,14 +245,25 @@ def check_all(self, timeout: int = 10, threads_no: int = 21, url: str = 'http://
         else:
             on_progress_callback = lambda proxy_list, progress: None

+        length = len(self)
+        finished: int = 0  # The number of proxies that have been checked.
+
         def check_proxy(proxy_: Proxy):
             """
             This function is used for checking the status of a proxy.
             :param proxy_: The proxy to check.
             :return:
             """
+            nonlocal finished
             proxy_.check_status(timeout, url)
             if (not proxy_.is_alive) and remove_dead:
                 self.remove(proxy_)
+            finished += 1
+            on_progress_callback(self, finished / length * 99.99)

         threads = []
-        length = len(self)
-        for i, proxy in enumerate(self.copy()):
+        for proxy in self.copy():
             thread = threading.Thread(target=check_proxy, args=(proxy,))
             threads.append(thread)
             thread.start()
@@ -262,8 +273,8 @@ def check_proxy(proxy_: Proxy):
                     threads.remove(thread)
                     break
                 time.sleep(0.1)
-            on_progress_callback(self, i / length * 100)

         # Wait for all threads to finish
         for thread in threads:
             thread.join()

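The change to `check_all` above moves progress reporting out of the spawning loop, which counted started threads via `i / length`, and into each worker, which increments a shared `finished` counter and reports the fraction of checks that have actually completed. Below is a minimal, self-contained sketch of that pattern, not ProxyEater's actual API: `check_one` and the explicit lock are illustrative stand-ins (the real code calls `Proxy.check_status` and bumps a plain `nonlocal` counter), and the per-thread cap from the real code is omitted.

```python
import threading
import time


def check_all(items, on_progress):
    """Check every item in its own thread and report *completed* work."""
    length = len(items)
    finished = 0
    lock = threading.Lock()  # illustrative; the diff relies on a plain nonlocal counter

    def check_one(item):
        nonlocal finished
        time.sleep(0.01)  # stand-in for the real per-proxy check
        with lock:
            finished += 1
            # Progress reflects checks that have finished, not threads that were started.
            on_progress(finished / length * 99.99)

    # The real code also caps the number of live threads via threads_no; omitted here.
    threads = [threading.Thread(target=check_one, args=(item,)) for item in items]
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()
    on_progress(100.0)


if __name__ == '__main__':
    check_all(list(range(10)), lambda progress: print(f'{progress:.2f}%'))
```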
2 changes: 1 addition & 1 deletion ProxyEater/Scraper.py
@@ -3,8 +3,8 @@

 from typing import Callable as _Callable

-import requests  # This module is used to send requests to the server.
 import pandas  # This module is used to parse the html table.
+import requests  # This module is used to send requests to the server.

 from random_user_agent.user_agent import UserAgent  # This module is used to generate random user agents.

2 changes: 1 addition & 1 deletion ProxyEater/__init__.py
@@ -1,7 +1,7 @@
 # ProxyEater
 # CodeWriter21

-__version__ = "1.5.0"
+__version__ = "1.5.1"
 __author__ = "CodeWriter21"
 __email__ = "[email protected]"
 __license__ = "Apache-2.0"
39 changes: 24 additions & 15 deletions ProxyEater/__main__.py
@@ -40,19 +40,6 @@ def scrape(args):
     else:
         proxy = None

-    proxy_types = []
-    # Parse the proxy type
-    if args.proxy_type:
-        proxy_types = [x.strip() for x in args.proxy_type.split(',')]
-    if not proxy_types:
-        proxy_types = ['http', 'https', 'socks4', 'socks5']
-    try:
-        proxy_types = [ProxyType.from_name(x) for x in proxy_types]
-    except ValueError as e:
-        logger.error(e)
-        return
-    logger.info(f'Using proxy types: {[proxy_type.name for proxy_type in proxy_types]}')
-
     useragent = args.useragent

     proxies = ProxyList()
@@ -86,7 +73,7 @@ def checking_callback(proxy_list: ProxyList, progress: float):
         collected_proxies_count = proxies_.count
         # Filter the proxies
         logger.info('Filtering the proxies...')
-        proxies_ = proxies_.filter(type_=proxy_types)
+        proxies_ = proxies_.filter(type_=args.proxy_types)
         if args.verbose:
             logger.info(f'{scraper.name}: Removed {collected_proxies_count - proxies_.count} proxies of wrong type.')
         collected_proxies_count = proxies_.count
@@ -154,6 +141,14 @@ def check(args):
         logger.error(f'The source format {args.source_format} is not valid.')
         return

+    if len(args.proxy_types) < 4:
+        loaded_proxies_count = proxies.count
+        # Filter the proxies
+        logger.info('Filtering the proxies...')
+        proxies = proxies.filter(type_=args.proxy_types)
+        if args.verbose:
+            logger.info(f'Removed {loaded_proxies_count - proxies.count} proxies of wrong type.')
+
     logger.progress_bar = log21.ProgressBar(format_='Proxies: {count} {prefix}{bar}{suffix} {percentage}%', style='{',
                                             additional_variables={'count': 0})

@@ -209,6 +204,7 @@ def main():
     parser.add_argument('--format', '-f', help='The format for saving the proxies in text file(default:'
                                                 '"{scheme}://{ip}:{port}").',
                         default='{scheme}://{ip}:{port}')
+    parser.add_argument('--proxy-type', '-type', help=f'The type of the proxies(default:all).', default='')
     parser.add_argument('--include-status', '-is', help=f'Include the status of the proxies in the output file.',
                         action='store_true')
     parser.add_argument('--threads', '-t', help=f'The number of threads to use for scraping(default:25).', type=int,
@@ -222,7 +218,6 @@
                         version='%(prog)s ' + ProxyEater.__version__)
     scrap_arguments = parser.add_argument_group('Scrape', 'Scrape mode arguments')
     scrap_arguments.add_argument('--proxy', '-p', help=f'The proxy to use for scraping.')
-    scrap_arguments.add_argument('--proxy-type', '-type', help=f'The type of the proxies(default:all).', default='')
     scrap_arguments.add_argument('--useragent', '-ua', help=f'The useragent of the requests(default:random).')
     scrap_arguments.add_argument('--include-geolocation', '-ig',
                                  help=f'Include the geolocation info of the proxies in the output file.',
@@ -273,6 +268,20 @@ def main():
             args.output = pathlib.Path('.') / f'proxies-{i}.{ext}'
             i += 1

+    proxy_types = []
+    # Parse the proxy type
+    if args.proxy_type:
+        proxy_types = [x.strip() for x in args.proxy_type.split(',')]
+    if not proxy_types:
+        proxy_types = ['http', 'https', 'socks4', 'socks5']
+    try:
+        proxy_types = [ProxyType.from_name(x) for x in proxy_types]
+    except ValueError as e:
+        logger.error(e)
+        return
+    logger.info(f'Using proxy types: {[proxy_type.name for proxy_type in proxy_types]}')
+    args.proxy_types = proxy_types
+
     args.mode = args.mode.lower()
     if args.mode == 'scrape':
         scrape(args)
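The net effect of the `__main__.py` changes is that `--proxy-type` is now a top-level option parsed once in `main()` and stored on `args.proxy_types`, so both `scrape()` and `check()` can filter by it (the `check()` path only filters when fewer than all four types are requested). A standalone sketch of the parse step follows; `ProxyTypeDemo` and `parse_proxy_types` are hypothetical stand-ins for ProxyEater's `ProxyType.from_name` handling, written only to illustrate the comma-separated parsing and the empty-input-means-all default.

```python
import enum


class ProxyTypeDemo(enum.Enum):
    """Illustrative stand-in for ProxyEater's ProxyType enum."""
    HTTP = 'http'
    HTTPS = 'https'
    SOCKS4 = 'socks4'
    SOCKS5 = 'socks5'

    @classmethod
    def from_name(cls, name: str) -> 'ProxyTypeDemo':
        try:
            return cls(name.lower())
        except ValueError:
            raise ValueError(f'{name!r} is not a valid proxy type') from None


def parse_proxy_types(raw: str) -> list:
    """Turn a '--proxy-type http,socks5' style value into enum members."""
    names = [x.strip() for x in raw.split(',')] if raw else []
    if not names:
        # An empty value means "all types", mirroring the parser's default=''.
        names = ['http', 'https', 'socks4', 'socks5']
    return [ProxyTypeDemo.from_name(x) for x in names]


print(parse_proxy_types('http, socks5'))  # HTTP and SOCKS5 members
print(parse_proxy_types(''))              # all four types
```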
12 changes: 6 additions & 6 deletions README.md
@@ -1,4 +1,4 @@
-ProxyEater\[1.5.0\]
+ProxyEater\[1.5.1\]
 ===================

 ![version](https://img.shields.io/pypi/v/ProxyEater)
@@ -35,9 +35,9 @@ Usage

 ```
 usage: ProxyEater [-h] [--source SOURCE] [--output OUTPUT] [--file-format { text, json, csv }]
-                  [--format FORMAT] [--include-status] [--threads THREADS] [--timeout TIMEOUT]
-                  [--url URL] [--verbose] [--quiet] [--version] [--proxy PROXY] [--proxy-type
-                  PROXY_TYPE] [--useragent USERAGENT] [--include-geolocation] [--no-check]
+                  [--format FORMAT] [--proxy-type PROXY_TYPE] [--include-status] [--threads
+                  THREADS] [--timeout TIMEOUT] [--url URL] [--verbose] [--quiet] [--version]
+                  [--proxy PROXY] [--useragent USERAGENT] [--include-geolocation] [--no-check]
                   [--source-format { text, json, csv }] [--default-type { http, https, socks4,
                   socks5 }]
                   mode
@@ -58,6 +58,8 @@ options:
   --format FORMAT, -f FORMAT
                         The format for saving the proxies in text
                         file(default:"{scheme}://{ip}:{port}").
+  --proxy-type PROXY_TYPE, -type PROXY_TYPE
+                        The type of the proxies(default:all).
   --include-status, -is
                         Include the status of the proxies in the output file.
   --threads THREADS, -t THREADS
@@ -78,8 +80,6 @@ Scrape:
   --proxy PROXY, -p PROXY
                         The proxy to use for scraping.
-  --proxy-type PROXY_TYPE, -type PROXY_TYPE
-                        The type of the proxies(default:all).
   --useragent USERAGENT, -ua USERAGENT
                         The useragent of the requests(default:random).
   --include-geolocation, -ig
2 changes: 1 addition & 1 deletion setup.py
@@ -7,7 +7,7 @@

 setup(
     name='ProxyEater',
-    version='1.5.0',
+    version='1.5.1',
     author='CodeWriter21',
     author_email='[email protected]',
     description='A Python Proxy Scraper for gathering fresh proxies.',
