feature to turn on safe search filter as in #99
resolved the bug where the time_range filter was ignored as in #93
improved handling of the IncompleteRead exception as in #83
option to exclude numbered ordering in image names as in #100
Vasa committed May 16, 2018
1 parent 68f8bf7 commit 0ec21c7
Showing 3 changed files with 42 additions and 10 deletions.
12 changes: 12 additions & 0 deletions README.rst
@@ -280,6 +280,18 @@ Arguments
| | | |
| | | The path looks like this: "path/to/chromedriver". On Windows it will be "path/to/chromedriver.exe" |
+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
| safe_search | sa | Searches for images with the Safe Search filter turned on |
| | | |
| | | The filter is off by default; it is applied only when you pass the safe_search argument |
| | | |
| | | This argument does not take any value. Just add '--safe_search' or '-sa' to your query. |
+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
| no_numbering | nn | When you specify this argument, the script does not add an ordered number as a prefix to the downloaded image names |
| | | |
| | | If this argument is not specified, the images are numbered in the order in which they are downloaded |
| | | |
| | | This argument does not take any value. Just add '--no_numbering' or '-nn' to your query. |
+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
| help | h | show the help message regarding the usage of the above arguments |
+-------------------+-------------+-------------------------------------------------------------------------------------------------------------------------------+
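
The two new switches can also be exercised from Python. A minimal, hedged sketch (the keyword, limit, and the console-script name in the comment are placeholders; the googleimagesdownload class, its download() method, and the module-level args_list are taken from the file below, but this exact call pattern has not been verified against this release)::

    # Roughly equivalent CLI call (console-script name assumed):
    #   googleimagesdownload -k "polar bears" -l 5 -sa -nn
    from google_images_download import google_images_download as gid

    # Start with every supported argument unset, mirroring what argparse yields
    # when a flag is not passed, then switch on the two new options.
    arguments = {name: None for name in gid.args_list}
    arguments.update({
        "keywords": "polar bears",   # placeholder search term
        "limit": 5,                  # placeholder download count
        "safe_search": True,         # appends &safe=active to the search URL
        "no_numbering": True,        # drops the "1. ", "2. " filename prefixes
    })
    gid.googleimagesdownload().download(arguments)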

38 changes: 29 additions & 9 deletions google_images_download/google_images_download.py
@@ -14,13 +14,15 @@
    from urllib.request import URLError, HTTPError
    from urllib.parse import quote
    import http.client
    from http.client import IncompleteRead
    http.client._MAXHEADERS = 1000
else: # If the Current Version of Python is 2.x
    import urllib2
    from urllib2 import Request, urlopen
    from urllib2 import URLError, HTTPError
    from urllib import quote
    import httplib
    from httplib import IncompleteRead
    httplib._MAXHEADERS = 1000
import time # Importing the time library to check the time of code execution
import os
@@ -33,11 +33,11 @@
import socket

args_list = ["keywords", "keywords_from_file", "prefix_keywords", "suffix_keywords",
"limit", "related_images", "format", "color", "color_type", "usage_rights", "size",
"limit", "format", "color", "color_type", "usage_rights", "size",
"exact_size", "aspect_ratio", "type", "time", "time_range", "delay", "url", "single_image",
"output_directory", "image_directory", "no_directory", "proxy", "similar_images", "specific_site",
"print_urls", "print_size", "print_paths", "metadata", "extract_metadata", "socket_timeout",
"thumbnail", "language", "prefix", "chromedriver"]
"thumbnail", "language", "prefix", "chromedriver", "related_images", "safe_search", "no_numbering"]


def user_input():
@@ -104,6 +106,8 @@ def user_input():
    parser.add_argument('-px', '--proxy', help='specify a proxy address and port', type=str, required=False)
    parser.add_argument('-cd', '--chromedriver', help='specify the path to chromedriver executable in your local machine', type=str, required=False)
    parser.add_argument('-ri', '--related_images', default=False, help="Downloads images that are similar to the keyword provided", action="store_true")
    parser.add_argument('-sa', '--safe_search', default=False, help="Turns on the safe search filter while searching for images", action="store_true")
    parser.add_argument('-nn', '--no_numbering', default=False, help="Allows you to exclude the default numbering of images", action="store_true")

    args = parser.parse_args()
    arguments = vars(args)
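
A small standalone sketch of how the two new store_true flags surface in the arguments dict (the flag definitions are copied from the diff above; running them outside the full parser is illustrative only):

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('-sa', '--safe_search', default=False, help="Turns on the safe search filter while searching for images", action="store_true")
    parser.add_argument('-nn', '--no_numbering', default=False, help="Allows you to exclude the default numbering of images", action="store_true")

    print(vars(parser.parse_args(['-sa'])))
    # {'safe_search': True, 'no_numbering': False}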
@@ -358,7 +362,7 @@ def build_url_parameters(self,arguments):
    if arguments['time_range']:
        json_acceptable_string = arguments['time_range'].replace("'", "\"")
        d = json.loads(json_acceptable_string)
        time_range = '&cdr:1,cd_min:' + d['time_min'] + ',cd_max:' + d['time_min']
        time_range = ',cdr:1,cd_min:' + d['time_min'] + ',cd_max:' + d['time_max']
    else:
        time_range = ''
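
For reference, a hedged illustration of the fix above: the date range now extends the existing tbs= value with a leading comma instead of opening a malformed '&cdr:...' token that Google ignores (the dates and the isz:l base value are placeholders):

    import json

    raw = "{'time_min':'01/01/2018','time_max':'05/16/2018'}"
    d = json.loads(raw.replace("'", '"'))
    time_range = ',cdr:1,cd_min:' + d['time_min'] + ',cd_max:' + d['time_max']
    print('&tbs=isz:l' + time_range)
    # &tbs=isz:l,cdr:1,cd_min:01/01/2018,cd_max:05/16/2018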

@@ -394,7 +398,9 @@ def build_url_parameters(self,arguments):


#building main search URL
def build_search_url(self,search_term,params,url,similar_images,specific_site):
def build_search_url(self,search_term,params,url,similar_images,specific_site,safe_search):
    #check safe_search
    safe_search_string = "&safe=active"
    # check the args and choose the URL
    if url:
        url = url
@@ -408,7 +414,12 @@ def build_search_url(self,search_term,params,url,similar_images,specific_site):
    else:
        url = 'https://www.google.com/search?q=' + quote(
            search_term) + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch' + params + '&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg'
    #print(url)

    #safe search check
    if safe_search:
        url = url + safe_search_string

    # print(url)
    return url
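
A hedged call sketch for the updated signature (the search term is a placeholder; the empty params string and the no-argument constructor are assumptions based on the rest of this file):

    from google_images_download import google_images_download as gid

    response = gid.googleimagesdownload()
    url = response.build_search_url(search_term='polar bears', params='', url=None,
                                    similar_images=None, specific_site=None, safe_search=True)
    # url now ends with ...&tbm=isch&safe=active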


@@ -539,7 +550,7 @@ def download_image_thumbnail(self,image_url,main_directory,dir_name,return_image


# Download Images
def download_image(self,image_url,image_format,main_directory,dir_name,count,print_urls,socket_timeout,prefix,print_size):
def download_image(self,image_url,image_format,main_directory,dir_name,count,print_urls,socket_timeout,prefix,print_size,no_numbering):
    if print_urls:
        print("Image URL: " + image_url)
    try:
@@ -574,7 +585,10 @@ def download_image(self,image_url,image_format,main_directory,dir_name,count,pri
        else:
            prefix = ''

        path = main_directory + "/" + dir_name + "/" + prefix + str(count) + ". " + image_name
        if no_numbering:
            path = main_directory + "/" + dir_name + "/" + prefix + image_name
        else:
            path = main_directory + "/" + dir_name + "/" + prefix + str(count) + ". " + image_name

        try:
            output_file = open(path, 'wb')
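
Purely illustrative: the two filename shapes the no_numbering branch above can produce (the directory, prefix, count, and image name are made up):

    main_directory, dir_name = "downloads", "polar bears"
    prefix, count, image_name = "", 7, "iceberg.jpg"

    numbered = main_directory + "/" + dir_name + "/" + prefix + str(count) + ". " + image_name
    plain = main_directory + "/" + dir_name + "/" + prefix + image_name
    print(numbered)  # downloads/polar bears/7. iceberg.jpg
    print(plain)     # downloads/polar bears/iceberg.jpg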
@@ -632,6 +646,12 @@ def download_image(self,image_url,image_format,main_directory,dir_name,count,pri
        return_image_name = ''
        absolute_path = ''

    except IncompleteRead as e:
        download_status = 'fail'
        download_message = "IncompleteReadError on an image...trying next one..." + " Error: " + str(e)
        return_image_name = ''
        absolute_path = ''

    return download_status,download_message,return_image_name,absolute_path
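
A self-contained sketch (Python 3 shown) of the failure mode the new except block covers: when a server closes the connection early, response.read() raises http.client.IncompleteRead, which previously crashed the whole run. The URL, timeout, and User-Agent below are placeholders:

    from urllib.request import Request, urlopen
    from http.client import IncompleteRead

    def fetch(url, timeout=10.0):
        req = Request(url, headers={"User-Agent": "Mozilla/5.0"})
        try:
            return urlopen(req, timeout=timeout).read()
        except IncompleteRead as e:
            # Mirror the handler above: report the error and let the caller move on.
            print("IncompleteReadError on an image...trying next one... Error: " + str(e))
            return None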


@@ -686,7 +706,7 @@ def _get_all_items(self,page,main_directory,dir_name,limit,arguments):
            items.append(object) # Append the image's metadata dictionary to the 'items' list

            #download the images
            download_status,download_message,return_image_name,absolute_path = self.download_image(object['image_link'],object['image_format'],main_directory,dir_name,count,arguments['print_urls'],arguments['socket_timeout'],arguments['prefix'],arguments['print_size'])
            download_status,download_message,return_image_name,absolute_path = self.download_image(object['image_link'],object['image_format'],main_directory,dir_name,count,arguments['print_urls'],arguments['socket_timeout'],arguments['prefix'],arguments['print_size'],arguments['no_numbering'])
            print(download_message)
            if download_status == "success":

@@ -812,7 +832,7 @@ def download(self,arguments):

        params = self.build_url_parameters(arguments) #building URL with params

        url = self.build_search_url(search_term,params,arguments['url'],arguments['similar_images'],arguments['specific_site']) #building main search url
        url = self.build_search_url(search_term,params,arguments['url'],arguments['similar_images'],arguments['specific_site'],arguments['safe_search']) #building main search url

        if limit < 101:
            raw_html = self.download_page(url) # download page
2 changes: 1 addition & 1 deletion setup.py
@@ -2,7 +2,7 @@
from codecs import open
from os import path

__version__ = '2.2.2'
__version__ = '2.3.0'

here = path.abspath(path.dirname(__file__))

