Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding comments in the codebase #73

Merged
merged 7 commits into from
Mar 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions analyze/data_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,10 +138,10 @@ def time_trend_helper(df):
Extract year-wise count of entries from a DataFrame.

Args:
df (DataFrame): Input DataFrame containing dates.
- df (DataFrame): Input DataFrame containing dates.

Returns:
DataFrame: DataFrame with counts of entries per year.
- DataFrame: DataFrame with counts of entries per year.
"""
year_list = []
for date_row in df["dates"][0:]:
Expand All @@ -167,7 +167,7 @@ def time_trend(csv_path):
Generate a line graph to show the time trend of the license usage.

Args:
csv_path (str): Path to the CSV file.
- csv_path (str): Path to the CSV file.
"""
df = pd.read_csv(csv_path)
count_df = time_trend_helper(df)
Expand Down Expand Up @@ -205,10 +205,10 @@ def time_trend_compile_helper(yearly_count):
Filter yearly trend data for the years between 2018 and 2022.

Args:
yearly_count (DataFrame): DataFrame with "year" and "Counts" columns.
- yearly_count (DataFrame): DataFrame with "year" and "Counts" columns.

Returns:
DataFrame: Filtered yearly count data.
- DataFrame: Filtered yearly count data.
"""
Years = np.arange(2018, 2023)
yearly_count["year"] = list(yearly_count.index)
Expand Down Expand Up @@ -370,10 +370,10 @@ def view_compare_helper(df):
Calculate maximum views of pictures under a license.

Args:
df (DataFrame): Input DataFrame.
- df (DataFrame): Input DataFrame.

Returns:
int: Maximum views.
- int: Maximum views.
"""
highest_view = int(max(df["views"]))
df = df.sort_values("views", ascending=False)
Expand Down
65 changes: 39 additions & 26 deletions deviantart/deviantart_scratcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,26 +17,36 @@
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

# Set up current working directory
CWD = os.path.dirname(os.path.abspath(__file__))
# Load environment variables
dotenv_path = os.path.join(os.path.dirname(CWD), ".env")
load_dotenv(dotenv_path)

# Get the current date
today = dt.datetime.today()
# Retrieve API keys
API_KEYS = os.getenv("GOOGLE_API_KEYS").split(",")
API_KEYS_IND = 0
# Set up file path for CSV report
DATA_WRITE_FILE = (
f"{CWD}" f"/data_deviantart_{today.year}_{today.month}_{today.day}.csv"
)
# Retrieve Programmable Search Engine key from environment variables
PSE_KEY = os.getenv("PSE_KEY")


def get_license_list():
"""Provides the list of license from 2018's record of Creative Commons.
"""
Provides the list of license from 2018's record of Creative Commons.

Returns:
np.array: An np array containing all license types that should be
searched via Programmable Search Engine.
- np.array: An array containing all license types that should be
searched via Programmable Search Engine.
"""
# Read license data from file
cc_license_data = pd.read_csv(f"{CWD}/legal-tool-paths.txt", header=None)
# Define regex pattern to extract license types
license_pattern = r"((?:[^/]+/){2}(?:[^/]+)).*"
license_list = (
cc_license_data[0]
Expand All @@ -48,14 +58,14 @@ def get_license_list():


def get_request_url(license):
"""Provides the API Endpoint URL for specified parameter combinations.
"""
Provides the API Endpoint URL for specified parameter combinations.
Args:
license:
A string representing the type of license, and should be a segment
of its URL towards the license description.
- license (str): A string representing the type of license. It's a
segment of the URL towards the license description.

Returns:
string: A string representing the API Endpoint URL for the query
specified by this function's parameters.
- str: The API Endpoint URL for the query specified by parameters.
"""
try:
api_key = API_KEYS[API_KEYS_IND]
Expand All @@ -73,19 +83,19 @@ def get_request_url(license):


def get_response_elems(license):
"""Provides the metadata for query of specified parameters
"""
Provides the metadata for query of specified parameters
Args:
license:
A string representing the type of license, and should be a segment
of its URL towards the license description. Alternatively, the
default None value stands for having no assumption about license
type.
- license (str): A string representing the type of license.
It's a segment of the URL towards the license description. If not provided,
it defaults to None, indicating no assumption about the license type.

Returns:
dict: A dictionary mapping metadata to its value provided from the API
query of specified parameters.
- dict: A dictionary mapping metadata to its value provided from the API
query.
"""
try:
# Make a request to the API and handle potential retries
request_url = get_request_url(license)
max_retries = Retry(
total=5,
Expand All @@ -104,6 +114,7 @@ def get_response_elems(license):
return search_data_dict
except Exception as e:
if isinstance(e, requests.exceptions.HTTPError):
# If quota limit exceeded, switch to the next API key
global API_KEYS_IND
API_KEYS_IND += 1
print(
Expand All @@ -115,20 +126,18 @@ def get_response_elems(license):


def set_up_data_file():
    """Create the CSV report at DATA_WRITE_FILE and write its header row."""
    with open(DATA_WRITE_FILE, "w") as f:
        f.write("LICENSE TYPE,Document Count\n")


def record_license_data(license_type):
"""Writes the row for LICENSE_TYPE to file to contain DeviantArt data.
"""Writes the row for LICENSE_TYPE to the file to contain DeviantArt data.
Args:
license_type:
A string representing the type of license, and should be a segment
of its URL towards the license description. Alternatively, the
default None value stands for having no assumption about license
type.
- license_type(str): A string representing the type of license.
It's a segment of the URL towards the license description. If not provided,
it defaults to None, indicating no assumption about the license type.
"""
data_log = (
f"{license_type},"
Expand All @@ -139,10 +148,14 @@ def record_license_data(license_type):


def record_all_licenses():
    """
    Iterate over every license type returned by get_license_list() and
    append its document count to the CSV report at DATA_WRITE_FILE.
    """
    # One row per license type, written by the per-license helper
    for license_type in get_license_list():
        record_license_data(license_type)

Expand Down
35 changes: 24 additions & 11 deletions flickr/data_cleaning.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,32 +20,45 @@
import pandas as pd


def drop_empty_column(csv_path, new_csv_path):
    """
    Drop every column whose name contains 'Unnamed' from the CSV file.

    Args:
        csv_path (str): Path to the original CSV file.
        new_csv_path (str): Path to save the cleaned CSV file.
    """
    df = pd.read_csv(csv_path)
    # Collect all matching columns first, then drop them in one call.
    # The previous loop re-derived `data` from the original `df` on every
    # iteration, so only the LAST 'Unnamed' column was actually removed,
    # and `data` was undefined when no column matched.
    unnamed_cols = [col for col in df.columns if "Unnamed" in col]
    for col in unnamed_cols:
        print("Dropping column", col)
    df.drop(columns=unnamed_cols).to_csv(new_csv_path)
    print("Dropping empty columns")


def drop_duplicate_id(csv_path, new_csv_path):
    """
    Remove rows that repeat an 'id' value, keeping the first occurrence.

    Args:
        csv_path (str): Path to the original CSV file.
        new_csv_path (str): Path to save the cleaned CSV file.
    """
    deduplicated = pd.read_csv(csv_path).drop_duplicates(subset=["id"])
    deduplicated.to_csv(new_csv_path)
    print("Dropping duplicates")


def save_new_data(
csv_path, column_name_list, new_csv_path
): # attribute is string
def save_new_data(csv_path, column_name_list, new_csv_path):
"""
column_name_list must belongs to the
existing column names from original csv
csv_path is the path of original csv
This function generate a new dataframe
to save final data with useful columns
Saves specified columns from the original CSV file to a new CSV file.

Args:
- csv_path (str): Path to the original CSV file.
- column_name_list (list of str): List of column names to be saved
(belongs to the existing column names from original csv)
- new_csv_path (str): Path to save the new CSV file.
"""
df = pd.read_csv(csv_path)
new_df = pd.DataFrame()
Expand Down
14 changes: 12 additions & 2 deletions flickr/photos.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
"""
Fetching photo information from Flickr API for photos under
each Creative Commons license and saving the data into a JSON file
"""

# Standard library
import json
import os
Expand All @@ -9,24 +14,29 @@
import flickrapi
from dotenv import load_dotenv

# Get the current working directory
CWD = os.path.dirname(os.path.abspath(__file__))
# Load environment variables
dotenv_path = os.path.join(os.path.dirname(CWD), ".env")
load_dotenv(dotenv_path)


def main():
    """Query the Flickr API for photos under each Creative Commons
    license and save the raw search results to photos.json.
    """
    # Initialize the Flickr API client; credentials come from the
    # environment (.env loaded at module import)
    flickr = flickrapi.FlickrAPI(
        os.getenv("FLICKR_API_KEY"),
        os.getenv("FLICKR_API_SECRET"),
        format="json",
    )

    # Keys are Flickr license ids (presumably the CC license codes —
    # verify against the Flickr licenses.getInfo endpoint); each value is
    # replaced below by the parsed search results for that license.
    dic = {1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 9: 0, 10: 0}
    # Pull general photo info for each license via the search method,
    # 500 results per page, and store the decoded JSON in the dictionary
    for i in dic.keys():
        photosJson = flickr.photos.search(license=i, per_page=500)
        dic[i] = [json.loads(photosJson.decode("utf-8"))]
    # Persist the collected photo data next to this script
    with open(os.path.join(CWD, "photos.json"), "w") as json_file:
        json.dump(dic, json_file)

Expand Down
Loading
Loading