Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Xview Dataset Draftv01 #259

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
196 changes: 196 additions & 0 deletions component-library/input/input-Xview_dataset.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "818f3c71",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dataset download started successfully.\n",
"C:\\Users\\loren\\Desktop\\dataset_download\\Tried_downloads\\train_labels.zip\n",
"File downloaded successfully.\n"
]
}
],
"source": [
"import os\n",
"import shutil\n",
"import time\n",
"from selenium import webdriver\n",
"from selenium.webdriver.common.by import By\n",
"from selenium.webdriver.support.ui import WebDriverWait\n",
"from selenium.webdriver.support import expected_conditions as EC\n",
"from urllib.parse import urlparse\n",
"\n",
"def login_and_download(username, password, move_to_dir, chromedriver_path, max_download_time, label): \n",
" \n",
" # Set Chrome options to automatically download files to the specified directory\n",
" options = webdriver.ChromeOptions()\n",
" prefs = {\n",
" \"download.default_directory\": move_to_dir,\n",
" \"download.prompt_for_download\": False,\n",
" \"download.directory_upgrade\": True,\n",
" \"safebrowsing.enabled\": True\n",
" }\n",
" options.add_experimental_option(\"prefs\", prefs)\n",
"\n",
" # Start a new instance of Chrome web browser\n",
" driver = webdriver.Chrome(executable_path=chromedriver_path, options=options)\n",
" \n",
" # Open the login page\n",
" url_login = r'https://challenge.xviewdataset.org/login'\n",
" driver.get(url_login)\n",
"\n",
" # Find the username and password fields and enter credentials\n",
" username_field = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.NAME, 'email')))\n",
" password_field = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.NAME, 'password')))\n",
" username_field.send_keys(username)\n",
" password_field.send_keys(password)\n",
"\n",
" # Find and click the login button\n",
" login_button = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CLASS_NAME, 'btn.primary')))\n",
" login_button.click()\n",
" \n",
" # Wait for the page to load after login\n",
" time.sleep(1)\n",
" \n",
" # Open the Download page\n",
" url_download = r'https://challenge.xviewdataset.org/download-links'\n",
" driver.get(url_download)\n",
" \n",
" # Wait for the overlay element to be present\n",
" overlay_element = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, 'overlay--active')))\n",
"\n",
" # Remove the automatic pop-up overlay \n",
" body_element = driver.find_element_by_tag_name('body')\n",
" body_element.click()\n",
" time.sleep(1)\n",
" \n",
" # Switch between the possible download files\n",
" search_text = \"\"\n",
" match label:\n",
" case \"TI.zip\":\n",
" search_text = '//a[contains(text(), \"Download Training Images (zip)\")]'\n",
" case \"TL.zip\":\n",
" search_text = '//a[contains(text(), \"Download Training Labels (zip)\")]'\n",
" case \"VI.zip\":\n",
" search_text = '//a[contains(text(), \"Download Validation Images (zip)\")]'\n",
" case \"TI.tgz\":\n",
" search_text = '//a[contains(text(), \"Download Training Images (tgz)\")]'\n",
" case \"TL.tgz\":\n",
" search_text = '//a[contains(text(), \"Download Training Labels (tgz)\")]'\n",
" case \"VI.tgz\":\n",
" search_text = '//a[contains(text(), \"Download Validation Images (tgz)\")]'\n",
" case _:\n",
" raise ValueError(\"Error: This is an invalid download option\") \n",
" \n",
" # Wait for the download link to be present\n",
" download_link_element = WebDriverWait(driver, 100).until(EC.presence_of_element_located((By.XPATH, search_text)))\n",
" \n",
" # Get the dynamic download link from the href attribute\n",
" download_link = download_link_element.get_attribute('href')\n",
" \n",
" # Download the dataset using the obtained link\n",
" if download_link:\n",
" driver.get(download_link)\n",
" print(\"Dataset download started successfully.\")\n",
" \n",
" # Extract the filename from the download link URL\n",
" parsed_url = urlparse(download_link)\n",
" filename = parsed_url.path.split('/')[-1]\n",
" downloaded_file = os.path.join(move_to_dir, filename)\n",
" print(downloaded_file)\n",
" \n",
" # Check if the download directory exists\n",
" if not os.path.exists(move_to_dir):\n",
" os.makedirs(move_to_dir)\n",
" \n",
" # Wait for the file to be completely downloaded\n",
" start_time = time.time()\n",
" \n",
" while True:\n",
" if os.path.exists(downloaded_file) and os.path.getsize(downloaded_file) > 0:\n",
" print(\"File downloaded successfully.\")\n",
" break\n",
" elif time.time() - start_time > max_download_time:\n",
" print(\"Error: Maximum wait time exceeded.\")\n",
" break\n",
" else:\n",
" time.sleep(5)\n",
" \n",
" else:\n",
" print(\"Failed to get the download link.\")\n",
"\n",
" # Close the browser\n",
" driver.quit()\n",
" \n",
" \n",
"username = # enter your Username\n",
"password = # enter your password\n",
"move_to_dir = # provide a movetodir\n",
"chromedriver_path = # provide a path to chromedriver\n",
"max_download_time = 2700 # must be adjusted according to the file size and internet speed\n",
"label = # the label of the file to download: TI.zip, TL.zip, VI.zip, TI.tgz, TL.tgz or VI.tgz\n",
"\n",
"login_and_download(username, password, move_to_dir, chromedriver_path, max_download_time, label)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "794506c5",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "e7b2f96d",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "0047693d",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "ba530873",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
11 changes: 11 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from setuptools import setup

# Distribution metadata for the CLAIMED component library, collected in one
# mapping so each field sits on its own line and the setup() call stays short.
_PACKAGE_METADATA = {
    'name': 'claimed-component-library',
    'version': '0.1',
    'description': 'CLAIMED component library',
    'url': 'https://github.com/claimed-framework/component-library',
    'author': 'The CLAIMED authors',
    'author_email': '[email protected]',
    'license': 'Apache2',
    # The single package shipped by this distribution.
    'packages': ['component-library'],
    # Install as an unpacked directory rather than a zipped egg.
    'zip_safe': False,
}

setup(**_PACKAGE_METADATA)