temp
This commit is contained in:
120
flatten-step3.py
120
flatten-step3.py
@@ -1,61 +1,61 @@
|
|||||||
import shutil
|
import shutil
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
def flatten_gpx_files(source_dir: str = ".", target_dir: str = "ALL_GPX"):
    """
    Find all .gpx files under source_dir (including subfolders)
    and copy them into a single flat folder.

    Each copy is named ``<stem>__<parent folder name><suffix>``. If that name
    is already taken in the target folder, ``_1``, ``_2``, ... is appended
    until a free name is found, so existing files are never overwritten.

    Files that already live inside the target folder are not treated as
    sources. This matters because the default target ("ALL_GPX") is located
    inside the default source ("."), so without the exclusion a second run
    would copy the previous run's output again.

    Args:
        source_dir: Directory that is searched recursively for ``*.gpx``.
        target_dir: Directory that receives the copies (created if missing).

    Returns:
        None. Progress and a summary are printed to stdout.
    """
    source_path = Path(source_dir).resolve()
    target_path = Path(target_dir).resolve()

    # Create target folder
    target_path.mkdir(parents=True, exist_ok=True)

    print(f"Searching for .gpx files in: {source_path}")
    print(f"Copying to flat folder: {target_path}\n")

    # Sorted for a deterministic copy order (and therefore deterministic
    # counter suffixes); anything below the target folder is skipped.
    gpx_files = sorted(
        candidate
        for candidate in source_path.rglob("*.gpx")
        if candidate.is_file() and target_path not in candidate.parents
    )

    if not gpx_files:
        print("No .gpx files found.")
        return

    copied = 0
    for gpx_file in gpx_files:
        # New filename: original_name__parent_folder.gpx (helps avoid name collisions)
        base_name = f"{gpx_file.stem}__{gpx_file.parent.name}"
        destination = target_path / f"{base_name}{gpx_file.suffix}"

        # If filename already exists, add a number
        counter = 1
        while destination.exists():
            destination = target_path / f"{base_name}_{counter}{gpx_file.suffix}"
            counter += 1

        try:
            shutil.copy2(gpx_file, destination)
        except OSError as e:  # shutil.Error is an OSError subclass as well
            print(f"Failed {gpx_file.name}: {e}")
        else:
            # Report the name that was really written (may carry a counter).
            print(f"Copied: {gpx_file.name} → {destination.name}")
            copied += 1

    print("\n" + "=" * 50)
    print(f"Done! {copied} .gpx files flattened into '{target_path.name}/'")
    print("=" * 50)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    import argparse

    # Command-line front end: one optional positional source directory and
    # an optional -o/--output target folder, both forwarded unchanged.
    cli = argparse.ArgumentParser(description="Flatten all .gpx files into one folder")
    cli.add_argument(
        "source",
        nargs="?",
        default=".",
        help="Source directory to search (default: current)",
    )
    cli.add_argument(
        "-o",
        "--output",
        default="ALL_GPX",
        help="Output folder name (default: ALL_GPX)",
    )
    options = cli.parse_args()

    flatten_gpx_files(options.source, options.output)
|
||||||
@@ -1,100 +1,100 @@
|
|||||||
import os
|
import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
def rename_files_in_folders(root_dir: str = ".", recursive: bool = True, dry_run: bool = False):
    """
    Rename files by appending the name of the folder that directly contains them.

    Example: image.jpg inside 'extracted_MyBackup' becomes
    image_extracted_MyBackup.jpg

    Files located directly in ``root_dir`` are never touched; only files
    inside its subfolders are renamed. Any folder that is hidden, or that
    lives below a hidden folder (a path component starting with "."), is
    skipped entirely.

    Args:
        root_dir: Folder whose subfolders are processed.
        recursive: If True, process subfolders at every depth; otherwise only
            the direct subfolders of ``root_dir``.
        dry_run: If True, only print what would be renamed.

    Returns:
        None. Progress and a summary are printed to stdout.
    """
    base_path = Path(root_dir).resolve()

    if not base_path.exists():
        print(f"Error: Folder '{base_path}' does not exist.")
        return
    if not base_path.is_dir():
        print(f"Error: '{base_path}' is not a folder.")
        return

    print(f"Scanning folder: {base_path}")
    if dry_run:
        print("*** DRY RUN MODE - No files will be renamed ***\n")

    renamed_count = 0
    skipped_count = 0

    # Snapshot the folder list up front so that renaming files cannot
    # interfere with an in-progress directory walk.
    candidates = base_path.rglob("*") if recursive else base_path.iterdir()
    folders = sorted(entry for entry in candidates if entry.is_dir())

    for dir_path in folders:
        folder_name = dir_path.name

        # Skip hidden folders AND everything nested inside one (e.g. .git/objects).
        relative_parts = dir_path.relative_to(base_path).parts
        if any(part.startswith('.') for part in relative_parts):
            continue

        print(f"\nProcessing folder: {folder_name}")

        marker = f"_{folder_name}"
        # Materialise the file list before renaming anything in this folder.
        for file_path in sorted(p for p in dir_path.iterdir() if p.is_file()):
            original_name = file_path.stem  # filename without extension
            ext = file_path.suffix          # .jpg, .png, etc.

            # Skip files that already carry the "_<folder>" suffix (avoids
            # double renaming). A suffix test is used instead of a substring
            # test so that e.g. "subtitle.txt" in folder "sub" is still renamed.
            if original_name.endswith(marker):
                print(f" Skipped (already processed): {file_path.name}")
                skipped_count += 1
                continue

            # New filename: originalName_folderName.ext
            new_name = f"{original_name}{marker}{ext}"
            new_path = file_path.parent / new_name

            # Never overwrite an existing file
            if new_path.exists():
                print(f" Warning: Target already exists → {new_name}")
                skipped_count += 1
                continue

            try:
                if dry_run:
                    print(f" Would rename: {file_path.name} → {new_name}")
                else:
                    file_path.rename(new_path)
                    print(f" Renamed: {file_path.name} → {new_name}")
                renamed_count += 1
            except OSError as e:
                print(f" Error renaming {file_path.name}: {e}")
                skipped_count += 1

    print("\n" + "=" * 60)
    print("Renaming completed!")
    print(f"Files renamed: {renamed_count}")
    print(f"Files skipped: {skipped_count}")
    if dry_run:
        print("This was a DRY RUN — no actual changes were made.")
    print("=" * 60)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(
        description="Rename files by appending the name of their containing folder."
    )
    parser.add_argument("folder", nargs="?", default=".",
                        help="Root folder to process (default: current directory)")
    parser.add_argument("-r", "--recursive", action="store_true",
                        help="Process subfolders recursively (recommended)")
    parser.add_argument("--dry-run", action="store_true",
                        help="Show what would be renamed without making changes")

    args = parser.parse_args()

    # NOTE: "-r/--recursive" is a store_true flag, so argparse always defines
    # args.recursive and it is False unless the flag is given. From the
    # command line recursion is therefore opt-in, even though
    # rename_files_in_folders() itself defaults to recursive=True.
    rename_files_in_folders(
        root_dir=args.folder,
        recursive=args.recursive,
        dry_run=args.dry_run,
    )
|
||||||
@@ -1,75 +1,75 @@
|
|||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
# Install these first:
|
# Install these first:
|
||||||
# pip install patool py7zr rarfile
|
# pip install patool py7zr rarfile
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from patoolib import extract_archive
|
from patoolib import extract_archive
|
||||||
except ImportError:
|
except ImportError:
|
||||||
print("Error: 'patool' is not installed. Run: pip install patool py7zr rarfile")
|
print("Error: 'patool' is not installed. Run: pip install patool py7zr rarfile")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
def extract_archive_to_folder(archive_path: Path, base_dir: Path):
    """Extract a single archive into ``base_dir / bg_mountains_{name}``.

    ``{name}`` is the archive's file name without its last extension. The
    output folder is created if it does not exist yet.

    Args:
        archive_path: Path of the archive file to extract.
        base_dir: Folder in which the output folder is created.

    Returns:
        True if extraction succeeded, False if it raised (the error is
        printed, not propagated).
    """
    # Get archive name without extension
    extract_dir = base_dir / f"bg_mountains_{archive_path.stem}"

    # Create the output directory if it doesn't exist
    extract_dir.mkdir(parents=True, exist_ok=True)

    print(f"Extracting: {archive_path.name} → {extract_dir.name}/")

    try:
        # patool automatically detects format (zip, rar, 7z, etc.)
        extract_archive(str(archive_path), outdir=str(extract_dir), verbosity=0)
    except Exception as e:
        # Deliberately broad: one broken archive must not abort the batch.
        print(f"✗ Failed to extract {archive_path.name}: {e}\n")
        return False

    print(f"✓ Successfully extracted: {archive_path.name}\n")
    return True
|
||||||
|
|
||||||
def main(folder_path: str = ".", recursive: bool = False):
    """Find .zip/.rar/.7z archives in a folder and extract each of them.

    Args:
        folder_path: Folder to scan.
        recursive: If True, subfolders are scanned as well.

    Returns:
        None. Status messages are printed to stdout. Nothing is extracted
        when the path is missing, is not a folder, or holds no archives.
    """
    base_dir = Path(folder_path).resolve()

    if not base_dir.exists():
        print(f"Error: Folder '{base_dir}' does not exist.")
        return
    # An existing regular file would otherwise make iterdir() raise
    # NotADirectoryError.
    if not base_dir.is_dir():
        print(f"Error: '{base_dir}' is not a folder.")
        return

    print(f"Scanning for archives in: {base_dir}\n")

    # Supported extensions
    extensions = {'.zip', '.rar', '.7z'}

    # Find all matching archives (suffix comparison is case-insensitive)
    candidates = base_dir.rglob("*") if recursive else base_dir.iterdir()
    archive_files = sorted(
        p for p in candidates if p.is_file() and p.suffix.lower() in extensions
    )

    if not archive_files:
        print("No .zip, .rar, or .7z files found.")
        return

    print(f"Found {len(archive_files)} archive(s).\n")

    for archive in archive_files:
        extract_archive_to_folder(archive, base_dir)

    print("Extraction process completed!")
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Usage examples:
    #   python extract_archives.py                      # current folder, non-recursive
    #   python extract_archives.py "/path/to/folder"    # specific folder
    #   python extract_archives.py "/path/to/folder" --recursive

    import argparse

    cli = argparse.ArgumentParser(
        description="Extract .zip/.rar/.7z archives into separate folders."
    )
    cli.add_argument(
        "folder",
        nargs="?",
        default=".",
        help="Folder to scan (default: current directory)",
    )
    cli.add_argument(
        "-r",
        "--recursive",
        action="store_true",
        help="Search subfolders recursively",
    )
    options = cli.parse_args()

    main(options.folder, options.recursive)
|
||||||
342
visit.py
342
visit.py
@@ -1,172 +1,172 @@
|
|||||||
from selenium import webdriver
|
from selenium import webdriver
|
||||||
from selenium.webdriver.chrome.options import Options
|
from selenium.webdriver.chrome.options import Options
|
||||||
from selenium.webdriver.common.by import By
|
from selenium.webdriver.common.by import By
|
||||||
from selenium.webdriver.support.ui import WebDriverWait
|
from selenium.webdriver.support.ui import WebDriverWait
|
||||||
from selenium.webdriver.support import expected_conditions as EC
|
from selenium.webdriver.support import expected_conditions as EC
|
||||||
from selenium.common.exceptions import TimeoutException, NoSuchElementException, WebDriverException
|
from selenium.common.exceptions import TimeoutException, NoSuchElementException, WebDriverException
|
||||||
import time
|
import time
|
||||||
import os
|
import os
|
||||||
|
|
||||||
# ========================= CONFIG =========================
PROXY_IP = "192.168.0.38"
PROXY_PORT = "1080"
DOWNLOAD_PATH = os.path.join(os.getcwd(), "downloads")
LOG_FILE = "download_log.txt"

# The download folder must exist before Chrome is pointed at it.
os.makedirs(DOWNLOAD_PATH, exist_ok=True)

# ====================== CHROME OPTIONS (Optimized for speed) ======================
chrome_options = Options()

# Command-line switches, applied in the order listed.
_CHROME_ARGUMENTS = [
    # Proxy & basic stealth
    f'--proxy-server=socks5://{PROXY_IP}:{PROXY_PORT}',
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36",
    "--no-sandbox",
    "--disable-dev-shm-usage",
    "--disable-blink-features=AutomationControlled",
    # === SPEED OPTIMIZATIONS ===
    "--blink-settings=imagesEnabled=false",  # Disable images
    "--disable-gpu",
    "--disable-extensions",
    "--disable-plugins",
    "--disable-popup-blocking",
]
for _argument in _CHROME_ARGUMENTS:
    chrome_options.add_argument(_argument)

chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
chrome_options.add_experimental_option('useAutomationExtension', False)

# Fast page load strategy (doesn't wait for images/media);
# "none" would be even faster but less stable.
chrome_options.page_load_strategy = "eager"

# Download behaviour plus content blocking (images, stylesheets, fonts, media).
prefs = {
    "download.default_directory": DOWNLOAD_PATH,
    "download.prompt_for_download": False,
    "download.directory_upgrade": True,
    "safebrowsing.enabled": True,
    "profile.default_content_setting_values.automatic_downloads": 1,
    "profile.managed_default_content_settings.images": 2,        # 2 = block images
    "profile.managed_default_content_settings.stylesheets": 2,   # block CSS
    "profile.managed_default_content_settings.fonts": 2,         # block fonts
    "profile.managed_default_content_settings.media_stream": 2,  # block video/audio
}
chrome_options.add_experimental_option("prefs", prefs)

# Optional: Run headless (much faster, recommended)
# chrome_options.add_argument("--headless=new")

# Initialize driver
driver = webdriver.Chrome(options=chrome_options)
|
||||||
|
|
||||||
def wait_for_page_load(driver, timeout=20):
    """Wait until document.readyState is "interactive" or "complete".

    Gives up after ``timeout`` seconds; a timeout is reported on stdout and
    otherwise ignored, so the caller continues either way.
    """
    def _dom_is_ready(browser):
        state = browser.execute_script("return document.readyState")
        return state in ["interactive", "complete"]

    try:
        WebDriverWait(driver, timeout).until(_dom_is_ready)
    except TimeoutException:
        print(" → Page load timed out, continuing anyway...")
|
||||||
|
|
||||||
def is_download_finished(download_path, timeout=120, check_interval=2, known_files=None):
    """Poll a folder until it holds a finished download and no partial ones.

    A file counts as "partial" when its name ends in .crdownload, .part or
    .tmp; every other file counts as "completed".

    Args:
        download_path: Folder to watch.
        timeout: Maximum number of seconds to keep polling.
        check_interval: Seconds to sleep between polls.
        known_files: Optional collection of file names to ignore when looking
            for completed files, e.g. a listing taken before the download was
            started. Without it, any file left over from an earlier download
            makes the function report success immediately.

    Returns:
        ``(True, completed_files)`` as soon as at least one (non-ignored)
        completed file exists and no partial file is present, otherwise
        ``(False, [])`` after the timeout.
    """
    partial_suffixes = ('.crdownload', '.part', '.tmp')
    ignored = frozenset(known_files or ())

    end_time = time.time() + timeout
    while time.time() < end_time:
        # One listing per poll, so both lists describe the same snapshot.
        entries = os.listdir(download_path)
        partial_files = [f for f in entries if f.endswith(partial_suffixes)]
        completed_files = [
            f for f in entries
            if not f.endswith(partial_suffixes) and f not in ignored
        ]

        if completed_files and not partial_files:
            return True, completed_files

        if partial_files:
            print(f" → Still downloading... ({len(partial_files)} partial)")
        time.sleep(check_interval)
    return False, []
|
||||||
|
|
||||||
def clear_partial_downloads():
    """Delete leftover partial downloads from DOWNLOAD_PATH.

    Removes every file whose name ends in .crdownload, .part or .tmp.
    Removal is best-effort: a file that cannot be deleted is reported on
    stdout and skipped.
    """
    for filename in os.listdir(DOWNLOAD_PATH):
        if not filename.endswith(('.crdownload', '.part', '.tmp')):
            continue
        try:
            os.remove(os.path.join(DOWNLOAD_PATH, filename))
        except OSError as e:
            # Only filesystem errors are tolerated here; a bare "except:"
            # would also swallow KeyboardInterrupt and programming errors.
            print(f" → Could not remove partial file {filename}: {e}")
|
||||||
|
|
||||||
def log_result(url: str, success: bool):
    """Append one "<url> | True" or "<url> | False" line to LOG_FILE.

    The outcome is also echoed to stdout as SUCCESS or FAILED.
    """
    outcome = str(bool(success))  # "True" / "False"
    with open(LOG_FILE, "a", encoding="utf-8") as log:
        log.write(f"{url} | {outcome}\n")

    label = 'SUCCESS' if success else 'FAILED'
    print(f" Logged: {label}")
|
||||||
|
|
||||||
def run_download(url: str, download_timeout: int = 30):
    """Open ``url``, accept the license if asked, click download and wait.

    Steps: remove stale partial files, load the page, tick the
    "license_agree" checkbox when present, click the "jd_license_submit"
    button, then wait until a NEW file shows up in DOWNLOAD_PATH. The
    outcome is written to the log via log_result().

    Args:
        url: Page to visit.
        download_timeout: Maximum number of seconds to wait for the download
            after the button was clicked.

    Returns:
        True if a new completed file appeared in time, False otherwise
        (including any WebDriver or unexpected error, which is printed).
    """
    try:
        print(f"\n[{time.strftime('%H:%M:%S')}] Processing → {url}")
        clear_partial_downloads()

        # Files already present before this download starts. Without this
        # snapshot any earlier download would be mistaken for the new one.
        files_before = set(os.listdir(DOWNLOAD_PATH))

        print(" Navigating...")
        driver.get(url)

        wait_for_page_load(driver, timeout=20)
        print(" ✓ Page loaded (eager).")

        wait = WebDriverWait(driver, 5)  # short timeout for element lookups

        # === License checkbox (quick check) ===
        try:
            checkbox = wait.until(EC.presence_of_element_located((By.NAME, "license_agree")))
            # Only click when it is not ticked yet; clicking a ticked box
            # would untick it.
            if checkbox.is_enabled() and checkbox.is_displayed() and not checkbox.is_selected():
                checkbox.click()
                print(" ✓ License checkbox accepted.")
                time.sleep(0.5)
        except (TimeoutException, NoSuchElementException):
            print(" → No checkbox found. Continuing...")

        # === Download button (quick detection) ===
        try:
            download_btn = wait.until(EC.element_to_be_clickable((By.ID, "jd_license_submit")))
            download_btn.click()
            print(" ✓ Download button clicked.")
        except (TimeoutException, NoSuchElementException):
            print(" ✗ Download button NOT found or not clickable → Skipping")
            log_result(url, False)
            return False

        # === Wait for download ===
        print(f" Waiting for download (max {download_timeout}s)...")
        deadline = time.time() + download_timeout
        new_files = []
        while not new_files:
            remaining = deadline - time.time()
            if remaining <= 0:
                break
            finished, files = is_download_finished(DOWNLOAD_PATH, timeout=remaining)
            if finished:
                new_files = [name for name in files if name not in files_before]
                if not new_files:
                    # Only old files so far: the download has not started
                    # writing yet, so poll again shortly.
                    time.sleep(1)

        if new_files:
            print(f" ✓ Download completed! Files: {new_files}")
            log_result(url, True)
            return True

        print(" ✗ Download timed out.")
        log_result(url, False)
        return False

    except WebDriverException as e:
        print(f" ✗ WebDriver error: {e}")
        log_result(url, False)
        return False
    except Exception as e:
        # Last-resort guard so one bad URL cannot abort the whole batch.
        print(f" ✗ Unexpected error: {e}")
        log_result(url, False)
        return False
|
||||||
|
|
||||||
# ====================== MAIN ======================
|
# ====================== MAIN ======================
|
||||||
if __name__ == "__main__":
    # Zero-based position in url-final.txt at which to resume. Kept in one
    # place so the slice and the status message cannot drift apart.
    START_INDEX = 780

    try:
        with open("url-final.txt", "r", encoding="utf-8") as f:
            links = [line.strip() for line in f if line.strip()]

        links = links[START_INDEX:]
        print(f"Loaded {len(links)} URLs. Starting from index {START_INDEX}.")

        success_count = 0
        for i, url in enumerate(links, 1):
            print(f"--- [{i}/{len(links)}] ---")
            if run_download(url):
                success_count += 1
            time.sleep(3)  # Reduced delay between requests

        print("\n=== FINISHED ===")
        print(f"Total: {len(links)} | Success: {success_count} | Failed: {len(links) - success_count}")
        print(f"Log: {LOG_FILE}")
    finally:
        # Always shut the browser down, even on Ctrl+C or a missing URL file.
        driver.quit()
|
||||||
Reference in New Issue
Block a user