import os
import time

from selenium import webdriver
from selenium.common.exceptions import (
    NoSuchElementException,
    TimeoutException,
    WebDriverException,
)
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# ========================= CONFIG =========================
# SOCKS5 proxy endpoint that all browser traffic is routed through.
PROXY_IP = "192.168.0.38"
PROXY_PORT = "1080"

# Downloads land in ./downloads relative to the working directory at start-up;
# one "<url> | <True/False>" line per processed URL is appended to LOG_FILE.
DOWNLOAD_PATH = os.path.join(os.getcwd(), "downloads")
LOG_FILE = "download_log.txt"

os.makedirs(DOWNLOAD_PATH, exist_ok=True)

# ====================== CHROME OPTIONS ======================
chrome_options = Options()

_USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/134.0.0.0 Safari/537.36"
)

# Command-line switches, applied in this exact order.
_CHROME_ARGUMENTS = (
    # Proxy and basic automation-fingerprint reduction.
    f"--proxy-server=socks5://{PROXY_IP}:{PROXY_PORT}",
    f"user-agent={_USER_AGENT}",
    "--no-sandbox",
    "--disable-dev-shm-usage",
    "--disable-blink-features=AutomationControlled",
    # Speed-oriented switches: no images, GPU, extensions or plugins.
    "--blink-settings=imagesEnabled=false",
    "--disable-gpu",
    "--disable-extensions",
    "--disable-plugins",
    "--disable-popup-blocking",
)
for _argument in _CHROME_ARGUMENTS:
    chrome_options.add_argument(_argument)

# Drop the "enable-automation" switch and the automation extension.
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
chrome_options.add_experimental_option("useAutomationExtension", False)

# "eager" returns control before the full page load finishes; "none" would be
# faster still but less stable.
chrome_options.page_load_strategy = "eager"

# Download and content-blocking preferences are configured next.
# Chrome profile preferences: where downloads go, no save-as prompt, and
# content settings meant to skip heavy resources.
# NOTE(review): the "stylesheets", "fonts" and "media_stream" keys are kept
# from the original script; confirm that Chrome actually honors them.
prefs = {
    "download.default_directory": DOWNLOAD_PATH,
    "download.prompt_for_download": False,
    "download.directory_upgrade": True,
    "safebrowsing.enabled": True,
    "profile.default_content_setting_values.automatic_downloads": 1,
    "profile.managed_default_content_settings.images": 2,  # 2 = block images
    "profile.managed_default_content_settings.stylesheets": 2,  # intended: block CSS
    "profile.managed_default_content_settings.fonts": 2,  # intended: block fonts
    "profile.managed_default_content_settings.media_stream": 2,  # intended: block video/audio
}
chrome_options.add_experimental_option("prefs", prefs)

# To run headless, add the "--headless=new" argument to chrome_options here.

# File-name suffixes that mark a download which is still in progress.
PARTIAL_SUFFIXES = ('.crdownload', '.part', '.tmp')

# Seconds to wait for a single download to finish after clicking the button.
DOWNLOAD_TIMEOUT_S = 30

# Input file with one URL per line, and the 0-based position to resume from.
URL_FILE = "url-final.txt"
START_INDEX = 780

# Single shared browser instance used by run_download().
driver = webdriver.Chrome(options=chrome_options)


def wait_for_page_load(driver, timeout=20):
    """Wait until the document is at least interactive.

    Polls ``document.readyState`` until it is ``"interactive"`` or
    ``"complete"``.

    Args:
        driver: The WebDriver whose current page is checked.
        timeout: Maximum number of seconds to wait.

    Returns:
        True if the page reached a ready state in time, False on timeout.
        A timeout is reported on stdout but is not raised.
    """
    try:
        WebDriverWait(driver, timeout).until(
            lambda d: d.execute_script("return document.readyState") in ["interactive", "complete"]
        )
    except TimeoutException:
        print(" → Page load timed out, continuing anyway...")
        return False
    return True


def is_download_finished(download_path, timeout=120, check_interval=2, known_files=None):
    """Poll a directory until a completed download shows up.

    A download counts as finished when the directory holds at least one
    non-partial file that is not in ``known_files`` and no partial files
    (see ``PARTIAL_SUFFIXES``) remain.

    Args:
        download_path: Directory to watch.
        timeout: Maximum number of seconds to poll.
        check_interval: Seconds to sleep between polls.
        known_files: Optional iterable of file names that were already present
            before the download started; they are ignored. With the default
            ``None`` every non-partial file counts, which is the original
            behavior.

    Returns:
        ``(True, new_files)`` on success, ``(False, [])`` on timeout.
    """
    already_present = set(known_files) if known_files is not None else set()
    deadline = time.monotonic() + timeout

    while time.monotonic() < deadline:
        # List once per poll so both views describe the same directory state.
        entries = os.listdir(download_path)
        partial_files = [f for f in entries if f.endswith(PARTIAL_SUFFIXES)]
        completed_files = [
            f for f in entries
            if not f.endswith(PARTIAL_SUFFIXES) and f not in already_present
        ]

        if completed_files and not partial_files:
            return True, completed_files

        if partial_files:
            print(f" → Still downloading... ({len(partial_files)} partial)")
        time.sleep(check_interval)

    return False, []


def clear_partial_downloads():
    """Delete leftover partial-download files from ``DOWNLOAD_PATH``.

    Removal is best-effort: a file that cannot be deleted is reported on
    stdout and skipped.
    """
    for filename in os.listdir(DOWNLOAD_PATH):
        if not filename.endswith(PARTIAL_SUFFIXES):
            continue
        try:
            os.remove(os.path.join(DOWNLOAD_PATH, filename))
        except OSError as e:
            print(f" → Could not remove partial file {filename}: {e}")


def log_result(url: str, success: bool):
    """Append ``"<url> | True"`` or ``"<url> | False"`` to ``LOG_FILE``."""
    status = "True" if success else "False"
    with open(LOG_FILE, "a", encoding="utf-8") as f:
        f.write(f"{url} | {status}\n")
    print(f" Logged: {'SUCCESS' if success else 'FAILED'}")


def _accept_license_checkbox(wait):
    """Tick the ``license_agree`` checkbox if the page has one."""
    try:
        checkbox = wait.until(EC.presence_of_element_located((By.NAME, "license_agree")))
        if checkbox.is_enabled() and checkbox.is_displayed():
            checkbox.click()
            print(" ✓ License checkbox accepted.")
            time.sleep(0.5)
    except (TimeoutException, NoSuchElementException):
        print(" → No checkbox found. Continuing...")


def run_download(url: str):
    """Open ``url``, accept the license, click download and wait for the file.

    The outcome is always written to the log via ``log_result``.

    Args:
        url: Page that hosts the download button.

    Returns:
        True if a new file finished downloading, False otherwise. WebDriver
        and other errors are caught, printed and reported as False.
    """
    try:
        print(f"\n[{time.strftime('%H:%M:%S')}] Processing → {url}")
        clear_partial_downloads()

        print(" Navigating...")
        driver.get(url)
        if wait_for_page_load(driver, timeout=20):
            print(" ✓ Page loaded (eager).")

        wait = WebDriverWait(driver, 5)  # short timeout for element lookups
        _accept_license_checkbox(wait)

        # Snapshot the directory before clicking so files left by earlier
        # URLs are not mistaken for this download.
        files_before = set(os.listdir(DOWNLOAD_PATH))

        try:
            download_btn = wait.until(EC.element_to_be_clickable((By.ID, "jd_license_submit")))
            download_btn.click()
            print(" ✓ Download button clicked.")
        except (TimeoutException, NoSuchElementException):
            print(" ✗ Download button NOT found or not clickable → Skipping")
            log_result(url, False)
            return False

        print(f" Waiting for download (max {DOWNLOAD_TIMEOUT_S} s)...")
        success, files = is_download_finished(
            DOWNLOAD_PATH, timeout=DOWNLOAD_TIMEOUT_S, known_files=files_before
        )

        if success:
            print(f" ✓ Download completed! Files: {files}")
            log_result(url, True)
            return True

        print(" ✗ Download timed out.")
        log_result(url, False)
        return False

    except WebDriverException as e:
        print(f" ✗ WebDriver error: {e}")
        log_result(url, False)
        return False
    except Exception as e:
        # Top-level boundary for one URL: report it and move on to the next.
        print(f" ✗ Unexpected error: {e}")
        log_result(url, False)
        return False


# ====================== MAIN ======================
if __name__ == "__main__":
    try:
        with open(URL_FILE, "r", encoding="utf-8") as f:
            links = [line.strip() for line in f if line.strip()]

        # Resume a previous run: skip the URLs before START_INDEX.
        links = links[START_INDEX:]
        print(f"Loaded {len(links)} URLs. Starting from index {START_INDEX}.")

        success_count = 0
        for i, url in enumerate(links, 1):
            print(f"--- [{i}/{len(links)}] ---")
            if run_download(url):
                success_count += 1
            time.sleep(3)  # pause between requests

        print("\n=== FINISHED ===")
        print(f"Total: {len(links)} | Success: {success_count} | Failed: {len(links) - success_count}")
        print(f"Log: {LOG_FILE}")
    finally:
        # Close the browser even if the run is interrupted or crashes.
        driver.quit()