# Listing metadata captured with the paste: 172 lines, 6.8 KiB, Python.
from selenium import webdriver
|
|
from selenium.webdriver.chrome.options import Options
|
|
from selenium.webdriver.common.by import By
|
|
from selenium.webdriver.support.ui import WebDriverWait
|
|
from selenium.webdriver.support import expected_conditions as EC
|
|
from selenium.common.exceptions import TimeoutException, NoSuchElementException, WebDriverException
|
|
import time
|
|
import os
|
|
|
|
# ========================= CONFIG =========================
# Host and port of the SOCKS5 proxy that Chrome is told to route through.
PROXY_IP = "192.168.0.38"
PROXY_PORT = "1080"

# Downloads go to ./downloads, resolved against the working directory at start-up.
DOWNLOAD_PATH = os.path.join(os.getcwd(), "downloads")

# Text file that receives one "<url> | True/False" line per processed URL.
LOG_FILE = "download_log.txt"

# Create the download folder up front; an already existing folder is fine.
os.makedirs(DOWNLOAD_PATH, exist_ok=True)
|
|
|
|
# ====================== CHROME OPTIONS (Optimized for speed) ======================
chrome_options = Options()

# Command-line switches, applied in exactly this order.
for _switch in (
    # Proxy & basic stealth
    f"--proxy-server=socks5://{PROXY_IP}:{PROXY_PORT}",
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36",
    "--no-sandbox",
    "--disable-dev-shm-usage",
    "--disable-blink-features=AutomationControlled",
    # Speed optimizations
    "--blink-settings=imagesEnabled=false",  # do not load images
    "--disable-gpu",
    "--disable-extensions",
    "--disable-plugins",
    "--disable-popup-blocking",
    # Optional: uncomment to run headless (suggested by the original author as faster).
    # "--headless=new",
):
    chrome_options.add_argument(_switch)

# Remaining stealth settings are experimental options rather than switches.
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
chrome_options.add_experimental_option("useAutomationExtension", False)

# "eager" hands control back without waiting for images/media;
# "none" would be faster still but less stable.
chrome_options.page_load_strategy = "eager"

# Browser preferences: silent downloads into DOWNLOAD_PATH plus content blocking.
prefs = {
    # Download behaviour
    "download.default_directory": DOWNLOAD_PATH,
    "download.prompt_for_download": False,
    "download.directory_upgrade": True,
    "safebrowsing.enabled": True,
    "profile.default_content_setting_values.automatic_downloads": 1,
    # Content settings, where 2 means "block"
    "profile.managed_default_content_settings.images": 2,
    "profile.managed_default_content_settings.stylesheets": 2,
    "profile.managed_default_content_settings.fonts": 2,
    # NOTE(review): presumably governs camera/microphone capture rather than
    # audio/video playback - confirm against Chrome's content settings.
    "profile.managed_default_content_settings.media_stream": 2,
}
chrome_options.add_experimental_option("prefs", prefs)

# Start the browser session shared by every function in this file.
driver = webdriver.Chrome(options=chrome_options)
|
|
|
|
def wait_for_page_load(driver, timeout=20):
    """Wait until the document is at least interactive, or give up quietly.

    Polls ``document.readyState`` through ``driver.execute_script`` until it
    reports ``"interactive"`` or ``"complete"``. If that has not happened
    after ``timeout`` seconds, a notice is printed and the function returns
    normally so the caller can continue with whatever has loaded.
    """
    usable_states = ("interactive", "complete")

    def dom_is_usable(browser):
        # Predicate evaluated repeatedly by WebDriverWait.
        return browser.execute_script("return document.readyState") in usable_states

    waiter = WebDriverWait(driver, timeout)
    try:
        waiter.until(dom_is_usable)
    except TimeoutException:
        print(" → Page load timed out, continuing anyway...")
|
|
|
|
def is_download_finished(download_path, timeout=120, check_interval=2, existing_files=None):
    """Poll ``download_path`` until a finished download is present.

    A file counts as "partial" when its name ends in ``.crdownload``,
    ``.part`` or ``.tmp``; every other entry counts as "completed". The
    download is considered finished once at least one completed file exists
    and no partial file is left.

    Args:
        download_path: Directory to watch.
        timeout: Maximum number of seconds to keep polling.
        check_interval: Seconds to sleep between two directory listings.
        existing_files: Optional iterable of file names that were already in
            the directory before the download started. They are left out of
            the completed list, so leftovers from earlier downloads cannot be
            mistaken for the new one. Partial files are never filtered.

    Returns:
        ``(True, completed_files)`` as soon as the download is finished,
        otherwise ``(False, [])`` once ``timeout`` has elapsed.
    """
    partial_suffixes = ('.crdownload', '.part', '.tmp')
    ignored = frozenset(existing_files or ())

    # Monotonic clock: the deadline is unaffected by system clock adjustments.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        # List the directory once per poll so both lists describe the same
        # instant; two separate listings could disagree mid-download.
        entries = os.listdir(download_path)
        partial_files = [f for f in entries if f.endswith(partial_suffixes)]
        completed_files = [
            f for f in entries
            if not f.endswith(partial_suffixes) and f not in ignored
        ]

        if completed_files and not partial_files:
            return True, completed_files

        if partial_files:
            print(f" → Still downloading... ({len(partial_files)} partial)")

        # Never sleep past the deadline.
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        time.sleep(min(check_interval, remaining))
    return False, []
|
|
|
|
def clear_partial_downloads(download_path=None):
    """Delete leftover partial-download files from the download folder.

    Removes every entry whose name ends in ``.crdownload``, ``.part`` or
    ``.tmp``. Cleanup is best-effort: an entry that cannot be removed is
    reported and skipped instead of aborting the run.

    Args:
        download_path: Directory to clean. Defaults to ``DOWNLOAD_PATH``.
    """
    if download_path is None:
        download_path = DOWNLOAD_PATH

    for filename in os.listdir(download_path):
        if not filename.endswith(('.crdownload', '.part', '.tmp')):
            continue
        try:
            os.remove(os.path.join(download_path, filename))
        except OSError as exc:
            # Only filesystem errors are tolerated (file in use, already gone,
            # a directory with a matching name). Anything else, including
            # KeyboardInterrupt, propagates to the caller.
            print(f" → Could not remove partial file {filename}: {exc}")
|
|
|
|
def log_result(url: str, success: bool):
    """Append the outcome for ``url`` to LOG_FILE and echo it to the console.

    The log line has the form ``<url> | True`` or ``<url> | False``; the
    console shows SUCCESS or FAILED accordingly.
    """
    if success:
        status, label = "True", "SUCCESS"
    else:
        status, label = "False", "FAILED"

    with open(LOG_FILE, "a", encoding="utf-8") as log_file:
        log_file.write(f"{url} | {status}\n")

    print(f" Logged: {label}")
|
|
|
|
def _wait_for_new_download(known_files, timeout):
    """Return the files that appeared in DOWNLOAD_PATH since ``known_files`` was taken.

    Keeps polling for up to ``timeout`` seconds and returns an empty list if
    nothing new has finished downloading by then.
    """
    deadline = time.monotonic() + timeout
    remaining = timeout
    while remaining > 0:
        finished, files = is_download_finished(DOWNLOAD_PATH, timeout=remaining)
        new_files = [name for name in files if name not in known_files]
        if finished and new_files:
            return new_files
        # Only files from earlier downloads are present so far, meaning the
        # browser has not started writing this one yet. Pause, then look again.
        time.sleep(min(1, max(0, deadline - time.monotonic())))
        remaining = deadline - time.monotonic()
    return []


def run_download(url: str):
    """Open ``url``, accept the licence if offered, and trigger the download.

    Steps: remove stale partial files, load the page, tick the
    ``license_agree`` checkbox when it is present, click the
    ``jd_license_submit`` button, then wait for a new file to show up in
    DOWNLOAD_PATH. Every outcome is recorded through ``log_result``.

    Args:
        url: Page that hosts the download button.

    Returns:
        True when a new file finished downloading, False in every other case
        (button missing, timeout, WebDriver error or any other exception).
    """
    download_timeout = 30  # seconds to wait for the file after the click

    try:
        print(f"\n[{time.strftime('%H:%M:%S')}] Processing → {url}")
        clear_partial_downloads()

        # Remember what is already in the folder: files from earlier URLs
        # must not be mistaken for this URL's download.
        known_files = set(os.listdir(DOWNLOAD_PATH))

        print(" Navigating...")
        driver.get(url)

        wait_for_page_load(driver, timeout=20)
        print(" ✓ Page loaded (eager).")

        wait = WebDriverWait(driver, 5)  # short wait: the elements are optional or expected quickly

        # === License checkbox (quick check) ===
        try:
            checkbox = wait.until(EC.presence_of_element_located((By.NAME, "license_agree")))
            if checkbox.is_enabled() and checkbox.is_displayed():
                checkbox.click()
                print(" ✓ License checkbox accepted.")
                time.sleep(0.5)
        except (TimeoutException, NoSuchElementException):
            print(" → No checkbox found. Continuing...")

        # === Download button (quick detection) ===
        try:
            download_btn = wait.until(EC.element_to_be_clickable((By.ID, "jd_license_submit")))
            download_btn.click()
            print(" ✓ Download button clicked.")
        except (TimeoutException, NoSuchElementException):
            print(" ✗ Download button NOT found or not clickable → Skipping")
            log_result(url, False)
            return False

        # === Wait for download ===
        # The message is derived from the real timeout so the two cannot drift apart.
        print(f" Waiting for download (max {download_timeout}s)...")
        files = _wait_for_new_download(known_files, download_timeout)
        if files:
            print(f" ✓ Download completed! Files: {files}")
            log_result(url, True)
            return True

        print(" ✗ Download timed out.")
        log_result(url, False)
        return False

    except WebDriverException as e:
        print(f" ✗ WebDriver error: {e}")
        log_result(url, False)
        return False
    except Exception as e:
        # Last-resort boundary: one bad URL must not stop the whole batch.
        print(f" ✗ Unexpected error: {e}")
        log_result(url, False)
        return False
|
|
|
|
# ====================== MAIN ======================
if __name__ == "__main__":
    # Number of leading URLs to skip. Kept in one place so the slice and the
    # start-up message always agree.
    START_INDEX = 780

    try:
        # One URL per line; blank lines are ignored.
        with open("url-final.txt", "r", encoding="utf-8") as f:
            links = [line.strip() for line in f if line.strip()]

        links = links[START_INDEX:]
        print(f"Loaded {len(links)} URLs. Starting from index {START_INDEX}.")

        success_count = 0
        for i, url in enumerate(links, 1):
            print(f"--- [{i}/{len(links)}] ---")
            if run_download(url):
                success_count += 1
            time.sleep(3)  # pause between requests

        print("\n=== FINISHED ===")
        print(f"Total: {len(links)} | Success: {success_count} | Failed: {len(links) - success_count}")
        print(f"Log: {LOG_FILE}")
    finally:
        # Always close the browser, even after Ctrl+C or a missing URL file.
        driver.quit()