diff --git a/flatten-step3.py b/flatten-step3.py
index 86f03ca..83b93e2 100644
--- a/flatten-step3.py
+++ b/flatten-step3.py
@@ -1,61 +1,61 @@
-import shutil
-from pathlib import Path
-
-def flatten_gpx_files(source_dir: str = ".", target_dir: str = "ALL_GPX"):
-    """
-    Find all .gpx files under source_dir (including subfolders)
-    and copy them into a single flat folder.
-    """
-    source_path = Path(source_dir).resolve()
-    target_path = Path(target_dir).resolve()
-
-    # Create target folder
-    target_path.mkdir(parents=True, exist_ok=True)
-
-    print(f"Searching for .gpx files in: {source_path}")
-    print(f"Copying to flat folder: {target_path}\n")
-
-    gpx_files = list(source_path.rglob("*.gpx"))
-    
-    if not gpx_files:
-        print("No .gpx files found.")
-        return
-
-    copied = 0
-    for gpx_file in gpx_files:
-        # New filename: original_name__parent_folder.gpx  (helps avoid name collisions)
-        parent_name = gpx_file.parent.name
-        new_name = f"{gpx_file.stem}__{parent_name}{gpx_file.suffix}"
-        
-        destination = target_path / new_name
-
-        # If filename already exists, add a number
-        counter = 1
-        while destination.exists():
-            destination = target_path / f"{gpx_file.stem}__{parent_name}_{counter}{gpx_file.suffix}"
-            counter += 1
-
-        try:
-            shutil.copy2(gpx_file, destination)
-            print(f"Copied: {gpx_file.name}  →  {new_name}")
-            copied += 1
-        except Exception as e:
-            print(f"Failed {gpx_file.name}: {e}")
-
-    print("\n" + "="*50)
-    print(f"Done! {copied} .gpx files flattened into '{target_path.name}/'")
-    print("="*50)
-
-
-if __name__ == "__main__":
-    import argparse
-    
-    parser = argparse.ArgumentParser(description="Flatten all .gpx files into one folder")
-    parser.add_argument("source", nargs="?", default=".", 
-                        help="Source directory to search (default: current)")
-    parser.add_argument("-o", "--output", default="ALL_GPX", 
-                        help="Output folder name (default: ALL_GPX)")
-    
-    args = parser.parse_args()
-    
+import shutil
+from pathlib import Path
+
+def flatten_gpx_files(source_dir: str = ".", target_dir: str = "ALL_GPX"):
+    """
+    Copy every .gpx file under source_dir (recursively) into one flat folder.
+
+    Each copy is named "<stem>__<parent folder><suffix>"; "_<n>" is appended if
+    that name is taken. Files already inside target_dir are never picked up, so
+    re-running with the target nested in the source does not duplicate output.
+    """
+    source_path = Path(source_dir).resolve()
+    target_path = Path(target_dir).resolve()
+
+    # Create target folder
+    target_path.mkdir(parents=True, exist_ok=True)
+
+    print(f"Searching for .gpx files in: {source_path}")
+    print(f"Copying to flat folder: {target_path}\n")
+
+    # Leave out results of earlier runs that live inside the target folder.
+    gpx_files = [p for p in source_path.rglob("*.gpx")
+                 if p.is_file() and target_path not in p.parents]
+    if not gpx_files:
+        print("No .gpx files found.")
+        return
+
+    copied = 0
+    for gpx_file in gpx_files:
+        base = f"{gpx_file.stem}__{gpx_file.parent.name}"
+        destination = target_path / f"{base}{gpx_file.suffix}"
+        # If filename already exists, add a number
+        counter = 1
+        while destination.exists():
+            destination = target_path / f"{base}_{counter}{gpx_file.suffix}"
+            counter += 1
+
+        try:
+            shutil.copy2(gpx_file, destination)
+            print(f"Copied: {gpx_file.name}  →  {destination.name}")
+            copied += 1
+        except OSError as e:
+            print(f"Failed {gpx_file.name}: {e}")
+
+    print("\n" + "="*50)
+    print(f"Done! {copied} .gpx files flattened into '{target_path.name}/'")
+    print("="*50)
+
+
+if __name__ == "__main__":
+    import argparse
+    # CLI: optional positional source directory plus -o/--output for the flat folder.
+    parser = argparse.ArgumentParser(description="Flatten all .gpx files into one folder")
+    parser.add_argument("source", nargs="?", default=".", 
+                        help="Source directory to search (default: current)")
+    parser.add_argument("-o", "--output", default="ALL_GPX", 
+                        help="Output folder name (default: ALL_GPX)")
+    
+    args = parser.parse_args()
+    # Both paths are resolved to absolute ones inside flatten_gpx_files().
     flatten_gpx_files(args.source, args.output)
\ No newline at end of file
diff --git a/parse-archive-structure-step-2.py b/parse-archive-structure-step-2.py
index ccf122d..0f10a25 100644
--- a/parse-archive-structure-step-2.py
+++ b/parse-archive-structure-step-2.py
@@ -1,100 +1,100 @@
-import os
-from pathlib import Path
-
-def rename_files_in_folders(root_dir: str = ".", recursive: bool = True, dry_run: bool = False):
-    """
-    Rename files by appending the top-level folder name.
-    
-    Example: image.jpg inside 'extracted_MyBackup' becomes image_MyBackup.jpg
-    """
-    base_path = Path(root_dir).resolve()
-    
-    if not base_path.exists():
-        print(f"Error: Folder '{base_path}' does not exist.")
-        return
-    
-    print(f"Scanning folder: {base_path}")
-    if dry_run:
-        print("*** DRY RUN MODE - No files will be renamed ***\n")
-    
-    renamed_count = 0
-    skipped_count = 0
-    
-    # Walk through all directories
-    for dir_path in base_path.rglob("*") if recursive else base_path.iterdir():
-        if not dir_path.is_dir():
-            continue
-            
-        folder_name = dir_path.name
-        
-        # Skip root folder itself and hidden folders
-        if folder_name.startswith('.') or folder_name == base_path.name:
-            continue
-            
-        print(f"\nProcessing folder: {folder_name}")
-        
-        for file_path in dir_path.iterdir():
-            if not file_path.is_file():
-                continue
-                
-            # Get file components
-            original_name = file_path.stem      # filename without extension
-            ext = file_path.suffix              # .jpg, .png, etc.
-            
-            # Skip if file already has the folder name (to avoid double renaming)
-            if folder_name in original_name:
-                print(f"  Skipped (already processed): {file_path.name}")
-                skipped_count += 1
-                continue
-            
-            # New filename: originalName_folderName.ext
-            new_name = f"{original_name}_{folder_name}{ext}"
-            new_path = file_path.parent / new_name
-            
-            # Check if target file already exists
-            if new_path.exists():
-                print(f"  Warning: Target already exists → {new_name}")
-                skipped_count += 1
-                continue
-            
-            try:
-                if dry_run:
-                    print(f"  Would rename: {file_path.name}  →  {new_name}")
-                else:
-                    file_path.rename(new_path)
-                    print(f"  Renamed: {file_path.name}  →  {new_name}")
-                renamed_count += 1
-            except Exception as e:
-                print(f"  Error renaming {file_path.name}: {e}")
-                skipped_count += 1
-    
-    print("\n" + "="*60)
-    print("Renaming completed!")
-    print(f"Files renamed:     {renamed_count}")
-    print(f"Files skipped:     {skipped_count}")
-    if dry_run:
-        print("This was a DRY RUN — no actual changes were made.")
-    print("="*60)
-
-
-if __name__ == "__main__":
-    import argparse
-    
-    parser = argparse.ArgumentParser(
-        description="Rename files by appending their top-level folder name."
-    )
-    parser.add_argument("folder", nargs="?", default=".", 
-                        help="Root folder to process (default: current directory)")
-    parser.add_argument("-r", "--recursive", action="store_true", 
-                        help="Process subfolders recursively (recommended)")
-    parser.add_argument("--dry-run", action="store_true",
-                        help="Show what would be renamed without making changes")
-    
-    args = parser.parse_args()
-    
-    # By default we enable recursive since you usually want to process extracted_ folders
-    rename_files_in_folders(
-        root_dir=args.folder,
-        recursive=args.recursive if hasattr(args, 'recursive') else True,
-        dry_run=args.dry_run
+import os
+from pathlib import Path
+
+def rename_files_in_folders(root_dir: str = ".", recursive: bool = True, dry_run: bool = False):
+    """
+    Append the containing folder's name to every file in each subfolder.
+
+    Example: image.jpg inside 'extracted_MyBackup' becomes
+    image_extracted_MyBackup.jpg. Files directly in root_dir are left alone.
+
+    Args:
+        root_dir: Folder whose subfolders are processed.
+        recursive: Walk all nested subfolders instead of only the direct children.
+        dry_run: Report the planned renames without touching any file.
+    """
+    base_path = Path(root_dir).resolve()
+    if not base_path.exists():
+        print(f"Error: Folder '{base_path}' does not exist.")
+        return
+
+    print(f"Scanning folder: {base_path}")
+    if dry_run:
+        print("*** DRY RUN MODE - No files will be renamed ***\n")
+
+    renamed_count = 0
+    skipped_count = 0
+
+    # Walk through all directories
+    for dir_path in base_path.rglob("*") if recursive else base_path.iterdir():
+        if not dir_path.is_dir():
+            continue
+
+        # Skip hidden folders and everything nested below one (e.g. .git/objects).
+        if any(part.startswith('.') for part in dir_path.relative_to(base_path).parts):
+            continue
+
+        folder_name = dir_path.name
+        tag = f"_{folder_name}"
+        print(f"\nProcessing folder: {folder_name}")
+
+        # Snapshot the listing first: files are renamed inside this very folder.
+        for file_path in sorted(dir_path.iterdir()):
+            if not file_path.is_file():
+                continue
+
+            # Already tagged by an earlier run? Compare the exact suffix, not a substring.
+            if file_path.stem.endswith(tag):
+                print(f"  Skipped (already processed): {file_path.name}")
+                skipped_count += 1
+                continue
+
+            new_name = f"{file_path.stem}{tag}{file_path.suffix}"
+            new_path = file_path.with_name(new_name)
+            # Check if target file already exists
+            if new_path.exists():
+                print(f"  Warning: Target already exists → {new_name}")
+                skipped_count += 1
+                continue
+
+            try:
+                if dry_run:
+                    print(f"  Would rename: {file_path.name}  →  {new_name}")
+                else:
+                    file_path.rename(new_path)
+                    print(f"  Renamed: {file_path.name}  →  {new_name}")
+                renamed_count += 1
+            except OSError as e:
+                print(f"  Error renaming {file_path.name}: {e}")
+                skipped_count += 1
+
+    print("\n" + "="*60)
+    print("Renaming completed!")
+    print(f"Files renamed:     {renamed_count}")
+    print(f"Files skipped:     {skipped_count}")
+    if dry_run:
+        print("This was a DRY RUN — no actual changes were made.")
+    print("="*60)
+
+
+if __name__ == "__main__":
+    import argparse
+
+    parser = argparse.ArgumentParser(
+        description="Rename files by appending their top-level folder name."
+    )
+    parser.add_argument("folder", nargs="?", default=".",
+                        help="Root folder to process (default: current directory)")
+    # Recursion is on by default (store_true alone defaulted to False); opt out explicitly.
+    parser.add_argument("-r", "--recursive", action="store_true", default=True,
+                        help="Process subfolders recursively (default)")
+    parser.add_argument("--no-recursive", dest="recursive", action="store_false",
+                        help="Only process the immediate subfolders of the root")
+    parser.add_argument("--dry-run", action="store_true",
+                        help="Show what would be renamed without making changes")
+    args = parser.parse_args()
+    rename_files_in_folders(
+        root_dir=args.folder,
+        recursive=args.recursive,
+        dry_run=args.dry_run
     )
\ No newline at end of file
diff --git a/parse-archives.py b/parse-archives.py
index 7bb1eda..d575d5c 100644
--- a/parse-archives.py
+++ b/parse-archives.py
@@ -1,75 +1,75 @@
-import os
-import sys
-from pathlib import Path
-
-# Install these first:
-# pip install patool py7zr rarfile
-
-try:
-    from patoolib import extract_archive
-except ImportError:
-    print("Error: 'patool' is not installed. Run: pip install patool py7zr rarfile")
-    sys.exit(1)
-
-def extract_archive_to_folder(archive_path: Path, base_dir: Path):
-    """Extract a single archive to extracted_{name} folder."""
-    # Get archive name without extension
-    name_without_ext = archive_path.stem
-    extract_dir = base_dir / f"bg_mountains_{name_without_ext}"
-    
-    # Create the output directory if it doesn't exist
-    extract_dir.mkdir(parents=True, exist_ok=True)
-    
-    print(f"Extracting: {archive_path.name} → {extract_dir.name}/")
-    
-    try:
-        # patool automatically detects format (zip, rar, 7z, etc.)
-        extract_archive(str(archive_path), outdir=str(extract_dir), verbosity=0)
-        print(f"✓ Successfully extracted: {archive_path.name}\n")
-    except Exception as e:
-        print(f"✗ Failed to extract {archive_path.name}: {e}\n")
-
-def main(folder_path: str = ".", recursive: bool = False):
-    base_dir = Path(folder_path).resolve()
-    
-    if not base_dir.exists():
-        print(f"Error: Folder '{base_dir}' does not exist.")
-        return
-    
-    print(f"Scanning for archives in: {base_dir}\n")
-    
-    # Supported extensions
-    extensions = {'.zip', '.rar', '.7z'}
-    
-    # Find all matching archives
-    if recursive:
-        archive_files = [p for p in base_dir.rglob("*") if p.is_file() and p.suffix.lower() in extensions]
-    else:
-        archive_files = [p for p in base_dir.iterdir() if p.is_file() and p.suffix.lower() in extensions]
-    
-    if not archive_files:
-        print("No .zip, .rar, or .7z files found.")
-        return
-    
-    print(f"Found {len(archive_files)} archive(s).\n")
-    
-    for archive in sorted(archive_files):
-        extract_archive_to_folder(archive, base_dir)
-    
-    print("Extraction process completed!")
-
-if __name__ == "__main__":
-    # Usage examples:
-    # python extract_archives.py                    # current folder, non-recursive
-    # python extract_archives.py "/path/to/folder"   # specific folder
-    # python extract_archives.py "/path/to/folder" --recursive
-    
-    import argparse
-    
-    parser = argparse.ArgumentParser(description="Extract .zip/.rar/.7z archives into separate folders.")
-    parser.add_argument("folder", nargs="?", default=".", help="Folder to scan (default: current directory)")
-    parser.add_argument("-r", "--recursive", action="store_true", help="Search subfolders recursively")
-    
-    args = parser.parse_args()
-    
+import os
+import sys
+from pathlib import Path
+
+# Install these first:
+# pip install patool py7zr rarfile
+
+try:
+    from patoolib import extract_archive
+except ImportError:
+    print("Error: 'patool' is not installed. Run: pip install patool py7zr rarfile")
+    sys.exit(1)
+
+def extract_archive_to_folder(archive_path: Path, base_dir: Path):
+    """Extract a single archive into base_dir / "bg_mountains_<archive stem>"."""
+    # Only the final suffix is dropped, so a.zip and a.rar map to the same folder.
+    name_without_ext = archive_path.stem
+    extract_dir = base_dir / f"bg_mountains_{name_without_ext}"
+    
+    # Create the output directory if it doesn't exist
+    extract_dir.mkdir(parents=True, exist_ok=True)
+    
+    print(f"Extracting: {archive_path.name} → {extract_dir.name}/")
+    
+    try:
+        # patool automatically detects format (zip, rar, 7z, etc.)
+        extract_archive(str(archive_path), outdir=str(extract_dir), verbosity=0)
+        print(f"✓ Successfully extracted: {archive_path.name}\n")
+    except Exception as e:  # best effort: report the failure and return normally
+        print(f"✗ Failed to extract {archive_path.name}: {e}\n")
+
+def main(folder_path: str = ".", recursive: bool = False):
+    """Find .zip/.rar/.7z files under folder_path and extract each one.
+
+    With recursive=True subfolders are searched too; every archive is still
+    extracted into a folder created directly inside folder_path.
+    """
+    base_dir = Path(folder_path).resolve()
+    if not base_dir.exists():
+        print(f"Error: Folder '{base_dir}' does not exist.")
+        return
+
+    print(f"Scanning for archives in: {base_dir}\n")
+
+    # Supported extensions (suffixes are lower-cased before the lookup)
+    extensions = {'.zip', '.rar', '.7z'}
+    candidates = base_dir.rglob("*") if recursive else base_dir.iterdir()
+    archive_files = [
+        entry for entry in candidates
+        if entry.is_file() and entry.suffix.lower() in extensions
+    ]
+    if not archive_files:
+        print("No .zip, .rar, or .7z files found.")
+        return
+
+    print(f"Found {len(archive_files)} archive(s).\n")
+    for archive in sorted(archive_files):
+        extract_archive_to_folder(archive, base_dir)
+    print("Extraction process completed!")
+
+if __name__ == "__main__":
+    # Usage examples:
+    # python parse-archives.py                    # current folder, non-recursive
+    # python parse-archives.py "/path/to/folder"   # specific folder
+    # python parse-archives.py "/path/to/folder" --recursive
+    
+    import argparse
+    
+    parser = argparse.ArgumentParser(description="Extract .zip/.rar/.7z archives into separate folders.")
+    parser.add_argument("folder", nargs="?", default=".", help="Folder to scan (default: current directory)")
+    parser.add_argument("-r", "--recursive", action="store_true", help="Search subfolders recursively")
+    
+    args = parser.parse_args()
+    
     main(args.folder, args.recursive)
\ No newline at end of file
diff --git a/readme.md b/readme.md
new file mode 100644
index 0000000..9e4968d
--- /dev/null
+++ b/readme.md
@@ -0,0 +1,1125 @@
+Save this as **`jd_parallel_downloader.py`**.
+
+It uses **3–4 separate Firefox instances**, one per worker, captures `div.jd_summary_list`, downloads the file, and writes a crash-safe registry with:
+
+`url`, `downloaded_filepath`, `from_remote_metadata`, `internal_hash`
+
+It also stores `download_file_sha256` separately.
+
+```python
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""
+Parallel Selenium Firefox downloader with source registry mapping.
+
+Main outputs:
+  output/source_registry.jsonl   # crash-safe full registry
+  output/source_registry.csv     # combined table
+  output/source_registry.md      # markdown mapping table
+  downloads_by_hash/R_<HASH>/... # downloaded files grouped by internal hash
+
+Run example:
+  python jd_parallel_downloader.py --input url-final.txt --start 780 --workers 4 --proxy 192.168.0.38:1080
+
+Headless:
+  python jd_parallel_downloader.py --input url-final.txt --start 780 --workers 4 --proxy 192.168.0.38:1080 --headless
+
+Resume behavior:
+  - Successful URLs already present in output/source_registry.jsonl are skipped.
+  - Failed URLs are retried.
+"""
+
+from __future__ import annotations
+
+import argparse
+import csv
+import hashlib
+import json
+import os
+import re
+import shutil
+import sys
+import time
+import traceback
+from concurrent.futures import ProcessPoolExecutor, as_completed
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Dict, Iterable, List, Optional, Sequence, Tuple
+from urllib.parse import urldefrag, urlsplit, urlunsplit
+
+from selenium import webdriver
+from selenium.common.exceptions import (
+    ElementClickInterceptedException,
+    JavascriptException,
+    NoSuchElementException,
+    TimeoutException,
+    WebDriverException,
+)
+from selenium.webdriver.common.by import By
+from selenium.webdriver.firefox.options import Options as FirefoxOptions
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.support.ui import WebDriverWait
+
+
+# ============================================================
+# Defaults
+# ============================================================
+
+DEFAULT_OUTPUT_DIR = "output"
+DEFAULT_DOWNLOAD_ROOT = "downloads_by_hash"
+DEFAULT_WORKDIR = "worker_runtime"
+DEFAULT_WORKERS = 4
+
+PARTIAL_SUFFIXES = (  # a file name ending in one of these is treated as still downloading
+    ".part",
+    ".crdownload",
+    ".tmp",
+)
+
+DOWNLOAD_MIME_TYPES = ",".join(  # comma-separated list handed to Firefox's neverAsk.* preferences
+    [
+        "application/octet-stream",
+        "application/x-garmin-gdb",
+        "application/gdb",
+        "application/gpx+xml",
+        "application/xml",
+        "text/xml",
+        "text/plain",
+        "application/zip",
+        "application/x-zip-compressed",
+        "application/x-gzip",
+        "application/gzip",
+        "application/vnd.google-earth.kml+xml",
+        "application/vnd.google-earth.kmz",
+        "application/x-msdownload",
+        "binary/octet-stream",
+    ]
+)
+
+
+# ============================================================
+# Small helpers
+# ============================================================
+
+def utc_now_iso() -> str:  # current UTC time as ISO-8601, whole seconds, "+00:00" offset
+    return datetime.now(tz=timezone.utc).replace(microsecond=0).isoformat()
+
+
+def ensure_dir(path: Path) -> Path:  # create path plus missing parents; an existing dir is fine
+    path.mkdir(parents=True, exist_ok=True)
+    return path  # the same Path comes back, so the call can be used inline
+
+
+def normalize_url(url: str) -> str:
+    """
+    Return a canonical form of url, used to derive stable deterministic IDs.
+
+    Surrounding whitespace and the #fragment are dropped (the fragment normally
+    does not affect server-side download identity), scheme and netloc are
+    lower-cased, and path and query string are kept verbatim because download
+    pages often identify resources through query params.
+    """
+    without_fragment, _fragment = urldefrag(url.strip())
+    scheme, netloc, path, query, _ = urlsplit(without_fragment)
+
+    # Only scheme and netloc are case-folded; path and query pass through as-is.
+    lowered = (scheme.lower(), netloc.lower(), path, query, "")
+    return urlunsplit(lowered)
+
+
+def make_internal_hash(url: str) -> str:
+    """
+    Build the stable registry ID "R_<16 upper-case hex chars>" for url.
+
+    The ID is the truncated SHA-256 of the normalized source URL, never of the
+    file contents, so writing R_<HASH> into GPX metadata later cannot
+    invalidate the mapping ID itself.
+    """
+    canonical = normalize_url(url).encode("utf-8")
+    short_hex = hashlib.sha256(canonical).hexdigest()[:16]
+    return "R_" + short_hex.upper()
+
+
+def sha256_file(path: Path, chunk_size: int = 1024 * 1024) -> str:
+    """Return the hex SHA-256 of the file at path, read chunk_size bytes at a time."""
+    hasher = hashlib.sha256()
+    with path.open("rb") as stream:
+        # iter(callable, sentinel) keeps calling read() until it returns b"" (EOF).
+        for block in iter(lambda: stream.read(chunk_size), b""):
+            hasher.update(block)
+
+    return hasher.hexdigest()
+
+
+def safe_filename(name: str, fallback: str = "download.bin") -> str:
+    """
+    Return name with path separators, control characters and the characters
+    reserved on Windows replaced by "_" and whitespace runs collapsed to one
+    space. fallback is returned when nothing usable remains, including for "."
+    and "..", which would address a directory instead of naming a file.
+    """
+    cleaned = re.sub(r'[\\/]|[\x00-\x1f\x7f]+|[:*?"<>|]+', "_", name.strip())
+    cleaned = re.sub(r"\s+", " ", cleaned).strip()
+
+    return fallback if cleaned in ("", ".", "..") else cleaned
+
+
+def json_dumps_compact(obj) -> str:  # single-line JSON: sorted keys, no spaces after separators
+    return json.dumps(obj, ensure_ascii=False, sort_keys=True, separators=(",", ":"))
+
+
+def write_jsonl_line(path: Path, record: dict) -> None:
+    """Append record to path as one JSON line, flushed and fsync'ed before returning."""
+    ensure_dir(path.parent)
+    with path.open("a", encoding="utf-8", newline="\n") as out:
+        out.write(json.dumps(record, ensure_ascii=False, sort_keys=True) + "\n")
+        out.flush()
+        os.fsync(out.fileno())
+
+
+def read_jsonl(path: Path) -> List[dict]:
+    """Load path as JSON Lines; a missing file yields an empty list."""
+    parsed: List[dict] = []
+    if not path.exists():
+        return parsed
+
+    with path.open("r", encoding="utf-8") as handle:
+        for line_no, raw in enumerate(handle, 1):
+            text = raw.strip()
+            if text:  # blank lines are skipped without a warning
+                try:
+                    parsed.append(json.loads(text))
+                except Exception:
+                    print(f"[WARN] Broken JSONL line ignored: {path}:{line_no}", file=sys.stderr)
+    return parsed
+
+
+def chunk_evenly(items: Sequence[dict], workers: int) -> List[List[dict]]:
+    """Deal items round-robin into at most `workers` non-empty chunks."""
+    # A worker count below 1 is treated as 1; it used to end in "i % 0" (ZeroDivisionError).
+    pool, workers = list(items), max(1, workers)
+    return [chunk for chunk in (pool[k::workers] for k in range(workers)) if chunk]
+
+
+def truncate_text(text: str, max_len: int = 350) -> str:
+    """Collapse whitespace in text and cap it at max_len chars ("..." marks a cut)."""
+    text = re.sub(r"\s+", " ", text or "").strip()
+    cut = text[: max(0, max_len)] if max_len < 3 else text[: max_len - 3] + "..."
+    return text if len(text) <= max_len else cut
+
+
+# ============================================================
+# Firefox setup
+# ============================================================
+
+@dataclass
+class WorkerConfig:  # per-worker settings; build_firefox_driver() reads the commented fields
+    worker_id: int
+    proxy_host: Optional[str]  # SOCKS5 proxy host; the proxy is set up only when host and port are both truthy
+    proxy_port: Optional[int]
+    headless: bool  # adds "--headless" to the Firefox arguments
+    page_load_timeout: int  # passed to driver.set_page_load_timeout()
+    element_timeout: int
+    download_timeout: int
+    delay_between_urls: float
+    workdir: str
+    output_dir: str
+    download_root: str
+    user_agent: str  # a non-empty value is written to general.useragent.override
+
+
+def build_firefox_driver(cfg: WorkerConfig, temp_download_dir: Path) -> webdriver.Firefox:
+    """Start a Firefox WebDriver configured from cfg that saves downloads into temp_download_dir."""
+    ensure_dir(temp_download_dir)
+    options = FirefoxOptions()
+    options.page_load_strategy = "eager"
+
+    if cfg.headless:
+        options.add_argument("--headless")
+
+    # Optional user-agent override (skipped when cfg.user_agent is empty)
+    if cfg.user_agent:
+        options.set_preference("general.useragent.override", cfg.user_agent)
+
+    # Manual SOCKS5 proxy with remote DNS, only when host and port are both set
+    if cfg.proxy_host and cfg.proxy_port:
+        options.set_preference("network.proxy.type", 1)
+        options.set_preference("network.proxy.socks", cfg.proxy_host)
+        options.set_preference("network.proxy.socks_port", int(cfg.proxy_port))
+        options.set_preference("network.proxy.socks_version", 5)
+        options.set_preference("network.proxy.socks_remote_dns", True)
+
+    # Downloads: save into temp_download_dir without prompting for the listed MIME types
+    options.set_preference("browser.download.folderList", 2)
+    options.set_preference("browser.download.dir", str(temp_download_dir.resolve()))
+    options.set_preference("browser.download.useDownloadDir", True)
+    options.set_preference("browser.download.manager.showWhenStarting", False)
+    options.set_preference("browser.download.alwaysOpenPanel", False)
+    options.set_preference("browser.helperApps.neverAsk.saveToDisk", DOWNLOAD_MIME_TYPES)
+    options.set_preference("browser.helperApps.neverAsk.openFile", DOWNLOAD_MIME_TYPES)
+    options.set_preference("pdfjs.disabled", True)
+
+    # Speed / lighter pages
+    options.set_preference("permissions.default.image", 2)
+    options.set_preference("gfx.downloadable_fonts.enabled", False)
+    options.set_preference("media.autoplay.default", 5)
+    options.set_preference("dom.ipc.plugins.enabled.libflashplayer.so", False)
+
+    # Reduce automation noise; not guaranteed stealth, just less obvious.
+    options.set_preference("dom.webdriver.enabled", False)
+    options.set_preference("useAutomationExtension", False)
+
+    driver = webdriver.Firefox(options=options)
+    driver.set_page_load_timeout(cfg.page_load_timeout)
+    return driver
+
+
+# ============================================================
+# Selenium page logic
+# ============================================================
+
+def wait_for_page_load(driver: webdriver.Firefox, timeout: int = 20) -> None:
+    try:  # poll document.readyState until it is "interactive" or "complete"
+        WebDriverWait(driver, timeout).until(
+            lambda d: d.execute_script("return document.readyState") in ("interactive", "complete")
+        )
+    except TimeoutException:
+        pass  # not ready within `timeout` seconds: return anyway without raising
+
+
+def click_element(driver, element) -> bool:
+    """
+    Click element natively and fall back to a JavaScript click if that raises.
+    Returns True when either click went through, False when both raised; the
+    fallback is guarded the same way whatever the native click failed with.
+    """
+    try:
+        element.click()
+        return True
+    except Exception:
+        pass  # e.g. intercepted by an overlay: retry through JavaScript below
+    try:
+        driver.execute_script("arguments[0].click();", element)
+        return True
+    except Exception:
+        return False
+
+
+def extract_jd_summary_metadata(driver: webdriver.Firefox) -> Dict:
+    """
+    Collect the page title and the text, inner HTML and links of every div.jd_summary_list.
+
+    Every Selenium lookup is best effort: a failing one leaves its default value in place.
+    """
+    metadata = {
+        "page_title": "",
+        "jd_summary_count": 0,
+        "jd_summary_items": [],
+    }
+
+    try:
+        metadata["page_title"] = driver.title or ""
+    except Exception:
+        pass
+
+    try:
+        elements = driver.find_elements(By.CSS_SELECTOR, "div.jd_summary_list")
+    except Exception:
+        elements = []  # a failed lookup is recorded as "no summary blocks"
+
+    metadata["jd_summary_count"] = len(elements)
+    # One item per matched block, in the order find_elements() returned them.
+    for idx, el in enumerate(elements):
+        item = {
+            "index": idx,
+            "text": "",
+            "html": "",
+            "links": [],
+        }
+
+        try:
+            item["text"] = (el.text or "").strip()
+        except Exception:
+            pass
+
+        try:
+            item["html"] = el.get_attribute("innerHTML") or ""
+        except Exception:
+            pass
+
+        try:
+            links = el.find_elements(By.CSS_SELECTOR, "a")
+            for a in links:
+                try:
+                    item["links"].append(
+                        {
+                            "text": (a.text or "").strip(),
+                            "href": a.get_attribute("href") or "",
+                        }
+                    )
+                except Exception:
+                    continue  # skip only this anchor; the remaining links are still collected
+        except Exception:
+            pass
+        # The item is appended even when some or all of its lookups failed.
+        metadata["jd_summary_items"].append(item)
+
+    return metadata
+
+
+def accept_license_if_present(driver: webdriver.Firefox, timeout: int) -> bool:
+    wait = WebDriverWait(driver, timeout)
+    try:
+        checkbox = wait.until(EC.presence_of_element_located((By.NAME, "license_agree")))
+    except TimeoutException:
+        return False
+    # True only if this call clicked the box; an already-ticked box also yields False.
+    try:
+        if checkbox.is_enabled() and checkbox.is_displayed() and not checkbox.is_selected():
+            return click_element(driver, checkbox)
+    except Exception:
+        return False
+    # NOTE(review): callers cannot tell "already accepted" from "no checkbox" — confirm intent.
+    return False
+
+
+def click_download_button(driver: webdriver.Firefox, timeout: int) -> bool:
+    """Click the first clickable candidate from the selector list; False if none could be clicked."""
+    wait = WebDriverWait(driver, timeout)
+    selectors = [
+        (By.ID, "jd_license_submit"),
+        (By.CSS_SELECTOR, "#jd_license_submit"),
+        (By.CSS_SELECTOR, "input[type='submit']"),
+        (By.CSS_SELECTOR, "button[type='submit']"),
+        (By.CSS_SELECTOR, "a[href*='download']"),
+    ]
+    # Selectors are tried in order and each gets its own wait of up to `timeout` seconds.
+    for by, selector in selectors:
+        try:
+            btn = wait.until(EC.element_to_be_clickable((by, selector)))
+            if click_element(driver, btn):
+                return True
+        except TimeoutException:
+            continue
+        except Exception:
+            continue
+
+    return False
+
+
+# ============================================================
+# Download detection
+# ============================================================
+
+def clear_partial_downloads(download_dir: Path) -> None:
+    """Best-effort removal of files directly in download_dir whose name ends in a PARTIAL_SUFFIXES entry."""
+    if not download_dir.exists():
+        return
+    for path in download_dir.iterdir():
+        if path.is_file() and path.name.endswith(PARTIAL_SUFFIXES):
+            try:
+                path.unlink()
+            except Exception:
+                pass
+
+
+def list_completed_files(download_dir: Path) -> List[Path]:
+    """
+    Return the regular files directly inside download_dir whose name does not
+    end in one of the PARTIAL_SUFFIXES.
+
+    A missing directory yields an empty list.
+    """
+    if not download_dir.exists():
+        return []
+    return [
+        entry for entry in download_dir.iterdir()
+        if entry.is_file() and not entry.name.endswith(PARTIAL_SUFFIXES)
+    ]
+
+
+def has_partial_files(download_dir: Path) -> bool:
+    """Tell whether download_dir directly contains a file ending in a PARTIAL_SUFFIXES entry."""
+    if not download_dir.exists():
+        return False
+    return any(
+        entry.is_file() and entry.name.endswith(PARTIAL_SUFFIXES)
+        for entry in download_dir.iterdir()
+    )
+
+
+def wait_for_new_download(
+    download_dir: Path,
+    before_names: set,
+    timeout: int,
+    check_interval: float = 1.0,
+    stable_polls_required: int = 2,
+) -> Optional[Path]:
+    """
+    Wait for a new completed file whose size is stable for several polls.
+
+    A file not listed in before_names counts as finished once its non-zero
+    size was unchanged for stable_polls_required consecutive polls and no
+    partial file is left in download_dir. Returns that file, or None when
+    timeout seconds pass first.
+    """
+    deadline = time.monotonic() + timeout  # monotonic: unaffected by clock changes
+    size_state: Dict[str, Tuple[int, int]] = {}
+
+    while time.monotonic() < deadline:
+        # stat() each candidate exactly once; a file may vanish right after listing.
+        candidates = []
+        for p in list_completed_files(download_dir):
+            if p.name in before_names:
+                continue
+            try:
+                candidates.append((p.stat(), p))
+            except OSError:
+                continue
+
+        # Prefer most recently modified file.
+        candidates.sort(key=lambda pair: pair[0].st_mtime, reverse=True)
+        for info, p in candidates:
+            prev_size, stable_count = size_state.get(str(p), (-1, 0))
+            unchanged = info.st_size == prev_size and info.st_size > 0
+            stable_count = stable_count + 1 if unchanged else 0
+            size_state[str(p)] = (info.st_size, stable_count)
+
+            if stable_count >= stable_polls_required and not has_partial_files(download_dir):
+                return p
+        time.sleep(check_interval)
+
+    return None
+
+
+def move_to_hash_folder(
+    temp_file: Path,
+    internal_hash: str,
+    final_download_root: Path,
+) -> Path:
+    """
+    Move temp_file into final_download_root/<internal_hash>/ and return its resolved path.
+
+    The file keeps its (sanitized) name. When that name is already taken, the
+    first free "<stem>__<n><suffix>" name is used, with n starting at 2.
+    """
+    target_dir = ensure_dir(final_download_root / internal_hash)
+    dest = target_dir / safe_filename(temp_file.name)
+
+    # Stem and suffix come from the first-choice name, so numbered
+    # variants never stack ("name__2__3").
+    base_stem, base_suffix = dest.stem, dest.suffix
+    attempt = 2
+    while dest.exists():
+        dest = target_dir / f"{base_stem}__{attempt}{base_suffix}"
+        attempt += 1
+
+    shutil.move(str(temp_file), str(dest))
+    return dest.resolve()
+
+
+# ============================================================
+# Optional GPX tagging helper
+# ============================================================
+
+def add_internal_hash_to_gpx_file(gpx_path: Path, internal_hash: str) -> bool:
+    """
+    Optional helper for plain .gpx files.
+
+    Writes internal_hash into a <source_registry_hash> element under
+    <metadata><extensions>, creating those containers when missing, and
+    rewrites the file in place. Calling it again updates the existing element
+    instead of adding another one.
+
+    This is intentionally conservative and only handles XML GPX files directly.
+    It does NOT modify .gdb, because Garmin GDB is binary and should not be patched
+    by raw string insertion.
+
+    Current downloader does not call this automatically.
+    Keep the original file clean; tag in a later controlled post-processing stage.
+
+    Returns:
+        True when the file was rewritten, False on any failure (unreadable
+        file, malformed XML, write error).
+
+    Note:
+        The namespace prefixes found in the file are registered with
+        ElementTree, which is process-wide state.
+    """
+    import xml.etree.ElementTree as ET
+
+    try:
+        # ElementTree does not remember the prefixes used in the source document
+        # and would otherwise serialize them as ns0:, ns1:, ... (turning <gpx>
+        # into <ns0:gpx>). Re-register the document's own prefixes first.
+        for _event, (ns_prefix, ns_uri) in ET.iterparse(str(gpx_path), events=("start-ns",)):
+            try:
+                ET.register_namespace(ns_prefix, ns_uri)
+            except ValueError:
+                # Prefixes of the reserved form "ns<digits>" cannot be registered;
+                # ElementTree then picks a prefix itself.
+                pass
+
+        tree = ET.parse(str(gpx_path))
+        root = tree.getroot()
+
+        # Element tags look like "{namespace-uri}gpx"; reuse that namespace for lookups.
+        ns_match = re.match(r"\{(.+?)\}", root.tag)
+        ns = ns_match.group(1) if ns_match else ""
+        prefix = f"{{{ns}}}" if ns else ""
+
+        metadata = root.find(f"{prefix}metadata")
+        if metadata is None:
+            metadata = ET.Element(f"{prefix}metadata")
+            root.insert(0, metadata)
+
+        extensions = metadata.find(f"{prefix}extensions")
+        if extensions is None:
+            extensions = ET.SubElement(metadata, f"{prefix}extensions")
+
+        # Reuse an existing tag so repeated runs do not stack duplicates. After a
+        # round trip through a file with a default namespace the tag is read back
+        # namespace-qualified, so both spellings are checked.
+        tag = extensions.find("source_registry_hash")
+        if tag is None and prefix:
+            tag = extensions.find(f"{prefix}source_registry_hash")
+        if tag is None:
+            tag = ET.SubElement(extensions, "source_registry_hash")
+        tag.text = internal_hash
+
+        tree.write(str(gpx_path), encoding="utf-8", xml_declaration=True)
+        return True
+    except Exception:
+        # Contract is a boolean result; callers are not expected to handle exceptions.
+        return False
+
+
+# ============================================================
+# One URL processing
+# ============================================================
+
+def process_one_url(
+    driver: webdriver.Firefox,
+    url_item: dict,
+    cfg: WorkerConfig,
+    temp_download_dir: Path,
+    final_download_root: Path,
+) -> dict:
+    """
+    Download the file behind one URL and return a registry record describing the outcome.
+
+    url_item must provide the keys "url" and "input_index" (they are read
+    before the try block, so a missing key raises KeyError).
+
+    The returned record always carries "success", "status", "error" and the
+    timing fields. "status" ends up as one of:
+      - "downloaded" (success)
+      - "download_button_not_found_or_not_clickable"
+      - "download_timeout"
+      - "webdriver_error"
+      - "unexpected_error" (error text includes the traceback)
+
+    Exceptions raised inside the try block are captured in the record rather
+    than propagated.
+    """
+    url = url_item["url"]
+    input_index = url_item["input_index"]
+    internal_hash = make_internal_hash(url)
+
+    started_at = utc_now_iso()
+
+    # Start from a pessimistic record; fields are filled in as steps succeed.
+    record = {
+        "success": False,
+        "status": "started",
+        "error": "",
+        "worker_id": cfg.worker_id,
+        "input_index": input_index,
+        "url": url,
+        "normalized_url": normalize_url(url),
+        "internal_hash": internal_hash,
+        "downloaded_filepath": None,
+        "download_original_filename": None,
+        "download_file_sha256": None,
+        "from_remote_metadata": None,
+        "started_at_utc": started_at,
+        "finished_at_utc": None,
+    }
+
+    try:
+        clear_partial_downloads(temp_download_dir)
+
+        # Snapshot of completed files already present, so that only a file
+        # appearing after the click is treated as this URL's download.
+        before_names = {p.name for p in list_completed_files(temp_download_dir)}
+
+        driver.get(url)
+        wait_for_page_load(driver, timeout=cfg.page_load_timeout)
+
+        # Page metadata is captured before clicking anything and is kept in the
+        # record even when the download itself fails later.
+        metadata = extract_jd_summary_metadata(driver)
+        record["from_remote_metadata"] = metadata
+
+        accept_license_if_present(driver, timeout=cfg.element_timeout)
+
+        clicked = click_download_button(driver, timeout=cfg.element_timeout)
+        if not clicked:
+            record["status"] = "download_button_not_found_or_not_clickable"
+            record["finished_at_utc"] = utc_now_iso()
+            return record
+
+        downloaded_temp_file = wait_for_new_download(
+            temp_download_dir,
+            before_names=before_names,
+            timeout=cfg.download_timeout,
+        )
+
+        if downloaded_temp_file is None:
+            record["status"] = "download_timeout"
+            record["finished_at_utc"] = utc_now_iso()
+            return record
+
+        # Hash and original name are taken before the move, while the file is
+        # still at its temporary location.
+        file_sha256 = sha256_file(downloaded_temp_file)
+        original_filename = downloaded_temp_file.name
+
+        final_path = move_to_hash_folder(
+            temp_file=downloaded_temp_file,
+            internal_hash=internal_hash,
+            final_download_root=final_download_root,
+        )
+
+        # Sidecar metadata next to file for direct local browsing.
+        # Named "<downloaded file name>.source.json".
+        sidecar_path = final_path.with_suffix(final_path.suffix + ".source.json")
+        sidecar = {
+            "url": url,
+            "normalized_url": normalize_url(url),
+            "internal_hash": internal_hash,
+            "downloaded_filepath": str(final_path),
+            "download_original_filename": original_filename,
+            "download_file_sha256": file_sha256,
+            "from_remote_metadata": metadata,
+            "created_at_utc": utc_now_iso(),
+        }
+        sidecar_path.write_text(
+            json.dumps(sidecar, ensure_ascii=False, indent=2, sort_keys=True),
+            encoding="utf-8",
+        )
+
+        # Success is only recorded after both the move and the sidecar write.
+        record["success"] = True
+        record["status"] = "downloaded"
+        record["downloaded_filepath"] = str(final_path)
+        record["download_original_filename"] = original_filename
+        record["download_file_sha256"] = file_sha256
+        record["finished_at_utc"] = utc_now_iso()
+        return record
+
+    except WebDriverException as e:
+        record["status"] = "webdriver_error"
+        record["error"] = str(e)
+        record["finished_at_utc"] = utc_now_iso()
+        return record
+
+    except Exception as e:
+        # Anything else (file system, JSON, helpers): keep the traceback in the
+        # record so the failure can be diagnosed from the registry alone.
+        record["status"] = "unexpected_error"
+        record["error"] = str(e) + "\n" + traceback.format_exc()
+        record["finished_at_utc"] = utc_now_iso()
+        return record
+
+
+# ============================================================
+# Worker
+# ============================================================
+
+def worker_main(url_items: List[dict], cfg_dict: dict) -> dict:
+    """
+    Process a chunk of URLs with one dedicated Firefox instance.
+
+    cfg_dict holds the keyword arguments for WorkerConfig (it is passed as a
+    plain dict by the caller). Each URL's record is appended to this worker's
+    JSONL file right after it is processed.
+
+    Returns a summary dict with "worker_id", "total", "success", "failed" and
+    the path of the worker's JSONL file. The driver is always quit on exit; an
+    exception from driver creation or from the loop propagates to the caller.
+    """
+    cfg = WorkerConfig(**cfg_dict)
+
+    workdir = Path(cfg.workdir)
+    output_dir = Path(cfg.output_dir)
+    final_download_root = Path(cfg.download_root)
+
+    # Per-worker temp directory and JSONL file, both keyed by the worker id.
+    temp_download_dir = ensure_dir(workdir / f"worker_{cfg.worker_id:02d}" / "temp_downloads")
+    worker_jsonl = output_dir / f"worker_{cfg.worker_id:02d}.jsonl"
+
+    result_summary = {
+        "worker_id": cfg.worker_id,
+        "total": len(url_items),
+        "success": 0,
+        "failed": 0,
+        "jsonl": str(worker_jsonl),
+    }
+
+    # Stays None if driver creation fails, so the finally block can tell.
+    driver = None
+
+    try:
+        driver = build_firefox_driver(cfg, temp_download_dir)
+
+        for n, url_item in enumerate(url_items, 1):
+            print(
+                f"[worker {cfg.worker_id:02d}] [{n}/{len(url_items)}] "
+                f"input_index={url_item['input_index']} {url_item['url']}",
+                flush=True,
+            )
+
+            record = process_one_url(
+                driver=driver,
+                url_item=url_item,
+                cfg=cfg,
+                temp_download_dir=temp_download_dir,
+                final_download_root=final_download_root,
+            )
+
+            # Written immediately, before moving on to the next URL.
+            write_jsonl_line(worker_jsonl, record)
+
+            if record.get("success"):
+                result_summary["success"] += 1
+                print(
+                    f"[worker {cfg.worker_id:02d}] OK {record['internal_hash']} "
+                    f"{record.get('downloaded_filepath')}",
+                    flush=True,
+                )
+            else:
+                result_summary["failed"] += 1
+                print(
+                    f"[worker {cfg.worker_id:02d}] FAIL {record['internal_hash']} "
+                    f"{record.get('status')}",
+                    flush=True,
+                )
+
+            # Pause between URLs (also applied after the last one).
+            if cfg.delay_between_urls > 0:
+                time.sleep(cfg.delay_between_urls)
+
+    finally:
+        if driver is not None:
+            try:
+                driver.quit()
+            except Exception:
+                # Shutdown problems must not mask the result or an earlier error.
+                pass
+
+    return result_summary
+
+
+# ============================================================
+# Registry combining
+# ============================================================
+
+def load_success_seen(output_dir: Path) -> set:
+    """
+    Collect the normalized URLs that already downloaded successfully (used for resume).
+
+    Reads source_registry.jsonl, when present, plus every worker_*.jsonl file
+    in output_dir. Only successful URLs are returned, so failed URLs get
+    retried on the next run.
+    """
+    combined = output_dir / "source_registry.jsonl"
+    registry_files = [combined] if combined.exists() else []
+    registry_files += sorted(output_dir.glob("worker_*.jsonl"))
+
+    seen = set()
+    for registry_file in registry_files:
+        seen.update(
+            normalize_url(rec["url"])
+            for rec in read_jsonl(registry_file)
+            if rec.get("success") and rec.get("url")
+        )
+
+    return seen
+
+
+def collect_worker_records(output_dir: Path) -> List[dict]:
+    """
+    Load every worker_*.jsonl in output_dir and keep one record per URL.
+
+    Records are keyed by normalize_url(url); records without a URL are dropped.
+    A successful record replaces a failed one, never the other way round.
+    Between two records with the same success flag, the one whose
+    finished_at_utc compares greater or equal wins, so ties go to the record
+    read later. The result is sorted by input_index.
+    """
+    all_records = []
+    for jsonl_file in sorted(output_dir.glob("worker_*.jsonl")):
+        all_records.extend(read_jsonl(jsonl_file))
+
+    chosen: Dict[str, dict] = {}
+
+    for candidate in all_records:
+        raw_url = candidate.get("url")
+        if not raw_url:
+            continue
+
+        key = normalize_url(raw_url)
+        current = chosen.get(key)
+
+        if current is None:
+            chosen[key] = candidate
+            continue
+
+        current_ok = bool(current.get("success"))
+        candidate_ok = bool(candidate.get("success"))
+
+        if candidate_ok != current_ok:
+            # Success beats failure; a failure never displaces a success.
+            if candidate_ok:
+                chosen[key] = candidate
+        elif (candidate.get("finished_at_utc") or "") >= (current.get("finished_at_utc") or ""):
+            chosen[key] = candidate
+
+    return sorted(chosen.values(), key=lambda rec: int(rec.get("input_index", 0)))
+
+
+def write_combined_outputs(output_dir: Path) -> None:
+    """
+    Rebuild the combined registry from the worker JSONL files.
+
+    Writes three files into output_dir, each overwritten on every call:
+    source_registry.jsonl (one record per line), source_registry.csv (fixed
+    column set, remote metadata as compact JSON) and source_registry.md (a
+    table with a short details column).
+    """
+    ensure_dir(output_dir)
+    records = collect_worker_records(output_dir)
+
+    jsonl_path = output_dir / "source_registry.jsonl"
+    csv_path = output_dir / "source_registry.csv"
+    md_path = output_dir / "source_registry.md"
+
+    # --- JSONL ---
+    with jsonl_path.open("w", encoding="utf-8", newline="\n") as out:
+        out.writelines(
+            json.dumps(rec, ensure_ascii=False, sort_keys=True) + "\n"
+            for rec in records
+        )
+
+    # --- CSV ---
+    csv_fields = [
+        "input_index",
+        "success",
+        "status",
+        "internal_hash",
+        "url",
+        "downloaded_filepath",
+        "download_original_filename",
+        "download_file_sha256",
+        "from_remote_metadata",
+        "error",
+        "started_at_utc",
+        "finished_at_utc",
+    ]
+
+    with csv_path.open("w", encoding="utf-8", newline="") as out:
+        writer = csv.DictWriter(out, fieldnames=csv_fields)
+        writer.writeheader()
+
+        for rec in records:
+            row = {name: rec.get(name) for name in csv_fields}
+            # The nested metadata is flattened to a single JSON cell.
+            row["from_remote_metadata"] = json_dumps_compact(row["from_remote_metadata"])
+            writer.writerow(row)
+
+    # --- Markdown table ---
+    def escape_cell(value) -> str:
+        # A raw "|" would split the table cell.
+        return str(value).replace("|", "\\|")
+
+    with md_path.open("w", encoding="utf-8", newline="\n") as out:
+        out.write("| internal_hash | success | status | url | downloaded_filepath | details |\n")
+        out.write("|---|---:|---|---|---|---|\n")
+
+        for rec in records:
+            meta = rec.get("from_remote_metadata") or {}
+
+            title = truncate_text(meta.get("page_title", ""), 100)
+            details_parts = [f"title: {title}"] if title else []
+
+            summary_items = meta.get("jd_summary_items") or []
+            first_text = truncate_text(summary_items[0].get("text", ""), 180) if summary_items else ""
+            if first_text:
+                details_parts.append(first_text)
+
+            out.write(
+                "| {internal_hash} | {success} | {status} | {url} | {path} | {details} |\n".format(
+                    internal_hash=rec.get("internal_hash", ""),
+                    success=str(bool(rec.get("success"))),
+                    status=escape_cell(rec.get("status", "")),
+                    url=escape_cell(rec.get("url", "")),
+                    path=escape_cell(rec.get("downloaded_filepath") or ""),
+                    details=escape_cell("<br>".join(details_parts)),
+                )
+            )
+
+    print("\nCombined registry written:")
+    print(f"  JSONL: {jsonl_path}")
+    print(f"  CSV:   {csv_path}")
+    print(f"  MD:    {md_path}")
+
+
+# ============================================================
+# Input / args
+# ============================================================
+
+def parse_proxy(proxy: Optional[str]) -> Tuple[Optional[str], Optional[int]]:
+    """
+    Split a proxy specification into (host, port).
+
+    Accepts "host:port", optionally preceded by a scheme ("socks5://host:port")
+    and optionally followed by a trailing slash. A missing or empty value means
+    "no proxy" and yields (None, None).
+
+    Raises:
+        ValueError: when the port is missing, the host is empty, the port is
+            not a number, or the port is outside 1-65535.
+    """
+    if not proxy:
+        return None, None
+
+    proxy = proxy.strip()
+    if "://" in proxy:
+        proxy = proxy.split("://", 1)[1]
+    # Tolerate URL-style input such as "socks5://host:1080/".
+    proxy = proxy.rstrip("/")
+
+    if ":" not in proxy:
+        raise ValueError("Proxy must be in host:port format, example: 192.168.0.38:1080")
+
+    host, port_s = proxy.rsplit(":", 1)
+    host = host.strip()
+    port_s = port_s.strip()
+
+    if not host:
+        raise ValueError("Proxy must be in host:port format, example: 192.168.0.38:1080")
+
+    try:
+        port = int(port_s)
+    except ValueError:
+        # Replace int()'s generic message with one that names the actual problem.
+        raise ValueError(f"Proxy port must be a number, got: {port_s!r}") from None
+
+    if not 1 <= port <= 65535:
+        raise ValueError(f"Proxy port must be between 1 and 65535, got: {port}")
+
+    return host, port
+
+
+def load_urls(input_path: Path, start: int, limit: Optional[int]) -> List[dict]:
+    """
+    Read URLs from a text file, one per line, skipping blank lines.
+
+    Each entry is {"input_index": <0-based line number in the file>, "url": <text>}.
+    Note that `start` and `limit` count non-blank entries, not file lines:
+    `start` entries are dropped from the front, then at most `limit` are kept
+    (None keeps all).
+    """
+    urls = []
+    # "utf-8-sig" reads plain UTF-8 too, but drops a leading byte-order mark
+    # that would otherwise stay glued to the first URL.
+    with input_path.open("r", encoding="utf-8-sig") as f:
+        for idx, line in enumerate(f):
+            url = line.strip()
+            if not url:
+                continue
+            urls.append({"input_index": idx, "url": url})
+
+    sliced = urls[start:]
+    if limit is not None:
+        sliced = sliced[:limit]
+
+    return sliced
+
+
+def build_arg_parser() -> argparse.ArgumentParser:
+    """Create the command-line parser for the parallel downloader."""
+    parser = argparse.ArgumentParser(
+        description="Parallel Firefox downloader with jd_summary_list registry capture."
+    )
+    add = parser.add_argument
+
+    # Input selection.
+    add("--input", default="url-final.txt", help="Input URL file, one URL per line.")
+    add("--start", type=int, default=0, help="Start offset in input file. Example: 780")
+    add("--limit", type=int, default=None, help="Optional max URLs to process.")
+
+    # Browser / network.
+    add("--workers", type=int, default=DEFAULT_WORKERS, help="Firefox worker count.")
+    add("--proxy", default=None, help="SOCKS5 proxy as host:port, example 192.168.0.38:1080")
+    add("--headless", action="store_true", help="Run Firefox headless.")
+
+    # Directories.
+    add("--output-dir", default=DEFAULT_OUTPUT_DIR)
+    add("--download-root", default=DEFAULT_DOWNLOAD_ROOT)
+    add("--workdir", default=DEFAULT_WORKDIR)
+
+    # Timing.
+    add("--page-load-timeout", type=int, default=25)
+    add("--element-timeout", type=int, default=7)
+    add("--download-timeout", type=int, default=60)
+    add("--delay", type=float, default=2.0, help="Delay between URLs per worker.")
+
+    default_user_agent = (
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:126.0) "
+        "Gecko/20100101 Firefox/126.0"
+    )
+    add("--user-agent", default=default_user_agent)
+
+    # Run modes.
+    add(
+        "--no-resume",
+        action="store_true",
+        help="Do not skip URLs already successful in previous registry files.",
+    )
+    add(
+        "--combine-only",
+        action="store_true",
+        help="Only combine existing worker_*.jsonl files into source_registry.* outputs.",
+    )
+
+    return parser
+
+
+# ============================================================
+# Main
+# ============================================================
+
+def main() -> int:
+    """
+    Command-line entry point: load URLs, fan them out to worker processes, combine results.
+
+    Returns the process exit code: 2 when the input file is missing, 0
+    otherwise (including runs in which individual URLs or workers failed).
+    An invalid --proxy value raises ValueError from parse_proxy().
+    """
+    args = build_arg_parser().parse_args()
+
+    input_path = Path(args.input)
+    output_dir = ensure_dir(Path(args.output_dir))
+    download_root = ensure_dir(Path(args.download_root))
+    workdir = ensure_dir(Path(args.workdir))
+
+    # Combine-only mode does not need the input file, so it is handled first.
+    if args.combine_only:
+        write_combined_outputs(output_dir)
+        return 0
+
+    if not input_path.exists():
+        print(f"Input file not found: {input_path}", file=sys.stderr)
+        return 2
+
+    proxy_host, proxy_port = parse_proxy(args.proxy)
+
+    url_items = load_urls(input_path=input_path, start=args.start, limit=args.limit)
+
+    # Resume: drop URLs that earlier registry files already mark as successful.
+    if not args.no_resume:
+        seen_success = load_success_seen(output_dir)
+        before_count = len(url_items)
+        url_items = [
+            item for item in url_items
+            if normalize_url(item["url"]) not in seen_success
+        ]
+        skipped = before_count - len(url_items)
+    else:
+        skipped = 0
+
+    if not url_items:
+        print("No URLs to process.")
+        if skipped:
+            print(f"Skipped already-successful URLs: {skipped}")
+        # Still refresh the combined outputs from the existing worker files.
+        write_combined_outputs(output_dir)
+        return 0
+
+    # At least one worker, and never more workers than URLs.
+    workers = max(1, min(int(args.workers), len(url_items)))
+    chunks = chunk_evenly(url_items, workers)
+
+    print("=== CONFIG ===")
+    print(f"Input:          {input_path}")
+    print(f"Start offset:   {args.start}")
+    print(f"URLs to run:    {len(url_items)}")
+    print(f"Skipped resume: {skipped}")
+    print(f"Workers:        {workers}")
+    print(f"Proxy:          {proxy_host}:{proxy_port}" if proxy_host else "Proxy:          none")
+    print(f"Headless:       {args.headless}")
+    print(f"Output dir:     {output_dir.resolve()}")
+    print(f"Download root:  {download_root.resolve()}")
+    print("==============\n")
+
+    # One config per chunk; only worker_id differs between them.
+    cfgs = []
+    for worker_id, _chunk in enumerate(chunks):
+        cfg = WorkerConfig(
+            worker_id=worker_id,
+            proxy_host=proxy_host,
+            proxy_port=proxy_port,
+            headless=bool(args.headless),
+            page_load_timeout=int(args.page_load_timeout),
+            element_timeout=int(args.element_timeout),
+            download_timeout=int(args.download_timeout),
+            delay_between_urls=float(args.delay),
+            workdir=str(workdir),
+            output_dir=str(output_dir),
+            download_root=str(download_root),
+            user_agent=str(args.user_agent),
+        )
+        cfgs.append(cfg)
+
+    total_success = 0
+    total_failed = 0
+
+    with ProcessPoolExecutor(max_workers=workers) as executor:
+        futures = []
+        for chunk, cfg in zip(chunks, cfgs):
+            # The config is handed over as a plain dict; worker_main rebuilds
+            # the WorkerConfig from it.
+            futures.append(executor.submit(worker_main, chunk, cfg.__dict__))
+
+        for fut in as_completed(futures):
+            try:
+                summary = fut.result()
+                total_success += int(summary.get("success", 0))
+                total_failed += int(summary.get("failed", 0))
+                print(
+                    f"\nWorker {summary.get('worker_id')} finished: "
+                    f"success={summary.get('success')} failed={summary.get('failed')}"
+                )
+            except Exception as e:
+                # A crashed worker is counted as a single failure, regardless
+                # of how many URLs were in its chunk.
+                total_failed += 1
+                print(f"[FATAL] Worker crashed: {e}", file=sys.stderr)
+                traceback.print_exc()
+
+    # Combined outputs are rebuilt even when some workers crashed.
+    write_combined_outputs(output_dir)
+
+    print("\n=== FINISHED ===")
+    print(f"Success this run: {total_success}")
+    print(f"Failed this run:  {total_failed}")
+    print(f"Registry ID form: R_<16 hex chars from normalized URL SHA256>")
+    print("For GDB files, do not raw-patch the binary. Use the registry or a controlled converter/post-processor.")
+
+    return 0
+
+
+# Script entry point: exit with the status code returned by main().
+if __name__ == "__main__":
+    raise SystemExit(main())
+```
+
+Run it like this:
+
+```bash
+python jd_parallel_downloader.py --input url-final.txt --start 780 --workers 4 --proxy 192.168.0.38:1080
+```
+
+Headless:
+
+```bash
+python jd_parallel_downloader.py --input url-final.txt --start 780 --workers 4 --proxy 192.168.0.38:1080 --headless
+```
+
+Important behavior:
+
+* Each worker has its own Firefox instance.
+* Each worker has its own temporary download directory.
+* Final files are moved into:
+
+```text
+downloads_by_hash/
+  R_ABCDEF1234567890/
+    downloaded_file.gpx
+    downloaded_file.gpx.source.json
+```
+
+* Main registry files are:
+
+```text
+output/source_registry.jsonl
+output/source_registry.csv
+output/source_registry.md
+```
+
+For GPX/GDB injection later:
+
+* A GPX file can safely carry the hash as an extension element under `<metadata><extensions>`, for example:
+
+```xml
+<source_registry_hash>R_ABCDEF1234567890</source_registry_hash>
+```
+
+* GDB files should not be patched as raw binary. A better flow is to keep the registry and the sidecar file, or to convert the GDB to GPX with GPSBabel, inject the hash into the GPX metadata, and keep the original GDB mapped through `source_registry.csv` / `source_registry.jsonl`.
diff --git a/visit.py b/visit.py
index 3012938..155e319 100644
--- a/visit.py
+++ b/visit.py
@@ -1,172 +1,172 @@
-from selenium import webdriver
-from selenium.webdriver.chrome.options import Options
-from selenium.webdriver.common.by import By
-from selenium.webdriver.support.ui import WebDriverWait
-from selenium.webdriver.support import expected_conditions as EC
-from selenium.common.exceptions import TimeoutException, NoSuchElementException, WebDriverException
-import time
-import os
-
-# ========================= CONFIG =========================
-PROXY_IP = "192.168.0.38"
-PROXY_PORT = "1080"
-DOWNLOAD_PATH = os.path.join(os.getcwd(), "downloads")
-LOG_FILE = "download_log.txt"
-
-os.makedirs(DOWNLOAD_PATH, exist_ok=True)
-
-# ====================== CHROME OPTIONS (Optimized for speed) ======================
-chrome_options = Options()
-
-# Proxy & basic stealth
-chrome_options.add_argument(f'--proxy-server=socks5://{PROXY_IP}:{PROXY_PORT}')
-chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36")
-chrome_options.add_argument("--no-sandbox")
-chrome_options.add_argument("--disable-dev-shm-usage")
-chrome_options.add_argument("--disable-blink-features=AutomationControlled")
-chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
-chrome_options.add_experimental_option('useAutomationExtension', False)
-
-# === SPEED OPTIMIZATIONS ===
-chrome_options.add_argument("--blink-settings=imagesEnabled=false")   # Disable images
-chrome_options.add_argument("--disable-gpu")
-chrome_options.add_argument("--disable-extensions")
-chrome_options.add_argument("--disable-plugins")
-chrome_options.add_argument("--disable-popup-blocking")
-
-# Fast page load strategy (doesn't wait for images/media)
-chrome_options.page_load_strategy = "eager"   # or "none" if you want even faster (but less stable)
-
-# Block media, stylesheets, fonts, etc.
-prefs = {
-    "download.default_directory": DOWNLOAD_PATH,
-    "download.prompt_for_download": False,
-    "download.directory_upgrade": True,
-    "safebrowsing.enabled": True,
-    "profile.default_content_setting_values.automatic_downloads": 1,
-    "profile.managed_default_content_settings.images": 2,        # 2 = block images
-    "profile.managed_default_content_settings.stylesheets": 2,   # block CSS
-    "profile.managed_default_content_settings.fonts": 2,         # block fonts
-    "profile.managed_default_content_settings.media_stream": 2,  # block video/audio
-}
-
-chrome_options.add_experimental_option("prefs", prefs)
-
-# Optional: Run headless (much faster, recommended)
-# chrome_options.add_argument("--headless=new")
-
-# Initialize driver
-driver = webdriver.Chrome(options=chrome_options)
-
-def wait_for_page_load(driver, timeout=20):
-    """Faster page load wait"""
-    try:
-        WebDriverWait(driver, timeout).until(
-            lambda d: d.execute_script("return document.readyState") in ["interactive", "complete"]
-        )
-    except TimeoutException:
-        print("   → Page load timed out, continuing anyway...")
-
-def is_download_finished(download_path, timeout=120, check_interval=2):
-    """Slightly faster download checker"""
-    end_time = time.time() + timeout
-    while time.time() < end_time:
-        partial_files = [f for f in os.listdir(download_path) if f.endswith(('.crdownload', '.part', '.tmp'))]
-        completed_files = [f for f in os.listdir(download_path) if not f.endswith(('.crdownload', '.part', '.tmp'))]
-
-        if completed_files and not partial_files:
-            return True, completed_files
-
-        if partial_files:
-            print(f"   → Still downloading... ({len(partial_files)} partial)")
-        time.sleep(check_interval)
-    return False, []
-
-def clear_partial_downloads():
-    for filename in os.listdir(DOWNLOAD_PATH):
-        if filename.endswith(('.crdownload', '.part', '.tmp')):
-            try:
-                os.remove(os.path.join(DOWNLOAD_PATH, filename))
-            except:
-                pass
-
-def log_result(url: str, success: bool):
-    status = "True" if success else "False"
-    with open(LOG_FILE, "a", encoding="utf-8") as f:
-        f.write(f"{url} | {status}\n")
-    print(f"   Logged: {'SUCCESS' if success else 'FAILED'}")
-
-def run_download(url: str):
-    try:
-        print(f"\n[{time.strftime('%H:%M:%S')}] Processing → {url}")
-        clear_partial_downloads()
-
-        print("   Navigating...")
-        driver.get(url)
-
-        wait_for_page_load(driver, timeout=20)
-        print("   ✓ Page loaded (eager).")
-
-        wait = WebDriverWait(driver, 10)  # Reduced timeout
-
-        # === License checkbox (quick check) ===
-        try:
-            checkbox = wait.until(EC.presence_of_element_located((By.NAME, "license_agree")))
-            if checkbox.is_enabled() and checkbox.is_displayed():
-                checkbox.click()
-                print("   ✓ License checkbox accepted.")
-                time.sleep(0.5)
-        except (TimeoutException, NoSuchElementException):
-            print("   → No checkbox found. Continuing...")
-
-        # === Download button (quick detection) ===
-        try:
-            download_btn = wait.until(EC.element_to_be_clickable((By.ID, "jd_license_submit")))
-            download_btn.click()
-            print("   ✓ Download button clicked.")
-        except (TimeoutException, NoSuchElementException):
-            print("   ✗ Download button NOT found or not clickable → Skipping")
-            log_result(url, False)
-            return False
-
-        # === Wait for download ===
-        print("   Waiting for download (max 2 min)...")
-        success, files = is_download_finished(DOWNLOAD_PATH, timeout=120)
-        if success and files:
-            print(f"   ✓ Download completed! Files: {files}")
-            log_result(url, True)
-            return True
-        else:
-            print("   ✗ Download timed out.")
-            log_result(url, False)
-            return False
-
-    except WebDriverException as e:
-        print(f"   ✗ WebDriver error: {e}")
-        log_result(url, False)
-        return False
-    except Exception as e:
-        print(f"   ✗ Unexpected error: {e}")
-        log_result(url, False)
-        return False
-
-# ====================== MAIN ======================
-if __name__ == "__main__":
-    with open("url-final.txt", "r", encoding="utf-8") as f:
-        links = [line.strip() for line in f if line.strip()]
-
-    links = links[780:]  # start from 251st
-    print(f"Loaded {len(links)} URLs. Starting from index 250.")
-
-    success_count = 0
-    for i, url in enumerate(links, 1):
-        print(f"--- [{i}/{len(links)}] ---")
-        if run_download(url):
-            success_count += 1
-        time.sleep(3)  # Reduced delay between requests
-
-    print(f"\n=== FINISHED ===")
-    print(f"Total: {len(links)} | Success: {success_count} | Failed: {len(links) - success_count}")
-    print(f"Log: {LOG_FILE}")
-
+from selenium import webdriver
+from selenium.webdriver.chrome.options import Options
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.common.exceptions import TimeoutException, NoSuchElementException, WebDriverException
+import time
+import os
+
+# ========================= CONFIG =========================
+# NOTE: everything in this section runs at import time, including the creation
+# of the download folder and the launch of Chrome at the end of it.
+PROXY_IP = "192.168.0.38"
+PROXY_PORT = "1080"
+# Resolved against the working directory the script is started from.
+DOWNLOAD_PATH = os.path.join(os.getcwd(), "downloads")
+LOG_FILE = "download_log.txt"
+
+os.makedirs(DOWNLOAD_PATH, exist_ok=True)
+
+# ====================== CHROME OPTIONS (Optimized for speed) ======================
+chrome_options = Options()
+
+# Proxy & basic stealth
+chrome_options.add_argument(f'--proxy-server=socks5://{PROXY_IP}:{PROXY_PORT}')
+chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36")
+chrome_options.add_argument("--no-sandbox")
+chrome_options.add_argument("--disable-dev-shm-usage")
+chrome_options.add_argument("--disable-blink-features=AutomationControlled")
+chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
+chrome_options.add_experimental_option('useAutomationExtension', False)
+
+# === SPEED OPTIMIZATIONS ===
+chrome_options.add_argument("--blink-settings=imagesEnabled=false")   # Disable images
+chrome_options.add_argument("--disable-gpu")
+chrome_options.add_argument("--disable-extensions")
+chrome_options.add_argument("--disable-plugins")
+chrome_options.add_argument("--disable-popup-blocking")
+
+# Fast page load strategy (doesn't wait for images/media)
+chrome_options.page_load_strategy = "eager"   # or "none" if you want even faster (but less stable)
+
+# Download behaviour plus content blocking (images, stylesheets, fonts, media).
+prefs = {
+    # Downloads go straight into DOWNLOAD_PATH without a save prompt.
+    "download.default_directory": DOWNLOAD_PATH,
+    "download.prompt_for_download": False,
+    "download.directory_upgrade": True,
+    "safebrowsing.enabled": True,
+    "profile.default_content_setting_values.automatic_downloads": 1,
+    "profile.managed_default_content_settings.images": 2,        # 2 = block images
+    "profile.managed_default_content_settings.stylesheets": 2,   # block CSS
+    "profile.managed_default_content_settings.fonts": 2,         # block fonts
+    "profile.managed_default_content_settings.media_stream": 2,  # block video/audio
+}
+
+chrome_options.add_experimental_option("prefs", prefs)
+
+# Optional: Run headless (much faster, recommended)
+# chrome_options.add_argument("--headless=new")
+
+# Initialize driver (module-level: shared by all functions below)
+driver = webdriver.Chrome(options=chrome_options)
+
+def wait_for_page_load(driver, timeout=20):
+    """Wait until document.readyState is "interactive" or "complete".
+
+    A timeout is not an error: a notice is printed and the caller continues.
+    """
+    def dom_ready(d):
+        return d.execute_script("return document.readyState") in ["interactive", "complete"]
+
+    try:
+        WebDriverWait(driver, timeout).until(dom_ready)
+    except TimeoutException:
+        print("   → Page load timed out, continuing anyway...")
+
+def is_download_finished(download_path, timeout=120, check_interval=2, ignore=None):
+    """Poll a download folder until a download has completed or time runs out.
+
+    A download counts as finished when the folder holds at least one
+    completed file and no browser temp files (.crdownload / .part / .tmp).
+
+    Args:
+        download_path: Directory the browser downloads into.
+        timeout: Maximum number of seconds to keep polling.
+        check_interval: Seconds to sleep between polls.
+        ignore: Optional iterable of file names that must not count as
+            completed downloads, e.g. the files that were already in the
+            folder before the download was triggered. With the default
+            (None) every non-partial file counts, as before.
+
+    Returns:
+        ``(True, completed_files)`` once a finished download is seen,
+        otherwise ``(False, [])`` after *timeout* seconds.
+    """
+    partial_suffixes = ('.crdownload', '.part', '.tmp')
+    ignored = frozenset(ignore) if ignore is not None else frozenset()
+
+    end_time = time.time() + timeout
+    while time.time() < end_time:
+        # Take ONE directory snapshot per poll so the partial and completed
+        # lists describe the same instant (two listdir calls could disagree
+        # if a file is renamed in between).
+        entries = os.listdir(download_path)
+        partial_files = [f for f in entries if f.endswith(partial_suffixes)]
+        completed_files = [
+            f for f in entries
+            if not f.endswith(partial_suffixes) and f not in ignored
+        ]
+
+        if completed_files and not partial_files:
+            return True, completed_files
+
+        if partial_files:
+            print(f"   → Still downloading... ({len(partial_files)} partial)")
+
+        # Never sleep past the deadline, so the timeout is actually honoured.
+        time.sleep(max(0.0, min(check_interval, end_time - time.time())))
+    return False, []
+
+def clear_partial_downloads():
+    """Delete leftover partial-download files from DOWNLOAD_PATH.
+
+    Removes every file whose name ends in .crdownload, .part or .tmp.
+    Removal is best effort: a file that cannot be deleted is reported and
+    skipped, so one stuck file does not abort the run.
+    """
+    for filename in os.listdir(DOWNLOAD_PATH):
+        if filename.endswith(('.crdownload', '.part', '.tmp')):
+            try:
+                os.remove(os.path.join(DOWNLOAD_PATH, filename))
+            except OSError as e:
+                # Only filesystem errors are expected here (file locked or
+                # already gone); anything else, including Ctrl-C, propagates.
+                print(f"   → Could not remove partial file {filename}: {e}")
+
+def log_result(url: str, success: bool):
+    """Append one ``<url> | True/False`` line to LOG_FILE and echo the outcome."""
+    if success:
+        flag, label = "True", "SUCCESS"
+    else:
+        flag, label = "False", "FAILED"
+
+    record = f"{url} | {flag}\n"
+    with open(LOG_FILE, "a", encoding="utf-8") as log_handle:
+        log_handle.write(record)
+
+    print(f"   Logged: {label}")
+
+def run_download(url: str, download_timeout: int = 30):
+    """Open *url*, accept the license, click download and wait for the file.
+
+    Steps: clear stale partial files, remember which files are already in
+    DOWNLOAD_PATH, load the page, tick the ``license_agree`` checkbox if it
+    is present, click the ``jd_license_submit`` button, then wait for a NEW
+    completed file to appear. The outcome is always written via log_result().
+
+    Args:
+        url: Page to process.
+        download_timeout: Maximum seconds to wait for the download to finish.
+
+    Returns:
+        True if a new file was downloaded, False otherwise. WebDriver and
+        other errors are caught, logged as a failure, and reported as False.
+    """
+    try:
+        print(f"\n[{time.strftime('%H:%M:%S')}] Processing → {url}")
+        clear_partial_downloads()
+        # Snapshot the folder first: files left by earlier URLs must not be
+        # mistaken for the result of this download.
+        files_before = set(os.listdir(DOWNLOAD_PATH))
+
+        print("   Navigating...")
+        driver.get(url)
+
+        wait_for_page_load(driver, timeout=20)
+        print("   ✓ Page loaded (eager).")
+
+        wait = WebDriverWait(driver, 5)  # Short timeout: elements appear quickly or not at all
+
+        # === License checkbox (optional on some pages) ===
+        try:
+            checkbox = wait.until(EC.presence_of_element_located((By.NAME, "license_agree")))
+            if checkbox.is_enabled() and checkbox.is_displayed():
+                checkbox.click()
+                print("   ✓ License checkbox accepted.")
+                time.sleep(0.5)
+        except (TimeoutException, NoSuchElementException):
+            print("   → No checkbox found. Continuing...")
+
+        # === Download button (required) ===
+        try:
+            download_btn = wait.until(EC.element_to_be_clickable((By.ID, "jd_license_submit")))
+            download_btn.click()
+            print("   ✓ Download button clicked.")
+        except (TimeoutException, NoSuchElementException):
+            print("   ✗ Download button NOT found or not clickable → Skipping")
+            log_result(url, False)
+            return False
+
+        # === Wait for download ===
+        # The message is derived from the real timeout so the two cannot drift apart.
+        print(f"   Waiting for download (max {download_timeout}s)...")
+        success, files = _wait_for_new_download(files_before, download_timeout)
+        if success and files:
+            print(f"   ✓ Download completed! Files: {files}")
+            log_result(url, True)
+            return True
+        else:
+            print("   ✗ Download timed out.")
+            log_result(url, False)
+            return False
+
+    except WebDriverException as e:
+        print(f"   ✗ WebDriver error: {e}")
+        log_result(url, False)
+        return False
+    except Exception as e:
+        # Top-level boundary for one URL: record the failure and let the
+        # caller move on to the next link.
+        print(f"   ✗ Unexpected error: {e}")
+        log_result(url, False)
+        return False
+
+
+def _wait_for_new_download(files_before, timeout):
+    """Wait up to *timeout* seconds for a completed file not in *files_before*.
+
+    Returns ``(True, new_files)`` or ``(False, [])``.
+    """
+    deadline = time.time() + timeout
+    while True:
+        remaining = deadline - time.time()
+        if remaining <= 0:
+            return False, []
+
+        finished, files = is_download_finished(DOWNLOAD_PATH, timeout=remaining)
+        if not finished:
+            # The checker already waited out the remaining time.
+            return False, []
+
+        new_files = [name for name in files if name not in files_before]
+        if new_files:
+            return True, new_files
+
+        # Only pre-existing files are visible, so the browser has not started
+        # writing this download yet. Poll again shortly.
+        time.sleep(1)
+
+# ====================== MAIN ======================
+if __name__ == "__main__":
+    # Resume point: number of URLs at the top of url-final.txt to skip
+    # (already processed in earlier runs). Used for both the slice and the
+    # message so the two cannot disagree.
+    START_INDEX = 780
+
+    with open("url-final.txt", "r", encoding="utf-8") as f:
+        links = [line.strip() for line in f if line.strip()]
+
+    links = links[START_INDEX:]
+    print(f"Loaded {len(links)} URLs. Starting from index {START_INDEX}.")
+
+    success_count = 0
+    attempted = 0
+    try:
+        for i, url in enumerate(links, 1):
+            print(f"--- [{i}/{len(links)}] ---")
+            attempted = i
+            if run_download(url):
+                success_count += 1
+            time.sleep(3)  # Pause between requests
+    except KeyboardInterrupt:
+        # Fall through instead of crashing so the summary is printed and the
+        # browser is still closed by the driver.quit() that follows.
+        print("\nInterrupted by user — stopping early.")
+
+    print("\n=== FINISHED ===")
+    print(f"Total: {len(links)} | Success: {success_count} | Failed: {attempted - success_count}")
+    if attempted < len(links):
+        print(f"Not attempted: {len(links) - attempted}")
+    print(f"Log: {LOG_FILE}")
+
     driver.quit()
\ No newline at end of file