8-2-2 testcluster
This commit is contained in:
229557
clusterwide-file-manifest/8-2-2/manifest-8-2-2.ndm5
Normal file
229557
clusterwide-file-manifest/8-2-2/manifest-8-2-2.ndm5
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,25 +0,0 @@
|
||||
#!/bin/bash
# Fan a locally stored stage script out to every node in the cluster.
#
# Hosts are discovered from /etc/hosts; each one is contacted over SSH and
# fed the stage script on stdin ("bash -s"), so nothing has to be copied to
# the remote side first.
#
# Both settings below honor environment overrides; the defaults match the
# standard Cloudian installation layout.  (Previously these were assigned
# unconditionally, which made the -z check below unreachable and its
# "please set environment variables" message misleading.)
ssh_keyfile="${ssh_keyfile:-/export/home/cloudian/cloudian-installation-key}"

script="${script:-/tmp/nmd/nmd-md5sum-manifest/stages/stage1-gather-node-manifest.sh}"

# Ensure environment variables are set (rejects empty overrides like ssh_keyfile=)
if [[ -z "$ssh_keyfile" || -z "$script" ]]; then
    echo "Error: Please set 'ssh_keyfile' and 'script' environment variables."
    exit 1
fi

# Extract hostnames/IPs from /etc/hosts, skipping comments and localhost
HOSTS=$(grep -v '^#' /etc/hosts | grep -v 'localhost' | awk '{print $2}')

for HOST in $HOSTS; do
    echo "--- Executing on: $HOST ---"

    # Run the local script on the remote host using bash -s
    # -i: Uses the specified ssh_keyfile
    # -o StrictHostKeyChecking=no: Prevents the script from hanging on new host prompts
    ssh -i "$ssh_keyfile" -o StrictHostKeyChecking=no "$HOST" "bash -s" < "$script"

    # The loop naturally waits for the SSH command to finish before the next iteration
    echo "--- Finished: $HOST ---"
done
|
||||
|
||||
@@ -1,55 +0,0 @@
|
||||
|
||||
#!/usr/bin/env bash
# manifest_gen.sh — minimal manifest generator using find + md5sum
# Output: ./manifest-<hostname>.txt  (errors collected in ./manifest-<hostname>.txt.err)
# Format: "<md5> <absolute_path>"
#
# Usage: manifest_gen.sh [root]    (root defaults to /)

set -euo pipefail

HOST="$(hostname -s 2>/dev/null || hostname 2>/dev/null || echo unknown)"
OUT="./manifest-${HOST}.txt"
ROOT="${1:-/}"

# Default excludes (edit here).  Entries are passed verbatim to `find -path`,
# so '/cloudian*' is quoted to stop the shell from glob-expanding it at
# assignment time — find itself interprets the * wildcard.
EXCLUDES=(/proc /sys /dev /run /tmp /var/run /var/lock /lost+found /mnt /media '/cloudian*' /var/lib/cassandra)

ERR="${OUT}.err"
: > "$OUT"
: > "$ERR"

# Canonicalize the scan root (fails fast under set -e if it doesn't exist)
ROOT="$(cd "$ROOT" && pwd -P)"

# Self-describing header so downstream parsers can validate provenance
{
  echo "# manifest_version=md5sum-v1"
  echo "# created_utc=$(date -u +%Y-%m-%dT%H:%M:%SZ)"
  echo "# host=$HOST"
  echo "# root=$ROOT"
  echo -n "# excludes="
  printf "%s " "${EXCLUDES[@]}"
  echo
  echo "# format=<md5> <absolute_path>"
} >> "$OUT"

# Build prune expression: -path A -o -path A/* -o -path B ...
PRUNE=()
for ex in "${EXCLUDES[@]}"; do
  # normalize excludes if they exist; if they don't, still keep as-is
  if [ -e "$ex" ]; then
    ex="$(cd "$ex" 2>/dev/null && pwd -P || echo "$ex")"
  fi
  PRUNE+=( -path "$ex" -o -path "$ex/*" -o )
done
unset 'PRUNE[${#PRUNE[@]}-1]' # drop last -o

# Deterministic ordering if available (GNU sort -z)
if sort -z </dev/null >/dev/null 2>&1; then
  find "$ROOT" \( "${PRUNE[@]}" \) -prune -o -type f -print0 2>>"$ERR" \
    | sort -z 2>>"$ERR" \
    | xargs -0 -r md5sum -- 2>>"$ERR" >> "$OUT"
else
  find "$ROOT" \( "${PRUNE[@]}" \) -prune -o -type f -print0 2>>"$ERR" \
    | xargs -0 -r md5sum -- 2>>"$ERR" >> "$OUT"
fi

echo "Wrote: $OUT" >&2
echo "Errs: $ERR" >&2
|
||||
@@ -1,134 +0,0 @@
|
||||
import os
|
||||
import glob
|
||||
import json
|
||||
import argparse
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
|
||||
def parse_node_manifest(filepath):
    """
    Parse a single node manifest file produced by manifest_gen.sh.

    Header lines look like ``# key=value``; only ``# host=`` is consumed.
    Data lines look like ``<md5> <absolute_path>``.

    Returns:
        host (str | None): hostname from the ``# host=`` header, falling
            back to the manifest's basename when the header is absent;
            ``None`` on read error.
        file_data (dict | None): ``{filepath: md5_hash}``; ``None`` on
            read error.
    """
    host = None
    file_data = {}

    try:
        with open(filepath, 'r') as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue

                # Parse Headers
                if line.startswith('#'):
                    if line.startswith('# host='):
                        # Split only on the first '=' so host values that
                        # themselves contain '=' are preserved intact.
                        host = line.split('=', 1)[1].strip()
                    continue

                # Parse Data (md5 absolute_path)
                # We split by whitespace, maxsplit=1 to preserve spaces in filenames if they exist
                parts = line.split(maxsplit=1)
                if len(parts) == 2:
                    md5, path = parts
                    file_data[path] = md5

    except Exception as e:
        print(f"Error reading file {filepath}: {e}", file=sys.stderr)
        return None, None

    if not host:
        print(f"Warning: No '# host=' header found in {filepath}. Using filename as ID.", file=sys.stderr)
        host = os.path.basename(filepath)

    return host, file_data
|
||||
|
||||
def generate_golden_manifest(input_dir, output_file):
    """
    Aggregate every per-node manifest in *input_dir* into a single JSON
    "ground truth" manifest written to *output_file*.

    The result separates files that are byte-identical on every host
    ("consistent_files") from files that differ or are missing somewhere
    ("varying_files").
    """
    from datetime import datetime, timezone

    manifest_files = glob.glob(os.path.join(input_dir, '*'))

    if not manifest_files:
        print(f"No files found in directory: {input_dir}")
        return

    # 1. Aggregation Phase
    # Structure: global_registry[filepath] = { hostname: hash }
    global_registry = defaultdict(dict)
    all_hosts = set()

    print(f"Scanning {len(manifest_files)} manifests...")

    for fpath in manifest_files:
        # Skip if it's a directory
        if os.path.isdir(fpath):
            continue

        host, data = parse_node_manifest(fpath)
        if host and data:
            all_hosts.add(host)
            for path, md5 in data.items():
                global_registry[path][host] = md5

    # 2. Analysis Phase
    cluster_manifest = {
        "meta": {
            "total_hosts": len(all_hosts),
            "hosts": sorted(all_hosts),
            # Actual UTC creation timestamp.  (The original stored the
            # output file's basename here, which was never a timestamp.)
            "generated_at": datetime.now(timezone.utc).isoformat()
        },
        "consistent_files": {},  # Files identical on ALL hosts
        "varying_files": {}      # Files that differ or are missing on some hosts
    }

    print("Analyzing file consistency...")

    for path, host_map in global_registry.items():
        unique_hashes = set(host_map.values())
        present_on_hosts = set(host_map.keys())

        # Condition 1: Consistent
        # Present on ALL hosts AND has exactly 1 unique hash
        if present_on_hosts == all_hosts and len(unique_hashes) == 1:
            # Store just the hash, as it is the "Ground Truth"
            cluster_manifest["consistent_files"][path] = next(iter(unique_hashes))

        # Condition 2: Varying
        else:
            issue_type = []
            if len(unique_hashes) > 1:
                issue_type.append("hash_mismatch")
            if present_on_hosts != all_hosts:
                issue_type.append("presence_mismatch")

            # For varying files, we need the specific details per host
            # so the comparison tool knows what to expect where.

            # Fill in "MISSING" for hosts that don't have the file
            full_map = host_map.copy()
            for h in all_hosts:
                if h not in full_map:
                    full_map[h] = "MISSING"

            cluster_manifest["varying_files"][path] = {
                "issues": issue_type,
                "states": full_map
            }

    # 3. Output Phase
    try:
        with open(output_file, 'w') as f:
            json.dump(cluster_manifest, f, indent=2, sort_keys=True)
        print(f"Success! Cluster manifest written to: {output_file}")
        print(f"Stats: {len(cluster_manifest['consistent_files'])} consistent files, {len(cluster_manifest['varying_files'])} varying files.")
    except Exception as e:
        print(f"Error writing output file: {e}")
|
||||
|
||||
if __name__ == "__main__":
    # CLI entry point: aggregate per-node manifests into one JSON manifest.
    arg_parser = argparse.ArgumentParser(
        description="Generate a cluster-wide ground truth manifest."
    )
    arg_parser.add_argument("input_dir", help="Directory containing node manifest files")
    arg_parser.add_argument("output_file", help="Path to write the resulting JSON manifest")
    cli_args = arg_parser.parse_args()
    generate_golden_manifest(cli_args.input_dir, cli_args.output_file)
|
||||
@@ -1,168 +0,0 @@
|
||||
import json
|
||||
import argparse
|
||||
import sys
|
||||
import os
|
||||
|
||||
def parse_manifest_file(filepath):
    """
    Parses the target sub-manifest file to extract host and file hashes.

    Returns:
        host (str | None): value of the ``# host=`` header, if present.
        file_data (dict): ``{filepath: md5_hash}``.

    Exits the process with status 1 if the file cannot be read.
    """
    host = None
    file_data = {}

    try:
        with open(filepath, 'r') as f:
            for line in f:
                line = line.strip()
                if not line: continue

                # Extract Header Hostname
                if line.startswith('#'):
                    if line.startswith('# host='):
                        # Split only on the first '=' so host values that
                        # contain '=' survive intact.
                        host = line.split('=', 1)[1].strip()
                    continue

                # Extract Hash and Path
                parts = line.split(maxsplit=1)
                if len(parts) == 2:
                    md5, path = parts
                    file_data[path] = md5

    except Exception as e:
        print(f"Critical Error: Could not read manifest file: {e}")
        sys.exit(1)

    if not host:
        print("Warning: No '# host=' header found. Validation of 'varying' files will be impossible.")

    return host, file_data
|
||||
|
||||
def compare_against_research(manifest_path, research_path):
    """
    Validate one node's sub-manifest against the cluster ground-truth JSON
    and print a human-readable report.

    Args:
        manifest_path: path to the node's partial manifest (md5sum format).
        research_path: path to the cluster ground-truth JSON produced by
            the aggregation tool.

    Never returns: exits with status 1 on any consistency or node-specific
    violation, otherwise exits with status 0.
    """
    # 1. Load the Ground Truth (Research)
    try:
        with open(research_path, 'r') as f:
            research = json.load(f)
    except Exception as e:
        print(f"Critical Error: Could not load research JSON: {e}")
        sys.exit(1)

    # 2. Parse the Sub-Manifest (Target)
    # target_host may be None when the manifest lacks a '# host=' header.
    target_host, target_files = parse_manifest_file(manifest_path)

    print(f"--- VALIDATION REPORT ---")
    print(f"Target Host: {target_host if target_host else 'UNKNOWN'}")
    print(f"Files Scanned: {len(target_files)}")
    print(f"Research Baseline: {research['meta']['total_hosts']} hosts known")
    print("-" * 30)

    # Buckets for results
    results = {
        "ok_consistent": [],  # Matches cluster-wide invariant
        "ok_varying": [],  # Matches expected value for this specific host
        "fail_consistent": [],  # Violated a cluster-wide invariant
        "fail_varying": [],  # Violated the specific expectation for this host
        "unknown_file": [],  # File not present in original research
        "unverifiable_host": []  # File is varying, but we don't know this host
    }

    # 3. The Comparison Logic
    # Every scanned file lands in exactly one bucket: A (consistent),
    # B (varying), or C (unknown to the research).
    for fpath, fhash in target_files.items():

        # CHECK A: Is it a Known Consistent File?
        if fpath in research['consistent_files']:
            expected_hash = research['consistent_files'][fpath]
            if fhash == expected_hash:
                results['ok_consistent'].append(fpath)
            else:
                results['fail_consistent'].append({
                    "path": fpath,
                    "found": fhash,
                    "expected": expected_hash
                })

        # CHECK B: Is it a Known Varying File?
        elif fpath in research['varying_files']:
            # We need the hostname to validate varying files
            if not target_host:
                results['unverifiable_host'].append(fpath)
                continue

            node_states = research['varying_files'][fpath]['states']

            # Does the research know about this host?
            if target_host in node_states:
                expected_hash = node_states[target_host]

                if expected_hash == "MISSING":
                    # We found a file that research says shouldn't exist on this node
                    results['fail_varying'].append({
                        "path": fpath,
                        "found": fhash,
                        "expected": "File should not exist on this node"
                    })
                elif fhash == expected_hash:
                    results['ok_varying'].append(fpath)
                else:
                    results['fail_varying'].append({
                        "path": fpath,
                        "found": fhash,
                        "expected": expected_hash
                    })
            else:
                # This is a new host not in the original research
                # We can't validate varying files for unknown hosts
                results['unverifiable_host'].append(fpath)

        # CHECK C: Unknown File
        else:
            results['unknown_file'].append(fpath)

    # 4. Print Summary
    # OK Sections
    if results['ok_consistent']:
        print(f"\n[PASS] Consistent Files Verified: {len(results['ok_consistent'])}")
    if results['ok_varying']:
        print(f"[PASS] Varying Files Verified: {len(results['ok_varying'])}")

    # FAIL Sections
    if results['fail_consistent']:
        print(f"\n[FAIL] GLOBAL CONSISTENCY VIOLATIONS ({len(results['fail_consistent'])})")
        print("These files MUST be identical across the cluster but differed:")
        for item in results['fail_consistent']:
            print(f" X {item['path']}")
            print(f" Expected: {item['expected']}")
            print(f" Found: {item['found']}")

    if results['fail_varying']:
        print(f"\n[FAIL] NODE SPECIFIC VIOLATIONS ({len(results['fail_varying'])})")
        print(f"These files did not match the expected state for {target_host}:")
        for item in results['fail_varying']:
            print(f" X {item['path']}")
            print(f" Expected: {item['expected']}")
            print(f" Found: {item['found']}")

    # WARNING Sections
    # Warnings are capped at 5 entries each to keep the report readable.
    if results['unknown_file']:
        print(f"\n[WARN] UNKNOWN FILES ({len(results['unknown_file'])})")
        print("These files were not seen in the original research scan:")
        for f in results['unknown_file'][:5]: print(f" ? {f}")
        if len(results['unknown_file']) > 5: print(f" ... and {len(results['unknown_file'])-5} more")

    if results['unverifiable_host']:
        print(f"\n[WARN] UNVERIFIABLE FILES ({len(results['unverifiable_host'])})")
        print("These files vary per-node, but this host is new/unknown to the research:")
        for f in results['unverifiable_host'][:5]: print(f" ? {f}")

    # Exit Code
    # Warnings (unknown/unverifiable) do NOT fail the run — only hard
    # mismatches do.
    if results['fail_consistent'] or results['fail_varying']:
        sys.exit(1)
    sys.exit(0)
|
||||
|
||||
if __name__ == "__main__":
    # CLI entry point: validate one node's partial manifest against the
    # cluster-wide ground-truth JSON.
    cli = argparse.ArgumentParser(
        description="Validate a sub-manifest against cluster research."
    )
    cli.add_argument("sub_manifest", help="The partial manifest file (subdir scan)")
    cli.add_argument("research_json", help="The cluster_ground_truth.json file")
    parsed = cli.parse_args()
    compare_against_research(parsed.sub_manifest, parsed.research_json)
|
||||
Reference in New Issue
Block a user