8-2-2 testcluster
This commit is contained in:
229557
clusterwide-file-manifest/8-2-2/manifest-8-2-2.ndm5
Normal file
229557
clusterwide-file-manifest/8-2-2/manifest-8-2-2.ndm5
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,25 +0,0 @@
|
||||
#!/bin/bash
# Fan a locally stored stage script out to every node in the cluster.
#
# Hosts are discovered from /etc/hosts; each one is contacted over SSH and
# fed the stage script on stdin ("bash -s"), so nothing has to be copied to
# the remote side first.
#
# Both settings below honor environment overrides; the defaults match the
# standard Cloudian installation layout.  (Previously these were assigned
# unconditionally, which made the -z check below unreachable and its
# "please set environment variables" message misleading.)
ssh_keyfile="${ssh_keyfile:-/export/home/cloudian/cloudian-installation-key}"

script="${script:-/tmp/nmd/nmd-md5sum-manifest/stages/stage1-gather-node-manifest.sh}"

# Ensure environment variables are set (rejects empty overrides like ssh_keyfile=)
if [[ -z "$ssh_keyfile" || -z "$script" ]]; then
    echo "Error: Please set 'ssh_keyfile' and 'script' environment variables."
    exit 1
fi

# Extract hostnames/IPs from /etc/hosts, skipping comments and localhost
HOSTS=$(grep -v '^#' /etc/hosts | grep -v 'localhost' | awk '{print $2}')

for HOST in $HOSTS; do
    echo "--- Executing on: $HOST ---"

    # Run the local script on the remote host using bash -s
    # -i: Uses the specified ssh_keyfile
    # -o StrictHostKeyChecking=no: Prevents the script from hanging on new host prompts
    ssh -i "$ssh_keyfile" -o StrictHostKeyChecking=no "$HOST" "bash -s" < "$script"

    # The loop naturally waits for the SSH command to finish before the next iteration
    echo "--- Finished: $HOST ---"
done
|
||||
|
||||
@@ -1,55 +0,0 @@
|
||||
|
||||
#!/usr/bin/env bash
# manifest_gen.sh — minimal manifest generator using find + md5sum
# Output: ./manifest-<hostname>.txt  (errors collected in ./manifest-<hostname>.txt.err)
# Format: "<md5> <absolute_path>"
#
# Usage: manifest_gen.sh [root]    (root defaults to /)

set -euo pipefail

HOST="$(hostname -s 2>/dev/null || hostname 2>/dev/null || echo unknown)"
OUT="./manifest-${HOST}.txt"
ROOT="${1:-/}"

# Default excludes (edit here).  Entries are passed verbatim to `find -path`,
# so '/cloudian*' is quoted to stop the shell from glob-expanding it at
# assignment time — find itself interprets the * wildcard.
EXCLUDES=(/proc /sys /dev /run /tmp /var/run /var/lock /lost+found /mnt /media '/cloudian*' /var/lib/cassandra)

ERR="${OUT}.err"
: > "$OUT"
: > "$ERR"

# Canonicalize the scan root (fails fast under set -e if it doesn't exist)
ROOT="$(cd "$ROOT" && pwd -P)"

# Self-describing header so downstream parsers can validate provenance
{
  echo "# manifest_version=md5sum-v1"
  echo "# created_utc=$(date -u +%Y-%m-%dT%H:%M:%SZ)"
  echo "# host=$HOST"
  echo "# root=$ROOT"
  echo -n "# excludes="
  printf "%s " "${EXCLUDES[@]}"
  echo
  echo "# format=<md5> <absolute_path>"
} >> "$OUT"

# Build prune expression: -path A -o -path A/* -o -path B ...
PRUNE=()
for ex in "${EXCLUDES[@]}"; do
  # normalize excludes if they exist; if they don't, still keep as-is
  if [ -e "$ex" ]; then
    ex="$(cd "$ex" 2>/dev/null && pwd -P || echo "$ex")"
  fi
  PRUNE+=( -path "$ex" -o -path "$ex/*" -o )
done
unset 'PRUNE[${#PRUNE[@]}-1]' # drop last -o

# Deterministic ordering if available (GNU sort -z)
if sort -z </dev/null >/dev/null 2>&1; then
  find "$ROOT" \( "${PRUNE[@]}" \) -prune -o -type f -print0 2>>"$ERR" \
    | sort -z 2>>"$ERR" \
    | xargs -0 -r md5sum -- 2>>"$ERR" >> "$OUT"
else
  find "$ROOT" \( "${PRUNE[@]}" \) -prune -o -type f -print0 2>>"$ERR" \
    | xargs -0 -r md5sum -- 2>>"$ERR" >> "$OUT"
fi

echo "Wrote: $OUT" >&2
echo "Errs: $ERR" >&2
|
||||
@@ -1,134 +0,0 @@
|
||||
import os
|
||||
import glob
|
||||
import json
|
||||
import argparse
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
|
||||
def parse_node_manifest(filepath):
    """
    Parse a single node manifest file produced by manifest_gen.sh.

    Header lines look like ``# key=value``; only ``# host=`` is consumed.
    Data lines look like ``<md5> <absolute_path>``.

    Returns:
        host (str | None): hostname from the ``# host=`` header, falling
            back to the manifest's basename when the header is absent;
            ``None`` on read error.
        file_data (dict | None): ``{filepath: md5_hash}``; ``None`` on
            read error.
    """
    host = None
    file_data = {}

    try:
        with open(filepath, 'r') as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue

                # Parse Headers
                if line.startswith('#'):
                    if line.startswith('# host='):
                        # Split only on the first '=' so host values that
                        # themselves contain '=' are preserved intact.
                        host = line.split('=', 1)[1].strip()
                    continue

                # Parse Data (md5 absolute_path)
                # We split by whitespace, maxsplit=1 to preserve spaces in filenames if they exist
                parts = line.split(maxsplit=1)
                if len(parts) == 2:
                    md5, path = parts
                    file_data[path] = md5

    except Exception as e:
        print(f"Error reading file {filepath}: {e}", file=sys.stderr)
        return None, None

    if not host:
        print(f"Warning: No '# host=' header found in {filepath}. Using filename as ID.", file=sys.stderr)
        host = os.path.basename(filepath)

    return host, file_data
|
||||
|
||||
def generate_golden_manifest(input_dir, output_file):
    """
    Aggregate every per-node manifest in *input_dir* into a single JSON
    "ground truth" manifest written to *output_file*.

    The result separates files that are byte-identical on every host
    ("consistent_files") from files that differ or are missing somewhere
    ("varying_files").
    """
    from datetime import datetime, timezone

    manifest_files = glob.glob(os.path.join(input_dir, '*'))

    if not manifest_files:
        print(f"No files found in directory: {input_dir}")
        return

    # 1. Aggregation Phase
    # Structure: global_registry[filepath] = { hostname: hash }
    global_registry = defaultdict(dict)
    all_hosts = set()

    print(f"Scanning {len(manifest_files)} manifests...")

    for fpath in manifest_files:
        # Skip if it's a directory
        if os.path.isdir(fpath):
            continue

        host, data = parse_node_manifest(fpath)
        if host and data:
            all_hosts.add(host)
            for path, md5 in data.items():
                global_registry[path][host] = md5

    # 2. Analysis Phase
    cluster_manifest = {
        "meta": {
            "total_hosts": len(all_hosts),
            "hosts": sorted(all_hosts),
            # Actual UTC creation timestamp.  (The original stored the
            # output file's basename here, which was never a timestamp.)
            "generated_at": datetime.now(timezone.utc).isoformat()
        },
        "consistent_files": {},  # Files identical on ALL hosts
        "varying_files": {}      # Files that differ or are missing on some hosts
    }

    print("Analyzing file consistency...")

    for path, host_map in global_registry.items():
        unique_hashes = set(host_map.values())
        present_on_hosts = set(host_map.keys())

        # Condition 1: Consistent
        # Present on ALL hosts AND has exactly 1 unique hash
        if present_on_hosts == all_hosts and len(unique_hashes) == 1:
            # Store just the hash, as it is the "Ground Truth"
            cluster_manifest["consistent_files"][path] = next(iter(unique_hashes))

        # Condition 2: Varying
        else:
            issue_type = []
            if len(unique_hashes) > 1:
                issue_type.append("hash_mismatch")
            if present_on_hosts != all_hosts:
                issue_type.append("presence_mismatch")

            # For varying files, we need the specific details per host
            # so the comparison tool knows what to expect where.

            # Fill in "MISSING" for hosts that don't have the file
            full_map = host_map.copy()
            for h in all_hosts:
                if h not in full_map:
                    full_map[h] = "MISSING"

            cluster_manifest["varying_files"][path] = {
                "issues": issue_type,
                "states": full_map
            }

    # 3. Output Phase
    try:
        with open(output_file, 'w') as f:
            json.dump(cluster_manifest, f, indent=2, sort_keys=True)
        print(f"Success! Cluster manifest written to: {output_file}")
        print(f"Stats: {len(cluster_manifest['consistent_files'])} consistent files, {len(cluster_manifest['varying_files'])} varying files.")
    except Exception as e:
        print(f"Error writing output file: {e}")
|
||||
|
||||
if __name__ == "__main__":
    # CLI entry point: aggregate per-node manifests into one JSON manifest.
    arg_parser = argparse.ArgumentParser(
        description="Generate a cluster-wide ground truth manifest."
    )
    arg_parser.add_argument("input_dir", help="Directory containing node manifest files")
    arg_parser.add_argument("output_file", help="Path to write the resulting JSON manifest")
    cli_args = arg_parser.parse_args()
    generate_golden_manifest(cli_args.input_dir, cli_args.output_file)
|
||||
@@ -1,168 +0,0 @@
|
||||
import json
|
||||
import argparse
|
||||
import sys
|
||||
import os
|
||||
|
||||
def parse_manifest_file(filepath):
    """
    Parses the target sub-manifest file to extract host and file hashes.

    Returns:
        host (str | None): value of the ``# host=`` header, if present.
        file_data (dict): ``{filepath: md5_hash}``.

    Exits the process with status 1 if the file cannot be read.
    """
    host = None
    file_data = {}

    try:
        with open(filepath, 'r') as f:
            for line in f:
                line = line.strip()
                if not line: continue

                # Extract Header Hostname
                if line.startswith('#'):
                    if line.startswith('# host='):
                        # Split only on the first '=' so host values that
                        # contain '=' survive intact.
                        host = line.split('=', 1)[1].strip()
                    continue

                # Extract Hash and Path
                parts = line.split(maxsplit=1)
                if len(parts) == 2:
                    md5, path = parts
                    file_data[path] = md5

    except Exception as e:
        print(f"Critical Error: Could not read manifest file: {e}")
        sys.exit(1)

    if not host:
        print("Warning: No '# host=' header found. Validation of 'varying' files will be impossible.")

    return host, file_data
|
||||
|
||||
def compare_against_research(manifest_path, research_path):
    """
    Validate one node's sub-manifest against the cluster ground-truth JSON
    and print a human-readable report.

    Args:
        manifest_path: path to the node's partial manifest (md5sum format).
        research_path: path to the cluster ground-truth JSON produced by
            the aggregation tool.

    Never returns: exits with status 1 on any consistency or node-specific
    violation, otherwise exits with status 0.
    """
    # 1. Load the Ground Truth (Research)
    try:
        with open(research_path, 'r') as f:
            research = json.load(f)
    except Exception as e:
        print(f"Critical Error: Could not load research JSON: {e}")
        sys.exit(1)

    # 2. Parse the Sub-Manifest (Target)
    # target_host may be None when the manifest lacks a '# host=' header.
    target_host, target_files = parse_manifest_file(manifest_path)

    print(f"--- VALIDATION REPORT ---")
    print(f"Target Host: {target_host if target_host else 'UNKNOWN'}")
    print(f"Files Scanned: {len(target_files)}")
    print(f"Research Baseline: {research['meta']['total_hosts']} hosts known")
    print("-" * 30)

    # Buckets for results
    results = {
        "ok_consistent": [],  # Matches cluster-wide invariant
        "ok_varying": [],  # Matches expected value for this specific host
        "fail_consistent": [],  # Violated a cluster-wide invariant
        "fail_varying": [],  # Violated the specific expectation for this host
        "unknown_file": [],  # File not present in original research
        "unverifiable_host": []  # File is varying, but we don't know this host
    }

    # 3. The Comparison Logic
    # Every scanned file lands in exactly one bucket: A (consistent),
    # B (varying), or C (unknown to the research).
    for fpath, fhash in target_files.items():

        # CHECK A: Is it a Known Consistent File?
        if fpath in research['consistent_files']:
            expected_hash = research['consistent_files'][fpath]
            if fhash == expected_hash:
                results['ok_consistent'].append(fpath)
            else:
                results['fail_consistent'].append({
                    "path": fpath,
                    "found": fhash,
                    "expected": expected_hash
                })

        # CHECK B: Is it a Known Varying File?
        elif fpath in research['varying_files']:
            # We need the hostname to validate varying files
            if not target_host:
                results['unverifiable_host'].append(fpath)
                continue

            node_states = research['varying_files'][fpath]['states']

            # Does the research know about this host?
            if target_host in node_states:
                expected_hash = node_states[target_host]

                if expected_hash == "MISSING":
                    # We found a file that research says shouldn't exist on this node
                    results['fail_varying'].append({
                        "path": fpath,
                        "found": fhash,
                        "expected": "File should not exist on this node"
                    })
                elif fhash == expected_hash:
                    results['ok_varying'].append(fpath)
                else:
                    results['fail_varying'].append({
                        "path": fpath,
                        "found": fhash,
                        "expected": expected_hash
                    })
            else:
                # This is a new host not in the original research
                # We can't validate varying files for unknown hosts
                results['unverifiable_host'].append(fpath)

        # CHECK C: Unknown File
        else:
            results['unknown_file'].append(fpath)

    # 4. Print Summary
    # OK Sections
    if results['ok_consistent']:
        print(f"\n[PASS] Consistent Files Verified: {len(results['ok_consistent'])}")
    if results['ok_varying']:
        print(f"[PASS] Varying Files Verified: {len(results['ok_varying'])}")

    # FAIL Sections
    if results['fail_consistent']:
        print(f"\n[FAIL] GLOBAL CONSISTENCY VIOLATIONS ({len(results['fail_consistent'])})")
        print("These files MUST be identical across the cluster but differed:")
        for item in results['fail_consistent']:
            print(f" X {item['path']}")
            print(f" Expected: {item['expected']}")
            print(f" Found: {item['found']}")

    if results['fail_varying']:
        print(f"\n[FAIL] NODE SPECIFIC VIOLATIONS ({len(results['fail_varying'])})")
        print(f"These files did not match the expected state for {target_host}:")
        for item in results['fail_varying']:
            print(f" X {item['path']}")
            print(f" Expected: {item['expected']}")
            print(f" Found: {item['found']}")

    # WARNING Sections
    # Warnings are capped at 5 entries each to keep the report readable.
    if results['unknown_file']:
        print(f"\n[WARN] UNKNOWN FILES ({len(results['unknown_file'])})")
        print("These files were not seen in the original research scan:")
        for f in results['unknown_file'][:5]: print(f" ? {f}")
        if len(results['unknown_file']) > 5: print(f" ... and {len(results['unknown_file'])-5} more")

    if results['unverifiable_host']:
        print(f"\n[WARN] UNVERIFIABLE FILES ({len(results['unverifiable_host'])})")
        print("These files vary per-node, but this host is new/unknown to the research:")
        for f in results['unverifiable_host'][:5]: print(f" ? {f}")

    # Exit Code
    # Warnings (unknown/unverifiable) do NOT fail the run — only hard
    # mismatches do.
    if results['fail_consistent'] or results['fail_varying']:
        sys.exit(1)
    sys.exit(0)
|
||||
|
||||
if __name__ == "__main__":
    # CLI entry point: validate one node's partial manifest against the
    # cluster-wide ground-truth JSON.
    cli = argparse.ArgumentParser(
        description="Validate a sub-manifest against cluster research."
    )
    cli.add_argument("sub_manifest", help="The partial manifest file (subdir scan)")
    cli.add_argument("research_json", help="The cluster_ground_truth.json file")
    parsed = cli.parse_args()
    compare_against_research(parsed.sub_manifest, parsed.research_json)
|
||||
Reference in New Issue
Block a user