Observability
Learn how to monitor and diagnose NUMA locality in your application.
StatsCollector
Collects locality metrics using sharded counters:
use numaperf::{StatsCollector, Topology};
use std::sync::Arc;
let topo = Arc::new(Topology::discover()?);
let collector = StatsCollector::new(&topo);
// Record metrics
collector.record_local_execution(); // Work ran locally
collector.record_steal(NodeId::new(1)); // Stole from node 1
collector.record_remote_access(); // Remote memory access
// Take snapshot
let stats = collector.snapshot();
println!("Locality ratio: {:.1}%", stats.locality_ratio() * 100.0);
LocalityStats
Point-in-time snapshot of metrics:
let stats = collector.snapshot();
// Global metrics
println!("Local executions: {}", stats.local_executions());
println!("Remote steals: {}", stats.remote_steals());
println!("Locality ratio: {:.2}", stats.locality_ratio());
// Per-node metrics
for (node_id, node_stats) in stats.per_node() {
println!("Node {}:", node_id.as_u32());
println!(" Local: {}", node_stats.local_executions);
println!(" Steals from: {}", node_stats.steals_from);
println!(" Steals to: {}", node_stats.steals_to);
}
LocalityReport
Diagnostic report with health assessment:
use numaperf::LocalityReport;
let stats = collector.snapshot();
let report = LocalityReport::generate(&stats);
// Print full report
println!("{}", report);
// Check health
println!("Health: {:?}", report.health());
// Get recommendations
if report.has_recommendations() {
println!("Recommendations:");
for rec in report.recommendations() {
println!(" - {}", rec);
}
}
LocalityHealth
Classification of locality effectiveness:
use numaperf::LocalityHealth;
match report.health() {
LocalityHealth::Excellent => println!("95%+ local - optimal"),
LocalityHealth::Good => println!("85-95% local - good"),
LocalityHealth::Fair => println!("70-85% local - acceptable"),
LocalityHealth::Poor => println!("<70% local - needs attention"),
}
// Check acceptability
if report.health().is_acceptable() {
println!("Locality is acceptable");
}
Integration with Executor
Track locality in your workload:
let topo = Arc::new(Topology::discover()?);
let stats = Arc::new(StatsCollector::new(&topo));
let exec = NumaExecutor::builder(Arc::clone(&topo))
.steal_policy(StealPolicy::LocalThenSocketThenRemote)
.build()?;
// Submit work with tracking
for node in topo.numa_nodes() {
let s = Arc::clone(&stats);
exec.submit_to_node(node.id(), move || {
s.record_local_execution();
// Do work...
});
}
exec.shutdown();
// Analyze
let snapshot = stats.snapshot();
let report = LocalityReport::generate(&snapshot);
println!("{}", report);
Periodic Reporting
use std::thread;
use std::time::Duration;
let stats = Arc::new(StatsCollector::new(&topo));
let stats_clone = Arc::clone(&stats);
// Reporter thread
thread::spawn(move || {
loop {
thread::sleep(Duration::from_secs(10));
let snapshot = stats_clone.snapshot();
let report = LocalityReport::generate(&snapshot);
log::info!("Locality: {:.1}%, Health: {:?}",
snapshot.locality_ratio() * 100.0,
report.health());
if !report.health().is_acceptable() {
for rec in report.recommendations() {
log::warn!("Recommendation: {}", rec);
}
}
}
});
Sample Report Output
=== Locality Report ===
Overall Statistics:
Local executions: 9,847
Remote steals: 153
Locality ratio: 98.5%
Per-Node Statistics:
Node 0: 4,923 local, 12 steals from, 8 steals to
Node 1: 4,924 local, 8 steals from, 12 steals to
Health: EXCELLENT
No recommendations - locality is optimal.
Metrics Reference
| Metric | Description |
|---|---|
| `local_executions` | Work items that ran on their submitted node |
| `remote_steals` | Work items stolen from other nodes |
| `locality_ratio` | `local / (local + remote)` |
| `steals_from` | Work stolen FROM this node |
| `steals_to` | Work stolen TO this node |
Best Practices
- Create the collector early: set it up before starting any work
- Record consistently: every work item should be tracked
- Report periodically: don't wait until shutdown
- Act on recommendations: adjust the steal policy or pinning
- Set alerting thresholds: alert when health degrades