Grid Services Monitoring Working Group
Displaying Aggregated Service Metrics in Ganglia
The Nagios-based monitoring prototype is distributed with a "publisher" .cgi script to calculate aggregate service metrics and publish these through the web interface as XML.
This document describes a simple integration of this interface, allowing aggregated metrics to be displayed in graphical form in the
Ganglia
monitoring system.
- Install a cron job (e.g. on the Ganglia server) to fetch the aggregate metrics published on the Nagios server and push them into Ganglia. Note that Ganglia 3.0.4 or above is required (because of the use of gmetric host 'spoofing').
/opt/lcg/sbin/wlcg2gmetric.pl -s CERN_PPS -u
https://pps-monitoring.cern.ch/cgi-bin/publisher.cgi
-v
- Patch two PHP files of the Ganglia web installation - graph.php and cluster_view.php - which are located in /var/www/html/ganglia by default.
*** graph.php 2007-07-30 17:10:16.000000000 +0200
--- graph.php_iann 2007-07-30 17:06:00.000000000 +0200
***************
*** 210,217 ****
--- 210,241 ----
."DEF:'bytes_out'='${rrd_dir}/pkts_out.rrd':'sum':AVERAGE "
."LINE2:'bytes_in'#$mem_cached_color:'In' "
."LINE2:'bytes_out'#$mem_used_color:'Out' ";
}
+ #
+ # Handle aggregate WLCG metrics
+ # (eg. sam, local and npm, but this code does not care
+ # and looks for org.wlcg.aggregate-status.xxxx_report)
+ #
+ else if (strncmp($graph,"org.wlcg.aggregate-status.",26) == 0 )
+ {
+ # Construct the name of the rrd file from the graph name
+ $metric = substr($graph,0,strlen($graph)-7);
+ # Chop the graph name up to make a useful title string
+ $style = substr($metric,4,strlen($metric)-4);
+ $style = substr($metric,4,strlen($metric)-4);
+
+ $upper_limit = "--upper-limit 10";
+ $lower_limit = "--lower-limit 0";
+
+ $vertical_label = "--vertical-label Services ";
+
+ $series ="DEF:'ok'='${rrd_dir}/${metric}.rrd':'sum':AVERAGE "
+ ."DEF:'total'='${rrd_dir}/${metric}.rrd':'num':AVERAGE "
+ ."CDEF:'crit'=total,ok,- "
+ ."AREA:'ok'#00FF00:'OK' "
+ ."STACK:'crit'#FF0000:'CRITICAL' ";
+ }
else
{
/* Got a strange value for $graph */
exit();
*** cluster_view.php 2007-07-30 17:18:54.000000000 +0200
--- cluster_view.php_iann 2007-07-30 17:18:14.000000000 +0200
***************
*** 36,44 ****
#
$graph_args = "c=$cluster_url&$get_metric_string&st=$cluster[LOCALTIME]";
$tpl->assign("graph_args", $graph_args);
if (!isset($optional_graphs))
! $optional_graphs = array();
foreach ($optional_graphs as $g) {
$tpl->newBlock('optional_graphs');
$tpl->assign('name',$g);
$tpl->assign('graph_args',$graph_args);
--- 36,46 ----
#
$graph_args = "c=$cluster_url&$get_metric_string&st=$cluster[LOCALTIME]";
$tpl->assign("graph_args", $graph_args);
if (!isset($optional_graphs))
! $optional_graphs = array("org.wlcg.aggregate-status.sam",
! "org.wlcg.aggregate-status.local",
! "org.wlcg.aggregate-status.npm");
foreach ($optional_graphs as $g) {
$tpl->newBlock('optional_graphs');
$tpl->assign('name',$g);
$tpl->assign('graph_args',$graph_args);
--
IanNeilson - 30 Jul 2007