#!/usr/bin/perl
#
# Copyright (C) 2010 Steve Schnepp
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; version 2 dated June,
# 1991.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#

use strict;
use warnings;

use English qw(-no_match_vars);
use IO::Socket;
use IO::File;
use File::Path qw(mkpath);
use Getopt::Long;
use Pod::Usage;

use List::Util qw(min max);

use Munin::Common::Daemon;
use Munin::Node::SpoolWriter;
use Munin::Common::Defaults;
use Munin::Common::Logger;

# ---------------------------------------------------------------------------
# Settings.  Each value below is a default; most can be overridden on the
# command line (see GetOptions further down and the POD at the end of file).
# ---------------------------------------------------------------------------

# munin-node endpoint to connect to, as "host:port".
my $host = "localhost:4949";
# Host name parsed from the munin-node banner once we have connected.
my $metahostname;
# Base directory of the spool; one sub directory per node is created below it.
my $SPOOLDIR = $Munin::Common::Defaults::MUNIN_SPOOLDIR;
# Passed to the spool writer as "interval_size" (seconds).
my $intervalsize = 86400;
# Parsed for command line compatibility; nothing in this file reads it.
my $timeout = 3600;
# Step (seconds) used to schedule wake-ups; defaults to $update_rate below.
my $minrate;
# Passed to the spool writer as "interval_keep".
my $retaincount = 7;
# Asyncd collects data from plugins periodically.  We want to avoid skipping the next value due to
# slight deviations of this period.  Thus we tolerate a slightly shorter time distance between two
# successive requests for values from a plugin.
my $min_update_rate_threshold = 0.75;
my $nocleanup;
my $do_fork;
my $verbose;
my $debug;
my $screen;
my $help;
# update_rate appended to a plugin's config when the plugin does not state one.
my $update_rate = 300;
# Node names announced by munin-node in response to "nodes".
my @nodes;
# One spool writer per node, keyed by node name.
my %spoolwriter;

GetOptions(
	"host=s" => \$host,
	"spooldir|s=s" => \$SPOOLDIR,
	"interval|i=i" => \$intervalsize,
	"timeout=i" => \$timeout,
	"minrate=i" => \$minrate,
	"retain|r=i" => \$retaincount,
	"update-rate=i" => \$update_rate,

	"fork" => \$do_fork,

	"help|h" => \$help,
	"verbose|v" => \$verbose,
	"nocleanup|n" => \$nocleanup,
	"debug" => \$debug,
	"screen" => \$screen,
) or pod2usage(1);
if ($help) {
	pod2usage(1);
}

# Only touch the logger configuration when one of the logging options was given.
if ( $verbose || $debug || $screen ) {
	my %log;
	$log{output} = 'screen' if $screen;
	$log{level}  = 'info'   if $verbose;
	$log{level}  = 'debug'  if $debug;
	Munin::Common::Logger::configure(%log);
}

# $minrate defaults to $update_rate.
$minrate = $update_rate if ! defined $minrate;

# The scheduler divides by $minrate and then advances in steps of $minrate
# until it reaches the future.  Zero would die with "Illegal division by zero"
# and a negative value would loop forever, so refuse both right away.
if ($minrate <= 0) {
	pod2usage(
		-message => "--minrate (or --update-rate, when --minrate is not given) must be a positive number of seconds",
		-exitval => 2,
	);
}

# Debug implies Verbose
$verbose = 1 if $debug;

unless (-d $SPOOLDIR) {
	mkpath($SPOOLDIR, { verbose => $verbose, } )
		or LOGCROAK("Cannot create '$SPOOLDIR': $!");
}

# Label used in log lines; switched to "plugin:<name>" while a plugin is fetched.
my $process_name = "main";

# Connect to munin-node.  If the first attempt fails we wait a little and try
# exactly once more before giving up.
my $sock = IO::Socket::INET->new(
	PeerAddr => "$host",
	Proto    => 'tcp',
);
if (!$sock) {
	print STDERR "[$$][$process_name] Failed to connect to munin-node - trying again in a few seconds ...\n" if $verbose;
	sleep 20;
	$sock = IO::Socket::INET->new(
		PeerAddr => "$host",
		Proto    => 'tcp',
	) or LOGCROAK("Error connecting to munin node ($host): $!");
}

# The first line sent by the node is its banner; it is parsed further down.
my $nodeheader = <$sock>;
print STDERR "[sock][>] nodes\n" if $debug;
print $sock "nodes\n";

# get all virtual node names exposed by one physical node
while(my $line = <$sock>) {
	chomp($line);
	print STDERR "[sock][<] $line\n" if $debug;
	last if $line =~ /^\./;

	# Un-taint the node name.  The capture is only used when the match
	# succeeded: a failed match leaves $1 untouched, which would otherwise
	# register a stale or undefined value as a node.  Hyphens are accepted
	# because they are legal in host names.
	if ($line =~ /^([\w.-]+)$/x) {
		push(@nodes, $1);
	}
	else {
		warn "[$$][$process_name] Ignoring node with unsupported name '$line'\n";
	}
}

print $sock "quit\n";
close ($sock);

# The banner may be missing when the node closed the connection right away.
if (defined $nodeheader) {
	( $metahostname ) = ( $nodeheader =~ /munin node at (\S+)\n/);
}
$metahostname = "unknown" unless $metahostname;

# Give every node its own directory below the spool dir and its own spool
# writer working in that directory.
foreach my $node (@nodes) {
	my $node_spooldir = "$SPOOLDIR/$node";

	if (! -d $node_spooldir) {
		mkpath($node_spooldir, { verbose => $verbose, } )
			or die ("Cannot create '$SPOOLDIR/$node': $!");
	}

	my %writer_args = (
		spooldir      => $node_spooldir,
		interval_size => $intervalsize,
		interval_keep => $retaincount,
		hostname      => $metahostname,
	);
	$spoolwriter{$node} = Munin::Node::SpoolWriter->new(%writer_args);
}

# Process title shown while nothing is being fetched.
$0 = "munin-asyncd [$metahostname] [idle]";

# Plugin names per node: { node name => [ plugin, ... ] }.
my $plugins = {};
{
	INFO("[$$][$process_name] Reading config from $host");
	my $sock = IO::Socket::INET->new(
		PeerAddr => "$host",
		Proto    => 'tcp',
	) or die "Error creating socket: $!";

	# The process title is restored when this block is left.
	local $0 = "munin-asyncd [$metahostname] [list]";
	DEBUG("[sock][>] cap multigraph dirtyconfig");
	print $sock "cap multigraph dirtyconfig\n";
	<$sock>; # Read the first header comment line
	<$sock>; # Read the multigraph response line
	foreach my $node (@nodes)
	{
		print STDERR "[sock][>] list $node\n" if $debug;
		print $sock "list $node\n";
		my $plugins_line = <$sock>;
		if (! defined $plugins_line) {
			# The node hung up: carry on with an empty plugin list
			# instead of operating on an undefined value.
			warn "[$$][$process_name] No answer to 'list $node', assuming no plugins\n";
			$plugins_line = "";
		}
		chomp($plugins_line);
		print STDERR "[sock][<] $plugins_line\n" if $debug;
		{
			# Store a sanitised copy of the plugin list in the node's spool dir.
			my $list_file = "$SPOOLDIR/$node/munin-daemon.list";
			my $fh_list = IO::File->new($list_file, "w")
				or die "Cannot open '$list_file' for writing: $!";
			my $sanitised_plugins_line = $plugins_line;
			$sanitised_plugins_line =~ s/[^_A-Za-z0-9 ]/_/g;
			print $fh_list $sanitised_plugins_line;
			print $fh_list "\n";
			# Buffered write errors only show up when the handle is closed.
			$fh_list->close
				or die "Cannot write '$list_file': $!";
		}

		#un-taint plugin_line TODO: more strict?
		# Take the capture straight from the match so that a failed match
		# can never hand us a stale $1.
		my ($untainted_line) = ( $plugins_line =~ /^(.*)$/x );
		$untainted_line = "" unless defined $untainted_line;

		$plugins->{$node} = [ split(/ /, $untainted_line) ];
	}
}

# Loop condition of the main loop; cleared by termhandler().
my $keepgoing = 1;

# Signal dispositions:
#  - CHLD and HUP are ignored (ignoring CHLD is the quick & dirty way of
#    collecting children),
#  - INT and TERM ask the main loop to stop via termhandler().
@SIG{qw(CHLD HUP)} = ('IGNORE') x 2;
@SIG{qw(INT TERM)} = ('termhandler') x 2;

# Signal handler: request termination of the main loop.
sub termhandler {
	$keepgoing = 0;
}

# now, update regularly...
# ... but each plugin in its own process to avoid delay-leaking

# Time slot of the most recent run of each plugin, keyed by "node/plugin" so
# that equally named plugins of different nodes do not share one timestamp.
my %last_updated;
my $last_cleanup=0;
# all preparations are finished: we can notify our caller, that we are ready
Munin::Common::Daemon::emit_sd_notify_message();
MAIN: while($keepgoing) {
	my $when = time;

	# start the next run close to the end of a munin-node update operation
	# (i.e. try to avoid overlapping activities)
	my $when_next = int((int($when / $minrate) + 0.75) * $minrate);
	while ($when_next <= $when) {
		$when_next = $when_next + $minrate;
	}

	foreach my $node (@nodes)
	{
		# Connection to munin-node, opened lazily and shared by all plugins
		# of this node that are fetched in this process.
		my $sock;
		PLUGIN: foreach my $plugin ( @{$plugins->{$node}} ) {
			my $update_key = "$node/$plugin";
			my $previous_update = $last_updated{$update_key} || 0;

			# See if this plugin should be updated
			my $plugin_rate = $spoolwriter{$node}->get_metadata("plugin_rates/$plugin") || 300;
			# the perfect time for the plugin
			my $wanted_plugin_next = $previous_update + $plugin_rate;
			# It is acceptable that plugins are executed slightly too early.
			# (tolerating slight deviations of the request period)
			my $minimum_plugin_next = $previous_update + ($plugin_rate * $min_update_rate_threshold);
			if ($when < $minimum_plugin_next) {
				# not yet, wake me up later, and see next plugin
				if ($wanted_plugin_next < $when_next) {
					$when_next = $wanted_plugin_next;
				}

				next PLUGIN;
			}

			# Should update it - based on wall clock time
			my $should_have_been = $when - ($when % $plugin_rate);
			my $should_be_next = $should_have_been + $plugin_rate;

			$last_updated{$update_key} = $should_have_been;
			if ($should_be_next < $when_next) {
				$when_next = $should_be_next;
			}

			DEBUG("[$$][$process_name] $plugin: should_have_been $should_have_been (" .
				localtime($should_have_been) .
				"), should_be_next: $should_be_next (" .
				localtime($should_be_next) .
				"), when: $when (" .
				localtime($when) .
				"), when_next: $when_next (" .
				localtime($when_next) . ")" );

			if ($do_fork) {
				my $pid = fork();
				if (! defined $pid) {
					# fork() failed.  We are still the daemon itself, so we
					# must not take the child path (which ends in "last MAIN").
					warn "[$$][$process_name] Cannot fork for $plugin: $!, skipping it for this run";
					next PLUGIN;
				}
				# parent, return directly
				next PLUGIN if $pid;
			}

			unless ($sock) {
				$sock = IO::Socket::INET->new(
					PeerAddr => "$host",
					Proto    => 'tcp',
				);
				unless ($sock) {
					warn "Error creating socket: $!, moving to next plugin to try again";
					next PLUGIN;
				}
				<$sock>; # skip header
			}

			# Setting the command name for a useful top information
			$process_name = "plugin:$plugin";
			local $0 = "munin-asyncd [$metahostname] [$process_name]";

			fetch_data($node, $plugin, $when, $sock);

			# We end here if we forked
			last MAIN if $do_fork;
		}

		# All plugins of this node are done: say goodbye on the connection
		# that was opened for this node (if any) and drop it.
		$process_name = "main";
		if ($sock)
		{
			if ( $sock->connected ) {
				print STDERR "[$$][$process_name][>] quit\n" if $verbose;
				print $sock "quit\n" ;
			}
			print STDERR "[$$][$process_name] closing sock\n" if $verbose;
			$sock = undef;
		}

		$spoolwriter{$node}->set_metadata("lastruntime", $when);
	}
	$process_name = "main";

	# Clean spool dir
	if (!$nocleanup && $last_cleanup<(time - 600)) {
		$last_cleanup = time;
		foreach ( @nodes ) { $spoolwriter{$_}->cleanup() };
	}

	# Sleep until next plugin exec.
	my $sleep_sec = $when_next - time;

	# "sleep" expects an unsigned integer - thus we may not let a wrapped number slip through.
	if ($sleep_sec > 0) {
		INFO("[$$][$process_name] Sleeping $sleep_sec sec");
		sleep $sleep_sec;
	} else {
		INFO("[$$][$process_name] Already late : should sleep $sleep_sec sec\n");
	}
}

print STDERR "[$$][$process_name] Exiting\n" if $verbose;

# Ask munin-node for the config of one plugin and - unless the config answer
# already contained values ("dirty config") - for its data, then hand all
# collected lines to the spool writer of the node.
#
# Arguments (positional):
#   $node   - node name, selects the entry of %spoolwriter
#   $plugin - name of the plugin to query
#   $when   - timestamp handed to the spool writer together with the lines
#   $sock   - connected socket to munin-node
#
# Returns nothing (and writes nothing) when the connection ended before the
# node sent the terminating "." line, so that truncated answers never reach
# the spool.  Otherwise returns whatever the spool writer's write() returns.
sub fetch_data
{
	my ($node, $plugin, $when, $sock) = @_;

	print STDERR "[$$][$process_name][>][$plugin] asking for config\n" if $verbose;

	print STDERR "[$$][$process_name][>][$plugin][sock] config $plugin\n" if $debug;
	print $sock "config $plugin\n";

	my $is_dirtyconfig;
	my $output_rows = [];

	# Set once the terminating "." line of an answer has been seen.
	my $config_complete = 0;
	while(my $line = <$sock>) {
		chomp($line);
		print STDERR "[$$][$process_name][<][$plugin][sock] $line\n" if $debug;

		if ($line =~ m/^\./) {
			# Starting with . => end
			$config_complete = 1;
			last;
		}

		push @$output_rows, $line;
		if ($line =~ m/^update_rate (\d+)/) {
			# The plugin has a special update_rate: overriding it
			# XXX - Doesn't take into account a per field update_rate

			# This has to be sent back to the master
			$spoolwriter{$node}->set_metadata("plugin_rates/$plugin", $1);
		}

		$is_dirtyconfig = 1 if ($line =~ m/^[^.]+\.value /);
	}
	unless ($config_complete) {
		warn "[$$][$process_name][$plugin] connection lost while reading config, not spooling incomplete data\n";
		return;
	}

	# handle the special case of cli update_rate
	if ($update_rate && ! grep { $_ =~ m/^update_rate (\d+)/ } @$output_rows ) {
		push @$output_rows, "update_rate $update_rate";
	}

	# if dirty config, bypass the "fetch" part
	unless ($is_dirtyconfig) {
		INFO("[$$][$process_name][>][$plugin] asking for data\n");
		DEBUG("[sock][>][$plugin] fetch $plugin\n");
		print $sock "fetch $plugin\n";

		my $fetch_complete = 0;
		while(my $line = <$sock>) {
			chomp($line);
			DEBUG("[sock][<][$plugin] $line");

			if ($line =~ m/^\./) {
				# Starting with . => end
				$fetch_complete = 1;
				last;
			}

			# Save the line
			push @$output_rows, $line;
		}
		unless ($fetch_complete) {
			warn "[$$][$process_name][$plugin] connection lost while reading data, not spooling incomplete data\n";
			return;
		}
	}

	# Write the whole load into the spool
	$spoolwriter{$node}->write($when, $plugin, $output_rows);
}

__END__

=head1 NAME

munin-asyncd - A program to spool munin-node calls

=head1 SYNOPSIS

munin-asyncd [options]

 Options:
        --host <hostname:port>  Connect to this munin-node [localhost:4949]
     -s --spooldir <spooldir>   Store the spooled data in this dir [@@SPOOLDIR@@]
     -i --interval <seconds>    Override default interval size of one day [86400]
        --update-rate <seconds> Override default update_rate [300]
        --timeout <seconds>     Wake up at least once every this many seconds [3600]
        --minrate <seconds>     This is the minimal rate you want to poll a node [$update_rate]
                                Note that having $update_rate < $minrate leads to unexpected results.
     -r --retain <count>        Specify number of interval files to retain [7]
     -n --nocleanup             Disable automated spool dir cleanup

        --fork                  Do fork
        --screen                Log to screen instead of syslog
        --debug                 Log debug messages
     -v --verbose               Be verbose
     -h --help                  View this message
