/*
 * Common functions of watchdog drivers that support pretimeout mechanics.
 *
 * This file is licensed under  the terms of the GNU General Public
 * License version 2. This program is licensed "as is" without any
 * warranty of any kind, whether express or implied.
 */

#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/smp.h>
#include <asm/cacheflush.h>
#include <linux/delay.h>
#include <linux/kexec.h>
#include "watchdog_pretimeout.h"

/* Values accepted by the pretimeout_action module parameter. */
enum {
	PRETIMEOUT_DISABLED	= 0,	/* do not register a pretimeout */
	PRETIMEOUT_HANG		= 1,	/* flush caches and hang until HW reset */
	PRETIMEOUT_PANIC	= 2,	/* panic */
	PRETIMEOUT_DIRECT_KEXEC	= 3,	/* kexec directly before panicking */
};

/* Action to take on pre-timeout; hanging is the default. */
int pretimeout_action = PRETIMEOUT_HANG;

/*
 * Tell callers whether the pretimeout mechanism is enabled: true for every
 * pretimeout_action value other than PRETIMEOUT_DISABLED.
 */
bool watchdog_pretimeout_enabled(void)
{
	if (pretimeout_action == PRETIMEOUT_DISABLED)
		return false;

	return true;
}
EXPORT_SYMBOL_GPL(watchdog_pretimeout_enabled);

/*
 * Tell callers whether the configured pre-timeout action is
 * PRETIMEOUT_HANG; false for any other pretimeout_action value.
 */
bool watchdog_pretimeout_will_hang(void)
{
	if (pretimeout_action != PRETIMEOUT_HANG)
		return false;

	return true;
}
EXPORT_SYMBOL_GPL(watchdog_pretimeout_will_hang);

/* What percent of the watchdog timeout generates a pre-timeout */
int pretimeout_percent = 90;

/*
 * Return the point, as a percentage of the watchdog timeout, at which the
 * pre-timeout should fire.
 *
 * pretimeout_percent is a writable integer module parameter that is not
 * range checked when it is set, so the value is clamped here to the
 * documented 0..100 range instead of handing callers a negative or >100%
 * figure. The global is read once so the checks and the returned value
 * agree even if the parameter is rewritten concurrently.
 */
int watchdog_pretimeout_percent(void)
{
	int percent = pretimeout_percent;

	if (percent < 0)
		return 0;
	if (percent > 100)
		return 100;

	return percent;
}
EXPORT_SYMBOL_GPL(watchdog_pretimeout_percent);

/*
 * If a pretimeout occurs, this is used to allow only one panic to happen.
 * It starts at -1 so that the first atomic_inc_and_test() on it brings the
 * count to zero and succeeds; increments from any later CPU do not.
 */
static atomic_t preop_panic_excl = ATOMIC_INIT(-1);

/*
 * Default hang action: busy-wait on the calling CPU forever, invoking
 * cpu_relax() on every pass. This function never returns.
 */
static void pretimeout_hang(void)
{
	while (1)
		cpu_relax();
}

/* hang function pointer. A default is provided but can be overridden */
void (*pretimeout_hang_fn)(void) = pretimeout_hang;

/*
 * Install the routine that the pre-timeout handler calls as its final step.
 *
 * @hang_fn: replacement hang routine, or NULL to go back to the built-in
 *           spin loop.
 *
 * The handler invokes pretimeout_hang_fn unconditionally, so a NULL pointer
 * must never be stored: NULL is mapped to the default instead. This also
 * gives callers a way to undo an override, since the default routine is
 * static to this file.
 */
void watchdog_pretimeout_set_hang_fn(void (*hang_fn)(void))
{
	pretimeout_hang_fn = hang_fn ? hang_fn : pretimeout_hang;
}
EXPORT_SYMBOL_GPL(watchdog_pretimeout_set_hang_fn);

/*
 * The per-core pre-watchdog handler, intended to be used as an NMI.
 * Ultimately, we want to get a call stack for each CPU into the
 * dmesg buffer, send at least a minimal message out the serial port,
 * and get into the kdump crash kernel.
 *
 * Two paths into the crash kernel are:
 * 1. Spin all CPUs until the hardware watchdog resets us and a bootloader
 *    jumps into the crash kernel, or
 * 2. Spin all but one CPU, which will do the crash_kexec.
 *
 * In the case of #1, the most important job of this NMI is to flush all
 * caches so that DRAM is in a coherent state. To that end, this function
 * does many cache flushes as it works.
 *
 * The case of #2 is "cleaner" and does not require coupling between the
 * kernel and the boot loader, but it may be less reliable since we're
 * not resetting nearly as much hardware.
 *
 * Order of work on each CPU: flush caches, record a trace-buffer stack,
 * dump the stack to the log under a lock, emit a short emergency message,
 * then either let one CPU kexec/panic or park in pretimeout_hang_fn().
 */
static inline void pretimeout_handle(void)
{
	/* Function-local static: one lock shared by every CPU in this handler */
	static DEFINE_SPINLOCK(nmi_dumpstack_lock);
	/* Only used to tag the log messages below with the CPU number */
	int cpu = raw_smp_processor_id();

#ifdef CONFIG_AMP_WATCHDOG_HACK
	/*
	 * Runs before anything else in the handler.
	 * NOTE(review): the function is defined elsewhere; what it repairs is
	 * not visible from this file.
	 */
	extern void kexec_on_watchdog_repair_header(void);
	kexec_on_watchdog_repair_header();
#endif

	/* Make DRAM coherent for kexec on watchdog. */
	flush_cache_all();
	mb();

	/*
	 * The remainder of this function tries to collect additional debug.
	 * This may cause some dirty cache lines to be evicted leaving DRAM in
	 * a temporarily incoherent state again. If a watchdog reset happens
	 * during such an incoherent state, the most likely damage is corrupted
	 * information collected after this point. We believe the value of more
	 * debug information outweighs the risk of causing additional damage.
	 */

#ifdef CONFIG_TRACING
	trace_puts("WATCHDOG NMI\n");
#endif
	trace_dump_stack(0);
	/* Push above traces to DRAM in case work below takes too long */
	flush_cache_all();
	mb();

	/* Serialize callstacks in dmesg buffer from other CPUs */
	spin_lock(&nmi_dumpstack_lock);

	/*
	 * Blow through any console output that was interrupted by this NMI.
	 * This is equivalent to bust_spinlocks(1) on ARM, but we have to
	 * directly increment the global instead because bust_spinlocks()
	 * is not exported. The increment is done while holding
	 * nmi_dumpstack_lock and is never undone in this function.
	 */
	++oops_in_progress;
	printk(KERN_ERR "WDOG%d Dump:\n", cpu);
	dump_stack();
	spin_unlock(&nmi_dumpstack_lock);
	/* Push the stack dump out to DRAM before doing the slow serial write */
	flush_cache_all();
	mb();

	/*
	 * Get a minimal message out serial. Minimize length of this msg
	 * since it is slow, risking HW reset while writing.
	 */
	printk(KERN_EMERG "WDOG%d\n", cpu);
	flush_cache_all();
	mb();
	/*
	 * Unless the configured action is "hang", exactly one CPU - the first
	 * whose atomic_inc_and_test() takes preop_panic_excl from -1 to 0 -
	 * goes on to kexec/panic. The && short-circuits, so the counter is
	 * not touched at all in hang mode.
	 */
	if ((pretimeout_action != PRETIMEOUT_HANG)
			&& atomic_inc_and_test(&preop_panic_excl)) {
		/*
		 * While it would seem better to actually coordinate with
		 * other cores, we still want a timeout in case one of them
		 * never responds to the NMI. So, for simplicity, we'll just
		 * wait a hard-coded amount of time and then proceed.
		 */
		mdelay(1000);

		if (pretimeout_action == PRETIMEOUT_DIRECT_KEXEC) {
			/* Returns only if crash kernel not loaded */
			crash_kexec(NULL);
		}
		/* Plain panic action, or fallback when crash_kexec() returned */
		panic("Watchdog pre-timeout");
	}

	/*
	 * Reached in hang mode and by every CPU that did not win the panic
	 * race above: hand this CPU to the (possibly overridden) hang hook.
	 */
	pretimeout_hang_fn();
}

/*
 * Exported entry point for pre-timeout handling. Does nothing when
 * pretimeouts are disabled; otherwise runs the common per-CPU handler.
 */
void watchdog_pretimeout_handle(void)
{
	if (watchdog_pretimeout_enabled())
		pretimeout_handle();
}
EXPORT_SYMBOL_GPL(watchdog_pretimeout_handle);

/* Both parameters are plain ints with owner read/write (0600) permissions. */
module_param(pretimeout_percent, int, 0600);
MODULE_PARM_DESC(pretimeout_percent,
	"At what percentage of the timeout should a pre-watchdog NMI occur 0..100");

/* The numeric values below correspond to the PRETIMEOUT_* constants. */
module_param(pretimeout_action, int, 0600);
MODULE_PARM_DESC(pretimeout_action,
	"Action for pretimeout. "
	"0 = do not register a pretimeout. "
	"1 = flush caches and hang, waiting for HW reset (default). "
	"2 = panic. 3 = kexec directly, skipping much panic code.");
