/*
 * Pegmatite cpuidle driver
 *
 * Copyright (c) 2014 Lexmark International Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/kernel.h>
#include <linux/of.h>
#include <linux/io.h>
#include <linux/cpuidle.h>
#include <linux/cpu_pm.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/clockchips.h>
#include <linux/irqchip/arm-gic.h>
#include <linux/tick.h>

#include <asm/cpuidle.h>

#include <memory/mckinley5.h>

/*
 * Optional sleep hook. When this pointer is non-NULL, pegmatite_do_idle()
 * calls it with the recorded C-state index and the value of
 * tick_nohz_get_sleep_length() converted to microseconds, instead of calling
 * mckinley_do_idle(). Nothing in this file assigns it; it is exported so that
 * code outside this file can install a handler.
 */
int (*mck_sleep_sr)(int c_state, int wake_us);
EXPORT_SYMBOL(mck_sleep_sr);

/* Driver-wide state shared by the idle-entry callbacks. */
struct pegmatite_cpuidle {
	/* Handle obtained from of_get_mckinley_idle() at probe time. */
	struct mckinley_idle *mc;
	/* Index of the idle state most recently requested on CPU 0. */
	int c_state_index;
};

/*
 * CPU power helpers implemented outside this file.
 * NOTE(review): these are declared here rather than in a shared header, and
 * their exact semantics (including the meaning of pegmatite_cpus_kill()'s
 * return value) are not visible from this file -- confirm against their
 * definitions.
 */
extern int pegmatite_cpus_kill(const cpumask_t *cpus);
extern void pegmatite_cpu_suspend(void);
extern void pegmatite_cpus_resume(const cpumask_t *cpus);

/* Singleton driver state; set by probe and cleared again if probe fails. */
static struct pegmatite_cpuidle *cpuidle_data;
/* Counter handed to every cpuidle_coupled_parallel_barrier() call below. */
static atomic_t irq_barrier;
/* Flag to indicate that a CPU has a pending IRQ even after migrating. */
static bool irq_pending_abort;

/*
 * Compute the set of online CPUs coupled to @dev, excluding the CPU this
 * code is currently running on. The result is written to @other_cpus.
 */
static void cpumask_other_cpuidle(struct cpuidle_device *dev, cpumask_t *other_cpus)
{
	const int self = smp_processor_id();

	/* Online members of the coupled set ... */
	cpumask_and(other_cpus, cpu_online_mask, &dev->coupled_cpus);
	/* ... minus ourselves. */
	cpumask_clear_cpu(self, other_cpus);
}

/*
 * Enter the idle state recorded in cpuidle_data->c_state_index.
 *
 * Runs with IRQs and FIQs masked. If the mck_sleep_sr hook is installed it
 * is called with the C-state index and the tick_nohz_get_sleep_length()
 * value in microseconds; otherwise mckinley_do_idle() is used.
 */
static void pegmatite_do_idle(void)
{
	unsigned long flags;
	long long sleep_us;
	int wake_us;

	/* Be sure any pending writes are out. We don't want to get hit by an
	 * asynchronous abort inside the idler. */
	mb();

	/* Be sure to keep all interrupts (IRQ & FIQ) out of the idler */
	local_irq_save(flags);
	local_fiq_disable();

	if (mck_sleep_sr) {
		/*
		 * The sleep length is a 64-bit microsecond count but the hook
		 * takes an int. Saturate instead of letting the implicit
		 * conversion wrap a very long sleep into a short or negative
		 * wake-up time.
		 */
		sleep_us = ktime_to_us(tick_nohz_get_sleep_length());
		if (sleep_us > INT_MAX)
			wake_us = INT_MAX;
		else if (sleep_us < INT_MIN)
			wake_us = INT_MIN;
		else
			wake_us = (int)sleep_us;

		mck_sleep_sr(cpuidle_data->c_state_index, wake_us);
	} else {
		mckinley_do_idle(cpuidle_data->mc);
	}

	/* Restore takes care of both the state of IRQ and FIQ. Do not
	 * unconditionally enable FIQ, as it might have already been masked! */
	local_irq_restore(flags);
}

/*
 * Low-power sequence for the CPU that stays powered: take the other coupled
 * online CPUs down, idle, then bring those same CPUs back.
 *
 * NOTE(review): the int returned by pegmatite_cpus_kill() is not checked.
 */
static void pegmatite_cpu0_do_lowpower(struct cpuidle_device *dev,
				       struct cpuidle_driver *drv)
{
	cpumask_t secondaries;

	/* Snapshot the set once so kill and resume act on the same CPUs. */
	cpumask_other_cpuidle(dev, &secondaries);

	pegmatite_cpus_kill(&secondaries);
	pegmatite_do_idle();
	pegmatite_cpus_resume(&secondaries);
}

/*
 * Coupled idle-state entry used by the power-down states.
 *
 * CPU 0 stays powered and drives the sequence; every other coupled CPU
 * migrates its interrupts to CPU 0 and suspends. The two sides are kept in
 * lock-step with cpuidle_coupled_parallel_barrier() on irq_barrier; each side
 * passes through the same number of barriers. If any auxiliary CPU still
 * has an interrupt pending after migration, irq_pending_abort is set and
 * nobody sleeps, but the save/restore steps still run.
 *
 * @dev:   cpuidle device of the calling CPU
 * @drv:   cpuidle driver (only forwarded to pegmatite_cpu0_do_lowpower())
 * @index: index of the state being entered
 *
 * Returns @index unconditionally, including when the sleep was aborted.
 *
 * NOTE(review): irq_pending_abort is a plain bool written by auxiliary CPUs
 * and read by all CPUs after a barrier; this presumably relies on the barrier
 * helper for ordering -- confirm.
 */
static int pegmatite_enter_powerdown(struct cpuidle_device *dev,
				     struct cpuidle_driver *drv,
				     int index)
{
	/* CPU 0 is arbitrarily chosen to remain powered */
	bool powered_cpu = dev->cpu == 0;

        /* Only CPU 0 records the requested state; pegmatite_do_idle() reads it. */
        if (dev->cpu == 0) {
            cpuidle_data->c_state_index = index;
        }
	/*
	 * Fast path when only one CPU is online.
	 * NOTE(review): if that CPU is not CPU 0, c_state_index keeps its
	 * previous value here.
	 */
	if (num_online_cpus() == 1) {
		pegmatite_do_idle();
		return index;
	}

	/* Auxiliary CPUs announce broadcast-timer entry before going down. */
	if (!powered_cpu)
		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu);

	/*
	 * NOTE(review): FIQs are masked here and unconditionally re-enabled
	 * at the end, regardless of their state on entry.
	 */
	local_fiq_disable();

	if (powered_cpu) {
		/* 1. Save GIC dist state before subordinates migrate */
		irq_pending_abort = 0;
		cpu_cluster_pm_enter();
		cpuidle_coupled_parallel_barrier(dev, &irq_barrier);

		/* 2. Auxiliaries migrate irqs */
		cpuidle_coupled_parallel_barrier(dev, &irq_barrier);

		/* 3. Sleep (or die) then restore GIC state */
		if (!irq_pending_abort)
			pegmatite_cpu0_do_lowpower(dev, drv);

		/* 4. Restore GIC distributor state. */
		cpu_cluster_pm_exit();

		/* 5. Signal GIC distributor state is restored. */
		cpuidle_coupled_parallel_barrier(dev, &irq_barrier);
	} else {
		/* 1. Save GIC dist state before subordinates migrate */
		cpuidle_coupled_parallel_barrier(dev, &irq_barrier);

		/* 2. Auxiliaries migrate irqs */
		cpu_pm_enter();

		/*
		 * This guarantees the upcoming WFI won't return by masking
		 * events at the GIC. However, if this is necessary, that
		 * indicates that we would lose an interrupt by powering off
		 * the CPU. By not masking, CPU0 may hang waiting for this CPU
		 * to go into idle. This is a more obvious failure than a
		 * missed interrupt.
		 */
		/* gic_cpu_if_down(); */

		gic_migrate_affinity(gic_get_cpu_id(0));
		/*
		 * If any dying CPUs still have pending interrupts, no CPUs
		 * will suspend. This could be true in the case of a pending SGI.
		 * Skip the suspend but still run the cleanup.
		 */
		if (gic_irq_pending())
			irq_pending_abort = 1;
		cpuidle_coupled_parallel_barrier(dev, &irq_barrier);

		/* 3. Sleep (or die) then restore GIC state */
		if (!irq_pending_abort)
			pegmatite_cpu_suspend();

		/* 4. Wait for GIC distributor state to be restored. */
		cpuidle_coupled_parallel_barrier(dev, &irq_barrier);

		/* 5. Now it is safe to restore CPU state. */
		cpu_pm_exit();
	}

	/* Make sure the GIC state is restored before enabling IRQs */
	cpuidle_coupled_parallel_barrier(dev, &irq_barrier);

	local_fiq_enable();

	/* Pair with the BROADCAST_ENTER notification issued above. */
	if (!powered_cpu)
		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);

	return index;
}

static int pegmatite_enter_self_refresh(struct cpuidle_device *dev,
					struct cpuidle_driver *drv,
					int index)
{
	cpumask_t other_cpus;

	cpumask_other_cpuidle(dev, &other_cpus);
	cpuidle_coupled_parallel_barrier(dev, &irq_barrier);

        if (dev->cpu == 0) {
            cpuidle_data->c_state_index = index;
        }
	/*
	 * Auxiliary cores race to get to WFI here. This is a reasonable
	 * approach because there's no consequence besides higher power
	 * consumption if another CPU accesses memory after CPU0 enters
	 * self-refresh. The alternative would be to migrate interrupts away
	 * from the cores and poll hardware to make sure they're in WFI before
	 * attempting self-refresh, but the migration is costly enough that it
	 * would measureably impact the overall efficiency.
	 *
	 * This mode is fast, but not guaranteed to be successful.
	 * TODO: make this a non-coupled state and do self-refresh on the last
	 *       CPU to go down
	 */
	if (dev->cpu == 0) {
		pegmatite_do_idle();
	} else {
		wfi();
	}
	arch_send_wakeup_ipi_mask(&other_cpus);

	return index;
}

/*
 * cpuidle driver description: plain WFI plus three coupled states of
 * increasing exit latency and target residency.
 */
static struct cpuidle_driver pegmatite_idle_driver = {
	.name             = "pegmatite_idle",
	.owner            = THIS_MODULE,
	.safe_state_index = 0,
	.state_count      = 4,
	.states = {
		[0] = ARM_CPUIDLE_WFI_STATE_PWR(750),
		[1] = {
			.name             = "SR",
			.desc             = "DRAM self-refresh",
			.flags            = CPUIDLE_FLAG_TIME_VALID |
					    CPUIDLE_FLAG_COUPLED,
			.exit_latency     = 20,
			/* Break-even with WFI: 750t = 1400*20 + 150t; t = 46.6 */
			.target_residency = 47,
			.power_usage      = 150,
			.enter            = pegmatite_enter_self_refresh,
		},
		[2] = {
			.name             = "PD-SR",
			.desc             = "CPU 1-3 off, DRAM self-refresh",
			.flags            = CPUIDLE_FLAG_TIME_VALID |
					    CPUIDLE_FLAG_COUPLED,
			.exit_latency     = 350,
			/* Experimental */
			.target_residency = 653,
			.power_usage      = 0,
			.enter            = pegmatite_enter_powerdown,
		},
		[3] = {
			.name             = "PDall-SR",
			.desc             = "CPU 0-3 off, DRAM self-refresh",
			.flags            = CPUIDLE_FLAG_TIME_VALID |
					    CPUIDLE_FLAG_COUPLED,
			.exit_latency     = 2000,
			/*
			 * Experimental - needs measuring; the intent is to
			 * keep exit latency below roughly 10% of residency.
			 */
			.target_residency = 30000,
			/*
			 * The governor doesn't seem to care about power; it
			 * just assumes a higher index is better whenever the
			 * latency and residency requirements are met.
			 */
			.power_usage      = 0,
			.enter            = pegmatite_enter_powerdown,
		},
	},
};

/*
 * Bind the cpuidle platform device.
 *
 * Allocates the driver state, looks up the memory-controller idle handle
 * from the "memory-controller" device-tree property, publishes the state in
 * cpuidle_data and registers the cpuidle driver for all possible CPUs.
 *
 * Returns 0 on success, -EBUSY if the driver is already bound, -ENOMEM if
 * the allocation fails, the error encoded by of_get_mckinley_idle(), or the
 * error returned by cpuidle_register().
 */
static int pegmatite_cpuidle_probe(struct platform_device *pdev)
{
	struct device_node *np = pdev->dev.of_node;
	struct pegmatite_cpuidle *data;
	int ret;

	/*
	 * Singleton for the physical sram address. A second matching device
	 * is a configuration error, not a reason to take the kernel down, so
	 * refuse the bind instead of BUG()ing.
	 */
	if (cpuidle_data) {
		dev_err(&pdev->dev, "cpuidle driver already bound\n");
		return -EBUSY;
	}

	/* Device-managed allocation: released automatically on failure. */
	data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	data->mc = of_get_mckinley_idle(np, "memory-controller", 0);
	if (IS_ERR(data->mc))
		return PTR_ERR(data->mc);

	/*
	 * Publish the state before registering: the idle callbacks
	 * dereference cpuidle_data.
	 */
	platform_set_drvdata(pdev, data);
	cpuidle_data = data;

	ret = cpuidle_register(&pegmatite_idle_driver, cpu_possible_mask);
	if (ret) {
		dev_err(&pdev->dev, "failed to register cpuidle driver\n");
		goto err;
	}

	return 0;
err:
	/* Unpublish and drop the memory-controller handle taken above. */
	cpuidle_data = NULL;
	mckinley_idle_put(data->mc);
	return ret;
}

/* Device-tree compatible strings this driver binds to. */
static const struct of_device_id of_cpuidle_table[] = {
	{ .compatible = "marvell,pegmatite-cpuidle" },
	{}	/* empty entry ends the table */
};

/*
 * Platform driver glue: matched through of_cpuidle_table and bound by
 * pegmatite_cpuidle_probe(). No remove callback is provided.
 */
static struct platform_driver pegmatite_cpuidle_driver = {
	.driver = {
		.name           = "pegmatite_cpuidle",
		.owner          = THIS_MODULE,
		.of_match_table = of_cpuidle_table,
	},
	.probe = pegmatite_cpuidle_probe,
};

/* Generate the module init/exit boilerplate that registers the driver. */
module_platform_driver(pegmatite_cpuidle_driver);
