/*
 * AURORA AU-G0603 SPI controller driver
 *
 * Copyright (C) 2010-2012 Pixelworks, Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include <linux/version.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/interrupt.h>
#include <linux/completion.h>
#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include <linux/dmaengine.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/clk.h>
#include <linux/platform_device.h>
#include <linux/of_platform.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/timer.h>
#include <linux/bitops.h>
#include <linux/pm_runtime.h>

#include <linux/spi/spi.h>

#include <asm/io.h>
#include <asm/dma.h>

#include <mach/hardware.h>
#include <mach/platform.h>

#define SPI_USE_QUEUE

/*** hardware registers *************/
/*
 * Register byte offsets (relative to the mapped register base) followed by
 * the bit/field definitions of each register.  The one-line descriptions
 * below are derived from how this driver uses the registers.
 */

/* SPI_CR: control - enable bit, I/O mode (RT), clock polarity/phase */
#define SPI_CR		0x00
#define   SPI_CR_EN		BIT(31)
#define   SPI_CR_RT_SHIFT	26
#define   SPI_CR_RT_MASK	(7 << 26)
#define   SPI_CR_MD		BIT(24)
#define   SPI_CR_PO		BIT(21)
#define   SPI_CR_PH		BIT(20)
#define   SPI_CR_CI		BIT(7)
#define   SPI_CR_SO		BIT(6)
/* SPI_TC: transmit configuration - DMA enable/threshold, word size/length */
#define SPI_TC		0x04
#define   SPI_TC_IT_SHIFT	20
#define   SPI_TC_IT_MASK	(31 << 20)
#define   SPI_TC_DE		BIT(19)
#define   SPI_TC_DT_SHIFT	14
#define   SPI_TC_DT_MASK	(31 << 14)
#define   SPI_TC_MSF		BIT(12)
#define   SPI_TC_SZ_SHIFT	8
#define   SPI_TC_SZ_MASK	(3 << 8)
#define   SPI_TC_LEN_SHIFT	0
#define   SPI_TC_LEN_MASK	0x3f
/* SPI_RC: receive configuration - same layout as SPI_TC plus the RT field */
#define SPI_RC		0x08
#define   SPI_RC_RT_SHIFT	26
#define   SPI_RC_RT_MASK	(3 << 26)
#define   SPI_RC_IT_SHIFT	20
#define   SPI_RC_IT_MASK	(31 << 20)
#define   SPI_RC_DE		BIT(19)
#define   SPI_RC_DT_SHIFT	14
#define   SPI_RC_DT_MASK	(31 << 14)
#define   SPI_RC_MSF		BIT(12)
#define   SPI_RC_SZ_SHIFT	8
#define   SPI_RC_SZ_MASK	(3 << 8)
#define   SPI_RC_LEN_SHIFT	0
#define   SPI_RC_LEN_MASK	0x3f
/* SPI_IEN: interrupt enable mask */
#define SPI_IEN		0x0c
#define   SPI_IEN_TD		BIT(10)
#define   SPI_IEN_TE		BIT(6)
#define   SPI_IEN_RF		BIT(5)
#define   SPI_IEN_TF		BIT(2)
#define   SPI_IEN_RE		BIT(1)
#define   SPI_IEN_DN		BIT(0)
/* SPI_ST: interrupt/FIFO status (same bit positions as SPI_IEN) */
#define SPI_ST		0x10
#define   SPI_ST_TD		BIT(10)
#define   SPI_ST_TE		BIT(6)
#define   SPI_ST_RF		BIT(5)
#define   SPI_ST_TF		BIT(2)
#define   SPI_ST_RE		BIT(1)
#define   SPI_ST_DN		BIT(0)
/* SPI_ICLR: interrupt clear */
#define SPI_ICLR	0x14
#define   SPI_ICLR_TF		BIT(2)
#define   SPI_ICLR_RE		BIT(1)
#define   SPI_ICLR_DN		BIT(0)
/* SPI_SS: slave select, written as a one-hot chip-select mask */
#define SPI_SS		0x18
/* SPI_CT: clock divider (HPER) */
#define SPI_CT		0x1c
/* SPI_TCD / SPI_RCD / SSD: zeroed before every message (presumably delays - TODO confirm) */
#define SPI_TCD		0x20
#define SPI_RCD		0x24
#define SPI_SSD		0x28
/* SPI_TDC: transmit byte count, plus length of the initial 1x (single-line) phase */
#define SPI_TDC		0x30
#define   SPI_TDC_INIT1X_SHIFT	24
#define   SPI_TDC_INIT1X_MASK	(0x1f << 24)
/* SPI_RDC: receive byte count, plus number of leading RX bytes to drop */
#define SPI_RDC		0x34
#define   SPI_RDC_DROP_SHIFT	24
#define   SPI_RDC_DROP_MASK	(0x1f << 24)
/* SPI_TDF / SPI_RDF: TX / RX FIFO data ports, accessed as 32-bit words */
#define SPI_TDF		0x38
#define SPI_RDF		0x3c

/****************/

/* messages up to this many bytes are always handled by the CPU/IRQ path */
#define SPI_FIFO_SIZE		128
#define SPI_USE_DMA_THRESHOLD	SPI_FIFO_SIZE
#define SPI_USE_DMA_MAX		0xffc		// hw limit, broken LLI handling
/* values programmed into the IT fields of SPI_TC / SPI_RC */
#define SPI_TC_IT		31
#define SPI_RC_IT		31

/*
 * Debug tracing.  DEBUG is unconditionally defined here, so SPI_PRINTK() is
 * always compiled in and filtered at run time by the "debug" module
 * parameter: a message is printed when spi_debug >= its level.
 */
#define DEBUG 1
#ifdef DEBUG
static int spi_debug = 1;
module_param_named(debug, spi_debug, int, 0644);
MODULE_PARM_DESC(debug, "debug level (0 off, 1 errors, 2 trace, 3 full, 4 too much)");

/* printk at KERN_DEBUG, prefixed with the calling function and line number */
#define SPI_PRINTK(level, fmt, ...) do { \
	if (spi_debug >= level) \
		printk(KERN_DEBUG "PWSPI: %s:%d: "fmt, __FUNCTION__, __LINE__, ## __VA_ARGS__); \
	} while (0)
#else
#define SPI_PRINTK(...) do { } while (0)
#endif

/* Non-zero allows DMA for messages that pass aug0603_check_dma(). */
static int spi_dma = 1;
module_param_named(dma, spi_dma, int, 0644);
MODULE_PARM_DESC(dma, "enable SPI DMA transfers");

/*
 * SPI clock period = 2 * HPER * main system bus cycle time.
 * On topazeh the customer required 192MHz / 2 / 2 = 48MHz, hence the
 * default minimum divider (HPER) of 2.
 */
static int clock_div_min = 2;
module_param_named(clock_div, clock_div_min, int, 0664);
MODULE_PARM_DESC(clock_div, "minimum clock divider, >= 2");


// For regression testing: allow overriding the mode and the chip select.
// A value of -1 means "no override".
static int spi_force_mode = -1;
static int spi_force_cs = -1;
module_param_named(force_mode, spi_force_mode, int, 0664);
module_param_named(force_cs, spi_force_cs, int, 0664);
MODULE_PARM_DESC(force_mode, "override SPI mode (0, 3) for testing");
MODULE_PARM_DESC(force_cs, "override chip select (0...3) for testing");

/*
 * SPI clock source selection on topazeh: 1 = internal, 0 = external.
 * A non-zero value sets SPI_CR_CI when the controller is configured.
 */
static int clock_internal_in;
module_param_named(clock_in_en, clock_internal_in, int, 0664);
MODULE_PARM_DESC(clock_in_en, "spi internal clock, 1 or 0");

/*
 * Per-controller driver state, stored as the spi_master's devdata.
 * Only one message is in flight at a time; the fields from current_msg
 * onwards describe that message and are reset by aug0603_message_done().
 */
struct aug0603_spi {
	struct device		*dev;
	struct platform_device	*pdev;
	struct spi_master	*master;
	struct resource		*ioarea;	// register region; its start is the DMA FIFO bus address base
	void __iomem		*regs;		// mapped register base used by the reg accessors
	int			irq;
	struct clk		*clk;		// source clock for the SPI clock divider

	struct spi_message	*current_msg;	// message in flight, NULL when idle
	unsigned int		current_msg_len;	// sum of xfer->len over current_msg
	struct completion	msg_done;	// completed by aug0603_message_done()

	/* CPU/IRQ mode progress */
	struct spi_transfer	*cur_tx_transfer;
	struct spi_transfer	*cur_rx_transfer;
	unsigned int		txp;		// TX buf pointer cur xfer
	unsigned int		rxp;		// RX buf pointer cur xfer
	unsigned int		tx_msg_count;	// bytes sent cur msg
	unsigned int		rx_msg_count;	// bytes read cur msg
	unsigned int		tx_msg_len;	// TX len cur msg
	unsigned int		rx_msg_len;	// RX len cur msg
	unsigned int		cur_clock_div;	// divider last written to SPI_CT

	/* DMA mode state */
	struct sg_table		tx_sgt;
	struct sg_table		rx_sgt;
	struct dma_chan		*dma_rx_channel;
	struct dma_chan		*dma_tx_channel;
	dma_cookie_t		tx_cookie;
	dma_cookie_t		rx_cookie;
	u8			*residue_ptr;	// where the CPU stores the trailing RX bytes
	unsigned int		dma_running : 1;	// a DMA message has been started and not yet finished
	unsigned int		dma_is_tx : 1;	// completion callback is attached to the TX descriptor
	unsigned int		residue : 2;	// trailing RX bytes (0..3) read by CPU after RX DMA
};


static void aug0603_message_done(struct aug0603_spi	*aug0603,
				 int	status);


/* Return the 32-bit value of the controller register at byte offset @reg. */
static inline u32 aug0603_read_reg(struct aug0603_spi *aug0603, u32 reg)
{
	return readl(aug0603->regs + reg);
}

/* Store the 32-bit value @val into the controller register at byte offset @reg. */
static inline void aug0603_write_reg(struct aug0603_spi *aug0603, u32 reg, u32 val)
{
	void __iomem *addr = aug0603->regs + reg;

	writel(val, addr);
}

/*
 * Read-modify-write of the register at byte offset @reg: the bits in @clear
 * are cleared first, then the bits in @set are set.
 */
static inline void aug0603_change_reg(struct aug0603_spi *aug0603,
				      u32 reg, u32 clear, u32 set)
{
	u32 updated = aug0603_read_reg(aug0603, reg);

	updated &= ~clear;
	updated |= set;
	aug0603_write_reg(aug0603, reg, updated);
}

/*
 * Debug helper: dump every transfer of @msg, including a hex preview of at
 * most the first 16 bytes of its RX buffer (@rx true) or TX buffer
 * (@rx false).  Prints nothing unless the debug level is at least 3.
 */
static void aug0603_print_spi_message(const struct spi_message *msg, bool rx)
{
#ifdef DEBUG
	struct spi_transfer *xfer;
	unsigned int idx = 0, pos, shown;
	const u8 *data;
	char buf[64];	/* 16 bytes * "xx " needs 48 characters */

	if (spi_debug < 3)
		return;

	printk("=============== SPI %s\n", rx ? "RX" : "TX");
	list_for_each_entry(xfer, &msg->transfers, transfer_list) {
		printk("    xfer: #%d @%p, length: %u, tx_buf %p, rx_buf %p\n",
		       idx++, xfer, xfer->len, xfer->tx_buf, xfer->rx_buf);

		shown = min(xfer->len, 16U);
		data = rx ? xfer->rx_buf : xfer->tx_buf;
		if (!shown || !data)
			continue;

		for (pos = 0; pos < shown; pos++)
			sprintf(buf + 3 * pos, "%02x ", data[pos]);
		/* overwrite the trailing blank with the terminator */
		buf[3 * shown - 1] = '\0';

		if (rx)
			printk("	RX: %s%s\n", buf, xfer->len > shown ? "..." : "");
		else
			printk("	TX: %s%s\n", buf, xfer->len > shown ? "..." : "");
	}
#endif
}


static void aug0603_dma_unmap(struct aug0603_spi *aug0603,
			      struct spi_message *msg)
{
	if (aug0603->tx_sgt.sgl) {
		dma_unmap_sg(aug0603->dev, aug0603->tx_sgt.sgl,
			     aug0603->tx_sgt.nents, DMA_TO_DEVICE);
		sg_free_table(&aug0603->tx_sgt);
	}
	if (aug0603->rx_sgt.sgl) {
		dma_unmap_sg(aug0603->dev, aug0603->rx_sgt.sgl,
			     aug0603->rx_sgt.nents, DMA_FROM_DEVICE);
		sg_free_table(&aug0603->rx_sgt);
	}
}

/*
 * DMA completion callback (attached to the RX descriptor when RX uses DMA,
 * otherwise to the TX descriptor).
 *
 * @data: the struct aug0603_spi the transfer belongs to.
 *
 * On success the 1..3 trailing RX bytes that DMA could not move are read
 * from the RX FIFO by the CPU, the buffers are unmapped and the message is
 * completed with status 0.  If the DMA engine does not report
 * DMA_COMPLETE, both channels are stopped, the buffers are unmapped and the
 * message is completed with -EIO.
 */
static void aug0603_dma_done(void *data)
{
	struct aug0603_spi *aug0603 = data;
	struct spi_message *msg;
	u32 rx_val, byte;
	struct dma_tx_state state;
	enum dma_status status;
	int i;

	SPI_PRINTK(3, "\n");

	msg = aug0603->current_msg;
	BUG_ON(!msg);

	if (aug0603->dma_is_tx)
		status = dmaengine_tx_status(aug0603->dma_tx_channel,
					     aug0603->tx_cookie, &state);
	else
		status = dmaengine_tx_status(aug0603->dma_rx_channel,
					     aug0603->rx_cookie, &state);
	if (status != DMA_COMPLETE) {
		dev_err(aug0603->dev, "DMA status != COMPLETE %d\n", status);
		// Stop whatever is still queued before the buffers go away;
		// the async variant is required since we are in a callback.
		dmaengine_terminate_async(aug0603->dma_rx_channel);
		dmaengine_terminate_async(aug0603->dma_tx_channel);
		// Release the mappings and the DMA state, otherwise the next
		// DMA message would overwrite (and leak) the sg tables.
		aug0603_dma_unmap(aug0603, msg);
		aug0603->dma_running = 0;
		// msg->status carries a negative errno
		aug0603_message_done(aug0603, -EIO);
		return;
	}

	if (aug0603->residue) {
		// the last RX word is only partially valid and was excluded
		// from the RX scatterlist; fetch it by hand
		while (!(aug0603_read_reg(aug0603, SPI_ST) & SPI_ST_RF))
			;
		rx_val = aug0603_read_reg(aug0603, SPI_RDF);
		SPI_PRINTK(5, "RDF = %08x\n", rx_val);
		for (i = 0; i < aug0603->residue; i++) {
			// bytes arrive least significant first
			byte = rx_val & 0xff;
			rx_val >>= 8;
			SPI_PRINTK(5, "RX %p = %02x\n", aug0603->residue_ptr, byte);
			*aug0603->residue_ptr++ = byte;
		}
	}
	aug0603_dma_unmap(aug0603, msg);
	aug0603->dma_running = 0;
	msg->actual_length = aug0603->current_msg_len;
	aug0603_message_done(aug0603, 0);
}

/*
 * Start @msg using the DMA engine.
 *
 * A direction uses DMA when its byte count exceeds SPI_USE_DMA_THRESHOLD;
 * a TX part that is not larger than that is written into the TX FIFO by
 * the CPU.  Completion is reported through aug0603_dma_done(), which is
 * attached to the RX descriptor if RX uses DMA and to the TX descriptor
 * otherwise.  The caller is expected to have validated the message with
 * aug0603_check_dma().
 *
 * Returns 0 once the transfer has been started.  On failure the message
 * is completed via aug0603_message_done() with the error and the negative
 * errno is returned.
 *
 * NOTE(review): drop_len is only set for dual/quad messages, and the
 * residue pointer is computed from the message-wide RX length; both look
 * tailored to [tx command, rx data] messages - confirm before relying on
 * other message layouts.
 */
static int aug0603_do_next_message_dma(struct aug0603_spi *aug0603,
				       struct spi_message *msg)
{
	struct dma_slave_config rx_conf = {
		.src_addr = aug0603->ioarea->start + SPI_RDF, // 0x04000000
		.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
		.src_maxburst = 16,
		.direction = DMA_DEV_TO_MEM,
		.device_fc = false,
	};
	struct dma_slave_config tx_conf = {
		.dst_addr = aug0603->ioarea->start + SPI_TDF, // 0x04000000
		.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
		.dst_maxburst = 16,
		.direction = DMA_MEM_TO_DEV,
		.device_fc = false,
	};
	struct dma_async_tx_descriptor *rxdesc = NULL;
	struct dma_async_tx_descriptor *txdesc = NULL;
	struct spi_transfer *xfer, *tx = NULL;
	unsigned long flags;
	unsigned int tcount, bcount, tx_val;
	int err = 0, n_xfers = 0, rx_len = 0, tx_len = 0, n_rx = 0, n_tx = 0;
	int init1x_len = 0, drop_len = 0;
	int tx_dma = 0, rx_dma = 0, io_mode = 0;
	int mapped;
	u8 *p = NULL;
	int cs;

	SPI_PRINTK(3, "\n");

	aug0603->residue = 0;
	aug0603->residue_ptr = NULL;

	// FIXME
	if (msg->is_dma_mapped) {
		SPI_PRINTK(1, "ERROR: msg->is_dma_mapped is not supported\n");
		err = -EINVAL;
		goto out;
	}
	err = dmaengine_slave_config(aug0603->dma_rx_channel, &rx_conf);
	if (!err)
		err = dmaengine_slave_config(aug0603->dma_tx_channel, &tx_conf);
	if (err)
		goto out;

	// tx_len / rx_len end up as the message length up to and including
	// the last transfer that has a TX / RX buffer
	list_for_each_entry(xfer, &msg->transfers, transfer_list) {
		SPI_PRINTK(3, "xfer %d @%p len %d tx %p rx %p\n", n_xfers,
			   xfer, xfer->len, xfer->tx_buf, xfer->rx_buf);

		n_xfers++;
		aug0603->current_msg_len += xfer->len;
		if (xfer->tx_buf) {
			tx_len = aug0603->current_msg_len;
			n_tx++;
		}
		if (xfer->rx_buf) {
			rx_len = aug0603->current_msg_len;
			n_rx++;
		}
		// this doesn't handle any error cases, only supports
		// messages used by m25p80 flash driver
		if (xfer->rx_nbits == SPI_NBITS_QUAD)
			io_mode = 4;
		else if (xfer->rx_nbits == SPI_NBITS_DUAL)
			io_mode = 2;
	}
	SPI_PRINTK(2, "%d xfers, len %d, tx_len %d, rx_len %d, io_mode %u\n",
		   n_xfers, aug0603->current_msg_len, tx_len, rx_len, io_mode);

	tx = list_entry(msg->transfers.next, struct spi_transfer, transfer_list);
	if (io_mode) {
		// the first transfer is sent on a single line; its RX
		// bytes are dropped by hardware if nobody wants them
		init1x_len = tx->len;
		if (!tx->rx_buf)
			drop_len = tx->len;
		aug0603_change_reg(aug0603, SPI_CR,
				   SPI_CR_RT_MASK, io_mode << SPI_CR_RT_SHIFT);
	}

	if (tx_len > SPI_USE_DMA_THRESHOLD) {
		tx_dma = 1;
		err = sg_alloc_table(&aug0603->tx_sgt, n_tx, GFP_KERNEL);
		if (err)
			goto out;
		// note the round_up() only works for the last sg element,
		// aug0603_check_dma catches this
		n_xfers = 0;
		list_for_each_entry(xfer, &msg->transfers, transfer_list) {
			if (!xfer->tx_buf)
				continue;
			sg_set_buf(&aug0603->tx_sgt.sgl[n_xfers],
				   xfer->tx_buf, round_up(xfer->len, 4));
			n_xfers++;
		}
		mapped = dma_map_sg(aug0603->dev, aug0603->tx_sgt.sgl,
				    aug0603->tx_sgt.nents, DMA_TO_DEVICE);
		if (mapped != aug0603->tx_sgt.nents) {
			// Undo a (merged) mapping, then free the table here so
			// the common error path does not unmap it again.
			if (mapped > 0)
				dma_unmap_sg(aug0603->dev, aug0603->tx_sgt.sgl,
					     aug0603->tx_sgt.nents,
					     DMA_TO_DEVICE);
			sg_free_table(&aug0603->tx_sgt);
			err = -ENOMEM;
			goto out;
		}
		txdesc = dmaengine_prep_slave_sg(aug0603->dma_tx_channel,
						 aug0603->tx_sgt.sgl,
						 aug0603->tx_sgt.nents,
						 DMA_MEM_TO_DEV,
						 DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
		if (!txdesc) {
			err = -ENOMEM;
			goto out;
		}
	}

	if (rx_len > SPI_USE_DMA_THRESHOLD) {
		rx_dma = 1;
		err = sg_alloc_table(&aug0603->rx_sgt, n_rx, GFP_KERNEL);
		if (err)
			goto out;
		// note the round_down() only works for the last sg element,
		// aug0603_check_dma catches this
		n_xfers = 0;
		list_for_each_entry(xfer, &msg->transfers, transfer_list) {
			if (!xfer->rx_buf)
				continue;
			sg_set_buf(&aug0603->rx_sgt.sgl[n_xfers],
				   xfer->rx_buf, round_down(xfer->len, 4));
			p = xfer->rx_buf;
			n_xfers++;
		}
		mapped = dma_map_sg(aug0603->dev, aug0603->rx_sgt.sgl,
				    aug0603->rx_sgt.nents, DMA_FROM_DEVICE);
		if (mapped != aug0603->rx_sgt.nents) {
			if (mapped > 0)
				dma_unmap_sg(aug0603->dev, aug0603->rx_sgt.sgl,
					     aug0603->rx_sgt.nents,
					     DMA_FROM_DEVICE);
			sg_free_table(&aug0603->rx_sgt);
			err = -ENOMEM;
			goto out;
		}
		rxdesc = dmaengine_prep_slave_sg(aug0603->dma_rx_channel,
						 aug0603->rx_sgt.sgl,
						 aug0603->rx_sgt.nents,
						 DMA_DEV_TO_MEM,
						 DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
		if (!rxdesc) {
			err = -ENOMEM;
			goto out;
		}
		// trailing 1..3 bytes are fetched by CPU in aug0603_dma_done()
		aug0603->residue = (rx_len - drop_len) & 3;
		aug0603->residue_ptr = p + round_down(rx_len - drop_len, 4);
	}

	if (!rx_dma && !tx_dma) {
		// nothing qualified for DMA, so there is no descriptor that
		// could carry the completion callback
		err = -EINVAL;
		goto out;
	}

	if (rx_dma) {
		aug0603->dma_is_tx = 0;
		rxdesc->callback = aug0603_dma_done;
		rxdesc->callback_param = aug0603;
	} else {
		aug0603->dma_is_tx = 1;
		txdesc->callback = aug0603_dma_done;
		txdesc->callback_param = aug0603;
	}

	if (tx_dma)
		aug0603_change_reg(aug0603, SPI_TC, SPI_TC_DT_MASK,
				   (16 << SPI_TC_DT_SHIFT) | SPI_TC_DE);
	else
		aug0603_change_reg(aug0603, SPI_TC, SPI_TC_DE, 0);
	if (rx_dma)
		aug0603_change_reg(aug0603, SPI_RC, SPI_RC_DT_MASK,
				   (16 << SPI_RC_DT_SHIFT) | SPI_RC_DE);
	else
		aug0603_change_reg(aug0603, SPI_RC, SPI_RC_DE, 0);

	SPI_PRINTK(3, "INIT1X = %d TDC = %d\n", init1x_len, tx_len);
	aug0603_write_reg(aug0603, SPI_TDC, tx_len | (init1x_len << SPI_TDC_INIT1X_SHIFT));

	SPI_PRINTK(3, "DROP = %d RDC = %d\n", drop_len, rx_len);
	aug0603_write_reg(aug0603, SPI_RDC, rx_len | (drop_len << SPI_RDC_DROP_SHIFT));

	cs = msg->spi->chip_select;
	if (spi_force_cs != -1)
		cs = spi_force_cs;
	aug0603_write_reg(aug0603, SPI_SS, 1 << cs);

	// If RDC != 0, after EN=1 core starts sending dummy bytes;
	// setting RCD to a high value so we have time to fill the TX FIFO
	// would work, but it is difficult to find a good value that ensures
	// data integrity while not killing performance.
	// A better workaround is to set RT to the _invalid_ value 3, fill
	// the TX FIFO and set RT back to 2 (rt=3 stops the SPI clock when
	// the TX FIFO is empty).
	// (Note that EN=0 resets the FIFOs so they cannot be filled first.)
	aug0603_change_reg(aug0603, SPI_RC, 0, 3 << SPI_RC_RT_SHIFT);

	// disable IRQs, only need dma_8ch callback
	aug0603_write_reg(aug0603, SPI_IEN, 0);

	aug0603->dma_running = 1;
	if (tx_dma) {
		SPI_PRINTK(3, "======> DMA MEM2PER\n");
		aug0603->tx_cookie = dmaengine_submit(txdesc);
		dma_async_issue_pending(aug0603->dma_tx_channel);
	}
	if (rx_dma) {
		SPI_PRINTK(3, "======> DMA PER2MEM\n");
		aug0603->rx_cookie = dmaengine_submit(rxdesc);
		dma_async_issue_pending(aug0603->dma_rx_channel);
	}

	local_irq_save(flags);

	// Enable SPI Master
	aug0603_change_reg(aug0603, SPI_CR, 0, SPI_CR_EN);

	if (!tx_dma) {
		// fill FIFO: pack the TX bytes of every transfer that has a
		// TX buffer into 32-bit words, least significant byte first
		SPI_PRINTK(3, "======> FILL TX FIFO\n");
		bcount = tx_val = 0;
		list_for_each_entry(xfer, &msg->transfers, transfer_list) {
			if (!xfer->tx_buf)
				continue;
			for (tcount = 0; tcount < xfer->len; tcount++) {
				tx_val |= (u32)((const u8 *)xfer->tx_buf)[tcount]
					  << (8 * bcount++);
				if (bcount == 4) {
					aug0603_write_reg(aug0603, SPI_TDF, tx_val);
					SPI_PRINTK(4, "TDF = %08x\n", tx_val);
					bcount = tx_val = 0;
				}
			}
		}
		if (bcount) {
			aug0603_write_reg(aug0603, SPI_TDF, tx_val);
			SPI_PRINTK(4, "TDF = %08x\n", tx_val);
		}
	}

	// Workaround, see above
	aug0603_change_reg(aug0603, SPI_RC, SPI_RC_RT_MASK, 2 << SPI_RC_RT_SHIFT);

	local_irq_restore(flags);
	return 0;

out:
	aug0603_dma_unmap(aug0603, msg);
	aug0603_message_done(aug0603, err);
	dev_err(aug0603->dev, "can't start DMA: %d\n", err);
	return err;
}

static int aug0603_do_next_message_cpu(struct aug0603_spi *aug0603,
				       struct spi_message *msg)
{
	struct spi_transfer *xfer, *tx;
	unsigned long flags;
	unsigned int txp, bytecnt;
	unsigned int tx_val, tdc, rdc;
	unsigned int io_mode = 0;
	int init1x_len = 0;
	int tx_count = 0;
	int err = 0;
	int cs;
	u8 byte;

	SPI_PRINTK(3, "\n");

	// count number of bytes to xfer
	aug0603->tx_msg_len = 0;
	aug0603->rx_msg_len = 0;
	list_for_each_entry(xfer, &msg->transfers, transfer_list) {
		SPI_PRINTK(3, "xfer @%p len %d tx %p rx %p\n",
			   xfer, xfer->len, xfer->tx_buf, xfer->rx_buf);
		aug0603->current_msg_len += xfer->len;
		if (xfer->tx_buf)
			aug0603->tx_msg_len = aug0603->current_msg_len;
		if (xfer->rx_buf)
			aug0603->rx_msg_len = aug0603->current_msg_len;
		// this doesn't handle any error cases, only supports
		// messages used by m25p80 flash driver
		if (xfer->rx_nbits == SPI_NBITS_QUAD)
			io_mode = 4;
		else if (xfer->rx_nbits == SPI_NBITS_DUAL)
			io_mode = 2;
	}
	SPI_PRINTK(2, "cur len %d, tx len %d, rx len %d, io mode %u\n",
		   aug0603->current_msg_len,
		   aug0603->tx_msg_len, aug0603->rx_msg_len, io_mode);

	tx = list_entry(msg->transfers.next, struct spi_transfer, transfer_list);
	if (io_mode) {
		init1x_len = tx->len;
		aug0603_change_reg(aug0603, SPI_CR,
				   SPI_CR_RT_MASK, io_mode << SPI_CR_RT_SHIFT);
	}

	// make sure DMA is disabled
	aug0603_change_reg(aug0603, SPI_TC, SPI_TC_DT_MASK | SPI_TC_DE, 0);
	aug0603_change_reg(aug0603, SPI_RC, SPI_RC_DT_MASK | SPI_RC_DE, 0);

	cs = msg->spi->chip_select;
	if (spi_force_cs != -1)
		cs = spi_force_cs;
	aug0603_write_reg(aug0603, SPI_SS, 1 << cs);

	tdc = aug0603->tx_msg_len;
	rdc = aug0603->rx_msg_len;
	SPI_PRINTK(3, "TDC %d, RDC %d\n", tdc, rdc);

	aug0603_write_reg(aug0603, SPI_TDC, tdc | (init1x_len << SPI_TDC_INIT1X_SHIFT));
	aug0603_write_reg(aug0603, SPI_RDC, rdc);

	// If RDC != 0, after EN=1 core starts sending dummy bytes;
	// setting RCD to a high value so we have time to fill the TX FIFO
	// would work, but it is difficult to find a good value that ensures
	// data integrity while not killing performance.
	// A better workaround is to set RT to the _invalid_ value 3, fill
	// the TX FIFO and set RT back to 2 (rt=3 stops the SPI clock when
	// the TX FIFO is empty).
	// (Note that EN=0 resets the FIFOs so they cannot be filled first.)
	aug0603_change_reg(aug0603, SPI_RC, 0, 3 << SPI_RC_RT_SHIFT);

	// prepare RX
	aug0603->cur_rx_transfer = list_entry(msg->transfers.next, struct spi_transfer, transfer_list);
	aug0603->rxp = 0;
	aug0603->rx_msg_count = 0;

	local_irq_save(flags);

	aug0603_write_reg(aug0603, SPI_ICLR, ~0);
	aug0603_write_reg(aug0603, SPI_IEN, (rdc ? SPI_IEN_RF : 0) | (tdc ? SPI_IEN_TE : 0));
	aug0603_change_reg(aug0603, SPI_CR, 0, SPI_CR_EN);

	// prime TX FIFO
	txp = bytecnt = tx_val = 0;
	while (tx_count < aug0603->tx_msg_len && tx_count < SPI_FIFO_SIZE) {
		if (txp >= tx->len) {
			if (list_is_last(&tx->transfer_list, &msg->transfers)) {
				SPI_PRINTK(1, "internal error\n");
				err = -EINVAL;
				goto out;
			}
			tx = list_entry(tx->transfer_list.next, struct spi_transfer, transfer_list);
			txp = 0;
		}
		if (tx->tx_buf)
			byte = ((u8 *)tx->tx_buf)[txp];
		else
			byte = 0; // send dummy bytes
		tx_val |= byte << (8 * bytecnt);
		SPI_PRINTK(5, "TX %p[%u] = %02x\n", tx->tx_buf, txp, byte);
		txp++;

		if (++bytecnt == 4) {
			aug0603_write_reg(aug0603, SPI_TDF, tx_val);
			SPI_PRINTK(5, "TDF = %08x\n", tx_val);
			bytecnt = tx_val = 0;
		}
		tx_count++;
	}
	if (bytecnt) {
		aug0603_write_reg(aug0603, SPI_TDF, tx_val);
		SPI_PRINTK(4, "TDF = %08x\n", tx_val);
	}
	aug0603->cur_tx_transfer = tx;
	aug0603->txp = txp;
	aug0603->tx_msg_count = tx_count;

	// hw bug workaround, rt=2 allows RX when TX FIFO empty
	if (aug0603->tx_msg_count >= aug0603->tx_msg_len)
		aug0603_change_reg(aug0603, SPI_RC, SPI_RC_RT_MASK, 2 << SPI_RC_RT_SHIFT);

	local_irq_restore(flags);
	return 0;

out:
	aug0603_message_done(aug0603, err);
	dev_err(aug0603->dev, "can't start transfer: %d\n", err);
	return err;
}

/*
 * Program the SPI clock divider (HPER) for a requested rate of @speed_hz.
 *
 * The SPI clock is source_clock / (2 * HPER), so HPER is chosen as
 * ceil(source_clock / (2 * speed_hz)): the resulting clock never exceeds
 * @speed_hz, unless the minimum divider (module parameter "clock_div",
 * at least 2) forces a larger HPER and thus a slower clock.
 * @speed_hz must be non-zero.
 */
static void aug0603_set_clock(struct aug0603_spi *aug0603, u32 speed_hz)
{
	u32 clock_div;

	if (clock_div_min < 2)
		clock_div_min = 2;
	// ceil(rate / speed_hz) ...
	clock_div = (clk_get_rate(aug0603->clk) + speed_hz - 1) / speed_hz;
	// ... halved rounding up: ceil(ceil(a / b) / 2) == ceil(a / (2 * b))
	clock_div = (clock_div + 1) / 2;
	if (clock_div < clock_div_min)
		clock_div = clock_div_min;
	aug0603->cur_clock_div = clock_div;
	SPI_PRINTK(3, "Configure SPI clock: HPER = %d\n", clock_div);
	aug0603_write_reg(aug0603, SPI_CT, clock_div);
}

/*
 * Decide whether @msg can be handled by DMA.
 *
 * If addresses or lengths are not 32-bit aligned, the dmaengine code does
 * byte transfers for the initial/remaining bytes.  The transfer width of
 * the au-g0603 is fixed for the entire transfer, so that cannot work.
 * For simplicity, only these cases are handled with DMA:
 * 1. all transfer start addresses must be 32-bit aligned
 * 2. only the length of the last transfer may be unaligned
 *    a) on TX, DMA reads up to 3 bytes past the end of the buffer
 *       into the SPI TX FIFO, but the SPI core ignores these bytes
 *    b) on RX, the last 1...3 bytes are read by the CPU from the RX FIFO
 * 3. messages not longer than the FIFO size are handled by the CPU for
 *    efficiency
 * 4. as a special case, the hw can drop initial RX dummy bytes for the
 *    common case of [tx command, rx data] messages, thus DMA can be used
 *
 * Returns true if DMA is enabled, both channels exist and the message
 * satisfies the rules above and the maximum DMA length.
 */
static bool aug0603_check_dma(struct aug0603_spi *aug0603,
			      struct spi_message *msg)
{
	unsigned int last_rx = 0, last_tx = 0, bytes = 0;
	struct spi_transfer *t;

	if (!spi_dma)
		return false;
	if (!aug0603->dma_rx_channel || !aug0603->dma_tx_channel)
		return false;

	list_for_each_entry(t, &msg->transfers, transfer_list) {
		unsigned long addr_bits = (unsigned long)t->tx_buf |
					  (unsigned long)t->rx_buf;

		/* rule 1: both buffers start on a 32-bit boundary */
		if (addr_bits & 3)
			return false;

		/* rule 2: an unaligned length must not be followed by more data */
		if (t->rx_buf) {
			if (last_rx & 3)
				return false;
			last_rx = t->len;
		}
		if (t->tx_buf) {
			if (last_tx & 3)
				return false;
			last_tx = t->len;
		}

		bytes += t->len;
	}

	/* rule 3 and the upper limit of what the DMA path accepts */
	if (bytes <= SPI_USE_DMA_THRESHOLD || bytes > (4 * SPI_USE_DMA_MAX))
		return false;

	SPI_PRINTK(4, "Using DMA, total len %u\n", bytes);
	return true;
}

static int aug0603_do_transfer(struct aug0603_spi *aug0603,
			       struct spi_message *msg)
{
	struct spi_device *spi = msg->spi;
	struct spi_transfer *xfer;
	int err = 0;
	u32 speed_hz, bpw, sz;
	u32 cr, tc, rc;

	SPI_PRINTK(2, "\n");
	msg->status = -EINPROGRESS;
	msg->actual_length = 0;

	aug0603_print_spi_message(msg, false);

	aug0603->current_msg = msg;
	aug0603->current_msg_len = 0;

	// FIXME: we use speed_hz and bpw from first xfer only
	xfer = list_entry(msg->transfers.next, struct spi_transfer, transfer_list);
	if (xfer->speed_hz)
		speed_hz = min(xfer->speed_hz, msg->spi->max_speed_hz);
	else
		speed_hz = msg->spi->max_speed_hz;
	aug0603_set_clock(aug0603, speed_hz);

	if (xfer->bits_per_word)
		bpw = xfer->bits_per_word;
	else
		bpw = msg->spi->bits_per_word;
	sz = bpw >> 4;

	cr = (spi->mode & SPI_CPOL ? SPI_CR_PO : 0) |
		(spi->mode & SPI_CPHA ? SPI_CR_PH : 0) |
		(clock_internal_in ? SPI_CR_CI : 0) |
		SPI_CR_SO | SPI_CR_MD;
	tc = SPI_TC_MSF | (SPI_TC_IT << SPI_TC_IT_SHIFT) |
			   (sz << SPI_TC_SZ_SHIFT) | bpw;
	rc = SPI_RC_MSF | (SPI_RC_IT << SPI_RC_IT_SHIFT) |
			   (sz << SPI_RC_SZ_SHIFT) | bpw;
	aug0603_write_reg(aug0603, SPI_IEN, 0);
	aug0603_write_reg(aug0603, SPI_ICLR, ~0);
	aug0603_write_reg(aug0603, SPI_TCD, 0);
	aug0603_write_reg(aug0603, SPI_RCD, 0);
	aug0603_write_reg(aug0603, SPI_SSD, 0);
	aug0603_write_reg(aug0603, SPI_CR, cr);
	aug0603_write_reg(aug0603, SPI_TC, tc);
	aug0603_write_reg(aug0603, SPI_RC, rc);

	if (aug0603_check_dma(aug0603, msg))
		err = aug0603_do_next_message_dma(aug0603, msg);
	else
		err = aug0603_do_next_message_cpu(aug0603, msg);

	SPI_PRINTK(4, "DONE (err: %d) \n", err);
	return err;
}

#ifdef SPI_USE_QUEUE
/*
 * spi_master::transfer_one_message hook: start @msg and wait (at most one
 * second) for aug0603_message_done() to signal its completion.
 *
 * The message is always handed back to the SPI core with
 * spi_finalize_current_message(), also when it could not be started or
 * timed out.  Returns 0 on success, the start error, or -ETIMEDOUT.
 */
static int aug0603_transfer_one_message(struct spi_master *master,
					struct spi_message *msg)
{
	struct aug0603_spi *aug0603 = spi_master_get_devdata(master);
	int err;

	reinit_completion(&aug0603->msg_done);
	err = aug0603_do_transfer(aug0603, msg);
	if (err) {
		// The start routines have already set msg->status; the core
		// still needs the message finalized or the queue stalls.
		spi_finalize_current_message(master);
		return err;
	}
	if (wait_for_completion_timeout(&aug0603->msg_done, HZ) == 0) {
		dev_err(aug0603->dev, "timeout\n");
		err = -ETIMEDOUT;
		// Only touch the DMA channels for a DMA message: they are
		// NULL when no DMA channels could be obtained at probe time.
		if (aug0603->dma_running) {
			dmaengine_terminate_sync(aug0603->dma_rx_channel);
			dmaengine_terminate_sync(aug0603->dma_tx_channel);
			aug0603_dma_unmap(aug0603, msg);
			aug0603->dma_running = 0;
		}
		aug0603_message_done(aug0603, -ETIMEDOUT);
	}
	SPI_PRINTK(4, "DONE (err: %d) \n", err);
	spi_finalize_current_message(master);
	return err;
}

/*
 * spi_master::prepare_transfer_hardware hook.  Nothing needs to be
 * prepared; the call is only traced.  Always returns 0.
 */
static int aug0603_prepare_transfer_hardware(struct spi_master *master)
{
	SPI_PRINTK(2, "\n");
	return 0;
}

/*
 * spi_master::unprepare_transfer_hardware hook.  Nothing needs to be
 * undone; the call is only traced.  Always returns 0.
 */
static int aug0603_unprepare_transfer_hardware(struct spi_master *master)
{
	SPI_PRINTK(2, "\n");
	return 0;
}
#else
/*
 * spi_master::transfer hook used when SPI_USE_QUEUE is not defined: take a
 * runtime PM reference, start @msg and drop the reference again.
 * Returns the result of aug0603_do_transfer().
 */
static int aug0603_transfer(struct spi_device *spi, struct spi_message *msg)
{
	struct aug0603_spi *aug0603 = spi_master_get_devdata(spi->master);
	struct device *dev = aug0603->dev;
	int ret;

	pm_runtime_get_sync(dev);
	ret = aug0603_do_transfer(aug0603, msg);
	pm_runtime_put(dev);

	return ret;
}
#endif


/*
 * spi_master::setup hook: validate the configuration of @spi.
 *
 * Returns 0 if the device has a non-zero maximum speed, a word length of
 * 8, 16 or 32 bits, a chip select in 0..3 and only mode bits supported by
 * the master; -EINVAL otherwise.
 */
static int aug0603_setup(struct spi_device *spi)
{
	SPI_PRINTK(2, "\n");

	if (spi->max_speed_hz == 0) {
		dev_err(&spi->dev, "max_speed_hz not set\n");
		return -EINVAL;
	}

	switch (spi->bits_per_word) {
	case 8:
	case 16:
	case 32:
		break;
	default:
		dev_err(&spi->dev, "Invalid SPI word length %u\n",
			(unsigned int)spi->bits_per_word);
		return -EINVAL;
	}

	if (spi->chip_select > 3) { // FIXME use DT
		dev_err(&spi->dev, "Invalid SPI chip select %d\n", spi->chip_select);
		return -EINVAL;
	}

	if ((spi->mode & ~(spi->master->mode_bits)) != 0) {
		dev_err(&spi->dev, "Unsupported SPI mode bits %#x\n", spi->mode);
		return -EINVAL;
	}

	return 0;
}

/*
 * spi_master::cleanup hook.  There is no per-device state to release;
 * the call is only traced.
 */
static void aug0603_cleanup(struct spi_device *spi)
{
	SPI_PRINTK(2, "\n");
}

/*
 * Finish the message in flight: record @status in it, shut the controller
 * down, reset the per-message driver state and notify the waiter.
 *
 * @status: value stored in msg->status (0 or a negative errno).
 *
 * aug0603->current_msg must be non-NULL when this is called.  The function
 * busy-waits for the TX FIFO to drain when the message transmits beyond
 * its last received byte.
 */
static void aug0603_message_done(struct aug0603_spi *aug0603, int status)
{
	struct spi_message *msg;

	msg = aug0603->current_msg;

	SPI_PRINTK(3, "msg: @%p, status: %d\n", msg, status);
	SPI_PRINTK(3, "actual_len: %d, msg_len: %d\n",
		   msg->actual_length, aug0603->current_msg_len);
	if (msg->actual_length != aug0603->current_msg_len) {
		SPI_PRINTK(1, "WARN len values DIFFER %d != %d\n",
			   aug0603->current_msg_len, msg->actual_length);
	}

	msg->status = status;

	// tx_msg_len/rx_msg_len are only set by the CPU path
	if (aug0603->tx_msg_len > aug0603->rx_msg_len) {
		// when CT.IT=0 only one entry shall wait
		aug0603_change_reg(aug0603, SPI_TC, SPI_TC_IT_MASK,
				   0 << SPI_TC_IT_SHIFT);
		// wait TX FIFO transmitted
		aug0603_write_reg(aug0603, SPI_ICLR, SPI_ST_TE);
		while (!(aug0603_read_reg(aug0603, SPI_ST) & SPI_ST_TE))
			;
		// wait for TX transfer drain
		// 32 bit * 1 / (192MHz / 2 / HPER) = 1/3 us * HPER
		// (the first branch avoids overflow in DIV_ROUND_UP for huge dividers)
		udelay(UINT_MAX - 2 < aug0603->cur_clock_div ?
		       aug0603->cur_clock_div / 3 + 1 :
		       DIV_ROUND_UP(aug0603->cur_clock_div, 3));
	}

	// Disable SPI HW: deselect all slaves, clear enable and I/O mode
	aug0603_write_reg(aug0603, SPI_SS, 0);
	aug0603_change_reg(aug0603, SPI_CR, SPI_CR_EN | SPI_CR_RT_MASK, 0);

	// Clear IRQ regs
	aug0603_write_reg(aug0603, SPI_IEN, 0);
	aug0603_write_reg(aug0603, SPI_ICLR, ~0);

	aug0603_write_reg(aug0603, SPI_RDC, 0);
	aug0603_write_reg(aug0603, SPI_TDC, 0);

	// Cleanup SPI
	aug0603->current_msg = NULL;
	aug0603->current_msg_len = 0;
	aug0603->cur_tx_transfer = NULL;
	aug0603->cur_rx_transfer = NULL;
	aug0603->tx_msg_count = 0;
	aug0603->rx_msg_count = 0;
	aug0603->tx_msg_len = 0;
	aug0603->rx_msg_len = 0;
	aug0603->txp = 0;
	aug0603->rxp = 0;

	SPI_PRINTK(3, "SPI message complete\n");
	aug0603_print_spi_message(msg, true);
#ifdef SPI_USE_QUEUE
	// wakes aug0603_transfer_one_message()
	complete(&aug0603->msg_done);
#else
	msg->complete(msg->context);
#endif
}


/*
 * aug0603_spi_isr():
 *
 * Interrupt Service Routine
 */
static irqreturn_t aug0603_spi_isr(int irq, void *dev)
{
	struct aug0603_spi *aug0603 = dev;
	struct spi_message *msg;
	struct spi_transfer *rx = aug0603->cur_rx_transfer;
	struct spi_transfer *tx = aug0603->cur_tx_transfer;
	unsigned int rxp = aug0603->rxp;
	unsigned int txp = aug0603->txp;
	unsigned int rx_count = aug0603->rx_msg_count;
	unsigned int tx_count = aug0603->tx_msg_count;
	unsigned int rx_len = aug0603->rx_msg_len;
	unsigned int tx_len = aug0603->tx_msg_len;
	unsigned int tx_empty, rx_full;
	unsigned int rx_val, tx_val = 0, bytecnt;
	u32 status, mask;
	u8 byte;

	msg = aug0603->current_msg;

again:
	status = aug0603_read_reg(aug0603, SPI_ST);
	aug0603_write_reg(aug0603, SPI_ICLR, SPI_ST_TE | SPI_ST_RF);
	mask = aug0603_read_reg(aug0603, SPI_IEN);
	SPI_PRINTK(4, "Status: %08x mask %08x\n", status, mask);
	status &= mask;
	tx_empty = status & SPI_ST_TE;
	rx_full = status & SPI_ST_RF;

	if (rx_full && rx_count < rx_len) {
		rx_val = aug0603_read_reg(aug0603, SPI_RDF);
		SPI_PRINTK(5, "RDF = %08x\n", rx_val);

		for (bytecnt = 0; rx_count < rx_len && bytecnt < 4; bytecnt++) {
			byte = rx_val & 0xff;
			rx_val >>= 8;
			SPI_PRINTK(5, "RX %p[%u] = %02x\n", rx->rx_buf, rxp, byte);
			if (rx->rx_buf)
				((u8 *)rx->rx_buf)[rxp] = byte;
			// else discard dummy byte
			rx_count++;

			if (++rxp >= rx->len && rx_count < rx_len) {
				if (list_is_last(&rx->transfer_list, &msg->transfers)) {
					SPI_PRINTK(1, "internal error (%u %u  %u %u)\n",
						   rx_count, aug0603->rx_msg_len,
						   rxp, rx->len);
					rx_count = rx_len;
					break;
				}
				rx = list_entry(rx->transfer_list.next, struct spi_transfer, transfer_list);
				rxp = 0;
			}
		}
		aug0603->cur_rx_transfer = rx;
		aug0603->rxp = rxp;
		aug0603->rx_msg_count = rx_count;
	}
	if (rx_full && aug0603->rx_msg_count >= aug0603->rx_msg_len) {
		aug0603_change_reg(aug0603, SPI_IEN, SPI_IEN_RF, 0);
		SPI_PRINTK(3, "RX done: %u >= %u\n",
			   aug0603->rx_msg_count, aug0603->rx_msg_len);
	}
	if (tx_empty && tx_count < tx_len) {
		for (bytecnt = 0; tx_count < tx_len && bytecnt < 4; bytecnt++) {
			if (txp >= tx->len) {
				if (list_is_last(&tx->transfer_list, &msg->transfers)) {
					SPI_PRINTK(1, "internal error (%u %u  %u %u)\n",
						   tx_count, aug0603->tx_msg_len,
						   txp, tx->len);
					tx_count = tx_len;
					break;
				}
				tx = list_entry(tx->transfer_list.next, struct spi_transfer, transfer_list);
				txp = 0;
			}
			if (tx->tx_buf)
				byte = ((u8 *)tx->tx_buf)[txp];
			else
				byte = 0; // send dummy bytes
			SPI_PRINTK(5, "TX %p[%u] = %02x\n", tx->tx_buf, txp, byte);
			tx_val |= byte << (8 * bytecnt);
			txp++;
			tx_count++;
		}
		aug0603_write_reg(aug0603, SPI_TDF, tx_val);
		SPI_PRINTK(5, "TDF = %08x\n", tx_val);
		tx_val = 0;
		aug0603->cur_tx_transfer = tx;
		aug0603->txp = txp;
		aug0603->tx_msg_count = tx_count;
	}
	if (tx_empty && aug0603->tx_msg_count >= aug0603->tx_msg_len) {
		aug0603_change_reg(aug0603, SPI_IEN, SPI_IEN_TE, 0);
		// rt=2 allows RX when TX FIFO empty
		aug0603_change_reg(aug0603, SPI_RC, SPI_RC_RT_MASK, 2 << SPI_RC_RT_SHIFT);
		SPI_PRINTK(3, "TX done: %u >= %u\n",
			   aug0603->tx_msg_count, aug0603->tx_msg_len);
	}

	if (tx_empty || rx_full) {
		SPI_PRINTK(5, "again %u %u\n", tx_empty, rx_full);
		goto again;
	}

	if ((aug0603->rx_msg_count >= aug0603->rx_msg_len)
	    && (aug0603->tx_msg_count >= aug0603->tx_msg_len)) {
		SPI_PRINTK(3, "xfer done\n");
		msg->actual_length = aug0603->current_msg_len;
		aug0603_message_done(aug0603, 0);
	}

	return IRQ_HANDLED;
}

/*
 * Request the "rx" and "tx" slave DMA channels of the device.
 *
 * DMA is all-or-nothing: if either channel is unavailable, both channel
 * pointers are left NULL, which makes aug0603_check_dma() select the CPU
 * path for every message.
 */
static void aug0603_dma_probe(struct aug0603_spi *aug0603)
{
	aug0603->dma_rx_channel = dma_request_slave_channel(aug0603->dev, "rx");
	if (!aug0603->dma_rx_channel) {
		dev_err(aug0603->dev, "no RX DMA channel!\n");
		return;
	}
	aug0603->dma_tx_channel = dma_request_slave_channel(aug0603->dev, "tx");
	if (!aug0603->dma_tx_channel) {
		dev_err(aug0603->dev, "no TX DMA channel!\n");
		// don't keep a lone RX channel around
		dma_release_channel(aug0603->dma_rx_channel);
		aug0603->dma_rx_channel = NULL;
		return;
	}

	dev_info(aug0603->dev, "setup for DMA on RX %s, TX %s\n",
		 dma_chan_name(aug0603->dma_rx_channel),
		 dma_chan_name(aug0603->dma_tx_channel));
}

/*
 * aug0603_dma_remove - stop DMA activity and give both channels back
 * @aug0603: controller state
 *
 * When dma_running is set, both channels are terminated synchronously and
 * the flag is cleared.  Each channel that is currently held is then
 * released and its pointer reset to NULL.
 */
static void aug0603_dma_remove(struct aug0603_spi *aug0603)
{
	struct dma_chan *rx_chan = aug0603->dma_rx_channel;
	struct dma_chan *tx_chan = aug0603->dma_tx_channel;

	/* Abort whatever is still in flight before releasing anything. */
	if (aug0603->dma_running) {
		dmaengine_terminate_sync(rx_chan);
		dmaengine_terminate_sync(tx_chan);
		aug0603->dma_running = 0;
	}

	if (rx_chan != NULL) {
		dma_release_channel(rx_chan);
		aug0603->dma_rx_channel = NULL;
	}

	if (tx_chan != NULL) {
		dma_release_channel(tx_chan);
		aug0603->dma_tx_channel = NULL;
	}
}

/*
 * aug0603_spi_probe - bind the driver to one SPI controller instance
 * @pdev: platform device describing the controller
 *
 * Allocates the SPI master with the driver state as its private data,
 * acquires and enables the clock, tries to set up DMA (optional: failure
 * there is only logged), reserves and maps the register window, enables
 * runtime PM, claims the interrupt and finally registers the master.
 *
 * Every failure unwinds exactly what has been acquired up to that point.
 *
 * Returns 0 on success or a negative errno.
 */
static int aug0603_spi_probe(struct platform_device *pdev)
{
	struct aug0603_spi	*aug0603;
	struct spi_master	*master;
	struct resource		*res;
	int irq;
	int err = 0;

	SPI_PRINTK(1, "\n");

	master = spi_alloc_master(&pdev->dev, sizeof(struct aug0603_spi));
	if (master == NULL) {
		dev_err(&pdev->dev, "No Mem for SPI Master\n");
		err = -ENOMEM;
		goto err_nomem;
	}

	aug0603 = spi_master_get_devdata(master);
	aug0603->master = master;
	aug0603->pdev = pdev;
	aug0603->dev = &pdev->dev;
	init_completion(&aug0603->msg_done);

	aug0603->clk = clk_get(&pdev->dev, NULL);
	if (IS_ERR(aug0603->clk)) {
		dev_err(&pdev->dev, "Cannot get clock\n");
		err = PTR_ERR(aug0603->clk);
		goto err_no_clk;
	}
	err = clk_prepare_enable(aug0603->clk);
	if (err) {
		dev_err(&pdev->dev, "Cannot enable clock\n");
		goto err_clk_enable;
	}

	aug0603_dma_probe(aug0603);

	master->bus_num = pdev->id;
	master->num_chipselect = 8;
	master->cleanup = aug0603_cleanup;
	master->setup = aug0603_setup;
#ifdef SPI_USE_QUEUE
	master->prepare_transfer_hardware = aug0603_prepare_transfer_hardware;
	master->transfer_one_message = aug0603_transfer_one_message;
	master->unprepare_transfer_hardware = aug0603_unprepare_transfer_hardware;
	master->auto_runtime_pm = true;
#else
	master->transfer = aug0603_transfer;
#endif
	master->mode_bits = SPI_CPOL | SPI_CPHA
		| SPI_TX_DUAL | SPI_TX_QUAD | SPI_RX_DUAL | SPI_RX_QUAD;
	platform_set_drvdata(pdev, aug0603);

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	if (res == NULL) {
		dev_err(&pdev->dev, "Cannot get IORESOURCE_MEM\n");
		err = -ENOENT;
		goto err_no_iores;
	}

	aug0603->ioarea = request_mem_region(res->start, resource_size(res),
					     pdev->name);
	if (aug0603->ioarea == NULL) {
		dev_err(&pdev->dev, "Cannot reserve region\n");
		err = -ENXIO;
		goto err_no_iores;
	}
	aug0603->regs = ioremap(res->start, resource_size(res));
	if (aug0603->regs == NULL) {
		dev_err(&pdev->dev, "Cannot map IO\n");
		err = -ENXIO;
		goto err_no_iomap;
	}

	pm_runtime_set_active(&pdev->dev);
	pm_runtime_enable(&pdev->dev);

	/*
	 * Keep the lookup result in a signed local so the error check is
	 * valid whatever the type of aug0603->irq is, and pass the real
	 * error code on (it may be -EPROBE_DEFER).
	 */
	irq = platform_get_irq(pdev, 0);
	if (irq < 0) {
		dev_err(&pdev->dev, "No IRQ specified\n");
		err = irq;
		goto err_no_irq;
	}
	aug0603->irq = irq;
	err = request_irq(aug0603->irq, aug0603_spi_isr, 0, pdev->name, aug0603);
	if (err) {
		dev_err(&pdev->dev, "Cannot claim IRQ\n");
		goto err_no_irq;
	}

	/* %pa takes the address of a phys_addr_t/resource_size_t and adds "0x". */
	printk(KERN_INFO "SPI: AURORA AU-G0600 at v: 0x%p, p: %pa, irq: %d.\n",
	       aug0603->regs, &res->start, aug0603->irq);

	master->dev.of_node = pdev->dev.of_node;
	err = spi_register_master(master);
	if (err) {
		dev_err(&pdev->dev, "Could not register SPI master \n");
		goto err_no_spi;
	}
	return 0;

err_no_spi:
	free_irq(aug0603->irq, aug0603);
err_no_irq:
	pm_runtime_disable(&pdev->dev);
	iounmap(aug0603->regs);
err_no_iomap:
	/* Counterpart of request_mem_region(); also frees the region entry. */
	release_mem_region(res->start, resource_size(res));
err_no_iores:
	/* Releases only the channels aug0603_dma_probe() actually obtained. */
	aug0603_dma_remove(aug0603);
	clk_disable_unprepare(aug0603->clk);
err_clk_enable:
	clk_put(aug0603->clk);
err_no_clk:
	spi_master_put(aug0603->master);
err_nomem:
	return err;
}

/*
 * aug0603_spi_remove - unbind the driver from a controller instance
 * @pdev: platform device being removed
 *
 * Teardown order matters:
 *  1. take an extra reference on the master: spi_unregister_master() drops
 *     the reference obtained at allocation time, and the driver state
 *     lives inside the master allocation, so without this every access
 *     below would touch freed memory;
 *  2. unregister the master so no new message can reach the hardware;
 *  3. only then silence the controller, free the IRQ and release DMA;
 *  4. release clock, mapping and memory region;
 *  5. drop the extra reference taken in step 1.
 *
 * Always returns 0.
 */
static int __exit aug0603_spi_remove(struct platform_device *pdev)
{
	struct aug0603_spi *aug0603 = platform_get_drvdata(pdev);
	struct spi_master *master = spi_master_get(aug0603->master);

	SPI_PRINTK(1, "\n");

	spi_unregister_master(master);

	/*
	 * The device may be runtime-suspended (clock gated) at this point.
	 * Resume it so the register writes reach the hardware and so the
	 * clk_disable_unprepare() below balances the enable count.
	 */
	pm_runtime_get_sync(&pdev->dev);

	/* Mask interrupts and disable the controller before freeing the IRQ. */
	aug0603_write_reg(aug0603, SPI_IEN, 0);
	aug0603_write_reg(aug0603, SPI_CR, 0);
	free_irq(aug0603->irq, aug0603);
	aug0603_dma_remove(aug0603);

	pm_runtime_disable(&pdev->dev);
	pm_runtime_put_noidle(&pdev->dev);

	clk_disable_unprepare(aug0603->clk);
	clk_put(aug0603->clk);
	iounmap(aug0603->regs);
	/* Counterpart of request_mem_region(); also frees the region entry. */
	release_mem_region(aug0603->ioarea->start,
			   resource_size(aug0603->ioarea));

	/* Cleared last: the PM callbacks above look the state up via drvdata. */
	platform_set_drvdata(pdev, NULL);
	spi_master_put(master);
	return 0;
}

#ifdef CONFIG_PM
/*
 * aug0603_suspend - PM suspend callback
 * @dev: device whose driver data is the controller state
 *
 * Disables the controller clock.  Always returns 0.
 */
static int aug0603_suspend(struct device *dev)
{
	struct aug0603_spi *ctrl = dev_get_drvdata(dev);

	dev_dbg(dev, "aug0603_spi_suspend()\n");
	clk_disable(ctrl->clk);

	return 0;
}

/*
 * aug0603_resume - PM resume callback
 * @dev: device whose driver data is the controller state
 *
 * Re-enables the controller clock.
 *
 * Returns 0 on success, or the negative errno from clk_enable() so the PM
 * core does not treat a device without a running clock as resumed.
 */
static int aug0603_resume(struct device *dev)
{
	struct aug0603_spi *aug0603 = dev_get_drvdata(dev);
	int err;

	dev_dbg(dev, "aug0603_spi_resume()\n");
	err = clk_enable(aug0603->clk);
	if (err)
		dev_err(dev, "Cannot re-enable clock: %d\n", err);
	return err;
}
#endif

/*
 * PM callbacks: the same suspend/resume pair is used for system sleep and
 * for runtime PM.  The table is never modified, hence const.
 */
static const struct dev_pm_ops aug0603_pm_ops = {
	SET_SYSTEM_SLEEP_PM_OPS(aug0603_suspend, aug0603_resume)
	SET_RUNTIME_PM_OPS(aug0603_suspend, aug0603_resume, NULL)
};

/* Device-tree compatible strings handled by this driver. */
static const struct of_device_id aug0603_spi_of_match[] = {
	{ .compatible = "aurora,au-g0603" },
	{ },
};
/* Export the table so the module can be autoloaded on a DT match. */
MODULE_DEVICE_TABLE(of, aug0603_spi_of_match);

/*
 * Platform driver glue.
 * NOTE(review): .remove is wrapped in __exit_p(), so whether a remove
 * callback is installed depends on how the driver is built - confirm that
 * this is intended.
 */
static struct platform_driver aug0603_spidrv = {
	.probe	= aug0603_spi_probe,
	.remove	= __exit_p(aug0603_spi_remove),
	.driver	= {
		.owner		= THIS_MODULE,
		.name		= "aug0603",
		.pm		= &aug0603_pm_ops,
		.of_match_table	= aug0603_spi_of_match,
	},
};
module_platform_driver(aug0603_spidrv);

/* Module metadata. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pixelworks Inc. <cchierici@pixelworks.com>");
MODULE_DESCRIPTION("Aurora AU-G0603 SPI Controller Driver");
