/*
 * offload engine driver for the Marvell M2M engine
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 */

#include <linux/init.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include <linux/spinlock.h>
#include <linux/interrupt.h>
#include <linux/platform_device.h>
#include <linux/platform_data/dma-mv_m2m.h>
#include <linux/of.h>
#include <linux/of_irq.h>
#include <linux/irqdomain.h>
#include <linux/kthread.h>
#include <linux/sched/rt.h>

#include "dmaengine.h"
#include "mv_m2m.h"

static void mv_m2m_issue_pending(struct dma_chan *chan);

/* Wipe the hardware descriptor behind @desc so every field starts at zero. */
static void mv_desc_init(struct mv_m2m_desc_slot *desc)
{
	struct mv_m2m_desc *hw = desc->hw_desc;

	memset(hw, 0, sizeof(*hw));
}

/*
 * Overwrite the control word of @desc's hardware descriptor with the value
 * built by mv_m2m_hw_desc_ctl(1, 0, byte_count).
 */
static void mv_desc_set_ctl(struct mv_m2m_desc_slot *desc, u32 byte_count)
{
	struct mv_m2m_desc *hw = desc->hw_desc;

	hw->ctl = mv_m2m_hw_desc_ctl(1, 0, byte_count);
}

/*
 * Link @desc's hardware descriptor to the descriptor at @next_desc_addr.
 * The link field must still be zero; linking an already linked descriptor
 * trips BUG_ON().
 */
static void mv_desc_set_next_desc(struct mv_m2m_desc_slot *desc,
				  u32 next_desc_addr)
{
	struct mv_m2m_desc *hw = desc->hw_desc;

	BUG_ON(hw->dma_next);
	hw->dma_next = next_desc_addr;
}

/* Store @addr as the destination address in @desc's hardware descriptor. */
static void mv_desc_set_dest_addr(struct mv_m2m_desc_slot *desc,
				  dma_addr_t addr)
{
	struct mv_m2m_desc *hw = desc->hw_desc;

	hw->dma_dest = addr;
}

/* Store @addr as the source address in @desc's hardware descriptor. */
static void mv_desc_set_src_addr(struct mv_m2m_desc_slot *desc,
				 dma_addr_t addr)
{
	struct mv_m2m_desc *hw = desc->hw_desc;

	hw->dma_src = addr;
}

/*
 * Record @addr in the descriptor's xlate field and OR the bits produced by
 * mv_m2m_hw_desc_ctl(0, 1, 0) into the control word that is already there.
 */
static void mv_desc_set_xlate(struct mv_m2m_desc_slot *desc,
				 dma_addr_t addr)
{
	struct mv_m2m_desc *hw = desc->hw_desc;

	hw->xlate = addr;
	hw->ctl |= mv_m2m_hw_desc_ctl(0, 1, 0);
}

/* Read back the engine's current_desc_addr register. */
static u32 mv_chan_get_current_desc(struct mv_m2m_chan *chan)
{
	u32 cur_desc;

	cur_desc = readl(&chan->m2m_base->current_desc_addr);

	return cur_desc;
}

/*
 * Remember @next_desc_addr as the chain start for @chan.  Nothing is written
 * to the hardware here; mv_chan_activate() pushes chan->desc_addr to the
 * engine.
 */
static void mv_chan_set_next_descriptor(struct mv_m2m_chan *chan,
					u32 next_desc_addr)
{
	chan->desc_addr = next_desc_addr;
}

/* Write @addr into the engine's pcie_xlate_reg_addr register. */
static void mv_m2m_set_pcie_xlate_reg_addr(struct mv_m2m_chan *chan, u32 addr)
{
	writel(addr, &chan->m2m_base->pcie_xlate_reg_addr);
}

static void mv_m2m_hw_reset(struct mv_m2m_chan *chan)
{
	writel((1 << M2M_DESC_START_SOFT_RESET_SHIFT), &chan->m2m_base->desc_start);
	writel(0, &chan->m2m_base->desc_start);
}
/*
 * Update the interrupt-enable register (IEN) through M2M_REG_MASKWRITE,
 * passing M2M_IRQ_CHAIN_END, 0 and 0xFFFFFFFF.
 * NOTE(review): presumably the arguments are (mask, shift, value), so only
 * the chain-end interrupt is switched on - confirm against the macro.
 */
static void mv_m2m_enable_interrupts(struct mv_m2m_chan *chan)
{
	M2M_REG_MASKWRITE(&chan->m2m_base->IEN, M2M_IRQ_CHAIN_END, 0, 0xFFFFFFFF);
}

/*
 * Update the interrupt-enable register (IEN) through M2M_REG_MASKWRITE,
 * passing M2M_IRQ_ALL, 0 and 0x00000000.
 * NOTE(review): presumably the arguments are (mask, shift, value), so every
 * interrupt source is switched off - confirm against the macro.
 */
static void mv_m2m_disable_interrupts(struct mv_m2m_chan *chan)
{
	M2M_REG_MASKWRITE(&chan->m2m_base->IEN, M2M_IRQ_ALL, 0, 0x00000000);
}

/* Return the raw contents of the interrupt-pending register (IPEND). */
static u32 mv_m2m_pending_interrupts(struct mv_m2m_chan *chan)
{
	u32 pending;

	pending = readl(&chan->m2m_base->IPEND);

	return pending;
}

/* Write @irqs to the interrupt-acknowledge register (IACK). */
static void mv_m2m_clear_interrupts(struct mv_m2m_chan *chan, int irqs)
{
	writel(irqs, &chan->m2m_base->IACK);
}

/*
 * Hand the chain start previously stored by mv_chan_set_next_descriptor()
 * to the engine by writing it to the desc_start register.
 */
static void mv_chan_activate(struct mv_m2m_chan *chan)
{
	dev_dbg(mv_chan_to_devp(chan), " activate chan.\n");

	writel(chan->desc_addr, &chan->m2m_base->desc_start);
}

/*
 * Report whether the engine is working: returns 0 when the idle field of
 * the status register reads non-zero, 1 otherwise.
 */
static char mv_chan_is_busy(struct mv_m2m_chan *chan)
{
	return (M2M_REG_MASKREAD(&chan->m2m_base->status, M2M_STATUS_IDLE_MASK,
				 M2M_STATUS_IDLE_SHIFT)) ? 0 : 1;
}

/**
 * mv_m2m_free_slots - mark a descriptor slot as available again
 * @mv_chan: channel owning the slot (only used for the debug message)
 * @slot: slot to release
 *
 * Only the in_use flag is cleared; the slot stays on whatever lists it is
 * linked into.  Caller must hold &mv_chan->lock.
 */
static void mv_m2m_free_slots(struct mv_m2m_chan *mv_chan,
			      struct mv_m2m_desc_slot *slot)
{
	dev_dbg(mv_chan_to_devp(mv_chan), "%s %d slot %p\n",
		__func__, __LINE__, slot);

	slot->in_use = 0;
}

/*
 * mv_m2m_start_new_chain - point the engine at a chain headed by sw_desc
 *
 * Stores the bus address of @sw_desc as the next chain start, counts one
 * more pending operation and lets mv_m2m_issue_pending() decide whether the
 * engine is kicked.
 * Caller must hold &mv_chan->lock while calling this function
 */
static void mv_m2m_start_new_chain(struct mv_m2m_chan *mv_chan,
				   struct mv_m2m_desc_slot *sw_desc)
{
	dev_dbg(mv_chan_to_devp(mv_chan), "%s %d: sw_desc %p\n",
		__func__, __LINE__, sw_desc);

	/* remember where the hardware has to start fetching */
	mv_chan_set_next_descriptor(mv_chan, sw_desc->async_tx.phys);

	mv_chan->pending++;
	mv_m2m_issue_pending(&mv_chan->dmachan);
}

/*
 * Run the completion work for one finished descriptor.
 *
 * If the descriptor carries a positive cookie, that cookie becomes the
 * return value and the client callback (if any) is invoked.  Afterwards the
 * descriptor's DMA mappings are released and dependent operations are
 * started.  Returns the incoming @cookie unchanged for descriptors whose
 * cookie is zero.  A negative cookie trips BUG_ON().
 */
static dma_cookie_t
mv_m2m_run_tx_complete_actions(struct mv_m2m_desc_slot *desc,
	struct mv_m2m_chan *mv_chan, dma_cookie_t cookie)
{
	struct dma_async_tx_descriptor *tx = &desc->async_tx;

	BUG_ON(tx->cookie < 0);

	if (tx->cookie > 0) {
		cookie = tx->cookie;

		/* the callback must not sleep or submit new operations
		 * to this channel
		 */
		if (tx->callback)
			tx->callback(tx->callback_param);
	}

	/* drop the dma mappings attached to the descriptor */
	dma_descriptor_unmap(tx);

	/* kick off anything that was waiting on this descriptor */
	dma_run_dependencies(tx);

	return cookie;
}

/*
 * Walk the completed_slots list and release every slot whose descriptor
 * has been acked by the client; un-acked slots stay on the list.
 * Always returns 0.
 */
static int
mv_m2m_clean_completed_slots(struct mv_m2m_chan *mv_chan)
{
	struct mv_m2m_desc_slot *slot, *next;

	dev_dbg(mv_chan_to_devp(mv_chan), "%s %d\n", __func__, __LINE__);

	list_for_each_entry_safe(slot, next, &mv_chan->completed_slots,
				 completed_node) {
		if (!async_tx_test_ack(&slot->async_tx))
			continue;

		list_del(&slot->completed_node);
		mv_m2m_free_slots(mv_chan, slot);
	}

	return 0;
}

/*
 * Take @desc off the channel's chain.  An acked descriptor is released
 * immediately; an un-acked one is parked on completed_slots, because the
 * client may still attach dependent operations until it sets 'ack'.
 * Always returns 0.
 */
static int
mv_m2m_clean_slot(struct mv_m2m_desc_slot *desc,
	struct mv_m2m_chan *mv_chan)
{
	dev_dbg(mv_chan_to_devp(mv_chan), "%s %d: desc %p flags 0x%x\n",
		__func__, __LINE__, desc, desc->async_tx.flags);

	list_del(&desc->chain_node);

	if (async_tx_test_ack(&desc->async_tx))
		mv_m2m_free_slots(mv_chan, desc);
	else
		list_add_tail(&desc->completed_node,
			      &mv_chan->completed_slots);

	return 0;
}

/*
 * mv_m2m_slot_cleanup - retire finished descriptors and restart the engine
 *
 * Samples the busy state and the hardware's current descriptor address
 * once, releases acked slots from completed_slots, then walks the chain
 * from the oldest entry running completion actions and removing each
 * descriptor until the current descriptor is reached.  If the engine is
 * idle and descriptors remain on the chain, a new hardware chain is started
 * from the chain head.  The channel's completed_cookie is updated when at
 * least one descriptor with a positive cookie was retired.
 *
 * Client callbacks run from here, i.e. with the channel lock held.
 *
 * This function must be called with the mv_m2m_chan spinlock held
 */
static void mv_m2m_slot_cleanup(struct mv_m2m_chan *mv_chan)
{
	struct mv_m2m_desc_slot *iter, *_iter;
	dma_cookie_t cookie = 0;
	int busy = mv_chan_is_busy(mv_chan);
	u32 current_desc = mv_chan_get_current_desc(mv_chan);
	int seen_current = 0;

	dev_dbg(mv_chan_to_devp(mv_chan), "%s %d\n", __func__, __LINE__);
	dev_dbg(mv_chan_to_devp(mv_chan), "current_desc %x\n", current_desc);
	mv_m2m_clean_completed_slots(mv_chan);

	/* free completed slots from the chain starting with
	 * the oldest descriptor
	 */

	list_for_each_entry_safe(iter, _iter, &mv_chan->chain,
					chain_node) {
		prefetch(_iter);
		prefetch(&_iter->async_tx);

		/* do not advance past the current descriptor loaded into the
		 * hardware channel, subsequent descriptors are either in
		 * process or have not been submitted
		 */
		if (seen_current)
                {
			dev_dbg(mv_chan_to_devp(mv_chan), "break 1\n");
			break;
                }

		/* stop the search if we reach the current descriptor and the
		 * channel is busy
		 */
		if (iter->async_tx.phys == current_desc) {
			seen_current = 1;
			if (busy)
                        {
				dev_dbg(mv_chan_to_devp(mv_chan), "break 2\n");
				break;
			}
		}

		/* cookie tracks the last positive cookie retired so far */
		cookie = mv_m2m_run_tx_complete_actions(iter, mv_chan, cookie);

		/* mv_m2m_clean_slot() currently always returns 0, so this
		 * break is never taken
		 */
		if (mv_m2m_clean_slot(iter, mv_chan))
			break;
	}

	/* engine idle but work left on the chain: restart from the head */
	if ((busy == 0) && !list_empty(&mv_chan->chain)) {
		struct mv_m2m_desc_slot *chain_head;
		chain_head = list_entry(mv_chan->chain.next,
					struct mv_m2m_desc_slot,
					chain_node);

		mv_m2m_start_new_chain(mv_chan, chain_head);
	}

	if (cookie > 0)
		mv_chan->dmachan.completed_cookie = cookie;
}

/*
 * Cleanup thread of a channel.
 *
 * Sleeps until irqs_pending becomes non-zero, consumes one pending count
 * and runs mv_m2m_slot_cleanup() under the channel lock.  The loop ends
 * once kthread_should_stop() reports true; whoever stops the thread has to
 * make irqs_pending non-zero so the wait condition is satisfied.
 */
static int mv_m2m_thread(void *data)
{
	struct mv_m2m_chan *mv_chan = data;

	for (;;) {
		if (kthread_should_stop())
			break;

		wait_event_interruptible(mv_chan->wq,
					 atomic_read(&mv_chan->irqs_pending));
		atomic_dec(&mv_chan->irqs_pending);

		spin_lock(&mv_chan->lock);
		mv_m2m_slot_cleanup(mv_chan);
		spin_unlock(&mv_chan->lock);
	}

	return 0;
}

/*
 * mv_m2m_alloc_slot - claim one free descriptor slot
 *
 * Searches all_slots for a slot whose in_use flag is clear, first starting
 * after last_used and then, if that fails, once more from the list head.
 * A slot that is found has its descriptor flags and cookie reset, is marked
 * in_use and becomes the new last_used.
 *
 * Returns the slot, or NULL when none is free; in that case the cleanup
 * thread is woken so that slots may become available for a later attempt.
 * The callers visible in this file invoke it with &mv_chan->lock held.
 */
static struct mv_m2m_desc_slot *
mv_m2m_alloc_slot(struct mv_m2m_chan *mv_chan)
{
	struct mv_m2m_desc_slot *iter, *_iter;
	int retry = 0;

	/* start search from the last allocated descriptor
	 * if a descriptor can not be found start searching
	 * from the beginning of the list
	 */
retry:
	if (retry == 0)
		iter = mv_chan->last_used;
	else
		/* pseudo entry for the list head itself, so the "continue"
		 * iteration below begins at the first real slot
		 */
		iter = list_entry(&mv_chan->all_slots,
			struct mv_m2m_desc_slot,
			slot_node);

	list_for_each_entry_safe_continue(
		iter, _iter, &mv_chan->all_slots, slot_node) {
		prefetch(_iter);
		prefetch(&_iter->async_tx);
		if (iter->in_use) {
			/* give up after finding the first busy slot
			 * on the second pass through the list
			 */
			if (retry)
				break;

			continue;
		}

		/* free slot: reset the per-use state and claim it */
		iter->async_tx.flags = 0;
		iter->async_tx.cookie = 0;
		iter->in_use = 1;
		mv_chan->last_used = iter;
		return iter;
	}
	if (!retry++)
		goto retry;

	/* try to free some slots if the allocation fails */
	atomic_inc(&mv_chan->irqs_pending);
	wake_up(&mv_chan->wq);

	return NULL;
}

/************************ DMA engine API functions ****************************/
/*
 * mv_m2m_tx_submit - queue a prepared descriptor group on the channel
 * @tx: descriptor returned by one of the prep routines (the group's tail)
 *
 * Assigns a cookie and moves the group's descriptors from tx_list onto the
 * channel's chain.  With an empty chain the group starts a new hardware
 * chain.  Otherwise it is linked behind the old tail in both the software
 * list and the hardware descriptor, and the engine is only restarted when
 * it is idle and had stopped exactly on that old tail.
 *
 * Returns the cookie assigned to @tx.
 */
static dma_cookie_t
mv_m2m_tx_submit(struct dma_async_tx_descriptor *tx)
{
	struct mv_m2m_desc_slot *sw_desc = to_mv_m2m_slot(tx);
	struct mv_m2m_chan *mv_chan = to_mv_m2m_chan(tx->chan);
	struct mv_m2m_desc_slot *grp_start, *old_chain_tail;
	dma_cookie_t cookie;
	int new_hw_chain = 1;

	dev_dbg(mv_chan_to_devp(mv_chan),
		"%s sw_desc %p: async_tx %p\n",
		__func__, sw_desc, &sw_desc->async_tx);

	/* first descriptor of the group this tail belongs to */
	grp_start = sw_desc->group_head;

	spin_lock(&mv_chan->lock);
	cookie = dma_cookie_assign(tx);

	if (list_empty(&mv_chan->chain))
		list_splice_init(&sw_desc->tx_list, &mv_chan->chain);
	else {
		new_hw_chain = 0;

		old_chain_tail = list_entry(mv_chan->chain.prev,
					    struct mv_m2m_desc_slot,
					    chain_node);
		list_splice_init(&sw_desc->tx_list,
				 &old_chain_tail->chain_node);

		/* fix up the hardware chain */
		mv_desc_set_next_desc(old_chain_tail, grp_start->async_tx.phys);

		/* if the channel is not busy */
		if (!mv_chan_is_busy(mv_chan)) {
			u32 current_desc = mv_chan_get_current_desc(mv_chan);
			/*
			 * and the current desc is the end of the chain before
			 * the append, then we need to start the channel
			 */
			if (current_desc == old_chain_tail->async_tx.phys) {
				new_hw_chain = 1;
                        }
		}
	}

	if (new_hw_chain)
		mv_m2m_start_new_chain(mv_chan, grp_start);

	spin_unlock(&mv_chan->lock);

	return cookie;
}

/* returns the number of allocated descriptors */
static int mv_m2m_alloc_chan_resources(struct dma_chan *chan)
{
	char *hw_desc;
	int idx;
	struct mv_m2m_chan *mv_chan = to_mv_m2m_chan(chan);
	struct mv_m2m_desc_slot *slot = NULL;
	int num_descs_in_pool = MV_M2M_POOL_SIZE/sizeof(struct mv_m2m_desc);

	/* Allocate descriptor slots */
	idx = mv_chan->slots_allocated = 0;
	while (idx < num_descs_in_pool) {
		slot = kzalloc(sizeof(*slot), GFP_KERNEL);
		if (!slot) {
			printk(KERN_INFO "MV M2M Channel only initialized"
				" %d descriptor slots", idx);
			break;
		}
		hw_desc = (char *) mv_chan->dma_desc_pool_virt;
		slot->hw_desc = (void *) &hw_desc[idx * sizeof(struct mv_m2m_desc)];

		dma_async_tx_descriptor_init(&slot->async_tx, chan);
		slot->async_tx.tx_submit = mv_m2m_tx_submit;
		INIT_LIST_HEAD(&slot->chain_node);
		INIT_LIST_HEAD(&slot->slot_node);
		INIT_LIST_HEAD(&slot->tx_list);
		hw_desc = (char *) mv_chan->dma_desc_pool;
		slot->async_tx.phys =
			(dma_addr_t) &hw_desc[idx * sizeof(struct mv_m2m_desc)];
		/* If a descriptor crosses a 64kb boundary, mark it as used.  the hardware can't deal with that */
		if((slot->async_tx.phys / 0x1000) != ((slot->async_tx.phys + sizeof(struct mv_m2m_desc)) / 0x1000)) {
			slot->in_use = 1;
		} else {
			slot->in_use = 0;
		}
		slot->unmap_len = 0;

		spin_lock(&mv_chan->lock);
		mv_chan->slots_allocated = idx;
		list_add_tail(&slot->slot_node, &mv_chan->all_slots);
		spin_unlock(&mv_chan->lock);
		idx++;
	}

	if (mv_chan->slots_allocated && !mv_chan->last_used)
		mv_chan->last_used = list_entry(mv_chan->all_slots.next,
					struct mv_m2m_desc_slot,
					slot_node);

	dev_dbg(mv_chan_to_devp(mv_chan),
		"allocated %d descriptor slots last_used: %p\n",
		mv_chan->slots_allocated, mv_chan->last_used);

	return mv_chan->slots_allocated ? : -ENOMEM;
}

static struct dma_async_tx_descriptor *mv_m2m_prep_dma_sg(struct dma_chan *chan,
	struct scatterlist *dst_sg, unsigned int dst_nents,
	struct scatterlist *src_sg, unsigned int src_nents,
	unsigned long flags)
{
	struct mv_m2m_chan *mv_chan = to_mv_m2m_chan(chan);
	struct mv_m2m_desc_slot *iter, *alloc_start = NULL, *alloc_tail = NULL;
	dma_addr_t src, dest;
	size_t len, src_avail, dst_avail, src_offset, dest_offset;
	LIST_HEAD(chain);

	if (dst_nents == 0 || src_nents == 0)
		return NULL;

	if (dst_sg == NULL || src_sg == NULL)
		return NULL;

	if (((flags & DMA_M2M_LOCAL_TO_REMOTE) || (flags & DMA_M2M_REMOTE_TO_LOCAL) || (flags & DMA_M2M_REMOTE_TO_REMOTE)) && mv_chan->remote_capable == 0)
		return NULL;

	spin_lock(&mv_chan->lock);

	src_avail = sg_dma_len(src_sg);
	dst_avail = sg_dma_len(dst_sg);

	do {
		iter = mv_m2m_alloc_slot(mv_chan);
		if(!iter) {

			dev_err(chan->device->dev, "Failed to get a slot!\n");
#if 0
			list_splice(&chain, &mv_chan->completed_slots);
			mv_m2m_clean_completed_slots(mv_chan);
#endif
			spin_unlock(&mv_chan->lock);
			return NULL;
		}

		if(!alloc_start)
			alloc_start = iter;

		len = min_t(size_t, src_avail, dst_avail);
		len = min_t(size_t, len, MV_M2M_MAX_BYTE_COUNT);

		src = sg_dma_address(src_sg) + sg_dma_len(src_sg) - src_avail;
		dest = sg_dma_address(dst_sg) + sg_dma_len(dst_sg) - dst_avail;

		/* Break up a transfer if it is going to cross a 32MB boundary */
		src_offset = src & ~THIRTY_TWO_MB_MASK;
		len = min_t(size_t, THIRTY_TWO_MB - src_offset, len);

		dest_offset = dest & ~THIRTY_TWO_MB_MASK;
		len = min_t(size_t, THIRTY_TWO_MB - dest_offset, len);

		mv_desc_init(iter);
		mv_desc_set_ctl(iter, len);

		if(flags & DMA_M2M_LOCAL_TO_REMOTE || flags & DMA_M2M_REMOTE_TO_REMOTE) {
			mv_desc_set_xlate(iter, dest & ~(mv_chan->pci_window_size - 1));
			mv_desc_set_dest_addr(iter, (dest & (mv_chan->pci_window_size - 1)) | mv_chan->pci_window);
		} else {
			mv_desc_set_dest_addr(iter, dest);
		}

		if(flags & DMA_M2M_REMOTE_TO_LOCAL || flags & DMA_M2M_REMOTE_TO_REMOTE) {
			mv_desc_set_xlate(iter, src & ~(mv_chan->pci_window_size - 1));
			mv_desc_set_src_addr(iter, (src & (mv_chan->pci_window_size - 1)) | mv_chan->pci_window);
		} else {
			mv_desc_set_src_addr(iter, src);
		}

		if(alloc_tail != NULL) {
			mv_desc_set_next_desc(alloc_tail, iter->async_tx.phys);
		}

		async_tx_ack(&iter->async_tx);

		iter->unmap_len = len;
		iter->async_tx.flags |= flags;

		list_add_tail(&iter->chain_node, &chain);
		alloc_tail = iter;

		src_avail -= len;
		dst_avail -= len;

		if(src_avail == 0) {
			src_sg = sg_next(src_sg);
			if(src_sg)
				src_avail = sg_dma_len(src_sg);
		}

		if(dst_avail == 0) {
			dst_sg = sg_next(dst_sg);
			if(dst_sg)
				dst_avail = sg_dma_len(dst_sg);
		}

	} while(dst_sg && src_sg);

	alloc_tail->async_tx.flags = flags;
	alloc_tail->group_head = alloc_start;
	alloc_tail->async_tx.cookie = -EBUSY;
	list_splice(&chain, &alloc_tail->tx_list);

	spin_unlock(&mv_chan->lock);

	return &alloc_tail->async_tx;
}

/*
 * mv_m2m_prep_dma_memcpy - prepare a plain memory-to-memory copy
 * @chan: channel to prepare the operation on
 * @dest: bus address of the destination buffer
 * @src: bus address of the source buffer
 * @len: number of bytes to copy
 * @flags: descriptor flags; DMA_M2M_LOCAL_TO_LOCAL is added here
 *
 * Wraps both buffers into single-entry scatterlists on the stack and lets
 * mv_m2m_prep_dma_sg() build the descriptors.
 *
 * Returns the descriptor to submit, or NULL if the sg preparation failed.
 */
static struct dma_async_tx_descriptor *
mv_m2m_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
		size_t len, unsigned long flags)
{
	struct mv_m2m_chan *mv_chan = to_mv_m2m_chan(chan);
	struct scatterlist s_sg, d_sg;
	struct dma_async_tx_descriptor *tx;

	/* dma_addr_t has no fixed width: print it through a cast */
	dev_dbg(mv_chan_to_devp(mv_chan),
		"%s dest: %llx src %llx len: %zu flags: %lu\n",
		__func__, (unsigned long long)dest, (unsigned long long)src,
		len, flags);

	sg_init_table(&s_sg, 1);
	sg_init_table(&d_sg, 1);

	sg_dma_len(&s_sg) = len;
	sg_dma_address(&s_sg) = (dma_addr_t)src;

	sg_dma_len(&d_sg) = len;
	sg_dma_address(&d_sg) = (dma_addr_t)dest;

	flags |= DMA_M2M_LOCAL_TO_LOCAL;

	tx = mv_m2m_prep_dma_sg(chan, &d_sg, 1, &s_sg, 1, flags);

	dev_dbg(mv_chan_to_devp(mv_chan),
		"%s async_tx %p\n",
		__func__, tx);

	return tx;
}

/*
 * mv_m2m_free_chan_resources - release all software slots of a channel
 * @chan: dmaengine channel being torn down
 *
 * Runs one last cleanup pass, then empties the chain and completed_slots
 * lists and frees every slot on all_slots.  Descriptors that were still on
 * the chain or waiting for an ack are counted and reported with dev_err().
 */
static void mv_m2m_free_chan_resources(struct dma_chan *chan)
{
	struct mv_m2m_chan *mv_chan = to_mv_m2m_chan(chan);
	struct mv_m2m_desc_slot *slot, *next;
	int leftover = 0;

	spin_lock(&mv_chan->lock);

	mv_m2m_slot_cleanup(mv_chan);

	/* anything still queued or un-acked counts as "in use" */
	list_for_each_entry_safe(slot, next, &mv_chan->chain, chain_node) {
		list_del(&slot->chain_node);
		leftover++;
	}

	list_for_each_entry_safe(slot, next, &mv_chan->completed_slots,
				 completed_node) {
		list_del(&slot->completed_node);
		leftover++;
	}

	/* now the slots themselves can go */
	list_for_each_entry_safe_reverse(slot, next, &mv_chan->all_slots,
					 slot_node) {
		list_del(&slot->slot_node);
		kfree(slot);
		mv_chan->slots_allocated--;
	}
	mv_chan->last_used = NULL;

	dev_dbg(mv_chan_to_devp(mv_chan), "%s slots_allocated %d\n",
		__func__, mv_chan->slots_allocated);

	spin_unlock(&mv_chan->lock);

	if (leftover)
		dev_err(mv_chan_to_devp(mv_chan),
			"freeing %d in use descriptors!\n", leftover);
}

/**
 * mv_m2m_status - poll the status of an M2M transaction
 * @chan: M2M channel handle
 * @cookie: M2M transaction identifier
 * @txstate: M2M transactions state holder (or NULL)
 *
 * A transaction that is already complete only triggers the release of acked
 * slots.  Otherwise a full cleanup pass is run and the cookie is evaluated
 * a second time, so progress made by that pass is reflected in the result.
 */
static enum dma_status mv_m2m_status(struct dma_chan *chan,
					  dma_cookie_t cookie,
					  struct dma_tx_state *txstate)
{
	struct mv_m2m_chan *mv_chan = to_mv_m2m_chan(chan);
	enum dma_status state = dma_cookie_status(chan, cookie, txstate);

	spin_lock(&mv_chan->lock);
	if (state == DMA_COMPLETE)
		mv_m2m_clean_completed_slots(mv_chan);
	else
		mv_m2m_slot_cleanup(mv_chan);
	spin_unlock(&mv_chan->lock);

	/* re-evaluate only if the first look said "not done yet" */
	if (state != DMA_COMPLETE)
		state = dma_cookie_status(chan, cookie, txstate);

	return state;
}

/*
 * Interrupt handler of a channel.
 *
 * Samples the pending causes, defers the actual descriptor cleanup to the
 * channel's thread and acknowledges exactly the causes that were sampled.
 * Always returns IRQ_HANDLED.
 */
static irqreturn_t mv_m2m_interrupt_handler(int irq, void *data)
{
	struct mv_m2m_chan *mv_chan = data;
	u32 cause;

	cause = mv_m2m_pending_interrupts(mv_chan);
	dev_dbg(mv_chan_to_devp(mv_chan), "intr cause %x\n", cause);

	/* let the cleanup thread do the work */
	atomic_inc(&mv_chan->irqs_pending);
	wake_up(&mv_chan->wq);

	mv_m2m_clear_interrupts(mv_chan, cause);

	return IRQ_HANDLED;
}

/*
 * mv_m2m_issue_pending - start the engine once enough work has piled up
 * @chan: dmaengine channel
 *
 * Does nothing while fewer than MV_M2M_THRESHOLD operations are pending.
 * Otherwise the pending counter is reset and the engine is activated with
 * the chain start stored in the channel.
 */
static void mv_m2m_issue_pending(struct dma_chan *chan)
{
	struct mv_m2m_chan *mv_chan = to_mv_m2m_chan(chan);

	if (mv_chan->pending < MV_M2M_THRESHOLD)
		return;

	mv_chan->pending = 0;
	mv_chan_activate(mv_chan);
}

/*
 * Perform a transaction to verify the HW works.
 */
#define MV_M2M_TEST_SIZE (128 * 1024)

/*
 * mv_m2m_memcpy_self_test - copy a test pattern with the engine and verify it
 * @mv_chan: channel under test
 *
 * Fills a MV_M2M_TEST_SIZE source buffer with a byte pattern, lets the
 * engine copy it into a zeroed destination buffer, waits 1 ms and compares
 * both buffers.  Channel resources, DMA mappings and buffers are released
 * on every path.
 *
 * Returns 0 on success, -ENOMEM if a buffer could not be allocated or
 * mapped, -ENODEV if the channel could not be set up, the operation could
 * not be prepared, did not complete in time or produced wrong data.
 */
static int mv_m2m_memcpy_self_test(struct mv_m2m_chan *mv_chan)
{
	int i;
	void *src, *dest;
	dma_addr_t src_dma, dest_dma;
	struct dma_chan *dma_chan;
	struct device *dev;
	dma_cookie_t cookie;
	struct dma_async_tx_descriptor *tx;
	int err = 0;

	src = kmalloc(sizeof(u8) * MV_M2M_TEST_SIZE, GFP_KERNEL);
	if (!src)
		return -ENOMEM;

	dest = kzalloc(sizeof(u8) * MV_M2M_TEST_SIZE, GFP_KERNEL);
	if (!dest) {
		kfree(src);
		return -ENOMEM;
	}

	/* Fill in src buffer */
	for (i = 0; i < MV_M2M_TEST_SIZE; i++)
		((u8 *) src)[i] = (u8)i;

	dma_chan = &mv_chan->dmachan;
	dev = dma_chan->device->dev;
	if (mv_m2m_alloc_chan_resources(dma_chan) < 1) {
		err = -ENODEV;
		goto out;
	}

	dest_dma = dma_map_single(dev, dest, MV_M2M_TEST_SIZE,
				  DMA_FROM_DEVICE);
	if (dma_mapping_error(dev, dest_dma)) {
		err = -ENOMEM;
		goto free_resources;
	}

	src_dma = dma_map_single(dev, src, MV_M2M_TEST_SIZE, DMA_TO_DEVICE);
	if (dma_mapping_error(dev, src_dma)) {
		err = -ENOMEM;
		goto unmap_dest;
	}

	tx = mv_m2m_prep_dma_memcpy(dma_chan, dest_dma, src_dma,
				    MV_M2M_TEST_SIZE, 0);
	if (!tx) {
		/* submitting a NULL descriptor would dereference it */
		dev_err(dev, "Self-test cannot prepare operation, disabling\n");
		err = -ENODEV;
		goto unmap_src;
	}

	cookie = mv_m2m_tx_submit(tx);

	mv_m2m_issue_pending(dma_chan);
	async_tx_ack(tx);
	msleep(1);

	if (mv_m2m_status(dma_chan, cookie, NULL) !=
	    DMA_COMPLETE) {
		dev_err(dev, "Self-test memcpy timed out, disabling\n");
		err = -ENODEV;
		goto unmap_src;
	}

	/* make the engine's writes visible to the CPU before comparing */
	dma_sync_single_for_cpu(dev, dest_dma, MV_M2M_TEST_SIZE,
				DMA_FROM_DEVICE);
	if (memcmp(src, dest, MV_M2M_TEST_SIZE)) {
		dev_err(dev, "Self-test memcpy failed compare, disabling\n");
		err = -ENODEV;
	}

unmap_src:
	dma_unmap_single(dev, src_dma, MV_M2M_TEST_SIZE, DMA_TO_DEVICE);
unmap_dest:
	dma_unmap_single(dev, dest_dma, MV_M2M_TEST_SIZE, DMA_FROM_DEVICE);
free_resources:
	mv_m2m_free_chan_resources(dma_chan);
out:
	kfree(src);
	kfree(dest);
	return err;
}

/*
 * mv_m2m_sg_self_test - scatter/gather copy of a test pattern with verify
 * @mv_chan: channel under test
 *
 * Same idea as the memcpy self test, but the source buffer is described by
 * three scatterlist entries (1/4, 1/2, 1/4 of the buffer) and the
 * destination by two halves, so entries of both lists end at different
 * offsets.  Channel resources, DMA mappings and buffers are released on
 * every path.
 *
 * Returns 0 on success, -ENOMEM if memory could not be allocated or mapped,
 * -ENODEV if the channel could not be set up, the operation could not be
 * prepared, did not complete in time or produced wrong data.
 */
static int mv_m2m_sg_self_test(struct mv_m2m_chan *mv_chan)
{
	int i;
	void *src, *dest;
	dma_addr_t src_dma, dest_dma;
	struct dma_chan *dma_chan;
	struct device *dev;
	struct scatterlist *s_sg = NULL, *d_sg = NULL;
	dma_cookie_t cookie;
	struct dma_async_tx_descriptor *tx;
	int err = 0;

	src = kmalloc(sizeof(u8) * MV_M2M_TEST_SIZE, GFP_KERNEL);
	if (!src)
		return -ENOMEM;

	dest = kzalloc(sizeof(u8) * MV_M2M_TEST_SIZE, GFP_KERNEL);
	if (!dest) {
		err = -ENOMEM;
		goto out;
	}

	/* Fill in src buffer */
	for (i = 0; i < MV_M2M_TEST_SIZE; i++)
		((u8 *) src)[i] = (u8)i;

	s_sg = kmalloc((sizeof(struct scatterlist) * 3), GFP_KERNEL);
	if (!s_sg) {
		err = -ENOMEM;
		goto out;
	}
	sg_init_table(s_sg, 3);

	d_sg = kmalloc((sizeof(struct scatterlist) * 2), GFP_KERNEL);
	if (!d_sg) {
		err = -ENOMEM;
		goto out;
	}
	sg_init_table(d_sg, 2);

	dma_chan = &mv_chan->dmachan;
	dev = dma_chan->device->dev;
	if (mv_m2m_alloc_chan_resources(dma_chan) < 1) {
		err = -ENODEV;
		goto out;
	}

	dest_dma = dma_map_single(dev, dest, MV_M2M_TEST_SIZE,
				  DMA_FROM_DEVICE);
	if (dma_mapping_error(dev, dest_dma)) {
		err = -ENOMEM;
		goto free_resources;
	}

	src_dma = dma_map_single(dev, src, MV_M2M_TEST_SIZE, DMA_TO_DEVICE);
	if (dma_mapping_error(dev, src_dma)) {
		err = -ENOMEM;
		goto unmap_dest;
	}

	/* source: quarter, half, quarter */
	sg_dma_len(&s_sg[0]) = MV_M2M_TEST_SIZE / 4;
	sg_dma_address(&s_sg[0]) = (dma_addr_t)src_dma;

	sg_dma_len(&s_sg[1]) = MV_M2M_TEST_SIZE / 2;
	sg_dma_address(&s_sg[1]) = (dma_addr_t)src_dma + (MV_M2M_TEST_SIZE / 4);

	sg_dma_len(&s_sg[2]) = MV_M2M_TEST_SIZE / 4;
	sg_dma_address(&s_sg[2]) = (dma_addr_t)src_dma + ((MV_M2M_TEST_SIZE / 4) * 3);

	/* destination: two halves */
	sg_dma_len(&d_sg[0]) = MV_M2M_TEST_SIZE / 2;
	sg_dma_address(&d_sg[0]) = (dma_addr_t)dest_dma;

	sg_dma_len(&d_sg[1]) = MV_M2M_TEST_SIZE / 2;
	sg_dma_address(&d_sg[1]) = (dma_addr_t)dest_dma + (MV_M2M_TEST_SIZE / 2);

	tx = mv_m2m_prep_dma_sg(dma_chan, d_sg, 2, s_sg, 3, DMA_M2M_LOCAL_TO_LOCAL);
	if (!tx) {
		/* submitting a NULL descriptor would dereference it */
		dev_err(dev, "Self-test cannot prepare operation, disabling\n");
		err = -ENODEV;
		goto unmap_src;
	}

	cookie = mv_m2m_tx_submit(tx);

	mv_m2m_issue_pending(dma_chan);
	async_tx_ack(tx);
	msleep(1);

	if (mv_m2m_status(dma_chan, cookie, NULL) !=
	    DMA_COMPLETE) {
		dev_err(dev, "Self-test sg copy timed out, disabling\n");
		err = -ENODEV;
		goto unmap_src;
	}

	/* make the engine's writes visible to the CPU before comparing */
	dma_sync_single_for_cpu(dev, dest_dma, MV_M2M_TEST_SIZE,
				DMA_FROM_DEVICE);
	if (memcmp(src, dest, MV_M2M_TEST_SIZE)) {
		dev_err(dev, "Self-test sg copy failed compare, disabling\n");
		err = -ENODEV;
	}

unmap_src:
	dma_unmap_single(dev, src_dma, MV_M2M_TEST_SIZE, DMA_TO_DEVICE);
unmap_dest:
	dma_unmap_single(dev, dest_dma, MV_M2M_TEST_SIZE, DMA_FROM_DEVICE);
free_resources:
	mv_m2m_free_chan_resources(dma_chan);
out:
	kfree(s_sg);
	kfree(d_sg);
	kfree(src);
	kfree(dest);

	return err;
}

/*
 * device_control hook.  None of the optional DMA control commands are
 * implemented by this driver, so every @cmd is rejected with -ENOSYS.
 */
static int mv_m2m_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
			  unsigned long arg)
{
	return -ENOSYS;
}

/*
 * mv_m2m_channel_remove - tear down a channel set up by mv_m2m_channel_add()
 * @mv_chan: channel to remove
 *
 * Unregisters the DMA device, silences and releases the interrupt, stops
 * the cleanup thread and only then gives the descriptor pool and the
 * channel structure back.
 *
 * Always returns 0.
 */
static int mv_m2m_channel_remove(struct mv_m2m_chan *mv_chan)
{
	struct dma_chan *chan, *_chan;
	struct device *dev = mv_chan->dmadev.dev;

	dma_async_device_unregister(&mv_chan->dmadev);

	/* no new work for the thread from here on */
	mv_m2m_disable_interrupts(mv_chan);
	free_irq(mv_chan->irq, mv_chan);

	/* make the thread's wait condition true so that the wake-up issued
	 * by kthread_stop() gets it out of its wait
	 */
	atomic_inc(&mv_chan->irqs_pending);
	kthread_stop(mv_chan->irq_thread);

	list_for_each_entry_safe(chan, _chan, &mv_chan->dmadev.channels,
				 device_node) {
		list_del(&chan->device_node);
	}

	/* the pool came from dma_alloc_writecombine(), so it has to be
	 * released with the matching free routine
	 */
	dma_free_writecombine(dev, MV_M2M_POOL_SIZE,
			      mv_chan->dma_desc_pool_virt,
			      mv_chan->dma_desc_pool);

	devm_kfree(dev, mv_chan);

	return 0;
}

static struct mv_m2m_chan *
mv_m2m_channel_add(struct mv_m2m_device *m2mdev,
		   struct platform_device *pdev,
		   int irq)
{
	int ret = 0;
	struct mv_m2m_chan *mv_chan;
	struct dma_device *dma_dev;
	struct sched_param param = { .sched_priority = MAX_RT_PRIO/2 };

	mv_chan = devm_kzalloc(&pdev->dev, sizeof(*mv_chan), GFP_KERNEL);
	if (!mv_chan) {
		return ERR_PTR(-ENOMEM);
	}

	mv_chan->irq = irq;

	dma_dev = &mv_chan->dmadev;

	/* allocate coherent memory for hardware descriptors
	 * note: writecombine gives slightly better performance, but
	 * requires that we explicitly flush the writes
	 */
	mv_chan->dma_desc_pool_virt =
	  dma_alloc_writecombine(&pdev->dev, MV_M2M_POOL_SIZE,
				 &mv_chan->dma_desc_pool, GFP_KERNEL);
	if (!mv_chan->dma_desc_pool_virt) {
		return ERR_PTR(-ENOMEM);
	}

	dma_cap_zero(dma_dev->cap_mask);
        dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask);
        dma_cap_set(DMA_SG, dma_dev->cap_mask);

	INIT_LIST_HEAD(&dma_dev->channels);

	/* set base routines */
	dma_dev->device_alloc_chan_resources = mv_m2m_alloc_chan_resources;
	dma_dev->device_free_chan_resources = mv_m2m_free_chan_resources;
	dma_dev->device_tx_status = mv_m2m_status;
	dma_dev->device_issue_pending = mv_m2m_issue_pending;
	dma_dev->device_control = mv_m2m_control;
	dma_dev->device_prep_dma_memcpy = mv_m2m_prep_dma_memcpy;
	dma_dev->device_prep_dma_sg = mv_m2m_prep_dma_sg;
	dma_dev->dev = &pdev->dev;

	mv_chan->pci_window = m2mdev->pci_window;
	mv_chan->pci_window_size = m2mdev->pci_window_size;
	mv_chan->remote_capable = m2mdev->remote_capable;
	mv_chan->m2m_base = (struct M2M_REGS_s *)m2mdev->m2m_base;
	if (!mv_chan->m2m_base) {
		ret = -ENOMEM;
		goto err_free_dma;
	}
	atomic_set(&mv_chan->irqs_pending, 0);
	init_waitqueue_head(&mv_chan->wq);
	mv_chan->irq_thread = kthread_run(mv_m2m_thread, mv_chan, dev_name(&pdev->dev));
	if (IS_ERR(mv_chan->irq_thread)) {
		ret = -ENOMEM;
		goto err_free_dma;
	}
	sched_setscheduler(mv_chan->irq_thread, SCHED_FIFO, &param);

	/* if we have a pcie translation register address, set it */
	if (m2mdev->xlate_reg_addr) {
		mv_m2m_set_pcie_xlate_reg_addr(mv_chan, m2mdev->xlate_reg_addr);
	}

	/* give the hardware block a soft reset */
	mv_m2m_hw_reset(mv_chan);

	/* clear any pending before enabling interrupts */
	mv_m2m_clear_interrupts(mv_chan, M2M_IRQ_ALL);

	ret = request_irq(mv_chan->irq, mv_m2m_interrupt_handler,
			  0, dev_name(&pdev->dev), mv_chan);
	if (ret)
		goto err_free_dma;

	mv_m2m_enable_interrupts(mv_chan);

	spin_lock_init(&mv_chan->lock);
	INIT_LIST_HEAD(&mv_chan->chain);
	INIT_LIST_HEAD(&mv_chan->completed_slots);
	INIT_LIST_HEAD(&mv_chan->all_slots);
	mv_chan->dmachan.device = dma_dev;
	dma_cookie_init(&mv_chan->dmachan);

	list_add_tail(&mv_chan->dmachan.device_node, &dma_dev->channels);

	if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
		ret = mv_m2m_memcpy_self_test(mv_chan);
		if (ret)
			goto err_free_irq;
	}

	if (dma_has_cap(DMA_SG, dma_dev->cap_mask)) {
		ret = mv_m2m_sg_self_test(mv_chan);
		if (ret)
			goto err_free_irq;
	}

	dma_async_device_register(dma_dev);
	return mv_chan;

err_free_irq:
	free_irq(mv_chan->irq, mv_chan);
err_free_dma:
	dma_free_coherent(&pdev->dev, MV_M2M_POOL_SIZE,
			  mv_chan->dma_desc_pool_virt, mv_chan->dma_desc_pool);
	return ERR_PTR(ret);
}

static int mv_m2m_probe(struct platform_device *pdev)
{
	struct mv_m2m_device *m2mdev;
	struct resource *res;
	struct device_node *np = pdev->dev.of_node;
	int irq, ret;

	dev_notice(&pdev->dev, "Marvell M2M driver\n");

	m2mdev = devm_kzalloc(&pdev->dev, sizeof(*m2mdev), GFP_KERNEL);
	if (!m2mdev)
		return -ENOMEM;

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	if (!res) {
		return -ENODEV;
	}

	m2mdev->m2m_base = devm_ioremap(&pdev->dev, res->start,
					resource_size(res));
	if (!m2mdev->m2m_base) {
		return -EBUSY;
	}

	platform_set_drvdata(pdev, m2mdev);

	irq = irq_of_parse_and_map(np, 0);
	if (!irq) {
		return -ENODEV;
	}

	if (of_property_read_u32(np, "pci_window", &m2mdev->pci_window)) {
		m2mdev->pci_window = 0;
	}

	if (of_property_read_u32(np, "pci_window_size", &m2mdev->pci_window_size)) {
		m2mdev->pci_window_size = 0;
	}

	if (of_property_read_u32(np, "xlate_reg_addr", &m2mdev->xlate_reg_addr)) {
		m2mdev->xlate_reg_addr = 0;
	}

	if (m2mdev->xlate_reg_addr != 0 && m2mdev->pci_window != 0 && m2mdev->pci_window_size != 0) {
		m2mdev->remote_capable = 1;
		dev_notice(&pdev->dev, "Remote capable\n");
	} else {
		m2mdev->remote_capable = 0;
	}

	m2mdev->channel = mv_m2m_channel_add(m2mdev, pdev, irq);

	if (IS_ERR(m2mdev->channel)) {
		ret = PTR_ERR(m2mdev->channel);
		m2mdev->channel = NULL;
		irq_dispose_mapping(irq);
		return ret;
	}

	return 0;
}

/*
 * mv_m2m_remove - unbind the driver from a platform device
 * @pdev: platform device being removed
 *
 * Removes the channel if probe managed to create one.  Always returns 0.
 */
static int mv_m2m_remove(struct platform_device *pdev)
{
	struct mv_m2m_device *m2mdev = platform_get_drvdata(pdev);
	struct mv_m2m_chan *mv_chan = m2mdev->channel;

	if (mv_chan)
		mv_m2m_channel_remove(mv_chan);

	return 0;
}

/*
 * Device-tree match table: the driver binds to nodes that are compatible
 * with "mrvl,m2m".  The table is never modified, hence const.
 */
static const struct of_device_id mv_m2m_dt_ids[] = {
	{ .compatible = "mrvl,m2m", },
	{},
};
MODULE_DEVICE_TABLE(of, mv_m2m_dt_ids);

/* Platform driver glue: probe/remove entry points and DT matching. */
static struct platform_driver mv_m2m_driver = {
	.driver = {
		.name = MV_M2M_NAME,
		.owner = THIS_MODULE,
		.of_match_table = of_match_ptr(mv_m2m_dt_ids),
	},
	.probe = mv_m2m_probe,
	.remove = mv_m2m_remove,
};

/*
 * Module init/exit do nothing besides registering and unregistering the
 * platform driver, which is exactly what module_platform_driver() generates.
 */
module_platform_driver(mv_m2m_driver);

MODULE_DESCRIPTION("DMA engine driver for Marvell's M2M engine");
MODULE_LICENSE("GPL");
