/*
 * Driver for the Marvell 61xx CDMA Controller
 *
 * Derived from drivers/dma/dw_dmac.c
 *
 * Copyright (C) 2007-2008 Atmel Corporation
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/delay.h>
#include <linux/dmaengine.h>
#include <linux/dma-mapping.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_platform.h>
#include <linux/of_irq.h>
#include <linux/of_address.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/stat.h>
#include <linux/jiffies.h>
#include <asm/atomic.h>
#include "cdma.h"
/*
 * This supports the Marvell Central DMA Controller used in the
 * Marvell 88PA61x0, which does not support descriptor writeback.
 */

/*
 * Number of physical channels to allocate for each driver.
 *
 * Every parameter in this file is registered with S_IRUGO: it is exported
 * read-only and therefore can only be chosen when the module is loaded.
 */
static int max_owned  = 6;
module_param(max_owned,  int, S_IRUGO);
static int max_shared = 5;
module_param(max_shared, int, S_IRUGO);
static int max_cyclic = 1;
module_param(max_cyclic, int, S_IRUGO);
static int max_memops = 0;
module_param(max_memops, int, S_IRUGO);

/*
 * Number of virtual shared channels to allocate.
 */
static int max_vshared = 64;
module_param(max_vshared, int, S_IRUGO);

/*
 * Residue reporting control.
 */

/* In mv61vc_check_residue_running(), ignore data stuck in the fifo
 * when the destination is in memory
 */
/* NOTE(review): not static, so presumably referenced from another file. */
int ignore_fifo_residue = 1;
module_param(ignore_fifo_residue, int, S_IRUGO);

/*
 * Debug control. Never commit code with these enabled.
 */
/* When non-zero, the interrupt handler prints the status word and dumps
 * the registers of each interrupting physical channel.
 */
int vdebug = 0;
module_param(vdebug, int, S_IRUGO);

/* When non-zero, mv61vc_tx_submit() dumps every submitted transaction. */
static int vdumptx = 0;
module_param(vdumptx, int, S_IRUGO);

/* NOTE(review): not used in this part of the file; by its name it enables
 * a descriptor dump when a channel is paused -- confirm at the user.
 */
int dumppausedesc = 0;
module_param(dumppausedesc, int, S_IRUGO);

int vresidue = 0; /* verbose residue debug */
module_param(vresidue,  int, S_IRUGO);

/*----------------------------------------------------------------------*/

/*
 * Names for the descriptor and chain kmem caches. They have static storage
 * because they need to persist for kmem_cache after the _init functions
 * return.
 */
static const char desc_cache_name[]="mv61_desc_struct";
static const char chain_cache_name[] ="mv61_chain_struct";

/*----------------------------------------------------------------------*/

/*
 * Forward declarations of static helpers that are called before their
 * definitions appear.
 */
static struct mv61_vdma_chan *mv61_dispatch_get(struct mv61_dma *mv61p);
static void mv61vc_issue_queue(struct mv61_vdma *mv61v, struct mv61_vdma_chan *mv61vc);
static void mv61_dispatch_put(struct mv61_vdma_chan *mv61vc,
					struct mv61_desc *desc);
/* The second argument is the virtual channel index (vch_index in the definition). */
static int mv61_dispatch_drop(struct mv61_dma *mv61p, int ch_index);
static void mv61_dispatch_free_pchan(struct mv61_dma *mv61p, int pch_index);
static void mv61vc_terminate_dma(struct mv61_vdma_chan *mv61vc,
					struct mv61_pdma_chan *mv61pc,
					enum mv61_dma_flow_ctrl flowcontrol,
					int flags);
static int mv61vc_device_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
							unsigned long arg);

/*----------------------------------------------------------------------*/

/**
 * mv61vc_dostart - launch a transaction or subtransaction
 * @mv61vc: virtual channel control structure
 * @desc: top level transaction control structure
 *
 * Call with mv61p->biglock held and irqs disabled.
 * Call with mv61vc->lock held and irqs disabled.
 *
 * The transaction belongs to a virtual channel,
 * but it is launched on a physical channel. The virtual channel must
 * already be paired with a physical channel; an unpaired channel trips
 * BUG_ON().
 *
 * Returns 0 once the channel has been programmed, or -EBUSY when the
 * physical channel's enable bit is still set, in which case nothing is
 * written to the hardware.
 */
int mv61vc_dostart(struct mv61_vdma_chan *mv61vc, struct mv61_desc *desc)
{
	struct mv61_pdma_chan	*mv61pc;
	struct mv61_pdma_chan_regs *pcregs;
	u32			tmp;

	mv61pc = mv61_vpmap_v_to_p(mv61vc);
	BUG_ON(!mv61pc);

	pcregs = mv61pc->ch_regs;

	/* ASSERT:  channel is idle */
	tmp = readl(&pcregs->CFG);
	if (tmp & CDMA_CFG_ENABLE_MASK) {
		__dev_vdbg(chan2dev(&mv61vc->chan), "dostart:%d\n", __LINE__);
		/* The tasklet will hopefully advance the queue... */
		return -EBUSY;
	}

	/* reset the per-transfer bookkeeping kept on the virtual channel */
	memset(&mv61vc->hwstat, 0, sizeof(mv61vc->hwstat));

	mv61vc->residue = 0;
	mv61vc->status = DMA_IN_PROGRESS;

	/* per-transaction register image prepared by the prep_* functions */
	writel(desc->txregs.CFG, &pcregs->CFG);
	writel(desc->txregs.FillValue, &pcregs->FillValue);
	writel(desc->txregs.intEn, &pcregs->intEn);
	writel(desc->txregs.TimerControl, &pcregs->TimerControl);

	/* Control comes from the channel defaults, not from the descriptor */
	writel(mv61vc->def.Control,&pcregs->Control );

	/* order the configuration writes before the enable below */
	wmb();

	tmp = readl(&pcregs->CFG);
	writel(tmp | CDMA_CFG_ENABLE_MASK, &pcregs->CFG);

	/*
	 * Make the first link descriptor visible to the device, then hand its
	 * bus address to the channel.
	 * NOTE(review): presumably the CDR write is what triggers the first
	 * descriptor fetch -- confirm against the controller documentation.
	 */
	dma_sync_single_for_device(chan2parent(&mv61vc->chan), desc->lli_phys,
			sizeof(desc->lli), DMA_TO_DEVICE);
	writel(desc->lli_phys,&pcregs->CDR);

	return 0;
}

/**
 * mv61vc_descriptor_complete - notify client and clean up first descriptor in list
 * @mv61vc: virtual channel control structure
 * @desc_list: list of top-level transaction descriptors to check
 *
 * Call with mv61vc->lock released.
 */
static int mv61vc_descriptor_complete (struct mv61_vdma_chan *mv61vc,
						struct list_head *desc_list, int exec_callback)
{
	dma_async_tx_callback		callback;
	void				*param;
	struct dma_async_tx_descriptor	*txd;
	struct mv61_desc 		*desc;
	unsigned long 			lockvcflags;

	spin_lock_irqsave(&mv61vc->lock, lockvcflags);

	if(list_empty(desc_list)) {
		spin_unlock_irqrestore(&mv61vc->lock, lockvcflags);
		return 0;
	}

	desc = list_first_entry(desc_list, struct mv61_desc, desc_node);

	txd = &desc->txd;

	mv61vc->completed = txd->cookie;
	callback = txd->callback;
	param = txd->callback_param;

	list_del(&desc->desc_node);

	/*
	 * The def.valid flag is initialized during configuration of a
	 * newly-requested channel and won't change until the channel is released.
	 * Shouldn't need the lock to check it, but just in case ....
	 */
	if (!mv61vc->def.valid) {
		/*
		 * This was a memop, so we're responsible for the dma_unmap_*
		 * calls.
		 */
		spin_unlock_irqrestore(&mv61vc->lock, lockvcflags);

		dma_descriptor_unmap(txd);
	}
	else {
		spin_unlock_irqrestore(&mv61vc->lock, lockvcflags);
	}

	/*
	 * The API requires that no submissions are done from a
	 * callback.
	 */
	if (exec_callback && callback)
		callback(param);

	/*
	 * The transaction descriptor chain is now disconnected from the virtual
	 * channel's transaction lists, so the main locks are no longer needed.
	 * It is still a member of the top all_chains tracking list for memory
	 * leak detection, but the descriptor functions will handle that safely.
	 */
	mv61_desc_put(desc);

	return 1;
}

/**
 * _mv61vc_process_desc_list - retire every transaction on a list
 * @mv61vc: virtual channel control structure
 * @desc_list: list to drain
 * @exec_callback: non-zero to run each transaction's completion callback
 *
 * Call with mv61vc->lock released.
 */
static void _mv61vc_process_desc_list(struct mv61_vdma_chan
					*mv61vc, struct list_head *desc_list, int exec_callback)
{
	/* each call retires at most one descriptor and returns 0 when empty */
	while (mv61vc_descriptor_complete(mv61vc, desc_list, exec_callback) > 0)
		;
}

/* Retire every transaction on @desc_list, running the client callbacks. */
static void mv61vc_process_desc_list(struct mv61_vdma_chan *mv61vc,
					struct list_head *desc_list)
{
	const int run_callbacks = 1;

	_mv61vc_process_desc_list(mv61vc, desc_list, run_callbacks);
}

/* Retire every transaction on @desc_list without running client callbacks. */
static void mv61vc_flush_desc_list(struct mv61_vdma_chan *mv61vc,
					struct list_head *desc_list)
{
	const int run_callbacks = 0;

	_mv61vc_process_desc_list(mv61vc, desc_list, run_callbacks);
}

/**
 * mv61vc_issue_queue - move transactions to the active list
 * @mv61v: this top virtual dma control instance
 * @mv61vc: virtual channel control structure
 *
 * Call with all locks released.
 * Never try to lock multiple virtual channels here concurrently.
 *
 * For a shared controller every queued transaction is handed to the
 * dispatcher and the dispatcher is then asked for the next virtual channel
 * to run, which may be a different channel from @mv61vc. For every other
 * controller type the queue is spliced onto the active list and the first
 * active transaction is started directly.
 */
static void mv61vc_issue_queue(struct mv61_vdma *mv61v, struct mv61_vdma_chan *mv61vc)
{
	struct mv61_desc	*desc;
	struct mv61_vdma_chan 	*mv61vc_new;
	struct mv61_dma 	*mv61p = mv61v->mv61p;
	unsigned long 		biglockflags;
	unsigned long 		lockvcflags;
	dma_cookie_t		cookie;

	__dev_vdbg(chan2dev(&mv61vc->chan), "issue_queue:%d\n", __LINE__);

	spin_lock_irqsave(&mv61p->biglock, biglockflags);
	spin_lock_irqsave(&mv61vc->lock, lockvcflags);

	if(list_empty(&mv61vc->queue)) {
		spin_unlock_irqrestore(&mv61vc->lock, lockvcflags);
		goto done;
	}

	if(mv61v->vtype == MV61_VDMA_SHARED) {
		/* mv61_dispatch_put() moves each entry off the queue */
		while(!list_empty(&mv61vc->queue)) {
			desc = list_first_entry(&mv61vc->queue,
						struct mv61_desc, desc_node);
			mv61_dispatch_put(mv61vc, desc);
		}

		__dev_vdbg(chan2dev(&mv61vc->chan), "issue_queue:%d\n", __LINE__);

		/* finished with the current vchannel */
		spin_unlock_irqrestore(&mv61vc->lock, lockvcflags);

		/*
		 * Kickstart the dispatcher. May return a different vchannel that
		 * was already waiting, so get a new channel lock
		 */
		mv61vc_new = mv61_dispatch_get(mv61p);
		if(mv61vc_new) {
			unsigned long flags;

			spin_lock_irqsave(&mv61vc_new->lock, flags);
			__dev_vdbg(chan2dev(&mv61vc_new->chan), "issue_queue:%d\n", __LINE__);
			/*
			 * Everything below must operate on the channel the
			 * dispatcher selected and paired with a physical
			 * channel, which is the one whose lock is held --
			 * not on the channel that was just queued.
			 */
			desc = mv61vc_first_active(mv61vc_new);
			if(desc) {
				cookie = desc->txd.cookie;
				/* start the current subchain of the transaction */
				desc = desc->active;
			}
			if(desc) {
				mv61vc_new->started = cookie;
				mv61vc_new->residue = 0;
				mv61vc_dostart(mv61vc_new, desc);
			}
			spin_unlock_irqrestore(&mv61vc_new->lock, flags);
		}
	} else {
		__dev_vdbg(chan2dev(&mv61vc->chan), "issue_queue:%d\n", __LINE__);

		list_splice_tail_init(&mv61vc->queue, &mv61vc->active_list);
		desc = mv61vc_first_active(mv61vc);
		if(desc) {
			cookie = desc->txd.cookie;
			mv61vc->started = cookie;
			mv61vc->residue = 0;
			/* -EBUSY is ignored: the channel is still running */
			mv61vc_dostart(mv61vc, desc);
		}
		spin_unlock_irqrestore(&mv61vc->lock, lockvcflags);
	}
	__dev_vdbg(chan2dev(&mv61vc->chan), "issue_queue:%d\n", __LINE__);

done:
	spin_unlock_irqrestore(&mv61p->biglock, biglockflags);
}

/*
 * mv61vc_handle_error - recover a virtual channel after an error interrupt
 * @mv61v: owning virtual dma controller (not used here)
 * @mv61vc: virtual channel control structure
 *
 * Recovery consists of issuing DMA_TERMINATE_ALL on the channel.
 */
void mv61vc_handle_error(struct mv61_vdma *mv61v, struct mv61_vdma_chan *mv61vc)
{
	struct dma_chan *dchan = &mv61vc->chan;

	__dev_vdbg(chan2dev(dchan), "handle_error:%d irqs=%08x\n",__LINE__,
								mv61vc->irqs);
	mv61vc_device_control(dchan, DMA_TERMINATE_ALL, 0);
}

/*
 * mv61_dma_tasklet - deferred interrupt work for all virtual controllers
 * @data: the struct mv61_dma instance, cast to unsigned long
 *
 * Walks every channel of every allocated virtual controller. Cyclic
 * channels are passed to their own handler. For the others, interrupt bits
 * latched by the hard interrupt handler are consumed (a timeout is treated
 * as an error) and the completed transactions are retired.
 */
static void mv61_dma_tasklet(unsigned long data)
{
	struct mv61_dma		*mv61p = (struct mv61_dma *)data;
	int			ctrl;

	for (ctrl = MV61_VDMA_OWNED; ctrl < MV61_NR_VDMA_CONTROLLERS; ctrl++) {
		struct mv61_vdma	*mv61v = mv61p->mv61v[ctrl];
		int			idx;

		/* skip unused controllers */
		if (!mv61v)
			continue;

		for (idx = 0; idx < mv61v->dma.chancnt; idx++) {
			struct mv61_vdma_chan	*mv61vc = &mv61v->chan[idx];

			BUG_ON(!mv61vc);

			if (mv61v->vtype == MV61_VDMA_CYCLIC) {
				mv61vc_cyclic_dma_tasklet_handler(mv61p, mv61v,
									mv61vc);
				continue;
			}

			if (mv61vc->irqs) {
				unsigned long	bigflags;
				unsigned long	vcflags;
				u32		latched;

				/* take and clear the latched bits atomically */
				spin_lock_irqsave(&mv61p->biglock, bigflags);
				spin_lock_irqsave(&mv61vc->lock, vcflags);
				latched = mv61vc->irqs;
				mv61vc->irqs = 0;
				spin_unlock_irqrestore(&mv61vc->lock, vcflags);
				spin_unlock_irqrestore(&mv61p->biglock, bigflags);

				if (latched & CDMA_INTPEND_TIMEOUTPEND_MASK)
					mv61vc_handle_error(mv61v, mv61vc);
			}

			mv61vc_process_desc_list(mv61vc, &mv61vc->complete_list);
		}
	}
}

/*
 * mv61_dma_interrupt - hard interrupt handler for the CDMA controller
 * @irq: interrupt number (unused)
 * @dev_id: the struct mv61_dma instance
 *
 * For every physical channel flagged in the controller status word the
 * pending sources are acknowledged. A chain-done interrupt retires the
 * finished (sub)transaction and starts the next one; on a shared channel
 * the physical channel is first handed back to the dispatcher, which may
 * select a different virtual channel. Remaining sources are latched in
 * mv61vc->irqs for the tasklet. The tasklet is scheduled on every handled
 * interrupt.
 *
 * Returns IRQ_NONE when the status register reads zero, IRQ_HANDLED
 * otherwise.
 *
 * Never try to lock multiple virtual channels here concurrently.
 */
static irqreturn_t mv61_dma_interrupt(int irq, void *dev_id)
{
	struct mv61_dma 	*mv61p = dev_id;
	struct mv61_pdma_chan	*mv61pc;
	struct mv61_vdma_chan	*mv61vc;
	struct mv61_desc	*desc;
	struct mv61_pdma_chan_regs *pcregs;
	u32 status = 0;
	int 			i;
	u32 			tmp;
	u32 			pend;
	unsigned long 		biglockflags;
	unsigned long 		lockvcflags;
	enum mv61_dma_flow_ctrl	flowcontrol;
	dma_cookie_t		cookie;

	spin_lock_irqsave(&mv61p->biglock, biglockflags);

	/* saturating call counter */
	if(mv61p->irq_call_cnt != ~0)
		mv61p->irq_call_cnt++;

	if(mv61p->CDMAInt) {
		status = readl(mv61p->CDMAInt);
		if(!status) {
			__dev_vdbg(mv61p->dev, "mv61_cdma interrupt: false interrupt\n");
			spin_unlock_irqrestore(&mv61p->biglock, biglockflags);
			return IRQ_NONE;
		}
	}

	if(vdebug)
		dev_printk(KERN_ALERT, mv61p->dev, "interrupt: status=0x%x\n", status);
	/* bit i of status corresponds to physical channel i */
	for (i = 0; i < mv61p->pchannels; i++) {
		if(status & 1) {
			mv61pc = &mv61p->chan[i];
			BUG_ON(!mv61pc);
			pcregs = mv61pc->ch_regs;
			BUG_ON(!pcregs);
			if(vdebug)
				mv61_dump_pchan(pcregs);
			tmp = readl(&pcregs->intEn);
			pend = readl(&pcregs->intPend);
			/*
			 * Non-cyclic channels get the sources that just fired
			 * masked; cyclic channels keep them enabled. The
			 * acknowledge is unconditional.
			 */
			if (mv61pc->vtype != MV61_VDMA_CYCLIC) {
				writel((tmp & ~pend), &pcregs->intEn);
			}
			writel(pend, &pcregs->intAck);

			if (pend & CDMA_INTPEND_CHAINDONEPEND_MASK) {
				mv61vc = mv61_vpmap_p_to_v(mv61pc);
				BUG_ON(!mv61vc);

				spin_lock_irqsave(&mv61vc->lock, lockvcflags);

				flowcontrol = MV61_CDMA_VAR_RD_FIELD(mv61vc->def.CFG,
								CDMA_CFG_FLOWCTRL);
				mv61vc->status = DMA_COMPLETE;
				mv61vc->residue = 0;
				tmp = readl(&pcregs->CFG);

				/* chain reported done but channel still enabled */
				if (tmp & CDMA_CFG_ENABLE_MASK) {
					mv61vc_terminate_dma(mv61vc, mv61pc,
								flowcontrol,
								MV61_PDMA_FLUSH);
				}
				if(mv61pc->vtype == MV61_VDMA_SHARED) {
					/* manage the subchain */
					desc = mv61vc_first_active(mv61vc);
					BUG_ON(!desc);
					/* retire only after the last subchain */
					if(!mv61_tx_list_next(desc)) {
						mv61vc->status = DMA_COMPLETE;
						mv61_retire_tx(mv61vc);
					}

					/* decrement the vchan waiting count */
					mv61_dispatch_drop(mv61p,
							mv61vc->chan.chan_id);

					/* drop the old vchannel lock */
					spin_unlock_irqrestore(&mv61vc->lock,
								lockvcflags);

					/* mark the pchannel as available */
					mv61_dispatch_free_pchan(mv61p, i);
					mv61_vpmap_unpair(mv61vc, mv61pc);
					mv61pc = NULL;

					/* get and lock a NEW vchannel */
					mv61vc = mv61_dispatch_get(mv61p);
					if(mv61vc) {
						spin_lock_irqsave(&mv61vc->lock,
								lockvcflags);
					}
				} else {
					mv61vc->status = DMA_COMPLETE;
					mv61vc->residue = 0;
					mv61_retire_tx(mv61vc);
				}
				/* mv61vc is NULL when the dispatcher had nothing to run */
				if(mv61vc) {
					desc = mv61vc_first_active(mv61vc);
					/* get the next subchain */
					if(desc) {
						cookie = desc->txd.cookie;
						desc = desc->active;
					}
					if(desc) {
						mv61vc->started = cookie;
						mv61vc->residue = 0;
						mv61vc_dostart(mv61vc, desc);
					}
					spin_unlock_irqrestore(&mv61vc->lock,
								lockvcflags);
				}
			} else if (pend & CDMA_INTPEND_CLEARCOMPLETEPEND_MASK){
				/* acknowledged above; nothing else to do */
#if 0
				dev_printk(KERN_ALERT, mv61p->dev, "interrupt: "
						"chan %d: pend=0x%x\n", i, pend);
#endif
			} else {
				/*
				 * Remaining interrupt flags are
				 * CDMA_INTPEND_TERMCNTPEND_MASK (cyclic)
				 * CDMA_INTPEND_TIMEOUTPEND_MASK (idle timeout)
				 *
				 * All communication with the outside world
				 * uses vchannels.
				 */
				mv61vc = mv61_vpmap_p_to_v(mv61pc);
				BUG_ON(!mv61vc);

				/* latch for the tasklet */
				spin_lock_irqsave(&mv61vc->lock, lockvcflags);
				mv61vc->irqs |= pend;
				spin_unlock_irqrestore(&mv61vc->lock, lockvcflags);
#if 0
				dev_printk(KERN_ALERT, mv61p->dev, "interrupt: "
						"chan %d: pend=0x%x\n", i, pend);
#endif
			}
		}
		status = status >> 1;
	}

	tasklet_schedule(&mv61p->tasklet);

	spin_unlock_irqrestore(&mv61p->biglock, biglockflags);

	return IRQ_HANDLED;
}

/**
 * mv61_dispatch_drop - drop a shared vchannel's waitcount in dispatcher
 * @mv61p: top level physical control structure
 * @vch_index: index of the virtual channel within the MV61_VDMA_SHARED instance
 *
 * The waitcount is used like a refcount.
 *
 * Call with mv61p->biglock held.
 */
static int mv61_dispatch_drop(struct mv61_dma *mv61p, int vch_index)
{
	if (!mv61p->dispatch->vwaitcnt[vch_index])
		return 0;
	if(!(--mv61p->dispatch->vwaitcnt[vch_index]))
		mv61p->dispatch->vchanw[vch_index >> DISPATCH_REGSHIFT] &=
				~(1 << (vch_index & DISPATCH_MASK));
	return mv61p->dispatch->vwaitcnt[vch_index];
}

/**
 * mv61_dispatch_free_pchan - mark physical channel as available for sharing
 * @mv61p: top level physical control structure
 * @pch_index: index of the shared channel
 *
 * Call with mv61p->dispatch->lock held. The waitcount is used like a
 * refcount.
 */
static void mv61_dispatch_free_pchan(struct mv61_dma *mv61p, int pch_index)
{
	mv61p->dispatch->pchans |= (1 << pch_index);
}

/**
 * mv61_dispatch_get - round-robin arbitration for shared channels
 * @mv61p: top level physical control structure
 *
 * Runs with irqs disabled. Take every opportunity to bail out early,
 * may be faster if vchanw is a short or char to reduce shifts, TBD.
 *
 * Call with mv61p->biglock held.
 *
 * Searches the waiting bitmap, starting just after the channel dispatched
 * last, for a waiting virtual channel that is not yet paired with a
 * physical channel. On success the lowest-numbered free physical channel
 * is claimed and paired with it.
 *
 * Returns the selected virtual channel, or NULL when no physical channel
 * is free or no unpaired virtual channel is waiting.
 */
static struct mv61_vdma_chan *mv61_dispatch_get(struct mv61_dma *mv61p)
{
	struct mv61_dma_dispatch	*dispatch = mv61p->dispatch;
	struct mv61_vdma		*mv61v = mv61p->mv61v[MV61_VDMA_SHARED];
	struct mv61_vdma_chan		*mv61vc;
	int 				i, j;
	int				next_slot;
	int				vbank;
	int				onehot;
	int				ch_index;

	if(!dispatch->pchans) {
		/* no physical channels available */
		return NULL;
	}

	/*
	 * vchanw[vbank] may be checked twice: first for channels beyond
	 * the last channel dispatched, then for channels at or below
	 * the last channel. It's probably faster than the logic to prevent it.
	 */

	next_slot = dispatch->vlast + 1;
	vbank = next_slot >> DISPATCH_REGSHIFT;
	if (vbank >= dispatch->regcnt) {
		/*
		 * The last dispatched channel was the final slot of the final
		 * bank. Wrap to the first slot instead of indexing vchanw[]
		 * at regcnt, which would also leave the scan below without a
		 * reachable termination condition.
		 */
		next_slot = 0;
		vbank = 0;
	}
	i = vbank;
	/* pass 1: rest of the starting bank, from next_slot upwards */
	if(dispatch->vchanw[i]) {
		j = next_slot & DISPATCH_MASK;
		onehot = 1 << j;
		for (; j < DISPATCH_REGWIDTH; j++) {
			if(dispatch->vchanw[i] & onehot) {
				ch_index = j | (i << DISPATCH_REGSHIFT);
				mv61vc = &mv61v->chan[ch_index];
				/* looking for an unpaired vchannel */
				if(!mv61_vpmap_v_to_p(mv61vc))
					goto found;
			}
			onehot = onehot << 1;
		}
	}
	/* pass 2: every following bank, wrapping round to the starting bank */
	do {
		i++;
		if (i == dispatch->regcnt)
			i = 0;
		if(!dispatch->vchanw[i])
			continue;
		onehot = 1;
		for (j = 0; j < DISPATCH_REGWIDTH; j++) {
			if(dispatch->vchanw[i] & onehot) {
				ch_index = j | (i << DISPATCH_REGSHIFT);
				mv61vc = &mv61v->chan[ch_index];
				/* looking for an unpaired vchannel */
				if(!mv61_vpmap_v_to_p(mv61vc))
					goto found;
			}
			onehot = onehot << 1;
		}
	} while (i != vbank);

	return NULL;

found:
	dispatch->vlast = ch_index;

	/* we already know there's a pchan bit set */
	for (i = 0; i < MV61_DMA_MAX_NR_PCHANNELS; i ++) {
		if(dispatch->pchans & (1 << i)) {
			/* mark the pchan as unavailable */
			dispatch->pchans &= ~(1 << i);
			break;
		}
	}

	mv61_vpmap_pair(mv61vc, &mv61p->chan[i]);

	return mv61vc;
}

/**
 * mv61_dispatch_put - issue a subchain to the dispatcher
 * @mv61vc: virtual channel control structure
 * @desc: top level transaction control structure
 *
 * Adds the transaction's chain count to the channel's waitcount, flags the
 * channel as waiting and moves the transaction to the tail of the
 * channel's active list.
 *
 * Call with mv61p->biglock held.
 * Call with mv61vc->lock held.
 */
static void mv61_dispatch_put(struct mv61_vdma_chan *mv61vc,
					struct mv61_desc *desc)
{
	int				vch = mv61vc->chan.chan_id;
	struct mv61_dma_dispatch	*dispatch;

	__dev_vdbg(chan2dev(&mv61vc->chan), "dispatch_put:%d\n", __LINE__);

	dispatch = mv61vc->mv61v->mv61p->dispatch;

	/* one wait reference per chain of the transaction */
	dispatch->vwaitcnt[vch] += desc->chains;
	dispatch->vchanw[vch >> DISPATCH_REGSHIFT] |= 1 << (vch & DISPATCH_MASK);

	list_move_tail(&desc->desc_node, &mv61vc->active_list);

	__dev_vdbg(chan2dev(&mv61vc->chan), "dispatch_put:%d\n", __LINE__);
}

/*
 * mv61vc_tx_submit - dmaengine API to submit prepared transactions to queue.
 * @tx: dmaengine API subset of transaction descriptor
 *
 * Assigns a cookie and appends the transaction to the channel's queue under
 * the channel lock. Nothing is started here.
 *
 * Returns the cookie assigned to the transaction.
 */
dma_cookie_t mv61vc_tx_submit(struct dma_async_tx_descriptor *tx)
{
	struct mv61_vdma_chan		*vchan = dchan_to_mv61_vdma_chan(tx->chan);
	struct mv61_desc		*txdesc = txd_to_mv61_desc(tx);
	unsigned long			irqflags;
	dma_cookie_t			assigned;

	spin_lock_irqsave(&vchan->lock, irqflags);

	assigned = mv61vc_assign_cookie(vchan, txdesc);
	list_add_tail(&txdesc->desc_node, &vchan->queue);

	__dev_vdbg(chan2dev(tx->chan), "tx_submit: queued desc %p cookie %u\n",
		 txdesc, txdesc->txd.cookie);

	/* optional debug dump, controlled by the vdumptx module parameter */
	if (txdesc && vdumptx)
		mv61_tx_dump(txdesc);

	spin_unlock_irqrestore(&vchan->lock, irqflags);

	return assigned;
}

/*
 * mv61vc_prep_dma_mem_op - common back-end for memcpy and memset
 * @chan: dmaengine API subset of channel control structure
 * @dest: destination physical address
 * @src: source physical address
 * @len: length in bytes
 * @fill: enable memory fill operation
 * @value: fill value
 * @flags: dmaengine API flags
 *
 * Only supports word transfers. Must be word-aligned and physically
 * contiguous. NULL destination is allowed by hardware, but not allowed
 * here.
 *
 * The transfer is split into pieces of at most MV61_SUBCHAIN_MAX_SIZE
 * bytes: the first piece lives in the transaction descriptor itself, each
 * further piece in a link descriptor on its subchain.
 *
 * Returns the prepared transaction, or NULL on a bad argument or when a
 * descriptor cannot be allocated.
 */
static struct dma_async_tx_descriptor *
mv61vc_prep_dma_mem_op(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
		size_t len, bool fill, int value, unsigned long flags)
{
	struct mv61_vdma_chan	*mv61vc = dchan_to_mv61_vdma_chan(chan);
	struct mv61_desc	*firstdesc;
	struct mv61_chain	*prevlink = NULL;
	struct mv61_lli		*lli;
	struct mv61_tx_regs	*txr;
	size_t			xfer_count;
	size_t			offset;
	int			align;

	__dev_vdbg(chan2dev(chan), "prep_dma_mem_op d=0x%x s=0x%x l=0x%zx f=0x%lx\n",
			dest, src, len, flags);

	if (unlikely(!dest)) {
		dev_dbg(chan2dev(chan), "prep_dma_mem_op: "
						"attempting write to NULL!\n");
		return NULL;
	}

	if (unlikely(!len)) {
		dev_dbg(chan2dev(chan), "prep_dma_mem_op: length is zero!\n");
		return NULL;
	}

	if(fill)
		align = MV61_MEMFILL_ALIGN;
	else
		align = MV61_MEMCPY_ALIGN;

	if (!dmaengine_check_align(align, src, dest, len)) {
		dev_dbg(chan2dev(chan), "prep_dma_mem_op: unaligned access!\n");
		return NULL;
	}

	/*
	 * Nothing has been allocated yet, so there is nothing to release on
	 * failure; return directly rather than handing NULL to
	 * mv61_desc_put().
	 */
	firstdesc = mv61_desc_get(mv61vc);
	if (!firstdesc)
		return NULL;

	firstdesc->chains = 1;

	xfer_count = min_t(size_t, len, MV61_SUBCHAIN_MAX_SIZE);

	/* set up control regs common to the whole transaction */
	txr =  &firstdesc->txregs;
	txr->CFG = 0;
	txr->FillValue = value;
	txr->TimerControl = 0;
	txr->intEn = 0;

	if(fill)
		MV61_CDMA_VAR_WR_FIELD(txr->CFG,CDMA_CFG_FILL, 1);

	MV61_CDMA_VAR_WR_FIELD(txr->CFG,CDMA_CFG_WRDELAY, 0);
	MV61_CDMA_VAR_WR_FIELD(txr->CFG,CDMA_CFG_DATAUNITSIZE,
			       			MV61_DMA_UNIT_SIZE_32BIT);
	MV61_CDMA_VAR_WR_FIELD(txr->CFG,CDMA_CFG_FLOWCTRL,
						MV61_DMA_MEMORY_TO_MEMORY);
	MV61_CDMA_VAR_WR_FIELD(txr->CFG,CDMA_CFG_DESTPID, 0);
	MV61_CDMA_VAR_WR_FIELD(txr->CFG,CDMA_CFG_SRCPID, 0);
	MV61_CDMA_VAR_WR_FIELD(txr->CFG,CDMA_CFG_DESTENDIAN,
		 				MV61_DMA_LITTLE_ENDIAN);
	MV61_CDMA_VAR_WR_FIELD(txr->CFG,CDMA_CFG_SRCENDIAN,
		 				MV61_DMA_LITTLE_ENDIAN);
	MV61_CDMA_VAR_WR_FIELD(txr->intEn, CDMA_INTEN_CHAINDONEEN, 1);
	MV61_CDMA_VAR_WR_FIELD(txr->intEn, CDMA_INTEN_CLEARCOMPLETEEN, 1);

	/* set up control regs specific to this piece of the transaction */
	lli = &firstdesc->lli;

	lli->SrcAddr = src;
	lli->DestAddr = dest;
	lli->OwnLength = xfer_count & DESCRIPTOR_OWNLENGTH_LENGTH_MASK; /* Bits 15:0 are the length */
	lli->OwnLength |= DESCRIPTOR_OWNLENGTH_OWN_MASK; /* Set the own bit to dma */

	/* create the subchain descriptors if needed */
	for (offset = xfer_count; offset < len; offset += xfer_count) {
		struct mv61_chain	*link;

		xfer_count = min_t(size_t, (len - offset),
				MV61_SUBCHAIN_MAX_SIZE);

		link = mv61_link_get(mv61vc);
		if (!link)
			goto err_link_get;

		if(prevlink) {
			prevlink->lli.NextCtrl = link->phys & DESCRIPTOR_NEXTCTRL_NEXT_MASK;

			/* prevlink is complete now; publish it to the device */
			dma_sync_single_for_device(chan2parent(chan),
					prevlink->phys, sizeof(prevlink->lli),
					DMA_TO_DEVICE);
			prevlink->synced = 1;
		} else {
			/* first link hangs directly off the transaction descriptor */
			firstdesc->lli.NextCtrl = link->phys & DESCRIPTOR_NEXTCTRL_NEXT_MASK;
		}

		lli = &link->lli;

		lli->SrcAddr = src + offset;
		lli->DestAddr = dest + offset;
		lli->OwnLength = xfer_count & DESCRIPTOR_OWNLENGTH_LENGTH_MASK; /* Bits 15:0 are the length */
		lli->OwnLength |= DESCRIPTOR_OWNLENGTH_OWN_MASK; /* Set the own bit to dma */

		list_add_tail(&link->chain_node, &firstdesc->subchain);
		prevlink = link;
	}

	lli->NextCtrl = (DESCRIPTOR_NEXTCTRL_INT_MASK | DESCRIPTOR_NEXTCTRL_STOP_MASK); /* This is the last descriptor so set the stop and int bit */

	/* the final link, if any, has not been synced inside the loop */
	if(prevlink) {
		dma_sync_single_for_device(chan2parent(chan),
				prevlink->phys, sizeof(prevlink->lli),
				DMA_TO_DEVICE);
		prevlink->synced = 1;
	}

	firstdesc->txd.flags = flags;
	firstdesc->len = len;

	return &firstdesc->txd;

err_link_get:
	/* links already added to the subchain are attached to firstdesc */
	mv61_desc_put(firstdesc);
	return NULL;
}

/*
 * mv61vc_prep_dma_memcpy - dmaengine API wrapper for mv61vc_prep_dma_mem_op
 * @chan: dmaengine API subset of channel control structure
 * @dest: destination physical address
 * @src: source physical address
 * @len: length in bytes
 * @flags: dmaengine API flags
 *
 * Only supports word transfers. Must be word-aligned and physically
 * contiguous. Calls the common back-end with the fill operation disabled.
 */
static struct dma_async_tx_descriptor *
mv61vc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
		size_t len, unsigned long flags)
{
	const bool	fill = false;	/* plain copy, no memory fill */
	const int	fill_value = 0;

	return mv61vc_prep_dma_mem_op(chan, dest, src, len, fill, fill_value,
					flags);
}

/*
 * mv61vc_prep_slave_sg_wrap - prepare wrapped peripheral dma transactions
 * @chan: dmaengine API subset of channel control structure
 * @sgl: scatterlist for source or destination memory
 * @sg_len: number of scatterlist entries
 * @direction: dma direction defined by linux/dma_mapping.h
 * @flags: dmaengine API flags (not used by this function)
 *
 * If mv61vc->wrap is non-zero, mv61vc_prep_slave_sg() will pass request through
 * to this alternate implementation. It is a kludge for supporting
 * dma to small contiguous register blocks instead of single data registers.
 *
 * Every scatterlist entry is cut into pieces that never cross a multiple
 * of mv61vc->wrap, counted over the whole transaction, and each piece gets
 * its own link descriptor. On a shared controller every scatterlist entry
 * starts a new top-level descriptor; otherwise only the first entry does
 * and all later pieces are appended to its subchain.
 *
 * NOTE(review): a zero mv61vc->wrap would make the piece length zero and
 * the inner loops would not advance; callers are expected to use this path
 * only with a non-zero wrap.
 *
 * Returns the prepared transaction, or NULL when the channel has no valid
 * slave configuration, @sg_len is zero, a descriptor cannot be allocated
 * or mv61vc_prep_slave_lli() reports an error.
 */
static struct dma_async_tx_descriptor *
mv61vc_prep_slave_sg_wrap(struct dma_chan *chan, struct scatterlist *sgl,
		unsigned int sg_len, enum dma_transfer_direction direction,
		unsigned long flags)
{
	struct mv61_vdma_chan	*mv61vc = dchan_to_mv61_vdma_chan(chan);
	struct mv61_vdma	*mv61v = ddev_to_mv61_vdma(chan->device);
	struct mv61_desc	*desc = NULL;
	struct mv61_desc	*prevdesc = NULL;
	struct mv61_desc	*firstdesc = NULL;
	struct mv61_lli		*lli = NULL;
	struct mv61_chain	*prevlink = NULL;
	unsigned int		i;
	struct scatterlist	*sg;
	size_t			total_len = 0;
	enum mv61_dma_flow_ctrl	flowcontrol;
	int 			wrapcount = 0;
	int 			remaining;

	__dev_vdbg(chan2dev(chan), "prep_dma_slave_sg_wrap\n");

	if (unlikely(!mv61vc->def.valid || !sg_len))
		return NULL;

	flowcontrol = MV61_CDMA_VAR_RD_FIELD(mv61vc->def.CFG,CDMA_CFG_FLOWCTRL);
	/* need a descriptor for each scatterlist entry */
	for_each_sg(sgl, sg, sg_len, i) {
		struct mv61_chain	*link;
		u32		len;
		u32		mem;
		int 		err;
		int 		linklen;
		mem = sg_phys(sg);
		len = sg_dma_len(sg);

		__dev_vdbg(chan2dev(chan), "prep_dma_slave: sg_virt=0x%p "
						"sg_phys=0x%08x len=%u\n",
						sg_virt(sg), mem, len);

		/* wrapping can cross scatterlist entries */
		remaining = mv61vc->wrap ? mv61vc->wrap -
				(total_len % mv61vc->wrap) : 0;
		if(remaining) {
			/* previous sg buffer didn't end at wrap boundary */
			if(len < remaining)
				linklen = len;
			else
				linklen = remaining;
		} else {
			if(len < mv61vc->wrap)
				linklen = len;
			else
				linklen = mv61vc->wrap;
		}

		/*TODO: break up large scatterlist entries for shared transactions */
		if ((firstdesc == NULL) || (mv61v->vtype == MV61_VDMA_SHARED)) {
			/* these descriptors will never be auto-loaded */
			__dev_vdbg(chan2dev(chan), "prep_dma_slave: trans desc\n");
			desc = mv61_desc_get(mv61vc);
			if (!desc) {
				dev_err(chan2dev(chan),
					"not enough descriptors available\n");
				goto err_desc_get;
			}
			if (firstdesc == NULL) { /* if first is a null pointer, this IS first */
				firstdesc = desc;
			} else {
				list_add_tail(&desc->tx_list,
					&firstdesc->tx_list);
			}
			/* register image starts from the channel defaults */
			desc->txregs.CFG = mv61vc->def.CFG;
			desc->txregs.FillValue = 0;
			desc->txregs.TimerControl = mv61vc->def.TimerControl;
			desc->txregs.intEn = 0;
			if (mv61vc->def.TimerControl & CDMA_TIMERCONTROL_TIMERENABLE_MASK)
				MV61_CDMA_VAR_WR_FIELD(desc->txregs.intEn,
						CDMA_INTEN_TIMEOUTEN, 1);
			MV61_CDMA_VAR_WR_FIELD(desc->txregs.intEn,
						CDMA_INTEN_CHAINDONEEN, 1);
			MV61_CDMA_VAR_WR_FIELD(desc->txregs.intEn,
						CDMA_INTEN_CLEARCOMPLETEEN, 1);
			lli = &desc->lli;
			prevlink = NULL;
			prevdesc = desc;
			firstdesc->chains++;

			/* first piece of this entry goes into the descriptor itself */
			err = mv61vc_prep_slave_lli(lli, mv61vc, direction, flowcontrol,
							mem, linklen, total_len);
			if(err)
				goto err_cfg;
			__dev_vdbg(chan2dev(chan), "prep_dma_slave: desc = %p\n", desc);

			total_len += linklen;
			prevdesc->sublen += linklen;

			/* remaining pieces of this entry become links */
			for(wrapcount = linklen; wrapcount < len; wrapcount +=
							linklen) {
				if((len - wrapcount) > mv61vc->wrap)
					linklen = mv61vc->wrap;
				else
					linklen = len - wrapcount;

				link = mv61_link_get(mv61vc);
				if (!link) {
					dev_err(chan2dev(chan),
						"not enough descriptors available\n");
					goto err_desc_get;
				}
				BUG_ON(!prevdesc);
				prevdesc->links++;
				if(prevlink) {
					prevlink->lli.NextCtrl = link->phys & DESCRIPTOR_NEXTCTRL_NEXT_MASK;

					/* the lli descriptor is always DMA_TO_DEVICE */
					dma_sync_single_for_device(chan2parent(chan),
							prevlink->phys,
							sizeof(prevlink->lli),
							DMA_TO_DEVICE);
					prevlink->synced = 1;
				} else {
					/* previous link was top of subchain */
					prevdesc->lli.NextCtrl = link->phys & DESCRIPTOR_NEXTCTRL_NEXT_MASK;
				}
				list_add_tail(&link->chain_node, &prevdesc->subchain);
				lli = &link->lli;
				prevlink = link;
				err = mv61vc_prep_slave_lli(lli, mv61vc, direction,
							flowcontrol, mem + wrapcount,
							linklen,
							total_len);
				if(err)
					goto err_cfg;

				__dev_vdbg(chan2dev(chan), "prep_dma_slave: link = %p\n",
										link);
				total_len += linklen;
				prevdesc->sublen += linklen;
			}
		} else {
			/* these descriptors will always be auto-loaded */
			for(wrapcount = 0; wrapcount < len; wrapcount +=
							linklen) {
				/* first pass linklen comes from top of for_each_sg */
				if(wrapcount) {
					if((len - wrapcount) > mv61vc->wrap)
						linklen = mv61vc->wrap;
					else
						linklen = len - wrapcount;
			 	}
				link = mv61_link_get(mv61vc);
				if (!link) {
					dev_err(chan2dev(chan),
						"not enough descriptors available\n");
					goto err_desc_get;
				}
				BUG_ON(!prevdesc);
				prevdesc->links++;
				if(prevlink) {
					prevlink->lli.NextCtrl = link->phys & DESCRIPTOR_NEXTCTRL_NEXT_MASK;

					/* the lli descriptor is always DMA_TO_DEVICE */
					dma_sync_single_for_device(chan2parent(chan),
							prevlink->phys,
							sizeof(prevlink->lli),
							DMA_TO_DEVICE);
					prevlink->synced = 1;
				} else {
					/* previous link was top of subchain */
					prevdesc->lli.NextCtrl = link->phys & DESCRIPTOR_NEXTCTRL_NEXT_MASK;
				}
				list_add_tail(&link->chain_node, &prevdesc->subchain);
				lli = &link->lli;
				prevlink = link;
				err = mv61vc_prep_slave_lli(lli, mv61vc, direction,
							flowcontrol, mem + wrapcount,
							linklen,
							total_len);
				if(err)
					goto err_cfg;

				__dev_vdbg(chan2dev(chan), "prep_dma_slave: link = %p\n",
										link);
				total_len += linklen;
				prevdesc->sublen += linklen;
			}
		}
	}

	if(lli) {
		lli->NextCtrl = (DESCRIPTOR_NEXTCTRL_INT_MASK | DESCRIPTOR_NEXTCTRL_STOP_MASK); /* This is the last descriptor so set the stop and int bit */
	}

	/* the final link, if any, has not been synced inside the loops */
	if(prevlink) {
		dma_sync_single_for_device(chan2parent(chan),
				prevlink->phys, sizeof(prevlink->lli),
				DMA_TO_DEVICE);
		prevlink->synced = 1;
	}

	/* TODO: change this to len per subchain? */
	firstdesc->len = total_len;

	return &firstdesc->txd;

err_cfg:
err_desc_get:
	/*
	 * firstdesc is still NULL when the very first descriptor allocation
	 * failed; there is nothing to release in that case.
	 */
	if (firstdesc)
		mv61_desc_put(firstdesc);
	return NULL;
}

/**
 * mv61vc_prep_slave_linear - debug use *ONLY*
 * @chan: dmaengine API subset of channel control structure
 * @buf: physical address of a contiguous dma buffer
 * @len: transfer length in bytes
 * @direction: dma transfer direction
 * @flags: dmaengine API flags (not used by this function)
 *
 * All other arguments are the same as mv61c_prep_slave_sg(), aka
 * chan->device->device_prep_slave_sg(). For debug use ONLY, may be
 * removed later.
 *
 * Builds a transaction from a single descriptor with no links.
 *
 * Returns the prepared transaction, or NULL when the channel has no valid
 * slave configuration, @len is zero, no descriptor is available or
 * mv61vc_prep_slave_lli() reports an error.
 */
struct dma_async_tx_descriptor *
mv61vc_prep_slave_linear (struct dma_chan *chan, dma_addr_t buf,
		unsigned int len, enum dma_transfer_direction direction,
		unsigned long flags)
{
	struct mv61_vdma_chan	*mv61vc = dchan_to_mv61_vdma_chan(chan);
	struct mv61_desc	*desc;
	struct mv61_lli		*lli;
	enum mv61_dma_flow_ctrl	flowcontrol;
	int err;

	__dev_vdbg(chan2dev(chan), "prep_dma_slave_linear\n");

	if (unlikely(!mv61vc->def.valid || !len))
		return NULL;

	flowcontrol = MV61_CDMA_VAR_RD_FIELD(mv61vc->def.CFG,CDMA_CFG_FLOWCTRL);

	/* these descriptors will never be auto-loaded */
	__dev_vdbg(chan2dev(chan), "prep_dma_slave_linear: trans desc\n");
	desc = mv61_desc_get(mv61vc);
	if (!desc) {
		dev_err(chan2dev(chan),
			"not enough descriptors available\n");
		/* nothing allocated, so nothing to hand to mv61_desc_put() */
		return NULL;
	}
	/* register image starts from the channel defaults */
	desc->txregs.CFG = mv61vc->def.CFG;
	desc->txregs.FillValue = 0;
	desc->txregs.TimerControl = mv61vc->def.TimerControl;
	desc->txregs.intEn = 0;
	if (mv61vc->def.TimerControl & CDMA_TIMERCONTROL_TIMERENABLE_MASK)
		MV61_CDMA_VAR_WR_FIELD(desc->txregs.intEn,
				CDMA_INTEN_TIMEOUTEN, 1);
	MV61_CDMA_VAR_WR_FIELD(desc->txregs.intEn,
				CDMA_INTEN_CHAINDONEEN, 1);
	MV61_CDMA_VAR_WR_FIELD(desc->txregs.intEn,
				CDMA_INTEN_CLEARCOMPLETEEN, 1);
	lli = &desc->lli;
	desc->chains++;
	/*
	 * NOTE(review): mv61vc_prep_slave_sg_wrap() passes the number of bytes
	 * already described as the last argument; here the full length is
	 * passed -- confirm which value mv61vc_prep_slave_lli() expects.
	 */
	err = mv61vc_prep_slave_lli(lli, mv61vc, direction, flowcontrol,
					buf, len, len);
	if(err)
		goto err_cfg;
	__dev_vdbg(chan2dev(chan), "prep_dma_slave_linear: desc = %p\n", desc);

	lli->NextCtrl = (DESCRIPTOR_NEXTCTRL_INT_MASK | DESCRIPTOR_NEXTCTRL_STOP_MASK); /* This is the last descriptor so set the stop and int bit */

	desc->len = len;
	desc->sublen = len;

	return &desc->txd;

err_cfg:
	mv61_desc_put(desc);
	return NULL;
}
EXPORT_SYMBOL(mv61vc_prep_slave_linear);

/**
 * mv61vc_prep_slave_sg - dmaengine API for preparing peripheral dma transaction
 * @chan: dmaengine API subset of channel control structure
 * @sgl: scatterlist for source or destination memory
 * @sg_len: number of scatterlist entries
 * @direction: dma direction defined by linux/dma_mapping.h
 * @flags: dmaengine API flags
 * @context: dmaengine API context pointer (not examined by this function)
 *
 * When the channel has a wrap size configured the request is handed to
 * mv61vc_prep_slave_sg_wrap() instead.
 *
 * For peripheral-to-peripheral flow control @sgl must be NULL and @sg_len is
 * (ab)used as the transfer length; a single descriptor is built from the
 * channel's default source and destination addresses.
 *
 * Otherwise one hardware lli is built per scatterlist entry.  On a
 * MV61_VDMA_SHARED controller every entry gets its own transaction
 * descriptor, queued on the first descriptor's tx_list; on the other
 * controllers only the first entry uses a transaction descriptor and the
 * remaining entries become chain links on its subchain.
 *
 * Returns the dmaengine descriptor of the first transaction descriptor, or
 * NULL on any failure.
 */
static struct dma_async_tx_descriptor *
mv61vc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
		unsigned int sg_len, enum dma_transfer_direction direction,
		unsigned long flags, void *context)
{
	struct mv61_vdma_chan	*mv61vc = dchan_to_mv61_vdma_chan(chan);
	struct mv61_vdma	*mv61v = ddev_to_mv61_vdma(chan->device);
	struct mv61_desc	*desc;
	struct mv61_desc	*prevdesc = NULL;
	struct mv61_desc	*firstdesc = NULL;
	struct mv61_lli		*lli = NULL;
	struct mv61_chain	*prevlink = NULL;
	unsigned int		i;
	struct scatterlist	*sg;
	size_t			total_len = 0;
	enum mv61_dma_flow_ctrl	flowcontrol;

	/* wrapping is a big kludge, so just bail out to a new function */
	if(mv61vc->wrap)
		return(mv61vc_prep_slave_sg_wrap(chan, sgl, sg_len, direction, flags));

	__dev_vdbg(chan2dev(chan), "prep_dma_slave_sg\n");

	if (unlikely(!mv61vc->def.valid || !sg_len))
		return NULL;

	flowcontrol = MV61_CDMA_VAR_RD_FIELD(mv61vc->def.CFG,CDMA_CFG_FLOWCTRL);

	/* peripheral-to-peripheral isn't intended mode, but try to support it */
	if(flowcontrol == MV61_DMA_PERIPHERAL_TO_PERIPHERAL) {
		/* there shouldn't be a scatterlist for periph-to-periph */
		if(sgl) {
			dev_err(chan2dev(chan),
					"scatterlist should be null "
					"for periph-to-periph dma\n");
			return NULL;
		}
		/* no meaningful value for dma_transfer_direction here, ignore it */
		desc = mv61_desc_get(mv61vc);
		if (!desc) {
			dev_err(chan2dev(chan),
				"not enough descriptors available\n");
			/* nothing allocated yet, so nothing to put back */
			return NULL;
		}
		desc->txregs.CFG = mv61vc->def.CFG;
		desc->txregs.FillValue = 0;
		desc->txregs.TimerControl = mv61vc->def.TimerControl;
		desc->txregs.intEn = 0;
		if (mv61vc->def.TimerControl & CDMA_TIMERCONTROL_TIMERENABLE_MASK)
			MV61_CDMA_VAR_WR_FIELD(desc->txregs.intEn,
						CDMA_INTEN_TIMEOUTEN, 1);
		MV61_CDMA_VAR_WR_FIELD(desc->txregs.intEn,
						CDMA_INTEN_CHAINDONEEN, 1);
		MV61_CDMA_VAR_WR_FIELD(desc->txregs.intEn,
						CDMA_INTEN_CLEARCOMPLETEEN, 1);
		lli = &desc->lli;
		total_len = sg_len; /* HACK: overloading the sg_len field */

		lli->SrcAddr = mv61vc->def.SrcAddr;
		lli->DestAddr = mv61vc->def.DestAddr;
		lli->NextCtrl = (DESCRIPTOR_NEXTCTRL_INT_MASK | DESCRIPTOR_NEXTCTRL_STOP_MASK); /* This is the last descriptor so set the stop and int bit */
		lli->OwnLength = total_len & DESCRIPTOR_OWNLENGTH_LENGTH_MASK; /* Bits 15:0 are the length */
		lli->OwnLength |= DESCRIPTOR_OWNLENGTH_OWN_MASK; /* Set the own bit to dma */

		firstdesc = desc;
		firstdesc->len = total_len;
		firstdesc->sublen = total_len;
		firstdesc->chains = 1;

		goto done_prep_slave;

	}

	/* every other flow control mode needs memory to transfer to or from */
	if (unlikely(!sgl))
		return NULL;

	/* need a descriptor for each scatterlist entry */
	for_each_sg(sgl, sg, sg_len, i) {
		struct mv61_chain	*link;
		u32		len;
		u32		mem;
		int err;
		mem = sg_phys(sg);
		len = sg_dma_len(sg);

		__dev_vdbg(chan2dev(chan), "prep_dma_slave: sg_virt=0x%p "
						"sg_phys=0x%08x len=%u\n",
						sg_virt(sg), mem, len);

		/*TODO: break up large scatterlist entries for shared transactions */
		if ((firstdesc == NULL) || (mv61v->vtype == MV61_VDMA_SHARED)) {
			/* these descriptors will never be auto-loaded */
			__dev_vdbg(chan2dev(chan), "prep_dma_slave: trans desc\n");
			desc = mv61_desc_get(mv61vc);
			if (!desc) {
				dev_err(chan2dev(chan),
					"not enough descriptors available\n");
				goto err_desc_get;
			}
			if (firstdesc == NULL) {
				firstdesc = desc;
			} else {
				/* later transaction descriptors hang off the first */
				list_add_tail(&desc->tx_list,
					&firstdesc->tx_list);
			}
			desc->txregs.CFG = mv61vc->def.CFG;
			desc->txregs.FillValue = 0;
			desc->txregs.TimerControl = mv61vc->def.TimerControl;
			desc->txregs.intEn = 0;
			if (mv61vc->def.TimerControl & CDMA_TIMERCONTROL_TIMERENABLE_MASK)
				MV61_CDMA_VAR_WR_FIELD(desc->txregs.intEn,
						CDMA_INTEN_TIMEOUTEN, 1);
			MV61_CDMA_VAR_WR_FIELD(desc->txregs.intEn,
						CDMA_INTEN_CHAINDONEEN, 1);
			MV61_CDMA_VAR_WR_FIELD(desc->txregs.intEn,
						CDMA_INTEN_CLEARCOMPLETEEN, 1);
			lli = &desc->lli;
			/* a new transaction descriptor starts a new subchain */
			prevlink = NULL;
			prevdesc = desc;
			firstdesc->chains++;
			err = mv61vc_prep_slave_lli(lli, mv61vc, direction, flowcontrol,
							mem, len, total_len);
			if(err)
				goto err_cfg;
			__dev_vdbg(chan2dev(chan), "prep_dma_slave: desc = %p\n", desc);
		} else {
			/* these descriptors will always be auto-loaded */
			__dev_vdbg(chan2dev(chan), "prep_dma_slave: lli desc\n");
			link = mv61_link_get(mv61vc);
			if (!link) {
				dev_err(chan2dev(chan),
					"not enough descriptors available\n");
				goto err_desc_get;
			}
			BUG_ON(!prevdesc);
			prevdesc->links++;
			if(prevlink) {
				prevlink->lli.NextCtrl = link->phys & DESCRIPTOR_NEXTCTRL_NEXT_MASK;

				/* the lli descriptor is always DMA_TO_DEVICE */
				dma_sync_single_for_device(chan2parent(chan),
						prevlink->phys,
						sizeof(prevlink->lli),
						DMA_TO_DEVICE);
				prevlink->synced = 1;
			} else {
				/* previous link was top of subchain */
				prevdesc->lli.NextCtrl = link->phys & DESCRIPTOR_NEXTCTRL_NEXT_MASK;
			}
			list_add_tail(&link->chain_node, &prevdesc->subchain);
			lli = &link->lli;
			prevlink = link;
			err = mv61vc_prep_slave_lli(lli, mv61vc, direction, flowcontrol,
							mem, len, total_len);
			if(err)
				goto err_cfg;
		}
		prevdesc->sublen += len;
		total_len += len;
	}

	if(lli) {
		lli->NextCtrl = (DESCRIPTOR_NEXTCTRL_INT_MASK | DESCRIPTOR_NEXTCTRL_STOP_MASK); /* This is the last descriptor so set the stop and int bit */
	}

	/* the final link was modified above, so push it out to the device */
	if(prevlink) {
		dma_sync_single_for_device(chan2parent(chan),
				prevlink->phys, sizeof(prevlink->lli),
				DMA_TO_DEVICE);
		prevlink->synced = 1;
	}

	/* TODO: change this to len per subchain? */
	firstdesc->len = total_len;

done_prep_slave:
	return &firstdesc->txd;

err_cfg:
err_desc_get:
	/* firstdesc is still NULL when the very first allocation failed */
	if (firstdesc)
		mv61_desc_put(firstdesc);
	return NULL;
}

/**
 * mv61vc_terminate_dma - halt dma and flush fifo to memory
 * @mv61vc: virtual channel whose transaction is being stopped
 * @mv61pc: physical channel control structure
 * @flowcontrol: flow control mode of the channel; selects whether the fifo
 *	flush has to be waited for
 * @flags: MV61_PDMA_FLUSH to halt and flush before disabling,
 *	MV61_PDMA_CLEAR to restore the channel to its reset state afterwards
 *
 * Call with mv61p->biglock held.
 *
 * Does nothing when @mv61pc is NULL, has no register mapping, or the channel
 * is not enabled.  Otherwise the hardware Status, CPR, CDR and Control
 * registers are captured in mv61vc->hwstat before the channel is disabled.
 *
 * This should normally happen very fast. The timeouts are long just to ensure they
 * never happen in normal operation. If it does time out, the hardware is probably
 * fatally broken, but try to avoid hanging just to facilitate debug.
 */
static void mv61vc_terminate_dma(struct mv61_vdma_chan *mv61vc,
				struct mv61_pdma_chan *mv61pc,
				enum mv61_dma_flow_ctrl flowcontrol, int flags)
{
	/* the fifo only counts as flushed once every buffer reports empty */
	const u32			flush_done = (CDMA_STATUS_DATABUFEMPTY_MASK |
						CDMA_STATUS_RDCMDBUFEMPTY_MASK |
						CDMA_STATUS_WRCMDBUFEMPTY_MASK);
	/* upper bound, in microseconds, for each of the two polling loops */
	const int			timeout_us = 100;
	struct mv61_pdma_chan_regs 	*pcregs;
	struct mv61_desc		*desc;
	u32				tmp;
	int				i;

	if(!mv61pc)
		return;

	pcregs = mv61pc->ch_regs;
	if(!pcregs)
		return;

	tmp = readl(&pcregs->CFG);
	if(!(tmp & CDMA_CFG_ENABLE_MASK))
		return;

	if(flags & MV61_PDMA_FLUSH) {
		/* set halt to flush fifo and/or complete current chain fetch */
		writel(tmp | (CDMA_CFG_HALT_MASK), &pcregs->CFG);
		wmb();

		/* if the destination is memory, wait for fifo to flush */
		if((flowcontrol == MV61_DMA_PERIPHERAL_TO_MEMORY) ||
				(flowcontrol == MV61_DMA_MEMORY_TO_MEMORY)) {

			/* This should never time out unless a bus is hung. */
			for(i = 0; i < timeout_us; i++) {
				/*
				 * Require all three empty flags; testing for
				 * any single flag would end the wait while
				 * data is still sitting in another buffer.
				 */
				if((readl(&pcregs->Status) & flush_done) == flush_done)
					break;
				udelay(1);
			}
			if(i == timeout_us) {
				WARN(1, "Timeout halting cdma physical "
						"channel %d\n", mv61pc->index);
			}
		}
	}

	/* get the transaction descriptor for the current dma op */
	desc = mv61vc_first_active(mv61vc);
	BUG_ON(!desc);
	desc = desc->active;
	BUG_ON(!desc);

	/* snapshot the hardware position before the channel is disabled */
	mv61vc->hwstat.Status = readl(&pcregs->Status);
	mv61vc->hwstat.CPR = readl(&pcregs->CPR);
	mv61vc->hwstat.CDR = readl(&pcregs->CDR);
	mv61vc->hwstat.Control = readl(&pcregs->Control);

	/* now disable and wait for last mem operation to finish */
	tmp = readl(&pcregs->CFG);
	tmp &= ~(CDMA_CFG_HALT_MASK);
	tmp &= ~(CDMA_CFG_ENABLE_MASK);
	writel(tmp, &pcregs->CFG);
	wmb();

	/* This should never time out unless a bus is hung. */
	for(i = 0; i < timeout_us; i++) {
		if(!(readl(&pcregs->CFG) & CDMA_CFG_ENABLE_MASK))
			break;
		udelay(1);
	}
	if(i == timeout_us) {
		WARN(1, "Timeout disabling cdma physical "
				"channel %d\n", mv61pc->index);
	}

	/* clean up the interrupts */
	writel(0, &pcregs->intEn);
	writel(~0, &pcregs->intAck);

	if(flags & MV61_PDMA_CLEAR) {
		/* restore channel to reset state */
		mv61_clear_pchannel(mv61pc);
	}
}

/**
 * mv61vc_device_control - dmaengine API for channel control
 * @chan: dma channel control structure defined by API
 * @cmd: DMA_TERMINATE_ALL, DMA_PAUSE or DMA_RESUME; anything else is
 *	rejected with -EINVAL
 * @arg: not examined by this function
 *
 * DMA_TERMINATE_ALL stops the hardware, cancels every queued and active
 * transaction and leaves the channel in DMA_COMPLETE.
 *
 * DMA_PAUSE and DMA_RESUME are only accepted on MV61_VDMA_OWNED and
 * MV61_VDMA_CYCLIC controllers.  Pausing an already paused channel, or
 * resuming a channel that is neither paused nor in error, is a no-op that
 * returns 0.
 *
 * Pausing is supported for both transmitting and receiving to allow
 * loopback testing. However, this implementation is primarily intended
 * for receivers.
 *
 * Pausing a transmitter is likely to result in data loss unless the peripheral
 * can accept all the data in the fifo immediately. There is no way to determine
 * how much data is left in the fifo if it is not empty, so there's no way to
 * know how much data to resend.
 *
 * Call with all locks released.
 *
 * Returns 0 on success or -EINVAL for an unsupported command.
 */
static int mv61vc_device_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
							unsigned long arg)
{
	struct mv61_vdma_chan		*mv61vc = dchan_to_mv61_vdma_chan(chan);
	struct mv61_vdma		*mv61v = mv61vc->mv61v;
	struct mv61_dma			*mv61p = mv61v->mv61p;
	struct mv61_pdma_chan		*mv61pc;
	LIST_HEAD(flush_list);
	u32				tmp;
	unsigned long 			biglockflags;
	unsigned long 			lockvcflags;
	enum mv61_dma_flow_ctrl		flowcontrol;
	int err = 0;

	__dev_vdbg(chan2dev(chan), "mv61vc_device_control:%d\n", __LINE__);

	/* lock order: controller-wide biglock first, then the channel lock */
	spin_lock_irqsave(&mv61p->biglock, biglockflags);
	spin_lock_irqsave(&mv61vc->lock, lockvcflags);

	flowcontrol = MV61_CDMA_VAR_RD_FIELD(mv61vc->def.CFG,CDMA_CFG_FLOWCTRL);

	/*
	 * First stage: validate the command and record the new channel
	 * status.  Commands that "break" out continue to the second stage.
	 */
	switch(cmd) {
	case DMA_TERMINATE_ALL:
		mv61vc->status = DMA_COMPLETE;
		mv61vc->irqs = 0;
		break;
	case DMA_PAUSE:
		if(mv61v->vtype != MV61_VDMA_OWNED &&
				mv61v->vtype != MV61_VDMA_CYCLIC) {
			err = -EINVAL;
			goto mv61vc_device_control_unlock_done;
		}
		/* already paused: nothing to do, report success */
		if(mv61vc->status == DMA_PAUSED) {
			goto mv61vc_device_control_unlock_done;
		}
		mv61vc->status = DMA_PAUSED;
		break;
	case DMA_RESUME:
		if(mv61v->vtype != MV61_VDMA_OWNED &&
				mv61v->vtype != MV61_VDMA_CYCLIC) {
			err = -EINVAL;
			goto mv61vc_device_control_unlock_done;
		}
		/* only a paused or errored channel can be resumed */
		if((mv61vc->status != DMA_PAUSED) && (mv61vc->status != DMA_ERROR))  {
			goto mv61vc_device_control_unlock_done;
		}

		mv61vc_resume_dma(mv61vc, flowcontrol);
		goto mv61vc_device_control_unlock_done;
	default:
		err = -EINVAL;
		goto mv61vc_device_control_unlock_done;
	}

	/* may be NULL when no physical channel is currently paired */
	mv61pc = mv61_vpmap_v_to_p(mv61vc);

	/*
	 * Second stage: act on the status chosen above.  DMA_PAUSED comes
	 * from DMA_PAUSE; every other status (DMA_COMPLETE from
	 * DMA_TERMINATE_ALL) takes the default branch.
	 */
	switch(mv61vc->status) {
	case DMA_PAUSED:
		__dev_vdbg(chan2dev(chan), "mv61vc_device_control:%d\n", __LINE__);
		if(mv61pc) {
			mv61vc_terminate_dma(mv61vc, mv61pc, flowcontrol,
					(MV61_PDMA_FLUSH | MV61_PDMA_CLEAR));
		}
		/* fall through */
	//case DMA_ERROR:
		mv61vc->residue = mv61vc_check_residue(mv61vc);
		if(mv61vc->residue == 0) {
			/*
			 * In the DMA_PAUSED state, residue == 0 is ambiguous.
			 * It can mean that the transaction actually completed
			 * before it was paused, but per the API, it can also
			 * mean that residue reporting is not supported.
			 *
			 * Detecting a completed transaction can
			 * resolve the ambiguity IF a callback was registered.
			 */
			mv61_retire_tx(mv61vc);
			/* drop both locks before the complete list is processed */
			spin_unlock_irqrestore(&mv61vc->lock, lockvcflags);
			spin_unlock_irqrestore(&mv61p->biglock, biglockflags);
			/*
			 * Process all complete descriptors.
			 */
			mv61vc_process_desc_list(mv61vc, &mv61vc->complete_list);
			goto mv61vc_device_control_done;
		}
		else {
			/* a negative residue is stored as 0 */
			if(mv61vc->residue < 0)
				mv61vc->residue = 0;

			goto mv61vc_device_control_unlock_done;
		}

	default:
		__dev_vdbg(chan2dev(chan), "mv61vc_device_control:%d\n", __LINE__);
		if(mv61pc) {
			mv61vc_terminate_dma(mv61vc, mv61pc, flowcontrol,
					(MV61_PDMA_FLUSH | MV61_PDMA_CLEAR));
		}
		/* Cancel remaining transactions */

		/* active_list entries will end up before queued entries */
		if(!list_empty(&mv61vc->queue))
			list_splice_init(&mv61vc->queue, &flush_list);
		if(!list_empty(&mv61vc->active_list))
			list_splice_init(&mv61vc->active_list, &flush_list);

		if(mv61v->vtype == MV61_VDMA_SHARED) {
			/* keep dropping until mv61_dispatch_drop() returns 0 */
			do {
				tmp = mv61_dispatch_drop(mv61v->mv61p, mv61vc->chan.chan_id);
			} while (tmp);

			/* hand the physical channel back and break the pairing */
			if(mv61pc) {
				mv61_dispatch_free_pchan(mv61v->mv61p, mv61pc->index);
				mv61_vpmap_unpair(mv61vc, mv61pc);
				mv61pc = NULL;
			}
		}

		mv61vc->status = DMA_COMPLETE;

		/* both list walks below run with the locks released */
		spin_unlock_irqrestore(&mv61vc->lock, lockvcflags);
		spin_unlock_irqrestore(&mv61p->biglock, biglockflags);

		/*
		 * Process all complete descriptors.
		 */
		mv61vc_process_desc_list(mv61vc, &mv61vc->complete_list);

		/*
		 * Flush all pending and queued descriptors.
		 */
		mv61vc_flush_desc_list(mv61vc, &flush_list);
		goto mv61vc_device_control_done;
	}

	/* exit used by every path that still holds both locks */
mv61vc_device_control_unlock_done:
	spin_unlock_irqrestore(&mv61vc->lock, lockvcflags);
	spin_unlock_irqrestore(&mv61p->biglock, biglockflags);

	/* exit used by paths that already released the locks themselves */
mv61vc_device_control_done:
	return err;
}

/**
 * mv61vc_tx_status - dmaengine API for checking transaction status
 * @chan: dma channel control structure defined by API
 * @cookie: transaction cookie defined by API
 * @txstate: transaction status defined by API as of kernel 2.6.35
 *
 * Fills @txstate with the last completed cookie, the last issued cookie and
 * the residue, and returns the status to report for @cookie.
 *
 * Call with mv61vc->lock released.
 */
static enum dma_status
mv61vc_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
						struct dma_tx_state *txstate)
{
	struct mv61_vdma_chan	*mv61vc = dchan_to_mv61_vdma_chan(chan);
	struct mv61_vdma	*mv61v = mv61vc->mv61v;
	struct mv61_dma	*mv61p = mv61v->mv61p;
	dma_cookie_t		newest;
	dma_cookie_t		finished;
	int			cookie_state;
	unsigned long		bigflags;
	unsigned long		vcflags;
	int			remaining;
	enum dma_status		result;

	/* optional register dump, done in its own locked section */
	if (vdebug) {
		struct mv61_pdma_chan	*pchan;

		spin_lock_irqsave(&mv61p->biglock, bigflags);
		spin_lock_irqsave(&mv61vc->lock, vcflags);

		pchan = mv61_vpmap_v_to_p(mv61vc);

		__dev_vdbg(chan2dev(chan), "mv61vc_tx_status chan=0x%p, "
					"cookie=0x%08x, irq count=%u\n",
					chan, cookie, mv61p->irq_call_cnt);

		if (mv61p->CDMAInt)
			printk(KERN_NOTICE "polled cdma int status=0x%x\n",
					readl(mv61p->CDMAInt));
		if (pchan)
			mv61_dump_pchan(pchan->ch_regs);

		spin_unlock_irqrestore(&mv61vc->lock, vcflags);
		spin_unlock_irqrestore(&mv61p->biglock, bigflags);
	}

	spin_lock_irqsave(&mv61p->biglock, bigflags);
	spin_lock_irqsave(&mv61vc->lock, vcflags);

	finished = mv61vc->completed;
	newest = chan->cookie;
	cookie_state = dma_async_is_complete(cookie, finished, newest);

	/* pick the residue according to what the channel is doing right now */
	if (mv61vc->status == DMA_PAUSED || mv61vc->status == DMA_ERROR) {
		/* use the value stored in the channel structure */
		remaining = mv61vc->residue;
		if (dumppausedesc)
			mv61vc_pause_dump(mv61vc);
	} else if (mv61vc->status == DMA_IN_PROGRESS) {
		remaining = mv61vc_check_residue_running(mv61vc);
		if (remaining < 0)
			remaining = 0;
	} else {
		remaining = 0;
	}

	/*
	 * The api is a little unclear on what to report if the cookie
	 * has not been started yet. For now, assume that reporting
	 * residue for the wrong cookie would be useless, but reporting
	 * error or paused is useful regardless of the cookie.
	 */
	if (cookie_state != DMA_IN_PROGRESS) {
		result = DMA_COMPLETE;
		remaining = 0;
	} else {
		if (cookie != mv61vc->started)
			remaining = 0;
		result = (mv61vc->status != DMA_COMPLETE) ?
					mv61vc->status : cookie_state;
	}

	spin_unlock_irqrestore(&mv61vc->lock, vcflags);
	spin_unlock_irqrestore(&mv61p->biglock, bigflags);

	/* hand the cookies and the residue computed above back to the caller */
	dma_set_tx_state(txstate, finished, newest, remaining);

	return result;
}

/**
 * mv61vc_issue_pending - dmaengine API for starting the queue
 * @chan: dma channel control structure defined by API
 *
 * Hands the channel to mv61vc_issue_queue() together with the virtual
 * controller that owns it.
 *
 * Call with mv61vc->lock released.
 */
static void mv61vc_issue_pending(struct dma_chan *chan)
{
	struct mv61_vdma_chan	*vchan = dchan_to_mv61_vdma_chan(chan);

	__dev_vdbg(chan2dev(chan), "issue_pending chan=%p\n", chan);

	/* locking is done inside mv61vc_issue_queue() */
	mv61vc_issue_queue(ddev_to_mv61_vdma(chan->device), vchan);

	__dev_vdbg(chan2dev(chan), "issue_pending completed, chan=%p \n", chan);
}

/**
 * mv61vc_alloc_chan_resources - dmaengine API for initializing a channel
 * @chan: dma channel control structure defined by API
 *
 * Call with mv61vc->lock released.
 */
static int mv61vc_alloc_chan_resources(struct dma_chan *chan)
{
	struct mv61_vdma_chan	*mv61vc = dchan_to_mv61_vdma_chan(chan);
	struct mv61_dma_slave	*mv61s;
	unsigned long 		lockvcflags;

	__dev_vdbg(chan2dev(chan), "alloc_chan_resources\n");

	spin_lock_irqsave(&mv61vc->lock, lockvcflags);

	/* ASSERT:  channel is idle */
	BUG_ON(!list_empty(&mv61vc->queue));
	BUG_ON(!list_empty(&mv61vc->active_list));
	BUG_ON(!list_empty(&mv61vc->complete_list));

	mv61vc->completed = chan->cookie = 1;

	mv61s = chan->private;
	if (mv61s) {
		/*
		 * We need controller-specific data to set up slave
		 * transfers.
		 */
		mv61vc->wrap = mv61s->wrap;
		mv61vc->def.valid = 1;
		mv61vc->def.TimerControl = 0;
		MV61_CDMA_VAR_WR_FIELD(mv61vc->def.SrcAddr, CDMA_SRCADDR_SRCADDR,
								mv61s->src_reg);
		MV61_CDMA_VAR_WR_FIELD(mv61vc->def.DestAddr, CDMA_DESTADDR_DESTADDR,
								mv61s->dest_reg);
		if(mv61s->timer) {

			MV61_CDMA_VAR_WR_FIELD(mv61vc->def.TimerControl,
					CDMA_TIMERCONTROL_TIMERENABLE, 1);
			MV61_CDMA_VAR_WR_FIELD(mv61vc->def.TimerControl,
					CDMA_TIMERCONTROL_TIMEBASE,
					mv61s->timebase);
			MV61_CDMA_VAR_WR_FIELD(mv61vc->def.TimerControl,
					CDMA_TIMERCONTROL_COUNT,
					mv61s->timer);
		}

		mv61vc->def.Control = 0;
		mv61vc->def.CFG = 0;
		MV61_CDMA_VAR_WR_FIELD(mv61vc->def.CFG,CDMA_CFG_WRDELAY,
							mv61s->wr_delay);
		MV61_CDMA_VAR_WR_FIELD(mv61vc->def.CFG,CDMA_CFG_DATAUNITSIZE,
							mv61s->data_unit_size);
		MV61_CDMA_VAR_WR_FIELD(mv61vc->def.CFG,CDMA_CFG_FLOWCTRL,
							mv61s->flowctrl);
		MV61_CDMA_VAR_WR_FIELD(mv61vc->def.CFG,CDMA_CFG_DESTPID,
							mv61s->dest_pid);
		MV61_CDMA_VAR_WR_FIELD(mv61vc->def.CFG,CDMA_CFG_SRCPID,
							mv61s->src_pid);
		MV61_CDMA_VAR_WR_FIELD(mv61vc->def.CFG,CDMA_CFG_DESTENDIAN,
							mv61s->destendian);
		MV61_CDMA_VAR_WR_FIELD(mv61vc->def.CFG,CDMA_CFG_SRCENDIAN,
							mv61s->srcendian);

		MV61_CDMA_VAR_WR_FIELD(mv61vc->def.Control,CDMA_CONTROL_DESTADDRINC,
							mv61s->dest_addr_inc);
		MV61_CDMA_VAR_WR_FIELD(mv61vc->def.Control,CDMA_CONTROL_SRCADDRINC,
							mv61s->src_addr_inc);
		MV61_CDMA_VAR_WR_FIELD(mv61vc->def.Control,CDMA_CONTROL_DESTXFERWIDTH,
							mv61s->dest_width);
		MV61_CDMA_VAR_WR_FIELD(mv61vc->def.Control,CDMA_CONTROL_SRCXFERWIDTH,
							mv61s->src_width);
		MV61_CDMA_VAR_WR_FIELD(mv61vc->def.Control,CDMA_CONTROL_DESTBURSTSIZE,
							mv61s->dest_burst);
		MV61_CDMA_VAR_WR_FIELD(mv61vc->def.Control,CDMA_CONTROL_SRCBURSTSIZE,
							mv61s->src_burst);
	}

	spin_unlock_irqrestore(&mv61vc->lock, lockvcflags);
	return 0;
}

/**
 * mv61vc_free_chan_resources - dmaengine API for releasing a channel
 * @chan: dma channel control structure defined by API
 *
 * Call with all locks released.
 */
static void mv61vc_free_chan_resources(struct dma_chan *chan)
{
	struct mv61_vdma_chan	*mv61vc = dchan_to_mv61_vdma_chan(chan);
	LIST_HEAD(list);
	unsigned long 		lockvcflags;

	__dev_vdbg(chan2dev(chan), "free_chan_resources:%d\n", __LINE__);

	mv61vc_device_control(chan, DMA_TERMINATE_ALL, 0);

	spin_lock_irqsave(&mv61vc->lock, lockvcflags);

	/* ASSERT:  channel is idle */
	BUG_ON(!list_empty(&mv61vc->queue));
	BUG_ON(!list_empty(&mv61vc->active_list));
	BUG_ON(!list_empty(&mv61vc->complete_list));

	mv61vc->chan.private = 0;
	mv61vc->def.valid = 0;
	mv61vc->chan.cookie = mv61vc->completed = 1;
	spin_unlock_irqrestore(&mv61vc->lock, lockvcflags);
}

/**
 * mv61_vpmap_dispatch_init - initialize the channel mapping and dispatcher
 * @mv61v: this top virtual dma control instance
 * @pdata: platform data of physical device
 *
 * Still single-threaded when this is called, but lock to be consistent.
 *
 * Physical channels are assigned in contiguous blocks, one block per
 * virtual controller, in controller-id order.  Virtual channels of every
 * controller other than MV61_VDMA_SHARED are mapped one-to-one onto the
 * physical channels of their block; the physical channels of the SHARED
 * block are handed to the dispatcher as free instead.
 *
 * NOTE(review): this is marked __init; confirm that every caller is also
 * __init code, otherwise the annotation causes a section mismatch.
 */
static void __init mv61_vpmap_dispatch_init (struct mv61_vdma *mv61v,
					struct mv61_dma_platform_data *pdata)
{
	struct mv61_dma		*mv61p = mv61v->mv61p; /* top phys dma ctrl */
	struct mv61_dma_vpmap	*map = mv61p->vpmap;
	const int		type = mv61v->vtype;
	/* start of this controller's block in the physical/virtual tables */
	int			pbase = 0;
	int			vbase = 0;
	int			n;
	unsigned long		irqflags;

	spin_lock_irqsave(&mv61p->biglock, irqflags);

	/* skip over the blocks that belong to lower-numbered controllers */
	for (n = 0; n < type; n++) {
		pbase += pdata->nr_pool_chans[n];
		vbase += pdata->nr_virt_chans[n];
	}

	map->voffset[type] = vbase;

	/* start with every virtual channel of this controller unmapped */
	for (n = 0; n < pdata->nr_virt_chans[type]; n++)
		map->v_to_p[vbase + n] = NULL;

	for (n = 0; n < pdata->nr_pool_chans[type]; n++) {
		struct mv61_pdma_chan *pchan = &mv61p->chan[pbase + n];

		pchan->vtype = type;

		if (type != MV61_VDMA_SHARED) {
			/* cross link the physical and virtual channels */
			map->v_to_p[vbase + n] = pchan;
			map->p_to_v[pbase + n] = &mv61v->chan[n];
			continue;
		}

		/* mark the physical channel as available */
		mv61_dispatch_free_pchan(mv61p, pchan->index);
		map->p_to_v[pbase + n] = NULL;
	}

	spin_unlock_irqrestore(&mv61p->biglock, irqflags);
}

/**
 * mv61_init_channels - initialize physical channels and channel allocation
 * @mv61p: top physical dma control
 * @pdata: platform data of physical device
 *
 * Still single-threaded when this is called, but lock to be consistent.
 *
 * Physical channels are assigned in contiguous blocks.
 * All virtual channels that are not SHARED that are directly mapped to
 * corresponding physical channels.
 */
static int __init mv61_init_channels(struct mv61_dma *mv61p,
			struct mv61_dma_platform_data *pdata)
{
	int 		i;
	int 		chans_avail = mv61p->pchannels;
	int 		ret = 0;
	unsigned long 	biglockflags;

	if(chans_avail > MV61_DMA_MAX_NR_VCHANNELS) {
		ret = -EINVAL;
		goto done;
	}

	spin_lock_irqsave(&mv61p->biglock, biglockflags);

	for (i = 0; i < chans_avail; i++) {
		struct mv61_pdma_chan	*mv61pc = &mv61p->chan[i];
		mv61pc->index = i;
		mv61pc->mv61p = mv61p;

		mv61pc->ch_regs = mv61p->ch_regs[i];
		mv61pc->vtype = MV61_VDMA_UNASSIGNED;

		mv61_clear_pchannel(mv61pc);
	}

	pdata->nr_pool_chans[MV61_VDMA_OWNED] = (max_owned > chans_avail) ?
							chans_avail :
							max_owned;
	chans_avail -= pdata->nr_pool_chans[MV61_VDMA_OWNED];
	pdata->nr_virt_chans[MV61_VDMA_OWNED] = pdata->nr_pool_chans[MV61_VDMA_OWNED];

	if(chans_avail) {
		pdata->nr_pool_chans[MV61_VDMA_SHARED] = max_shared > chans_avail ?
								chans_avail :
								max_shared;
		chans_avail -= pdata->nr_pool_chans[MV61_VDMA_SHARED];
		pdata->nr_virt_chans[MV61_VDMA_SHARED] = max_vshared;
	} else {
		pdata->nr_pool_chans[MV61_VDMA_SHARED] = 0;
		pdata->nr_virt_chans[MV61_VDMA_SHARED] = 0;
	}

	if(chans_avail) {
		pdata->nr_pool_chans[MV61_VDMA_CYCLIC] = max_cyclic > chans_avail ?
								chans_avail :
								max_cyclic;
		chans_avail -= pdata->nr_pool_chans[MV61_VDMA_CYCLIC];
		pdata->nr_virt_chans[MV61_VDMA_CYCLIC] =
					pdata->nr_pool_chans[MV61_VDMA_CYCLIC];
	} else {
		pdata->nr_pool_chans[MV61_VDMA_CYCLIC] = 0;
		pdata->nr_virt_chans[MV61_VDMA_CYCLIC] = 0;
	}

	if(chans_avail) {
		pdata->nr_pool_chans[MV61_VDMA_MEMOPS] = max_memops > chans_avail ?
								chans_avail :
								max_memops;
		chans_avail -= pdata->nr_pool_chans[MV61_VDMA_MEMOPS];
		pdata->nr_virt_chans[MV61_VDMA_MEMOPS] =
					pdata->nr_pool_chans[MV61_VDMA_MEMOPS];
	} else {
		pdata->nr_pool_chans[MV61_VDMA_MEMOPS] = 0;
		pdata->nr_virt_chans[MV61_VDMA_MEMOPS] = 0;
	}

	if(pdata->nr_virt_chans[MV61_VDMA_SHARED] > MV61_DMA_MAX_NR_VCHANNELS)
		ret = -EINVAL;
	else
		ret = 0;

	spin_unlock_irqrestore(&mv61p->biglock, biglockflags);

done:
	return ret;
}

/**
 * mv61_dma_probe - initialize the physical dma support and dispatcher
 * @pdev: physical dma conroller platform device
 *
 * Provides the hardware layer and channel pool dispatcher to
 *
 */
static int mv61_dma_probe(struct platform_device *pdev)
{
	struct mv61_dma_platform_data *pdata;
	struct mv61_dma		*mv61p;
	size_t			size;
	int			irq;
	int			err;
	int			i;
	struct device_node *node = pdev->dev.of_node;
	int			max_temp;
	void __iomem		*base;
	int			ctr0;
	int			channel_offset;
	struct resource		*res;
	int reva = 0;

	if(vdebug)
		dev_printk(KERN_INFO, &pdev->dev, "Central DMA "
							"driver probe\n");

	pdata = devm_kzalloc(&pdev->dev, sizeof(struct mv61_dma_platform_data), GFP_KERNEL);
	if(!pdata)
		return -ENOMEM;

	pdev->dev.platform_data = pdata;

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	base = devm_ioremap_resource(&pdev->dev, res);
	if (IS_ERR(base)) {
		dev_dbg(&pdev->dev, "Couldn't get base register address!!\n");
		return PTR_ERR(base);
	}

	pdata->nr_channels = readl(base + CDMAPR_OFFSET);
	if (pdata->nr_channels > MV61_DMA_MAX_NR_PCHANNELS || pdata->nr_channels <= 0) {
		dev_dbg(&pdev->dev, "Invalid number of channels %d\n", pdata->nr_channels);
		return -EINVAL;
	}

	ctr0 = readl(base + CTR0_OFFSET);
	switch(ctr0) {
		case 0x00010001:
			channel_offset = 0x1000;
			reva = 1;
			break;
		case 0x00020004:
		case 0x00020005:
		default:
			channel_offset = 0x100;
			break;
	}

	if (!of_property_read_u32(node, "max_owned", &max_temp))
		max_owned = max_temp;

	if (!of_property_read_u32(node, "max_shared", &max_temp))
		max_shared = max_temp;

	if (!of_property_read_u32(node, "max_cyclic", &max_temp))
		max_cyclic = max_temp;

	if (!of_property_read_u32(node, "max_memops", &max_temp))
		max_memops = max_temp;

	irq = irq_of_parse_and_map(node, 0);
	if (irq < 0)
		return irq;

	size = sizeof(struct mv61_dma);
	size += pdata->nr_channels * sizeof(struct mv61_pdma_chan);

	mv61p = devm_kzalloc(&pdev->dev, size, GFP_KERNEL);

	if (!mv61p)
		return -ENOMEM;

	spin_lock_init(&mv61p->biglock);
	spin_lock_init(&mv61p->all_chains_lock);
	INIT_LIST_HEAD(&mv61p->all_chains);
	mv61p->dev = &pdev->dev;

	mv61p->CDMAInt = base;
	for (i = 0; i < pdata->nr_channels; i++) {
		base += channel_offset;
		mv61p->ch_regs[i] = base;
	}

	err = request_irq(irq, mv61_dma_interrupt, 0, MV_CDMA_NAME, mv61p);
	if (err)
		goto err_irq;

	platform_set_drvdata(pdev, mv61p);

	tasklet_init(&mv61p->tasklet, mv61_dma_tasklet, (unsigned long)mv61p);

	mv61p->pchannels = pdata->nr_channels;
	err = mv61_init_channels(mv61p, pdata);
	if(err)
		goto err_release_m;

	/* Clear/disable all interrupts on all channels. */
	/*TODO*/

	/*
	 * create the dispatcher data to be used by interrupt handler,
	 * channel flags will be initialized by virtual controller
	 */
	size = sizeof(struct mv61_dma_dispatch);
	size += pdata->nr_virt_chans[MV61_VDMA_SHARED] * sizeof(int);

	mv61p->dispatch = devm_kzalloc(&pdev->dev, size, GFP_KERNEL);
	if(!mv61p->dispatch) {
		err = -ENOMEM;
		goto err_release_m;
	}
	mv61p->dispatch->chancnt = pdata->nr_virt_chans[MV61_VDMA_SHARED];
	mv61p->dispatch->regcnt = mv61p->dispatch->chancnt >> DISPATCH_REGSHIFT;

	/*
	 * Create the virtual to physical channel map.
	 */
	size = sizeof(struct mv61_dma_vpmap);
	for(i = 0; i < MV61_NR_VDMA_CONTROLLERS; i++)
		size += pdata->nr_virt_chans[i] * sizeof(int);

	mv61p->vpmap = devm_kzalloc(&pdev->dev, size, GFP_KERNEL);
	if(!mv61p->vpmap) {
		err = -ENOMEM;
		goto err_release_m;
	}

	/* transaction descriptors will not be DMAed, so no special requirments */
	mv61p->desc_cachep = kmem_cache_create(desc_cache_name,
					sizeof(struct mv61_desc), sizeof(u32), 0,
					(void *)NULL);
	/* linked list descriptors will need to be synced for DMA */
	mv61p->chain_cachep  = kmem_cache_create(chain_cache_name,
					sizeof(struct mv61_chain),
					cache_line_size(),
					(SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA),
					(void *)NULL);
	if(!mv61p->desc_cachep || !mv61p->chain_cachep) {
		err = -ENOMEM;
		goto err_release_m;
	}

	mv61p->reva = reva;

	return 0;

err_irq:
err_release_m:
	free_irq(platform_get_irq(pdev, 0), mv61p);
	tasklet_kill(&mv61p->tasklet);

	if(mv61p->chain_cachep)
		kmem_cache_destroy(mv61p->chain_cachep);
	if(mv61p->desc_cachep)
		kmem_cache_destroy(mv61p->desc_cachep);

	platform_set_drvdata(pdev, NULL);
	return err;
}

/**
 * mv61_vdma_probe - initialize the virtual dma interface for dmaengine API
 * @pdev: instance of a virtual dma controller platform device
 *
 * Each virtual controller provides unique capabilities:
 *	MV61_VDMA_OWNED  provides dedicated scatter-gather peripheral channels
 *	MV61_VDMA_SHARED provides shared scatter-gather peripheral channels
 *	MV61_VDMA_CYCLIC provides cyclic dma support for audio
 *	MV61_VDMA_MEMOPS provides memcpy and memset services for the kernel
 *
 * This will be called once for each virtual controller instance. I don't think
 * the calls can happen in parallel, but use mv61p->vprobelock to be sure.
 */
static int mv61_vdma_probe(struct platform_device *pdev)
{
	struct platform_device *mv61_pdev;	/* ptr to real pool device */
	struct mv61_dma_platform_data *pdata;	/* ptr to pool device data */
	struct mv61_vdma_platform_data *vpdata; /* ptr to our own data */
	struct mv61_vdma	*mv61v;		/* virtual driver data */
	struct mv61_dma		*mv61p;		/* pool driver data */
	int			vid;	/* virt dev instance type/id */
	size_t			size;
	int			i;
	int 			ret;
	struct device_node *node = pdev->dev.of_node;
	struct device_node *cdma_node;

	if(vdebug)
		dev_printk(KERN_INFO, &pdev->dev, "Marvell Virtual DMA "
							"driver probe\n");
	cdma_node = of_find_node_by_name(NULL, "cdma");
	if(cdma_node) {
		mv61_pdev = of_find_device_by_node(cdma_node);
	} else {
		dev_printk(KERN_INFO, &pdev->dev, "Cannot find cdma node\n");
		return -EINVAL;
	}

	if (of_property_read_u32(node, "id", &vid)) {
		dev_printk(KERN_INFO, &pdev->dev, "Cannot find node id\n");
		return -EINVAL;
	}

	vpdata = kzalloc(sizeof(struct mv61_vdma_platform_data), GFP_KERNEL);
	if (!vpdata) {
		ret = -ENOMEM;
		goto vdma_probe_done;
	}
	pdev->dev.platform_data = vpdata;
	pdev->id = vid;
	pdata = mv61_pdev->dev.platform_data;
	/*mv61 = mv61_pdev->dev.p->driver_data;*/
	mv61p = platform_get_drvdata(mv61_pdev);
	if(!mv61p) {
		ret = -ENXIO;
		goto vdma_probe_done;
	}

	mv61p->mv61v[vid] = NULL;

	if (!pdata) {
		ret = -EINVAL;
		goto vdma_probe_done;
	}
	if(!pdata->nr_virt_chans[vid]) {
		ret = 0;
		goto vdma_probe_done;
	}
	if(!pdata->nr_pool_chans[vid]) {
		ret = -EINVAL;
		goto vdma_probe_done;
	}
	if((vid != MV61_VDMA_SHARED) && (pdata->nr_virt_chans[vid] !=
						pdata->nr_pool_chans[vid])) {
		ret = -EINVAL;
		goto vdma_probe_done;
	}
	size = sizeof(struct mv61_vdma);
	size += pdata->nr_virt_chans[vid] * sizeof(struct mv61_vdma_chan);

	mv61v = devm_kzalloc(&pdev->dev, size, GFP_KERNEL);

	if (!mv61v) {
		ret = -ENOMEM;
		goto vdma_probe_done;
	}

	mv61v->mv61p = mv61p;
	mv61v->vtype = vid;

	switch(vid) {
	case MV61_VDMA_OWNED:
	case MV61_VDMA_SHARED:
		dma_cap_set(DMA_SLAVE, mv61v->dma.cap_mask);
		break;
	case MV61_VDMA_CYCLIC:
		dma_cap_set(DMA_CYCLIC, mv61v->dma.cap_mask);
		break;
	case MV61_VDMA_MEMOPS:
		dma_cap_set(DMA_MEMCPY, mv61v->dma.cap_mask);
		break;
	default:
		ret = -EINVAL;
		goto vdma_probe_done;
	}

	platform_set_drvdata(pdev, mv61v);
	mv61p->mv61v[vid] = mv61v;

	INIT_LIST_HEAD(&mv61v->dma.channels);
	for (i = 0; i < pdata->nr_virt_chans[vid]; i++, mv61v->dma.chancnt++) {
		struct mv61_vdma_chan	*mv61vc = &mv61v->chan[i];
		mv61vc->mv61v = mv61v;
		mv61vc->def.valid = 0;
		mv61vc->status = DMA_COMPLETE;
		mv61vc->chan.device = &mv61v->dma;
		mv61vc->chan.cookie = mv61vc->completed = 1;
		mv61vc->chan.chan_id = i;
		mv61vc->residue = 0;

		list_add_tail(&mv61vc->chan.device_node, &mv61v->dma.channels);
		spin_lock_init(&mv61vc->lock);
		INIT_LIST_HEAD(&mv61vc->active_list);
		INIT_LIST_HEAD(&mv61vc->queue);
		INIT_LIST_HEAD(&mv61vc->complete_list);
	}


	mv61_vpmap_dispatch_init(mv61v, pdata);

	mv61v->dma.dev = &pdev->dev;
	if(dma_has_cap(DMA_MEMCPY, mv61v->dma.cap_mask))
		mv61v->dma.device_prep_dma_memcpy = mv61vc_prep_dma_memcpy;
	if(dma_has_cap(DMA_SLAVE, mv61v->dma.cap_mask))
		mv61v->dma.device_prep_slave_sg = mv61vc_prep_slave_sg;
	if(dma_has_cap(DMA_CYCLIC, mv61v->dma.cap_mask))
		mv61v->dma.device_prep_dma_cyclic = mv61vc_prep_dma_cyclic;
	mv61v->dma.device_control = mv61vc_device_control;
	mv61v->dma.device_alloc_chan_resources = mv61vc_alloc_chan_resources;
	mv61v->dma.device_free_chan_resources = mv61vc_free_chan_resources;
	mv61v->dma.device_tx_status = mv61vc_tx_status;
	mv61v->dma.device_issue_pending = mv61vc_issue_pending;
	mv61v->dma.copy_align = MV61_MEMCPY_ALIGN;
	mv61v->dma.fill_align = MV61_MEMFILL_ALIGN;

	{
		char *description = NULL;

		switch(vid) {
		case MV61_VDMA_OWNED:
			description = "owned";
			break;
		case MV61_VDMA_SHARED:
			description = "shared";
			break;
		case MV61_VDMA_CYCLIC:
			description = "cyclic";
			break;
		case MV61_VDMA_MEMOPS:
			description = "memops";
			break;
		default:
			description = "unknown";
			break;
		}

		dev_printk(KERN_INFO, &pdev->dev, "Virtual DMA Controller "
				"type %d:%s, "
				"%d virt channels, %d phys channels\n",
				vid, description,
				mv61v->dma.chancnt,
				pdata->nr_pool_chans[vid]);
	}

	ret = dma_async_device_register(&mv61v->dma);

vdma_probe_done:
	return ret;
}

/**
 * mv61_remove - tear down the physical (pool) CDMA controller
 * @pdev: pool controller platform device
 *
 * Frees the interrupt, kills the tasklet, force-releases any descriptors
 * still linked on the all_chains list (complaining loudly about each one),
 * destroys the two descriptor caches and frees the platform data.
 *
 * Return: always 0.
 */
static int __exit mv61_remove(struct platform_device *pdev)
{
	struct mv61_dma		*mv61p = platform_get_drvdata(pdev);

	printk(KERN_INFO "Central DMA module cdma_remove\n");

	free_irq(platform_get_irq(pdev, 0), mv61p);
	tasklet_kill(&mv61p->tasklet);

	if(!list_empty(&mv61p->all_chains))
		printk(KERN_CRIT "mv61x0-cdma: attempt to exit with descriptors!!!\n");

	/* desperation: delete any remaining descriptors, ignoring lock */
	while(!list_empty(&mv61p->all_chains)) {
		struct mv61_desc	*desc;
		desc = list_entry(mv61p->all_chains.next, struct mv61_desc,
							all_chains_node);
		printk(KERN_CRIT "mv61x0-cdma: deleting desc %p\n", desc);
		list_del(&desc->all_chains_node);
		mv61_desc_put(desc);
	}

	/* the caches can only go once every descriptor has been returned */
	if(mv61p->chain_cachep)
		kmem_cache_destroy(mv61p->chain_cachep);
	if(mv61p->desc_cachep)
		kmem_cache_destroy(mv61p->desc_cachep);

	/*
	 * Clear the pointer after freeing it: the device outlives this driver
	 * binding, and a stale platform_data would otherwise be handed to a
	 * later probe of a virtual controller or freed a second time when the
	 * platform device itself is released.
	 */
	kfree(pdev->dev.platform_data);
	pdev->dev.platform_data = NULL;

	return 0;
}

/*
 * Platform shutdown hook for the pool controller driver.
 * Currently there is nothing to do.
 */
static void mv61_shutdown(struct platform_device *pdev)
{
	/* no shutdown work */
}

/*
 * suspend_noirq callback for the pool controller driver.
 * Does no work and always reports success (0).
 */
static int mv61_suspend_noirq(struct device *dev)
{
	/* nothing is saved on suspend */
	return 0;
}

/*
 * resume_noirq callback for the pool controller driver.
 * Not implemented yet; always reports success (0).
 */
static int mv61_resume_noirq(struct device *dev)
{
	/* TODO: nothing is restored on resume yet */
	return 0;
}

/**
 * mv61_vdma_remove - tear down one virtual dma controller
 * @pdev: virtual dma controller platform device
 *
 * Unregisters the dmaengine device created by mv61_vdma_probe().
 *
 * Return: always 0.
 */
static int __exit mv61_vdma_remove(struct platform_device *pdev)
{
	struct mv61_vdma	*mv61v = platform_get_drvdata(pdev);

	printk(KERN_INFO "Central DMA module vdma_remove\n");

	/*
	 * The driver data is NULL when probe succeeded without creating a
	 * dmaengine device (no virtual channels configured for this type).
	 *
	 * mv61v was allocated with devm_kzalloc() in mv61_vdma_probe(), so it
	 * is released automatically when the device is unbound.  It must not
	 * be passed to kfree() here.
	 */
	if(mv61v)
		dma_async_device_unregister(&mv61v->dma);

	return 0;
}

/*
 * Platform shutdown hook for the virtual dma controller driver.
 * Currently there is nothing to do.
 */
static void mv61_vdma_shutdown(struct platform_device *pdev)
{
	/* no shutdown work */
}

/*
 * suspend_noirq callback for the virtual dma controller driver.
 * Does no work and always reports success (0).
 */
static int mv61_vdma_suspend_noirq(struct device *dev)
{
	/* nothing is saved on suspend */
	return 0;
}

/*
 * resume_noirq callback for the virtual dma controller driver.
 * Does no work and always reports success (0).
 */
static int mv61_vdma_resume_noirq(struct device *dev)
{
	/* nothing is restored on resume */
	return 0;
}

/*
 * Device tree match table for the pool controller driver (mv61_driver).
 * The table is never modified at run time, so it is const.
 */
static const struct of_device_id mv61_cdma_dt_ids[] = {
	{ .compatible = "mrvl,mv61_cdma", },
	{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, mv61_cdma_dt_ids);

static struct dev_pm_ops mv61_dev_pm_ops = {
	.suspend_noirq = mv61_suspend_noirq,
	.resume_noirq = mv61_resume_noirq,
};

/*
 * Platform driver for the pool controller, matched through
 * mv61_cdma_dt_ids.  The remove hook is wrapped in __exit_p().
 */
static struct platform_driver mv61_driver = {
	.driver = {
		.name		= MV_CDMA_NAME,
		.of_match_table	= mv61_cdma_dt_ids,
		.pm		= &mv61_dev_pm_ops,
	},
	.probe		= mv61_dma_probe,
	.remove		= __exit_p(mv61_remove),
	.shutdown	= mv61_shutdown,
};

/*
 * Device tree match table for the virtual dma controller driver
 * (mv61_vdma_driver).  The table is never modified at run time, so it is
 * const.
 */
static const struct of_device_id mv61_vdma_dt_ids[] = {
	{ .compatible = "mrvl,mv61_vdma", },
	{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, mv61_vdma_dt_ids);

static struct dev_pm_ops mv61_vdma_dev_pm_ops = {
	.suspend_noirq = mv61_vdma_suspend_noirq,
	.resume_noirq = mv61_vdma_resume_noirq,
};

/*
 * Platform driver for the virtual dma controllers, matched through
 * mv61_vdma_dt_ids.  The remove hook is wrapped in __exit_p().
 */
static struct platform_driver mv61_vdma_driver = {
	.driver = {
		.name		= MV_VDMA_NAME,
		.of_match_table	= mv61_vdma_dt_ids,
		.pm		= &mv61_vdma_dev_pm_ops,
	},
	.probe		= mv61_vdma_probe,
	.remove		= __exit_p(mv61_vdma_remove),
	.shutdown	= mv61_vdma_shutdown,
};

/**
 * mv61_init - module entry point
 *
 * Registers the pool controller driver first and the virtual dma
 * controller driver second.  If the second registration fails the first
 * one is rolled back, so the module never stays half registered.
 *
 * Return: 0 on success, otherwise the error from
 * platform_driver_register().
 */
static int __init mv61_init(void)
{
	int rc;

	printk(KERN_INFO "Central DMA module init\n");

	rc = platform_driver_register(&mv61_driver);
	if(rc) {
		printk(KERN_ERR "%s: failed to register mv61_driver %d\n",
				__func__, rc);
		return rc;
	}

	rc = platform_driver_register(&mv61_vdma_driver);
	if(rc) {
		printk(KERN_ERR "%s: failed to register mv61_vdma_driver %d\n",
				__func__, rc);
		/* undo the pool driver registration made above */
		platform_driver_unregister(&mv61_driver);
	}

	return rc;
}
module_init(mv61_init);

/*
 * Module exit point: unregister both platform drivers in the reverse of
 * the order used by mv61_init(), then log that the module is gone.
 */
static void __exit mv61_exit(void)
{
	/* virtual controllers first; they were registered after the pool */
	platform_driver_unregister(&mv61_vdma_driver);
	platform_driver_unregister(&mv61_driver);

	printk(KERN_INFO "Central DMA module unregistered\n");
}
module_exit(mv61_exit);

/* Module metadata: license and one-line description. */
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("Marvell Central DMA Controller driver");
