/*
 * A driver for the DSP embedded in the Quatro processors
 *
 *  Quasar DSP kernel driver
 * 
 * Copyright (c) 2015, The Linux Foundation.
 * All rights reserved.
 *
 * Redistribution and use
 * in source and binary forms, with or without modification,
 * are permitted (subject to the limitations in the disclaimer
 * below) provided that the following conditions are met :
 *   *Redistributions of source code must retain the above
 *    copyright notice, this list of conditions and the
 *    following disclaimer.
 *   *Redistributions in binary form must reproduce the
 *    above copyright notice, this list of conditions and
 *    the following disclaimer
 *    in the documentation and/or other materials provided
 *    with the distribution.
 *
 *  NO EXPRESS OR IMPLIED LICENSES TO ANY PARTYS PATENT
 *  RIGHTS ARE GRANTED BY THIS LICENSE.
 *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
 *  AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
 *  WARRANTIES, INCLUDING,
 *  BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 *  AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 *  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
 *  OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 *  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
 *  OR PROFITS;
 *  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 *  OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 *  OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
 *  OF SUCH DAMAGE
 *
 */
 // =========================================================
//
//  $DateTime: 2022/04/22 09:45:19 $
//  $Change: 60182 $
//
// =========================================================
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/delay.h>
#include <linux/jiffies.h>
#include <linux/platform_device.h>
#include <linux/of_platform.h>
#include <linux/io.h>
#ifdef __KERNEL__
#include <linux/interrupt.h>
#include <linux/device.h>
#include <linux/fs.h>
#include <linux/cdev.h>
#include <linux/poll.h>
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/slab.h>
#endif
#include <linux/dma-mapping.h>
#include <linux/time.h>
#include <quasar/qbsocregs.h>
#include <quasar/qioctl.h>

/* Driver name string; not referenced in the visible part of this file. */
#define QDSP_DEVNAME	"qdsp"

/* Number of slots in each device's mailbox ring (struct dsp_quasar.mailbox). */
#define MAX_MAILBOX 512

/* Define this to allocate the DSP code/data storage with
 * dma_alloc_coherent() instead of kmalloc() plus an explicit
 * dma_map_single() cache flush.
 */
#define DSP_CODESEG_DMA

/* For cache-coherent SoCs, when using DMA-coherent allocations for code/data
 * storage, the kernel will give us cacheable memory (because it is coherent),
 * so we need to tell the DSP that it should go through the coherency
 * interface for code.  XMEM instructions need to be coherent too if the
 * application allocates parameter storage in cacheable memory; applications
 * don't do that for now, so XMEM coherency is a module parameter
 * (xmemcoherent) that defaults to off.
 *
 * DSP_COHERENCY_EXT_CTL_OFF is only defined when the SoC register header
 * provides DSP0_COHERENCY_EXT_CTL_OFF.
 */
#ifdef DSP0_COHERENCY_EXT_CTL_OFF
#define DSP_COHERENCY_EXT_CTL_OFF DSP0_COHERENCY_EXT_CTL_OFF
#endif

/* Non-zero: probe and resume set the XM bit in the coherency control register. */
static int xmemcoherent = 0;
module_param(xmemcoherent, int, 0644);
MODULE_PARM_DESC(xmemcoherent, "Use CCI for XMEM access");

/* Global cache of loaded DSP modules, shared by every DSP instance.
 * g_moduleslock protects the list in qdsp_setcode().
 */
static LIST_HEAD(g_moduleslist);
static DEFINE_SPINLOCK(g_moduleslock);

/*
 * One cached DSP program image.  Code and data live in a single allocation
 * (code first, data at the next 256-byte boundary) and the entry is looked
 * up by the user-space addresses and sizes it was loaded from.
 */
struct dsp_module {
	struct list_head	modules;	/* link in g_moduleslist */
	int			refs;		/* 1 when created, +1 per cache hit */
	struct platform_device	*pdev;		/* device the storage was allocated for */

	/* code: kernel address, device address, originating user address */
	volatile u8		*code_la;
	volatile u8		*code_pa;
	volatile u8		*code_user;

	/* code addresses rounded up to a 256-byte boundary */
	volatile u8		*aligned_code_la;
	volatile u8		*aligned_code_pa;

	/* data: kernel address, device address, originating user address */
	volatile u8		*data_la;
	volatile u8		*data_pa;
	volatile u8		*data_user;

	u32			code_pa_high;	/* bits above 32 of aligned_code_pa, masked */

	dma_addr_t		code_dma;	/* DMA handle of the whole allocation */
	dma_addr_t		data_dma;	/* not written by the code in this file */

	u32			code_size_user;	/* code size as given by user space */
	u32			data_size_user;	/* data size as given by user space */
	u32			size_alloc;	/* bytes allocated at code_la */
};

/*
 * Per-DSP driver state, one instance per probed platform device.
 */
struct dsp_quasar {
	struct platform_device	*pdev;
	int			ref;		/* open count; open refuses a second opener */
	int			minor;		/* char device minor, set on open */

	/* physical address windows accepted by dsp_readl()/dsp_writel() */
	u32			ctlbase;
	u32			ctlend;
	u32			regbase;
	u32			regend;

	/* ioremap()ed views of the two windows above */
	volatile u8		__iomem	*ctls;
	volatile u8		__iomem	*regs;

	int			irq;		/* mailbox interrupt */
	int			irqh;		/* halt interrupt */
	struct cdev		cdev;
	struct dsp_module	*current_module; /* module last set up by qdsp_setcode() */
	int			inttime;

	wait_queue_head_t	mbq;		/* wait-queue for mailbox interrupt */
	struct q_doneinfo	mailbox[MAX_MAILBOX]; /* ring: ISR writes at head, ioctl reads at tail */
	int			mb_head;
	int			mb_tail;

	spinlock_t		lock;		/* guards mb_head / mb_tail updates */
};

/*
 * Read a 32-bit DSP register addressed by its physical address.
 *
 * reg must fall inside the register window [regbase, regend) or the control
 * window [ctlbase, ctlend); it is translated to the matching ioremap()ed
 * base.  On success the value is stored in *val and 0 is returned.  For an
 * address outside both windows a warning is logged, *val is set to 0 and
 * -EINVAL is returned.
 */
static int dsp_readl(struct dsp_quasar* dsp, u32 reg, unsigned* val)
{
	if (reg >= dsp->regbase && reg < dsp->regend) {
		*val = readl(dsp->regs + (reg - dsp->regbase));
		return 0;
	}

	if (reg >= dsp->ctlbase && reg < dsp->ctlend) {
		*val = readl(dsp->ctls + (reg - dsp->ctlbase));
		return 0;
	}

	printk(KERN_WARNING "qdsp ioctl getreg addr range error\n");
	*val = 0;
	return -EINVAL;
}

/*
 * Write a 32-bit DSP register addressed by its physical address.
 *
 * The address is checked against the register window first and the control
 * window second, exactly like dsp_readl().  Returns 0 on success; logs a
 * warning and returns -EINVAL when reg lies in neither window.
 */
static int dsp_writel(struct dsp_quasar* dsp, u32 reg, u32 val)
{
	int status = 0;

	if (reg >= dsp->regbase && reg < dsp->regend) {
		writel(val, dsp->regs + (reg - dsp->regbase));
	} else if (reg >= dsp->ctlbase && reg < dsp->ctlend) {
		writel(val, dsp->ctls + (reg - dsp->ctlbase));
	} else {
		printk(KERN_WARNING "qdsp ioctl setreg addr range error\n");
		status = -EINVAL;
	}

	return status;
}

static irqreturn_t quasar_dsp_interrupt(int irq, void *dev_id)
{
	struct dsp_quasar *dsp = (struct dsp_quasar *)dev_id;
	int ret;
	unsigned long flags;
	unsigned val, pc;

	if(irq == dsp->irq) {
		ret = dsp_readl(dsp, dsp->regbase + DSP0_CTL_MAILBOX_2_VALID_OFF, &val);
		ret = dsp_readl(dsp, dsp->regbase + DSP0_CTL_MAILBOX_2_OFF, &val);

		printk("DSP%d msg %02x %06X (%d pend)\n", dsp->minor, val >> 24,
				val & 0xFFFFFF, (dsp->mb_head - dsp->mb_tail) % MAX_MAILBOX);

		/* mail msg type is upper byte of val, detail in lower 3.
		*/
		pc = val & 0xffffff;
		switch(val >> 24) {
		case 0x00:	// ABORT
			break;
		case 0x11:	// print string
			break;
		case 0x10:	// print register, register is in R2, so copy it as the detail
			break;
		case 0x12:	// print registers, register is in R2 & R3, so copy it as the detail
			break;
		default:
			printk("DSP%d - bad msg type\n", dsp->minor);
			break;
		}
		dsp->mailbox[dsp->mb_head].msg = val;
		dsp->mailbox[dsp->mb_head].detail = pc;
		/* cycle count in R1.a */
		dsp_readl(dsp, dsp->regbase + DSP0_REG_FILE_A_START_OFF + 4, &val);
		dsp->mailbox[dsp->mb_head].cycles  = val;

		/*printk("dspx cy=%0llx\n",
			dsp->mailbox[dsp->mb_head].cycles);*/
	}
	else if(irq == dsp->irqh) {
		struct timeval  tv;

		dsp->mailbox[dsp->mb_head].msg = QDSP_HALTED;
		/* cycle count in R1.a */
		dsp->mailbox[dsp->mb_head].detail  = dsp_readl(dsp, dsp->regbase + DSP0_REG_FILE_A_START_OFF + 4, &val);
		do_gettimeofday(&tv);
		dsp->mailbox[dsp->mb_head].endtime = (unsigned long long)tv.tv_sec * 1000000 + (unsigned long long)tv.tv_usec;
		dsp->mailbox[dsp->mb_head].cycles  = val;
		/*
		printk("dsp%d et=%0llx cy=%0llx\n", dsp->minor,
			dsp->mailbox[dsp->mb_head].endtime,	dsp->mailbox[dsp->mb_head].cycles);
		*/
	}
	else
		printk(KERN_WARNING "qdsp - spurious int\n");

	spin_lock_irqsave(&dsp->lock, flags);
	dsp->mb_head++;
	if(dsp->mb_head >= MAX_MAILBOX)
		dsp->mb_head = 0;
	if(dsp->mb_head == dsp->mb_tail) {
		printk("qdsp - OVERFLOW in msgs\n");
	}
	spin_unlock_irqrestore(&dsp->lock, flags);

	/* wakeup tasks waiting on a mailbox to read
	*/
	wake_up_interruptible(&dsp->mbq);

	ret = IRQ_HANDLED;
	return ret;
}

/*
 * open(): grant exclusive access to one DSP.
 *
 * Fails with -EBUSY while the device is already open.  Otherwise records the
 * minor number, empties the mailbox ring and stores the device state in
 * filp->private_data for the other file operations.
 */
static int qdsp_open(struct inode* inode, struct file* filp)
{
	struct dsp_quasar *dsp = container_of(inode->i_cdev, struct dsp_quasar, cdev);

	if (dsp->ref > 0)
		return -EBUSY;

	dsp->ref += 1;
	dsp->minor = iminor(inode);

	/* start with an empty mailbox ring */
	dsp->mb_tail = 0;
	dsp->mb_head = 0;

	filp->private_data = dsp;
	return 0;
}

/*
 * release(): drop the open reference taken by qdsp_open().
 *
 * Returns -EFAULT if the device is not currently marked open, 0 otherwise.
 */
static int qdsp_release(struct inode* inode, struct file* filp)
{
	struct dsp_quasar *dsp = container_of(inode->i_cdev, struct dsp_quasar, cdev);

	if (dsp->ref > 0) {
		dsp->ref -= 1;
		filp->private_data = NULL;
		return 0;
	}

	return -EFAULT;
}

/*
 * read(): return a short greeting containing the device minor.
 *
 * At most `length` bytes of "hi from <minor>\n" are copied to the caller.
 * The file offset is ignored, so every read returns the same text.
 *
 * Returns the number of bytes copied, or -EFAULT when the user buffer cannot
 * be written (the original ignored the copy result and reported success).
 */
static ssize_t qdsp_read(struct file* filp, char __user *buffer, size_t length, loff_t* offset)
{
	struct dsp_quasar *dsp = (struct dsp_quasar*)filp->private_data;
	char rb[64];
	size_t rl;
	size_t count;

	rl = snprintf(rb, sizeof(rb), "hi from %d\n", dsp->minor);
	count = min(rl, length);

	if(copy_to_user(buffer, rb, count))
		return -EFAULT;

	return count;
}

/*
 * write(): log the written bytes as a string; nothing is sent to the DSP.
 *
 * Returns `length` on success, -EINVAL for an empty or NULL buffer, -ENOMEM
 * if the temporary copy cannot be allocated, and -EFAULT if the user buffer
 * cannot be read.  The copy result used to be ignored, which printed
 * uninitialised kernel heap contents to the log on a faulting buffer.
 */
static ssize_t qdsp_write(struct file* filp, const char __user *buffer, size_t length, loff_t* offset)
{
	struct dsp_quasar *dsp = (struct dsp_quasar*)filp->private_data;
	char *kbuf;

	if(! length || ! buffer)
		return -EINVAL;

	/* one extra byte for the terminating NUL */
	kbuf = kmalloc(length + 1, GFP_KERNEL);
	if(! kbuf)
		return -ENOMEM;

	if(copy_from_user(kbuf, buffer, length)) {
		kfree(kbuf);
		return -EFAULT;
	}
	kbuf[length] = '\0';
	printk("qdsp %d wrt -%s-", dsp->minor, kbuf);
	kfree(kbuf);
	return length;
}

/*
 * poll(): report the device readable while the mailbox ring is non-empty.
 *
 * The caller is registered on the mailbox wait-queue, which the interrupt
 * handler wakes after queuing a message.
 */
static unsigned int qdsp_poll(struct file *filp, poll_table *wait)
{
	struct dsp_quasar *dsp = (struct dsp_quasar*)filp->private_data;
	unsigned long irqstate;
	int has_mail;

	poll_wait(filp, &dsp->mbq, wait);

	/* sample head/tail consistently with respect to the ISR */
	spin_lock_irqsave(&dsp->lock, irqstate);
	has_mail = (dsp->mb_head != dsp->mb_tail);
	spin_unlock_irqrestore(&dsp->lock, irqstate);

	if (!has_mail)
		return 0;

	return POLLIN | POLLRDNORM; /* readable */
}

static int qdsp_setcode(struct dsp_quasar* dsp,
		unsigned startAddress,
		void* userCode, unsigned codeSize,
		void* userData, unsigned dataSize,
		int nparams, unsigned* params
)
{
	struct dsp_module *mod;
	unsigned long flags;
	struct list_head *modl;
	int ret = 0;
	unsigned regval;

	/* find module in cached list */
	spin_lock_irqsave(&g_moduleslock, flags);
	mod = NULL;
	list_for_each(modl, &g_moduleslist) {
		mod = list_entry(modl, struct dsp_module, modules);
		if(
				mod->code_user == userCode
			&&	mod->code_size_user == codeSize
			&&	mod->data_user == userData
			&&	mod->data_size_user == dataSize
		) {
			mod->refs++;
			break;
		}
		mod = NULL;
	}
	spin_unlock_irqrestore(&g_moduleslock, flags);
	if(! mod) {
		/* alloc a module list entry */
		mod = (struct dsp_module*)kzalloc(sizeof(struct dsp_module), GFP_KERNEL);
		if(! mod) {
			printk("qdsp - can't alloc module list entry\n");
			return -ENOMEM;
		}
		INIT_LIST_HEAD(&mod->modules);

		/* dsp code needs to be 256 byte aligned size wise */
		mod->size_alloc = (codeSize + 0xFF) & ~0xFF;
		mod->size_alloc += (dataSize + 0xFF) & ~0xFF;

		mod->code_user = (volatile u8*)userCode;
		mod->code_size_user = codeSize;

		mod->data_user = (volatile u8*)userData;
		mod->data_size_user = dataSize;

		mod->pdev = dsp->pdev;

		/* code and data are allocced in one chunk since theres no reason not to
		 * and it might save up to nearly 2 pages of bytes depending on the sizes of each
		 * since they can be packed tighter
		 */
#ifdef DSP_CODESEG_DMA
		/* this allocates "coherent dma memory" but only on systems that have coherent
		 * memory (only the 55xx does for our use).  otherwise this gives us regular
		 * kernel memory marked uncached.  for cache-coherent socs, we need to set
		 * the dsp's memory h/w to go through that interface
		 */
		mod->code_la = dma_alloc_coherent(
				&mod->pdev->dev,
				mod->size_alloc,
				&mod->code_dma,
				GFP_KERNEL | GFP_DMA
		);
		if(! mod->code_la) {
			printk("qdsp - can't dmaalloc %u bytes of dsp code/data\n", mod->size_alloc);
			kfree(mod);
			return -ENOMEM;
		}
		mod->code_pa = (u8*)(size_t)mod->code_dma;
		
#ifdef DSP_COHERENCY_EXT_CTL_OFF
		/* Turn off cache coherent codebase
		*/
		dsp_readl(dsp, dsp->regbase + DSP_COHERENCY_EXT_CTL_OFF, &regval);
		regval &= ~DSP0_COHERENCY_EXT_CTL__IC__MASK;
		dsp_writel(dsp, dsp->regbase + DSP_COHERENCY_EXT_CTL_OFF, regval);
#endif
		
#else
		/* this allocates regular cached kernel memory
		*/
		mod->code_la = kmalloc(mod->size_alloc, GFP_KERNEL);
		if(! mod->code_la) {
			printk("qdsp - can't alloc %u bytes of dsp code/data\n", mod->size_alloc);
			kfree(mod);
			return -ENOMEM;
		}
#ifdef DSP_COHERENCY_EXT_CTL_OFF
		/* 2020-09-09 since this is cached memory, maybe it is better 
		   to turn on coherency */
		/* Turn on cache coherent codebase
		*/
		dsp_readl(dsp, dsp->regbase + DSP_COHERENCY_EXT_CTL_OFF, &regval);
		regval |= DSP0_COHERENCY_EXT_CTL__IC__MASK;
		dsp_writel(dsp, dsp->regbase + DSP_COHERENCY_EXT_CTL_OFF, regval);
#endif
#endif
		/* Align both code and data area to 256 byte boundary
		*/
		mod->aligned_code_la = (u8*)(((size_t)mod->code_la + 0xFF) &~0xFF);

		ret = copy_from_user((void*)mod->aligned_code_la, userCode, codeSize);

#ifndef DSP_CODESEG_DMA
		/* need to flush dcache for kmalloc'd memory
		*/
		mod->code_dma = dma_map_single(&mod->pdev->dev, mod->code_la, mod->size_alloc, DMA_TO_DEVICE);
		if (dma_mapping_error(&mod->pdev->dev, mod->code_dma)) {
			kfree(mod->code_la);
			return -ENOMEM;
		}
		mod->code_pa = (u8*)(size_t)mod->code_dma;
#endif
		mod->aligned_code_pa = (u8*)(((size_t)mod->code_pa + 0xFF) &~0xFF);
		mod->data_la = (u8*)((size_t)mod->aligned_code_la + ((mod->code_size_user + 0xFF) & ~0xFF));
		mod->data_pa = (u8*)((size_t)mod->aligned_code_pa + ((mod->code_size_user + 0xFF) & ~0xFF));

		/*
		printk("##### copied %u bytes of code to la %px pa %px\n",
				codeSize, mod->aligned_code_la, mod->aligned_code_pa);
		*/
		if(userData && dataSize) {
			ret = copy_from_user((void*)mod->data_la, userData, dataSize);
			/*
			printk("##### copied %u bytes of data to la %px pa %px\n",
					dataSize, mod->data_la, mod->data_pa);
			*/
		}
		/* refcount = 1 for new modules */
		mod->refs = 1;
		/* enlist module */
		spin_lock_irqsave(&g_moduleslock, flags);
		list_add_tail(&mod->modules, &g_moduleslist);
		spin_unlock_irqrestore(&g_moduleslock, flags);
	}
	/*
	else
		printk("qdsp - using cached module la %px for code user=%lx\n",
		mod->code_la, userCode);
	*/
	if (mod != dsp->current_module) {
		dsp->current_module = mod;

		/* turn off/invalidate DSP's icache since starting code
		*  that wasn't run last
		*/
		dsp_readl(dsp, dsp->regbase + DSP0_ICACHE_CTL_OFF, &regval);
		dsp_writel(dsp, dsp->regbase + DSP0_ICACHE_CTL_OFF,
			((regval | 1) & ~ 4)); /* disable */
		dsp_writel(dsp, dsp->regbase + DSP0_ICACHE_CTL_OFF,
			((regval | 4) & ~ 1)); /* enable, invalidate cache */
		do {
			/* wait for dsp to clear icache */
			dsp_readl(dsp, dsp->regbase + DSP0_ICACHE_CTL_OFF, &regval);
		} while(!(regval & 0x80));

		/* enable, normal op */
		dsp_writel(dsp, dsp->regbase + DSP0_ICACHE_CTL_OFF, 0);
	}
	/* setup address
	*/
	regval = (unsigned long)mod->aligned_code_pa & DSP0_CS_BASE__SegmentBase__MASK;

	/* setup high-part of address for PAE systems
	*/
	mod->code_pa_high = ((unsigned long)mod->aligned_code_pa >> 32) & DSP0_CS_BASE__SegmentBaseExt__MASK;
	regval |= (mod->code_pa_high << DSP0_CS_BASE__SegmentBaseExt__SHIFT) &
		DSP0_CS_BASE__SegmentBaseExt__MASK;

// 	printk("DSP0_CS_BASE_OFF => regval: 0x%08x\n", regval);

	/* setup code base / length to module
	*/
	ret = dsp_writel(dsp, dsp->regbase + DSP0_CS_BASE_OFF,
		(unsigned int)regval);
	if(! ret)
		ret = dsp_writel(dsp, dsp->regbase + DSP0_CS_LENGTH_OFF,
			(mod->code_size_user + 0xFF) & ~0xFF);

	dsp_writel(dsp, dsp->regbase + DSP0_PC_OFF, startAddress);

// 	printk("DSP0_PC_OFF: regval: 0x%08x\n", startAddress);

	/* setup reg1.b as start address for spring-boarding if needed
	*/
	dsp_writel(dsp, dsp->regbase + DSP0_REG_FILE_B_START_OFF + (1*4), startAddress);
	do {
		/* wait for dsp icache to be idle */
		dsp_readl(dsp, dsp->regbase + DSP0_ICACHE_CTL_OFF, &regval);
	} while(!(regval & 0x80));
	do {
		/* wait for dsp to be halted and idle */
		dsp_readl(dsp, dsp->regbase + DSP0_STATUS_OFF, &regval);
	} while((regval & 3) != 3);
#ifdef DSP_NO_SPRINGBOARD
	/* start DSP directly at start address */
	dsp_writel(dsp, dsp->regbase + DSP0_PC_OFF, startAddress);
#else
	/* normally start DSP at 0 and have it do common init from reset */
	dsp_writel(dsp, dsp->regbase + DSP0_PC_OFF, 0);
#endif
	return ret;
}

static long qdsp_ioctl(struct file* filp, unsigned int cmd, unsigned long arg)
{
	struct dsp_quasar *dsp;
	struct dsp_module *mod;
	struct q_regio ioval;
	struct q_dsp_start qstrt;
	unsigned long flags;
	int ret = 0;

	dsp = (struct dsp_quasar*)filp->private_data;

	/*printk("qdsp%d ioctl %x\n", dsp->minor, cmd);*/
	switch(cmd)
	{
	case QDSPSETREG:
		/* write regs */
		ret = copy_from_user(&ioval, (void*)arg, sizeof(struct q_regio));
		ret = dsp_writel(dsp, ioval.reg, ioval.val);
		break;
	case QDSPGETREG:
		/* read regs */
		ret = copy_from_user(&ioval, (void*)arg, sizeof(struct q_regio));
		ret = dsp_readl(dsp, ioval.reg, &ioval.val);
		ret = copy_to_user((void*)arg, &ioval, sizeof(struct q_regio));
		break;
	case QDSPLOADMODULE:
		ret = copy_from_user(&qstrt, (void*)arg, sizeof(struct q_dsp_start));
		ret = qdsp_setcode(dsp,
			qstrt.start_addr,
			qstrt.text_addr, qstrt.text_size,
			qstrt.data_addr, qstrt.data_size,
			qstrt.nparams, qstrt.params
		);
		break;
	case QDSPGETINST:
		/* get inst base */
		ret = copy_from_user(&ioval, (void*)arg, sizeof(struct q_regio));
		mod = dsp->current_module;
		if(! mod) {
			printk("qdsp - no loaded module\n");
			return -ENOMEM;
		}
		if(! mod->code_la)
			return -ENOMEM;
		if(ioval.reg > mod->code_size_user)
			return -EINVAL;
		if(ioval.reg & 0x3)
			return -EINVAL;
		ioval.val = *(u32*)(mod->code_la + ioval.reg);
		//printk("readinst at %08lx = %08lx\n", ioval.reg, ioval.val);
		ret = copy_to_user((void*)arg, &ioval, sizeof(struct q_regio));
		ret = 0;
		break;
	case QDSPEXECUTE:
		ret = copy_from_user(&qstrt, (void*)arg, sizeof(struct q_dsp_start));
		/* load code if not cached and set code base */
		ret = qdsp_setcode(dsp,
			qstrt.start_addr,
			qstrt.text_addr, qstrt.text_size,
			qstrt.data_addr, qstrt.data_size,
			qstrt.nparams, qstrt.params
		);
		if(ret) break;
		/* start it up */
		dsp_writel(dsp, dsp->regbase + DSP0_GO_OFF, 1);
		break;
	case QGETDONE:
		/* read mailbox */
		if(dsp->mb_head == dsp->mb_tail) {
			/*printk("getmailbox - empty\n");*/
			ret = -ENOMSG;
		}
		else {
			ret = copy_to_user((void*)arg,
				&dsp->mailbox[dsp->mb_tail],
				sizeof(struct q_doneinfo));
			spin_lock_irqsave(&dsp->lock, flags);
			dsp->mb_tail++;
			if(dsp->mb_tail >= MAX_MAILBOX)
				dsp->mb_tail = 0;
			spin_unlock_irqrestore(&dsp->lock, flags);
		}
		break;

	case QDSPDISABLEINTHALT:
		disable_irq(dsp->irqh);
		break;

	case QDSPENABLEINTHALT:
		enable_irq(dsp->irqh);
		break;

	default:
		printk(KERN_WARNING "qdsp - bad ioctl %x\n", cmd);
		ret = -EINVAL;
	}
	return ret;
}

/*
 * Character-device entry points.  The same handler serves native and compat
 * ioctl calls.
 */
static struct file_operations quasar_dsp_ops = {
	.owner          = THIS_MODULE,

	/* lifetime */
	.open           = qdsp_open,
	.release        = qdsp_release,

	/* data path */
	.read           = qdsp_read,
	.write          = qdsp_write,
	.poll           = qdsp_poll,

	/* control path */
	.unlocked_ioctl = qdsp_ioctl,
	.compat_ioctl   = qdsp_ioctl,
};

static int __init quasar_dsp_probe(struct platform_device *pdev)
{
	struct resource	*regs, *ctls;
	struct dsp_quasar *dsp;
	dev_t  dspn;
	int ret;
	static int ndsps = 0;

	dsp = kzalloc(sizeof(struct dsp_quasar), GFP_KERNEL);
	if (!dsp) {
		dev_dbg(&pdev->dev, "out of memory\n");
		return -ENOMEM;
	}
	cdev_init(&dsp->cdev, &quasar_dsp_ops);

	dsp->pdev = pdev;
	dsp->mb_head = dsp->mb_tail = 0;

	ctls = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	if (! ctls) {
		dev_dbg(&pdev->dev, "no mmio ctl resource defined\n");
		ret = -ENXIO;
		goto out;
	}
	dsp->ctlbase = ctls->start;
	dsp->ctlend  = ctls->end;

	regs = platform_get_resource(pdev, IORESOURCE_MEM, 1);
	if (!regs) {
		dev_dbg(&pdev->dev, "no mmio reg resource defined\n");
		ret = -ENXIO;
		goto out;
	}
	dsp->regbase = regs->start;
	dsp->regend  = regs->end;

	dsp->irq = platform_get_irq(pdev, 0);
	if (dsp->irq < 0) {
		dev_dbg(&pdev->dev, "could not get irq\n");
		ret = -ENXIO;
		goto out;
	}

	dsp->irqh = platform_get_irq(pdev, 1);
	if (dsp->irqh < 0) {
		dev_dbg(&pdev->dev, "could not get irqh\n");
		ret = -ENXIO;
		goto out;
	}

	dsp->ctls = ioremap(ctls->start, ctls->end - ctls->start + 1);
	if (!dsp->ctls) {
		ret = -ENOMEM;
		dev_dbg(&pdev->dev, "could not map ctl I/O memory\n");
		goto out;
	}

	dsp->regs = ioremap(regs->start, regs->end - regs->start + 1);
	if (!dsp->regs) {
		ret = -ENOMEM;
		dev_dbg(&pdev->dev, "could not map reg I/O memory\n");
		goto out_iounmapctl;
	}
	spin_lock_init(&dsp->lock);
	{
		volatile u8 __iomem *reg;
		volatile u8 __iomem *ctl;
		u32 temp;

		ctl = ioremap(DSPGP_DSPCLKDISCTRL, 8);
		reg = ioremap(RSTGEN_SWRSTSTATIC4, 4);
		switch (ndsps)
		{
		default:
			printk(KERN_ERR "QDSP%d Error!\n", ndsps);
			break;
		case 0:
			writel(readl(ctl) & DSPGP_DSPCLKDISCTRL__DSP_DSP0__INV_MASK, ctl);
			while (readl(ctl + 4) & DSPGP_DSPCLKDISSTAT__DSP_DSP0__MASK) {;}
			writel(readl(reg) & RSTGEN_SWRSTSTATIC4__DSP_DSP0__INV_MASK, reg);
			printk(KERN_INFO "QDSP%d Reset\n", ndsps);
			break;
		case 1:
			writel(readl(ctl) & DSPGP_DSPCLKDISCTRL__DSP_DSP1__INV_MASK, ctl);
			while (readl(ctl + 4) & DSPGP_DSPCLKDISSTAT__DSP_DSP1__MASK) {;}
			writel(readl(reg) & RSTGEN_SWRSTSTATIC4__DSP_DSP1__INV_MASK, reg);
			printk(KERN_INFO "QDSP%d Reset\n", ndsps);
			break;
		case 2:
			writel(readl(ctl) & DSPGP_DSPCLKDISCTRL__DSP_DSP2__INV_MASK, ctl);
			while (readl(ctl + 4) & DSPGP_DSPCLKDISSTAT__DSP_DSP2__MASK) {;}
			writel(readl(reg) & RSTGEN_SWRSTSTATIC4__DSP_DSP2__INV_MASK, reg);
			printk(KERN_INFO "QDSP%d Reset\n", ndsps);
			break;
		case 3:
			writel(readl(ctl) & DSPGP_DSPCLKDISCTRL__DSP_DSP3__INV_MASK, ctl);
			while (readl(ctl + 4) & DSPGP_DSPCLKDISSTAT__DSP_DSP3__MASK) {;}
			writel(readl(reg) & RSTGEN_SWRSTSTATIC4__DSP_DSP3__INV_MASK, reg);
			printk(KERN_INFO "QDSP%d Reset\n", ndsps);
			break;
#if defined(Q6600)
		case 4:
			writel(readl(ctl) & DSPGP_DSPCLKDISCTRL__DSP_DSP4__INV_MASK, ctl);
			while (readl(ctl + 4) & DSPGP_DSPCLKDISSTAT__DSP_DSP4__MASK) {;}
			writel(readl(reg) & RSTGEN_SWRSTSTATIC4__DSP_DSP4__INV_MASK, reg);
			printk(KERN_INFO "QDSP%d Reset\n", ndsps);
			break;
		case 5:
			writel(readl(ctl) & DSPGP_DSPCLKDISCTRL__DSP_DSP5__INV_MASK, ctl);
			while (readl(ctl + 4) & DSPGP_DSPCLKDISSTAT__DSP_DSP5__MASK) {;}
			writel(readl(reg) & RSTGEN_SWRSTSTATIC4__DSP_DSP5__INV_MASK, reg);
			printk(KERN_INFO "QDSP%d Reset\n", ndsps);
			break;
		case 6:
			writel(readl(ctl) & DSPGP_DSPCLKDISCTRL__DSP_DSP6__INV_MASK, ctl);
			while (readl(ctl + 4) & DSPGP_DSPCLKDISSTAT__DSP_DSP6__MASK) {;}
			writel(readl(reg) & RSTGEN_SWRSTSTATIC4__DSP_DSP6__INV_MASK, reg);
			printk(KERN_INFO "QDSP%d Reset\n", ndsps);
			break;
#endif
		}
		iounmap(ctl);
		iounmap(reg);

		/* turn on coherent external mem access and codebase for 63xx as Linux is always CCI mode 
		*/
		if(xmemcoherent) {
			dsp_readl(dsp, dsp->regbase  + DSP_COHERENCY_EXT_CTL_OFF, &temp);
			temp |= DSP0_COHERENCY_EXT_CTL__XM__MASK;
			dsp_writel(dsp, dsp->regbase + DSP_COHERENCY_EXT_CTL_OFF, temp);
		}
	}
	/* init wait queue for mailbox
	*/
	init_waitqueue_head(&dsp->mbq);

	ret = request_irq(dsp->irq, quasar_dsp_interrupt,
		0, "dspmail", dsp);
	if (ret) {
		dev_dbg(&pdev->dev, "could not request irq %d\n", dsp->irq);
		goto out_iounmapreg;
	}
	ret = request_irq(dsp->irqh, quasar_dsp_interrupt,
		0, "dsphalt", dsp);
	if (ret) {
		dev_dbg(&pdev->dev, "could not request irq %d\n", dsp->irqh);
		goto out_free_irq;
	}
	dspn = MKDEV(QDSP_MAJOR, ndsps);
	ret = cdev_add(&dsp->cdev, dspn, 1);
	if (ret) {
		dev_dbg(&pdev->dev, "could not create char dev %d\n", ndsps);
		goto out_free_irqh;
	}
	ndsps++;
	platform_set_drvdata(pdev, dsp);
	device_init_wakeup(&pdev->dev, 1);

	dev_info(&pdev->dev,
	   	"quasar dsp at MMIO %08lx [%08X-%08X], %08lx [%08X-%08X] irq %d,%d\n",
		(unsigned long)dsp->ctls, (unsigned int)ctls->start, (unsigned int)ctls->end,
		(unsigned long)dsp->regs, (unsigned int)regs->start, (unsigned int)regs->end,
		dsp->irq, dsp->irqh);
	return 0;

out_free_irqh:
	free_irq(dsp->irqh, dsp);
	dsp->irqh = 0;
out_free_irq:
	free_irq(dsp->irq, dsp);
	dsp->irq = 0;
out_iounmapreg:
	iounmap(dsp->regs);
out_iounmapctl:
	iounmap(dsp->ctls);
out:
	kfree(dsp);
	return ret;
}

static int __exit quasar_dsp_remove(struct platform_device *pdev)
{
	struct dsp_quasar *dsp = platform_get_drvdata(pdev);
	struct dsp_module *mod;

	device_init_wakeup(&pdev->dev, 0);

	while (!list_empty(&g_moduleslist)) {
		mod = list_entry(g_moduleslist.next, struct dsp_module, modules);
		list_del(&mod->modules);
		if(mod->code_la) {
#ifdef DSP_CODESEG_DMA
			dma_free_coherent(&mod->pdev->dev,
				mod->size_alloc, (u8*)mod->code_la, mod->code_dma);
#else
			kfree(mod->code_la);
#endif
		}
		kfree(mod);
	}
	cdev_del(&dsp->cdev);
	if(dsp->irq > 0)
		free_irq(dsp->irq, dsp);
	if(dsp->irqh > 0)
		free_irq(dsp->irqh, dsp);
	iounmap(dsp->ctls);
	iounmap(dsp->regs);
	kfree(dsp);
	platform_set_drvdata(pdev, NULL);

	return 0;
}

/*
 * Platform suspend hook: put the DSP selected by dsp->minor into reset and
 * gate its clock.
 *
 * Per DSP the sequence is: set its bit in the soft-reset register, set its
 * clock-disable bit, then spin until the status word at offset 4 of the
 * clock-disable register shows the bit set.  An unknown minor only logs an
 * error.  Always returns 0.
 */
static int quasar_dsp_suspend(struct platform_device *pdev, pm_message_t state)
{
	struct dsp_quasar *dsp = platform_get_drvdata(pdev);
	volatile u8 __iomem *clk_regs;
	volatile u8 __iomem *rst_reg;

/* assert reset for DSP n, disable its clock, wait for the clock to stop */
#define QDSP_SUSPEND_DSP(n)							\
	case n:									\
		writel(readl(rst_reg) | RSTGEN_SWRSTSTATIC4__DSP_DSP##n##__MASK, rst_reg);	\
		writel(readl(clk_regs) | DSPGP_DSPCLKDISCTRL__DSP_DSP##n##__MASK, clk_regs);	\
		while ((readl(clk_regs + 4) & DSPGP_DSPCLKDISSTAT__DSP_DSP##n##__MASK) == 0x0) {;}	\
		break

	clk_regs = ioremap(DSPGP_DSPCLKDISCTRL, 8);
	rst_reg = ioremap(RSTGEN_SWRSTSTATIC4, 4);

	switch (dsp->minor) {
	QDSP_SUSPEND_DSP(0);
	QDSP_SUSPEND_DSP(1);
	QDSP_SUSPEND_DSP(2);
	QDSP_SUSPEND_DSP(3);
#if defined(Q6600)
	QDSP_SUSPEND_DSP(4);
	QDSP_SUSPEND_DSP(5);
	QDSP_SUSPEND_DSP(6);
#endif
	default:
		printk(KERN_ERR "Error! QDSP%d\n", dsp->minor);
		break;
	}
#undef QDSP_SUSPEND_DSP

	/* read the reset register back once more; the value is not used */
	(void)readl(rst_reg);
	iounmap(rst_reg);
	iounmap(clk_regs);

	return 0;
}

static int quasar_dsp_resume(struct platform_device *pdev)
{
	struct dsp_quasar *dsp = platform_get_drvdata(pdev);
	u32	data;

	{
		volatile u8 __iomem *reg;
		volatile u8 __iomem *ctl;
		u32 temp;

		ctl = ioremap(DSPGP_DSPCLKDISCTRL, 8);
		reg = ioremap(RSTGEN_SWRSTSTATIC4, 4);
		switch (dsp->minor)
		{
		default:
			printk(KERN_ERR "Error! QDSP%d\n", dsp->minor);
			break;
		case 0:
			writel(readl(ctl) & DSPGP_DSPCLKDISCTRL__DSP_DSP0__INV_MASK, ctl);
			while (readl(ctl + 4) & DSPGP_DSPCLKDISSTAT__DSP_DSP0__MASK) {;}
			writel(readl(reg) & RSTGEN_SWRSTSTATIC4__DSP_DSP0__INV_MASK & RSTGEN_SWRSTSTATIC4__DSP__INV_MASK, reg);
			//printk(KERN_INFO "QDSP%d Resume\n", dsp->minor);
			break;
		case 1:
			writel(readl(ctl) & DSPGP_DSPCLKDISCTRL__DSP_DSP1__INV_MASK, ctl);
			while (readl(ctl + 4) & DSPGP_DSPCLKDISSTAT__DSP_DSP1__MASK) {;}
			writel(readl(reg) & RSTGEN_SWRSTSTATIC4__DSP_DSP1__INV_MASK & RSTGEN_SWRSTSTATIC4__DSP__INV_MASK, reg);
			//printk(KERN_INFO "QDSP%d Resume\n", dsp->minor);
			break;
		case 2:
			writel(readl(ctl) & DSPGP_DSPCLKDISCTRL__DSP_DSP2__INV_MASK, ctl);
			while (readl(ctl + 4) & DSPGP_DSPCLKDISSTAT__DSP_DSP2__MASK) {;}
			writel(readl(reg) & RSTGEN_SWRSTSTATIC4__DSP_DSP2__INV_MASK & RSTGEN_SWRSTSTATIC4__DSP__INV_MASK, reg);
			//printk(KERN_INFO "QDSP%d Resume\n", dsp->minor);
			break;
		case 3:
			writel(readl(ctl) & DSPGP_DSPCLKDISCTRL__DSP_DSP3__INV_MASK, ctl);
			while (readl(ctl + 4) & DSPGP_DSPCLKDISSTAT__DSP_DSP3__MASK) {;}
			writel(readl(reg) & RSTGEN_SWRSTSTATIC4__DSP_DSP3__INV_MASK & RSTGEN_SWRSTSTATIC4__DSP__INV_MASK, reg);
			//printk(KERN_INFO "QDSP%d Resume\n", dsp->minor);
			break;
#if defined(Q6600)
		case 4:
			writel(readl(ctl) & DSPGP_DSPCLKDISCTRL__DSP_DSP4__INV_MASK, ctl);
			while (readl(ctl + 4) & DSPGP_DSPCLKDISSTAT__DSP_DSP4__MASK) {;}
			writel(readl(reg) & RSTGEN_SWRSTSTATIC4__DSP_DSP4__INV_MASK & RSTGEN_SWRSTSTATIC4__DSP__INV_MASK, reg);
			//printk(KERN_INFO "QDSP%d Resume\n", dsp->minor);
			break;
		case 5:
			writel(readl(ctl) & DSPGP_DSPCLKDISCTRL__DSP_DSP5__INV_MASK, ctl);
			while (readl(ctl + 4) & DSPGP_DSPCLKDISSTAT__DSP_DSP5__MASK) {;}
			writel(readl(reg) & RSTGEN_SWRSTSTATIC4__DSP_DSP5__INV_MASK & RSTGEN_SWRSTSTATIC4__DSP__INV_MASK, reg);
			//printk(KERN_INFO "QDSP%d Resume\n", dsp->minor);
			break;
		case 6:
			writel(readl(ctl) & DSPGP_DSPCLKDISCTRL__DSP_DSP6__INV_MASK, ctl);
			while (readl(ctl + 4) & DSPGP_DSPCLKDISSTAT__DSP_DSP6__MASK) {;}
			writel(readl(reg) & RSTGEN_SWRSTSTATIC4__DSP_DSP6__INV_MASK & RSTGEN_SWRSTSTATIC4__DSP__INV_MASK, reg);
			//printk(KERN_INFO "QDSP%d Resume\n", dsp->minor);
			break;
#endif
		}
		temp = readl(reg);
		iounmap(reg);
		iounmap(ctl);

		/* turn on coherent external mem access and codebase for 63xx as Linux is always CCI mode 
		*/
		if(xmemcoherent) {
			dsp_readl(dsp, dsp->regbase  + DSP_COHERENCY_EXT_CTL_OFF, &temp);
			temp |= DSP0_COHERENCY_EXT_CTL__XM__MASK;
			dsp_writel(dsp, dsp->regbase + DSP_COHERENCY_EXT_CTL_OFF, temp);
		}
	}
	// enable this DSP and set it to
	// stop on reset so GO is used
	//
	dsp_writel(dsp, dsp->ctlbase, 0);
	dsp_readl(dsp, dsp->ctlbase, &data);
#ifdef DSP_NO_RESET
	dsp_writel(dsp, dsp->ctlbase, DSP_NO_RESET | ENABLE_DSP);
#else
	dsp_writel(dsp, dsp->ctlbase, ENABLE_DSP);
#endif
	dsp_readl(dsp, dsp->ctlbase, &data);

	// reset DSP to halt
	//
#ifdef DSP0_HALT_OFF
	dsp_writel(dsp, dsp->regbase + DSP0_HALT_OFF, 1);
#endif
#ifdef DSP0_SOFT_RST_OFF
	dsp_writel(dsp, dsp->regbase + DSP0_SOFT_RST_OFF, 1);
#endif

#ifdef DSP0_DS_LENGTH_SP_OFF
	dsp_writel(dsp, dsp->regbase + DSP0_DS_LENGTH_SP_OFF, 0xFFF00);
#endif
    // DMA burst length = reg_val + 1, as packet size 64 bytes
    dsp_writel(dsp, dsp->regbase + DSP0_DMA_XL_BSIZE_OFF, 15);
    dsp_writel(dsp, dsp->regbase + DSP0_DMA_LX_BSIZE_OFF, 15);

	//printk("    ==== quasar_dsp_resume for qdsp%d ====\n", dsp->minor);
	return 0;
}

MODULE_ALIAS("platform:quasar-dsp");

/* Device-tree match table: binds to nodes compatible with "qbit,quasar-dsp". */
static const struct of_device_id qbit_quasar_id_table[] = {
	{
		.compatible = "qbit,quasar-dsp",
	},
	{ /* end of table */ }
};
MODULE_DEVICE_TABLE(of, qbit_quasar_id_table);

/* Platform driver glue: bind/unbind and legacy suspend/resume callbacks. */
static struct platform_driver quasar_dsp_driver_ops = {
	.driver = {
		.owner          = THIS_MODULE,
		.name           = "quasar-dsp",
		.of_match_table = of_match_ptr(qbit_quasar_id_table),
	},
	.probe   = quasar_dsp_probe,
	.remove  = quasar_dsp_remove,
	.suspend = quasar_dsp_suspend,
	.resume  = quasar_dsp_resume,
};

/* Module entry point: register the platform driver and report the result. */
static int __init quasar_dsp_init(void)
{
	int status;

	status = platform_driver_register(&quasar_dsp_driver_ops);
	return status;
}
module_init(quasar_dsp_init);

/* Module exit point: unregister the platform driver. */
static void __exit quasar_dsp_exit(void)
{
	struct platform_driver *drv = &quasar_dsp_driver_ops;

	platform_driver_unregister(drv);
}
module_exit(quasar_dsp_exit);

MODULE_DESCRIPTION("Quasar DSP driver");
MODULE_LICENSE("Dual BSD/GPL");

