/*
 * A simple example of a Quasar PCIe device driver
 *
 *
 * Copyright (c) 2015, The Linux Foundation.
 * All rights reserved.
 *
 * Copyright (c) 2019, 2020, 2021 QBit Semiconductor LTD.
 *
 * Redistribution and use
 * in source and binary forms, with or without modification,
 * are permitted (subject to the limitations in the disclaimer
 * below) provided that the following conditions are met :
 *   *Redistributions of source code must retain the above
 *    copyright notice, this list of conditions and the
 *    following disclaimer.
 *   *Redistributions in binary form must reproduce the
 *    above copyright notice, this list of conditions and
 *    the following disclaimer
 *    in the documentation and/or other materials provided
 *    with the distribution.
 *
 *  NO EXPRESS OR IMPLIED LICENSES TO ANY PARTYS PATENT
 *  RIGHTS ARE GRANTED BY THIS LICENSE.
 *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
 *  AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
 *  WARRANTIES, INCLUDING,
 *  BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 *  AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 *  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
 *  OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 *  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
 *  OR PROFITS;
 *  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 *  OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 *  OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
 *  OF SUCH DAMAGE
 *
 */

#include <linux/init.h>
#include <linux/module.h>
#include <linux/version.h>   /* for LINUX_VERSION_CODE */
#include <linux/platform_device.h>
#include <linux/device.h>
#include <linux/cdev.h>
#include <linux/pci.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/msi.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/types.h>
#include <linux/list.h>
#include <linux/kthread.h>
#include <linux/delay.h>
#if LINUX_VERSION_CODE <= KERNEL_VERSION(4,10,0)
#include <asm/uaccess.h>
#else
#include <linux/uaccess.h>
#endif
#include <quasar/qbsocregs.h>

/* Name passed to the PCI core, request_irq() and pci_request_regions(). */
#define MODULE_NAME "quasar-pcied"

/*
 * Integer type used as the intermediate step when casting between pointers
 * and 32-bit register values: u32 when built with Q6300 defined, u64
 * otherwise.
 */
#ifdef Q6300
#define uint_port u32
#else
#define uint_port u64
#endif

/* Set to non-zero to compile in the verbose dev_warn()/printk() tracing. */
#define DEBUG_PCIE   (0)

/* Major number of the character device created in probe */
#define QPCIED_MAJOR (221)

/* Number of BARs in device */
#define BAR_NUMBER   (3)

/* Size in bytes of the coherent DMA buffer allocated in probe.
 * Total size cannot be over 256KB (default) or the coherent_pool
 * assigned in quasar63xxevma0-512mbyte.dts
 */
#define BUF_SIZE (128*1024)

/* Self-test switches for the test thread; each runs without a user-space
 * application when set to non-zero. */
#define SELF_TEST_PIO      (0)   /* dump the device's configuration registers */
#define SELF_TEST_DMA      (0)   /* exercise the device's write and read DMA */
#define SELF_TEST_HOST_DMA (0)   /* exercise the host's DMA; only works for QB63xx PCIe host */

/*
 * Per-device driver state. Allocated in probe, stored as the PCI driver data
 * and handed to the file operations through file->private_data.
 */
struct quasar_pcied {
	/* Embedded character device; open() recovers this struct from it. */
	struct cdev		cdev;
	/* &pdev->dev, used for dev_*() logging. */
	struct device *dev;
    struct pci_dev	*pdev;
	/* Initialised in probe; not taken anywhere in the visible code. */
	spinlock_t		lock;

	/* Start address, length and ioremap()ed address of each non-empty
	 * resource found in probe. */
	unsigned long bar_s[BAR_NUMBER];
	unsigned long bar_l[BAR_NUMBER];
	void __iomem *bar_va[BAR_NUMBER];

	/* Tables read back-to-back from the start of bar_va[2] in probe:
	 * the EP's BAR settings, then the EP's inbound window settings.
	 * backup_ep_inbound_wins[1][1] is used as the host-side DMA address. */
	unsigned long backup_ep_bars[3][4];
	unsigned long backup_ep_inbound_wins[4][6];

	/* Register block used for "device" DMA (ioremap of 0x40000000). */
	void __iomem	*regs_config;
	/* Register block used for "host" DMA (ioremap of PCIE1_GPEXP_CFG_VENDORID). */
	void __iomem	*regs_config_host;

	/* DMA completion signalling: the starter sets the flag to 1 and sleeps
	 * on the queue; the interrupt handler clears the flag and wakes it. */
	wait_queue_head_t rdma_wait_queue;
	wait_queue_head_t wdma_wait_queue;
	u8 rdma_wait;
	u8 wdma_wait;
	/* hrtcnt1 samples taken when a DMA is started and when the interrupt
	 * handler sees a pending transfer. */
	u32 time_start;
	u32 time_end;
	/* ioremap of HRTCNT1, read as a timestamp source. */
	void __iomem    *hrtcnt1;

	/* Coherent DMA buffer of BUF_SIZE bytes and its bus address. */
	u8 *buf;
	dma_addr_t buf_dma;
	/* Never assigned in the visible code. */
	u32 buf_size;

	/* EP-side buffer address, read from bar_va[2] right after the tables. */
	u8 *buf_ep;
	/* Test thread started in probe. */
	struct task_struct	*thread_task;
};

static int qpcied_dma_status_clear(struct quasar_pcied *pcied, int wdma, int host)
{
	void __iomem *regs;

	switch (host) {
	case 0:   // device
		regs = pcied->regs_config;
		break;
	case 1:   // host, only when QB66xx PCIe is host mode
		regs = pcied->regs_config_host;
		break;
	default:
		printk("Not host nor device DMA...%d\n", host);
		return 1;
	}
	mb();

	switch (wdma) {
	case 0:   // RDMA
		writel(0x100, regs + PCIE1_PAB_RDMA_CTRL_OFF);   // indirect descriptor
		writel(0xff, regs + PCIE1_PAB_RDMA_STAT_OFF);    // W1CLR
		writel(0, regs + PCIE1_PAB_IND_DESC_SEL_OFF);    // desc 0, rdma
		writel(0, regs + PCIE1_PAB_IND_DESC_CTRL_OFF);
		writel(0, regs + PCIE1_PAB_IND_DESC_STAT_OFF);
		writel(0x10, regs + PCIE1_PAB_IND_DESC_SEL_OFF); // desc 1, rdma
		writel(0, regs + PCIE1_PAB_IND_DESC_CTRL_OFF);
		writel(0, regs + PCIE1_PAB_IND_DESC_STAT_OFF);
		writel(0x20, regs + PCIE1_PAB_IND_DESC_SEL_OFF); // desc 2, rdma
		writel(0, regs + PCIE1_PAB_IND_DESC_CTRL_OFF);
		writel(0, regs + PCIE1_PAB_IND_DESC_STAT_OFF);
		writel(0x30, regs + PCIE1_PAB_IND_DESC_SEL_OFF); // desc 3, rdma
		writel(0, regs + PCIE1_PAB_IND_DESC_CTRL_OFF);
		writel(0, regs + PCIE1_PAB_IND_DESC_STAT_OFF);
		break;
	case 1:   // WDMA
		writel(0x100, regs + PCIE1_PAB_WDMA_CTRL_OFF);   // indirect descriptor
		writel(0xff, regs + PCIE1_PAB_WDMA_STAT_OFF);    // W1CLR
		writel(0x01, regs + PCIE1_PAB_IND_DESC_SEL_OFF); // desc 0, wdma
		writel(0, regs + PCIE1_PAB_IND_DESC_CTRL_OFF);
		writel(0, regs + PCIE1_PAB_IND_DESC_STAT_OFF);
		writel(0x11, regs + PCIE1_PAB_IND_DESC_SEL_OFF); // desc 1, wdma
		writel(0, regs + PCIE1_PAB_IND_DESC_CTRL_OFF);
		writel(0, regs + PCIE1_PAB_IND_DESC_STAT_OFF);
		writel(0x21, regs + PCIE1_PAB_IND_DESC_SEL_OFF); // desc 2, wdma
		writel(0, regs + PCIE1_PAB_IND_DESC_CTRL_OFF);
		writel(0, regs + PCIE1_PAB_IND_DESC_STAT_OFF);
		writel(0x31, regs + PCIE1_PAB_IND_DESC_SEL_OFF); // desc 3, wdma
		writel(0, regs + PCIE1_PAB_IND_DESC_CTRL_OFF);
		writel(0, regs + PCIE1_PAB_IND_DESC_STAT_OFF);
		break;
	default:
		printk("Not read nor write operation %d\n", wdma);
		return 1;
	}
	return 0;
}

/*
 * qpcied_dma_start() - program indirect descriptor 0 and run one DMA transfer.
 * @pcied: driver state
 * @wdma:  0 = read DMA (RDMA), 1 = write DMA (WDMA)
 * @host:  0 = use the device register block, 1 = use the host register block
 * @src:   source address, in bytes
 * @dst:   destination address, in bytes
 * @size:  transfer length in dwords (4 bytes); only the low 18 bits fit in
 *         the descriptor control word, anything above is masked off
 *
 * Marks the transfer as pending, programs the descriptor, starts the engine
 * and sleeps for at most 100 jiffies until the interrupt handler clears the
 * pending flag. May sleep, so it must be called from process context.
 *
 * Return: 0 when completion was signalled, 1 when @host or @wdma is out of
 * range (nothing is programmed and no flag is touched), -ETIMEDOUT when no
 * completion was signalled in time.
 */
static int qpcied_dma_start(struct quasar_pcied *pcied, int wdma, int host,
			u32 src, u32 dst, u32 size)
{
	u32 desc_ctrl = ((0x3FFFF000 & (size << 12)) | 0x00000FF1);
	unsigned long ctrl_off;
	wait_queue_head_t *waitq;
	u8 *pending;
	void __iomem *regs;
	long left;

#if DEBUG_PCIE
	dev_warn(pcied->dev, "%sdma_start(%s): src(0x%08x) dst(0x%08x) size dwords(0x%08x)\n",
		(wdma == 0) ? "r" : "w", (host == 1) ? "h" : "d", src, dst, size);
#endif
	switch (host) {
	case 0:   /* device */
		regs = pcied->regs_config;
		break;
	case 1:   /* host, only when QB66xx PCIe is host mode */
		regs = pcied->regs_config_host;
		break;
	default:
		printk("Not host nor device DMA...%d\n", host);
		return 1;
	}

	/* Validate the direction before touching any wait flag, so a bad
	 * argument cannot leave a flag stuck at 1. */
	switch (wdma) {
	case 0:   /* RDMA */
		ctrl_off = PCIE1_PAB_RDMA_CTRL_OFF;
		waitq = &pcied->rdma_wait_queue;
		pending = &pcied->rdma_wait;
		break;
	case 1:   /* WDMA */
		ctrl_off = PCIE1_PAB_WDMA_CTRL_OFF;
		waitq = &pcied->wdma_wait_queue;
		pending = &pcied->wdma_wait;
		break;
	default:
		printk("Not read nor write operation %d\n", wdma);
		return 1;
	}

	*pending = 1;

	writel(0x100, regs + ctrl_off);
	/* Descriptor 0 of the chosen direction: selector 0 for RDMA, 1 for WDMA. */
	writel(wdma, regs + PCIE1_PAB_IND_DESC_SEL_OFF);
	writel(desc_ctrl, regs + PCIE1_PAB_IND_DESC_CTRL_OFF);
	writel(src, regs + PCIE1_PAB_IND_DESC_SRC_ADDR_OFF);
	writel(0, regs + PCIE1_PAB_IND_DESC_SRC_ADDRX_OFF);
	writel(dst, regs + PCIE1_PAB_IND_DESC_DST_ADDR_OFF);
	writel(0, regs + PCIE1_PAB_IND_DESC_DST_ADDRX_OFF);
	mb();
	pcied->time_start = readl(pcied->hrtcnt1);
	writel(0x101, regs + ctrl_off);  /* must do this last */

	/* The wait is the same for host and device transfers. The timeout is
	 * in jiffies, so its wall-clock length depends on HZ. */
	left = wait_event_timeout(*waitq, (*pending == 0), 100);
	if (left == 0) {
		*pending = 0;
		dev_err(pcied->dev, "%s DMA timed out\n", wdma ? "write" : "read");
		return -ETIMEDOUT;
	}

#if DEBUG_PCIE
	{
		u32 elapsed = pcied->time_end - pcied->time_start;

		dev_warn(pcied->dev, "\nDMA time %d us from 0x%08x to 0x%08x\n",
			elapsed, pcied->time_start, pcied->time_end);
		/* Skip the throughput line rather than divide by zero. */
		if (elapsed)
			dev_warn(pcied->dev, "DMA throughput %d MBytes/sec for %d bytes\n",
				(size<<2)/elapsed, (size<<2));
	}
#endif

	return 0;
}

/*
 * qpcied_open() - open handler of the character device.
 *
 * Recovers the driver state from the cdev embedded in it and stores it in
 * the file's private_data for the other file operations. Always returns 0.
 */
static int qpcied_open(struct inode *inode, struct file *fd)
{
	struct quasar_pcied *owner =
		container_of(inode->i_cdev, struct quasar_pcied, cdev);

	fd->private_data = owner;
#if DEBUG_PCIE
	dev_warn(owner->dev, "qpcied_open() done\n");
#endif
	return 0;
}

/*
 * qpcied_close() - release handler of the character device.
 *
 * open() takes no per-file resources, so there is nothing to undo.
 * Always returns 0.
 */
static int qpcied_close(struct inode *inode, struct file *fd)
{
	return 0;
}

/*
 * qpcied_read() - get data from the QB63xx PCIe device.
 * @fd:     open file; private_data holds the driver state
 * @buffer: user buffer that receives the data
 * @length: number of bytes requested; the DMA moves length / 4 dwords
 * @offset: unused
 *
 * Asks the QB63xx PCIe device to issue a write DMA to this host, then copies
 * @length bytes from the host memory window to user space.
 *
 * Return: @length on success, -EINVAL for a zero length, -ESHUTDOWN when the
 * file has no driver state, -EFAULT when the user buffer cannot be written,
 * or the DMA error (-EIO for a non-errno DMA failure).
 */
static ssize_t qpcied_read(struct file *fd, char __user *buffer,
			size_t length, loff_t *offset)
{
	struct quasar_pcied *pcied = fd->private_data;
	int ret;

	if (length == 0)
		return -EINVAL;

	if (!pcied)
		return -ESHUTDOWN;

	ret = qpcied_dma_status_clear(pcied, 1 /*wdma*/, 0 /*device*/);
	if (!ret)
		ret = qpcied_dma_start(pcied, 1 /*wdma*/, 0 /*device*/,
				(u32)(uint_port)pcied->buf_ep /*src at EP*/,
				pcied->backup_ep_inbound_wins[1][1] /*dst at EP*/,
				length>>2 /*dwords*/);
	if (ret)
		return (ret < 0) ? ret : -EIO;

	/*
	 * NOTE(review): 0xa0000000 is used directly as a kernel pointer and
	 * @length is not bounded against the size of that window -- confirm
	 * both against the platform memory map.
	 *
	 * copy_to_user() returns the number of bytes it could NOT copy, so
	 * any non-zero result is a fault.
	 */
	if (copy_to_user(buffer, (u8 *)0xa0000000 /*host memory mapping*/,
			length))
		return -EFAULT;

	return length;
}

/*
 * qpcied_write() - send data to the QB63xx PCIe device.
 * @fd:     open file; private_data holds the driver state
 * @buffer: user buffer holding the data to send
 * @length: number of bytes to send; the DMA moves length / 4 dwords
 * @offset: unused
 *
 * Copies @length bytes from user space into the host memory window, then
 * asks the QB63xx PCIe device to issue a read DMA from this host.
 *
 * Return: @length on success (returning 0 would tell the caller that nothing
 * was written), -EINVAL for a zero length, -ESHUTDOWN when the file has no
 * driver state, -EFAULT when the user buffer cannot be read, or the DMA
 * error (-EIO for a non-errno DMA failure).
 */
static ssize_t qpcied_write(struct file *fd, const char __user *buffer,
			size_t length, loff_t *offset)
{
	struct quasar_pcied *pcied = fd->private_data;
	int ret;

	if (length == 0)
		return -EINVAL;

	if (!pcied)
		return -ESHUTDOWN;

	/*
	 * Copy data from the user buffer to PCIe memory.
	 * NOTE(review): 0xa0000000 is used directly as a kernel pointer and
	 * @length is not bounded against the size of that window -- confirm
	 * both against the platform memory map.
	 */
	if (copy_from_user((u8 *)0xa0000000 /*host memory mapping*/, buffer, length)) {
		dev_err(pcied->dev, "copy from user buffer failed\n");
		return -EFAULT;
	}

	ret = qpcied_dma_status_clear(pcied, 0 /*rdma*/, 0 /*device*/);
	if (!ret)
		ret = qpcied_dma_start(pcied, 0 /*rdma*/, 0 /*device*/,
				pcied->backup_ep_inbound_wins[1][1] /*src at EP*/,
				(u32)(uint_port)pcied->buf_ep /*dst at EP*/,
				length>>2 /*dwords*/);
	if (ret)
		return (ret < 0) ? ret : -EIO;

	return length;
}

/* ioctl commands. The first pair goes through the PCI configuration
 * accessors of the bound device; the HOST pair accesses the host register
 * mapping directly. */
#define PCIE_REGISTER_READ       0x01
#define PCIE_REGISTER_WRITE      0x02
#define PCIE_HOST_REGISTER_READ  0x11
#define PCIE_HOST_REGISTER_WRITE 0x12

/* ioctl argument, copied in from user space and, for the read commands,
 * copied back with @value filled in. */
struct gpcied_register {
	u32 address;   /* register address; validated against the allowed ranges */
	u32 value;     /* value to write, or the value that was read */
};

static long qpcied_ioctl(struct file* fd, unsigned int cmd, unsigned long arg)
{
    struct pci_dev *pdev;
	struct quasar_pcied *pcied;
	struct gpcied_register reg;
    u32 i;

	pcied = (struct quasar_pcied *)fd->private_data;
  	pdev = pcied->pdev;

	if (copy_from_user((void*)&reg,
		(void*)arg, sizeof(struct gpcied_register)))
		return -EINVAL;

	if ((reg.address >= PCICLKDISCTRL) &&
		(reg.address <= PCI_TEST_OFF)) {
		// pass
	} else if ((reg.address >= PCIE1_GPEXP_CFG_VENDORID) &&
		(reg.address <= PCIE1_PAB_OB_BUF_SIZE_CTRL)) {
		// pass
	} else if ((reg.address < PCIE_SPARE) &&
		(reg.address > PCIE_CLKRX_CTRL2)) {
		// pass
	} else {
		return -EINVAL;
	}

	switch(cmd)
	{
	case PCIE_REGISTER_READ:
		i = reg.address - PCIE1_GPEXP_CFG_VENDORID;
		pci_bus_read_config_dword(pdev->bus, pdev->devfn, i, &reg.value);
		if (copy_to_user((void*)arg,
			(void*)&reg, sizeof(struct gpcied_register)))
			return -EINVAL;
		break;
	case PCIE_REGISTER_WRITE:
		i = reg.address - PCIE1_GPEXP_CFG_VENDORID;
		pci_bus_write_config_dword(pdev->bus, pdev->devfn, i, reg.value);
		break;
	case PCIE_HOST_REGISTER_READ:
		i = (u32)(uint_port)pcied->regs_config_host + reg.address -
			PCIE1_GPEXP_CFG_VENDORID;
#ifndef Q6300
		//i = (u32)pcied->regs_config_host + reg.address -
		//	PCIE2_GPEXP_CFG_VENDORID;
#endif
		reg.value = readl((void *)(uint_port)i);
		if (copy_to_user((void*)arg,
			(void*)&reg, sizeof(struct gpcied_register)))
			return -EINVAL;
		break;
	case PCIE_HOST_REGISTER_WRITE:
		i = (u32)(uint_port)pcied->regs_config_host + reg.address -
			PCIE1_GPEXP_CFG_VENDORID;
#ifndef Q6300
		//i = (u32)pcied->regs_config_host + reg.address -
		//	PCIE2_GPEXP_CFG_VENDORID;
#endif
		writel(reg.value, (void *)(uint_port)i);
		break;
	default:
		dev_err(pcied->dev, "Unknonw io command 0x%x\n", cmd);
		break;
	}

	return 0;
}

static struct file_operations quasar_pcied_fops = {
	.owner = THIS_MODULE,
	.open = qpcied_open,
	.release = qpcied_close,
	.read = qpcied_read,
	.write = qpcied_write,
	.unlocked_ioctl	= qpcied_ioctl,
	.compat_ioctl	= qpcied_ioctl 
};

static irqreturn_t quasar_pcied_msi_irq(int irq, void *_pdev)
{
	struct pci_dev *pdev = _pdev;
	struct quasar_pcied *pcied = pci_get_drvdata(pdev);
#ifndef CONFIG_PCI_MSI
	void __iomem *addr;
	u32 val;
#endif

	if ((pcied->wdma_wait == 1) || (pcied->rdma_wait== 1)) {
		if (pcied->wdma_wait == 1) {
			wake_up(&pcied->wdma_wait_queue);
			pcied->wdma_wait = 0;
		}
		if (pcied->rdma_wait == 1) {
			wake_up(&pcied->rdma_wait_queue);
			pcied->rdma_wait = 0;
		}
		pcied->time_end = readl(pcied->hrtcnt1);
	}

#ifndef CONFIG_PCI_MSI   /* for INTA */
	addr = pcied->regs_config + PCIE1_PAB_MB_AXI_CTRL_OFF;
	val = readl(addr);
	if (val & 0x01) {   /* QB63xx EP drived ASSERT_INTA */
#if DEBUG_PCIE
		addr = pcied->regs_config + PCIE1_PAB_MB_AXI_DATA_OFF;
		dev_warn(pcied->dev,	"EP PCIE1_PAB_MB_AXI_DATA=0x%08X\n",
			readl(addr));
		addr = pcied->regs_config + PCIE1_PAB_PEX_INT_STAT_OFF;
		dev_warn(pcied->dev,	"EP PCIE1_PAB_PEX_INT_STAT=0x%08X\n",
			readl(addr));
#endif
		addr = pcied->regs_config + PCIE1_PAB_MB_AXI_DATA_OFF;
		writel(0, addr);   /* clear message from QB63xx EP */

		addr = pcied->regs_config + PCIE1_PAB_MB_AXI_CTRL_OFF;
		val = readl(addr);
#if DEBUG_PCIE
		dev_warn(pcied->dev,	"EP PCIE1_PAB_MB_AXI_CTRL=0x%08X\n", val);
#endif
		val &= 0xfffffffe;   /* clear bit 0 for QB63xx EP */
		writel(val, addr);

		addr = pcied->regs_config + PCIE1_PAB_PEX_INT_STAT_OFF;
		val = readl(addr);
#if DEBUG_PCIE
		dev_warn(pcied->dev,	"EP PCIE1_PAB_PEX_INT_STAT=0x%08X\n", val);
#endif
		val |= 0x400;   /* clear bit 10 for QB63xx EP driving DEASSERT_INTA, this bit is W1CLR */
		writel(val, addr);
	}
#endif

#if DEBUG_PCIE
	dev_warn(pcied->dev, "d_int done\n");
#endif
	return IRQ_NONE;
}

/*
 * quasar_pcied_thread() - body of the test thread.
 * @pcie: the struct quasar_pcied of the device
 *
 * Logs a running counter once per second until the thread is asked to stop.
 * Each pass also runs whichever SELF_TEST_* exercises are compiled in.
 * Clears pcied->thread_task on exit and returns 0.
 */
static int quasar_pcied_thread(void *pcie)
{
	struct quasar_pcied *pcied = pcie;
	u32 pass = 0;
#if SELF_TEST_PIO || SELF_TEST_HOST_DMA
	u32 idx;
#endif
#if SELF_TEST_DMA || SELF_TEST_HOST_DMA
	u32 xfer_bytes = (BUF_SIZE - 4);
#endif

	for (;;) {
		if (kthread_should_stop())
			break;

		dev_warn(pcied->dev, "PCIe task running %d\n", pass++);

#if SELF_TEST_PIO   /* Test QB66xx PCIe device's configuration registers */
		printk("\nPCIe dev: check device(EP)'s CONFIG registers:\n");
		for (idx = 0; idx < 0x100; idx += 4) {
			printk("0x%08X ", readl(pcied->regs_config + idx));
			/* line break after every fourth dword */
			if ((idx & 0xf) == 0xc)
				printk("\n");
		}
#endif

#if SELF_TEST_DMA   /* Test QB66xx PCIe device's DMA */
		/* The device (EP) starts write DMA, write data to the host (here) */
		qpcied_dma_status_clear(pcied, 1 /*wdma*/, 0 /*device*/);
		qpcied_dma_start(pcied, 1 /*wdma*/, 0 /*device*/,
			(u32)(uint_port)pcied->buf_ep /*src at EP*/,
			pcied->backup_ep_inbound_wins[1][1] /*dst at EP*/,
			xfer_bytes >> 2 /*dwords*/);   /* EP->RC */

		/* The device (EP) starts read DMA, read data to the host (here) */
		qpcied_dma_status_clear(pcied, 0 /*rdma*/, 0 /*device*/);
		qpcied_dma_start(pcied, 0 /*rdma*/, 0 /*device*/,
			pcied->backup_ep_inbound_wins[1][1] /*src at EP*/,
			(u32)(uint_port)pcied->buf_ep /*dst at EP*/,
			xfer_bytes >> 2 /*dwords*/);   /* EP<-RC */
#endif

#if SELF_TEST_HOST_DMA   /* test QB66xx PCIe host's DMA */
		/* Prepare buffer data for write DMA: a repeating byte ramp */
		for (idx = 0; idx < xfer_bytes; idx++)
			pcied->buf[idx] = idx;
		qpcied_dma_status_clear(pcied, 1 /*wdma*/, 1 /* host*/);
		qpcied_dma_start(pcied, 1 /*wdma*/, 1 /*host*/,
			(u32)pcied->buf_dma /*src*/,
			0x51000000 /*dst*/,
			xfer_bytes >> 2 /*dwords*/);

		/* destroy buffer data before read DMA */
		memset(pcied->buf, 0x56, xfer_bytes);
		qpcied_dma_status_clear(pcied, 0 /*rdma*/, 1 /* host*/);
		qpcied_dma_start(pcied, 0 /*rdma*/, 1 /*host*/,
			0x51000000 /*src*/,
			(u32)pcied->buf_dma /*dst*/,
			xfer_bytes >> 2 /*dwords*/);

		/* Check buffer data after read DMA */
		printk("\ndata after host rDMA: 0x%08X\n", readl((void *)pcied->buf));
#endif
		msleep(1000);
	}

	pcied->thread_task = NULL;
	return 0;
}

/* New device inserted */
static int quasar_pcied_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	int ret = 0;
	u32 i, j, k;
	dev_t  pciedn;
	unsigned int irq;
	struct quasar_pcied *pcied;

#if DEBUG_PCIE
	dev_warn(&pdev->dev, "quasar_pcied_probe() starts\n");
#endif

#if DEBUG_PCIE
	dev_warn(&pdev->dev, "(%d): bar per outbound setting in dts file\n",
			DEVICE_COUNT_RESOURCE);
	for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
#ifdef Q6300
		dev_warn(&pdev->dev, "*%d bar_s(0x%x) bar_e(0x%x) bar_l(0x%x)\n", i,
			(u32)pci_resource_start(pdev, i),
			(u32)pci_resource_end(pdev, i),
			(u32)pci_resource_len(pdev, i));
#else
		dev_warn(&pdev->dev, "*%d bar_s(0x%llx) bar_e(0x%llx) bar_l(0x%llx)\n", i,
			(u64)pci_resource_start(pdev, i),
			(u64)pci_resource_end(pdev, i),
			(u64)pci_resource_len(pdev, i));
#endif
	}
#endif

	pcied = kzalloc(sizeof(struct quasar_pcied), GFP_KERNEL);
	if (!pcied) {
		dev_err(&pdev->dev, "out of memory\n");
		return -ENOMEM;
	}

	cdev_init(&pcied->cdev, &quasar_pcied_fops);
	pciedn = MKDEV(QPCIED_MAJOR, 0);
	ret = cdev_add(&pcied->cdev, pciedn, 1);
	if (ret) {
		dev_err(&pdev->dev, "could not create char dev %d\n", pciedn);
		return ret;
	}

	pcied->dev = &pdev->dev;
	pci_set_drvdata(pdev, pcied);
	pcied->pdev = pdev;

#if DEBUG_PCIE
	dev_warn(&pdev->dev, "name=%s bus no=%d\n",
		pdev->bus->name, (u32)pdev->bus->number);
	dev_warn(&pdev->dev, "msi  addr test1=0x%X\n",
		pdev->msi_cap);
	dev_warn(&pdev->dev, "msi  addr test2=0x%X\n",
		pci_find_capability(pdev, PCI_CAP_ID_MSI));
	dev_warn(&pdev->dev, "msix addr test1=0x%X\n",
		pdev->msix_cap);
	dev_warn(&pdev->dev, "msix addr test2=0x%X\n",
		pci_find_capability(pdev, PCI_CAP_ID_MSIX));
#endif

#ifdef CONFIG_PCI_MSI
#if DEBUG_PCIE
{
	u32 data;

	pci_bus_read_config_dword(pdev->bus, pdev->devfn, pdev->msi_cap, &data);
	dev_warn(&pdev->dev, "msi_cap data=0x%08x\n", data);
}
#endif
#endif

	if (pci_enable_device(pdev))
		return -EIO;

	if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))
		return -ENODEV;

	ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
	if (ret) {
		dev_warn(&pdev->dev, "no suitable DMA configuring, aborting\n");
		return -ENODEV;
	}
	ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
	if (ret) {
		dev_warn(&pdev->dev, "no suitable DMA configuring, aborting\n");
		return -ENODEV;
	}

	spin_lock_init(&pcied->lock);

	/* Here we get three bars becuase the "endpoint" (device) firmware
	   provides three BAR settings to this RC host... */
	for (i = 0, j = 0; (j < 10) && (i < BAR_NUMBER); j++) {
		if ((u32)pci_resource_len(pdev, j) == 0)
			continue;
		pcied->bar_s[i] = (u32)pci_resource_start(pdev, j);
		pcied->bar_l[i] = (u32)pci_resource_len(pdev, j);
		pcied->bar_va[i] = ioremap(pcied->bar_s[i], pcied->bar_l[i]);
#if DEBUG_PCIE
		dev_warn(&pdev->dev, "=>bar%d phys(0x%08x) virt(0x%x) len(0x%08x)\n",
			(u32)i, (u32)(uint_port)pcied->bar_s[i], (u32)(uint_port)pcied->bar_va[i],
			(u32)(uint_port)pcied->bar_l[i]);
#endif
		i++;
	}

	/* Retrive EP's BAR settings */
	for (i = 0; i < 3; i++) {
		for (j = 0; j < 4; j++) {
			pcied->backup_ep_bars[i][j] =
				readl((void *)((uint_port)pcied->bar_va[2] + (i*4*4) + (j*4)));
		}
	}
	k = 3*4*4;
#if DEBUG_PCIE
	for (i = 0; i < 3; i++) {
		printk("\n");
		for (j = 0; j < 4; j++)
			printk("0x%08x ", (unsigned int)pcied->backup_ep_bars[i][j]);
	}
#endif

	/* Retrive EP's inbound settings */
	for (i = 0; i < 4; i++) {
		for (j = 0; j < 6; j++) {
			pcied->backup_ep_inbound_wins[i][j] =
				readl((void *)((uint_port)pcied->bar_va[2] + k + (i*6*4) + (j*4)));
		}
	}
    k += (4*6*4);
#if DEBUG_PCIE
	for (i = 0; i < 4; i++) {
		printk("\n");
		for (j = 0; j < 6; j++)
			printk("0x%08x ", (unsigned int)pcied->backup_ep_inbound_wins[i][j]);
	}
	printk("\n");
#endif

	/* retrive EP's buffer for PCIe */
	pcied->buf_ep = (u8 *)(uint_port)readl((void *)((uint_port)pcied->bar_va[2] + k));
#ifdef Q6300
	dev_warn(&pdev->dev, "EP's PCIe buffer address 0x%08x\n", (uint_port)pcied->buf_ep);
#else
	dev_warn(&pdev->dev, "EP's PCIe buffer address 0x%llx\n", (uint_port)pcied->buf_ep);
#endif
	pcied->regs_config = ioremap(0x40000000, 0xd000);
	pcied->regs_config_host = ioremap(PCIE1_GPEXP_CFG_VENDORID, 0xd000);   // PCIE1
#ifndef Q6300
	//pcied->regs_config_host = ioremap(PCIE2_GPEXP_CFG_VENDORID, 0xd000);   // PCIE2
#endif
#if DEBUG_PCIE
	printk("=>PCIe Device VID(0x%08x)=0x%08x\n",
		(u32)(uint_port)pcied->regs_config, readl(pcied->regs_config));
	printk("=>PCIe Host   VID(0x%08x)=0x%08x\n",
		(u32)(uint_port)pcied->regs_config_host, readl(pcied->regs_config_host));
#endif

	init_waitqueue_head(&pcied->rdma_wait_queue);
	init_waitqueue_head(&pcied->wdma_wait_queue);
	pcied->wdma_wait = 0;
	pcied->rdma_wait = 0;
	pcied->hrtcnt1 = ioremap(HRTCNT1, 4);

#if LINUX_VERSION_CODE > KERNEL_VERSION(4,10,0)
	irq = pdev->bus->self->irq;   /* kernel 4.19 */
#else
	irq = pdev->irq;   /* kernel 4.4 */
#endif

#ifdef CONFIG_PCI_MSI
	i = readl(pcied->regs_config + PCIE1_GPEXP_CFG_COMMAND_OFF);
	if ((i & 0x00000400) == 0) {
#if DEBUG_PCIE
		printk("For EP using MSI, disable its INTA function\n");
#endif
		i |= 0x0400;   /* set bit 10 to "disable" EP INTx */
		writel(i, pcied->regs_config + PCIE1_GPEXP_CFG_COMMAND_OFF);
	}
	if ((i & 0x00100000) == 0) {
		dev_err(&pdev->dev, "This device doesn't have capability list\n");
	} else {
		/* Set PCIe address for MSI, per inbound memory window 3 in devtree */
		pci_bus_write_config_dword(pdev->bus, pdev->devfn,
			pdev->msi_cap + 4, 0);
		pci_bus_write_config_dword(pdev->bus, pdev->devfn,
			pdev->msi_cap + 8, 0x00001400);
		pci_bus_write_config_dword(pdev->bus, pdev->devfn,
			pdev->msi_cap + 12, 0xAAB0);
	}

	ret = request_irq(irq, quasar_pcied_msi_irq,
			IRQF_SHARED, MODULE_NAME, pdev);
	if (!ret) {
		/* enable MSI */
		pci_bus_read_config_dword(pdev->bus, pdev->devfn, pdev->msi_cap, &i);
		i |= (1 << 16);
		pci_bus_write_config_dword(pdev->bus, pdev->devfn, pdev->msi_cap, i);
	} else {
		dev_err(&pdev->dev, "request_irq: %d failed\n", irq);
		ret = -EINVAL;
	}
#else   /* use INTx */
#if DEBUG_PCIE
	printk("For EP using INTA, enable its INTA function\n");
#endif
	i = readl(pcied->regs_config + PCIE1_GPEXP_CFG_COMMAND_OFF);
	i &= ~0x0400;   /* clear bit 10 to "enable" EP INTx */
	writel(i, pcied->regs_config + PCIE1_GPEXP_CFG_COMMAND_OFF);

	ret = request_irq(irq, quasar_pcied_msi_irq,
			IRQF_SHARED, MODULE_NAME, pdev);
	if (ret) {
		dev_err(&pdev->dev, "request_irq: %d failed\n", irq);
		ret = -EINVAL;
	}
#endif

#if DEBUG_PCIE
	dev_warn(&pdev->dev, "irq=%d\n", irq);
#endif

	ret = pci_request_regions(pdev, MODULE_NAME);

	if (ret < 0) {
		dev_err(&pdev->dev, "pci_request_regions failed\n");
		ret = -ENOMEM;
	}

	pci_set_master(pdev);

	/* request DMA pool */
	pcied->buf = pci_alloc_consistent(pdev, BUF_SIZE, &pcied->buf_dma);
	if (pcied->buf == NULL) {
		dev_err(&pdev->dev, "DMA recv buffer alloc failed\n");
		ret = -ENOMEM;
	} else {
		memset(pcied->buf, 0x78, BUF_SIZE);
#if DEBUG_PCIE
		dev_warn(&pdev->dev, "=> buf_dma(0x%x) buf(0x%x) size(0x%x)\n",
				(u32)pcied->buf_dma, (u32)(uint_port)pcied->buf, BUF_SIZE);
#endif
	}

	/* thread for testing */
	pcied->thread_task = NULL;
	if (pcied->thread_task == NULL) {
		pcied->thread_task = 
			kthread_create(quasar_pcied_thread,
				pcied, "pcied-process");
		if (IS_ERR(pcied->thread_task)) {
			printk("PCIe device thread failed!\n");
			ret = PTR_ERR(pcied->thread_task);
		}
	}
	wake_up_process(pcied->thread_task);

#if DEBUG_PCIE
	dev_warn(&pdev->dev, "quasar_pcie_probe() exits %d\n", ret);
#endif
	return ret;
}

static void quasar_pcied_remove(struct pci_dev *pdev)
{
	struct quasar_pcied *pcied = pci_get_drvdata(pdev);
	int i;

	for (i = 0; i < BAR_NUMBER; i++)
		iounmap(pcied->bar_va[i]);
	iounmap(pcied->regs_config);
	iounmap(pcied->regs_config_host);
	iounmap(pcied->hrtcnt1);
	return;
}

#ifdef CONFIG_PM
/* Suspend hook of the PCI driver: does nothing and reports success. */
static int quasar_pcied_suspend(struct pci_dev *pdev, pm_message_t state)
{
	return 0;
}

/* Resume hook of the PCI driver: does nothing and reports success. */
static int quasar_pcied_resume(struct pci_dev *pdev)
{
	return 0;
}
#endif

/*
 * PCI IDs handled by this driver: vendor 0x11DE, device 1.
 * A single all-zero entry terminates the table.
 */
static const struct pci_device_id quasar_pcied_dev[] = {
	{ PCI_DEVICE(0x11DE, 1), },
	{ 0, },
};
MODULE_DEVICE_TABLE(pci, quasar_pcied_dev);

/* PCI driver descriptor registered at module load; the power-management
 * hooks are only wired up when the kernel is built with CONFIG_PM. */
static struct pci_driver quasar_pcied_driver = {
	.name     = MODULE_NAME,
	.id_table = quasar_pcied_dev,
	.probe    = quasar_pcied_probe,
	.remove   = quasar_pcied_remove,
#ifdef CONFIG_PM
	.suspend  = quasar_pcied_suspend,
	.resume   = quasar_pcied_resume,
#endif
};

/* Module entry point: registers the PCI driver and returns the result of
 * the registration. */
static int __init quasar_pcied_init(void)
{
#if DEBUG_PCIE
	printk("quasar_pcied_init()\n");
#endif
	return pci_register_driver(&quasar_pcied_driver);
}

/* Module exit point: unregisters the PCI driver. */
static void __exit quasar_pcied_exit(void)
{
#if DEBUG_PCIE
	printk("quasar_pcied_exit\n");
#endif
	pci_unregister_driver(&quasar_pcied_driver);
}
/* Module load/unload entry points and module metadata. */
module_init(quasar_pcied_init);
module_exit(quasar_pcied_exit);

MODULE_DESCRIPTION("Quasar PCIe device driver");
MODULE_LICENSE("Dual BSD/GPL");
