/*
 * A simple example of a Quasar PCIe gadget driver
 *
 *
 * Copyright (c) 2015, The Linux Foundation.
 * All rights reserved.
 *
 * Copyright (c) 2020, 2021 QBit Semiconductor LTD.
 *
 * Redistribution and use
 * in source and binary forms, with or without modification,
 * are permitted (subject to the limitations in the disclaimer
 * below) provided that the following conditions are met :
 *   *Redistributions of source code must retain the above
 *    copyright notice, this list of conditions and the
 *    following disclaimer.
 *   *Redistributions in binary form must reproduce the
 *    above copyright notice, this list of conditions and
 *    the following disclaimer
 *    in the documentation and/or other materials provided
 *    with the distribution.
 *
 *  NO EXPRESS OR IMPLIED LICENSES TO ANY PARTYS PATENT
 *  RIGHTS ARE GRANTED BY THIS LICENSE.
 *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
 *  AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
 *  WARRANTIES, INCLUDING,
 *  BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 *  AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 *  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
 *  OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 *  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
 *  OR PROFITS;
 *  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 *  OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 *  OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
 *  OF SUCH DAMAGE
 *
 */

#include <linux/init.h>
#include <linux/module.h>
#include <linux/version.h>   /* for LINUX_VERSION_CODE */
#include <linux/platform_device.h>
#include <linux/of_platform.h>
#include <linux/device.h>
#include <linux/cdev.h>
#include <linux/pci.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/msi.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
//#include <linux/irqdomain.h>
#include <linux/types.h>
#include <linux/list.h>
#include <linux/kthread.h>
#include <linux/delay.h>
#if LINUX_VERSION_CODE <= KERNEL_VERSION(4,10,0)
#include <asm/uaccess.h>
#else
#include <linux/uaccess.h>
#endif
#include <quasar/qbsocregs.h>
#if LINUX_VERSION_CODE <= KERNEL_VERSION(4,10,0)
#include <../../linux-4.4-quasar/drivers/pci/host/pcie-quasar.h>
#else
#include <../../linux-4.19-quasar/drivers/pci/controller/pcie-quasar.h>
#endif

/* Driver name string (its users are outside the portion of the file seen here) */
#define MODULE_NAME "quasar-pcie"

/* Integer type used as the intermediate step when pointers and DMA
 * addresses are cast down to u32: 32 bits when built for Q6300,
 * 64 bits otherwise. */
#ifdef Q6300
#define uint_port u32
#else
#define uint_port u64
#endif

/* Non-zero compiles in the extra dev_warn() tracing guarded by #if DEBUG_PCIE */
#define DEBUG_PCIE   (0)

/* Non-zero makes quasar_pcieg_hw_init() program the Gen 1 link speed */
#define SPEED_GEN1   (0)   /* 0 for Gen2, 1 for Gen 1 */

/* Device ID (presumably the character device major number -- TODO confirm
 * against the registration code, which is not visible here) */
#define QPCIEG_MAJOR (222)

/* Size in bytes of each coherent buffer allocated for BAR2/3 and BAR4/5.
 * Total size cannot be over 256KB (default) or coherent_pool
 * assigned in quasar63xxevma0-512mbyte.dts */
#define BUF_SIZE (128*1024)   // (1024*1024)

#define SELF_TEST_DMA      (0)   // self test without application
#define SELF_TEST_MSI      (0)   // self test without application; non-zero builds quasar_pcieg_send_msi()

/* Inbound PCIe address: PCIe-side base (HI/LO halves) and size in bytes of
 * each inbound window. Windows 1-3 feed ep_inbound_windows[]; the PEX4
 * values are not referenced in the visible part of the file. */
#define INBOUND_PEX1_LO   (0)
#define INBOUND_PEX1_HI   (0x00001100)
#define INBOUND_PEX1_SIZE (1024*1024)
#define INBOUND_PEX2_LO   (0)
#define INBOUND_PEX2_HI   (0x00001200)
#define INBOUND_PEX2_SIZE (1024*1024)
#define INBOUND_PEX3_LO   (0)
#define INBOUND_PEX3_HI   (0x00001300)
#define INBOUND_PEX3_SIZE (1024*1024)
#define INBOUND_PEX4_LO   (0)
#define INBOUND_PEX4_HI   (0x00001400)
#define INBOUND_PEX4_SIZE (1024*1024)

/* QB63XX EP side: BAR Settings (RC accesses EP's BARs)
 * Register: xxx_PAB_PEX_AMAP_BARx_xxx
 * Array   : one row per 64-bit BAR pair, laid out as
 *           {AXI address high, AXI address low, BAR size high, BAR size low}
 * Value 0 means not used or determined later: rows 1 and 2 are filled in by
 * quasar_pcieg_ep_init() with the DMA addresses of the buffers it allocates. */
static u32 ep_bars[3][4] = {
	{0, 0x04000000, 0, 0x01000000},   /* BAR 0/1: 63XX (EP side) register space */
	{0,          0, 0,          0},   /* BAR 2/3: EP's memory */
	{0,          0, 0,          0},   /* BAR 4/5: EP's memory */
};

/* QB63XX EP side: Inbound Memory Map (EP accesses RC)
 * Register: xxx_PAB_AXI_AMAP_xxx
 * Array   : one row per window, laid out as
 *           {AXI address high, AXI address low,
 *            PEX address high, PEX address low,
 *            Window size high, Window size low}
 * Value 0 means not used or determined later.
 * quasar_pcieg_ep_init() programs rows 1-3 into the hardware; the AXI low
 * address of row 1 is also used as the EP-side address for the DMA
 * transfers started by qpcieg_read() and qpcieg_write(). */
static u32 ep_inbound_windows[4][6] = {
	{0,          0,          0, 0, 0, 0},   /* Windows 0: reserved for configuration access, not used */
	{0, 0x58000000, INBOUND_PEX1_HI, INBOUND_PEX1_LO, 0, INBOUND_PEX1_SIZE},   /* Windows 1: to access RC's memory */
	{0, 0x68000000, INBOUND_PEX2_HI, INBOUND_PEX2_LO, 0, INBOUND_PEX2_SIZE},   /* Windows 2: to access RC's memory */
	{0, 0x78000000, INBOUND_PEX3_HI, INBOUND_PEX3_LO, 0, INBOUND_PEX3_SIZE}    /* Windows 3: to access RC's memory */
};

/* Per-device state of the Quasar PCIe endpoint (gadget) driver. */
struct quasar_pcieg {
	/* Character device; qpcieg_open() recovers this struct from it */
	struct cdev		cdev;
	/* Device used for dev_*() logging and coherent DMA allocations */
	struct device *dev;
    struct platform_device	*pdev;
	spinlock_t		lock;

	/* Mapped register blocks */
	void __iomem		*base_rstgen;	/* RSTGEN_*: resets, power switches, isolation */
	void __iomem		*base_clkgen;	/* SYSCG_*: clock disable control/status */
	void __iomem		*base_ctrl;	/* PCIE1_*: controller, bridge and DMA registers */
	void __iomem		*base_wrap;	/* PCIe wrapper: PHY, software reset, bootstrap */
	void __iomem		*base_gpf;	/* only selected by qpcieg_ioctl() in the visible code */
	/* Resources presumably backing the mappings above -- TODO confirm in probe */
	struct resource		*rsc_rstgen;
	struct resource		*rsc_clkgen;
	struct resource		*rsc_ctrl;
	struct resource		*rsc_wrap;
	struct resource		*rsc_gpf;
	int			irq;
	/* PCI source clock in MHz; assigned by quasar_pcieg_hw_init() */
	u32 src_pci_clk_mhz;

	/* qpcieg_dma_start() sleeps on these until the matching flag below is 0 */
	wait_queue_head_t rdma_wait_queue;
	wait_queue_head_t wdma_wait_queue;
	/* Set to 1 when a transfer is started, cleared by the interrupt handler */
	u8 rdma_wait;
	u8 wdma_wait;
	/* Counter values read from hrtcnt1 at DMA start and in the interrupt handler */
	u32 time_start;
	u32 time_end;
	void __iomem    *hrtcnt1;

	/* Transfer buffer; buf_dma is the EP-side DMA address handed to
	 * qpcieg_dma_start() by qpcieg_read() and qpcieg_write() */
	u8 *buf;
	dma_addr_t buf_dma;

	/* BAR0/1 buffer: not referenced in the visible part of the file */
	u8 *buf_bar01;
	dma_addr_t buf_dma_bar01;

	/* BAR2/3 buffer: allocated in quasar_pcieg_ep_init() */
	u8 *buf_bar23;
	dma_addr_t buf_dma_bar23;

	/* BAR4/5 buffer: allocated in quasar_pcieg_ep_init() */
	u8 *buf_bar45;
	dma_addr_t buf_dma_bar45;

	struct task_struct	*thread_task;
};

static int qpcieg_dma_status_clear(struct quasar_pcieg *pcieg, int wdma)
{
	void __iomem *regs;

	regs = pcieg->base_ctrl;
	mb();

	switch (wdma) {
	case 0:   // RDMA
		writel(0x100, regs + PCIE1_PAB_RDMA_CTRL_OFF);   // indirect descriptor
		writel(0xff, regs + PCIE1_PAB_RDMA_STAT_OFF);    // W1CLR
		writel(0, regs + PCIE1_PAB_IND_DESC_SEL_OFF);    // desc 0, rdma
		writel(0, regs + PCIE1_PAB_IND_DESC_CTRL_OFF);
		writel(0, regs + PCIE1_PAB_IND_DESC_STAT_OFF);
		writel(0x10, regs + PCIE1_PAB_IND_DESC_SEL_OFF); // desc 1, rdma
		writel(0, regs + PCIE1_PAB_IND_DESC_CTRL_OFF);
		writel(0, regs + PCIE1_PAB_IND_DESC_STAT_OFF);
		writel(0x20, regs + PCIE1_PAB_IND_DESC_SEL_OFF); // desc 2, rdma
		writel(0, regs + PCIE1_PAB_IND_DESC_CTRL_OFF);
		writel(0, regs + PCIE1_PAB_IND_DESC_STAT_OFF);
		writel(0x30, regs + PCIE1_PAB_IND_DESC_SEL_OFF); // desc 3, rdma
		writel(0, regs + PCIE1_PAB_IND_DESC_CTRL_OFF);
		writel(0, regs + PCIE1_PAB_IND_DESC_STAT_OFF);
		break;
	case 1:   // WDMA
		writel(0x100, regs + PCIE1_PAB_WDMA_CTRL_OFF);   // indirect descriptor
		writel(0xff, regs + PCIE1_PAB_WDMA_STAT_OFF);    // W1CLR
		writel(0x01, regs + PCIE1_PAB_IND_DESC_SEL_OFF); // desc 0, wdma
		writel(0, regs + PCIE1_PAB_IND_DESC_CTRL_OFF);
		writel(0, regs + PCIE1_PAB_IND_DESC_STAT_OFF);
		writel(0x11, regs + PCIE1_PAB_IND_DESC_SEL_OFF); // desc 1, wdma
		writel(0, regs + PCIE1_PAB_IND_DESC_CTRL_OFF);
		writel(0, regs + PCIE1_PAB_IND_DESC_STAT_OFF);
		writel(0x21, regs + PCIE1_PAB_IND_DESC_SEL_OFF); // desc 2, wdma
		writel(0, regs + PCIE1_PAB_IND_DESC_CTRL_OFF);
		writel(0, regs + PCIE1_PAB_IND_DESC_STAT_OFF);
		writel(0x31, regs + PCIE1_PAB_IND_DESC_SEL_OFF); // desc 3, wdma
		writel(0, regs + PCIE1_PAB_IND_DESC_CTRL_OFF);
		writel(0, regs + PCIE1_PAB_IND_DESC_STAT_OFF);
		break;
	default:
		printk("Not read nor write operation %d\n", wdma);
		return 1;
	}
	return 0;
}

/*
 * Program indirect descriptor 0 of one DMA engine, start the transfer and
 * sleep until the interrupt handler clears the engine's wait flag.
 *
 * @pcieg: driver state
 * @wdma:  0 for RDMA, 1 for WDMA
 * @src:   source address, in bytes
 * @dst:   destination address, in bytes
 * @size:  transfer length in dwords (4 bytes); only the bits kept by the
 *         0x3FFFF000 mask after the shift by 12 reach the descriptor
 *
 * Returns 0 when the transfer was signalled complete, 1 when @wdma is
 * neither 0 nor 1 (nothing is started and no wait flag is armed), and
 * -ETIMEDOUT when no completion was signalled within the wait period.
 */
static int qpcieg_dma_start(struct quasar_pcieg *pcieg, int wdma,
			u32 src, u32 dst, u32 size)
{
	u32 desc_ctrl = ((0x3FFFF000 & (size << 12)) | 0x00000FF1);
	void __iomem *regs = pcieg->base_ctrl;
	long remaining;

	switch (wdma) {
	case 0:   // RDMA
		/* Arm the flag only once the direction is known to be valid */
		pcieg->rdma_wait = 1;
		writel(0x100 /*0x300*/, regs + PCIE1_PAB_RDMA_CTRL_OFF);   // set bit 9 to relax ordering
		writel(0, regs + PCIE1_PAB_IND_DESC_SEL_OFF);
		writel(desc_ctrl, regs + PCIE1_PAB_IND_DESC_CTRL_OFF);
		writel(src, regs + PCIE1_PAB_IND_DESC_SRC_ADDR_OFF);
		writel(0, regs + PCIE1_PAB_IND_DESC_SRC_ADDRX_OFF);
		writel(dst, regs + PCIE1_PAB_IND_DESC_DST_ADDR_OFF);
		writel(0x10, regs + PCIE1_PAB_IND_DESC_DST_ADDRX_OFF);   // set coherency
		mb();
		pcieg->time_start = readl(pcieg->hrtcnt1);
		writel(0x101, regs + PCIE1_PAB_RDMA_CTRL_OFF);  // must do this last
		break;
	case 1:   // WDMA
		pcieg->wdma_wait = 1;
		writel(0x100 /*0x300*/, regs + PCIE1_PAB_WDMA_CTRL_OFF);   // set bit 9 to relax ordering
		writel(1, regs + PCIE1_PAB_IND_DESC_SEL_OFF);
		writel(desc_ctrl, regs + PCIE1_PAB_IND_DESC_CTRL_OFF);
		writel(src, regs + PCIE1_PAB_IND_DESC_SRC_ADDR_OFF);
		writel(0x10, regs + PCIE1_PAB_IND_DESC_SRC_ADDRX_OFF);   // set coherency
		writel(dst, regs + PCIE1_PAB_IND_DESC_DST_ADDR_OFF);
		writel(0, regs + PCIE1_PAB_IND_DESC_DST_ADDRX_OFF);
		mb();
		pcieg->time_start = readl(pcieg->hrtcnt1);
		writel(0x101, regs + PCIE1_PAB_WDMA_CTRL_OFF);  // must do this last
		break;
	default:
		printk("Not read nor write operation %d\n", wdma);
		return 1;
	}

	/*
	 * The timeout is expressed in jiffies, so its wall-clock length
	 * depends on HZ. A zero result means the flag was still set when
	 * the timeout expired.
	 */
	if (wdma)
		remaining = wait_event_timeout(pcieg->wdma_wait_queue,
			(pcieg->wdma_wait == 0), 100);
	else
		remaining = wait_event_timeout(pcieg->rdma_wait_queue,
			(pcieg->rdma_wait == 0), 100);
#if DEBUG_PCIE
	dev_warn(pcieg->dev, "\nDMA time %d us from 0x%08x to 0x%08x\n",
		(pcieg->time_end - pcieg->time_start),
		pcieg->time_start, pcieg->time_end);
	/* Skip the throughput line when the counter did not advance */
	if (pcieg->time_end != pcieg->time_start)
		dev_warn(pcieg->dev, "DMA throughput %d MBytes/sec for %d bytes\n",
			(size<<2)/(pcieg->time_end - pcieg->time_start), (size<<2));
#endif

	if (remaining == 0) {
		dev_err(pcieg->dev, "%s timed out\n", wdma ? "WDMA" : "RDMA");
		return -ETIMEDOUT;
	}

	return 0;
}

/*
 * open() handler: find the driver state that embeds this inode's cdev and
 * stash it in the file's private_data for the other file operations.
 * Always returns 0.
 */
static int qpcieg_open(struct inode* inode, struct file* fd)
{
	struct quasar_pcieg *self =
		container_of(inode->i_cdev, struct quasar_pcieg, cdev);

	fd->private_data = self;
#if DEBUG_PCIE
	dev_warn(self->dev, "qpcieg_open() done\n");
#endif
	return 0;
}

/*
 * release() handler: nothing is allocated per open file, so there is
 * nothing to tear down. Always returns 0.
 */
static int qpcieg_close(struct inode* inode, struct file* fd)
{
	return 0;
}

/* Get data from QB63xx PCIe host
 * (ask QB63xx PCIe device issues RDMA to the host)
 *
 * Starts an RDMA of length/4 dwords from inbound window 1 to buf_dma, then
 * copies @length bytes to the user buffer. Bytes beyond a multiple of four
 * are not part of the DMA transfer.
 *
 * Returns @length on success, -EINVAL for a zero-length request,
 * -ESHUTDOWN when the file carries no driver state, a negative error from
 * qpcieg_dma_start(), or -EFAULT when the user buffer cannot be written.
 */
static ssize_t qpcieg_read(struct file* fd, char __user *buffer,
			size_t length, loff_t *offset)
{
	struct quasar_pcieg *pcieg = fd->private_data;
	int ret;

	if (length == 0)
		return -EINVAL;

	if (!pcieg)
		return -ESHUTDOWN;

	/* NOTE(review): length is not bounded against the size of the DMA
	 * buffer; the allocation of pcieg->buf is not visible here, so no
	 * limit is imposed in this function -- confirm and clamp. */
	qpcieg_dma_status_clear(pcieg, 0 /*rdma*/);
	ret = qpcieg_dma_start(pcieg, 0 /*rdma*/,
			ep_inbound_windows[1][1] /*src at EP*/,
			(u32)(uint_port)pcieg->buf_dma /*dst at EP*/,
			length>>2 /*dwords*/);
	if (ret < 0)
		return ret;

	/* copy data to the user buffer.
	 * copy_to_user() returns the number of bytes it could NOT copy, so
	 * any non-zero result is a failure.
	 * NOTE(review): the source is a hard-coded address while the DMA
	 * above targets pcieg->buf_dma -- confirm this should not be
	 * pcieg->buf. */
	if (copy_to_user(buffer, (u8 *)0xa0000000 /*host memory mapping*/,
			length))
		return -EFAULT;

	return length;
}

/* Send data to QB63xx PCIe host
 * (ask QB63xx PCIe device issues WDMA to the host)
 *
 * Copies @length bytes from the user buffer, then starts a WDMA of
 * length/4 dwords from buf_dma to inbound window 1. Bytes beyond a
 * multiple of four are not part of the DMA transfer.
 *
 * Returns @length on success, -EINVAL for a zero-length request,
 * -ESHUTDOWN when the file carries no driver state, -EFAULT when the user
 * buffer cannot be read, or a negative error from qpcieg_dma_start().
 */
static ssize_t qpcieg_write(struct file* fd, const char __user *buffer,
			size_t length, loff_t *offset)
{
	struct quasar_pcieg *pcieg = fd->private_data;
	int ret;

	if (length == 0)
		return -EINVAL;

	if (!pcieg)
		return -ESHUTDOWN;

	/* copy data from user buffer to PCIe memory.
	 * NOTE(review): the destination is a hard-coded address while the
	 * DMA below reads from pcieg->buf_dma -- confirm this should not be
	 * pcieg->buf. length is also not bounded against the buffer size,
	 * whose allocation is not visible here. */
	if (copy_from_user((u8 *)0xa0000000 /*host memory mapping*/, buffer, length)) {
		dev_warn(pcieg->dev, "copy from user buffer failed\n");
		return -EFAULT;
	}

	qpcieg_dma_status_clear(pcieg, 1 /*wdma*/);
	ret = qpcieg_dma_start(pcieg, 1 /*wdma*/,
			(u32)(uint_port)pcieg->buf_dma /*src at EP*/,
			ep_inbound_windows[1][1] /*dst at EP*/,
			length>>2 /*dwords*/);
	if (ret < 0)
		return ret;

	/* Report the bytes consumed; returning 0 would tell the caller that
	 * nothing was written. */
	return length;
}

/* ioctl command codes understood by qpcieg_ioctl() */
#define PCIE_REGISTER_READ       0x01
#define PCIE_REGISTER_WRITE      0x02

/* Argument exchanged with user space by qpcieg_ioctl(): the register
 * address to access and the value written (WRITE) or read back (READ). */
struct gpcieg_register {
	u32 address;
	u32 value;
};

static long qpcieg_ioctl(struct file* fd, unsigned int cmd, unsigned long arg)
{
	struct quasar_pcieg *pcieg;
	struct gpcieg_register reg;
    u32 i;

	pcieg = (struct quasar_pcieg *)fd->private_data;

	if (copy_from_user((void*)&reg,
		(void*)arg, sizeof(struct gpcieg_register)))
		return -EINVAL;

	if ((reg.address >= PCICLKDISCTRL) &&
		(reg.address <= PCI_TEST_OFF)) {
		i = (u32)(uint_port)pcieg->base_gpf;
	} else if ((reg.address >= PCIE1_GPEXP_CFG_VENDORID) &&
		(reg.address <= PCIE1_PAB_OB_BUF_SIZE_CTRL)) {
		i = (u32)(uint_port)pcieg->base_ctrl;
	} else if ((reg.address < PCIE_SPARE) &&
		(reg.address > PCIE_CLKRX_CTRL2)) {
		i = (u32)(uint_port)pcieg->base_wrap;
	} else {
		return -EINVAL;
	}

	switch(cmd)
	{
	case PCIE_REGISTER_READ:
		i += (reg.address - PCIE1_GPEXP_CFG_VENDORID);
		reg.value = readl((void *)(uint_port)i);
		if (copy_to_user((void*)arg,
			(void*)&reg, sizeof(struct gpcieg_register)))
			return -EINVAL;
		break;
	case PCIE_REGISTER_WRITE:
		i += (reg.address - PCIE1_GPEXP_CFG_VENDORID);
		writel(reg.value, (void *)(uint_port)i);
		break;
	default:
		dev_err(pcieg->dev, "Unknonw io command 0x%x\n", cmd);
		break;
	}

	return 0;
}

/* File operations of the gadget character device. Native and compat
 * ioctl calls share the same handler. */
static struct file_operations quasar_pcieg_fops = {
	.owner          = THIS_MODULE,
	/* lifecycle */
	.open           = qpcieg_open,
	.release        = qpcieg_close,
	/* data path */
	.read           = qpcieg_read,
	.write          = qpcieg_write,
	/* register access */
	.unlocked_ioctl = qpcieg_ioctl,
	.compat_ioctl   = qpcieg_ioctl,
};

/*
 * Power the PCI block down.
 *
 * Sequence: assert the block resets, disable the PCI source clock and the
 * IB29/IB30/IB36 clocks (polling each status register until the disable is
 * reported), isolate the block, then clear the power-on register.
 *
 * PWR_WAIT_TIMEOUT_LOOP is defined outside this file; it presumably bounds
 * each polling loop using the local 'timeout' -- TODO confirm.
 */
static void quasar_pcieg_power_down(struct quasar_pcieg *pcieg)
{
	u32 val, timeout;
	u32 mask;

	/* PCI block power down sequence: RSTs -> CLKs -> ISO -> PWR
	   reset block logic */
	val = readl(pcieg->base_rstgen + RSTGEN_SWRSTSTATIC4_OFF);
	val |= (RSTGEN_SWRSTSTATIC4__PCI_PCIEWRAP__MASK | 
			RSTGEN_SWRSTSTATIC4__PCI_PCIE1__MASK | 
			RSTGEN_SWRSTSTATIC4__PCI__MASK);
	writel(val, pcieg->base_rstgen + RSTGEN_SWRSTSTATIC4_OFF);
	/* Assert PCI_SW_MEDIUM reset */
	val = readl(pcieg->base_rstgen + RSTGEN_SWRSTSTATIC11_OFF);
	val |= RSTGEN_SWRSTSTATIC11__PCI_SW_MEDIUM__MASK;
	writel(val, pcieg->base_rstgen + RSTGEN_SWRSTSTATIC11_OFF);
	/* Disable source clocks */
	val = readl(pcieg->base_clkgen + SYSCG_CLKDISCTRL1_OFF);
	val |= SYSCG_CLKDISCTRL1__PCI__MASK;
	writel(val, pcieg->base_clkgen + SYSCG_CLKDISCTRL1_OFF);
	timeout = WAIT_TIME;
	/* Poll until the status register reports the PCI clock as disabled */
	while (!(readl(pcieg->base_clkgen + SYSCG_CLKDISSTAT1_OFF) &
			SYSCG_CLKDISSTAT1__PCI__MASK)) {
		PWR_WAIT_TIMEOUT_LOOP
	}
	/* Disable IB clocks */
	val = readl(pcieg->base_clkgen + SYSCG_CLKDISCTRL2_OFF);
	val |= (SYSCG_CLKDISCTRL2__IB29__MASK |
			SYSCG_CLKDISCTRL2__IB30__MASK);
	writel(val, pcieg->base_clkgen + SYSCG_CLKDISCTRL2_OFF);
	timeout = WAIT_TIME;
	mask = (SYSCG_CLKDISSTAT2__IB29__MASK |
			SYSCG_CLKDISSTAT2__IB30__MASK);
	/* Poll until both IB29 and IB30 are reported as disabled */
	while ((readl(pcieg->base_clkgen + SYSCG_CLKDISSTAT2_OFF) &
			mask) != mask) {
		PWR_WAIT_TIMEOUT_LOOP
	}
	val = readl(pcieg->base_clkgen + SYSCG_CLKDISCTRL3_OFF);
	val |= SYSCG_CLKDISCTRL3__IB36__MASK;
	writel(val, pcieg->base_clkgen + SYSCG_CLKDISCTRL3_OFF);
	timeout = WAIT_TIME;
	mask = SYSCG_CLKDISSTAT3__IB36__MASK;
	/* Poll until IB36 is reported as disabled */
	while ((readl(pcieg->base_clkgen + SYSCG_CLKDISSTAT3_OFF) &
			mask) != mask) {
		PWR_WAIT_TIMEOUT_LOOP
	}
	/* Isolate PCI */
	val = readl(pcieg->base_rstgen + RSTGEN_PWR_ISOLATE_OFF);
	val &= RSTGEN_PWR_ISOLATE__PCI_N__INV_MASK;
	writel(val, pcieg->base_rstgen + RSTGEN_PWR_ISOLATE_OFF);
	udelay(5);
	/* Powerdown PCI */
	writel(0, pcieg->base_rstgen + RSTGEN_PWR_ON_PCI_OFF);
}

/*
 * Switch the PCI power domain on in three stages.
 *
 * Each stage sets one more bit (PWR_ON_W1, then PWR_ON_W2, then PWR_ON_S1)
 * in the PCI power-on register with a read-modify-write, followed by a
 * 5 us delay before the next stage.
 */
static void quasar_pcieg_power_on(struct quasar_pcieg *pcieg)
{
	const u32 stage_bits[3] = { PWR_ON_W1, PWR_ON_W2, PWR_ON_S1 };
	unsigned int stage;

	for (stage = 0; stage < sizeof(stage_bits) / sizeof(stage_bits[0]); stage++) {
		u32 pwr = readl(pcieg->base_rstgen + RSTGEN_PWR_ON_PCI_OFF);

		pwr |= stage_bits[stage];
		writel(pwr, pcieg->base_rstgen + RSTGEN_PWR_ON_PCI_OFF);
		udelay(5);
	}
}

/*
 * Power the PCI block up, unless its power-on register already shows all
 * three power bits set (in which case nothing is done).
 *
 * Sequence: staged power-on, enable the PCI source clock, enable the
 * IB29/IB30 and IB36 clocks when the DDR and IPM1 domains respectively are
 * fully powered, release the PCI_SW_MEDIUM reset, remove the isolation,
 * then release the block resets.
 *
 * PWR_WAIT_TIMEOUT_LOOP is defined outside this file; it presumably bounds
 * each polling loop using the local 'timeout' -- TODO confirm.
 */
static void quasar_pcieg_power_up(struct quasar_pcieg *pcieg)
{
	u32 val, timeout;
	u32 mask;

	/* Already fully powered: leave the block untouched */
	mask = (PWR_ON_W1 | PWR_ON_W2 | PWR_ON_S1);
	val = readl(pcieg->base_rstgen + RSTGEN_PWR_ON_PCI_OFF);
	if ((val & mask) == mask)
		return;

	quasar_pcieg_power_on(pcieg);

	/* Enable the PCI source clock and poll until the disable status clears */
	val = readl(pcieg->base_clkgen + SYSCG_CLKDISCTRL1_OFF);
	val &= SYSCG_CLKDISCTRL1__PCI__INV_MASK;
	writel(val, pcieg->base_clkgen + SYSCG_CLKDISCTRL1_OFF);
	timeout = WAIT_TIME;
	while (readl(pcieg->base_clkgen + SYSCG_CLKDISSTAT1_OFF)
		& SYSCG_CLKDISSTAT1__PCI__MASK) {
			PWR_WAIT_TIMEOUT_LOOP
	}

	/* Enable IB clocks with common power domains
	   DDR_SW - 29,30
	   IPM1 - 36 */
	mask = (PWR_ON_W1 | PWR_ON_W2 | PWR_ON_S1);
	val = readl(pcieg->base_rstgen + RSTGEN_PWR_ON_DDR_OFF);
	/* IB29/IB30 are only enabled when the DDR domain is fully powered */
	if ((val & mask) == mask) {
		mask = (SYSCG_CLKDISCTRL2__IB29__INV_MASK &
				SYSCG_CLKDISCTRL2__IB30__INV_MASK);
		val = readl(pcieg->base_clkgen + SYSCG_CLKDISCTRL2_OFF);
		writel(val & mask, pcieg->base_clkgen + SYSCG_CLKDISCTRL2_OFF);
		timeout = WAIT_TIME;
		mask = (SYSCG_CLKDISSTAT2__IB29__MASK | SYSCG_CLKDISSTAT2__IB30__MASK);
		while (readl(pcieg->base_clkgen + SYSCG_CLKDISSTAT2_OFF) & mask) {
			PWR_WAIT_TIMEOUT_LOOP
		}
	}

	mask = (PWR_ON_W1 | PWR_ON_W2 | PWR_ON_S1);
	val = readl(pcieg->base_rstgen + RSTGEN_PWR_ON_IPM1_OFF);
	/* IB36 is only enabled when the IPM1 domain is fully powered */
	if ((val & mask) == mask) {
		val = readl(pcieg->base_clkgen + SYSCG_CLKDISCTRL3_OFF);
		val &= SYSCG_CLKDISCTRL3__IB36__INV_MASK;
		writel(val, pcieg->base_clkgen + SYSCG_CLKDISCTRL3_OFF);
		timeout = WAIT_TIME;
		while ((readl(pcieg->base_clkgen + SYSCG_CLKDISSTAT3_OFF) &
				SYSCG_CLKDISSTAT3__IB36__MASK) != 0) {
			PWR_WAIT_TIMEOUT_LOOP
		}
	}

	/* Release PCI_SW_MEDIUM reset */
	val = readl(pcieg->base_rstgen + RSTGEN_SWRSTSTATIC11_OFF);
	val &= RSTGEN_SWRSTSTATIC11__PCI_SW_MEDIUM__INV_MASK;
	writel(val, pcieg->base_rstgen + RSTGEN_SWRSTSTATIC11_OFF);
	/* Change RSTGEN_PWR_ISOLATE.BLOCKNAME_N to 1 to connect the block */
	val = readl(pcieg->base_rstgen + RSTGEN_PWR_ISOLATE_OFF);
	val |= RSTGEN_PWR_ISOLATE__PCI_N__MASK;
	writel(val, pcieg->base_rstgen + RSTGEN_PWR_ISOLATE_OFF);
	/* Deassert the block reset
	   Release all PCI block reset.
	   The PCI reset is released first on its own; the second write then
	   also releases the PCIE1 and PCIEWRAP resets. */
	val = readl(pcieg->base_rstgen + RSTGEN_SWRSTSTATIC4_OFF);
	val &= RSTGEN_SWRSTSTATIC4__PCI__INV_MASK;
	writel(val, pcieg->base_rstgen + RSTGEN_SWRSTSTATIC4_OFF);
	val = readl(pcieg->base_rstgen + RSTGEN_SWRSTSTATIC4_OFF);
	val &= (RSTGEN_SWRSTSTATIC4__PCI__INV_MASK &
			RSTGEN_SWRSTSTATIC4__PCI_PCIE1__INV_MASK &
			RSTGEN_SWRSTSTATIC4__PCI_PCIEWRAP__INV_MASK);
	writel(val, pcieg->base_rstgen + RSTGEN_SWRSTSTATIC4_OFF);
}

/*
 * Bring the PCIe PHY and controller out of reset with the controller
 * bootstrapped as an endpoint.
 *
 * Sequence: clear bit 0 of the CLKRX control register, select the pad
 * reference clock for the PHY, hold PHY and controller in reset, select
 * the EP role, release TEST_RESET_N and the PHY reset, wait for the PHY
 * status bit to clear, release the controller resets, optionally force
 * Gen 1, program the core clock ratio and finally wait 300 ms.
 *
 * NOTE(review): the PHY status polling loops have no timeout.
 */
static void quasar_pcieg_hw_init(struct quasar_pcieg *pcieg)
{
	u32 val;

	udelay(100);

	/* 250 when syspll is 2000MHz; use 240 when syspll is 2400MHz */
	pcieg->src_pci_clk_mhz = 250;

	val  = readl(pcieg->base_wrap + PCIE_CLKRX_CTRL1_OFF);
	val &= 0xfffffffe;
	writel(val, pcieg->base_wrap + PCIE_CLKRX_CTRL1_OFF);

	/* Wait for CLKRX output to be stable before
	   using that output as ref_clk for PHY.
	   Put a simple wait loop here as a placeholder */
	udelay(100);

	/* Tell PHY to use ref_clk from pad before you take it out of reset */
	val  = readl(pcieg->base_wrap + PCIE_PHY_CLK_CTRL_OFF);
	val |= 0x00000200 /*PCIE_PHY_CLK_CTRL__REF_USE_PAD__MASK*/;
	writel(val, pcieg->base_wrap + PCIE_PHY_CLK_CTRL_OFF);

	/* Reset PHY and controller */
#ifdef Q6300
	val  = readl(pcieg->base_wrap + PCIE_PHY_RST_CTRL_OFF);
	val &= PCIE_PHY_RST_CTRL__RESET_N__INV_MASK;
	writel(val, pcieg->base_wrap + PCIE_PHY_RST_CTRL_OFF);
#else
	val  = readl(pcieg->base_wrap + PCIE_PHY_RST_CTRL_OFF);
	val &= PCIE_PHY_RST_CTRL__PIPE0_RESETN__INV_MASK;
	val &= PCIE_PHY_RST_CTRL__PIPE1_RESETN__INV_MASK;
	writel(val, pcieg->base_wrap + PCIE_PHY_RST_CTRL_OFF);
#endif

	val = readl(pcieg->base_wrap + PCIE1_SW_RST_OFF);
	val &= (PCIE1_SW_RST__PAB_N__INV_MASK &
			PCIE1_SW_RST__AMBA_N__INV_MASK &
			PCIE1_SW_RST__PBUS_N__INV_MASK &
			PCIE1_SW_RST__LINK_N__INV_MASK);
	writel(val, pcieg->base_wrap + PCIE1_SW_RST_OFF);
	udelay(5);

	/* Set role as EP */
	val  = readl(pcieg->base_wrap + PCIE_SW_BOOTSTRAP_OFF);
	val &= PCIE_SW_BOOTSTRAP__PCIE1_EP_RC_SEL__INV_MASK;
	writel(val, pcieg->base_wrap + PCIE_SW_BOOTSTRAP_OFF);

	/* Release TEST_RESET_N */
#ifdef Q6300
	val  = readl(pcieg->base_wrap + PCIE_PHY_TEST_CTRL_OFF);
	val |= PCIE_PHY_TEST_CTRL__TEST_RESET_N__MASK;
	writel(val, pcieg->base_wrap + PCIE_PHY_TEST_CTRL_OFF);
#else
	val  = readl(pcieg->base_wrap + PCIE_PHY_TEST_CTRL0_OFF);
	val |= PCIE_PHY_TEST_CTRL0__TEST_RESET_N_0__MASK;
	writel(val, pcieg->base_wrap + PCIE_PHY_TEST_CTRL0_OFF);
	val  = readl(pcieg->base_wrap + PCIE_PHY_TEST_CTRL1_OFF);
	val |= PCIE_PHY_TEST_CTRL1__TEST_RESET_N_1__MASK;
	writel(val, pcieg->base_wrap + PCIE_PHY_TEST_CTRL1_OFF);
	val  = readl(pcieg->base_wrap + PCIE_PHY_TEST_CTRL2_OFF);
	val |= PCIE_PHY_TEST_CTRL2__TEST_RESET_N_2__MASK;
	writel(val, pcieg->base_wrap + PCIE_PHY_TEST_CTRL2_OFF);
	val  = readl(pcieg->base_wrap + PCIE_PHY_TEST_CTRL3_OFF);
	val |= PCIE_PHY_TEST_CTRL3__TEST_RESET_N_3__MASK;
	writel(val, pcieg->base_wrap + PCIE_PHY_TEST_CTRL3_OFF);
#endif
	udelay(5);

	/* Here program PHY parameters if needed */
	udelay(5);

	/* Release PHY reset */
#ifdef Q6300
	val  = readl(pcieg->base_wrap + PCIE_PHY_RST_CTRL_OFF);
	val |= PCIE_PHY_RST_CTRL__RESET_N__MASK;
	writel(val, pcieg->base_wrap + PCIE_PHY_RST_CTRL_OFF);
#else
	val  = readl(pcieg->base_wrap + PCIE_PHY_RST_CTRL_OFF);
	val |= PCIE_PHY_RST_CTRL__PIPE0_RESETN__MASK;
	val |= PCIE_PHY_RST_CTRL__PIPE1_RESETN__MASK;
	writel(val, pcieg->base_wrap + PCIE_PHY_RST_CTRL_OFF);
#endif

	/* Wait pipe is ready: spin until the PHY status bit reads back as 0 */
#ifdef Q6300
	val = PCIE_PHY_PIPE_STAT__PHYSTATUS__MASK;
	while (val != 0) {
		val  = readl(pcieg->base_wrap + PCIE_PHY_PIPE_STAT_OFF);
		val &= PCIE_PHY_PIPE_STAT__PHYSTATUS__MASK;
	}
#else
	val = PCIE_PHY_PIPE_STAT__PIPE0_PHYSTATUS__MASK;   /* PCIE0 */
	while (val != 0) {
		val  = readl(pcieg->base_wrap + PCIE_PHY_PIPE_STAT_OFF);
		val &= PCIE_PHY_PIPE_STAT__PIPE0_PHYSTATUS__MASK;
	}
	/* The PIPE1 (PCIE1) status bit is deliberately not polled here */
#endif

	/* Release controller reset */
	val = readl(pcieg->base_wrap + PCIE1_SW_RST_OFF);
	val |= (PCIE1_SW_RST__PAB_N__MASK |
			PCIE1_SW_RST__AMBA_N__MASK |
			PCIE1_SW_RST__PBUS_N__MASK |
			PCIE1_SW_RST__LINK_N__MASK);
	writel(val, pcieg->base_wrap + PCIE1_SW_RST_OFF);

#if SPEED_GEN1   /* Fix speed to Gen 1 if needed */
	val = readl(pcieg->base_ctrl + PCIE1_GPEXD_GEN2_CTRL_OFF);
	val &= 0xffffff0f;
	val |= 0x10;   /* 0x10 for Gen 1 only, 0x20 (default) for Gen 2 */
	writel(val, pcieg->base_ctrl + PCIE1_GPEXD_GEN2_CTRL_OFF);
	val = readl(pcieg->base_ctrl + PCIE1_GPEXD_GEN2_CTRL_OFF);
	dev_warn(pcieg->dev, "Quasar PCIe is configured as Gen %d\n", (val & 0xf0) >> 4);
#endif

	/* Set the ratio of GPEX core clk to be the expected 300 MHz
	 * GPEXD_CORE_CLK_RATIO = 16 * src_pci_clk/300MHz rounded to nearest integer.
	 * src_pci_clk_mhz is already in MHz, so the divisor must be in MHz
	 * too: dividing by a Hz-scaled constant always produced 0.
	 * NOTE(review): confirm the resulting value against the hardware
	 * documentation before deploying. */
	val = (16 * pcieg->src_pci_clk_mhz + 300 / 2) / 300;
	writel(val, pcieg->base_ctrl + PCIE1_GPEXD_CORE_CLK_RATIO_OFF);

	mdelay(300);
}

/*
 * Configure the controller's endpoint-side address translation and
 * configuration space.
 *
 * Steps:
 *  - program inbound windows 1-3 from ep_inbound_windows[];
 *  - allocate one coherent buffer of BUF_SIZE bytes each for BAR2/3 and
 *    BAR4/5, fill them with a 0x23 / 0x45 byte pattern and record their
 *    DMA addresses in ep_bars[];
 *  - program the three BAR translations and the BAR sizes reported to the RC;
 *  - enable PIO and DMA in the bridge, set MRRS/MPS, the class code, the
 *    config-ready bit and the command register.
 *
 * Returns 0 on success or -ENOMEM when a BAR buffer cannot be allocated.
 * On failure no BAR buffer allocated by this function is left behind.
 */
static int quasar_pcieg_ep_init(struct quasar_pcieg *pcieg)
{
	u32 val;

	/*** Inbound Memory Map (EP accesses RC) ***/  // TODO!!!
	writel(0x0f, pcieg->base_ctrl + PCIE1_PAB_AXI_PIO_CTRL0_OFF);

	/* Inbound window 0: reserved for configuration access, not used */

	/* Inbound window 1: memory access */
	writel((ep_inbound_windows[1][5] | 0x5),
		pcieg->base_ctrl + PCIE1_PAB_AXI_AMAP_CTRL1_OFF);
	writel(ep_inbound_windows[1][1],
		pcieg->base_ctrl + PCIE1_PAB_AXI_AMAP_AXI_BASE1_OFF);
	writel(ep_inbound_windows[1][0],
		pcieg->base_ctrl + PCIE1_PAB_AXI_AMAP_AXI_BASE1X_OFF);
	writel(ep_inbound_windows[1][3],
		pcieg->base_ctrl + PCIE1_PAB_AXI_AMAP_PEX_BASEL1_OFF);
	writel(ep_inbound_windows[1][2],
		pcieg->base_ctrl + PCIE1_PAB_AXI_AMAP_PEX_BASEH1_OFF);

	/* Inbound window 2: memory access */
	writel((ep_inbound_windows[2][5] | 0x5),
		pcieg->base_ctrl + PCIE1_PAB_AXI_AMAP_CTRL2_OFF);
	writel(ep_inbound_windows[2][1],
		pcieg->base_ctrl + PCIE1_PAB_AXI_AMAP_AXI_BASE2_OFF);
	writel(ep_inbound_windows[2][0],
		pcieg->base_ctrl + PCIE1_PAB_AXI_AMAP_AXI_BASE2X_OFF);
	writel(ep_inbound_windows[2][3],
		pcieg->base_ctrl + PCIE1_PAB_AXI_AMAP_PEX_BASEL2_OFF);
	writel(ep_inbound_windows[2][2],
		pcieg->base_ctrl + PCIE1_PAB_AXI_AMAP_PEX_BASEH2_OFF);

	/* Inbound window 3: memory access */
	writel((ep_inbound_windows[3][5] | 0x5),
		pcieg->base_ctrl + PCIE1_PAB_AXI_AMAP_CTRL3_OFF);
	writel(ep_inbound_windows[3][1],
		pcieg->base_ctrl + PCIE1_PAB_AXI_AMAP_AXI_BASE3_OFF);
	writel(ep_inbound_windows[3][0],
		pcieg->base_ctrl + PCIE1_PAB_AXI_AMAP_AXI_BASE3X_OFF);
	writel(ep_inbound_windows[3][3],
		pcieg->base_ctrl + PCIE1_PAB_AXI_AMAP_PEX_BASEL3_OFF);
	writel(ep_inbound_windows[3][2],
		pcieg->base_ctrl + PCIE1_PAB_AXI_AMAP_PEX_BASEH3_OFF);

	/*** BAR settings for RC outbound memory map (RC accesses EP) ***/

	/* Memory for BAR0/1 */
	/* As an example, BAR0 is defined in ep_bars[0][x] for QB63XX EP's register space. */

	/* Memory for BAR2/3 */
	pcieg->buf_bar23 = dma_alloc_coherent(pcieg->dev,
			BUF_SIZE, &pcieg->buf_dma_bar23, GFP_KERNEL);
	if (pcieg->buf_bar23 == NULL) {
		dev_err(pcieg->dev, "BAR2/3 buffer allocate failed\n");
		return -ENOMEM;
	}
	memset(pcieg->buf_bar23, 0x23, BUF_SIZE);
#if DEBUG_PCIE
#ifdef Q6300
	dev_warn(pcieg->dev, "BAR2/3: buf_dma(0x%08x) buf(0x%px) size(0x%x)\n",
			(u32)pcieg->buf_dma_bar23, pcieg->buf_bar23, BUF_SIZE);
#else
	dev_warn(pcieg->dev, "BAR2/3: buf_dma(0x%px) buf(0x%px) size(0x%x)\n",
			(void *)pcieg->buf_dma_bar23, pcieg->buf_bar23, BUF_SIZE);
#endif
#endif
	ep_bars[1][0] = 0;
	ep_bars[1][1] = (u32)pcieg->buf_dma_bar23;
	ep_bars[1][2] = 0;
	ep_bars[1][3] = BUF_SIZE;

	/* Memory for BAR4/5 */
	pcieg->buf_bar45 = dma_alloc_coherent(pcieg->dev,
			BUF_SIZE, &pcieg->buf_dma_bar45, GFP_KERNEL);
	if (pcieg->buf_bar45 == NULL) {
		dev_err(pcieg->dev, "BAR4/5 buffer allocate failed\n");
		/* Do not leak the BAR2/3 buffer allocated just above; clear
		 * the pointer so a later cleanup cannot free it twice. */
		dma_free_coherent(pcieg->dev, BUF_SIZE,
				pcieg->buf_bar23, pcieg->buf_dma_bar23);
		pcieg->buf_bar23 = NULL;
		ep_bars[1][1] = 0;
		ep_bars[1][3] = 0;
		return -ENOMEM;
	}
	memset(pcieg->buf_bar45, 0x45, BUF_SIZE);
#if DEBUG_PCIE
#ifdef Q6300
	dev_warn(pcieg->dev, "BAR4/5: buf_dma(0x%08x) buf(0x%px) size(0x%x)\n",
			(u32)pcieg->buf_dma_bar45, pcieg->buf_bar45, BUF_SIZE);
#else
	dev_warn(pcieg->dev, "BAR4/5: buf_dma(0x%px) buf(0x%px) size(0x%x)\n",
			(void *)pcieg->buf_dma_bar45, pcieg->buf_bar45, BUF_SIZE);
#endif
#endif
	ep_bars[2][0] = 0;
	ep_bars[2][1] = (u32)pcieg->buf_dma_bar45;
	ep_bars[2][2] = 0;
	ep_bars[2][3] = BUF_SIZE;

	writel(0x1, pcieg->base_ctrl + PCIE1_PAB_PEX_PIO_CTRL0_OFF);

	/* BAR0/1 */
	writel((ep_bars[0][1] | PCIE1_PAB_PEX_AMAP_BAR__ENABLE__VALUE),
		pcieg->base_ctrl + PCIE1_PAB_PEX_AMAP_BAR0_F0_OFF);
	writel(ep_bars[0][0], pcieg->base_ctrl + PCIE1_PAB_PEX_AMAP_BAR0_F0X_OFF);
	writel(0, pcieg->base_ctrl + PCIE1_GPEXD_BAR_SEL_OFF);   /* select BAR0/1 */
	writel(~(ep_bars[0][3]-1), pcieg->base_ctrl + PCIE1_GPEXD_BAR_SIZE_LDW_OFF);   /* the returned value by RC reading EP BAR size */
	writel(~(ep_bars[0][2]), pcieg->base_ctrl + PCIE1_GPEXD_BAR_SIZE_UDW_OFF);   /* the returned value by RC reading EP BAR size */
#if DEBUG_PCIE
	dev_warn(pcieg->dev, "BAR0/1 NEW L = 0x%08x\n",
		readl(pcieg->base_ctrl + PCIE1_GPEXD_BAR_SIZE_LDW_OFF));
	dev_warn(pcieg->dev, "BAR0/1 NEW H = 0x%08x\n",
		readl(pcieg->base_ctrl + PCIE1_GPEXD_BAR_SIZE_UDW_OFF));
#endif

	/* BAR2/3 */
	writel((ep_bars[1][1] | PCIE1_PAB_PEX_AMAP_BAR__ENABLE__VALUE),
		pcieg->base_ctrl + PCIE1_PAB_PEX_AMAP_BAR1_F0_OFF);
	writel(ep_bars[1][0], pcieg->base_ctrl + PCIE1_PAB_PEX_AMAP_BAR1_F0X_OFF);
	writel(1, pcieg->base_ctrl + PCIE1_GPEXD_BAR_SEL_OFF);   /* select BAR2/3 */
	writel(~(ep_bars[1][3]-1), pcieg->base_ctrl + PCIE1_GPEXD_BAR_SIZE_LDW_OFF);   /* the returned value by RC reading EP BAR size */
	writel(~(ep_bars[1][2]), pcieg->base_ctrl + PCIE1_GPEXD_BAR_SIZE_UDW_OFF);   /* the returned value by RC reading EP BAR size */
#if DEBUG_PCIE
	dev_warn(pcieg->dev, "BAR2/3 NEW L = 0x%08x\n",
		readl(pcieg->base_ctrl + PCIE1_GPEXD_BAR_SIZE_LDW_OFF));
	dev_warn(pcieg->dev, "BAR2/3 NEW H = 0x%08x\n",
		readl(pcieg->base_ctrl + PCIE1_GPEXD_BAR_SIZE_UDW_OFF));
#endif

	/* BAR4/5 */
	writel((ep_bars[2][1] | PCIE1_PAB_PEX_AMAP_BAR__ENABLE__VALUE),
		pcieg->base_ctrl + PCIE1_PAB_PEX_AMAP_BAR2_F0_OFF);
	writel(ep_bars[2][0], pcieg->base_ctrl + PCIE1_PAB_PEX_AMAP_BAR2_F0X_OFF);
	writel(2, pcieg->base_ctrl + PCIE1_GPEXD_BAR_SEL_OFF);   /* select BAR4/5 */
	writel(~(ep_bars[2][3]-1), pcieg->base_ctrl + PCIE1_GPEXD_BAR_SIZE_LDW_OFF);   /* the returned value by RC reading EP BAR size */
	writel(~(ep_bars[2][2]), pcieg->base_ctrl + PCIE1_GPEXD_BAR_SIZE_UDW_OFF);   /* the returned value by RC reading EP BAR size */
#if DEBUG_PCIE
	dev_warn(pcieg->dev, "BAR4/5 NEW L = 0x%08x\n",
		readl(pcieg->base_ctrl + PCIE1_GPEXD_BAR_SIZE_LDW_OFF));
	dev_warn(pcieg->dev, "BAR4/5 NEW H = 0x%08x\n",
		readl(pcieg->base_ctrl + PCIE1_GPEXD_BAR_SIZE_UDW_OFF));
#endif

	/*** Other EP settings ***/
	val = (PCIE1_PAB_CTRL__ENABLE_AMBA_PIO__MASK |
			PCIE1_PAB_CTRL__ENABLE_PEX_PIO__MASK |
			PCIE1_PAB_CTRL__ENABLE_WDMA__MASK |
			PCIE1_PAB_CTRL__ENABLE_RDMA__MASK |
#ifdef Q6300   /* QB63xx only supports 4 bursts */
			PCIE1_PAB_CTRL__AXI_MAX_BURST_4__VALUE |
#else
			PCIE1_PAB_CTRL__AXI_MAX_BURST_16__VALUE |
#endif
			PCIE1_PAB_CTRL__DESC_DEPTH_4__VALUE |
			PCIE1_PAB_CTRL__MAX_DMA_128__VALUE /*PCIE1_PAB_CTRL__MAX_DMA_512__VALUE*/);
	writel(val, pcieg->base_ctrl + PCIE1_PAB_CTRL_OFF);

	/* EP MRRS    - PCIE1_PAB_CTRL[12:10], set above... */
	/*              PCIE1_GPEXP_CFG_DEVCTRL[14:12] */
	/* EP/RC MPS  - PCIE1_GPEXP_CFG_DEVCTRL[7:5] */
	val = readl(pcieg->base_ctrl + PCIE1_GPEXP_CFG_DEVCTRL_OFF);
	val &= 0xffff8f1f;
	val |= (0 << 12);  // MRRS only for EP: 0-128, 1-256, 2-512, others not supported
	val |= (0 << 5);   // MPS must be the same for RC and EP: 0-128, 1-256, maybe depending on the device.
	writel(val, pcieg->base_ctrl + PCIE1_GPEXP_CFG_DEVCTRL_OFF);

	/* Set class code as memory device */
	val = 0x05000000;
	writel(val, pcieg->base_ctrl + PCIE1_GPEXD_CLASSCODE_OFF);
#if DEBUG_PCIE
	dev_warn(pcieg->dev, "PCIE1_GPEXD_CLASSCODE 0x%08X\n",
		readl(pcieg->base_ctrl + PCIE1_GPEXD_CLASSCODE_OFF));
#endif

	/* Set config ready */
	writel(PCIE1_GPEXD_CFG_RDY__CFG_RDY__MASK,
			pcieg->base_ctrl + PCIE1_GPEXD_CFG_RDY_OFF);

	/* Set command register */
	val = readl(pcieg->base_ctrl + PCIE1_GPEXP_CFG_COMMAND_OFF);
	val |= (PCIE1_CFG_COMMAND_STATUS__BUS_MASTER__VALUE |
			PCIE1_CFG_COMMAND_STATUS__MEMORY_SPACE__VALUE |
			PCIE1_CFG_COMMAND_STATUS__IO_SPACE__VALUE);
	writel(val, pcieg->base_ctrl + PCIE1_GPEXP_CFG_COMMAND_OFF);

	return 0;
}

#if SELF_TEST_MSI   /* manually send MSI */
/*
 * Self-test helper: make the endpoint raise an interrupt towards the RC.
 *
 * When bit 10 (0x400) of the config command/status register is set, an MSI
 * is sent through the mailbox; otherwise an INTA message is sent and the
 * function busy-waits, without a timeout, first for the mailbox ready bit
 * to clear and then for bit 10 of PCIE1_PAB_PEX_INT_STAT to clear.
 */
static void quasar_pcieg_send_msi(struct quasar_pcieg *pcieg)
{
	/* Check if INTx or MSI */
	if (readl(pcieg->base_ctrl + PCIE1_GPEXP_CFG_COMMAND_OFF) & 0x00000400) {  /* MSI */
		writel(0x2, pcieg->base_ctrl + PCIE1_PAB_PEX_INT_MISC_VEC_OFF);
		/* Set mailbox ready bit to indicate that mailbox has valid data to be transferred.
		   This will cause EP to create PCIE memory write across the link in order to send the MSI. */
		dev_warn(pcieg->dev, "PCIE1_PAB_MB_AXI_DATA 0x%08x\n",
			readl(pcieg->base_ctrl + PCIE1_PAB_MB_AXI_DATA_OFF));
		writel(0x1, pcieg->base_ctrl + PCIE1_PAB_MB_AXI_CTRL_OFF);
	} else {   /* INTA */
		dev_warn(pcieg->dev, "EP is sending ASSERT_INTA CFG_COMMAND_STATUS: 0x%08X\n",
			readl(pcieg->base_ctrl + PCIE1_GPEXP_CFG_COMMAND_OFF));
		writel(0x88, pcieg->base_ctrl + PCIE1_PAB_MB_AXI_DATA_OFF);   /* prepare message for RC */
		writel(0x01, pcieg->base_ctrl + PCIE1_PAB_MB_AXI_CTRL_OFF);
		while ((readl(pcieg->base_ctrl + PCIE1_PAB_MB_AXI_CTRL_OFF) & 1) == 1) {
			//msleep(100);   /* wait for a while */
		}
		dev_warn(pcieg->dev, "RC has received ASSERT_INTA\n");
		dev_warn(pcieg->dev, "PCIE1_PAB_MB_AXI_DATA 0x%08X\n",
			readl(pcieg->base_ctrl + PCIE1_PAB_MB_AXI_DATA_OFF));
		/* for now, PCIE1_PAB_PEX_INT_STAT[10] will be cleared by RC, just check it.
		   The masked value is either 0 or 0x400, so it has to be compared
		   against 0: comparing against 1 never matched and skipped the wait. */
		while ((readl(pcieg->base_ctrl + PCIE1_PAB_PEX_INT_STAT_OFF) & 0x400) != 0) {
			//msleep(100);   /* wait for a while */
		}
		dev_warn(pcieg->dev, "EP has sent DEASSERT_INTA\n");
	}
}
#endif

static irqreturn_t quasar_pcieg_isr(int irq, void *_pdev)
{
	u32 val;
	struct platform_device *pdev = _pdev;
	struct quasar_pcieg *pcieg = platform_get_drvdata(pdev);

	if ((pcieg->wdma_wait == 1) || (pcieg->rdma_wait== 1)) {
		if (pcieg->wdma_wait == 1) {
			wake_up(&pcieg->wdma_wait_queue);
			pcieg->wdma_wait = 0;
		}
		if (pcieg->rdma_wait == 1) {
			wake_up(&pcieg->rdma_wait_queue);
			pcieg->rdma_wait = 0;
		}
		pcieg->time_end = readl(pcieg->hrtcnt1);
	}

	/* PCIE1_PAB_PEX_INT_STAT
		[ 0] => AMBA PIO completed
		[ 1] => AMBA PIO aborted
		[ 2] => PEX PIO completed
		[ 3] => PEX PIO aborted
		[ 4] => WDMA last descriptor in a chain completed
		[ 5] => WDMA for a descriptor completed
		[ 6] => WDMA aborted
		[ 7] => RDMA last descriptor in a chain completed
		[ 8] => RDMA for a descriptor completed
		[ 9] => RDMA aborted
		[10] => AMBA-to-PCIE mailbox is ready to send
		[11] => AMBA bus reset
	*/
	val = readl(pcieg->base_ctrl + PCIE1_PAB_PEX_INT_STAT_OFF);
	writel(val, pcieg->base_ctrl + PCIE1_PAB_PEX_INT_STAT_OFF);
#if DEBUG_PCIE
	if (val)
		dev_warn(pcieg->dev,	"PAB_PEX_INT_STAT=0x%08x\n", val);
#endif

	val = readl(pcieg->base_ctrl + PCIE1_PAB_AXI_INT_WDMA_STAT_OFF);
	writel(val, pcieg->base_ctrl + PCIE1_PAB_AXI_INT_WDMA_STAT_OFF);
#if DEBUG_PCIE
	if (val)
		dev_warn(pcieg->dev,	"PAB_AXI_INT_WDMA_STAT=0x%08x\n", val);
#endif

	val = readl(pcieg->base_ctrl + PCIE1_PAB_AXI_INT_RDMA_STAT_OFF);
	writel(val, pcieg->base_ctrl + PCIE1_PAB_AXI_INT_RDMA_STAT_OFF);
#if DEBUG_PCIE
	if (val)
		dev_warn(pcieg->dev,	"PAB_AXI_INT_RDMA_STAT=0x%08x\n", val);
#endif

	val = readl(pcieg->base_ctrl + PCIE1_PAB_AXI_INT_PIO_STAT_OFF);
	writel(val, pcieg->base_ctrl + PCIE1_PAB_AXI_INT_PIO_STAT_OFF);
#if DEBUG_PCIE
	if (val)
		dev_warn(pcieg->dev,	"PAB_AXI_INT_PIO_STAT=0x%08x\n", val);
#endif

	val = readl(pcieg->base_ctrl + PCIE1_PAB_AXI_INT_MISC_STAT_OFF);
	writel(val, pcieg->base_ctrl + PCIE1_PAB_AXI_INT_MISC_STAT_OFF);
#if DEBUG_PCIE
	if (val)
		dev_warn(pcieg->dev,	"PAB_AXI_INT_MISC_STAT=0x%08x\n", val);
#endif

	val = readl(pcieg->base_wrap + PCIE1_INT_FLAG_OFF);
	writel(val, pcieg->base_wrap + PCIE1_INT_CLR_OFF);
#if DEBUG_PCIE
	if (val)
		dev_warn(pcieg->dev,	"INT_CLR=0x%08x\n", val);
#endif

#if DEBUG_PCIE
	dev_warn(pcieg->dev, "g_int done\n");
#endif
	return IRQ_NONE; /*IRQ_HANDLED*/
}

/*
 * quasar_pcieg_int_init - install the interrupt handler and enable sources
 * @pcieg: driver instance; pcieg->pdev, pcieg->irq and the register mappings
 *         must already be set up, and the platform drvdata must point at
 *         @pcieg because the handler looks it up from the platform device.
 *
 * The handler is registered first so that no enabled source can fire while
 * nothing is listening on the (shared) line; only then are the interrupt
 * enable registers programmed.
 *
 * Returns 0 on success or the error code reported by request_irq(), in which
 * case no interrupt source has been enabled.
 */
static int quasar_pcieg_int_init(struct quasar_pcieg *pcieg)
{
	int ret;
	struct platform_device	*pdev = pcieg->pdev;

	ret = request_irq(pcieg->irq, quasar_pcieg_isr,
			IRQF_SHARED, MODULE_NAME, pdev);
	if (ret) {
		dev_err(pcieg->dev, "request_irq: %d failed\n", pcieg->irq);
		return ret;
	}

	/* Wrapper level: enable the controller interrupt only,
	   don't enable PERST# interrupt */
	writel((PCIE1_INT_EN__GDA_PAB__MASK),
		pcieg->base_wrap + PCIE1_INT_EN_OFF);

	/* PEX side: mailbox ready and WDMA/RDMA completion */
	writel((PCIE1_PAB_PEX_INT_STAT__MAILBOX_READY__MASK |
			PCIE1_PAB_PEX_INT_STAT__WDMA_COMPLETE__MASK |
			PCIE1_PAB_PEX_INT_STAT__RDMA_COMPLETE__MASK),
		pcieg->base_ctrl + PCIE1_PAB_PEX_INT_EN_OFF);

	/* AXI side: DMA completion for both directions */
	writel(PCIE1_PAB_AXI_INT_WDMA_STAT__WDMA_COMPLETE__MASK,
		pcieg->base_ctrl + PCIE1_PAB_AXI_INT_WDMA_EN_OFF);
	writel(PCIE1_PAB_AXI_INT_RDMA_STAT__RDMA_COMPLETE__MASK,
		pcieg->base_ctrl + PCIE1_PAB_AXI_INT_RDMA_EN_OFF);

	/* AXI side: mailbox, MSI and INTA */
	writel((PCIE1_PAB_AXI_INT_MISC_EN__MAILBOX__MASK |
			PCIE1_PAB_AXI_INT_MISC_EN__MSI__MASK |
			PCIE1_PAB_AXI_INT_MISC_EN__PEX_INTA__MASK),
		pcieg->base_ctrl + PCIE1_PAB_AXI_INT_MISC_EN_OFF);

	return 0;
}

static void quasar_pcieg_backup_ep_resources(struct quasar_pcieg *pcieg)
{
	u32 i, j, k;

#if DEBUG_PCIE
	printk("\nPCIe EP original Resource:\n");
#endif
	/* Backup resources for device driver's access through BAR2 (EpBARs[2][1]) */
	for (i = 0; i < 3; i++) {
#if DEBUG_PCIE
		printk("\n");
#endif
		for (j = 0; j < 4; j++) {
			writel(ep_bars[i][j],
				(void *)((uint_port)pcieg->buf_bar45 + (i*4*4) + (j*4)));   /* backup ep_bars[][] */
#if DEBUG_PCIE
			printk("0x%08X ", ep_bars[i][j]);
#endif
		}
	}
	k = 3*4*4;

	for (i = 0; i < 4; i++) {
#if DEBUG_PCIE
		printk("\n");
#endif
		for (j = 0; j < 6; j++) {
			writel(ep_inbound_windows[i][j],
				(void *)((uint_port)pcieg->buf_bar45 + k + (i*6*4) + (j*4)));   /* backup ep_inbound_windows[][] */
#if DEBUG_PCIE
			printk("0x%08X ", ep_inbound_windows[i][j]);
#endif
		}
	}
	k += (4*6*4);
	writel((u32)pcieg->buf_dma, (void *)((uint_port)pcieg->buf_bar45 + k));   // EP's local buffer for DMA
	writel(0x55aa55aa, (void *)((uint_port)pcieg->buf_bar45 + k + 4));   // end tag

#if DEBUG_PCIE
	printk("\nBackup EP Resource from address 0x%08X:\n", (u32)(uint_port)pcieg->buf_bar45);
	for (i = 0; ; i++) {
		if (0x55aa55aa == readl((void *)(uint_port)pcieg->buf_bar45 + (i*4))) {
			printk("Backup EP Resources done\n");
			break;
		}
		printk("0x%08X ", readl((void *)(uint_port)pcieg->buf_bar45 + (i*4)));
		switch (i) {
		case 3:
		case 7:
		case 11:
		case 17:
		case 23:
		case 29:
		case 35:
			printk("\n");
		default:
			break;
		}
	}
#endif
}

/*
 * Block until the wrapper status register reports the data link as up, then
 * report the negotiated link speed.
 *
 * The status is sampled once per second and there is no timeout.
 *
 * Returns 0 for a Gen 1 or Gen 2 link, -ENODEV for any other speed value.
 */
static int quasar_pcieg_wait_link_up(struct quasar_pcieg *pcieg)
{
	u32 link_state = 0;
	u32 speed;

	while (link_state != PCIE1_MISC_STAT__GDA_PAB_DL_UP__MASK) {
		dev_warn(pcieg->dev, "Wait for PCIe link...\n");
		link_state = readl(pcieg->base_wrap + PCIE1_MISC_STAT_OFF) &
				PCIE1_MISC_STAT__GDA_PAB_DL_UP__MASK;
		msleep(1000);
	}

	/* The link speed is held in bits [19:16] of the link control register */
	speed = (readl(pcieg->base_ctrl + PCIE1_GPEXP_CFG_LINKCTRL_OFF) &
			0x000f0000) >> 16;

	switch (speed) {
	case 1:
		dev_warn(pcieg->dev, "PCIe link speed is Gen 1\n");
		break;
	case 2:
		dev_warn(pcieg->dev, "PCIe link speed is Gen 2\n");
		break;
	default:
		dev_warn(pcieg->dev, "PCIe link speed is unknown 0x%02x\n",
			speed);
		return -ENODEV;
	}

	return 0;
}

/*
 * Test thread body: once per second, log a running counter and - depending
 * on the SELF_TEST_DMA / SELF_TEST_MSI build switches - start a write DMA
 * followed by a read DMA and/or send a test interrupt.
 *
 * @pcie: the struct quasar_pcieg instance handed to kthread_create()
 *
 * Runs until kthread_should_stop() becomes true, then clears
 * pcieg->thread_task and returns 0.
 */
static int quasar_pcieg_thread(void *pcie)
{
	struct quasar_pcieg *pcieg = pcie;
	int round;
#if SELF_TEST_DMA
	/* transfer length in 32-bit words: the buffer minus its last word */
	const u32 dwords = (u32)(BUF_SIZE - 4) >> 2;
#endif

	for (round = 0; !kthread_should_stop(); round++) {
		dev_warn(pcieg->dev, "PCIe gadget task running %d\n", round);

#if SELF_TEST_DMA   /* Test QB66xx PCIe device's DMA */
		/* Write DMA: EP local buffer -> inbound window (EP->RC) */
		qpcieg_dma_status_clear(pcieg, 1 /*wdma*/);
		qpcieg_dma_start(pcieg, 1 /*wdma*/,
			(u32)(uint_port)pcieg->buf_dma /*src at EP*/,
			ep_inbound_windows[1][1] /*dst at EP*/,
			dwords);

		/* Read DMA: inbound window -> EP local buffer (EP<-RC) */
		qpcieg_dma_status_clear(pcieg, 0 /*rdma*/);
		qpcieg_dma_start(pcieg, 0 /*rdma*/,
			ep_inbound_windows[1][1] /*src at EP*/,
			(u32)(uint_port)pcieg->buf_dma /*dst at EP*/,
			dwords);
#endif

#if SELF_TEST_MSI   /* test manually send MSI */
		quasar_pcieg_send_msi(pcieg);
#endif
		msleep(1000);
	}

	pcieg->thread_task = NULL;
	return 0;
}

static int quasar_pcieg_probe(struct platform_device *pdev)
{
	int ret = 0;
	dev_t  pciegn;
	struct device *dev = &pdev->dev;
	struct quasar_pcieg *pcieg;

#if DEBUG_PCIE
	printk("quasar_pcieg_probe() starts\n");
#endif

	pcieg = kzalloc(sizeof(struct quasar_pcieg), GFP_KERNEL);
	if (!pcieg) {
		dev_err(dev, "out of memory\n");
		return -ENOMEM;
	}

	cdev_init(&pcieg->cdev, &quasar_pcieg_fops);
	pciegn = MKDEV(QPCIEG_MAJOR, 0);
	ret = cdev_add(&pcieg->cdev, pciegn, 1);
	if (ret) {
		dev_err(dev, "could not create char dev %d\n", pciegn);
		return ret;
	}

	pcieg->rsc_rstgen = platform_get_resource(pdev, IORESOURCE_MEM, 1);
	if (!pcieg->rsc_rstgen) {
		dev_err(dev, "No rsc_rstgen base resource\n");
		return -EINVAL;
	}
	pcieg->base_rstgen = ioremap(pcieg->rsc_rstgen->start,
		pcieg->rsc_rstgen->end - pcieg->rsc_rstgen->start + 1);

	pcieg->rsc_clkgen = platform_get_resource(pdev, IORESOURCE_MEM, 2);
	if (!pcieg->rsc_clkgen) {
		dev_err(dev, "No rsc_clkgen base resource\n");
		return -EINVAL;
	}
	pcieg->base_clkgen = devm_ioremap_resource(dev, pcieg->rsc_clkgen);

	pcieg->rsc_ctrl = platform_get_resource(pdev, IORESOURCE_MEM, 3);
	if (!pcieg->rsc_ctrl) {
		dev_err(dev, "No rsc_ctrl base resource\n");
		return -EINVAL;
	}
	pcieg->base_ctrl = devm_ioremap_resource(dev, pcieg->rsc_ctrl);

	pcieg->rsc_wrap = platform_get_resource(pdev, IORESOURCE_MEM, 4);
	if (!pcieg->rsc_wrap) {
		dev_err(dev, "No rsc_wrap base resource\n");
		return -EINVAL;
	}
	pcieg->base_wrap = devm_ioremap_resource(dev, pcieg->rsc_wrap);

	pcieg->rsc_gpf = platform_get_resource(pdev, IORESOURCE_MEM, 5);
	if (!pcieg->rsc_gpf) {
		dev_err(dev, "No rsc_gpf base resource\n");
		return -EINVAL;
	}
	pcieg->base_gpf = devm_ioremap_resource(dev, pcieg->rsc_gpf);

	pcieg->irq = platform_get_irq(pdev, 0);
	if (!pcieg->irq) {
		dev_err(dev, "No irq\n");
		return -EINVAL;
	}
#if DEBUG_PCIE
	dev_warn(dev, "irq=%d\n", pcieg->irq);
#endif

	pcieg->dev = dev;
	platform_set_drvdata(pdev, pcieg);
	pcieg->pdev = pdev;

	spin_lock_init(&pcieg->lock);

	init_waitqueue_head(&pcieg->rdma_wait_queue);
	init_waitqueue_head(&pcieg->wdma_wait_queue);
	pcieg->wdma_wait = 0;
	pcieg->rdma_wait = 0;
	pcieg->hrtcnt1 = ioremap(HRTCNT1, 4);

	/* Initialize PCIe hardware */
	quasar_pcieg_power_down(pcieg);
	quasar_pcieg_power_up(pcieg);
	quasar_pcieg_hw_init(pcieg);

	/* Initialize PCIe device controller */
	ret = quasar_pcieg_ep_init(pcieg);
	if (ret)
		return ret;
	ret = quasar_pcieg_int_init(pcieg);
	if (ret)
		return ret;

	/* Request DMA pool */
	pcieg->buf = dma_alloc_coherent(dev, BUF_SIZE, &pcieg->buf_dma,
		GFP_KERNEL);
	if (pcieg->buf == NULL) {
		dev_err(dev, "DMA local buffer allocate failed\n");
		ret = -ENOMEM;
	} else {
		memset(pcieg->buf, 0x78, BUF_SIZE);
#if DEBUG_PCIE
#ifdef Q6300
	dev_warn(dev, "=> buf_dma(0x%08x) buf(0x%px) size(0x%x)\n",
				(u32)pcieg->buf_dma, pcieg->buf, BUF_SIZE);
#else
	dev_warn(dev, "=> buf_dma(0x%px) buf(0x%px) size(0x%x)\n",
				(void *)pcieg->buf_dma, pcieg->buf, BUF_SIZE);
#endif
#endif
	}

	/* Wait for device link up */
	ret = quasar_pcieg_wait_link_up(pcieg);
	if (ret) {
		dev_warn(dev, "PCIe link up failed, probably no device\n");
		return ret;
	}
	msleep(2000);   /* wait for host's configuring device */

#if DEBUG_PCIE
	printk("BAR0/1 PCIe addr=0x%08x_%08x\n",
		readl(pcieg->base_ctrl + 0x14), readl(pcieg->base_ctrl + 0x10));
	printk("BAR2/3 PCIe addr=0x%08x_%08x\n",
		readl(pcieg->base_ctrl + 0x1c), readl(pcieg->base_ctrl + 0x18));
	printk("BAR4/5 PCIe addr=0x%08x_%08x\n",
		readl(pcieg->base_ctrl + 0x24), readl(pcieg->base_ctrl + 0x20));
#endif

	/* Backup EP resource */
	quasar_pcieg_backup_ep_resources(pcieg);

	msleep(10000);   /* wait for host */

	/* thread for testing */
	pcieg->thread_task = NULL;
	if (pcieg->thread_task == NULL) {
		pcieg->thread_task = 
			kthread_create(quasar_pcieg_thread,
				pcieg, "pcieg-process");
		if (IS_ERR(pcieg->thread_task)) {
			printk("PCIe device thread failed!\n");
			ret = PTR_ERR(pcieg->thread_task);
		}
	}
	wake_up_process(pcieg->thread_task);

#if DEBUG_PCIE
	dev_warn(dev, "quasar_pcie_probe() exits %d\n", ret);
#endif
	return ret;
}

static int quasar_pcieg_remove(struct platform_device *pdev)
{
	struct quasar_pcieg *pcieg = platform_get_drvdata(pdev);

	iounmap(pcieg->hrtcnt1);
	return 0;
}

/*
 * Device-tree match table.
 *
 * Exactly one compatible string is compiled in, selected by the Q6300,
 * BOARD_TYPE_EP4 or BOARD_TYPE_EP3L build macro (checked in that order).
 * If none of them is defined the table holds only the terminating entry.
 */
static const struct of_device_id qbit_quasar_id_table[] = {
#if defined(Q6300)
	{ .compatible = "qbit,quasar-pcie" },
#elif defined(BOARD_TYPE_EP4)
	{ .compatible = "qbit,quasar-pcie1" },
#elif defined(BOARD_TYPE_EP3L)
	{ .compatible = "qbit,quasar-pcie2" },
#endif
	{}   /* sentinel */
};
MODULE_DEVICE_TABLE(of, qbit_quasar_id_table);

/*
 * Platform driver descriptor: ties the probe/remove callbacks to the driver
 * name "quasar-pcie" and to the device-tree match table above.
 */
static struct platform_driver quasar_pcieg_driver = {
	.probe    = quasar_pcieg_probe,
	.remove   = quasar_pcieg_remove,
	.driver = {
		.name = "quasar-pcie",
		.of_match_table = of_match_ptr(qbit_quasar_id_table),	
	},
};

/*
 * Module entry point: register the platform driver.
 *
 * Returns the result of platform_driver_register().
 */
static int __init quasar_pcieg_init(void)
{
#if DEBUG_PCIE
	printk("quasar_pcieg_init()\n");
#endif
	return platform_driver_register(&quasar_pcieg_driver);
}
module_init(quasar_pcieg_init);

/* Module exit point: unregister the platform driver. */
static void __exit quasar_pcieg_exit(void)
{
#if DEBUG_PCIE
	printk("quasar_pcieg_exit\n");
#endif
	platform_driver_unregister(&quasar_pcieg_driver);
}
module_exit(quasar_pcieg_exit);

/* Module metadata */
MODULE_DESCRIPTION("Quasar PCIe gadget driver");
MODULE_LICENSE("Dual BSD/GPL");
