/*
 ***************************************************************************************
 * (c) Copyright 2015 Marvell International Ltd.
 **************************************************************************************
 *
 * Marvell Commercial License Option
 *
 * If you received this File from Marvell as part of a proprietary software release,
 * the File is considered Marvell Proprietary and Confidential Information, and is
 * licensed to you under the terms of the applicable Commercial License.
 *
 **************************************************************************************
 *
 * Marvell GPL License Option
 *
 * If you received this File from Marvell as part of a Linux distribution, this File
 * is licensed to you in accordance with the terms and conditions of the General Public
 * License Version 2, June 1991 (the "GPL License").  You can redistribute it and/or
 * modify it under the terms of the GPL License; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
 * PARTICULAR PURPOSE.  See the GPL License for more details.
 *
 * You should have received a copy of the GNU General Public License along with this
 * program.  If not, see http://www.gnu.org/licenses/old-licenses/gpl-2.0.html.
 *
 **************************************************************************************
 */

#include <linux/io.h>        // for ioread/write32 (pie Read/Write macros)
#include <linux/export.h>    // for EXPORT_SYMBOL
#include <linux/spinlock.h>  // for spinlock_t

#include "PIE_regheaders.h"
#include "pie_full_subblock_list.h" // needed for detailed pie_handle
#include "pie_handle.h"
#include "pie_data.h"
#include "pie_if.h"
#include "pie_driver.h"
#include "pie_sccsc.h"
#include "pie_sccsc_if.h"

#include "pie_driverlib_if.h"  // for print macros


// SC CSC driver is currently expecting 17^3 (= 4913) 32 bit iLUT entries.
// Each 32 bit LUT entry should be packed as:
//   red[29:20]  green[19:10]  blue[9:0]
// Note that each color entry is only the most significant 10 bits.
#define SUPPORTED_SCCSC_ILUT_ENTRIES 4913

// iLUT RAM size: the shadow arrays below are laid out as
// SCCSC_RAM_NUM_BANKS banks of SCCSC_RAM_BANK_SIZE 32 bit words each.
#define SCCSC_RAM_NUM_BANKS     8
#define SCCSC_RAM_BANK_SIZE     655
#define SCCSC_RAM_ENTRIES       (SCCSC_RAM_NUM_BANKS * SCCSC_RAM_BANK_SIZE)

// Byte value handed to memset() to pre-fill the shadow arrays before they
// are populated; every 32 bit word that is never overwritten therefore
// holds 0x3A3A3A3A.
#define SCCSC_ILUT_FILL         0x3A

// Arrays for each iLUT RAM. Keep a write and read copy to help with
// debugging.
//   sccsc_lut_ram_wr - per-bank image built by load_sccsc_ilut and written
//                      to the hardware
//   sccsc_lut_ram_rd - per-bank image filled by read_sccsc_ilut from the
//                      hardware
// Both are file-global and shared by every caller in this file.
uint32_t sccsc_lut_ram_wr[SCCSC_RAM_NUM_BANKS][SCCSC_RAM_BANK_SIZE];
uint32_t sccsc_lut_ram_rd[SCCSC_RAM_NUM_BANKS][SCCSC_RAM_BANK_SIZE];


// Debug helper: print the current value of the SC CSC registers that are
// safe to read (CNTRL, WT, WR_REG0-2, KR_REG0-2, REV, PARAMS), one per line.
//
// device_data is not named directly in this body; presumably the sccscRead
// macro picks it up to locate the register block - confirm against the macro
// definition.
// NOTE(review): the reads are not wrapped in PROTECT_REG_ACCESS.
void dump_sccsc_regs(sccscData *device_data)
{
    print("PIE Subcube CSC Registers\n");
    // STATUS not used in pie sccsc instance
    print("CNTRL  =0x%X\n",  sccscRead(CNTRL));
    // MCClr, LCClr, CACHE_INVLD, CACHE_CNTRL, IDX_MISS_COUNTER, PIXEL_MISS_COUNTER,
    // LATENCY_COUNTER, BASE_ADDR not used in pie sccsc instance
    print("WT     =0x%X\n", sccscRead(WT));
    print("WR_REG0=0x%X\n", sccscRead(WR_REG0));
    print("WR_REG1=0x%X\n", sccscRead(WR_REG1));
    print("WR_REG2=0x%X\n", sccscRead(WR_REG2));
    print("KR_REG0=0x%X\n", sccscRead(KR_REG0));
    print("KR_REG1=0x%X\n", sccscRead(KR_REG1));
    print("KR_REG2=0x%X\n", sccscRead(KR_REG2));
    // RAR is write only
    // Note: not dumping RDW* or RDR* because accessing these registers may have
    //       unintended consequences (for example, if autoinc is enabled or the
    //       address register is wonky).
    //print("RDW0   =0x%X\n", sccscRead(RDW0));
    //print("RDW1   =0x%X\n", sccscRead(RDW1));
    //print("RDW2   =0x%X\n", sccscRead(RDW2));
    //print("RDR0   =0x%X\n", sccscRead(RDR0));
    //print("RDR1   =0x%X\n", sccscRead(RDR1));
    //print("RDR2   =0x%X\n", sccscRead(RDR2));
    print("REV    =0x%X\n", sccscRead(REV));
    print("PARAMS =0x%X\n", sccscRead(PARAMS));
}

// Write 'count' 32 bit words from 'data' into iLUT RAM, starting at RAM
// address 'addr'.
//
//   device_data - SC CSC instance (used by the register access macros)
//   addr        - value placed in the RAR address field
//   count       - number of words to write; must fit in a single bank
//   data        - source words, written in order through RDW0
//
// WARNING: the caller must already have enabled CPU access to the LUT RAM.
static void sccsc_wr_ram(sccscData *device_data, uint32_t addr, uint32_t count, uint32_t* data)
{
    uint32_t rar = 0;
    uint32_t remaining;
    const uint32_t *src;

    // A request larger than one bank is a programming error
    BUG_ON(count > SCCSC_RAM_BANK_SIZE);

    // Program the RAM address register: target address, write mode, and
    // auto increment so consecutive RDW0 writes land in consecutive slots
    rar = SCCSC_RAR_ADDR_REPLACE_VAL(rar, addr);
    rar = SCCSC_RAR_RNW_REPLACE_VAL(rar, 0);
    rar = SCCSC_RAR_AUTOINC_REPLACE_VAL(rar, 2);
    sccscWrite(RAR, rar);

    // Stream the words out.  RDW1 and RDW2 are deliberately left alone:
    // the PIE SC CSC never takes values wider than 32 bits.
    for (src = data, remaining = count; remaining != 0; remaining--, src++)
    {
        sccscWrite(RDW0, *src);
    }
}

// Load a complete iLUT table into the SC CSC, starting at iLUT address 0.
//
//   device_data - SC CSC instance (used by the register access macros)
//   lut_data    - contiguous table of packed 32 bit entries; must not be NULL
//   lut_entries - number of entries in lut_data; only a full table of
//                 SUPPORTED_SCCSC_ILUT_ENTRIES is accepted
//
// The table is first scattered into the sccsc_lut_ram_wr shadow banks and
// the banks are then written to the hardware with CPU access temporarily
// enabled.  The previous CPU access setting is restored afterwards.
static void load_sccsc_ilut(sccscData *device_data, 
                            const uint32_t lut_data[], uint32_t lut_entries)
{
    uint32_t plane, row, col;
    uint32_t src_idx = 0;   // next entry to take from lut_data
    uint32_t slot = 0;      // linear slot number across all banks
    uint32_t bank;
    uint32_t cntrl, prev_cpu_access;

    // For now only the entire table is supported, complain otherwise
    BUG_ON(lut_entries != SUPPORTED_SCCSC_ILUT_ENTRIES);
    BUG_ON(lut_data == NULL);

    // Slots that the scatter below never touches keep the fill pattern
    memset(sccsc_lut_ram_wr, SCCSC_ILUT_FILL, sizeof(sccsc_lut_ram_wr));

    // The input table is contiguous but the HW RAM address space has holes:
    // one slot is skipped after every row of 17 entries, plus two more after
    // every plane of 17 rows (layout taken from the programmers guide).
    // A slot maps to bank (slot % banks) at offset (slot / banks).
    for (plane = 0; plane < 17; plane++)
    {
        for (row = 0; row < 17; row++)
        {
            for (col = 0; col < 17; col++)
            {
                sccsc_lut_ram_wr[slot % SCCSC_RAM_NUM_BANKS]
                                [slot / SCCSC_RAM_NUM_BANKS] = lut_data[src_idx];
                src_idx++;
                slot++;
            }
            slot++;
        }
        slot += 2;
    }

    PROTECT_REG_ACCESS;

    // Remember the current CPU access setting, then turn CPU access on so
    // the iLUT RAM can be written
    cntrl = sccscRead(CNTRL);
    prev_cpu_access = SCCSC_CNTRL_CPUACCESS_MASK_SHIFT(cntrl);
    cntrl = SCCSC_CNTRL_CPUACCESS_REPLACE_VAL(cntrl, 1);
    sccscWrite(CNTRL, cntrl);

    // Write every bank.  Address bit 13 = 0 selects LUT RAM access and
    // bits 12:10 select the bank, so each bank starts at (bank << 10).
    for (bank = 0; bank < SCCSC_RAM_NUM_BANKS; bank++)
    {
        sccsc_wr_ram(device_data, bank << 10, SCCSC_RAM_BANK_SIZE,
                     sccsc_lut_ram_wr[bank]);
    }

    // Put the CPU access field back the way it was found
    cntrl = sccscRead(CNTRL);
    cntrl = SCCSC_CNTRL_CPUACCESS_REPLACE_VAL(cntrl, prev_cpu_access);
    sccscWrite(CNTRL, cntrl);

    UNPROTECT_REG_ACCESS;
}

// Read 'count' 32 bit words of iLUT RAM into 'data', starting at RAM
// address 'addr'.
//
//   device_data - SC CSC instance (used by the register access macros)
//   addr        - value placed in the RAR address field
//   count       - number of words to read; must fit in a single bank
//   data        - destination buffer, filled in order from RDR0
//
// WARNING: the caller must already have enabled CPU access to the LUT RAM.
static void sccsc_rd_ram(sccscData *device_data, uint32_t addr, uint32_t count, uint32_t* data)
{
    uint32_t rar = 0;
    uint32_t remaining;
    uint32_t *dst;

    // A request larger than one bank is a programming error
    BUG_ON(count > SCCSC_RAM_BANK_SIZE);

    // Program the RAM address register: target address, read mode, and
    // auto increment so consecutive RDR0 reads come from consecutive slots
    rar = SCCSC_RAR_ADDR_REPLACE_VAL(rar, addr);
    rar = SCCSC_RAR_RNW_REPLACE_VAL(rar, 1);
    rar = SCCSC_RAR_AUTOINC_REPLACE_VAL(rar, 2);
    sccscWrite(RAR, rar);

    // Pull the words in.  RDR1 and RDR2 are deliberately left alone:
    // the PIE SC CSC never uses values wider than 32 bits.
    for (dst = data, remaining = count; remaining != 0; remaining--, dst++)
    {
        *dst = sccscRead(RDR0);
    }
}

// Read the entire iLUT RAM into the sccsc_lut_ram_rd shadow banks.
//
// The data is stored bank by bank exactly as the hardware holds it, i.e. it
// is NOT converted back to the contiguous table layout that
// load_sccsc_ilut accepts, so a direct comparison against such a table is
// not straightforward.
//
// CPU access is enabled for the duration of the read and the previous
// setting is restored afterwards.
static void read_sccsc_ilut(sccscData *device_data)
{
    uint32_t bank;
    uint32_t cntrl, prev_cpu_access;

    PROTECT_REG_ACCESS;

    // Remember the current CPU access setting, then turn CPU access on so
    // the iLUT RAM can be read
    cntrl = sccscRead(CNTRL);
    prev_cpu_access = SCCSC_CNTRL_CPUACCESS_MASK_SHIFT(cntrl);
    cntrl = SCCSC_CNTRL_CPUACCESS_REPLACE_VAL(cntrl, 1);
    sccscWrite(CNTRL, cntrl);

    // Read every bank.  Address bit 13 = 0 selects LUT RAM access and
    // bits 12:10 select the bank, so each bank starts at (bank << 10).
    for (bank = 0; bank < SCCSC_RAM_NUM_BANKS; bank++)
    {
        sccsc_rd_ram(device_data, bank << 10, SCCSC_RAM_BANK_SIZE,
                     sccsc_lut_ram_rd[bank]);
    }

    // Put the CPU access field back the way it was found
    cntrl = sccscRead(CNTRL);
    cntrl = SCCSC_CNTRL_CPUACCESS_REPLACE_VAL(cntrl, prev_cpu_access);
    sccscWrite(CNTRL, cntrl);

    UNPROTECT_REG_ACCESS;
}

// Debug helper: read the whole iLUT back from the hardware and print it,
// bank by bank, eight words per output line.  Each line is prefixed with
// "[bank:offset]".
static void dump_sccsc_ilut(sccscData *device_data)
{
    int bank, word;

    // Pre-fill the read shadow with the fill pattern, then overwrite it
    // with the actual RAM contents
    memset(sccsc_lut_ram_rd, SCCSC_ILUT_FILL, sizeof(sccsc_lut_ram_rd));
    read_sccsc_ilut(device_data);

    for (bank = 0; bank < SCCSC_RAM_NUM_BANKS; bank++)
    {
        const uint32_t *bank_words = sccsc_lut_ram_rd[bank];

        for (word = 0; word < SCCSC_RAM_BANK_SIZE; word++)
        {
            // Start a new labelled line every eighth word
            if ((word % 8) == 0)
            {
                print("\n[%d:0x%04x] ", bank, word);
            }
            print("0x%08x ", bank_words[word]);
        }
        print("\n");
    }
}

// Debug helper: read the whole iLUT back from the hardware and compare it,
// word for word, with the image most recently built in sccsc_lut_ram_wr.
// Every mismatch is printed as "[bank:offset] rd=... wr=...".
//
// Returns the number of mismatching words (0 when the images are identical).
static int dbg_verify_sccsc_ilut(sccscData *device_data)
{
    int mismatches = 0;
    int bank, word;

    // Pre-fill the read shadow with the fill pattern, then overwrite it
    // with the actual RAM contents
    memset(sccsc_lut_ram_rd, SCCSC_ILUT_FILL, sizeof(sccsc_lut_ram_rd));
    read_sccsc_ilut(device_data);

    for (bank = 0; bank < SCCSC_RAM_NUM_BANKS; bank++)
    {
        for (word = 0; word < SCCSC_RAM_BANK_SIZE; word++)
        {
            uint32_t read_back = sccsc_lut_ram_rd[bank][word];
            uint32_t written   = sccsc_lut_ram_wr[bank][word];

            if (read_back == written)
            {
                continue;
            }

            print("[%d:0x%04x] rd=0x%08x wr=0x%08x\n", bank, word,
                    read_back, written);
            mismatches++;
        }
    }

    return mismatches;
}


// Check that the major revision reported by the hardware REV register
// matches the major revision recorded in the handle's REV image.
//
// Returns 0 when they match, -1 (after logging both values) when they differ.
static int pie_revcheck(sccscData *device_data,
                        struct pie_handle_t *pie_handle)
{
    uint32_t hw_major;
    uint32_t handle_major;

    hw_major = SCCSC_REV_MAJ_MASK_SHIFT(sccscRead(REV));
    handle_major = SCCSC_REV_MAJ_MASK_SHIFT(pie_handle->pie_sccsc->REV);

    if (hw_major == handle_major)
    {
        return 0;
    }

    error_print("%s: %s failed, rev0=%d, handle rev=%d\n",
                __FILE__, __func__, hw_major, handle_major);
    return -1;
}

// "Reset" the SC CSC by setting the BYPASSALL field of CNTRL; all other
// CNTRL bits are preserved.  The block is intentionally left in bypass:
// whoever wants to use this subblock has to enable it (take it out of
// bypass) explicitly.
static void pie_reset(sccscData *device_data)
{
    uint32_t cntrl;

    PROTECT_REG_ACCESS;

    // read-modify-write so only the bypass field changes
    cntrl = sccscRead(CNTRL);
    cntrl = SCCSC_CNTRL_BYPASSALL_REPLACE_VAL(cntrl, 1);
    sccscWrite(CNTRL, cntrl);

    UNPROTECT_REG_ACCESS;
}
 
// Program the SC CSC hardware from the register image held in
// pie_handle->pie_sccsc.  The writable configuration registers (CNTRL, WT,
// WR_REG0-2, KR_REG0-2) are written verbatim, CNTRL first, all inside a
// single PROTECT_REG_ACCESS / UNPROTECT_REG_ACCESS section.
//
//   device_data - SC CSC instance (not named directly in this body;
//                 presumably used by the register access macros - confirm)
//   pie_handle  - handle whose pie_sccsc member supplies the values
//
// The iLUT RAM itself is not touched here; see load_sccsc_ilut.
static void pie_configure(sccscData *device_data,
                          struct pie_handle_t *pie_handle)
{
    // There is no callback to configure for sccsc

    PROTECT_REG_ACCESS;

    // STATUS is read only or not used

    sccscWrite(CNTRL, pie_handle->pie_sccsc->CNTRL);

    // MCClr, LCClr, CACHE_INVLD, CACHE_CNTRL, IDX_MISS_COUNTER, PIXEL_MISS_COUNTER,
    // LATENCY_COUNTER, BASE_ADDR are read only or not used

    sccscWrite(WT,      pie_handle->pie_sccsc->WT);
    sccscWrite(WR_REG0, pie_handle->pie_sccsc->WR_REG0);
    sccscWrite(WR_REG1, pie_handle->pie_sccsc->WR_REG1);
    sccscWrite(WR_REG2, pie_handle->pie_sccsc->WR_REG2);
    sccscWrite(KR_REG0, pie_handle->pie_sccsc->KR_REG0);
    sccscWrite(KR_REG1, pie_handle->pie_sccsc->KR_REG1);
    sccscWrite(KR_REG2, pie_handle->pie_sccsc->KR_REG2);

    // RAR, RDW0, RDW1, RDW2 are used in the SRAM LUT loading process

    // RDR0, RDR1, RDR2, REV, PARAMS are read only

    UNPROTECT_REG_ACCESS;
}

// Snapshot the current hardware register values into the register image
// held in pie_handle->pie_sccsc: CNTRL, WT, WR_REG0-2, KR_REG0-2, REV and
// PARAMS.  The LUT data registers (RDW*/RDR*) are deliberately skipped.
//
//   device_data - SC CSC instance (not named directly in this body;
//                 presumably used by the register access macros - confirm)
//   pie_handle  - handle whose pie_sccsc member receives the values
//
// NOTE(review): unlike pie_configure, the reads here are not wrapped in
// PROTECT_REG_ACCESS - confirm whether callers serialize access themselves.
static void pie_get_current(sccscData *device_data,
                            struct pie_handle_t *pie_handle)
{
    // STATUS not used in pie sccsc instance

    pie_handle->pie_sccsc->CNTRL    = sccscRead(CNTRL);

    // MCClr, LCClr, CACHE_INVLD, CACHE_CNTRL, IDX_MISS_COUNTER, PIXEL_MISS_COUNTER,
    // LATENCY_COUNTER, BASE_ADDR not used in pie sccsc instance

    pie_handle->pie_sccsc->WT       = sccscRead(WT);
    pie_handle->pie_sccsc->WR_REG0  = sccscRead(WR_REG0);
    pie_handle->pie_sccsc->WR_REG1  = sccscRead(WR_REG1);
    pie_handle->pie_sccsc->WR_REG2  = sccscRead(WR_REG2);
    pie_handle->pie_sccsc->KR_REG0  = sccscRead(KR_REG0);
    pie_handle->pie_sccsc->KR_REG1  = sccscRead(KR_REG1);
    pie_handle->pie_sccsc->KR_REG2  = sccscRead(KR_REG2);

    // RAR is write only

    // Note: not reading RDW* or RDR* because accessing these registers may have
    //       unintended consequences (for example, if autoinc is enabled or the
    //       address register is wonky). Only used in LUT specific operations ...
    //pie_handle->pie_sccsc->RDW0     = sccscRead(RDW0);
    //pie_handle->pie_sccsc->RDW1     = sccscRead(RDW1);
    //pie_handle->pie_sccsc->RDW2     = sccscRead(RDW2);
    //pie_handle->pie_sccsc->RDR0     = sccscRead(RDR0);
    //pie_handle->pie_sccsc->RDR1     = sccscRead(RDR1);
    //pie_handle->pie_sccsc->RDR2     = sccscRead(RDR2);

    pie_handle->pie_sccsc->REV      = sccscRead(REV);
    pie_handle->pie_sccsc->PARAMS   = sccscRead(PARAMS);
}

// Function table for the SC CSC subblock.  pie_sccsc_init stores a pointer
// to this table in the device handle (fcn_tbl) that it registers with the
// parent, which is how the otherwise file-static functions are reached.
struct pie_sccsc_function_struct pie_sccsc_functions =
{
    // common subblock entry points
    .pie_reset        = pie_reset,
    .pie_configure    = pie_configure,
    .pie_get_current  = pie_get_current,
    .pie_revcheck     = pie_revcheck,
    
    // debug helpers
    .dump_sccsc_regs       = dump_sccsc_regs,
    .dump_sccsc_ilut       = dump_sccsc_ilut,
    .dbg_verify_sccsc_ilut = dbg_verify_sccsc_ilut,

    // iLUT RAM loading
    .load_sccsc_ilut  = load_sccsc_ilut,
};

// Create the SC CSC device handle and register it with the parent PIE
// driver.
//
//   device_data - SC CSC instance data; its reg_spinlock and interrupt
//                 callback fields are initialized here
//
// If the handle cannot be allocated, an error is logged and the subblock is
// left unregistered (the function has no way to report failure to its
// caller).
void pie_sccsc_init(sccscData *device_data)
{
    sccscDeviceHandle *pie_device_handle;

    pie_device_handle = allocate_memory(sizeof(sccscDeviceHandle), GFP_KERNEL);
    if (pie_device_handle == NULL)
    {
        // Nothing to register; bail out rather than dereference NULL
        error_print("%s: %s failed to allocate device handle\n",
                    __FILE__, __func__);
        return;
    }

    // Finish initializing everything reachable through the handle BEFORE
    // handing it to the parent, so nobody can see a half-built subblock.
    // NOTE that macro PROTECT_REG_ACCESS uses reg_spinlock
    spin_lock_init(&(device_data->reg_spinlock));

    // sccsc has no interrupts associated with it, so no callback
    device_data->interrupt_callback = NULL;
    device_data->interrupt_callback_data = NULL;

    pie_device_handle->fcn_tbl = &pie_sccsc_functions;
    pie_device_handle->device_data = device_data;

    // register with the parent
    register_pie_subblock(sccsc, pie_device_handle);

    print("PIE SCCSC LOADED .....\n");
}
EXPORT_SYMBOL(pie_sccsc_init);


// Tear down the SC CSC subblock: unregister it from the parent PIE driver
// and release the device handle the parent hands back (presumably the one
// registered by pie_sccsc_init).  device_data is not used here.
void pie_sccsc_exit(sccscData *device_data)
{
    // unregister with the parent
    sccscDeviceHandle *handle = unregister_pie_subblock(sccsc);

    free_memory(handle);
}
EXPORT_SYMBOL(pie_sccsc_exit);

