/*
This Source Code Form is subject to the terms of the Mozilla Public
License, v. 2.0. If a copy of the MPL was not distributed with this file,
You can obtain one at http://mozilla.org/MPL/2.0/.

Copyright (c) 2008-2014, Marvell International Ltd.

Alternatively, this software may be distributed under the terms of the GNU
General Public License Version 2, and any use shall comply with the terms and
conditions of the GPL.  A copy of the GPL is available at
http://www.gnu.org/licenses/old-licenses/gpl-2.0.html

THE FILE IS DISTRIBUTED AS-IS, WITHOUT WARRANTY OF ANY KIND, AND THE
IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
ARE EXPRESSLY DISCLAIMED.  The GPL license provides additional details about
this warranty disclaimer.
*/
/**
 *
 * \file CortexR4.c
 *
 * \brief This is the CPU device driver implementation for the Marvell 
 * CortexR4 CPU which is based on an ARM Cortex core.
 * Thus, the operations and information presented here are culled from a
 * combination of the ARM Architecture Reference Manual (for general ARM
 * architecture related information) and the CortexR4 datasheet.
 */

#include <stdint.h>
#include <stdbool.h>

#include "cpu_api.h"
#include "CortexR4.h"

#include "bootCode.h"


#if 0
// Compiled out.  This thin wrapper around __DisableDCache() is not built;
// the cpu_disable_dcache() that is actually compiled is defined further down
// in this file and cleans the data cache before turning it off.
void cpu_disable_dcache( void )
{
	__DisableDCache();
}
#endif

// Turns the data cache on by delegating to the low-level __EnableDCache()
// primitive.
void cpu_enable_dcache(void)
{
    __EnableDCache();
}

// Turns the instruction cache off by delegating to the low-level
// __DisableICache() primitive.
void cpu_disable_icache(void)
{
    __DisableICache();
}

// Turns the instruction cache on by delegating to the low-level
// __EnableICache() primitive.
void cpu_enable_icache(void)
{
    __EnableICache();
}

// Initializes the cpu.  The whole job is done with CP15 c9 register accesses,
// using r0 as scratch:
//   1. write 1 to c9,c14,0
//   2. set bit 0 of c9,c12,0 (read-modify-write)
//   3. write 0x80000000 to c9,c12,1 to start the cycle counter
// NOTE(review): in the ARMv7 performance-monitor encoding c9,c14,0 is the
// user-enable register, c9,c12,0 the control register and c9,c12,1 the
// count-enable-set register -- confirm against the Cortex-R4 TRM.
void cpu_init( void )
{
    asm volatile
    (
        "mrc p15,0,r0,c9,c14,0;"      // read c9,c14,0 into r0 (the value is discarded by the next mov)
        "mov r0, $1;"                 // r0 = 1 ('$' used as the immediate prefix -- presumably same as #1, confirm with the assembler)
        "mcr p15,0,r0,c9,c14,0;"      // write r0 to c9,c14,0
        "mrc p15, 0, r0, c9, c12, 0;" // read performance monitor control register into r0
		"orr r0, r0, #1;"             // set the enable-bit for the performance counters and cycle counter
        "mcr p15, 0, r0, c9, c12, 0;" // write the performance monitor control register back
        "mov r0, #0x80000000;"        // load the bit-mask (bit 31 only) into r0
        "mcr p15, 0, r0, c9, c12, 1;" // write the bit-mask to the enable set register, enable cycle counter bit
        :::"r0"                       // r0 is clobbered
    );
}

// MPU defines
//
// MPUSIZE_*: region-size codes passed as the 'size' argument of
// setMpuRegion().  Going by the names, code N stands for a region of
// 2^(N+1) bytes (0x04 = 32 bytes ... 0x1F = 4 GB).
//
// MPUTYPE_*: attribute words passed as the 'type' argument of
// setMpuRegion(), which writes them unchanged to CP15 c6,c1,4.
// NOTE(review): presumably these follow the MPU region access control
// layout (access permission + memory type bits) -- confirm the individual
// bit meanings against the Cortex-R4 TRM before adding new values.
#define MPUSIZE_4GB                 0x1F
#define MPUSIZE_2GB                 0x1E
#define MPUSIZE_1GB                 0x1D
#define MPUSIZE_512MB               0x1C
#define MPUSIZE_256MB               0x1B
#define MPUSIZE_128MB               0x1A
#define MPUSIZE_64MB                0x19
#define MPUSIZE_32MB                0x18
#define MPUSIZE_16MB                0x17
#define MPUSIZE_8MB                 0x16
#define MPUSIZE_4MB                 0x15
#define MPUSIZE_2MB                 0x14
#define MPUSIZE_1MB                 0x13
#define MPUSIZE_512KB               0x12
#define MPUSIZE_256KB               0x11
#define MPUSIZE_128KB               0x10
#define MPUSIZE_64KB                0x0F
#define MPUSIZE_32KB                0x0E
#define MPUSIZE_16KB                0x0D
#define MPUSIZE_8KB                 0x0C
#define MPUSIZE_4KB                 0x0B
#define MPUSIZE_2KB                 0x0A
#define MPUSIZE_1KB                 0x09
#define MPUSIZE_512B                0x08
#define MPUSIZE_256B                0x07
#define MPUSIZE_128B                0x06
#define MPUSIZE_64B                 0x05
#define MPUSIZE_32B                 0x04
#define MPUTYPE_NOCACHE             0x308
#define MPUTYPE_WBnWACACHE          0x33B 
#define MPUTYPE_WBWACACHE           0x329 
#define MPUTYPE_WTCACHE             0x322 
#define MPUTYPE_NSDEVICE            0x310
#define MPUTYPE_DEVICE             0x301

// Programs one MPU region through the CP15 c6 registers.
//
//   region         region number to select (written to c6,c2,0)
//   address        region base address (written to c6,c1,0)
//   size           region-size code, one of the MPUSIZE_* values
//   subregionMask  bits OR-ed as-is into the size/enable word; callers pass
//                  them already in position (e.g. 0xC000)
//   type           attribute word, one of the MPUTYPE_* values (written to
//                  c6,c1,4)
//
// The region is always programmed with bit 0 of the size word set (enabled).
void setMpuRegion (uint32_t region, uint32_t address, uint32_t size, uint32_t subregionMask, uint32_t type)
{
    // Size code shifted up by one, caller-supplied mask bits, enable in bit 0.
    const uint32_t sizeAndEnable = (size << 1) | subregionMask | 0x1u;

    // The MCR instructions only read their source registers and do not touch
    // the condition flags, so no register or "cc" clobbers are declared
    // (declaring r0-r3 only forced the compiler to shuffle the arguments into
    // other registers).  The "memory" clobber keeps the compiler from moving
    // memory accesses across the reprogramming of the region.
    asm volatile
    (
        "mcr   p15, 0, %0, c6, c2, 0;"   // select the region number
        "mcr   p15, 0, %1, c6, c1, 0;"   // region base address
        "mcr   p15, 0, %2, c6, c1, 4;"   // region attribute word
        "mcr   p15, 0, %3, c6, c1, 2;"   // region size + enable
        :
        : "r" (region), "r" (address), "r" (type), "r" (sizeAndEnable)
        : "memory"
    );
}

// Disables the MPU.  The caches are dealt with first (data cache turned off
// and written back, instruction cache turned off and invalidated), then bit 0
// of the CP15 c1,c0,0 control register is cleared, with a DSB before the
// write and an ISB after it.
void disableMPU(void)
{
    // at bootup, MPU & caches are already disabled,
    // but possibly a new cfg is being loaded, so do it right
    cpu_disable_dcache();            // could argue to combine the disable+clean into one()
    cpu_dcache_writeback_all();     // at power on, we did invalidate
//    cpu_dcache_invalidate_all();    // adds no value, enable invalidates anyway.
    cpu_disable_icache();
    cpu_icache_invalidate_all();
    asm volatile
    (
        "mov	r0, #0;"                  // redundant: r0 is overwritten by the mrc below
        "mrc   p15, 0, r0, c1, c0, 0;"   // read the control register into r0
        "bic  r0, r0, #0x1;"            // clear bit 0 (the MPU enable, going by this function's purpose)
        "DSB ;"                         // complete outstanding memory accesses before the change
        "mcr   p15, 0, r0, c1, c0, 0;"   // write the control register back
        "ISB ;"                         // make the change visible to following instructions
        :
        :
        : "r0"                          // r0 is used as scratch
    );
}
// This procedure is meant to program the MPU regions and enable the MPU.
// NOTE: as written it is a no-op -- the unconditional "#if 1 ... return;"
// at the top exits before any of the setup code below runs.
void enableMPU(void)
{
#if 1 //def BUILDLCM
    return;     // everything below is unreachable while this block is "#if 1"
#endif
    // start from a known state: MPU and caches off
    disableMPU();

    // set up MPU regions: (region number, base address, size code, subregion mask, type)
    setMpuRegion(0,0xF6000000,MPUSIZE_16MB, 0,MPUTYPE_WBWACACHE);            // BSPI
    setMpuRegion(1,0xF7000000,MPUSIZE_4MB, 0xC000,MPUTYPE_WBWACACHE);  // ROM, M3 TCM, LCM (top two subregions disabled)
    setMpuRegion(2,0xF8000000,MPUSIZE_64MB, 0,MPUTYPE_DEVICE);             // PI APB, Func Blocks, PI Bus Blocks, FPGA
    setMpuRegion(3,0xFC000000,MPUSIZE_32MB, 0,MPUTYPE_DEVICE);             // AO Func Blocks, AO APB Peripherals
    setMpuRegion(4,0xFE000000,MPUSIZE_16MB, 0,MPUTYPE_NOCACHE);           // BSPI
    setMpuRegion(5,0xFF000000,MPUSIZE_2MB, 0,MPUTYPE_NOCACHE);            // ROM, M3 TCM
    setMpuRegion(6,0xFF200000,MPUSIZE_1MB, 0,MPUTYPE_NOCACHE);            // LCM
    setMpuRegion(7,0xFFFF0000,MPUSIZE_64KB, 0,MPUTYPE_NOCACHE);           // boot alias
    setMpuRegion(8,0xFFFFF000,MPUSIZE_4KB, 0,MPUTYPE_NSDEVICE);            // VIC
    setMpuRegion(9,0x00000000,MPUSIZE_4GB, 0x8000,MPUTYPE_WBWACACHE);        // DDR and NAND (top subregion disabled)

#ifdef BUILDRAM
    // enable mpu: set bit 0 of the CP15 c1,c0,0 control register
    asm volatile
    (
        "mrc   p15, 0, r0, c1, c0, 0 ;"   // read the control register into r0
        "orr  r0, r0, #0x1 ;"            // set bit 0
        "DSB ;"
        "mcr   p15, 0, r0, c1, c0, 0 ;"   // write the control register back
        "ISB ;"
        :
        :
        : "r0"                           // r0 is used as scratch
    );
#else
    startMPU();  // cpu/src/mpu.s.  Need to enable MPU while EXE from section that is not changing, so we need to do a trick...
#endif // BUILDRAM
}

// Reads the cycle counter (CP15 c9,c13,0) and hands back its 32-bit value.
uint32_t cpu_get_ccount(void)
{
    uint32_t cycles;

    // Single coprocessor read straight into the result variable.
    asm volatile ( "mrc p15, 0, %0, c9, c13, 0;" : "=r" (cycles) );

    return cycles;
}

// Busy-waits for at least delayInUs microseconds using the CPU cycle counter.
//
// hwGetProcSpeed() is taken to report the core clock in MHz, i.e. cycles per
// microsecond, so the wait is delayInUs * clockSpeed cycles.  The product is
// kept in 64 bits so long delays are not silently truncated.
//
// Elapsed time is accumulated from the difference between successive counter
// reads.  Unsigned subtraction is modulo 2^32, so counter rollover needs no
// special case, and each loop iteration uses a single counter read (comparing
// two separate reads against start/end can exit early when the counter wraps
// between them).
//
// Requires the cycle counter to be running (see cpu_init()); the counter must
// be polled more often than once per 2^32 cycles, which a spin loop does.
void cpu_spin_delay(uint32_t delayInUs)
{
    uint32_t clockSpeed = hwGetProcSpeed();
    uint32_t lastCount = cpu_get_ccount();
    uint64_t cyclesLeft = (uint64_t)delayInUs * clockSpeed;

    while (cyclesLeft != 0)
    {
        uint32_t now = cpu_get_ccount();
        uint32_t elapsed = now - lastCount;   // wrap-safe: modulo 2^32

        lastCount = now;
        cyclesLeft = (elapsed < cyclesLeft) ? (cyclesLeft - elapsed) : 0;
    }
}

// Reports the length of one data cache line, in bytes.
uint32_t cpu_get_dcache_line_size(void)
{
    // Fixed value: the documentation gives 32-byte data cache lines.
    enum { DCACHE_LINE_BYTES = 32 };

    return DCACHE_LINE_BYTES;
}

// This procedure cleans the R4 data cache.
// Writes all dirty data cache lines out to memory and then drains the contents
// of the write buffer.
// The C version below is compiled out (see the note on the #if 0 line);
// cpu_disable_dcache() in this file carries an assembly version of the same
// set/way clean loop.
#if 0 // risk of clobbering locals when doing clean!!  do in asm.
void cpu_dcache_writeback_all( void )
{
int tmp;
int way,set;
  for (way=0;way<4;way++)         // 4-way
    for (set=0;set<64;set++) {   // 64 sets per way (8KB cache)
      tmp = way<<30 | set<<5;    // way index from bit 30 up, set index from bit 5 up
        asm volatile
        (
           " mcr    p15,0,%0,c7,c10,2 ;" // clean data cache set/way
           " mov    %0, #0 ;"            // NOTE(review): overwrites %0, which is declared input-only
           " mcr    p15,0,%0,c7,c10,4 ;" // DSB - drain write buffer (instruction wait)
           " mcr    p15,0,%0,c7,c10,5 ;" // DMB - drain write buffer (data wait)
           :: "r" (tmp)
        );
    } 
}
#endif
// Cleans (writes back) every data cache line that overlaps the byte range
// starting at startAddr and sizeInBytes long, then drains the write buffer.
//
// The start is rounded down to a cache-line boundary and the length rounded
// up, so partially covered lines are cleaned too.  This is considered safe
// because writing back someone else's dirty data does not corrupt anything.
//
//   startAddr    first byte of the region (any alignment)
//   sizeInBytes  length of the region in bytes
void cpu_dcache_writeback_region( void *startAddr, uint32_t sizeInBytes )
{
    const uint32_t lineSize = cpu_get_dcache_line_size();
    const uint32_t firstByte = (uint32_t)(uintptr_t)startAddr;
    const uint32_t offsetInLine = firstByte % lineSize;
    uint32_t lineAddr = firstByte - offsetInLine;

    // Number of lines covering (offsetInLine + sizeInBytes) bytes, computed
    // without forming that sum so it cannot overflow 32 bits, and counted
    // unsigned so regions of 2 GB and more are not skipped.
    uint32_t lineCount = sizeInBytes / lineSize;
    lineCount += ((sizeInBytes % lineSize) + offsetInLine + (lineSize - 1)) / lineSize;

    for ( ; lineCount > 0; lineCount-- )
    {
        // "memory" clobber: stores made before this call must be emitted
        // before the line is cleaned.
        asm volatile
        (
            "mcr p15, 0, %0, c7, c10, 1;" // Clean line at address %0
            :
            : "r" (lineAddr)
            : "memory"
        );

        lineAddr += lineSize;
    }

    // Drain the write buffer.  The register value is 'don't care' but should
    // be zero; it is passed as an input instead of being written inside the
    // asm, because an input operand must not be modified.
    asm volatile
    (
        "mcr p15, 0, %0, c7, c10, 4;"     // DSB - Drain write buffer operation
        :
        : "r" (0)
        : "memory"
    );
}

// Invalidates the whole data cache: a DSB followed by a write of zero to
// CP15 c15,c5,0.
void cpu_dcache_invalidate_all(void)
{
    const int zero = 0;   // value written to the coprocessor register

    asm volatile
    (
        " DSB ;"
        " mcr    p15,0,%0,c15,c5,0 ;"
        :
        : "r" (zero)
    );
}

// Invalidates the data cache lines covering the region that starts at
// startAddr and is sizeInBytes long, then drains the write buffer.
//
// Both the start address and the size must be multiples of the cache line
// size (checked with ASSERT): invalidating a partially owned line would throw
// away data that belongs to someone else.
//
//   startAddr    first byte of the region, cache-line aligned
//   sizeInBytes  length of the region in bytes, a multiple of the line size
void cpu_dcache_invalidate_region(void *startAddr, uint32_t sizeInBytes)
{
    const uint32_t lineSize = cpu_get_dcache_line_size();
    uint32_t lineAddr = (uint32_t)(uintptr_t)startAddr;
    uint32_t lineCount;

    // Make sure our start address and size are aligned to cache line boundaries
    // (we don't want to invalidate data we don't own)
    ASSERT(0 == (lineAddr % lineSize));
    ASSERT(0 == (sizeInBytes % lineSize));

    // Counted unsigned so regions of 2 GB and more are not skipped.  A
    // trailing partial line (only possible if the ASSERT is compiled out) is
    // still invalidated, as before.
    lineCount = (sizeInBytes / lineSize) + (((sizeInBytes % lineSize) != 0) ? 1u : 0u);

    for ( ; lineCount > 0; lineCount-- )
    {
        // "memory" clobber: the compiler must not carry values of the region
        // in registers across the invalidate.
        asm volatile
        (
            "mcr p15, 0, %0, c7, c6, 1;" // Invalidate line at address %0
            :
            : "r" (lineAddr)
            : "memory"
        );

        lineAddr += lineSize;
    }

    // Drain the write buffer.  This is required during an invalidate
    // operation because there could have been buffered writes already in
    // progress for temporary cache data that was just invalidated.  The
    // register value is 'don't care'; zero is passed as an input instead of
    // being written inside the asm, because an input operand must not be
    // modified.
    asm volatile
    (
        "mcr p15, 0, %0, c7, c10, 4;"    // Drain write buffer operation
        :
        : "r" (0)
        : "memory"
    );
}

// Reports the length of one instruction cache line, in bytes.
uint32_t cpu_get_icache_line_size(void)
{
    // Fixed value: the documentation gives 32-byte instruction cache lines.
    enum { ICACHE_LINE_BYTES = 32 };

    return ICACHE_LINE_BYTES;
}

// Invalidates the entire instruction cache: writes zero to CP15 c7,c5,0 and
// follows it with an ISB so later instructions are fetched afresh.
void cpu_icache_invalidate_all( void )
{
    // Zero is supplied as an input operand.  The previous "mov %0, #0" wrote
    // to an operand declared input-only, which the compiler does not expect.
    // The "memory" clobber stops the compiler from moving memory accesses
    // (e.g. stores of freshly written code) across the invalidate.
    asm volatile
    (
        " mcr  p15,0,%0,c7,c5,0 ;"   // invalidate all instruction cache
        "ISB ;"
        :
        : "r" (0)
        : "memory"
    );
}

// Invalidates a region of the instruction cache.  The region arguments are
// accepted but not used: the whole instruction cache is invalidated instead.
void cpu_icache_invalidate_region( void *startAddr, uint32_t sizeInBytes )
{
    (void)startAddr;      // unused
    (void)sizeInBytes;    // unused

    cpu_icache_invalidate_all();
}

// Reports the processor mode the caller is running in, derived from the mode
// field of the CPSR.  FIQ and IRQ are both reported as CPU_MODE_INTERRUPT;
// any mode other than USR/FIQ/IRQ/SVC is reported as CPU_MODE_OTHER.
CPU_MODE cpu_get_mode( void )
{
    uint32_t status;

    // Read the CPSR into 'status'
    asm volatile ( "mrs %0, cpsr;" : "=r" (status) );

    // Keep only the mode bits
    status &= CPSR_MODE_MASK;

    switch ( status )
    {
        case USR_MODE:
            return CPU_MODE_USER;

        case FIQ_MODE:
        case IRQ_MODE:
            return CPU_MODE_INTERRUPT;

        case SVC_MODE:
            return CPU_MODE_SUPERVISOR;

        default:
            return CPU_MODE_OTHER;
    }
}

// Masks IRQs and reports whether they were enabled beforehand.
//
// Returns IRQ_DISABLED if the I bit was already set (nothing is changed), or
// IRQ_ENABLED if interrupts were on and have now been masked.  Pass the
// return value to cpu_restore_interrupts() to undo the change.
//
// The asm statements carry a "memory" clobber so that, if this function is
// ever inlined, the compiler cannot move memory accesses from the protected
// region to before the point where interrupts are masked.
uint32_t cpu_disable_interrupts( void )
{
    uint32_t cpsr;

    // Get the current state of interrupts
    asm volatile
    (
        "mrs %0, cpsr;" // Read the CPSR into %0
        : "=r" (cpsr)   // output
        :
        : "memory"
    );

    if ( (cpsr & IRQ_DISABLE_MASK) != 0 )
    {
        // I bit is set, which means interrupts are currently disabled, so
        // nothing to do except indicate such.
        return IRQ_DISABLED;
    }

    // I bit is clear, which means interrupts are currently enabled,
    // so disable them
    cpsr |= IRQ_DISABLE_MASK;
    asm volatile
    (
        "msr cpsr_c, %0;" // write out the control field of the CPSR
        :
        : "r" (cpsr)      // input
        : "memory"
    );

    // Indicate interrupts were enabled
    return IRQ_ENABLED;
}

// Re-enables IRQs if, and only if, previous_interrupt_state is IRQ_ENABLED
// (the value cpu_disable_interrupts() returns when it actually masked them).
// Any other value leaves the CPSR untouched.
//
// The scratch register is chosen by the compiler instead of hard-coding r0,
// and the "memory" clobber keeps memory accesses belonging to the protected
// region from being moved past the point where interrupts come back on.
void cpu_restore_interrupts( uint32_t previous_interrupt_state )
{
    // Only enable interrupts if they were previously enabled
    if ( IRQ_ENABLED == previous_interrupt_state )
    {
        uint32_t cpsr;

        asm volatile
        (
            "mrs %0, cpsr;"      // read the CPSR
            "bic %0, %0, #0x80;" // clear the IRQ disable bit
            "msr cpsr_c, %0;"    // write out the control field of the CPSR
            : "=r" (cpsr)
            :
            : "memory"
        );
    }
}

// Cleans the entire data cache by set/way and then turns the data cache off.
//
// The clean loop walks 64 sets; for each set it cleans the four ways, then
// issues DSB/DMB.  After the loop, bit 2 of the CP15 c1,c0,0 control register
// is cleared to disable the data cache.
//
// The asm uses r0 and r1 as scratch and changes the condition flags (cmp), so
// they are declared as clobbers; without the list the compiler is free to
// keep live values in those registers if this function gets inlined (it is
// called from disableMPU() in this same file).  "memory" keeps memory
// accesses from being moved across the clean/disable sequence.
void cpu_disable_dcache(void)
{
    asm volatile
    (
// *
// * MUST BE IDENTICAL to cpu_dcache_writeback_all()
// *
"      mov     r1, #0               ;"   // set counter
"2000:                              ;"   //
"      mov     r0, r1, LSL #5       ;"   // set index from bit 5 up, way bits [31:30] = 00
"      mcr     p15,0,r0,c7,c10,2    ;"   // clean set r1 /way 00
"      orr     r0, r0, #0x40000000  ;"   //
"      mcr     p15,0,r0,c7,c10,2    ;"   // clean set r1 /way 01
"      orr     r0, r0, #0x80000000  ;"   //
"      mcr     p15,0,r0,c7,c10,2    ;"   // clean set r1 /way 11
"      bic     r0, r0, #0x40000000  ;"   //
"      mcr     p15,0,r0,c7,c10,2    ;"   // clean set r1 /way 10
"      DSB                          ;"   //
"      DMB                          ;"   //
"      add     r1, #1               ;"   // increment set
"      cmp     r1, #64              ;"   // have all sets been cleaned?
"      bne     2000b                ;"   //
"      mov     r0, #0               ;"   // another DSB DMB
"      MCR p15, 0, r0, c7, c10, 4   ;"   // Data Synchronization Barrier operation
"      MCR p15, 0, r0, c7, c10, 5   ;"   // Data Memory Barrier Operation.
// *
// * End of cpu_dcache_writeback_all()
// *

// *
// * Now disable dcache
// *
       "MRC p15, 0, R0, c1, c0, 0;"      // read System Control Register configuration data
       "BIC R0, R0, #0x1 <<2     ;"      // clear bit 2
       "DSB                      ;"      //
       "MCR p15, 0, R0, c1, c0, 0;"      // data cache is now disabled
       :
       :
       : "r0", "r1", "cc", "memory"
     );
}


