/*
 ***************************************************************************************
 * (c) Copyright 2013-2015 Marvell International Ltd.
 **************************************************************************************
 *
 * Marvell Commercial License Option
 *
 * If you received this File from Marvell as part of a proprietary software release,
 * the File is considered Marvell Proprietary and Confidential Information, and is
 * licensed to you under the terms of the applicable Commercial License.
 *
 **************************************************************************************
 *
 * Marvell GPL License Option
 *
 * If you received this File from Marvell as part of a Linux distribution, this File
 * is licensed to you in accordance with the terms and conditions of the General Public
 * License Version 2, June 1991 (the "GPL License").  You can redistribute it and/or
 * modify it under the terms of the GPL License; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
 * PARTICULAR PURPOSE.  See the GPL License for more details.
 *
 * You should have received a copy of the GNU General Public License along with this
 * program.  If not, see http://www.gnu.org/licenses/old-licenses/gpl-2.0.html.
 *
 **************************************************************************************
 */

////////////////////////////////////////////////////////////////////////////////
///  pie test.  The main test is the "t" option
///  All pertinent modules need to be insmoded
/// (pietop, piecommon, piepogodma, pie*pogoizer, descriptor, pieidma* pieodma*)
///  To run the main test, at the target linux command line type:
///  echo "t" > /sys/firmware/pie_verification/pietest/pietest
///
///  To see all options of what to do, type:
///  cat < /sys/firmware/pie_verification/pietest/pietest
////////////////////////////////////////////////////////////////////////////////

#include <linux/module.h>
#include <linux/cdev.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/dma-mapping.h>
#include <linux/delay.h>  // for msleep
#include <linux/random.h> // for get_random_bytes in pie_verification.h

#include "descriptor.h"

#include "PIE_regheaders.h"
#include "pie_full_subblock_list.h" // needed for detailed pie_handle
#include "pie_handle.h"
#include "pie_if.h"
#include "pie_verification.h"
#include "pie_scaler_verification.h"
#include "pie_convenience_if.h"
#include "pie_driver.h"

#include <linux/kobject.h> // for kobject_create_and_add


// Debug logging switch: uncomment DURING_DEBUG to make debug_printk() forward
// its arguments to printk().  While DURING_DEBUG is undefined, debug_printk()
// expands to nothing, so its arguments are not evaluated.
//#define DURING_DEBUG
#ifdef DURING_DEBUG
#define debug_printk(args...) printk(args)

#else
#define debug_printk(args...)
#endif



/* NOTES TO SANDRA!!!

  Need to add stuff to the driver and the descriptor driver (this one for sure)
  1) We want to specify a max size of data xferred by descriptors (all of the descriptors we allocate except for perhaps the last one will have this size).  We need to check to make sure:
      a) it's an integral number of pixels (divisible by 3 for rgb, 4 for rgbx, xrgb)
      b) check that it's 4 byte aligned.
      And I'll have to verify that the last descriptor's actual size passes the a and b checks too.
  2) Specify the # of Rows - hmmm, or is this something I calculate based on the total bytes and the line width - maybe just need to check that the # of Rows doesn't overflow max number of rows
  3) Specify the line width in bytes
      do a) and b)
  4) Specify total # of bytes in the transfer (and the descriptor driver will figure out how many descriptors to create)
      do a) and b)
*/
/* Sandra, probably need to add the above notes to the proper places in the drivers, and then this as well.
   We see in the UPR that the FIFO is 128 bytes, and the bus width is 64 bits (8 bytes).  So that means if we do an 8 beat burst, we will have a 64 byte burst, and we half fill the fifo, and we get double buffering.  So I should put these
   notes in the IDMA/ODMA driver, and calculate the number of beats as 8 on the fly - since I can read the UPR

   Ok, and then there's alignment issues.  descriptor data buffers need to be 8 byte aligned, and pogo descriptors have to be 8 byte aligned - but for Fastest transfer, 16 or 32 byte alignment is best for the descriptors - Sandra, figure out how to do aligned code with dma allocation....

   Ohhh, and sandra, go throw errors and give error returns (failure) if we get an error interrupt from any block
   And go add in semaphore code to wait until the xfer is done before checking
*/

#define NUM_PIE_IDMA_CHANNELS NUM_PIE_POGO_IDMA_CHANNELS

// NOTE that the default kernel may not allow enough memory for lots of descriptors or rows (or linewidth).
// If you reboot the target, stop it at UBOOT and change part of the command line from something like:
// console=ttyS0,115200n8
// to
// console=ttyS0,115200n8 coherent_pool=2M
// and that should give enough memory to allocate lots of buffers

//#define NUM_DESC 10
#define NUM_DESC 3
//#define NUM_DESC 1

#define LINEWIDTH_PIXELS 1032
//#define LINEWIDTH_PIXELS 32

//#define ROWHEIGHT 16  // bypass test can't do more than 16 lines - 16 out is max
#define ROWHEIGHT 2  // hmmm, how many should we test.....

// allocate memory for the input and output data buffers used for dma transfer
// then fill it with random data

// here is our pie verification object
// Single file-scope instance: the buffer and descriptor helpers in this file
// record the virtual/physical addresses and sizes of what they allocate in it.
struct pie_verification_data piev_object;

/*
 * Allocate a coherent DMA buffer and pre-fill it with 0xFF.
 *
 * bufsize          - number of bytes to allocate; must be greater than 0
 * buffer_phys_addr - out: receives the DMA (bus) address of the buffer
 *
 * Returns the kernel virtual address of the buffer, or NULL if the request
 * is invalid or the allocation fails.  On a NULL return *buffer_phys_addr
 * must not be used.
 */
uint32_t *create_data_buffer(int bufsize, dma_addr_t *buffer_phys_addr)
{
    uint32_t *buffer_virt_addr;
    //    bufsize *= 4; // * 4 for debugging - can see if someone wrote beyond original buffer end

    if ((bufsize <= 0) || (buffer_phys_addr == NULL))
    {
        printk("ERROR! %s: invalid request, bufsize=%d\n", __func__, bufsize);
        return NULL;
    }

    buffer_virt_addr = dma_alloc_coherent(NULL, bufsize, buffer_phys_addr, GFP_DMA);
    if (buffer_virt_addr == NULL)
    {
        // the DMA handle is not meaningful when the allocation fails, so
        // report the failure without dereferencing buffer_phys_addr
        printk("ERROR! %s: dma_alloc_coherent failed for %d bytes\n", __func__, bufsize);
        return NULL;
    }
    // cast so that the argument always matches the %X conversion
    printk("just called dma_alloc_coherent %d, 0x%X\n",bufsize, (unsigned int) *buffer_phys_addr);

    // set the memory to avoid accidently thinking we got output data
    memset(buffer_virt_addr, 0xFF, bufsize);

    // data buffers have to be 4 byte aligned
    BUG_ON((*buffer_phys_addr & 0x3) != 0);

    return buffer_virt_addr;
}

// total_num_pixels and bytes_per_pixels is unused for an ot_input_buf
// For the non-ot case, bytes_per_pixel is bytes.
// 
int create_input_buffer(int total_num_pixels, int bytes_per_pixel, int rounded_up_bufsize, bool ot_input_buf)
{
    dma_addr_t buffer_phys_addr;
    uint32_t *buffer_virt_addr;
#ifdef DEBUG_NORANDOM_BUFS
    uint8_t hold_int8, *input_buf8;
    uint16_t hold_int16, *input_buf16;
    uint32_t hold_int32, *input_buf32;
    int i;
#endif    

    buffer_virt_addr = create_data_buffer(rounded_up_bufsize, &buffer_phys_addr);

    if (ot_input_buf)
    {
        piev_object.ot_input_data = buffer_virt_addr;
        piev_object.ot_input_data_phys = buffer_phys_addr;
        piev_object.ot_in_bufsize = rounded_up_bufsize;
        debug_printk("generating random data for OT input_buffer phys_addr=0x%X, virt=0x%p\n",
                     piev_object.ot_input_data_phys, piev_object.ot_input_data);
    }
    else
    {
        piev_object.input_data = buffer_virt_addr;
        piev_object.input_data_phys = buffer_phys_addr;
        piev_object.in_bufsize = rounded_up_bufsize;
        debug_printk("generating random data for input_buffer phys_addr=0x%X\n",buffer_phys_addr);
    }
#ifndef DEBUG_NORANDOM_BUFS
    // normal case - fill buffer with random pixels
    get_random_bytes(buffer_virt_addr, rounded_up_bufsize);
#else
    if (ot_input_buf)
    {
        printk("ERROR!  OT can't support sequential pixels - filling with random instead\n");
        get_random_bytes(buffer_virt_addr, rounded_up_bufsize);
        return 0;
    }
    // debug case, put sequential pixels into buffer
    // generate data and put in the input buffer
    input_buf8 = (uint8_t *) buffer_virt_addr;
    input_buf16 = (uint16_t *) buffer_virt_addr;
    input_buf32 = (uint32_t *) buffer_virt_addr;    
    for (i=0;(i< total_num_pixels);i++)
    {
        switch(bytes_per_pixel)
        {
        case 1:
            PIE_INDATA_DATA(&hold_int8);
            input_buf8[i] = hold_int8;
            break;
        case 2:
            PIE_INDATA_DATA(&hold_int16);
            input_buf16[i] = hold_int16;
            break;
        case 3:
            PIE_INDATA_DATA(&hold_int32);
            input_buf8[i*3] = (hold_int32 & 0xFF) >> 0;
            input_buf8[i*3+1] = (hold_int32 & 0xFF00) >> 8;
            input_buf8[i*3+2] = (hold_int32 & 0xFF0000) >> 16;
            break;
        case 4:
            PIE_INDATA_DATA(&hold_int32);
            input_buf32[i] = hold_int32;
            break;
        default:
            printk("ERROR, unsupported bytes per pixel=%d\n",bytes_per_pixel);
                
        }
    }
#endif    
    return 0;
}

int create_output_buffer(int bufsize, bool ot_output_buf)
{
    dma_addr_t buffer_phys_addr;
    uint32_t *buffer_virt_addr;

    buffer_virt_addr = create_data_buffer(bufsize, &buffer_phys_addr);
    // store away the info on the outputbuffer
    if (ot_output_buf)
    {
        piev_object.ot_output_data = buffer_virt_addr;
        piev_object.ot_output_data_phys = buffer_phys_addr;
        piev_object.ot_out_bufsize = bufsize;
        printk("OT output buffer physaddr=0x%p, virtaddr=0x%p\n", (void *) buffer_phys_addr, (void *) buffer_virt_addr); 
    }
    else
    {
        piev_object.output_data = buffer_virt_addr;
        piev_object.output_data_phys = buffer_phys_addr;
        piev_object.out_bufsize = bufsize;
        printk("output buffer physaddr=0x%p, virtaddr=0x%p\n", (void *) buffer_phys_addr, (void *) buffer_virt_addr);
    }
    return 0;
}


int generate_idma_descriptors(int num_descriptors, int data_xfer_size, int channel, bool ot_desc)
{
    DMA_descriptor *descriptor;
    dma_addr_t phys_addr;
    int phys_addr_int;
    dma_addr_t idma_descriptor_phys;
    uint32_t flags;
    int retval, i, data_size;

    retval = create_descriptor_list(num_descriptors,
                                    new_8word_axi,
                                    &descriptor, 
                                    &idma_descriptor_phys,
                                    NULL);
    debug_printk("%s: virt_addr returned was 0x%p, phys_addr was 0x%X\n",__func__,
                 descriptor, idma_descriptor_phys);
    if (retval != 0)
    {
        return retval;
    }
    if (ot_desc)
    {
        piev_object.device_ot_idma_descriptor = descriptor;
        printk("generate otidma descriptors: device descriptor (logical) addr=0x%p\n", piev_object.device_ot_idma_descriptor);
        piev_object.device_ot_idma_descriptor_phys = idma_descriptor_phys;
        printk("phys=0x%X\n", piev_object.device_ot_idma_descriptor_phys);
        // when we set_descriptor, we need to specify the physical address of the data buffer
        phys_addr = piev_object.ot_input_data_phys;
    }
    else
    {
        piev_object.device_idma_descriptor[channel] = descriptor;
        piev_object.device_idma_descriptor_phys[channel] = idma_descriptor_phys;
        piev_object.num_input_strips = num_descriptors; //1 strip == 1 descriptor
        // when we set_descriptor, we need to specify the physical address of the data buffer
        // non-ot case - we have to offset the location in the buffer we are in for channels
        phys_addr = piev_object.input_data_phys;
        phys_addr_int = (int) phys_addr;
        phys_addr_int += (data_xfer_size * channel * num_descriptors); // move down in the buffer for this channel - 0 at start
        phys_addr = (dma_addr_t) phys_addr_int;
    }

    // descriptors created, now set up the data fields
    flags = 0;
    
    data_size = data_xfer_size;

    printk("created %d idma descriptors for channel %d, phys descr addr=0x%X, virt=0x%p\n",
           num_descriptors, channel, idma_descriptor_phys, descriptor);
    
    for (i=0;i<num_descriptors;i++)
    {
        /* still need to implement someday - make sure if you have a last data buffer that is smaller
           than the other fixed size buffers (since it's the last one), that we can pass smaller size */
        // if last descriptor, calculate actual data size FIXME
        if (i == (num_descriptors - 1))
        {
            //data_size = full_data_size - num_descriptor * data_xfer_size; FIXME
            flags = DMA_STOP_WHEN_DONE | DMA_INT_WHEN_DONE;
        }

        set_descriptor(descriptor, i, data_size, phys_addr, flags);
    }
    return retval;
}

/*
 * Create an output-DMA descriptor list and point it at the output data buffer.
 *
 * num_descriptors - number of descriptors to create
 * data_xfer_size  - number of bytes each descriptor transfers
 * ot_desc         - true: OT list on top of ot_output_data_phys,
 *                   false: regular list on top of output_data_phys
 *
 * The list and its physical address are stored in piev_object; the last
 * descriptor is flagged DMA_STOP_WHEN_DONE | DMA_INT_WHEN_DONE.
 * Returns 0 on success, otherwise the value from create_descriptor_list().
 */
int generate_odma_descriptors(int num_descriptors, int data_xfer_size, bool ot_desc)
{
    DMA_descriptor *desc_list;
    dma_addr_t desc_list_phys;
    dma_addr_t target_phys;
    uint32_t desc_flags = 0;
    int status;
    int idx;
    const int last_idx = num_descriptors - 1;

    status = create_descriptor_list(num_descriptors, new_8word_axi,
                                    &desc_list, &desc_list_phys, NULL);
    debug_printk("%s virt_addr returned was 0x%p, phys_addr was 0x%X\n",__func__,
                 desc_list, desc_list_phys);
    if (status != 0)
        return status;

    if (!ot_desc)
    {
        // regular output path; for PIE, 1 strip = 1 descriptor
        piev_object.device_odma_descriptor = desc_list;
        printk("generate odma descriptors: device descriptor (logical) addr=0x%p\n", piev_object.device_odma_descriptor);
        piev_object.device_odma_descriptor_phys = desc_list_phys;
        printk("phys=0x%X\n", piev_object.device_odma_descriptor_phys);
        piev_object.num_output_strips = num_descriptors;
        // the descriptors need the physical address of the data buffer
        target_phys = piev_object.output_data_phys;
    }
    else
    {
        // OT output path
        piev_object.device_ot_odma_descriptor = desc_list;
        printk("generate otodma descriptors: device descriptor (logical) addr=0x%p\n", piev_object.device_ot_odma_descriptor);
        piev_object.device_ot_odma_descriptor_phys = desc_list_phys;
        printk("phys=0x%X\n", piev_object.device_ot_odma_descriptor_phys);
        // the descriptors need the physical address of the data buffer
        target_phys = piev_object.ot_output_data_phys;
    }

    /* FIXME (carried over): a final data buffer that is smaller than the
       others is not handled yet - every descriptor gets data_xfer_size */
    for (idx = 0; idx < num_descriptors; idx++)
    {
        // only the final descriptor stops the DMA and raises the interrupt
        if (idx == last_idx)
            desc_flags = DMA_STOP_WHEN_DONE | DMA_INT_WHEN_DONE;

        set_descriptor(desc_list, idx, data_xfer_size, target_phys, desc_flags);
    }
    return status;
}

// completed by pie_bypass_odma_callback() when the ODMA reports Desc == 1
DECLARE_COMPLETION(dma_bypass_test_complete);

/*
 * ODMA interrupt callback for the bypass test.
 * stuff is treated as a struct odma_interrupt_info; its fields are logged and
 * dma_bypass_test_complete is completed when Desc is 1.  callback_data is unused.
 */
void pie_bypass_odma_callback(void *stuff, void *callback_data)
{
    struct odma_interrupt_info *info = stuff;

    if (info == NULL)
    {
        printk("ERROR! why odma int callback if data was null????\n");
        return;
    }

    printk("%s: EOS=%d, DESC=%d, CC=%d, Own=%d, LE=%d, OORE=%d RRERR=%d BRERR=%d\n",
           __func__,
           info->EndOfStrip, info->Desc, info->ClearComplete, info->Own,
           info->LengthErr, info->OutOfRangeErr, info->RRespErr, info->BRespErr);

    if (info->Desc != 1)
        return;

    // signal the waiter(s) that we are done
    complete(&dma_bypass_test_complete);
}

// completed by pie_ot_odma_callback() when the OT ODMA reports Desc == 1
DECLARE_COMPLETION(dma_ot_odma_test_complete);

/*
 * OT ODMA interrupt callback.
 * stuff is treated as a struct odma_interrupt_info; its fields are logged and
 * dma_ot_odma_test_complete is completed when Desc is 1.  callback_data is unused.
 */
void pie_ot_odma_callback(void *stuff, void *callback_data)
{
    struct odma_interrupt_info *info = stuff;

    if (info == NULL)
    {
        printk("ERROR! why OT odma int callback if data was null????\n");
        return;
    }

    printk("%s: EOS=%d, DESC=%d, CC=%d, Own=%d, LE=%d, OORE=%d RRERR=%d BRERR=%d\n",
           __func__,
           info->EndOfStrip, info->Desc, info->ClearComplete, info->Own,
           info->LengthErr, info->OutOfRangeErr, info->RRespErr, info->BRespErr);

    if (info->Desc != 1)
        return;

    // signal the waiter(s) that we are done
    complete(&dma_ot_odma_test_complete);
}

// OT IDMA interrupt callback - log only, to make debugging easier.
// stuff is treated as a struct idma_interrupt_info; callback_data is unused.
// No completion is signalled here; the Desc interrupt is just reported.
void pie_ot_idma_callback(void *stuff, void *callback_data)
{
    struct idma_interrupt_info *info = stuff;

    if (info == NULL)
    {
        printk("ERROR! why OT idma int callback if data was null????\n");
        return;
    }

    printk("%s: EOS=%d, DESC=%d, CC=%d, Own=%d, LE=%d, OORE=%d RRERR=%d BRERR=%d\n",
           __func__,
           info->EndOfStrip, info->Desc, info->ClearComplete, info->Own,
           info->LengthErr, info->OutOfRangeErr, info->RRespErr, info->BRespErr);

    // not going to set a completion - just eyeball it
    if (info->Desc == 1)
    {
        printk("OT idma got a Desc interrupt - we should be done!\n");
    }
}

// OT MARB interrupt callback: a DT mismatch would be bad, so make the report
// impossible to miss by printing the same line several times.
// stuff is treated as a struct otmarb_interrupt_info; callback_data is unused.
void pie_ot_marb_callback(void *stuff, void *callback_data)
{
    struct otmarb_interrupt_info *info = stuff;
    int repeat;

    if (info == NULL)
    {
        printk("ERROR! why OT MARB int callback if data was null????\n");
        return;
    }

    // seven copies of the message, on purpose
    for (repeat = 0; repeat < 7; repeat++)
    {
        printk("%s: DT Mismatch error = %d!!!!!!!!!!!!!!!!!!!!!!!  FATAL!!!!!!!! ERROR!!!!!!\n",
               __func__, info->DTmismatchErr);
    }
}


/*
 * IDMA interrupt callback for the bypass test - log only.
 * stuff is treated as a struct idma_interrupt_info; a NULL pointer is
 * silently ignored.  callback_data is unused.  Always returns 0.
 */
int pie_bypass_idma_callback(void *stuff, void *callback_data)
{
    struct idma_interrupt_info *info = stuff;

    if (info == NULL)
        return 0;

    printk("%s: instance=%d, EOS=%d, DESC=%d, CC=%d, Own=%d, LE=%d, OORE=%d RRERR=%d BRERR=%d\n",
           __func__, info->instance,
           info->EndOfStrip, info->Desc, info->ClearComplete, info->Own,
           info->LengthErr, info->OutOfRangeErr, info->RRespErr, info->BRespErr);
    return 0;
}

// completed once every IDMA channel has reported both Desc and EndOfStrip
DECLARE_COMPLETION(dma_interrupts_complete);
// set to false (see interrupt_test) to make the next callback clear its
// per-channel bookkeeping before recording anything
static bool initialized_idma;

/*
 * IDMA interrupt callback for the interrupt test.
 *
 * stuff is treated as a struct idma_interrupt_info.  For the reporting
 * channel (idma_data->instance) the callback records whether a Desc (udma)
 * and/or an EndOfStrip (core) interrupt has been seen; when every one of the
 * NUM_PIE_IDMA_CHANNELS channels has reported both, dma_interrupts_complete
 * is completed.  callback_data is unused.  Always returns 0.
 */
int pie_idma_interrupt_callback(void *stuff, void *callback_data)
{
    struct idma_interrupt_info *idma_data;
    static int idma_udma[NUM_PIE_IDMA_CHANNELS], idma_core[NUM_PIE_IDMA_CHANNELS];
    int i;
    int instance;
    bool done;

    if (initialized_idma == false)
    {
        // need to initialize the idma arrays
        for (i=0;i<NUM_PIE_IDMA_CHANNELS;i++)
        {
            idma_udma[i] = 0;
            idma_core[i] = 0;
        }
        initialized_idma = true;
    }
        
    idma_data = stuff;
    if (idma_data != NULL)
    {
        printk("%s: instance=%d, EOS=%d, DESC=%d, CC=%d, Own=%d, LE=%d, OORE=%d RRERR=%d BRERR=%d\n",
               __func__, idma_data->instance, idma_data->EndOfStrip, idma_data->Desc,
               idma_data->ClearComplete, idma_data->Own, idma_data->LengthErr, idma_data->OutOfRangeErr,
               idma_data->RRespErr, idma_data->BRespErr);

        // the instance indexes the static arrays below - never trust it blindly
        instance = idma_data->instance;
        if ((instance < 0) || (instance >= NUM_PIE_IDMA_CHANNELS))
        {
            printk("%s: ERROR, instance %d is out of range (0..%d)\n",
                   __func__, instance, NUM_PIE_IDMA_CHANNELS - 1);
            return 0;
        }

        // if we received Desc, we have the udma interrupt
        if (idma_data->Desc)
            idma_udma[instance] = 1;
        // if we received EOS, we have a core interrupt
        if (idma_data->EndOfStrip)
            idma_core[instance] = 1;
        // have we received all the idma interrupts?
        done = true;
        for (i=0;i<NUM_PIE_IDMA_CHANNELS;i++)
        {
            if ((idma_udma[i] == 0) || (idma_core[i] == 0))
                done = false;
            printk("%s: idma_udma[%d]=%d, idma_core[%d]=%d\n",__func__, i, idma_udma[i], i, idma_core[i]);
        }
        if (done)
            complete(&dma_interrupts_complete); // signal the waiter(s) that we are done
    }
    else
        printk("%s - stuff pointer passed as NULL\n", __func__);
    return 0;
}

/*
 * ODMA interrupt callback for the interrupt test - log only.
 * stuff is treated as a struct odma_interrupt_info; callback_data is unused.
 */
void pie_odma_interrupt_callback(void *stuff, void *callback_data)
{
    struct odma_interrupt_info *info = stuff;

    if (info == NULL)
    {
        printk("ERROR! why odma int callback if data was null????\n");
        return;
    }

    printk("%s: EOS=%d, DESC=%d, CC=%d, Own=%d, LE=%d, OORE=%d RRERR=%d BRERR=%d\n",
           __func__,
           info->EndOfStrip, info->Desc, info->ClearComplete, info->Own,
           info->LengthErr, info->OutOfRangeErr, info->RRespErr, info->BRespErr);
}

// Allocate the input data buffer and build one descriptor list per IDMA channel.
//
// parms
// number_of_desc    - how many descriptors to set up per channel
// pixels_per_desc   - how many pixels each descriptor should xfer
// idma_Bpp          - bytes per pixel
// num_idma_channels - how many IDMA channels to set up (0 .. n-1)
//
// returns the number of bytes to be transferred per descriptor per channel
// (rounded up to a multiple of 4), the non-0 value from create_input_buffer
// if the buffer could not be created, or -1 if creating descriptors failed
static int setup_idma(int number_of_desc, int pixels_per_desc, 
                      int idma_Bpp, int num_idma_channels)
{
    int channel;
    int status;
    int bytes_per_desc;
    int buffer_bytes;
    int pixel_count;

    // bytes per descriptor per channel, rounded up to be 4 byte aligned
    // (already aligned values are left unchanged by the mask)
    bytes_per_desc = (pixels_per_desc * idma_Bpp + 3) & (~(0x3));

    buffer_bytes = bytes_per_desc * number_of_desc * num_idma_channels;
    pixel_count = (number_of_desc * pixels_per_desc * num_idma_channels);
    debug_printk("%s total_num_pixels=%d, num_desc=%d, ppd=%d, iB=%d, nic=%d\n",__func__,
                 pixel_count, number_of_desc, pixels_per_desc, idma_Bpp, num_idma_channels);

    // one buffer of data to send to PIE, shared by all the channels
    status = create_input_buffer(pixel_count, idma_Bpp, buffer_bytes, false);
    if (status != 0)
    {
        printk("ERROR!!! non-0 return from create_input_buffer in %s\n",__func__);
        return status;
    }

    // a descriptor list for each of the num_idma_channels PIE IDMA channels
    channel = 0;
    while (channel < num_idma_channels)
    {
        status = generate_idma_descriptors(number_of_desc, bytes_per_desc, channel, false);
        if (status != 0)
        {
            printk("ERROR!!!  Bad return creating idma_descriptors, 0x%X\n", status);
            return -1;
        }
        channel++;
    }
    // register callback for idma interrupts
    // pie_idma_register_callback(pie_idma_callback);

    return bytes_per_desc;
}

// Allocate the OT input buffer, build its descriptor list and program the
// OT pogoizer and OT input DMA in pie_handle.
//
// pie_handle          - handle that receives the OT configuration
// num_desc            - how many descriptors to set up
// num_pixels_per_desc - how many pixels each descriptor should xfer
// linewidth_pixels    - line width in pixels (converted to bytes via ot_bits)
// ot_bits             - how many bits per OT pixel (4, 2 or 1)
// ot_inputformat      - POGO_FMT_1CHAN_PK, POGO_FMT_1CHAN_UNPK
//
// returns the number of bytes to be transferred per descriptor (rounded up to
// a multiple of 4), -1 for an unsupported ot_bits or a descriptor failure, or
// the non-0 value from create_input_buffer
static int setup_ot_idma(struct pie_handle_t *pie_handle, int num_desc, int num_pixels_per_desc,
                         int linewidth_pixels, int ot_bits, uint8_t ot_inputformat)
{
    uint8_t handshake = 0;
    uint8_t ownership = 0;
    uint8_t own_polarity = 0;
    uint8_t enable = DMA_ENABLE;
    int rowheight = ROWHEIGHT;
    int pixels_per_byte;
    int ot_bufsize, bytes_per_desc, linewidth;
    int status;

    // how many OT pixels are packed into one byte
    switch (ot_bits)
    {
    case 4:
        pixels_per_byte = 2;
        break;
    case 2:
        pixels_per_byte = 4;
        break;
    case 1:
        pixels_per_byte = 8;
        break;
    default:
        printk("%s: ERROR, only 4, 2, and 1 bit OT data size supported\n", __func__);
        return -1;
    }

    // total bytes for all descriptors, and bytes per line
    ot_bufsize = (num_desc * num_pixels_per_desc) / pixels_per_byte;
    linewidth = linewidth_pixels / pixels_per_byte;

    // create a buffer of random data to send to PIE
    status = create_input_buffer(0, 0, ot_bufsize, true);  // true = ot_input_buf
    if (status != 0)
    {
        printk("ERROR!!! non-0 return from create_input_buffer in %s\n",__func__);
        return status;
    }

    // amount of data for 1 ot descriptor, rounded up to be 4 byte aligned
    bytes_per_desc = ((ot_bufsize / num_desc) + 3) & (~(0x3));

    // create descriptors, store the first descriptor's physical address
    status = generate_idma_descriptors(num_desc, bytes_per_desc, 0, true);  // true for generating ot descriptors
    if (status != 0)
    {
        printk("ERROR!!!  Bad return creating idma_descriptors, 0x%X\n", status);
        return -1;
    }

    pie_ot_pogoizer_set_config(pie_handle, ot_inputformat, 2);  // parm 2 - 4bpp

    pie_ot_input_dma_set_parms(pie_handle, handshake, ownership, own_polarity, enable,
                               linewidth, rowheight);
    return bytes_per_desc;
}

// Allocate the OT output buffer, build its descriptor list and program the
// OT depogoizer and OT output DMA in pie_handle.
//
// pie_handle          - handle that receives the OT configuration
// num_desc            - how many descriptors to set up
// num_pixels_per_desc - how many pixels each descriptor should xfer
// linewidth_pixels    - line width in pixels (converted to bytes via ot_bits)
// ot_bits             - how many bits per OT pixel (4, 2 or 1)
// ot_outputformat     - format value passed to pie_ot_depogoizer_set_config
//
// returns 0 on success, -1 for an unsupported ot_bits or a buffer failure, or
// the non-0 value from generate_odma_descriptors.  On any failure the handle
// is left unprogrammed.
static int setup_ot_odma(struct pie_handle_t *pie_handle, int num_desc, int num_pixels_per_desc,
                         int linewidth_pixels, int ot_bits, uint8_t ot_outputformat)
{
    uint8_t handshake, ownership, own_polarity, enable;
    int ot_bufsize, datasize, retval, linewidth;
    
    ot_bufsize = num_desc * num_pixels_per_desc;
    if (ot_bits == 4)
    {
        // 4 bits OT data per pixel
        ot_bufsize = ot_bufsize/2;
        linewidth = linewidth_pixels/2;
    }
    else if (ot_bits == 2)
    {
        // 2 bits OT data per pixel
        ot_bufsize = ot_bufsize/4;
        linewidth = linewidth_pixels/4;
    }
    else if (ot_bits == 1)
    {
        // 1 bit of OT data per pixel
        ot_bufsize = ot_bufsize/8;
        linewidth = linewidth_pixels/8;
    }
    else
    {
        printk("%s:ERROR, only 4, 2, and 1 bit OT data size supported\n", __func__);
        return -1;
    }
    
    if (create_output_buffer(ot_bufsize, true) != 0) 
    {
        printk("%s: ERROR return from create_output_buffer\n", __func__);
        return -1;
    }

    // Calculate the amount of data for 1 ot output descriptor
    // NOTE(review): the buffer is allocated with the unrounded ot_bufsize while
    // each descriptor uses the rounded-up datasize - confirm the descriptors
    // cannot cover more bytes than were allocated.
    datasize = ot_bufsize/num_desc;
    if ((datasize & 0x3) != 0)
    {
        datasize = (datasize+3) & (~(0x3)); // round up datasize to be 4 byte aligned
    }            

    // create descriptors
    retval = generate_odma_descriptors(num_desc, datasize, true); // true for generating ot descriptors

    if (retval != 0)
    {
        // without a descriptor list there is nothing valid to program into the
        // handle, so stop here (same as setup_ot_idma does on this failure)
        printk("ERROR!!!  Bad return creating odma_descriptors\n");
        return retval;
    }

    pie_ot_depogoizer_set_config(pie_handle, ot_outputformat, 2); // parm 2 - 4 bpp

    handshake = 0;
    ownership = 0;
    own_polarity = 0;
    enable = DMA_ENABLE;
    pie_ot_output_dma_set_parms(pie_handle, handshake, ownership, own_polarity, enable, linewidth);
    
    return retval;
}

// Allocate the output data buffer and build the ODMA descriptor list.
//
// number_of_desc    - how many descriptors to set up
// pixels_per_desc   - how many pixels each descriptor should xfer
// odma_Bpp          - bytes per output pixel
// num_idma_channels - number of input channels; scales the size of the
//                     output buffer
//
// returns 0 on success, otherwise the non-0 value from create_output_buffer
// or generate_odma_descriptors
static int setup_odma(int number_of_desc, int pixels_per_desc,
                      int odma_Bpp, int num_idma_channels)
{
    int bufsize, retval;
    int datasize;

    // DO WE NEED TO CHECK ON THE 4 BYTE ALIGNMENT OF BUFSIZE? FIXME
    // set up memory for the dma to write to - 1 giant buffer for all the data
    bufsize = (number_of_desc * pixels_per_desc * odma_Bpp * num_idma_channels);
    printk("calling create_output_buffer with bufsize of %d\n", bufsize);
    retval = create_output_buffer(bufsize, false);  // not an OT buffer
    if (retval != 0)
    {
        // this path is taken on a non-0 return, so say so
        printk("ERROR!!! non-0 return from create_output_buffer in %s\n",__FUNCTION__);
        return retval; 
    }

    // Calculate the amount of data in 1 output descriptor.  This is 
    // combining all planes of input
    datasize = pixels_per_desc * odma_Bpp;

    if ((datasize & 0x3) != 0)
    {
        datasize = (datasize+3) & (~(0x3)); // round up datasize to be 4 byte aligned
    }            
    // create descriptors
    retval = generate_odma_descriptors(number_of_desc,
                                       datasize, false);
    if (retval != 0)
    {
        printk("ERROR!!!  Bad return creating odma_descriptors\n");
        return retval;
    }
    // register callback for odma interrupts
    // pie_odma_register_callback(pie_odma_callback);

    return 0;
}

// completed by pie_common_callback() every time it runs
DECLARE_COMPLETION(common_interrupt_complete);

/*
 * PIE common interrupt callback for the interrupt test.
 * Logs the stuff pointer and the string passed as callback_data, then
 * completes common_interrupt_complete.
 */
void pie_common_callback(void *stuff, void *callback_data)
{
    int *payload = (int *) stuff;
    char *label = (char *) callback_data;

    printk("WOOHOOO!!! callback worked %s: stuff=0x%X\n",__func__,(unsigned int) payload);
    printk("And the callback_string was %s\n", label);

    // signal the common waiter(s) that we are done
    complete(&common_interrupt_complete);
}


/*
 * Interrupt test: force the PIE common interrupts, wait for the common
 * callback, then force the ODMA and all IDMA channel interrupts and wait
 * until pie_idma_interrupt_callback() has seen them on every channel.
 *
 * If a wait is interrupted before its completion is signalled, the test
 * reports that and stops instead of claiming success.
 */
static void interrupt_test(void)
{
    struct pie_handle_t *pie_handle;
    int i;
    int rc;
    
    // make the idma callback clear its per-channel bookkeeping on first use
    initialized_idma = false;

    printk("%s\n",__FUNCTION__);
    pie_do_reset();
    pie_do_clear_all_irqs();

    // create a pie instance
    pie_handle = pie_create_new_default_handle();
    if (pie_handle == NULL)
    {
        printk("%s: ERROR, could not create a pie handle\n", __func__);
        return;
    }
    
    // install pie common callback
    pie_register_common_callback(pie_handle, pie_common_callback, "interrupt_test");

    printk("setting pie common interrupts first\n");
    pie_enable_common_irqs(pie_handle, NULL, true);
    pie_force_common_irqs(pie_handle, NULL, true);
    
    pie_do_configure(pie_handle); // pie, write your regs (this should start the interrupt test)

    // wait for our common callback to run; a non-zero return means the wait
    // was interrupted and the interrupt was NOT seen
    rc = wait_for_completion_interruptible(&common_interrupt_complete);
    if (rc != 0)
    {
        printk("%s: ERROR, wait for the common interrupt was interrupted (%d)\n", __func__, rc);
        pie_register_common_callback(pie_handle, NULL, NULL);
        pie_do_free_handle(pie_handle);
        return;
    }
    printk("We received our common interrupt!\n");

    // unregister the common callback and register the dma callbacks
    pie_register_common_callback(pie_handle, NULL, NULL);
    pie_register_idma_callback(pie_handle, pie_idma_interrupt_callback, NULL);
    pie_register_odma_callback(pie_handle, pie_odma_interrupt_callback, NULL);

    pie_enable_pogo_odma_irqs(pie_handle, NULL, true);
    pie_force_pogo_odma_irqs(pie_handle, NULL, true);
    
    for (i=0;i<NUM_PIE_IDMA_CHANNELS;i++)
    {
        // enable and force all ints for all channels
        pie_enable_pogo_idma_irqs(pie_handle, NULL, true, i);
        pie_force_pogo_idma_irqs(pie_handle, NULL, true, i);
    }
    pie_do_configure(pie_handle); // pie, write your regs (this should start the next part)
    rc = wait_for_completion_interruptible(&dma_interrupts_complete);
    if (rc != 0)
        printk("%s: ERROR, wait for the dma interrupts was interrupted (%d)\n", __func__, rc);
    else
        printk("Interrupt test completed successfully!\n");
    pie_do_free_handle(pie_handle);
}

static void dump_handle_regs(int num_idma)
{
    struct pie_handle_t *pie_handle;
    
    if (num_idma <= NUM_PIE_IDMA_CHANNELS)
    {
        printk("printing regs for 1 odma channel and %d idma channel(s)\n", num_idma);
    }
    else
    {
        printk("Error, you requested %d channels dumped\n", num_idma);
        return;            
    }

    pie_handle = pie_create_new_default_handle();
    pie_do_get_current(pie_handle);
    printk("dumping all pie regs and descriptors\n");
    pie_dump_handle_regs(pie_handle, num_idma);
    pie_do_free_handle(pie_handle);
}
    
static void dump_desc(int num_idma, int num_desc)
{
    int i;

    if (num_idma <= NUM_PIE_IDMA_CHANNELS)
    {
        printk("printing 1 odma channel and %d idma channel(s)\n", num_idma);
    }
    else
    {
        printk("Error, you requested %d channels dumped\n", num_idma);
        return;            
    }
    
    printk("pie IDMA descriptors\n");
    for (i=0; i<num_idma; i++)
    {
        if (piev_object.device_idma_descriptor[i] == 0)
            printk("ERROR!!! idma_descriptor[%d] = 0\n", i);
        else
            dump_descriptors(piev_object.device_idma_descriptor[i], num_desc);
    }

    printk("pie ODMA descriptors - logical addr=0x%p\n", piev_object.device_odma_descriptor);
    if (piev_object.device_odma_descriptor == 0)
        printk("ERROR!!! odma_descriptor = 0\n");
    else
        dump_descriptors(piev_object.device_odma_descriptor, num_desc);

    printk("pie OT ODMA descriptors - logical address=0x%p\n", piev_object.device_ot_odma_descriptor);
    if (piev_object.device_ot_odma_descriptor == 0)
        printk("ERROR!!! odma_descriptor = 0\n");
    dump_descriptors(piev_object.device_ot_odma_descriptor, num_desc);
}


// Smallest possible exercise of the pie handle API: announce the function
// name, create a default handle, pass it to pie_do_get_current(), dump it for
// all NUM_PIE_IDMA_CHANNELS idma channels and free it again.
static void run_basic_test(void)
{
    struct pie_handle_t *handle;

    printk("%s\n", __func__);

    handle = pie_create_new_default_handle();
    pie_do_get_current(handle);
    pie_dump_handle_regs(handle, NUM_PIE_IDMA_CHANNELS);
    pie_do_free_handle(handle);
}


// free the descriptors and the data that they point to

// Give back the coherent DMA memory that holds the input data and the OT
// input data.  Each buffer is released only when the size recorded for it in
// piev_object is non-zero.  The piev_object fields are left untouched.
void cleanup_input_buffers(void)
{
    if (piev_object.in_bufsize != 0)
    {
        int nbytes = piev_object.in_bufsize;
        uint32_t *vaddr = piev_object.input_data;
        dma_addr_t paddr = piev_object.input_data_phys;

        dma_free_coherent(NULL, nbytes, vaddr, paddr);
    }

    if (piev_object.ot_in_bufsize != 0)
    {
        int nbytes = piev_object.ot_in_bufsize;
        uint32_t *vaddr = piev_object.ot_input_data;
        dma_addr_t paddr = piev_object.ot_input_data_phys;

        dma_free_coherent(NULL, nbytes, vaddr, paddr);
    }
}

// Give back the coherent DMA memory that holds the output data and the OT
// output data.  Each buffer is released only when the size recorded for it in
// piev_object is non-zero.  The piev_object fields are left untouched.
void cleanup_output_buffers(void)
{
    if (piev_object.out_bufsize != 0)
    {
        int nbytes = piev_object.out_bufsize;
        uint32_t *vaddr = piev_object.output_data;
        dma_addr_t paddr = piev_object.output_data_phys;

        dma_free_coherent(NULL, nbytes, vaddr, paddr);
    }

    if (piev_object.ot_out_bufsize != 0)
    {
        int nbytes = piev_object.ot_out_bufsize;
        uint32_t *vaddr = piev_object.ot_output_data;
        dma_addr_t paddr = piev_object.ot_output_data_phys;

        dma_free_coherent(NULL, nbytes, vaddr, paddr);
    }
}

// Destroy the input-side descriptor chains recorded in piev_object.
//   num_idma_channels - number of per-channel data chains to destroy.
// The per-channel chains are destroyed only when the channel 0 pointer is
// non-NULL; the OT idma chain is destroyed only when its pointer is non-NULL.
// Every chain is destroyed with piev_object.num_input_strips descriptors.
void cleanup_IDMA_descriptors(int num_idma_channels)
{
    int chan;
    // the data and the OT chains hold the same number of descriptors
    int desc_count = piev_object.num_input_strips;

    if (piev_object.device_idma_descriptor[0] != NULL)
    {
        for (chan = 0; chan < num_idma_channels; chan++)
        {
            DMA_descriptor *chain = piev_object.device_idma_descriptor[chan];
            dma_addr_t chain_phys = piev_object.device_idma_descriptor_phys[chan];

            destroy_descriptors(desc_count, chain, chain_phys, NULL);
        }
    }

    if (piev_object.device_ot_idma_descriptor != NULL)
    {
        // the OT idma chain
        DMA_descriptor *chain = piev_object.device_ot_idma_descriptor;
        dma_addr_t chain_phys = piev_object.device_ot_idma_descriptor_phys;

        destroy_descriptors(desc_count, chain, chain_phys, NULL);
    }
}

// Destroy the output-side descriptor chains recorded in piev_object: the
// odma chain and the OT odma chain, each only when its pointer is non-NULL.
// Both are destroyed with piev_object.num_output_strips descriptors.
void cleanup_ODMA_descriptors(void)
{
    int desc_count = piev_object.num_output_strips;

    if (piev_object.device_odma_descriptor != NULL)
    {
        DMA_descriptor *chain = piev_object.device_odma_descriptor;
        dma_addr_t chain_phys = piev_object.device_odma_descriptor_phys;

        destroy_descriptors(desc_count, chain, chain_phys, NULL);
    }

    if (piev_object.device_ot_odma_descriptor != NULL)
    {
        // the OT odma chain
        DMA_descriptor *chain = piev_object.device_ot_odma_descriptor;
        dma_addr_t chain_phys = piev_object.device_ot_odma_descriptor_phys;

        destroy_descriptors(desc_count, chain, chain_phys, NULL);
    }
}

// Treat bufptr as an array of 16-bit values and return element `index`,
// zero-extended to 32 bits.
uint32_t get_16_val(int index, void *bufptr)
{
    const uint16_t *halfwords = bufptr;

    return (uint32_t) halfwords[index];
}

// Treat bufptr as an array of bytes and return byte `index`, zero-extended
// to 32 bits.
uint32_t get_8_val(int index, void *bufptr)
{
    const uint8_t *bytes = bufptr;

    return (uint32_t) bytes[index];
}

// Return packed 24-bit pixel `index` from bufptr (3 bytes per pixel).
// The three bytes are combined as byte0 | byte1 << 8 | byte2 << 16, which is
// the value the previous 32-bit load produced on a little-endian CPU.
// Building the value byte by byte avoids a misaligned 32-bit load and never
// touches the byte after the pixel, so the last pixel of a buffer can be read
// without running off its end.
uint32_t get_rgb_val(int index, void *bufptr)
{
    const uint8_t *px = (const uint8_t *) bufptr + (index * 3);

    return (uint32_t) px[0] | ((uint32_t) px[1] << 8) | ((uint32_t) px[2] << 16);
}

// Treat bufptr as an array of 32-bit pixels and return the low 24 bits of
// pixel `index` (bits 31:24 are discarded).
uint32_t get_xrgb_val(int index, void *bufptr)
{
    const uint32_t *words = bufptr;
    uint32_t word = words[index];

    return word & 0x00FFFFFFu;
}

// Treat bufptr as an array of 32-bit pixels and return the upper 24 bits of
// pixel `index`, shifted down to bits 23:0 (bits 7:0 are discarded).
uint32_t get_rgbx_val(int index, void *bufptr)
{
    const uint32_t *words = bufptr;
    uint32_t word = words[index];

    // shifting an unsigned 32-bit value right by 8 drops the low byte
    return word >> 8;
}

// Fetch one pixel from the input data buffer in the form used for comparing
// against the output buffer.
//   pogo_bpp_input          - POGO_16BPP selects 16-bit samples (only the top
//                             8 bits of each sample are kept); anything else
//                             is read as 8-bit samples
//   pogo_fmt_type_in        - POGO_FMT_* layout of input_data
//   pogo_fmt_type_out       - POGO_FMT_* layout of the output; when it is
//                             POGO_FMT_MONO and the input is not, only one
//                             channel of the input pixel is returned
//   input_data              - start of the input buffer
//   color_swap              - for RGBX/XRGB input, non-zero flips which end of
//                             the 32-bit word holds the undefined byte
//   datasize_per_descriptor - bytes per descriptor (16bpp planar plane size)
//   pixels_per_descriptor   - pixels per descriptor (8bpp planar plane size)
//   num_desc                - number of descriptors per plane
//   index                   - pixel index into the input_data buffer
// Returns the pixel value, or (uint32_t)-1 for an unsupported input format.
uint32_t get_input_pixel_val(int pogo_bpp_input, int pogo_fmt_type_in,
                             int pogo_fmt_type_out,
                             uint32_t *input_data, int color_swap,
                             int datasize_per_descriptor,
                             int pixels_per_descriptor, int num_desc,
                             int index)
{
    uint32_t input_pixel_val;
    uint32_t input_pixel_r, input_pixel_g, input_pixel_b;
    uint8_t *plane_r, *plane_g, *plane_b;

    switch (pogo_fmt_type_in)
    {
    case POGO_FMT_MONO:
        if (pogo_bpp_input == POGO_16BPP)
        {
            input_pixel_val = get_16_val(index, input_data);
            input_pixel_val = input_pixel_val >> 8;  // truncate bottom 8 bits
        }
        else
        {
            input_pixel_val = get_8_val(index, input_data);
        }
        break;
    case POGO_FMT_RGB:
        input_pixel_val = get_rgb_val(index, input_data);
        break;
    case POGO_FMT_RGBX:
        if (color_swap == 0)
        {
            input_pixel_val = get_rgbx_val(index, input_data);
        }
        else
        {
            // color_swap = 1 puts the undefined data at bits 31:24
            // so we grab xrgb style data in that case since we don't care
            // where rgb are in relation to each other, just in relation to
            // the undefined data
            input_pixel_val = get_xrgb_val(index, input_data);
        }
        break;
    case POGO_FMT_XRGB:
        if (color_swap == 0)
        {
            input_pixel_val = get_xrgb_val(index, input_data);
        }
        else
        {
            // color_swap = 1 puts the undefined data at bits 7:0
            // so we grab rgbx style data in that case since we don't care
            // where rgb are in relation to each other, just in relation to
            // the undefined data
            input_pixel_val = get_rgbx_val(index, input_data);
        }
        break;
    case POGO_FMT_PLANAR:
        // The three planes sit back to back in the buffer.  Plane starts are
        // computed with byte pointers (not integers) so the address is never
        // squeezed through a 32-bit integer on builds with wider pointers.
        plane_r = (uint8_t *) input_data;
        if (pogo_bpp_input == POGO_16BPP)
        {
            plane_g = plane_r + num_desc * datasize_per_descriptor;
            plane_b = plane_g + num_desc * datasize_per_descriptor;
            input_pixel_r = get_16_val(index, plane_r) >> 8; // truncate bottom 8 bits
            input_pixel_g = get_16_val(index, plane_g) >> 8; // truncate bottom 8 bits
            input_pixel_b = get_16_val(index, plane_b) >> 8; // truncate bottom 8 bits
        }
        else
        {
            plane_g = plane_r + pixels_per_descriptor * num_desc;
            plane_b = plane_g + pixels_per_descriptor * num_desc;
            input_pixel_r = get_8_val(index, plane_r);
            input_pixel_g = get_8_val(index, plane_g);
            input_pixel_b = get_8_val(index, plane_b);
        }
        input_pixel_val = input_pixel_r << 16 | input_pixel_g << 8 | input_pixel_b;
        break;

    default:
        printk("ERROR unsupported pogo_fmt_type %d\n",pogo_fmt_type_in);
        return (uint32_t) -1;
    }

    if ((pogo_fmt_type_out == POGO_FMT_MONO) && (pogo_fmt_type_in != POGO_FMT_MONO))
    {
        // The output is mono but the input was not, so the compare can only
        // go against the channel chosen for mono.  That choice is defaulted,
        // so take byte 2 of the pixel value as it is stored in memory
        // (bits 23:16 on a little-endian CPU).
        input_pixel_val = get_8_val(2, &input_pixel_val);
    }

    return input_pixel_val;
}

// Fetch pixel `index` from the output data buffer.
//   pogo_fmt_type_out - POGO_FMT_* layout of output_data
//   output_data       - start of the output buffer
//   color_swap        - for RGBX/XRGB, non-zero flips which end of the 32-bit
//                       word holds the undefined byte
//   index             - pixel index into output_data
// Returns the pixel value.  POGO_FMT_PLANAR is not a supported output type
// and yields (uint32_t)-1; any other unknown format is reported and yields 0.
uint32_t get_output_pixel_val(int pogo_fmt_type_out,
                              uint32_t *output_data, int color_swap, int index)

{
    switch (pogo_fmt_type_out)
    {
    case POGO_FMT_MONO:
        return get_8_val(index, output_data);

    case POGO_FMT_RGB:
        return get_rgb_val(index, output_data);

    case POGO_FMT_RGBX:
        // color_swap = 1 puts the undefined data at bits 31:24, so the pixel
        // is read xrgb style in that case; only the position of rgb relative
        // to the undefined byte matters here
        if (color_swap != 0)
            return get_xrgb_val(index, output_data);
        return get_rgbx_val(index, output_data);

    case POGO_FMT_XRGB:
        // color_swap = 1 puts the undefined data at bits 7:0, so the pixel
        // is read rgbx style in that case
        if (color_swap != 0)
            return get_rgbx_val(index, output_data);
        return get_xrgb_val(index, output_data);

    case POGO_FMT_PLANAR:
        printk("ERROR, color planar is unsupported output type\n");
        return -1;

    default:
        printk("ERROR unsupported pogo_fmt_type %d\n",pogo_fmt_type_out);
        break;
    }
    return 0;
}

// Look up the LUT fill pattern for a bde test.
//   testnum  - which pattern to use
//   startnum - out: value of the first LUT entry
//   incr     - out: amount added for each following LUT entry
// Patterns:
//   testnum 1     - start at 0, step +64
//   testnum 2     - start at 0x3fc0, step -64
//   anything else - unity LUT: start at 0, step +1
void get_bde_parms(int testnum, int *startnum, int *incr)
{
    switch (testnum)
    {
    case 1:
        *startnum = 0;
        *incr = 64;
        break;
    case 2:
        *startnum = 0x3fc0;
        *incr = -64;
        break;
    default:
        // default to unity LUT
        *startnum = 0;
        *incr = 1;
        break;
    }
}

// Check one output pixel against the value the bde LUT pattern `testnum`
// should have produced from the input pixel.
//   testnum          - LUT pattern, resolved through get_bde_parms()
//   pogo_fmt_type    - POGO_FMT_MONO compares a single 8-bit channel; any
//                      other format compares NUM_PIE_IDMA_CHANNELS channels
//   input_pixel_val  - pixel that went into pie
//   output_pixel_val - pixel that came out of pie
// Each 8-bit channel is run through (value * incr + startnum) >> 6 and the
// results are recombined at the same bit positions.
// Returns true when the recombined value equals output_pixel_val.
bool pixel_bde_compare(int testnum, int pogo_fmt_type, uint32_t input_pixel_val, uint32_t output_pixel_val)
{
    int lut_start, lut_step, channels, shift;
    int expected = 0;

    get_bde_parms(testnum, &lut_start, &lut_step);

    channels = (pogo_fmt_type == POGO_FMT_MONO) ? 1 : NUM_PIE_IDMA_CHANNELS;

    // walk the channels from the most significant byte down to bits 7:0
    for (shift = 8 * (channels - 1); shift >= 0; shift -= 8)
    {
        int chan_in = (int) ((input_pixel_val >> shift) & 0xFF);
        int chan_out = (chan_in * lut_step) + lut_start; // value coming out of the subblock

        chan_out = chan_out >> 6; // the following subblocks convert 14 bits to 8
        expected |= chan_out << shift;
    }

    return expected == output_pixel_val;
}

// Program the bde LUT of one channel with the arithmetic pattern selected by
// testnum (see get_bde_parms): entry 0 is the start value and every
// following entry adds the increment.
//   pie_handle - handle that receives the LUT through pie_bde_set_lut()
//   testnum    - pattern number
//   channel    - channel passed through to pie_bde_set_lut()
void fill_bde_lut_with_pattern(struct pie_handle_t *pie_handle, int testnum, int channel)
{
    // sized by the same constant that bounds the fill loop, so the loop can
    // never write past the end of the array if the constant changes
    uint32_t data[PIE_BDE_MAX_LUT_ENTRIES];
    int startnum, currnum;
    int incr, i;

    get_bde_parms(testnum, &startnum, &incr);

    currnum = startnum;
    for (i = 0; i < PIE_BDE_MAX_LUT_ENTRIES; i++)
    {
        data[i] = currnum;
        currnum += incr;
    }
    pie_bde_set_lut(pie_handle, data, PIE_BDE_MAX_LUT_ENTRIES, channel);
}


// Verify the output of a bde LUT test: every output pixel must equal its
// input pixel pushed through LUT pattern `testnum` (see pixel_bde_compare).
// Failing pixels are counted separately depending on whether the output pixel
// was 0 or not, and are printed while both counters are below 20.
//   testnum                  - LUT pattern number
//   input_data / output_data - buffers to compare
//   total_num_pixels         - number of pixels to compare
//   the remaining parameters are passed through to get_input_pixel_val() and
//   get_output_pixel_val()
// Returns 0 when every pixel compared, -1 otherwise.
int check_output_bde_lut_data(int testnum, uint32_t *input_data, uint32_t *output_data,
                              int total_num_pixels, int pogo_fmt_type_in,
                              int pogo_fmt_type_out, int color_swap,
                              int pixels_per_descriptor, int datasize_per_descriptor,
                              int pogo_bpp_input, int num_desc)
{
    int pix;
    int bad_count = 0;   // failing pixels whose output value was non-zero
    int zero_count = 0;  // failing pixels whose output value was 0
    uint32_t in_val, out_val;  // hold a pixel whether 8bpp, 16bpp, 24bpp, or 32bpp

    debug_printk("input_data=0x%p, output_data=0x%p, total_num_pixels=%d, fmttype_in=%d, fmttype_out=%d\n",
                 input_data, output_data, total_num_pixels, pogo_fmt_type_in, pogo_fmt_type_out);

    for (pix = 0; pix < total_num_pixels; pix++)
    {
        in_val = get_input_pixel_val(pogo_bpp_input, pogo_fmt_type_in, pogo_fmt_type_out,
                                     input_data, color_swap, datasize_per_descriptor,
                                     pixels_per_descriptor, num_desc, pix);
        out_val = get_output_pixel_val(pogo_fmt_type_out, output_data, color_swap, pix);

        if (pixel_bde_compare(testnum, pogo_fmt_type_in, in_val, out_val))
            continue;

        if (out_val == 0)
            zero_count++;
        else
            bad_count++;

        // keep the log readable: stop printing once either counter hits 20
        if ((bad_count < 20) && (zero_count < 20))
        {
            printk("Mismatch %d: od[0x%X]=0x%X, id[0x%X]=0x%X\n",
                   bad_count, pix, out_val, pix, in_val);
        }
    }

    printk("\n\nThere were %d inappropriate 0 data, and %d mismatches\n", zero_count, bad_count);
    if ((zero_count != 0) || (bad_count != 0))
    {
        printk("Failure!!!!!!!!!!\n");
        return -1;
    }

    printk("The data compared - this test has SUCCESSFULLY COMPLETED!!\n");
    return 0;
}


// Verify the output of a clippad test.  Output pixels inside the left/right
// clip regions of a line were padded with the edge pixel, so they are
// compared against that edge pixel of the input line instead of the input
// pixel at the same position.
//   leftclip / rightclip     - number of pixels clipped at each end of a line
//   linewidth_pixels         - pixels per line
//   input_data / output_data - buffers to compare
//   total_num_pixels         - number of pixels to compare
//   the remaining parameters are passed through to get_input_pixel_val() and
//   get_output_pixel_val()
// Failing pixels are counted separately depending on whether the output pixel
// was 0 or not, and are printed while both counters are below 20.
// Returns 0 when every pixel compared, -1 otherwise.
int check_output_clippad_data(int leftclip, int rightclip,
                              int linewidth_pixels,
                              uint32_t *input_data, uint32_t *output_data,
                              int total_num_pixels, int pogo_fmt_type_in,
                              int pogo_fmt_type_out, int color_swap,
                              int pixels_per_descriptor, int datasize_per_descriptor,
                              int pogo_bpp_input, int num_desc)
{
    int pix, col, line_start, src_index;
    int bad_count = 0;   // failing pixels whose output value was non-zero
    int zero_count = 0;  // failing pixels whose output value was 0
    uint32_t in_val, out_val;  // hold a pixel whether 8bpp, 16bpp, 24bpp, or 32bpp

    debug_printk("input_data=0x%p, output_data=0x%p, total_num_pixels=%d, fmttype_in=%d, fmttype_out=%d\n",
                 input_data, output_data, total_num_pixels, pogo_fmt_type_in, pogo_fmt_type_out);

    for (pix = 0; pix < total_num_pixels; pix++)
    {
        col = pix % linewidth_pixels;  // which pixel is this on the line
        line_start = pix - col;        // index of the first pixel of this line

        // default: compare against the input pixel at the same position
        src_index = pix;
        // inside the left pad: compare against the first pixel that was kept
        if (col < leftclip)
            src_index = line_start + leftclip;
        // inside the right pad: compare against the last pixel that was kept
        // (checked second, so it wins if both regions overlap)
        if ((col + rightclip) >= linewidth_pixels)
            src_index = line_start + (linewidth_pixels - rightclip - 1);

        in_val = get_input_pixel_val(pogo_bpp_input, pogo_fmt_type_in, pogo_fmt_type_out,
                                     input_data, color_swap, datasize_per_descriptor,
                                     pixels_per_descriptor, num_desc, src_index);
        out_val = get_output_pixel_val(pogo_fmt_type_out, output_data, color_swap, pix);

        if (out_val == in_val)
            continue;

        if (out_val == 0)
            zero_count++;
        else
            bad_count++;

        // keep the log readable: stop printing once either counter hits 20
        if ((bad_count < 20) && (zero_count < 20))
        {
            printk("Mismatch %d: od[0x%X]=0x%X, id[0x%X]=0x%X\n",
                   bad_count, pix, out_val, src_index, in_val);
        }
    }

    printk("\n\nThere were %d inappropriate 0 data, and %d mismatches\n", zero_count, bad_count);
    if ((zero_count != 0) || (bad_count != 0))
    {
        printk("Failure!!!!!!!!!!\n");
        return -1;
    }

    printk("The data compared - this test has SUCCESSFULLY COMPLETED!!\n");
    return 0;
}

// Verify the output of a decimation test: output pixel i must equal the
// input pixel that survived decimation by div_x horizontally and div_y
// vertically.
//   div_x / div_y            - horizontal / vertical decimation factors
//   lines_to_skip            - accepted for the caller's convenience; not
//                              used by the comparison
//   input_linewidth_pixels   - pixels per input line
//   input_data / output_data - buffers to compare
//   output_num_pixels        - number of output pixels to compare
//   the remaining parameters are passed through to get_input_pixel_val() and
//   get_output_pixel_val()
// Failing pixels are counted separately depending on whether the output pixel
// was 0 or not, and are printed while both counters are below 20.
// Returns 0 when every pixel compared, -1 on any mismatch or when the
// decimation parameters leave no output pixels on a line.
int check_output_decim_data(int div_x, int div_y, int lines_to_skip,
                            int input_linewidth_pixels,
                            uint32_t *input_data, uint32_t *output_data,
                            int output_num_pixels, int pogo_fmt_type_in,
                            int pogo_fmt_type_out, int color_swap,
                            int pixels_per_descriptor, int datasize_per_descriptor,
                            int pogo_bpp_input, int num_desc)
{
    int i, mismatch, zero;
    uint32_t input_pixel_val, output_pixel_val;  // holds pixel value whether 8bpp, 16bpp, 24bpp, or 32bpp
    int input_index, line_num, pixel_num, output_pixels_per_line;

    // div_x is a divisor and output_pixels_per_line a modulus below; reject
    // values that would fault instead of oopsing in the kernel
    if (div_x <= 0)
    {
        printk("ERROR in %s: invalid div_x=%d\n", __func__, div_x);
        return -1;
    }
    output_pixels_per_line = input_linewidth_pixels / div_x;
    if (output_pixels_per_line <= 0)
    {
        printk("ERROR in %s: no output pixels per line (input_linewidth_pixels=%d, div_x=%d)\n",
               __func__, input_linewidth_pixels, div_x);
        return -1;
    }

    mismatch = 0;
    zero = 0;
    line_num = 0;
    pixel_num = 0;

    debug_printk("input_data=0x%p, output_data=0x%p, output_num_pixels=%d, fmttype_in=%d, fmttype_out=%d\n",
                 input_data, output_data, output_num_pixels, pogo_fmt_type_in, pogo_fmt_type_out);
    for (i = 0; i < output_num_pixels; i++)
    {
        // find the input pixel to compare with: jump down past the decimated
        // lines, then across to the current non-decimated pixel on that line
        input_index = line_num * input_linewidth_pixels;
        input_index += pixel_num;

        input_pixel_val = get_input_pixel_val(pogo_bpp_input, pogo_fmt_type_in, pogo_fmt_type_out,
                                              input_data, color_swap, datasize_per_descriptor,
                                              pixels_per_descriptor, num_desc, input_index);

        output_pixel_val = get_output_pixel_val(pogo_fmt_type_out, output_data, color_swap, i);

        if (output_pixel_val != input_pixel_val)
        {
            if (output_pixel_val == 0)
                zero++;
            else
                mismatch++;
            // keep the log readable: stop printing once either counter hits 20
            if ((mismatch < 20) && (zero < 20))
            {
                printk("Mismatch %d: od[0x%X]=0x%X, id[0x%X]=0x%X\n",
                       mismatch,i,output_pixel_val,input_index,input_pixel_val);
            }
        }
        pixel_num += div_x;   // increment to the next non-decimated pixel

        // is the next pixel going to cross the line?
        if ((i+1) % output_pixels_per_line == 0)
        {
            line_num += div_y; // increment to the next line we want to compare with
            pixel_num = 0;
        }
    }

    printk("\n\nThere were %d inappropriate 0 data, and %d mismatches\n",zero,mismatch);
    if ((zero == 0) && (mismatch == 0))
    {
        printk("The data compared - this test has SUCCESSFULLY COMPLETED!!\n");
        return 0;
    }

    printk("Failure!!!!!!!!!!\n");
    return -1;
}

// Verify the output of a pass-through test: output pixel i must equal input
// pixel i.  Failing pixels are counted separately depending on whether the
// output pixel was 0 or not, and are printed while both counters are below 20.
//   input_data / output_data - buffers to compare
//   total_num_pixels         - number of pixels to compare
//   the remaining parameters are passed through to get_input_pixel_val() and
//   get_output_pixel_val()
// Returns 0 when every pixel compared, -1 otherwise.
int check_output_data(uint32_t *input_data, uint32_t *output_data,
                      int total_num_pixels, int pogo_fmt_type_in,
                      int pogo_fmt_type_out, int color_swap,
                      int pixels_per_descriptor, int datasize_per_descriptor,
                      int pogo_bpp_input, int num_desc)
{
    int pix;
    int bad_count = 0;   // failing pixels whose output value was non-zero
    int zero_count = 0;  // failing pixels whose output value was 0
    uint32_t in_val, out_val;  // hold a pixel whether 8bpp, 16bpp, 24bpp, or 32bpp

    debug_printk("input_data=0x%p, output_data=0x%p, total_num_pixels=%d, fmttype_in=%d, fmttype_out=%d\n",
                 input_data, output_data, total_num_pixels, pogo_fmt_type_in, pogo_fmt_type_out);

    for (pix = 0; pix < total_num_pixels; pix++)
    {
        in_val = get_input_pixel_val(pogo_bpp_input, pogo_fmt_type_in, pogo_fmt_type_out,
                                     input_data, color_swap, datasize_per_descriptor,
                                     pixels_per_descriptor, num_desc, pix);
        out_val = get_output_pixel_val(pogo_fmt_type_out, output_data, color_swap, pix);

        if (out_val == in_val)
            continue;

        if (out_val == 0)
            zero_count++;
        else
            bad_count++;

        // keep the log readable: stop printing once either counter hits 20
        if ((bad_count < 20) && (zero_count < 20))
        {
            printk("Mismatch %d: od[0x%X]=0x%X, id[0x%X]=0x%X\n",
                   bad_count, pix, out_val, pix, in_val);
        }
    }

    printk("\n\nThere were %d inappropriate 0 data, and %d mismatches\n", zero_count, bad_count);
    if ((zero_count != 0) || (bad_count != 0))
    {
        printk("Failure!!!!!!!!!!\n");
        return -1;
    }

    printk("The data compared - this test has SUCCESSFULLY COMPLETED!!\n");
    return 0;
}

// Map a POGO_FMT_* value to a printable name for the test logs.
// Unknown values yield an error string rather than NULL, so the result can
// always be handed straight to a %s format.
static char *pogo_string(uint32_t pogo_type)
{
    char *label;

    switch (pogo_type)
    {
    case POGO_FMT_MONO:
        label = "Mono";
        break;
    case POGO_FMT_RGB:
        label = "RGB";
        break;
    case POGO_FMT_RGBX:
        label = "RGBX";
        break;
    case POGO_FMT_XRGB:
        label = "XRGB";
        break;
    case POGO_FMT_PLANAR:
        label = "COLOR Planar";
        break;
    default:
        label = "ERROR: BAD POGO TYPE";
        break;
    }
    return label;
}

// Configure the output side of pie in the handle: the depogoizer format, the
// odma dump-data flag and the output dma parameters.
//   pie_handle        - handle being configured
//   pogo_fmt_type_out - POGO_FMT_* output layout
//   pogo_bpp          - bits-per-pixel selector for the depogoizer
//   color_swap        - color swap setting for the depogoizer
//   linewidth         - output line width handed to the output dma
//   eatPIEData        - passed to pie_odma_set_dump_data(): dump the pie data
//                       or send it to the output dma normally
// Returns the result of pie_depogoizer_set_config(); when that is -1 nothing
// else is configured.
int setup_pogo_output(struct pie_handle_t *pie_handle, uint8_t pogo_fmt_type_out,
                      uint8_t pogo_bpp, uint8_t color_swap, uint32_t linewidth,
                      uint8_t  eatPIEData)
{
    // output dma parameters: handshake, ownership and own_polarity are 0
    const uint8_t hs = 0;
    const uint8_t own = 0;
    const uint8_t own_pol = 0;
    const uint8_t dma_on = DMA_ENABLE;
    int status;

    // setup the configuration values for the depogoizer
    status = pie_depogoizer_set_config(pie_handle, pogo_fmt_type_out, pogo_bpp,
                                       color_swap, PIE_DEPOGO_MONOCHAN_DEF);
    if (status != -1)
    {
        pie_odma_set_dump_data(pie_handle, eatPIEData);
        pie_output_dma_set_parms(pie_handle, hs, own, own_pol, dma_on, linewidth);
    }
    return status;
}

// Configure the input side of pie in the handle: the pogoizer format (with
// no x/y scaling) and the input dma parameters of every idma channel in use.
//   pie_handle        - handle being configured
//   pogo_fmt_type_in  - POGO_FMT_* input layout
//   num_idma_channels - number of idma channels to set up
//   pogo_bpp_input    - bits-per-pixel selector for the pogoizer
//   color_swap        - color swap setting for the pogoizer
//   input_linewidth   - input line width in BYTES (not pixels)
//   rowheight         - row height handed to each input dma
void setup_pogo_input(struct pie_handle_t *pie_handle, uint8_t pogo_fmt_type_in,
                      int num_idma_channels, uint32_t pogo_bpp_input,  uint8_t color_swap,
                      uint32_t input_linewidth, uint8_t rowheight)
{
    // input dma parameters: handshake, ownership and own_polarity are 0
    const uint8_t hs = 0;
    const uint8_t own = 0;
    const uint8_t own_pol = 0;
    const uint8_t dma_on = DMA_ENABLE;
    int chan;

    // setup the configuration values for the pogoizer
    pie_pogoizer_set_config(pie_handle, pogo_fmt_type_in, pogo_bpp_input, color_swap,
                            PIE_NO_POGO_XSCALE, PIE_NO_POGO_YSCALE);

    // every input channel gets the same dma parameters
    for (chan = 0; chan < num_idma_channels; chan++)
    {
        pie_input_dma_set_parms(pie_handle, hs, own, own_pol, dma_on,
                                input_linewidth, rowheight, chan);
    }
}

struct pie_handle_t *setup_basic_test(int pogo_fmt_type_in, int pogo_fmt_type_out,
                                      int pogo_bpp_input, int num_desc,
                                      int input_num_pixels_per_desc,
                                      int output_num_pixels_per_desc,
                                      int *idma_Bpp, int *odma_Bpp, int *num_idma_channels,
                                      int *datasize_per_descriptor)
{
    struct pie_handle_t *pie_handle;
    int i;

    // initialize
    piev_object.in_bufsize = 0;  
    piev_object.ot_in_bufsize = 0;
    piev_object.out_bufsize = 0;
    piev_object.ot_out_bufsize = 0;
    piev_object.device_odma_descriptor = NULL;
    piev_object.device_ot_odma_descriptor = NULL;
    piev_object.device_idma_descriptor[0] = NULL;
    piev_object.device_ot_idma_descriptor = NULL;
    
    pie_do_reset();
    msleep(100);  // give the asic time for a nice reset
    pie_do_clear_all_irqs();
    // create a pie instance
    pie_handle = pie_create_new_default_handle();

    printk("Going to do a revision check on the pie handle now\n");
    if (pie_do_revcheck(pie_handle) < 0)
    {
        printk("FAILED the revision check!!\n");
        return NULL;
    }
    
    pie_bypass_all_pie_subblocks(pie_handle);

    pie_register_idma_callback(pie_handle, pie_bypass_idma_callback, NULL);
    pie_register_odma_callback(pie_handle, pie_bypass_odma_callback, NULL);

    pie_pogo_fmt_type_parms(pogo_fmt_type_in, pogo_fmt_type_out, pogo_bpp_input,
                            idma_Bpp, odma_Bpp, num_idma_channels);

    // enable odma interrupts
    pie_enable_pogo_odma_irqs(pie_handle, NULL, true); // enable all odma interrupts
    // and enable each idma's interrupts
    for (i=0; i < *num_idma_channels; i++)
        pie_enable_pogo_idma_irqs(pie_handle, NULL, i, true);
    // enable all pie common interrupts
    pie_enable_common_irqs(pie_handle, NULL, true);

    // create data buffers and descriptors for output from the pie block
    if (setup_odma(num_desc, output_num_pixels_per_desc, *odma_Bpp, *num_idma_channels) != 0)
    {
        printk("ERROR setting up PIE odma, EXITING TEST\n");
        return NULL;
    }
    
    // create data buffers and descriptors for input to the pie block
    *datasize_per_descriptor = setup_idma(num_desc, input_num_pixels_per_desc, *idma_Bpp,
                                          *num_idma_channels);

    if (*datasize_per_descriptor < 0)
    {
        printk("ERROR in setup_basic_test, datasize_per_descriptor < 0, EXITING TEST\n");
        return NULL;
    }
    return pie_handle;
}

// Run one clippad test: push 8bpp data of the given format through pie with
// only the clippad block enabled, then check that the clipped edges of every
// output line were padded with the edge pixel.
//   leftclip / rightclip - pixels to clip and pad at each end of a line
//   pogo_fmt_type        - POGO_FMT_* layout used for both input and output
//   num_desc             - number of descriptors per dma chain
//   rowheight            - rows per descriptor
//   linewidth_pixels     - pixels per line
// Returns 0 when the output data compared, -1 on setup failure or mismatch.
// The dma buffers, the descriptors and the handle created by
// setup_basic_test() are released on every path after that call succeeded.
static int clippad_test(int leftclip, int rightclip, int pogo_fmt_type, int num_desc,
                        int rowheight, int linewidth_pixels)
{
    int odma_Bpp, idma_Bpp, num_idma_channels;
    int pogo_bpp_input, i, retval;
    int num_pixels_per_desc, num_pixels_total;
    int input_linewidth, output_linewidth, datasize_per_descriptor, color_swap;
    struct pie_handle_t *pie_handle;

    pogo_bpp_input = POGO_8BPP;
    color_swap = 0;

    num_pixels_per_desc = rowheight * linewidth_pixels;
    num_pixels_total = num_pixels_per_desc * num_desc;

    pie_handle = setup_basic_test(pogo_fmt_type, pogo_fmt_type, pogo_bpp_input,
                                  num_desc, num_pixels_per_desc, num_pixels_per_desc,
                                  &idma_Bpp, &odma_Bpp, &num_idma_channels,
                                  &datasize_per_descriptor);

    if (pie_handle == NULL)
        return -1;

    // input and output linewidth can differ
    // must be in bytes, not pixels
    output_linewidth = linewidth_pixels * odma_Bpp;
    input_linewidth = linewidth_pixels * idma_Bpp;

    // from here on every exit goes through the cleanup label, so nothing
    // allocated by setup_basic_test() is left behind
    retval = -1;
    if (setup_pogo_output(pie_handle, pogo_fmt_type, POGO_8BPP, color_swap,
                          output_linewidth, false) != 0)
        goto cleanup;

    setup_pogo_input(pie_handle, pogo_fmt_type, num_idma_channels, pogo_bpp_input,
                     color_swap, input_linewidth, rowheight);

    // unbypass (enable) clippad block
    pie_clippad_set_bypass(pie_handle, false);
    pie_clippad_set_pad_left(pie_handle, true, leftclip);
    pie_clippad_set_pad_right(pie_handle, true, rightclip);
    pie_clippad_set_trunc_left(pie_handle, leftclip);
    pie_clippad_set_trunc_right(pie_handle, linewidth_pixels - rightclip - 1);

    // debugging aid: pie_dump_handle_regs(pie_handle, 3) here shows the
    // registers that pie_do_configure is about to write

    pie_do_configure(pie_handle); // pie, write your registers

    // ready to start the test! First start the odma
    pie_start_pogo_output_dma(piev_object.device_odma_descriptor_phys);

    // debugging aid: dump_handle_regs(num_idma_channels) and
    // dump_desc(num_idma_channels, NUM_DESC) here (or after starting the
    // idmas) show the live asic registers and descriptors

    // now the idmas (this starts data flowing)
    for (i = 0; i < num_idma_channels; i++)
        pie_start_pogo_input_dma(piev_object.device_idma_descriptor_phys[i], i);

    // NOTE(review): the return value is not checked, so a signal that
    // interrupts the wait is treated the same as a completed dma
    wait_for_completion_interruptible(&dma_bypass_test_complete);
    printk("odma completed!\n");

    printk("****************************************************\n");
    printk("End of clippad Test leftclippixels=%d, rightclippixels=%d\n", leftclip, rightclip);
    printk("%s Input %d bpp, %s color swap, %s Output\n",pogo_string(pogo_fmt_type),
           (pogo_bpp_input == POGO_16BPP)?16:8,color_swap?"":"no", pogo_string(pogo_fmt_type));
    printk("num_desc=%d, ppline=%d, rows=%d\n\n\n",num_desc, linewidth_pixels, rowheight);

    retval = check_output_clippad_data(leftclip, rightclip, linewidth_pixels,
                                       piev_object.input_data, piev_object.output_data,
                                       num_pixels_total, pogo_fmt_type,
                                       pogo_fmt_type,
                                       color_swap, num_pixels_per_desc,
                                       datasize_per_descriptor, pogo_bpp_input, num_desc);
    printk("****************************************************\n\n\n");

cleanup:
    // free the memory buffers, then delete the descriptors that point to them
    cleanup_input_buffers();
    cleanup_output_buffers();
    // cleanup the IDMA descriptors
    cleanup_IDMA_descriptors(num_idma_channels);
    // cleanup the ODMA descriptors
    cleanup_ODMA_descriptors();

    // release memory for the pie_handle
    pie_do_free_handle(pie_handle);
    return retval;
}


// NOTE that this test only passes for 1:1, 2:1, 1:2, 3:1, 2:2, and 3:2.  This framework
// was never designed for idma and odma to have different sizes.  So someone ought to make
// it a little less fragile someday....

// Those few tests pass with NUM_DESC 3, LINEWIDTH_PIXELS 1032, and ROWHEIGHT 2, 4 or 8
// (but 2 takes less DMA RAM to let the "t" test pass)

// Runs mono 8bpp data through PIE with the decimator taken out of bypass, then
// checks the result with check_output_decim_data().
//   div_x, div_y           - decimation divisors, must be >= 1 (the registers are written with div-1)
//   lines_to_skip          - value handed to pie_decim_set_start()
//   num_desc               - number of descriptors to set up
//   input_rowheight        - rows of input per descriptor
//   input_linewidth_pixels - input line width, in pixels
// Returns the value of check_output_decim_data(), or -1 if the test could not be set up.
static int pie_decimate_test(int div_x, int div_y, int lines_to_skip, int num_desc,
                             int input_rowheight, int input_linewidth_pixels)
{
    // almost identical to a bypass test
    int odma_Bpp, idma_Bpp, num_idma_channels;
    int pogo_fmt_type_in, pogo_fmt_type_out, pogo_bpp_input;
    int input_num_pixels_per_desc;
    int output_num_pixels_per_desc, output_num_pixels_total;
    int retval = -1;  // stays -1 unless the data compare actually runs
    int i;
    int input_linewidth, output_linewidth;
    int datasize_per_descriptor;
    int color_swap;
    struct pie_handle_t *pie_handle;

    // the divisors are used as denominators below, so reject bad values up front
    if ((div_x < 1) || (div_y < 1))
    {
        printk("Test defect!! div_x=%d, div_y=%d must both be at least 1, stopping test.\n",
               div_x, div_y);
        return -1;
    }

    // pick some standard values for fmt parms
    pogo_fmt_type_in = POGO_FMT_MONO;
    pogo_fmt_type_out = POGO_FMT_MONO;
    pogo_bpp_input = POGO_8BPP;
    color_swap = 0;

    // calculate how many pixels are needed per descriptor
    input_num_pixels_per_desc = input_rowheight * input_linewidth_pixels;
    output_num_pixels_per_desc = (input_rowheight/div_y) * (input_linewidth_pixels/div_x);
    output_num_pixels_total = output_num_pixels_per_desc * num_desc;

    // integer division can leave nothing to output (e.g. div_y > input_rowheight)
    if (output_num_pixels_per_desc == 0)
    {
        printk("Test defect!! output_num_pixels_per_desc=0, stopping test.  See comment at top of %s\n",
               __func__);
        printk("div_x=%d, div_y=%d, lines_to_skip=%d\n", div_x, div_y, lines_to_skip);
        return -1;
    }

    // create data buffers and descriptors for input to and output from the pie block
    pie_handle = setup_basic_test(pogo_fmt_type_in, pogo_fmt_type_out,
                                  pogo_bpp_input, num_desc, input_num_pixels_per_desc,
                                  output_num_pixels_per_desc, &idma_Bpp, &odma_Bpp,
                                  &num_idma_channels, &datasize_per_descriptor);

    if (pie_handle == NULL)
        return -1;
    // input and output linewidth can differ
    // must be in bytes, not pixels
    output_linewidth = (input_linewidth_pixels/div_x) * odma_Bpp;
    input_linewidth = input_linewidth_pixels * idma_Bpp;
    // NOTE that pie pogo output is only allowed to be 8bpp - 16bpp is input only

    // on failure, still release everything setup_basic_test() handed us
    if (setup_pogo_output(pie_handle, pogo_fmt_type_out, POGO_8BPP, color_swap,
                          output_linewidth, false) != 0)
        goto cleanup;

    setup_pogo_input(pie_handle, pogo_fmt_type_in, num_idma_channels, pogo_bpp_input,
                     color_swap, input_linewidth, input_rowheight);

    // unbypass decim to test the decimator  ****
    pie_decim_set_bypass(pie_handle, false); //****
    pie_decim_set_div(pie_handle, div_x-1, div_y-1);  // reg is -1
    pie_decim_set_start(pie_handle, lines_to_skip);

    // uncomment to output the registers we will be writing in pie_do_configure
    // printk("Dumping pie handle before calling configure\n");
    // pie_dump_handle_regs(pie_handle, 3);

    pie_do_configure(pie_handle); // pie, write your registers

    // ready to start the test! First start the odma
    pie_start_pogo_output_dma(piev_object.device_odma_descriptor_phys);

    // uncomment to output the current values of the asic registers read back live
    //dump_handle_regs(num_idma_channels);
    //dump_desc(num_idma_channels, NUM_DESC);


    // now the idmas (this starts data flowing)
    for (i=0; i<num_idma_channels; i++)
        pie_start_pogo_input_dma(piev_object.device_idma_descriptor_phys[i], i);

    //printk("Dump regs after starting idma\n");
    //dump_handle_regs(num_idma_channels);
    //dump_desc(num_idma_channels, NUM_DESC);


    // NOTE(review): the return value is ignored, so an interrupted wait falls
    // straight through to the compare and cleanup - confirm that is acceptable
    wait_for_completion_interruptible(&dma_bypass_test_complete);
    printk("odma completed!\n");

    printk("****************************************************\n");
    printk("End of decim Test div_x=%d, div_y=%d, lines_to_skip=%d\n", div_x, div_y, lines_to_skip);
    printk("%s Input %d bpp, %s color swap, %s Output\n",pogo_string(pogo_fmt_type_in),
           (pogo_bpp_input == POGO_16BPP)?16:8,color_swap?"":"no", pogo_string(pogo_fmt_type_out));
    printk("num_desc=%d, ppline=%d, rows=%d\n\n\n",num_desc, input_linewidth_pixels, input_rowheight);

    retval = check_output_decim_data(div_x, div_y, lines_to_skip,
                                     input_linewidth_pixels,
                                     piev_object.input_data,
                                     piev_object.output_data,
                                     output_num_pixels_total, pogo_fmt_type_in,
                                     pogo_fmt_type_out,
                                     color_swap, input_num_pixels_per_desc,
                                     datasize_per_descriptor, pogo_bpp_input, num_desc);

    printk("****************************************************\n\n\n");

cleanup:
    // free the memory buffers, then delete the descriptors that point to them
    cleanup_input_buffers();
    cleanup_output_buffers();
    // cleanup the IDMA descriptors
    cleanup_IDMA_descriptors(num_idma_channels);
    // cleanup the ODMA descriptors
    cleanup_ODMA_descriptors();

    // release memory for the pie_handle
    pie_do_free_handle(pie_handle);
    return retval;
}

// Runs data through PIE with the BDE block taken out of bypass and its LUT(s)
// filled by fill_bde_lut_with_pattern(), then checks the result with
// check_output_bde_lut_data().  Input and output use the same format at 8bpp.
//   pogo_fmt_type    - pogo format for both input and output; anything other
//                      than POGO_FMT_MONO also fills LUT channels 1 and 2
//   num_desc         - number of descriptors to set up
//   rowheight        - rows per descriptor
//   linewidth_pixels - line width, in pixels
//   testnum          - selects the pattern (passed to the fill and check helpers)
// Returns the value of check_output_bde_lut_data(), or -1 if the test could not be set up.
static int pie_bde_lut_test(int pogo_fmt_type, int num_desc,
                            int rowheight, int linewidth_pixels, int testnum)
{
    int odma_Bpp, idma_Bpp, num_pixels_per_desc, num_pixels_total, num_idma_channels;
    int pogo_bpp_io, color_swap, datasize_per_descriptor;
    int input_linewidth, output_linewidth, i;
    int retval = -1;  // stays -1 unless the data compare actually runs
    struct pie_handle_t *pie_handle;

    pogo_bpp_io = POGO_8BPP;
    color_swap = 0;
    // calculate how many pixels are needed per descriptor
    num_pixels_per_desc = rowheight * linewidth_pixels;
    num_pixels_total = num_pixels_per_desc * num_desc;

    pie_handle = setup_basic_test(pogo_fmt_type, pogo_fmt_type, pogo_bpp_io,
                                  num_desc, num_pixels_per_desc,
                                  num_pixels_per_desc,
                                  &idma_Bpp, &odma_Bpp, &num_idma_channels,
                                  &datasize_per_descriptor);
    if (pie_handle == NULL)
        return -1;

    // must be in bytes, not pixels
    output_linewidth = linewidth_pixels * odma_Bpp;
    input_linewidth = linewidth_pixels * idma_Bpp;

    // on failure, still release everything setup_basic_test() handed us
    if (setup_pogo_output(pie_handle, pogo_fmt_type, pogo_bpp_io, color_swap,
                          output_linewidth, false) != 0)
        goto cleanup;

    setup_pogo_input(pie_handle, pogo_fmt_type, num_idma_channels, pogo_bpp_io,
                     color_swap, input_linewidth, rowheight);

    // uncomment to output the registers we will be writing in pie_do_configure
    // printk("Dumping pie handle before calling configure\n");
    // pie_dump_handle_regs(pie_handle, 3);

    // write the bde luts into the pie_handle
    fill_bde_lut_with_pattern(pie_handle, testnum, 0);
    if (pogo_fmt_type != POGO_FMT_MONO)
    {
        fill_bde_lut_with_pattern(pie_handle, testnum, 1);
        fill_bde_lut_with_pattern(pie_handle, testnum, 2);
    }
    // take requested channels of BDE out of bypass
    pie_bde_set_bypass(pie_handle, false);

    pie_do_configure(pie_handle); // pie, write your registers

    // ready to start the test! First start the odma
    pie_start_pogo_output_dma(piev_object.device_odma_descriptor_phys);

    // uncomment to output the current values of the asic registers read back live
    //dump_handle_regs(num_idma_channels);
    //dump_desc(num_idma_channels, NUM_DESC);

    // now the idmas (this starts data flowing)
    for (i=0; i<num_idma_channels; i++)
        pie_start_pogo_input_dma(piev_object.device_idma_descriptor_phys[i], i);

    //printk("Dump regs, desc, and lut after starting idma\n");
    //dump_handle_regs(num_idma_channels);
    //dump_desc(num_idma_channels, NUM_DESC);
    //pie_bde_dump_handle_lut(pie_handle, 0);

    // NOTE(review): the return value is ignored, so an interrupted wait falls
    // straight through to the compare and cleanup - confirm that is acceptable
    wait_for_completion_interruptible(&dma_bypass_test_complete);
    printk("odma completed!\n");

    retval = check_output_bde_lut_data(testnum, piev_object.input_data,
                                       piev_object.output_data,
                                       num_pixels_total, pogo_fmt_type,
                                       pogo_fmt_type,
                                       color_swap, num_pixels_per_desc,
                                       datasize_per_descriptor, pogo_bpp_io, num_desc);

    printk("****************************************************\n");
    printk("Completed BDE Test %s Input %d bpp, %s color swap, %s Output\n",pogo_string(pogo_fmt_type),
           (pogo_bpp_io == POGO_16BPP)?16:8,color_swap?"":"no", pogo_string(pogo_fmt_type));
    printk("num_desc=%d, ppline=%d, rows=%d\n\n\n",num_desc, linewidth_pixels, rowheight);

cleanup:
    // free the memory buffers, then delete the descriptors that point to them
    cleanup_input_buffers();
    cleanup_output_buffers();
    // cleanup the IDMA descriptors
    cleanup_IDMA_descriptors(num_idma_channels);
    // cleanup the ODMA descriptors
    cleanup_ODMA_descriptors();

    // release memory for the pie_handle
    pie_do_free_handle(pie_handle);
    return retval;
}

// note, linewidth_pixels is in pixels
// For a bypass test, there are the same rowheight in the input and output dma, and same
// linewidth size *in pixels*
// pogo_bpp_input is only POGO_8BPP for all types except planar input, which could be either
// 16 bit or 8 bit input data
//   pogo_fmt_type_in, pogo_fmt_type_out - pogo formats for the idma and odma sides
//   color_swap       - handed to both the pogo input and output setup
//   num_desc         - number of descriptors to set up
//   eatPIEData       - handed to setup_pogo_output(); when non-zero the output
//                      data is not compared
// Returns the value of check_output_data() (0 when the compare is skipped), or -1
// if the test could not be set up.
static int do_bypass_test(int pogo_fmt_type_in, int pogo_fmt_type_out,
                          int pogo_bpp_input, int color_swap,
                          int num_desc, int linewidth_pixels,
                          int rowheight, char eatPIEData)
{
    int odma_Bpp, idma_Bpp, num_pixels_per_desc, num_pixels_total, num_idma_channels;
    int retval = -1;  // overwritten once the dma has run
    int i;
    int input_linewidth, output_linewidth;
    int datasize_per_descriptor;
    struct pie_handle_t *pie_handle;

    // calculate how many pixels are needed per descriptor
    num_pixels_per_desc = rowheight * linewidth_pixels;
    num_pixels_total = num_pixels_per_desc * num_desc;

    pie_handle = setup_basic_test(pogo_fmt_type_in, pogo_fmt_type_out, pogo_bpp_input,
                                  num_desc, num_pixels_per_desc,
                                  num_pixels_per_desc,
                                  &idma_Bpp, &odma_Bpp, &num_idma_channels,
                                  &datasize_per_descriptor);

    if (pie_handle == NULL)
        return -1;
    // input and output linewidth can differ for planar input
    // must be in bytes, not pixels
    output_linewidth = linewidth_pixels * odma_Bpp;
    input_linewidth = linewidth_pixels * idma_Bpp;
    // NOTE that pie pogo output is only allowed to be 8bpp - 16bpp is input only

    // on failure, still release everything setup_basic_test() handed us
    if (setup_pogo_output(pie_handle, pogo_fmt_type_out, POGO_8BPP, color_swap,
                          output_linewidth, eatPIEData) != 0)
        goto cleanup;

    setup_pogo_input(pie_handle, pogo_fmt_type_in, num_idma_channels, pogo_bpp_input,
                     color_swap, input_linewidth, rowheight);

    // uncomment to output the registers we will be writing in pie_do_configure
    // printk("Dumping pie handle before calling configure\n");
    // pie_dump_handle_regs(pie_handle, 3);

    pie_do_configure(pie_handle); // pie, write your registers

    // ready to start the test! First start the odma
    pie_start_pogo_output_dma(piev_object.device_odma_descriptor_phys);

    // uncomment to output the current values of the asic registers read back live
    //dump_handle_regs(num_idma_channels);
    //dump_desc(num_idma_channels, NUM_DESC);


    // now the idmas (this starts data flowing)
    for (i=0; i<num_idma_channels; i++)
        pie_start_pogo_input_dma(piev_object.device_idma_descriptor_phys[i], i);

    //printk("Dump regs after starting idma\n");
    //dump_handle_regs(num_idma_channels);
    //dump_desc(num_idma_channels, NUM_DESC);

    // NOTE(review): the return value is ignored, so an interrupted wait falls
    // straight through to the compare and cleanup - confirm that is acceptable
    wait_for_completion_interruptible(&dma_bypass_test_complete);
    printk("odma completed!\n");
    // don't compare the output data if we didn't run anything through
    retval = 0;
    if (eatPIEData == 0)
    {
        retval = check_output_data(piev_object.input_data,
                                   piev_object.output_data,
                                   num_pixels_total, pogo_fmt_type_in,
                                   pogo_fmt_type_out,
                                   color_swap, num_pixels_per_desc,
                                   datasize_per_descriptor, pogo_bpp_input, num_desc);
    }
    else
        printk("Not comparing PIE output data when in eat PIE data mode\n");

    printk("****************************************************\n");
    printk("Completed Test %s Input %d bpp, %s color swap, %s Output\n",pogo_string(pogo_fmt_type_in),
           (pogo_bpp_input == POGO_16BPP)?16:8,color_swap?"":"no", pogo_string(pogo_fmt_type_out));
    printk("num_desc=%d, ppline=%d, rows=%d\n\n\n",num_desc, linewidth_pixels, rowheight);

cleanup:
    // free the memory buffers, then delete the descriptors that point to them
    cleanup_input_buffers();
    cleanup_output_buffers();
    // cleanup the IDMA descriptors
    cleanup_IDMA_descriptors(num_idma_channels);
    // cleanup the ODMA descriptors
    cleanup_ODMA_descriptors();

    // release memory for the pie_handle
    pie_do_free_handle(pie_handle);
    return retval;
}

// Simple OT ODMA test: resets PIE, runs a mono 8bpp pass with the OT path taken
// out of bypass and an OT output dma set up (4 bits of OT data per pixel), then
// verifies that every byte of the OT output buffer reads back as 0.
// Returns 0 on success, -1 on a setup failure or the first non-zero OT byte.
int pie_otodma_simple_test(void)
{
    int odma_Bpp, idma_Bpp, num_pixels_per_data_desc, num_pixels_total, num_idma_channels;
    int retval = -1;  // overwritten once the dma has run
    int i;
    int input_linewidth, output_linewidth;
    int datasize_per_data_descriptor;
    struct pie_handle_t *pie_handle;
    int num_pixels_per_ot_desc;

    int linewidth_pixels = 2048;   // math is easier with big powers of 2
    int num_ot_desc = NUM_DESC; // number of ot odma desc
    int rowheight = ROWHEIGHT;
    int num_data_desc = NUM_DESC;


    pie_do_reset();
    msleep(100);  // give the asic time for a nice reset
    pie_do_clear_all_irqs();

    // calculate how many pixels are needed per descriptor
    num_pixels_per_data_desc = rowheight * linewidth_pixels;
    num_pixels_total = num_pixels_per_data_desc * num_data_desc;

    pie_handle = setup_basic_test(POGO_FMT_MONO, POGO_FMT_MONO, POGO_8BPP,
                                  num_data_desc, num_pixels_per_data_desc,
                                  num_pixels_per_data_desc,
                                  &idma_Bpp, &odma_Bpp, &num_idma_channels,
                                  &datasize_per_data_descriptor);

    if (pie_handle == NULL)
        return -1;
    // input and output linewidth can differ for planar input
    // must be in bytes, not pixels
    output_linewidth = linewidth_pixels * odma_Bpp;
    input_linewidth = linewidth_pixels * idma_Bpp;
    // NOTE that pie pogo output is only allowed to be 8bpp - 16bpp is input only

    // on failure, still release everything setup_basic_test() handed us
    if (setup_pogo_output(pie_handle, POGO_FMT_MONO, POGO_8BPP, POGO_NOCOLORSWAP,
                          output_linewidth, false) != 0)
        goto cleanup;

    setup_pogo_input(pie_handle, POGO_FMT_MONO, num_idma_channels, POGO_8BPP,
                     POGO_NOCOLORSWAP, input_linewidth, rowheight);

    num_pixels_per_ot_desc = num_pixels_total / num_ot_desc;
    // set up the OT odma memory and descriptors
    setup_ot_odma(pie_handle, num_ot_desc, num_pixels_per_ot_desc, linewidth_pixels, 4, POGO_FMT_1CHAN_PK);  // 4 bits OT data per pixel

    pie_register_ot_odma_callback(pie_handle, pie_ot_odma_callback, NULL);

    // uncomment to output the registers we will be writing in pie_do_configure
    // printk("Dumping pie handle before calling configure\n");
    // pie_dump_handle_regs(pie_handle, 3);

    pie_common_set_ot_bypass(pie_handle, false);  // tell pie common we are using OT depogoizer/odma

    pie_enable_pogo_otodma_irqs(pie_handle, NULL, true); // enable all ot odma irqs

    pie_do_configure(pie_handle); // pie, write your registers

    // ready to start the test! First start the odma for the data path (not the OT DMA)
    pie_start_pogo_output_dma(piev_object.device_odma_descriptor_phys);

    // uncomment to output the current values of the asic registers read back live
    //dump_handle_regs(num_idma_channels);
    //dump_desc(num_idma_channels, NUM_DESC);

    // start the OT odma
    pie_start_pogo_ot_odma(piev_object.device_ot_odma_descriptor_phys);

    // now the idmas (this starts data flowing) - this is for the data path, not the OT path
    for (i=0; i<num_idma_channels; i++)
        pie_start_pogo_input_dma(piev_object.device_idma_descriptor_phys[i], i);

    //printk("Dump regs after starting idma\n");
    //dump_handle_regs(num_idma_channels);
    //dump_desc(num_idma_channels, NUM_DESC);

    // NOTE(review): the return value is ignored, so an interrupted wait falls
    // straight through to the compare and cleanup - confirm that is acceptable
    wait_for_completion_interruptible(&dma_ot_odma_test_complete);

    retval = 0;
    msleep(100); // let serial output messages get out of the way of the failure message
    //  verify the data in the ot output is not FFs, since that was filled by default - should be 0s
    for (i=0;i<num_pixels_total/2;i++) // ot_output_data contains 4 bit nibbles - so divide by 2
    {
        if (get_8_val(i, piev_object.ot_output_data) != 0)
        {
            printk("FAILURE! OT output data[%d] was 0x%X\n", i, get_8_val(i, piev_object.ot_output_data));
            retval = -1;
            break;   // shortcut - only output 1 error byte, but still fall into the cleanup
        }
    }
    if (retval == 0)
        printk("OT output was 0 - OT ODMA simple test PASSED\n");

cleanup:
    // NOTE(review): nothing here releases what setup_ot_odma() set up - confirm
    // whether that is handled by one of the calls below or elsewhere
    // free the memory buffers, then delete the descriptors that point to them
    cleanup_input_buffers();
    cleanup_output_buffers();
    // cleanup the IDMA descriptors
    cleanup_IDMA_descriptors(num_idma_channels);
    // cleanup the ODMA descriptors
    cleanup_ODMA_descriptors();

    // release memory for the pie_handle
    pie_do_free_handle(pie_handle);

    return retval;
}

// Full OT path test: resets PIE, runs a mono 8bpp pass with OT idma, OT odma and
// otmarb all enabled (4 bits of OT data per pixel), then checks that the OT
// output buffer matches the OT input buffer byte for byte.
// Returns 0 on success, -1 on a setup failure or the first mismatching byte.
int pie_ot_test(void)
{
    int odma_Bpp, idma_Bpp, num_pixels_per_data_desc, num_pixels_total, num_idma_channels;
    int retval = -1;  // overwritten once the dma has run
    int i;
    int input_linewidth, output_linewidth;
    int datasize_per_data_descriptor;
    struct pie_handle_t *pie_handle;
    int num_pixels_per_ot_desc;

    int linewidth_pixels = 2048;   // math is easier with big powers of 2
    int num_ot_desc = NUM_DESC; // number of ot odma desc
    int rowheight = ROWHEIGHT;
    int num_data_desc = NUM_DESC;

    pie_do_reset();
    msleep(100);  // give the asic time for a nice reset
    pie_do_clear_all_irqs();

    // calculate how many pixels are needed per descriptor
    num_pixels_per_data_desc = rowheight * linewidth_pixels;
    num_pixels_total = num_pixels_per_data_desc * num_data_desc;

    pie_handle = setup_basic_test(POGO_FMT_MONO, POGO_FMT_MONO, POGO_8BPP,
                                  num_data_desc, num_pixels_per_data_desc,
                                  num_pixels_per_data_desc,
                                  &idma_Bpp, &odma_Bpp, &num_idma_channels,
                                  &datasize_per_data_descriptor);

    if (pie_handle == NULL)
        return -1;
    // input and output linewidth can differ for planar input
    // must be in bytes, not pixels
    output_linewidth = linewidth_pixels * odma_Bpp;
    input_linewidth = linewidth_pixels * idma_Bpp;
    // NOTE that pie pogo output is only allowed to be 8bpp - 16bpp is input only

    // on failure, still release everything setup_basic_test() handed us
    if (setup_pogo_output(pie_handle, POGO_FMT_MONO, POGO_8BPP, POGO_NOCOLORSWAP,
                          output_linewidth, false) != 0)
        goto cleanup;

    setup_pogo_input(pie_handle, POGO_FMT_MONO, num_idma_channels, POGO_8BPP,
                     POGO_NOCOLORSWAP, input_linewidth, rowheight);

    num_pixels_per_ot_desc = num_pixels_total / num_ot_desc;
    // set up the OT odma memory and descriptors - 4 bits OT data per pixel
    setup_ot_odma(pie_handle, num_ot_desc, num_pixels_per_ot_desc, linewidth_pixels, 4, POGO_FMT_1CHAN_PK);

    // set up the OT IDMA memory and descriptors - 4 bits OT data per pixel
    setup_ot_idma(pie_handle, num_ot_desc, num_pixels_per_ot_desc, linewidth_pixels, 4, POGO_FMT_1CHAN_PK);

    pie_register_ot_odma_callback(pie_handle, pie_ot_odma_callback, NULL);
    pie_register_ot_idma_callback(pie_handle, pie_ot_idma_callback, NULL);
    pie_register_otmarb_callback(pie_handle, pie_ot_marb_callback, NULL);

    // for debug, output the registers we will be writing in pie_do_configure
#ifdef DURING_DEBUG
    printk("Dumping pie handle before calling configure\n");
    pie_dump_handle_regs(pie_handle, 3);
#endif

    pie_common_set_ot_bypass(pie_handle, false);  // tell pie common we are using OT depogoizer/odma
    pie_otmarb_set_bypass(pie_handle, false); // tell otmarb that we need it to run

    pie_enable_pogo_otodma_irqs(pie_handle, NULL, true); // enable all ot odma irqs
    pie_enable_pogo_otidma_irqs(pie_handle, NULL, true); // enable all ot idma irqs
    pie_enable_otmarb_irqs(pie_handle, NULL, true); // enable otmarb irqs

    pie_do_configure(pie_handle); // pie, write your registers

#ifdef DURING_DEBUG
    pie_dump();  // let's see what we just configured
#endif

    // ready to start the test! First start the odma for the data path (not the OT DMA)
    pie_start_pogo_output_dma(piev_object.device_odma_descriptor_phys);

    // uncomment to output the current values of the asic registers read back live
    //dump_handle_regs(num_idma_channels);
    //dump_desc(num_idma_channels, NUM_DESC);

    // start the OT odma
    pie_start_pogo_ot_odma(piev_object.device_ot_odma_descriptor_phys);

    // now the idmas (this starts data flowing) - this is for the data path, not the OT path
    for (i=0; i<num_idma_channels; i++)
        pie_start_pogo_input_dma(piev_object.device_idma_descriptor_phys[i], i);

    // and finally, start the ot idma
    pie_start_pogo_ot_idma(piev_object.device_ot_idma_descriptor_phys);

#ifdef DURING_DEBUG
    printk("Dump regs after starting idma\n");
    dump_handle_regs(num_idma_channels);
    dump_desc(num_idma_channels, NUM_DESC);
#endif

    // NOTE(review): the return value is ignored, so an interrupted wait falls
    // straight through to the compare and cleanup - confirm that is acceptable
    wait_for_completion_interruptible(&dma_ot_odma_test_complete);

    retval = 0;
    msleep(100); // let serial output messages get out of the way of the failure message
    // verify the OT output data matches the OT input data
    printk("ot test comparing data. inbuf=0x%p, outbuf=0x%p \n",
           piev_object.ot_input_data, piev_object.ot_output_data);
    for (i=0;i<num_pixels_total/2;i++) // ot data buffers contain 4 bit nibbles - so divide by 2
    {
        if (get_8_val(i, piev_object.ot_input_data) != get_8_val(i, piev_object.ot_output_data))
        {
            printk("FAILURE! OT input data[%d] was 0x%X, output data[%d] was 0x%X\n",
                   i, get_8_val(i, piev_object.ot_input_data), i, get_8_val(i, piev_object.ot_output_data));
            retval = -1;
            break;   // shortcut - only output 1 error byte, but still fall into the cleanup
        }
    }
    if (retval == 0)
        printk("PIE OT input data matched the output data - OT idma/odma/otmarb test PASSED\n");

cleanup:
    // NOTE(review): nothing here releases what setup_ot_odma()/setup_ot_idma() set up -
    // confirm whether that is handled by one of the calls below or elsewhere
    // free the memory buffers, then delete the descriptors that point to them
    cleanup_input_buffers();
    cleanup_output_buffers();
    // cleanup the IDMA descriptors
    cleanup_IDMA_descriptors(num_idma_channels);
    // cleanup the ODMA descriptors
    cleanup_ODMA_descriptors();

    // release memory for the pie_handle
    pie_do_free_handle(pie_handle);

    return retval;
}

// Prints the first num_pixels entries of piev_object.input_data and then of
// piev_object.output_data, four entries per line.
//   uval32     - bits per entry to read the buffers at: 8, 16 or 32
//   num_pixels - number of entries to print from each buffer
// Stops with an error message if the size is unsupported or a buffer is NULL.
void do_memory_dump(uint32_t uval32, int num_pixels)
{
    uint32_t *data32_buf;
    uint16_t *data16_buf;
    uint8_t *data8_buf;
    const char *output_string;
    unsigned int pixel_value;
    int i, j;

    if ((uval32 == 8) || (uval32 == 16) || (uval32 == 32))
        printk("\ndumping %d pixels at %u bits per pixel\n", num_pixels, uval32);
    else
    {
        printk("error - %u is unsupported size of dump - \"m <bitsperpixel> <numPixels>\"\n\n", uval32);
        return;
    }

    // dump the memory for data in and data out
    // pass 0 prints the input buffer, pass 1 the output buffer
    for (j=0;j<2;j++)
    {
        if (j == 0)
        {
            printk("input data");
            output_string = "id";
            data32_buf = piev_object.input_data;
        }
        else
        {
            printk("\noutput data");
            output_string = "od";
            data32_buf = piev_object.output_data;
        }

        // either buffer can be missing if no test has run, so check both
        if (data32_buf == NULL)
        {
            printk("ERROR, memory buffer is NULL - has the test run yet??\n");
            return;
        }
        data8_buf = (uint8_t *) data32_buf;
        data16_buf = (uint16_t *) data32_buf;

        for (i=0;(i<num_pixels);i++)
        {
            if (i%4 == 0)
                printk("\n");
            if (uval32 == 8)
            {
                pixel_value = data8_buf[i];
            }
            else if (uval32 == 16)
            {
                pixel_value = data16_buf[i];
            }
            else
            {
                pixel_value = data32_buf[i];
            }

            printk("%s[0x%X]=0x%X ",output_string, i, pixel_value);
        }
    }
    printk("\n");
}

void pie_dsmf_write_luts(void)
{
    uint32_t *dsmf_lut; // all luts use this
    int8_t *dsmf_ai_lut_y; // ai needs 2 values in lut
    int8_t *dsmf_ai_lut_crcb; // ai needs 2 values in lut
    int i;
    struct pie_handle_t *pie_handle;

    // biggest lut - all others fit in here    
    dsmf_lut = kmalloc(PIE_DSMF_SEG_PROJ_LUT_ENTRIES * sizeof(uint32_t), GFP_KERNEL);  
    dsmf_ai_lut_y = kmalloc(PIE_DSMF_AI_LUT_ENTRIES * sizeof(int8_t), GFP_KERNEL);
    dsmf_ai_lut_crcb = kmalloc(PIE_DSMF_AI_LUT_ENTRIES * sizeof(int8_t), GFP_KERNEL);
    pie_do_reset();
    msleep(100);  // give the asic time for a nice reset
    pie_do_clear_all_irqs();
    // create a pie instance
    pie_handle = pie_create_new_default_handle();

    printk("Now writing patterns to the luts\n");

    for (i=0;i<PIE_DSMF_CTL_LUT_ENTRIES;i++)
    {
        dsmf_lut[i] = i; 
    }

    pie_dsmf_set_filtctl_lut_raw(pie_handle, dsmf_lut);
    
    for (i=0;i<PIE_DSMF_SEG_PROJ_LUT_ENTRIES;i++) 
    {
        dsmf_lut[i] = 0xFF - i;
    }

    pie_dsmf_set_seg_proj_lut(pie_handle, dsmf_lut);
    
    for (i=0;i<PIE_DSMF_AI_LUT_ENTRIES;i++) 
    {
        dsmf_ai_lut_y[i] = i;  // The Y activity index value
        dsmf_ai_lut_crcb[i] = 0xFF - i;  // the CrCb Activity Index value
    }
    
    pie_dsmf_set_ai_lut0(pie_handle,
            dsmf_ai_lut_y, PIE_DSMF_AI_LUT_ENTRIES,
            dsmf_ai_lut_crcb, PIE_DSMF_AI_LUT_ENTRIES );
    
    for (i=0;i<PIE_DSMF_AI_LUT_ENTRIES;i++)
    {
        dsmf_ai_lut_y[i] = 0xFF - i;
        dsmf_ai_lut_crcb[i] = i;
    }
    pie_dsmf_set_ai_lut1(pie_handle,
            dsmf_ai_lut_y, PIE_DSMF_AI_LUT_ENTRIES,
            dsmf_ai_lut_crcb, PIE_DSMF_AI_LUT_ENTRIES );

    pie_dsmf_set_bypass(pie_handle, false);  // enable the block

    printk("Writing registers to the ASIC\n");
    pie_do_configure(pie_handle); // write pie regs

    pie_do_get_current(pie_handle);
    
    pie_dsmf_dump_handle_luts(pie_handle);
    // now do a quick check on the CTL LUT (the only readable one), and
    // see if it has the correct value (same as we wrote)
    printk("comparing CTL LUT write/read values\n");
    for (i=0;i<PIE_DSMF_CTL_LUT_ENTRIES;i++)
    {
        // since this is verification, we are allowed to access the
        // entries in the pie handle.  In the real scan world, NO ONE
        // is allowed to access the entries in the pie handle outside
        // of the driver.  But since we are testing the driver, we are
        // special.  If this really were a general purpose thing to
        // do, we would add the function to the convenience functions
        if (pie_handle->pie_dsmf->DSMF_FILT_CTL_LUT[i] != i)
        {
            printk("ERROR!! bad compare LUT[%d] = %d, should be %d\n",
                   i, pie_handle->pie_dsmf->DSMF_FILT_CTL_LUT[i], i);
            return;
        }
    }
    printk("Successful test!\n");
}

void pie_tcns_rw_test_luts(void)
{
    struct ycc_values *tcns_main_lut; 
    struct ycc_values *tcns_alt_lut;
    uint8_t byte_array[PIE_TCNS_LUT_ENTRIES];
    int i;
    struct pie_handle_t *pie_handle;

    tcns_main_lut = kmalloc(PIE_TCNS_LUT_ENTRIES * sizeof(struct ycc_values), GFP_KERNEL);  
    tcns_alt_lut = kmalloc(PIE_TCNS_LUT_ENTRIES * sizeof(struct ycc_values), GFP_KERNEL);
    pie_do_reset();
    msleep(100);  // give the asic time for a nice reset
    pie_do_clear_all_irqs();
    // create a pie instance
    pie_handle = pie_create_new_default_handle();

    printk("Now writing patterns to the luts\n");

    get_random_bytes(byte_array, PIE_TCNS_LUT_ENTRIES);;
  
    for (i=0;i<PIE_TCNS_LUT_ENTRIES;i++)
    {
        tcns_main_lut[i].y_lut_value = i & 0xFF;
        tcns_main_lut[i].cr_lut_value = (i ^ 0xFF) & 0xFF;
        tcns_main_lut[i].cb_lut_value = byte_array[i];
        tcns_alt_lut[i].y_lut_value = i & 0xFF;
        tcns_alt_lut[i].cr_lut_value = (i ^ 0xFF) & 0xFF;
        tcns_alt_lut[i].cb_lut_value = byte_array[i] ^ 0xFF;
    }

    pie_tcns_set_lut(pie_handle, tcns_main_lut, 1);
    pie_tcns_set_lut(pie_handle, tcns_alt_lut, 2);    
    
    pie_tcns_set_bypass(pie_handle, false);  // enable the block

    printk("Writing registers to the ASIC\n");
    pie_do_configure(pie_handle); // write pie regs

    // pie_tcns_dump_luts();  // needed while debugging the compare

    // check the values of the lut
    // see if they have the correct values (same as we wrote)
    pie_do_get_current(pie_handle);
    printk("comparing main LUT write/read values\n");
    for (i=0;i<PIE_TCNS_LUT_ENTRIES;i++)
    {
        // since this is verification, we are allowed to access the
        // entries in the pie handle.  In the real scan world, NO ONE
        // is allowed to access the entries in the pie handle outside
        // of the driver.  But since we are testing the driver, we are
        // special.  If this really were a general purpose thing to
        // do, we would add the function to the convenience functions
        if (TCNSENSE_TCNSLUT_Y_MASK_SHIFT(pie_handle->pie_tcns->TCNSLUT[i]) != tcns_main_lut[i].y_lut_value)
        {
            printk("ERROR!! bad compare main LUT[%d].y_lut_value = %d, should be %d\n",
                   i, TCNSENSE_TCNSLUT_Y_MASK_SHIFT(pie_handle->pie_tcns->TCNSLUT[i]),
                   tcns_main_lut[i].y_lut_value);
            return;
        }
        if (TCNSENSE_TCNSLUT_CR_MASK_SHIFT(pie_handle->pie_tcns->TCNSLUT[i]) != tcns_main_lut[i].cr_lut_value)
        {
            printk("ERROR!! bad compare main LUT[%d].cr_lut_value = %d, should be %d\n",
                   i, TCNSENSE_TCNSLUT_CR_MASK_SHIFT(pie_handle->pie_tcns->TCNSLUT[i]),
                   tcns_main_lut[i].cr_lut_value);
            return;
        }
        if (TCNSENSE_TCNSLUT_CB_MASK_SHIFT(pie_handle->pie_tcns->TCNSLUT[i]) != tcns_main_lut[i].cb_lut_value)
        {
            printk("ERROR!! bad compare main LUT[%d].cb_lut_value = %d, should be %d\n",
                   i, TCNSENSE_TCNSLUT_CB_MASK_SHIFT(pie_handle->pie_tcns->TCNSLUT[i]),
                   tcns_main_lut[i].cb_lut_value);
            return;
        }
        // now check alternate lut
        if (TCNSENSE_TCNSLUT_Y_MASK_SHIFT(pie_handle->pie_tcns->TCNSLUT2[i]) != tcns_alt_lut[i].y_lut_value)
        {
            printk("ERROR!! bad compare alt LUT[%d].y_lut_value = %d, should be %d\n",
                   i, TCNSENSE_TCNSLUT_Y_MASK_SHIFT(pie_handle->pie_tcns->TCNSLUT2[i]),
                   tcns_alt_lut[i].y_lut_value);
            return;
        }
        if (TCNSENSE_TCNSLUT_CR_MASK_SHIFT(pie_handle->pie_tcns->TCNSLUT2[i]) != tcns_alt_lut[i].cr_lut_value)
        {
            printk("ERROR!! bad compare alt LUT[%d].cr_lut_value = %d, should be %d\n",
                   i, TCNSENSE_TCNSLUT_CR_MASK_SHIFT(pie_handle->pie_tcns->TCNSLUT2[i]),
                   tcns_alt_lut[i].cr_lut_value);
            return;
        }
        if (TCNSENSE_TCNSLUT_CB_MASK_SHIFT(pie_handle->pie_tcns->TCNSLUT2[i]) != tcns_alt_lut[i].cb_lut_value)
        {
            printk("ERROR!! bad compare alt LUT[%d].cb_lut_value = %d, should be %d\n",
                   i, TCNSENSE_TCNSLUT_CB_MASK_SHIFT(pie_handle->pie_tcns->TCNSLUT2[i]),
                   tcns_alt_lut[i].cb_lut_value);
            return;
        }
        
    }
    printk("Successful test!\n");
}

void pie_rgb2esrgb_writeread_luts(void)
{
    uint32_t *r_lut;
    uint32_t *g_lut;
    uint32_t *b_lut;    
    int i;
    struct pie_handle_t *pie_handle;

    r_lut = kmalloc(PIE_RGB2ESRGB_LUT_ENTRIES * sizeof(uint32_t), GFP_KERNEL);
    g_lut = kmalloc(PIE_RGB2ESRGB_LUT_ENTRIES * sizeof(uint32_t), GFP_KERNEL);
    b_lut = kmalloc(PIE_RGB2ESRGB_LUT_ENTRIES * sizeof(uint32_t), GFP_KERNEL);          
    pie_do_reset();
    msleep(100);  // give the asic time for a nice reset
    pie_do_clear_all_irqs();
    // create a pie instance
    pie_handle = pie_create_new_default_handle();

    printk("Now writing patterns to the luts\n");

    get_random_bytes(b_lut, PIE_RGB2ESRGB_LUT_ENTRIES*4);  // fill b buffer randomly
    
    for (i=0;i<PIE_RGB2ESRGB_LUT_ENTRIES;i++)
    {
        r_lut[i] = i;
        g_lut[i] = PIE_RGB2ESRGB_LUT_ENTRIES -1 - i;
        b_lut[i] &= LUT1D_RGB2ESRGB_LUT2_LUT_VAL_MASK;  // make sure value fits in field
    }

    // set lut values and extra regs.  Why grab exactly 1/2way through the lut for the
    // extrareg value?  Well, why not?
    pie_rgb2esrgb_set_lut(pie_handle, r_lut, PIE_RGB2ESRGB_LUT_ENTRIES, PIE_CSC_RED_LUT);
    pie_rgb2esrgb_set_extraregs(pie_handle, PIE_CSC_RED_LUT, r_lut[PIE_RGB2ESRGB_LUT_ENTRIES/2]);

    pie_rgb2esrgb_set_lut(pie_handle, g_lut, PIE_RGB2ESRGB_LUT_ENTRIES, PIE_CSC_GREEN_LUT);
    pie_rgb2esrgb_set_extraregs(pie_handle, PIE_CSC_GREEN_LUT, g_lut[PIE_RGB2ESRGB_LUT_ENTRIES/2]);

    pie_rgb2esrgb_set_lut(pie_handle, b_lut, PIE_RGB2ESRGB_LUT_ENTRIES, PIE_CSC_BLUE_LUT);
    pie_rgb2esrgb_set_extraregs(pie_handle, PIE_CSC_BLUE_LUT, b_lut[PIE_RGB2ESRGB_LUT_ENTRIES/2]);
    
//    pie_rgb2esrgb_set_bypass(pie_handle, false);  // enable the block

    printk("Writing registers to the ASIC\n");
    pie_do_configure(pie_handle); // write pie regs

    pie_do_get_current(pie_handle);

    // now do a quick check on the LUTs and
    // see if it has the correct value (same as we wrote)
    printk("comparing LUT write/read values\n");
    for (i=0;i<PIE_RGB2ESRGB_LUT_ENTRIES;i++)
    {
        // since this is verification, we are allowed to access the
        // entries in the pie handle.  In the real scan world, NO ONE
        // is allowed to access the entries in the pie handle outside
        // of the driver.  But since we are testing the driver, we are
        // special.  If this really were a general purpose thing to
        // do, we would add the function to the convenience functions
        if (pie_handle->pie_rgb2esrgb->LUT0[i] != r_lut[i])
        {
            printk("ERROR!! bad compare LUT0[%d] = 0x%X, should be 0x%X\n",
                   i, pie_handle->pie_rgb2esrgb->LUT0[i], r_lut[i]);
            return;
        }

        if (pie_handle->pie_rgb2esrgb->LUT1[i] != g_lut[i])
        {
            printk("ERROR!! bad compare LUT1[%d] = 0x%X, should be 0x%X\n",
                   i, pie_handle->pie_rgb2esrgb->LUT1[i], g_lut[i]);
            return;
        }
        if (pie_handle->pie_rgb2esrgb->LUT2[i] != b_lut[i])
        {
            printk("ERROR!! bad compare LUT2[%d] = 0x%X, should be 0x%X\n",
                   i, pie_handle->pie_rgb2esrgb->LUT2[i], b_lut[i]);
            return;
        }
    }

    printk("comparing extrareg write/read values\n");
    
    if (pie_handle->pie_rgb2esrgb->EXTRAREG0 != r_lut[PIE_RGB2ESRGB_LUT_ENTRIES/2])
    {
        printk("ERROR!! bad compare EXTRAREG0 = 0x%X, should be 0x%X\n",
               pie_handle->pie_rgb2esrgb->EXTRAREG0, r_lut[PIE_RGB2ESRGB_LUT_ENTRIES/2]);
        return;
    }

    if (pie_handle->pie_rgb2esrgb->EXTRAREG1 != g_lut[PIE_RGB2ESRGB_LUT_ENTRIES/2])
    {
        printk("ERROR!! bad compare EXTRAREG1 = 0x%X, should be 0x%X\n",
               pie_handle->pie_rgb2esrgb->EXTRAREG1, g_lut[PIE_RGB2ESRGB_LUT_ENTRIES/2]);
        return;
    }

    if (pie_handle->pie_rgb2esrgb->EXTRAREG2 != b_lut[PIE_RGB2ESRGB_LUT_ENTRIES/2])
    {
        printk("ERROR!! bad compare EXTRAREG2 = 0x%X, should be 0x%X\n",
               pie_handle->pie_rgb2esrgb->EXTRAREG2, b_lut[PIE_RGB2ESRGB_LUT_ENTRIES/2]);
        return;
    }
    
    printk("Successful test!\n");
}

// sccsc input-LUT write/read test.
//
// Resets the PIE block, loads an ascending ramp (entry N holds the value N)
// into the sccsc ilut via pie_sccsc_load_ilut(), then asks
// pie_sccsc_dbg_verify_ilut() to check it and prints the error count it
// returns.
void pie_sccsc_rw_test_luts(void)
{
    uint32_t *ilut_pattern;
    int       entry;
    int       mismatches;

    ilut_pattern = kmalloc(PIE_SCCSC_ILUT_SIZE, GFP_KERNEL);
    if (!ilut_pattern)
    {
        printk("%s Could not allocate sccsc_ilut\n", __FUNCTION__);
        return;
    }

    pie_do_reset();
    msleep(100);  // let the asic settle after the reset

    printk("Now writing patterns to the luts\n");

    // ramp pattern: each entry holds its own index
    entry = 0;
    while (entry < PIE_SCCSC_ILUT_ENTRIES)
    {
        ilut_pattern[entry] = entry;
        entry++;
    }
    pie_sccsc_load_ilut(ilut_pattern, PIE_SCCSC_ILUT_ENTRIES);

    printk("Now reading and verifying the luts\n");
    mismatches = pie_sccsc_dbg_verify_ilut();
    //pie_sccsc_dump_ilut();

    printk("Test complete, lut compare errors=%d\n", mismatches);

    kfree(ilut_pattern);
}

// cstats (histogram) SRAM write/read test.
//
// Sets every word of cstats srams 0-3 to all-ones through a pie handle,
// writes the handle to the hardware, resets the handle to defaults, reads
// the hardware state back and counts the words that did not come back as
// expected (0xffffffff for srams 0-2, 0x07ffffff for the 27-bit wide
// sram 3).  The error count is reported with printk.
//
// The scratch buffer is sized from the cstats sram sizes themselves (the
// larger of the regular and the non-white sram), not from an unrelated
// block's constant, so the memset/set/get calls below can never run past
// the allocation.
void pie_cstats_test_ram(void)
{
    struct pie_handle_t *pie_handle;
    uint32_t *sram;
    int i, sram_entries, sramsize_bytes, sramidx;
    uint32_t *ptr32;
    int errors = 0;

    // the same buffer is reused for all four srams: make it big enough for any
    sram_entries = PIE_CSTATS_SRAM_SIZE;
    if (PIE_CSTATS_NONWHITE_SRAM_SIZE > sram_entries)
        sram_entries = PIE_CSTATS_NONWHITE_SRAM_SIZE;
    sramsize_bytes = sram_entries * sizeof(uint32_t);

    sram = kmalloc(sramsize_bytes, GFP_KERNEL);
    if( sram==NULL ) {
        printk("%s Could not allocate sram\n", __FUNCTION__);
        return;
    }

    pie_do_reset();
    msleep(100);  // give the asic time for a nice reset

    // create a pie instance
    pie_handle = pie_create_new_default_handle();

    // Set ram bits (in the pie handle)
    memset( sram, 0xff, sramsize_bytes );
    pie_cstats_set_sram(pie_handle,0,sram,PIE_CSTATS_SRAM_SIZE);
    pie_cstats_set_sram(pie_handle,1,sram,PIE_CSTATS_SRAM_SIZE);
    pie_cstats_set_sram(pie_handle,2,sram,PIE_CSTATS_SRAM_SIZE);
    pie_cstats_set_sram(pie_handle,3,sram,PIE_CSTATS_NONWHITE_SRAM_SIZE);

    printk("Now writing patterns to the cstats ram\n");
    pie_do_configure(pie_handle); // pie, write your regs/ram

    // Clear out the pie handle so the compare below cannot pass on stale data
    pie_do_get_default(pie_handle);

    printk("Now reading patterns from the cstats ram\n");
    pie_do_get_current(pie_handle);

    for( sramidx=0 ; sramidx<3 ; sramidx++ ) {
        memset( sram, 0, sramsize_bytes );
        pie_cstats_get_sram( pie_handle, sramidx, sram, PIE_CSTATS_SRAM_SIZE );
        ptr32 = sram;
        for( i=0 ; i<PIE_CSTATS_SRAM_SIZE ; i++ ) {
            if ( *ptr32 != 0xffffffff ) {
                errors++;
            }
            ptr32++;
        }
    }
    memset( sram, 0, sramsize_bytes );
    pie_cstats_get_sram( pie_handle, 3, sram, PIE_CSTATS_NONWHITE_SRAM_SIZE );
    ptr32 = sram;
    for( i=0 ; i<PIE_CSTATS_NONWHITE_SRAM_SIZE ; i++ ) {
        /* 27-bit value stored as 32-bit integer */
        if ( *ptr32 != 0x07ffffff ) {
            errors++;
        }
        ptr32++;
    }

    printk("Cstats (histogram) test completed, errors=%d\n", errors);

    pie_do_free_handle(pie_handle);
    kfree(sram);
}

// ngadjust LUT write/read test.
//
// Fills two byte tables (C1 and C2) with random data, programs them into the
// ngadjust LUT through a pie handle, enables the block, writes the handle to
// the ASIC, reads the current state back and compares the C1/C2 fields of
// every LUT entry with what was written.  Each mismatch is printed and
// counted; the total is reported at the end.
void pie_ngadjust_rw_test_lut(void)
{
    uint8_t  *lut_c1_array;
    uint8_t  *lut_c2_array;
    int      i;
    struct pie_handle_t *pie_handle;
    int      errors = 0;

    lut_c1_array = kmalloc(PIE_NGADJUST_LUT_ENTRIES, GFP_KERNEL);
    lut_c2_array = kmalloc(PIE_NGADJUST_LUT_ENTRIES, GFP_KERNEL);

    if (lut_c1_array == NULL || lut_c2_array == NULL)
    {
        printk("ERROR: ngadjust could not allocate lut ram\n");
        // only one of the two may have failed; kfree(NULL) is a no-op,
        // so this releases whichever allocation did succeed
        kfree(lut_c1_array);
        kfree(lut_c2_array);
        return;
    }

    pie_do_reset();
    msleep(100);  // give the asic time for a nice reset
    pie_do_clear_all_irqs();

    // create a pie instance
    pie_handle = pie_create_new_default_handle();

    printk("Now writing pattern to the luts\n");

    get_random_bytes(lut_c1_array, PIE_NGADJUST_LUT_ENTRIES);
    get_random_bytes(lut_c2_array, PIE_NGADJUST_LUT_ENTRIES);

    pie_ngadjust_set_lut(pie_handle, lut_c1_array, lut_c2_array);
    pie_ngadjust_set_bypass(pie_handle, false);  // enable the block

    printk("Writing registers to the ASIC\n");
    pie_do_configure(pie_handle);

    // pie_ngadjust_dump_lut();

    // check the values of the lut
    // see if they have the correct values (same as we wrote)
    pie_do_get_current(pie_handle);

    printk("comparing LUT write/read values\n");
    for (i=0; i<PIE_NGADJUST_LUT_ENTRIES; i++)
    {
        // since this is verification, we are allowed to access the
        // entries in the pie handle.  In the real scan world, NO ONE
        // is allowed to access the entries in the pie handle outside
        // of the driver.  But since we are testing the driver, we are
        // special.  If this really were a general purpose thing to
        // do, we would add the function to the convenience functions
        if (NGADJUST_LUTACCESS_C1_MASK_SHIFT(pie_handle->pie_ngadjust->LUTAccess[i]) != lut_c1_array[i])
        {
            printk("ERROR: bad compare ngadjust LUT[%d].C1 = 0x%02X, should be 0x%02X\n",
                   i,
                   NGADJUST_LUTACCESS_C1_MASK_SHIFT(pie_handle->pie_ngadjust->LUTAccess[i]),
                   lut_c1_array[i]);
            errors++;
        }
        if (NGADJUST_LUTACCESS_C2_MASK_SHIFT(pie_handle->pie_ngadjust->LUTAccess[i]) != lut_c2_array[i])
        {
            printk("ERROR: bad compare ngadjust LUT[%d].C2 = 0x%02X, should be 0x%02X\n",
                   i,
                   NGADJUST_LUTACCESS_C2_MASK_SHIFT(pie_handle->pie_ngadjust->LUTAccess[i]),
                   lut_c2_array[i]);
            errors++;
        }
    }

    pie_do_free_handle(pie_handle);
    kfree(lut_c1_array);
    kfree(lut_c2_array);

    printk("ngadjust lut test complete, errors=%d\n", errors);
}

// start of functions callable by sysfs
// sysfs interface definitions
// sysfs "store" handler for the pietest attribute: parses one command line
// and runs the selected verification test.
//
// The first character of buf selects the command (see usagebuf for the
// list); optional decimal arguments follow, separated by whitespace.
//
//   dev, attr - unused
//   buf       - NUL-terminated command text written by the user; it is only
//               read, never modified
//   count     - number of bytes in buf
//
// Always returns count, so the write is reported as fully consumed even
// when the selected test fails (failures are reported through printk).
//
// Parsing notes:
//  - the command word is read with a width-limited %31s so an over-long
//    word cannot overflow command[]
//  - every numeric argument starts at 0, so a command given with fewer
//    arguments than it accepts acts on 0 (sub-test 0 for the dump/test
//    selectors) instead of on an uninitialized stack value
//  - eatPIEData, num_desc, rowheight and linewidth_pixels are static: the
//    'e' and 'p' commands change them for all later commands
ssize_t run_test_set(struct device *dev, struct device_attribute *attr,
                     const char *buf, size_t count)
{
    uint32_t uval32 = 0, uval32_1 = 0, num_pixels = 0;
    char command[32];
    int i, lines_to_skip = 0;
    size_t cmdlen = count;   // length of the command text without a trailing newline
    static char eatPIEData=0;
    static int num_desc = NUM_DESC, rowheight=ROWHEIGHT, linewidth_pixels=LINEWIDTH_PIXELS;

    printk("\n%s\n",__func__);

    // defensive: nothing to parse, and buf[count-1] below must not underflow
    if (count == 0)
        return count;

    // ignore the trailing newline (only matters when echoing the text back;
    // sscanf already treats it as whitespace)
    if (buf[cmdlen-1] == '\n')
    {
        cmdlen--;
    }
    switch(buf[0])
    {
    case 'a':
        printk("running basic reg read test\n");
        run_basic_test();
        break;

    case 'b':
        printk("running sccsc test\n");
        sscanf(buf, "%31s%d", command, &uval32);
        switch (uval32)
        {
        case 0:
            printk("Executing sccsc register dump\n");
            pie_sccsc_dump();
            break;
        case 1:
            printk("Executing sccsc lut dump\n");
            pie_sccsc_dump_ilut();
            break;
        case 2:
            printk("Executing sccsc lut rw test\n");
            pie_sccsc_rw_test_luts();
            break;
        default:
            printk("ERROR! sccsc doesn't have a test for %d\n", uval32);
        }
        break;

    case 'c':
        printk("running tcns test\n");
        sscanf(buf, "%31s%d", command, &uval32);
        switch (uval32)
        {
        case 0:
            printk("Executing tcns register test\n");
            pie_tcns_dump();
            break;
        case 1:
            printk("Executing tcns dump luts - fixme, make this a lut rw test\n");
            pie_tcns_rw_test_luts();
            break;
        default:
            printk("ERROR! tcns doesn't have a test for %d\n", uval32);
        }
        break;

    case 'd':
        // dump handle registers and descriptors: d <numchan> <numdesc>
        sscanf(buf,"%31s%d%d", command, &uval32, &uval32_1);
        if (uval32 > NUM_PIE_IDMA_CHANNELS)
        {
            printk("ERROR: too many channels requested: usage: d <numchan> <numdesc>\n");
            return count;
        }
        if (uval32_1 > 1000)
        {
            printk("ERROR: too many descriptors requested: usage: d <numchan> <numdesc>\n");
            return count;
        }
        dump_handle_regs(uval32);
        dump_desc(uval32, uval32_1);
        break;
    case 'e':
        // select whether later bypass tests make the PIE ODMA discard its data
        sscanf(buf,"%31s%d", command, &uval32);
        if (uval32 == 0)
        {
            eatPIEData = 0;
            printk("normal PIE ODMA - don't eat the data\n");
        }
        else
        {
            eatPIEData = 1;
            printk("PIE ODMA will now eat all the data\n");
        }
        break;
    case 'f':
        // do filter test - dsmf
        sscanf(buf, "%31s%d", command, &uval32);
        switch (uval32)
        {
        case 0:
            printk("Executing dsmf register test\n");
            pie_dsmf_dump();
            break;
        case 1:
            printk("Executing dsmf write luts\n");
            pie_dsmf_write_luts();
            break;
        default:
            printk("ERROR! dsmf doesn't have a test for %d\n", uval32);
        }
        break;
    case 'g':
        // do decimation test (ran out of mnemonic characters)
#define MIN_DECIM_VAL 1
#define MAX_DECIM_VAL 8
        sscanf(buf, "%31s%d%d", command, &uval32, &lines_to_skip);
        // NOTE(review): the check accepts 0 although the message says 1-16;
        // confirm which of the two is intended
        if ((lines_to_skip < 0) || (lines_to_skip > 16))
        {
            printk("lines to skip must be 1-16\n");
            break;
        }
        if ((uval32 >= MIN_DECIM_VAL) && (uval32 <= MAX_DECIM_VAL))
        {
            printk("Running decimation tests, decimation value %d, lines_to_skip %d\n", uval32, lines_to_skip);
            // pair the requested value with every legal value, in both
            // argument positions (the i == uval32 pair is only run once)
            for (i=MIN_DECIM_VAL;i<=MAX_DECIM_VAL;i++)
            {
                if (pie_decimate_test(i, uval32, lines_to_skip, num_desc, rowheight, linewidth_pixels) < 0)
                    return count;
                if (i != uval32)
                    if (pie_decimate_test(uval32, i, lines_to_skip, num_desc, rowheight,
                                          linewidth_pixels) < 0)
                        return count;
            }
        }
        else
            printk("ERROR, decimation test requires parameter %d-%d \n", MIN_DECIM_VAL, MAX_DECIM_VAL);
        break;
    case 'h':
        printk("running cstats (histogram) test\n");
        sscanf(buf, "%31s%d", command, &uval32);
        switch (uval32)
        {
        case 0:
            printk("Executing cstats register dump\n");
            pie_cstats_dump();
            break;
        case 1:
            printk("Executing cstats ram dump\n");
            pie_cstats_dump_ram();
            break;
        case 2:
            printk("Executing cstats ram test\n");
            pie_cstats_test_ram();
            break;
        default:
            printk("ERROR! cstats doesn't have a test for %d\n", uval32);
        }
        break;
    case 'i':
        // interrupt test
        interrupt_test();
        break;
    case 'j':
        // clippad test, repeated for each pixel format; stop at the first failure
        sscanf(buf, "%31s%d%d", command, &uval32, &uval32_1);
        if (clippad_test(uval32, uval32_1, POGO_FMT_MONO, num_desc, rowheight, linewidth_pixels) < 0)
            return count;
        if (clippad_test(uval32, uval32_1, POGO_FMT_RGB, num_desc, rowheight, linewidth_pixels) < 0)
            return count;
        if (clippad_test(uval32, uval32_1, POGO_FMT_RGBX, num_desc, rowheight, linewidth_pixels) < 0)
            return count;
        if (clippad_test(uval32, uval32_1, POGO_FMT_XRGB, num_desc, rowheight, linewidth_pixels) < 0)
            return count;
        break;
    case 'k':
        sscanf(buf, "%31s%d", command, &uval32);
        switch (uval32)
        {
        case 0:
            printk("Executing bde register dump\n");
            pie_bde_dump();
            break;
        case 1:
            printk("Dumping bde luts\n");
            pie_bde_lut_dump();
            break;
        default:
            // bde lut test - set up luts, send data through, check output data matches the expected values
            // run test 1
            if (pie_bde_lut_test(POGO_FMT_MONO, num_desc, rowheight, linewidth_pixels, 1) < 0)
                return count;
            //run test 2
            if (pie_bde_lut_test(POGO_FMT_MONO, num_desc, rowheight, linewidth_pixels, 2) < 0)
                return count;
            // run test 1
            if (pie_bde_lut_test(POGO_FMT_RGB, num_desc, rowheight, linewidth_pixels, 1) < 0)
                return count;
            //run test 2
            if (pie_bde_lut_test(POGO_FMT_RGB, num_desc, rowheight, linewidth_pixels, 2) < 0)
                return count;

            break;
        }
        break;
    case 'm':
        sscanf(buf,"%31s%d%d", command, &uval32, &num_pixels);
        do_memory_dump(uval32, num_pixels);
        break;
    case 'n':
        printk("running ngadjust test\n");
        sscanf(buf, "%31s%d", command, &uval32);
        switch (uval32)
        {
        case 0:
            printk("Executing ngadjust register dump\n");
            pie_ngadjust_dump();
            break;
        case 1:
            printk("Executing ngadjust lut dump\n");
            pie_ngadjust_dump_lut();
            break;
        case 2:
            printk("Executing ngadjust lut rw test\n");
            pie_ngadjust_rw_test_lut();
            break;
        default:
            printk("ERROR! ngadjust doesn't have a test for %d\n", uval32);
        }
        break;
    case 'o':
        printk("running ot dma tests\n");
        sscanf(buf, "%31s%d", command, &uval32);
        switch(uval32)
        {
        case 0:
            printk("Executing register dump\n");
            pie_dump_pogo_regs();
            break;
        case 1:
            printk("Executing odma only test\n");
            if (pie_otodma_simple_test() < 0)
            {
                printk("otodma test FAILED\n");
                return count;
            }
            break;
        case 2:
            printk("Executing ot idma, otmerge, odma test\n");
            if (pie_ot_test() < 0)
            {
                printk("OT test FAILED\n");
                return count;
            }
            break;
        default:
            // report unknown sub-tests like every other command does
            printk("ERROR! ot doesn't have a test for %d\n", uval32);
        }

        break;
    case 'p':
        // parse straight into the persistent settings; a value that is not
        // supplied keeps its previous setting
        sscanf(buf, "%31s%d%d%d", command, &linewidth_pixels, &rowheight, &num_desc);
        printk("command p just set linewidth to %d, rowheight to %d and num_desc to %d\n",
               linewidth_pixels, rowheight, num_desc);
        break;
    case 'r':
        printk("soft resetting pie block\n");
        pie_do_reset();
        break;
    case 's':
        sscanf(buf, "%31s%d", command, &uval32);
        switch (uval32)
        {
        case 0:
            printk("rgb2esrgb register test\n");
            pie_rgb2esrgb_dump();
            break;
        case 1:
            printk("Executing rbb2esrgb write/read luts\n");
            pie_rgb2esrgb_writeread_luts();
            break;
        default:
            printk("ERROR! rgb2esrgb doesn't have a test for %d\n", uval32);
        }
        break;
    case 't':
        // main bypass test sequence: each step pushes data through PIE with
        // a different input format / output format / bit depth / colorswap
        // combination; the sequence stops at the first failing step

        printk("Executing the main bypass tests (Data from POGO IDMA through PIE out POGO ODMA)\n");

        printk("PLANAR 8bpp MONO test - no color swap - MONO  out\n");
        if (do_bypass_test(POGO_FMT_MONO, POGO_FMT_MONO, POGO_8BPP, POGO_NOCOLORSWAP,
                           num_desc, linewidth_pixels, rowheight, eatPIEData) < 0)
            return count;

        printk("RGB test - no color swap\n");
        if (do_bypass_test(POGO_FMT_RGB, POGO_FMT_RGB, POGO_8BPP, POGO_NOCOLORSWAP,
                           num_desc, linewidth_pixels, rowheight, eatPIEData) < 0)
            return count;


        printk("RGBX test - no color swap - MONO Out\n");
        if (do_bypass_test(POGO_FMT_RGBX, POGO_FMT_MONO, POGO_8BPP, POGO_NOCOLORSWAP,
                           num_desc, linewidth_pixels, rowheight, eatPIEData) < 0)
            return count;

        printk("XRGB test - no color swap - MONO Out\n");
        if (do_bypass_test(POGO_FMT_XRGB, POGO_FMT_MONO, POGO_8BPP, POGO_NOCOLORSWAP,
                           num_desc, linewidth_pixels, rowheight, eatPIEData) < 0)
            return count;

        printk("RGB test - no color swap - MONO Out\n");
        if (do_bypass_test(POGO_FMT_RGB, POGO_FMT_MONO, POGO_8BPP, POGO_NOCOLORSWAP,
                           num_desc, linewidth_pixels, rowheight, eatPIEData) < 0)
            return count;

        printk("RGB test - colorswap\n");
        if (do_bypass_test(POGO_FMT_RGB, POGO_FMT_RGB, POGO_8BPP, POGO_COLORSWAP,
                           num_desc, linewidth_pixels, rowheight, eatPIEData) < 0)
            return count;

        printk("XRGB test - no colorswap\n");
        if (do_bypass_test(POGO_FMT_XRGB, POGO_FMT_XRGB, POGO_8BPP, POGO_NOCOLORSWAP,
                           num_desc, linewidth_pixels, rowheight, eatPIEData) < 0)
            return count;

        printk("XRGB test - colorswap\n");
        if (do_bypass_test(POGO_FMT_XRGB, POGO_FMT_XRGB, POGO_8BPP, POGO_COLORSWAP,
                           num_desc, linewidth_pixels, rowheight, eatPIEData) < 0)
            return count;

        printk("RGBX test - no colorswap\n");
        if (do_bypass_test(POGO_FMT_RGBX, POGO_FMT_RGBX, POGO_8BPP, POGO_NOCOLORSWAP,
                           num_desc, linewidth_pixels, rowheight, eatPIEData) < 0)
            return count;

        printk("RGBX test - colorswap\n");
        if (do_bypass_test(POGO_FMT_RGBX, POGO_FMT_RGBX, POGO_8BPP, POGO_COLORSWAP,
                           num_desc, linewidth_pixels, rowheight, eatPIEData) < 0)
            return count;

        printk("PLANAR 16bpp MONO test - no color swap - MONO out\n");
        if (do_bypass_test(POGO_FMT_MONO, POGO_FMT_MONO, POGO_16BPP, POGO_NOCOLORSWAP,
                           num_desc, linewidth_pixels, rowheight, eatPIEData) < 0)
            return count;

        printk("Planar 8bpp COLOR test - no color swap - MONO Out\n");
        if (do_bypass_test(POGO_FMT_PLANAR, POGO_FMT_MONO, POGO_8BPP, POGO_NOCOLORSWAP,
                           num_desc, linewidth_pixels, rowheight, eatPIEData) < 0)
            return count;

        printk("PLANAR 8bpp COLOR test - no color swap - RGB out\n");
        if (do_bypass_test(POGO_FMT_PLANAR, POGO_FMT_RGB, POGO_8BPP, POGO_NOCOLORSWAP,
                           num_desc, linewidth_pixels, rowheight, eatPIEData) < 0)
            return count;

        printk("PLANAR 8bpp COLOR test - no color swap - XRGB out\n");
        if (do_bypass_test(POGO_FMT_PLANAR, POGO_FMT_XRGB, POGO_8BPP, POGO_NOCOLORSWAP,
                           num_desc, linewidth_pixels, rowheight, eatPIEData) < 0)
            return count;

        printk("PLANAR 8bpp COLOR test - no color swap - RGBX out\n");
        if (do_bypass_test(POGO_FMT_PLANAR, POGO_FMT_RGBX, POGO_8BPP, POGO_NOCOLORSWAP,
                           num_desc, linewidth_pixels, rowheight, eatPIEData) < 0)
            return count;

        printk("Planar 16bpp COLOR test - no color swap - MONO Out\n");
        if (do_bypass_test(POGO_FMT_PLANAR, POGO_FMT_MONO, POGO_16BPP, POGO_NOCOLORSWAP,
                           num_desc, linewidth_pixels, rowheight, eatPIEData) < 0)
            return count;

        printk("PLANAR 16bpp COLOR test - no color swap - RGB out\n");
        if (do_bypass_test(POGO_FMT_PLANAR, POGO_FMT_RGB, POGO_16BPP, POGO_NOCOLORSWAP,
                           num_desc, linewidth_pixels, rowheight, eatPIEData) < 0)
            return count;

        printk("PLANAR 16bpp COLOR test - no color swap - XRGB out\n");
        if (do_bypass_test(POGO_FMT_PLANAR, POGO_FMT_XRGB, POGO_16BPP, POGO_NOCOLORSWAP,
                           num_desc, linewidth_pixels, rowheight, eatPIEData) < 0)
            return count;

        printk("PLANAR 16bpp COLOR test - no color swap - RGBX out\n");
        if (do_bypass_test(POGO_FMT_PLANAR, POGO_FMT_RGBX, POGO_16BPP, POGO_NOCOLORSWAP,
                           num_desc, linewidth_pixels, rowheight, eatPIEData) < 0)
            return count;

        break;

    case 'x': //xyscaler test
        sscanf(buf,"%31s%d", command, &uval32);
        printk("Executing xyscaler testing test case=%d\n", uval32);

        // every xyscaler case starts from a freshly reset block
        pie_do_reset();
        msleep(100);  // give the asic time for a nice reset
        pie_do_clear_all_irqs();

        switch (uval32)
        {
        case 0:
            run_pie_xyscaler_register_test();
            break;
        case 1:
            run_pie_xyscaler_test(POGO_FMT_MONO, POGO_FMT_MONO,
                                  128, 30,
                                  POGO_8BPP,POGO_NOCOLORSWAP,
                                  1, 2, // scale = 1/2
                                  PIE_XY_SCALER_RAPR
                );
            break;

        case 2:
            run_pie_xyscaler_test(POGO_FMT_MONO, POGO_FMT_MONO,
                                  64, 15,
                                  POGO_8BPP,POGO_NOCOLORSWAP,
                                  2, 1, // scale = 2/1
                                  PIE_XY_SCALER_LINEAR
                );
            break;
        case 3:
            run_pie_xycscaler_register_test();
            break;

        case 4:
            run_pie_xyscaler_test(POGO_FMT_MONO, POGO_FMT_MONO,
                                  64, 15,
                                  POGO_8BPP,POGO_NOCOLORSWAP,
                                  2, 1, // scale = 2/1
                                  PIE_XY_SCALER_CUBIC
                );
            break;

        case 5: // color
            run_pie_xyscaler_test(POGO_FMT_PLANAR, POGO_FMT_RGB,
                                  128, 30,
                                  POGO_8BPP,POGO_NOCOLORSWAP,
                                  1, 2, // scale = 1/2
                                  PIE_XY_SCALER_RAPR
                );
            break;

        default:
            printk("unknown command parameter\n");

        }
        break;
    default:
        // echo the command text without its trailing newline
        printk("There is no test triggered by %.*s\n", (int)cmdlen, buf);
    }

    return count;
}

// Usage text returned when the pietest sysfs attribute is read: one line per
// command accepted by run_test_set().  Keep this list in sync with the
// switch in run_test_set() (the "s" rgb2esrgb entries were missing).
char *usagebuf=" \n\
a=basic reg read \n\
b 0=sccsc dump regs \n\
b 1=sccsc dump luts \n\
b 2=sccsc lut test\n\
c 0=tcns test regs \n\
c 1=tcns lut test ???\n\
d <numchannels> <numdescriptors>=dump all regs and descriptors  \n\
e 1=set PIE ODMA to throw away data  \n\
e 0=reset PIE ODMA to process data  \n\
f 0=filter test regs (dsmf) \n\
f 1=filter lut test - write luts (dmsf) \n\
g <decimate> <lines_to_skip> = run decimate test \n\
h 0=cstats (histogram) dump regs \n\
h 1=cstats (histogram) dump ram \n\
h 2=cstats (histogram) test ram \n\
i=interrupt test - set all pie interrupts  \n\
j <left clip&pad pixels><right clip&pad pixels> - test clippad (clip and pad only) \n\
k 0=reg dump  \n\
k 1=lut dump  \n\
k 2==bde lut test \n\
m <memsize> <num entries>=dump memory buffers  \n\
n 0=ngadjust dump regs \n\
n 1=ngadjust dump luts \n\
n 2=ngadjust lut test\n\
o 0=OT dump regs \n\
o 1=OT odma tests \n\
o 2=OTMARB, OT idma/odma tests \n\
p <pixels_per_line> <num_rows> <num_descriptors> = set parms   \n\
r=do soft reset PIE, dmas  \n\
s 0=rgb2esrgb dump regs \n\
s 1=rgb2esrgb lut write/read test \n\
t=run dma test \"s\"   \n\
x <testnum>=run xyscaler test <0-5>\n";
    
// sysfs "show" handler for the pietest attribute: copies the usage text
// into buf (at most PAGE_SIZE bytes) and returns the number of characters
// written.  dev and attr are unused.
ssize_t run_test_get(struct device *dev, struct device_attribute *attr,
                        char *buf)
{
    // usagebuf is passed as data, never as the format string, so a '%'
    // added to the usage text later cannot be misread as a conversion
    return scnprintf(buf, PAGE_SIZE, "%s", usagebuf);
}

// register sysfs functions from the test code
//
// DEVICE_ATTR defines dev_attr_pietest: an attribute named "pietest" that is
// writable by its owner and readable by everyone, with run_test_get as the
// read (show) handler and run_test_set as the write (store) handler.
DEVICE_ATTR(pietest, S_IWUSR | S_IRUGO, run_test_get, run_test_set);

// NULL-terminated list of the attributes that make up the "pietest" group
static struct attribute *pietest_attrs[] = {
    &dev_attr_pietest.attr,
    NULL,
};

// Named attribute group: because .name is set, the attributes appear in a
// "pietest" subdirectory of the kobject the group is registered on
struct attribute_group pietest_attrgrp = {
    .name = "pietest",
    .attrs = pietest_attrs,
};

// end of register sysfs functions from the test code


struct kobject *kobj;

static int pie_verification_init(void)
{
    int retcode;
    
    printk("%s\n",__func__);
    // create a sysfs interface
    kobj = kobject_create_and_add("pie_verification",firmware_kobj);
    
#if 0    
    // create more user friendly name - note that this call only works when not built as a module
    // the sysfs_rename_dir function is not exported
    if ((sysfs_rename_dir(kobj), "pie") != 0)
        printk("ERROR, sysfs_rename_dir failed in %s\n",__FUNCTION__);
#endif    
    
    retcode = sysfs_create_group(kobj,&pietest_attrgrp);    
    if (retcode != 0)
    {
        printk(KERN_ERR "%s sysfs_create_file pietest failed retcode=%d\n",
                __FUNCTION__, retcode);
    }
    return 0;
}

module_init(pie_verification_init);

static void pie_verification_exit(void)
{
    sysfs_remove_group(kobj,&pietest_attrgrp);
    kobject_put(kobj); // tell the system it can release the kobject
}

module_exit(pie_verification_exit);

// module metadata (license, author, description and version strings)
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Copyright (c) 2013 Marvell International Ltd.");
MODULE_DESCRIPTION("verification of the pie module");
MODULE_VERSION("2013_Dec_5");
