/*
 ***************************************************************************************
 * (c) Copyright 2014 Marvell International Ltd.
 **************************************************************************************
 *
 * Marvell Commercial License Option
 *
 * If you received this File from Marvell as part of a proprietary software release,
 * the File is considered Marvell Proprietary and Confidential Information, and is
 * licensed to you under the terms of the applicable Commercial License.
 *
 **************************************************************************************
 *
 * Marvell GPL License Option
 *
 * If you received this File from Marvell as part of a Linux distribution, this File
 * is licensed to you in accordance with the terms and conditions of the General Public
 * License Version 2, June 1991 (the "GPL License").  You can redistribute it and/or
 * modify it under the terms of the GPL License; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
 * PARTICULAR PURPOSE.  See the GPL License for more details.
 *
 * You should have received a copy of the GNU General Public License along with this
 * program.  If not, see http://www.gnu.org/licenses/old-licenses/gpl-2.0.html.
 *
 **************************************************************************************
 */

#include <linux/module.h>
#include <linux/cdev.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/dma-mapping.h>
#include <linux/delay.h>  // for msleep
#include <linux/random.h> // for get_random_bytes in pie_verification.h

#include "descriptor.h"

#include "pie_handle_if.h"
#include "pie_if.h"
#include "pie_convenience_if.h"
#include "pie_verification.h"

#include "pie_strippr.h"
#include "pie_scaler_verification.h"
#include "pie_verification.h"


/** convenience structure holding PIE XYScale scaling information. The scaler
 * values are passed as two fractions, an X scale and a Y scale. For example,
 * to scale down 50%, XYScale needs X=1/2 and Y=1/2.
 */
struct pie_scale {
    /* X scale as the fraction x_numerator / x_denominator */
    int x_numerator;
    int x_denominator;
    /* Y scale as the fraction y_numerator / y_denominator */
    int y_numerator;
    int y_denominator;
    
    /* xyscaler hardware values */
    /* Filled in by pie_xy_calc_xyscale(): the fraction multiplied by 1<<16.
     * Linear/Cubic modes store denominator/numerator; RAPR stores
     * numerator/denominator.
     */
    int x_scale;
    int y_scale;
};

/* davep 09-Jun-2011 ; centralizing a collection of hodge-podge strip size
 * calculations
 */
struct pie_strip {
    /* which pipeline stages are enabled; read by pie_calc_stripsize() */
    bool use_cs;
    bool use_mf;
    bool use_xy;
    
    int rdma_in; /* input dma strip size */
    
    int cs_in;   /* colorshift; eats two lines */
    int cs_out;
    
    int mf_in;   /* multifilter; eats two or four lines */
    int mf_out;
    
    int xy_in;   /* very complicated */
    int xy_out;
    
    int wdma_out; /* output dma strip size */
    
    /* rows consumed by ColorShift plus rows consumed by MultiFilter */
    int total_extra_rows; /* cs+mf */
};



/**
 * \brief  ceil() without float
 *
 *  ceil(3) for a 16-bit value that is <<16 into a 32-bit value in order to
 *  make accurate division with only integers.
 *
 *      uint32_t a,b,c;
 *
 *      # quotient = int(ceil(float(numer)/float(denom)));
 *      a = numer<<16;
 *      b = a / denom;
 *      c = f16_ceil(b);
 *      quotient = c >> 16;
 *
 * \author David Poole
 * \date 02-Aug-2013
 */

static uint32_t f16_ceil( uint32_t n )
{
    /* n is a 16.16 fixed-point value: whole part in the upper 16 bits,
     * fraction in the lower 16 bits. Round it up to the next whole number
     * without touching floating point.
     */
    const uint32_t frac_mask = 0xffff;

    if( (n & frac_mask) == 0 ) {
        /* no fractional part; already a whole number */
        return n;
    }

    /* drop the fraction, then step up to the next whole number */
    return (n & ~frac_mask) + ((uint32_t)1 << 16);
}

/**
 * \brief  Calculate edge pixels for linear mode scaling.
 *
 * Die floating point, die!
 *
 * \author David Poole
 * \date 01-Aug-2013
 */

uint32_t pie_xy_calc_linear_mode_edge_pixels( uint32_t numerator,
                                              uint32_t denominator )
{
    uint32_t edge_pixels;
    uint32_t scale_f16;
    uint32_t half_excess_f16;
    
    /* arguments are unsigned, so print them with %u */
    printk(KERN_NOTICE "%s numer=%u denom=%u\n", __func__, numerator, denominator );
    
    /* a zero denominator would divide by zero below */
    BUG_ON(denominator == 0);
    /* numerator<<16 must fit in 32 bits or the fixed-point value wraps */
    BUG_ON(numerator > 0xffff);
    /* only enlarging (scale >= 1) is valid here */
    BUG_ON(numerator < denominator);
    
    /* from the MA:
     *      edge_pixels = ceiling( (scale-1)/2 )
     *
     * former floating point code
     *    edge_pixels = (int)ceil( ((float)numerator/(float)denominator - 1) / 2.0);
     *
     * Numerator and denominator are a fraction used to determine scale. The
     * numer, denom are limited to 15-bit numbers so we can use 32-bit number
     * for our integer division.
     */
    
    /* scale as a 16.16 fixed-point number */
    scale_f16 = (numerator<<16) / denominator;
    /* (scale - 1.0) / 2, still in 16.16 */
    half_excess_f16 = (scale_f16 - (1<<16)) / 2;
    /* round up to a whole number and drop the fraction bits */
    edge_pixels = f16_ceil(half_excess_f16) >> 16;
    
    printk("edge_pixels=%u\n", edge_pixels);
    
    /* edge_pixels is unsigned, so only the upper bound needs checking */
    BUG_ON(edge_pixels > 15);
    
    return edge_pixels;
}

/**
 * \brief Calculate Linear mode strip size
 *
 *  The PIE XYScale documentation uses floating point division for some of
 *  these calculations. However, Linux kernel has no float. Using a 64-bit
 *  integer to do the division.
 *
 * original floating point math
 *  strip_size_out = (int)ceil( (strip_size_in * (1<<16)) / (float)fracY);
 *
 * \author David Poole
 * \date 02-Aug-2013
 */
//FIXME: GR2 doesn't support 64bit integer, what to do?
static uint32_t pie_xy_calc_linear_mode_strip_size( uint32_t strip_size_in, uint32_t y_scale )
{
    uint32_t rows_f16;
    uint32_t quotient;
    uint32_t remainder;
    
    /* strip_size_in will almost always be in [1,22]
     * y_scale is a 16-bit number [1,65535]
     *
     * 32-bit only: rows_in * 2^16 / y_scale, bumped by one when the
     * remainder is at least half of y_scale (i.e. round to nearest).
     *
     * ToDo: Check with DaveP on why this simple algorithm can't be used.
     * An earlier variant, which was compiled out, used 64-bit integers and
     * took the ceiling of the quotient instead of rounding.
     */
    BUG_ON(strip_size_in > 22);
    
    rows_f16 = strip_size_in << 16;
    quotient = rows_f16 / y_scale;
    remainder = rows_f16 % y_scale;
    
    if( remainder >= (y_scale>>1) ) {
        quotient++;
    }
    
    return quotient;
}

/**
 * \brief Get number of rows out of scaler based on settings.
 *
 *  Because of XYScale's padding requirements and the scaling factor, the exact
 *  number of rows coming out of XYScale cannot be predicted ahead of time.
 *  However, using the scale factor, we can tell how many actual rows of real
 *  data to expect.
 *
 *  We need to pad the bottom of the image on the read to match the stripsize
 *  but that extra data must be removed from the output image when we're
 *  complete.
 *
 * \param[in] rows_in number of rows fed into top of XYScale block
 * \param[in] scale_factor XYScale's Y direction scale factor
 * \param[in] scaler_mode PIE_XY_SCALER_LINEAR, PIE_XY_SCALER_CUBIC or PIE_XY_SCALER_RAPR
 * \param[out] rows_out based on the total number of input rows and the
 *          scale factor, how many rows would a normal human expect
 *
 * \author David Poole
 * \date 26-June-05
 *
 * \remarks This function will eventually be freakishly complicated because of
 * the possibility of numeric overflow with total_rows_in * fracY > 2^32.
 * DougK sent me an email containing a formula which should prevent overflows.
 *
 * \remarks 26-July-05 I'm now finally putting DougK's email formula into this
 * function. Hasn't been excessively tested, though. See 26-July-05 notes for
 * original email.
 *
 *
 *davep 13-Jun-2011 ; DO NOT CALL THIS FUNCTION TO GET HORIZONTAL VALUES!
 *    Only call for rows!  The -1 in Linear mode is something bizarre for rows.
 *    Not pixels. See DougK for explanation.  Or maybe I'm just not that smart.
 *
 **/

static void pie_xy_calc_expected_rows( int rows_in, int scale_factor, int scaler_mode, int *rows_out )
{
    /* Linear and Cubic share one formula; everything else must be RAPR */
    const bool linear_like = ( scaler_mode==PIE_XY_SCALER_LINEAR ||
                               scaler_mode==PIE_XY_SCALER_CUBIC );
    
    BUG_ON( rows_in <= 0);
    
    if( !linear_like ) {
        int high_part;
        int low_part;
        
        BUG_ON( scaler_mode!=PIE_XY_SCALER_RAPR );
        
        /* 26-July-05 davep ; the simple formula
         *
         *    rows_out = (rows_in * scale_factor) / (1<<16);
         *
         * overflows quickly, so DougK's formula multiplies the upper bits
         * and the lower 12 bits of rows_in separately before recombining.
         */
        high_part = (rows_in>>12) * scale_factor;
        low_part = ((rows_in % (1<<12)) * scale_factor) >> 12;
        *rows_out = (high_part + low_part) >> 4;
    }
    else {
        /* davep 10-Jul-2006 ; DougK suggested using rows_in-1 for Linear */
        *rows_out = ( (rows_in-1) * (1<<16)) / scale_factor;
    }
    
    printk("%s.%d\n", __func__, __LINE__);
}

/**
 * \brief calculate expected pixel width
 *
 *  why is this function different than the 'expected_rows' version above?
 *  Because there is a huge amount of code that relies on the expected rows
 *  version and changing to the same formula below breaks that code.
 *
 *  The -1 in linear above is because linear eats a line. I need to go through
 *  a lot of code to fix the +1/-1 compensation. But first, need to get this
 *  code done.
 *
 * \author David Poole
 * \date 13-Jun-2011
 *
 */

void pie_xy_calc_expected_pixels(int pixels_in, int scale_factor, int scaler_mode, int *pixels_out)
{
    /* Predict the scaled pixel width for pixels_in input pixels.
     *
     * pixels_in:    input width in pixels, must be in [1,64000]
     * scale_factor: XYScale hardware scale value (a fraction times 1<<16),
     *               must be positive
     * scaler_mode:  PIE_XY_SCALER_LINEAR, PIE_XY_SCALER_CUBIC or
     *               PIE_XY_SCALER_RAPR
     * pixels_out:   receives the predicted output width
     *
     * All arithmetic is done in uint32_t. The previous code shifted and
     * multiplied the signed int arguments directly, which overflows int
     * for widths above 32767 even though widths up to 64000 are accepted.
     */
    uint32_t pin;
    uint32_t scale;
    uint32_t pout;
    
    BUG_ON(pixels_in <= 0 || pixels_in > 64000);
    /* zero would divide by zero; negative has no meaning as a scale */
    BUG_ON(scale_factor <= 0);
    
    pin = (uint32_t)pixels_in;
    scale = (uint32_t)scale_factor;
    
    if (scaler_mode==PIE_XY_SCALER_LINEAR || scaler_mode == PIE_XY_SCALER_CUBIC)
    {
        /* pixels_in <= 64000 so pin<<16 always fits in 32 bits */
        uint32_t tmp = pin << 16;
        
        pout = tmp / scale;
        /* round up when the remainder is more than half the divisor */
        if ((tmp % scale) > (scale >> 1))
        {
            pout++;
        }
    }
    else
    {
        BUG_ON(scaler_mode!=PIE_XY_SCALER_RAPR);
        /* unsigned product; still wraps if pin*scale exceeds 32 bits */
        pout = (pin * scale) >> 16;
    }
    
    *pixels_out = pout;
}

/**
 * \brief Utility function to calculate XYScale's scale factors
 *
 * \author David Poole
 * \date 10-Jun-2011
 *
 */
/* Keep only the low 20 bits of a scale value (mask 0xfffff, shift 0).
 * NOTE(review): presumably mirrors the XYScale register field layout -
 * confirm against the register definitions.
 */
#define PIE_SCALE_SCALEX_SCALE(x) (((x) & 0xfffff) << 0)
#define PIE_SCALE_SCALEY_SCALE(x) (((x) & 0xfffff) << 0)
void pie_xy_calc_xyscale( struct pie_scale *scale, int scaler_mode )
{
    /* Each hardware scale value is (1<<16) * top / bottom. Linear and Cubic
     * use denominator/numerator; RAPR uses numerator/denominator.
     */
    int x_top, x_bottom;
    int y_top, y_bottom;
    
    if( scaler_mode==PIE_XY_SCALER_LINEAR || scaler_mode==PIE_XY_SCALER_CUBIC ) {
        x_top = scale->x_denominator;
        x_bottom = scale->x_numerator;
        y_top = scale->y_denominator;
        y_bottom = scale->y_numerator;
    }
    else {
        BUG_ON( scaler_mode!=PIE_XY_SCALER_RAPR);
        x_top = scale->x_numerator;
        x_bottom = scale->x_denominator;
        y_top = scale->y_numerator;
        y_bottom = scale->y_denominator;
    }
    
    scale->x_scale = ((1<<16) * x_top) / x_bottom;
    scale->y_scale = ((1<<16) * y_top) / y_bottom;
    
    /* the results must survive masking to the scale field unchanged */
    BUG_ON( PIE_SCALE_SCALEX_SCALE(scale->x_scale)!=scale->x_scale);
    BUG_ON( PIE_SCALE_SCALEY_SCALE(scale->y_scale)!=scale->y_scale);
}

/**
 * \brief  Return max write DMA rows out based on our current platform.
 *
 * \author David Poole
 * \date 17-Nov-2006
 *
 */

static int pie_get_wdma_max_rows( void )
{
    /* single place to look up the write DMA row limit */
    const int max_rows = PIE_WDMA_MAX_ROWS;
    
    return max_rows;
}

/**
 * \brief  Calculate the optimal stripsize taking into account the scaling
 * factor.
 *
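 * \param[in,out] read_stripsize requested input strip size in rows; on
 *          success, replaced with the (possibly smaller) size that fits
 * \param[in] fracY XYScale's Y direction scale factor
 * \param[in] scaler_mode PIE_XY_SCALER_LINEAR, PIE_XY_SCALER_CUBIC or PIE_XY_SCALER_RAPR
 * \param[out] write_stripsize resulting output strip size in rows; set to 0
 *          on failure
 *
 * \return 0 on success, -6 if no input strip size produces an output strip
 *          that fits in the write DMA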
 *
 * \author David Poole
 * \date 20-Sep-2005
 *
 *
 */

int pie_xy_calc_stripsize( int *read_stripsize,
                           int fracY,
                           int scaler_mode,
                           int *write_stripsize )
{
    int rows_in;
    int rows_out;
    int rows_out_limit;
    
    rows_in = *read_stripsize;
    BUG_ON( rows_in <= 0 ||rows_in > PIE_RDMA_MAX_ROWS);
    
    rows_out_limit = pie_get_wdma_max_rows();
    
    /* Start from the requested input size and shrink one row at a time
     * until the scaled output fits in the write DMA.
     */
    do {
        //FixMe: Eric check to see if pie_xy_calc_linear_mode_strip() can be used for cubic mode
        if( scaler_mode!=PIE_XY_SCALER_LINEAR && scaler_mode!=PIE_XY_SCALER_CUBIC ) {
            BUG_ON( scaler_mode!=PIE_XY_SCALER_RAPR);
            rows_out = (rows_in * fracY) / (1<<16);
        }
        else {
            rows_out = pie_xy_calc_linear_mode_strip_size( rows_in, fracY );
        }
        
        if( rows_out <= rows_out_limit ) {
            /* found a fit; report both sizes */
            *read_stripsize = rows_in;
            *write_stripsize = rows_out;
            
            /* check my math */
            BUG_ON( *read_stripsize <= 0 || *read_stripsize > PIE_RDMA_MAX_ROWS);
            BUG_ON( *write_stripsize <= 0 || *write_stripsize > PIE_WDMA_MAX_ROWS);
            
            return 0;
        }
    } while( --rows_in > 0 );
    
    /* ran out of rows; most likely cause of this failure is a scaling
     * factor that is too large. *read_stripsize is left untouched.
     */
    printk("BAD! *read_stripsize=%d\n", *read_stripsize );
    *write_stripsize = 0;
    return -6;
}

/**
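 * \brief Calculate the strip size at each PIE stage (read DMA, ColorShift,
 *        MultiFilter, XYScale, write DMA).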
 *
 *
 * \author David Poole
 * \date 09-Jun-2011
 *
 */

int pie_calc_stripsize( struct pie_strip *strip, int y_scale, int scaler_mode )
{
    /* rows consumed by each optional stage (0 when the stage is off) */
    const int cs_rows = strip->use_cs ? 2 : 0;
    /* TODO add support for 3x3 (eats two rows) */
    const int mf_rows = strip->use_mf ? 4 : 0;
    int rc;
    
    strip->total_extra_rows = cs_rows + mf_rows;
    
    /* forward pass: start with the largest read strip and subtract what
     * each stage eats on the way down to the scaler input
     */
    strip->rdma_in = PIE_RDMA_MAX_ROWS;
    strip->cs_in = strip->rdma_in;
    strip->cs_out = strip->cs_in - cs_rows;
    strip->mf_in = strip->cs_out;
    strip->mf_out = strip->mf_in - mf_rows;
    strip->xy_in = strip->mf_out;
    
    /* NOTE(review): when use_xy is false, xy_out and wdma_out are not
     * written by this function, yet wdma_out is still checked below -
     * confirm the caller initialises them.
     */
    if( strip->use_xy ) {
        rc = pie_xy_calc_stripsize( &strip->xy_in, y_scale, scaler_mode, &strip->xy_out );
        if( rc != 0 ) {
            /* wipe everything, including the use_* flags */
            memset( strip, 0, sizeof(struct pie_strip) );
            return rc;
        }
        strip->wdma_out = strip->xy_out;
        
        /* backward pass: the scaler may have shrunk its input, so rebuild
         * the upstream sizes from xy_in
         */
        strip->mf_out = strip->xy_in;
        strip->mf_in = strip->mf_out + mf_rows;
        strip->cs_out = strip->mf_in;
        strip->cs_in = strip->cs_out + cs_rows;
        strip->rdma_in = strip->cs_in;
    }
    
    /* check for math mistakes */
    BUG_ON( strip->rdma_in > PIE_RDMA_MAX_ROWS);
    BUG_ON( strip->wdma_out > PIE_WDMA_MAX_ROWS);
    
    return 0;
}

/* Strip predictor*/

/* 1<<16; the strip predictors compare fracY against this value to choose
 * between their "enlarging" (fracY < NORMAL) and "reducing" paths
 */
#define NORMAL (1<<16)

/* set non-zero to get per-row printk traces from the strip predictors */
static int STRIPDEBUG = 0;

void strip_start( struct predict_state *s,
                  int strip_height,
                  int y_numerator,
                  int y_denominator,
                  int fracY,
                  int mode )
{
    /* Reset the predictor; any field not assigned below stays zero. */
    memset( s, 0, sizeof(*s) );
    
    printk(KERN_INFO "%s: y_num=%d, y_den=%d mode=%d, fracY=%d\n", __func__,
           y_numerator, y_denominator, mode, fracY);
    
    if( mode == STRIPPR_MODE_LINEAR ) {
        int edge_rows;
        
        s->linear = 1;
        edge_rows = pie_xy_calc_linear_mode_edge_pixels( y_numerator, y_denominator );
        
        /* starting remainder for linear mode */
        s->R = edge_rows * fracY - 32768 + fracY/2;
        
        printk(KERN_INFO "strip predictor remainder initialized to %d\n", s->R );
    }
    else if( mode == STRIPPR_MODE_RAPR ) {
        s->R = NORMAL;
    }
    else if( mode == STRIPPR_MODE_CUBIC ) {
        s->cubic = 1;
        /*Start vertical remainder at 0, use this to transpose */
        s->R = 0;
    }
    /* any other mode leaves the mode flags and R at zero */
    
    s->fracY = fracY;
    s->strip_height = strip_height;
    printk("%s: s->fravY=%d\n", __func__, s->fracY);
}

/* Advance the strip predictor by one strip.
 *
 * s:     predictor state set up by strip_start(); updated in place
 * strip: receives four row indices for this strip:
 *          [0] input strip start row,  [1] input strip end row,
 *          [2] output strip start row, [3] output strip end row
 *        [2] and [3] stay -1 if no output row was shipped.
 *
 * Used when s->linear is set (linear path) or clear (the other path);
 * cubic mode has its own strip_next_cubic().
 */
void strip_next( struct predict_state *s, int strip[] )
{
    int i;
    int R;
    int nextR;
    int iPtr;
    int oPtr;
    int lastInputShipped;
    /* convenience variables; these never change */
    const int fracY = s->fracY;
    const int strip_height = s->strip_height;
    int start_i;
    
    /* restore state */
    iPtr = s->iPtr;
    oPtr = s->oPtr;
    lastInputShipped = s->lastInputShipped;
    R = s->R;
    
    /* convenience variables; these never change */
//    fracY = s->fracY;
//    strip_height = s->strip_height;
    
    /* we would iterate over strips starting here */
    
    // [0] - input strip start row
    // [1] - input strip end row
    // [2] - output strip start row
    // [3] - output strip end row
    strip[0] = iPtr;
    strip[1] = iPtr + strip_height - 1;
    strip[2] = -1;
    strip[3] = -1;
    
    /* linear mode starts at row 1 of the strip, the other path at row 0 */
    if( s->linear ) {
        start_i = 1;
    }
    else {
        start_i = 0;
    }
    
    // #-- iterate down the input pixels in this strip
    /* i is only advanced inside the body, when an input row is retired */
    for( i=start_i ; i<strip_height ; ) {
        
        if( STRIPDEBUG ) {
            printk( "..input row %d+%d  R=%d\n", iPtr, i, R );
        }
        
        /* decisions below test the old R; updates accumulate in nextR */
        nextR = R;
        
        if( !s->linear ) {
            /* R <= fracY: ship an output row */
            if( R-fracY <= 0 ) {
                // #- ship.
                if( STRIPDEBUG ) {
                    printk( " ..ship output line %d: input row = %d: R=%d",
                            oPtr, i, R );
                    if( R-fracY == 0 ) {
                        printk( " (perfect)" );
                    }
                    printk( "\n" );
                }
                
                if( strip[2] == -1 ) {
                    // this is the first time we've seen this output strip;
                    // mark its start
                    strip[2]=oPtr;
                }
                
                strip[3]=oPtr;
                oPtr++;
                nextR += NORMAL;
                /* on an exact hit the input row is also retired below, so
                 * count it as shipped
                 */
                lastInputShipped = i + (R-fracY == 0);
            }
            
            /* R >= fracY: retire the input row (both branches run when
             * R == fracY)
             */
            if( R-fracY >= 0 ) {
                // #- next input line
                if( STRIPDEBUG ) {
                    printk( "..retire input line %d+%d\n", iPtr, i );
                }
                nextR -= fracY;
                /* new line in new predictor is actually:
                 *  $i+= 1/(1<<$prescale);
                 * but I'm not using prescale so leave alone.
                 */
                i++;
            }
        }
        else {
            /* linear mode */
            if (fracY < NORMAL) { // # enlarging
                /* every pass ships an output row; the input row is retired
                 * only once the accumulated remainder exceeds NORMAL
                 */
                if( STRIPDEBUG ) {
                    printk( " ..ship output line %d: input row = %d: R=%d\n",
                            oPtr, i, R );
                }
                if( strip[2]==-1 ) {
                    strip[2]=oPtr;
                }
                strip[3]=oPtr;
                oPtr++;
                nextR += fracY;
                if (nextR > NORMAL) {
                    nextR -= NORMAL;
                    if( STRIPDEBUG ) {
                        printk( "..retire input line %d\n", iPtr+i );
                    }
                    i++;
                }
            } else { // # reducing
                /* every pass retires an input row; an output row is
                 * shipped only while R is at most NORMAL
                 */
                if (R <= NORMAL) {  // # output a line
                    if( STRIPDEBUG ) {
                        printk( " ..ship output line %d: input row = %d: R=%d\n", oPtr, i, R );
                    }
                    if( strip[2]==-1 ) {
                        strip[2]=oPtr;
                    }
                    strip[3]=oPtr;
                    oPtr++;
                    nextR += (fracY - NORMAL);
                } else {
                    nextR -= NORMAL;
                }
                if( STRIPDEBUG ) {
                    printk(  "..retire input line %d+%d\n", iPtr, i );
                }
                i++;
            }
        }
        
        R = nextR;
    }
    
    /* don't need to save R in strip[] (leave commented out to keep
     * matching the perl predictor)
     */
//        strip[4] = R;
    if (s->linear) {
        /* next strip starts on the last row of this one */
        iPtr += (strip_height - 1);
    } else {
        /* next strip starts at the last input row that shipped; add fracY
         * back to R for each row between that point and where the loop
         * stopped
         */
        iPtr += lastInputShipped;
        R += (fracY * (i-lastInputShipped));
    }
    
    /* end of iterating over strips */
    
    /* save state */
    s->iPtr = iPtr;
    s->oPtr = oPtr;
    s->lastInputShipped = lastInputShipped;
    s->R = R;
}

void strip_next_cubic(struct predict_state *s, int strip[])
{
    /* Advance the cubic-mode strip predictor by one strip. strip[] gets
     * [0]/[1] = first/last input row and [2]/[3] = first/last output row
     * ([3] stays -1 if nothing was shipped).
     */
    const int first_row = 3; /* bicubic interpolation requires 4 line window */
    const int fracY = s->fracY;
    const int last_row = s->strip_height - 1;
    int in_base = s->iPtr;
    int out_row = s->oPtr;
    int rem = s->R;
    int rem_next;
    int row;
    
    /* whole units in the remainder are input rows to skip before starting */
    while( rem >= NORMAL ) {
        rem -= NORMAL;
        in_base++;
    }
    rem_next = rem;
    
    strip[0] = in_base;
    strip[1] = in_base + last_row;
    strip[2] = out_row;
    strip[3] = -1;
    
    /* walk the input rows of this strip; row only moves when an input row
     * is retired
     */
    row = first_row;
    while( row <= last_row ) {
        if( STRIPDEBUG ) {
            printk("..input row %d+%d R=%d\n", in_base, row, rem);
        }
        
        if( fracY >= NORMAL ) {
            /* reducing: every pass retires an input row; ship an output
             * row only while the remainder is below NORMAL
             */
            if( rem >= NORMAL ) {
                rem_next -= NORMAL;
            }
            else {
                if( STRIPDEBUG ) {
                    printk(".. ship output line %d, input row=%d: R=%d\n", out_row, row, rem);
                }
                rem_next += (fracY - NORMAL);
                strip[3] = out_row;
                out_row++;
            }
            
            if( STRIPDEBUG ) {
                printk("..retire input line %d+%d\n %d\n", in_base, row, rem);
            }
            row++;
        }
        else {
            /* enlarging: every pass ships an output row; retire the input
             * row once the accumulated remainder reaches NORMAL
             */
            if( STRIPDEBUG ) {
                printk("..ship output line %d: input row = %d: R=%d\n",out_row, row, rem);
            }
            
            strip[3] = out_row;
            out_row++;
            rem_next += fracY;
            if( rem_next >= NORMAL ) {
                rem_next -= NORMAL;
                if( STRIPDEBUG ) {
                    printk("..retire input line %d\n", in_base+row);
                }
                row++;
            }
        }
        
        rem = rem_next;
    }
    
    /* save state; the next strip starts after the rows retired here */
    s->iPtr = in_base + (row - first_row);
    s->oPtr = out_row;
    s->R = rem;
}

/* indices of values in strip[] array as returned by strip_next() */
#define STRIP_READ_FIRST  0  /* first input row of the strip */
#define STRIP_READ_LAST   1  /* last input row of the strip */
#define STRIP_WRITE_FIRST 2  /* first output row of the strip */
#define STRIP_WRITE_LAST  3  /* last output row of the strip */

/* The list of strip information*/
/* One node per strip in a singly linked list. */
struct strip_info{
    int start_row;  /* index of the first row in the strip */
    int end_row;    /* index of the last row in the strip (inclusive) */
    struct strip_info *next; /* next strip; 0 terminates the list */
};

/* Generate a static input descriptor list that encapsulates all strips for
 * processing ALL the data of the image to be processed. strip_start() and
 * strip_next() are used to generate the row data for each strip.
 */
static int generate_test_idma_descriptors( int num_descriptors, int bytes_per_row,
                                           dma_addr_t buf_phy_addr, struct strip_info *strip_head,
                                           DMA_descriptor **device_idma_descriptor,
                                           dma_addr_t *device_idma_descriptor_phys)
{
    /* Build one input DMA descriptor per strip in strip_head.
     *
     * num_descriptors:  number of descriptors to create; must equal the
     *                   number of strips in the list
     * bytes_per_row:    size of one image row in bytes
     * buf_phy_addr:     DMA address of the input buffer
     * strip_head:       linked list of strip row ranges (0-terminated)
     * device_idma_descriptor / device_idma_descriptor_phys:
     *                   receive the descriptor list and its DMA address
     *
     * Returns 0 on success, or the non-zero value returned by
     * create_descriptor_list() / set_descriptor_direct().
     */
    DMA_descriptor *descriptor = NULL;
    dma_addr_t idma_descriptor_phys = 0;
    uint32_t flags = 0;
    int retval;
    struct strip_info *cur_strip;
    int dar1,drcr1;
    int strip_cnt = 0;
    
    printk("%s: bytes_per_row=%d, num_descriptors=%d\n", __func__, bytes_per_row,num_descriptors);
    
    retval = create_descriptor_list(num_descriptors,
                                    new_8word_axi,
                                    &descriptor,
                                    &idma_descriptor_phys,
                                    NULL);
    if (retval != 0)
    {
        /* nothing valid to report about the list; don't print it */
        printk(KERN_ERR "%s: create_descriptor_list failed, retval=%d\n", __func__, retval);
        return retval;
    }
    /* dma_addr_t may be 32 or 64 bits wide; widen it explicitly for printk */
    printk(KERN_INFO "%s: virt_addr returned was 0x%p, phys_addr was 0x%llX\n",__func__,
           descriptor, (unsigned long long)idma_descriptor_phys);
    
    *device_idma_descriptor_phys = idma_descriptor_phys;
    *device_idma_descriptor = descriptor;
    
    // descriptors created, now set up the data fields
    for (cur_strip = strip_head; cur_strip != NULL; cur_strip = cur_strip->next)
    {
        printk("%s: strip[%d] start_row= %d end_row=%d \n", __func__,
               strip_cnt, cur_strip->start_row, cur_strip->end_row);
        /* still need to implement someday - make sure if you have a last data buffer that is smaller
           than the other fixed size buffers (since it's the last one), that we can pass smaller size */
        // if last descriptor, calculate actual data size FIXME
        if (cur_strip->next == NULL)
        {
            /* last strip: stop the DMA and raise an interrupt when done */
            flags = DMA_STOP_WHEN_DONE | DMA_INT_WHEN_DONE;
        }
        
        /* address of the row in the buffer where this strip starts; only
         * the low 32 bits are kept
         */
        dar1 = (uint32_t)(buf_phy_addr + cur_strip->start_row * bytes_per_row);
        
        /* +1 to convert from index to count */
        /* 16-June-05 ; ()'s are important! Strip indices can be negative numbers */
        drcr1 = (cur_strip->end_row - cur_strip->start_row + 1)* bytes_per_row;
        
        printk(KERN_ERR "%s; set_descriptor_direct  at strip_cnt=%d, dar1=%x drcr1=%d\n",
               __func__, strip_cnt,dar1, drcr1);
        
        /* more strips than descriptors: stop before writing past the list */
        BUG_ON(strip_cnt >= num_descriptors);
        
        //setup descriptor for current strip
        retval = set_descriptor_direct(descriptor, strip_cnt, drcr1, dar1, flags);
        if(retval){
            printk(KERN_ERR "%s; set_descriptor_direct fails at strip_cnt=%d, dar1=%x drcr1=%d\n",
                   __func__, strip_cnt,dar1, drcr1);
            return retval;
        }
        
        strip_cnt++;
        printk("%s.%d\n", __func__, __LINE__);
    }
    printk("%s.%d\n", __func__, __LINE__);
    /* every descriptor must have been filled in */
    BUG_ON(strip_cnt != num_descriptors);
    return 0;
}


/* Generate a static output descriptor list that encapsulates all strips for
 * processing ALL the data of the image to be generated. strip_start() and
 * strip_next() are used to generate the row data for each strip.
 *
 * buf_phy_addr: base address of output buffer
 * strip_head: linked list of strip row information
 *
 */

static int generate_test_odma_descriptors( int num_descriptors,
                                           int bytes_per_row,
                                           dma_addr_t buf_phy_addr,
                                           struct strip_info *strip_head,
                                           DMA_descriptor **device_odma_descriptor,
                                           dma_addr_t *device_odma_descriptor_phys)
{
    /* Build one output DMA descriptor per strip in strip_head.
     *
     * num_descriptors:  number of descriptors to create; must equal the
     *                   number of strips in the list
     * bytes_per_row:    size of one image row in bytes
     * buf_phy_addr:     DMA address of the output buffer
     * strip_head:       linked list of strip row ranges (0-terminated)
     * device_odma_descriptor / device_odma_descriptor_phys:
     *                   receive the descriptor list and its DMA address
     *
     * Returns 0 on success, or the non-zero value returned by
     * create_descriptor_list() / set_descriptor_direct().
     */
    DMA_descriptor *descriptor = NULL;
    dma_addr_t odma_descriptor_phys = 0;
    uint32_t flags = 0;
    int retval;
    struct strip_info *cur_strip;
    int dar1,drcr1;
    int strip_cnt = 0;
    
    printk(KERN_NOTICE "%s: num_descritors=%d\n", __func__, num_descriptors);
    retval = create_descriptor_list(num_descriptors,
                                    new_8word_axi,
                                    &descriptor,
                                    &odma_descriptor_phys,
                                    NULL);
    if (retval != 0)
    {
        /* nothing valid to report about the list; don't print it */
        printk(KERN_ERR "%s: create_descriptor_list failed, retval=%d\n", __func__, retval);
        return retval;
    }
    /* dma_addr_t may be 32 or 64 bits wide; widen it explicitly for printk */
    printk(KERN_INFO "%s virt_addr returned was 0x%p, phys_addr was 0x%llX\n",__func__,
           descriptor, (unsigned long long)odma_descriptor_phys);
    
    *device_odma_descriptor = descriptor;
    *device_odma_descriptor_phys = odma_descriptor_phys;
    
    printk("%s: bytes_per_row=%d\n", __func__, bytes_per_row);
    for (cur_strip = strip_head; cur_strip != NULL; cur_strip=cur_strip->next)
    {
        printk("%s: strip[%d] start_row = %d end_row= %d\n", __func__,
               strip_cnt, cur_strip->start_row, cur_strip->end_row);
        /* still need to implement someday - make sure if you have a last data buffer that is smaller
           than the other fixed size buffers (since it's the last one), that we can pass smaller size */
        // if last descriptor, calculate actual data size FIXME
        if (cur_strip->next == NULL)
        {
            /* last strip: stop the DMA and raise an interrupt when done */
            flags = DMA_STOP_WHEN_DONE | DMA_INT_WHEN_DONE;
        }
        /* address of the row in the buffer where this strip starts; only
         * the low 32 bits are kept
         */
        dar1 = (uint32_t)(buf_phy_addr + cur_strip->start_row * bytes_per_row);
        
        /* +1 to convert from index to count */
        /* 16-June-05 ; ()'s are important! Strip indices can be negative numbers */
        drcr1 = (cur_strip->end_row - cur_strip->start_row + 1)* bytes_per_row;
        
        printk("dar1=%08x\n, size=%d\n", dar1, drcr1);
        
        /* more strips than descriptors: stop before writing past the list */
        BUG_ON(strip_cnt >= num_descriptors);
        
        //setup descriptor for current strip
        retval = set_descriptor_direct(descriptor, strip_cnt, drcr1, dar1, flags);
        printk("%s.%d\n", __func__, __LINE__);
        if(retval){
            printk(KERN_ERR "%s; set_descriptor_direct fails at strip_cnt=%d, dar1=%x drcr1=%d\n",
                   __func__, strip_cnt,dar1, drcr1);
            return retval;
        }
        strip_cnt++;
    }
    /* every descriptor must have been filled in */
    BUG_ON(strip_cnt != num_descriptors);
    
    return 0;
}

/* Options consumed by calc_strip_data() when running the strip predictor. */
struct strippr_option {
    /* RAPR or Linear */
    /* passed to pie_xy_calc_fracy(), which also accepts PIE_XY_SCALER_CUBIC */
    int xy_scaler_mode;
    
    /* fraction representing how much to scale Y (e.g., 1/3, 7/22, 355/113) */
    uint32_t y_numerator;
    uint32_t y_denominator;
    
    /* use MultiFilter eats 2 or 4 lines depending on radius */
    bool use_mf;
    
    /* MultiFilter window can be 3x3 or 5x5 (maybe higher by the time you read
     * this)
     */
    int mf_radius;
    
    /* ColorShift eats 2 lines */
    bool use_cs;
    
    /* how many rows to run through strip predictor */
    uint32_t expected_rows;
};

static void pie_xy_calc_fracy( int y_numerator, int y_denominator, int scaler_mode, int *fracY )
{
    /* Y scale factor is (1<<16) * dividend / divisor, masked to the scale
     * field. Linear and Cubic use denominator/numerator; every other mode
     * uses numerator/denominator.
     */
    int dividend;
    int divisor;
    
    if( scaler_mode==PIE_XY_SCALER_LINEAR || scaler_mode==PIE_XY_SCALER_CUBIC) {
        dividend = y_denominator;
        divisor = y_numerator;
    }
    else {
        dividend = y_numerator;
        divisor = y_denominator;
    }
    
    *fracY = PIE_SCALE_SCALEY_SCALE( ((1<<16) * dividend) / divisor );
}


/* Find rows data for each input output strips, return number of strips*/
static int calc_strip_data( struct strippr_option options,
                            int *strip_size,
                            struct strip_info **ret_in_strip_head,
                            struct strip_info **ret_out_strip_head,
                            int *total_rows_required,
                            int *total_rows_generated)
{
    
    int pie_fracY;
    int xy_strip_size, pie_strip_size_out;
    int extra_rows_for_mf;
    int extra_rows_for_cs;
    struct predict_state strip_state;
    int strippr_mode;
    struct strip_info *in_strip_cur = 0;
    struct strip_info *out_strip_cur = 0;
    struct strip_info *tmp_strip;
    
    int num_strips;
    int last_row_index;
    int prev_strip[4];
    uint32_t read_overlap;
    char read_overlap_str[64];
    struct predict_state *st=&strip_state;
    int expected_rows = options.expected_rows;
    int strip[4];
    
    pie_xy_calc_fracy( options.y_numerator, options.y_denominator,
                       options.xy_scaler_mode, &pie_fracY );
    
    /*If DSMF is enabled, the number of rows in each input strip should be increased by 10, and
     * the row number of the 1st line in each input strip after the first one should be
     * decreased by 10
     *
     */
    extra_rows_for_mf = 0; //by pass mf (Multi filter)
    
    /*If Color Shift is enabled in 2-line mode, the number of rows in each inout strip should
     * increased by 1 and the row number of the 1st line in each input after the first one should
     * be decreaed by 1. If color shift is enabled in 3-line mode, the number of rows in each
     * input strip should be increased by 2 and the row number  of the 1st line in
     * each input strip after the first one should be decreased by 2.
     * 88PA6270RA PG p1300
     */
    extra_rows_for_cs = 0; //by pass cs (Color Shift)
    
    /* start big, try to find the best possible fit */
    xy_strip_size = (PIE_RDMA_MAX_ROWS - extra_rows_for_mf) - extra_rows_for_cs;
    pie_xy_calc_stripsize( &xy_strip_size, pie_fracY,
                           options.xy_scaler_mode, &pie_strip_size_out );
    
    printk(KERN_INFO "strippr xy_strip_size=%d y=%d/%d fracY=%d\n", xy_strip_size,
           options.y_numerator, options.y_denominator, pie_fracY );
    
    *strip_size = xy_strip_size;
    
    strippr_mode = STRIPPR_MODE_RAPR;
    if (options.xy_scaler_mode == PIE_XY_SCALER_RAPR)
    {
        strippr_mode = STRIPPR_MODE_RAPR;
    }
    else if (options.xy_scaler_mode == PIE_XY_SCALER_LINEAR)
    {
        strippr_mode = STRIPPR_MODE_LINEAR;
    }
    else if (options.xy_scaler_mode == PIE_XY_SCALER_CUBIC)
    {
        strippr_mode = STRIPPR_MODE_CUBIC;
    }
    else
    {
        printk(KERN_ERR " invalid options.xy_scaler_mode=%d\n", options.xy_scaler_mode);
        BUG_ON(0);
    }
    
    strip_start( &strip_state, xy_strip_size,
                 options.y_numerator, options.y_denominator, pie_fracY, strippr_mode );
    
    /* Below is from find_last_strip()
     */
    
    printk(KERN_INFO "%s: rows to processed %d\n", __FUNCTION__, expected_rows );
    
    /* run the predictor until we hit a state where the expected_rows'th row
     * is included
     */
    
    strip[0] = strip[1] = strip[2] = strip[3] = 0;
    
    /* -1 to convert from count to index */
    last_row_index = expected_rows - 1;
    
    num_strips = 0;
    
    memcpy(prev_strip, strip, sizeof(prev_strip));
    
    while(1)
    {
    	if (!st->cubic)
            strip_next( st, strip );
    	else
            strip_next_cubic(st, strip);
        
        /* +1 to convert from index to count */
        read_overlap = (prev_strip[STRIP_READ_LAST] - strip[STRIP_READ_FIRST])+1;
        
        /* don't bother with overlap on first strip */
        memset( read_overlap_str, 0, sizeof(read_overlap_str) );
        if (num_strips)
        {
            snprintf( read_overlap_str, 63, "%d", read_overlap );
        }
        else
        {
            /* there is no previous strip so don't write a number which could
             * be confusing to me
             */
            strncpy( read_overlap_str, "(no)", 63 );
        }
        
        printk(KERN_INFO "strip %d=[%d,%d][%d,%d] readlen=%d read_overlap=%s writelen=%d\n",
               num_strips,
               strip[STRIP_READ_FIRST], strip[STRIP_READ_LAST],
               strip[STRIP_WRITE_FIRST], strip[STRIP_WRITE_LAST],
               strip[STRIP_READ_LAST]-strip[STRIP_READ_FIRST]+1,
               
               read_overlap_str ,
               strip[STRIP_WRITE_LAST]-strip[STRIP_WRITE_FIRST]+1
            );
        //Store the strip result
        tmp_strip = kmalloc(sizeof (struct strip_info), GFP_KERNEL);
        BUG_ON(tmp_strip == 0);
        tmp_strip->start_row =  strip[STRIP_READ_FIRST];
        tmp_strip->end_row =  strip[STRIP_READ_LAST];
        tmp_strip->next = 0;
        if (in_strip_cur==0)
        {
            in_strip_cur = tmp_strip;
            *ret_in_strip_head = in_strip_cur;
        }
        else
        {
            in_strip_cur->next = tmp_strip;
            in_strip_cur = tmp_strip;
        }
        
        tmp_strip = kmalloc(sizeof (struct strip_info), GFP_KERNEL);
        BUG_ON(tmp_strip == 0);
        tmp_strip->start_row =  strip[STRIP_WRITE_FIRST];
        tmp_strip->end_row =  strip[STRIP_WRITE_LAST];
        tmp_strip->next = 0;
        if(out_strip_cur == 0)
        {
            out_strip_cur = tmp_strip;
            *ret_out_strip_head = out_strip_cur;
        }
        else
        {
            out_strip_cur->next = tmp_strip;
            out_strip_cur = tmp_strip;
        }
        num_strips += 1;
        
        /* beware of stupid mistakes */
        BUG_ON( num_strips >= 100000 );
        
        /* does this strip contain the last row? */
        if (strip[STRIP_READ_FIRST] <= last_row_index
            && strip[STRIP_READ_LAST] >= last_row_index)
        {
            break;
        }
        memcpy(prev_strip, strip, sizeof(prev_strip));
    }
    
    printk(KERN_NOTICE "%s done strip=[%d,%d][%d,%d] num_strips=%d\n", __FUNCTION__,
           strip[STRIP_READ_FIRST], strip[STRIP_READ_LAST],
           strip[STRIP_WRITE_FIRST], strip[STRIP_WRITE_LAST],
           num_strips );
    *total_rows_required = strip[STRIP_READ_LAST]+1;
    *total_rows_generated = strip[STRIP_WRITE_LAST]+1;
    return num_strips;
}



//DECLARE_COMPLETION(test_odma_interrupt_complete);
/* Signalled by test_idma_interrupt_callback() whenever an IDMA interrupt
 * reports EndOfStrip; run_pie_xyscaler_test() blocks on it after starting the
 * input DMA.
 * NOTE(review): nothing visible here re-initialises it between test runs, and
 * it is completed once per EndOfStrip interrupt - confirm that is intended.
 */
DECLARE_COMPLETION(test_idma_interrupt_complete);
/* ODMA interrupt callback for the scaler test.
 *
 * stuff is treated as a struct odma_interrupt_info pointer; when it is not
 * NULL its status fields are logged. No completion is signalled from here
 * (the test waits on the IDMA completion only). Always returns 0.
 */
static int test_odma_interrupt_callback(void *stuff)
{
    struct odma_interrupt_info *info = stuff;

    if (info == NULL)
        return 0;

    printk("%s: EOS=%d, DESC=%d, CC=%d, Own=%d, LE=%d, OORE=%d RRERR=%d BRERR=%d\n",
           __func__, info->EndOfStrip, info->Desc,
           info->ClearComplete, info->Own, info->LengthErr, info->OutOfRangeErr,
           info->RRespErr, info->BRespErr);

    return 0;
}

/* IDMA interrupt callback for the scaler test.
 *
 * stuff is treated as a struct idma_interrupt_info pointer; when it is not
 * NULL its status fields are logged and, if EndOfStrip is set,
 * test_idma_interrupt_complete is completed. Always returns 0.
 */
static int test_idma_interrupt_callback(void *stuff)
{
    struct idma_interrupt_info *info = stuff;

    if (info == NULL)
        return 0;

    printk("%s: instance=%d, EOS=%d, DESC=%d, CC=%d, Own=%d, LE=%d, OORE=%d RRERR=%d BRERR=%d\n",
           __func__, info->instance, info->EndOfStrip, info->Desc,
           info->ClearComplete, info->Own, info->LengthErr, info->OutOfRangeErr,
           info->RRespErr, info->BRespErr);

    /* wake the test thread once a strip has finished */
    if (info->EndOfStrip)
        complete(&test_idma_interrupt_complete);

    return 0;
}

/* Hex-dump a one-byte-per-pixel buffer of height rows by width bytes through
 * printk. Each row is framed by dashed banner lines (rows are numbered from 1)
 * and a "[column]" marker is printed before every 32nd byte.
 */
static void dump_mono_data(uint8_t *buf, int height, int width)
{
    int row, col;
    uint8_t *line;

    for (row = 0; row < height; row++) {
        line = buf + row*width;

        printk("\n------------row %d ---------------", row+1);
        for (col = 0; col < width; col++) {
            if (col % 32 == 0)
                printk("\n[%d]", col);
            printk("%02x ", line[col]);
        }
        printk("\n-----------------------------------");
    }
}

/* Hex-dump a packed colour buffer of height rows by width pixels through
 * printk.
 *
 * fmt == POGO_FMT_RGB selects 3 bytes per pixel (printed as 6 hex digits);
 * every other format is dumped as 4 bytes per pixel (8 hex digits). Each row
 * is framed by dashed banner lines (rows numbered from 1) and a "[column]"
 * marker is printed before every 16th pixel.
 *
 * color_swap is accepted for interface compatibility but is currently unused.
 *
 * Pixels are assembled from individual bytes instead of being read through a
 * cast uint32_t pointer: the cast performed misaligned 32-bit loads for 3-byte
 * pixels and read one byte past the end of the buffer on the very last RGB
 * pixel. The 3-byte value is built low byte first, which is what the old
 * 32-bit load produced on a little-endian CPU.
 */
static void dump_color_data(uint8_t *buf, int height, int width, int fmt, int color_swap)
{
    int row, col;
    int bpp = (fmt == POGO_FMT_RGB) ? 3 : 4;
    const uint8_t *px;
    uint32_t value;

    for (row = 0; row < height; row++)
    {
        printk("\n------------row %d ---------------", row+1);
        for (col = 0; col < width; col++)
        {
            if ((col%16) == 0)
            {
                printk("\n[%d]", col);
            }

            px = buf + (row*width*bpp) + (col*bpp);
            if (bpp == 3)
            {
                value = (uint32_t)px[0]
                      | ((uint32_t)px[1] << 8)
                      | ((uint32_t)px[2] << 16);
                printk("%06x ", value);
            }
            else
            {
                /* native-endian 32-bit value without an unaligned/aliasing load */
                memcpy(&value, px, sizeof(value));
                printk("%08x ", value);
            }
        }
        printk("\n-----------------------------------");
    }
}


/* Draw the outline of a box with the given fill byte into a buffer whose rows
 * are width bytes long. Only rows 3..10 are touched:
 *   rows 3,4 and 9,10 : columns 4 .. width-5 (top and bottom edges)
 *   rows 5..8         : two bytes at column 4 and two bytes at column width-10
 * The caller guarantees at least 12 rows and 12 columns.
 */
static void make_pattern_square(uint8_t *buf, int width, int fill)
{
    int row;

    /* top edge */
    memset(buf + width*3 + 4, fill, width-8);
    memset(buf + width*4 + 4, fill, width-8);

    /* left and right sides */
    for (row = 5; row <= 8; row++)
    {
        memset(buf + width*row + 4, fill, 2);
        memset(buf + width*row + width-10, fill, 2);
    }

    /* bottom edge */
    memset(buf + width*9 + 4, fill, width-8);
    memset(buf + width*10 + 4, fill, width-8);
}

/* Make pattern for input buffer.
 *
 * buf     - one-byte-per-pixel buffer of height rows by width bytes
 * height  - number of rows, must be >= 12 (BUG_ON otherwise)
 * width   - bytes per row, must be >= 12 (BUG_ON otherwise)
 * pattern - 1, 2 or 3: box outline drawn with 0x55, 0x60 or 0x08 on a zeroed
 *           background; any other value: rows 4 .. height-5 filled with 0x55,
 *           the first and last four rows left at zero
 *
 * The whole buffer is cleared to zero first.
 */
void make_pattern(uint8_t *buf, int height, int width, int pattern)
{
    BUG_ON(height < 12);
    BUG_ON(width <  12);

    /* clear all; size_t arithmetic so a large image cannot overflow int */
    memset(buf, 0, (size_t)width * height);

    switch(pattern)
    {
    case 1:
        make_pattern_square(buf, width, 0x55);
        break;

    case 2:
        make_pattern_square(buf, width, 0x60);
        break;

    case 3:
        make_pattern_square(buf, width, 0x08);
        break;

    default:
        /* middle lines (height-8 of them) at 0x55 */
        memset(buf + (size_t)width*4, 0x55, (size_t)width * (height-8));
        break;
    }
}
/*
 * Setup and enable XY scaler block
 *
 * pie_handle - handle whose XY scaler settings are programmed
 * scale_mode - PIE_XY_SCALER_LINEAR selects Linear mode; any other value
 *              selects RAPR (Running Average Pixel Replication)
 * scale      - x/y scale values plus the numerators/denominators they came from
 * pixels_out - output column count
 *
 * Takes the scaler out of bypass and finishes with pie_do_configure(), which
 * sends the configuration to the ASIC. Always returns 0.
 *
 * Setup sequence taken from function pie_xy_scale.
 */
int xyscaler_config(struct pie_handle_t *pie_handle,
                    int scale_mode,
                    struct pie_scale scale,
                    int pixels_out)
{
    /* 0xff is black for laser, white for RGB color scans, and (I think) white
     * for ink copies
     */
    const int fill_value = 0xFF;
    /* RAPR mode leaves the edge pixel counts and transpose values at zero */
    int edge_x = 0;
    int edge_y = 0;
    int transpose_x = 0;
    int transpose_y = 0;

    /* horizontal and vertical scale setup */
    pie_xyscale_set_scale(pie_handle, scale.x_scale, scale.y_scale);
    pie_xyscale_set_column_cnt(pie_handle, pixels_out);
    pie_xyscale_set_fill_color(pie_handle, fill_value, fill_value, fill_value);

    /* 26-May-05 davep ; set to full roundup */
    /* davep 23-Jun-2006 ; XXX done while shotgun debugging scaler problems.
     * Will this break Linear?
     */
    pie_xyscale_set_roundup(pie_handle, PIE_SCALE_RUP_ALL);

    if (scale_mode != PIE_XY_SCALER_LINEAR)
    {
        /* Disable LM mode so we're now set to RAPR mode */
        pie_xyscale_set_linear_mode(pie_handle, PIE_RAPR_MODE);
    }
    else
    {
        /* davep 15-Jun-2006 ; turn on linear mode and hope for the best... */
        pie_xyscale_set_linear_mode(pie_handle, PIE_LINEAR_MODE);
        printk(KERN_NOTICE "%s:%d\n", __func__,__LINE__);

        /* Linear mode needs the Edge Pixels field and the transpose values.
         * edge = ceiling( (scale-1)/2 )
         */
        edge_x = pie_xy_calc_linear_mode_edge_pixels(scale.x_numerator,
                                                     scale.x_denominator);
        BUG_ON(edge_x <0 || edge_x >15);
        printk(KERN_NOTICE "%s:x_edge_pixels=%d\n", __func__, edge_x);

        pie_xyscale_set_edge_pixel_cnt(pie_handle, edge_x);

        edge_y = pie_xy_calc_linear_mode_edge_pixels(scale.y_numerator,
                                                     scale.y_denominator);
        printk(KERN_NOTICE "%s:y_edge_pixels=%d\n", __func__, edge_y);

        /* Formula is straight from the XYScale MA. Temporaries are used so the
         * results can be printed, because these two registers are write only.
         */
        transpose_x = edge_x * scale.x_scale - 32768 + scale.x_scale/2;
        transpose_y = edge_y * scale.y_scale - 32768 + scale.y_scale/2;
        printk("scale_tx=0x%x scale_ty=0x%x\n", transpose_x, transpose_y );
    }

    pie_xyscale_set_bypass(pie_handle, false);

    pie_xyscale_set_transpose(pie_handle, transpose_x, transpose_y);

    /* done calculating values - now send all configuration registers to the ASIC */
    pie_do_configure(pie_handle);

    return 0;
}

/*
 * Setup and enable XYC scaler block
 *
 * pie_handle         - handle whose XYC scaler settings are programmed
 * scale              - x/y scale values plus the numerators/denominators they
 *                      came from
 * strip_heigh_rows   - not used by this function
 * strip_width_pixels - not used by this function
 * pixels_out         - output column count
 *
 * Takes the XYC scaler out of bypass and finishes with pie_do_configure().
 * Always returns 0.
 */
int xycscaler_config(struct pie_handle_t *pie_handle,
                     struct pie_scale scale,
                     int strip_heigh_rows,
                     int strip_width_pixels,
                     int pixels_out)
{
    /* 0xff is black for laser, white for RGB color scans, and (I think) white
     * for ink copies
     */
    const int fill_value = 0xFF;
    int edge_x;
    int edge_y;
    int transpose_x;
    int transpose_y;

    /* horizontal and vertical scale setup */
    pie_xy_c_scale_set_scale(pie_handle, scale.x_scale, scale.y_scale);
    pie_xy_c_scale_set_output_column_cnt(pie_handle, pixels_out);
    pie_xy_c_scale_set_fill_color(pie_handle, fill_value, fill_value, fill_value);

    /* edge = ceiling( (scale-1)/2 ) */
    edge_x = pie_xy_calc_linear_mode_edge_pixels(scale.x_numerator,
                                                 scale.x_denominator);
    BUG_ON(edge_x <0 || edge_x >15);
    printk(KERN_NOTICE "%s:x_edge_pixels=%d\n", __func__, edge_x);

    edge_y = pie_xy_calc_linear_mode_edge_pixels(scale.y_numerator,
                                                 scale.y_denominator);
    printk(KERN_NOTICE "%s:y_edge_pixels=%d\n", __func__, edge_y);

    /* Formula is straight from the XYScale MA. Temporaries are used so the
     * results can be printed, because these two registers are write only.
     */
    transpose_x = edge_x * scale.x_scale - 32768 + scale.x_scale/2;
    transpose_y = edge_y * scale.y_scale - 32768 + scale.y_scale/2;
    printk("scale_tx=0x%x scale_ty=0x%x\n", transpose_x, transpose_y);

    pie_xy_c_scale_set_transpose(pie_handle, transpose_x, transpose_y);

    pie_xy_c_scale_set_bypass(pie_handle, false);

    pie_do_configure(pie_handle);

    return 0;
}


/*
 * main test entry
 */
int run_pie_xyscaler_test(int pogo_fmt_type_in, int pogo_fmt_type_out,
                          int pixels_per_row, int rowheight,
                          int pogo_bpp_input, int color_swap,
                          int scale_num, int scale_den,
                          int scale_mode)
{
    int retval, i;
    int in_buf_size;
    int total_num_pixels;
    int pixels_out;
    int strip_size;
    
    //For input buffer, multiple channels
    dma_addr_t in_buffer_phys_addr[3];
    uint32_t *in_buffer_virt_addr[3];
    struct strip_info *in_strip_head = 0;
    DMA_descriptor *in_device_idma_descriptor_set[3];
    dma_addr_t in_device_idma_descriptor_phys_set[3];
    
    //For output, single channels
    int out_buf_size;
    dma_addr_t out_buffer_phys_addr;
    uint32_t *out_buffer_virt_addr;
    struct strip_info *out_strip_head = 0;
    DMA_descriptor *out_device_odma_descriptor;
    dma_addr_t out_device_odma_descriptor_phys;
    
    struct strippr_option options;
    int num_strips;
    
    int idma_Bpp, odma_Bpp;
    int num_idma_channels;
    int input_linewidth, output_linewidth;
    
    struct pie_scale scale;
    int extra_pad;
    int expected_total_rows_out;
    int actual_total_rows_out;
    int actual_total_rows_in;
    
    struct pie_handle_t *pie_handle;
    
    printk("%s is called! scale_mode=%d, scale=%d/%d\n", __func__,
           scale_mode, scale_num, scale_den);
    
    /* Verify parameters for only supported settings*/
    if (pogo_fmt_type_in != POGO_FMT_MONO &&
        pogo_fmt_type_in != POGO_FMT_PLANAR)
    {
        printk(KERN_ERR "No support for pogo format input type= %d. Only support MONO and PLANAR. Abort!",pogo_fmt_type_in);
        return -1;
    }
    
    if (pogo_bpp_input!=POGO_8BPP)
    {
        printk(KERN_ERR "No support for pogo format input bpp= %d. Only support 8BPP. Abort!",pogo_bpp_input);
        return -1;
    }
    
    
    /* davep 07-Oct-2013 ; Linear mode HW only works down to 1/2. If we're
     * scaling down, we force RAPR. If we're scaling up, we usually want Linear
     * but RAPR also works. Verify to avoid confusing math errors deeper in
     * the PIE setup code.
     *
     * Eric: There are asserts in various places,so just follow the practice.
     */
    if (scale_num < scale_den)
    {
        if (PIE_XY_SCALER_RAPR != scale_mode)
        {
            printk("Only PIE_XY_SCALER_RAPR is expected for scaling down, abort\n");
            return -1;
        }
    }
    
    
    pie_pogo_fmt_type_parms(pogo_fmt_type_in, pogo_fmt_type_out, pogo_bpp_input,
                            &idma_Bpp, &odma_Bpp, &num_idma_channels);
    
    if ((pixels_per_row*idma_Bpp )% 4!= 0)
    {
        printk(KERN_ERR "bytes_perow(=pixels_per_row*idma_Bpp) must be 32-bit aligned, pixels_epr_row=%d idma_Bpp=%d\n!",
               pixels_per_row, idma_Bpp);
        printk(KERN_ERR "Abort!\n");
        return -2;
    }
    BUG_ON(num_idma_channels > 3);
    
    memset(&scale, 0, sizeof(struct pie_scale));
    scale.x_numerator =  scale_num;
    scale.x_denominator = scale_den;
    scale.y_numerator =  scale_num;
    scale.y_denominator = scale_den;
    
    /* calculate the integer representation of the scale factors */
    pie_xy_calc_xyscale(&scale, scale_mode);
    
    /* how many pixels are we going to get out? */
    pie_xy_calc_expected_pixels(pixels_per_row, scale.x_scale, scale_mode, &pixels_out);
    
    /* add in any extra pad bytes we might need (e.g., for HalfPack) */
    /*Assuming we bypass all the blocks except xy scaler*/
    extra_pad = 0;
    pixels_out += extra_pad;
    
    /* make sure each output row is quadword DMA aligned */
    //    *pixels_out = ((*pixels_out+15) / 16) * 16;
    pixels_out = ICE_DMA_ALIGN_ME(pixels_out);
    
    //Expected row out, the actual row out may not be the same
    pie_xy_calc_expected_rows( rowheight, scale.y_scale, scale_mode, &expected_total_rows_out );
    
    printk(KERN_NOTICE "%s: input rows =%d input pixels_per_row=%d idma_Bpp=%d \n",
           __func__, rowheight, pixels_per_row, idma_Bpp);
    
    printk(KERN_NOTICE "%s: expected (not actual) total_rows_out=%d, pixels_out(per row) is %d, odma_Bpp=%d\n",
           __func__, expected_total_rows_out, pixels_out, odma_Bpp);
    
    /*
     * Run strip predictor
     */
    options.y_numerator = scale_num;
    options.y_denominator = scale_den;
    options.xy_scaler_mode = scale_mode;//PIE_XY_SCALER_XXXX;
    options.use_mf = false; //bypass
    options.use_cs = false; //bypass
    options.expected_rows = rowheight; //total input rows
    
    num_strips = calc_strip_data(options, &strip_size, &in_strip_head, &out_strip_head,
                                 &actual_total_rows_in,
                                 &actual_total_rows_out);
    printk(KERN_NOTICE "num_strips is found to be %d, strip_size is %d, "
           "actual rows in is %d ; actual rows out is %d",
           num_strips, strip_size,
           actual_total_rows_in, actual_total_rows_out);
    
    
    /* Need a buffer that can hold all input data, and the extra rows required by the last strip. This requires
     * us to use the actual_total_rows_in obtained from predictor
     */
    
    //Create input buffer, note the num_idam_channels
    total_num_pixels = actual_total_rows_in * pixels_per_row;
    in_buf_size = total_num_pixels*idma_Bpp;
    for (i = 0;i < num_idma_channels; i++)
    {
        printk(KERN_NOTICE "going to call dma_alloc_coherent with in_buf_size= %d\n",in_buf_size);
        in_buffer_virt_addr[i] = dma_alloc_coherent(NULL, in_buf_size, &in_buffer_phys_addr[i], GFP_DMA);
        
        if (in_buffer_virt_addr[i] == 0)
        {
            printk(KERN_ERR "%s: Fail to allocate coherent buffer for input buf size=%d for channel=%d\n",
                   __func__, in_buf_size, i);
            return -1;
        }
        BUG_ON(((uint32_t)in_buffer_phys_addr[i] & 0x3) != 0);
    }
    
    //make_pattern only works on one-byte data
    BUG_ON(idma_Bpp!=1);
    for (i = 0; i < num_idma_channels; i++)
    {
        make_pattern((uint8_t*)in_buffer_virt_addr[i], actual_total_rows_in, pixels_per_row, 1);
        printk("Dump Pattern for Input channel %d\n", i);
        dump_mono_data((uint8_t *)in_buffer_virt_addr[i], actual_total_rows_in, pixels_per_row);
    }
    
    pie_do_reset();
    msleep(100);  // give the asic time for a nice reset
    pie_do_clear_all_irqs();  // clear any interrupts

    // create a pie instance
    pie_handle = pie_create_new_default_handle();
    
    pie_bypass_all_pie_subblocks(pie_handle);
    /*The above bypass_all may not implemented yet, do it myself*/
    pie_xyscale_set_bypass(pie_handle, true);
    pie_xy_c_scale_set_bypass(pie_handle, true);
    
    /*
     * Setup output buffer
     * Note output buffer size is calculated based on actual row out and pixels out
     *
     */
    out_buf_size = (actual_total_rows_out) * pixels_out * odma_Bpp * num_idma_channels;
    
    printk(KERN_NOTICE "%s: going to call dma_alloc_coherent out_buf_size %d\n",
           __func__, out_buf_size);
    
    out_buffer_virt_addr = dma_alloc_coherent(NULL, out_buf_size, &out_buffer_phys_addr, GFP_DMA);
    if (out_buffer_virt_addr == 0)
    {
        printk(KERN_ERR "%s: Fail to allocate memory for output buf, size=%d\n",
               __func__, out_buf_size);
        return -1;
    }
    // set the memory to avoid accidently thinking we got output data
    memset(out_buffer_virt_addr, 0xFE, out_buf_size);
    // data buffers have to be 4 byte aligned
    BUG_ON(((uint32_t)out_buffer_virt_addr & 0x3) != 0);
    printk(KERN_NOTICE "output buffer physaddr=0x%p, virtaddr=0x%p\n", (void *) out_buffer_phys_addr, (void *) out_buffer_virt_addr);
    
    retval = generate_test_odma_descriptors(num_strips,
                                            pixels_out*odma_Bpp,
                                            out_buffer_phys_addr,
                                            out_strip_head,
                                            &out_device_odma_descriptor,
                                            &out_device_odma_descriptor_phys);
    if (retval)
    {
        printk(KERN_ERR "%s call to generate_test_odma_descriptors failed %d\n", __func__, retval);
        return retval;
    }
    
    for (i = 0;i < num_idma_channels; i++)
    {
        retval= generate_test_idma_descriptors(num_strips, pixels_per_row*idma_Bpp,
                                               in_buffer_phys_addr[i], in_strip_head,
                                               &(in_device_idma_descriptor_set[i]),
                                               &(in_device_idma_descriptor_phys_set[i]));
        if (retval)
        {
            printk(KERN_ERR "%s calls to generate_test_odma_descriptors failed channel=%d retval=%d\n",
                   __func__, i, retval);
            return retval;
        }
    }
    
    // input and output linewidth can differ for planar input
    // must be in bytes, not pixels
    input_linewidth = pixels_per_row * idma_Bpp ;
    
    output_linewidth = pixels_out * odma_Bpp;
    printk(KERN_NOTICE "%s: input_linewidth=%d output_linewidth=%d\n", __func__, input_linewidth, output_linewidth);
    
    // NOTE that pie pogo output is only allowed to be 8bpp - 16bpp is input only
    retval = setup_pogo_output(pie_handle, pogo_fmt_type_out, POGO_8BPP, color_swap,
                               output_linewidth, 0);
    if (retval < 0)
        return retval;
    
    /* Note to strip size to indicates column height in a strip*/
    setup_pogo_input(pie_handle, pogo_fmt_type_in, num_idma_channels, pogo_bpp_input,
                     color_swap, input_linewidth, strip_size);

    // register callbacks before enabling ints
    //    pie_register_idma_callback(pie_handle, NULL);
    pie_register_idma_callback(pie_handle, test_idma_interrupt_callback, NULL);
    pie_register_odma_callback(pie_handle, test_odma_interrupt_callback, NULL);
    pie_register_common_callback(pie_handle, NULL, NULL);

    // enable odma, idma interrupts
    pie_enable_pogo_odma_irqs(pie_handle, NULL, true);
    for (i=0; i<num_idma_channels; i++)
        pie_enable_pogo_idma_irqs(pie_handle, NULL, i, true);    

    // enable all pie common interrupts
    pie_enable_common_irqs(pie_handle, NULL, true);
    
    if(scale_mode != PIE_XY_SCALER_CUBIC)
    	xyscaler_config(pie_handle, scale_mode, scale, pixels_out); 
    else
    	xycscaler_config(pie_handle, scale, strip_size, pixels_per_row,
                         pixels_out);

    // get the odmas running, nothing happens until idma does something
    printk("Starting odma address = 0x%X\n", out_device_odma_descriptor_phys);
    pie_start_pogo_output_dma(out_device_odma_descriptor_phys);

    // uncomment the next 2 lines for register debug before the start of the test
    pie_do_get_current(pie_handle);
    // pie_dump_handle_regs(pie_handle, 1);
    pie_dump_handle_regs(pie_handle, 3);    
    
    // start the test!
    for (i=0; i<num_idma_channels; i++)
    {
        printk("Starting idma channel %d, address = 0x%X\n", i, in_device_idma_descriptor_phys_set[i]);
        pie_start_pogo_input_dma(in_device_idma_descriptor_phys_set[i], i);
    }
    
    printk("%s: test started, waiting for completion\n", __func__);
    
//    wait_for_completion_interruptible(&test_odma_interrupt_complete);
    wait_for_completion_interruptible(&test_idma_interrupt_complete);    
    
    printk("test completion\n");
    
    pie_do_get_current(pie_handle); // update the pie handle from the current ASIC values
    //Dump to see registers
    if (scale_mode != PIE_XY_SCALER_CUBIC)
    	pie_xyscale_dump_handle_regs(pie_handle);
    else
    	pie_xy_c_scale_dump_handle_regs(pie_handle);
    
    printk("................OUTPUT DATA DUMP.............\n");
    if(pogo_fmt_type_out == POGO_FMT_MONO)
    	dump_mono_data((uint8_t *)out_buffer_virt_addr,
                       actual_total_rows_out, pixels_out);
    else
    	dump_color_data((uint8_t *)out_buffer_virt_addr,
    			actual_total_rows_out, pixels_out,
    			pogo_fmt_type_out, color_swap);
    
    
    printk(KERN_NOTICE "To free memeory\n");
    if(scale_mode!=PIE_XY_SCALER_CUBIC)
       	pie_xyscale_set_bypass(pie_handle, true);
    else
       	pie_xy_c_scale_set_bypass(pie_handle, true);
    
    // free the memory buffers, then delete the descriptors that point to them
    //cleanup data buffers
#if 0
    dma_free_coherent(NULL, in_buf_size, in_buffer_virt_addr, in_buffer_phys_addr);
#else
    for(i=0;i < num_idma_channels; i++)
    	dma_free_coherent(NULL, in_buf_size, in_buffer_virt_addr[i], in_buffer_phys_addr[i]);
#endif
    
    dma_free_coherent(NULL, out_buf_size, out_buffer_virt_addr, out_buffer_phys_addr);
    
    // cleanup the IDMA descriptors
    for (i = 0;i < num_idma_channels;i++)
    {
        destroy_descriptors(num_strips,
                            in_device_idma_descriptor_set[i],
                            in_device_idma_descriptor_phys_set[i],
                            NULL);
    }
    
    //cleanup odma, single channel
    destroy_descriptors(num_strips, out_device_odma_descriptor, out_device_odma_descriptor_phys, NULL);
    
    printk(KERN_NOTICE "Scaler Test Done\n");

    return 0;
}


/*
 * register test entry
 */
int run_pie_xyscaler_register_test(void)
{
    struct pie_handle_t *pie_handle;
    
    pie_handle = pie_create_new_default_handle();
    
    printk("\n initial register\n");
    pie_do_get_current(pie_handle);
    pie_xyscale_dump_handle_regs(pie_handle);
    
    printk("\nset mode to 0\n");
    pie_xyscale_set_linear_mode(pie_handle, PIE_RAPR_MODE);
    pie_do_configure(pie_handle);  // write all regs down to the ASIC
    pie_do_get_current(pie_handle);
    pie_xyscale_dump_handle_regs(pie_handle);
    if (pie_xyscale_get_linear_mode(pie_handle) == PIE_RAPR_MODE)
        printk("set mode to 0 passed\n");
    else
    {
        printk("ERROR, set mode to 0 failed\n");
        return -1;
    }
    
    printk("\nset mode to 1\n");
    pie_xyscale_set_linear_mode(pie_handle, PIE_LINEAR_MODE);
    pie_do_configure(pie_handle);
    pie_do_get_current(pie_handle);
    pie_xyscale_dump_handle_regs(pie_handle);
    if (pie_xyscale_get_linear_mode(pie_handle) == PIE_LINEAR_MODE)
        printk("set mode to 1 passed\n");
    else
    {
        printk("ERROR, set mode to 1 failed\n");
        return -1;
    }
    
    printk("\n set mode back to 0 again\n");
    pie_xyscale_set_linear_mode(pie_handle, PIE_RAPR_MODE);
    pie_do_configure(pie_handle);
    pie_do_get_current(pie_handle);
    pie_xyscale_dump_handle_regs(pie_handle);
    if (pie_xyscale_get_linear_mode(pie_handle) == PIE_RAPR_MODE)
        printk("set mode to 0 passed\n");
    else
    {
        printk("ERROR, set mode to 0 failed\n");
        return -1;
    }
    
    printk("\nTurn off bypass\n");
    pie_xyscale_set_bypass(pie_handle, false);
    pie_do_configure(pie_handle);
    pie_do_get_current(pie_handle);
    pie_xyscale_dump_handle_regs(pie_handle);
    if (pie_xyscale_get_bypass(pie_handle) == false)
        printk("set bypass to 0 passed\n");
    else
    {
        printk("ERROR, set bypass to 0 failed\n");
        return -1;
    }
    
    printk("\nTurn on bypass\n");
    pie_xyscale_set_bypass(pie_handle, true);
    pie_do_configure(pie_handle);
    pie_do_get_current(pie_handle);
    pie_xyscale_dump_handle_regs(pie_handle);
    if (pie_xyscale_get_bypass(pie_handle) == true)
        printk("set bypass to 1 passed\n");
    else
    {
        printk("ERROR, set bypass to 1 failed\n");
        return -1;
    }
    printk("%s: ALL TESTS PASSED\n", __func__);
    return 0;
}

int run_pie_xycscaler_register_test(void)
{
    struct pie_handle_t *pie_handle;
    
    pie_handle = pie_create_new_default_handle();

    printk("\n initial XYC registers values:\n");
    pie_do_get_current(pie_handle);
    pie_xy_c_scale_dump_handle_regs(pie_handle);
    
    printk("Turn off bypass\n\n");
    pie_xy_c_scale_set_bypass(pie_handle, false);
    pie_do_configure(pie_handle);
    pie_do_get_current(pie_handle);
    pie_xy_c_scale_dump_handle_regs(pie_handle);
    if (pie_xy_c_scale_get_bypass(pie_handle) == false)
        printk("set bypass to 0 passed\n");
    else
    {
        printk("ERROR, set bypass to 0 failed\n");
        return -1;
    }
        
    printk("Turn on bypass\n\n");
    pie_xy_c_scale_set_bypass(pie_handle, true);
    pie_do_configure(pie_handle);
    pie_do_get_current(pie_handle);
    pie_xy_c_scale_dump_handle_regs(pie_handle);
    if (pie_xy_c_scale_get_bypass(pie_handle) == true)
        printk("set bypass to 1 passed\n");
    else
    {
        printk("ERROR, set bypass to 1 failed\n");
        return -1;
    }

    return 0;
}

