/*
**************************************************************************
This Source Code Form is subject to the terms of the Mozilla Public
License, v. 2.0. If a copy of the MPL was not distributed with this file,
You can obtain one at http://mozilla.org/MPL/2.0/.

Copyright (c) 2005-2016, Marvell International Ltd.

Alternatively, this software may be distributed under the terms of the GNU
General Public License Version 2, and any use shall comply with the terms and
conditions of the GPL.  A copy of the GPL is available at
http://www.gnu.org/licenses/old-licenses/gpl-2.0.html

THE FILE IS DISTRIBUTED AS-IS, WITHOUT WARRANTY OF ANY KIND, AND THE
IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
ARE EXPRESSLY DISCLAIMED.  The GPL license provides additional details about
this warranty disclaimer.
******************************************************************************
*/



#include <string.h>
#include <math.h>

#include "scos.h"

#include "lassert.h"
#include "interrupt_api.h"
#include "memAPI.h"
#include "cpu_api.h"
#include "regAddrs.h"

#include "scantypes.h"
#include "scancore.h"
#include "scandbg.h"
#include "scan.h"

#include "pie_handle_if.h"
#include "pie_if.h"
#include "pie_convenience_if.h"
#include "scansen.h"
#include "scanvars.h"
#include "pie.h"
#include "piereset.h"
#include "safeint.h"

/* Define to turn on more verbose debugging.
 *
 * When PIE_DEBUG is defined, pie_dbg2() is an alias for dbg2(). When it is
 * not defined, every pie_dbg2(...) call expands to nothing.
 */
#define PIE_DEBUG  

#ifdef PIE_DEBUG
  #define pie_dbg2 dbg2 
#else
  #define pie_dbg2(...) 
#endif


/**
 * \brief Disable the PIE common interrupt.
 *
 * Calls pie_disable_pie_common_irq() with a NULL argument.
 *
 * \return Always 1. The previous enable state is not tracked, so the return
 *         value cannot be used to tell whether the interrupt was enabled
 *         before the call (see the FIXME below).
 */
uint32_t pie_interrupt_disable(void)
{
    pie_disable_pie_common_irq(NULL);

    // FIXME: we have no idea if it was enabled ...
    return 1;
}

/**
 * \brief Enable the PIE common interrupt.
 *
 * Calls pie_enable_pie_common_irq() with a NULL argument.
 */
void pie_interrupt_enable(void)
{
     pie_enable_pie_common_irq(NULL);
}


/**
 * \brief  Return the number of bytes in one pixel of a PIE pixel type.
 *
 * The PIE pixel type is first translated to the matching POGO output format,
 * then pie_pogo_get_output_bytes_per_pixel() supplies the byte count.
 *
 * \param[in] ppt  PIE pixel type (PIE_PIXEL_MONO, PIE_PIXEL_3_PLANES,
 *                 PIE_PIXEL_XRGB, PIE_PIXEL_RGBX or PIE_PIXEL_RGB)
 *
 * \return bytes per pixel; asserted to be greater than zero
 */

int pie_get_bytes_per_pixel(pie_pixel_t ppt)
{
    int pogo_format = -1;
    int nbytes;

    if( ppt==PIE_PIXEL_MONO ) {
        pogo_format = POGO_FMT_MONO;
    }
    else if( ppt==PIE_PIXEL_3_PLANES ) {
        pogo_format = POGO_FMT_PLANAR;
    }
    else if( ppt==PIE_PIXEL_XRGB ) {
        pogo_format = POGO_FMT_XRGB;
    }
    else if( ppt==PIE_PIXEL_RGBX ) {
        pogo_format = POGO_FMT_RGBX;
    }
    else if( ppt==PIE_PIXEL_RGB ) {
        pogo_format = POGO_FMT_RGB;
    }
    else {
        /* not a pixel type this function knows about */
        XASSERT( 0, ppt );
    }

    nbytes = pie_pogo_get_output_bytes_per_pixel(pogo_format);
    XASSERT(nbytes > 0, nbytes);

    return nbytes;
}

/**
 * \brief  Return the number of bits in one pixel of a PIE pixel type.
 *
 * Eight times the value returned by pie_get_bytes_per_pixel().
 *
 * \param[in] ppt  PIE pixel type
 * \return bits per pixel
 */
int pie_get_bits_per_pixel(pie_pixel_t ppt)
{
    return pie_get_bytes_per_pixel(ppt)*8;
}

/**
 * \brief  Translate a PIE pixel type into the matching scan data type.
 *
 * \param[in]  ppt  PIE pixel type
 * \param[out] sdt  receives the scan data type; SCAN_DATA_NULL_TYPE is
 *                  stored first, so it is what remains if ppt is not
 *                  recognized (after the assert fires)
 */
void pie_pixel_type_to_scan_data_type( pie_pixel_t ppt, scan_data_type *sdt )
{
    /* start from "no type" so an unrecognized ppt leaves a defined value */
    *sdt = SCAN_DATA_NULL_TYPE;

    if( ppt==PIE_PIXEL_XRGB ) {
        *sdt = SCAN_DATA_TYPE_XRGB;
    }
    else if( ppt==PIE_PIXEL_RGBX ) {
        *sdt = SCAN_DATA_TYPE_RGBX;
    }
    else if( ppt==PIE_PIXEL_3_PLANES ) {
        *sdt = SCAN_DATA_TYPE_PLANE;
    }
    else if( ppt==PIE_PIXEL_MONO ) {
        *sdt = SCAN_DATA_TYPE_MONO;
    }
    else if( ppt==PIE_PIXEL_RGB ) {
        *sdt = SCAN_DATA_TYPE_RGB;
    }
    else {
        /* unknown/unsupported PIE output format */
        XASSERT( 0, ppt );
    }
}

/**********************************************************
 *
 * CSC Functions
 *
 *********************************************************/


/**
 * \brief Setup the Color Space Conversion blocks
 *
 * Programs, from the current scanvars (scanvar_get()):
 *  - the RGB2esRGB LUTs (3), each only when its use_csc_*_lut flag is set for
 *    the given sensor,
 *  - the esRGB2YCbCr coefficients and bypass,
 *  - the YCbCr2esRGB coefficients and bypass.
 *
 * Note that the SCCSC isn't set up here - that is done in scanpipe.c's pipe_start_pic_and_pie_only
 *
 * \param[in] pie_handle  PIE handle that receives the settings
 * \param[in] sensor_num  index into the per-sensor LUT arrays; asserted to be
 *                        less than SCANSEN_MAX_SENSORS
 **/

void pie_csc_setup(struct pie_handle_t *pie_handle, uint32_t sensor_num)
{
    struct scanvars *sv;

    sv = scanvar_get();

    XASSERT(SCANSEN_MAX_SENSORS > sensor_num, sensor_num);

    //
    /////// Setup RGB2esRGB - Has 3 LUTs. 
    // NOTE(review): each stanza reads data[num_entries-1]; this assumes
    // num_entries is at least 1 whenever the matching use_csc_*_lut flag is
    // set - confirm against the code that fills in the scanvars.
    if (sv->use_csc_r_lut[sensor_num])
    {
        pie_rgb2esrgb_set_lut(pie_handle,  sv->csc_r_lut[sensor_num].data, sv->csc_r_lut[sensor_num].num_entries, PIE_CSC_RED_LUT);
        // copy the last value in the LUT to the extraregs - since we white clip, it will match
        pie_rgb2esrgb_set_extraregs(pie_handle, PIE_CSC_RED_LUT, sv->csc_r_lut[sensor_num].data[sv->csc_r_lut[sensor_num].num_entries-1]);
    }
    if (sv->use_csc_g_lut[sensor_num])
    {
        pie_rgb2esrgb_set_lut(pie_handle, sv->csc_g_lut[sensor_num].data, sv->csc_g_lut[sensor_num].num_entries, PIE_CSC_GREEN_LUT);
        // copy the last value in the LUT to the extraregs - since we white clip, it will match
        pie_rgb2esrgb_set_extraregs(pie_handle, PIE_CSC_GREEN_LUT, sv->csc_g_lut[sensor_num].data[sv->csc_g_lut[sensor_num].num_entries-1]);
    }
    if (sv->use_csc_b_lut[sensor_num])
    {
        pie_rgb2esrgb_set_lut(pie_handle, sv->csc_b_lut[sensor_num].data, sv->csc_b_lut[sensor_num].num_entries, PIE_CSC_BLUE_LUT);
        // copy the last value in the LUT to the extraregs - since we white clip, it will match
        pie_rgb2esrgb_set_extraregs(pie_handle, PIE_CSC_BLUE_LUT, sv->csc_b_lut[sensor_num].data[sv->csc_b_lut[sensor_num].num_entries-1]);
    }
    // the bypass setting is written whether or not any LUT was loaded
    pie_rgb2esrgb_set_bypass(pie_handle, sv->csc_rgb_srgb_bypass);

    //
    // Setup esRGB2YCrCb. (ES2Y) - Has 3x3 matrix but no LUT. 
    pie_esrgb2ycc_set_coeff(pie_handle, sv->csc_rgbycc_matrix);
    pie_esrgb2ycc_set_bypass(pie_handle, sv->csc_srgb_ycc_bypass);

    //
    // Setup YCrCb2esRGB. (Y2ES) - Has 3x3 matrix but no LUT. 
    pie_ycc2esrgb_set_coeff(pie_handle, sv->csc_yccrgb_matrix);
    pie_ycc2esrgb_set_bypass(pie_handle, sv->csc_ycc_rgb_bypass);
}

/**********************************************************
 *
 * MultiFilter Functions
 *
 *********************************************************/
/**
 * \brief setup PIE MultiFilter 
 *
 * Takes the DSMF block out of bypass and copies every setting in \p dsmf
 * into the PIE handle: control register fields, segmentation/projection
 * thresholds, sculptor configuration, the filter control LUT, the optional
 * Activity Index LUTs, the per-filter variable coefficients with their
 * multipliers and shifts, and the text enhancement (TE) settings.
 *
 * \param[in] pie_handle  PIE handle that receives the settings; asserted
 *                        non-NULL
 * \param[in] dsmf        MultiFilter settings to program
 *
 * \author David Poole
 * \date 19-Sep-2005
 *
 */

void pie_filter_setup(struct pie_handle_t *pie_handle, const struct dsmf_settings *dsmf)
{
    uint32_t i;

    XASSERT( pie_handle != NULL, (uint32_t) pie_handle );

    /* calling this function means the filter is wanted: bypass off */
    pie_dsmf_set_bypass(pie_handle, false);

    /* program Ctl register */
    pie_dsmf_setup_ctl(pie_handle, dsmf->dsmf_test_mixed,   dsmf->dsmf_test_halftone,
                                   dsmf->dsmf_test_text,    dsmf->dsmf_test_isolated, dsmf->dsmf_alpha_filter_en,
                                   dsmf->dsmf_filt_idx_sel, dsmf->dsmf_ot_out_sel);

    /* setup segmentation / projection thresholds */
    pie_dsmf_set_proj_thresh(pie_handle,           dsmf->dsmf_proj_horiz_thresh_x55,  dsmf->dsmf_proj_vert_thresh_x55,  dsmf->dsmf_proj_diag_thresh_x25);
    pie_dsmf_set_isolated_thresh(pie_handle,       dsmf->dsmf_isolated_horiz_thresh,  dsmf->dsmf_isolated_vert_thresh,  dsmf->dsmf_isolated_diag_thresh);
    pie_dsmf_set_3x3text_thresh(pie_handle,        dsmf->dsmf_3x3_text_horiz_thresh,  dsmf->dsmf_3x3_text_vert_thresh,  dsmf->dsmf_3x3_text_diag_thresh);
    pie_dsmf_set_3x3mixed_thresh(pie_handle,       dsmf->dsmf_3x3_mixed_horiz_thresh, dsmf->dsmf_3x3_mixed_vert_thresh, dsmf->dsmf_3x3_mixed_diag_thresh);
    pie_dsmf_set_5x5text_thresh(pie_handle,        dsmf->dsmf_5x5_text_horiz_thresh,  dsmf->dsmf_5x5_text_vert_thresh,  dsmf->dsmf_5x5_text_diag_thresh);
    pie_dsmf_set_5x5mixed_thresh(pie_handle,       dsmf->dsmf_5x5_mixed_horiz_thresh, dsmf->dsmf_5x5_mixed_vert_thresh, dsmf->dsmf_5x5_mixed_diag_thresh);
    pie_dsmf_set_line_detection_thresh(pie_handle, dsmf->dsmf_line_horiz_thresh,      dsmf->dsmf_line_vert_thresh,      dsmf->dsmf_line_diag_thresh,
                                                   dsmf->dsmf_line_diag1_thresh,      dsmf->dsmf_line_diag2_thresh);

    /* program sculptor config registers */
    pie_dsmf_setup_sculptor_config(pie_handle, dsmf->mf_scale_10, dsmf->mf_scale_15, dsmf->mf_scale_20,
                                      dsmf->mf_lut_output_scale, dsmf->mf_offset, dsmf->use_mf_erosion, dsmf->use_mf_dilation, dsmf->te_use_5x5_matrix );

    /* setup the filter control lut */
    pie_dsmf_set_filtctl_lut(pie_handle,    dsmf->dsmf_filtctl_conv_filter_coeff_select, dsmf->dsmf_filtctl_ai_lut_y_select,
        dsmf->dsmf_filtctl_ai_lut_c_select, dsmf->dsmf_filtctl_unsharp_mask_enable,      dsmf->dsmf_filtctl_text_enhance_enable, DSMF_FILT_CTRL_LUT_MAX_SIZE);

    /* program the Activity Index lookup tables (skipped entirely when
     * use_mf_ai_lut is not set)
     */
    if( dsmf->use_mf_ai_lut ) {
        pie_dsmf_set_ai_lut0(pie_handle, 
                dsmf->mf_ai_lut0_y.data,    dsmf->mf_ai_lut0_y.num_entries,
                dsmf->mf_ai_lut0_crcb.data, dsmf->mf_ai_lut0_crcb.num_entries);

        pie_dsmf_set_ai_lut1(pie_handle,
                dsmf->mf_ai_lut1_y.data,    dsmf->mf_ai_lut1_y.num_entries,
                dsmf->mf_ai_lut1_crcb.data, dsmf->mf_ai_lut1_crcb.num_entries);
    }

    /* setup varcoef luts: one Y and one chroma coefficient set, multiplier
     * and shift per filter index
     */
    for(i = 0; i < DSMF_COEF_NUM_FILTERS; i++)
    {
        pie_dsmf_set_varcoeff_y(pie_handle,    i, dsmf->mf_varcoef_y[i],       DSMF_COEF_MAX_COEFFICIENTS);
        pie_dsmf_set_varcoeff_crcb(pie_handle, i, dsmf->mf_varcoef_chroma[i],  DSMF_COEF_MAX_COEFFICIENTS);
        pie_dsmf_set_filt_mult(pie_handle,     i, dsmf->mf_varcoef_y_mult[i],  dsmf->mf_varcoef_chroma_mult[i]);
        pie_dsmf_set_filt_shift(pie_handle,    i, dsmf->mf_varcoef_y_shift[i], dsmf->mf_varcoef_chroma_shift[i]);
    }

    /* text enhancement settings; use_mf_te is handed to the setup call */
    pie_dsmf_setup_te(pie_handle, dsmf->use_mf_te,
        dsmf->te_count_black_min,  dsmf->te_count_white_min,   dsmf->te_count_white_plus_black_min,
        dsmf->te_center_color_max, dsmf->te_intense_color_max, dsmf->te_black_y_max,
        dsmf->te_black_CbCr_max,   dsmf->te_white_y_min,       dsmf->te_white_CbCr_max);
}


/**********************************************************
 *
 * XYScale Functions
 *
 *********************************************************/
// This code has been ported to this version of the PIE xyscale
// subblock.  Newer versions of the kernel driver may or may not
// be compatible. pie_set_xyscale_defaults() compares these two values
// against what pie_xyscale_get_fw_rev() reports and prints a warning on a
// mismatch.
#define XYSCALE_VERSION 3
#define XYSCALE_CONFIG 0

// xyscale defaults: the values pie_set_xyscale_defaults() passes to
// pie_xyscale_set_values()
#define COL_CNT 0
#define FILL0 0
#define FILL1 0
#define FILL2 0    
#define SCALEX 0
#define SCALEY 0
#define XMINUS 0
#define YMINUS 0
#define XTRANS 0
#define YTRANS 0
// NOTE(review): 0x10000 looks like 1.0 in 16.16 fixed point - confirm
#define YPER 0x10000
#define EDGE_PIX_CNT 0

/**
 * \brief  Load this file's default XYScale settings into a PIE handle.
 *
 * Queries the XYScale revision/config through pie_xyscale_get_fw_rev() and
 * prints a warning if either differs from XYSCALE_VERSION / XYSCALE_CONFIG,
 * then writes the defaults (bypass, linear mode, no roundup, no replicate and
 * the default macro values) with pie_xyscale_set_values().
 *
 * \param[in] pie_handle  PIE handle to update; must not be NULL (BUG_ON)
 */
static void pie_set_xyscale_defaults(struct pie_handle_t *pie_handle)
{
    int fw_version;
    int fw_config;

    BUG_ON(pie_handle == NULL);

    pie_xyscale_get_fw_rev(pie_handle, &fw_version, &fw_config);

    /* a mismatch is only reported; the defaults are still written */
    if ((fw_version != XYSCALE_VERSION) || (fw_config != XYSCALE_CONFIG)) {
        errprint("%s: warning - different version or config\n", __func__);
    }

    pie_xyscale_set_values(pie_handle,
                           PIE_SCALE_BYPASS, PIE_LINEAR_MODE, PIE_SCALE_RUP_NONE,
                           COL_CNT,
                           FILL0, FILL1, FILL2,
                           SCALEX, SCALEY,
                           XMINUS, YMINUS,
                           XTRANS, YTRANS,
                           YPER,
                           PIE_NO_REPLICATE, EDGE_PIX_CNT);
}


/**
 * \brief  ceil() without float, for 16.16 fixed point
 *
 *  The value is an integer part in the upper 16 bits and a fraction in the
 *  lower 16 bits. Shifting the numerator left by 16 before an integer
 *  division produces such a value:
 *
 *      uint32_t a,b,c;
 *
 *      # quotient = int(ceil(float(numer)/float(denom)));
 *      a = numer<<16;
 *      b = a / denom;
 *      c = f16_ceil(b);
 *      quotient = c>>16;
 *
 * \param[in] n  16.16 fixed point value
 * \return n rounded up to the next whole number, still in 16.16 format
 *         (fraction bits zero); n itself if it has no fraction
 */

static uint32_t f16_ceil( uint32_t n )
{
    const uint32_t fraction_mask = 0xffffu;
    const uint32_t one = fraction_mask + 1u;    /* 1.0 in 16.16 */

    if( (n & fraction_mask) == 0 ) {
        /* already a whole number */
        return n;
    }

    /* drop the fraction, step up to the next whole number */
    return (n & ~fraction_mask) + one;
}

/**
 * \brief  ceil() without float, for 32.32 fixed point
 *
 * The value is an integer part in the upper 32 bits and a fraction in the
 * lower 32 bits. See f16_ceil() for the 16.16 variant and a usage sketch.
 *
 * \param[in] n  32.32 fixed point value
 * \return n rounded up to the next whole number, still in 32.32 format
 *         (fraction bits zero); n itself if it has no fraction
 *
 * \author David Poole
 * \date 02-Aug-2013
 */

static uint64_t f32_ceil( uint64_t n) 
{
    if( n & 0xffffffffull ) {
        //# there is a fractional part
        //# add 1 to base; clear fractional part
        return (n+(1ull<<32)) & (~0xffffffffull);
    }
    return n;
}


/**
 * \brief  Calculate edge pixels for linear mode scaling.
 *
 * Integer-only replacement for the former floating point code
 *
 *      edge_pixels = (int)ceil( ((float)numerator/(float)denominator - 1) / 2.0);
 *
 * i.e. the MA's  edge_pixels = ceiling( (scale-1)/2 ), computed in 16.16
 * fixed point.
 *
 * \param[in] numerator    scale fraction numerator; must be >= denominator
 * \param[in] denominator  scale fraction denominator; must not be zero
 *
 * Numerator and denominator are limited to 15-bit numbers so numerator<<16
 * fits in a 32-bit number for the integer division.
 *
 * \return edge pixel count; asserted to be no larger than 15
 *
 * \author David Poole
 * \date 01-Aug-2013
 */

uint32_t pie_xy_calc_linear_mode_edge_pixels( uint32_t numerator, 
                                              uint32_t denominator )
{
    uint32_t scale_f16;
    uint32_t half_excess_f16;
    uint32_t edge_pixels;

    /* numerator>=denominator alone lets a zero denominator through, which
     * would divide by zero below
     */
    XASSERT( denominator!=0, denominator );
    XASSERT(numerator>=denominator,numerator);

    /* scale as 16.16 fixed point */
    scale_f16 = (numerator<<16) / denominator;

    /* (scale-1)/2, still 16.16 */
    half_excess_f16 = (scale_f16 - (1u<<16)) / 2;

    /* round up to a whole number and drop the (now zero) fraction bits */
    edge_pixels = f16_ceil(half_excess_f16) >> 16;

    /* value is unsigned, so only the upper bound needs checking */
    XASSERT( edge_pixels<=15, edge_pixels );

    return edge_pixels;
}

/**
 * \brief Calculate Linear mode strip size
 *
 *  The PIE XYScale documentation uses floating point division for this
 *  calculation:
 *
 *      strip_size_out = (int)ceil( (strip_size_in * (1<<16)) / (float)fracY);
 *
 *  Linux kernel has no float, so the division is done on a 64-bit integer
 *  that carries 32 fraction bits, and the result is rounded up with
 *  f32_ceil().
 *
 * \param[in] strip_size_in  rows per input strip (almost always in [1,22])
 * \param[in] y_scale        Y scale factor, a 16-bit number [1,65535]
 *
 * \return ceil( strip_size_in * 65536 / y_scale )
 *
 * \author David Poole
 * \date 02-Aug-2013
 */

static uint32_t pie_xy_calc_linear_mode_strip_size( uint32_t strip_size_in, uint32_t y_scale )
{
    uint64_t dividend;
    uint64_t quotient_f32;

    /* strip_size_in * (1<<16), moved up 32 bits so the quotient keeps 32
     * fraction bits
     */
    dividend = ((uint64_t)strip_size_in << 32) * (1<<16);

    quotient_f32 = safeint_divide_uint64( dividend, (uint64_t)y_scale );

    /* round up, then keep only the whole-number half */
    return (uint32_t)( f32_ceil(quotient_f32) >> 32 );
}

/**
 * \brief  Initialize the XY Scaler 
 *
 * Resets the XYScale settings in the PIE handle to this file's defaults,
 * then programs the scale factors, output column count, fill color, roundup
 * and the Linear / RAPR mode specific values, and finally takes XYScale out
 * of bypass.
 *
 * \param[in] pie_handle
 *      the PIE handle that receives the XYScale settings
 *
 * \param[in] scale     
 *      the x,y scale factors as a fraction. scale->x_scale and
 *      scale->y_scale are read here, not calculated, so they must already
 *      be filled in.
 *
 * \param[in] pixels_in 
 *      how many pixels per row 
 *
 * \param[in] strip_size_in
 *      the number of rows acted upon in a single pass; used here to validate
 *      strip_size_out
 *
 * \param[in] total_rows_in
 *      the complete size of the input image
 *
 * \param[in] extra_pad
 *      how many extra pixels are added to the end of each row
 *
 * \param[in] scaler_mode PIE_XY_SCALER_LINEAR or PIE_XY_SCALER_RAPR
 *
 * \param[out] pixels_out
 *      the number of pixels per row output, DMA aligned.
 *
 * \param[in] strip_size_out
 *      the Maximum strip size the hardware will output; <strong>Note
 *      actual strip size can change across strips!</strong> Passed by value:
 *      the caller calculates it beforehand and this function only asserts
 *      that it matches strip_size_in and the Y scale factor.
 *
 * \param[out] total_rows_out
 *      The total number of rows output from the scaler. The actual output from
 *      the hardware will be padded to an exact strip size because of the DMA
 *      requirements but the driver will remove the extra padding on the
 *      output.
 *
 * \author David Poole
 * \date 29-Aug-2005
 *
 */

void pie_xy_scale(struct pie_handle_t *pie_handle,
                  struct pie_scale *scale,
                  int pixels_in,
                  int strip_size_in,
                  int total_rows_in,
                  int extra_pad,
                  int scaler_mode,
                  
                  int *pixels_out,
                  int strip_size_out,
                  int *total_rows_out )
{
    int x_edge_pixels, y_edge_pixels; 
    uint32_t scale_tx, scale_ty;
    //struct xyscaler_CSR_reg csr;
    //struct xyscaler_CLCR_reg clcr;
    
    /* Set up XYScale.  */
    dbg2( "%s ssi=%d sso=%d x=%d/%d y=%d/%d mode=%s\n", __FUNCTION__, 
          strip_size_in, strip_size_out,
          scale->x_numerator, scale->x_denominator, 
          scale->y_numerator, scale->y_denominator,
          scaler_mode==PIE_XY_SCALER_LINEAR?"linear":"rapr" );

    // first set all xyscale values to our default
    pie_set_xyscale_defaults(pie_handle);

    /* davep 07-Oct-2013 ; Linear mode HW only works down to 1/2. If we're
     * scaling down, we force RAPR. If we're scaling up, we usually want Linear
     * but RAPR also works. Verify to avoid confusing math errors deeper in
     * the PIE setup code.
     */
    if( scale->x_numerator < scale->x_denominator ) {
        XASSERT( scaler_mode==PIE_XY_SCALER_RAPR, scaler_mode );
    }
    if( scale->y_numerator < scale->y_denominator ) {
        XASSERT( scaler_mode==PIE_XY_SCALER_RAPR, scaler_mode );
    }
    
    /* davep 13-Sep-2005 ; require the input pixels to be DMA aligned;
     * XXX I may want to remove this later
     */
    /* davep 03-Mar-2008 ; removing it since incoming pixels != incoming bytes */
//    XASSERT( pixels_in%16 == 0, pixels_in );
    
//    /* arbitrary scale */
//    if( scaler_mode==PIE_XY_SCALER_LINEAR ) {
//        x_scale = ((1<<16) * scale->x_denominator) / scale->x_numerator;
//        y_scale = ((1<<16) * scale->y_denominator) / scale->y_numerator;
//    }
//    else {
//        XASSERT( scaler_mode==PIE_XY_SCALER_RAPR, scaler_mode );
//        x_scale = ((1<<16) * scale->x_numerator) / scale->x_denominator;
//        y_scale = ((1<<16) * scale->y_numerator) / scale->y_denominator;
//    }

#if 0
    FIXME - WHAT ARE WE CHECKING HERE???  NOTE THAT THE NEW ASIC does not use the X and Y scale normal values....
    XASSERT( scale->x_scale && PIE_SCALE_SCALEX_SCALE(scale->x_scale)==scale->x_scale, scale->x_scale );
    XASSERT( scale->y_scale && PIE_SCALE_SCALEY_SCALE(scale->y_scale)==scale->y_scale, scale->y_scale );
#endif
    /* the scale factors come precomputed in the scale structure */
    pie_xyscale_set_scale(pie_handle, scale->x_scale, scale->y_scale);

    /* how many pixels are we going to get out? */
    pie_xy_calc_expected_pixels(pixels_in, scale->x_scale, scaler_mode, pixels_out);
    
    /* add in any extra pad bytes we might need (e.g., for HalfPack) */
    *pixels_out += extra_pad;

    /* make sure each output row is quadword DMA aligned */
    // *pixels_out = ((*pixels_out+15) / 16) * 16;
    *pixels_out = ICE_DMA_ALIGN_ME(*pixels_out);
    pie_xyscale_set_column_cnt(pie_handle, *pixels_out);

    /* make sure each output row is quadword DMA aligned */
    // XASSERT( *PIE_SCALE_OUTX % 16 == 0, *PIE_SCALE_OUTX );

    /* 21-June-05 ; note total_rows won't be used in the MultiStrip driver
     * because the output row count depends on XYScale settings.
     */
    pie_xy_calc_expected_rows(total_rows_in, scale->y_scale, scaler_mode, total_rows_out);

    /* 16-June-05 ; have to adjust the write strip size even though PIE write
     * DMA will get the rows from the XYScale block. We need this number to
     * correctly allocate memory for write buffers. This number needs to be the
     * largest the write strip size will ever be.
     */
    /* davep 14-Oct-2005 ; I'm changing the rules. Originally I wanted to have
     * as many of these silly numbers calculated in one place (here, in the
     * xyscale setup function) but found I needed too many of them in order to
     * set up HalfPack's numbers. Then I wound up calculating them in two
     * places--before xyscale was set up and here in xyscale setup. I wound up
     * ignoring the second set of numbers. Now, however, the two sets of
     * numbers are diverging because of MultiFilter. I don't want this function
     * to know about MultiFilter so I'm no longer going to allow it to change
     * any of the settings. From now on we'll have to calculate all this crap
     * before we call xyscale setup.
     *
     * I'm validating everything to within an inch of its life so don't get too
     * cocky.
     */
    // *strip_size_out = (strip_size_in * y_scale) / (1<<16);
    /* strip_size_out is only checked here, never modified */
    if( scaler_mode==PIE_XY_SCALER_LINEAR ) {
        XASSERT( strip_size_out==pie_xy_calc_linear_mode_strip_size(strip_size_in,scale->y_scale), 
                 strip_size_out );
    }
    else {
        XASSERT( scaler_mode==PIE_XY_SCALER_RAPR, scaler_mode );
        XASSERT( strip_size_out==(strip_size_in * scale->y_scale) / (1<<16), strip_size_out );
    }

    /* 6-May-05 ; set fill data to something other than zero so it shows up */
#if 0
    pie_xyscale_set_fill_color(pie_handle, 0xbb, 0xbb, 0xbb);
    // pie_xyscale_set_fill_color(pie_handle, 0xbb, 0xcc, 0xdd);
#else
    /* 0xff is black for laser, white for RGB color scans, and (I think) white for
     * ink copies
     */
    pie_xyscale_set_fill_color(pie_handle, 0xff, 0xff, 0xff);
#endif
    /* black */
    // pie_xyscale_set_fill_color(pie_handle, 0x00, 0x00, 0x00);

    /* 26-May-05 davep ; set to full roundup */
    /* davep 23-Jun-2006 ; XXX I did this back when I was shotgun debugging
     * scaler problems. Will this break Linear?
     */
    pie_xyscale_set_roundup(pie_handle, PIE_SCALE_RUP_ALL);

    /* davep 15-Jun-2006 ; turn on linear mode and hope for the best... */
    if (scaler_mode==PIE_XY_SCALER_LINEAR)
    {
        // dbg1("%s enabling XYScale linear mode\n", __FUNCTION__ );

        pie_xyscale_set_linear_mode(pie_handle, PIE_LINEAR_MODE);

        /* other registers need configuration in Linear mode plus we need to
         * set up the Edge Pixels field in the SCALEX register
         */

        /* edge = ceiling( (scale-1)/2 ) */
        x_edge_pixels = pie_xy_calc_linear_mode_edge_pixels( scale->x_numerator,
                                                      scale->x_denominator );
        XASSERT( x_edge_pixels>=0 && x_edge_pixels<=15, x_edge_pixels );

        /* y_edge_pixels is not written to an edge pixel register; it is only
         * used for scale_ty below
         */
        y_edge_pixels = pie_xy_calc_linear_mode_edge_pixels( scale->y_numerator,
                                                      scale->y_denominator );        

#if 0
FIXME!!!!        Why is this function generating x and y edge pixels, when the asic only
        takes x edge pixels    
        pie_xyscale_set_edge_pixel_cnt(pie_handle, x_edge_pixels, y_edge_pixels);
#endif
        pie_xyscale_set_edge_pixel_cnt(pie_handle, x_edge_pixels);        
        /* Do I know what this formula means? No, I do not. It's straight from the
         * XYScale MA.
         *
         * Use temporaries so I can debug print the results because these two
         * registers are write only.
         */
        scale_tx = x_edge_pixels * scale->x_scale - 32768 + scale->x_scale/2;
        scale_ty = y_edge_pixels * scale->y_scale - 32768 + scale->y_scale/2;
        // dbg1("scale_tx=0x%x scale_ty=0x%x\n", scale_tx, scale_ty );

        pie_xyscale_set_transpose(pie_handle, scale_tx, scale_ty);

    }
    else
    {
        /* Disable LM mode so we're now set to RAPR (Running Average Pixel
         * Replication) mode 
         */
        pie_xyscale_set_linear_mode(pie_handle, PIE_RAPR_MODE);
        
        /* so we don't have a warning about unused function variables */
        x_edge_pixels = 0;
        y_edge_pixels = 0;
        scale_tx = 0;
        scale_ty = 0;
    }

    /* Now that we're all set up, disable bypass so XYScale is now running */
    pie_xyscale_set_bypass(pie_handle, false);
    
    /* before we run any data through, let's see what all the registers are */
    //pie_dump_xyscale_regs(pie_handle);

}

/**
 * \brief Get number of rows out of scaler based on settings.
 *
 *  Because of XYScale's padding requirements and the scaling factor, the exact
 *  number of rows coming out of XYScale cannot be predicted ahead of time.
 *  However, using the scale factor, we can tell how many actual rows of real
 *  data to expect. 
 *
 *  We need to pad the bottom of the image on the read to match the stripsize
 *  but that extra data must be removed from the output image when we're
 *  complete. 
 *
 * \param[in] rows_in number of rows fed into top of XYScale block; must be > 0
 * \param[in] scale_factor XYScale's Y direction scale factor; must be > 0 in
 *          Linear mode where it is the divisor
 * \param[in] scaler_mode PIE_XY_SCALER_LINEAR or PIE_XY_SCALER_RAPR; anything
 *          else asserts
 * \param[out] rows_out based on the total number of input rows and the
 *          scale factor, how many rows would a normal human expect
 *
 * \author David Poole
 * \date 26-June-05
 *
 * \remarks Linear mode computes floor( (rows_in-1) * 65536 / scale_factor ).
 * The product does not fit a signed 32-bit int once rows_in exceeds 32768, so
 * the division is split into a whole quotient plus a 16-bit fraction obtained
 * by shift/subtract long division. Only 32-bit unsigned arithmetic is used;
 * no 64-bit divide is needed.
 *
 * \remarks 26-July-05 RAPR mode uses DougK's formula, which avoids the
 * overflow of the simple (rows_in * scale_factor) / (1<<16). See 26-July-05
 * notes for original email.
 *
 *
 *davep 13-Jun-2011 ; DO NOT CALL THIS FUNCTION TO GET HORIZONTAL VALUES!
 *    Only call for rows!  The -1 in Linear mode is something bizarre for rows.
 *    Not pixels. See DougK for explanation.  Or maybe I'm just not that smart.
 *
 **/

void pie_xy_calc_expected_rows( int rows_in, int scale_factor, int scaler_mode, int *rows_out )
{
    XASSERT( rows_in>0, rows_in );

    if( scaler_mode==PIE_XY_SCALER_LINEAR ) {
        uint32_t rows, scale, whole, rem, frac;
        int bit;

        /* scale_factor is the divisor below */
        XASSERT( scale_factor>0, scale_factor );

        /* davep 10-Jul-2006 ; DougK suggested using total_rows_in-1 for Linear */
        rows = (uint32_t)(rows_in-1);
        scale = (uint32_t)scale_factor;

        /* rows*65536/scale == whole*65536 + (rem*65536)/scale */
        whole = rows / scale;
        rem = rows % scale;

        /* rem < scale < 2^31 so rem<<1 always fits in 32 bits; produce the
         * 16 fraction bits one at a time
         */
        frac = 0;
        for( bit=0 ; bit<16 ; bit++ ) {
            rem <<= 1;
            frac <<= 1;
            if( rem >= scale ) {
                rem -= scale;
                frac |= 1;
            }
        }

        *rows_out = (int)((whole<<16) + frac);
    }
    else {
        /* same convention as the other XYScale helpers: anything that is not
         * Linear must be RAPR, otherwise *rows_out would be left unset
         */
        XASSERT( scaler_mode==PIE_XY_SCALER_RAPR, scaler_mode );
        /* 26-July-05 davep ; original simple formula which would overflow pretty
         * quickly
         *
         *    *expected_rows_out = (total_rows_in * xyscale) / (1<<16);
         */
        /* DougK's new formula which I admit I don't follow */
        *rows_out = ( ((rows_in>>12) * scale_factor) + (((rows_in % (1<<12) ) * scale_factor) >> 12)) >> 4;
    }
}

/**
 * \brief calculate expected pixel width
 *
 *  Linear mode:  pixels_out = (pixels_in * 65536) / scale_factor
 *  RAPR mode:    pixels_out = (pixels_in * scale_factor) / 65536
 *
 *  All math is done in 64 bits; the result is asserted to fit in 32 bits.
 *
 *  why is this function different than the 'expected_rows' version above?
 *  Because there is a huge amount of code that relies on the expected rows
 *  version and changing to the same formula below breaks that code. 
 *
 *  The -1 in linear above is because linear eats a line. I need to go through
 *  a lot of code to fix the +1/-1 compensation. But first, need to get this
 *  code done.
 *
 * \param[in] pixels_in     pixels per input row; must be > 0
 * \param[in] scale_factor  XYScale's X direction scale factor
 * \param[in] scaler_mode   PIE_XY_SCALER_LINEAR or PIE_XY_SCALER_RAPR
 * \param[out] pixels_out   expected pixels per output row
 *
 * \author David Poole
 * \date 13-Jun-2011
 *
 */

void pie_xy_calc_expected_pixels( int pixels_in, int scale_factor, int scaler_mode, int *pixels_out )
{
    unsigned long long width_in, width_out, scale64;

    XASSERT( pixels_in>0, pixels_in );

    width_in = (unsigned long long)pixels_in;
    scale64 = (unsigned long long)scale_factor;

    if( scaler_mode!=PIE_XY_SCALER_LINEAR ) {
        /* anything that is not Linear must be RAPR */
        XASSERT( scaler_mode==PIE_XY_SCALER_RAPR, scaler_mode );
        width_out = (width_in * scale64) / (1<<16);
    }
    else {
        width_out = safeint_divide_uint64( width_in * (1<<16), scale64 );
    }

    dbg2( "%s scale=%llu pin=%llu pout=%llu\n", __FUNCTION__, scale64, width_in, width_out );

    /* nothing may be left in the upper 32 bits */
    XASSERT( (unsigned long)(width_out>>32)==0, (uint32_t)(width_out>>32) );

    *pixels_out = width_out;
}

/**
 * \brief Utility function to calculate XYScale's scale factors 
 *
 * Fills in scale->x_scale and scale->y_scale from the numerator/denominator
 * pairs already stored in \p scale:
 *
 *  RAPR mode:    65536 * numerator / denominator
 *  Linear mode:  65536 * denominator / numerator
 *
 * \param[in,out] scale     fractions in, x_scale/y_scale out
 * \param[in] scaler_mode   PIE_XY_SCALER_LINEAR or PIE_XY_SCALER_RAPR
 *
 * \author David Poole
 * \date 10-Jun-2011
 *
 */

void pie_xy_calc_xyscale( struct pie_scale *scale, int scaler_mode )
{
    if( scaler_mode!=PIE_XY_SCALER_LINEAR ) {
        /* anything that is not Linear must be RAPR */
        XASSERT( scaler_mode==PIE_XY_SCALER_RAPR, scaler_mode );

        scale->x_scale = ((1<<16) * scale->x_numerator) / scale->x_denominator;
        scale->y_scale = ((1<<16) * scale->y_numerator) / scale->y_denominator;
    }
    else {
        /* Linear mode uses the inverted fraction */
        scale->x_scale = ((1<<16) * scale->x_denominator) / scale->x_numerator;
        scale->y_scale = ((1<<16) * scale->y_denominator) / scale->y_numerator;
    }
#if 0
    FIXME - WHAT ARE WE CHECKING HERE???  NOTE THAT THE NEW ASIC does not use the X and Y scale normal values....
    XASSERT( PIE_SCALE_SCALEX_SCALE(scale->x_scale)==scale->x_scale, scale->x_scale );
    XASSERT( PIE_SCALE_SCALEY_SCALE(scale->y_scale)==scale->y_scale, scale->y_scale );
#endif    
}
    
/**
 * \brief  Return max write DMA rows out based on our current platform.
 *
 * \return PIE_WDMA_MAX_ROWS
 *
 * \author David Poole
 * \date 17-Nov-2006
 *
 */

static int
pie_get_wdma_max_rows( void )
{
    return PIE_WDMA_MAX_ROWS;
}

/**
 * \brief  Calculate the optimal stripsize taking into account the scaling
 * factor.
 *
 * Starting from the caller's read strip size, the matching write strip size
 * is calculated. While it is larger than the write DMA limit, the read strip
 * size is reduced by one row and the calculation repeated.
 *
 * \param[in,out] read_stripsize
 *      in: the largest read strip size to try (1..PIE_RDMA_MAX_ROWS);
 *      out: the read strip size that fits. Untouched on failure.
 * \param[in] fracY  the Y scale factor
 * \param[in] scaler_mode PIE_XY_SCALER_LINEAR or PIE_XY_SCALER_RAPR
 * \param[out] write_stripsize
 *      the matching write strip size; set to 0 on failure
 *
 * \return 0 on success; SCANERR_INVALID_PARAM if no read strip size of at
 *         least one row fits the write DMA limit
 *
 * \author David Poole
 * \date 20-Sep-2005
 *
 *
 */

scan_err_t pie_xy_calc_stripsize(int *read_stripsize, 
                                 int fracY, 
                                 int scaler_mode, 
                                 int *write_stripsize)
{
    int rows_in;
    int rows_out;
    int wdma_limit;

    rows_in = *read_stripsize;
    XASSERT( rows_in>0 && rows_in<=PIE_RDMA_MAX_ROWS, rows_in );

    wdma_limit = pie_get_wdma_max_rows();

    do {
        if( scaler_mode!=PIE_XY_SCALER_LINEAR ) {
            XASSERT( scaler_mode==PIE_XY_SCALER_RAPR, scaler_mode );
            rows_out = (rows_in * fracY) / (1<<16);
        }
        else {
            rows_out = pie_xy_calc_linear_mode_strip_size( rows_in, fracY );
        }

        if( rows_out <= wdma_limit ) {
            /* found a pair that fits */
            *read_stripsize = rows_in;
            *write_stripsize = rows_out;

            /* check my math */
            XASSERT( *read_stripsize>0 && *read_stripsize<=PIE_RDMA_MAX_ROWS, *read_stripsize );
            XASSERT( *write_stripsize>0 && *write_stripsize<=PIE_WDMA_MAX_ROWS, *write_stripsize );

            return 0;
        }

        /* too many rows out; try one less row in */
        rows_in--;
    } while( rows_in > 0 );

    /* most likely cause of this failure is a scaling factor that is
     * too large 
     */
    XASSERT( 0, *read_stripsize );
    *write_stripsize = 0;
    return SCANERR_INVALID_PARAM;
}

/**
 * \brief  Calculate the strip size (rows) at every stage of the PIE pipeline.
 *
 * Works forward from a fixed read DMA strip size through the CS and MF
 * stages, subtracting the rows each enabled stage consumes. If XYScale is in
 * use, pie_xy_calc_stripsize() may shrink the XYScale input; the earlier
 * stages are then recalculated backward from that value.
 *
 * \param[in,out] strip  use_cs/use_mf/use_xy are read; the row counts are
 *                       written. Zeroed completely if the XYScale strip size
 *                       calculation fails.
 * \param[in] y_scale       Y scale factor handed to pie_xy_calc_stripsize()
 * \param[in] scaler_mode   PIE_XY_SCALER_LINEAR or PIE_XY_SCALER_RAPR
 *
 * \return SCANERR_NONE, or the error from pie_xy_calc_stripsize()
 *
 * NOTE(review): xy_out and wdma_out are only written when strip->use_xy is
 * set; otherwise the final wdma_out check sees whatever the caller left in
 * the structure - confirm callers initialize it.
 */
scan_err_t pie_calc_stripsize( struct pie_strip *strip, int y_scale, int scaler_mode )
{
    scan_err_t scerr;
    int cs_rows;
    int mf_rows;

    /* rows consumed by each optional stage */
    cs_rows = strip->use_cs ? 2 : 0;
    /* double border pixels for top and bottom */
    mf_rows = strip->use_mf ? 2 * pie_dsmf_get_eaten_border_pixels(NULL) : 0;
    strip->total_extra_rows = cs_rows + mf_rows;

    /* forward pass: read DMA -> CS -> MF -> XYScale input */
    //strip->rdma_in = PIE_RDMA_MAX_ROWS;
    strip->rdma_in = 29; //DSMF for Ricoh
    strip->cs_in = strip->rdma_in;
    strip->cs_out = strip->cs_in - cs_rows;
    strip->mf_in = strip->cs_out;
    strip->mf_out = strip->mf_in - mf_rows;
    strip->xy_in = strip->mf_out;

    if( strip->use_xy ) {
        scerr = pie_xy_calc_stripsize( &strip->xy_in, y_scale, scaler_mode, &strip->xy_out );
        if( scerr != SCANERR_NONE ) {
            memset( strip, 0, sizeof(*strip) );
            return scerr;
        }
        strip->wdma_out = strip->xy_out;

        /* backward pass: xy_in may have shrunk, so rebuild the upstream
         * stages from it
         */
        strip->mf_out = strip->xy_in;
        strip->mf_in = strip->mf_out + mf_rows;
        strip->cs_out = strip->mf_in;
        strip->cs_in = strip->cs_out + cs_rows;
        strip->rdma_in = strip->cs_in;
    }

    /* check for math mistakes */
    XASSERT( strip->rdma_in <= PIE_RDMA_MAX_ROWS, strip->rdma_in );
    XASSERT( strip->wdma_out <= PIE_WDMA_MAX_ROWS, strip->wdma_out );

    return SCANERR_NONE;
}

/**
 * \brief  Debug print the contents of a pie_scale structure.
 *
 * Prints the x and y fractions and the x/y scale factors through pie_dbg2(),
 * which expands to nothing unless PIE_DEBUG is defined.
 *
 * \param[in] scale  structure to print
 */
void pie_dbg2_scale( struct pie_scale *scale ) 
{
    pie_dbg2( "pie scale x=%d/%d y=%d/%d xs=%d ys=%d\n", 
        scale->x_numerator, scale->x_denominator, 
        scale->y_numerator, scale->y_denominator,
        scale->x_scale, scale->y_scale );
}

/**
 * \brief Log the contents of a pie_strip through pie_dbg2.
 *
 * Prints, on a single line, the row counts in pipeline order: read DMA in,
 * cs in/out, mf in/out, xy in/out and write DMA out.
 *
 * \param[in] strip  strip sizes to print (dereferenced without a NULL check)
 */
void pie_dbg2_strip( struct pie_strip *strip )
{
    pie_dbg2( "pie strip in=%d cs=%d,%d mf=%d,%d xy=%d,%d out=%d\n",
              strip->rdma_in,
              strip->cs_in,
              strip->cs_out,
              strip->mf_in,
              strip->mf_out,
              strip->xy_in,
              strip->xy_out,
              strip->wdma_out );
}

/**
 * \brief Log the contents of a pie_dma_row through pie_dbg2.
 *
 * Prints pixels in/out and bytes in/out on a single line.
 *
 * \param[in] row  row geometry to print (dereferenced without a NULL check)
 */
void pie_dbg2_row( struct pie_dma_row *row )
{
    pie_dbg2( "pie row pi=%d po=%d bi=%d bo=%d\n",
              row->pixels_in,
              row->pixels_out,
              row->bytes_in,
              row->bytes_out );
}

/**
 * \brief calculate pixels and bytes per row for DMA channel read and write
 *
 * The input side is taken directly from the arguments. The output pixel
 * count comes from pie_xy_calc_expected_pixels(), then has extra_pad added
 * and is finally rounded with ICE_DMA_ALIGN_ME() to satisfy the DMA
 * alignment requirement (16 bytes per the original notes for this function).
 *
 * \param[in] pixels_per_row   pixels in one input row
 * \param[in] pixel_size       bits per input pixel; must be 8 or 16 (anything
 *                             else trips an XASSERT)
 * \param[in] pie_output_type  output pixel format, used to look up the output
 *                             bytes per pixel
 * \param[in] scale            only scale->x_scale is read
 * \param[in] extra_pad     Optional extra padding which will be added/removed
 *                          from the row by the XYScale block via the OUTX register.
 *                          Final DMA align will occur after extra_pad is added
 *                          to pixels_out.
 * \param[in] scaler_mode PIE_XY_SCALER_LINEAR or PIE_XY_SCALER_RAPR
 * \param[out] row          zeroed, then filled with pixels_in, pixels_out,
 *                          bytes_in and bytes_out
 *
 * \author David Poole
 * \date 30-Aug-2005
 *
 */

void pie_xy_calc_row( int pixels_per_row, 
                      uint32_t pixel_size, 
                      pie_pixel_t pie_output_type,
                      struct pie_scale *scale,
                      int extra_pad,
                      int scaler_mode,
                      struct pie_dma_row *row )
{
    /* cast so the arguments really are the int that %d expects */
    dbg2( "%s ppr=%d psize=%d pout=%d\n", __FUNCTION__,
          pixels_per_row, (int)pixel_size, (int)pie_output_type );

    /* A sanity check of scale->x_scale against PIE_SCALE_SCALEX_SCALE() used
     * to live here. It was compiled out with a FIXME asking what it was
     * actually checking, and noting that the new ASIC does not use the X and
     * Y scale normal values.
     */

    memset( row, 0, sizeof(struct pie_dma_row ) );
    row->pixels_in = pixels_per_row;
    pie_xy_calc_expected_pixels( row->pixels_in, scale->x_scale, scaler_mode, &row->pixels_out ); 

    /* input is either one or two bytes per pixel */
    if( pixel_size == 8 ) {
        row->bytes_in = row->pixels_in;
    }
    else {
        XASSERT( pixel_size==16, pixel_size );
        row->bytes_in = 2 * row->pixels_in;
    }

    /* add in the pad pixels */
    row->pixels_out += extra_pad;

    /* pad up to make sure we're DMA aligned (note we're mixing pixels and
     * bytes; the math still works)
     */
    row->pixels_out = ICE_DMA_ALIGN_ME(row->pixels_out);

    row->bytes_out = pie_get_bytes_per_pixel(pie_output_type) * row->pixels_out;

    pie_dbg2_row( row );
}

/**********************************************************
 *
 * Overall PIE Functions
 *
 *********************************************************/

/**
 * \brief Soft reset all of PIE.
 *
 * Logs the function name through pie_dbg2 and then hands off to
 * pie_do_reset(), which performs the reset itself.
 **/
void pie_reset( void )
{
    pie_dbg2( "%s\n", __FUNCTION__ );

    pie_do_reset();
}

/**
 * \brief PIE soft setup.
 *
 *  Requests a soft reset of all PIE sub-blocks via pie_do_reset() and nothing
 *  else. It does not hook up or enable interrupts.
 *
 *  Historically this function restored PIE to as close to power-on values as
 *  possible, separating "get PIE back to a clean, runnable state" from the
 *  first-time initialization (turning on the hardware, hooking operating
 *  system interrupts, adding debug commands, one-time data structure
 *  set-up). Per the maintainers' note, that is no longer needed here because
 *  every PIE register is written anyway when a new pie structure is
 *  requested.
 *
 * \retval SCANERR_NONE always; no failure is currently detected
 *
 * \author David Poole
 * \date 10-Mar-05
 *
 **/
scan_err_t pie_soft_setup( void )
{
    /* request soft reset of all pie subblocks */
    pie_do_reset();

    /* davep 14-Mar-2011 ; a call to pie_interrupt_enable() used to follow the
     * reset ("turn on everything and hope for the best"); it is currently
     * disabled.
     */

    return SCANERR_NONE;
}

/**
 * \brief PIE hardware and OS setup.
 *
 *  One-time initial PIE setup; intended to be called once in the life of the
 *  system. At present the only work done is a call to pie_soft_setup().
 *
 * \bug Not yet returning hardware failure! pie_soft_setup() is hardwired to
 *      succeed, so this function is too.
 *
 * \retval SCANERR_NONE success
 * \retval other        whatever non-zero error pie_soft_setup() reported
 *
 * \author David Poole
 * \date 6-Mar-05
 *
 **/

scan_err_t pie_onetime_init( void )
{
    scan_err_t setup_status;

    pie_dbg2( "%s\n", __FUNCTION__ );

    setup_status = pie_soft_setup();
    if( setup_status == 0 ) {
        return SCANERR_NONE;
    }

    /* pie_soft_setup() cannot fail at this time, but pass the error along
     * just in case it ever does.
     */
    return setup_status;
}

