/*
**************************************************************************
This Source Code Form is subject to the terms of the Mozilla Public
License, v. 2.0. If a copy of the MPL was not distributed with this file,
You can obtain one at http://mozilla.org/MPL/2.0/.

Copyright (c) 2014-2015, Marvell International Ltd.

Alternatively, this software may be distributed under the terms of the GNU
General Public License Version 2, and any use shall comply with the terms and
conditions of the GPL.  A copy of the GPL is available at
http://www.gnu.org/licenses/old-licenses/gpl-2.0.html

THE FILE IS DISTRIBUTED AS-IS, WITHOUT WARRANTY OF ANY KIND, AND THE
IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
ARE EXPRESSLY DISCLAIMED.  The GPL license provides additional details about
this warranty disclaimer.
******************************************************************************
*/



#include "jhw_api.h"
#include "jhw_dev.h"
#include "jhw_uio.h"
#include "jhw_core.h"
#include "mqueue.h"
#include "jpeghw_lib.h"


// Fill the device's quantization table (core_info.dequant_q_array) for the
// quality currently set in jhwcinfo.
// If the client supplied a get_quant_table callback in its cmgr, that
// callback provides the table; otherwise the driver's default table is used.
// Returns whatever the selected table provider returns.
static jpeghw_error_type_t _jhwc_init_quant_core(struct jpeghw_compress_struct * jhwcinfo)
{
    DBG_INFO("Start %s() call\n", __func__);

    struct jpeg_dev_info_s * dev = JHWC_GET_DEV(jhwcinfo);
    device_core_info_ptr core = &dev->core_info;

    // in: capacity of the destination array in bytes; passed by address so
    // the provider may update it
    uint32_t table_bytes = sizeof(core->dequant_q_array);
    jpeghw_error_type_t status = e_JPEGHW_ERROR;

    if (jhwcinfo->cmgr == NULL || jhwcinfo->cmgr->get_quant_table == NULL)
    {
        // no client override - fall back to the built-in table
        status = jhw_get_default_quant_table(jhwcinfo->quality, &core->dequant_q_array[0], &table_bytes);
    }
    else
    {
        status = jhwcinfo->cmgr->get_quant_table(jhwcinfo, jhwcinfo->quality, &core->dequant_q_array[0], &table_bytes);
    }

    DBG_INFO("Exit %s() call\n", __func__);

    return status;
}

// Build the Huffman lookup arrays held in the device core_info structure.
// Four tables are processed in order: DC table 0, AC table 0, DC table 1,
// AC table 1.  For each one the bits/val description is fetched from the
// client's get_huff_table callback when one is installed, otherwise from
// jhw_get_default_huff_table(), and then expanded into the per-length
// pointer/first/last arrays, the symbol array and the length+code array.
// Returns e_JPEGHW_SUCCESS when all four tables were built; on the first
// table fetch failure the error is reported through jpeghw_error() and the
// failing result is returned.
static jpeghw_error_type_t _jhwc_init_huff_core(struct jpeghw_compress_struct * jhwcinfo)
{
    int i, j, t;

    DBG_INFO("Start %s() call\n", __func__);

    struct jpeg_dev_info_s * dev = JHWC_GET_DEV(jhwcinfo);
    device_core_info_ptr core_info = &dev->core_info;

    // sram_i: running index into huff_sym_array
    // ptr_base: base index of the current table's 16-entry slice in the
    //           huff_ptr/huff_fsymbol/huff_lsymbol arrays
    uint32_t sram_i = 0;
    uint32_t ptr_base = 0;

    for (t=0; t<4; t++)
    {
        // bits[i] holds the number of codes of length index i,
        // val[] holds the symbols in code order
        uint8_t bits[16];
        uint8_t val[162];
        uint32_t bits_size = sizeof(bits);
        uint32_t val_size = sizeof(val);

        uint32_t table_num = 0;
        uint32_t tree_val = 0;
        uint32_t code_valid = 0;
        uint32_t len_code_addr_base = 0;
        bool ac = false;

        memset(bits, 0, bits_size);
        memset(val, 0, val_size);

        // select the table to build and where its entries start:
        // symbol/len-code entries begin at 0, 12, 12+162 and 12+162+12,
        // and ptr_base advances by 16 for every table after the first
        // NOTE(review): there is no default case; t is limited to 0..3 by
        // the loop bounds above.
        switch (t)
        {
            case 0: // DC table 0
                table_num = 0;
                sram_i = len_code_addr_base = 0;
                ptr_base = 0;
                ac = false;
                break;

            case 1: // AC table 0
                table_num = 0;
                sram_i = len_code_addr_base = 12;
                ptr_base += 16;
                ac = true;
                break;

            case 2: // DC table 1
                table_num = 1;
                sram_i = len_code_addr_base = 12+162;
                ptr_base += 16;
                ac = false;
                break;

            case 3: // AC table 1
                table_num = 1;
                sram_i = len_code_addr_base = 12+162+12;
                ptr_base += 16;
                ac = true;
                break;
        }

        // load Table bits and values
        // (client callback when provided, otherwise the driver default)
        jpeghw_error_type_t result = e_JPEGHW_ERROR;
        if(jhwcinfo->cmgr && jhwcinfo->cmgr->get_huff_table)
        {
            result = jhwcinfo->cmgr->get_huff_table(jhwcinfo, table_num, ac, &bits[0], &bits_size, &val[0], &val_size);
        }
        else
        {
            result = jhw_get_default_huff_table(table_num, ac, &bits[0], &bits_size, &val[0], &val_size);
        }

        if (e_JPEGHW_SUCCESS == result)
        {
            // table_i is the index of the next unread entry of val[]
            uint32_t table_i = 0;

            // loop through the bits and values to calculate the sram values
            // tree_val is the current node as we work through the Huffman tree
            // code_valid is the code length enable values for Code Enable registers
            // table_i is the index into the val table
            // huff_ptr_array holds, per code length, the index of its first symbol
            // huff_fsymbol_array holds, per code length, the first code value
            // huff_lsymbol_array holds, per code length, the last code value
            // huff_sym_array holds the symbols in code order
            // huff_code_array holds (length index << 16) | code, indexed by symbol
            // NOTE(review): bits_size and val_size are passed by address to the
            // table provider; the loops below assume bits_size stays <= 16 and
            // that the sum of bits[] does not exceed the 162 entries of val[] -
            // neither is checked here.
            for (i=0; i<bits_size; i++)
            {
                uint8_t len;
                uint32_t rrrr, ssss;

                len = bits[i];
                if (len > 0)
                {
                    // at least one code uses this length: enable it and
                    // remember where its symbols start
                    code_valid = code_valid | (1<<i);
                    core_info->huff_ptr_array[ptr_base+i] = sram_i;
                    for (j=0; j<len; j++)
                    {
                        uint32_t len_code_addr = len_code_addr_base;

                        // first code of this length
                        if (j==0)
                        {
                            core_info->huff_fsymbol_array[ptr_base+i] = tree_val;
                        }

                        // last code of this length
                        if (j==(len-1))
                        {
                            core_info->huff_lsymbol_array[ptr_base+i] = tree_val;
                        }

                        core_info->huff_sym_array[sram_i] = val[table_i];

                        // split the symbol into its upper and lower nibbles
                        rrrr = (core_info->huff_sym_array[sram_i] >> 4) & 0xF;
                        ssss = core_info->huff_sym_array[sram_i] & 0xF;

                        // compute this symbol's slot in huff_code_array:
                        // AC: 0x00 -> offset 0, 0xF0 -> offset 1,
                        //     everything else -> 1 + 10*rrrr + ssss
                        // DC: offset is ssss
                        if (ac)
                        {
                            if (rrrr == 0 && ssss == 0)
                            {
                                // do nothing - offset of 0
                            }
                            else if (rrrr == 0xF && ssss == 0)
                            {
                                len_code_addr += 1; // offset 1
                            }
                            else
                            {
                                len_code_addr += 1 + (10 * rrrr) + ssss;
                            }
                        }
                        else
                        {
                            len_code_addr += ssss;
                        }

                        // upper half: zero-based length index, lower half: code value
                        core_info->huff_code_array[len_code_addr] = ((i)<<16) | (tree_val);

                        sram_i++;
                        table_i++;
                        tree_val += 1;
                    }
                }

                // moving to the next (one bit longer) code length
                tree_val = (tree_val << 1);
            }
        }
        else
        {
            DBG_ERR("%s() ERROR - jpeghw_get_huff_table() call failed - 0x%x!\n", __func__, result);
            jpeghw_error(&jhwcinfo->common, e_JPEGHW_ERROR);
            return result;
        }

        // ac_code_en_1 and ac_code_en_0 are the values to program in
        // Huffman Decode Code Enable AC Register
        // dc_code_en_1 and dc_code_en_0 are the values to program in
        // Huffman Decode Code Enable DC Register
        if(ac)
        {
          if(table_num == 1)
              core_info->ac_code_en_1 = code_valid;
          else
              core_info->ac_code_en_0 = code_valid;
        }
        else
        {
          if(table_num == 1)
              core_info->dc_code_en_1 = code_valid;
          else
              core_info->dc_code_en_0 = code_valid;
        }
    }

    DBG_INFO("Exit %s() call\n", __func__);

    return e_JPEGHW_SUCCESS;
}

// Open the IDMA and ODMA message queues used to communicate interrupt status
// from the core ISR to the firmware.
// Both queue descriptors are stored in the device structure.  If either
// descriptor is negative the error is reported through jpeghw_error() and
// e_JPEGHW_ERROR is returned; otherwise e_JPEGHW_SUCCESS.
static jpeghw_error_type_t _jhwc_open_message_queues(struct jpeghw_compress_struct * jhwcinfo)
{
    struct jpeg_dev_info_s * dev = JHWC_GET_DEV(jhwcinfo);

    // both opens are always attempted, input side first
    dev->imqd.mqd = jhw_open_message_queue(dev, IDMA_C_MSGQ_NAME, dev->imqd.name);
    dev->omqd.mqd = jhw_open_message_queue(dev, ODMA_C_MSGQ_NAME, dev->omqd.name);

    if (dev->imqd.mqd >= 0 && dev->omqd.mqd >= 0)
    {
        return e_JPEGHW_SUCCESS;
    }

    jpeghw_error(&jhwcinfo->common, e_JPEGHW_ERROR);
    return e_JPEGHW_ERROR;
}

// Return the compression configuration flags.  The flags describe the
// capabilities available for this driver; none are currently set, so the
// result is always 0.
//
// Flags that could be advertised here (both are left disabled):
//
//   JPEGHW_CONFIG_FLAG_MCU_WIDTH_ALIGNMENT
//     setting the flag implies the hardware cannot pad the width to achieve
//     mcu alignment, so the client is responsible for alignment and the
//     width is set to the mcu aligned image width
//
//   JPEGHW_CONFIG_FLAG_MCU_HEIGHT_ALIGNMENT
//     setting the flag implies the hardware cannot handle strips that are
//     not mcu aligned, so the client is responsible for alignment
uint32_t jhwc_get_config_flags()
{
    const uint32_t no_flags = 0;

    return no_flags;
}

// create a jpeg_dev_info structure
// open compression access to the hardware via uio devices.
// allocate memory for the jpeg_dev_info structure and attatch it 
// to the jpeghw_decompress structure.
jpeghw_error_type_t jhw_open_compress(struct jpeghw_compress_struct * jhwcinfo)
{
    struct jpeg_dev_info_s * dev = NULL;
    jpeghw_error_type_t jerr = e_JPEGHW_SUCCESS;

    DBG_INFO("[%d]%s()\n", gettid(), __func__);
    if(jhwcinfo == NULL)
    {
        DBG_ERR("jpeghw compress structure is not initialized, cannot continue.\n");
        jpeghw_error(&jhwcinfo->common, e_JPEGHW_ERR_INVALID_PARAMETERS);
        return e_JPEGHW_ERR_INVALID_PARAMETERS;
    }

    if (! jhw_is_initialized()) 
    {
        DBG_ERR("JHW driver is not initialized, cannot continue.\n");
        jpeghw_error(&jhwcinfo->common, e_JPEGHW_ERR_BAD_STATE);
        return e_JPEGHW_ERROR;
    }

    INC_G_ALLOCATED();
    dev = (struct jpeg_dev_info_s *)MEM_MALLOC(sizeof(struct jpeg_dev_info_s));
    if(dev == NULL)
    {
        jhwcinfo->common.global_jpeg_state = e_JPEGHW_STATE_ERROR;
        jpeghw_error(&jhwcinfo->common, e_JPEGHW_ERR_OUT_OF_MEMORY);
        return e_JPEGHW_ERR_OUT_OF_MEMORY;
    }
    memset(dev, 0, sizeof(struct jpeg_dev_info_s));

    ((jpeghw_dev_ptr)jhwcinfo->common.jpeghw_context)->jhw_dev = dev;
    dev->type = COMPRESSOR;

    // open the next available uio device associated with the JPEG block.
    // if JPEGHW_CONFIG_FLAG_BLOCK_JPEG_CORE_AVAIL bit is set in the config_flags
    // then open hangs waiting for a core to free up
    jerr = jhw_open_available_core(dev, (jhwcinfo->common.config_flags & 
                                         JPEGHW_CONFIG_FLAG_BLOCK_JPEG_CORE_AVAIL));
    if(jerr != e_JPEGHW_SUCCESS)
    {
        DEC_G_ALLOCATED();
        MEM_FREE_AND_NULL(dev);
        jhwcinfo->common.global_jpeg_state = e_JPEGHW_STATE_ERROR;
        jpeghw_error(&jhwcinfo->common, e_JPEGHW_ERROR);
        return jerr;
    }

    // attach the compressor to the uio device
    jerr = jhw_attach_to_device(dev);
    if(jerr != e_JPEGHW_SUCCESS)
    {
        DEC_G_ALLOCATED();
        MEM_FREE_AND_NULL(dev);
        jhwcinfo->common.global_jpeg_state = e_JPEGHW_STATE_ERROR;
        jpeghw_error(&jhwcinfo->common, e_JPEGHW_ERROR);
        return jerr;
    }

    // Set default values
    // pad makes no sense for compression
    dev->core_info.pad = 0;
    jhwcinfo->quality = JPEG_DEFAULT_QUALITY;
    jhwcinfo->common.mcu_height = 8;
    jhwcinfo->common.mcu_width = 8;
    dev->cookie = DEV_COOKIE;

    // open the idma and odma comm message queues
    jerr= _jhwc_open_message_queues(jhwcinfo);
    if(jerr != e_JPEGHW_SUCCESS)
    {
        jhwcinfo->common.global_jpeg_state = e_JPEGHW_STATE_ERROR;
        jpeghw_error(&jhwcinfo->common, e_JPEGHW_ERROR);
        return jerr;
    }

    return e_JPEGHW_SUCCESS;
}

// start the compression.
// sanity check jpeghw_common structure values
// open the message queues used to communicate interrupt status
// intialize the JPEG core registers, intialize the IDMA and ODMA registers.
// start the ODMA thread that will via callbacks sent ODMA data back to the user
jpeghw_error_type_t jhw_start_compress(struct jpeghw_compress_struct * jhwcinfo)
{
    DBG_INFO("[%d]---%s()---\n", gettid(), __func__);

    JHWC_VALIDATE(jhwcinfo);
    struct jpeg_dev_info_s * dev = JHWC_GET_DEV(jhwcinfo);
    jpeghw_dev_ptr   jdev = JPEGHWC_GET_DEV(jhwcinfo);

    if(jdev->info->image_width == 0 || jdev->info->image_height ==  0 ||
                                       jdev->mcu_aligned_width == 0)
    {
        DBG_ERR("%s width/height are out of bounds\n", __func__);
        jhwcinfo->common.global_jpeg_state = e_JPEGHW_STATE_ERROR;
        jpeghw_error(&jhwcinfo->common, e_JPEGHW_ERR_INVALID_PARAMETERS);
        return e_JPEGHW_ERR_INVALID_PARAMETERS;
    }

    // sanity check some parameters
    if(jdev->hw_config_flags & JPEGHW_CONFIG_FLAG_MCU_WIDTH_ALIGNMENT)
    {
        // the hardware cannot pad width to achive mcu alignment so
        // the client is responsible for mcu alignment, therefore
        // set the width to the mcu aligned image width
        dev->pixel_width = jdev->mcu_aligned_width;
        dev->byte_width  = jdev->mcu_bytes_per_row;
        if(dev->byte_width % jdev->info->mcu_width  != 0)
        {
            DBG_ERR("%s Image width is not a multiple of the MCU width\n", __func__);
            jhwcinfo->common.global_jpeg_state = e_JPEGHW_STATE_ERROR;
            jpeghw_error(&jhwcinfo->common, e_JPEGHW_ERR_INVALID_PARAMETERS);
            return e_JPEGHW_ERR_INVALID_PARAMETERS;
        }
    }
    else
    {
        // the hardware can pad width to achive mcu alignment so
        // the client is not responsible for mcu alignment, therefore
        // set the width to the image width
        dev->pixel_width = jdev->info->image_width;
        if(dev->pixel_width % jdev->info->mcu_width != 0)
        {
            dev->pixel_width += jdev->info->mcu_width -(dev->pixel_width % jdev->info->mcu_width);
        }
        dev->byte_width  = dev->pixel_width * jdev->info->bytes_per_pixel;
    }

    dev->height = jdev->info->image_height;
    if(!(jdev->hw_config_flags & JPEGHW_CONFIG_FLAG_MCU_HEIGHT_ALIGNMENT))
    {
        if(dev->height % jdev->info->mcu_height != 0)
        {
            dev->height += jdev->info->mcu_height -(dev->height % jdev->info->mcu_height);
        }
    }

    if((jdev->info->mcu_height != 8 && jdev->info->mcu_width !=  16) ||
            (jdev->info->mcu_height != 8 && jdev->info->mcu_width != 16))
    {
        DBG_ERR("%s MCU width/height are out of bounds\n", __func__);
        jhwcinfo->common.global_jpeg_state = e_JPEGHW_STATE_ERROR;
        jpeghw_error(&jhwcinfo->common, e_JPEGHW_ERR_INVALID_PARAMETERS);
        return e_JPEGHW_ERR_INVALID_PARAMETERS;
    }

    if(jdev->info->bytes_per_pixel != 1 && jdev->info->bytes_per_pixel != 3)
    {
        DBG_ERR("%s Bytes per pixel out of bounds\n", __func__);
        jhwcinfo->common.global_jpeg_state = e_JPEGHW_STATE_ERROR;
        jpeghw_error(&jhwcinfo->common, e_JPEGHW_ERR_INVALID_PARAMETERS);
        return e_JPEGHW_ERR_INVALID_PARAMETERS;
    }

    // 16 pixel mcu dimensions are not allowed for monochrome
    if(jdev->info->bytes_per_pixel == 1 &&
        (jdev->info->mcu_height == 16 || jdev->info->mcu_width == 16))
    {
        DBG_ERR("%s 16 byte MCUs are not allowed for monochrome\n", __func__);
        jhwcinfo->common.global_jpeg_state = e_JPEGHW_STATE_ERROR;
        jpeghw_error(&jhwcinfo->common, e_JPEGHW_ERR_INVALID_PARAMETERS);
        return e_JPEGHW_ERR_INVALID_PARAMETERS;
    }

    dev->quality = jhwcinfo->quality;

    _jhwc_init_quant_core(jhwcinfo);
    _jhwc_init_huff_core(jhwcinfo);

    // intialize the jpeg hardware block
    if(jhwc_init_compression_hw(jhwcinfo) != OK)
    {
        jhwcinfo->common.global_jpeg_state = e_JPEGHW_STATE_ERROR;
        return e_JPEGHW_ERROR;
    }

    // intialize the jpeg DMAs
    dev->number_idma_descriptors = MAX_IDMA_DESCRIPTORS;
    if(jhwc_init_odma_list(jhwcinfo) != OK)
    {
        jhwcinfo->common.global_jpeg_state = e_JPEGHW_STATE_ERROR;
        return e_JPEGHW_ERROR;
    }
    if(jhwc_init_idma_list(jhwcinfo) != OK)
    {
        jhwcinfo->common.global_jpeg_state = e_JPEGHW_STATE_ERROR;
        return e_JPEGHW_ERROR;
    }

    // turn on the interrupts
    if(jhw_enable_device_interrupts(dev) != OK)
    {
        DBG_ERR("%s enable device interrupts failed\n", __func__);
        jhwcinfo->common.global_jpeg_state = e_JPEGHW_STATE_ERROR;
        jpeghw_error(&jhwcinfo->common, e_JPEGHW_ERROR);
        return e_JPEGHW_ERROR;
    }

    // start the odma thread used retrieve data from the ODMA via callbacks
    // to the user.  the callback is defined in the cmgr structure.
    if(jhwc_start_odma(jhwcinfo) != OK)
    {
        jhwcinfo->common.global_jpeg_state = e_JPEGHW_STATE_ERROR;
        return e_JPEGHW_ERROR;
    }

    return e_JPEGHW_SUCCESS;
}

// Detach from and close access to the uio device.
// Closes the IDMA and ODMA comm message queues and unlinks their names,
// then frees the jpeg_dev_info structure that was allocated in the open
// call.
static void _jhw_close(struct jpeghw_common_struct * jhwinfo)
{
    struct jpeg_dev_info_s * dev = JHW_GET_DEV(jhwinfo);

    DBG_INFO("[%d]%s()\n", gettid(), __func__);

    // message queues: log and close both descriptors, then remove the names
    DBG_INFO("[%d]mqd=%d close\n", gettid(), dev->imqd.mqd);
    DBG_INFO("[%d]mqd=%d close\n", gettid(), dev->omqd.mqd);

    mq_close(dev->imqd.mqd);
    mq_close(dev->omqd.mqd);

    unlink(dev->imqd.name);
    unlink(dev->omqd.name);

    // hardware: detach first, then release the core
    jhw_detach_from_device(dev);
    jhw_close_core(dev);

    // finally give back the device structure itself
    DEC_G_ALLOCATED();
    MEM_FREE_AND_NULL(dev);
}

// Close compression.
// Validates the compress structure and then releases the message queues,
// the uio device and the device structure through _jhw_close().
// Returns e_JPEGHW_ERR_INVALID_PARAMETERS when jhwcinfo is NULL,
// e_JPEGHW_SUCCESS otherwise (JHWC_VALIDATE may return earlier on its own).
jpeghw_error_type_t jhw_close_compress(struct jpeghw_compress_struct * jhwcinfo)
{
    DBG_INFO("[%d]%s()\n", gettid(), __func__);
    // make sure the input parameter is valid
    if(jhwcinfo == NULL)
    {
        // there is no structure to report the error through; forming
        // &jhwcinfo->common from a NULL pointer must be avoided
        DBG_ERR("%s() jpeghw compress structure is NULL, cannot continue.\n", __func__);
        return e_JPEGHW_ERR_INVALID_PARAMETERS;
    }
    JHWC_VALIDATE(jhwcinfo);

    _jhw_close((struct jpeghw_common_struct *)jhwcinfo);
    return e_JPEGHW_SUCCESS;
}

// Finish a compression.
// Destroys the DMA lists created in the start-compression call, waiting for
// the encode to complete, and then resets the core and both DMAs.
// Returns e_JPEGHW_ERROR if either list could not be destroyed, otherwise
// e_JPEGHW_SUCCESS.  The resets are performed in both cases.
jpeghw_error_type_t jhw_finish_compress(struct jpeghw_compress_struct * jhwcinfo)
{
    DBG_INFO("[%d]%s()\n", gettid(), __func__);

    jpeghw_error_type_t status = e_JPEGHW_SUCCESS;

    // make sure the input parameter is valid
    JHWC_VALIDATE(jhwcinfo);

    // Passing true as the 2nd parameter makes the destroy calls wait for
    // encode complete, i.e. for the last buffer to be compressed and
    // returned to the user.
    // NOTE: with wait=true the odma must always be shut down first: the odma
    //  is most likely still active, and shutting down the idma while the
    //  odma is active causes an intermittent odma hang.
    if (jhwc_destroy_odma_list(jhwcinfo, true) != OK)
    {
        status = e_JPEGHW_ERROR;
    }
    if (jhwc_destroy_idma_list(jhwcinfo, true) != OK)
    {
        status = e_JPEGHW_ERROR;
    }

    // bring the hardware back to its reset state
    jhwc_reset_core((struct jpeghw_common_struct *)jhwcinfo);
    jhwc_reset_idma(jhwcinfo);
    jhwc_reset_odma(jhwcinfo);

    return status;
}

// Abort a compression.
// Much like finishing a compression except there is no wait for completion
// of the DMA list destruction: a TERMINATE_THRD event is posted to the ODMA
// message queue and both DMA lists are destroyed without waiting.
// Returns e_JPEGHW_ERROR if the terminate event could not be sent or either
// list could not be destroyed, otherwise e_JPEGHW_SUCCESS.  Both lists are
// destroyed regardless of earlier failures.
jpeghw_error_type_t jhw_abort_compress(struct jpeghw_compress_struct * jhwcinfo)
{
    DBG_INFO("[%d]%s()\n", gettid(), __func__);
    // make sure the input parameter is valid
    jpeghw_error_type_t retval = e_JPEGHW_SUCCESS;
    JHWC_VALIDATE(jhwcinfo);
    struct jpeg_dev_info_s * dev = JHWC_GET_DEV(jhwcinfo);

    DBG_INFO("[%d]Aborting odma thread!\n", gettid());

    // zero the whole event first so that no uninitialized bytes are copied
    // into the message queue
    jpeg_event_t jpeg_event;
    memset(&jpeg_event, 0, sizeof(jpeg_event));
    jpeg_event.event_flags = TERMINATE_THRD;
    jpeg_event.dev_info_ptr = dev;

    // mq_send() returns 0 on success and -1 on failure
    if(mq_send(dev->omqd.mqd, (char *)&jpeg_event, sizeof(jpeg_event_t), 0) != 0)
    {
        DBG_ERR("[%d]%s() failed to send the terminate event to the odma thread\n",
                gettid(), __func__);
        retval = e_JPEGHW_ERROR;
    }

    // the false in the 2nd parameter below tells the destroy calls to not wait
    // for encode complete.
    if(jhwc_destroy_idma_list(jhwcinfo, false) != OK) retval = e_JPEGHW_ERROR;
    if(jhwc_destroy_odma_list(jhwcinfo, false) != OK) retval = e_JPEGHW_ERROR;

    return retval;
}

// if the big_buffer contains lines that are not mcu aligned then create a new
// big_buffer with lines that are mcu aligned (longer than the initial lines), 
// copy the original data into the lines and pad the remaining bytes with a 
// fill value
static uint32_t _jhwc_add_mcu_width_alignment(struct jpeghw_compress_struct * jhwcinfo, 
                             struct BigBuffer_s **big_buffer, uint32_t num_lines)
{
    DBG_INFO("[%d]%s()\n", gettid(), __func__);

    jpeghw_dev_ptr   jdev = JPEGHWC_GET_DEV(jhwcinfo);
    struct jpeg_dev_info_s * dev = JHWC_GET_DEV(jhwcinfo);

    uint32_t byte_width = (*big_buffer)->datalen / num_lines;
    uint32_t pixel_width = byte_width / jdev->info->bytes_per_pixel;

    if(pixel_width % jdev->info->mcu_width == 0) return e_JPEGHW_SUCCESS;

    DBG_INFO("[%d]  pixel_len %d mcu_w %d remainder %d\n", gettid(), pixel_width, jdev->info->mcu_width,
            jdev->info->mcu_width - (pixel_width%jdev->info->mcu_width));

    uint32_t line_remainder = jdev->info->mcu_width -(pixel_width % jdev->info->mcu_width);
    uint32_t npixel_width = pixel_width + line_remainder;
    uint32_t nbyte_width = npixel_width * jdev->info->bytes_per_pixel;
    uint32_t nlen = nbyte_width * num_lines;

    INC_G_ALLOCATED();
    struct BigBuffer_s *nbb = dma_buffer_malloc(0, nlen);
    if(nbb == NULL)
    {
        jpeghw_error(&jhwcinfo->common, e_JPEGHW_ERR_OUT_OF_MEMORY);
        return e_JPEGHW_ERR_OUT_OF_MEMORY;
    }

    INC_G_MAPPED();
    char *buf = dma_buffer_mmap_forcpu(*big_buffer);
    INC_G_MAPPED();
    char *nbuf = dma_buffer_mmap_forcpu(nbb);

    memset(nbuf, JHW_FILL_BYTE_VALUE, nlen);

    DBG_INFO("[%d]-----pad width from %d to %d len\n",gettid(),byte_width, nbyte_width);
    uint32_t idx=0;
    for(idx=0; idx<num_lines; idx++)
    {
        memcpy(nbuf + (idx * nbyte_width), buf + (idx * byte_width), byte_width);
    }
    DEC_G_MAPPED();
    dma_buffer_unmmap_forcpu(*big_buffer);
    DEC_G_MAPPED();
    dma_buffer_unmmap_forcpu(nbb);
    
    // delete old big buffers
    *big_buffer = JHW_FREE_BIG_BUFFER(*big_buffer);

    *big_buffer = nbb;

    dev->byte_width = nbyte_width;
    dev->pixel_width = npixel_width;

    return e_JPEGHW_SUCCESS;
}

// aggregates a previous "too small buffer" with the current big buffer to
// form a buffer hopefully large enough for the DMAs
// 
// if the number of lines of the big_buffer summed with any reminder from a
// previous call is less than the MCU height then move the buffer
// to  the too_small_buffer pointer and return to the users.
// remainders occur when the number of lines sent to the DMA is not
// a multiple of the MCU height.
//
static bool _jhw_buffer_aggregator(struct jpeghw_compress_struct * jhwcinfo,
                struct BigBuffer_s **big_buffer, uint32_t num_lines, bool eoi)
{
    jpeghw_dev_ptr   jdev = JPEGHWC_GET_DEV(jhwcinfo);
    struct jpeg_dev_info_s * dev = JHWC_GET_DEV(jhwcinfo);

    DBG_INFO("[%d]%s num_lines:%d eoi:%d\n",gettid(), __func__, num_lines, eoi);
    struct BigBuffer_s *input_bb = *big_buffer;
   
    // if the too_small_buffer from a previous call exists combine it with big_buffer
    if(dev->bb_info.too_small_big_buffer != 0)
    {
        DBG_INFO("[%d]  too-small-buf found, combine with incoming big-buf\n",gettid());
        uint32_t tsbuf_len = dev->bb_info.too_small_big_buffer->datalen;
        uint32_t ibuf_len = input_bb->datalen; 
        uint32_t nbuf_len = tsbuf_len + ibuf_len;
        DBG_INFO("[%d]  ibuf_len=%d ts_buf_len=%d\n",gettid(), ibuf_len, tsbuf_len);

        INC_G_ALLOCATED();
        struct BigBuffer_s *nbb = dma_buffer_malloc(0, nbuf_len);
        if(nbb == NULL)
        {
            return false;
        }
        INC_G_MAPPED();
        char *nbuf  = dma_buffer_mmap_forcpu(nbb);

        INC_G_MAPPED();
        char *tsbuf = dma_buffer_mmap_forcpu(dev->bb_info.too_small_big_buffer);
        memcpy(nbuf, tsbuf, tsbuf_len);
        DEC_G_MAPPED();
        dma_buffer_unmmap_forcpu(dev->bb_info.too_small_big_buffer);

        INC_G_MAPPED();
        char *ibuf  = dma_buffer_mmap_forcpu(input_bb);
        memcpy(nbuf + tsbuf_len, ibuf, ibuf_len);
        DEC_G_MAPPED();
        dma_buffer_unmmap_forcpu(input_bb);
        DEC_G_MAPPED();
        dma_buffer_unmmap_forcpu(nbb);

        input_bb = JHW_FREE_BIG_BUFFER(input_bb);
        dev->bb_info.too_small_big_buffer = 
                   JHW_FREE_BIG_BUFFER(dev->bb_info.too_small_big_buffer);
        *big_buffer = nbb;

        dev->bb_info.too_small_big_buffer = 0;
    }

    uint32_t source_len = (*big_buffer)->datalen;
    uint32_t source_lines = source_len / dev->byte_width;
    DBG_INFO("[%d]   len:%d w:%d num_lines:%d \n",gettid(), source_len, dev->byte_width, source_lines);

    // if the number of lines of the big_buffer summed with any reminder from a
    // previous call is less than the MCU height then move the buffer
    // to  the too_small_buffer pointer and return to the user, no sense in sending
    // this buffer to the DMAs, they can't handle it.
    if(!eoi && dev->bb_info.remainder_lines + source_lines < jdev->info->mcu_height)
    {
        DBG_INFO("[%d]   still too small R:%d L:%d\n",gettid(), dev->bb_info.remainder_lines, source_lines);
        dev->bb_info.too_small_big_buffer = *big_buffer;
        return false;
    }

    return true;
}

// If JPEGHW_CONFIG_FLAG_MCU_HEIGHT_ALIGNMENT == 0 it indicates that the driver
// (this code) can handle buffer heights that are not MCU height aligned.
// This function is used to make sure that the number of lines in the input
// big_buffer is a multiple of the MCU height.
// The first thing to do is check whether the buffer (combined with any saved
// too-small buffer) contains fewer lines than the MCU height; if it does, the
// buffer is saved as a too_small_buffer to be used later and the function
// returns to the user.
// If the given big_buffer is at the end-of-image (eoi==true) and it, combined
// with any remainder from a previous DMA call plus a previous
// too-small-buffer, is not MCU height aligned, then the end of the buffer is
// padded with enough "fill" lines to make the big_buffer MCU height aligned.
//
// Returns the number of lines accepted: num_lines when the buffer was only
// saved for later, the padded line count when end-of-image padding was
// added, num_lines otherwise.  Returns 0 on failure (padding buffer could
// not be allocated, or the DMA rejected the data); the global jpeg state is
// set to e_JPEGHW_STATE_ERROR in that case.
static uint32_t _jhwc_write_buffer(struct jpeghw_compress_struct * jhwcinfo,
                struct BigBuffer_s *big_buffer, uint32_t num_lines, bool eoi)
{
    DBG_INFO("[%d]%s() num_lines:%d eoi:%d\n", gettid(), __func__, num_lines, eoi);

    jpeghw_dev_ptr   jdev = JPEGHWC_GET_DEV(jhwcinfo);
    struct jpeg_dev_info_s * dev = JHWC_GET_DEV(jhwcinfo);

    struct BigBuffer_s *input_bb = big_buffer;
    uint32_t l_num_lines = num_lines;

    // when buffers are too small for the idma descriptors the buffer aggregator combines
    // adjacent buffers.  The previous small buffer is saved in the bb_info structure
    // as a too_small_big_buffer
    if(_jhw_buffer_aggregator(jhwcinfo, &input_bb , l_num_lines, eoi) == false)
    {
        return num_lines;
    }

    // measure the buffer only now: the aggregator may have replaced it
    uint32_t source_len = input_bb->datalen;
    uint32_t source_lines = source_len / dev->byte_width;

    // at the end of image if the number of remaining lines to compress is not mcu
    // aligned, append fill lines to align it
    if(eoi && (source_lines + dev->bb_info.remainder_lines) % jdev->info->mcu_height != 0)
    {
        DBG_INFO("[%d]--Padding end of image with extra lines.\n",gettid());
        uint32_t nlines = 0;
        uint32_t nlen = 0;

        uint32_t eoi_lines = jdev->info->mcu_height -
                    ((source_lines + dev->bb_info.remainder_lines) % jdev->info->mcu_height);
        nlines = source_lines + eoi_lines;
        l_num_lines = nlines;
        DBG_INFO("[%d]  source_lines=%d rlines=%d nlines=%d eoi_lines:%d\n",gettid(),
               source_lines, dev->bb_info.remainder_lines, nlines, eoi_lines);
        nlen = nlines * dev->byte_width;

        INC_G_ALLOCATED();
        struct BigBuffer_s *nbig_buffer = dma_buffer_malloc(0, nlen);
        if(nbig_buffer == NULL)
        {
            // nothing was allocated, so undo the allocation count.  The
            // return value is a line count, so failure is signalled with 0
            // like the DMA failure below; the specific error is reported
            // through jpeghw_error().
            DEC_G_ALLOCATED();
            jhwcinfo->common.global_jpeg_state = e_JPEGHW_STATE_ERROR;
            jpeghw_error(&jhwcinfo->common, e_JPEGHW_ERR_OUT_OF_MEMORY);
            return 0;
        }

        INC_G_MAPPED();
        char *nbuf = dma_buffer_mmap_forcpu(nbig_buffer);
        INC_G_MAPPED();
        char *bbuf = dma_buffer_mmap_forcpu(input_bb);
        uint32_t bbuflen = input_bb->datalen;

        // original data first, fill bytes for the appended lines
        memcpy(nbuf, bbuf, bbuflen);
        memset(nbuf + bbuflen, JHW_FILL_BYTE_VALUE, nlen - bbuflen);
        DEC_G_MAPPED();
        dma_buffer_unmmap_forcpu(nbig_buffer);

        // delete old big buffers
        DEC_G_MAPPED();
        dma_buffer_unmmap_forcpu(input_bb);
        input_bb = JHW_FREE_BIG_BUFFER(input_bb);

        input_bb = nbig_buffer;
    }

    // send the buffer to the DMA
    if(jhwc_add_idma_data(jhwcinfo, input_bb, eoi) != OK)
    {
        jhwcinfo->common.global_jpeg_state = e_JPEGHW_STATE_ERROR;
        return 0;
    }

    return l_num_lines;
}

// If JPEGHW_CONFIG_FLAG_MCU_HEIGHT_ALIGNMENT == 1 it indicates that the driver
// (this code) cannot handle buffer heights that are not MCU height aligned.
// This function does no MCU height alignment buffer checking.  The buffers are
// sent to the DMAs as-is.  The DMAs can handle buffers that are not MCU height
// aligned if the combination of two adjacent buffers has more lines than
// the MCU height.  A DMA descriptor can be configured to point to two different
// buffers, thus facilitating splitting the descriptor between the two buffers.
// If the combined number of lines in the two buffers is less than the MCU height
// then the jhwc_add_idma_data call will fail and thus this function will fail.
//
// Returns num_lines on success; on failure sets the global jpeg state to
// e_JPEGHW_STATE_ERROR and returns 0.
static uint32_t _jhwc_write_height_aligned_buffer(struct jpeghw_compress_struct * jhwcinfo,
                struct BigBuffer_s *big_buffer, uint32_t num_lines, bool eoi)
{
    DBG_INFO("[%d]%s() num_lines:%d eoi:%d\n",gettid(), __func__, num_lines, eoi);

    if (jhwc_add_idma_data(jhwcinfo, big_buffer, eoi) == OK)
    {
        return num_lines;
    }

    // the DMA refused the buffer
    jhwcinfo->common.global_jpeg_state = e_JPEGHW_STATE_ERROR;
    return 0;
}

// this is the API call to move data from the user to the IDMA.  All data must
// be wrapped in a BigBuffer structure.
// If the given big_buffer is at the end-of-image (eoi==true) and the number of
// accumulated lines for the image plus the lines contained within the big_buffer
// is less than the image height advertised during the compressor-open then the
// hardware must be made aware of the new height or it will hang expecting more
// data.  This is accomplished via the jhwc_update_DIM_Y call.
// if the JPEGHW_CONFIG_FLAG_MCU_WIDTH_ALIGNMENT == 0 the big_buffer width 
// alignment is checked and fixed if necessary via the _jhwc_add_mcu_width_alignment
// call.  Otherwise if the buffer is not width MCU aligned an error is returned
// to the user.
// if the JPEGHW_CONFIG_FLAG_MCU_HEIGHT_ALIGNMENT == 0 the big_buffer height 
// alignment is checked and fixed if necessary via the _jhwc_write_buffer
// call.  Otherwise if the buffer is not height MCU aligned the data is sent
// directly to the IDMA via the _jhwc_write_height_aligned_buffer call.
uint32_t jhw_write_buffer(struct jpeghw_compress_struct * jhwcinfo,
                struct BigBuffer_s *big_buffer, uint32_t num_lines, bool eoi)
{
    struct BigBuffer_s *l_big_buffer = big_buffer;

    __JHW_VALIDATE(jhwcinfo, 0);
    jpeghw_dev_ptr   jdev = JPEGHWC_GET_DEV(jhwcinfo);
    struct jpeg_dev_info_s * dev = JHWC_GET_DEV(jhwcinfo);

    INC_G_ALLOCATED();  // assuming the user allocated the big_buffer before calling

    DBG_INFO("[%d]%s() num_lines:%d total-lines:%d height:%d eoi:%d\n", gettid(),
             __func__, num_lines, dev->line_counter, dev->height, eoi);

    // sanity check
    if(num_lines > l_big_buffer->datalen / jdev->image_bytes_per_row)
    {
        DBG_ERR("[%d]%s num lines(%d) != big_buffer size(%d)\n",gettid(), __func__, num_lines,
                             l_big_buffer->datalen / jdev->image_bytes_per_row);
        jhwcinfo->common.global_jpeg_state = e_JPEGHW_STATE_ERROR;
        return 0;
    }

    // if the user issues a big_buffer such that num_lines is less than the
    // number of lines allocated in the big_buffer, the big_buffer must be reallocated
    // due to the underlying codes reliance on the big_buffer len correctness.
    // -- reduce the size of the big_buffer to match the given num_lines
    if(num_lines < l_big_buffer->datalen / jdev->image_bytes_per_row)
    {
        DBG_INFO("[%d]%s() reduce buffer size from %d lines to %d lines\n",gettid(), __func__,
                 l_big_buffer->datalen / jdev->image_bytes_per_row, num_lines);

        uint32_t nlen = num_lines * jdev->image_bytes_per_row;
        struct BigBuffer_s *nbb = dma_buffer_malloc(0, nlen);
        INC_G_ALLOCATED();
        char *nbuf = dma_buffer_mmap_forcpu(nbb);
        char *cbuf = dma_buffer_mmap_forcpu(big_buffer);
        memcpy(nbuf, cbuf, nlen);
        dma_buffer_unmmap_forcpu(big_buffer);
        dma_buffer_unmmap_forcpu(nbb);
        big_buffer = JHW_FREE_BIG_BUFFER(big_buffer);
        l_big_buffer = nbb;
    }

    // If the given big_buffer is at the end-of-image (eoi==true) and the number of
    // accumulated lines for the image plus the lines contained within the big_buffer
    // is less than the image height advertised during the compressor-open then the
    // hardware must be made aware of the new height or it will hang expecting more
    // data. 
    dev->line_counter += num_lines;
    if(eoi && dev->line_counter < dev->height)
    {
        DBG_INFO("[%d]%s() premature end of image.  Update Core DIM-Y.\n",gettid(), __func__);
        jhwc_update_DIM_Y(jhwcinfo, dev->line_counter);
    }

    // if the JPEGHW_CONFIG_FLAG_MCU_WIDTH_ALIGNMENT == 0 the big_buffer width 
    // alignment is checked and fixed if necessary via the _jhwc_add_mcu_width_alignment
    // call.  Otherwise if the buffer is not width MCU aligned an error is returned
    // to the user.
    if(jdev->hw_config_flags & JPEGHW_CONFIG_FLAG_MCU_WIDTH_ALIGNMENT)
    {
        uint32_t pixel_width = dev->byte_width / jdev->info->bytes_per_pixel;
        if(pixel_width % jdev->info->mcu_width != 0)
        {
           jpeghw_error(&jhwcinfo->common, e_JPEGHW_ERR_INVALID_PARAMETERS);
           return e_JPEGHW_ERR_INVALID_PARAMETERS;
        }
    }
    else
    {
        _jhwc_add_mcu_width_alignment(jhwcinfo, &l_big_buffer, num_lines);
    }


    // if the JPEGHW_CONFIG_FLAG_MCU_HEIGHT_ALIGNMENT == 0 the big_buffer height 
    // alignment is checked and fixed if necessary via the _jhwc_write_buffer
    // call.  Otherwise if the buffer is not height MCU aligned the data is sent
    // directly to the IDMA via the _jhwc_write_height_aligned_buffer call.
    if(jdev->hw_config_flags & JPEGHW_CONFIG_FLAG_MCU_HEIGHT_ALIGNMENT)
    {
        // the hardware cannot vertically align so
        // the client is responsible for mcu alignment
        return _jhwc_write_height_aligned_buffer(jhwcinfo, l_big_buffer, num_lines, eoi);

    }
    else
    {
        // the hardware cat vertically align so
        // the client is not responsible for mcu alignment
        return _jhwc_write_buffer(jhwcinfo, l_big_buffer, num_lines, eoi);
    }
}
  
