/*
	SDL_image:  An example image loading library for use with SDL
	Copyright (C) 1997-2009 Sam Lantinga

	This library is free software; you can redistribute it and/or
	modify it under the terms of the GNU Lesser General Public
	License as published by the Free Software Foundation; either
	version 2.1 of the License, or (at your option) any later version.

	This library is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
	Lesser General Public License for more details.

	You should have received a copy of the GNU Lesser General Public
	License along with this library; if not, write to the Free Software
	Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

	Sam Lantinga
	slouken@libsdl.org
*/

/* This is a JPEG image file loading framework */

#include <stdio.h>
#include <string.h>
#include <setjmp.h>


#include "utilPoolMgr.h"
#include "utilMemory.h"

#include "SDL_image.h"
#include "PW_SDL_image.h"
#include "PW_IMAGE_filters.h"
#include "utilHWAccelAPI.h"


//#define RICH_DEBUG_INFO 1
//#define PARTIAL_DECODE_INFO

/* 2009.03.31, Enable Jpeg Data Reduce by Adjusting Scaling Factor */
#ifndef PW_IMG_JPG_SCALE
#define PW_IMG_JPG_SCALE 1
#endif

#ifndef PW_IMG_JPG_PROGRESSIVE_LIMITED
#define PW_IMG_JPG_PROGRESSIVE_LIMITED 1
#endif

// Use Topaz Partial Decode Code
#define TOPAZ_HW_PARTIAL_DECODE

// Use System Pool as merge process buffers
#define USE_SYSTEM_POOL_FOR_MERGE

#if defined(USE_SYSTEM_POOL_FOR_MERGE)
#define USE_PARTIAL_DECODE_BUF		0x200000   // Was spiCB
#define USE_YUV_MERGE_BUF			0x200000   // Was spiY
#else
#define USE_PARTIAL_DECODE_BUF		0x100000
#define USE_YUV_MERGE_BUF			0x100000
#endif

// Reserve buffer for data alignment
#define PARTIAL_DECODE_ALIGNMENT    0x3000

// Force use of a fixed input buffer
#define USE_FIXED_INPUT_BUF

// Partial decode polling timeout mechanism
#define PARTIAL_DECODE_POLLING_TIMEOUT

// Define to disable the fixed input buffer
//#define DISABLE_FIXED_INPUT_BUFFER

// Search EOI
//#define PARTIAL_DECODE_EOI

#ifndef min
#define min(a,b) (((a) < (b)) ? (a) : (b))
#endif

#ifndef max
#define max(a,b) (((a) > (b)) ? (a) : (b))
#endif

#ifdef LOAD_JPG

#include <jpeglib.h>



/* Debug Tools */
#define TEST_LOG_START(msg)		(test_log(msg, 0, 0))
#define TEST_LOG_END(c)			(test_log(NULL, 1, (c)))

#define L_PRINT(...)	fprintf(stderr, __VA_ARGS__)

/*
 * Minimal stopwatch behind the TEST_LOG_START / TEST_LOG_END macros.
 *
 *   msg     - label of the measurement; only stored when done == 0.
 *   done    - 0 starts a measurement, non-zero ends it and prints a report.
 *   counter - when non-zero, the report also carries a rate figure
 *             (counter per second, printed as "fps").
 *
 * The label pointer and the start tick live in function-local statics, so
 * a new "start" overwrites the previous measurement.
 */
static void test_log(char *msg, int done, int counter)
{
	static Uint32 s_startTicks;
	static char *s_label;
	Uint32 elapsed;

	if (!done)
	{
		/* Start of a measurement: remember label and current tick count. */
		s_label = msg;
		s_startTicks = SDL_GetTicks();
		L_PRINT("\n\n");
		L_PRINT("=============================================================\n");
		L_PRINT("%s - start\n", s_label);
		return;
	}

	/* End of a measurement: report the elapsed time (and rate, if asked). */
	elapsed = SDL_GetTicks() - s_startTicks;
	if (counter != 0)
	{
		L_PRINT("%s - done, time elapse %u ms, fps %.2f\n",
			s_label, (unsigned int)elapsed, (1000 * (float)counter / elapsed));
	}
	else
	{
		L_PRINT("%s - done, time elapse %u ms\n",
			s_label, (unsigned int)elapsed);
	}
	L_PRINT("=============================================================\n");
}


/* Define this for fast loading and not as good image quality */
#define FAST_JPEG

/* Define this for quicker (but less perfect) JPEG identification */
#define FAST_IS_JPEG

/*
 * Dispatch table for the libjpeg entry points used by this loader.
 * IMG_InitJPG() fills in the function pointers (either resolved from a
 * shared object or taken from the statically linked library, depending on
 * LOAD_JPG_DYNAMIC); the rest of the file calls libjpeg through it.
 */
static struct {
	int loaded;		/* reference count: incremented by IMG_InitJPG, decremented by IMG_QuitJPG */
	void *handle;	/* SDL_LoadObject() handle; only assigned in the LOAD_JPG_DYNAMIC build */
	void (*jpeg_calc_output_dimensions) (j_decompress_ptr cinfo);
	void (*jpeg_CreateDecompress) (j_decompress_ptr cinfo, int version, size_t structsize);
	void (*jpeg_destroy_decompress) (j_decompress_ptr cinfo);
	boolean (*jpeg_finish_decompress) (j_decompress_ptr cinfo);
	int (*jpeg_read_header) (j_decompress_ptr cinfo, boolean require_image);
	JDIMENSION (*jpeg_read_scanlines) (j_decompress_ptr cinfo, JSAMPARRAY scanlines, JDIMENSION max_lines);
	boolean (*jpeg_resync_to_restart) (j_decompress_ptr cinfo, int desired);
	boolean (*jpeg_start_decompress) (j_decompress_ptr cinfo);
	struct jpeg_error_mgr * (*jpeg_std_error) (struct jpeg_error_mgr * err);
} lib;

#ifdef LOAD_JPG_DYNAMIC
/*
 * Reference-counted initialisation (dynamic-loading build).
 *
 * On the first call the shared object named by LOAD_JPG_DYNAMIC is opened
 * and every libjpeg entry point in `lib` is resolved.  If the object or any
 * symbol cannot be found, the object is unloaded again and -1 is returned
 * without touching the reference count.  Returns 0 on success.
 */
int IMG_InitJPG()
{
	if ( lib.loaded != 0 ) {
		/* Already initialised: just take another reference. */
		++lib.loaded;
		return 0;
	}

	lib.handle = SDL_LoadObject(LOAD_JPG_DYNAMIC);
	if ( lib.handle == NULL ) {
		return -1;
	}

	lib.jpeg_calc_output_dimensions =
		(void (*) (j_decompress_ptr))
		SDL_LoadFunction(lib.handle, "jpeg_calc_output_dimensions");
	if ( lib.jpeg_calc_output_dimensions == NULL )
		goto unload;

	lib.jpeg_CreateDecompress =
		(void (*) (j_decompress_ptr, int, size_t))
		SDL_LoadFunction(lib.handle, "jpeg_CreateDecompress");
	if ( lib.jpeg_CreateDecompress == NULL )
		goto unload;

	lib.jpeg_destroy_decompress =
		(void (*) (j_decompress_ptr))
		SDL_LoadFunction(lib.handle, "jpeg_destroy_decompress");
	if ( lib.jpeg_destroy_decompress == NULL )
		goto unload;

	lib.jpeg_finish_decompress =
		(boolean (*) (j_decompress_ptr))
		SDL_LoadFunction(lib.handle, "jpeg_finish_decompress");
	if ( lib.jpeg_finish_decompress == NULL )
		goto unload;

	lib.jpeg_read_header =
		(int (*) (j_decompress_ptr, boolean))
		SDL_LoadFunction(lib.handle, "jpeg_read_header");
	if ( lib.jpeg_read_header == NULL )
		goto unload;

	lib.jpeg_read_scanlines =
		(JDIMENSION (*) (j_decompress_ptr, JSAMPARRAY, JDIMENSION))
		SDL_LoadFunction(lib.handle, "jpeg_read_scanlines");
	if ( lib.jpeg_read_scanlines == NULL )
		goto unload;

	lib.jpeg_resync_to_restart =
		(boolean (*) (j_decompress_ptr, int))
		SDL_LoadFunction(lib.handle, "jpeg_resync_to_restart");
	if ( lib.jpeg_resync_to_restart == NULL )
		goto unload;

	lib.jpeg_start_decompress =
		(boolean (*) (j_decompress_ptr))
		SDL_LoadFunction(lib.handle, "jpeg_start_decompress");
	if ( lib.jpeg_start_decompress == NULL )
		goto unload;

	lib.jpeg_std_error =
		(struct jpeg_error_mgr * (*) (struct jpeg_error_mgr *))
		SDL_LoadFunction(lib.handle, "jpeg_std_error");
	if ( lib.jpeg_std_error == NULL )
		goto unload;

	++lib.loaded;
	return 0;

unload:
	/* A required symbol is missing: release the object and report failure. */
	SDL_UnloadObject(lib.handle);
	return -1;
}
/*
 * Drop one reference taken by IMG_InitJPG() (dynamic-loading build).
 * The shared object is unloaded when the last reference goes away; calling
 * this while the count is already zero is a no-op.
 */
void IMG_QuitJPG()
{
	if ( lib.loaded != 0 ) {
		if ( lib.loaded == 1 ) {
			/* Last user: release the shared object. */
			SDL_UnloadObject(lib.handle);
		}
		lib.loaded--;
	}
}
#else
/*
 * Reference-counted initialisation (statically linked build).
 * The first call points every slot of `lib` at the linked-in libjpeg
 * function of the same name; later calls only bump the count.
 * Always returns 0.
 */
int IMG_InitJPG()
{
	if ( lib.loaded != 0 ) {
		/* Table already populated: just take another reference. */
		++lib.loaded;
		return 0;
	}

	lib.jpeg_calc_output_dimensions = jpeg_calc_output_dimensions;
	lib.jpeg_CreateDecompress       = jpeg_CreateDecompress;
	lib.jpeg_destroy_decompress     = jpeg_destroy_decompress;
	lib.jpeg_finish_decompress      = jpeg_finish_decompress;
	lib.jpeg_read_header            = jpeg_read_header;
	lib.jpeg_read_scanlines         = jpeg_read_scanlines;
	lib.jpeg_resync_to_restart      = jpeg_resync_to_restart;
	lib.jpeg_start_decompress       = jpeg_start_decompress;
	lib.jpeg_std_error              = jpeg_std_error;

	++lib.loaded;
	return 0;
}
/*
 * Drop one reference taken by IMG_InitJPG() (statically linked build).
 * Nothing has to be released here, so this only maintains the count;
 * calling it while the count is already zero is a no-op.
 */
void IMG_QuitJPG()
{
	if ( lib.loaded != 0 ) {
		lib.loaded--;
	}
}
#endif /* LOAD_JPG_DYNAMIC */

/* See if an image is contained in a data source */
int IMG_isJPG(SDL_RWops *src)
{
	int origin;
	int is_JPG = 0;
	int in_scan = 0;
	Uint8 magic[4];

	/* This detection code is by Steaphan Greene <stea@cs.binghamton.edu> */
	/* Blame me, not Sam, if this doesn't work right. */
	/* And don't forget to report the problem to the sdl list too! */

	if ( src == NULL ) {
		return 0;
	}

	/* Remember where we started so the stream can be rewound afterwards. */
	origin = SDL_RWtell(src);

	/* A JPEG stream has to open with the SOI marker FF D8. */
	if ( SDL_RWread(src, magic, 2, 1) &&
	     (magic[0] == 0xFF) && (magic[1] == 0xD8) ) {
		is_JPG = 1;

		/* Walk the marker segments until we are convinced either way. */
		while ( is_JPG == 1 ) {
			Uint32 seg_begin;
			Uint32 seg_len;
			Uint32 seg_end;

			if ( SDL_RWread(src, magic, 1, 2) != 2 ) {
				/* Ran out of data before reaching a verdict */
				is_JPG = 0;
				break;
			}

			if ( magic[0] != 0xFF ) {
				if ( in_scan == 0 ) {
					/* Outside a scan every segment must start with FF */
					is_JPG = 0;
					break;
				}
				/* This is data and we are scanning: slide one byte on */
				SDL_RWseek(src, -1, SEEK_CUR);
				continue;
			}

			if ( magic[1] == 0xFF ) {
				/* Extra padding in JPEG (legal): slide one byte on */
				SDL_RWseek(src, -1, SEEK_CUR);
				continue;
			}

			if ( magic[1] == 0xD9 ) {
				/* Got to end of good JPEG */
				break;
			}

			if ( (in_scan == 1) && (magic[1] == 0x00) ) {
				/* This is an encoded 0xFF within the data */
				continue;
			}

			if ( (magic[1] >= 0xD0) && (magic[1] < 0xD9) ) {
				/* Markers D0..D8 carry no length/payload */
				continue;
			}

			/* Every other marker is followed by a 16-bit length */
			if ( SDL_RWread(src, magic+2, 1, 2) != 2 ) {
				is_JPG = 0;
				break;
			}

			/* Yes, it's big-endian; the length includes its own two bytes */
			seg_begin = SDL_RWtell(src);
			seg_len = (magic[2] << 8) + magic[3];
			seg_end = SDL_RWseek(src, seg_len-2, SEEK_CUR);
			if ( seg_end != seg_begin + seg_len - 2 ) {
				/* Could not skip the whole segment */
				is_JPG = 0;
			}

			if ( magic[1] == 0xDA ) {
				/* Now comes the actual JPEG meat */
#ifdef	FAST_IS_JPEG
				/* Ok, I'm convinced.  It is a JPEG. */
				break;
#else
				/* I'm not convinced.  Prove it! */
				in_scan = 1;
#endif
			}
		}
	}

	SDL_RWseek(src, origin, SEEK_SET);
	return(is_JPG);
}

/* Size in bytes of the staging buffer that fill_input_buffer() refills. */
#define INPUT_BUFFER_SIZE	4096
/*
 * libjpeg source manager that pulls compressed data from an SDL_RWops.
 * `pub` must stay the first member: the code casts between
 * `struct jpeg_source_mgr *` (cinfo->src) and `my_source_mgr *`.
 */
typedef struct {
	struct jpeg_source_mgr pub;	/* libjpeg's public source-manager fields */

	SDL_RWops *ctx;				/* data source read by fill_input_buffer() */
	Uint8 buffer[INPUT_BUFFER_SIZE];	/* staging buffer handed to libjpeg */
} my_source_mgr;

/*
 * Initialize source --- called by jpeg_read_header
 * before any data is actually read.
 */
static void init_source (j_decompress_ptr cinfo)
{
	/* Nothing to prepare: jpeg_SDL_RW_src() already set everything up. */
	(void) cinfo;
}

/*
 * Fill the input buffer --- called whenever buffer is emptied.
 */
static int fill_input_buffer (j_decompress_ptr cinfo)
{
	my_source_mgr * const mgr = (my_source_mgr *) cinfo->src;
	int got = SDL_RWread(mgr->ctx, mgr->buffer, 1, INPUT_BUFFER_SIZE);

	if (got <= 0) {
		/* Nothing more to read (or a read error): hand the decoder a
		 * synthetic EOI marker so it terminates instead of starving.
		 */
		mgr->buffer[0] = (Uint8) 0xFF;
		mgr->buffer[1] = (Uint8) JPEG_EOI;
		got = 2;
	}

	/* Publish the refilled buffer to libjpeg. */
	mgr->pub.bytes_in_buffer = got;
	mgr->pub.next_input_byte = mgr->buffer;

	/* This source never suspends. */
	return TRUE;
}


/*
 * Skip data --- used to skip over a potentially large amount of
 * uninteresting data (such as an APPn marker).
 *
 * Writers of suspendable-input applications must note that skip_input_data
 * is not granted the right to give a suspension return.  If the skip extends
 * beyond the data currently in the buffer, the buffer can be marked empty so
 * that the next read will cause a fill_input_buffer call that can suspend.
 * Arranging for additional bytes to be discarded before reloading the input
 * buffer is the application writer's problem.
 */
static void skip_input_data (j_decompress_ptr cinfo, long num_bytes)
{
	my_source_mgr * const mgr = (my_source_mgr *) cinfo->src;
	long remaining = num_bytes;

	/* Zero or negative skip requests are ignored. */
	if (remaining <= 0) {
		return;
	}

	/* Simple-minded approach: keep refilling the buffer and throwing it
	 * away until the rest of the skip fits inside the current buffer.
	 * Large skips are rare, so seeking is not worth the trouble.
	 */
	for (;;) {
		long avail = (long) mgr->pub.bytes_in_buffer;

		if (remaining <= avail) {
			break;
		}
		remaining -= avail;
		/* fill_input_buffer is assumed never to return FALSE,
		 * so suspension does not have to be handled here.
		 */
		(void) mgr->pub.fill_input_buffer(cinfo);
	}

	/* Consume what is left of the skip from the current buffer. */
	mgr->pub.next_input_byte += (size_t) remaining;
	mgr->pub.bytes_in_buffer -= (size_t) remaining;
}

/*
 * Terminate source --- called by jpeg_finish_decompress
 * after all data has been read.
 */
static void term_source (j_decompress_ptr cinfo)
{
	/* Nothing to tear down here; the SDL_RWops is not touched. */
	(void) cinfo;
}

/*
 * Prepare for input from an SDL_RWops data source.
 * The caller must have already opened the source, and is responsible
 * for closing it after finishing decompression.
 */
static void jpeg_SDL_RW_src (j_decompress_ptr cinfo, SDL_RWops *ctx)
{
  my_source_mgr *mgr;

  /* The source manager (including its input buffer) is allocated from the
   * decompressor's permanent pool the first time this JPEG object is used
   * and is reused on later calls, so several images can be read from the
   * same object without losing buffered data in between.
   * Consequently it is unsafe to use this manager and a different source
   * manager serially with the same JPEG object.  Caveat programmer.
   */
  if (cinfo->src == NULL) {	/* first time for this JPEG object? */
	cinfo->src = (struct jpeg_source_mgr *)
	  (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
				  sizeof(my_source_mgr));
  }
  mgr = (my_source_mgr *) cinfo->src;

  /* Bind the data source and start with an empty buffer. */
  mgr->ctx = ctx;
  mgr->pub.next_input_byte = NULL; /* until buffer loaded */
  mgr->pub.bytes_in_buffer = 0; /* forces fill_input_buffer on first read */

  /* Install the callbacks libjpeg will use to pull data. */
  mgr->pub.init_source = init_source;
  mgr->pub.fill_input_buffer = fill_input_buffer;
  mgr->pub.skip_input_data = skip_input_data;
  mgr->pub.resync_to_restart = lib.jpeg_resync_to_restart; /* use default method */
  mgr->pub.term_source = term_source;
}

/*
 * libjpeg error manager extended with a jump target.
 * `errmgr` must stay the first member: my_error_exit() casts cinfo->err
 * (a `struct jpeg_error_mgr *`) back to `struct my_error_mgr *`.
 */
struct my_error_mgr {
	struct jpeg_error_mgr errmgr;	/* standard libjpeg error-manager fields */
	jmp_buf escape;					/* longjmp() target used by my_error_exit() */
};

/*
 * Fatal-error hook: instead of returning to libjpeg, jump back to the
 * setjmp() point recorded in the enclosing my_error_mgr, delivering the
 * value 1.  Never returns.
 */
static void my_error_exit(j_common_ptr cinfo)
{
	longjmp(((struct my_error_mgr *)cinfo->err)->escape, 1);
}

/*
 * Message hook that deliberately does nothing (presumably installed so
 * that libjpeg diagnostics are discarded rather than printed).
 */
static void output_no_message(j_common_ptr cinfo)
{
	(void) cinfo;	/* intentionally ignored */
}


/* Load a JPEG type image from an SDL datasource */
SDL_Surface *IMG_LoadJPG_RW(SDL_RWops *src,int type)
{
	SDL_Surface *surface;

	/* `type` is not used by this implementation. */
	(void) type;

	/* Delegate to the extended loader with all three optional arguments
	 * left as NULL.
	 */
	surface = PW_IMG_LoadJPG_RW(src, NULL, NULL, NULL);
	return surface;
}


#define DUMP_SIZE	256
/*
 * Debug helper: hex-dump DUMP_SIZE 32-bit words centred on a pool address.
 *
 *   pnDump_memory_PHY - address as understood by utilPoolMgrVirtAddrGet()
 *                       (named "PHY" in this file); the dump covers
 *                       DUMP_SIZE/2 words before it and DUMP_SIZE/2 words
 *                       from it onwards.
 *
 * The words are read through the address returned by
 * utilPoolMgrVirtAddrGet(), while the row labels show the corresponding
 * input-side address.  Eight words are printed per row.
 *
 * NOTE(review): no check is made that the translated address (or the
 * DUMP_SIZE/2 words in front of it) is actually mapped - confirm that
 * callers only pass addresses well inside a pool.
 */
void DUMP_memory(unsigned int*	pnDump_memory_PHY)
{
	DWORD			i;
	unsigned int*	pnTemp_PHY;
	unsigned int*   pnTemp_Vir;
	unsigned int*   pnDump_memory_Vir;

	/* %p requires a void * argument. */
	printf("\n\nMEMORY DUMP : %p\n",(void *)pnDump_memory_PHY);
	pnDump_memory_Vir = utilPoolMgrVirtAddrGet(pnDump_memory_PHY);

	for (i = 0; i < DUMP_SIZE; i++)
	{
		pnTemp_PHY = (pnDump_memory_PHY - (DUMP_SIZE/2) + i );
		pnTemp_Vir = (pnDump_memory_Vir - (DUMP_SIZE/2) + i);

		/* Start a new row, labelled with its address, every 8 words. */
		if ((i % 8) == 0)
		{
			/* A pointer must not be passed for %x; convert it to an
			 * integer type that matches the conversion specifier.
			 */
			printf("\n%08lx : ",(unsigned long)pnTemp_PHY);
		}
		printf("%08x ",*pnTemp_Vir);
	}

}


/*
 * Output geometry and running state for slice-by-slice ("partial") decode.
 * Filled in by GetPartialDecodeOutput() and advanced once per slice by
 * UpdateDecodeInfo().  Vertical positions are tracked in fixed point,
 * scaled by 0x100 (see ydh / ydh_index).
 */
typedef struct
{
	unsigned char   sw_scale;		// set to 1 by GetPartialDecodeOutput() when the frame is at least as large as the image (up-scaling limited to 1.0), else 0
	unsigned char	cType;			// 0 : type A, 1 : type B (bit 0: 0 = "Portrait", 1 = "Landscape" fit; bit 1 (0x2): actual-size display)
	unsigned char   cRoration;		// 0 : none, 1 : Rotation needed. (assume +90)
	int				x;				// output width (finally overwritten with decode_x)
	int				x1;				// horizontal margin before the image: (frame width - x) / 2
	int				x2;				// horizontal margin after the image (remainder)
	int				y;				// output height
	int				y1;				// vertical margin above the image: (frame height - y) / 2
	int				y2;				// vertical margin below the image (remainder)
	int				decode_x;		// decoded output width
	int				decode_y;		// decoded output height
	int				ydh;			// y of decoded height: output rows produced per slice, scaled by 0x100
	int				ydh_index;		// running sum of ydh (scaled by 0x100); decode_y_pos = ydh_index / 0x100
	int             decode_y_pos;	// decode y pos
	int             decode_y_size;	// decode y size
	int             decode_y_compl;	// extra rows added to the current slice to compensate for fixed-point rounding
	int             decode_y_accum;	// rows accounted for so far (sum of slice sizes plus compensation)
}PartialDecodeOutput,*PPartialDecodeOutput;


/*
 * Advance the partial-decode state to the next output slice.
 *
 * Moves the fixed-point cursor (ydh_index, scaled by 0x100) forward by one
 * slice (ydh), derives the new start row, and works out how many extra
 * rows (decode_y_compl) the slice needs so that rounding neither leaves a
 * gap before it nor makes it too short.  The last slice is trimmed so that
 * start + size + compensation does not run past the output height `y`.
 */
void	UpdateDecodeInfo(PPartialDecodeOutput psPartialDecodeOutput)
{
	PPartialDecodeOutput	out = psPartialDecodeOutput;
	int						rows_needed;

	/* Step the cursor and account for the slice that was just emitted. */
	out->ydh_index += out->ydh;
	out->decode_y_pos = out->ydh_index / 0x100;
	out->decode_y_accum += out->decode_y_size;

	/* Start position check: if the rows accounted for so far fall short of
	 * the new start row, the difference becomes compensation rows.
	 */
	out->decode_y_compl = 0;
	if (out->decode_y_accum < out->decode_y_pos)
	{
		out->decode_y_compl = out->decode_y_pos - out->decode_y_accum;
		out->decode_y_accum += out->decode_y_compl;
#if defined(RICH_DEBUG_INFO)
		IMG_TRACE(" ------------ INC Start Pos--------------------\n");
#endif
	}

	/* Size check: rows between this start row and the next slice's start row. */
	rows_needed = ((out->ydh_index + out->ydh) / 0x100) - out->decode_y_pos;
	if (rows_needed > (out->decode_y_size + out->decode_y_compl))
	{
		out->decode_y_compl += (rows_needed - out->decode_y_size - out->decode_y_compl);
#if defined(RICH_DEBUG_INFO)
		IMG_TRACE(" ------------ INC SIZE--------------------\n");
#endif
	}

	/* Final slice size is decode_y_size + decode_y_compl; clamp it to the
	 * bottom edge of the output.
	 */
	if ((out->decode_y_pos + out->decode_y_size + out->decode_y_compl) >= out->y)
	{
		out->decode_y_size = out->y - out->decode_y_pos - out->decode_y_compl;
#if defined(RICH_DEBUG_INFO)
		IMG_TRACE(" --- LAST ONE --- : psPartialDecodeOutput->decode_y_size = %d\n",out->decode_y_size);
#endif
	}

#if defined(RICH_DEBUG_INFO)
	IMG_TRACE(" --- UPDATE DECODE INFO --- : psPartialDecodeOutput->decode_y_pos = %d\n",out->decode_y_pos);
	IMG_TRACE(" --- UPDATE DECODE INFO --- : psPartialDecodeOutput->decode_y_size = %d\n",out->decode_y_size);
	IMG_TRACE(" --- UPDATE DECODE INFO --- : psPartialDecodeOutput->decode_y_accum = %d\n",out->decode_y_accum);
	IMG_TRACE(" --- UPDATE DECODE INFO --- : psPartialDecodeOutput->ydh_index = %p\n",out->ydh_index);
#endif
}

// Calculate partial decode output information
/*
 * Calculate the output geometry and initial slice state for partial decode.
 *
 *   iw, ih               - image width / height used for the fit decision.
 *   decode_iw, decode_ih - width / height used to derive decode_x / decode_y
 *                          (presumably the padded decode size - TODO confirm).
 *   fw, fh               - frame (target) width / height; swapped when
 *                          `transform` requests a 90 or 270 degree rotation.
 *   idh                  - requested slice height in source rows
 *                          (presumably rows decoded per pass - TODO confirm).
 *   transform            - pfrTRANSFORM_* / pfrSTRETCH_* flag bits.
 *   psPartialDecodeOutput - receives the result; all fields except
 *                          cRoration are written.
 *
 * Returns the slice height to use: `idh` unchanged, or - when the image is
 * scaled up so that one slice would produce more than `idh` output rows - a
 * reduced value rounded down to a multiple of 16.
 *
 * Vertical quantities named ydh* are fixed point, scaled by 0x100.
 *
 * NOTE(review): iw and ih are used as divisors without a zero check, and
 * the 0x100-scaled products are computed in DWORD/int width - callers must
 * pass non-zero, reasonably small dimensions.
 */
static DWORD   GetPartialDecodeOutput(DWORD iw, DWORD decode_iw, DWORD ih, DWORD decode_ih, DWORD fw, DWORD fh, DWORD idh, int transform, PPartialDecodeOutput psPartialDecodeOutput)
{
	DWORD	dwUpdateHeight;
	DWORD	dwTempScaledHeight;
	BOOL    bBestFit = FALSE;

	IMG_TRACE("iw = %d, ih = %d\n",iw ,ih);

	psPartialDecodeOutput->sw_scale = 0;

	// only support 90, 270
	if (transform & (pfrTRANSFORM_90 | pfrTRANSFORM_270))
	{
		// swap fw and fh
		DWORD tmp;

		tmp = fw;
		fw = fh;
		fh = tmp;
	}

//
// Limit the up scaling factor.
//	
	if (((fw*0x10)/iw) >= 0x10 && ((fh*0x10)/ih) >= 0x10)
	{
		// The frame buffer is at least as large as the image in both width and height,
		// so clamp the target size to the image size (scale factor 1.0).
		IMG_TRACE("fw = %d, fh = %d\n",fw,fh);
		fw = (iw*0x100)/0x100;
		//fw_no_padding = (iw_no_padding*0x100)/0x100;
		fh = (ih*0x100)/0x100;
		
		//fw = (fw/64)*64;
		// If fw is less than 64 ? it will be zero!
		IMG_TRACE(" --- LIMIT SCALING UP SIZE 1.0 ---\n");
		//IMG_TRACE("fw = %d, fw_no_padding= %d, fh = %d\n",fw,fw_no_padding,fh);
		
		psPartialDecodeOutput->sw_scale = 1;
	}

	IMG_TRACE("iw = %d, ih = %d,fw = %d,fh = %d,idh = %d\n",iw,ih,fw,fh,idh);
	//IMG_TRACE("iw_no_padding = %d, fw_no_padding = %d\n",iw_no_padding,fw_no_padding);

	dwUpdateHeight = idh;

	// do BESTFIT or actual size (CENTRAL), AUTO OR NONE means not BESTFIT
	// Best fit is chosen when the image exceeds the frame in either dimension, or when
	// proportional stretch is requested without the AUTO / NONE stretch flags.
	bBestFit = ((iw > fw) || (ih > fh) || (transform & pfrSTRETCH_PROP && !(transform & (pfrSTRETCH_AUTO | pfrSTRETCH_NONE))));

	// Avoid to use division for display mode calculation
	// (compares the aspect ratios iw/ih and fw/fh by cross-multiplying)
	if ( (iw * fh) >= (fw * ih) )
	{
		psPartialDecodeOutput->cType = 1;
	}
	else
	{
		psPartialDecodeOutput->cType = 0;
	}

	if (!bBestFit)
	{
		// Actual size: the image keeps its own size and is centred in the frame.
		IMG_TRACE("Actaul Size Display\n");
		psPartialDecodeOutput->cType |= 0x2; // mark as actual size type

		psPartialDecodeOutput->x = iw;
		psPartialDecodeOutput->x1 = (fw - psPartialDecodeOutput->x) / 2;
		psPartialDecodeOutput->x2 = (fw - psPartialDecodeOutput->x - psPartialDecodeOutput->x1);
		psPartialDecodeOutput->y = ih;
		psPartialDecodeOutput->y1 = (fh - psPartialDecodeOutput->y) / 2;
		psPartialDecodeOutput->y2 = (fh - psPartialDecodeOutput->y - psPartialDecodeOutput->y1);
		// Output rows per slice, scaled by 0x100.
		psPartialDecodeOutput->ydh = (idh * 0x100 * psPartialDecodeOutput->y) / ih;
		psPartialDecodeOutput->decode_x = decode_iw;
		psPartialDecodeOutput->decode_y = decode_ih;

		dwTempScaledHeight = psPartialDecodeOutput->ydh / 0x100;
		if (dwTempScaledHeight > idh)
		{
			// A slice would produce more than idh output rows: cap it at idh, convert back
			// to source rows, round down to a multiple of 16 and recompute ydh from that.
			psPartialDecodeOutput->ydh = idh * 0x100;
			dwUpdateHeight = (psPartialDecodeOutput->ydh * ih) / (0x100 * psPartialDecodeOutput->y);
			dwUpdateHeight = (dwUpdateHeight / 16) * 16;
			psPartialDecodeOutput->ydh = (dwUpdateHeight * 0x100 * psPartialDecodeOutput->y) / ih;
		}
	}
	else if (!psPartialDecodeOutput->cType)
	{
		// cType : 0 (Portrait)
		// scaled image height is the same as the frame buffer height
		IMG_TRACE("Portrait Display\n");

		psPartialDecodeOutput->x = (iw * fh) / ih;
		psPartialDecodeOutput->x1 = (fw - psPartialDecodeOutput->x) / 2;
		psPartialDecodeOutput->x2 = (fw - psPartialDecodeOutput->x - psPartialDecodeOutput->x1);
		psPartialDecodeOutput->y = fh;
		psPartialDecodeOutput->y1 = psPartialDecodeOutput->y2 = 0;
		// Output rows per slice, scaled by 0x100.
		psPartialDecodeOutput->ydh = (idh * 0x100 * psPartialDecodeOutput->y)/ih;
		psPartialDecodeOutput->decode_y = psPartialDecodeOutput->y;
		psPartialDecodeOutput->decode_x = (decode_iw * psPartialDecodeOutput->decode_y) / decode_ih;

		dwTempScaledHeight = psPartialDecodeOutput->ydh / 0x100;
		if (dwTempScaledHeight > idh)
		{
			IMG_TRACE("--- SCALING UP ---\n");
			// Scaling up: make sure the scaled slice does not exceed the idh limit
			// (same capping / 16-row alignment as in the actual-size branch).
			psPartialDecodeOutput->ydh = idh * 0x100;
			dwUpdateHeight = (psPartialDecodeOutput->ydh * ih)/(0x100 * psPartialDecodeOutput->y);
			IMG_TRACE("dwUpdateHeight = %d\n",dwUpdateHeight);
			dwUpdateHeight = (dwUpdateHeight / 16) * 16;
			IMG_TRACE("ALIGNED dwUpdateHeight = %d\n",dwUpdateHeight);
			psPartialDecodeOutput->ydh = (dwUpdateHeight * 0x100 * psPartialDecodeOutput->y)/ih;
			IMG_TRACE("psPartialDecodeOutput->ydh = %p\n",psPartialDecodeOutput->ydh);
		}
	}
	else
	{
		// cType : 1 (Landscape)
		// scaled image width is same as frame buffer width
		IMG_TRACE("Landscape Display\n");

		psPartialDecodeOutput->x = fw;
		psPartialDecodeOutput->x1 = psPartialDecodeOutput->x2 = 0;
		psPartialDecodeOutput->y = (ih * fw) / iw;
		psPartialDecodeOutput->y1 = (fh - psPartialDecodeOutput->y)/2;
		psPartialDecodeOutput->y2 = fh - psPartialDecodeOutput->y - psPartialDecodeOutput->y1;
		// Output rows per slice, scaled by 0x100.
		psPartialDecodeOutput->ydh = (idh * 0x100 * psPartialDecodeOutput->y)/ih;
		psPartialDecodeOutput->decode_y = psPartialDecodeOutput->y;
		psPartialDecodeOutput->decode_x = (decode_iw * psPartialDecodeOutput->decode_y) / decode_ih;

		dwTempScaledHeight = psPartialDecodeOutput->ydh / 0x100;
		if (dwTempScaledHeight > idh)
		{
			// Scaling up (same capping / 16-row alignment as in the other branches)
			psPartialDecodeOutput->ydh = idh * 0x100;
			dwUpdateHeight = (psPartialDecodeOutput->ydh * ih)/(0x100 * psPartialDecodeOutput->y);
			dwUpdateHeight = (dwUpdateHeight / 16) * 16;
			psPartialDecodeOutput->ydh = (dwUpdateHeight * 0x100 * psPartialDecodeOutput->y)/ih;
		}
	}

    // replace x with decode_x(new design);
	psPartialDecodeOutput->x = psPartialDecodeOutput->decode_x;
    
	// Reset the running slice state; the first slice starts at row 0.
	psPartialDecodeOutput->ydh_index = 0;
	psPartialDecodeOutput->decode_y_pos = 0;
	psPartialDecodeOutput->decode_y_accum = 0;
	psPartialDecodeOutput->decode_y_compl = 0;
	psPartialDecodeOutput->decode_y_size = (psPartialDecodeOutput->ydh) / 0x100;
	IMG_TRACE("psPartialDecodeOutput->decode_y_size = %d, ensure it is aligned.\n",psPartialDecodeOutput->decode_y_size);

	// A single slice must not be taller than the whole output.
	if ((psPartialDecodeOutput->decode_y_pos + psPartialDecodeOutput->decode_y_size) > psPartialDecodeOutput->y)
	{
		psPartialDecodeOutput->decode_y_size = psPartialDecodeOutput->y - psPartialDecodeOutput->decode_y_pos;
	}

	return dwUpdateHeight;
}



// ==================================================================================================================================
//
// Partial Decode Object : Data structure to manage the partial parsing object
// We will use three buffers to load JPEG RAW.
// JR_BUF_0 JR_BUF_1 and JR_BUF_2
// On the 1st time decode, we will fill all these buffers.
// ==================================================================================================================================
#define PARTIALDECODE_INIT      0x00    // Return Tables info
#define PARTIALDECODE_PARSING   0x01    // Parsing is on-going, still can use the RAW INFO
#define PARTIALDECODE_PARSED    0x02    // RAW INFO can be used by JPEG hardware
#define PARTIALDECODE_END       0x03    // End of the file.

#define PARTIALDECODE_BUF_NUM   3
#define PARTIAL_BUFFER_ALIGNMENT_SIZE_K    (64)
#define PARTIAL_BUFFER_ALIGNMENT_ADDRESS   4096


/*
 * Result of one partial-parsing step: tells the caller whether parsing
 * succeeded, what state the stream is in (PARTIALDECODE_*), and which
 * buffers hold the JPEG raw data.
 */
typedef struct _PartialParsing
{
	unsigned char    cParsingStatus;  // 1: OK, 0 : fail
	unsigned char    cInfo;           // The info: one of the PARTIALDECODE_* state values
	unsigned int     dwValideBuffer;  // initialised to 0 by the initializer; presumably the index of the valid buffer - TODO confirm
	void*            ptrBuffer[PARTIALDECODE_BUF_NUM];  // buffer addresses (filled from prtDualBufferPHY[])
	unsigned int     dwSize;          // number of bytes of raw data described by this result
//    void*            ptrBuffer2;   // second buffer
}PartialParsing;

/*
 * File-scope state of the partial-decode raw-data buffers.
 * The PHY / VIR arrays hold the same PARTIALDECODE_BUF_NUM buffers under
 * two addressings: PHY entries are derived from the ECS start address,
 * VIR entries are obtained from them via utilPoolMgrVirtAddrGet().
 */
static unsigned char*   prtDualBufferPHY[PARTIALDECODE_BUF_NUM];
static unsigned char*   prtDualBufferVIR[PARTIALDECODE_BUF_NUM];
static unsigned int     m_dwDualBufferSize[PARTIALDECODE_BUF_NUM];	// size in bytes of each buffer
static unsigned int     m_dwReadEcsSize;			// running count of raw (ECS) bytes read so far
static unsigned int     m_dwNextBufferIndex = 0;	// presumably the next buffer to refill - TODO confirm against its users
static unsigned int     m_dwNextBufferFull = 0;		// NOTE(review): meaning not visible in this part of the file
static unsigned int     m_dwNextBufferStop = 0;		// reset to 0 by PartialPartialOnjectInitialize()
#if defined(PARTIAL_DECODE_EOI)
static unsigned int     m_dwEOIStep = 0;			// FF D9 scanner state: 0 = idle, 1 = saw FF, 2 = EOI found
static unsigned int     m_dwEOIFound = 0;			// 1 when the file ends with an EOI marker (FF D9)
#endif

// We will use 1/3 of prtPool as the buffer. Buffer 1 = 1/3 of ptrPool
PartialParsing PartialPartialOnjectInitialize(void* ptrPool,unsigned int dwPoolSize,SDL_RWops *opsJPEGRaw,unsigned int nFileSize,JPEG_TABLE_INFO *ptrJPEGTable);
int PartialParsingObjectGet(SDL_RWops *opsJPEGRaw,PartialParsing* pPartialDecode);

PartialParsing PartialPartialOnjectInitialize(void* ptrPool,unsigned int dwPoolSize,SDL_RWops *opsJPEGRaw,unsigned int nFileSize,JPEG_TABLE_INFO *ptrJPEGTable)
{
	unsigned int    dwJPEGTablesBufferSize;
	void*           ptrJPEGRawHWPool;
	unsigned int    dwJPEGTableSize;
	PartialParsing  stPartialParsing;
	unsigned int    dwJPEGFileSize;
	unsigned char   acEOIBuf[2];

	ptrJPEGRawHWPool = utilPoolMgrPhyAddrGet(ptrPool);

#if defined(PARTIAL_DECODE_EOI)
	m_dwEOIStep = 0;
#endif
	m_dwNextBufferStop = 0;

	printf("  ----     PartialPartialOnjectInitialize  (dwPoolSize = %d)---\n",dwPoolSize);
//    dwPoolSize = 19467 + + 1024*3*32*2;
//    dwPoolSize = 18843 + 1024*3*32;
//    dwPoolSize = dwPoolSize/2;

	// This will be a three buffers architecture and at any time only two buffers have valid JPEG raw data.
	// We will read 1/2 of Pool buffer for the first time. The read will ensure we fill buffer 0 and buffer 1. buffer 2 is the pointer to empty buffer.
	dwJPEGTablesBufferSize = (dwPoolSize/2);

#if 0  // For test only
	dwJPEGTablesBufferSize = 19467 + + 1024*3*32*2;  // 19562 + 1024*2*32;
#endif

#if defined(PARTIAL_DECODE_INFO)
	IMG_TRACE("JPG : dwPoolSize = %d\n",dwPoolSize);
	IMG_TRACE("JPG : dwJPEGTablesBufferSize = %d\n",dwJPEGTablesBufferSize);
#endif


	// Try to get file size.
	SDL_RWseek(opsJPEGRaw, 0, SEEK_END);
	dwJPEGFileSize = SDL_RWtell(opsJPEGRaw);

#if defined(PARTIAL_DECODE_EOI)
	// Try to find EOI (assume at end of file)
	SDL_RWseek(opsJPEGRaw, dwJPEGFileSize - 2, RW_SEEK_SET);
	SDL_RWread(opsJPEGRaw,acEOIBuf,1,2);
	//printf("%x %x \n",acEOIBuf[0],acEOIBuf[1]);
	if (acEOIBuf[0] == 0xff && acEOIBuf[1] == 0xd9)
	{
		m_dwEOIFound = 1;
		IMG_TRACE("JPG : EOI at end of file(len = %d)\n",dwJPEGFileSize);
	}
	else
	{
		m_dwEOIFound = 0;
	}
#endif

	// Seek to begin position
	SDL_RWseek(opsJPEGRaw, 0, RW_SEEK_SET);
	SDL_RWread(opsJPEGRaw, ptrPool, 1, dwJPEGTablesBufferSize);

#if defined(PARTIAL_DECODE_INFO)
	printf("ptrPool = %p\n",ptrPool);
#endif

	ptrJPEGTable->dwEcsLength = 0;
	if (rcERROR == utilHWAccelJPEGParse(psmPHYSICAL_TO_PHYSICAL,
							 ptrPool,
							 dwJPEGTablesBufferSize,
							 ptrJPEGRawHWPool,
							 ptrJPEGTable))
	{
		stPartialParsing.cParsingStatus = 0;
		return stPartialParsing;
	}

	stPartialParsing.cParsingStatus = 1;

#if defined(PARTIAL_DECODE_INFO)
	IMG_TRACE("JPG : ptrJPEGTable->dwEcsStartAddr = %p\n",ptrJPEGTable->dwEcsStartAddr);
	IMG_TRACE("JPG : ptrJPEGTable->dwEcsLength = %p\n",ptrJPEGTable->dwEcsLength);
	IMG_TRACE("JPG : --- NULL --- \n");
#endif

	//
	// On some JPEG, has additional info after EOI, we need to keep this in mind to avoid overload too much JPEG raw info.
	//
	m_dwReadEcsSize = ptrJPEGTable->dwEcsLength;
	IMG_TRACE("JPG : m_dwReadEcsSize = %p\n",m_dwReadEcsSize);
	if (nFileSize >= dwJPEGTablesBufferSize)
	{
		// We need to fill the correct EcsLength
		ptrJPEGTable->dwEcsLength += (nFileSize - dwJPEGTablesBufferSize);
		IMG_TRACE("JPG : ptrJPEGTable->dwEcsLength = %p\n",ptrJPEGTable->dwEcsLength);
	}

	dwJPEGTableSize = (ptrJPEGTable->dwEcsStartAddr - (DWORD)ptrJPEGRawHWPool);
	IMG_TRACE("JPG : dwJPEGTableSize = %d \n",dwJPEGTableSize);

	//
	// At this moment, all table size, ptrJPEGTable->dwEcsStartAddr address is valid
	//




	//--- Know tables size already ---
	//printf("\n    @@@   \n");
#if defined(PARTIAL_DECODE_INFO)
	IMG_TRACE("JPG : ptrJPEGTable->dwEcsStartAddr = %p\n",ptrJPEGTable->dwEcsStartAddr);
	IMG_TRACE("JPG : ptrJPEGRawHWPool = %p\n",ptrJPEGRawHWPool);
#endif

	// Align raw data
	{
	DWORD	dwRawShift;
	SDL_RWseek(opsJPEGRaw, dwJPEGTableSize, RW_SEEK_SET);

	dwRawShift = (ptrJPEGTable->dwEcsStartAddr - (DWORD)ptrJPEGRawHWPool);
#if defined(PARTIAL_DECODE_INFO)
	IMG_TRACE("JPG : dwRawShift = %p\n",dwRawShift);
#endif

#if 0
	dwRawShift = ((dwRawShift + 1023)/1024)*1024;

#if defined(PARTIAL_DECODE_INFO)
	IMG_TRACE("JPG : dwRawShift = %p\n",dwRawShift);
#endif
#endif

	// Align buffer position ?
	ptrJPEGTable->dwEcsStartAddr = (DWORD)ptrJPEGRawHWPool + dwRawShift;
	ptrJPEGTable->dwEcsStartAddr = ((ptrJPEGTable->dwEcsStartAddr + PARTIAL_BUFFER_ALIGNMENT_ADDRESS - 1)/PARTIAL_BUFFER_ALIGNMENT_ADDRESS)*PARTIAL_BUFFER_ALIGNMENT_ADDRESS;

#if defined(PARTIAL_DECODE_INFO)
	IMG_TRACE("JPG : ptrJPEGTable->dwEcsStartAddr = %p\n",ptrJPEGTable->dwEcsStartAddr);
#endif

	prtDualBufferPHY[0] =  (unsigned char*)ptrJPEGTable->dwEcsStartAddr;
	prtDualBufferVIR[0] = (unsigned char*)utilPoolMgrVirtAddrGet(prtDualBufferPHY[0]);

	//
	// Get the total size for each buffer
	//
	//m_dwDualBufferSize[0] = m_dwDualBufferSize[1] = m_dwDualBufferSize[2] = (((dwPoolSize - dwJPEGTableSize)/PARTIALDECODE_BUF_NUM)/(1024*32))*(1024*32);
	m_dwDualBufferSize[0] = m_dwDualBufferSize[1] = m_dwDualBufferSize[2] = (((dwPoolSize - dwJPEGTableSize - PARTIAL_BUFFER_ALIGNMENT_ADDRESS)/PARTIALDECODE_BUF_NUM)/(1024*PARTIAL_BUFFER_ALIGNMENT_SIZE_K))*(1024*PARTIAL_BUFFER_ALIGNMENT_SIZE_K);


#if defined(PARTIAL_DECODE_INFO)
	IMG_TRACE("JPG : m_dwDualBufferSize[0] = %p\n",m_dwDualBufferSize[0]);
#endif

	// Read buffer 0
	SDL_RWread(opsJPEGRaw, prtDualBufferVIR[0], 1, m_dwDualBufferSize[0]);
	m_dwReadEcsSize = m_dwDualBufferSize[0];

	}


	//prtDualBufferPHY[0] =  ptrJPEGTable->dwEcsStartAddr;
	//prtDualBufferVIR[0] = utilPoolMgrVirtAddrGet(prtDualBufferPHY[0]);

	// Fill buffer 1 info
	prtDualBufferPHY[1] =  (unsigned char*)ptrJPEGTable->dwEcsStartAddr + m_dwDualBufferSize[0];
	prtDualBufferVIR[1] = (unsigned char*)utilPoolMgrVirtAddrGet(prtDualBufferPHY[1]);

	// Read buffer 1
	SDL_RWread(opsJPEGRaw, prtDualBufferVIR[1], 1, m_dwDualBufferSize[1]);
	m_dwReadEcsSize += m_dwDualBufferSize[1];

	IMG_TRACE("JPG : m_dwReadEcsSize = %d\n",m_dwReadEcsSize);

	// Fill buffer 2 info
	prtDualBufferPHY[2] =  (unsigned char*)ptrJPEGTable->dwEcsStartAddr + m_dwDualBufferSize[0] + m_dwDualBufferSize[1];
	prtDualBufferVIR[2] = utilPoolMgrVirtAddrGet(prtDualBufferPHY[2]);


#if defined(PARTIAL_DECODE_INFO)
	IMG_TRACE("JPG : prtDualBufferPHY[0] = %p \n",prtDualBufferPHY[0]);
	IMG_TRACE("JPG : prtDualBufferVIR[0] = %p \n",prtDualBufferVIR[0]);
	IMG_TRACE("JPG : m_dwDualBufferSize[0] = %d \n",m_dwDualBufferSize[0]);
	IMG_TRACE("JPG : prtDualBufferPHY[1] = %p \n",prtDualBufferPHY[1]);
	IMG_TRACE("JPG : prtDualBufferVIR[1] = %p \n",prtDualBufferVIR[1]);
	IMG_TRACE("JPG : m_dwDualBufferSize[1] = %d \n",m_dwDualBufferSize[1]);
	IMG_TRACE("JPG : prtDualBufferPHY[2] = %p \n",prtDualBufferPHY[2]);
	IMG_TRACE("JPG : prtDualBufferVIR[2] = %p \n",prtDualBufferVIR[2]);
	IMG_TRACE("JPG : m_dwDualBufferSize[2] = %d \n",m_dwDualBufferSize[2]);
#endif

	stPartialParsing.dwValideBuffer = 0;

	if (m_dwReadEcsSize >= ptrJPEGTable->dwEcsLength)
	{
		// All JPEG RAW read.
		stPartialParsing.cInfo = PARTIALDECODE_END;
		stPartialParsing.ptrBuffer[0] = prtDualBufferPHY[0];
		// Second buffer
		stPartialParsing.ptrBuffer[1] = prtDualBufferPHY[1];
		stPartialParsing.dwSize = m_dwReadEcsSize;
		IMG_TRACE("JPG : CASE 1 , m_dwReadEcsSize = %d\n",m_dwReadEcsSize);

#if defined(PARTIAL_DECODE_EOI)
		// Scan the buffer to find EOI to avoid feed too much info to JPEG IP
		if (m_dwEOIFound == 0)
		{
		DWORD	i;
		BYTE    *ptrStream;

		ptrStream = (BYTE*)&prtDualBufferVIR[0][0];
		for (i = 0 ;i<stPartialParsing.dwSize;i++)
		{
			if(0 == m_dwEOIStep)
			{
				if (ptrStream[i] == 0xff)
				{
					m_dwEOIStep = 1;
				}
			}
			else if (1 == m_dwEOIStep)
			{
				if (ptrStream[i] == 0xd9)
				{
					m_dwEOIStep = 2;
					break;
				}
				else
				{
					m_dwEOIStep = 0;
				}
			}
		}
		if (m_dwEOIStep == 2)
		{
			// We don't need buffer 1 ...
			stPartialParsing.dwSize = i + 1;
			m_dwReadEcsSize = stPartialParsing.dwSize;
			IMG_TRACE("EOI(1) - m_dwReadEcsSize = %d\n",m_dwReadEcsSize);
			ptrJPEGTable->dwEcsLength = m_dwReadEcsSize;
			//stPartialParsing.ptrBuffer[1] = NULL;
		}
		}
#endif

	}
	else
	{
		if (m_dwDualBufferSize[0] >= ptrJPEGTable->dwEcsLength)
		{
			// the buffer is good enough for all Ecs data
			IMG_TRACE("JPG : ptrJPEGTable->dwEcsLength - m_dwReadEcsSize = %d\n",ptrJPEGTable->dwEcsLength - m_dwReadEcsSize);
			SDL_RWread(opsJPEGRaw,&prtDualBufferPHY[0][m_dwReadEcsSize], 1, ptrJPEGTable->dwEcsLength - m_dwReadEcsSize);
			m_dwReadEcsSize += (ptrJPEGTable->dwEcsLength - m_dwReadEcsSize);
			stPartialParsing.cInfo = PARTIALDECODE_END;
			stPartialParsing.ptrBuffer[0] = prtDualBufferPHY[0];
			// Second buffer
			stPartialParsing.ptrBuffer[1] = prtDualBufferPHY[1];
			stPartialParsing.dwSize = m_dwReadEcsSize;
			IMG_TRACE("JPG : CASE 2 , m_dwReadEcsSize = %d\n",m_dwReadEcsSize);
		}
		else
		{

			// Still need to read BUF_0 ?
			if (m_dwReadEcsSize < m_dwDualBufferSize[0])
			{
				IMG_TRACE("JPG : Continue to read BUF_0\n");
				IMG_TRACE("JPG : m_dwDualBufferSize[0] - m_dwReadEcsSize = %d\n",m_dwDualBufferSize[0] - m_dwReadEcsSize);   // PartialPartialOnjectInitialize
				IMG_TRACE("JPG : &prtDualBufferPHY[0][m_dwReadEcsSize] = %p \n",&prtDualBufferPHY[0][m_dwReadEcsSize]);
				SDL_RWread(opsJPEGRaw,&prtDualBufferVIR[0][m_dwReadEcsSize], 1, m_dwDualBufferSize[0] - m_dwReadEcsSize); //  + m_dwDualBufferSize[1]

				m_dwReadEcsSize += (m_dwDualBufferSize[0] - m_dwReadEcsSize);   //  + m_dwDualBufferSize[1]
			}
			else
			{
#if 0
				printf("-- No read buf0 --\n");
#endif
			}
			stPartialParsing.cInfo = PARTIALDECODE_PARSED;
			stPartialParsing.ptrBuffer[0] = prtDualBufferPHY[0];

			// Fixed buffer siz.
			stPartialParsing.dwSize = m_dwDualBufferSize[0]; //  + m_dwDualBufferSize[1] + m_dwDualBufferSize[2];
			//stPartialParsing.dwSize = m_dwDualBufferSize[0] + m_dwDualBufferSize[1];


			IMG_TRACE("JPG : m_dwReadEcsSize = %d\n",m_dwReadEcsSize);

#if defined(PARTIAL_DECODE_INFO)
			IMG_TRACE("JPG : ---- Fill second buffer ------\n");
#endif

			// Second buffer
			stPartialParsing.ptrBuffer[1] = prtDualBufferPHY[1];

#if defined(PARTIAL_DECODE_INFO)
			IMG_TRACE("JPG : m_dwReadEcsSize = %d\n",m_dwReadEcsSize);
#endif

#if defined(PARTIAL_DECODE_INFO)
			IMG_TRACE("JPG : ---- Fill third buffer ------\n");
#endif
			// Third buffer
			stPartialParsing.ptrBuffer[2] = prtDualBufferPHY[2];

#if defined(PARTIAL_DECODE_INFO)
			// PRINT RAW INFO
			printf("===> prtDualBufferVIR[0][0]\n");
			printf("===> %2x %2x %2x %2x\n",prtDualBufferVIR[0][0],prtDualBufferVIR[0][1],prtDualBufferVIR[0][2],prtDualBufferVIR[0][3]);

			printf("===> prtDualBufferVIR[1][0]\n");
			printf("===> %2x %2x %2x %2x\n",prtDualBufferVIR[1][0],prtDualBufferVIR[1][1],prtDualBufferVIR[1][2],prtDualBufferVIR[1][3]);

			printf("===> prtDualBufferVIR[2][0]\n");
			printf("===> %2x %2x %2x %2x\n",prtDualBufferVIR[2][0],prtDualBufferVIR[2][1],prtDualBufferVIR[2][2],prtDualBufferVIR[2][3]);
			printf("===> %2x %2x %2x %2x\n",prtDualBufferVIR[2][m_dwDualBufferSize[2]-4],prtDualBufferVIR[2][m_dwDualBufferSize[2]-3],prtDualBufferVIR[2][m_dwDualBufferSize[2]-2],prtDualBufferVIR[2][m_dwDualBufferSize[2]-1]);
			//
#endif

//            m_dwReadEcsSize += m_dwDualBufferSize[2];
			IMG_TRACE("JPG : m_dwReadEcsSize = %d\n",m_dwReadEcsSize);

#if defined(PARTIAL_DECODE_INFO)
			IMG_TRACE("JPG : stPartialParsing.ptrBuffer[0] = %p\n",stPartialParsing.ptrBuffer[0]);
			IMG_TRACE("JPG : stPartialParsing.ptrBuffer[1] = %p\n",stPartialParsing.ptrBuffer[1]);
			IMG_TRACE("JPG : stPartialParsing.ptrBuffer[2] = %p\n",stPartialParsing.ptrBuffer[2]);
#endif


			// Indicate the next buffer index

#if 1       // Single buffer
			m_dwNextBufferIndex = 1;
			m_dwNextBufferFull = 1;
			stPartialParsing.dwValideBuffer = 0;
#else
			// Alway use dual buffers mode.
			// Init - provide buffer 0 and buffer 1.
			// and point the NEXT to buffer 2.
			m_dwNextBufferIndex = 2;
			m_dwNextBufferFull = 0;
			stPartialParsing.dwValideBuffer = 1;
#endif

#if defined(PARTIAL_DECODE_INFO)
			IMG_TRACE("JPG : stPartialParsing.dwSize = %d\n",stPartialParsing.dwSize);
			IMG_TRACE("JPG : CASE 3 , m_dwReadEcsSize = %d\n",m_dwReadEcsSize);
#endif
		}
	}

#if defined(PARTIAL_DECODE_INFO)
	printf("---- INIT DONE of PartialLoad      ----\n");
	printf("---------------------------------------\n\n\n");
#endif

	return stPartialParsing;
}

//
//  Get partial object description
//
//
//  Fetch the next chunk of JPEG raw data for the fixed-input-buffer partial
//  decode and advance the buffer bookkeeping.
//
//  opsJPEGRaw     : stream the raw JPEG data is read from (read position is
//                   whatever previous reads left it at).
//  pPartialDecode : partial parsing object to update:
//                     dwValideBuffer - advanced by one, wrapping at
//                                      PARTIALDECODE_BUF_NUM.
//                     dwSize         - number of bytes now available in the
//                                      buffer that was just serviced.
//
//  File-level state touched:
//    m_dwNextBufferIndex - buffer that is filled by this call; advanced (with
//                          wrap at PARTIALDECODE_BUF_NUM) before returning.
//    m_dwNextBufferFull  - when non-zero on entry nothing is read and dwSize
//                          is reported as the full buffer size; always cleared
//                          on exit.
//    m_dwNextBufferStop  - set to 1 when fewer bytes than the buffer size
//                          were loaded.
//    m_dwReadEcsSize / m_dwEOIStep - updated only when PARTIAL_DECODE_EOI is
//                          defined (running byte count / EOI scanner state).
//
//  return
//          nParsingValide, which is currently always 1. Callers should check
//          pPartialDecode->dwSize to learn whether any data was loaded.
//
int PartialParsingObjectGet(SDL_RWops *opsJPEGRaw,PartialParsing* pPartialDecode)
{
	int	nParsingValide = 1;
#if defined(PARTIAL_DECODE_INFO)
	// Call counter, only needed for the debug trace below.
	static int nIndex = 0;
#endif


#if defined(PARTIAL_DECODE_INFO)
	printf("-------------------------\n");
	printf(" Partial Load Object  \n");
	printf("-------------------------\n");

	nIndex++;

	printf("nIndex = %d\n",nIndex);
	IMG_TRACE("JPG : m_dwNextBufferIndex = %d\n",m_dwNextBufferIndex);
#endif

	// Index to next buffer (wraps around the ring of PARTIALDECODE_BUF_NUM buffers)
	pPartialDecode->dwValideBuffer++;    // = m_dwNextBufferIndex + 1;
	if (pPartialDecode->dwValideBuffer >= PARTIALDECODE_BUF_NUM)
	{
		pPartialDecode->dwValideBuffer = pPartialDecode->dwValideBuffer - PARTIALDECODE_BUF_NUM;
	}

#if defined(PARTIAL_DECODE_INFO)
	IMG_TRACE("JPG : pPartialDecode->dwValideBuffer = %d\n",pPartialDecode->dwValideBuffer);
	printf("m_dwNextBufferFull = %d\n",m_dwNextBufferFull);
#endif

	if (m_dwNextBufferFull == 0)
	{
#if defined(PARTIAL_DECODE_EOI)
#define		CACHE_READ_SIZE   0x4000
		DWORD	i = 0;
		BYTE    *ptrStream;
		BYTE    cacheBuffer[CACHE_READ_SIZE];
#endif


#if defined(PARTIAL_DECODE_INFO)
		IMG_TRACE("JPG : m_dwDualBufferSize[m_dwNextBufferIndex] = %d\n",m_dwDualBufferSize[m_dwNextBufferIndex]);
		printf("#1 m_dwReadEcsSize = %d\n",m_dwReadEcsSize);
#endif

#if defined(PARTIAL_DECODE_EOI)
		if (m_dwEOIStep != 2)
		{
			DWORD	dwCacheReadSize;     // bytes requested from the stream per pass
			DWORD	dwChunkSize;         // bytes of the current pass that are kept
			DWORD	dwReadIndex;         // bytes already copied into the target buffer
			BOOL	bCacheReadDone = 0;  // set once the EOI marker has been seen

			dwCacheReadSize = m_dwDualBufferSize[m_dwNextBufferIndex];
			dwReadIndex = 0;

			if (dwCacheReadSize > CACHE_READ_SIZE)
			{
				dwCacheReadSize = CACHE_READ_SIZE;
			}

			do
			{
				// Read through a small cache so the data can be scanned for EOI
				// before it is handed to the decode buffer.
				dwChunkSize = SDL_RWread(opsJPEGRaw,cacheBuffer, 1, dwCacheReadSize);

				// A count larger than the request can only come from an error
				// return converted to unsigned; treat it as "nothing read" so
				// the copy below can never run past cacheBuffer.
				if (dwChunkSize > dwCacheReadSize)
				{
					dwChunkSize = 0;
				}

				// Search EOI
				if (m_dwEOIFound == 0)
				{
					ptrStream = (BYTE*)cacheBuffer;

					// Checking EOI pattern (0xff 0xd9); the state is kept in
					// m_dwEOIStep so a marker split across two reads is found.
					for (i = 0;i<dwChunkSize;i++)
					{
						if(0 == m_dwEOIStep)
						{
							if (ptrStream[i] == 0xff)
							{
								m_dwEOIStep = 1;
							}
						}
						else if (1 == m_dwEOIStep)
						{
							if (ptrStream[i] == 0xd9)
							{
								m_dwEOIStep = 2;
								break;
							}
							else
							{
								m_dwEOIStep = 0;
							}
						}
					}
				}

				if (m_dwEOIStep == 2)
				{
					// Keep only the bytes up to and including the EOI marker.
					dwChunkSize = i + 1;
					m_dwReadEcsSize += dwChunkSize;
					IMG_TRACE("EOI(2) - m_dwReadEcsSize = %d\n",m_dwReadEcsSize);
					bCacheReadDone = 1;
				}
				else
				{
					m_dwReadEcsSize += dwChunkSize;
				}

				// Copy it to HW buffer
				memcpy(&prtDualBufferVIR[m_dwNextBufferIndex][dwReadIndex],cacheBuffer,dwChunkSize);
				dwReadIndex += dwChunkSize;

				if (bCacheReadDone == 1)
				{
					break;
				}

				// Short read: the stream has no more data. Without this check a
				// file lacking an EOI marker would make the loop spin forever,
				// because dwReadIndex would stop advancing.
				if (dwChunkSize < dwCacheReadSize)
				{
					break;
				}

				// Last block ?
				if (dwCacheReadSize < CACHE_READ_SIZE)
				{
					break;
				}

				// Shrink the final request so the buffer is not overrun.
				if ((dwReadIndex + CACHE_READ_SIZE) > m_dwDualBufferSize[m_dwNextBufferIndex])
				{
					dwCacheReadSize = m_dwDualBufferSize[m_dwNextBufferIndex] - dwReadIndex;
#if defined(PARTIAL_DECODE_INFO)
					printf("dwCacheReadSize = %d\n",dwCacheReadSize);
#endif

					if (dwCacheReadSize == 0)
					{
						break;
					}
				}

			}while(1);

			// Report the total number of bytes placed in the buffer, matching
			// what the non-EOI path below reports.
			pPartialDecode->dwSize = dwReadIndex;
		}
		else // No data to read anymore
		{
			pPartialDecode->dwSize = 0;
		}
#else
		pPartialDecode->dwSize = SDL_RWread(opsJPEGRaw,&prtDualBufferVIR[m_dwNextBufferIndex][0], 1, m_dwDualBufferSize[m_dwNextBufferIndex]);
#endif



		// Buffer was not completely filled: flag that the input is running out.
		if (pPartialDecode->dwSize < m_dwDualBufferSize[m_dwNextBufferIndex])
		{
			m_dwNextBufferStop = 1;
		}

#if defined(PARTIAL_DECODE_INFO)
		printf("Read buffer %d, %d bytes\n",m_dwNextBufferIndex,pPartialDecode->dwSize);
#endif

		m_dwNextBufferFull = 1;
	}
	else
	{
#if defined(PARTIAL_DECODE_INFO)
		printf(" -- Next Buffer is full already --\n");
#endif
		// Buffer was filled ahead of time; report it as completely valid.
		pPartialDecode->dwSize = m_dwDualBufferSize[m_dwNextBufferIndex];

	}

	// Move on to the following buffer, which still has to be filled.
	m_dwNextBufferIndex = m_dwNextBufferIndex + 1;
	if (m_dwNextBufferIndex >= PARTIALDECODE_BUF_NUM)
	{
		m_dwNextBufferIndex = 0;
	}
	m_dwNextBufferFull = 0;

#if defined(PARTIAL_DECODE_INFO)
	IMG_TRACE("JPG : m_dwNextBufferIndex = %d\n",m_dwNextBufferIndex);
	printf("\n\n");
#endif

	// The function is declared int; falling off the end left the result
	// indeterminate for any caller that inspects it.
	return nParsingValide;
}



#if defined(TOPAZ) || defined(TOPAZ_ENHANCE)
// ==================================================================================================================================
// Partial Hardware Decode... (Topaz Platform)
//
// return
//          0 : fail
//          1 : success
// ==================================================================================================================================
static int  Topaz_PartialHardwareDecode(SDL_RWops *src,SDL_Surface *surface, int x, int y, int clip_w, int clip_h, PW_IMAGE_FILTERS *filters)
{
	INT			nFreeMemory;
	INT			nRequireMemory;
	void*		pvJPEGRawdataPool;
	void*		pvJPEGHardwarePool;

	int			nFileSize = 0;
	JPEG_TABLE_INFO     sJPEGTableInfo;

	void*		pvJPEGYbuffer = NULL;
	void*		pvJPEGUbuffer = NULL;
	void*		pvJPEGVbuffer = NULL;
	char*       pbMergedBuffer = NULL;

	DWORD		dwYBufferSize;
	DWORD		dwUBufferSize;
	DWORD		dwVBufferSize;

	DWORD		dwJPEGDecodedHeight;
	DWORD		dwJPEGHeight;
	DWORD		dwJPEGWidth;
	DWORD		dwColorBytesPerLine;

	DWORD		dwHWDecodeHeight = 256;

	int         line_pitch;
	int         status = 1;
	int         JPEGRaw_Use_HWPool = 0;
	int         MERGE_Use_HWPool = 0;
	PartialDecodeOutput  sPartialDecodeOutput;
	int         partial_decode_transform = 0;
	PartialParsing  stPartialParsing;
	BOOL        bFixedInputBufferMode = FALSE;    // Fxied inout buffer mode.
	BOOL        bForceSoftwareDecode = FALSE;
	BOOL		bPartialDecodeBreak = FALSE;

	// We need to handle the case of hardware memory
	IMG_TRACE("JPG : --------- Partial Hardware Decode (%p)---------------\n",filters);

	// read rotation and stretch information from filters
	if (filters)
	{
		int filter_method;
		PW_IMAGE_FILTERS* walks_filters = filters;

		while (walks_filters != NULL)
		{
			filter_method = PW_IMAGE_FILTER_resample_getMethod(walks_filters->filter);
			if (filter_method != -1)
			{
				partial_decode_transform |= filter_method & (pfrSTRETCH_AUTO | pfrSTRETCH_NONE | pfrSTRETCH_PROP |
					pfrTRANSFORM_90 | pfrTRANSFORM_180 | pfrTRANSFORM_270 | pfrMIRROR_V | pfrMIRROR_H);
			}
			walks_filters = walks_filters->next;
		}
	}
	// unify the transform style (the filter transform works for filter itself)

	// pfrTRANSFORM_90 | pfrMIRROR_H | pfrMIRROR_V = 270
	//                   pfrMIRROR_H | pfrMIRROR_V = 180
	// pfrTRANSFORM_90 | pfrMIRROR_H = pfrMIRROR_H then pfrTRANSFORM_270
	// pfrTRANSFORM_90 | pfrMIRROR_V = pfrMIRROR_H then pfrTRANSFORM_90
	if ((partial_decode_transform & pfrTRANSFORM_90) && (partial_decode_transform & pfrMIRROR_H) && (partial_decode_transform & pfrMIRROR_V))
	{
		partial_decode_transform &= ~(pfrTRANSFORM_90 | pfrMIRROR_H | pfrMIRROR_V);
		partial_decode_transform |= pfrTRANSFORM_270;
	}
	else if ((partial_decode_transform & pfrTRANSFORM_90) && (partial_decode_transform & pfrMIRROR_H))
	{
		partial_decode_transform &= ~(pfrTRANSFORM_90);
		partial_decode_transform |= pfrTRANSFORM_270;
	}
	else if ((partial_decode_transform & pfrMIRROR_H) && (partial_decode_transform & pfrMIRROR_V))
	{
		partial_decode_transform &= ~(pfrMIRROR_H | pfrMIRROR_V);
		partial_decode_transform |= pfrTRANSFORM_180;
	}
	else if ((partial_decode_transform & pfrTRANSFORM_90) && (partial_decode_transform & pfrMIRROR_V))
	{
		partial_decode_transform &= ~(pfrMIRROR_V);
		partial_decode_transform |= pfrMIRROR_H;
	}

	pvJPEGRawdataPool = src->hidden.mem.base;
	nFileSize = SDL_RWseek(src, 0, RW_SEEK_END);

	nFreeMemory = utilMemoryFreeSpaceGet();
	IMG_TRACE("JPG : nFreeMemory = %d,nFileSize = %d\n",nFreeMemory,nFileSize);

#if defined(USE_FIXED_INPUT_BUF)
	bFixedInputBufferMode = TRUE;
	IMG_TRACE("JPG : Force to use Fixed Input Buffer\n");
#else
#if defined(DISABLE_FIXED_INPUT_BUFFER)
	// Force to disable fixed input buffer mode for JPEG decode.
	bFixedInputBufferMode = FALSE;
	IMG_TRACE("JPG : Disable Fixed Input Buffer\n");
#else

	// nRequireMemory = utilPoolMgrSystemSizeGet(spiCB) + utilPoolMgrSystemSizeGet(spiY) + nFileSize;
	nRequireMemory = USE_PARTIAL_DECODE_BUF + USE_YUV_MERGE_BUF + nFileSize;

	if (nRequireMemory < nFreeMemory)
	{
		// Assume we use fixed input buffer mode for JPEG decode.
		bFixedInputBufferMode = FALSE;
	}
	else
	{
		// Assume we use fixed input buffer mode for JPEG decode.
		bFixedInputBufferMode = TRUE;
		IMG_TRACE("JPG : Fixed Input Buffer Enabled\n");
	}
#endif
#endif

	// pvJPEGRawdataPool could be a system memory.
	pvJPEGHardwarePool = utilPoolMgrPhyAddrGet(pvJPEGRawdataPool);

	IMG_TRACE("JPG : pvJPEGHardwarePool = %p\n",pvJPEGHardwarePool);
	//
	// On current design we expect the JPEG info should be kept in system buffer(virtual) instead of hardware buffer(physical).
	//
	// JPEG RAW is in system memory
	// if (pvJPEGHardwarePool == NULL)
	{
		// Up layer prove the JPEG in a system memory, we want copy it to
		// 1. spiJ0
		// 2. Malloc


		if (TRUE == bFixedInputBufferMode)
		{
			DWORD	dwJPEGBugSize;

			dwJPEGBugSize = utilPoolMgrSystemSizeGet(spiJ0);
			pvJPEGRawdataPool = utilPoolMgrSystemPoolGet(spiJ0);
			stPartialParsing = PartialPartialOnjectInitialize(pvJPEGRawdataPool,dwJPEGBugSize,src,nFileSize,&sJPEGTableInfo);

			if (0 == stPartialParsing.cParsingStatus)
			{
				// Error handling, in case we cannot parse it.
				bForceSoftwareDecode = TRUE;
			}
#if 0
			#define PARTIAL_DECODE_BUFFER_SIZE  0x100000
			printf("  ~~~ PARTIAL_DECODE_BUFFER_SIZE = %p ~~~\n",PARTIAL_DECODE_BUFFER_SIZE);

			pvJPEGRawdataPool = utilMemoryMalloc(PARTIAL_DECODE_BUFFER_SIZE);
			if (pvJPEGRawdataPool != NULL)
			{
				stPartialParsing = PartialPartialOnjectInitialize(pvJPEGRawdataPool,PARTIAL_DECODE_BUFFER_SIZE,src,nFileSize,&sJPEGTableInfo);
				JPEGRaw_Use_HWPool = 1;
			}
#endif
		}
		else
		{
			// Load whole file into hardware memory
			pvJPEGRawdataPool = utilPoolMgrSystemPoolGet(spiJ0);
			if (pvJPEGRawdataPool != NULL &&
				(utilPoolMgrSystemSizeGet(spiJ0) >= nFileSize))
			{
				IMG_TRACE("JPG : Use spiJ0 to keep JPEG RAW\n");
				// spiJ0 is valid
				SDL_RWseek(src, 0, RW_SEEK_SET);
				SDL_RWread(src, pvJPEGRawdataPool, 1, nFileSize);
				pvJPEGHardwarePool = utilPoolMgrPhyAddrGet(pvJPEGRawdataPool);
			}
			else
			{
				IMG_TRACE("JPG : Try to allocate HW memory for JPEG RAW\n");
				// We need to allocate from HW memory
				// Add addition 8 bytes to work around partial decode issue
				pvJPEGRawdataPool = utilMemoryMalloc(nFileSize + 0x10);
				if (pvJPEGRawdataPool != NULL)
				{
					JPEGRaw_Use_HWPool = 1;
					SDL_RWseek(src, 0, RW_SEEK_SET);
					SDL_RWread(src, pvJPEGRawdataPool, 1, nFileSize);
					pvJPEGHardwarePool = utilPoolMgrPhyAddrGet(pvJPEGRawdataPool);
				}
				else
				{
					IMG_TRACE("JPG : Cannot allocate memory for JPEG RAW\n");
					// No enough memory
					return 0;
				}
			}
		}

	}

#ifdef RICH_DEBUG_INFO
	IMG_TRACE("JPG : JPEG Pool , pvJPEGRawdataPool = %p\n",pvJPEGRawdataPool);
	IMG_TRACE("JPG : JPEG Pool , pvJPEGHardwarePool  = %p\n",pvJPEGHardwarePool);
	IMG_TRACE("JPG : nFileSize = %d\n",nFileSize);
#endif

	if (FALSE == bFixedInputBufferMode)
	{
	if (rcERROR == utilHWAccelJPEGParse(psmPHYSICAL_TO_PHYSICAL,
							 pvJPEGRawdataPool,
							 nFileSize,
							 pvJPEGHardwarePool,    // utilPoolMgrPhyAddrGet(g_pJpeg_FileStore),
							 &sJPEGTableInfo))
		{
			// Error handling, in case we cannot parse it.
			bForceSoftwareDecode = TRUE;
		}
	}


	// Cannot parse this file, abort hardware decode.
	if (bForceSoftwareDecode == TRUE)
	{
		// Abort, and enable software decode.

		// Unable to support partial hardware decode
		status = 0;

		IMG_TRACE("JPG : Force to use softtware decode\n");
		// free JPEG Raw buffer
		if (JPEGRaw_Use_HWPool)
		{
			utilMemoryFree(pvJPEGRawdataPool);
		}
		return status;
	}

	//line_pitch = sJPEGTableInfo.dwDecodedHSize * surface->format->BytesPerPixel;
	line_pitch = sJPEGTableInfo.dwDecodedHSize * surface->format->BytesPerPixel;
	line_pitch = ((line_pitch + 191)/192)*192;
	IMG_TRACE(" --- USE FIXED PAICH(line_pitch = %d)  ---\n",line_pitch);

	// IMG_TRACE("JPG : sJPEGTableInfo.eColorFormat = %d\n",sJPEGTableInfo.eColorFormat);

	//
	// To calculate Decode height according image width and mmeory size
	// Step 1 - Decode size limitation. Get minimal height size
	// Step 2 - Merge size limitation. If size is less than decode size, use merge size as the minimal height value
	//
	IMG_TRACE("Partial JPEG : Calculate merge height\n");


	// Get DECODE info (of Max) buffer first.
	//dwHWDecodeHeight = utilPoolMgrSystemSizeGet(spiCB) / (sJPEGTableInfo.dwDecodedHSize + sJPEGTableInfo.dwCbCrWidth + sJPEGTableInfo.dwCbCrWidth);

	// Reserve 3K for data alignment.
	dwHWDecodeHeight = (USE_PARTIAL_DECODE_BUF - PARTIAL_DECODE_ALIGNMENT) / (sJPEGTableInfo.dwDecodedHSize + sJPEGTableInfo.dwCbCrWidth + sJPEGTableInfo.dwCbCrWidth);

	// Limit the buffer size and height alignment
	dwHWDecodeHeight = (dwHWDecodeHeight / 8) * 8;

	if (dwHWDecodeHeight > 256) // was 256
	{
		dwHWDecodeHeight = 256;
	}
	else
	{
		dwHWDecodeHeight = (dwHWDecodeHeight / 32) * 32;
	}
#ifdef RICH_DEBUG_INFO
	IMG_TRACE("JPG Partial : DECODE dwHWDecodeHeight = %d\n",dwHWDecodeHeight);
#endif

	// GET MERGE size, in case merge needs more memory
	{
		DWORD	temp_merge_height;
		DWORD	dwScaledX;

		// Assume this is not scale up
		dwScaledX = sJPEGTableInfo.wHSize; // Use not alignment HSize for scale. ---> was sJPEGTableInfo.dwDecodedHSize;

		{
			//temp_merge_height = (utilPoolMgrSystemSizeGet(spiY)) / (dwScaledX * 3);
			temp_merge_height = (USE_YUV_MERGE_BUF) / (dwScaledX * 3);

			if (temp_merge_height < dwHWDecodeHeight)
			{
				dwHWDecodeHeight = temp_merge_height;
				IMG_TRACE("JPG Partial : MERGE dwHWDecodeHeight = %d\n", dwHWDecodeHeight);
			}

			dwHWDecodeHeight = (dwHWDecodeHeight / 32) * 32;
			IMG_TRACE("JPG Partial : ALIGNED dwHWDecodeHeight = %d\n", dwHWDecodeHeight);


			// To calculate the MERGER + SCALE function needed buffer. (no more need to reference the padded data, only merge the actual width)
			temp_merge_height = GetPartialDecodeOutput(sJPEGTableInfo.wHSize, sJPEGTableInfo.wHSize, sJPEGTableInfo.wVSize, sJPEGTableInfo.wVSize,  // Use original parameter ---> second parameter was sJPEGTableInfo.dwDecodedHSize
				clip_w, clip_h, dwHWDecodeHeight, partial_decode_transform, &sPartialDecodeOutput);
			IMG_TRACE("JPG Partial : SCALER temp_merge_height = %d\n", temp_merge_height);

			// If merge process require more memory(less) merge height, use new merge height
			if (temp_merge_height < dwHWDecodeHeight)
			{
				dwHWDecodeHeight = temp_merge_height;
				dwScaledX = sPartialDecodeOutput.x;
				IMG_TRACE("dwScaledX = %d\n",dwScaledX);

				//temp_merge_height = (utilPoolMgrSystemSizeGet(spiY)) / (dwScaledX * 3);
				temp_merge_height = (USE_YUV_MERGE_BUF) / (dwScaledX * 3);

				IMG_TRACE("dwHWDecodeHeight = %d,temp_merge_height = %d\n", dwHWDecodeHeight,temp_merge_height);
				if (temp_merge_height < dwHWDecodeHeight)
				{
					dwHWDecodeHeight = temp_merge_height;
					IMG_TRACE("JPG Partial : MERGE dwHWDecodeHeight = %d\n", dwHWDecodeHeight);
				}

				dwHWDecodeHeight = (dwHWDecodeHeight/8) * 8;  // was 32
				IMG_TRACE("JPG Partial : Adjust Height -  dwHWDecodeHeight = %d - \n", dwHWDecodeHeight);

				temp_merge_height = GetPartialDecodeOutput(sJPEGTableInfo.wHSize, sJPEGTableInfo.wHSize, sJPEGTableInfo.wVSize, sJPEGTableInfo.wVSize,  // use not alignment HSize. ---> second parameter was sJPEGTableInfo.dwDecodedHSize
					clip_w, clip_h, dwHWDecodeHeight, partial_decode_transform, &sPartialDecodeOutput);
				dwHWDecodeHeight = temp_merge_height;
				IMG_TRACE("JPG Partial : SCALER temp_merge_height = %d,dwHWDecodeHeight = %d\n", temp_merge_height, dwHWDecodeHeight);
			}

			// Definite not exceed memory size available for it. Design phase used.
			{
				DWORD	dwMergedSize;

				dwMergedSize = (sPartialDecodeOutput.decode_y_size + 1) * sPartialDecodeOutput.x * 3;  // assume we need sPartialDecodeOutput.decode_y_compl = 1
				IMG_TRACE("dwMergedSize = %d\n",dwMergedSize);

				//if (dwMergedSize >utilPoolMgrSystemSizeGet(spiY))
				if (dwMergedSize > USE_YUV_MERGE_BUF)
				{
					ASSERT_ALWAYS();
				}
			}

		}

	}

	IMG_TRACE("-- Start Memory Allocation for Partial Decode --\n");

	// May be , this is a up scaling, we need to

#if 1
#if defined(RICH_DEBUG_INFO)
		IMG_TRACE("JPG : sJPEGTableInfo.dwDecodedHSize = %d\n",sJPEGTableInfo.dwDecodedHSize);
		IMG_TRACE("JPG : sJPEGTableInfo.dwCbCrWidth = %d\n",sJPEGTableInfo.dwCbCrWidth);
		IMG_TRACE("JPG : sJPEGTableInfo.dwCbCrWidth = %d\n",sJPEGTableInfo.dwCbCrWidth);
#endif
		dwYBufferSize = sJPEGTableInfo.dwDecodedHSize * dwHWDecodeHeight;
		dwYBufferSize = ((dwYBufferSize + 0x7ff)/0x800)*0x800;

		dwUBufferSize = sJPEGTableInfo.dwCbCrWidth * dwHWDecodeHeight;
		dwUBufferSize = ((dwUBufferSize + 0x7ff)/0x800)*0x800;

		dwVBufferSize = sJPEGTableInfo.dwCbCrWidth * dwHWDecodeHeight;
		dwVBufferSize = ((dwVBufferSize + 0x7ff)/0x800)*0x800;

//		IMG_TRACE("JPG : dwYBufferSize = %d\n",dwYBufferSize);
//		IMG_TRACE("JPG : dwUBufferSize = %d\n",dwUBufferSize);
//		IMG_TRACE("JPG : dwVBufferSize = %d\n",dwVBufferSize);
#endif


	// Check the overall size again if case the color format is not correctly.
	nFreeMemory = utilMemoryFreeSpaceGet();
	IMG_TRACE("JPG : nFreeMemory = %d\n",nFreeMemory);

	// Use spiCB size as MERGE require memory
	//nRequireMemory = dwYBufferSize + dwUBufferSize + dwVBufferSize + line_pitch * 32;
	//nRequireMemory = utilPoolMgrSystemSizeGet(spiCB) + utilPoolMgrSystemSizeGet(spiY);
	nRequireMemory = USE_PARTIAL_DECODE_BUF + USE_YUV_MERGE_BUF;
	IMG_TRACE("JPG : nRequireMemory = %d\n",nRequireMemory);

#if defined(USE_SYSTEM_POOL_FOR_MERGE)
	pvJPEGYbuffer = (unsigned char*)utilMemoryMalloc(USE_PARTIAL_DECODE_BUF);
	pbMergedBuffer = (unsigned char*)utilMemoryMalloc(USE_YUV_MERGE_BUF);
	MERGE_Use_HWPool = 1;
#else
	pvJPEGYbuffer = (unsigned char*)utilPoolMgrSystemPoolGet(spiCB);
	pbMergedBuffer = (unsigned char*)utilPoolMgrSystemPoolGet(spiY);
#endif
	IMG_TRACE("JPG : pvJPEGYbuffer = %p with size %p,pbMergedBuffer = %p with size %p\n",pvJPEGYbuffer,USE_PARTIAL_DECODE_BUF,pbMergedBuffer,USE_YUV_MERGE_BUF);

	if ((nRequireMemory < nFreeMemory) && (pvJPEGYbuffer != NULL) && (pbMergedBuffer != NULL) )
	{
		IMG_TRACE("JPG Partial :dwYBufferSize = %d\n",dwYBufferSize);
		IMG_TRACE("JPG Partial :dwUBufferSize = %d\n",dwUBufferSize);
		IMG_TRACE("JPG Partial :dwVBufferSize = %d\n",dwVBufferSize);

		pvJPEGUbuffer = (void*)((DWORD)pvJPEGYbuffer + dwYBufferSize);
		pvJPEGVbuffer = (void*)((DWORD)pvJPEGUbuffer + dwUBufferSize);

#if 0
		pvJPEGYbuffer = utilMemoryMalloc(dwYBufferSize);
		pvJPEGUbuffer = utilMemoryMalloc(dwUBufferSize);
		pvJPEGVbuffer = utilMemoryMalloc(dwVBufferSize);
		IMG_TRACE("JPG : --- CLEAR JPEG MERGE BUFFER --- \n");
		memset(pvJPEGYbuffer,0,dwYBufferSize);
		memset(pvJPEGUbuffer,0,dwUBufferSize);
		memset(pvJPEGVbuffer,0,dwVBufferSize);
#endif

		sJPEGTableInfo.dwYStartAddr = (DWORD)utilPoolMgrPhyAddrGet(pvJPEGYbuffer);
		sJPEGTableInfo.dwCbStartAddr = (DWORD)utilPoolMgrPhyAddrGet(pvJPEGUbuffer);
		sJPEGTableInfo.dwCrStartAddr = (DWORD)utilPoolMgrPhyAddrGet(pvJPEGVbuffer);

#ifdef RICH_DEBUG_INFO
		IMG_TRACE("JPG :pvJPEGYbuffer = %p PHY  %p\n",pvJPEGYbuffer,sJPEGTableInfo.dwYStartAddr);
		IMG_TRACE("JPG :pvJPEGUbuffer = %p PHY  %p\n",pvJPEGUbuffer,sJPEGTableInfo.dwCbStartAddr);
		IMG_TRACE("JPG :pvJPEGVbuffer = %p PHY  %p\n",pvJPEGVbuffer,sJPEGTableInfo.dwCrStartAddr);
#endif

		// step 2 : decode the JPEG,
		// IMG_TRACE("JPG : skip decode, no filter  2\n");
		// IMG_TRACE("JPG : spiY size is %d\n",utilPoolMgrSystemSizeGet(spiY));
		// To calculate how much memory resource we have...
		// IMG_TRACE("JPG : pbMergedBuffer = %p\n",pbMergedBuffer);


		// step 3 : merge a piece
		sJPEGTableInfo.eMergeMode = mgmLITTLE_TO_LITTLE_PITCH; // mgmLITTLE_TO_LITTLE;

		sJPEGTableInfo.sDSTMergeRect.wBlitX = 0;
		sJPEGTableInfo.sDSTMergeRect.wBlitY = 0;
		sJPEGTableInfo.sDSTMergeRect.wWidth = (sJPEGTableInfo.wHSize + 1) & (~0x1); // Use not alignment Hsize and alignment to 2 pixels. ---> was sJPEGTableInfo.dwDecodedHSize;    //cinfo.output_width;
		sJPEGTableInfo.sDSTMergeRect.wBlitWidth = (sJPEGTableInfo.wHSize + 1) & (~0x1); // Use not alignment Hsize and alignment to 2 pixels. ---> was sJPEGTableInfo.dwDecodedHSize;    //cinfo.output_width;

		sJPEGTableInfo.pvDST_ADDR =  utilPoolMgrPhyAddrGet(pbMergedBuffer);
		IMG_TRACE("JPG : sJPEGTableInfo.pvDST_ADDR = %p\n",sJPEGTableInfo.pvDST_ADDR);

		//IMG_TRACE("JPG :sJPEGTableInfo.pvDST_ADDR = %p\n",sJPEGTableInfo.pvDST_ADDR);
		//IMG_TRACE("JPG :sJPEGTableInfo.dwDecodedHSize = %d\n",sJPEGTableInfo.dwDecodedHSize);
		//IMG_TRACE("JPG :sJPEGTableInfo.sDSTMergeRect.wBlitHeight = %d\n",sJPEGTableInfo.sDSTMergeRect.wBlitHeight);
		//IMG_TRACE("cinfo.output_height = %d\n",cinfo.output_height);

		dwJPEGHeight = sJPEGTableInfo.wVSize; // Was sJPEGTableInfo.sDSTMergeRect.wBlitHeight; (include aligfnment info)
#ifdef RICH_DEBUG_INFO
		IMG_TRACE(" -------------------- > dwJPEGHeight = %d\n",dwJPEGHeight);
#endif


		// Scaling with software filter design
		{
			PW_IMAGE_FILTER_CONTEXT filter_context;
			int	i,j;
			int scan_line;

			filter_context.filters = filters;
			filter_context.image = surface;
			filter_context.clip.x = x;
			filter_context.clip.y = y;
			filter_context.clip.w = clip_w;
			filter_context.clip.h = clip_h;
			PW_FILTER_TOOLS_init(&filter_context);
			//////PW_FILTER_TOOLS_config(&filter_context, (int)cinfo.output_width, (int)cinfo.output_height, picfRGB);

#ifdef RICH_DEBUG_INFO
			IMG_TRACE(" ---- SW Scaling Info ----\n");
			IMG_TRACE("filter_context.clip.x = %d\n",filter_context.clip.x);
			IMG_TRACE("filter_context.clip.y = %d\n",filter_context.clip.y);
			IMG_TRACE("filter_context.clip.w = %d\n",filter_context.clip.w);
			IMG_TRACE("filter_context.clip.h = %d\n",filter_context.clip.h);
#endif


			//PW_FILTER_TOOLS_config(&filter_context, (int)sJPEGTableInfo.dwDecodedHSize, (int)dwJPEGHeight, picfRGB);
			//
			// Use frame buffer size
#if 1
#ifdef RICH_DEBUG_INFO
			IMG_TRACE("=====> sPartialDecodeOutput.x = %d\n",sPartialDecodeOutput.x);
//			IMG_TRACE("=====> sPartialDecodeOutput.x_no_padding = %d\n",sPartialDecodeOutput.x_no_padding);
#endif
			PW_FILTER_TOOLS_config(&filter_context, (int)sPartialDecodeOutput.x, (int)sPartialDecodeOutput.y, picfRGB);
#endif
			scan_line = 0;

#ifdef RICH_DEBUG_INFO
			IMG_TRACE("JPG : Start HW Partial Decode\n");
#endif

		{	// PARTIAL_HW_DECODE
			void*					pvJPEGRAW_PartialDecode;
			PARTIAL_DEC_BUF_INFO 	sPartDecBufInfo;
			memset(&sPartDecBufInfo,0,sizeof(PARTIAL_DEC_BUF_INFO));

#if 0
			// JPEG Configure and Table setting
			{
			JPEG_CONFIG         sJJPGConfig;

			// JPEG configuration parameters
			sJJPGConfig.eLateIRQMethod = jltLIRQ0;		        // IRQ asserted method
			sJJPGConfig.eReadEndianess = jetLittleEndian32;	    // Little Endian 32 read mode
			sJJPGConfig.eWriteEndianess = jetLittleEndian32;	// Little Endian 32 write mode

			// JPEG configure
			dvJPEGConfigure(&sJJPGConfig);

			IMG_TRACE("Set JPEG Tables\n");
			// JPEG table setting
			dvJPEGTablesSet(&sJPEGTableInfo);

			}
#endif

			utilHWAccelPartialJPEGDecodeConfig(&sJPEGTableInfo);


			IMG_TRACE("JPEG Partial : FILL PARTIAL DECODE INFO\n");
			// --- FILL PARTIAL DECODE INFO ---
			sPartDecBufInfo.dwJpegLen = sJPEGTableInfo.dwEcsLength;
			IMG_TRACE("sJPEGTableInfo.dwEcsLength = %p\n",sJPEGTableInfo.dwEcsLength);

			if (TRUE == bFixedInputBufferMode)
			{
				sPartDecBufInfo.dwRawBufSize = stPartialParsing.dwSize;
			}
			else
			{
				sPartDecBufInfo.dwRawBufSize = sPartDecBufInfo.dwJpegLen + 0x10;   // Additional one bytes to for partial Read Done symptom.
			}

			sPartDecBufInfo.wPtDecQuantity = dwHWDecodeHeight / 8;
			sPartDecBufInfo.cInputBufMode = SINGLE_BUF;
			sPartDecBufInfo.cOutputBufMode = SINGLE_BUF;

#if 0
			//pvJPEGRAW_PartialDecode = utilMemoryMalloc(sPartDecBufInfo.dwRawBufSize);
			// JPEG RAW buffer
			/// (DWORD)utilPoolMgrPhyAddrGet(pvJPEGRAW_PartialDecode);
#endif

			if (TRUE == bFixedInputBufferMode)
			{
				// Use fixed input buffer mode
				sPartDecBufInfo.dwScanAddr0 = (DWORD)stPartialParsing.ptrBuffer[0];
				sPartDecBufInfo.dwScanAddr1 = (DWORD)stPartialParsing.ptrBuffer[1]; // second buffer
				IMG_TRACE("sPartDecBufInfo.dwScanAddr0 = %p\n",sPartDecBufInfo.dwScanAddr0);
				IMG_TRACE("sPartDecBufInfo.dwScanAddr1 = %p\n",sPartDecBufInfo.dwScanAddr1);
			}
			else
			{
				sPartDecBufInfo.dwScanAddr0 = sJPEGTableInfo.dwEcsStartAddr;
				IMG_TRACE("sPartDecBufInfo.dwScanAddr0 = %p\n",sPartDecBufInfo.dwScanAddr0);
			}

#ifdef RICH_DEBUG_INFO
			IMG_TRACE("sJPEGTableInfo.dwEcsStartAddr = %p\n",sJPEGTableInfo.dwEcsStartAddr);
			IMG_TRACE("pvJPEGRawdataPool  = %p\n",pvJPEGRawdataPool);
			IMG_TRACE("pvJPEGRawdataPool PHY = %p\n",utilPoolMgrPhyAddrGet(pvJPEGRawdataPool));
#endif

			{
			DWORD	pcEcsAddressOffset;
#ifdef RICH_DEBUG_INFO
			IMG_TRACE("Try to copy JPEG RAW data\n");
			IMG_TRACE("pvJPEGRawdataPool = %p\n",pvJPEGRawdataPool);
#endif

			pcEcsAddressOffset = (DWORD)pvJPEGRawdataPool;
			pcEcsAddressOffset = (DWORD) pcEcsAddressOffset + (sJPEGTableInfo.dwEcsStartAddr - (DWORD)utilPoolMgrPhyAddrGet(pvJPEGRawdataPool));
#ifdef RICH_DEBUG_INFO
			IMG_TRACE("pcEcsAddressOffset = %p\n",pcEcsAddressOffset);
#endif
			}

			// Merge output addreee
			sPartDecBufInfo.dwYStartAddr0 = (DWORD)utilPoolMgrPhyAddrGet(pvJPEGYbuffer);
			sPartDecBufInfo.dwCbStartAddr0 = (DWORD)utilPoolMgrPhyAddrGet(pvJPEGUbuffer);
			sPartDecBufInfo.dwCrStartAddr0 = (DWORD)utilPoolMgrPhyAddrGet(pvJPEGVbuffer);

#ifdef RICH_DEBUG_INFO
			IMG_TRACE("sPartDecBufInfo.dwJpegLen = %d\n",sPartDecBufInfo.dwJpegLen);
			IMG_TRACE("sPartDecBufInfo.dwRawBufSize = %d\n",sPartDecBufInfo.dwRawBufSize);
			IMG_TRACE("sPartDecBufInfo.wPtDecQuantity = %d\n",sPartDecBufInfo.wPtDecQuantity);
			IMG_TRACE("sPartDecBufInfo.dwScanAddr0 = %p\n",sPartDecBufInfo.dwScanAddr0);
			IMG_TRACE("sPartDecBufInfo.dwYStartAddr0 = %p\n",sPartDecBufInfo.dwYStartAddr0);
			IMG_TRACE("sPartDecBufInfo.dwCbStartAddr0 = %p\n",sPartDecBufInfo.dwCbStartAddr0);
			IMG_TRACE("sPartDecBufInfo.dwCrStartAddr0 = %p\n",sPartDecBufInfo.dwCrStartAddr0);
#endif

#if defined(TOPAZ_HW_PARTIAL_DECODE)
			utilHWAccelPartialDecodeStart(&sPartDecBufInfo);
#else
			IMG_TRACE("JPG : --- HW DECODE 2 --- \n");
			utilHWAccelJPEGDecode(&sJPEGTableInfo);
#endif

			IMG_TRACE("JPG : dwHWDecodeHeight = %d ,dwJPEGHeight = %d\n",dwHWDecodeHeight,dwJPEGHeight);

			for (i = 0; i<dwJPEGHeight; i += dwHWDecodeHeight)
			{
				PARTIAL_DEC_BUF_STATUS   	sDecBufStatus;
				DWORD    					dwMergedHeight;
				eJPEG_DECODE_STATUS_TYPE 	eDecStatus;
				PARTIAL_DEC_RESUME_BUF_INFO sPartDecResBufInfo;
#if defined(PARTIAL_DECODE_POLLING_TIMEOUT)
				DWORD						dwPollingCount;
#endif

				if (bPartialDecodeBreak == TRUE)
				{
					IMG_TRACE("JPG : Break Partial Decode\n");
					break;
				}

#if defined(PARTIAL_DECODE_POLLING_TIMEOUT)
				dwPollingCount = 0;
#endif

#if defined(TOPAZ_HW_PARTIAL_DECODE)
				usleep(5); // was 500
				do
				{
					BOOL	bPartialDecodeDone = FALSE;

					if (TRUE == bFixedInputBufferMode)
					{
						sDecBufStatus.wRawBufReadCnt = 0;
					}
					eDecStatus = dvJPEGPartialDecodePoll(&sDecBufStatus);

#ifdef RICH_DEBUG_INFO
					IMG_TRACE("JPG : eDecStatus = %d \n",eDecStatus);
#endif

					if (jdstDecodePending == eDecStatus)
					{
						usleep(5);  // was 100

#if defined(PARTIAL_DECODE_POLLING_TIMEOUT)
						dwPollingCount++;
						if (dwPollingCount <= 100000)
						{
							// polling 500 ms (100000 * 5 us = 500 ms)
							continue;
						}
						else
						{
							// Quit
							IMG_TRACE("\n\n ----- Polling Timeout Quit -----\n\n");
							bPartialDecodeDone = TRUE;
							bPartialDecodeBreak = TRUE;
							break;
						}
#endif
					}
					else if (jdstDecodeError == eDecStatus)
					{
						ASSERT_ALWAYS();
					}

#if defined(PARTIAL_DECODE_POLLING_TIMEOUT)
					dwPollingCount = 0;
#endif

					switch(eDecStatus)
					{
					case jdstDecodeDone:
					IMG_TRACE("eDecodeDone\n");
					bPartialDecodeDone = TRUE;
					bPartialDecodeBreak = TRUE;
					break;

					case jdstPartDecodeReadDone:
					//IMG_TRACE("ePartDecodeReadDone\n");
					//ASSERT_ALWAYS();


					if (TRUE == bFixedInputBufferMode)
					{
						//
						// Nick 0706 : It is always we will get buffers load ... we may failed to load in API  PartialParsingObjectGet. Exception handleing is needed.
						//
						// TEST_LOG_START("Partial Load ......");
						PartialParsingObjectGet(src,&stPartialParsing);
						// TEST_LOG_END(0);

						// Cannot exit from partial decode ....
						//bPartialDecodeDone = TRUE;

						sPartDecResBufInfo.eResumeCmd = jpdrcSRC;

						sPartDecResBufInfo.dwRawBufAddr0 = stPartialParsing.ptrBuffer[stPartialParsing.dwValideBuffer];

						if (stPartialParsing.dwSize > 0)
						{
						sPartDecResBufInfo.wRawBufCnt = 1;
						utilHWAccelPartialDecodeResume(&sPartDecResBufInfo);
#if defined(PARTIAL_DECODE_INFO)
						IMG_TRACE("Resume for Decode Read Done\n\n");
#endif
						}
						else
						{
						// Nick 0706 - It looks like it could happen that generate one more JPEG READ DONE. Still need to find the root cause of this.
						// We just ignore this and assume this READ DONE is useless.
						//
						//IMG_TRACE("Resume for Decode Read Done -- NO JPEG Info\n\n");
						}
					}
					else
					{
						bPartialDecodeDone = TRUE;

						sPartDecResBufInfo.eResumeCmd = jpdrcDST;
						sPartDecResBufInfo.dwRawBufAddr0 = sJPEGTableInfo.dwEcsStartAddr;

						utilHWAccelPartialDecodeResume(&sPartDecResBufInfo);
						IMG_TRACE("Resume for Decode Done\n");
					}
					break;

					case jdstPartDecodeWriteDone:
					bPartialDecodeDone = TRUE;

#ifdef RICH_DEBUG_INFO
					IMG_TRACE("ePartDecodeWriteDone\n");
#endif

#if 0

					// Dump decoded info
					printf("\n");
					IMG_TRACE("sPartDecBufInfo.wPtDecQuantity = %d\n",sPartDecBufInfo.wPtDecQuantity);
					DUMP_memory(sJPEGTableInfo.dwYStartAddr);
					DUMP_memory(sJPEGTableInfo.dwCbStartAddr);
					DUMP_memory(sJPEGTableInfo.dwCrStartAddr);
#endif

					sPartDecResBufInfo.eResumeCmd = jpdrcDST;

					sPartDecResBufInfo.dwYStartAddr = sJPEGTableInfo.dwYStartAddr;
					sPartDecResBufInfo.dwCbStartAddr = sJPEGTableInfo.dwCbStartAddr;
					sPartDecResBufInfo.dwCrStartAddr = sJPEGTableInfo.dwCrStartAddr;

					break;
					default:
					   ASSERT_ALWAYS();
					   //continue;
					   bPartialDecodeDone = TRUE;

					// Continue the decode proceed.
					sPartDecResBufInfo.eResumeCmd = jpdrcDST;

					sPartDecResBufInfo.dwYStartAddr = sJPEGTableInfo.dwYStartAddr;
					sPartDecResBufInfo.dwCbStartAddr = sJPEGTableInfo.dwCbStartAddr;
					sPartDecResBufInfo.dwCrStartAddr = sJPEGTableInfo.dwCrStartAddr;

					break;;
					}

					if (bPartialDecodeDone)
					{
						break;
					}
				}while(1);
#endif


				if  ((dwJPEGHeight - i) < dwHWDecodeHeight)
				{
					dwMergedHeight = dwJPEGHeight - i;
				}
				else
				{
					dwMergedHeight = dwHWDecodeHeight;
				}

				IMG_TRACE("Merging   i = %d   dwMergedHeight = %d \n",i,dwMergedHeight);

				// Merged the info
				sJPEGTableInfo.sDSTMergeRect.wBlitX = 0;
				sJPEGTableInfo.sDSTMergeRect.wBlitY = 0;
				sJPEGTableInfo.sDSTMergeRect.wBlitHeight = dwMergedHeight & (~1);
				//IMG_TRACE("Partial Merge (Align height to even) wBlitHeight = %d\n",sJPEGTableInfo.sDSTMergeRect.wBlitHeight);

#ifdef RICH_DEBUG_INFO
				IMG_TRACE("-----------------------\n");
				IMG_TRACE("JPG : i = %d\n",i);
				IMG_TRACE("JPG : dwMergedHeight = %d\n",dwMergedHeight);
				IMG_TRACE("JPG : sJPEGTableInfo.dwDecodedHSize = %d\n",sJPEGTableInfo.dwDecodedHSize);
				IMG_TRACE("JPG : sPartialDecodeOutput.decode_y_size = %d\n",sPartialDecodeOutput.decode_y_size);
#endif
				if (sPartialDecodeOutput.decode_y_size <= 1)
				{
					// A limitation
					IMG_TRACE("Break due to height size\n");
					break;
				}
#if 1
				// Should be moved forward...
				// line_pitch = sJPEGTableInfo.dwDecodedHSize * surface->format->BytesPerPixel;
				// Now, with
				line_pitch = sPartialDecodeOutput.decode_x * surface->format->BytesPerPixel;
				line_pitch = ((line_pitch + 191)/192)*192;
#endif
				sJPEGTableInfo.dwDSTPitch = line_pitch;

				// Only if the height is more than 0(We don't want to BLIT a 1 height surface)
				if (sJPEGTableInfo.sDSTMergeRect.wBlitHeight > 0)
				{
					//
					// Use the scaled down size width.
					// Beside decode_size , we need to add the extra fiction part.
					//
					sJPEGTableInfo.sDSTMergeRect.wWidth = sPartialDecodeOutput.decode_x;
#ifdef RICH_DEBUG_INFO
					IMG_TRACE("JPG : HW MERGE ==> %d, %d \n",sPartialDecodeOutput.x,(sPartialDecodeOutput.decode_y_size + sPartialDecodeOutput.decode_y_compl));
#endif
					utilHWAccelMergeWithScale(&sJPEGTableInfo, sPartialDecodeOutput.decode_x, (sPartialDecodeOutput.decode_y_size + sPartialDecodeOutput.decode_y_compl));

#if defined(TOPAZ_HW_PARTIAL_DECODE)
#else
					sJPEGTableInfo.dwYStartAddr += (sJPEGTableInfo.dwDecodedHSize*dwMergedHeight);
					sJPEGTableInfo.dwCbStartAddr += (dwColorBytesPerLine*dwMergedHeight);
					sJPEGTableInfo.dwCrStartAddr += (dwColorBytesPerLine*dwMergedHeight);
#endif
					// We may use Hardware BLIT to copy the merge image to SDL main screen.
					// SW scale : HW can scale down to limited size which cannot fit to screen.
					if (sPartialDecodeOutput.sw_scale)
					{
						for (j=0; j < (sPartialDecodeOutput.decode_y_size + sPartialDecodeOutput.decode_y_compl); j++)
						{
							if (PW_FILTER_TOOLS_sendPixels(&filter_context,
								(int)scan_line, (Uint8 *)&pbMergedBuffer[line_pitch*j], line_pitch, picfRGB) != 0)
							{
								break;
							}
							scan_line ++;
						}
					}
					else  // NULL, no rotation needed.
					{
						BLIT_INFO			sBlitInfo;
						eHVT_MODE			eHVTMode;
						static DWORD		dwFlipControl = 0;
		//			    static DWORD		dwPosX = 0;

						sBlitInfo.eBlitMode = emLITTLE_TO_LITTLE;
						sBlitInfo.eColorSpace = jcs444;

						dwFlipControl += (sPartialDecodeOutput.decode_y_size + sPartialDecodeOutput.decode_y_compl);

						SDL_LockSurface(surface);

						// Src
						sBlitInfo.pvSRC_ADDR = utilPoolMgrPhyAddrGet(pbMergedBuffer);
						sBlitInfo.dwSRCPitch = line_pitch; // sBlitInfo.sSRCBlitRect.wWidth * 3;

#ifdef RICH_DEBUG_INFO
						IMG_TRACE("=====> sPartialDecodeOutput.x = %d\n",sPartialDecodeOutput.x);
						//IMG_TRACE("=====> sPartialDecodeOutput.x_no_padding = %d\n",sPartialDecodeOutput.x_no_padding);
#endif
						sBlitInfo.sSRCBlitRect.wWidth = sPartialDecodeOutput.x;
						sBlitInfo.sSRCBlitRect.wHeight = (sPartialDecodeOutput.decode_y_size + sPartialDecodeOutput.decode_y_compl) ;

						sBlitInfo.sSRCBlitRect.wBlitX = 0;
						sBlitInfo.sSRCBlitRect.wBlitY = 0;
						sBlitInfo.sSRCBlitRect.wBlitWidth = sBlitInfo.sSRCBlitRect.wWidth;
						sBlitInfo.sSRCBlitRect.wBlitHeight = sBlitInfo.sSRCBlitRect.wHeight;

#ifdef RICH_DEBUG_INFO
						IMG_TRACE("sBlitInfo.pvSRC_ADDR = %p\n",sBlitInfo.pvSRC_ADDR);
						IMG_TRACE("sBlitInfo.dwSRCPitch = %d\n",sBlitInfo.dwSRCPitch);
						IMG_TRACE("sBlitInfo.sSRCBlitRect.wWidth = %p\n",sBlitInfo.sSRCBlitRect.wWidth);
						IMG_TRACE("sBlitInfo.sSRCBlitRect.wHeight = %p\n",sBlitInfo.sSRCBlitRect.wHeight);
						IMG_TRACE("sBlitInfo.sSRCBlitRect.wBlitX = %p\n",sBlitInfo.sSRCBlitRect.wBlitX);
						IMG_TRACE("sBlitInfo.sSRCBlitRect.wBlitY = %p\n",sBlitInfo.sSRCBlitRect.wBlitY);
						IMG_TRACE("sBlitInfo.sSRCBlitRect.wBlitWidth = %p\n",sBlitInfo.sSRCBlitRect.wBlitWidth);
						IMG_TRACE("sBlitInfo.sSRCBlitRect.wBlitHeight = %p\n",sBlitInfo.sSRCBlitRect.wBlitHeight);
#endif

						sBlitInfo.bEOB = 0;

						// Handle 2 steps orientation
						if ((partial_decode_transform & (pfrMIRROR_V | pfrMIRROR_H)) &&
							(partial_decode_transform & (pfrTRANSFORM_90 | pfrTRANSFORM_180 | pfrTRANSFORM_270)))
						{
							// store the flip image in JPEG YBuffer (fixed me, the buffer should be always big enough to keep the flipped images)
							// the H FLIP cannot use the same buffer to finish the IO.
							sBlitInfo.pvDST_ADDR = (DWORD)utilPoolMgrPhyAddrGet(pvJPEGYbuffer);
							sBlitInfo.dwDSTPitch = sBlitInfo.dwSRCPitch;

							sBlitInfo.sDSTBlitRect.wWidth = sBlitInfo.sSRCBlitRect.wWidth;
							sBlitInfo.sDSTBlitRect.wHeight = sBlitInfo.sSRCBlitRect.wHeight;

							sBlitInfo.sDSTBlitRect.wBlitX = sBlitInfo.sSRCBlitRect.wBlitX;
							sBlitInfo.sDSTBlitRect.wBlitY = sBlitInfo.sSRCBlitRect.wBlitY;
							sBlitInfo.sDSTBlitRect.wBlitWidth = sBlitInfo.sSRCBlitRect.wBlitWidth;
							sBlitInfo.sDSTBlitRect.wBlitHeight = sBlitInfo.sSRCBlitRect.wBlitHeight;

							// FLIP
							utilHWAccelHTV(&sBlitInfo, (partial_decode_transform & pfrMIRROR_V) ? emHVT_VFLIP: emHVT_HFLIP);

							// Reconfigure the JPEG YBuffer as source
							sBlitInfo.pvSRC_ADDR = sBlitInfo.pvDST_ADDR;
						}

						// Dest
						sBlitInfo.pvDST_ADDR = utilPoolMgrPhyAddrGet(surface->pixels);
						sBlitInfo.dwDSTPitch = surface->pitch;

						sBlitInfo.sDSTBlitRect.wWidth = surface->w;
						sBlitInfo.sDSTBlitRect.wHeight = surface->h;

						sBlitInfo.sDSTBlitRect.wBlitWidth = sBlitInfo.sSRCBlitRect.wWidth;
						sBlitInfo.sDSTBlitRect.wBlitHeight = sBlitInfo.sSRCBlitRect.wBlitHeight;

						// perform flip, rotation, transpose (limited support, we don't support HVT hardware function combination. Only 1 transform supported)
						// consider to support combination function using YUV & MERGE physical buffers in next version


						if (partial_decode_transform & pfrTRANSFORM_90)
						{
							eHVTMode = emHVT_CW_90;
							sBlitInfo.sDSTBlitRect.wBlitX = (clip_w + sPartialDecodeOutput.y) / 2 - sPartialDecodeOutput.decode_y_pos - sBlitInfo.sSRCBlitRect.wBlitHeight;
							sBlitInfo.sDSTBlitRect.wBlitY = (clip_h - sBlitInfo.sSRCBlitRect.wBlitWidth) / 2;
						}
						else if (partial_decode_transform & pfrTRANSFORM_180)
						{
							eHVTMode = emHVT_CW_180;
							sBlitInfo.sDSTBlitRect.wBlitX = sPartialDecodeOutput.x1;
							sBlitInfo.sDSTBlitRect.wBlitY = clip_h - sPartialDecodeOutput.y2 - sPartialDecodeOutput.decode_y_pos - sBlitInfo.sSRCBlitRect.wBlitHeight;
						}
						else if (partial_decode_transform & pfrTRANSFORM_270)
						{
							eHVTMode = emHVT_CW_270;
							sBlitInfo.sDSTBlitRect.wBlitX = (clip_w - sPartialDecodeOutput.y) / 2 + sPartialDecodeOutput.decode_y_pos;
							sBlitInfo.sDSTBlitRect.wBlitY = (clip_h - sBlitInfo.sSRCBlitRect.wBlitWidth) / 2;
						}
						else if (partial_decode_transform & pfrMIRROR_V)
						{
							eHVTMode = emHVT_VFLIP;
							sBlitInfo.sDSTBlitRect.wBlitX = sPartialDecodeOutput.x1;
							sBlitInfo.sDSTBlitRect.wBlitY = clip_h - sPartialDecodeOutput.y2 - sPartialDecodeOutput.decode_y_pos - sBlitInfo.sSRCBlitRect.wBlitHeight;
						}
						else if (partial_decode_transform & pfrMIRROR_H)
						{
							eHVTMode = emHVT_HFLIP;
							sBlitInfo.sDSTBlitRect.wBlitX = sPartialDecodeOutput.x1;
							sBlitInfo.sDSTBlitRect.wBlitY = sPartialDecodeOutput.y1 + sPartialDecodeOutput.decode_y_pos;   // Place to the center (sPartialDecodeOutput.y1)
						}
						else // other, pfrSTRETCH_AUTO | pfrSTRETCH_NONE | pfrSTRETCH_PROP, pfrMIRROR_H
						{
							eHVTMode = emHVT_NONE;
							sBlitInfo.sDSTBlitRect.wBlitX = sPartialDecodeOutput.x1;
							sBlitInfo.sDSTBlitRect.wBlitY = sPartialDecodeOutput.y1 + sPartialDecodeOutput.decode_y_pos;   // Place to the center (sPartialDecodeOutput.y1)
						}

#ifdef RICH_DEBUG_INFO
						IMG_TRACE("sBlitInfo.pvDST_ADDR = %p\n",sBlitInfo.pvDST_ADDR);
						IMG_TRACE("sBlitInfo.dwDSTPitch = %p\n",sBlitInfo.dwDSTPitch);

						IMG_TRACE("sBlitInfo.sDSTBlitRect.wWidth = %p\n",sBlitInfo.sDSTBlitRect.wWidth);
						IMG_TRACE("sBlitInfo.sDSTBlitRect.wHeight = %p\n",sBlitInfo.sDSTBlitRect.wHeight);

						IMG_TRACE("sBlitInfo.sDSTBlitRect.wBlitX = %p\n",sBlitInfo.sDSTBlitRect.wBlitX);
						IMG_TRACE("sBlitInfo.sDSTBlitRect.wBlitY = %p\n",sBlitInfo.sDSTBlitRect.wBlitY);
						IMG_TRACE("sBlitInfo.sDSTBlitRect.wBlitWidth = %p\n",sBlitInfo.sDSTBlitRect.wBlitWidth);
						IMG_TRACE("sBlitInfo.sDSTBlitRect.wBlitHeight = %p\n",sBlitInfo.sDSTBlitRect.wBlitHeight);
						IMG_TRACE("-- HW BLIT ---\n");
#endif
						//printf("xxxx emHVT=%d, (scaled_w=%d, scaled_h=%d) (%d, %d, %d, %d), (%d, %d)\n", eHVTMode, sPartialDecodeOutput.x, sPartialDecodeOutput.y,
						//	sBlitInfo.sDSTBlitRect.wBlitX, sBlitInfo.sDSTBlitRect.wBlitY,
						//	sBlitInfo.sSRCBlitRect.wBlitWidth, sBlitInfo.sSRCBlitRect.wBlitHeight, sBlitInfo.sDSTBlitRect.wBlitWidth, sBlitInfo.sDSTBlitRect.wBlitHeight);

						utilHWAccelHTV(&sBlitInfo, eHVTMode);
						SDL_UnlockSurface(surface);

						if (dwFlipControl >= 100)
						{
							dwFlipControl -= 100;
							SDL_Flip(surface);
						}
					}
				}

				if (bPartialDecodeBreak)
				{
					IMG_TRACE("JPG : Break Partial Decode\n");
					break;
				}


				// break;
				UpdateDecodeInfo(&sPartialDecodeOutput);

#if defined(TOPAZ_HW_PARTIAL_DECODE)
				if ((i + dwHWDecodeHeight) < dwJPEGHeight)
				{
#ifdef RICH_DEBUG_INFO
					IMG_TRACE("Resume JPEG Decode ---\n");
#endif
					// Resume Decode
					utilHWAccelPartialDecodeResume(&sPartDecResBufInfo);
				}
#endif
		}
		IMG_TRACE("JPG Partial : Partial Decode Exit\n");
		utilHWAccelPartialDecodeDisable();
		IMG_TRACE("JPG Partial : Disable Partial Decode\n");

		PW_FILTER_TOOLS_finalize(&filter_context);

		// free JPEG Raw buffer
		if (JPEGRaw_Use_HWPool)
		{
			IMG_TRACE("JPG Partial : Free pvJPEGRawdataPool\n");
			utilMemoryFree(pvJPEGRawdataPool);
		}


		} // PARTIAL_HW_DECODE
	}

#ifdef RICH_DEBUG_INFO
		IMG_TRACE("JPG : Try to free resource filter, not to init\n");
#endif
	}
	else
	{
		ASSERT_ALWAYS();
		// Unable to support partial hardware decode
		status = 0;

		IMG_TRACE("JPG : Not enough memory to support partial hardware decode\n");
		// free JPEG Raw buffer
		if (JPEGRaw_Use_HWPool)
		{
			utilMemoryFree(pvJPEGRawdataPool);
		}

	}

	IMG_TRACE("JPG :---- Partial Hardware Decode Finished ----\n");
	if (MERGE_Use_HWPool)
	{
		IMG_TRACE("JPG Partial : Free HW Pool\n");
		if (pvJPEGYbuffer)
		{
			utilMemoryFree(pvJPEGYbuffer);
			pvJPEGYbuffer = NULL;
		}
		if (pbMergedBuffer)
		{
			utilMemoryFree(pbMergedBuffer);
			pbMergedBuffer = NULL;
		}
	}
	// done...

	return status;
}
#endif


//
// Partial Hardware Decode...
//
// Decodes a whole JPEG with the hardware decoder into planar Y/Cb/Cr buffers,
// then merges the planes band by band into a merge buffer and feeds every
// merged line to the software filter chain (PW_FILTER_TOOLS_*).
//
// src      : JPEG stream. src->hidden.mem.base is read directly, so this
//            presumably expects a memory-backed RWops -- TODO confirm with callers.
//            When that base address has no physical mapping
//            (utilPoolMgrPhyAddrGet() returns NULL) the stream is copied into
//            the spiJ0 system pool, or into a freshly allocated buffer.
// surface  : destination surface; its BytesPerPixel defines the merge pitch.
// x, y, clip_w, clip_h : clip rectangle handed to the filter context.
// filters  : filter chain handed to the filter context.
//
// return
//          0 : fail
//          1 : success
static int  PartialHardwareDecode(SDL_RWops *src,SDL_Surface *surface,int x,int y,int clip_w,int clip_h,PW_IMAGE_FILTERS *filters)
{
	INT			nFreeMemory;
	INT			nRequireMemory;
	void*		pvJPEGRawdataPool;
	void*		pvJPEGHardwarePool;

	int			nFileSize = 0;
	JPEG_TABLE_INFO     sJPEGTableInfo;

	void*		pvJPEGYbuffer = NULL;
	void*		pvJPEGUbuffer = NULL;
	void*		pvJPEGVbuffer = NULL;
	char*       pbMergedBuffer = NULL;

	DWORD		dwYBufferSize = 0;
	DWORD		dwUBufferSize = 0;
	DWORD		dwVBufferSize = 0;

	DWORD		dwAlignedHSize;
	DWORD		dwJPEGDecodedHeight;
	DWORD		dwJPEGHeight;
	DWORD		dwColorBytesPerLine = 0;

	DWORD		dwHWDecodeHeight = 32;

	int         line_pitch;
	int         status = 1;
	int         JPEGRaw_Use_HWPool = 0;     // 1 : pvJPEGRawdataPool was allocated here and must be freed
	int         MergeBuffer_HWPool = 0;     // 1 : pbMergedBuffer was allocated here and must be freed

	// We need to handle the case of hardware memory
	nFreeMemory = utilMemoryFreeSpaceGet();
	IMG_TRACE("JPG : --------- Partial Hardwar Decode ---------------\n");
	IMG_TRACE("JPG : nFreeMemory = %d\n",nFreeMemory);

	pvJPEGRawdataPool = src->hidden.mem.base;
	nFileSize = SDL_RWseek(src, 0, RW_SEEK_END);

	IMG_TRACE("JPG : nFileSize = %d\n",nFileSize);

	// A failed seek (negative) or an empty stream cannot be decoded.
	if (nFileSize <= 0)
	{
		IMG_TRACE("JPG : Invalid JPEG RAW size\n");
		return 0;
	}

	// pvJPEGRawdataPool could be a system memory.
	pvJPEGHardwarePool = utilPoolMgrPhyAddrGet(pvJPEGRawdataPool);

	// JPEG RAW is in system memory
	if (pvJPEGHardwarePool == NULL)
	{
		// Upper layer provided the JPEG in system memory, copy it to
		// 1. spiJ0 (when it exists and is big enough)
		// 2. a buffer allocated from the HW memory pool
		pvJPEGRawdataPool = utilPoolMgrSystemPoolGet(spiJ0);
		if (pvJPEGRawdataPool != NULL &&
			(utilPoolMgrSystemSizeGet(spiJ0) >= nFileSize))
		{
			IMG_TRACE("JPG : Use spiJ0 to keep JPEG RAW\n");
		}
		else
		{
			IMG_TRACE("JPG : Try to allocate HW memory for JPEG RAW\n");
			pvJPEGRawdataPool = utilMemoryMalloc(nFileSize);
			if (pvJPEGRawdataPool == NULL)
			{
				IMG_TRACE("JPG : Cannot allocate memory for JPEG RAW\n");
				// No enough memory
				return 0;
			}
			JPEGRaw_Use_HWPool = 1;
		}

		SDL_RWseek(src, 0, RW_SEEK_SET);
		// A short read would leave stale bytes in the buffer given to the decoder.
		if ((int)SDL_RWread(src, pvJPEGRawdataPool, 1, nFileSize) != nFileSize)
		{
			IMG_TRACE("JPG : Cannot read JPEG RAW\n");
			status = 0;
			goto cleanup;
		}
		pvJPEGHardwarePool = utilPoolMgrPhyAddrGet(pvJPEGRawdataPool);
	}

#ifdef RICH_DEBUG_INFO
	IMG_TRACE("JPG : pvJPEGRawdataPool = %p\n",pvJPEGRawdataPool);
	IMG_TRACE("JPG : pvJPEGHardwarePool = %p\n",pvJPEGHardwarePool);
	IMG_TRACE("JPG : nFileSize = %d\n",nFileSize);
#endif

	utilHWAccelJPEGParse(psmPHYSICAL_TO_PHYSICAL,
						 pvJPEGRawdataPool,
						 nFileSize,
						 pvJPEGHardwarePool,
						 &sJPEGTableInfo);

	line_pitch = sJPEGTableInfo.dwDecodedHSize * surface->format->BytesPerPixel;

	IMG_TRACE("JPG : sJPEGTableInfo.eColorFormat = %d\n",sJPEGTableInfo.eColorFormat);

	// line_pitch is used as a divisor and as the merge stride below; a zero
	// sized image also has nothing to decode.
	if (line_pitch <= 0 || sJPEGTableInfo.sDSTMergeRect.wBlitHeight == 0)
	{
		IMG_TRACE("JPG : Invalid decoded image size\n");
		status = 0;
		goto cleanup;
	}

	// Width rounded up to a multiple of 16, used for the sub-sampled chroma planes.
	dwAlignedHSize = sJPEGTableInfo.dwDecodedHSize;
	if (dwAlignedHSize % 16)
	{
		dwAlignedHSize = (dwAlignedHSize/16 + 1)*16;
	}

	switch(sJPEGTableInfo.eColorFormat)
	{
	case jcftYUV444:
		dwYBufferSize = sJPEGTableInfo.dwDecodedHSize * sJPEGTableInfo.sDSTMergeRect.wBlitHeight;
		dwUBufferSize = sJPEGTableInfo.dwDecodedHSize * sJPEGTableInfo.sDSTMergeRect.wBlitHeight;
		dwVBufferSize = sJPEGTableInfo.dwDecodedHSize * sJPEGTableInfo.sDSTMergeRect.wBlitHeight;

		dwColorBytesPerLine = sJPEGTableInfo.dwDecodedHSize;
	break;

	case jcftYUV422:
		dwYBufferSize = sJPEGTableInfo.dwDecodedHSize * sJPEGTableInfo.sDSTMergeRect.wBlitHeight;

		dwColorBytesPerLine = dwAlignedHSize/2;
		dwUBufferSize = dwColorBytesPerLine * sJPEGTableInfo.sDSTMergeRect.wBlitHeight;
		dwVBufferSize = dwUBufferSize;
		IMG_TRACE("JPG : dwColorBytesPerLine = %d\n",dwColorBytesPerLine);
	break;

	case jcftYUV420:
		dwYBufferSize = sJPEGTableInfo.dwDecodedHSize * sJPEGTableInfo.sDSTMergeRect.wBlitHeight;

		dwColorBytesPerLine = dwAlignedHSize/4;

		// Chroma plane height is rounded up to a multiple of 16 lines.
		dwJPEGDecodedHeight = sJPEGTableInfo.sDSTMergeRect.wBlitHeight;
		if (dwJPEGDecodedHeight % 16)
		{
			dwJPEGDecodedHeight = (dwJPEGDecodedHeight/16 + 1)*16;
		}
#ifdef RICH_DEBUG_INFO
		IMG_TRACE("JPG : dwJPEGDecodedHeight = %d\n",dwJPEGDecodedHeight);
#endif

		dwUBufferSize = dwColorBytesPerLine * dwJPEGDecodedHeight;
		dwVBufferSize = dwUBufferSize;
#ifdef RICH_DEBUG_INFO
		IMG_TRACE("JPG : dwColorBytesPerLine = %d\n",dwColorBytesPerLine);
#endif
	break;

	default:
		// Unknown format: the plane sizes cannot be computed, so do not go on
		// with undefined sizes when ASSERT_ALWAYS() does not stop execution.
		ASSERT_ALWAYS();
		IMG_TRACE("JPG : Unsupported color format\n");
		status = 0;
		goto cleanup;
	}

	// Check the overall size again in case the color format is not correct.
	nFreeMemory = utilMemoryFreeSpaceGet();
	nRequireMemory = dwYBufferSize + dwUBufferSize + dwVBufferSize + line_pitch * 32;
	IMG_TRACE("JPG : nRequireMemory = %d\n",nRequireMemory);

	if (nRequireMemory >= nFreeMemory)
	{
		// Unable to support partial hardware decode
		status = 0;
		IMG_TRACE("JPG : Not enough memory to support partial hardware decode\n");
		goto cleanup;
	}

	pvJPEGYbuffer = utilMemoryMalloc(dwYBufferSize);
	pvJPEGUbuffer = utilMemoryMalloc(dwUBufferSize);
	pvJPEGVbuffer = utilMemoryMalloc(dwVBufferSize);

	// The free-space check above does not guarantee that each allocation succeeds.
	if (pvJPEGYbuffer == NULL || pvJPEGUbuffer == NULL || pvJPEGVbuffer == NULL)
	{
		IMG_TRACE("JPG : Cannot allocate YUV buffers\n");
		status = 0;
		goto cleanup;
	}

	sJPEGTableInfo.dwYStartAddr = (DWORD)utilPoolMgrPhyAddrGet(pvJPEGYbuffer);
	sJPEGTableInfo.dwCbStartAddr = (DWORD)utilPoolMgrPhyAddrGet(pvJPEGUbuffer);
	sJPEGTableInfo.dwCrStartAddr = (DWORD)utilPoolMgrPhyAddrGet(pvJPEGVbuffer);

	// step 2 : decode the JPEG,
	IMG_TRACE("JPG : skip decode, no filter  2\n");
	utilHWAccelJPEGDecode(&sJPEGTableInfo);

	// free JPEG Raw buffer now, before the merge buffer is sized from the
	// remaining free memory.
	if (JPEGRaw_Use_HWPool)
	{
		utilMemoryFree(pvJPEGRawdataPool);
		pvJPEGRawdataPool = NULL;
		JPEGRaw_Use_HWPool = 0;
	}

	// To calculate how much memory resource we have...
	pbMergedBuffer = (char*)utilPoolMgrSystemPoolGet(spiY);

	if (pbMergedBuffer != NULL)
	{
		// Use spiY as the merge buffer.
		// NOTE(review): the size of spiCB is added to spiY, which looks like it
		// relies on spiCB directly following spiY in memory -- confirm.
		IMG_TRACE("JPG : Use YUV Buffer as merge buffer\n");
		dwHWDecodeHeight = (utilPoolMgrSystemSizeGet(spiY) + utilPoolMgrSystemSizeGet(spiCB))/line_pitch;
		dwHWDecodeHeight = (dwHWDecodeHeight/2)*2;

#ifdef RICH_DEBUG_INFO
		IMG_TRACE("JPG : dwHWDecodeHeight = %d\n",dwHWDecodeHeight);
#endif
	}
	else
	{
		IMG_TRACE("JPG : Use HW Pool Buffer as merge buffer\n");
		nFreeMemory = utilMemoryFreeSpaceGet();

		// Keep 0x1000 bytes in reserve; avoid a negative value wrapping to a
		// huge unsigned line count.
		if (nFreeMemory > 0x1000)
		{
			dwHWDecodeHeight = ((nFreeMemory - 0x1000) / line_pitch);   // up to 256 lines
		}
		else
		{
			dwHWDecodeHeight = 0;
		}

		if (dwHWDecodeHeight > 256)
		{
			dwHWDecodeHeight = 256;
		}
		else
		{
			dwHWDecodeHeight = (dwHWDecodeHeight/2)*2;
		}
#ifdef RICH_DEBUG_INFO
		IMG_TRACE("JPG : nFreeMemory = %d\n",nFreeMemory);
		IMG_TRACE("JPG : line_pitch * dwHWDecodeHeight = %d\n",line_pitch * dwHWDecodeHeight);
		IMG_TRACE("JPG : dwHWDecodeHeight = %d\n",dwHWDecodeHeight);
#endif
		if (dwHWDecodeHeight > 0)
		{
			pbMergedBuffer = utilMemoryMalloc((line_pitch * dwHWDecodeHeight));
			if (pbMergedBuffer != NULL)
			{
				MergeBuffer_HWPool = 1;
			}
		}
#ifdef RICH_DEBUG_INFO
		IMG_TRACE("JPG : pbMergedBuffer = %p\n",pbMergedBuffer);
#endif
	}

	// A zero band height would make the merge loop below spin forever, and a
	// missing merge buffer would be dereferenced.
	if (pbMergedBuffer == NULL || dwHWDecodeHeight == 0)
	{
		IMG_TRACE("JPG : Cannot prepare merge buffer\n");
		status = 0;
		goto cleanup;
	}

	// step 3 : merge a piece
	sJPEGTableInfo.eMergeMode = mgmLITTLE_TO_LITTLE;

	sJPEGTableInfo.sDSTMergeRect.wBlitX = 0;
	sJPEGTableInfo.sDSTMergeRect.wBlitY = 0;
	sJPEGTableInfo.sDSTMergeRect.wWidth = sJPEGTableInfo.dwDecodedHSize;
	sJPEGTableInfo.sDSTMergeRect.wBlitWidth = sJPEGTableInfo.dwDecodedHSize;

	sJPEGTableInfo.pvDST_ADDR =  utilPoolMgrPhyAddrGet(pbMergedBuffer);

	// Read the full height before the loop overwrites wBlitHeight per band.
	dwJPEGHeight = sJPEGTableInfo.sDSTMergeRect.wBlitHeight;

	// Scaling with software filter design
	{
		PW_IMAGE_FILTER_CONTEXT filter_context;
		DWORD	dwLine;
		DWORD	dwRow;
		int		scan_line = 0;

		filter_context.filters = filters;
		filter_context.image = surface;
		filter_context.clip.x = x;
		filter_context.clip.y = y;
		filter_context.clip.w = clip_w;
		filter_context.clip.h = clip_h;
		PW_FILTER_TOOLS_init(&filter_context);
		PW_FILTER_TOOLS_config(&filter_context, (int)sJPEGTableInfo.dwDecodedHSize, (int)dwJPEGHeight, picfRGB);

#ifdef RICH_DEBUG_INFO
		IMG_TRACE("JPG : Start HW Partial Decode\n");
#endif

		for (dwLine = 0; dwLine < dwJPEGHeight; dwLine += dwHWDecodeHeight)
		{
			// The last band may be shorter than dwHWDecodeHeight.
			DWORD    dwMergedHeight = dwJPEGHeight - dwLine;

			if (dwMergedHeight > dwHWDecodeHeight)
			{
				dwMergedHeight = dwHWDecodeHeight;
			}

			// Merged the info
			sJPEGTableInfo.sDSTMergeRect.wBlitX = 0;
			sJPEGTableInfo.sDSTMergeRect.wBlitY = 0;
			sJPEGTableInfo.sDSTMergeRect.wBlitHeight = dwMergedHeight;

			utilHWAccelMerge(&sJPEGTableInfo);

			// Advance the plane source addresses to the next band.
			sJPEGTableInfo.dwYStartAddr += (sJPEGTableInfo.dwDecodedHSize*dwMergedHeight);
			sJPEGTableInfo.dwCbStartAddr += (dwColorBytesPerLine*dwMergedHeight);
			sJPEGTableInfo.dwCrStartAddr += (dwColorBytesPerLine*dwMergedHeight);

			// NOTE(review): a non-zero return only ends this band; the next
			// band is still merged and sent, as in the original flow.
			for (dwRow = 0; dwRow < dwMergedHeight; dwRow++)
			{
				if (PW_FILTER_TOOLS_sendPixels(&filter_context,
					(int)scan_line, (Uint8 *)&pbMergedBuffer[line_pitch*dwRow], line_pitch, picfRGB) != 0)
				{
					break;
				}
				scan_line ++;
			}
		}
		PW_FILTER_TOOLS_finalize(&filter_context);
	}

#ifdef RICH_DEBUG_INFO
	IMG_TRACE("JPG : Try to free resource filter, not to init\n");
#endif

cleanup:
	// Single exit: release whatever this function still owns.
	if (JPEGRaw_Use_HWPool)
	{
		utilMemoryFree(pvJPEGRawdataPool);
	}
	if (pvJPEGYbuffer)
	{
		utilMemoryFree(pvJPEGYbuffer);
	}
	if (pvJPEGUbuffer)
	{
		utilMemoryFree(pvJPEGUbuffer);
	}
	if (pvJPEGVbuffer)
	{
		utilMemoryFree(pvJPEGVbuffer);
	}
#ifdef RICH_DEBUG_INFO
	IMG_TRACE("JPG : Try to free pbMergedBuffer\n");
#endif
	if (MergeBuffer_HWPool && pbMergedBuffer)
	{
		utilMemoryFree(pbMergedBuffer);
	}

	IMG_TRACE("JPG :---- Partial Hardware Decode Finished ----\n");
	// done...

	return status;
}

//
// Load JPEG files in partial hardware decode.   "PJP"
//
SDL_Surface *PW_IMG_LoadPJP_RW(SDL_RWops *src, SDL_Surface *target, SDL_Rect *rect, PW_IMAGE_FILTERS *filters)
{
	int start;
	struct jpeg_decompress_struct cinfo;
	JSAMPROW rowptr[1];
	SDL_Surface *volatile surface = NULL;
	struct my_error_mgr jerr;
	/* PW_SDL Spec. */
	int x, y, clip_w, clip_h;
	PW_IMAGE_FILTER_CONTEXT filter_context;
	BOOL bSwapOrder = FALSE;

	if ( !src ) {
		/* The error message has been set in SDL_RWFromFile */
		return NULL;
	}
	start = SDL_RWtell(src);

	if ( IMG_InitJPG() < 0 ) {
		return NULL;
	}

	/* PW_SDL Spec. */
	if (rect == NULL) {
		x = 0;
		y = 0;
		clip_w = 0;
		clip_h = 0;
	} else {
		x = rect->x;
		y = rect->y;
		clip_w = rect->w;
		clip_h = rect->h;
		if (clip_w < 0 || clip_h < 0)
			return NULL;
	}

	/* Allen K, init filters context */
	filter_context.filters = NULL;

	/* Create a decompression structure and load the JPEG header */
	cinfo.err = lib.jpeg_std_error(&jerr.errmgr);
	jerr.errmgr.error_exit = my_error_exit;
	jerr.errmgr.output_message = output_no_message;
	if(setjmp(jerr.escape)) {
		/* Allen K, clean up filters */
		if (filters != NULL && filter_context.filters == filters)
		{
			PW_FILTER_TOOLS_finalize(&filter_context);
		}
		/* If we get here, libjpeg found an error */
		lib.jpeg_destroy_decompress(&cinfo);
		if ( surface != NULL ) {
			if ( SDL_MUSTLOCK(surface) ) {
				SDL_UnlockSurface(surface);
			}
			if ( surface != target ) {
				SDL_FreeSurface(surface);
				surface = NULL;
			}
		}
		SDL_RWseek(src, start, SEEK_SET);
		IMG_QuitJPG();
		IMG_SetError("JPEG loading error");
		return NULL;
	}

	lib.jpeg_create_decompress(&cinfo);
	jpeg_SDL_RW_src(&cinfo, src);
	lib.jpeg_read_header(&cinfo, TRUE);

#ifdef RICH_DEBUG_INFO
	IMG_TRACE("JPG : cinfo.num_components = %d\n",cinfo.num_components);
#endif

	if(cinfo.num_components == 4)
	{

		/* Set 32-bit Raw output */
		cinfo.out_color_space = JCS_CMYK;
		cinfo.quantize_colors = FALSE;
		lib.jpeg_calc_output_dimensions(&cinfo);

		/* Check target surface format */
		if ( target != NULL )
		{
			if ( target->format->BitsPerPixel != 32 )
			{
				lib.jpeg_destroy_decompress(&cinfo);
				SDL_RWseek(src, start, SEEK_SET);
				IMG_SetError("JPEG image is not fitted with target surface");
				IMG_QuitJPG();
				return NULL;
			} else if ( filters != NULL )
			{
				lib.jpeg_destroy_decompress(&cinfo);
				SDL_RWseek(src, start, SEEK_SET);
				IMG_SetError("JPEG image 32bits filters is not supported");
				IMG_QuitJPG();
				return NULL;
			}
		}

    IMG_TRACE("JPG : cinfo.progressive_mode = %d\n",cinfo.progressive_mode);
	IMG_TRACE("JPG : cinfo.output_width = %d\n",cinfo.output_width);
	IMG_TRACE("JPG : cinfo.output_height = %d\n",cinfo.output_height);

// size chcking should be done for progressive format first.
#if PW_IMG_JPG_PROGRESSIVE_LIMITED
		if (cinfo.progressive_mode)
		{
			IMG_TRACE("JPG : Progressive\n");
			if ((cinfo.output_width > IMG_JPG_PROGRESSIVE_MAX_WIDTH ||
				cinfo.output_height > IMG_JPG_PROGRESSIVE_MAX_HEIGHT) ||
				(cinfo.output_width < IMG_JPG_PROGRESSIVE_MIN_WIDTH &&
				cinfo.output_height < IMG_JPG_PROGRESSIVE_MIN_HEIGHT))
			{
				lib.jpeg_destroy_decompress(&cinfo);
				SDL_RWseek(src, start, SEEK_SET);
				IMG_QuitJPG();
				IMG_SetError("JPEG image is not supported size\n");
				return NULL;
			}
		}
#endif

		if ((cinfo.output_width > IMG_JPG_MAX_WIDTH ||
			cinfo.output_height > IMG_JPG_MAX_HEIGHT) ||
			(cinfo.output_width < IMG_JPG_MIN_WIDTH ||
				cinfo.output_height < IMG_JPG_MIN_HEIGHT))
		{
			lib.jpeg_destroy_decompress(&cinfo);
			SDL_RWseek(src, start, SEEK_SET);
			IMG_QuitJPG();
			IMG_SetError("JPEG image is not supported size\n");
			return NULL;
		}

#if PW_IMG_JPG_SCALE
		{
			double scale_factor;
			int w, h;
			w = (target == NULL)? utilSystemMetricsGet(smScreenWidth): target->w;
			h = (target == NULL)? utilSystemMetricsGet(smScreenHeight): target->h;
			scale_factor = (double)cinfo.output_width / w;
			if (scale_factor > ((double) cinfo.output_height / h))
				scale_factor = (double)cinfo.output_height / h;
			cinfo.scale_denom = (unsigned int)scale_factor;
			if (cinfo.scale_denom == 0)
				cinfo.scale_denom = 1;
			lib.jpeg_calc_output_dimensions(&cinfo);
		}
#endif


		// Allen K, color order issue, THE PJP uses HW decoder so that the color components order must match to hardware design but not JPEG RGB order
		/* Allocate an output surface to hold the image */
		if ( target == NULL ) {
			surface = SDL_AllocSurface(SDL_SWSURFACE,
				cinfo.output_width, cinfo.output_height, 32,
#if defined(TOPAZ_ENHANCE)
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
				0x000000FF, 0x0000FF00, 0x00FF0000, 0xFF000000);
#else
				0xFF000000, 0x00FF0000, 0x0000FF00, 0x000000FF);
#endif
#else
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
				0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000);
#else
				0x0000FF00, 0x00FF0000, 0xFF000000, 0x000000FF);
#endif
#endif
		} else {
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
			bSwapOrder = (target->format->Rmask != 0x000000FF);
#else
			bSwapOrder = (target->format->Rmask != 0xFF000000);
#endif
			surface = target;
		}
	} else {
		/* Set 24-bit RGB output */
#ifdef PW_FB_UYV444B
		cinfo.out_color_space = JCS_YCbCr;
#else
		cinfo.out_color_space = JCS_RGB;
#endif

		cinfo.quantize_colors = FALSE;
#ifdef FAST_JPEG
		cinfo.scale_num   = 1;
		cinfo.scale_denom = 1;
		cinfo.dct_method = JDCT_FASTEST;
		cinfo.do_fancy_upsampling = FALSE;
#endif
		lib.jpeg_calc_output_dimensions(&cinfo);

        IMG_TRACE("JPG : cinfo.progressive_mode = %d\n",cinfo.progressive_mode);
		IMG_TRACE("JPG : cinfo.output_width = %d\n",cinfo.output_width);
		IMG_TRACE("JPG : cinfo.output_height = %d\n",cinfo.output_height);

		/* Check target surface format */
		if ( target != NULL && target->format->BitsPerPixel != 24 ) {
			lib.jpeg_destroy_decompress(&cinfo);
			SDL_RWseek(src, start, SEEK_SET);
			IMG_QuitJPG();
			IMG_SetError("JPEG image is not fitted with target surface");
			return NULL;
		}

// Size checking should be done for the progressive format first.
#if PW_IMG_JPG_PROGRESSIVE_LIMITED
		if (cinfo.progressive_mode)
		{
			IMG_TRACE("JPG : Progressive\n");
			if ((cinfo.output_width > IMG_JPG_PROGRESSIVE_MAX_WIDTH ||
				cinfo.output_height > IMG_JPG_PROGRESSIVE_MAX_HEIGHT) ||
				(cinfo.output_width < IMG_JPG_PROGRESSIVE_MIN_WIDTH &&
				cinfo.output_height < IMG_JPG_PROGRESSIVE_MIN_HEIGHT))
			{
				lib.jpeg_destroy_decompress(&cinfo);
				SDL_RWseek(src, start, SEEK_SET);
				IMG_QuitJPG();
				IMG_SetError("JPEG image is not supported size\n");
				IMG_TRACE("JPG : Not support progressive jpeg size\n");
				return NULL;
			}
		}
#endif

		if ((cinfo.output_width > IMG_JPG_MAX_WIDTH ||
			cinfo.output_height > IMG_JPG_MAX_HEIGHT) ||
			(cinfo.output_width < IMG_JPG_MIN_WIDTH ||
				cinfo.output_height < IMG_JPG_MIN_HEIGHT))
		{
			lib.jpeg_destroy_decompress(&cinfo);
			SDL_RWseek(src, start, SEEK_SET);
			IMG_QuitJPG();
			IMG_SetError("JPEG image is not supported size\n");
			return NULL;
		}


#if PW_IMG_JPG_SCALE
		{
			double scale_factor;
			int w, h;
			w = (target == NULL)? utilSystemMetricsGet(smScreenWidth): target->w;
			h = (target == NULL)? utilSystemMetricsGet(smScreenHeight): target->h;
			scale_factor = (double)cinfo.output_width / w;
			if (scale_factor > ((double) cinfo.output_height / h))
				scale_factor = (double)cinfo.output_height / h;
			cinfo.scale_denom = (unsigned int)scale_factor;
			if (cinfo.scale_denom == 0)
				cinfo.scale_denom = 1;
			lib.jpeg_calc_output_dimensions(&cinfo);
		}
#endif

		/* Allocate an output surface to hold the image */
		if ( target == NULL) {
			surface = SDL_AllocSurface(SDL_SWSURFACE,
					cinfo.output_width, cinfo.output_height, 24,
#if defined(TOPAZ_ENHANCE)
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
				0xFF0000, 0x00FF00, 0x0000FF,
#else
				0x0000FF, 0x00FF00, 0xFF0000,
#endif
#else
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
				0x0000FF, 0x00FF00, 0xFF0000,
#else
				0xFF0000, 0x00FF00, 0x0000FF,
#endif
#endif
						   0);
		} else {
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
			bSwapOrder = (target->format->Rmask != 0xFF0000);
#else
			bSwapOrder = (target->format->Rmask != 0x0000FF);
#endif
			surface = target;
		}
	}

	if ( surface == NULL ) {
		lib.jpeg_destroy_decompress(&cinfo);
		SDL_RWseek(src, start, SEEK_SET);
		IMG_QuitJPG();
		IMG_SetError("Out of memory");
		IMG_TRACE("JPG : not enough memory\n");
		return NULL;
	}

	if ( SDL_MUSTLOCK(surface) ) {
		if ( SDL_LockSurface(surface) < 0 ) {
			lib.jpeg_destroy_decompress(&cinfo);
			SDL_RWseek(src, start, SEEK_SET);
			IMG_QuitJPG();
			IMG_SetError("Cannot Lock Surface");
			if ( surface != target ) {
				SDL_FreeSurface(surface);
				surface = NULL;
			}
			return NULL;
		}
	}

	/* Decompress the image */
	lib.jpeg_start_decompress(&cinfo);

	/* Allen K, try to generate a exception to test if we can succeed to recovery the exceptions */
#if 0
	/* enable the code via debugger */
	if (0)
		my_error_exit((j_common_ptr)&cinfo);
#endif

	/* PW_SDL Spec. */
	if (clip_w == 0) clip_w = surface->w;
	if (clip_h == 0) clip_h = surface->h;

#ifdef RICH_DEBUG_INFO
	IMG_TRACE("JPG : Try to decode\n");
#endif

	if (clip_w > 0 && clip_h > 0)
	{
		int line_pitch = cinfo.output_width * surface->format->BytesPerPixel;
		void * temp_line_buf = malloc(line_pitch);
		if ( temp_line_buf == NULL )
		{
			if ( SDL_MUSTLOCK(surface) )
			{
				SDL_UnlockSurface(surface);
			}
			if ( surface != target )
			{
				SDL_FreeSurface(surface);
				surface = NULL;
			}
			lib.jpeg_destroy_decompress(&cinfo);
			SDL_RWseek(src, start, SEEK_SET);
			IMG_QuitJPG();
			IMG_SetError("Out of memory");
			IMG_TRACE("JPG : Not support size\n");
			return NULL;
		} else
		{
			Uint8 * pixels = (Uint8 *)surface->pixels + surface->pitch * y + surface->format->BytesPerPixel * x;

			IMG_TRACE("JPG : filters = %p\n",filters);
//			if (filters != NULL)
			{
#if defined(TOPAZ) || defined(TOPAZ_ENHANCE)
				if (hpdHW_PARTIAL_DECODE == utilHWAccelHWPartialDecodeGet())
				{
					// Replaced by hardware + software solution.
					if (!Topaz_PartialHardwareDecode(src,surface,x,y,clip_w,clip_h,filters))
					{
						IMG_TRACE("JPG : Partial Hardware Decode Failed.\n");
						// Unable to support partial decode
						if (SDL_MUSTLOCK(surface) )
						{
							IMG_TRACE("JPG : Unlock.\n");
							SDL_UnlockSurface(surface);
						}

						// The surface is not from upper layer, free it.
						if ( surface != target )
						{
							SDL_FreeSurface(surface);
						}
						surface = NULL;

					}
				}else
#endif
				{
					// Replaced by hardware + software solution.
					if (!PartialHardwareDecode(src,surface,x,y,clip_w,clip_h,filters))
					{
						IMG_TRACE("JPG : Partial Hardware Decode Failed.\n");
						// Unable to support partial decode
						if (SDL_MUSTLOCK(surface) )
						{
							IMG_TRACE("JPG : Unlock.\n");
							SDL_UnlockSurface(surface);
						}
						SDL_FreeSurface(surface);
						surface = NULL;

					}
				}

				// Original filter design
#if 0
				filter_context.filters = filters;
				filter_context.image = surface;
				filter_context.clip.x = x;
				filter_context.clip.y = y;
				filter_context.clip.w = clip_w;
				filter_context.clip.h = clip_h;
				PW_FILTER_TOOLS_init(&filter_context);
				PW_FILTER_TOOLS_config(&filter_context, (int)cinfo.output_width, (int)cinfo.output_height, picfRGB);
				while ( (int)cinfo.output_scanline < cinfo.output_height )
				{
					rowptr[0] = (JSAMPROW)temp_line_buf;

					IMG_TRACE("JPG : cinfo.output_scanline = %d , cinfo.output_height = %d\n",cinfo.output_scanline,cinfo.output_height);

					lib.jpeg_read_scanlines(&cinfo, rowptr, (JDIMENSION) 1);
					if (PW_FILTER_TOOLS_sendPixels(&filter_context,
						(int)cinfo.output_scanline - 1, (Uint8 *)temp_line_buf, line_pitch, picfRGB) != 0)
						break;
				}
				PW_FILTER_TOOLS_finalize(&filter_context);
#endif

			}
#if 0 // No filter, use partial HW decode
			else {
				int temp_line_pitch;
				clip_w = min(clip_w, (int)cinfo.output_width);
				clip_h = min(clip_h, (int)cinfo.output_height);
				clip_w = min(clip_w, surface->w - x);
				clip_h = min(clip_h, surface->h - y);
				temp_line_pitch= clip_w * surface->format->BytesPerPixel;
				while ( (int)cinfo.output_scanline < clip_h )
				{
					rowptr[0] = (JSAMPROW)temp_line_buf;
					lib.jpeg_read_scanlines(&cinfo, rowptr, (JDIMENSION) 1);
					memcpy(pixels, temp_line_buf, temp_line_pitch);
					pixels += surface->pitch;
				}
			}
#endif
			IMG_TRACE("JPG : Exit from decode\n");
			free(temp_line_buf);
		}
	}

	if (surface != NULL && SDL_MUSTLOCK(surface) ) {
		SDL_UnlockSurface(surface);
	}

	if (cinfo.output_scanline >= cinfo.output_height)
		lib.jpeg_finish_decompress(&cinfo);
	lib.jpeg_destroy_decompress(&cinfo);

	IMG_QuitJPG();

	return(surface);
}

/* Load a JPEG type image from an SDL datasource */
SDL_Surface *PW_IMG_LoadJPG_RW(SDL_RWops *src, SDL_Surface *target, SDL_Rect *rect, PW_IMAGE_FILTERS *filters)
{
	int start;
	struct jpeg_decompress_struct cinfo;
	JSAMPROW rowptr[1];
	SDL_Surface *volatile surface = NULL;
	struct my_error_mgr jerr;
	/* PW_SDL Spec. */
	int x, y, clip_w, clip_h;
	PW_IMAGE_FILTER_CONTEXT filter_context;
	BOOL bSwapOrder = FALSE;

	if ( !src ) {
		/* The error message has been set in SDL_RWFromFile */
		return NULL;
	}
	start = SDL_RWtell(src);

	if ( IMG_InitJPG() < 0 ) {
		return NULL;
	}

	/* PW_SDL Spec. */
	if (rect == NULL) {
		x = 0;
		y = 0;
		clip_w = 0;
		clip_h = 0;
	} else {
		x = rect->x;
		y = rect->y;
		clip_w = rect->w;
		clip_h = rect->h;
		if (clip_w < 0 || clip_h < 0)
			return NULL;
	}

	/* Allen K, init filters context */
	filter_context.filters = NULL;

	/* Create a decompression structure and load the JPEG header */
	cinfo.err = lib.jpeg_std_error(&jerr.errmgr);
	jerr.errmgr.error_exit = my_error_exit;
	jerr.errmgr.output_message = output_no_message;
	if(setjmp(jerr.escape)) {
		/* Allen K, clean up filters */
		if (filters != NULL && filter_context.filters == filters)
		{
			PW_FILTER_TOOLS_finalize(&filter_context);
		}
		/* If we get here, libjpeg found an error */
		lib.jpeg_destroy_decompress(&cinfo);
		if ( surface != NULL ) {
			if ( SDL_MUSTLOCK(surface) ) {
				SDL_UnlockSurface(surface);
			}
			if ( surface != target ) {
				SDL_FreeSurface(surface);
				surface = NULL;
			}
		}
		SDL_RWseek(src, start, SEEK_SET);
		IMG_QuitJPG();
		IMG_SetError("JPEG loading error");
		return NULL;
	}

	lib.jpeg_create_decompress(&cinfo);
	jpeg_SDL_RW_src(&cinfo, src);
	lib.jpeg_read_header(&cinfo, TRUE);

	IMG_TRACE("JPG : cinfo.num_components = %d\n",cinfo.num_components);

	if(cinfo.num_components == 4) {

		/* Set 32-bit Raw output */
		cinfo.out_color_space = JCS_CMYK;
		cinfo.quantize_colors = FALSE;
		lib.jpeg_calc_output_dimensions(&cinfo);

		/* Check target surface format */
		if ( target != NULL ) {
			if ( target->format->BitsPerPixel != 32 ) {
				lib.jpeg_destroy_decompress(&cinfo);
				SDL_RWseek(src, start, SEEK_SET);
				IMG_SetError("JPEG image is not fitted with target surface");
				IMG_QuitJPG();
				return NULL;
			} else if ( filters != NULL ) {
				lib.jpeg_destroy_decompress(&cinfo);
				SDL_RWseek(src, start, SEEK_SET);
				IMG_SetError("JPEG image 32bits filters is not supported");
				IMG_QuitJPG();
				return NULL;
			}
		}

    IMG_TRACE("JPG : cinfo.progressive_mode = %d\n",cinfo.progressive_mode);
	IMG_TRACE("JPG : cinfo.output_width = %d\n",cinfo.output_width);
	IMG_TRACE("JPG : cinfo.output_height = %d\n",cinfo.output_height);

// size chcking should be done for progressive format first.
#if PW_IMG_JPG_PROGRESSIVE_LIMITED
		if (cinfo.progressive_mode)
		{
			IMG_TRACE("JPG : Progressive\n");
			if ((cinfo.output_width > IMG_JPG_PROGRESSIVE_MAX_WIDTH ||
				cinfo.output_height > IMG_JPG_PROGRESSIVE_MAX_HEIGHT) ||
				(cinfo.output_width < IMG_JPG_PROGRESSIVE_MIN_WIDTH &&
				cinfo.output_height < IMG_JPG_PROGRESSIVE_MIN_HEIGHT))
			{
				lib.jpeg_destroy_decompress(&cinfo);
				SDL_RWseek(src, start, SEEK_SET);
				IMG_QuitJPG();
				IMG_SetError("JPEG image is not supported size\n");
				return NULL;
			}
		}
#endif

		if ((cinfo.output_width > IMG_JPG_MAX_WIDTH ||
			cinfo.output_height > IMG_JPG_MAX_HEIGHT) ||
			(cinfo.output_width < IMG_JPG_MIN_WIDTH &&
				cinfo.output_height < IMG_JPG_MIN_HEIGHT))
		{
			lib.jpeg_destroy_decompress(&cinfo);
			SDL_RWseek(src, start, SEEK_SET);
			IMG_QuitJPG();
			IMG_SetError("JPEG image is not supported size\n");
			return NULL;
		}

#if PW_IMG_JPG_SCALE
		{
			double scale_factor;
			int w, h;
			w = (target == NULL)? utilSystemMetricsGet(smScreenWidth): target->w;
			h = (target == NULL)? utilSystemMetricsGet(smScreenHeight): target->h;
			scale_factor = (double)cinfo.output_width / w;
			if (scale_factor > ((double) cinfo.output_height / h))
				scale_factor = (double)cinfo.output_height / h;
			cinfo.scale_denom = (unsigned int)scale_factor;
			if (cinfo.scale_denom == 0)
				cinfo.scale_denom = 1;
			lib.jpeg_calc_output_dimensions(&cinfo);
		}
#endif

		/* Allocate an output surface to hold the image */
		if ( target == NULL ) {
			surface = SDL_AllocSurface(SDL_SWSURFACE,
				cinfo.output_width, cinfo.output_height, 32,
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
						   0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000);
#else
						   0x0000FF00, 0x00FF0000, 0xFF000000, 0x000000FF);
#endif
		} else {
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
			bSwapOrder = (target->format->Rmask != 0x00FF0000);
#else
			bSwapOrder = (target->format->Rmask != 0x0000FF00);
#endif
			surface = target;
		}
	} else {
		/* Set 24-bit RGB output */
#ifdef PW_FB_UYV444B
		cinfo.out_color_space = JCS_YCbCr;
#else
		cinfo.out_color_space = JCS_RGB;
#endif

		cinfo.quantize_colors = FALSE;
#ifdef FAST_JPEG
		cinfo.scale_num   = 1;
		cinfo.scale_denom = 1;
		cinfo.dct_method = JDCT_FASTEST;
		cinfo.do_fancy_upsampling = FALSE;
#endif
		lib.jpeg_calc_output_dimensions(&cinfo);

	    IMG_TRACE("JPG : cinfo.progressive_mode = %d\n",cinfo.progressive_mode);
		IMG_TRACE("JPG : cinfo.output_width = %d\n",cinfo.output_width);
		IMG_TRACE("JPG : cinfo.output_height = %d\n",cinfo.output_height);

		/* Check target surface format */
		if ( target != NULL && target->format->BitsPerPixel != 24 ) {
			lib.jpeg_destroy_decompress(&cinfo);
			SDL_RWseek(src, start, SEEK_SET);
			IMG_QuitJPG();
			IMG_SetError("JPEG image is not fitted with target surface");
			return NULL;
		}

// size chcking should be done for progressive format first.
#if PW_IMG_JPG_PROGRESSIVE_LIMITED
		if (cinfo.progressive_mode)
		{
			IMG_TRACE("JPG : Progressive\n");
			if ((cinfo.output_width > IMG_JPG_PROGRESSIVE_MAX_WIDTH ||
				cinfo.output_height > IMG_JPG_PROGRESSIVE_MAX_HEIGHT) ||
				(cinfo.output_width < IMG_JPG_PROGRESSIVE_MIN_WIDTH &&
				cinfo.output_height < IMG_JPG_PROGRESSIVE_MIN_HEIGHT))
			{
				lib.jpeg_destroy_decompress(&cinfo);
				SDL_RWseek(src, start, SEEK_SET);
				IMG_QuitJPG();
				IMG_SetError("JPEG image is not supported size\n");
				IMG_TRACE("JPG : Not support progressive jpeg size\n");
				return NULL;
			}
		}
#endif

		if ((cinfo.output_width > IMG_JPG_MAX_WIDTH ||
			cinfo.output_height > IMG_JPG_MAX_HEIGHT) ||
			(cinfo.output_width < IMG_JPG_MIN_WIDTH &&
				cinfo.output_height < IMG_JPG_MIN_HEIGHT))
		{
			lib.jpeg_destroy_decompress(&cinfo);
			SDL_RWseek(src, start, SEEK_SET);
			IMG_QuitJPG();
			IMG_SetError("JPEG image is not supported size\n");
			return NULL;
		}


#if PW_IMG_JPG_SCALE
		{
			double scale_factor;
			int w, h;
			w = (target == NULL)? utilSystemMetricsGet(smScreenWidth): target->w;
			h = (target == NULL)? utilSystemMetricsGet(smScreenHeight): target->h;
			scale_factor = (double)cinfo.output_width / w;
			if (scale_factor > ((double) cinfo.output_height / h))
				scale_factor = (double)cinfo.output_height / h;
			cinfo.scale_denom = (unsigned int)scale_factor;
			if (cinfo.scale_denom == 0)
				cinfo.scale_denom = 1;
			lib.jpeg_calc_output_dimensions(&cinfo);
		}
#endif

		/* Allocate an output surface to hold the image */
		if ( target == NULL) {
			surface = SDL_AllocSurface(SDL_SWSURFACE,
					cinfo.output_width, cinfo.output_height, 24,
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
						   0x0000FF, 0x00FF00, 0xFF0000,
#else
						   0xFF0000, 0x00FF00, 0x0000FF,
#endif
						   0);
		} else {
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
			bSwapOrder = (target->format->Rmask != 0x0000FF);
#else
			bSwapOrder = (target->format->Rmask != 0xFF0000);
#endif
			surface = target;
		}
	}

	if ( surface == NULL ) {
		lib.jpeg_destroy_decompress(&cinfo);
		SDL_RWseek(src, start, SEEK_SET);
		IMG_QuitJPG();
		IMG_SetError("Out of memory");
		IMG_TRACE("JPG : not enough memory\n");
		return NULL;
	}

	if ( SDL_MUSTLOCK(surface) ) {
		if ( SDL_LockSurface(surface) < 0 ) {
			lib.jpeg_destroy_decompress(&cinfo);
			SDL_RWseek(src, start, SEEK_SET);
			IMG_QuitJPG();
			IMG_SetError("Cannot Lock Surface");
			if ( surface != target ) {
				SDL_FreeSurface(surface);
				surface = NULL;
			}
			return NULL;
		}
	}

	/* Decompress the image */
	lib.jpeg_start_decompress(&cinfo);

	/* Allen K, try to generate a exception to test if we can succeed to recovery the exceptions */
#if 0
	/* enable the code via debugger */
	if (0)
		my_error_exit((j_common_ptr)&cinfo);
#endif

	/* PW_SDL Spec. */
	if (clip_w == 0) clip_w = surface->w;
	if (clip_h == 0) clip_h = surface->h;

	IMG_TRACE("JPG : Try to decode\n");

	if (clip_w > 0 && clip_h > 0)
	{
		int line_pitch = cinfo.output_width * surface->format->BytesPerPixel;
		void * temp_line_buf = malloc(line_pitch);
		if ( temp_line_buf == NULL )
		{
			if ( SDL_MUSTLOCK(surface) )
			{
				SDL_UnlockSurface(surface);
			}
			if ( surface != target )
			{
				SDL_FreeSurface(surface);
				surface = NULL;
			}
			lib.jpeg_destroy_decompress(&cinfo);
			SDL_RWseek(src, start, SEEK_SET);
			IMG_QuitJPG();
			IMG_SetError("Out of memory");
			IMG_TRACE("JPG : Not support size\n");
			return NULL;
		} else
		{
			Uint8 * pixels = (Uint8 *)surface->pixels + surface->pitch * y + surface->format->BytesPerPixel * x;

			IMG_TRACE("JPG : filters = %p\n",filters);
			if (filters != NULL)
			{
				filter_context.filters = filters;
				filter_context.image = surface;
				filter_context.clip.x = x;
				filter_context.clip.y = y;
				filter_context.clip.w = clip_w;
				filter_context.clip.h = clip_h;
				PW_FILTER_TOOLS_init(&filter_context);
				PW_FILTER_TOOLS_config(&filter_context, (int)cinfo.output_width, (int)cinfo.output_height, picfRGB);
				while ( (int)cinfo.output_scanline < cinfo.output_height )
				{
					rowptr[0] = (JSAMPROW)temp_line_buf;
					lib.jpeg_read_scanlines(&cinfo, rowptr, (JDIMENSION) 1);

					if (bSwapOrder)
					{
						Uint8 * pixels = (Uint8 *)temp_line_buf;

						// 32 bits
						if (cinfo.num_components == 4)
						{
							int offset;
							Uint8 b;

							for (offset = 0; offset < line_pitch; offset += 4)
							{
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
								// little, swap 1, 3
								b = *(pixels + offset);
								*(pixels + offset) = *(pixels + offset + 2);
								*(pixels + offset + 2) = b;
#else
								// little, swap 2, 4
								b = *(pixels + offset + 1);
								*(pixels + offset + 1) = *(pixels + offset + 3);
								*(pixels + offset + 3) = b;
#endif
							}
						}
						// 24 bits
						else
						{
							int offset;
							Uint8 b;
							for (offset = 0; offset < line_pitch; offset += 3)
							{
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
								// little, swap 1, 3
								b = *(pixels + offset);
								*(pixels + offset) = *(pixels + offset + 2);
								*(pixels + offset + 2) = b;
#else
								// little, swap 1, 3
								b = *(pixels + offset);
								*(pixels + offset) = *(pixels + offset + 2);
								*(pixels + offset + 2) = b;
#endif
							}
						}
					}

					if (PW_FILTER_TOOLS_sendPixels(&filter_context,
						(int)cinfo.output_scanline - 1, (Uint8 *)temp_line_buf, line_pitch, picfRGB) != 0)
						break;
				}
				PW_FILTER_TOOLS_finalize(&filter_context);
			} else {
				int temp_line_pitch;
				clip_w = min(clip_w, (int)cinfo.output_width);
				clip_h = min(clip_h, (int)cinfo.output_height);
				clip_w = min(clip_w, surface->w - x);
				clip_h = min(clip_h, surface->h - y);
				temp_line_pitch= clip_w * surface->format->BytesPerPixel;
				if (bSwapOrder)
				{
					// 32 bits
					if (cinfo.num_components == 4)
					{
						while ((int)cinfo.output_scanline < clip_h)
						{
							rowptr[0] = (JSAMPROW)temp_line_buf;
							lib.jpeg_read_scanlines(&cinfo, rowptr, (JDIMENSION)1);
							memcpy(pixels, temp_line_buf, temp_line_pitch);
							{
								int offset;
								Uint8 b;

								for (offset = 0; offset < temp_line_pitch; offset += 4)
								{
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
									// little, swap 1, 3
									b = *(pixels + offset);
									*(pixels + offset) = *(pixels + offset + 2);
									*(pixels + offset + 2) = b;
#else
									// little, swap 2, 4
									b = *(pixels + offset + 1);
									*(pixels + offset + 1) = *(pixels + offset + 3);
									*(pixels + offset + 3) = b;
#endif
								}
							}
							pixels += surface->pitch;
						}
					}
					// 24 bits
					else
					{
						while ((int)cinfo.output_scanline < clip_h)
						{
							rowptr[0] = (JSAMPROW)temp_line_buf;
							lib.jpeg_read_scanlines(&cinfo, rowptr, (JDIMENSION)1);
							memcpy(pixels, temp_line_buf, temp_line_pitch);
							printf("......line swap\n");
							{
								int offset;
								Uint8 b;
								for (offset = 0; offset < temp_line_pitch; offset+=3)
								{
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
									// little, swap 1, 3
									b = *(pixels + offset);
									*(pixels + offset) = *(pixels + offset + 2);
									*(pixels + offset + 2) = b;
#else
									// little, swap 1, 3
									b = *(pixels + offset);
									*(pixels + offset) = *(pixels + offset + 2);
									*(pixels + offset + 2) = b;
#endif
								}
							}
							pixels += surface->pitch;
						}
					}
				}
				else
				{
					while ((int)cinfo.output_scanline < clip_h)
					{
						rowptr[0] = (JSAMPROW)temp_line_buf;
						lib.jpeg_read_scanlines(&cinfo, rowptr, (JDIMENSION)1);
						memcpy(pixels, temp_line_buf, temp_line_pitch);
						pixels += surface->pitch;
					}
				}
			}
			IMG_TRACE("JPG : Exit from decode\n");
			free(temp_line_buf);
		}
	}

	if ( SDL_MUSTLOCK(surface) ) {
		SDL_UnlockSurface(surface);
	}

	if (cinfo.output_scanline >= cinfo.output_height)
		lib.jpeg_finish_decompress(&cinfo);
	lib.jpeg_destroy_decompress(&cinfo);

	IMG_QuitJPG();

	return(surface);
}

#else

/* See if an image is contained in a data source */
int IMG_isJPG(SDL_RWops *src)
{
	return(0);
}

/* Load a JPEG type image from an SDL datasource */
SDL_Surface *IMG_LoadJPG_RW(SDL_RWops *src,int type)
{
	return(NULL);
}

SDL_Surface *PW_IMG_LoadJPG_RW(SDL_RWops *src, SDL_Surface *target, SDL_Rect *rect, PW_IMAGE_FILTERS *filters)
{
	return(NULL);
}

#endif /* LOAD_JPG */
