#include "config.h"

#include <GL/glew.h>
#include <memory.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include "texture_conversion.h"
#include "debug/tracers.h"

#include "system/types.h"

extern uint8	bp_tmem[0x10000];
extern uint32	bp_tex_addr;
extern uint32	bp_tlut_addr;
extern uint32	bp_tmem_maddr;

extern uint8	bp_tlut_rgba32[4 * 256];

extern uint8	tex_tlut_rgba32[4 * 256];


// Reverses the byte order of a 32-bit value (0xAABBCCDD -> 0xDDCCBBAA).
// Written with plain shifts and masks so it does not depend on compiler-specific
// inline assembly or on a return value being left behind in a register.
static inline uint32 byteswap32(uint32 data)
{
	return	((data & 0x000000ffu) << 24) |
			((data & 0x0000ff00u) <<  8) |
			((data & 0x00ff0000u) >>  8) |
			((data & 0xff000000u) >> 24);
}

// Exchanges the two bytes of a 16-bit value (0xAABB -> 0xBBAA).
static inline uint16 byteswap16(uint16 data)
{
	unsigned int	wide = data;
	unsigned int	swapped = (wide << 8) | (wide >> 8);

	// the conversion back to uint16 drops everything above the low 16 bits
	return (uint16)swapped;
}

//==================================================================
/*static void intensity4_to_rgb24( uint8 *srcp, uint8 *desp, int dpitch, int w, int h )
{
int		i, j, col1, col2;
uint8	*desp2;

	for (i=h; i > 0; --i)
	{
		desp2 = desp;
		for (j=w/2; j > 0; --j)
		{
			col1 = (*srcp <<   4);
			col2 = (*srcp & 0xf0);
			++srcp;
			desp2[0] = desp2[1] = desp2[2] = col1;	desp2 += 3;
			desp2[0] = desp2[1] = desp2[2] = col2;	desp2 += 3;
		}
		desp += dpitch;
	}
}
*/
/*static void intensity4_to_rgb24( uint32 *srcp, uint8 *desp, int dpitch, int w, int h )
{
	uint8	*desp2;
	uint32 *srcp2 = (uint32 *)srcp;
	int i, j, k, l;
	uint32 col;

	for(i = 0 ; i < h ; i+=8)
	{
		for(j = 0 ; j < w ; j+=8)
		{
			for(k = 0 ; k < 8 ; k++)
			{
				desp2 = desp + ((i + k) * dpitch) + (3 * j);
				col = *srcp2++;
				col = byteswap32(col);
				for(l = 0 ; l < 8 ; l++)
				{
					//printf("%08x ", col);
					desp2[0] = (col << 4) & 0xf0;
					desp2[1] = (col << 4) & 0xf0;
					desp2[2] = (col << 4) & 0xf0;
					desp2+=3;
					col >>= 4;
				}
			}
		}
	}
}
*/
// Expands tiled 4-bit intensity texels into 32-bit pixels (R = G = B = intensity, A = 0xff).
//   srcp   : source words; one byte-swapped 32-bit word holds the 8 texels of one tile row
//   desp   : destination pixel buffer
//   dpitch : destination row pitch in bytes
//   w, h   : texture size in texels; processed in steps of 8x8 tiles
// The 4-bit intensity ends up in the high nibble of each channel (low nibble stays 0).
static void i4_to_rgb32( uint32 *srcp, uint8 *desp, int dpitch, int w, int h )
{
	int		tile_y, tile_x, row, shift;

	for (tile_y = 0; tile_y < h; tile_y += 8)
	{
		for (tile_x = 0; tile_x < w; tile_x += 8)
		{
			for (row = 0; row < 8; ++row)
			{
				uint8	*out = desp + ((tile_y + row) * dpitch) + (4 * tile_x);
				uint32	word = *srcp++;

				word = byteswap32( word );

				// the most significant nibble is the leftmost texel
				for (shift = 28; shift >= 0; shift -= 4)
				{
					uint8	lum = (uint8)(((word >> shift) & 0x0f) << 4);

					out[0] = lum;
					out[1] = lum;
					out[2] = lum;
					out[3] = 0xff;
					out += 4;
				}
			}
		}
	}
}

// Expands tiled 4-bit intensity texels into one byte per texel.
//   srcp   : source words; one byte-swapped 32-bit word holds the 8 texels of one tile row
//   desp   : destination buffer, one byte per texel
//   dpitch : destination row pitch in bytes
//   w, h   : texture size in texels; processed in steps of 8x8 tiles
// The 4-bit value is stored in the high nibble of the output byte (low nibble stays 0).
static void i4_to_i8( uint32 *srcp, uint8 *desp, int dpitch, int w, int h )
{
	int		tile_y, tile_x, row, shift;

	for (tile_y = 0; tile_y < h; tile_y += 8)
	{
		for (tile_x = 0; tile_x < w; tile_x += 8)
		{
			for (row = 0; row < 8; ++row)
			{
				uint8	*out = desp + ((tile_y + row) * dpitch) + tile_x;
				uint32	word = *srcp++;

				word = byteswap32( word );

				// the most significant nibble is the leftmost texel
				for (shift = 28; shift >= 0; shift -= 4)
				{
					*out++ = (uint8)(((word >> shift) & 0x0f) << 4);
				}
			}
		}
	}
}


// Expands tiled 8-bit intensity texels into 32-bit pixels (R = G = B = intensity, A = 0xff).
//   srcp   : source words; each byte-swapped 32-bit word holds 4 texels, two words per tile row
//   desp   : destination pixel buffer
//   dpitch : destination row pitch in bytes
//   w, h   : texture size in texels; processed in tiles 8 texels wide and 4 texels high
static void i8_to_rgb32( uint32 *srcp, uint8 *desp, int dpitch, int w, int h )
{
	int		tile_y, tile_x, row, half, shift;

	for (tile_y = 0; tile_y < h; tile_y += 4)
	{
		for (tile_x = 0; tile_x < w; tile_x += 8)
		{
			for (row = 0; row < 4; ++row)
			{
				uint8	*out = desp + ((tile_y + row) * dpitch) + (4 * tile_x);

				for (half = 0; half < 2; ++half)
				{
					uint32	word = *srcp++;

					word = byteswap32( word );

					// the most significant byte is the leftmost texel
					for (shift = 24; shift >= 0; shift -= 8)
					{
						uint8	lum = (uint8)((word >> shift) & 0xff);

						out[0] = lum;
						out[1] = lum;
						out[2] = lum;
						out[3] = 0xff;
						out += 4;
					}
				}
			}
		}
	}
}

// Untiles 8-bit intensity texels into a linear one-byte-per-texel buffer.
//   srcp   : source words; each byte-swapped 32-bit word holds 4 texels, two words per tile row
//   desp   : destination buffer, one byte per texel
//   dpitch : destination row pitch in bytes
//   w, h   : texture size in texels; processed in tiles 8 texels wide and 4 texels high
static void i8_to_i8( uint32 *srcp, uint8 *desp, int dpitch, int w, int h )
{
	int		tile_y, tile_x, row, half, shift;

	for (tile_y = 0; tile_y < h; tile_y += 4)
	{
		for (tile_x = 0; tile_x < w; tile_x += 8)
		{
			for (row = 0; row < 4; ++row)
			{
				uint8	*out = desp + ((tile_y + row) * dpitch) + tile_x;

				for (half = 0; half < 2; ++half)
				{
					uint32	word = *srcp++;

					word = byteswap32( word );

					// the most significant byte is the leftmost texel
					for (shift = 24; shift >= 0; shift -= 8)
					{
						*out++ = (uint8)((word >> shift) & 0xff);
					}
				}
			}
		}
	}
}


// Expands tiled intensity/alpha texels (4 bits each, one byte per texel) into 32-bit pixels.
//   srcp   : source words; each byte-swapped 32-bit word holds 4 texels, two words per tile row
//   desp   : destination pixel buffer
//   dpitch : destination row pitch in bytes
//   w, h   : texture size in texels; processed in tiles 8 texels wide and 4 texels high
// Per texel byte: the low nibble becomes R = G = B and the high nibble becomes A,
// both placed in the high nibble of the output channel.
static void ia4_to_rgb32( uint32 *srcp, uint8 *desp, int dpitch, int w, int h )
{
	int		tile_y, tile_x, row, half, shift;

	for (tile_y = 0; tile_y < h; tile_y += 4)
	{
		for (tile_x = 0; tile_x < w; tile_x += 8)
		{
			for (row = 0; row < 4; ++row)
			{
				uint8	*out = desp + ((tile_y + row) * dpitch) + (4 * tile_x);

				for (half = 0; half < 2; ++half)
				{
					uint32	word = *srcp++;

					word = byteswap32( word );

					for (shift = 24; shift >= 0; shift -= 8)
					{
						uint32	texel = (word >> shift) & 0xff;
						uint8	lum   = (uint8)((texel << 4) & 0xf0);

						out[0] = lum;
						out[1] = lum;
						out[2] = lum;
						out[3] = (uint8)(texel & 0xf0);
						out += 4;
					}
				}
			}
		}
	}
}

// Expands tiled 16-bit intensity/alpha texels into 32-bit pixels.
//   srcp   : source words; each byte-swapped 32-bit word holds 2 texels, two words per tile row
//   desp   : destination pixel buffer
//   dpitch : destination row pitch in bytes
//   w, h   : texture size in texels; processed in 4x4 tiles
// Per 16-bit texel: the low byte becomes R = G = B and the high byte becomes A.
static void ia8_to_rgba32( uint32 *srcp, uint8 *desp, int dpitch, int w, int h )
{
	int		tile_y, tile_x, row, pair, shift;

	for (tile_y = 0; tile_y < h; tile_y += 4)
	{
		for (tile_x = 0; tile_x < w; tile_x += 4)
		{
			for (row = 0; row < 4; ++row)
			{
				uint8	*out = desp + ((tile_y + row) * dpitch) + (4 * tile_x);

				for (pair = 0; pair < 2; ++pair)
				{
					uint32	word = *srcp++;

					word = byteswap32( word );

					// upper half-word first, then the lower one
					for (shift = 16; shift >= 0; shift -= 16)
					{
						uint32	texel = (word >> shift) & 0xffff;
						uint8	lum   = (uint8)(texel & 0xff);

						out[0] = lum;
						out[1] = lum;
						out[2] = lum;
						out[3] = (uint8)((texel >> 8) & 0xff);
						out += 4;
					}
				}
			}
		}
	}
}

// Expands tiled RGB565 texels into 32-bit RGBA pixels with A = 0xff.
//   srcp   : source words; each byte-swapped 32-bit word holds 2 texels, two words per tile row
//   desp   : destination pixel buffer
//   dpitch : destination row pitch in bytes
//   w, h   : texture size in texels; processed in 4x4 tiles
// Each channel is left-aligned in its output byte; the low bits stay 0.
static void rgb565_to_rgba32( uint32 *srcp, uint8 *desp, int dpitch, int w, int h )
{
	int		tile_y, tile_x, row, pair, shift;

	for (tile_y = 0; tile_y < h; tile_y += 4)
	{
		for (tile_x = 0; tile_x < w; tile_x += 4)
		{
			for (row = 0; row < 4; ++row)
			{
				uint8	*out = desp + ((tile_y + row) * dpitch) + (4 * tile_x);

				for (pair = 0; pair < 2; ++pair)
				{
					uint32	word = *srcp++;

					word = byteswap32( word );

					// upper half-word first, then the lower one
					for (shift = 16; shift >= 0; shift -= 16)
					{
						uint32	texel = (word >> shift) & 0xffff;

						out[0] = (uint8)((texel >> 8) & 0xf8);	// red:   bits 15..11
						out[1] = (uint8)((texel >> 3) & 0xfc);	// green: bits 10..5
						out[2] = (uint8)((texel << 3) & 0xf8);	// blue:  bits 4..0
						out[3] = 0xff;
						out += 4;
					}
				}
			}
		}
	}
}

// Expands tiled RGB5A3 texels into 32-bit RGBA pixels.
//   srcp   : source words; each byte-swapped 32-bit word holds 2 texels, two words per tile row
//   desp   : destination pixel buffer
//   dpitch : destination row pitch in bytes
//   w, h   : texture size in texels; processed in 4x4 tiles
// A texel with its top bit set is opaque 5-5-5 colour; otherwise it is
// 3-bit alpha followed by 4-4-4 colour. Channels are left-aligned in the output byte.
static void rgb5a3_to_rgba32( uint32 *srcp, uint8 *desp, int dpitch, int w, int h )
{
	int		tile_y, tile_x, row, pair, shift;

	for (tile_y = 0; tile_y < h; tile_y += 4)
	{
		for (tile_x = 0; tile_x < w; tile_x += 4)
		{
			for (row = 0; row < 4; ++row)
			{
				uint8	*out = desp + ((tile_y + row) * dpitch) + (4 * tile_x);

				for (pair = 0; pair < 2; ++pair)
				{
					uint32	word = *srcp++;

					word = byteswap32( word );

					// upper half-word first, then the lower one
					for (shift = 16; shift >= 0; shift -= 16)
					{
						uint32	texel = (word >> shift) & 0xffff;

						if (texel & 0x8000)
						{
							// opaque RGB555
							out[0] = (uint8)((texel >> 7) & 0xf8);
							out[1] = (uint8)((texel >> 2) & 0xf8);
							out[2] = (uint8)((texel << 3) & 0xf8);
							out[3] = 0xff;
						}
						else
						{
							// A3 RGB444
							out[0] = (uint8)((texel >> 4) & 0xf0);
							out[1] = (uint8)((texel >> 0) & 0xf0);
							out[2] = (uint8)((texel << 4) & 0xf0);
							out[3] = (uint8)((texel >> 7) & 0xe0);
						}
						out += 4;
					}
				}
			}
		}
	}
}


// Untiles RGBA8 texels into linear 32-bit RGBA pixels.
//   srcp   : source words (byte-swapped before use)
//   desp   : destination pixel buffer
//   dpitch : destination row pitch in bytes
//   w, h   : texture size in texels; processed in 4x4 tiles
// Each tile is stored as two consecutive 32-byte halves: the first holds an
// (alpha, red) byte pair per texel, the second a (green, blue) pair per texel.
static void rgba8_to_rgba32( uint32 *srcp, uint8 *desp, int dpitch, int w, int h )
{
	int		tile_y, tile_x, row, pair;

	for (tile_y = 0; tile_y < h; tile_y += 4)
	{
		for (tile_x = 0; tile_x < w; tile_x += 4)
		{
			// first half of the tile: alpha and red of all 16 texels
			for (row = 0; row < 4; ++row)
			{
				uint8	*out = desp + ((tile_y + row) * dpitch) + (4 * tile_x);

				for (pair = 0; pair < 2; ++pair, out += 8)
				{
					uint32	word = *srcp++;

					word = byteswap32( word );

					out[0] = (uint8)((word >> 16) & 0xff);
					out[3] = (uint8)((word >> 24) & 0xff);
					out[4] = (uint8)((word >>  0) & 0xff);
					out[7] = (uint8)((word >>  8) & 0xff);
				}
			}

			// second half of the tile: green and blue of the same 16 texels
			for (row = 0; row < 4; ++row)
			{
				uint8	*out = desp + ((tile_y + row) * dpitch) + (4 * tile_x);

				for (pair = 0; pair < 2; ++pair, out += 8)
				{
					uint32	word = *srcp++;

					word = byteswap32( word );

					out[1] = (uint8)((word >> 24) & 0xff);
					out[2] = (uint8)((word >> 16) & 0xff);
					out[5] = (uint8)((word >>  8) & 0xff);
					out[6] = (uint8)((word >>  0) & 0xff);
				}
			}
		}
	}
}

// Decodes tiled, block-compressed texels into 32-bit RGBA pixels.
//   srcp   : source words (byte-swapped before use)
//   desp   : destination pixel buffer
//   dpitch : destination row pitch in bytes
//   w, h   : texture size in texels; processed in 8x8 tiles of four 4x4 sub-blocks
// Every 4x4 sub-block is two words: the first carries two RGB565 endpoint colours,
// the second carries sixteen 2-bit palette indices (most significant pair first).
// The palette follows the S3TC DXT1 rule:
//   colour0 >  colour1 : four opaque colours (endpoints plus the 1/3 and 2/3 blends)
//   colour0 <= colour1 : three opaque colours (endpoints plus midpoint), index 3 transparent
static void cmp_to_rgba32( uint32 *srcp, uint8 *desp, int dpitch, int w, int h )
{
	uint8	*desp2;
	uint8	*desp3;
	uint32	*srcp2 = srcp;
	int		i, j, k, l, m, n;
	uint32	col, sel;
	uint32	c[4][4];

	for(i = 0 ; i < h ; i+=8)
	{
		for(j = 0 ; j < w ; j+=8)
		{
			for(k = 0 ; k < 8 ; k+=4)
			{
				for(l = 0 ; l < 8 ; l+=4)
				{
					col = *srcp2++;
					col = byteswap32(col);

					// endpoint colours, both RGB565: colour0 in the upper half-word, colour1 in the lower
					c[0][0] = (col >> 24) & 0xf8;
					c[0][1] = (col >> 19) & 0xfc;
					c[0][2] = (col >> 13) & 0xf8;
					c[0][3] = 0xff;

					c[1][0] = (col >> 8) & 0xf8;
					c[1][1] = (col >> 3) & 0xfc;
					c[1][2] = (col << 3) & 0xf8;
					c[1][3] = 0xff;

					if(((col >> 16) & 0xffff) > (col & 0xffff))
					{
						// four-colour mode
						for (m = 0 ; m < 4 ; m++)
						{
							c[2][m] = (c[0][m] * 2 + c[1][m]) / 3;
							c[3][m] = (c[1][m] * 2 + c[0][m]) / 3;
						}
					}
					else
					{
						// three-colour mode: midpoint plus a fully transparent entry
						for (m = 0 ; m < 4 ; m++)
						{
							c[2][m] = (c[0][m] + c[1][m]) / 2;
							c[3][m] = c[2][m];
						}
						// NOTE(review): S3TC DXT1 defines index 3 as transparent black; the
						// midpoint RGB is kept here so texture filtering does not darken
						// edges -- confirm against the target hardware's behaviour.
						c[3][3] = 0;
					}

					col = *srcp2++;
					col = byteswap32(col);

					desp2 = desp + ((i + k) * dpitch) + ((j + l) * 4);

					for(m = 0 ; m < 4 ; m++)
					{
						desp3 = desp2;
						for(n = 0 ; n < 4 ; n++)
						{
							sel = (col >> 30) & 0x3;
							desp3[0] = c[sel][0];
							desp3[1] = c[sel][1];
							desp3[2] = c[sel][2];
							desp3[3] = c[sel][3];
							desp3+=4;
							col <<= 2;
						}
						desp2 += dpitch;
					}
				}
			}
		}
	}
}


//==================================================================
/*
static void rgb5a3_to_rgba32( u_short *srcp, uint8 *desp, int dpitch, int w, int h )
{
int		i, j, col;
uint8	*desp2;

	for (i=h; i > 0; --i)
	{
		desp2 = desp;
		for (j=w; j > 0; --j)
		{
			col = *srcp++;
			// BYTESWAP MF! $duddie$
			col = ((col >> 8) & 0x00ff) | ((col << 8) & 0xff00);

			if ( col & 0x8000 )	// RGB5
			{
				desp2[0] = ((col >> 10) & 31) << 3;
				desp2[1] = ((col >>  5) & 31) << 3;
				desp2[2] = ((col >>  0) & 31) << 3;
				desp2[3] = 0xff;	// assume alpha to 255.. bha !
			}
			else				// RGB4A3
			{
				desp2[3] = ((col >> 12) &  7) << (8-3);
				desp2[0] = ((col >>  8) & 15) << (8-4);
				desp2[1] = ((col >>  4) & 15) << (8-4);
				desp2[2] = ((col >>  0) & 15) << (8-4);
			}
			desp2 += 4;
		}
		desp += dpitch;
	}
}
*/

/*static void rgb5a3_to_rgba32( u_short *srcp, uint8 *desp, int dpitch, int w, int h )
{
	uint8	*desp2;
	int i, j, k, l;
	int col;

	for(i = 0 ; i < h ; i+=4)
	{
		for(j = 0 ; j < w ; j+=4)
		{
			for(k = 0 ; k < 4 ; k++)
			{
				desp2 = desp + ((i + k) * dpitch) + (4 * j);
				for(l = 0 ; l < 4 ; l++)
				{
					col = *srcp++;
					// BYTESWAP MF! $duddie$
					col = ((col >> 8) & 0x00ff) | ((col << 8) & 0xff00);
					desp2[0] = ((col >> 10) & 31) << 3;
					desp2[1] = ((col >>  5) & 31) << 3;
					desp2[2] = ((col >>  0) & 31) << 3;
					desp2[3] = 0xff;	// assume alpha to 255.. bha !
					desp2+=4;
				}
			}
		}
	}
}
*/


// Expands tiled 4-bit palette indices into one byte per texel (values 0..15).
//   srcp   : source words; one byte-swapped 32-bit word holds the 8 indices of one tile row
//   desp   : destination buffer, one byte per texel
//   dpitch : destination row pitch in bytes
//   w, h   : texture size in texels; processed in steps of 8x8 tiles
static void c4_to_c8( uint32 *srcp, uint8 *desp, int dpitch, int w, int h )
{
	int		tile_y, tile_x, row, shift;

	for (tile_y = 0; tile_y < h; tile_y += 8)
	{
		for (tile_x = 0; tile_x < w; tile_x += 8)
		{
			for (row = 0; row < 8; ++row)
			{
				uint8	*out = desp + ((tile_y + row) * dpitch) + tile_x;
				uint32	word = *srcp++;

				word = byteswap32( word );

				// the most significant nibble is the leftmost texel
				for (shift = 28; shift >= 0; shift -= 4)
				{
					*out++ = (uint8)((word >> shift) & 0x0f);
				}
			}
		}
	}
}

// Untiles 8-bit palette indices into a linear one-byte-per-texel buffer.
//   srcp   : source words; each byte-swapped 32-bit word holds 4 indices, two words per tile row
//   desp   : destination buffer, one byte per texel
//   dpitch : destination row pitch in bytes
//   w, h   : texture size in texels; processed in tiles 8 texels wide and 4 texels high
static void c8_to_c8( uint32 *srcp, uint8 *desp, int dpitch, int w, int h )
{
	int		tile_y, tile_x, row, half, shift;

	for (tile_y = 0; tile_y < h; tile_y += 4)
	{
		for (tile_x = 0; tile_x < w; tile_x += 8)
		{
			for (row = 0; row < 4; ++row)
			{
				uint8	*out = desp + ((tile_y + row) * dpitch) + tile_x;

				for (half = 0; half < 2; ++half)
				{
					uint32	word = *srcp++;

					word = byteswap32( word );

					// the most significant byte is the leftmost texel
					for (shift = 24; shift >= 0; shift -= 8)
					{
						*out++ = (uint8)((word >> shift) & 0xff);
					}
				}
			}
		}
	}
}

/*
static void c4_to_rgb32( uint32 *srcp, uint8 *desp, int dpitch, int w, int h )
{
	uint8	*desp2;
	uint32 *srcp2 = (uint32 *)srcp;
	int i, j, k, l;
	uint32 col;
	uint32 col16;

	for(i = 0 ; i < h ; i+=8)
	{
		for(j = 0 ; j < w ; j+=8)
		{
			for(k = 0 ; k < 8 ; k++)
			{
				desp2 = desp + ((i + k) * dpitch) + (4 * j);
				col = *srcp2++;
				col = byteswap32(col);
				for(l = 0 ; l < 8 ; l++)
				{
					col16 = ((uint32 *)tex_tlut_rgba32)[(col >> 28) & 0x0f];
					*((uint32 *)desp2) = col16;
					desp2+=4;
					col <<= 4;
				}
			}
		}
	}
}

static void c8_to_rgb32( uint32 *srcp, uint8 *desp, int dpitch, int w, int h )
{
	uint8	*desp2;
	uint32 *srcp2 = (uint32 *)srcp;
	int i, j, k, l, n;
	uint32 col;
	uint32	col16;
	for(i = 0 ; i < h ; i+=4)
	{
		for(j = 0 ; j < w ; j+=8)
		{
			for(k = 0 ; k < 4 ; k++)
			{
				desp2 = desp + ((i + k) * dpitch) + (4 * j);
				for(n = 0 ; n < 2 ; n++)
				{
					col = *srcp2++;
					col = byteswap32(col);
					for(l = 0 ; l < 4 ; l++)
					{
						col16 = ((uint32 *)tex_tlut_rgba32)[(col >> 24) & 0xff];
						*((uint32 *)desp2) = col16;
						col <<= 8;
						desp2+=4;
					}
				}
			}
		}
	}
}


*/


//==================================================================

// Converts the lookup table found at bp_tmem + bp_tlut_addr into 32-bit RGBA
// entries stored in tex_tlut_rgba32.
//   fmt  : entry format -- 0 = IA8, 1 = RGB565, 2 = RGB5A3; any other value is
//          reported through syslog_error and nothing is converted
//   size : number of 16-bit entries to convert; clamped to the capacity of
//          tex_tlut_rgba32 so an oversized request cannot write past the table
// Entries are read byte by byte, high byte first, which yields the same value as a
// byte-swapped 16-bit load on a little-endian host without requiring the table
// address to be 2-byte aligned.
// NOTE(review): bp_tlut_addr + 2 * size is not checked against the size of bp_tmem;
// confirm that callers keep the table inside texture memory.
void tex_convert_tlut(uint32 fmt, uint32 size)
{
	const uint32	max_entries = sizeof(tex_tlut_rgba32) / 4;
	const uint8		*srcp = bp_tmem + bp_tlut_addr;
	uint8			*desp = tex_tlut_rgba32;
	uint16			col16;
	uint32			i;

	switch(fmt)
	{
	case 0x0:	// IA8
	case 0x1:	// RGB565
	case 0x2:	// RGB5A3
		break;
	default:
		syslog_error(BP,"Unsupported TLUT format %d\n", fmt);
		return;
	}

	if (size > max_entries)
		size = max_entries;

	for(i = 0 ; i < size ; i++, srcp += 2, desp += 4)
	{
		col16 = (uint16)((srcp[0] << 8) | srcp[1]);

		switch(fmt)
		{
		case 0x0:	// IA8: low byte is intensity, high byte is alpha
			desp[0] = col16 & 0xff;
			desp[1] = col16 & 0xff;
			desp[2] = col16 & 0xff;
			desp[3] = (col16 >> 8) & 0xff;
			break;
		case 0x1:	// RGB565
			desp[0] = (col16 >> 8) & 0xf8;
			desp[1] = (col16 >> 3) & 0xfc;
			desp[2] = (col16 << 3) & 0xf8;
			desp[3] = 0xff;
			break;
		default:	// 0x2, RGB5A3
			if (col16 & 0x8000)
			{
				// opaque RGB555
				desp[0] = (col16 >> 7) & 0xf8;
				desp[1] = (col16 >> 2) & 0xf8;
				desp[2] = (col16 << 3) & 0xf8;
				desp[3] = 0xff;
			}
			else
			{
				// A3 RGB444
				desp[0] = (col16 >> 4) & 0xf0;
				desp[1] = (col16 >> 0) & 0xf0;
				desp[2] = (col16 << 4) & 0xf0;
				desp[3] = (col16 >> 7) & 0xe0;
			}
			break;
		}
	}
}

//==================================================================







//===============================================================================
// Counts the set bits among the low 32 bits of val.
//   val       : value to inspect
//   nex_log2p : receives the exponent of the smallest power of two that is >= val
//               (0 when val is 0)
// Returns the number of set bits; a result <= 1 means val is 0 or an exact power of two.
// The value itself is shifted right instead of walking a signed mask leftwards,
// which would shift into the sign bit of an int.
static int count_bits( uint32 val, int *nex_log2p )
{
int	i, cnt, highmost;

	cnt = 0;
	highmost = -1;
	for (i=0; i < 32 && val != 0; ++i)
	{
		if ( val & 1 )
		{
			++cnt;
			highmost = i;
		}

		val >>= 1;
	}

	*nex_log2p = 0;
	if ( highmost >= 0 )
	{
		// more than one bit set means the value does not fit in the highest bit
		// alone, so round up to the next exponent
		*nex_log2p = cnt > 1 ? (highmost + 1) : highmost;
	}

	return cnt;
}
//===============================================================================
// assuming 8bit indexed format
// Nearest-neighbour rescale of an image with one byte per texel.
//   des_datap, dw, dh, dpitch : destination buffer, its size in texels, row pitch in bytes
//   src_datap, sw, sh, spitch : source buffer, its size in texels, row pitch in bytes
// Destination rows are addressed through dpitch, so a destination whose pitch is
// larger than dw is filled correctly; for a tightly packed destination
// (dpitch == dw) the output is laid out contiguously.
static void stretch_copy8( uint8 *des_datap, int dw, int dh, int dpitch, uint8 *src_datap, int sw, int sh, int spitch )
{
float	dsrcx, dsrcy;
int		dx, dy, sx, sy;
uint8	*des_rowp;
uint8	*src_rowp;

	// source texels advanced per destination texel
	dsrcx = (float)sw / dw;
	dsrcy = (float)sh / dh;

	for (dy=0; dy < dh; ++dy)
	{
		sy = (int)(dy * dsrcy);
		src_rowp = src_datap + sy * spitch;
		des_rowp = des_datap + dy * dpitch;

		for (dx=0; dx < dw; ++dx)
		{
			sx = (int)(dx * dsrcx);
			des_rowp[dx] = src_rowp[sx];
		}
	}
}

//===============================================================================
// assuming 24bit RGB format
// Nearest-neighbour rescale of an image with three bytes per texel.
//   des_datap, dw, dh, dpitch : destination buffer, its size in texels, row pitch in bytes
//   src_datap, sw, sh, spitch : source buffer, its size in texels, row pitch in bytes
// Destination rows are addressed through dpitch, so a destination whose pitch is
// larger than dw * 3 is filled correctly; for a tightly packed destination
// (dpitch == dw * 3) the output is laid out contiguously.
static void stretch_copy24( uint8 *des_datap, int dw, int dh, int dpitch, uint8 *src_datap, int sw, int sh, int spitch )
{
float	dsrcx, dsrcy;
int		dx, dy, sx, sy;
uint8	*sp;
uint8	*dp;
uint8	*src_rowp;

	// source texels advanced per destination texel
	dsrcx = (float)sw / dw;
	dsrcy = (float)sh / dh;

	for (dy=0; dy < dh; ++dy)
	{
		sy = (int)(dy * dsrcy);
		src_rowp = src_datap + sy * spitch;
		dp = des_datap + dy * dpitch;

		for (dx=0; dx < dw; ++dx)
		{
			sx = (int)(dx * dsrcx);
			sp = src_rowp + sx * 3;
			dp[0] = sp[0];
			dp[1] = sp[1];
			dp[2] = sp[2];
			dp += 3;
		}
	}
}
//===============================================================================
// assuming 32bit RGBA format
// Nearest-neighbour rescale of an image with four bytes per texel.
//   des_datap, dw, dh, dpitch : destination buffer, its size in texels, row pitch in bytes
//   src_datap, sw, sh, spitch : source buffer, its size in texels, row pitch in bytes
// Destination rows are addressed through dpitch, so a destination whose pitch is
// larger than dw * 4 is filled correctly; for a tightly packed destination
// (dpitch == dw * 4) the output is laid out contiguously.
static void stretch_copy32( uint8 *des_datap, int dw, int dh, int dpitch, uint8 *src_datap, int sw, int sh, int spitch )
{
float	dsrcx, dsrcy;
int		dx, dy, sx, sy;
uint8	*sp;
uint8	*dp;
uint8	*src_rowp;

	// source texels advanced per destination texel
	dsrcx = (float)sw / dw;
	dsrcy = (float)sh / dh;

	for (dy=0; dy < dh; ++dy)
	{
		sy = (int)(dy * dsrcy);
		src_rowp = src_datap + sy * spitch;
		dp = des_datap + dy * dpitch;

		for (dx=0; dx < dw; ++dx)
		{
			sx = (int)(dx * dsrcx);
			sp = src_rowp + sx * 4;
			dp[0] = sp[0];
			dp[1] = sp[1];
			dp[2] = sp[2];
			dp[3] = sp[3];
			dp += 4;
		}
	}
}

//===============================================================================

// Rounds a width/height pair up to powers of two.
//   sw, sh         : source dimensions
//   out_wp, out_hp : receive 1 << exponent, where the exponent comes from count_bits()
void calc_pow2(int sw, int sh, int *out_wp, int *out_hp)
{
	int		log2w, log2h;

	// only the exponents are needed here; the bit counts are discarded
	(void)count_bits( sw, &log2w );
	(void)count_bits( sh, &log2h );

	*out_wp = 1 << log2w;
	*out_hp = 1 << log2h;
}
// Returns an image whose dimensions are powers of two.
//   src_datap      : tightly packed source image (row pitch = sw * sdepth / 8)
//   sw, sh         : source dimensions in texels
//   sdepth         : bits per texel; 8, 24 and 32 are supported
//   out_wp, out_hp : always receive the power-of-two dimensions
// When both source dimensions already are powers of two (or zero), src_datap
// itself is returned. Otherwise a new malloc'ed buffer holding the stretched image
// is returned and the caller owns both buffers. Returns NULL when src_datap is
// NULL, sdepth is unsupported or the allocation fails.
static uint8 *force_pow2( uint8 *src_datap, int sw, int sh, int sdepth, int *out_wp, int *out_hp )
{
int		log2w, log2h, nbitsw, nbitsh;
uint8	*des_datap;
int		dw, dh, bytes_per_texel;

	nbitsw = count_bits( sw, &log2w );
	nbitsh = count_bits( sh, &log2h );

	dw = 1 << log2w;
	dh = 1 << log2h;

	// report the dimensions on every path, including the failure paths
	*out_wp = dw;
	*out_hp = dh;

	if ( nbitsw <= 1 && nbitsh <= 1 )	// this is already a power of 2.. fine !
		return src_datap;

	if ( !src_datap )
		return NULL;

	assert( sdepth == 24 || sdepth == 32 || sdepth == 8);
	if ( sdepth != 8 && sdepth != 24 && sdepth != 32 )
		return NULL;	// keeps release builds from returning an uninitialised pointer

	bytes_per_texel = sdepth / 8;

	if ( !(des_datap = (uint8 *)malloc( (size_t)dw * dh * bytes_per_texel )) )
		return NULL;

	switch ( sdepth )
	{
	case 8:
		stretch_copy8( des_datap, dw, dh, dw,
						src_datap, sw, sh, sw );
		break;
	case 24:
		stretch_copy24( des_datap, dw, dh, dw * 3,
						src_datap, sw, sh, sw * 3 );
		break;
	default:	// 32
		stretch_copy32( des_datap, dw, dh, dw * 4,
						src_datap, sw, sh, sw * 4 );
		break;
	}

	return des_datap;
}



// Power-of-two dimensions produced by the most recent TEX_POW2 expansion;
// txc_upload_texture() passes them to glTexImage2D().
int		w2, h2;

// Both macros expect the locals tbuffp, tbuff_pow2p, w and h to exist at the point
// of expansion. The argument is parenthesised so an expression can be passed, and
// the terminating semicolon is supplied by the caller.
// TEX_ALLOC sizes the buffer for w and h each rounded up to a multiple of 32.
#define TEX_ALLOC(BYTES_PER_TEXEL)	tbuffp = (uint8 *)malloc( ((w + 31) & ~31) * (BYTES_PER_TEXEL) * ((h + 31) & ~31) )
#define	TEX_POW2(BYTES_PER_TEXEL)	tbuff_pow2p = force_pow2( tbuffp, w, h, (BYTES_PER_TEXEL) * 8, &w2, &h2 )

// Converts a tiled source texture into a linear buffer with power-of-two dimensions.
//   srcp       : tiled source texels
//   w, h       : texture size in texels
//   paletized  : currently unused
//   tex_format : one of the TEX_FMT_* values
// I4, I8, C4 and C8 produce one byte per texel; every other supported format
// produces four bytes (RGBA) per texel. The resulting dimensions are stored in the
// globals w2/h2.
// Returns a malloc'ed buffer owned by the caller, or NULL when the format is
// unsupported or an allocation fails.
void *convert_tex( uint8 *srcp, int w, int h, uint32 paletized, uint8 tex_format )
{
	void	(*convert)( uint32 *srcp, uint8 *desp, int dpitch, int w, int h );
	int		bytes_per_texel;
	uint8	*tbuffp, *tbuff_pow2p;

	(void)paletized;

	switch ( tex_format )
	{
	// intensity formats stay 8-bit rather than being expanded to RGBA
	case TEX_FMT_I4:		convert = i4_to_i8;			bytes_per_texel = 1;	break;
	case TEX_FMT_I8:		convert = i8_to_i8;			bytes_per_texel = 1;	break;
	case TEX_FMT_IA4:		convert = ia4_to_rgb32;		bytes_per_texel = 4;	break;
	case TEX_FMT_IA8:		convert = ia8_to_rgba32;	bytes_per_texel = 4;	break;
	case TEX_FMT_RGB565:	convert = rgb565_to_rgba32;	bytes_per_texel = 4;	break;
	case TEX_FMT_RGB5A3:	convert = rgb5a3_to_rgba32;	bytes_per_texel = 4;	break;
	case TEX_FMT_RGBA8:		convert = rgba8_to_rgba32;	bytes_per_texel = 4;	break;
	case TEX_FMT_CMP:		convert = cmp_to_rgba32;	bytes_per_texel = 4;	break;
	// palette indices are kept as indices
	case TEX_FMT_C4:		convert = c4_to_c8;			bytes_per_texel = 1;	break;
	case TEX_FMT_C8:		convert = c8_to_c8;			bytes_per_texel = 1;	break;
	default:
		syslog_error(BP,"Unsupported texture format %d\n", tex_format);
		return NULL;	// nothing was allocated, so there is nothing to free
	}

	// TEX_ALLOC assigns tbuffp; the buffer is padded because the converters
	// always write whole tiles
	TEX_ALLOC(bytes_per_texel);
	if ( !tbuffp )
		return NULL;

	convert( (uint32 *)srcp, tbuffp, w * bytes_per_texel, w, h );

	// TEX_POW2 assigns tbuff_pow2p and updates the globals w2/h2
	TEX_POW2(bytes_per_texel);

	// a stretched copy (or a failure) leaves the intermediate buffer unused
	if ( tbuff_pow2p != tbuffp )
	{
		free( tbuffp );
	}
	return tbuff_pow2p;
}

// Uploads a converted texture to the currently bound GL_TEXTURE_2D target,
// using the global dimensions w2 x h2.
//   tex_format : one of the TEX_FMT_* values; selects the GL internal and client formats
//   texture    : pixel data handed to glTexImage2D
// An unsupported format is reported through syslog_error and nothing is uploaded.
void txc_upload_texture(uint32 tex_format, void *texture)
{
	switch(tex_format)
	{
	// 8-bit intensity data
	case TEX_FMT_I4:
	case TEX_FMT_I8:
		glTexImage2D(GL_TEXTURE_2D, 0, GL_INTENSITY8, w2, h2, 0, GL_LUMINANCE, GL_UNSIGNED_BYTE, texture);
		break;

	// 32-bit RGBA data
	case TEX_FMT_IA4:
	case TEX_FMT_IA8:
	case TEX_FMT_RGB565:
	case TEX_FMT_RGB5A3:
	case TEX_FMT_RGBA8:
	case TEX_FMT_CMP:
		glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, w2, h2, 0, GL_RGBA, GL_UNSIGNED_BYTE, texture);
		break;

	// 8-bit palette indices: uploaded as luminance when the palette lookup is
	// done in a shader, as a colour-index texture otherwise
	case TEX_FMT_C4:
	case TEX_FMT_C8:
#if WITH_PALETTED_AT_SHADER
		glTexImage2D(GL_TEXTURE_2D, 0, GL_LUMINANCE8, w2, h2, 0, GL_LUMINANCE, GL_UNSIGNED_BYTE, texture);
#else
		glTexImage2D(GL_TEXTURE_2D, 0, GL_COLOR_INDEX8_EXT, w2, h2, 0, GL_COLOR_INDEX, GL_UNSIGNED_BYTE, texture);
#endif
		break;

	default:
		syslog_error(BP,"Unsupported texture format %d\n", tex_format);
		break;
	}
}