I am very rusty on this stuff and not an Intel assembler guy, but maybe something like this:
myheader.h
Code:
#ifndef __MYHEADER_H__ // Avoid multiple inclusions
#define __MYHEADER_H__
#include <windows.h> // and whatever else
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h> // etc
// Other stuff ...

// NOTE(review): Pixel32 is not declared in this header. It must be declared
// before the prototype below (by one of the headers above or by a project
// header), otherwise the prototype will not compile.

// extern "C" is only valid in C++. Guard it so that this header can also be
// included from plain C translation units, while C++ callers still get an
// unmangled C-linkage symbol for LUT_iSSE.
#ifdef __cplusplus
extern "C" {
#endif
void __stdcall LUT_iSSE (Pixel32 *dst,int *LUT,int psize);
#ifdef __cplusplus
}
#endif
#endif // __MYHEADER_H__
asm.c
Code:
#include "myheader.h"
// LUT_iSSE: rewrite psize 32-bit elements at dst in place through a lookup table.
//
// For each 32-bit element, the three low bytes are used as indices into three
// consecutive 256-entry tables of 32-bit values inside LUT, and the three
// looked-up values are OR-ed together to form the new element:
//
//     byte 2 (bits 16..23) -> LUT[  0 + b2]
//     byte 1 (bits  8..15) -> LUT[256 + b1]
//     byte 0 (bits  0..7)  -> LUT[512 + b0]
//
// Byte 3 (bits 24..31) of the input is ignored.
// NOTE(review): presumably bytes 0/1/2 are the blue/green/red channels of a
// Pixel32 - confirm against the Pixel32 definition.
//
// dst   : elements to transform in place; the code reads and writes 4 bytes
//         per element, so Pixel32 is assumed to be 32 bits wide.
// LUT   : base of the three tables described above (at least 768 ints).
// psize : number of elements to process; values <= 0 do nothing.
//
// Target: MSVC inline assembler, 32-bit x86, MMX plus PREFETCHNTA.
void __stdcall LUT_iSSE (Pixel32 *dst,int *LUT,int psize)
{
    // The loop below is bottom-tested (dec/jnz), so a count of 0 would wrap
    // ECX and run roughly 2^32 iterations. Reject empty/negative counts here.
    if (psize <= 0)
        return;

    // EBX, ESI and EDI are used below; MSVC preserves registers used inside
    // an __asm block, so no explicit push/pop is needed here.
    __asm {
        mov edi, [dst]                          // edi = current element pointer
        mov esi, [LUT]                          // esi = table base
        mov ecx, [psize]                        // ecx = elements remaining (> 0)
        align 16
GLoop:  mov eax, [edi]                          // eax = source element
        xor ebx, ebx
        mov edx, eax
        mov bl, ah                              // ebx = byte 1 (must precede the AND on eax)
        and edx, 0xff0000
        and eax, 0xff                           // eax = byte 0
        shr edx, 16                             // edx = byte 2
        movd mm0, [esi + eax * 4 + (512 * 4)]   // third table, indexed by byte 0
        prefetchnta [edi + 512]                 // hint: fetch data 512 bytes (128 elements) ahead
        // POR with a memory operand reads 64 bits, so the upper half of mm0
        // picks up the following table entry; only the low 32 bits are stored.
        por mm0, [esi + ebx * 4 + (256 * 4)]    // second table, indexed by byte 1
        por mm0, [esi + edx * 4]                // first table, indexed by byte 2
        movd [edi], mm0                         // store low 32 bits back in place
        add edi, 4
        dec ecx
        jnz GLoop
        emms                                    // leave MMX state so x87 code works afterwards
    }
}
I think that the asm file should be using __stdcall, but I'm not sure.
Perhaps others will give better advice.
EDIT: One of the headers should define what Pixel32 is.