View Single Post
Old 1st July 2015, 07:51   #2  |  Link
feisty2
I'm Siri
 
feisty2's Avatar
 
Join Date: Oct 2012
Location: void
Posts: 2,633
Quote:
// Sobel-like edge mask for one plane, implemented with MSVC x86 (32-bit)
// inline assembly. An SSE2 path (16 pixels per iteration) is used when
// g_cpuid reports CPUID_SSE2, otherwise an MMXExt path (8 pixels per
// iteration).
//
// For every pixel, using the 3x3 neighbourhood (rows clamped at the top and
// bottom of the plane):
//   gy  = | blur(top row) - blur(bottom row) |      blur = pavgb(c, pavgb(l, r))
//   gx  = | blur(left column) - blur(right column) |
//   out = min(6 * (gx + gy + max(gx, gy)), thresh)  (all adds saturate at 255)
// After each row the first and last output pixels are overwritten with their
// inner neighbours.
//
// Parameters:
//   src    - source frame (read only)
//   dst    - destination frame (written)
//   plane  - plane selector passed to GetPitch/GetReadPtr/GetWritePtr/...
//   thresh - upper clamp for the output; saturated to 0..255 by the pack
//            instructions before use
//
// NOTE(review): the loops read [row-1] .. [row+16] (or +8) relative to the
// row start and store with movntdq/movntq, so this presumably relies on the
// frame buffers being padded and the destination rows being 16-byte (8-byte)
// aligned - confirm against the frame allocator.
void Sobel(PVideoFrame &src, PVideoFrame &dst, int plane, int thresh)
{
    const int src_pitch = src->GetPitch(plane);
    const int dst_pitch = dst->GetPitch(plane);
    const unsigned char *psrc = src->GetReadPtr(plane);
    unsigned char *pdst = dst->GetWritePtr(plane);
    const int height = src->GetHeight(plane);
    const int dst_row_size = dst->GetRowSize(plane);
    // Row length in 4-byte units, rounded up; the asm loops count in dwords.
    const int i = (dst_row_size + 3) >> 2;

    if (g_cpuid & CPUID_SSE2)
    {
        // SSE2 version
        for (int y = 0; y < height; y++)
        {
            __asm{
                // Register roles inside the loop:
                //   esi       -> row above (current row when y == 0)
                //   esi + eax -> current row
                //   esi + edx -> row below (current row when y == height-1)
                //   esi + edi -> destination for the block just computed
                //   ecx       =  dwords still to be written
                //   xmm0      =  thresh replicated into all 16 bytes
                mov esi, psrc
                mov edi, pdst
                mov edx, src_pitch
                xor eax, eax
                mov ecx, y
                test ecx, ecx
                cmovnz eax, edx             // eax = pitch unless first row
                inc ecx
                add edx, eax                // edx = offset from row above to row below
                cmp ecx, height
                cmovz edx, eax              // last row: "below" is the current row
                sub esi, eax                // step back to the row above (if any)
                mov ecx, i
                sub edi, 10h                // pre-compensate for the add esi,10h
                sub edi, esi                //   done before each store
                movd xmm0, thresh
                pshufd xmm0, xmm0, 0
                packssdw xmm0, xmm0         // dword -> word, signed saturation
                packuswb xmm0, xmm0         // word -> byte, saturate to 0..255
                align 10h
            l:
                movdqu xmm2, [esi-1]        // top-left
                movdqu xmm3, [esi]          // top
                movdqu xmm4, [esi+1]        // top-right
                movdqu xmm5, [esi+edx-1]    // bottom-left
                movdqu xmm6, [esi+edx]      // bottom
                movdqu xmm7, [esi+edx+1]    // bottom-right

                movdqa xmm1, xmm2
                pavgb xmm1, xmm4
                pavgb xmm3, xmm1            // xmm3 = blurred top row

                movdqa xmm1, xmm5
                pavgb xmm1, xmm7
                pavgb xmm6, xmm1            // xmm6 = blurred bottom row

                movdqa xmm1, xmm3
                psubusb xmm3, xmm6
                psubusb xmm6, xmm1
                por xmm6, xmm3              // xmm6 = gy = |top - bottom|

                movdqu xmm1, [esi+eax-1]    // left
                movdqu xmm3, [esi+eax+1]    // right
                pavgb xmm5, xmm2
                pavgb xmm7, xmm4
                pavgb xmm1, xmm5            // xmm1 = blurred left column
                pavgb xmm3, xmm7            // xmm3 = blurred right column
                movdqa xmm5, xmm1
                psubusb xmm1, xmm3
                psubusb xmm3, xmm5
                por xmm1, xmm3              // xmm1 = gx = |left - right|

                movdqa xmm2, xmm6
                paddusb xmm2, xmm1
                pmaxub xmm1, xmm6
                paddusb xmm2, xmm1          // gx + gy + max(gx, gy)

                movdqa xmm3, xmm2
                paddusb xmm2, xmm2
                paddusb xmm2, xmm3
                paddusb xmm2, xmm2          // * 6, saturating
                pminub xmm2, xmm0           // thresh
                add esi, 10h
                sub ecx, 4
                jb le1                      // fewer than 4 dwords left: partial store
                movntdq [esi+edi], xmm2     // does not touch flags; jnz tests the sub
                jnz l
                jmp lx
            le1:
                // ecx is now (remaining - 4) = -3, -2 or -1, i.e. remaining
                // dwords = 1, 2 or 3. Bit 1 set -> at least 2 dwords remain,
                // bit 0 set (together with bit 1) -> 3 dwords remain.
                test ecx, 2
                jz le2                      // 1 dword: store low 4 bytes only
                movq qword ptr [esi+edi], xmm2
                test ecx, 1
                jz lx                       // exactly 2 dwords: done
                add esi, 8
                psrldq xmm2, 8              // bring third dword down to the low lane
            le2:
                movd [esi+edi], xmm2
            lx:
            }
            // Replace the outermost columns with their inner neighbours.
            pdst[0] = pdst[1];
            pdst[dst_row_size-1] = pdst[dst_row_size-2];
            psrc += src_pitch;
            pdst += dst_pitch;
        }
    }
    else
    {
        // MMXExt version
        for (int y = 0; y < height; y++)
        {
            __asm{
                // Same register roles as the SSE2 path, with mm0 holding
                // thresh replicated into all 8 bytes.
                mov esi, psrc
                mov edi, pdst
                mov edx, src_pitch
                xor eax, eax
                mov ecx, y
                test ecx, ecx
                cmovnz eax, edx             // eax = pitch unless first row
                inc ecx
                add edx, eax                // edx = offset from row above to row below
                cmp ecx, height
                cmovz edx, eax              // last row: "below" is the current row
                sub esi, eax                // step back to the row above (if any)
                mov ecx, i
                sub edi, 8                  // pre-compensate for the add esi,8
                sub edi, esi                //   done before each store
                movd mm0, thresh
                punpckldq mm0, mm0
                packssdw mm0, mm0           // dword -> word, signed saturation
                packuswb mm0, mm0           // word -> byte, saturate to 0..255
                align 10h
            lm:
                movq mm2, [esi-1]           // top-left
                movq mm3, [esi]             // top
                movq mm4, [esi+1]           // top-right
                movq mm5, [esi+edx-1]       // bottom-left
                movq mm6, [esi+edx]         // bottom
                movq mm7, [esi+edx+1]       // bottom-right

                movq mm1, mm2
                pavgb mm1, mm4
                pavgb mm3, mm1              // mm3 = blurred top row

                movq mm1, mm5
                pavgb mm1, mm7
                pavgb mm6, mm1              // mm6 = blurred bottom row

                movq mm1, mm3
                psubusb mm3, mm6
                psubusb mm6, mm1
                por mm6, mm3                // mm6 = gy = |top - bottom|

                movq mm1, [esi+eax-1]       // left
                movq mm3, [esi+eax+1]       // right
                pavgb mm5, mm2
                pavgb mm7, mm4
                pavgb mm1, mm5              // mm1 = blurred left column
                pavgb mm3, mm7              // mm3 = blurred right column
                movq mm5, mm1
                psubusb mm1, mm3
                psubusb mm3, mm5
                por mm1, mm3                // mm1 = gx = |left - right|

                movq mm2, mm6
                paddusb mm2, mm1
                pmaxub mm1, mm6
                paddusb mm2, mm1            // gx + gy + max(gx, gy)

                movq mm3, mm2
                paddusb mm2, mm2
                paddusb mm2, mm3
                paddusb mm2, mm2            // * 6, saturating
                pminub mm2, mm0             // thresh

                add esi, 8
                sub ecx, 2
                jb lme                      // only 1 dword left: partial store
                movntq [esi+edi], mm2       // does not touch flags; jnz tests the sub
                jnz lm
                jmp lmx
            lme:
                // Store the low dword of the MMX result. This must be mm2:
                // xmm2 is never computed on this path and is not available
                // on CPUs without SSE2.
                movd [esi+edi], mm2
            lmx:
            }
            // Replace the outermost columns with their inner neighbours.
            pdst[0] = pdst[1];
            pdst[dst_row_size-1] = pdst[dst_row_size-2];
            psrc += src_pitch;
            pdst += dst_pitch;
        }
        // Leave MMX state so x87 floating-point code in the caller works.
        __asm emms;
    }
    // Both paths use non-temporal stores; fence them before returning.
    __asm sfence;
}
tell me 'bout it, bitch looks portable to you?
feisty2 is offline   Reply With Quote