;
; Example: MMX/SSE register test (CPUID feature check, SSE kernel, hex dump)
;
;
format PE GUI
include 'win32axp.inc'
entry start
; Data used by the demo: message-box strings, the arguments handed to fir,
; the hex-dump output buffer and the byte -> two-character hex lookup table.
section '.data' data readable writeable
; Loop bound for fir: its loop repeats while the byte offset is below 4*TAP.
TAP equ 1
; Not referenced by the code visible in this file.
flags dd ?
; Title and text of the first message box shown by start.
caption db '测试',0
message db '这是一个测试',0
;--------------------------------
; Arguments passed to fir by start.
; input1: four 20-byte rows of sample bytes plus one trailing zero (81 bytes).
input1 db 11,11,22,22,33,33,44,44,55,55,66,66,77,77,88,88,99,99,00,00
db 11,11,22,22,33,33,44,44,55,55,66,66,77,77,88,88,99,99,00,00
db 11,11,22,22,33,33,44,44,55,55,66,66,77,77,88,88,99,99,00,00
db 11,11,22,22,33,33,44,44,55,55,66,66,77,77,88,88,99,99,00,00,0
; out1: 40 uninitialised bytes; fir stores 16 bytes here and bin2hex dumps
; memory starting at this address.
out1 dd 10 dup(?)
; coeff1: eleven qwords. fir reads dwords from this table with mulss.
; NOTE(review): these are integer qwords but mulss interprets its operand as
; a single-precision float - confirm the intended encoding.
coeff1 dq 0000,9999,8888,7777,6666,5555,4444,3333,2222,1111,0
; count1: its address is passed as fir's fourth argument, which fir never reads.
count1 dw 10
;---------------------------------
; lpString: a second copy of the sample bytes. start does not pass this label
; to anything; bin2hex has a parameter with the same name.
lpString db 11,11,22,22,33,33,44,44,55,55,66,66,77,77,88,88,99,99,00,00
db 11,11,22,22,33,33,44,44,55,55,66,66,77,77,88,88,99,99,00,00
db 11,11,22,22,33,33,44,44,55,55,66,66,77,77,88,88,99,99,00,00
db 11,11,22,22,33,33,44,44,55,55,66,66,77,77,88,88,99,99,00,00,0
; lpbuffer: 200h zeroed words (1024 bytes); bin2hex writes its text here and
; start shows it in the second message box.
lpbuffer dw 200h dup (0)
; InString: bin2hex reads this word together with the following zero word as
; one dword, giving a byte count of 0FFh (255).
; NOTE(review): 255 is larger than the 40 bytes of out1, so the dump continues
; through the data that follows out1 - confirm this is intended.
InString dw 000000ffh
dw 0
; hex_table: 256 two-character entries; entry N is the upper-case hex text of
; byte value N, so bin2hex indexes it with byte*2.
hex_table \
db "000102030405060708090A0B0C0D0E0F"
db "101112131415161718191A1B1C1D1E1F"
db "202122232425262728292A2B2C2D2E2F"
db "303132333435363738393A3B3C3D3E3F"
db "404142434445464748494A4B4C4D4E4F"
db "505152535455565758595A5B5C5D5E5F"
db "606162636465666768696A6B6C6D6E6F"
db "707172737475767778797A7B7C7D7E7F"
db "808182838485868788898A8B8C8D8E8F"
db "909192939495969798999A9B9C9D9E9F"
db "A0A1A2A3A4A5A6A7A8A9AAABACADAEAF"
db "B0B1B2B3B4B5B6B7B8B9BABBBCBDBEBF"
db "C0C1C2C3C4C5C6C7C8C9CACBCCCDCECF"
db "D0D1D2D3D4D5D6D7D8D9DADBDCDDDEDF"
db "E0E1E2E3E4E5E6E7E8E9EAEBECEDEEEF"
db "F0F1F2F3F4F5F6F7F8F9FAFBFCFDFEFF"
section '.text' code readable writeable executable

; CPUID leaf 1 feature-flag bit positions tested by start.
CPUID_EDX_MMX   = 23
CPUID_EDX_SSE   = 25
CPUID_EDX_SSE2  = 26
CPUID_ECX_SSE3  = 0
CPUID_ECX_SSSE3 = 9
CPUID_ECX_SSE41 = 19
CPUID_ECX_SSE42 = 20

;-----------------------------------------------------------------------
; start - program entry point (32-bit Windows, stdcall API calls)
;
; Shows a greeting box, verifies with CPUID leaf 1 that MMX, SSE, SSE2,
; SSE3, SSSE3, SSE4.1 and SSE4.2 are all reported, then runs fir, hex
; dumps memory starting at out1 into lpbuffer and shows the text.
; If any feature bit is clear the program exits without running the
; SSE code.
;-----------------------------------------------------------------------
start:
        invoke  GetModuleHandle,0
        invoke  MessageBox,0,message,caption,MB_ICONINFORMATION+MB_OK
;--------------------------------------------------------------------------
        mov     eax, 1                  ; CPUID leaf 1: feature flags in ecx/edx
        cpuid

        ; Feature flags reported in EDX.
        bt      edx, CPUID_EDX_MMX      ; MMX   (mask 00800000h)
        jnc     exit
        bt      edx, CPUID_EDX_SSE      ; SSE   (mask 02000000h)
        jnc     exit
        bt      edx, CPUID_EDX_SSE2     ; SSE2  (mask 04000000h)
        jnc     exit

        ; Feature flags reported in ECX. SSE3 and SSSE3 live here, not in
        ; EDX, where bits 0 and 9 mean FPU and APIC.
        bt      ecx, CPUID_ECX_SSE3     ; SSE3   (mask 00000001h)
        jnc     exit
        bt      ecx, CPUID_ECX_SSSE3    ; SSSE3  (mask 00000200h)
        jnc     exit
        bt      ecx, CPUID_ECX_SSE41    ; SSE4.1 (mask 00080000h)
        jnc     exit
        bt      ecx, CPUID_ECX_SSE42    ; SSE4.2 (mask 00100000h)
        jnc     exit

        stdcall fir,input1,out1,coeff1,count1
        stdcall bin2hex,out1,InString,lpbuffer
        invoke  MessageBox,0,lpbuffer,'测试',MB_ICONINFORMATION+MB_OK
;--------------------------------------------------------------------------
exit:
        invoke  ExitProcess,0
;-----------------------------------------------------------------------
; fir - SSE/SSSE3 multiply-accumulate test kernel
; ABI:   32-bit stdcall
; In:    input1 = pointer to the source bytes; 32 bytes are read per pass
;        out1   = pointer to a 16-byte destination (need not be aligned)
;        coeff  = pointer to the coefficients; one dword is read at byte
;                 offsets 0, 16, 32 and 48 (relative to 4*offset) per pass
;        count  = accepted but not read by this routine
; Out:   [out1] = the 16-byte accumulator xmm0
; Clobb: eax, ecx, xmm0-xmm3, flags; ebx is saved and restored
;
; Each pass adds four 16-byte windows of the input to the accumulator,
; the windows starting at byte offsets 0, 4, 8 and 12. The later
; windows are built with palignr from the two 16-byte blocks loaded.
; The loop advances 16 bytes per pass and repeats while the byte offset
; is below 4*TAP.
;
; NOTE(review): mulss scales only the lowest lane of each window, the
; other three lanes are accumulated unscaled - confirm whether a packed
; multiply was intended.
;-----------------------------------------------------------------------
proc fir uses ebx, input1:dword,out1:dword,coeff:dword,count:dword
        pxor    xmm0, xmm0              ; accumulator = 0
        xor     ecx, ecx                ; ecx = byte offset into the input
        mov     eax, [input1]
        mov     ebx, [coeff]
        align 16
.tap_loop:
        movups  xmm1, [eax+ecx]         ; low block: bytes 0..15
        movups  xmm3, xmm1              ; keep a copy for palignr
        mulss   xmm1, [ebx+4*ecx]
        addps   xmm0, xmm1              ; window at offset 0

        movups  xmm2, [eax+ecx+16]      ; high block: bytes 16..31
        movups  xmm1, xmm2              ; keep a copy of the high block
        palignr xmm2, xmm3, 4           ; window at offset 4
        mulss   xmm2, [ebx+4*ecx+16]
        addps   xmm0, xmm2

        movups  xmm2, xmm1
        palignr xmm2, xmm3, 8           ; window at offset 8
        mulss   xmm2, [ebx+4*ecx+32]
        addps   xmm0, xmm2

        movups  xmm2, xmm1
        palignr xmm2, xmm3, 12          ; window at offset 12
        mulss   xmm2, [ebx+4*ecx+48]
        addps   xmm0, xmm2

        add     ecx, 16
        cmp     ecx, 4*TAP
        jb      .tap_loop               ; offsets are unsigned

        mov     eax, [out1]
        movups  [eax], xmm0             ; store the accumulator
        ret
endp
;-----------------------------------------------------------------------
; bin2hex - format a byte range as a hex dump
; ABI:   32-bit stdcall
; In:    lpString = pointer to the source bytes
;        lnString = pointer to a dword holding the number of bytes
;        lpbuffer = pointer to the output buffer; needs 3 bytes per
;                   source byte plus 4 bytes per 16-byte line
; Out:   eax      = number of characters written, terminator excluded
;        lpbuffer = zero-terminated text: "XX " per byte, "- " after
;                   the 8th byte of a line, CR LF after the 16th
; Clobb: eax, ecx, edx, flags; ebx, esi, edi are saved and restored
;
; The final character written (a space, or the LF of a line break) is
; replaced by the terminator. A byte count of zero yields an empty
; string and returns 0.
;-----------------------------------------------------------------------
proc bin2hex uses ebx esi edi, lpString:dword,lnString:dword,lpbuffer:dword
        mov     esi, [lpString]         ; esi = read cursor
        mov     edi, [lpbuffer]         ; edi = write cursor
        mov     eax, [lnString]
        mov     ecx, [eax]              ; ecx = byte count
        test    ecx, ecx
        jz      .empty
        add     ecx, esi                ; ecx = one past the last source byte
        xor     ebx, ebx                ; ebx = bytes emitted on this line

.next_byte:
        movzx   eax, byte [esi]
        movzx   edx, word [hex_table+eax*2] ; two hex digits for this byte
        add     esi, 1
        mov     [edi], dx
        mov     byte [edi+2], ' '
        add     edi, 3
        add     ebx, 1
        cmp     ebx, 8
        je      .separator
        cmp     ebx, 16
        jne     .check_end
        mov     word [edi], 0A0Dh       ; CR, LF after the 16th byte
        add     edi, 2
        xor     ebx, ebx                ; start a new line
        jmp     .check_end

.separator:
        ; FASM puts the first character in the low byte, so "- " is stored
        ; as '-' then ' ' and the line reads "XX - XX".
        mov     word [edi], "- "
        add     edi, 2

.check_end:
        cmp     esi, ecx                ; pointers: compare unsigned
        jb      .next_byte

        sub     edi, 1
        mov     byte [edi], 0           ; terminator replaces the last character
        mov     eax, edi
        sub     eax, [lpbuffer]         ; eax = length without the terminator
        ret

.empty:
        mov     byte [edi], 0
        xor     eax, eax
        ret
endp
; Import table: the DLLs and API entry points this program links against.
section '.idata' import data readable writeable
; DLLs to load; 'kernel' and 'user' are the names used by the import lists below.
library kernel,'KERNEL32.DLL',\
user,'USER32.DLL'
; KERNEL32 functions, bound to their ANSI ('A') exports where one exists.
import kernel,\
GetModuleHandle,'GetModuleHandleA',\
ExitProcess,'ExitProcess'
; USER32 functions. wsprintf is imported but not called by the code visible
; in this file.
import user,\
MessageBox,'MessageBoxA',\
wsprintf,'wsprintfA'
