117 lines
3.0 KiB
NASM
117 lines
3.0 KiB
NASM
|
;
|
||
|
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||
|
;
|
||
|
; Use of this source code is governed by a BSD-style license
|
||
|
; that can be found in the LICENSE file in the root of the source
|
||
|
; tree. An additional intellectual property rights grant can be found
|
||
|
; in the file PATENTS. All contributing project authors may
|
||
|
; be found in the AUTHORS file in the root of the source tree.
|
||
|
;
|
||
|
|
||
|
|
||
|
%include "vpx_ports/x86_abi_support.asm"
|
||
|
|
||
|
;void copy_mem16x16_sse2(
|
||
|
; unsigned char *src,
|
||
|
; int src_stride,
|
||
|
; unsigned char *dst,
|
||
|
; int dst_stride
|
||
|
; )
|
||
|
global sym(vp8_copy_mem16x16_sse2) PRIVATE
|
||
|
sym(vp8_copy_mem16x16_sse2):
|
||
|
push rbp
|
||
|
mov rbp, rsp
|
||
|
SHADOW_ARGS_TO_STACK 4
|
||
|
push rsi
|
||
|
push rdi
|
||
|
; end prolog
|
||
|
|
||
|
mov rsi, arg(0) ;src;
|
||
|
movdqu xmm0, [rsi]
|
||
|
|
||
|
movsxd rax, dword ptr arg(1) ;src_stride;
|
||
|
mov rdi, arg(2) ;dst;
|
||
|
|
||
|
movdqu xmm1, [rsi+rax]
|
||
|
movdqu xmm2, [rsi+rax*2]
|
||
|
|
||
|
movsxd rcx, dword ptr arg(3) ;dst_stride
|
||
|
lea rsi, [rsi+rax*2]
|
||
|
|
||
|
movdqa [rdi], xmm0
|
||
|
add rsi, rax
|
||
|
|
||
|
movdqa [rdi+rcx], xmm1
|
||
|
movdqa [rdi+rcx*2],xmm2
|
||
|
|
||
|
lea rdi, [rdi+rcx*2]
|
||
|
movdqu xmm3, [rsi]
|
||
|
|
||
|
add rdi, rcx
|
||
|
movdqu xmm4, [rsi+rax]
|
||
|
|
||
|
movdqu xmm5, [rsi+rax*2]
|
||
|
lea rsi, [rsi+rax*2]
|
||
|
|
||
|
movdqa [rdi], xmm3
|
||
|
add rsi, rax
|
||
|
|
||
|
movdqa [rdi+rcx], xmm4
|
||
|
movdqa [rdi+rcx*2],xmm5
|
||
|
|
||
|
lea rdi, [rdi+rcx*2]
|
||
|
movdqu xmm0, [rsi]
|
||
|
|
||
|
add rdi, rcx
|
||
|
movdqu xmm1, [rsi+rax]
|
||
|
|
||
|
movdqu xmm2, [rsi+rax*2]
|
||
|
lea rsi, [rsi+rax*2]
|
||
|
|
||
|
movdqa [rdi], xmm0
|
||
|
add rsi, rax
|
||
|
|
||
|
movdqa [rdi+rcx], xmm1
|
||
|
|
||
|
movdqa [rdi+rcx*2], xmm2
|
||
|
movdqu xmm3, [rsi]
|
||
|
|
||
|
movdqu xmm4, [rsi+rax]
|
||
|
lea rdi, [rdi+rcx*2]
|
||
|
|
||
|
add rdi, rcx
|
||
|
movdqu xmm5, [rsi+rax*2]
|
||
|
|
||
|
lea rsi, [rsi+rax*2]
|
||
|
movdqa [rdi], xmm3
|
||
|
|
||
|
add rsi, rax
|
||
|
movdqa [rdi+rcx], xmm4
|
||
|
|
||
|
movdqa [rdi+rcx*2],xmm5
|
||
|
movdqu xmm0, [rsi]
|
||
|
|
||
|
lea rdi, [rdi+rcx*2]
|
||
|
movdqu xmm1, [rsi+rax]
|
||
|
|
||
|
add rdi, rcx
|
||
|
movdqu xmm2, [rsi+rax*2]
|
||
|
|
||
|
lea rsi, [rsi+rax*2]
|
||
|
movdqa [rdi], xmm0
|
||
|
|
||
|
movdqa [rdi+rcx], xmm1
|
||
|
movdqa [rdi+rcx*2],xmm2
|
||
|
|
||
|
movdqu xmm3, [rsi+rax]
|
||
|
lea rdi, [rdi+rcx*2]
|
||
|
|
||
|
movdqa [rdi+rcx], xmm3
|
||
|
|
||
|
; begin epilog
|
||
|
pop rdi
|
||
|
pop rsi
|
||
|
UNSHADOW_ARGS
|
||
|
pop rbp
|
||
|
ret
|