parent 2d77a6f5d3
commit 5268443fdf

@@ -114,6 +114,13 @@ Files extracted from upstream source:
- COPYING


## libvpx

- Upstream: http://www.webmproject.org/code/
- Version: 1.6.0
- License: BSD-3-Clause


## libwebp

- Upstream: https://chromium.googlesource.com/webm/libwebp/

@@ -0,0 +1,142 @@
# This file is automatically generated from the git commit history
# by tools/gen_authors.sh.

Aaron Watry <awatry@gmail.com>
Abo Talib Mahfoodh <ab.mahfoodh@gmail.com>
Adam Xu <adam@xuyaowu.com>
Adrian Grange <agrange@google.com>
Aℓex Converse <aconverse@google.com>
Ahmad Sharif <asharif@google.com>
Alexander Voronov <avoronov@graphics.cs.msu.ru>
Alexis Ballier <aballier@gentoo.org>
Alok Ahuja <waveletcoeff@gmail.com>
Alpha Lam <hclam@google.com>
A.Mahfoodh <ab.mahfoodh@gmail.com>
Ami Fischman <fischman@chromium.org>
Andoni Morales Alastruey <ylatuya@gmail.com>
Andres Mejia <mcitadel@gmail.com>
Andrew Russell <anrussell@google.com>
Angie Chiang <angiebird@google.com>
Aron Rosenberg <arosenberg@logitech.com>
Attila Nagy <attilanagy@google.com>
Brion Vibber <bvibber@wikimedia.org>
changjun.yang <changjun.yang@intel.com>
Charles 'Buck' Krasic <ckrasic@google.com>
chm <chm@rock-chips.com>
Christian Duvivier <cduvivier@google.com>
Daniele Castagna <dcastagna@chromium.org>
Daniel Kang <ddkang@google.com>
Deb Mukherjee <debargha@google.com>
Dim Temp <dimtemp0@gmail.com>
Dmitry Kovalev <dkovalev@google.com>
Dragan Mrdjan <dmrdjan@mips.com>
Ed Baker <edward.baker@intel.com>
Ehsan Akhgari <ehsan.akhgari@gmail.com>
Erik Niemeyer <erik.a.niemeyer@intel.com>
Fabio Pedretti <fabio.ped@libero.it>
Frank Galligan <fgalligan@google.com>
Fredrik Söderquist <fs@opera.com>
Fritz Koenig <frkoenig@google.com>
Gaute Strokkenes <gaute.strokkenes@broadcom.com>
Geza Lore <gezalore@gmail.com>
Ghislain MARY <ghislainmary2@gmail.com>
Giuseppe Scrivano <gscrivano@gnu.org>
Gordana Cmiljanovic <gordana.cmiljanovic@imgtec.com>
Guillaume Martres <gmartres@google.com>
Guillermo Ballester Valor <gbvalor@gmail.com>
Hangyu Kuang <hkuang@google.com>
Hanno Böck <hanno@hboeck.de>
Henrik Lundin <hlundin@google.com>
Hui Su <huisu@google.com>
Ivan Maltz <ivanmaltz@google.com>
Jacek Caban <cjacek@gmail.com>
Jacky Chen <jackychen@google.com>
James Berry <jamesberry@google.com>
James Yu <james.yu@linaro.org>
James Zern <jzern@google.com>
Jan Gerber <j@mailb.org>
Jan Kratochvil <jan.kratochvil@redhat.com>
Janne Salonen <jsalonen@google.com>
Jean-Yves Avenard <jyavenard@mozilla.com>
Jeff Faust <jfaust@google.com>
Jeff Muizelaar <jmuizelaar@mozilla.com>
Jeff Petkau <jpet@chromium.org>
Jia Jia <jia.jia@linaro.org>
Jian Zhou <zhoujian@google.com>
Jim Bankoski <jimbankoski@google.com>
Jingning Han <jingning@google.com>
Joey Parrish <joeyparrish@google.com>
Johann Koenig <johannkoenig@google.com>
John Koleszar <jkoleszar@google.com>
Johnny Klonaris <google@jawknee.com>
John Stark <jhnstrk@gmail.com>
Joshua Bleecher Snyder <josh@treelinelabs.com>
Joshua Litt <joshualitt@google.com>
Julia Robson <juliamrobson@gmail.com>
Justin Clift <justin@salasaga.org>
Justin Lebar <justin.lebar@gmail.com>
KO Myung-Hun <komh@chollian.net>
Lawrence Velázquez <larryv@macports.org>
Linfeng Zhang <linfengz@google.com>
Lou Quillio <louquillio@google.com>
Luca Barbato <lu_zero@gentoo.org>
Makoto Kato <makoto.kt@gmail.com>
Mans Rullgard <mans@mansr.com>
Marco Paniconi <marpan@google.com>
Mark Mentovai <mark@chromium.org>
Martin Ettl <ettl.martin78@googlemail.com>
Martin Storsjo <martin@martin.st>
Matthew Heaney <matthewjheaney@chromium.org>
Michael Kohler <michaelkohler@live.com>
Mike Frysinger <vapier@chromium.org>
Mike Hommey <mhommey@mozilla.com>
Mikhal Shemer <mikhal@google.com>
Minghai Shang <minghai@google.com>
Morton Jonuschat <yabawock@gmail.com>
Nico Weber <thakis@chromium.org>
Parag Salasakar <img.mips1@gmail.com>
Pascal Massimino <pascal.massimino@gmail.com>
Patrik Westin <patrik.westin@gmail.com>
Paul Wilkins <paulwilkins@google.com>
Pavol Rusnak <stick@gk2.sk>
Paweł Hajdan <phajdan@google.com>
Pengchong Jin <pengchong@google.com>
Peter de Rivaz <peter.derivaz@gmail.com>
Philip Jägenstedt <philipj@opera.com>
Priit Laes <plaes@plaes.org>
Rafael Ávila de Espíndola <rafael.espindola@gmail.com>
Rafaël Carré <funman@videolan.org>
Ralph Giles <giles@xiph.org>
Rob Bradford <rob@linux.intel.com>
Ronald S. Bultje <rsbultje@gmail.com>
Rui Ueyama <ruiu@google.com>
Sami Pietilä <samipietila@google.com>
Sasi Inguva <isasi@google.com>
Scott Graham <scottmg@chromium.org>
Scott LaVarnway <slavarnway@google.com>
Sean McGovern <gseanmcg@gmail.com>
Sergey Kolomenkin <kolomenkin@gmail.com>
Sergey Ulanov <sergeyu@chromium.org>
Shimon Doodkin <helpmepro1@gmail.com>
Shunyao Li <shunyaoli@google.com>
Stefan Holmer <holmer@google.com>
Suman Sunkara <sunkaras@google.com>
Taekhyun Kim <takim@nvidia.com>
Takanori MATSUURA <t.matsuu@gmail.com>
Tamar Levy <tamar.levy@intel.com>
Tao Bai <michaelbai@chromium.org>
Tero Rintaluoma <teror@google.com>
Thijs Vermeir <thijsvermeir@gmail.com>
Tim Kopp <tkopp@google.com>
Timothy B. Terriberry <tterribe@xiph.org>
Tom Finegan <tomfinegan@google.com>
Vignesh Venkatasubramanian <vigneshv@google.com>
Yaowu Xu <yaowu@google.com>
Yi Luo <luoyi@google.com>
Yongzhe Wang <yongzhe@google.com>
Yunqing Wang <yunqingwang@google.com>
Yury Gitman <yuryg@google.com>
Zoe Liu <zoeliu@google.com>
Google Inc.
The Mozilla Foundation
The Xiph.Org Foundation

@@ -0,0 +1,654 @@
2016-07-20 v1.6.0 "Khaki Campbell Duck"
This release improves upon the VP9 encoder and speeds up the encoding and
decoding processes.

- Upgrading:
This release is ABI incompatible with 1.5.0 due to a new 'color_range' enum
in vpx_image and some minor changes to the VP8_COMP structure.

The default key frame interval for VP9 has changed from 128 to 9999.
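
Applications that depend on a particular keyframe cadence can pin the interval
in the encoder configuration rather than relying on the library default. A
minimal C sketch, illustrative only and not part of the upstream notes (the
150-frame limit is an arbitrary example value):

    #include "vpx/vpx_encoder.h"
    #include "vpx/vp8cx.h"

    /* Illustrative: pin the keyframe interval instead of relying on the
     * library default, which this release changes for VP9. */
    static vpx_codec_err_t init_encoder_cfg(vpx_codec_enc_cfg_t *cfg) {
        vpx_codec_err_t res =
            vpx_codec_enc_config_default(vpx_codec_vp9_cx(), cfg, 0);
        if (res != VPX_CODEC_OK) return res;
        cfg->kf_mode = VPX_KF_AUTO;  /* automatic keyframe placement... */
        cfg->kf_max_dist = 150;      /* ...but at most 150 frames apart */
        return VPX_CODEC_OK;
    }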

- Enhancement:
A core focus has been performance for low end Intel processors. SSSE3
instructions such as 'pshufb' have been avoided and instructions have been
reordered to better accommodate the more constrained pipelines.

As a result, devices based on Celeron processors have seen substantial
decoding improvements. From Indian Runner Duck to Javan Whistling Duck,
decoding speed improved between 10 and 30%. Between Javan Whistling Duck
and Khaki Campbell Duck, it improved another 10 to 15%.

While Celeron benefited most, Core-i5 also improved 5% and 10% between the
respective releases.

Realtime performance for WebRTC for both speed and quality has received a
lot of attention.

- Bug Fixes:
A number of fuzzing issues, found variously by Mozilla, Chromium and others,
have been fixed and we strongly recommend updating.

2015-11-09 v1.5.0 "Javan Whistling Duck"
This release improves upon the VP9 encoder and speeds up the encoding and
decoding processes.

- Upgrading:
This release is ABI incompatible with 1.4.0. It drops deprecated VP8
controls and adds a variety of VP9 controls for testing.

The vpxenc utility now prefers VP9 by default.

- Enhancements:
Faster VP9 encoding and decoding
Smaller library size by combining functions used by VP8 and VP9

- Bug Fixes:
A variety of fuzzing issues

2015-04-03 v1.4.0 "Indian Runner Duck"
This release includes significant improvements to the VP9 codec.

- Upgrading:
This release is ABI incompatible with 1.3.0. It drops the compatibility
layer, requiring VPX_IMG_FMT_* instead of IMG_FMT_*, and adds several codec
controls for VP9.
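
For code that still used the old names, the migration is a straight constant
rename; a minimal sketch (illustrative, not part of the upstream notes):

    #include "vpx/vpx_image.h"

    /* Illustrative: the deprecated IMG_FMT_I420 becomes VPX_IMG_FMT_I420 once
     * the compatibility layer is gone. Returns NULL on allocation failure. */
    static vpx_image_t *alloc_i420(unsigned int w, unsigned int h) {
        static vpx_image_t img;
        return vpx_img_alloc(&img, VPX_IMG_FMT_I420, w, h, 16 /* alignment */);
    }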

- Enhancements:
Faster VP9 encoding and decoding
Multithreaded VP9 decoding (tile and frame-based)
Multithreaded VP9 encoding - on by default
YUV 4:2:2 and 4:4:4 support in VP9
10 and 12bit support in VP9
64bit ARM support by replacing ARM assembly with intrinsics

- Bug Fixes:
Fixes a VP9 bitstream issue in Profile 1. This only affected non-YUV 4:2:0
files.

- Known Issues:
Frame Parallel decoding fails for segmented and non-420 files.

2013-11-15 v1.3.0 "Forest"
This release introduces the VP9 codec in a backward-compatible way.
All existing users of VP8 can continue to use the library without
modification. However, some VP8 options do not map to VP9 in the same manner.

The VP9 encoder in this release is not feature complete. Users interested in
the encoder are advised to use the git master branch and discuss issues on
libvpx mailing lists.

- Upgrading:
This release is ABI and API compatible with Duclair (v1.0.0). Users
of older releases should refer to the Upgrading notes in this document
for that release.

- Enhancements:
Get rid of bashisms in the main build scripts
Added usage info on command line options
Add lossless compression mode
Dll build of libvpx
Add additional Mac OS X targets: 10.7, 10.8 and 10.9 (darwin11-13)
Add option to disable documentation
configure: add --enable-external-build support
make: support V=1 as short form of verbose=yes
configure: support mingw-w64
configure: support hardfloat armv7 CHOSTS
configure: add support for android x86
Add estimated completion time to vpxenc
Don't exit on decode errors in vpxenc
vpxenc: support scaling prior to encoding
vpxdec: support scaling output
vpxenc: improve progress indicators with --skip
msvs: Don't link to winmm.lib
Add a new script for producing vcxproj files
Produce Visual Studio 10 and 11 project files
Produce Windows Phone project files
msvs-build: use msbuild for vs >= 2005
configure: default configure log to config.log
Add encoding option --static-thresh

- Speed:
Miscellaneous speed optimizations for VP8 and VP9.

- Quality:
In general, quality is consistent with the Eider release.

- Bug Fixes:
This release represents approximately a year of engineering effort,
and contains multiple bug fixes. Please refer to git history for details.


2012-12-21 v1.2.0
This release acts as a checkpoint for a large amount of internal refactoring
and testing. It also contains a number of small bugfixes, so all users are
encouraged to upgrade.

- Upgrading:
This release is ABI and API compatible with Duclair (v1.0.0). Users
of older releases should refer to the Upgrading notes in this
document for that release.

- Enhancements:
VP8 optimizations for MIPS dspr2
vpxenc: add -quiet option

- Speed:
Encoder and decoder speed is consistent with the Eider release.

- Quality:
In general, quality is consistent with the Eider release.

Minor tweaks to ARNR filtering
Minor improvements to real time encoding with multiple temporal layers

- Bug Fixes:
Fixes multithreaded encoder race condition in loopfilter
Fixes multi-resolution threaded encoding
Fix potential encoder dead-lock after picture resize


2012-05-09 v1.1.0 "Eider"
This introduces a number of enhancements, mostly focused on real-time
encoding. In addition, it fixes a decoder bug (first introduced in
Duclair) so all users of that release are encouraged to upgrade.

- Upgrading:
This release is ABI and API compatible with Duclair (v1.0.0). Users
of older releases should refer to the Upgrading notes in this
document for that release.

This release introduces a new temporal denoiser, controlled by the
VP8E_SET_NOISE_SENSITIVITY control. The temporal denoiser does not
currently take a strength parameter, so the control is effectively
a boolean - zero (off) or non-zero (on). For compatibility with
existing applications, the values accepted are the same as those
for the spatial denoiser (0-6). The temporal denoiser is enabled
by default, and the older spatial denoiser may be restored by
configuring with --disable-temporal-denoising. The temporal denoiser
is more computationally intensive than the spatial one.
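
As a minimal illustration (not from the upstream notes), an application
toggles the temporal denoiser with the existing control; ctx is assumed to be
an initialized VP8 encoder context:

    #include "vpx/vpx_encoder.h"
    #include "vpx/vp8cx.h"

    /* Illustrative: any non-zero value enables the temporal denoiser, 0
     * disables it; values 1-6 are still accepted for compatibility with the
     * spatial denoiser's strength range. */
    static void set_denoising(vpx_codec_ctx_t *ctx, int enabled) {
        vpx_codec_control(ctx, VP8E_SET_NOISE_SENSITIVITY, enabled ? 1 : 0);
    }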

This release removes support for a legacy, decode only API that was
supported, but deprecated, at the initial release of libvpx
(v0.9.0). This is not expected to have any impact. If you are
impacted, you can apply a reversion to commit 2bf8fb58 locally.
Please update to the latest libvpx API if you are affected.

- Enhancements:
Adds a motion compensated temporal denoiser to the encoder, which
gives higher quality than the older spatial denoiser. (See above
for notes on upgrading).

In addition, support for new compilers and platforms was added,
including:
improved support for XCode
Android x86 NDK build
OS/2 support
SunCC support

Changing resolution with vpx_codec_enc_config_set() is now
supported. Previously, reinitializing the codec was required to
change the input resolution.
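
A sketch of what a mid-stream resolution change looks like with this API
(illustrative only; ctx and cfg are assumed to be the context and
configuration used at initialization):

    #include "vpx/vpx_encoder.h"

    /* Illustrative: update the coded size without re-creating the codec. */
    static vpx_codec_err_t change_resolution(vpx_codec_ctx_t *ctx,
                                             vpx_codec_enc_cfg_t *cfg,
                                             unsigned int w, unsigned int h) {
        cfg->g_w = w;
        cfg->g_h = h;
        return vpx_codec_enc_config_set(ctx, cfg);  /* applies to later frames */
    }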

The vpxenc application has initial support for producing multiple
encodes from the same input in one call. Resizing is not yet
supported, but varying other codec parameters is. Use -- to
delineate output streams. Options persist from one stream to the
next.

Also, the vpxenc application will now use a keyframe interval of
5 seconds by default. Use the --kf-max-dist option to override.

- Speed:
Decoder performance improved 2.5% versus Duclair. Encoder speed is
consistent with Duclair for most material. Two pass encoding of
slideshow-like material will see significant improvements.

Large realtime encoding speed gains at a small quality expense are
possible by configuring the on-the-fly bitpacking experiment with
--enable-onthefly-bitpacking. Realtime encoder can be up to 13%
faster (ARM) depending on the number of threads and bitrate
settings. This technique sees constant gain over the 5-16 speed
range. For VC style input the loss seen is up to 0.2dB. See commit
52cf4dca for further details.

- Quality:
On the whole, quality is consistent with the Duclair release. Some
tweaks:

Reduced blockiness in easy sections by applying a penalty to
intra modes.

Improved quality of static sections (like slideshows) with
two pass encoding.

Improved keyframe sizing with multiple temporal layers

- Bug Fixes:
Corrected alt-ref contribution to frame rate for visible updates
to the alt-ref buffer. This affected applications making manual
usage of the frame reference flags, or temporal layers.

Additional constraints were added to disable multi-frame quality
enhancement (MFQE) in sections of the frame where there is motion.
(#392)

Fixed corruption issues when vpx_codec_enc_config_set() was called
with spatial resampling enabled.

Fixed a decoder error introduced in Duclair where the segmentation
map was not being reinitialized on keyframes (#378)


2012-01-27 v1.0.0 "Duclair"
Our fourth named release, focused on performance and features related to
real-time encoding. It also fixes a decoder crash bug introduced in
v0.9.7, so all users of that release are encouraged to upgrade.

- Upgrading:
This release is ABI incompatible with prior releases of libvpx, so the
"major" version number has been bumped to 1. You must recompile your
applications against the latest version of the libvpx headers. The
API remains compatible, and this should not require code changes in most
applications.

- Enhancements:
This release introduces several substantial new features to the encoder,
of particular interest to real time streaming applications.

Temporal scalability allows the encoder to produce a stream that can
be decimated to different frame rates, with independent rate targeting
for each substream.
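
A sketch of the corresponding encoder configuration (illustrative values only;
the ts_* fields of vpx_codec_enc_cfg_t are the relevant knobs, here set up for
two temporal layers):

    #include "vpx/vpx_encoder.h"

    /* Illustrative: a base layer at half the input frame rate plus an
     * enhancement layer restoring the full rate. */
    static void setup_two_temporal_layers(vpx_codec_enc_cfg_t *cfg) {
        cfg->ts_number_layers = 2;
        cfg->ts_periodicity = 2;
        cfg->ts_layer_id[0] = 0;          /* even frames -> base layer */
        cfg->ts_layer_id[1] = 1;          /* odd frames  -> enhancement layer */
        cfg->ts_rate_decimator[0] = 2;    /* base layer runs at 1/2 frame rate */
        cfg->ts_rate_decimator[1] = 1;
        cfg->ts_target_bitrate[0] = 200;  /* kbit/s for the stream decoded at */
        cfg->ts_target_bitrate[1] = 400;  /* each layer */
    }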

Multiframe quality enhancement postprocessing can make visual quality
more consistent in the presence of frames that are substantially
different quality than the surrounding frames, as in the temporal
scalability case and in some forced keyframe scenarios.

Multiple-resolution encoding support allows the encoding of the
same content at different resolutions faster than encoding them
separately.

- Speed:
Optimization targets for this release included the decoder and the real-
time modes of the encoder. Decoder speed on x86 has improved 10.5% with
this release. Encoder improvements followed a curve where speeds 1-3
improved 4.0%-1.5%, speeds 4-8 improved <1%, and speeds 9-16 improved
1.5% to 10.5%, respectively. "Best" mode speed is consistent with the
Cayuga release.

- Quality:
Encoder quality in the single stream case is consistent with the Cayuga
release.

- Bug Fixes:
This release fixes an OOB read decoder crash bug present in v0.9.7
related to the clamping of motion vectors in SPLITMV blocks. This
behavior could be triggered by corrupt input or by starting
decoding from a P-frame.


2011-08-15 v0.9.7-p1 "Cayuga" patch 1
This is an incremental bugfix release against Cayuga. All users of that
release are strongly encouraged to upgrade.

- Fix potential OOB reads (cdae03a)

An unbounded out of bounds read was discovered when the
decoder was requested to perform error concealment (new in
Cayuga) given a frame with corrupt partition sizes.

A bounded out of bounds read was discovered affecting all
versions of libvpx. Given a multipartition input frame that
is truncated between the mode/mv partition and the first
residual partition (in the block of partition offsets), up
to 3 extra bytes could have been read from the source buffer.
The code will not take any action regardless of the contents
of these undefined bytes, as the truncated buffer is detected
immediately following the read based on the calculated
starting position of the coefficient partition.

- Fix potential error concealment crash when the very first frame
is missing or corrupt (a609be5)

- Fix significant artifacts in error concealment (a4c2211, 99d870a)

- Revert 1-pass CBR rate control changes (e961317)
Further testing showed this change produced undesirable visual
artifacts, rolling back for now.


2011-08-02 v0.9.7 "Cayuga"
Our third named release, focused on a faster, higher quality, encoder.

- Upgrading:
This release is backwards compatible with Aylesbury (v0.9.5) and
Bali (v0.9.6). Users of older releases should refer to the Upgrading
notes in this document for that release.

- Enhancements:
Stereo 3D format support for vpxenc
Runtime detection of available processor cores.
Allow specifying --end-usage by enum name
vpxdec: test for frame corruption
vpxenc: add quantizer histogram display
vpxenc: add rate histogram display
Set VPX_FRAME_IS_DROPPABLE
update configure for ios sdk 4.3
Avoid text relocations in ARM vp8 decoder
Generate a vpx.pc file for pkg-config.
New ways of passing encoded data between encoder and decoder.

- Speed:
This release includes across-the-board speed improvements to the
encoder. On x86, these measure at approximately 11.5% in Best mode,
21.5% in Good mode (speed 0), and 22.5% in Realtime mode (speed 6).
On ARM Cortex A9 with Neon extensions, real-time encoding of video
telephony content is 35% faster than Bali on single core and 48%
faster on multi-core. On the NVidia Tegra2 platform, real time
encoding is 40% faster than Bali.

Decoder speed was not a priority for this release, but improved
approximately 8.4% on x86.

Reduce motion vector search on alt-ref frame.
Encoder loopfilter running in its own thread
Reworked loopfilter to precalculate more parameters
SSE2/SSSE3 optimizations for build_predictors_mbuv{,_s}().
Make hor UV predict ~2x faster (73 vs 132 cycles) using SSSE3.
Removed redundant checks
Reduced structure sizes
utilize preload in ARMv6 MC/LPF/Copy routines
ARM optimized quantization, dfct, variance, subtract
Increase chrow row alignment to 16 bytes.
disable trellis optimization for first pass
Write SSSE3 sub-pixel filter function
Improve SSE2 half-pixel filter functions
Add vp8_sub_pixel_variance16x8_ssse3 function
Reduce unnecessary distortion computation
Use diamond search to replace full search
Preload reference area in sub-pixel motion search (real-time mode)

- Quality:
This release focused primarily on one-pass use cases, including
video conferencing. Low latency data rate control was significantly
improved, improving streamability over bandwidth constrained links.
Added support for error concealment, allowing frames to maintain
visual quality in the presence of substantial packet loss.

Add rc_max_intra_bitrate_pct control
Limit size of initial keyframe in one-pass.
Improve framerate adaptation
Improved 1-pass CBR rate control
Improved KF insertion after fades to still.
Improved key frame detection.
Improved activity masking (lower PSNR impact for same SSIM boost)
Improved interaction between GF and ARFs
Adding error-concealment to the decoder.
Adding support for independent partitions
Adjusted rate-distortion constants


- Bug Fixes:
Removed firstpass motion map
Fix parallel make install
Fix multithreaded encoding for 1 MB wide frame
Fixed iwalsh_neon build problems with RVDS4.1
Fix semaphore emulation, spin-wait intrinsics on Windows
Fix build with xcode4 and simplify GLOBAL.
Mark ARM asm objects as allowing a non-executable stack.
Fix vpxenc encoding incorrect webm file header on big endian


2011-03-07 v0.9.6 "Bali"
Our second named release, focused on a faster, higher quality, encoder.

- Upgrading:
This release is backwards compatible with Aylesbury (v0.9.5). Users
of older releases should refer to the Upgrading notes in this
document for that release.

- Enhancements:
vpxenc --psnr shows a summary when encode completes
--tune=ssim option to enable activity masking
improved postproc visualizations for development
updated support for Apple iOS to SDK 4.2
query decoder to determine which reference frames were updated
implemented error tracking in the decoder
fix pipe support on windows

- Speed:
Primary focus was on good quality mode, speed 0. Average improvement
on x86 about 40%, up to 100% on user-generated content at that speed.
Best quality mode speed improved 35%, and realtime speed 10-20%. This
release also saw significant improvement in realtime encoding speed
on ARM platforms.

Improved encoder threading
Don't pick encoder filter level when loopfilter is disabled.
Avoid double copying of key frames into alt and golden buffer
FDCT optimizations.
x86 sse2 temporal filter
SSSE3 version of fast quantizer
vp8_rd_pick_best_mbsegmentation code restructure
Adjusted breakout RD for SPLITMV
Changed segmentation check order
Improved rd_pick_intra4x4block
Adds armv6 optimized variance calculation
ARMv6 optimized sad16x16
ARMv6 optimized half pixel variance calculations
Full search SAD function optimization in SSE4.1
Improve MV prediction accuracy to achieve performance gain
Improve MV prediction in vp8_pick_inter_mode() for speed>3

- Quality:
Best quality mode improved PSNR 6.3%, and SSIM 6.1%. This release
also includes support for "activity masking," which greatly improves
SSIM at the expense of PSNR. For now, this feature is available with
the --tune=ssim option. Further experimentation in this area
is ongoing. This release also introduces a new rate control mode
called "CQ," which changes the allocation of bits within a clip to
the sections where they will have the most visual impact.

Tuning for the more exact quantizer.
Relax rate control for last few frames
CQ Mode
Limit key frame quantizer for forced key frames.
KF/GF Pulsing
Add simple version of activity masking.
make rdmult adaptive for intra in quantizer RDO
cap the best quantizer for 2nd order DC
change the threshold of DC check for encode breakout

- Bug Fixes:
Fix crash on Sparc Solaris.
Fix counter of fixed keyframe distance
ARNR filter pointer update bug fix
Fixed use of motion percentage in KF/GF group calc
Changed condition for using RD in Intra Mode
Fix encoder real-time only configuration.
Fix ARM encoder crash with multiple token partitions
Fixed bug first cluster timecode of webm file is wrong.
Fixed various encoder bugs with odd-sized images
vp8e_get_preview fixed when spatial resampling enabled
quantizer: fix assertion in fast quantizer path
Allocate source buffers to be multiples of 16
Fix for manual Golden frame frequency
Fix drastic undershoot in long form content


2010-10-28 v0.9.5 "Aylesbury"
Our first named release, focused on a faster decoder, and a better encoder.

- Upgrading:
This release incorporates backwards-incompatible changes to the
ivfenc and ivfdec tools. These tools are now called vpxenc and vpxdec.

vpxdec
* the -q (quiet) option has been removed, and replaced with
-v (verbose). the output is quiet by default. Use -v to see
the version number of the binary.

* The default behavior is now to write output to a single file
instead of individual frames. The -y option has been removed.
Y4M output is the default.

* For raw I420/YV12 output instead of Y4M, the --i420 or --yv12
options must be specified.

$ ivfdec -o OUTPUT INPUT
$ vpxdec --i420 -o OUTPUT INPUT

* If an output file is not specified, the default is to write
Y4M to stdout. This makes piping more natural.

$ ivfdec -y -o - INPUT | ...
$ vpxdec INPUT | ...

* The output file has additional flexibility for formatting the
filename. It supports escape characters for constructing a
filename from the width, height, and sequence number. This
replaces the -p option. To get the equivalent:

$ ivfdec -p frame INPUT
$ vpxdec --i420 -o frame-%wx%h-%4.i420 INPUT

vpxenc
* The output file must be specified with -o, rather than as the
last argument.

$ ivfenc <options> INPUT OUTPUT
$ vpxenc <options> -o OUTPUT INPUT

* The output defaults to webm. To get IVF output, use the --ivf
option.

$ ivfenc <options> INPUT OUTPUT.ivf
$ vpxenc <options> -o OUTPUT.ivf --ivf INPUT


- Enhancements:
ivfenc and ivfdec have been renamed to vpxenc, vpxdec.
vpxdec supports .webm input
vpxdec writes .y4m by default
vpxenc writes .webm output by default
vpxenc --psnr now shows the average/overall PSNR at the end
ARM platforms now support runtime cpu detection
vpxdec visualizations added for motion vectors, block modes, references
vpxdec now silent by default
vpxdec --progress shows frame-by-frame timing information
vpxenc supports the distinction between --fps and --timebase
NASM is now a supported assembler
configure: enable PIC for shared libs by default
configure: add --enable-small
configure: support for ppc32-linux-gcc
configure: support for sparc-solaris-gcc

- Bugs:
Improve handling of invalid frames
Fix valgrind errors in the NEON loop filters.
Fix loopfilter delta zero transitions
Fix valgrind errors in vp8_sixtap_predict8x4_armv6().
Build fixes for darwin-icc

- Speed:
20-40% (average 28%) improvement in libvpx decoder speed,
including:
Rewrite vp8_short_walsh4x4_sse2()
Optimizations on the loopfilters.
Miscellaneous improvements for Atom
Add 4-tap version of 2nd-pass ARMv6 MC filter.
Improved multithread utilization
Better instruction choices on x86
reorder data to use wider instructions
Update NEON wide idcts
Make block access to frame buffer sequential
Improved subset block search
Bilinear subpixel optimizations for ssse3.
Decrease memory footprint

Encoder speed improvements (percentage gain not measured):
Skip unnecessary search of identical frames
Add SSE2 subtract functions
Improve bounds checking in vp8_diamond_search_sadx4()
Added vp8_fast_quantize_b_sse2

- Quality:
Over 7% overall PSNR improvement (6.3% SSIM) in "best" quality
encoding mode, and up to 60% improvement on very noisy, still
or slow moving source video

Motion compensated temporal filter for Alt-Ref Noise Reduction
Improved use of trellis quantization on 2nd order Y blocks
Tune effect of motion on KF/GF boost in two pass
Allow coefficient optimization for good quality speed 0.
Improved control of active min quantizer for two pass.
Enable ARFs for non-lagged compress

2010-09-02 v0.9.2
- Enhancements:
Disable frame dropping by default
Improved multithreaded performance
Improved Force Key Frame Behaviour
Increased rate control buffer level precision
Fix bug in 1st pass motion compensation
ivfenc: correct fixed kf interval, --disable-kf
- Speed:
Changed above and left context data layout
Rework idct calling structure.
Removed unnecessary MB_MODE_INFO copies
x86: SSSE3 sixtap prediction
Reworked IDCT to include reconstruction (add) step
Swap alt/gold/new/last frame buffer ptrs instead of copying.
Improve SSE2 loopfilter functions
Change bitreader to use a larger window.
Avoid loopfilter reinitialization when possible
- Quality:
Normalize quantizer's zero bin and rounding factors
Add trellis quantization.
Make the quantizer exact.
Updates to ARNR filtering algorithm
Fix breakout thresh computation for golden & AltRef frames
Redo the forward 4x4 dct
Improve the accuracy of forward walsh-hadamard transform
Further adjustment of RD behaviour with Q and Zbin.
- Build System:
Allow linking of libs built with MinGW to MSVC
Fix target auto-detection on mingw32
Allow --cpu= to work for x86.
configure: pass original arguments through to make dist
Fix builds without runtime CPU detection
msvs: fix install of codec sources
msvs: Change devenv.com command line for better msys support
msvs: Add vs9 targets.
Add x86_64-linux-icc target
- Bugs:
Potential crashes on older MinGW builds
Fix two-pass framerate for Y4M input.
Fixed simple loop filter, other crashes on ARM v6
arm: fix missing dependency with --enable-shared
configure: support directories containing .o
Replace pinsrw (SSE) with MMX instructions
apple: include proper mach primitives
Fixed rate control bug with long key frame interval.
Fix DSO link errors on x86-64 when not using a version script
Fixed buffer selection for UV in AltRef filtering


2010-06-17 v0.9.1
- Enhancements:
* ivfenc/ivfdec now support YUV4MPEG2 input and pipe I/O
* Speed optimizations
- Bugfixes:
* Rate control
* Prevent out-of-bounds accesses on invalid data
- Build system updates:
* Detect toolchain to be used automatically for native builds
* Support building shared libraries
* Better autotools emulation (--prefix, --libdir, DESTDIR)
- Updated LICENSE
* http://webmproject.blogspot.com/2010/06/changes-to-webm-open-source-license.html


2010-05-18 v0.9.0
- Initial open source release. Welcome to WebM and VP8!

@@ -0,0 +1,31 @@
Copyright (c) 2010, The WebM Project authors. All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

  * Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.

  * Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in
    the documentation and/or other materials provided with the
    distribution.

  * Neither the name of Google, nor the WebM Project, nor the names
    of its contributors may be used to endorse or promote products
    derived from this software without specific prior written
    permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

@@ -0,0 +1,23 @@
Additional IP Rights Grant (Patents)
------------------------------------

"These implementations" means the copyrightable works that implement the WebM
codecs distributed by Google as part of the WebM Project.

Google hereby grants to you a perpetual, worldwide, non-exclusive, no-charge,
royalty-free, irrevocable (except as stated in this section) patent license to
make, have made, use, offer to sell, sell, import, transfer, and otherwise
run, modify and propagate the contents of these implementations of WebM, where
such license applies only to those patent claims, both currently owned by
Google and acquired in the future, licensable by Google that are necessarily
infringed by these implementations of WebM. This grant does not include claims
that would be infringed only as a consequence of further modification of these
implementations. If you or your agent or exclusive licensee institute or order
or agree to the institution of patent litigation or any other patent
enforcement activity against any entity (including a cross-claim or
counterclaim in a lawsuit) alleging that any of these implementations of WebM
or any code incorporated within any of these implementations of WebM
constitute direct or contributory patent infringement, or inducement of
patent infringement, then any patent rights granted to you under this License
for these implementations of WebM shall terminate as of the date such
litigation is filed.

@@ -0,0 +1,18 @@
Copyright (C) 2005-2012 x264 project

Authors: Loren Merritt <lorenm@u.washington.edu>
         Anton Mitrofanov <BugMaster@narod.ru>
         Jason Garrett-Glaser <darkshikari@gmail.com>
         Henrik Gramner <hengar-6@student.ltu.se>

Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.

THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

@@ -0,0 +1,20 @@
URL: https://git.videolan.org/git/x264.git
Version: d23d18655249944c1ca894b451e2c82c7a584c62
License: ISC
License File: LICENSE

Description:
x264/libav's framework for x86 assembly. Contains a variety of macros and
defines that help automatically allow assembly to work cross-platform.

Local Modifications:
Get configuration from vpx_config.asm.
Prefix functions with vpx by default.
Manage name mangling (prefixing with '_') manually because 'PREFIX' does not
exist in libvpx.
Expand PIC default to macho64 and respect CONFIG_PIC from libvpx
Set 'private_extern' visibility for macho targets.
Copy PIC 'GLOBAL' macros from x86_abi_support.asm
Use .text instead of .rodata on macho to avoid broken tables in PIC mode.
Use .text with no alignment for aout
Only use 'hidden' visibility with Chromium

(File diff suppressed because it is too large.)

@@ -0,0 +1,190 @@
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


#include "vpx_config.h"
#include "alloccommon.h"
#include "blockd.h"
#include "vpx_mem/vpx_mem.h"
#include "onyxc_int.h"
#include "findnearmv.h"
#include "entropymode.h"
#include "systemdependent.h"

void vp8_de_alloc_frame_buffers(VP8_COMMON *oci)
{
    int i;
    for (i = 0; i < NUM_YV12_BUFFERS; i++)
        vp8_yv12_de_alloc_frame_buffer(&oci->yv12_fb[i]);

    vp8_yv12_de_alloc_frame_buffer(&oci->temp_scale_frame);
#if CONFIG_POSTPROC
    vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer);
    if (oci->post_proc_buffer_int_used)
        vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer_int);

    vpx_free(oci->pp_limits_buffer);
    oci->pp_limits_buffer = NULL;
#endif

    vpx_free(oci->above_context);
    vpx_free(oci->mip);
#if CONFIG_ERROR_CONCEALMENT
    vpx_free(oci->prev_mip);
    oci->prev_mip = NULL;
#endif

    oci->above_context = NULL;
    oci->mip = NULL;
}

int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
{
    int i;

    vp8_de_alloc_frame_buffers(oci);

    /* our internal buffers are always multiples of 16 */
    if ((width & 0xf) != 0)
        width += 16 - (width & 0xf);

    if ((height & 0xf) != 0)
        height += 16 - (height & 0xf);


    for (i = 0; i < NUM_YV12_BUFFERS; i++)
    {
        oci->fb_idx_ref_cnt[i] = 0;
        oci->yv12_fb[i].flags = 0;
        if (vp8_yv12_alloc_frame_buffer(&oci->yv12_fb[i], width, height, VP8BORDERINPIXELS) < 0)
            goto allocation_fail;
    }

    oci->new_fb_idx = 0;
    oci->lst_fb_idx = 1;
    oci->gld_fb_idx = 2;
    oci->alt_fb_idx = 3;

    oci->fb_idx_ref_cnt[0] = 1;
    oci->fb_idx_ref_cnt[1] = 1;
    oci->fb_idx_ref_cnt[2] = 1;
    oci->fb_idx_ref_cnt[3] = 1;

    if (vp8_yv12_alloc_frame_buffer(&oci->temp_scale_frame, width, 16, VP8BORDERINPIXELS) < 0)
        goto allocation_fail;

    oci->mb_rows = height >> 4;
    oci->mb_cols = width >> 4;
    oci->MBs = oci->mb_rows * oci->mb_cols;
    oci->mode_info_stride = oci->mb_cols + 1;
    oci->mip = vpx_calloc((oci->mb_cols + 1) * (oci->mb_rows + 1), sizeof(MODE_INFO));

    if (!oci->mip)
        goto allocation_fail;

    oci->mi = oci->mip + oci->mode_info_stride + 1;

    /* Allocation of previous mode info will be done in vp8_decode_frame()
     * as it is a decoder only data */

    oci->above_context = vpx_calloc(sizeof(ENTROPY_CONTEXT_PLANES) * oci->mb_cols, 1);

    if (!oci->above_context)
        goto allocation_fail;

#if CONFIG_POSTPROC
    if (vp8_yv12_alloc_frame_buffer(&oci->post_proc_buffer, width, height, VP8BORDERINPIXELS) < 0)
        goto allocation_fail;

    oci->post_proc_buffer_int_used = 0;
    memset(&oci->postproc_state, 0, sizeof(oci->postproc_state));
    memset(oci->post_proc_buffer.buffer_alloc, 128,
           oci->post_proc_buffer.frame_size);

    /* Allocate buffer to store post-processing filter coefficients.
     *
     * Note: Round up mb_cols to support SIMD reads
     */
    oci->pp_limits_buffer = vpx_memalign(16, 24 * ((oci->mb_cols + 1) & ~1));
    if (!oci->pp_limits_buffer)
        goto allocation_fail;
#endif

    return 0;

allocation_fail:
    vp8_de_alloc_frame_buffers(oci);
    return 1;
}

void vp8_setup_version(VP8_COMMON *cm)
{
    switch (cm->version)
    {
    case 0:
        cm->no_lpf = 0;
        cm->filter_type = NORMAL_LOOPFILTER;
        cm->use_bilinear_mc_filter = 0;
        cm->full_pixel = 0;
        break;
    case 1:
        cm->no_lpf = 0;
        cm->filter_type = SIMPLE_LOOPFILTER;
        cm->use_bilinear_mc_filter = 1;
        cm->full_pixel = 0;
        break;
    case 2:
        cm->no_lpf = 1;
        cm->filter_type = NORMAL_LOOPFILTER;
        cm->use_bilinear_mc_filter = 1;
        cm->full_pixel = 0;
        break;
    case 3:
        cm->no_lpf = 1;
        cm->filter_type = SIMPLE_LOOPFILTER;
        cm->use_bilinear_mc_filter = 1;
        cm->full_pixel = 1;
        break;
    default:
        /*4,5,6,7 are reserved for future use*/
        cm->no_lpf = 0;
        cm->filter_type = NORMAL_LOOPFILTER;
        cm->use_bilinear_mc_filter = 0;
        cm->full_pixel = 0;
        break;
    }
}
void vp8_create_common(VP8_COMMON *oci)
{
    vp8_machine_specific_config(oci);

    vp8_init_mbmode_probs(oci);
    vp8_default_bmode_probs(oci->fc.bmode_prob);

    oci->mb_no_coeff_skip = 1;
    oci->no_lpf = 0;
    oci->filter_type = NORMAL_LOOPFILTER;
    oci->use_bilinear_mc_filter = 0;
    oci->full_pixel = 0;
    oci->multi_token_partition = ONE_PARTITION;
    oci->clamp_type = RECON_CLAMP_REQUIRED;

    /* Initialize reference frame sign bias structure to defaults */
    memset(oci->ref_frame_sign_bias, 0, sizeof(oci->ref_frame_sign_bias));

    /* Default disable buffer to buffer copying */
    oci->copy_buffer_to_gf = 0;
    oci->copy_buffer_to_arf = 0;
}

void vp8_remove_common(VP8_COMMON *oci)
{
    vp8_de_alloc_frame_buffers(oci);
}

@@ -0,0 +1,31 @@
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


#ifndef VP8_COMMON_ALLOCCOMMON_H_
#define VP8_COMMON_ALLOCCOMMON_H_

#include "onyxc_int.h"

#ifdef __cplusplus
extern "C" {
#endif

void vp8_create_common(VP8_COMMON *oci);
void vp8_remove_common(VP8_COMMON *oci);
void vp8_de_alloc_frame_buffers(VP8_COMMON *oci);
int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height);
void vp8_setup_version(VP8_COMMON *oci);

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // VP8_COMMON_ALLOCCOMMON_H_

@@ -0,0 +1,181 @@
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


#include "vpx_config.h"
#include "vp8_rtcd.h"
#include "vp8/common/loopfilter.h"
#include "vp8/common/onyxc_int.h"

#define prototype_loopfilter(sym) \
    void sym(unsigned char *src, int pitch, const unsigned char *blimit,\
             const unsigned char *limit, const unsigned char *thresh, int count)

#if HAVE_MEDIA
extern prototype_loopfilter(vp8_loop_filter_horizontal_edge_armv6);
extern prototype_loopfilter(vp8_loop_filter_vertical_edge_armv6);
extern prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_armv6);
extern prototype_loopfilter(vp8_mbloop_filter_vertical_edge_armv6);
#endif

#if HAVE_NEON
typedef void loopfilter_y_neon(unsigned char *src, int pitch,
        unsigned char blimit, unsigned char limit, unsigned char thresh);
typedef void loopfilter_uv_neon(unsigned char *u, int pitch,
        unsigned char blimit, unsigned char limit, unsigned char thresh,
        unsigned char *v);

extern loopfilter_y_neon vp8_loop_filter_horizontal_edge_y_neon;
extern loopfilter_y_neon vp8_loop_filter_vertical_edge_y_neon;
extern loopfilter_uv_neon vp8_loop_filter_horizontal_edge_uv_neon;
extern loopfilter_uv_neon vp8_loop_filter_vertical_edge_uv_neon;

extern loopfilter_y_neon vp8_mbloop_filter_horizontal_edge_y_neon;
extern loopfilter_y_neon vp8_mbloop_filter_vertical_edge_y_neon;
extern loopfilter_uv_neon vp8_mbloop_filter_horizontal_edge_uv_neon;
extern loopfilter_uv_neon vp8_mbloop_filter_vertical_edge_uv_neon;
#endif

#if HAVE_MEDIA
/* ARMV6/MEDIA loopfilter functions*/
/* Horizontal MB filtering */
void vp8_loop_filter_mbh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                               int y_stride, int uv_stride, loop_filter_info *lfi)
{
    vp8_mbloop_filter_horizontal_edge_armv6(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);

    if (u_ptr)
        vp8_mbloop_filter_horizontal_edge_armv6(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);

    if (v_ptr)
        vp8_mbloop_filter_horizontal_edge_armv6(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
}

/* Vertical MB Filtering */
void vp8_loop_filter_mbv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                               int y_stride, int uv_stride, loop_filter_info *lfi)
{
    vp8_mbloop_filter_vertical_edge_armv6(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);

    if (u_ptr)
        vp8_mbloop_filter_vertical_edge_armv6(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);

    if (v_ptr)
        vp8_mbloop_filter_vertical_edge_armv6(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
}

/* Horizontal B Filtering */
void vp8_loop_filter_bh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                              int y_stride, int uv_stride, loop_filter_info *lfi)
{
    vp8_loop_filter_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
    vp8_loop_filter_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
    vp8_loop_filter_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);

    if (u_ptr)
        vp8_loop_filter_horizontal_edge_armv6(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);

    if (v_ptr)
        vp8_loop_filter_horizontal_edge_armv6(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
}

void vp8_loop_filter_bhs_armv6(unsigned char *y_ptr, int y_stride,
                               const unsigned char *blimit)
{
    vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride, blimit);
    vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride, blimit);
    vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, blimit);
}

/* Vertical B Filtering */
void vp8_loop_filter_bv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                              int y_stride, int uv_stride, loop_filter_info *lfi)
{
    vp8_loop_filter_vertical_edge_armv6(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
    vp8_loop_filter_vertical_edge_armv6(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
    vp8_loop_filter_vertical_edge_armv6(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);

    if (u_ptr)
        vp8_loop_filter_vertical_edge_armv6(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);

    if (v_ptr)
        vp8_loop_filter_vertical_edge_armv6(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
}

void vp8_loop_filter_bvs_armv6(unsigned char *y_ptr, int y_stride,
                               const unsigned char *blimit)
{
    vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 4, y_stride, blimit);
    vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 8, y_stride, blimit);
    vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 12, y_stride, blimit);
}
#endif

#if HAVE_NEON
/* NEON loopfilter functions */
/* Horizontal MB filtering */
void vp8_loop_filter_mbh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                              int y_stride, int uv_stride, loop_filter_info *lfi)
{
    unsigned char mblim = *lfi->mblim;
    unsigned char lim = *lfi->lim;
    unsigned char hev_thr = *lfi->hev_thr;
    vp8_mbloop_filter_horizontal_edge_y_neon(y_ptr, y_stride, mblim, lim, hev_thr);

    if (u_ptr)
        vp8_mbloop_filter_horizontal_edge_uv_neon(u_ptr, uv_stride, mblim, lim, hev_thr, v_ptr);
}

/* Vertical MB Filtering */
void vp8_loop_filter_mbv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                              int y_stride, int uv_stride, loop_filter_info *lfi)
{
    unsigned char mblim = *lfi->mblim;
    unsigned char lim = *lfi->lim;
    unsigned char hev_thr = *lfi->hev_thr;

    vp8_mbloop_filter_vertical_edge_y_neon(y_ptr, y_stride, mblim, lim, hev_thr);

    if (u_ptr)
        vp8_mbloop_filter_vertical_edge_uv_neon(u_ptr, uv_stride, mblim, lim, hev_thr, v_ptr);
}

/* Horizontal B Filtering */
void vp8_loop_filter_bh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                             int y_stride, int uv_stride, loop_filter_info *lfi)
{
    unsigned char blim = *lfi->blim;
    unsigned char lim = *lfi->lim;
    unsigned char hev_thr = *lfi->hev_thr;

    vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 4 * y_stride, y_stride, blim, lim, hev_thr);
    vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 8 * y_stride, y_stride, blim, lim, hev_thr);
    vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 12 * y_stride, y_stride, blim, lim, hev_thr);

    if (u_ptr)
        vp8_loop_filter_horizontal_edge_uv_neon(u_ptr + 4 * uv_stride, uv_stride, blim, lim, hev_thr, v_ptr + 4 * uv_stride);
}

/* Vertical B Filtering */
void vp8_loop_filter_bv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                             int y_stride, int uv_stride, loop_filter_info *lfi)
{
    unsigned char blim = *lfi->blim;
    unsigned char lim = *lfi->lim;
    unsigned char hev_thr = *lfi->hev_thr;

    vp8_loop_filter_vertical_edge_y_neon(y_ptr + 4, y_stride, blim, lim, hev_thr);
    vp8_loop_filter_vertical_edge_y_neon(y_ptr + 8, y_stride, blim, lim, hev_thr);
    vp8_loop_filter_vertical_edge_y_neon(y_ptr + 12, y_stride, blim, lim, hev_thr);

    if (u_ptr)
        vp8_loop_filter_vertical_edge_uv_neon(u_ptr + 4, uv_stride, blim, lim, hev_thr, v_ptr + 4);
}
#endif

@@ -0,0 +1,591 @@
/*
 *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <arm_neon.h>

static const uint8_t bifilter4_coeff[8][2] = {
    {128, 0},
    {112, 16},
    { 96, 32},
    { 80, 48},
    { 64, 64},
    { 48, 80},
    { 32, 96},
    { 16, 112}
};

void vp8_bilinear_predict8x4_neon(
        unsigned char *src_ptr,
        int src_pixels_per_line,
        int xoffset,
        int yoffset,
        unsigned char *dst_ptr,
        int dst_pitch) {
    uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8;
    uint8x8_t d7u8, d9u8, d11u8, d22u8, d23u8, d24u8, d25u8, d26u8;
    uint8x16_t q1u8, q2u8, q3u8, q4u8, q5u8;
    uint16x8_t q1u16, q2u16, q3u16, q4u16;
    uint16x8_t q6u16, q7u16, q8u16, q9u16, q10u16;

    if (xoffset == 0) {  // skip_1stpass_filter
        d22u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
        d23u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
        d24u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
        d25u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
        d26u8 = vld1_u8(src_ptr);
    } else {
        q1u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
        q2u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
        q3u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
        q4u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
        q5u8 = vld1q_u8(src_ptr);

        d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]);
        d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]);

        q6u16 = vmull_u8(vget_low_u8(q1u8), d0u8);
        q7u16 = vmull_u8(vget_low_u8(q2u8), d0u8);
        q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8);
        q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8);
        q10u16 = vmull_u8(vget_low_u8(q5u8), d0u8);

        d3u8 = vext_u8(vget_low_u8(q1u8), vget_high_u8(q1u8), 1);
        d5u8 = vext_u8(vget_low_u8(q2u8), vget_high_u8(q2u8), 1);
        d7u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1);
        d9u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1);
        d11u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1);

        q6u16 = vmlal_u8(q6u16, d3u8, d1u8);
        q7u16 = vmlal_u8(q7u16, d5u8, d1u8);
        q8u16 = vmlal_u8(q8u16, d7u8, d1u8);
        q9u16 = vmlal_u8(q9u16, d9u8, d1u8);
        q10u16 = vmlal_u8(q10u16, d11u8, d1u8);

        d22u8 = vqrshrn_n_u16(q6u16, 7);
        d23u8 = vqrshrn_n_u16(q7u16, 7);
        d24u8 = vqrshrn_n_u16(q8u16, 7);
        d25u8 = vqrshrn_n_u16(q9u16, 7);
        d26u8 = vqrshrn_n_u16(q10u16, 7);
    }

    // secondpass_filter
    if (yoffset == 0) {  // skip_2ndpass_filter
        vst1_u8((uint8_t *)dst_ptr, d22u8); dst_ptr += dst_pitch;
        vst1_u8((uint8_t *)dst_ptr, d23u8); dst_ptr += dst_pitch;
        vst1_u8((uint8_t *)dst_ptr, d24u8); dst_ptr += dst_pitch;
        vst1_u8((uint8_t *)dst_ptr, d25u8);
    } else {
        d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]);
        d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]);

        q1u16 = vmull_u8(d22u8, d0u8);
        q2u16 = vmull_u8(d23u8, d0u8);
        q3u16 = vmull_u8(d24u8, d0u8);
        q4u16 = vmull_u8(d25u8, d0u8);

        q1u16 = vmlal_u8(q1u16, d23u8, d1u8);
        q2u16 = vmlal_u8(q2u16, d24u8, d1u8);
        q3u16 = vmlal_u8(q3u16, d25u8, d1u8);
        q4u16 = vmlal_u8(q4u16, d26u8, d1u8);

        d2u8 = vqrshrn_n_u16(q1u16, 7);
        d3u8 = vqrshrn_n_u16(q2u16, 7);
        d4u8 = vqrshrn_n_u16(q3u16, 7);
        d5u8 = vqrshrn_n_u16(q4u16, 7);

        vst1_u8((uint8_t *)dst_ptr, d2u8); dst_ptr += dst_pitch;
        vst1_u8((uint8_t *)dst_ptr, d3u8); dst_ptr += dst_pitch;
        vst1_u8((uint8_t *)dst_ptr, d4u8); dst_ptr += dst_pitch;
        vst1_u8((uint8_t *)dst_ptr, d5u8);
    }
    return;
}

void vp8_bilinear_predict8x8_neon(
        unsigned char *src_ptr,
        int src_pixels_per_line,
        int xoffset,
        int yoffset,
        unsigned char *dst_ptr,
        int dst_pitch) {
    uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8, d11u8;
|
||||
uint8x8_t d22u8, d23u8, d24u8, d25u8, d26u8, d27u8, d28u8, d29u8, d30u8;
|
||||
uint8x16_t q1u8, q2u8, q3u8, q4u8, q5u8;
|
||||
uint16x8_t q1u16, q2u16, q3u16, q4u16, q5u16;
|
||||
uint16x8_t q6u16, q7u16, q8u16, q9u16, q10u16;
|
||||
|
||||
if (xoffset == 0) { // skip_1stpass_filter
|
||||
d22u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
d23u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
d24u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
d25u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
d26u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
d27u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
d28u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
d29u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
d30u8 = vld1_u8(src_ptr);
|
||||
} else {
|
||||
q1u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
q2u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
q3u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
q4u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
|
||||
d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]);
|
||||
d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]);
|
||||
|
||||
q6u16 = vmull_u8(vget_low_u8(q1u8), d0u8);
|
||||
q7u16 = vmull_u8(vget_low_u8(q2u8), d0u8);
|
||||
q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8);
|
||||
q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8);
|
||||
|
||||
d3u8 = vext_u8(vget_low_u8(q1u8), vget_high_u8(q1u8), 1);
|
||||
d5u8 = vext_u8(vget_low_u8(q2u8), vget_high_u8(q2u8), 1);
|
||||
d7u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1);
|
||||
d9u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1);
|
||||
|
||||
q6u16 = vmlal_u8(q6u16, d3u8, d1u8);
|
||||
q7u16 = vmlal_u8(q7u16, d5u8, d1u8);
|
||||
q8u16 = vmlal_u8(q8u16, d7u8, d1u8);
|
||||
q9u16 = vmlal_u8(q9u16, d9u8, d1u8);
|
||||
|
||||
d22u8 = vqrshrn_n_u16(q6u16, 7);
|
||||
d23u8 = vqrshrn_n_u16(q7u16, 7);
|
||||
d24u8 = vqrshrn_n_u16(q8u16, 7);
|
||||
d25u8 = vqrshrn_n_u16(q9u16, 7);
|
||||
|
||||
// first_pass filtering on the rest 5-line data
|
||||
q1u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
q2u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
q3u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
q4u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
q5u8 = vld1q_u8(src_ptr);
|
||||
|
||||
q6u16 = vmull_u8(vget_low_u8(q1u8), d0u8);
|
||||
q7u16 = vmull_u8(vget_low_u8(q2u8), d0u8);
|
||||
q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8);
|
||||
q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8);
|
||||
q10u16 = vmull_u8(vget_low_u8(q5u8), d0u8);
|
||||
|
||||
d3u8 = vext_u8(vget_low_u8(q1u8), vget_high_u8(q1u8), 1);
|
||||
d5u8 = vext_u8(vget_low_u8(q2u8), vget_high_u8(q2u8), 1);
|
||||
d7u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1);
|
||||
d9u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1);
|
||||
d11u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1);
|
||||
|
||||
q6u16 = vmlal_u8(q6u16, d3u8, d1u8);
|
||||
q7u16 = vmlal_u8(q7u16, d5u8, d1u8);
|
||||
q8u16 = vmlal_u8(q8u16, d7u8, d1u8);
|
||||
q9u16 = vmlal_u8(q9u16, d9u8, d1u8);
|
||||
q10u16 = vmlal_u8(q10u16, d11u8, d1u8);
|
||||
|
||||
d26u8 = vqrshrn_n_u16(q6u16, 7);
|
||||
d27u8 = vqrshrn_n_u16(q7u16, 7);
|
||||
d28u8 = vqrshrn_n_u16(q8u16, 7);
|
||||
d29u8 = vqrshrn_n_u16(q9u16, 7);
|
||||
d30u8 = vqrshrn_n_u16(q10u16, 7);
|
||||
}
|
||||
|
||||
// secondpass_filter
|
||||
if (yoffset == 0) { // skip_2ndpass_filter
|
||||
vst1_u8((uint8_t *)dst_ptr, d22u8); dst_ptr += dst_pitch;
|
||||
vst1_u8((uint8_t *)dst_ptr, d23u8); dst_ptr += dst_pitch;
|
||||
vst1_u8((uint8_t *)dst_ptr, d24u8); dst_ptr += dst_pitch;
|
||||
vst1_u8((uint8_t *)dst_ptr, d25u8); dst_ptr += dst_pitch;
|
||||
vst1_u8((uint8_t *)dst_ptr, d26u8); dst_ptr += dst_pitch;
|
||||
vst1_u8((uint8_t *)dst_ptr, d27u8); dst_ptr += dst_pitch;
|
||||
vst1_u8((uint8_t *)dst_ptr, d28u8); dst_ptr += dst_pitch;
|
||||
vst1_u8((uint8_t *)dst_ptr, d29u8);
|
||||
} else {
|
||||
d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]);
|
||||
d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]);
|
||||
|
||||
q1u16 = vmull_u8(d22u8, d0u8);
|
||||
q2u16 = vmull_u8(d23u8, d0u8);
|
||||
q3u16 = vmull_u8(d24u8, d0u8);
|
||||
q4u16 = vmull_u8(d25u8, d0u8);
|
||||
q5u16 = vmull_u8(d26u8, d0u8);
|
||||
q6u16 = vmull_u8(d27u8, d0u8);
|
||||
q7u16 = vmull_u8(d28u8, d0u8);
|
||||
q8u16 = vmull_u8(d29u8, d0u8);
|
||||
|
||||
q1u16 = vmlal_u8(q1u16, d23u8, d1u8);
|
||||
q2u16 = vmlal_u8(q2u16, d24u8, d1u8);
|
||||
q3u16 = vmlal_u8(q3u16, d25u8, d1u8);
|
||||
q4u16 = vmlal_u8(q4u16, d26u8, d1u8);
|
||||
q5u16 = vmlal_u8(q5u16, d27u8, d1u8);
|
||||
q6u16 = vmlal_u8(q6u16, d28u8, d1u8);
|
||||
q7u16 = vmlal_u8(q7u16, d29u8, d1u8);
|
||||
q8u16 = vmlal_u8(q8u16, d30u8, d1u8);
|
||||
|
||||
d2u8 = vqrshrn_n_u16(q1u16, 7);
|
||||
d3u8 = vqrshrn_n_u16(q2u16, 7);
|
||||
d4u8 = vqrshrn_n_u16(q3u16, 7);
|
||||
d5u8 = vqrshrn_n_u16(q4u16, 7);
|
||||
d6u8 = vqrshrn_n_u16(q5u16, 7);
|
||||
d7u8 = vqrshrn_n_u16(q6u16, 7);
|
||||
d8u8 = vqrshrn_n_u16(q7u16, 7);
|
||||
d9u8 = vqrshrn_n_u16(q8u16, 7);
|
||||
|
||||
vst1_u8((uint8_t *)dst_ptr, d2u8); dst_ptr += dst_pitch;
|
||||
vst1_u8((uint8_t *)dst_ptr, d3u8); dst_ptr += dst_pitch;
|
||||
vst1_u8((uint8_t *)dst_ptr, d4u8); dst_ptr += dst_pitch;
|
||||
vst1_u8((uint8_t *)dst_ptr, d5u8); dst_ptr += dst_pitch;
|
||||
vst1_u8((uint8_t *)dst_ptr, d6u8); dst_ptr += dst_pitch;
|
||||
vst1_u8((uint8_t *)dst_ptr, d7u8); dst_ptr += dst_pitch;
|
||||
vst1_u8((uint8_t *)dst_ptr, d8u8); dst_ptr += dst_pitch;
|
||||
vst1_u8((uint8_t *)dst_ptr, d9u8);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict16x16_neon(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch) {
|
||||
int i;
|
||||
unsigned char tmp[272];
|
||||
unsigned char *tmpp;
|
||||
uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8;
|
||||
uint8x8_t d10u8, d11u8, d12u8, d13u8, d14u8, d15u8, d16u8, d17u8, d18u8;
|
||||
uint8x8_t d19u8, d20u8, d21u8;
|
||||
uint8x16_t q1u8, q2u8, q3u8, q4u8, q5u8, q6u8, q7u8, q8u8, q9u8, q10u8;
|
||||
uint8x16_t q11u8, q12u8, q13u8, q14u8, q15u8;
|
||||
uint16x8_t q1u16, q2u16, q3u16, q4u16, q5u16, q6u16, q7u16, q8u16;
|
||||
uint16x8_t q9u16, q10u16, q11u16, q12u16, q13u16, q14u16;
|
||||
|
||||
if (xoffset == 0) { // secondpass_bfilter16x16_only
|
||||
d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]);
|
||||
d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]);
|
||||
|
||||
q11u8 = vld1q_u8(src_ptr);
|
||||
src_ptr += src_pixels_per_line;
|
||||
for (i = 4; i > 0; i--) {
|
||||
q12u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
q13u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
q14u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
q15u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
|
||||
q1u16 = vmull_u8(vget_low_u8(q11u8), d0u8);
|
||||
q2u16 = vmull_u8(vget_high_u8(q11u8), d0u8);
|
||||
q3u16 = vmull_u8(vget_low_u8(q12u8), d0u8);
|
||||
q4u16 = vmull_u8(vget_high_u8(q12u8), d0u8);
|
||||
q5u16 = vmull_u8(vget_low_u8(q13u8), d0u8);
|
||||
q6u16 = vmull_u8(vget_high_u8(q13u8), d0u8);
|
||||
q7u16 = vmull_u8(vget_low_u8(q14u8), d0u8);
|
||||
q8u16 = vmull_u8(vget_high_u8(q14u8), d0u8);
|
||||
|
||||
q1u16 = vmlal_u8(q1u16, vget_low_u8(q12u8), d1u8);
|
||||
q2u16 = vmlal_u8(q2u16, vget_high_u8(q12u8), d1u8);
|
||||
q3u16 = vmlal_u8(q3u16, vget_low_u8(q13u8), d1u8);
|
||||
q4u16 = vmlal_u8(q4u16, vget_high_u8(q13u8), d1u8);
|
||||
q5u16 = vmlal_u8(q5u16, vget_low_u8(q14u8), d1u8);
|
||||
q6u16 = vmlal_u8(q6u16, vget_high_u8(q14u8), d1u8);
|
||||
q7u16 = vmlal_u8(q7u16, vget_low_u8(q15u8), d1u8);
|
||||
q8u16 = vmlal_u8(q8u16, vget_high_u8(q15u8), d1u8);
|
||||
|
||||
d2u8 = vqrshrn_n_u16(q1u16, 7);
|
||||
d3u8 = vqrshrn_n_u16(q2u16, 7);
|
||||
d4u8 = vqrshrn_n_u16(q3u16, 7);
|
||||
d5u8 = vqrshrn_n_u16(q4u16, 7);
|
||||
d6u8 = vqrshrn_n_u16(q5u16, 7);
|
||||
d7u8 = vqrshrn_n_u16(q6u16, 7);
|
||||
d8u8 = vqrshrn_n_u16(q7u16, 7);
|
||||
d9u8 = vqrshrn_n_u16(q8u16, 7);
|
||||
|
||||
q1u8 = vcombine_u8(d2u8, d3u8);
|
||||
q2u8 = vcombine_u8(d4u8, d5u8);
|
||||
q3u8 = vcombine_u8(d6u8, d7u8);
|
||||
q4u8 = vcombine_u8(d8u8, d9u8);
|
||||
|
||||
q11u8 = q15u8;
|
||||
|
||||
vst1q_u8((uint8_t *)dst_ptr, q1u8); dst_ptr += dst_pitch;
|
||||
vst1q_u8((uint8_t *)dst_ptr, q2u8); dst_ptr += dst_pitch;
|
||||
vst1q_u8((uint8_t *)dst_ptr, q3u8); dst_ptr += dst_pitch;
|
||||
vst1q_u8((uint8_t *)dst_ptr, q4u8); dst_ptr += dst_pitch;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (yoffset == 0) { // firstpass_bfilter16x16_only
|
||||
d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]);
|
||||
d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]);
|
||||
|
||||
for (i = 4; i > 0 ; i--) {
|
||||
d2u8 = vld1_u8(src_ptr);
|
||||
d3u8 = vld1_u8(src_ptr + 8);
|
||||
d4u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
|
||||
d5u8 = vld1_u8(src_ptr);
|
||||
d6u8 = vld1_u8(src_ptr + 8);
|
||||
d7u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
|
||||
d8u8 = vld1_u8(src_ptr);
|
||||
d9u8 = vld1_u8(src_ptr + 8);
|
||||
d10u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
|
||||
d11u8 = vld1_u8(src_ptr);
|
||||
d12u8 = vld1_u8(src_ptr + 8);
|
||||
d13u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
|
||||
|
||||
q7u16 = vmull_u8(d2u8, d0u8);
|
||||
q8u16 = vmull_u8(d3u8, d0u8);
|
||||
q9u16 = vmull_u8(d5u8, d0u8);
|
||||
q10u16 = vmull_u8(d6u8, d0u8);
|
||||
q11u16 = vmull_u8(d8u8, d0u8);
|
||||
q12u16 = vmull_u8(d9u8, d0u8);
|
||||
q13u16 = vmull_u8(d11u8, d0u8);
|
||||
q14u16 = vmull_u8(d12u8, d0u8);
|
||||
|
||||
d2u8 = vext_u8(d2u8, d3u8, 1);
|
||||
d5u8 = vext_u8(d5u8, d6u8, 1);
|
||||
d8u8 = vext_u8(d8u8, d9u8, 1);
|
||||
d11u8 = vext_u8(d11u8, d12u8, 1);
|
||||
|
||||
q7u16 = vmlal_u8(q7u16, d2u8, d1u8);
|
||||
q9u16 = vmlal_u8(q9u16, d5u8, d1u8);
|
||||
q11u16 = vmlal_u8(q11u16, d8u8, d1u8);
|
||||
q13u16 = vmlal_u8(q13u16, d11u8, d1u8);
|
||||
|
||||
d3u8 = vext_u8(d3u8, d4u8, 1);
|
||||
d6u8 = vext_u8(d6u8, d7u8, 1);
|
||||
d9u8 = vext_u8(d9u8, d10u8, 1);
|
||||
d12u8 = vext_u8(d12u8, d13u8, 1);
|
||||
|
||||
q8u16 = vmlal_u8(q8u16, d3u8, d1u8);
|
||||
q10u16 = vmlal_u8(q10u16, d6u8, d1u8);
|
||||
q12u16 = vmlal_u8(q12u16, d9u8, d1u8);
|
||||
q14u16 = vmlal_u8(q14u16, d12u8, d1u8);
|
||||
|
||||
d14u8 = vqrshrn_n_u16(q7u16, 7);
|
||||
d15u8 = vqrshrn_n_u16(q8u16, 7);
|
||||
d16u8 = vqrshrn_n_u16(q9u16, 7);
|
||||
d17u8 = vqrshrn_n_u16(q10u16, 7);
|
||||
d18u8 = vqrshrn_n_u16(q11u16, 7);
|
||||
d19u8 = vqrshrn_n_u16(q12u16, 7);
|
||||
d20u8 = vqrshrn_n_u16(q13u16, 7);
|
||||
d21u8 = vqrshrn_n_u16(q14u16, 7);
|
||||
|
||||
q7u8 = vcombine_u8(d14u8, d15u8);
|
||||
q8u8 = vcombine_u8(d16u8, d17u8);
|
||||
q9u8 = vcombine_u8(d18u8, d19u8);
|
||||
q10u8 =vcombine_u8(d20u8, d21u8);
|
||||
|
||||
vst1q_u8((uint8_t *)dst_ptr, q7u8); dst_ptr += dst_pitch;
|
||||
vst1q_u8((uint8_t *)dst_ptr, q8u8); dst_ptr += dst_pitch;
|
||||
vst1q_u8((uint8_t *)dst_ptr, q9u8); dst_ptr += dst_pitch;
|
||||
vst1q_u8((uint8_t *)dst_ptr, q10u8); dst_ptr += dst_pitch;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]);
|
||||
d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]);
|
||||
|
||||
d2u8 = vld1_u8(src_ptr);
|
||||
d3u8 = vld1_u8(src_ptr + 8);
|
||||
d4u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
|
||||
d5u8 = vld1_u8(src_ptr);
|
||||
d6u8 = vld1_u8(src_ptr + 8);
|
||||
d7u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
|
||||
d8u8 = vld1_u8(src_ptr);
|
||||
d9u8 = vld1_u8(src_ptr + 8);
|
||||
d10u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
|
||||
d11u8 = vld1_u8(src_ptr);
|
||||
d12u8 = vld1_u8(src_ptr + 8);
|
||||
d13u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
|
||||
|
||||
// First Pass: output_height lines x output_width columns (17x16)
|
||||
tmpp = tmp;
|
||||
for (i = 3; i > 0; i--) {
|
||||
q7u16 = vmull_u8(d2u8, d0u8);
|
||||
q8u16 = vmull_u8(d3u8, d0u8);
|
||||
q9u16 = vmull_u8(d5u8, d0u8);
|
||||
q10u16 = vmull_u8(d6u8, d0u8);
|
||||
q11u16 = vmull_u8(d8u8, d0u8);
|
||||
q12u16 = vmull_u8(d9u8, d0u8);
|
||||
q13u16 = vmull_u8(d11u8, d0u8);
|
||||
q14u16 = vmull_u8(d12u8, d0u8);
|
||||
|
||||
d2u8 = vext_u8(d2u8, d3u8, 1);
|
||||
d5u8 = vext_u8(d5u8, d6u8, 1);
|
||||
d8u8 = vext_u8(d8u8, d9u8, 1);
|
||||
d11u8 = vext_u8(d11u8, d12u8, 1);
|
||||
|
||||
q7u16 = vmlal_u8(q7u16, d2u8, d1u8);
|
||||
q9u16 = vmlal_u8(q9u16, d5u8, d1u8);
|
||||
q11u16 = vmlal_u8(q11u16, d8u8, d1u8);
|
||||
q13u16 = vmlal_u8(q13u16, d11u8, d1u8);
|
||||
|
||||
d3u8 = vext_u8(d3u8, d4u8, 1);
|
||||
d6u8 = vext_u8(d6u8, d7u8, 1);
|
||||
d9u8 = vext_u8(d9u8, d10u8, 1);
|
||||
d12u8 = vext_u8(d12u8, d13u8, 1);
|
||||
|
||||
q8u16 = vmlal_u8(q8u16, d3u8, d1u8);
|
||||
q10u16 = vmlal_u8(q10u16, d6u8, d1u8);
|
||||
q12u16 = vmlal_u8(q12u16, d9u8, d1u8);
|
||||
q14u16 = vmlal_u8(q14u16, d12u8, d1u8);
|
||||
|
||||
d14u8 = vqrshrn_n_u16(q7u16, 7);
|
||||
d15u8 = vqrshrn_n_u16(q8u16, 7);
|
||||
d16u8 = vqrshrn_n_u16(q9u16, 7);
|
||||
d17u8 = vqrshrn_n_u16(q10u16, 7);
|
||||
d18u8 = vqrshrn_n_u16(q11u16, 7);
|
||||
d19u8 = vqrshrn_n_u16(q12u16, 7);
|
||||
d20u8 = vqrshrn_n_u16(q13u16, 7);
|
||||
d21u8 = vqrshrn_n_u16(q14u16, 7);
|
||||
|
||||
d2u8 = vld1_u8(src_ptr);
|
||||
d3u8 = vld1_u8(src_ptr + 8);
|
||||
d4u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
|
||||
d5u8 = vld1_u8(src_ptr);
|
||||
d6u8 = vld1_u8(src_ptr + 8);
|
||||
d7u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
|
||||
d8u8 = vld1_u8(src_ptr);
|
||||
d9u8 = vld1_u8(src_ptr + 8);
|
||||
d10u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
|
||||
d11u8 = vld1_u8(src_ptr);
|
||||
d12u8 = vld1_u8(src_ptr + 8);
|
||||
d13u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
|
||||
|
||||
q7u8 = vcombine_u8(d14u8, d15u8);
|
||||
q8u8 = vcombine_u8(d16u8, d17u8);
|
||||
q9u8 = vcombine_u8(d18u8, d19u8);
|
||||
q10u8 = vcombine_u8(d20u8, d21u8);
|
||||
|
||||
vst1q_u8((uint8_t *)tmpp, q7u8); tmpp += 16;
|
||||
vst1q_u8((uint8_t *)tmpp, q8u8); tmpp += 16;
|
||||
vst1q_u8((uint8_t *)tmpp, q9u8); tmpp += 16;
|
||||
vst1q_u8((uint8_t *)tmpp, q10u8); tmpp += 16;
|
||||
}
|
||||
|
||||
// First-pass filtering for rest 5 lines
|
||||
d14u8 = vld1_u8(src_ptr);
|
||||
d15u8 = vld1_u8(src_ptr + 8);
|
||||
d16u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line;
|
||||
|
||||
q9u16 = vmull_u8(d2u8, d0u8);
|
||||
q10u16 = vmull_u8(d3u8, d0u8);
|
||||
q11u16 = vmull_u8(d5u8, d0u8);
|
||||
q12u16 = vmull_u8(d6u8, d0u8);
|
||||
q13u16 = vmull_u8(d8u8, d0u8);
|
||||
q14u16 = vmull_u8(d9u8, d0u8);
|
||||
|
||||
d2u8 = vext_u8(d2u8, d3u8, 1);
|
||||
d5u8 = vext_u8(d5u8, d6u8, 1);
|
||||
d8u8 = vext_u8(d8u8, d9u8, 1);
|
||||
|
||||
q9u16 = vmlal_u8(q9u16, d2u8, d1u8);
|
||||
q11u16 = vmlal_u8(q11u16, d5u8, d1u8);
|
||||
q13u16 = vmlal_u8(q13u16, d8u8, d1u8);
|
||||
|
||||
d3u8 = vext_u8(d3u8, d4u8, 1);
|
||||
d6u8 = vext_u8(d6u8, d7u8, 1);
|
||||
d9u8 = vext_u8(d9u8, d10u8, 1);
|
||||
|
||||
q10u16 = vmlal_u8(q10u16, d3u8, d1u8);
|
||||
q12u16 = vmlal_u8(q12u16, d6u8, d1u8);
|
||||
q14u16 = vmlal_u8(q14u16, d9u8, d1u8);
|
||||
|
||||
q1u16 = vmull_u8(d11u8, d0u8);
|
||||
q2u16 = vmull_u8(d12u8, d0u8);
|
||||
q3u16 = vmull_u8(d14u8, d0u8);
|
||||
q4u16 = vmull_u8(d15u8, d0u8);
|
||||
|
||||
d11u8 = vext_u8(d11u8, d12u8, 1);
|
||||
d14u8 = vext_u8(d14u8, d15u8, 1);
|
||||
|
||||
q1u16 = vmlal_u8(q1u16, d11u8, d1u8);
|
||||
q3u16 = vmlal_u8(q3u16, d14u8, d1u8);
|
||||
|
||||
d12u8 = vext_u8(d12u8, d13u8, 1);
|
||||
d15u8 = vext_u8(d15u8, d16u8, 1);
|
||||
|
||||
q2u16 = vmlal_u8(q2u16, d12u8, d1u8);
|
||||
q4u16 = vmlal_u8(q4u16, d15u8, d1u8);
|
||||
|
||||
d10u8 = vqrshrn_n_u16(q9u16, 7);
|
||||
d11u8 = vqrshrn_n_u16(q10u16, 7);
|
||||
d12u8 = vqrshrn_n_u16(q11u16, 7);
|
||||
d13u8 = vqrshrn_n_u16(q12u16, 7);
|
||||
d14u8 = vqrshrn_n_u16(q13u16, 7);
|
||||
d15u8 = vqrshrn_n_u16(q14u16, 7);
|
||||
d16u8 = vqrshrn_n_u16(q1u16, 7);
|
||||
d17u8 = vqrshrn_n_u16(q2u16, 7);
|
||||
d18u8 = vqrshrn_n_u16(q3u16, 7);
|
||||
d19u8 = vqrshrn_n_u16(q4u16, 7);
|
||||
|
||||
q5u8 = vcombine_u8(d10u8, d11u8);
|
||||
q6u8 = vcombine_u8(d12u8, d13u8);
|
||||
q7u8 = vcombine_u8(d14u8, d15u8);
|
||||
q8u8 = vcombine_u8(d16u8, d17u8);
|
||||
q9u8 = vcombine_u8(d18u8, d19u8);
|
||||
|
||||
vst1q_u8((uint8_t *)tmpp, q5u8); tmpp += 16;
|
||||
vst1q_u8((uint8_t *)tmpp, q6u8); tmpp += 16;
|
||||
vst1q_u8((uint8_t *)tmpp, q7u8); tmpp += 16;
|
||||
vst1q_u8((uint8_t *)tmpp, q8u8); tmpp += 16;
|
||||
vst1q_u8((uint8_t *)tmpp, q9u8);
|
||||
|
||||
// secondpass_filter
|
||||
d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]);
|
||||
d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]);
|
||||
|
||||
tmpp = tmp;
|
||||
q11u8 = vld1q_u8(tmpp);
|
||||
tmpp += 16;
|
||||
for (i = 4; i > 0; i--) {
|
||||
q12u8 = vld1q_u8(tmpp); tmpp += 16;
|
||||
q13u8 = vld1q_u8(tmpp); tmpp += 16;
|
||||
q14u8 = vld1q_u8(tmpp); tmpp += 16;
|
||||
q15u8 = vld1q_u8(tmpp); tmpp += 16;
|
||||
|
||||
q1u16 = vmull_u8(vget_low_u8(q11u8), d0u8);
|
||||
q2u16 = vmull_u8(vget_high_u8(q11u8), d0u8);
|
||||
q3u16 = vmull_u8(vget_low_u8(q12u8), d0u8);
|
||||
q4u16 = vmull_u8(vget_high_u8(q12u8), d0u8);
|
||||
q5u16 = vmull_u8(vget_low_u8(q13u8), d0u8);
|
||||
q6u16 = vmull_u8(vget_high_u8(q13u8), d0u8);
|
||||
q7u16 = vmull_u8(vget_low_u8(q14u8), d0u8);
|
||||
q8u16 = vmull_u8(vget_high_u8(q14u8), d0u8);
|
||||
|
||||
q1u16 = vmlal_u8(q1u16, vget_low_u8(q12u8), d1u8);
|
||||
q2u16 = vmlal_u8(q2u16, vget_high_u8(q12u8), d1u8);
|
||||
q3u16 = vmlal_u8(q3u16, vget_low_u8(q13u8), d1u8);
|
||||
q4u16 = vmlal_u8(q4u16, vget_high_u8(q13u8), d1u8);
|
||||
q5u16 = vmlal_u8(q5u16, vget_low_u8(q14u8), d1u8);
|
||||
q6u16 = vmlal_u8(q6u16, vget_high_u8(q14u8), d1u8);
|
||||
q7u16 = vmlal_u8(q7u16, vget_low_u8(q15u8), d1u8);
|
||||
q8u16 = vmlal_u8(q8u16, vget_high_u8(q15u8), d1u8);
|
||||
|
||||
d2u8 = vqrshrn_n_u16(q1u16, 7);
|
||||
d3u8 = vqrshrn_n_u16(q2u16, 7);
|
||||
d4u8 = vqrshrn_n_u16(q3u16, 7);
|
||||
d5u8 = vqrshrn_n_u16(q4u16, 7);
|
||||
d6u8 = vqrshrn_n_u16(q5u16, 7);
|
||||
d7u8 = vqrshrn_n_u16(q6u16, 7);
|
||||
d8u8 = vqrshrn_n_u16(q7u16, 7);
|
||||
d9u8 = vqrshrn_n_u16(q8u16, 7);
|
||||
|
||||
q1u8 = vcombine_u8(d2u8, d3u8);
|
||||
q2u8 = vcombine_u8(d4u8, d5u8);
|
||||
q3u8 = vcombine_u8(d6u8, d7u8);
|
||||
q4u8 = vcombine_u8(d8u8, d9u8);
|
||||
|
||||
q11u8 = q15u8;
|
||||
|
||||
vst1q_u8((uint8_t *)dst_ptr, q1u8); dst_ptr += dst_pitch;
|
||||
vst1q_u8((uint8_t *)dst_ptr, q2u8); dst_ptr += dst_pitch;
|
||||
vst1q_u8((uint8_t *)dst_ptr, q3u8); dst_ptr += dst_pitch;
|
||||
vst1q_u8((uint8_t *)dst_ptr, q4u8); dst_ptr += dst_pitch;
|
||||
}
|
||||
return;
|
||||
}
|
|
@ -0,0 +1,59 @@
|
|||
/*
|
||||
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
void vp8_copy_mem8x4_neon(
|
||||
unsigned char *src,
|
||||
int src_stride,
|
||||
unsigned char *dst,
|
||||
int dst_stride) {
|
||||
uint8x8_t vtmp;
|
||||
int r;
|
||||
|
||||
for (r = 0; r < 4; r++) {
|
||||
vtmp = vld1_u8(src);
|
||||
vst1_u8(dst, vtmp);
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_copy_mem8x8_neon(
|
||||
unsigned char *src,
|
||||
int src_stride,
|
||||
unsigned char *dst,
|
||||
int dst_stride) {
|
||||
uint8x8_t vtmp;
|
||||
int r;
|
||||
|
||||
for (r = 0; r < 8; r++) {
|
||||
vtmp = vld1_u8(src);
|
||||
vst1_u8(dst, vtmp);
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_copy_mem16x16_neon(
|
||||
unsigned char *src,
|
||||
int src_stride,
|
||||
unsigned char *dst,
|
||||
int dst_stride) {
|
||||
int r;
|
||||
uint8x16_t qtmp;
|
||||
|
||||
for (r = 0; r < 16; r++) {
|
||||
qtmp = vld1q_u8(src);
|
||||
vst1q_u8(dst, qtmp);
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
/*
|
||||
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
void vp8_dc_only_idct_add_neon(
|
||||
int16_t input_dc,
|
||||
unsigned char *pred_ptr,
|
||||
int pred_stride,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_stride) {
|
||||
int i;
|
||||
uint16_t a1 = ((input_dc + 4) >> 3);
|
||||
uint32x2_t d2u32 = vdup_n_u32(0);
|
||||
uint8x8_t d2u8;
|
||||
uint16x8_t q1u16;
|
||||
uint16x8_t qAdd;
|
||||
|
||||
qAdd = vdupq_n_u16(a1);
|
||||
|
||||
for (i = 0; i < 2; i++) {
|
||||
d2u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d2u32, 0);
|
||||
pred_ptr += pred_stride;
|
||||
d2u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d2u32, 1);
|
||||
pred_ptr += pred_stride;
|
||||
|
||||
q1u16 = vaddw_u8(qAdd, vreinterpret_u8_u32(d2u32));
|
||||
d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16));
|
||||
|
||||
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 0);
|
||||
dst_ptr += dst_stride;
|
||||
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 1);
|
||||
dst_ptr += dst_stride;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,142 @@
|
|||
/*
|
||||
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
static const int16_t cospi8sqrt2minus1 = 20091;
|
||||
static const int16_t sinpi8sqrt2 = 35468;
|
||||
|
||||
void vp8_dequant_idct_add_neon(
|
||||
int16_t *input,
|
||||
int16_t *dq,
|
||||
unsigned char *dst,
|
||||
int stride) {
|
||||
unsigned char *dst0;
|
||||
int32x2_t d14, d15;
|
||||
int16x4_t d2, d3, d4, d5, d10, d11, d12, d13;
|
||||
int16x8_t q1, q2, q3, q4, q5, q6;
|
||||
int16x8_t qEmpty = vdupq_n_s16(0);
|
||||
int32x2x2_t d2tmp0, d2tmp1;
|
||||
int16x4x2_t d2tmp2, d2tmp3;
|
||||
|
||||
d14 = d15 = vdup_n_s32(0);
|
||||
|
||||
// load input
|
||||
q3 = vld1q_s16(input);
|
||||
vst1q_s16(input, qEmpty);
|
||||
input += 8;
|
||||
q4 = vld1q_s16(input);
|
||||
vst1q_s16(input, qEmpty);
|
||||
|
||||
// load dq
|
||||
q5 = vld1q_s16(dq);
|
||||
dq += 8;
|
||||
q6 = vld1q_s16(dq);
|
||||
|
||||
// load src from dst
|
||||
dst0 = dst;
|
||||
d14 = vld1_lane_s32((const int32_t *)dst0, d14, 0);
|
||||
dst0 += stride;
|
||||
d14 = vld1_lane_s32((const int32_t *)dst0, d14, 1);
|
||||
dst0 += stride;
|
||||
d15 = vld1_lane_s32((const int32_t *)dst0, d15, 0);
|
||||
dst0 += stride;
|
||||
d15 = vld1_lane_s32((const int32_t *)dst0, d15, 1);
|
||||
|
||||
q1 = vreinterpretq_s16_u16(vmulq_u16(vreinterpretq_u16_s16(q3),
|
||||
vreinterpretq_u16_s16(q5)));
|
||||
q2 = vreinterpretq_s16_u16(vmulq_u16(vreinterpretq_u16_s16(q4),
|
||||
vreinterpretq_u16_s16(q6)));
|
||||
|
||||
d12 = vqadd_s16(vget_low_s16(q1), vget_low_s16(q2));
|
||||
d13 = vqsub_s16(vget_low_s16(q1), vget_low_s16(q2));
|
||||
|
||||
q2 = vcombine_s16(vget_high_s16(q1), vget_high_s16(q2));
|
||||
|
||||
q3 = vqdmulhq_n_s16(q2, sinpi8sqrt2);
|
||||
q4 = vqdmulhq_n_s16(q2, cospi8sqrt2minus1);
|
||||
|
||||
q3 = vshrq_n_s16(q3, 1);
|
||||
q4 = vshrq_n_s16(q4, 1);
|
||||
|
||||
q3 = vqaddq_s16(q3, q2);
|
||||
q4 = vqaddq_s16(q4, q2);
|
||||
|
||||
d10 = vqsub_s16(vget_low_s16(q3), vget_high_s16(q4));
|
||||
d11 = vqadd_s16(vget_high_s16(q3), vget_low_s16(q4));
|
||||
|
||||
d2 = vqadd_s16(d12, d11);
|
||||
d3 = vqadd_s16(d13, d10);
|
||||
d4 = vqsub_s16(d13, d10);
|
||||
d5 = vqsub_s16(d12, d11);
|
||||
|
||||
d2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4));
|
||||
d2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5));
|
||||
d2tmp2 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[0]),
|
||||
vreinterpret_s16_s32(d2tmp1.val[0]));
|
||||
d2tmp3 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[1]),
|
||||
vreinterpret_s16_s32(d2tmp1.val[1]));
|
||||
|
||||
// loop 2
|
||||
q2 = vcombine_s16(d2tmp2.val[1], d2tmp3.val[1]);
|
||||
|
||||
q3 = vqdmulhq_n_s16(q2, sinpi8sqrt2);
|
||||
q4 = vqdmulhq_n_s16(q2, cospi8sqrt2minus1);
|
||||
|
||||
d12 = vqadd_s16(d2tmp2.val[0], d2tmp3.val[0]);
|
||||
d13 = vqsub_s16(d2tmp2.val[0], d2tmp3.val[0]);
|
||||
|
||||
q3 = vshrq_n_s16(q3, 1);
|
||||
q4 = vshrq_n_s16(q4, 1);
|
||||
|
||||
q3 = vqaddq_s16(q3, q2);
|
||||
q4 = vqaddq_s16(q4, q2);
|
||||
|
||||
d10 = vqsub_s16(vget_low_s16(q3), vget_high_s16(q4));
|
||||
d11 = vqadd_s16(vget_high_s16(q3), vget_low_s16(q4));
|
||||
|
||||
d2 = vqadd_s16(d12, d11);
|
||||
d3 = vqadd_s16(d13, d10);
|
||||
d4 = vqsub_s16(d13, d10);
|
||||
d5 = vqsub_s16(d12, d11);
|
||||
|
||||
d2 = vrshr_n_s16(d2, 3);
|
||||
d3 = vrshr_n_s16(d3, 3);
|
||||
d4 = vrshr_n_s16(d4, 3);
|
||||
d5 = vrshr_n_s16(d5, 3);
|
||||
|
||||
d2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4));
|
||||
d2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5));
|
||||
d2tmp2 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[0]),
|
||||
vreinterpret_s16_s32(d2tmp1.val[0]));
|
||||
d2tmp3 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[1]),
|
||||
vreinterpret_s16_s32(d2tmp1.val[1]));
|
||||
|
||||
q1 = vcombine_s16(d2tmp2.val[0], d2tmp2.val[1]);
|
||||
q2 = vcombine_s16(d2tmp3.val[0], d2tmp3.val[1]);
|
||||
|
||||
q1 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q1),
|
||||
vreinterpret_u8_s32(d14)));
|
||||
q2 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2),
|
||||
vreinterpret_u8_s32(d15)));
|
||||
|
||||
d14 = vreinterpret_s32_u8(vqmovun_s16(q1));
|
||||
d15 = vreinterpret_s32_u8(vqmovun_s16(q2));
|
||||
|
||||
dst0 = dst;
|
||||
vst1_lane_s32((int32_t *)dst0, d14, 0);
|
||||
dst0 += stride;
|
||||
vst1_lane_s32((int32_t *)dst0, d14, 1);
|
||||
dst0 += stride;
|
||||
vst1_lane_s32((int32_t *)dst0, d15, 0);
|
||||
dst0 += stride;
|
||||
vst1_lane_s32((int32_t *)dst0, d15, 1);
|
||||
return;
|
||||
}
|
|
@ -0,0 +1,25 @@
|
|||
/*
|
||||
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
#include "vp8/common/blockd.h"
|
||||
|
||||
void vp8_dequantize_b_neon(BLOCKD *d, short *DQC) {
|
||||
int16x8x2_t qQ, qDQC, qDQ;
|
||||
|
||||
qQ = vld2q_s16(d->qcoeff);
|
||||
qDQC = vld2q_s16(DQC);
|
||||
|
||||
qDQ.val[0] = vmulq_s16(qQ.val[0], qDQC.val[0]);
|
||||
qDQ.val[1] = vmulq_s16(qQ.val[1], qDQC.val[1]);
|
||||
|
||||
vst2q_s16(d->dqcoeff, qDQ);
|
||||
}
|
|
@ -0,0 +1,96 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
|
||||
/* place these declarations here because we don't want to maintain them
|
||||
* outside of this scope
|
||||
*/
|
||||
void idct_dequant_full_2x_neon(short *q, short *dq,
|
||||
unsigned char *dst, int stride);
|
||||
void idct_dequant_0_2x_neon(short *q, short dq,
|
||||
unsigned char *dst, int stride);
|
||||
|
||||
|
||||
void vp8_dequant_idct_add_y_block_neon(short *q, short *dq,
|
||||
unsigned char *dst,
|
||||
int stride, char *eobs)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
if (((short *)(eobs))[0])
|
||||
{
|
||||
if (((short *)eobs)[0] & 0xfefe)
|
||||
idct_dequant_full_2x_neon (q, dq, dst, stride);
|
||||
else
|
||||
idct_dequant_0_2x_neon (q, dq[0], dst, stride);
|
||||
}
|
||||
|
||||
if (((short *)(eobs))[1])
|
||||
{
|
||||
if (((short *)eobs)[1] & 0xfefe)
|
||||
idct_dequant_full_2x_neon (q+32, dq, dst+8, stride);
|
||||
else
|
||||
idct_dequant_0_2x_neon (q+32, dq[0], dst+8, stride);
|
||||
}
|
||||
q += 64;
|
||||
dst += 4*stride;
|
||||
eobs += 4;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_dequant_idct_add_uv_block_neon(short *q, short *dq,
|
||||
unsigned char *dstu,
|
||||
unsigned char *dstv,
|
||||
int stride, char *eobs)
|
||||
{
|
||||
if (((short *)(eobs))[0])
|
||||
{
|
||||
if (((short *)eobs)[0] & 0xfefe)
|
||||
idct_dequant_full_2x_neon (q, dq, dstu, stride);
|
||||
else
|
||||
idct_dequant_0_2x_neon (q, dq[0], dstu, stride);
|
||||
}
|
||||
|
||||
q += 32;
|
||||
dstu += 4*stride;
|
||||
|
||||
if (((short *)(eobs))[1])
|
||||
{
|
||||
if (((short *)eobs)[1] & 0xfefe)
|
||||
idct_dequant_full_2x_neon (q, dq, dstu, stride);
|
||||
else
|
||||
idct_dequant_0_2x_neon (q, dq[0], dstu, stride);
|
||||
}
|
||||
|
||||
q += 32;
|
||||
|
||||
if (((short *)(eobs))[2])
|
||||
{
|
||||
if (((short *)eobs)[2] & 0xfefe)
|
||||
idct_dequant_full_2x_neon (q, dq, dstv, stride);
|
||||
else
|
||||
idct_dequant_0_2x_neon (q, dq[0], dstv, stride);
|
||||
}
|
||||
|
||||
q += 32;
|
||||
dstv += 4*stride;
|
||||
|
||||
if (((short *)(eobs))[3])
|
||||
{
|
||||
if (((short *)eobs)[3] & 0xfefe)
|
||||
idct_dequant_full_2x_neon (q, dq, dstv, stride);
|
||||
else
|
||||
idct_dequant_0_2x_neon (q, dq[0], dstv, stride);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,63 @@
|
|||
/*
|
||||
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
void idct_dequant_0_2x_neon(
|
||||
int16_t *q,
|
||||
int16_t dq,
|
||||
unsigned char *dst,
|
||||
int stride) {
|
||||
unsigned char *dst0;
|
||||
int i, a0, a1;
|
||||
int16x8x2_t q2Add;
|
||||
int32x2_t d2s32 = vdup_n_s32(0),
|
||||
d4s32 = vdup_n_s32(0);
|
||||
uint8x8_t d2u8, d4u8;
|
||||
uint16x8_t q1u16, q2u16;
|
||||
|
||||
a0 = ((q[0] * dq) + 4) >> 3;
|
||||
a1 = ((q[16] * dq) + 4) >> 3;
|
||||
q[0] = q[16] = 0;
|
||||
q2Add.val[0] = vdupq_n_s16((int16_t)a0);
|
||||
q2Add.val[1] = vdupq_n_s16((int16_t)a1);
|
||||
|
||||
for (i = 0; i < 2; i++, dst += 4) {
|
||||
dst0 = dst;
|
||||
d2s32 = vld1_lane_s32((const int32_t *)dst0, d2s32, 0);
|
||||
dst0 += stride;
|
||||
d2s32 = vld1_lane_s32((const int32_t *)dst0, d2s32, 1);
|
||||
dst0 += stride;
|
||||
d4s32 = vld1_lane_s32((const int32_t *)dst0, d4s32, 0);
|
||||
dst0 += stride;
|
||||
d4s32 = vld1_lane_s32((const int32_t *)dst0, d4s32, 1);
|
||||
|
||||
q1u16 = vaddw_u8(vreinterpretq_u16_s16(q2Add.val[i]),
|
||||
vreinterpret_u8_s32(d2s32));
|
||||
q2u16 = vaddw_u8(vreinterpretq_u16_s16(q2Add.val[i]),
|
||||
vreinterpret_u8_s32(d4s32));
|
||||
|
||||
d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16));
|
||||
d4u8 = vqmovun_s16(vreinterpretq_s16_u16(q2u16));
|
||||
|
||||
d2s32 = vreinterpret_s32_u8(d2u8);
|
||||
d4s32 = vreinterpret_s32_u8(d4u8);
|
||||
|
||||
dst0 = dst;
|
||||
vst1_lane_s32((int32_t *)dst0, d2s32, 0);
|
||||
dst0 += stride;
|
||||
vst1_lane_s32((int32_t *)dst0, d2s32, 1);
|
||||
dst0 += stride;
|
||||
vst1_lane_s32((int32_t *)dst0, d4s32, 0);
|
||||
dst0 += stride;
|
||||
vst1_lane_s32((int32_t *)dst0, d4s32, 1);
|
||||
}
|
||||
return;
|
||||
}
|
|
@ -0,0 +1,185 @@
|
|||
/*
|
||||
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
static const int16_t cospi8sqrt2minus1 = 20091;
|
||||
static const int16_t sinpi8sqrt2 = 17734;
|
||||
// because the lowest bit in 0x8a8c is 0, we can pre-shift this
|
||||
|
||||
void idct_dequant_full_2x_neon(
|
||||
int16_t *q,
|
||||
int16_t *dq,
|
||||
unsigned char *dst,
|
||||
int stride) {
|
||||
unsigned char *dst0, *dst1;
|
||||
int32x2_t d28, d29, d30, d31;
|
||||
int16x8_t q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11;
|
||||
int16x8_t qEmpty = vdupq_n_s16(0);
|
||||
int32x4x2_t q2tmp0, q2tmp1;
|
||||
int16x8x2_t q2tmp2, q2tmp3;
|
||||
int16x4_t dLow0, dLow1, dHigh0, dHigh1;
|
||||
|
||||
d28 = d29 = d30 = d31 = vdup_n_s32(0);
|
||||
|
||||
// load dq
|
||||
q0 = vld1q_s16(dq);
|
||||
dq += 8;
|
||||
q1 = vld1q_s16(dq);
|
||||
|
||||
// load q
|
||||
q2 = vld1q_s16(q);
|
||||
vst1q_s16(q, qEmpty);
|
||||
q += 8;
|
||||
q3 = vld1q_s16(q);
|
||||
vst1q_s16(q, qEmpty);
|
||||
q += 8;
|
||||
q4 = vld1q_s16(q);
|
||||
vst1q_s16(q, qEmpty);
|
||||
q += 8;
|
||||
q5 = vld1q_s16(q);
|
||||
vst1q_s16(q, qEmpty);
|
||||
|
||||
// load src from dst
|
||||
dst0 = dst;
|
||||
dst1 = dst + 4;
|
||||
d28 = vld1_lane_s32((const int32_t *)dst0, d28, 0);
|
||||
dst0 += stride;
|
||||
d28 = vld1_lane_s32((const int32_t *)dst1, d28, 1);
|
||||
dst1 += stride;
|
||||
d29 = vld1_lane_s32((const int32_t *)dst0, d29, 0);
|
||||
dst0 += stride;
|
||||
d29 = vld1_lane_s32((const int32_t *)dst1, d29, 1);
|
||||
dst1 += stride;
|
||||
|
||||
d30 = vld1_lane_s32((const int32_t *)dst0, d30, 0);
|
||||
dst0 += stride;
|
||||
d30 = vld1_lane_s32((const int32_t *)dst1, d30, 1);
|
||||
dst1 += stride;
|
||||
d31 = vld1_lane_s32((const int32_t *)dst0, d31, 0);
|
||||
d31 = vld1_lane_s32((const int32_t *)dst1, d31, 1);
|
||||
|
||||
q2 = vmulq_s16(q2, q0);
|
||||
q3 = vmulq_s16(q3, q1);
|
||||
q4 = vmulq_s16(q4, q0);
|
||||
q5 = vmulq_s16(q5, q1);
|
||||
|
||||
// vswp
|
||||
dLow0 = vget_low_s16(q2);
|
||||
dHigh0 = vget_high_s16(q2);
|
||||
dLow1 = vget_low_s16(q4);
|
||||
dHigh1 = vget_high_s16(q4);
|
||||
q2 = vcombine_s16(dLow0, dLow1);
|
||||
q4 = vcombine_s16(dHigh0, dHigh1);
|
||||
|
||||
dLow0 = vget_low_s16(q3);
|
||||
dHigh0 = vget_high_s16(q3);
|
||||
dLow1 = vget_low_s16(q5);
|
||||
dHigh1 = vget_high_s16(q5);
|
||||
q3 = vcombine_s16(dLow0, dLow1);
|
||||
q5 = vcombine_s16(dHigh0, dHigh1);
|
||||
|
||||
q6 = vqdmulhq_n_s16(q4, sinpi8sqrt2);
|
||||
q7 = vqdmulhq_n_s16(q5, sinpi8sqrt2);
|
||||
q8 = vqdmulhq_n_s16(q4, cospi8sqrt2minus1);
|
||||
q9 = vqdmulhq_n_s16(q5, cospi8sqrt2minus1);
|
||||
|
||||
q10 = vqaddq_s16(q2, q3);
|
||||
q11 = vqsubq_s16(q2, q3);
|
||||
|
||||
q8 = vshrq_n_s16(q8, 1);
|
||||
q9 = vshrq_n_s16(q9, 1);
|
||||
|
||||
q4 = vqaddq_s16(q4, q8);
|
||||
q5 = vqaddq_s16(q5, q9);
|
||||
|
||||
q2 = vqsubq_s16(q6, q5);
|
||||
q3 = vqaddq_s16(q7, q4);
|
||||
|
||||
q4 = vqaddq_s16(q10, q3);
|
||||
q5 = vqaddq_s16(q11, q2);
|
||||
q6 = vqsubq_s16(q11, q2);
|
||||
q7 = vqsubq_s16(q10, q3);
|
||||
|
||||
q2tmp0 = vtrnq_s32(vreinterpretq_s32_s16(q4), vreinterpretq_s32_s16(q6));
|
||||
q2tmp1 = vtrnq_s32(vreinterpretq_s32_s16(q5), vreinterpretq_s32_s16(q7));
|
||||
q2tmp2 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[0]),
|
||||
vreinterpretq_s16_s32(q2tmp1.val[0]));
|
||||
q2tmp3 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[1]),
|
||||
vreinterpretq_s16_s32(q2tmp1.val[1]));
|
||||
|
||||
// loop 2
|
||||
q8 = vqdmulhq_n_s16(q2tmp2.val[1], sinpi8sqrt2);
|
||||
q9 = vqdmulhq_n_s16(q2tmp3.val[1], sinpi8sqrt2);
|
||||
q10 = vqdmulhq_n_s16(q2tmp2.val[1], cospi8sqrt2minus1);
|
||||
q11 = vqdmulhq_n_s16(q2tmp3.val[1], cospi8sqrt2minus1);
|
||||
|
||||
q2 = vqaddq_s16(q2tmp2.val[0], q2tmp3.val[0]);
|
||||
q3 = vqsubq_s16(q2tmp2.val[0], q2tmp3.val[0]);
|
||||
|
||||
q10 = vshrq_n_s16(q10, 1);
|
||||
q11 = vshrq_n_s16(q11, 1);
|
||||
|
||||
q10 = vqaddq_s16(q2tmp2.val[1], q10);
|
||||
q11 = vqaddq_s16(q2tmp3.val[1], q11);
|
||||
|
||||
q8 = vqsubq_s16(q8, q11);
|
||||
q9 = vqaddq_s16(q9, q10);
|
||||
|
||||
q4 = vqaddq_s16(q2, q9);
|
||||
q5 = vqaddq_s16(q3, q8);
|
||||
q6 = vqsubq_s16(q3, q8);
|
||||
q7 = vqsubq_s16(q2, q9);
|
||||
|
||||
q4 = vrshrq_n_s16(q4, 3);
|
||||
q5 = vrshrq_n_s16(q5, 3);
|
||||
q6 = vrshrq_n_s16(q6, 3);
|
||||
q7 = vrshrq_n_s16(q7, 3);
|
||||
|
||||
q2tmp0 = vtrnq_s32(vreinterpretq_s32_s16(q4), vreinterpretq_s32_s16(q6));
|
||||
q2tmp1 = vtrnq_s32(vreinterpretq_s32_s16(q5), vreinterpretq_s32_s16(q7));
|
||||
q2tmp2 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[0]),
|
||||
vreinterpretq_s16_s32(q2tmp1.val[0]));
|
||||
q2tmp3 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[1]),
|
||||
vreinterpretq_s16_s32(q2tmp1.val[1]));
|
||||
|
||||
q4 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp2.val[0]),
|
||||
vreinterpret_u8_s32(d28)));
|
||||
q5 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp2.val[1]),
|
||||
vreinterpret_u8_s32(d29)));
|
||||
q6 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp3.val[0]),
|
||||
vreinterpret_u8_s32(d30)));
|
||||
q7 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2tmp3.val[1]),
|
||||
vreinterpret_u8_s32(d31)));
|
||||
|
||||
d28 = vreinterpret_s32_u8(vqmovun_s16(q4));
|
||||
d29 = vreinterpret_s32_u8(vqmovun_s16(q5));
|
||||
d30 = vreinterpret_s32_u8(vqmovun_s16(q6));
|
||||
d31 = vreinterpret_s32_u8(vqmovun_s16(q7));
|
||||
|
||||
dst0 = dst;
|
||||
dst1 = dst + 4;
|
||||
vst1_lane_s32((int32_t *)dst0, d28, 0);
|
||||
dst0 += stride;
|
||||
vst1_lane_s32((int32_t *)dst1, d28, 1);
|
||||
dst1 += stride;
|
||||
vst1_lane_s32((int32_t *)dst0, d29, 0);
|
||||
dst0 += stride;
|
||||
vst1_lane_s32((int32_t *)dst1, d29, 1);
|
||||
dst1 += stride;
|
||||
|
||||
vst1_lane_s32((int32_t *)dst0, d30, 0);
|
||||
dst0 += stride;
|
||||
vst1_lane_s32((int32_t *)dst1, d30, 1);
|
||||
dst1 += stride;
|
||||
vst1_lane_s32((int32_t *)dst0, d31, 0);
|
||||
vst1_lane_s32((int32_t *)dst1, d31, 1);
|
||||
return;
|
||||
}
|
|
@ -0,0 +1,102 @@
|
|||
/*
|
||||
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
void vp8_short_inv_walsh4x4_neon(
|
||||
int16_t *input,
|
||||
int16_t *mb_dqcoeff) {
|
||||
int16x8_t q0s16, q1s16, q2s16, q3s16;
|
||||
int16x4_t d4s16, d5s16, d6s16, d7s16;
|
||||
int16x4x2_t v2tmp0, v2tmp1;
|
||||
int32x2x2_t v2tmp2, v2tmp3;
|
||||
int16x8_t qAdd3;
|
||||
|
||||
q0s16 = vld1q_s16(input);
|
||||
q1s16 = vld1q_s16(input + 8);
|
||||
|
||||
// 1st for loop
|
||||
d4s16 = vadd_s16(vget_low_s16(q0s16), vget_high_s16(q1s16));
|
||||
d6s16 = vadd_s16(vget_high_s16(q0s16), vget_low_s16(q1s16));
|
||||
d5s16 = vsub_s16(vget_low_s16(q0s16), vget_high_s16(q1s16));
|
||||
d7s16 = vsub_s16(vget_high_s16(q0s16), vget_low_s16(q1s16));
|
||||
|
||||
q2s16 = vcombine_s16(d4s16, d5s16);
|
||||
q3s16 = vcombine_s16(d6s16, d7s16);
|
||||
|
||||
q0s16 = vaddq_s16(q2s16, q3s16);
|
||||
q1s16 = vsubq_s16(q2s16, q3s16);
|
||||
|
||||
v2tmp2 = vtrn_s32(vreinterpret_s32_s16(vget_low_s16(q0s16)),
|
||||
vreinterpret_s32_s16(vget_low_s16(q1s16)));
|
||||
v2tmp3 = vtrn_s32(vreinterpret_s32_s16(vget_high_s16(q0s16)),
|
||||
vreinterpret_s32_s16(vget_high_s16(q1s16)));
|
||||
v2tmp0 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[0]),
|
||||
vreinterpret_s16_s32(v2tmp3.val[0]));
|
||||
v2tmp1 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[1]),
|
||||
vreinterpret_s16_s32(v2tmp3.val[1]));
|
||||
|
||||
// 2nd for loop
|
||||
d4s16 = vadd_s16(v2tmp0.val[0], v2tmp1.val[1]);
|
||||
d6s16 = vadd_s16(v2tmp0.val[1], v2tmp1.val[0]);
|
||||
d5s16 = vsub_s16(v2tmp0.val[0], v2tmp1.val[1]);
|
||||
d7s16 = vsub_s16(v2tmp0.val[1], v2tmp1.val[0]);
|
||||
q2s16 = vcombine_s16(d4s16, d5s16);
|
||||
q3s16 = vcombine_s16(d6s16, d7s16);
|
||||
|
||||
qAdd3 = vdupq_n_s16(3);
|
||||
|
||||
q0s16 = vaddq_s16(q2s16, q3s16);
|
||||
q1s16 = vsubq_s16(q2s16, q3s16);
|
||||
|
||||
q0s16 = vaddq_s16(q0s16, qAdd3);
|
||||
q1s16 = vaddq_s16(q1s16, qAdd3);
|
||||
|
||||
q0s16 = vshrq_n_s16(q0s16, 3);
|
||||
q1s16 = vshrq_n_s16(q1s16, 3);
|
||||
|
||||
// store
|
||||
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 0);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 0);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 0);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 0);
|
||||
mb_dqcoeff += 16;
|
||||
|
||||
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 1);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 1);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 1);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 1);
|
||||
mb_dqcoeff += 16;
|
||||
|
||||
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 2);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 2);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 2);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 2);
|
||||
mb_dqcoeff += 16;
|
||||
|
||||
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 3);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 3);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 3);
|
||||
mb_dqcoeff += 16;
|
||||
vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 3);
|
||||
mb_dqcoeff += 16;
|
||||
return;
|
||||
}
|
111
thirdparty/libvpx/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c
vendored
Normal file
111
thirdparty/libvpx/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c
vendored
Normal file
|
@ -0,0 +1,111 @@
|
|||
/*
|
||||
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
#include "./vpx_config.h"
|
||||
|
||||
static INLINE void vp8_loop_filter_simple_horizontal_edge_neon(
|
||||
unsigned char *s,
|
||||
int p,
|
||||
const unsigned char *blimit) {
|
||||
uint8_t *sp;
|
||||
uint8x16_t qblimit, q0u8;
|
||||
uint8x16_t q5u8, q6u8, q7u8, q8u8, q9u8, q10u8, q14u8, q15u8;
|
||||
int16x8_t q2s16, q3s16, q13s16;
|
||||
int8x8_t d8s8, d9s8;
|
||||
int8x16_t q2s8, q3s8, q4s8, q10s8, q11s8, q14s8;
|
||||
|
||||
qblimit = vdupq_n_u8(*blimit);
|
||||
|
||||
sp = s - (p << 1);
|
||||
q5u8 = vld1q_u8(sp);
|
||||
sp += p;
|
||||
q6u8 = vld1q_u8(sp);
|
||||
sp += p;
|
||||
q7u8 = vld1q_u8(sp);
|
||||
sp += p;
|
||||
q8u8 = vld1q_u8(sp);
|
||||
|
||||
q15u8 = vabdq_u8(q6u8, q7u8);
|
||||
q14u8 = vabdq_u8(q5u8, q8u8);
|
||||
|
||||
q15u8 = vqaddq_u8(q15u8, q15u8);
|
||||
q14u8 = vshrq_n_u8(q14u8, 1);
|
||||
q0u8 = vdupq_n_u8(0x80);
|
||||
q13s16 = vdupq_n_s16(3);
|
||||
q15u8 = vqaddq_u8(q15u8, q14u8);
|
||||
|
||||
q5u8 = veorq_u8(q5u8, q0u8);
|
||||
q6u8 = veorq_u8(q6u8, q0u8);
|
||||
q7u8 = veorq_u8(q7u8, q0u8);
|
||||
q8u8 = veorq_u8(q8u8, q0u8);
|
||||
|
||||
q15u8 = vcgeq_u8(qblimit, q15u8);
|
||||
|
||||
q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7u8)),
|
||||
vget_low_s8(vreinterpretq_s8_u8(q6u8)));
|
||||
q3s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7u8)),
|
||||
vget_high_s8(vreinterpretq_s8_u8(q6u8)));
|
||||
|
||||
q4s8 = vqsubq_s8(vreinterpretq_s8_u8(q5u8),
|
||||
vreinterpretq_s8_u8(q8u8));
|
||||
|
||||
q2s16 = vmulq_s16(q2s16, q13s16);
|
||||
q3s16 = vmulq_s16(q3s16, q13s16);
|
||||
|
||||
q10u8 = vdupq_n_u8(3);
|
||||
q9u8 = vdupq_n_u8(4);
|
||||
|
||||
q2s16 = vaddw_s8(q2s16, vget_low_s8(q4s8));
|
||||
q3s16 = vaddw_s8(q3s16, vget_high_s8(q4s8));
|
||||
|
||||
d8s8 = vqmovn_s16(q2s16);
|
||||
d9s8 = vqmovn_s16(q3s16);
|
||||
q4s8 = vcombine_s8(d8s8, d9s8);
|
||||
|
||||
q14s8 = vandq_s8(q4s8, vreinterpretq_s8_u8(q15u8));
|
||||
|
||||
q2s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q10u8));
|
||||
q3s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q9u8));
|
||||
q2s8 = vshrq_n_s8(q2s8, 3);
|
||||
q3s8 = vshrq_n_s8(q3s8, 3);
|
||||
|
||||
q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q6u8), q2s8);
|
||||
q10s8 = vqsubq_s8(vreinterpretq_s8_u8(q7u8), q3s8);
|
||||
|
||||
q6u8 = veorq_u8(vreinterpretq_u8_s8(q11s8), q0u8);
|
||||
q7u8 = veorq_u8(vreinterpretq_u8_s8(q10s8), q0u8);
|
||||
|
||||
vst1q_u8(s, q7u8);
|
||||
s -= p;
|
||||
vst1q_u8(s, q6u8);
|
||||
return;
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bhs_neon(
|
||||
unsigned char *y_ptr,
|
||||
int y_stride,
|
||||
const unsigned char *blimit) {
|
||||
y_ptr += y_stride * 4;
|
||||
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit);
|
||||
y_ptr += y_stride * 4;
|
||||
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit);
|
||||
y_ptr += y_stride * 4;
|
||||
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit);
|
||||
return;
|
||||
}
|
||||
|
||||
void vp8_loop_filter_mbhs_neon(
|
||||
unsigned char *y_ptr,
|
||||
int y_stride,
|
||||
const unsigned char *blimit) {
|
||||
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit);
|
||||
return;
|
||||
}
|
|
@ -0,0 +1,283 @@
|
|||
/*
|
||||
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
#include "./vpx_config.h"
|
||||
#include "vpx_ports/arm.h"
|
||||
|
||||
#ifdef VPX_INCOMPATIBLE_GCC
|
||||
static INLINE void write_2x4(unsigned char *dst, int pitch,
|
||||
const uint8x8x2_t result) {
|
||||
/*
|
||||
* uint8x8x2_t result
|
||||
00 01 02 03 | 04 05 06 07
|
||||
10 11 12 13 | 14 15 16 17
|
||||
---
|
||||
* after vtrn_u8
|
||||
00 10 02 12 | 04 14 06 16
|
||||
01 11 03 13 | 05 15 07 17
|
||||
*/
|
||||
const uint8x8x2_t r01_u8 = vtrn_u8(result.val[0],
|
||||
result.val[1]);
|
||||
const uint16x4_t x_0_4 = vreinterpret_u16_u8(r01_u8.val[0]);
|
||||
const uint16x4_t x_1_5 = vreinterpret_u16_u8(r01_u8.val[1]);
|
||||
vst1_lane_u16((uint16_t *)dst, x_0_4, 0);
|
||||
dst += pitch;
|
||||
vst1_lane_u16((uint16_t *)dst, x_1_5, 0);
|
||||
dst += pitch;
|
||||
vst1_lane_u16((uint16_t *)dst, x_0_4, 1);
|
||||
dst += pitch;
|
||||
vst1_lane_u16((uint16_t *)dst, x_1_5, 1);
|
||||
dst += pitch;
|
||||
vst1_lane_u16((uint16_t *)dst, x_0_4, 2);
|
||||
dst += pitch;
|
||||
vst1_lane_u16((uint16_t *)dst, x_1_5, 2);
|
||||
dst += pitch;
|
||||
vst1_lane_u16((uint16_t *)dst, x_0_4, 3);
|
||||
dst += pitch;
|
||||
vst1_lane_u16((uint16_t *)dst, x_1_5, 3);
|
||||
}
|
||||
|
||||
static INLINE void write_2x8(unsigned char *dst, int pitch,
|
||||
const uint8x8x2_t result,
|
||||
const uint8x8x2_t result2) {
|
||||
write_2x4(dst, pitch, result);
|
||||
dst += pitch * 8;
|
||||
write_2x4(dst, pitch, result2);
|
||||
}
|
||||
#else
|
||||
static INLINE void write_2x8(unsigned char *dst, int pitch,
|
||||
const uint8x8x2_t result,
|
||||
const uint8x8x2_t result2) {
|
||||
vst2_lane_u8(dst, result, 0);
|
||||
dst += pitch;
|
||||
vst2_lane_u8(dst, result, 1);
|
||||
dst += pitch;
|
||||
vst2_lane_u8(dst, result, 2);
|
||||
dst += pitch;
|
||||
vst2_lane_u8(dst, result, 3);
|
||||
dst += pitch;
|
||||
vst2_lane_u8(dst, result, 4);
|
||||
dst += pitch;
|
||||
vst2_lane_u8(dst, result, 5);
|
||||
dst += pitch;
|
||||
vst2_lane_u8(dst, result, 6);
|
||||
dst += pitch;
|
||||
vst2_lane_u8(dst, result, 7);
|
||||
dst += pitch;
|
||||
|
||||
vst2_lane_u8(dst, result2, 0);
|
||||
dst += pitch;
|
||||
vst2_lane_u8(dst, result2, 1);
|
||||
dst += pitch;
|
||||
vst2_lane_u8(dst, result2, 2);
|
||||
dst += pitch;
|
||||
vst2_lane_u8(dst, result2, 3);
|
||||
dst += pitch;
|
||||
vst2_lane_u8(dst, result2, 4);
|
||||
dst += pitch;
|
||||
vst2_lane_u8(dst, result2, 5);
|
||||
dst += pitch;
|
||||
vst2_lane_u8(dst, result2, 6);
|
||||
dst += pitch;
|
||||
vst2_lane_u8(dst, result2, 7);
|
||||
}
|
||||
#endif // VPX_INCOMPATIBLE_GCC
|
||||
|
||||
|
||||
#ifdef VPX_INCOMPATIBLE_GCC
|
||||
static INLINE
|
||||
uint8x8x4_t read_4x8(unsigned char *src, int pitch) {
|
||||
uint8x8x4_t x;
|
||||
const uint8x8_t a = vld1_u8(src);
|
||||
const uint8x8_t b = vld1_u8(src + pitch * 1);
|
||||
const uint8x8_t c = vld1_u8(src + pitch * 2);
|
||||
const uint8x8_t d = vld1_u8(src + pitch * 3);
|
||||
const uint8x8_t e = vld1_u8(src + pitch * 4);
|
||||
const uint8x8_t f = vld1_u8(src + pitch * 5);
|
||||
const uint8x8_t g = vld1_u8(src + pitch * 6);
|
||||
const uint8x8_t h = vld1_u8(src + pitch * 7);
|
||||
const uint32x2x2_t r04_u32 = vtrn_u32(vreinterpret_u32_u8(a),
|
||||
vreinterpret_u32_u8(e));
|
||||
const uint32x2x2_t r15_u32 = vtrn_u32(vreinterpret_u32_u8(b),
|
||||
vreinterpret_u32_u8(f));
|
||||
const uint32x2x2_t r26_u32 = vtrn_u32(vreinterpret_u32_u8(c),
|
||||
vreinterpret_u32_u8(g));
|
||||
const uint32x2x2_t r37_u32 = vtrn_u32(vreinterpret_u32_u8(d),
|
||||
vreinterpret_u32_u8(h));
|
||||
const uint16x4x2_t r02_u16 = vtrn_u16(vreinterpret_u16_u32(r04_u32.val[0]),
|
||||
vreinterpret_u16_u32(r26_u32.val[0]));
|
||||
const uint16x4x2_t r13_u16 = vtrn_u16(vreinterpret_u16_u32(r15_u32.val[0]),
|
||||
vreinterpret_u16_u32(r37_u32.val[0]));
|
||||
const uint8x8x2_t r01_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[0]),
|
||||
vreinterpret_u8_u16(r13_u16.val[0]));
|
||||
const uint8x8x2_t r23_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[1]),
|
||||
vreinterpret_u8_u16(r13_u16.val[1]));
|
||||
/*
|
||||
* after vtrn_u32
|
||||
00 01 02 03 | 40 41 42 43
|
||||
10 11 12 13 | 50 51 52 53
|
||||
20 21 22 23 | 60 61 62 63
|
||||
30 31 32 33 | 70 71 72 73
|
||||
---
|
||||
* after vtrn_u16
|
||||
00 01 20 21 | 40 41 60 61
|
||||
02 03 22 23 | 42 43 62 63
|
||||
10 11 30 31 | 50 51 70 71
|
||||
12 13 32 33 | 52 52 72 73
|
||||
|
||||
00 01 20 21 | 40 41 60 61
|
||||
10 11 30 31 | 50 51 70 71
|
||||
02 03 22 23 | 42 43 62 63
|
||||
12 13 32 33 | 52 52 72 73
|
||||
---
|
||||
* after vtrn_u8
|
||||
00 10 20 30 | 40 50 60 70
|
||||
01 11 21 31 | 41 51 61 71
|
||||
02 12 22 32 | 42 52 62 72
|
||||
03 13 23 33 | 43 53 63 73
|
||||
*/
|
||||
x.val[0] = r01_u8.val[0];
|
||||
x.val[1] = r01_u8.val[1];
|
||||
x.val[2] = r23_u8.val[0];
|
||||
x.val[3] = r23_u8.val[1];
|
||||
|
||||
return x;
|
||||
}
|
||||
#else
|
||||
static INLINE
|
||||
uint8x8x4_t read_4x8(unsigned char *src, int pitch) {
|
||||
uint8x8x4_t x;
|
||||
x.val[0] = x.val[1] = x.val[2] = x.val[3] = vdup_n_u8(0);
|
||||
x = vld4_lane_u8(src, x, 0);
|
||||
src += pitch;
|
||||
x = vld4_lane_u8(src, x, 1);
|
||||
src += pitch;
|
||||
x = vld4_lane_u8(src, x, 2);
|
||||
src += pitch;
|
||||
x = vld4_lane_u8(src, x, 3);
|
||||
src += pitch;
|
||||
x = vld4_lane_u8(src, x, 4);
|
||||
src += pitch;
|
||||
x = vld4_lane_u8(src, x, 5);
|
||||
src += pitch;
|
||||
x = vld4_lane_u8(src, x, 6);
|
||||
src += pitch;
|
||||
x = vld4_lane_u8(src, x, 7);
|
||||
return x;
|
||||
}
|
||||
#endif // VPX_INCOMPATIBLE_GCC
|
||||
|
||||
static INLINE void vp8_loop_filter_simple_vertical_edge_neon(
        unsigned char *s,
        int p,
        const unsigned char *blimit) {
    unsigned char *src1;
    uint8x16_t qblimit, q0u8;
    uint8x16_t q3u8, q4u8, q5u8, q6u8, q7u8, q11u8, q12u8, q14u8, q15u8;
    int16x8_t q2s16, q13s16, q11s16;
    int8x8_t d28s8, d29s8;
    int8x16_t q2s8, q3s8, q10s8, q11s8, q14s8;
    uint8x8x4_t d0u8x4;  // d6, d7, d8, d9
    uint8x8x4_t d1u8x4;  // d10, d11, d12, d13
    uint8x8x2_t d2u8x2;  // d12, d13
    uint8x8x2_t d3u8x2;  // d14, d15

    qblimit = vdupq_n_u8(*blimit);

    src1 = s - 2;
    d0u8x4 = read_4x8(src1, p);
    src1 += p * 8;
    d1u8x4 = read_4x8(src1, p);

    q3u8 = vcombine_u8(d0u8x4.val[0], d1u8x4.val[0]);  // d6 d10
    q4u8 = vcombine_u8(d0u8x4.val[2], d1u8x4.val[2]);  // d8 d12
    q5u8 = vcombine_u8(d0u8x4.val[1], d1u8x4.val[1]);  // d7 d11
    q6u8 = vcombine_u8(d0u8x4.val[3], d1u8x4.val[3]);  // d9 d13

    q15u8 = vabdq_u8(q5u8, q4u8);
    q14u8 = vabdq_u8(q3u8, q6u8);

    q15u8 = vqaddq_u8(q15u8, q15u8);
    q14u8 = vshrq_n_u8(q14u8, 1);
    q0u8 = vdupq_n_u8(0x80);
    q11s16 = vdupq_n_s16(3);
    q15u8 = vqaddq_u8(q15u8, q14u8);

    q3u8 = veorq_u8(q3u8, q0u8);
    q4u8 = veorq_u8(q4u8, q0u8);
    q5u8 = veorq_u8(q5u8, q0u8);
    q6u8 = veorq_u8(q6u8, q0u8);

    q15u8 = vcgeq_u8(qblimit, q15u8);

    q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q4u8)),
                     vget_low_s8(vreinterpretq_s8_u8(q5u8)));
    q13s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q4u8)),
                      vget_high_s8(vreinterpretq_s8_u8(q5u8)));

    q14s8 = vqsubq_s8(vreinterpretq_s8_u8(q3u8),
                      vreinterpretq_s8_u8(q6u8));

    q2s16 = vmulq_s16(q2s16, q11s16);
    q13s16 = vmulq_s16(q13s16, q11s16);

    q11u8 = vdupq_n_u8(3);
    q12u8 = vdupq_n_u8(4);

    q2s16 = vaddw_s8(q2s16, vget_low_s8(q14s8));
    q13s16 = vaddw_s8(q13s16, vget_high_s8(q14s8));

    d28s8 = vqmovn_s16(q2s16);
    d29s8 = vqmovn_s16(q13s16);
    q14s8 = vcombine_s8(d28s8, d29s8);

    q14s8 = vandq_s8(q14s8, vreinterpretq_s8_u8(q15u8));

    q2s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q11u8));
    q3s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q12u8));
    q2s8 = vshrq_n_s8(q2s8, 3);
    q14s8 = vshrq_n_s8(q3s8, 3);

    q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q5u8), q2s8);
    q10s8 = vqsubq_s8(vreinterpretq_s8_u8(q4u8), q14s8);

    q6u8 = veorq_u8(vreinterpretq_u8_s8(q11s8), q0u8);
    q7u8 = veorq_u8(vreinterpretq_u8_s8(q10s8), q0u8);

    d2u8x2.val[0] = vget_low_u8(q6u8);   // d12
    d2u8x2.val[1] = vget_low_u8(q7u8);   // d14
    d3u8x2.val[0] = vget_high_u8(q6u8);  // d13
    d3u8x2.val[1] = vget_high_u8(q7u8);  // d15

    src1 = s - 1;
    write_2x8(src1, p, d2u8x2, d3u8x2);
}

void vp8_loop_filter_bvs_neon(
        unsigned char *y_ptr,
        int y_stride,
        const unsigned char *blimit) {
    y_ptr += 4;
    vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit);
    y_ptr += 4;
    vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit);
    y_ptr += 4;
    vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit);
    return;
}

void vp8_loop_filter_mbvs_neon(
        unsigned char *y_ptr,
        int y_stride,
        const unsigned char *blimit) {
    vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit);
    return;
}
@ -0,0 +1,625 @@
/*
 * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include <arm_neon.h>
#include "./vpx_config.h"

static INLINE void vp8_mbloop_filter_neon(
|
||||
uint8x16_t qblimit, // mblimit
|
||||
uint8x16_t qlimit, // limit
|
||||
uint8x16_t qthresh, // thresh
|
||||
uint8x16_t q3, // p2
|
||||
uint8x16_t q4, // p2
|
||||
uint8x16_t q5, // p1
|
||||
uint8x16_t q6, // p0
|
||||
uint8x16_t q7, // q0
|
||||
uint8x16_t q8, // q1
|
||||
uint8x16_t q9, // q2
|
||||
uint8x16_t q10, // q3
|
||||
uint8x16_t *q4r, // p1
|
||||
uint8x16_t *q5r, // p1
|
||||
uint8x16_t *q6r, // p0
|
||||
uint8x16_t *q7r, // q0
|
||||
uint8x16_t *q8r, // q1
|
||||
uint8x16_t *q9r) { // q1
|
||||
uint8x16_t q0u8, q1u8, q11u8, q12u8, q13u8, q14u8, q15u8;
|
||||
int16x8_t q0s16, q2s16, q11s16, q12s16, q13s16, q14s16, q15s16;
|
||||
int8x16_t q1s8, q6s8, q7s8, q2s8, q11s8, q13s8;
|
||||
uint16x8_t q0u16, q11u16, q12u16, q13u16, q14u16, q15u16;
|
||||
int8x16_t q0s8, q12s8, q14s8, q15s8;
|
||||
int8x8_t d0, d1, d2, d3, d4, d5, d24, d25, d28, d29;
|
||||
|
||||
q11u8 = vabdq_u8(q3, q4);
|
||||
q12u8 = vabdq_u8(q4, q5);
|
||||
q13u8 = vabdq_u8(q5, q6);
|
||||
q14u8 = vabdq_u8(q8, q7);
|
||||
q1u8 = vabdq_u8(q9, q8);
|
||||
q0u8 = vabdq_u8(q10, q9);
|
||||
|
||||
q11u8 = vmaxq_u8(q11u8, q12u8);
|
||||
q12u8 = vmaxq_u8(q13u8, q14u8);
|
||||
q1u8 = vmaxq_u8(q1u8, q0u8);
|
||||
q15u8 = vmaxq_u8(q11u8, q12u8);
|
||||
|
||||
q12u8 = vabdq_u8(q6, q7);
|
||||
|
||||
// vp8_hevmask
|
||||
q13u8 = vcgtq_u8(q13u8, qthresh);
|
||||
q14u8 = vcgtq_u8(q14u8, qthresh);
|
||||
q15u8 = vmaxq_u8(q15u8, q1u8);
|
||||
|
||||
q15u8 = vcgeq_u8(qlimit, q15u8);
|
||||
|
||||
q1u8 = vabdq_u8(q5, q8);
|
||||
q12u8 = vqaddq_u8(q12u8, q12u8);
|
||||
|
||||
// vp8_filter() function
|
||||
// convert to signed
|
||||
q0u8 = vdupq_n_u8(0x80);
|
||||
q9 = veorq_u8(q9, q0u8);
|
||||
q8 = veorq_u8(q8, q0u8);
|
||||
q7 = veorq_u8(q7, q0u8);
|
||||
q6 = veorq_u8(q6, q0u8);
|
||||
q5 = veorq_u8(q5, q0u8);
|
||||
q4 = veorq_u8(q4, q0u8);
|
||||
|
||||
q1u8 = vshrq_n_u8(q1u8, 1);
|
||||
q12u8 = vqaddq_u8(q12u8, q1u8);
|
||||
|
||||
q14u8 = vorrq_u8(q13u8, q14u8);
|
||||
q12u8 = vcgeq_u8(qblimit, q12u8);
|
||||
|
||||
q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7)),
|
||||
vget_low_s8(vreinterpretq_s8_u8(q6)));
|
||||
q13s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7)),
|
||||
vget_high_s8(vreinterpretq_s8_u8(q6)));
|
||||
|
||||
q1s8 = vqsubq_s8(vreinterpretq_s8_u8(q5),
|
||||
vreinterpretq_s8_u8(q8));
|
||||
|
||||
q11s16 = vdupq_n_s16(3);
|
||||
q2s16 = vmulq_s16(q2s16, q11s16);
|
||||
q13s16 = vmulq_s16(q13s16, q11s16);
|
||||
|
||||
q15u8 = vandq_u8(q15u8, q12u8);
|
||||
|
||||
q2s16 = vaddw_s8(q2s16, vget_low_s8(q1s8));
|
||||
q13s16 = vaddw_s8(q13s16, vget_high_s8(q1s8));
|
||||
|
||||
q12u8 = vdupq_n_u8(3);
|
||||
q11u8 = vdupq_n_u8(4);
|
||||
// vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0))
|
||||
d2 = vqmovn_s16(q2s16);
|
||||
d3 = vqmovn_s16(q13s16);
|
||||
q1s8 = vcombine_s8(d2, d3);
|
||||
q1s8 = vandq_s8(q1s8, vreinterpretq_s8_u8(q15u8));
|
||||
q13s8 = vandq_s8(q1s8, vreinterpretq_s8_u8(q14u8));
|
||||
|
||||
q2s8 = vqaddq_s8(q13s8, vreinterpretq_s8_u8(q11u8));
|
||||
q13s8 = vqaddq_s8(q13s8, vreinterpretq_s8_u8(q12u8));
|
||||
q2s8 = vshrq_n_s8(q2s8, 3);
|
||||
q13s8 = vshrq_n_s8(q13s8, 3);
|
||||
|
||||
q7s8 = vqsubq_s8(vreinterpretq_s8_u8(q7), q2s8);
|
||||
q6s8 = vqaddq_s8(vreinterpretq_s8_u8(q6), q13s8);
|
||||
|
||||
q1s8 = vbicq_s8(q1s8, vreinterpretq_s8_u8(q14u8));
|
||||
|
||||
q0u16 = q11u16 = q12u16 = q13u16 = q14u16 = q15u16 = vdupq_n_u16(63);
|
||||
d5 = vdup_n_s8(9);
|
||||
d4 = vdup_n_s8(18);
|
||||
|
||||
q0s16 = vmlal_s8(vreinterpretq_s16_u16(q0u16), vget_low_s8(q1s8), d5);
|
||||
q11s16 = vmlal_s8(vreinterpretq_s16_u16(q11u16), vget_high_s8(q1s8), d5);
|
||||
d5 = vdup_n_s8(27);
|
||||
q12s16 = vmlal_s8(vreinterpretq_s16_u16(q12u16), vget_low_s8(q1s8), d4);
|
||||
q13s16 = vmlal_s8(vreinterpretq_s16_u16(q13u16), vget_high_s8(q1s8), d4);
|
||||
q14s16 = vmlal_s8(vreinterpretq_s16_u16(q14u16), vget_low_s8(q1s8), d5);
|
||||
q15s16 = vmlal_s8(vreinterpretq_s16_u16(q15u16), vget_high_s8(q1s8), d5);
|
||||
|
||||
d0 = vqshrn_n_s16(q0s16 , 7);
|
||||
d1 = vqshrn_n_s16(q11s16, 7);
|
||||
d24 = vqshrn_n_s16(q12s16, 7);
|
||||
d25 = vqshrn_n_s16(q13s16, 7);
|
||||
d28 = vqshrn_n_s16(q14s16, 7);
|
||||
d29 = vqshrn_n_s16(q15s16, 7);
|
||||
|
||||
q0s8 = vcombine_s8(d0, d1);
|
||||
q12s8 = vcombine_s8(d24, d25);
|
||||
q14s8 = vcombine_s8(d28, d29);
|
||||
|
||||
q11s8 = vqsubq_s8(vreinterpretq_s8_u8(q9), q0s8);
|
||||
q0s8 = vqaddq_s8(vreinterpretq_s8_u8(q4), q0s8);
|
||||
q13s8 = vqsubq_s8(vreinterpretq_s8_u8(q8), q12s8);
|
||||
q12s8 = vqaddq_s8(vreinterpretq_s8_u8(q5), q12s8);
|
||||
q15s8 = vqsubq_s8((q7s8), q14s8);
|
||||
q14s8 = vqaddq_s8((q6s8), q14s8);
|
||||
|
||||
q1u8 = vdupq_n_u8(0x80);
|
||||
*q9r = veorq_u8(vreinterpretq_u8_s8(q11s8), q1u8);
|
||||
*q8r = veorq_u8(vreinterpretq_u8_s8(q13s8), q1u8);
|
||||
*q7r = veorq_u8(vreinterpretq_u8_s8(q15s8), q1u8);
|
||||
*q6r = veorq_u8(vreinterpretq_u8_s8(q14s8), q1u8);
|
||||
*q5r = veorq_u8(vreinterpretq_u8_s8(q12s8), q1u8);
|
||||
*q4r = veorq_u8(vreinterpretq_u8_s8(q0s8), q1u8);
|
||||
return;
|
||||
}
|
||||
|
||||
void vp8_mbloop_filter_horizontal_edge_y_neon(
|
||||
unsigned char *src,
|
||||
int pitch,
|
||||
unsigned char blimit,
|
||||
unsigned char limit,
|
||||
unsigned char thresh) {
|
||||
uint8x16_t qblimit, qlimit, qthresh, q3, q4;
|
||||
uint8x16_t q5, q6, q7, q8, q9, q10;
|
||||
|
||||
qblimit = vdupq_n_u8(blimit);
|
||||
qlimit = vdupq_n_u8(limit);
|
||||
qthresh = vdupq_n_u8(thresh);
|
||||
|
||||
src -= (pitch << 2);
|
||||
|
||||
q3 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q4 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q5 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q6 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q7 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q8 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q9 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q10 = vld1q_u8(src);
|
||||
|
||||
vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
|
||||
q5, q6, q7, q8, q9, q10,
|
||||
&q4, &q5, &q6, &q7, &q8, &q9);
|
||||
|
||||
src -= (pitch * 6);
|
||||
vst1q_u8(src, q4);
|
||||
src += pitch;
|
||||
vst1q_u8(src, q5);
|
||||
src += pitch;
|
||||
vst1q_u8(src, q6);
|
||||
src += pitch;
|
||||
vst1q_u8(src, q7);
|
||||
src += pitch;
|
||||
vst1q_u8(src, q8);
|
||||
src += pitch;
|
||||
vst1q_u8(src, q9);
|
||||
return;
|
||||
}
|
||||
|
||||
void vp8_mbloop_filter_horizontal_edge_uv_neon(
|
||||
unsigned char *u,
|
||||
int pitch,
|
||||
unsigned char blimit,
|
||||
unsigned char limit,
|
||||
unsigned char thresh,
|
||||
unsigned char *v) {
|
||||
uint8x16_t qblimit, qlimit, qthresh, q3, q4;
|
||||
uint8x16_t q5, q6, q7, q8, q9, q10;
|
||||
uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
|
||||
uint8x8_t d15, d16, d17, d18, d19, d20, d21;
|
||||
|
||||
qblimit = vdupq_n_u8(blimit);
|
||||
qlimit = vdupq_n_u8(limit);
|
||||
qthresh = vdupq_n_u8(thresh);
|
||||
|
||||
u -= (pitch << 2);
|
||||
v -= (pitch << 2);
|
||||
|
||||
d6 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d7 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d8 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d9 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d10 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d11 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d12 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d13 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d14 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d15 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d16 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d17 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d18 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d19 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d20 = vld1_u8(u);
|
||||
d21 = vld1_u8(v);
|
||||
|
||||
q3 = vcombine_u8(d6, d7);
|
||||
q4 = vcombine_u8(d8, d9);
|
||||
q5 = vcombine_u8(d10, d11);
|
||||
q6 = vcombine_u8(d12, d13);
|
||||
q7 = vcombine_u8(d14, d15);
|
||||
q8 = vcombine_u8(d16, d17);
|
||||
q9 = vcombine_u8(d18, d19);
|
||||
q10 = vcombine_u8(d20, d21);
|
||||
|
||||
vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
|
||||
q5, q6, q7, q8, q9, q10,
|
||||
&q4, &q5, &q6, &q7, &q8, &q9);
|
||||
|
||||
u -= (pitch * 6);
|
||||
v -= (pitch * 6);
|
||||
vst1_u8(u, vget_low_u8(q4));
|
||||
u += pitch;
|
||||
vst1_u8(v, vget_high_u8(q4));
|
||||
v += pitch;
|
||||
vst1_u8(u, vget_low_u8(q5));
|
||||
u += pitch;
|
||||
vst1_u8(v, vget_high_u8(q5));
|
||||
v += pitch;
|
||||
vst1_u8(u, vget_low_u8(q6));
|
||||
u += pitch;
|
||||
vst1_u8(v, vget_high_u8(q6));
|
||||
v += pitch;
|
||||
vst1_u8(u, vget_low_u8(q7));
|
||||
u += pitch;
|
||||
vst1_u8(v, vget_high_u8(q7));
|
||||
v += pitch;
|
||||
vst1_u8(u, vget_low_u8(q8));
|
||||
u += pitch;
|
||||
vst1_u8(v, vget_high_u8(q8));
|
||||
v += pitch;
|
||||
vst1_u8(u, vget_low_u8(q9));
|
||||
vst1_u8(v, vget_high_u8(q9));
|
||||
return;
|
||||
}
|
||||
|
||||
void vp8_mbloop_filter_vertical_edge_y_neon(
|
||||
unsigned char *src,
|
||||
int pitch,
|
||||
unsigned char blimit,
|
||||
unsigned char limit,
|
||||
unsigned char thresh) {
|
||||
unsigned char *s1, *s2;
|
||||
uint8x16_t qblimit, qlimit, qthresh, q3, q4;
|
||||
uint8x16_t q5, q6, q7, q8, q9, q10;
|
||||
uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
|
||||
uint8x8_t d15, d16, d17, d18, d19, d20, d21;
|
||||
uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3;
|
||||
uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7;
|
||||
uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11;
|
||||
|
||||
qblimit = vdupq_n_u8(blimit);
|
||||
qlimit = vdupq_n_u8(limit);
|
||||
qthresh = vdupq_n_u8(thresh);
|
||||
|
||||
s1 = src - 4;
|
||||
s2 = s1 + 8 * pitch;
|
||||
d6 = vld1_u8(s1);
|
||||
s1 += pitch;
|
||||
d7 = vld1_u8(s2);
|
||||
s2 += pitch;
|
||||
d8 = vld1_u8(s1);
|
||||
s1 += pitch;
|
||||
d9 = vld1_u8(s2);
|
||||
s2 += pitch;
|
||||
d10 = vld1_u8(s1);
|
||||
s1 += pitch;
|
||||
d11 = vld1_u8(s2);
|
||||
s2 += pitch;
|
||||
d12 = vld1_u8(s1);
|
||||
s1 += pitch;
|
||||
d13 = vld1_u8(s2);
|
||||
s2 += pitch;
|
||||
d14 = vld1_u8(s1);
|
||||
s1 += pitch;
|
||||
d15 = vld1_u8(s2);
|
||||
s2 += pitch;
|
||||
d16 = vld1_u8(s1);
|
||||
s1 += pitch;
|
||||
d17 = vld1_u8(s2);
|
||||
s2 += pitch;
|
||||
d18 = vld1_u8(s1);
|
||||
s1 += pitch;
|
||||
d19 = vld1_u8(s2);
|
||||
s2 += pitch;
|
||||
d20 = vld1_u8(s1);
|
||||
d21 = vld1_u8(s2);
|
||||
|
||||
q3 = vcombine_u8(d6, d7);
|
||||
q4 = vcombine_u8(d8, d9);
|
||||
q5 = vcombine_u8(d10, d11);
|
||||
q6 = vcombine_u8(d12, d13);
|
||||
q7 = vcombine_u8(d14, d15);
|
||||
q8 = vcombine_u8(d16, d17);
|
||||
q9 = vcombine_u8(d18, d19);
|
||||
q10 = vcombine_u8(d20, d21);
|
||||
|
||||
q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
|
||||
q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
|
||||
q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
|
||||
q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
|
||||
|
||||
q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
|
||||
vreinterpretq_u16_u32(q2tmp2.val[0]));
|
||||
q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
|
||||
vreinterpretq_u16_u32(q2tmp3.val[0]));
|
||||
q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
|
||||
vreinterpretq_u16_u32(q2tmp2.val[1]));
|
||||
q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
|
||||
vreinterpretq_u16_u32(q2tmp3.val[1]));
|
||||
|
||||
q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
|
||||
vreinterpretq_u8_u16(q2tmp5.val[0]));
|
||||
q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
|
||||
vreinterpretq_u8_u16(q2tmp5.val[1]));
|
||||
q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
|
||||
vreinterpretq_u8_u16(q2tmp7.val[0]));
|
||||
q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
|
||||
vreinterpretq_u8_u16(q2tmp7.val[1]));
|
||||
|
||||
q3 = q2tmp8.val[0];
|
||||
q4 = q2tmp8.val[1];
|
||||
q5 = q2tmp9.val[0];
|
||||
q6 = q2tmp9.val[1];
|
||||
q7 = q2tmp10.val[0];
|
||||
q8 = q2tmp10.val[1];
|
||||
q9 = q2tmp11.val[0];
|
||||
q10 = q2tmp11.val[1];
|
||||
|
||||
vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
|
||||
q5, q6, q7, q8, q9, q10,
|
||||
&q4, &q5, &q6, &q7, &q8, &q9);
|
||||
|
||||
q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
|
||||
q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
|
||||
q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
|
||||
q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
|
||||
|
||||
q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
|
||||
vreinterpretq_u16_u32(q2tmp2.val[0]));
|
||||
q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
|
||||
vreinterpretq_u16_u32(q2tmp3.val[0]));
|
||||
q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
|
||||
vreinterpretq_u16_u32(q2tmp2.val[1]));
|
||||
q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
|
||||
vreinterpretq_u16_u32(q2tmp3.val[1]));
|
||||
|
||||
q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
|
||||
vreinterpretq_u8_u16(q2tmp5.val[0]));
|
||||
q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
|
||||
vreinterpretq_u8_u16(q2tmp5.val[1]));
|
||||
q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
|
||||
vreinterpretq_u8_u16(q2tmp7.val[0]));
|
||||
q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
|
||||
vreinterpretq_u8_u16(q2tmp7.val[1]));
|
||||
|
||||
q3 = q2tmp8.val[0];
|
||||
q4 = q2tmp8.val[1];
|
||||
q5 = q2tmp9.val[0];
|
||||
q6 = q2tmp9.val[1];
|
||||
q7 = q2tmp10.val[0];
|
||||
q8 = q2tmp10.val[1];
|
||||
q9 = q2tmp11.val[0];
|
||||
q10 = q2tmp11.val[1];
|
||||
|
||||
s1 -= 7 * pitch;
|
||||
s2 -= 7 * pitch;
|
||||
|
||||
vst1_u8(s1, vget_low_u8(q3));
|
||||
s1 += pitch;
|
||||
vst1_u8(s2, vget_high_u8(q3));
|
||||
s2 += pitch;
|
||||
vst1_u8(s1, vget_low_u8(q4));
|
||||
s1 += pitch;
|
||||
vst1_u8(s2, vget_high_u8(q4));
|
||||
s2 += pitch;
|
||||
vst1_u8(s1, vget_low_u8(q5));
|
||||
s1 += pitch;
|
||||
vst1_u8(s2, vget_high_u8(q5));
|
||||
s2 += pitch;
|
||||
vst1_u8(s1, vget_low_u8(q6));
|
||||
s1 += pitch;
|
||||
vst1_u8(s2, vget_high_u8(q6));
|
||||
s2 += pitch;
|
||||
vst1_u8(s1, vget_low_u8(q7));
|
||||
s1 += pitch;
|
||||
vst1_u8(s2, vget_high_u8(q7));
|
||||
s2 += pitch;
|
||||
vst1_u8(s1, vget_low_u8(q8));
|
||||
s1 += pitch;
|
||||
vst1_u8(s2, vget_high_u8(q8));
|
||||
s2 += pitch;
|
||||
vst1_u8(s1, vget_low_u8(q9));
|
||||
s1 += pitch;
|
||||
vst1_u8(s2, vget_high_u8(q9));
|
||||
s2 += pitch;
|
||||
vst1_u8(s1, vget_low_u8(q10));
|
||||
vst1_u8(s2, vget_high_u8(q10));
|
||||
return;
|
||||
}
|
||||
|
||||
void vp8_mbloop_filter_vertical_edge_uv_neon(
|
||||
unsigned char *u,
|
||||
int pitch,
|
||||
unsigned char blimit,
|
||||
unsigned char limit,
|
||||
unsigned char thresh,
|
||||
unsigned char *v) {
|
||||
unsigned char *us, *ud;
|
||||
unsigned char *vs, *vd;
|
||||
uint8x16_t qblimit, qlimit, qthresh, q3, q4;
|
||||
uint8x16_t q5, q6, q7, q8, q9, q10;
|
||||
uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
|
||||
uint8x8_t d15, d16, d17, d18, d19, d20, d21;
|
||||
uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3;
|
||||
uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7;
|
||||
uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11;
|
||||
|
||||
qblimit = vdupq_n_u8(blimit);
|
||||
qlimit = vdupq_n_u8(limit);
|
||||
qthresh = vdupq_n_u8(thresh);
|
||||
|
||||
us = u - 4;
|
||||
vs = v - 4;
|
||||
d6 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d7 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d8 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d9 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d10 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d11 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d12 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d13 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d14 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d15 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d16 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d17 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d18 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d19 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d20 = vld1_u8(us);
|
||||
d21 = vld1_u8(vs);
|
||||
|
||||
q3 = vcombine_u8(d6, d7);
|
||||
q4 = vcombine_u8(d8, d9);
|
||||
q5 = vcombine_u8(d10, d11);
|
||||
q6 = vcombine_u8(d12, d13);
|
||||
q7 = vcombine_u8(d14, d15);
|
||||
q8 = vcombine_u8(d16, d17);
|
||||
q9 = vcombine_u8(d18, d19);
|
||||
q10 = vcombine_u8(d20, d21);
|
||||
|
||||
q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
|
||||
q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
|
||||
q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
|
||||
q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
|
||||
|
||||
q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
|
||||
vreinterpretq_u16_u32(q2tmp2.val[0]));
|
||||
q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
|
||||
vreinterpretq_u16_u32(q2tmp3.val[0]));
|
||||
q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
|
||||
vreinterpretq_u16_u32(q2tmp2.val[1]));
|
||||
q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
|
||||
vreinterpretq_u16_u32(q2tmp3.val[1]));
|
||||
|
||||
q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
|
||||
vreinterpretq_u8_u16(q2tmp5.val[0]));
|
||||
q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
|
||||
vreinterpretq_u8_u16(q2tmp5.val[1]));
|
||||
q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
|
||||
vreinterpretq_u8_u16(q2tmp7.val[0]));
|
||||
q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
|
||||
vreinterpretq_u8_u16(q2tmp7.val[1]));
|
||||
|
||||
q3 = q2tmp8.val[0];
|
||||
q4 = q2tmp8.val[1];
|
||||
q5 = q2tmp9.val[0];
|
||||
q6 = q2tmp9.val[1];
|
||||
q7 = q2tmp10.val[0];
|
||||
q8 = q2tmp10.val[1];
|
||||
q9 = q2tmp11.val[0];
|
||||
q10 = q2tmp11.val[1];
|
||||
|
||||
vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
|
||||
q5, q6, q7, q8, q9, q10,
|
||||
&q4, &q5, &q6, &q7, &q8, &q9);
|
||||
|
||||
q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
|
||||
q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
|
||||
q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
|
||||
q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
|
||||
|
||||
q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
|
||||
vreinterpretq_u16_u32(q2tmp2.val[0]));
|
||||
q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
|
||||
vreinterpretq_u16_u32(q2tmp3.val[0]));
|
||||
q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
|
||||
vreinterpretq_u16_u32(q2tmp2.val[1]));
|
||||
q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
|
||||
vreinterpretq_u16_u32(q2tmp3.val[1]));
|
||||
|
||||
q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
|
||||
vreinterpretq_u8_u16(q2tmp5.val[0]));
|
||||
q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
|
||||
vreinterpretq_u8_u16(q2tmp5.val[1]));
|
||||
q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
|
||||
vreinterpretq_u8_u16(q2tmp7.val[0]));
|
||||
q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
|
||||
vreinterpretq_u8_u16(q2tmp7.val[1]));
|
||||
|
||||
q3 = q2tmp8.val[0];
|
||||
q4 = q2tmp8.val[1];
|
||||
q5 = q2tmp9.val[0];
|
||||
q6 = q2tmp9.val[1];
|
||||
q7 = q2tmp10.val[0];
|
||||
q8 = q2tmp10.val[1];
|
||||
q9 = q2tmp11.val[0];
|
||||
q10 = q2tmp11.val[1];
|
||||
|
||||
ud = u - 4;
|
||||
vst1_u8(ud, vget_low_u8(q3));
|
||||
ud += pitch;
|
||||
vst1_u8(ud, vget_low_u8(q4));
|
||||
ud += pitch;
|
||||
vst1_u8(ud, vget_low_u8(q5));
|
||||
ud += pitch;
|
||||
vst1_u8(ud, vget_low_u8(q6));
|
||||
ud += pitch;
|
||||
vst1_u8(ud, vget_low_u8(q7));
|
||||
ud += pitch;
|
||||
vst1_u8(ud, vget_low_u8(q8));
|
||||
ud += pitch;
|
||||
vst1_u8(ud, vget_low_u8(q9));
|
||||
ud += pitch;
|
||||
vst1_u8(ud, vget_low_u8(q10));
|
||||
|
||||
vd = v - 4;
|
||||
vst1_u8(vd, vget_high_u8(q3));
|
||||
vd += pitch;
|
||||
vst1_u8(vd, vget_high_u8(q4));
|
||||
vd += pitch;
|
||||
vst1_u8(vd, vget_high_u8(q5));
|
||||
vd += pitch;
|
||||
vst1_u8(vd, vget_high_u8(q6));
|
||||
vd += pitch;
|
||||
vst1_u8(vd, vget_high_u8(q7));
|
||||
vd += pitch;
|
||||
vst1_u8(vd, vget_high_u8(q8));
|
||||
vd += pitch;
|
||||
vst1_u8(vd, vget_high_u8(q9));
|
||||
vd += pitch;
|
||||
vst1_u8(vd, vget_high_u8(q10));
|
||||
return;
|
||||
}
|
|
@ -0,0 +1,123 @@
/*
 * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include <arm_neon.h>

static const int16_t cospi8sqrt2minus1 = 20091;
static const int16_t sinpi8sqrt2 = 35468;

void vp8_short_idct4x4llm_neon(
        int16_t *input,
        unsigned char *pred_ptr,
        int pred_stride,
        unsigned char *dst_ptr,
        int dst_stride) {
    int i;
    uint32x2_t d6u32 = vdup_n_u32(0);
    uint8x8_t d1u8;
    int16x4_t d2, d3, d4, d5, d10, d11, d12, d13;
    uint16x8_t q1u16;
    int16x8_t q1s16, q2s16, q3s16, q4s16;
    int32x2x2_t v2tmp0, v2tmp1;
    int16x4x2_t v2tmp2, v2tmp3;

    d2 = vld1_s16(input);
    d3 = vld1_s16(input + 4);
    d4 = vld1_s16(input + 8);
    d5 = vld1_s16(input + 12);

    // 1st for loop
    q1s16 = vcombine_s16(d2, d4);  // Swap d3 d4 here
    q2s16 = vcombine_s16(d3, d5);

    q3s16 = vqdmulhq_n_s16(q2s16, sinpi8sqrt2);
    q4s16 = vqdmulhq_n_s16(q2s16, cospi8sqrt2minus1);

    d12 = vqadd_s16(vget_low_s16(q1s16), vget_high_s16(q1s16));  // a1
    d13 = vqsub_s16(vget_low_s16(q1s16), vget_high_s16(q1s16));  // b1

    q3s16 = vshrq_n_s16(q3s16, 1);
    q4s16 = vshrq_n_s16(q4s16, 1);

    q3s16 = vqaddq_s16(q3s16, q2s16);
    q4s16 = vqaddq_s16(q4s16, q2s16);

    d10 = vqsub_s16(vget_low_s16(q3s16), vget_high_s16(q4s16));  // c1
    d11 = vqadd_s16(vget_high_s16(q3s16), vget_low_s16(q4s16));  // d1

    d2 = vqadd_s16(d12, d11);
    d3 = vqadd_s16(d13, d10);
    d4 = vqsub_s16(d13, d10);
    d5 = vqsub_s16(d12, d11);

    v2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4));
    v2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5));
    v2tmp2 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[0]),
                      vreinterpret_s16_s32(v2tmp1.val[0]));
    v2tmp3 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[1]),
                      vreinterpret_s16_s32(v2tmp1.val[1]));

    // 2nd for loop
    q1s16 = vcombine_s16(v2tmp2.val[0], v2tmp3.val[0]);
    q2s16 = vcombine_s16(v2tmp2.val[1], v2tmp3.val[1]);

    q3s16 = vqdmulhq_n_s16(q2s16, sinpi8sqrt2);
    q4s16 = vqdmulhq_n_s16(q2s16, cospi8sqrt2minus1);

    d12 = vqadd_s16(vget_low_s16(q1s16), vget_high_s16(q1s16));  // a1
    d13 = vqsub_s16(vget_low_s16(q1s16), vget_high_s16(q1s16));  // b1

    q3s16 = vshrq_n_s16(q3s16, 1);
    q4s16 = vshrq_n_s16(q4s16, 1);

    q3s16 = vqaddq_s16(q3s16, q2s16);
    q4s16 = vqaddq_s16(q4s16, q2s16);

    d10 = vqsub_s16(vget_low_s16(q3s16), vget_high_s16(q4s16));  // c1
    d11 = vqadd_s16(vget_high_s16(q3s16), vget_low_s16(q4s16));  // d1

    d2 = vqadd_s16(d12, d11);
    d3 = vqadd_s16(d13, d10);
    d4 = vqsub_s16(d13, d10);
    d5 = vqsub_s16(d12, d11);

    d2 = vrshr_n_s16(d2, 3);
    d3 = vrshr_n_s16(d3, 3);
    d4 = vrshr_n_s16(d4, 3);
    d5 = vrshr_n_s16(d5, 3);

    v2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4));
    v2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5));
    v2tmp2 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[0]),
                      vreinterpret_s16_s32(v2tmp1.val[0]));
    v2tmp3 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[1]),
                      vreinterpret_s16_s32(v2tmp1.val[1]));

    q1s16 = vcombine_s16(v2tmp2.val[0], v2tmp2.val[1]);
    q2s16 = vcombine_s16(v2tmp3.val[0], v2tmp3.val[1]);

    // dc_only_idct_add
    for (i = 0; i < 2; i++, q1s16 = q2s16) {
        d6u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d6u32, 0);
        pred_ptr += pred_stride;
        d6u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d6u32, 1);
        pred_ptr += pred_stride;

        q1u16 = vaddw_u8(vreinterpretq_u16_s16(q1s16),
                         vreinterpret_u8_u32(d6u32));
        d1u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16));

        vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d1u8), 0);
        dst_ptr += dst_stride;
        vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d1u8), 1);
        dst_ptr += dst_stride;
    }
    return;
}
File diff suppressed because it is too large
@ -0,0 +1,550 @@
/*
 * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include <arm_neon.h>
#include "./vpx_config.h"
#include "vpx_ports/arm.h"

static INLINE void vp8_loop_filter_neon(
|
||||
uint8x16_t qblimit, // flimit
|
||||
uint8x16_t qlimit, // limit
|
||||
uint8x16_t qthresh, // thresh
|
||||
uint8x16_t q3, // p3
|
||||
uint8x16_t q4, // p2
|
||||
uint8x16_t q5, // p1
|
||||
uint8x16_t q6, // p0
|
||||
uint8x16_t q7, // q0
|
||||
uint8x16_t q8, // q1
|
||||
uint8x16_t q9, // q2
|
||||
uint8x16_t q10, // q3
|
||||
uint8x16_t *q5r, // p1
|
||||
uint8x16_t *q6r, // p0
|
||||
uint8x16_t *q7r, // q0
|
||||
uint8x16_t *q8r) { // q1
|
||||
uint8x16_t q0u8, q1u8, q2u8, q11u8, q12u8, q13u8, q14u8, q15u8;
|
||||
int16x8_t q2s16, q11s16;
|
||||
uint16x8_t q4u16;
|
||||
int8x16_t q1s8, q2s8, q10s8, q11s8, q12s8, q13s8;
|
||||
int8x8_t d2s8, d3s8;
|
||||
|
||||
q11u8 = vabdq_u8(q3, q4);
|
||||
q12u8 = vabdq_u8(q4, q5);
|
||||
q13u8 = vabdq_u8(q5, q6);
|
||||
q14u8 = vabdq_u8(q8, q7);
|
||||
q3 = vabdq_u8(q9, q8);
|
||||
q4 = vabdq_u8(q10, q9);
|
||||
|
||||
q11u8 = vmaxq_u8(q11u8, q12u8);
|
||||
q12u8 = vmaxq_u8(q13u8, q14u8);
|
||||
q3 = vmaxq_u8(q3, q4);
|
||||
q15u8 = vmaxq_u8(q11u8, q12u8);
|
||||
|
||||
q9 = vabdq_u8(q6, q7);
|
||||
|
||||
// vp8_hevmask
|
||||
q13u8 = vcgtq_u8(q13u8, qthresh);
|
||||
q14u8 = vcgtq_u8(q14u8, qthresh);
|
||||
q15u8 = vmaxq_u8(q15u8, q3);
|
||||
|
||||
q2u8 = vabdq_u8(q5, q8);
|
||||
q9 = vqaddq_u8(q9, q9);
|
||||
|
||||
q15u8 = vcgeq_u8(qlimit, q15u8);
|
||||
|
||||
// vp8_filter() function
|
||||
// convert to signed
|
||||
q10 = vdupq_n_u8(0x80);
|
||||
q8 = veorq_u8(q8, q10);
|
||||
q7 = veorq_u8(q7, q10);
|
||||
q6 = veorq_u8(q6, q10);
|
||||
q5 = veorq_u8(q5, q10);
|
||||
|
||||
q2u8 = vshrq_n_u8(q2u8, 1);
|
||||
q9 = vqaddq_u8(q9, q2u8);
|
||||
|
||||
q10 = vdupq_n_u8(3);
|
||||
|
||||
q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7)),
|
||||
vget_low_s8(vreinterpretq_s8_u8(q6)));
|
||||
q11s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7)),
|
||||
vget_high_s8(vreinterpretq_s8_u8(q6)));
|
||||
|
||||
q9 = vcgeq_u8(qblimit, q9);
|
||||
|
||||
q1s8 = vqsubq_s8(vreinterpretq_s8_u8(q5),
|
||||
vreinterpretq_s8_u8(q8));
|
||||
|
||||
q14u8 = vorrq_u8(q13u8, q14u8);
|
||||
|
||||
q4u16 = vmovl_u8(vget_low_u8(q10));
|
||||
q2s16 = vmulq_s16(q2s16, vreinterpretq_s16_u16(q4u16));
|
||||
q11s16 = vmulq_s16(q11s16, vreinterpretq_s16_u16(q4u16));
|
||||
|
||||
q1u8 = vandq_u8(vreinterpretq_u8_s8(q1s8), q14u8);
|
||||
q15u8 = vandq_u8(q15u8, q9);
|
||||
|
||||
q1s8 = vreinterpretq_s8_u8(q1u8);
|
||||
q2s16 = vaddw_s8(q2s16, vget_low_s8(q1s8));
|
||||
q11s16 = vaddw_s8(q11s16, vget_high_s8(q1s8));
|
||||
|
||||
q9 = vdupq_n_u8(4);
|
||||
// vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0))
|
||||
d2s8 = vqmovn_s16(q2s16);
|
||||
d3s8 = vqmovn_s16(q11s16);
|
||||
q1s8 = vcombine_s8(d2s8, d3s8);
|
||||
q1u8 = vandq_u8(vreinterpretq_u8_s8(q1s8), q15u8);
|
||||
q1s8 = vreinterpretq_s8_u8(q1u8);
|
||||
|
||||
q2s8 = vqaddq_s8(q1s8, vreinterpretq_s8_u8(q10));
|
||||
q1s8 = vqaddq_s8(q1s8, vreinterpretq_s8_u8(q9));
|
||||
q2s8 = vshrq_n_s8(q2s8, 3);
|
||||
q1s8 = vshrq_n_s8(q1s8, 3);
|
||||
|
||||
q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q6), q2s8);
|
||||
q10s8 = vqsubq_s8(vreinterpretq_s8_u8(q7), q1s8);
|
||||
|
||||
q1s8 = vrshrq_n_s8(q1s8, 1);
|
||||
q1s8 = vbicq_s8(q1s8, vreinterpretq_s8_u8(q14u8));
|
||||
|
||||
q13s8 = vqaddq_s8(vreinterpretq_s8_u8(q5), q1s8);
|
||||
q12s8 = vqsubq_s8(vreinterpretq_s8_u8(q8), q1s8);
|
||||
|
||||
q0u8 = vdupq_n_u8(0x80);
|
||||
*q8r = veorq_u8(vreinterpretq_u8_s8(q12s8), q0u8);
|
||||
*q7r = veorq_u8(vreinterpretq_u8_s8(q10s8), q0u8);
|
||||
*q6r = veorq_u8(vreinterpretq_u8_s8(q11s8), q0u8);
|
||||
*q5r = veorq_u8(vreinterpretq_u8_s8(q13s8), q0u8);
|
||||
return;
|
||||
}
|
||||
|
||||
void vp8_loop_filter_horizontal_edge_y_neon(
|
||||
unsigned char *src,
|
||||
int pitch,
|
||||
unsigned char blimit,
|
||||
unsigned char limit,
|
||||
unsigned char thresh) {
|
||||
uint8x16_t qblimit, qlimit, qthresh, q3, q4;
|
||||
uint8x16_t q5, q6, q7, q8, q9, q10;
|
||||
|
||||
qblimit = vdupq_n_u8(blimit);
|
||||
qlimit = vdupq_n_u8(limit);
|
||||
qthresh = vdupq_n_u8(thresh);
|
||||
src -= (pitch << 2);
|
||||
|
||||
q3 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q4 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q5 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q6 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q7 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q8 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q9 = vld1q_u8(src);
|
||||
src += pitch;
|
||||
q10 = vld1q_u8(src);
|
||||
|
||||
vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
|
||||
q5, q6, q7, q8, q9, q10,
|
||||
&q5, &q6, &q7, &q8);
|
||||
|
||||
src -= (pitch * 5);
|
||||
vst1q_u8(src, q5);
|
||||
src += pitch;
|
||||
vst1q_u8(src, q6);
|
||||
src += pitch;
|
||||
vst1q_u8(src, q7);
|
||||
src += pitch;
|
||||
vst1q_u8(src, q8);
|
||||
return;
|
||||
}
|
||||
|
||||
void vp8_loop_filter_horizontal_edge_uv_neon(
|
||||
unsigned char *u,
|
||||
int pitch,
|
||||
unsigned char blimit,
|
||||
unsigned char limit,
|
||||
unsigned char thresh,
|
||||
unsigned char *v) {
|
||||
uint8x16_t qblimit, qlimit, qthresh, q3, q4;
|
||||
uint8x16_t q5, q6, q7, q8, q9, q10;
|
||||
uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
|
||||
uint8x8_t d15, d16, d17, d18, d19, d20, d21;
|
||||
|
||||
qblimit = vdupq_n_u8(blimit);
|
||||
qlimit = vdupq_n_u8(limit);
|
||||
qthresh = vdupq_n_u8(thresh);
|
||||
|
||||
u -= (pitch << 2);
|
||||
v -= (pitch << 2);
|
||||
|
||||
d6 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d7 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d8 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d9 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d10 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d11 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d12 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d13 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d14 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d15 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d16 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d17 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d18 = vld1_u8(u);
|
||||
u += pitch;
|
||||
d19 = vld1_u8(v);
|
||||
v += pitch;
|
||||
d20 = vld1_u8(u);
|
||||
d21 = vld1_u8(v);
|
||||
|
||||
q3 = vcombine_u8(d6, d7);
|
||||
q4 = vcombine_u8(d8, d9);
|
||||
q5 = vcombine_u8(d10, d11);
|
||||
q6 = vcombine_u8(d12, d13);
|
||||
q7 = vcombine_u8(d14, d15);
|
||||
q8 = vcombine_u8(d16, d17);
|
||||
q9 = vcombine_u8(d18, d19);
|
||||
q10 = vcombine_u8(d20, d21);
|
||||
|
||||
vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
|
||||
q5, q6, q7, q8, q9, q10,
|
||||
&q5, &q6, &q7, &q8);
|
||||
|
||||
u -= (pitch * 5);
|
||||
vst1_u8(u, vget_low_u8(q5));
|
||||
u += pitch;
|
||||
vst1_u8(u, vget_low_u8(q6));
|
||||
u += pitch;
|
||||
vst1_u8(u, vget_low_u8(q7));
|
||||
u += pitch;
|
||||
vst1_u8(u, vget_low_u8(q8));
|
||||
|
||||
v -= (pitch * 5);
|
||||
vst1_u8(v, vget_high_u8(q5));
|
||||
v += pitch;
|
||||
vst1_u8(v, vget_high_u8(q6));
|
||||
v += pitch;
|
||||
vst1_u8(v, vget_high_u8(q7));
|
||||
v += pitch;
|
||||
vst1_u8(v, vget_high_u8(q8));
|
||||
return;
|
||||
}
|
||||
|
||||
static INLINE void write_4x8(unsigned char *dst, int pitch,
|
||||
const uint8x8x4_t result) {
|
||||
#ifdef VPX_INCOMPATIBLE_GCC
|
||||
/*
|
||||
* uint8x8x4_t result
|
||||
00 01 02 03 | 04 05 06 07
|
||||
10 11 12 13 | 14 15 16 17
|
||||
20 21 22 23 | 24 25 26 27
|
||||
30 31 32 33 | 34 35 36 37
|
||||
---
|
||||
* after vtrn_u16
|
||||
00 01 20 21 | 04 05 24 25
|
||||
02 03 22 23 | 06 07 26 27
|
||||
10 11 30 31 | 14 15 34 35
|
||||
12 13 32 33 | 16 17 36 37
|
||||
---
|
||||
* after vtrn_u8
|
||||
00 10 20 30 | 04 14 24 34
|
||||
01 11 21 31 | 05 15 25 35
|
||||
02 12 22 32 | 06 16 26 36
|
||||
03 13 23 33 | 07 17 27 37
|
||||
*/
|
||||
const uint16x4x2_t r02_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[0]),
|
||||
vreinterpret_u16_u8(result.val[2]));
|
||||
const uint16x4x2_t r13_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[1]),
|
||||
vreinterpret_u16_u8(result.val[3]));
|
||||
const uint8x8x2_t r01_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[0]),
|
||||
vreinterpret_u8_u16(r13_u16.val[0]));
|
||||
const uint8x8x2_t r23_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[1]),
|
||||
vreinterpret_u8_u16(r13_u16.val[1]));
|
||||
const uint32x2_t x_0_4 = vreinterpret_u32_u8(r01_u8.val[0]);
|
||||
const uint32x2_t x_1_5 = vreinterpret_u32_u8(r01_u8.val[1]);
|
||||
const uint32x2_t x_2_6 = vreinterpret_u32_u8(r23_u8.val[0]);
|
||||
const uint32x2_t x_3_7 = vreinterpret_u32_u8(r23_u8.val[1]);
|
||||
vst1_lane_u32((uint32_t *)dst, x_0_4, 0);
|
||||
dst += pitch;
|
||||
vst1_lane_u32((uint32_t *)dst, x_1_5, 0);
|
||||
dst += pitch;
|
||||
vst1_lane_u32((uint32_t *)dst, x_2_6, 0);
|
||||
dst += pitch;
|
||||
vst1_lane_u32((uint32_t *)dst, x_3_7, 0);
|
||||
dst += pitch;
|
||||
vst1_lane_u32((uint32_t *)dst, x_0_4, 1);
|
||||
dst += pitch;
|
||||
vst1_lane_u32((uint32_t *)dst, x_1_5, 1);
|
||||
dst += pitch;
|
||||
vst1_lane_u32((uint32_t *)dst, x_2_6, 1);
|
||||
dst += pitch;
|
||||
vst1_lane_u32((uint32_t *)dst, x_3_7, 1);
|
||||
#else
|
||||
vst4_lane_u8(dst, result, 0);
|
||||
dst += pitch;
|
||||
vst4_lane_u8(dst, result, 1);
|
||||
dst += pitch;
|
||||
vst4_lane_u8(dst, result, 2);
|
||||
dst += pitch;
|
||||
vst4_lane_u8(dst, result, 3);
|
||||
dst += pitch;
|
||||
vst4_lane_u8(dst, result, 4);
|
||||
dst += pitch;
|
||||
vst4_lane_u8(dst, result, 5);
|
||||
dst += pitch;
|
||||
vst4_lane_u8(dst, result, 6);
|
||||
dst += pitch;
|
||||
vst4_lane_u8(dst, result, 7);
|
||||
#endif // VPX_INCOMPATIBLE_GCC
|
||||
}
|
||||
|
||||
void vp8_loop_filter_vertical_edge_y_neon(
|
||||
unsigned char *src,
|
||||
int pitch,
|
||||
unsigned char blimit,
|
||||
unsigned char limit,
|
||||
unsigned char thresh) {
|
||||
unsigned char *s, *d;
|
||||
uint8x16_t qblimit, qlimit, qthresh, q3, q4;
|
||||
uint8x16_t q5, q6, q7, q8, q9, q10;
|
||||
uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
|
||||
uint8x8_t d15, d16, d17, d18, d19, d20, d21;
|
||||
uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3;
|
||||
uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7;
|
||||
uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11;
|
||||
uint8x8x4_t q4ResultH, q4ResultL;
|
||||
|
||||
qblimit = vdupq_n_u8(blimit);
|
||||
qlimit = vdupq_n_u8(limit);
|
||||
qthresh = vdupq_n_u8(thresh);
|
||||
|
||||
s = src - 4;
|
||||
d6 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d8 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d10 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d12 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d14 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d16 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d18 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d20 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d7 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d9 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d11 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d13 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d15 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d17 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d19 = vld1_u8(s);
|
||||
s += pitch;
|
||||
d21 = vld1_u8(s);
|
||||
|
||||
q3 = vcombine_u8(d6, d7);
|
||||
q4 = vcombine_u8(d8, d9);
|
||||
q5 = vcombine_u8(d10, d11);
|
||||
q6 = vcombine_u8(d12, d13);
|
||||
q7 = vcombine_u8(d14, d15);
|
||||
q8 = vcombine_u8(d16, d17);
|
||||
q9 = vcombine_u8(d18, d19);
|
||||
q10 = vcombine_u8(d20, d21);
|
||||
|
||||
q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
|
||||
q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
|
||||
q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
|
||||
q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
|
||||
|
||||
q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
|
||||
vreinterpretq_u16_u32(q2tmp2.val[0]));
|
||||
q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
|
||||
vreinterpretq_u16_u32(q2tmp3.val[0]));
|
||||
q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
|
||||
vreinterpretq_u16_u32(q2tmp2.val[1]));
|
||||
q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
|
||||
vreinterpretq_u16_u32(q2tmp3.val[1]));
|
||||
|
||||
q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
|
||||
vreinterpretq_u8_u16(q2tmp5.val[0]));
|
||||
q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
|
||||
vreinterpretq_u8_u16(q2tmp5.val[1]));
|
||||
q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
|
||||
vreinterpretq_u8_u16(q2tmp7.val[0]));
|
||||
q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
|
||||
vreinterpretq_u8_u16(q2tmp7.val[1]));
|
||||
|
||||
q3 = q2tmp8.val[0];
|
||||
q4 = q2tmp8.val[1];
|
||||
q5 = q2tmp9.val[0];
|
||||
q6 = q2tmp9.val[1];
|
||||
q7 = q2tmp10.val[0];
|
||||
q8 = q2tmp10.val[1];
|
||||
q9 = q2tmp11.val[0];
|
||||
q10 = q2tmp11.val[1];
|
||||
|
||||
vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
|
||||
q5, q6, q7, q8, q9, q10,
|
||||
&q5, &q6, &q7, &q8);
|
||||
|
||||
q4ResultL.val[0] = vget_low_u8(q5); // d10
|
||||
q4ResultL.val[1] = vget_low_u8(q6); // d12
|
||||
q4ResultL.val[2] = vget_low_u8(q7); // d14
|
||||
q4ResultL.val[3] = vget_low_u8(q8); // d16
|
||||
q4ResultH.val[0] = vget_high_u8(q5); // d11
|
||||
q4ResultH.val[1] = vget_high_u8(q6); // d13
|
||||
q4ResultH.val[2] = vget_high_u8(q7); // d15
|
||||
q4ResultH.val[3] = vget_high_u8(q8); // d17
|
||||
|
||||
d = src - 2;
|
||||
write_4x8(d, pitch, q4ResultL);
|
||||
d += pitch * 8;
|
||||
write_4x8(d, pitch, q4ResultH);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_vertical_edge_uv_neon(
|
||||
unsigned char *u,
|
||||
int pitch,
|
||||
unsigned char blimit,
|
||||
unsigned char limit,
|
||||
unsigned char thresh,
|
||||
unsigned char *v) {
|
||||
unsigned char *us, *ud;
|
||||
unsigned char *vs, *vd;
|
||||
uint8x16_t qblimit, qlimit, qthresh, q3, q4;
|
||||
uint8x16_t q5, q6, q7, q8, q9, q10;
|
||||
uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14;
|
||||
uint8x8_t d15, d16, d17, d18, d19, d20, d21;
|
||||
uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3;
|
||||
uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7;
|
||||
uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11;
|
||||
uint8x8x4_t q4ResultH, q4ResultL;
|
||||
|
||||
qblimit = vdupq_n_u8(blimit);
|
||||
qlimit = vdupq_n_u8(limit);
|
||||
qthresh = vdupq_n_u8(thresh);
|
||||
|
||||
us = u - 4;
|
||||
d6 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d8 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d10 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d12 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d14 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d16 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d18 = vld1_u8(us);
|
||||
us += pitch;
|
||||
d20 = vld1_u8(us);
|
||||
|
||||
vs = v - 4;
|
||||
d7 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d9 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d11 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d13 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d15 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d17 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d19 = vld1_u8(vs);
|
||||
vs += pitch;
|
||||
d21 = vld1_u8(vs);
|
||||
|
||||
q3 = vcombine_u8(d6, d7);
|
||||
q4 = vcombine_u8(d8, d9);
|
||||
q5 = vcombine_u8(d10, d11);
|
||||
q6 = vcombine_u8(d12, d13);
|
||||
q7 = vcombine_u8(d14, d15);
|
||||
q8 = vcombine_u8(d16, d17);
|
||||
q9 = vcombine_u8(d18, d19);
|
||||
q10 = vcombine_u8(d20, d21);
|
||||
|
||||
q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7));
|
||||
q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8));
|
||||
q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9));
|
||||
q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10));
|
||||
|
||||
q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]),
|
||||
vreinterpretq_u16_u32(q2tmp2.val[0]));
|
||||
q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]),
|
||||
vreinterpretq_u16_u32(q2tmp3.val[0]));
|
||||
q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]),
|
||||
vreinterpretq_u16_u32(q2tmp2.val[1]));
|
||||
q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]),
|
||||
vreinterpretq_u16_u32(q2tmp3.val[1]));
|
||||
|
||||
q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]),
|
||||
vreinterpretq_u8_u16(q2tmp5.val[0]));
|
||||
q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]),
|
||||
vreinterpretq_u8_u16(q2tmp5.val[1]));
|
||||
q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]),
|
||||
vreinterpretq_u8_u16(q2tmp7.val[0]));
|
||||
q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]),
|
||||
vreinterpretq_u8_u16(q2tmp7.val[1]));
|
||||
|
||||
q3 = q2tmp8.val[0];
|
||||
q4 = q2tmp8.val[1];
|
||||
q5 = q2tmp9.val[0];
|
||||
q6 = q2tmp9.val[1];
|
||||
q7 = q2tmp10.val[0];
|
||||
q8 = q2tmp10.val[1];
|
||||
q9 = q2tmp11.val[0];
|
||||
q10 = q2tmp11.val[1];
|
||||
|
||||
vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4,
|
||||
q5, q6, q7, q8, q9, q10,
|
||||
&q5, &q6, &q7, &q8);
|
||||
|
||||
q4ResultL.val[0] = vget_low_u8(q5); // d10
|
||||
q4ResultL.val[1] = vget_low_u8(q6); // d12
|
||||
q4ResultL.val[2] = vget_low_u8(q7); // d14
|
||||
q4ResultL.val[3] = vget_low_u8(q8); // d16
|
||||
ud = u - 2;
|
||||
write_4x8(ud, pitch, q4ResultL);
|
||||
|
||||
q4ResultH.val[0] = vget_high_u8(q5); // d11
|
||||
q4ResultH.val[1] = vget_high_u8(q6); // d13
|
||||
q4ResultH.val[2] = vget_high_u8(q7); // d15
|
||||
q4ResultH.val[3] = vget_high_u8(q8); // d17
|
||||
vd = v - 2;
|
||||
write_4x8(vd, pitch, q4ResultH);
|
||||
}
|
|
@ -0,0 +1,22 @@
/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */


#include "blockd.h"
#include "vpx_mem/vpx_mem.h"

const unsigned char vp8_block2left[25] =
{
    0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8
};
const unsigned char vp8_block2above[25] =
{
    0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8
};
|
@ -0,0 +1,312 @@
/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */


#ifndef VP8_COMMON_BLOCKD_H_
#define VP8_COMMON_BLOCKD_H_

void vpx_log(const char *format, ...);

#include "vpx_config.h"
#include "vpx_scale/yv12config.h"
#include "mv.h"
#include "treecoder.h"
#include "vpx_ports/mem.h"

#ifdef __cplusplus
extern "C" {
#endif

/*#define DCPRED 1*/
|
||||
#define DCPREDSIMTHRESH 0
|
||||
#define DCPREDCNTTHRESH 3
|
||||
|
||||
#define MB_FEATURE_TREE_PROBS 3
|
||||
#define MAX_MB_SEGMENTS 4
|
||||
|
||||
#define MAX_REF_LF_DELTAS 4
|
||||
#define MAX_MODE_LF_DELTAS 4
|
||||
|
||||
/* Segment Feature Masks */
|
||||
#define SEGMENT_DELTADATA 0
|
||||
#define SEGMENT_ABSDATA 1
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int r, c;
|
||||
} POS;
|
||||
|
||||
#define PLANE_TYPE_Y_NO_DC 0
|
||||
#define PLANE_TYPE_Y2 1
|
||||
#define PLANE_TYPE_UV 2
|
||||
#define PLANE_TYPE_Y_WITH_DC 3
|
||||
|
||||
|
||||
typedef char ENTROPY_CONTEXT;
|
||||
typedef struct
|
||||
{
|
||||
ENTROPY_CONTEXT y1[4];
|
||||
ENTROPY_CONTEXT u[2];
|
||||
ENTROPY_CONTEXT v[2];
|
||||
ENTROPY_CONTEXT y2;
|
||||
} ENTROPY_CONTEXT_PLANES;
|
||||
|
||||
extern const unsigned char vp8_block2left[25];
|
||||
extern const unsigned char vp8_block2above[25];
|
||||
|
||||
#define VP8_COMBINEENTROPYCONTEXTS( Dest, A, B) \
|
||||
Dest = (A)+(B);
|
||||
|
||||
|
||||
typedef enum
|
||||
{
|
||||
KEY_FRAME = 0,
|
||||
INTER_FRAME = 1
|
||||
} FRAME_TYPE;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
DC_PRED, /* average of above and left pixels */
|
||||
V_PRED, /* vertical prediction */
|
||||
H_PRED, /* horizontal prediction */
|
||||
TM_PRED, /* Truemotion prediction */
|
||||
B_PRED, /* block based prediction, each block has its own prediction mode */
|
||||
|
||||
NEARESTMV,
|
||||
NEARMV,
|
||||
ZEROMV,
|
||||
NEWMV,
|
||||
SPLITMV,
|
||||
|
||||
MB_MODE_COUNT
|
||||
} MB_PREDICTION_MODE;
|
||||
|
||||
/* Macroblock level features */
|
||||
typedef enum
|
||||
{
|
||||
MB_LVL_ALT_Q = 0, /* Use alternate Quantizer .... */
|
||||
MB_LVL_ALT_LF = 1, /* Use alternate loop filter value... */
|
||||
MB_LVL_MAX = 2 /* Number of MB level features supported */
|
||||
|
||||
} MB_LVL_FEATURES;
|
||||
|
||||
/* Segment Feature Masks */
|
||||
#define SEGMENT_ALTQ 0x01
|
||||
#define SEGMENT_ALT_LF 0x02
|
||||
|
||||
#define VP8_YMODES (B_PRED + 1)
|
||||
#define VP8_UV_MODES (TM_PRED + 1)
|
||||
|
||||
#define VP8_MVREFS (1 + SPLITMV - NEARESTMV)
|
||||
|
||||
typedef enum
|
||||
{
|
||||
B_DC_PRED, /* average of above and left pixels */
|
||||
B_TM_PRED,
|
||||
|
||||
B_VE_PRED, /* vertical prediction */
|
||||
B_HE_PRED, /* horizontal prediction */
|
||||
|
||||
B_LD_PRED,
|
||||
B_RD_PRED,
|
||||
|
||||
B_VR_PRED,
|
||||
B_VL_PRED,
|
||||
B_HD_PRED,
|
||||
B_HU_PRED,
|
||||
|
||||
LEFT4X4,
|
||||
ABOVE4X4,
|
||||
ZERO4X4,
|
||||
NEW4X4,
|
||||
|
||||
B_MODE_COUNT
|
||||
} B_PREDICTION_MODE;
|
||||
|
||||
#define VP8_BINTRAMODES (B_HU_PRED + 1) /* 10 */
|
||||
#define VP8_SUBMVREFS (1 + NEW4X4 - LEFT4X4)
|
||||
|
||||
/* For keyframes, intra block modes are predicted by the (already decoded)
|
||||
modes for the Y blocks to the left and above us; for interframes, there
|
||||
is a single probability table. */
|
||||
|
||||
union b_mode_info
|
||||
{
|
||||
B_PREDICTION_MODE as_mode;
|
||||
int_mv mv;
|
||||
};
|
||||
|
||||
typedef enum
|
||||
{
|
||||
INTRA_FRAME = 0,
|
||||
LAST_FRAME = 1,
|
||||
GOLDEN_FRAME = 2,
|
||||
ALTREF_FRAME = 3,
|
||||
MAX_REF_FRAMES = 4
|
||||
} MV_REFERENCE_FRAME;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uint8_t mode, uv_mode;
|
||||
uint8_t ref_frame;
|
||||
uint8_t is_4x4;
|
||||
int_mv mv;
|
||||
|
||||
uint8_t partitioning;
|
||||
uint8_t mb_skip_coeff; /* does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens */
|
||||
uint8_t need_to_clamp_mvs;
|
||||
uint8_t segment_id; /* Which set of segmentation parameters should be used for this MB */
|
||||
} MB_MODE_INFO;
|
||||
|
||||
typedef struct modeinfo
|
||||
{
|
||||
MB_MODE_INFO mbmi;
|
||||
union b_mode_info bmi[16];
|
||||
} MODE_INFO;
|
||||
|
||||
#if CONFIG_MULTI_RES_ENCODING
|
||||
/* The mb-level information needed to be stored for higher-resolution encoder */
|
||||
typedef struct
|
||||
{
|
||||
MB_PREDICTION_MODE mode;
|
||||
MV_REFERENCE_FRAME ref_frame;
|
||||
int_mv mv;
|
||||
int dissim; /* dissimilarity level of the macroblock */
|
||||
} LOWER_RES_MB_INFO;
|
||||
|
||||
/* The frame-level information needed to be stored for higher-resolution
|
||||
* encoder */
|
||||
typedef struct
|
||||
{
|
||||
FRAME_TYPE frame_type;
|
||||
int is_frame_dropped;
|
||||
// The frame rate for the lowest resolution.
|
||||
double low_res_framerate;
|
||||
/* The frame number of each reference frames */
|
||||
unsigned int low_res_ref_frames[MAX_REF_FRAMES];
|
||||
// The video frame counter value for the key frame, for lowest resolution.
|
||||
unsigned int key_frame_counter_value;
|
||||
LOWER_RES_MB_INFO *mb_info;
|
||||
} LOWER_RES_FRAME_INFO;
|
||||
#endif
|
||||
|
||||
typedef struct blockd
|
||||
{
|
||||
short *qcoeff;
|
||||
short *dqcoeff;
|
||||
unsigned char *predictor;
|
||||
short *dequant;
|
||||
|
||||
int offset;
|
||||
char *eob;
|
||||
|
||||
union b_mode_info bmi;
|
||||
} BLOCKD;
|
||||
|
||||
typedef void (*vp8_subpix_fn_t)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);

typedef struct macroblockd
{
    DECLARE_ALIGNED(16, unsigned char, predictor[384]);
    DECLARE_ALIGNED(16, short, qcoeff[400]);
    DECLARE_ALIGNED(16, short, dqcoeff[400]);
    DECLARE_ALIGNED(16, char, eobs[25]);

    DECLARE_ALIGNED(16, short, dequant_y1[16]);
    DECLARE_ALIGNED(16, short, dequant_y1_dc[16]);
    DECLARE_ALIGNED(16, short, dequant_y2[16]);
    DECLARE_ALIGNED(16, short, dequant_uv[16]);

    /* 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries. */
    BLOCKD block[25];
    int fullpixel_mask;

    YV12_BUFFER_CONFIG pre; /* Filtered copy of previous frame reconstruction */
    YV12_BUFFER_CONFIG dst;

    MODE_INFO *mode_info_context;
    int mode_info_stride;

    FRAME_TYPE frame_type;

    int up_available;
    int left_available;

    unsigned char *recon_above[3];
    unsigned char *recon_left[3];
    int recon_left_stride[2];

    /* Y,U,V,Y2 */
    ENTROPY_CONTEXT_PLANES *above_context;
    ENTROPY_CONTEXT_PLANES *left_context;

    /* 0 indicates segmentation at MB level is not enabled. Otherwise the individual bits indicate which features are active. */
    unsigned char segmentation_enabled;

    /* 0 (do not update) 1 (update) the macroblock segmentation map. */
    unsigned char update_mb_segmentation_map;

    /* 0 (do not update) 1 (update) the macroblock segmentation feature data. */
    unsigned char update_mb_segmentation_data;

    /* 0 (do not update) 1 (update) the macroblock segmentation feature data. */
    unsigned char mb_segement_abs_delta;

    /* Per frame flags that define which MB level features (such as quantizer or loop filter level) */
    /* are enabled and when enabled the probabilities used to decode the per MB flags in MB_MODE_INFO */
    vp8_prob mb_segment_tree_probs[MB_FEATURE_TREE_PROBS]; /* Probability Tree used to code Segment number */

    signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; /* Segment parameters */

    /* mode_based Loop filter adjustment */
    unsigned char mode_ref_lf_delta_enabled;
    unsigned char mode_ref_lf_delta_update;

    /* Delta values have the range +/- MAX_LOOP_FILTER */
    signed char last_ref_lf_deltas[MAX_REF_LF_DELTAS]; /* 0 = Intra, Last, GF, ARF */
    signed char ref_lf_deltas[MAX_REF_LF_DELTAS]; /* 0 = Intra, Last, GF, ARF */
    signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS]; /* 0 = BPRED, ZERO_MV, MV, SPLIT */
    signed char mode_lf_deltas[MAX_MODE_LF_DELTAS]; /* 0 = BPRED, ZERO_MV, MV, SPLIT */

    /* Distance of MB away from frame edges */
    int mb_to_left_edge;
    int mb_to_right_edge;
    int mb_to_top_edge;
    int mb_to_bottom_edge;

    vp8_subpix_fn_t subpixel_predict;
    vp8_subpix_fn_t subpixel_predict8x4;
    vp8_subpix_fn_t subpixel_predict8x8;
    vp8_subpix_fn_t subpixel_predict16x16;

    void *current_bc;

    int corrupted;

#if ARCH_X86 || ARCH_X86_64
    /* This is an intermediate buffer currently used in sub-pixel motion search
     * to keep a copy of the reference area. This buffer can be used for other
     * purposes.
     */
    DECLARE_ALIGNED(32, unsigned char, y_buf[22*32]);
#endif
} MACROBLOCKD;


extern void vp8_build_block_doffsets(MACROBLOCKD *x);
extern void vp8_setup_block_dptrs(MACROBLOCKD *x);

#ifdef __cplusplus
} // extern "C"
#endif

#endif // VP8_COMMON_BLOCKD_H_
@@ -0,0 +1,197 @@
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef VP8_COMMON_COEFUPDATEPROBS_H_
|
||||
#define VP8_COMMON_COEFUPDATEPROBS_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Update probabilities for the nodes in the token entropy tree.
|
||||
Generated file included by entropy.c */
|
||||
|
||||
const vp8_prob vp8_coef_update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES] =
|
||||
{
|
||||
{
|
||||
{
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{176, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{223, 241, 252, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{249, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 244, 252, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{234, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 246, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{239, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{251, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{251, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 254, 253, 255, 254, 255, 255, 255, 255, 255, 255, },
|
||||
{250, 255, 254, 255, 254, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
},
|
||||
{
|
||||
{
|
||||
{217, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{225, 252, 241, 253, 255, 255, 254, 255, 255, 255, 255, },
|
||||
{234, 250, 241, 250, 253, 255, 253, 254, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{223, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{238, 253, 254, 254, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{249, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{247, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
},
|
||||
{
|
||||
{
|
||||
{186, 251, 250, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{234, 251, 244, 254, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{251, 251, 243, 253, 254, 255, 254, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{236, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{251, 253, 253, 254, 254, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
},
|
||||
{
|
||||
{
|
||||
{248, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{250, 254, 252, 254, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{248, 254, 249, 253, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{246, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{252, 254, 251, 254, 254, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 254, 252, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{248, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{253, 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{245, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{253, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 251, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{249, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 255, 253, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
{
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, },
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_COEFUPDATEPROBS_H_
@@ -0,0 +1,48 @@
/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */


#ifndef VP8_COMMON_COMMON_H_
#define VP8_COMMON_COMMON_H_

#include <assert.h>

/* Interface header for common constant data structures and lookup tables */

#include "vpx_mem/vpx_mem.h"

#ifdef __cplusplus
extern "C" {
#endif

/* Only need this for fixed-size arrays, for structs just assign. */

#define vp8_copy( Dest, Src) { \
        assert( sizeof( Dest) == sizeof( Src)); \
        memcpy( Dest, Src, sizeof( Src)); \
    }

/* Use this for variably-sized arrays. */

#define vp8_copy_array( Dest, Src, N) { \
        assert( sizeof( *Dest) == sizeof( *Src)); \
        memcpy( Dest, Src, N * sizeof( *Src)); \
    }

#define vp8_zero( Dest) memset( &Dest, 0, sizeof( Dest));

#define vp8_zero_array( Dest, N) memset( Dest, 0, N * sizeof( *Dest));


#ifdef __cplusplus
} // extern "C"
#endif

#endif // VP8_COMMON_COMMON_H_
@@ -0,0 +1,32 @@
/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */


#include <string.h>

#include "./vp8_rtcd.h"
#include "vpx/vpx_integer.h"

/* Copy 2 macroblocks to a buffer */
void vp8_copy32xn_c(const unsigned char *src_ptr, int src_stride,
                    unsigned char *dst_ptr, int dst_stride,
                    int height)
{
    int r;

    for (r = 0; r < height; r++)
    {
        memcpy(dst_ptr, src_ptr, 32);

        src_ptr += src_stride;
        dst_ptr += dst_stride;

    }
}
@@ -0,0 +1,155 @@
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include <stdio.h>
|
||||
#include "blockd.h"
|
||||
|
||||
|
||||
void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int frame)
|
||||
{
|
||||
|
||||
int mb_row;
|
||||
int mb_col;
|
||||
int mb_index = 0;
|
||||
FILE *mvs = fopen("mvs.stt", "a");
|
||||
|
||||
/* print out the macroblock Y modes */
|
||||
mb_index = 0;
|
||||
fprintf(mvs, "Mb Modes for Frame %d\n", frame);
|
||||
|
||||
for (mb_row = 0; mb_row < rows; mb_row++)
|
||||
{
|
||||
for (mb_col = 0; mb_col < cols; mb_col++)
|
||||
{
|
||||
|
||||
fprintf(mvs, "%2d ", mi[mb_index].mbmi.mode);
|
||||
|
||||
mb_index++;
|
||||
}
|
||||
|
||||
fprintf(mvs, "\n");
|
||||
mb_index++;
|
||||
}
|
||||
|
||||
fprintf(mvs, "\n");
|
||||
|
||||
mb_index = 0;
|
||||
fprintf(mvs, "Mb mv ref for Frame %d\n", frame);
|
||||
|
||||
for (mb_row = 0; mb_row < rows; mb_row++)
|
||||
{
|
||||
for (mb_col = 0; mb_col < cols; mb_col++)
|
||||
{
|
||||
|
||||
fprintf(mvs, "%2d ", mi[mb_index].mbmi.ref_frame);
|
||||
|
||||
mb_index++;
|
||||
}
|
||||
|
||||
fprintf(mvs, "\n");
|
||||
mb_index++;
|
||||
}
|
||||
|
||||
fprintf(mvs, "\n");
|
||||
|
||||
/* print out the macroblock UV modes */
|
||||
mb_index = 0;
|
||||
fprintf(mvs, "UV Modes for Frame %d\n", frame);
|
||||
|
||||
for (mb_row = 0; mb_row < rows; mb_row++)
|
||||
{
|
||||
for (mb_col = 0; mb_col < cols; mb_col++)
|
||||
{
|
||||
|
||||
fprintf(mvs, "%2d ", mi[mb_index].mbmi.uv_mode);
|
||||
|
||||
mb_index++;
|
||||
}
|
||||
|
||||
mb_index++;
|
||||
fprintf(mvs, "\n");
|
||||
}
|
||||
|
||||
fprintf(mvs, "\n");
|
||||
|
||||
/* print out the block modes */
|
||||
fprintf(mvs, "Mbs for Frame %d\n", frame);
|
||||
{
|
||||
int b_row;
|
||||
|
||||
for (b_row = 0; b_row < 4 * rows; b_row++)
|
||||
{
|
||||
int b_col;
|
||||
int bindex;
|
||||
|
||||
for (b_col = 0; b_col < 4 * cols; b_col++)
|
||||
{
|
||||
mb_index = (b_row >> 2) * (cols + 1) + (b_col >> 2);
|
||||
bindex = (b_row & 3) * 4 + (b_col & 3);
|
||||
|
||||
if (mi[mb_index].mbmi.mode == B_PRED)
|
||||
fprintf(mvs, "%2d ", mi[mb_index].bmi[bindex].as_mode);
|
||||
else
|
||||
fprintf(mvs, "xx ");
|
||||
|
||||
}
|
||||
|
||||
fprintf(mvs, "\n");
|
||||
}
|
||||
}
|
||||
fprintf(mvs, "\n");
|
||||
|
||||
/* print out the macroblock mvs */
|
||||
mb_index = 0;
|
||||
fprintf(mvs, "MVs for Frame %d\n", frame);
|
||||
|
||||
for (mb_row = 0; mb_row < rows; mb_row++)
|
||||
{
|
||||
for (mb_col = 0; mb_col < cols; mb_col++)
|
||||
{
|
||||
fprintf(mvs, "%5d:%-5d", mi[mb_index].mbmi.mv.as_mv.row / 2, mi[mb_index].mbmi.mv.as_mv.col / 2);
|
||||
|
||||
mb_index++;
|
||||
}
|
||||
|
||||
mb_index++;
|
||||
fprintf(mvs, "\n");
|
||||
}
|
||||
|
||||
fprintf(mvs, "\n");
|
||||
|
||||
|
||||
/* print out the block motion vectors */
|
||||
fprintf(mvs, "MVs for Frame %d\n", frame);
|
||||
{
|
||||
int b_row;
|
||||
|
||||
for (b_row = 0; b_row < 4 * rows; b_row++)
|
||||
{
|
||||
int b_col;
|
||||
int bindex;
|
||||
|
||||
for (b_col = 0; b_col < 4 * cols; b_col++)
|
||||
{
|
||||
mb_index = (b_row >> 2) * (cols + 1) + (b_col >> 2);
|
||||
bindex = (b_row & 3) * 4 + (b_col & 3);
|
||||
fprintf(mvs, "%3d:%-3d ", mi[mb_index].bmi[bindex].mv.as_mv.row, mi[mb_index].bmi[bindex].mv.as_mv.col);
|
||||
|
||||
}
|
||||
|
||||
fprintf(mvs, "\n");
|
||||
}
|
||||
}
|
||||
fprintf(mvs, "\n");
|
||||
|
||||
|
||||
fclose(mvs);
|
||||
}
@@ -0,0 +1,200 @@
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef VP8_COMMON_DEFAULT_COEF_PROBS_H_
|
||||
#define VP8_COMMON_DEFAULT_COEF_PROBS_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*Generated file, included by entropy.c*/
|
||||
|
||||
|
||||
static const vp8_prob default_coef_probs [BLOCK_TYPES]
|
||||
[COEF_BANDS]
|
||||
[PREV_COEF_CONTEXTS]
|
||||
[ENTROPY_NODES] =
|
||||
{
|
||||
{ /* Block Type ( 0 ) */
|
||||
{ /* Coeff Band ( 0 )*/
|
||||
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 1 )*/
|
||||
{ 253, 136, 254, 255, 228, 219, 128, 128, 128, 128, 128 },
|
||||
{ 189, 129, 242, 255, 227, 213, 255, 219, 128, 128, 128 },
|
||||
{ 106, 126, 227, 252, 214, 209, 255, 255, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 2 )*/
|
||||
{ 1, 98, 248, 255, 236, 226, 255, 255, 128, 128, 128 },
|
||||
{ 181, 133, 238, 254, 221, 234, 255, 154, 128, 128, 128 },
|
||||
{ 78, 134, 202, 247, 198, 180, 255, 219, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 3 )*/
|
||||
{ 1, 185, 249, 255, 243, 255, 128, 128, 128, 128, 128 },
|
||||
{ 184, 150, 247, 255, 236, 224, 128, 128, 128, 128, 128 },
|
||||
{ 77, 110, 216, 255, 236, 230, 128, 128, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 4 )*/
|
||||
{ 1, 101, 251, 255, 241, 255, 128, 128, 128, 128, 128 },
|
||||
{ 170, 139, 241, 252, 236, 209, 255, 255, 128, 128, 128 },
|
||||
{ 37, 116, 196, 243, 228, 255, 255, 255, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 5 )*/
|
||||
{ 1, 204, 254, 255, 245, 255, 128, 128, 128, 128, 128 },
|
||||
{ 207, 160, 250, 255, 238, 128, 128, 128, 128, 128, 128 },
|
||||
{ 102, 103, 231, 255, 211, 171, 128, 128, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 6 )*/
|
||||
{ 1, 152, 252, 255, 240, 255, 128, 128, 128, 128, 128 },
|
||||
{ 177, 135, 243, 255, 234, 225, 128, 128, 128, 128, 128 },
|
||||
{ 80, 129, 211, 255, 194, 224, 128, 128, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 7 )*/
|
||||
{ 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 246, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
|
||||
}
|
||||
},
|
||||
{ /* Block Type ( 1 ) */
|
||||
{ /* Coeff Band ( 0 )*/
|
||||
{ 198, 35, 237, 223, 193, 187, 162, 160, 145, 155, 62 },
|
||||
{ 131, 45, 198, 221, 172, 176, 220, 157, 252, 221, 1 },
|
||||
{ 68, 47, 146, 208, 149, 167, 221, 162, 255, 223, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 1 )*/
|
||||
{ 1, 149, 241, 255, 221, 224, 255, 255, 128, 128, 128 },
|
||||
{ 184, 141, 234, 253, 222, 220, 255, 199, 128, 128, 128 },
|
||||
{ 81, 99, 181, 242, 176, 190, 249, 202, 255, 255, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 2 )*/
|
||||
{ 1, 129, 232, 253, 214, 197, 242, 196, 255, 255, 128 },
|
||||
{ 99, 121, 210, 250, 201, 198, 255, 202, 128, 128, 128 },
|
||||
{ 23, 91, 163, 242, 170, 187, 247, 210, 255, 255, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 3 )*/
|
||||
{ 1, 200, 246, 255, 234, 255, 128, 128, 128, 128, 128 },
|
||||
{ 109, 178, 241, 255, 231, 245, 255, 255, 128, 128, 128 },
|
||||
{ 44, 130, 201, 253, 205, 192, 255, 255, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 4 )*/
|
||||
{ 1, 132, 239, 251, 219, 209, 255, 165, 128, 128, 128 },
|
||||
{ 94, 136, 225, 251, 218, 190, 255, 255, 128, 128, 128 },
|
||||
{ 22, 100, 174, 245, 186, 161, 255, 199, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 5 )*/
|
||||
{ 1, 182, 249, 255, 232, 235, 128, 128, 128, 128, 128 },
|
||||
{ 124, 143, 241, 255, 227, 234, 128, 128, 128, 128, 128 },
|
||||
{ 35, 77, 181, 251, 193, 211, 255, 205, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 6 )*/
|
||||
{ 1, 157, 247, 255, 236, 231, 255, 255, 128, 128, 128 },
|
||||
{ 121, 141, 235, 255, 225, 227, 255, 255, 128, 128, 128 },
|
||||
{ 45, 99, 188, 251, 195, 217, 255, 224, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 7 )*/
|
||||
{ 1, 1, 251, 255, 213, 255, 128, 128, 128, 128, 128 },
|
||||
{ 203, 1, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
|
||||
{ 137, 1, 177, 255, 224, 255, 128, 128, 128, 128, 128 }
|
||||
}
|
||||
},
|
||||
{ /* Block Type ( 2 ) */
|
||||
{ /* Coeff Band ( 0 )*/
|
||||
{ 253, 9, 248, 251, 207, 208, 255, 192, 128, 128, 128 },
|
||||
{ 175, 13, 224, 243, 193, 185, 249, 198, 255, 255, 128 },
|
||||
{ 73, 17, 171, 221, 161, 179, 236, 167, 255, 234, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 1 )*/
|
||||
{ 1, 95, 247, 253, 212, 183, 255, 255, 128, 128, 128 },
|
||||
{ 239, 90, 244, 250, 211, 209, 255, 255, 128, 128, 128 },
|
||||
{ 155, 77, 195, 248, 188, 195, 255, 255, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 2 )*/
|
||||
{ 1, 24, 239, 251, 218, 219, 255, 205, 128, 128, 128 },
|
||||
{ 201, 51, 219, 255, 196, 186, 128, 128, 128, 128, 128 },
|
||||
{ 69, 46, 190, 239, 201, 218, 255, 228, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 3 )*/
|
||||
{ 1, 191, 251, 255, 255, 128, 128, 128, 128, 128, 128 },
|
||||
{ 223, 165, 249, 255, 213, 255, 128, 128, 128, 128, 128 },
|
||||
{ 141, 124, 248, 255, 255, 128, 128, 128, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 4 )*/
|
||||
{ 1, 16, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
|
||||
{ 190, 36, 230, 255, 236, 255, 128, 128, 128, 128, 128 },
|
||||
{ 149, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 5 )*/
|
||||
{ 1, 226, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 247, 192, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 240, 128, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 6 )*/
|
||||
{ 1, 134, 252, 255, 255, 128, 128, 128, 128, 128, 128 },
|
||||
{ 213, 62, 250, 255, 255, 128, 128, 128, 128, 128, 128 },
|
||||
{ 55, 93, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 7 )*/
|
||||
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
|
||||
}
|
||||
},
|
||||
{ /* Block Type ( 3 ) */
|
||||
{ /* Coeff Band ( 0 )*/
|
||||
{ 202, 24, 213, 235, 186, 191, 220, 160, 240, 175, 255 },
|
||||
{ 126, 38, 182, 232, 169, 184, 228, 174, 255, 187, 128 },
|
||||
{ 61, 46, 138, 219, 151, 178, 240, 170, 255, 216, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 1 )*/
|
||||
{ 1, 112, 230, 250, 199, 191, 247, 159, 255, 255, 128 },
|
||||
{ 166, 109, 228, 252, 211, 215, 255, 174, 128, 128, 128 },
|
||||
{ 39, 77, 162, 232, 172, 180, 245, 178, 255, 255, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 2 )*/
|
||||
{ 1, 52, 220, 246, 198, 199, 249, 220, 255, 255, 128 },
|
||||
{ 124, 74, 191, 243, 183, 193, 250, 221, 255, 255, 128 },
|
||||
{ 24, 71, 130, 219, 154, 170, 243, 182, 255, 255, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 3 )*/
|
||||
{ 1, 182, 225, 249, 219, 240, 255, 224, 128, 128, 128 },
|
||||
{ 149, 150, 226, 252, 216, 205, 255, 171, 128, 128, 128 },
|
||||
{ 28, 108, 170, 242, 183, 194, 254, 223, 255, 255, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 4 )*/
|
||||
{ 1, 81, 230, 252, 204, 203, 255, 192, 128, 128, 128 },
|
||||
{ 123, 102, 209, 247, 188, 196, 255, 233, 128, 128, 128 },
|
||||
{ 20, 95, 153, 243, 164, 173, 255, 203, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 5 )*/
|
||||
{ 1, 222, 248, 255, 216, 213, 128, 128, 128, 128, 128 },
|
||||
{ 168, 175, 246, 252, 235, 205, 255, 255, 128, 128, 128 },
|
||||
{ 47, 116, 215, 255, 211, 212, 255, 255, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 6 )*/
|
||||
{ 1, 121, 236, 253, 212, 214, 255, 255, 128, 128, 128 },
|
||||
{ 141, 84, 213, 252, 201, 202, 255, 219, 128, 128, 128 },
|
||||
{ 42, 80, 160, 240, 162, 185, 255, 205, 128, 128, 128 }
|
||||
},
|
||||
{ /* Coeff Band ( 7 )*/
|
||||
{ 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 244, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
|
||||
{ 238, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_DEFAULT_COEF_PROBS_H_
@@ -0,0 +1,43 @@
/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */


#include "vpx_config.h"
#include "vp8_rtcd.h"
#include "vp8/common/blockd.h"
#include "vpx_mem/vpx_mem.h"

void vp8_dequantize_b_c(BLOCKD *d, short *DQC)
{
    int i;
    short *DQ = d->dqcoeff;
    short *Q = d->qcoeff;

    for (i = 0; i < 16; i++)
    {
        DQ[i] = Q[i] * DQC[i];
    }
}

void vp8_dequant_idct_add_c(short *input, short *dq,
                            unsigned char *dest, int stride)
{
    int i;

    for (i = 0; i < 16; i++)
    {
        input[i] = dq[i] * input[i];
    }

    vp8_short_idct4x4llm_c(input, dest, stride, dest, stride);

    memset(input, 0, 32);

}
@@ -0,0 +1,188 @@
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "entropy.h"
|
||||
#include "blockd.h"
|
||||
#include "onyxc_int.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
|
||||
#include "coefupdateprobs.h"
|
||||
|
||||
DECLARE_ALIGNED(16, const unsigned char, vp8_norm[256]) =
|
||||
{
|
||||
0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
};
|
||||
|
||||
DECLARE_ALIGNED(16, const unsigned char, vp8_coef_bands[16]) =
|
||||
{ 0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7};
|
||||
|
||||
DECLARE_ALIGNED(16, const unsigned char,
|
||||
vp8_prev_token_class[MAX_ENTROPY_TOKENS]) =
|
||||
{ 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0};
|
||||
|
||||
DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]) =
|
||||
{
|
||||
0, 1, 4, 8,
|
||||
5, 2, 3, 6,
|
||||
9, 12, 13, 10,
|
||||
7, 11, 14, 15,
|
||||
};
|
||||
|
||||
DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]) =
|
||||
{
|
||||
1, 2, 6, 7,
|
||||
3, 5, 8, 13,
|
||||
4, 9, 12, 14,
|
||||
10, 11, 15, 16
|
||||
};
|
||||
|
||||
/* vp8_default_zig_zag_mask generated with:
|
||||
|
||||
void vp8_init_scan_order_mask()
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
vp8_default_zig_zag_mask[vp8_default_zig_zag1d[i]] = 1 << i;
|
||||
}
|
||||
|
||||
}
|
||||
*/
|
||||
DECLARE_ALIGNED(16, const short, vp8_default_zig_zag_mask[16]) =
|
||||
{
|
||||
1, 2, 32, 64,
|
||||
4, 16, 128, 4096,
|
||||
8, 256, 2048, 8192,
|
||||
512, 1024, 16384, -32768
|
||||
};
|
||||
|
||||
const int vp8_mb_feature_data_bits[MB_LVL_MAX] = {7, 6};
|
||||
|
||||
/* Array indices are identical to previously-existing CONTEXT_NODE indices */
|
||||
|
||||
const vp8_tree_index vp8_coef_tree[ 22] = /* corresponding _CONTEXT_NODEs */
|
||||
{
|
||||
-DCT_EOB_TOKEN, 2, /* 0 = EOB */
|
||||
-ZERO_TOKEN, 4, /* 1 = ZERO */
|
||||
-ONE_TOKEN, 6, /* 2 = ONE */
|
||||
8, 12, /* 3 = LOW_VAL */
|
||||
-TWO_TOKEN, 10, /* 4 = TWO */
|
||||
-THREE_TOKEN, -FOUR_TOKEN, /* 5 = THREE */
|
||||
14, 16, /* 6 = HIGH_LOW */
|
||||
-DCT_VAL_CATEGORY1, -DCT_VAL_CATEGORY2, /* 7 = CAT_ONE */
|
||||
18, 20, /* 8 = CAT_THREEFOUR */
|
||||
-DCT_VAL_CATEGORY3, -DCT_VAL_CATEGORY4, /* 9 = CAT_THREE */
|
||||
-DCT_VAL_CATEGORY5, -DCT_VAL_CATEGORY6 /* 10 = CAT_FIVE */
|
||||
};
|
||||
|
||||
/* vp8_coef_encodings generated with:
|
||||
vp8_tokens_from_tree(vp8_coef_encodings, vp8_coef_tree);
|
||||
*/
|
||||
vp8_token vp8_coef_encodings[MAX_ENTROPY_TOKENS] =
|
||||
{
|
||||
{2, 2},
|
||||
{6, 3},
|
||||
{28, 5},
|
||||
{58, 6},
|
||||
{59, 6},
|
||||
{60, 6},
|
||||
{61, 6},
|
||||
{124, 7},
|
||||
{125, 7},
|
||||
{126, 7},
|
||||
{127, 7},
|
||||
{0, 1}
|
||||
};
|
||||
|
||||
/* Trees for extra bits. Probabilities are constant and
|
||||
do not depend on previously encoded bits */
|
||||
|
||||
static const vp8_prob Pcat1[] = { 159};
|
||||
static const vp8_prob Pcat2[] = { 165, 145};
|
||||
static const vp8_prob Pcat3[] = { 173, 148, 140};
|
||||
static const vp8_prob Pcat4[] = { 176, 155, 140, 135};
|
||||
static const vp8_prob Pcat5[] = { 180, 157, 141, 134, 130};
|
||||
static const vp8_prob Pcat6[] =
|
||||
{ 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129};
|
||||
|
||||
|
||||
/* tree index tables generated with:
|
||||
|
||||
void init_bit_tree(vp8_tree_index *p, int n)
|
||||
{
|
||||
int i = 0;
|
||||
|
||||
while (++i < n)
|
||||
{
|
||||
p[0] = p[1] = i << 1;
|
||||
p += 2;
|
||||
}
|
||||
|
||||
p[0] = p[1] = 0;
|
||||
}
|
||||
|
||||
void init_bit_trees()
|
||||
{
|
||||
init_bit_tree(cat1, 1);
|
||||
init_bit_tree(cat2, 2);
|
||||
init_bit_tree(cat3, 3);
|
||||
init_bit_tree(cat4, 4);
|
||||
init_bit_tree(cat5, 5);
|
||||
init_bit_tree(cat6, 11);
|
||||
}
|
||||
*/
|
||||
|
||||
static const vp8_tree_index cat1[2] = { 0, 0 };
|
||||
static const vp8_tree_index cat2[4] = { 2, 2, 0, 0 };
|
||||
static const vp8_tree_index cat3[6] = { 2, 2, 4, 4, 0, 0 };
|
||||
static const vp8_tree_index cat4[8] = { 2, 2, 4, 4, 6, 6, 0, 0 };
|
||||
static const vp8_tree_index cat5[10] = { 2, 2, 4, 4, 6, 6, 8, 8, 0, 0 };
|
||||
static const vp8_tree_index cat6[22] = { 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12,
|
||||
14, 14, 16, 16, 18, 18, 20, 20, 0, 0 };
|
||||
|
||||
const vp8_extra_bit_struct vp8_extra_bits[12] =
|
||||
{
|
||||
{ 0, 0, 0, 0},
|
||||
{ 0, 0, 0, 1},
|
||||
{ 0, 0, 0, 2},
|
||||
{ 0, 0, 0, 3},
|
||||
{ 0, 0, 0, 4},
|
||||
{ cat1, Pcat1, 1, 5},
|
||||
{ cat2, Pcat2, 2, 7},
|
||||
{ cat3, Pcat3, 3, 11},
|
||||
{ cat4, Pcat4, 4, 19},
|
||||
{ cat5, Pcat5, 5, 35},
|
||||
{ cat6, Pcat6, 11, 67},
|
||||
{ 0, 0, 0, 0}
|
||||
};
|
||||
|
||||
#include "default_coef_probs.h"
|
||||
|
||||
void vp8_default_coef_probs(VP8_COMMON *pc)
|
||||
{
|
||||
memcpy(pc->fc.coef_probs, default_coef_probs, sizeof(default_coef_probs));
|
||||
}
@@ -0,0 +1,109 @@
/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */


#ifndef VP8_COMMON_ENTROPY_H_
#define VP8_COMMON_ENTROPY_H_

#include "treecoder.h"
#include "blockd.h"

#ifdef __cplusplus
extern "C" {
#endif

/* Coefficient token alphabet */

#define ZERO_TOKEN 0 /* 0 Extra Bits 0+0 */
#define ONE_TOKEN 1 /* 1 Extra Bits 0+1 */
#define TWO_TOKEN 2 /* 2 Extra Bits 0+1 */
#define THREE_TOKEN 3 /* 3 Extra Bits 0+1 */
#define FOUR_TOKEN 4 /* 4 Extra Bits 0+1 */
#define DCT_VAL_CATEGORY1 5 /* 5-6 Extra Bits 1+1 */
#define DCT_VAL_CATEGORY2 6 /* 7-10 Extra Bits 2+1 */
#define DCT_VAL_CATEGORY3 7 /* 11-18 Extra Bits 3+1 */
#define DCT_VAL_CATEGORY4 8 /* 19-34 Extra Bits 4+1 */
#define DCT_VAL_CATEGORY5 9 /* 35-66 Extra Bits 5+1 */
#define DCT_VAL_CATEGORY6 10 /* 67+ Extra Bits 11+1 */
#define DCT_EOB_TOKEN 11 /* EOB Extra Bits 0+0 */

#define MAX_ENTROPY_TOKENS 12
#define ENTROPY_NODES 11

extern const vp8_tree_index vp8_coef_tree[];

extern const struct vp8_token_struct vp8_coef_encodings[MAX_ENTROPY_TOKENS];

typedef struct
{
    vp8_tree_p tree;
    const vp8_prob *prob;
    int Len;
    int base_val;
} vp8_extra_bit_struct;

extern const vp8_extra_bit_struct vp8_extra_bits[12]; /* indexed by token value */

#define PROB_UPDATE_BASELINE_COST 7

#define MAX_PROB 255
#define DCT_MAX_VALUE 2048


/* Coefficients are predicted via a 3-dimensional probability table. */

/* Outside dimension. 0 = Y no DC, 1 = Y2, 2 = UV, 3 = Y with DC */

#define BLOCK_TYPES 4

/* Middle dimension is a coarsening of the coefficient's
   position within the 4x4 DCT. */

#define COEF_BANDS 8
extern DECLARE_ALIGNED(16, const unsigned char, vp8_coef_bands[16]);

/* Inside dimension is 3-valued measure of nearby complexity, that is,
   the extent to which nearby coefficients are nonzero. For the first
   coefficient (DC, unless block type is 0), we look at the (already encoded)
   blocks above and to the left of the current block. The context index is
   then the number (0,1,or 2) of these blocks having nonzero coefficients.
   After decoding a coefficient, the measure is roughly the size of the
   most recently decoded coefficient (0 for 0, 1 for 1, 2 for >1).
   Note that the intuitive meaning of this measure changes as coefficients
   are decoded, e.g., prior to the first token, a zero means that my neighbors
   are empty while, after the first token, because of the use of end-of-block,
   a zero means we just decoded a zero and hence guarantees that a non-zero
   coefficient will appear later in this block. However, this shift
   in meaning is perfectly OK because our context depends also on the
   coefficient band (and since zigzag positions 0, 1, and 2 are in
   distinct bands). */

/*# define DC_TOKEN_CONTEXTS 3*/ /* 00, 0!0, !0!0 */
# define PREV_COEF_CONTEXTS 3

extern DECLARE_ALIGNED(16, const unsigned char, vp8_prev_token_class[MAX_ENTROPY_TOKENS]);

extern const vp8_prob vp8_coef_update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];


struct VP8Common;
void vp8_default_coef_probs(struct VP8Common *);

extern DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]);
extern DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]);
extern DECLARE_ALIGNED(16, const short, vp8_default_zig_zag_mask[16]);
extern const int vp8_mb_feature_data_bits[MB_LVL_MAX];

void vp8_coef_tree_initialize(void);
#ifdef __cplusplus
} // extern "C"
#endif

#endif // VP8_COMMON_ENTROPY_H_
@@ -0,0 +1,171 @@
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#define USE_PREBUILT_TABLES
|
||||
|
||||
#include "entropymode.h"
|
||||
#include "entropy.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
|
||||
#include "vp8_entropymodedata.h"
|
||||
|
||||
int vp8_mv_cont(const int_mv *l, const int_mv *a)
|
||||
{
|
||||
int lez = (l->as_int == 0);
|
||||
int aez = (a->as_int == 0);
|
||||
int lea = (l->as_int == a->as_int);
|
||||
|
||||
if (lea && lez)
|
||||
return SUBMVREF_LEFT_ABOVE_ZED;
|
||||
|
||||
if (lea)
|
||||
return SUBMVREF_LEFT_ABOVE_SAME;
|
||||
|
||||
if (aez)
|
||||
return SUBMVREF_ABOVE_ZED;
|
||||
|
||||
if (lez)
|
||||
return SUBMVREF_LEFT_ZED;
|
||||
|
||||
return SUBMVREF_NORMAL;
|
||||
}
|
||||
|
||||
static const vp8_prob sub_mv_ref_prob [VP8_SUBMVREFS-1] = { 180, 162, 25};
|
||||
|
||||
const vp8_prob vp8_sub_mv_ref_prob2 [SUBMVREF_COUNT][VP8_SUBMVREFS-1] =
|
||||
{
|
||||
{ 147, 136, 18 },
|
||||
{ 106, 145, 1 },
|
||||
{ 179, 121, 1 },
|
||||
{ 223, 1 , 34 },
|
||||
{ 208, 1 , 1 }
|
||||
};
|
||||
|
||||
|
||||
|
||||
const vp8_mbsplit vp8_mbsplits [VP8_NUMMBSPLITS] =
|
||||
{
|
||||
{
|
||||
0, 0, 0, 0,
|
||||
0, 0, 0, 0,
|
||||
1, 1, 1, 1,
|
||||
1, 1, 1, 1,
|
||||
},
|
||||
{
|
||||
0, 0, 1, 1,
|
||||
0, 0, 1, 1,
|
||||
0, 0, 1, 1,
|
||||
0, 0, 1, 1,
|
||||
},
|
||||
{
|
||||
0, 0, 1, 1,
|
||||
0, 0, 1, 1,
|
||||
2, 2, 3, 3,
|
||||
2, 2, 3, 3,
|
||||
},
|
||||
{
|
||||
0, 1, 2, 3,
|
||||
4, 5, 6, 7,
|
||||
8, 9, 10, 11,
|
||||
12, 13, 14, 15,
|
||||
}
|
||||
};
|
||||
|
||||
const int vp8_mbsplit_count [VP8_NUMMBSPLITS] = { 2, 2, 4, 16};
|
||||
|
||||
const vp8_prob vp8_mbsplit_probs [VP8_NUMMBSPLITS-1] = { 110, 111, 150};
|
||||
|
||||
|
||||
/* Array indices are identical to previously-existing INTRAMODECONTEXTNODES. */
|
||||
|
||||
const vp8_tree_index vp8_bmode_tree[18] = /* INTRAMODECONTEXTNODE value */
|
||||
{
|
||||
-B_DC_PRED, 2, /* 0 = DC_NODE */
|
||||
-B_TM_PRED, 4, /* 1 = TM_NODE */
|
||||
-B_VE_PRED, 6, /* 2 = VE_NODE */
|
||||
8, 12, /* 3 = COM_NODE */
|
||||
-B_HE_PRED, 10, /* 4 = HE_NODE */
|
||||
-B_RD_PRED, -B_VR_PRED, /* 5 = RD_NODE */
|
||||
-B_LD_PRED, 14, /* 6 = LD_NODE */
|
||||
-B_VL_PRED, 16, /* 7 = VL_NODE */
|
||||
-B_HD_PRED, -B_HU_PRED /* 8 = HD_NODE */
|
||||
};
|
||||
|
||||
/* Again, these trees use the same probability indices as their
|
||||
explicitly-programmed predecessors. */
|
||||
|
||||
const vp8_tree_index vp8_ymode_tree[8] =
|
||||
{
|
||||
-DC_PRED, 2,
|
||||
4, 6,
|
||||
-V_PRED, -H_PRED,
|
||||
-TM_PRED, -B_PRED
|
||||
};
|
||||
|
||||
const vp8_tree_index vp8_kf_ymode_tree[8] =
|
||||
{
|
||||
-B_PRED, 2,
|
||||
4, 6,
|
||||
-DC_PRED, -V_PRED,
|
||||
-H_PRED, -TM_PRED
|
||||
};
|
||||
|
||||
const vp8_tree_index vp8_uv_mode_tree[6] =
|
||||
{
|
||||
-DC_PRED, 2,
|
||||
-V_PRED, 4,
|
||||
-H_PRED, -TM_PRED
|
||||
};
|
||||
|
||||
const vp8_tree_index vp8_mbsplit_tree[6] =
|
||||
{
|
||||
-3, 2,
|
||||
-2, 4,
|
||||
-0, -1
|
||||
};
|
||||
|
||||
const vp8_tree_index vp8_mv_ref_tree[8] =
|
||||
{
|
||||
-ZEROMV, 2,
|
||||
-NEARESTMV, 4,
|
||||
-NEARMV, 6,
|
||||
-NEWMV, -SPLITMV
|
||||
};
|
||||
|
||||
const vp8_tree_index vp8_sub_mv_ref_tree[6] =
|
||||
{
|
||||
-LEFT4X4, 2,
|
||||
-ABOVE4X4, 4,
|
||||
-ZERO4X4, -NEW4X4
|
||||
};
|
||||
|
||||
const vp8_tree_index vp8_small_mvtree [14] =
|
||||
{
|
||||
2, 8,
|
||||
4, 6,
|
||||
-0, -1,
|
||||
-2, -3,
|
||||
10, 12,
|
||||
-4, -5,
|
||||
-6, -7
|
||||
};
|
||||
|
||||
void vp8_init_mbmode_probs(VP8_COMMON *x)
|
||||
{
|
||||
memcpy(x->fc.ymode_prob, vp8_ymode_prob, sizeof(vp8_ymode_prob));
|
||||
memcpy(x->fc.uv_mode_prob, vp8_uv_mode_prob, sizeof(vp8_uv_mode_prob));
|
||||
memcpy(x->fc.sub_mv_ref_prob, sub_mv_ref_prob, sizeof(sub_mv_ref_prob));
|
||||
}
|
||||
|
||||
void vp8_default_bmode_probs(vp8_prob p [VP8_BINTRAMODES-1])
|
||||
{
|
||||
memcpy(p, vp8_bmode_prob, sizeof(vp8_bmode_prob));
|
||||
}
@@ -0,0 +1,88 @@
/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */


#ifndef VP8_COMMON_ENTROPYMODE_H_
#define VP8_COMMON_ENTROPYMODE_H_

#include "onyxc_int.h"
#include "treecoder.h"

#ifdef __cplusplus
extern "C" {
#endif

typedef enum
{
    SUBMVREF_NORMAL,
    SUBMVREF_LEFT_ZED,
    SUBMVREF_ABOVE_ZED,
    SUBMVREF_LEFT_ABOVE_SAME,
    SUBMVREF_LEFT_ABOVE_ZED
} sumvfref_t;

typedef int vp8_mbsplit[16];

#define VP8_NUMMBSPLITS 4

extern const vp8_mbsplit vp8_mbsplits [VP8_NUMMBSPLITS];

extern const int vp8_mbsplit_count [VP8_NUMMBSPLITS]; /* # of subsets */

extern const vp8_prob vp8_mbsplit_probs [VP8_NUMMBSPLITS-1];

extern int vp8_mv_cont(const int_mv *l, const int_mv *a);
#define SUBMVREF_COUNT 5
extern const vp8_prob vp8_sub_mv_ref_prob2 [SUBMVREF_COUNT][VP8_SUBMVREFS-1];


extern const unsigned int vp8_kf_default_bmode_counts [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES];


extern const vp8_tree_index vp8_bmode_tree[];

extern const vp8_tree_index vp8_ymode_tree[];
extern const vp8_tree_index vp8_kf_ymode_tree[];
extern const vp8_tree_index vp8_uv_mode_tree[];

extern const vp8_tree_index vp8_mbsplit_tree[];
extern const vp8_tree_index vp8_mv_ref_tree[];
extern const vp8_tree_index vp8_sub_mv_ref_tree[];

extern const struct vp8_token_struct vp8_bmode_encodings[VP8_BINTRAMODES];
extern const struct vp8_token_struct vp8_ymode_encodings[VP8_YMODES];
extern const struct vp8_token_struct vp8_kf_ymode_encodings[VP8_YMODES];
extern const struct vp8_token_struct vp8_uv_mode_encodings[VP8_UV_MODES];
extern const struct vp8_token_struct vp8_mbsplit_encodings[VP8_NUMMBSPLITS];

/* Inter mode values do not start at zero */

extern const struct vp8_token_struct vp8_mv_ref_encoding_array[VP8_MVREFS];
extern const struct vp8_token_struct vp8_sub_mv_ref_encoding_array[VP8_SUBMVREFS];

extern const vp8_tree_index vp8_small_mvtree[];

extern const struct vp8_token_struct vp8_small_mvencodings[8];

/* Key frame default mode probs */
extern const vp8_prob vp8_kf_bmode_prob[VP8_BINTRAMODES][VP8_BINTRAMODES]
                                       [VP8_BINTRAMODES-1];
extern const vp8_prob vp8_kf_uv_mode_prob[VP8_UV_MODES-1];
extern const vp8_prob vp8_kf_ymode_prob[VP8_YMODES-1];

void vp8_init_mbmode_probs(VP8_COMMON *x);
void vp8_default_bmode_probs(vp8_prob dest [VP8_BINTRAMODES-1]);
void vp8_kf_default_bmode_probs(vp8_prob dest [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES-1]);

#ifdef __cplusplus
} // extern "C"
#endif

#endif // VP8_COMMON_ENTROPYMODE_H_
@@ -0,0 +1,49 @@
/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */


#include "entropymv.h"

const MV_CONTEXT vp8_mv_update_probs[2] =
{
    {{
        237,
        246,
        253, 253, 254, 254, 254, 254, 254,
        254, 254, 254, 254, 254, 250, 250, 252, 254, 254
    }},
    {{
        231,
        243,
        245, 253, 254, 254, 254, 254, 254,
        254, 254, 254, 254, 254, 251, 251, 254, 254, 254
    }}
};
const MV_CONTEXT vp8_default_mv_context[2] =
{
    {{
        /* row */
        162, /* is short */
        128, /* sign */
        225, 146, 172, 147, 214, 39, 156, /* short tree */
        128, 129, 132, 75, 145, 178, 206, 239, 254, 254 /* long bits */
    }},



    {{
        /* same for column */
        164, /* is short */
        128,
        204, 170, 119, 235, 140, 230, 228,
        128, 130, 130, 74, 148, 180, 203, 236, 254, 254 /* long bits */

    }}
};
@@ -0,0 +1,52 @@
/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */


#ifndef VP8_COMMON_ENTROPYMV_H_
#define VP8_COMMON_ENTROPYMV_H_

#include "treecoder.h"

#ifdef __cplusplus
extern "C" {
#endif

enum
{
    mv_max = 1023, /* max absolute value of a MV component */
    MVvals = (2 * mv_max) + 1, /* # possible values "" */
    mvfp_max = 255, /* max absolute value of a full pixel MV component */
    MVfpvals = (2 * mvfp_max) +1, /* # possible full pixel MV values */

    mvlong_width = 10, /* Large MVs have 9 bit magnitudes */
    mvnum_short = 8, /* magnitudes 0 through 7 */

    /* probability offsets for coding each MV component */

    mvpis_short = 0, /* short (<= 7) vs long (>= 8) */
    MVPsign, /* sign for non-zero */
    MVPshort, /* 8 short values = 7-position tree */

    MVPbits = MVPshort + mvnum_short - 1, /* mvlong_width long value bits */
    MVPcount = MVPbits + mvlong_width /* (with independent probabilities) */
};

typedef struct mv_context
{
    vp8_prob prob[MVPcount]; /* often come in row, col pairs */
} MV_CONTEXT;

extern const MV_CONTEXT vp8_mv_update_probs[2], vp8_default_mv_context[2];

#ifdef __cplusplus
} // extern "C"
#endif

#endif // VP8_COMMON_ENTROPYMV_H_
@@ -0,0 +1,188 @@
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "extend.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
|
||||
|
||||
static void copy_and_extend_plane
|
||||
(
|
||||
unsigned char *s, /* source */
|
||||
int sp, /* source pitch */
|
||||
unsigned char *d, /* destination */
|
||||
int dp, /* destination pitch */
|
||||
int h, /* height */
|
||||
int w, /* width */
|
||||
int et, /* extend top border */
|
||||
int el, /* extend left border */
|
||||
int eb, /* extend bottom border */
|
||||
int er /* extend right border */
|
||||
)
|
||||
{
|
||||
int i;
|
||||
unsigned char *src_ptr1, *src_ptr2;
|
||||
unsigned char *dest_ptr1, *dest_ptr2;
|
||||
int linesize;
|
||||
|
||||
/* copy the left and right most columns out */
|
||||
src_ptr1 = s;
|
||||
src_ptr2 = s + w - 1;
|
||||
dest_ptr1 = d - el;
|
||||
dest_ptr2 = d + w;
|
||||
|
||||
for (i = 0; i < h; i++)
|
||||
{
|
||||
memset(dest_ptr1, src_ptr1[0], el);
|
||||
memcpy(dest_ptr1 + el, src_ptr1, w);
|
||||
memset(dest_ptr2, src_ptr2[0], er);
|
||||
src_ptr1 += sp;
|
||||
src_ptr2 += sp;
|
||||
dest_ptr1 += dp;
|
||||
dest_ptr2 += dp;
|
||||
}
|
||||
|
||||
/* Now copy the top and bottom lines into each line of the respective
|
||||
* borders
|
||||
*/
|
||||
src_ptr1 = d - el;
|
||||
src_ptr2 = d + dp * (h - 1) - el;
|
||||
dest_ptr1 = d + dp * (-et) - el;
|
||||
dest_ptr2 = d + dp * (h) - el;
|
||||
linesize = el + er + w;
|
||||
|
||||
for (i = 0; i < et; i++)
|
||||
{
|
||||
memcpy(dest_ptr1, src_ptr1, linesize);
|
||||
dest_ptr1 += dp;
|
||||
}
|
||||
|
||||
for (i = 0; i < eb; i++)
|
||||
{
|
||||
memcpy(dest_ptr2, src_ptr2, linesize);
|
||||
dest_ptr2 += dp;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void vp8_copy_and_extend_frame(YV12_BUFFER_CONFIG *src,
|
||||
YV12_BUFFER_CONFIG *dst)
|
||||
{
|
||||
int et = dst->border;
|
||||
int el = dst->border;
|
||||
int eb = dst->border + dst->y_height - src->y_height;
|
||||
int er = dst->border + dst->y_width - src->y_width;
|
||||
|
||||
copy_and_extend_plane(src->y_buffer, src->y_stride,
|
||||
dst->y_buffer, dst->y_stride,
|
||||
src->y_height, src->y_width,
|
||||
et, el, eb, er);
|
||||
|
||||
et = dst->border >> 1;
|
||||
el = dst->border >> 1;
|
||||
eb = (dst->border >> 1) + dst->uv_height - src->uv_height;
|
||||
er = (dst->border >> 1) + dst->uv_width - src->uv_width;
|
||||
|
||||
copy_and_extend_plane(src->u_buffer, src->uv_stride,
|
||||
dst->u_buffer, dst->uv_stride,
|
||||
src->uv_height, src->uv_width,
|
||||
et, el, eb, er);
|
||||
|
||||
copy_and_extend_plane(src->v_buffer, src->uv_stride,
|
||||
dst->v_buffer, dst->uv_stride,
|
||||
src->uv_height, src->uv_width,
|
||||
et, el, eb, er);
|
||||
}
|
||||
|
||||
|
||||
void vp8_copy_and_extend_frame_with_rect(YV12_BUFFER_CONFIG *src,
|
||||
YV12_BUFFER_CONFIG *dst,
|
||||
int srcy, int srcx,
|
||||
int srch, int srcw)
|
||||
{
|
||||
int et = dst->border;
|
||||
int el = dst->border;
|
||||
int eb = dst->border + dst->y_height - src->y_height;
|
||||
int er = dst->border + dst->y_width - src->y_width;
|
||||
int src_y_offset = srcy * src->y_stride + srcx;
|
||||
int dst_y_offset = srcy * dst->y_stride + srcx;
|
||||
int src_uv_offset = ((srcy * src->uv_stride) >> 1) + (srcx >> 1);
|
||||
int dst_uv_offset = ((srcy * dst->uv_stride) >> 1) + (srcx >> 1);
|
||||
|
||||
/* If the side is not touching the boundary then don't extend. */
|
||||
if (srcy)
|
||||
et = 0;
|
||||
if (srcx)
|
||||
el = 0;
|
||||
if (srcy + srch != src->y_height)
|
||||
eb = 0;
|
||||
if (srcx + srcw != src->y_width)
|
||||
er = 0;
|
||||
|
||||
copy_and_extend_plane(src->y_buffer + src_y_offset,
|
||||
src->y_stride,
|
||||
dst->y_buffer + dst_y_offset,
|
||||
dst->y_stride,
|
||||
srch, srcw,
|
||||
et, el, eb, er);
|
||||
|
||||
et = (et + 1) >> 1;
|
||||
el = (el + 1) >> 1;
|
||||
eb = (eb + 1) >> 1;
|
||||
er = (er + 1) >> 1;
|
||||
srch = (srch + 1) >> 1;
|
||||
srcw = (srcw + 1) >> 1;
|
||||
|
||||
copy_and_extend_plane(src->u_buffer + src_uv_offset,
|
||||
src->uv_stride,
|
||||
dst->u_buffer + dst_uv_offset,
|
||||
dst->uv_stride,
|
||||
srch, srcw,
|
||||
et, el, eb, er);
|
||||
|
||||
copy_and_extend_plane(src->v_buffer + src_uv_offset,
|
||||
src->uv_stride,
|
||||
dst->v_buffer + dst_uv_offset,
|
||||
dst->uv_stride,
|
||||
srch, srcw,
|
||||
et, el, eb, er);
|
||||
}
|
||||
|
||||
|
||||
/* note the extension is only for the last row, for intra prediction purpose */
|
||||
void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf,
|
||||
unsigned char *YPtr,
|
||||
unsigned char *UPtr,
|
||||
unsigned char *VPtr)
|
||||
{
|
||||
int i;
|
||||
|
||||
YPtr += ybf->y_stride * 14;
|
||||
UPtr += ybf->uv_stride * 6;
|
||||
VPtr += ybf->uv_stride * 6;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
YPtr[i] = YPtr[-1];
|
||||
UPtr[i] = UPtr[-1];
|
||||
VPtr[i] = VPtr[-1];
|
||||
}
|
||||
|
||||
YPtr += ybf->y_stride;
|
||||
UPtr += ybf->uv_stride;
|
||||
VPtr += ybf->uv_stride;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
YPtr[i] = YPtr[-1];
|
||||
UPtr[i] = UPtr[-1];
|
||||
VPtr[i] = VPtr[-1];
|
||||
}
|
||||
}
@@ -0,0 +1,33 @@
/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */


#ifndef VP8_COMMON_EXTEND_H_
#define VP8_COMMON_EXTEND_H_

#include "vpx_scale/yv12config.h"

#ifdef __cplusplus
extern "C" {
#endif

void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf, unsigned char *YPtr, unsigned char *UPtr, unsigned char *VPtr);
void vp8_copy_and_extend_frame(YV12_BUFFER_CONFIG *src,
                               YV12_BUFFER_CONFIG *dst);
void vp8_copy_and_extend_frame_with_rect(YV12_BUFFER_CONFIG *src,
                                         YV12_BUFFER_CONFIG *dst,
                                         int srcy, int srcx,
                                         int srch, int srcw);

#ifdef __cplusplus
} // extern "C"
#endif

#endif // VP8_COMMON_EXTEND_H_
@@ -0,0 +1,493 @@
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "filter.h"
|
||||
#include "./vp8_rtcd.h"
|
||||
|
||||
DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) =
|
||||
{
|
||||
{ 128, 0 },
|
||||
{ 112, 16 },
|
||||
{ 96, 32 },
|
||||
{ 80, 48 },
|
||||
{ 64, 64 },
|
||||
{ 48, 80 },
|
||||
{ 32, 96 },
|
||||
{ 16, 112 }
|
||||
};
|
||||
|
||||
DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]) =
|
||||
{
|
||||
|
||||
{ 0, 0, 128, 0, 0, 0 }, /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */
|
||||
{ 0, -6, 123, 12, -1, 0 },
|
||||
{ 2, -11, 108, 36, -8, 1 }, /* New 1/4 pel 6 tap filter */
|
||||
{ 0, -9, 93, 50, -6, 0 },
|
||||
{ 3, -16, 77, 77, -16, 3 }, /* New 1/2 pel 6 tap filter */
|
||||
{ 0, -6, 50, 93, -9, 0 },
|
||||
{ 1, -8, 36, 108, -11, 2 }, /* New 1/4 pel 6 tap filter */
|
||||
{ 0, -1, 12, 123, -6, 0 },
|
||||
};
|
||||
|
||||
static void filter_block2d_first_pass
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int pixel_step,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const short *vp8_filter
|
||||
)
|
||||
{
|
||||
unsigned int i, j;
|
||||
int Temp;
|
||||
|
||||
for (i = 0; i < output_height; i++)
|
||||
{
|
||||
for (j = 0; j < output_width; j++)
|
||||
{
|
||||
Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
|
||||
((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
|
||||
((int)src_ptr[0] * vp8_filter[2]) +
|
||||
((int)src_ptr[pixel_step] * vp8_filter[3]) +
|
||||
((int)src_ptr[2*pixel_step] * vp8_filter[4]) +
|
||||
((int)src_ptr[3*pixel_step] * vp8_filter[5]) +
|
||||
(VP8_FILTER_WEIGHT >> 1); /* Rounding */
|
||||
|
||||
/* Normalize back to 0-255 */
|
||||
Temp = Temp >> VP8_FILTER_SHIFT;
|
||||
|
||||
if (Temp < 0)
|
||||
Temp = 0;
|
||||
else if (Temp > 255)
|
||||
Temp = 255;
|
||||
|
||||
output_ptr[j] = Temp;
|
||||
src_ptr++;
|
||||
}
|
||||
|
||||
/* Next row... */
|
||||
src_ptr += src_pixels_per_line - output_width;
|
||||
output_ptr += output_width;
|
||||
}
|
||||
}
|
||||
|
||||
static void filter_block2d_second_pass
|
||||
(
|
||||
int *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
int output_pitch,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int pixel_step,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const short *vp8_filter
|
||||
)
|
||||
{
|
||||
unsigned int i, j;
|
||||
int Temp;
|
||||
|
||||
for (i = 0; i < output_height; i++)
|
||||
{
|
||||
for (j = 0; j < output_width; j++)
|
||||
{
|
||||
/* Apply filter */
|
||||
Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
|
||||
((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
|
||||
((int)src_ptr[0] * vp8_filter[2]) +
|
||||
((int)src_ptr[pixel_step] * vp8_filter[3]) +
|
||||
((int)src_ptr[2*pixel_step] * vp8_filter[4]) +
|
||||
((int)src_ptr[3*pixel_step] * vp8_filter[5]) +
|
||||
(VP8_FILTER_WEIGHT >> 1); /* Rounding */
|
||||
|
||||
/* Normalize back to 0-255 */
|
||||
Temp = Temp >> VP8_FILTER_SHIFT;
|
||||
|
||||
if (Temp < 0)
|
||||
Temp = 0;
|
||||
else if (Temp > 255)
|
||||
Temp = 255;
|
||||
|
||||
output_ptr[j] = (unsigned char)Temp;
|
||||
src_ptr++;
|
||||
}
|
||||
|
||||
/* Start next row */
|
||||
src_ptr += src_pixels_per_line - output_width;
|
||||
output_ptr += output_pitch;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void filter_block2d
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
int output_pitch,
|
||||
const short *HFilter,
|
||||
const short *VFilter
|
||||
)
|
||||
{
|
||||
int FData[9*4]; /* Temp data buffer used in filtering */
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 4, HFilter);
|
||||
|
||||
/* then filter vertically... */
|
||||
filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4, VFilter);
|
||||
}
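/* A note on the buffer arithmetic above (illustration only): the 6-tap
 * vertical pass needs 2 rows above and 3 rows below every output row, so a
 * 4-row block needs 4 + 5 = 9 horizontally filtered rows -- hence FData[9*4]
 * and the "src_ptr - (2 * src_pixels_per_line)" starting point. Passing
 * "FData + 8" (two 4-wide rows) lets the second pass read
 * src_ptr[-2 * pixel_step] without stepping off the front of the buffer. */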
|
||||
|
||||
|
||||
void vp8_sixtap_predict4x4_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter);
|
||||
}
|
||||
void vp8_sixtap_predict8x8_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
int FData[13*16]; /* Temp data buffer used in filtering */
|
||||
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter);
|
||||
|
||||
|
||||
/* then filter vertically... */
|
||||
filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter);
|
||||
|
||||
}
|
||||
|
||||
void vp8_sixtap_predict8x4_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
int FData[13*16]; /* Temp data buffer used in filtering */
|
||||
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter);
|
||||
|
||||
|
||||
/* then filter vertically... */
|
||||
filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter);
|
||||
|
||||
}
|
||||
|
||||
void vp8_sixtap_predict16x16_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
int FData[21*24]; /* Temp data buffer used in filtering */
|
||||
|
||||
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter);
|
||||
|
||||
/* then filter vertically... */
|
||||
filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter);
|
||||
|
||||
}
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : filter_block2d_bil_first_pass
|
||||
*
|
||||
* INPUTS : UINT8 *src_ptr : Pointer to source block.
|
||||
* UINT32 src_stride : Stride of source block.
|
||||
* UINT32 height : Block height.
|
||||
* UINT32 width : Block width.
|
||||
* INT32 *vp8_filter : Array of 2 bi-linear filter taps.
|
||||
*
|
||||
* OUTPUTS : UINT16 *dst_ptr : Pointer to filtered block.
|
||||
*
|
||||
* RETURNS : void
|
||||
*
|
||||
* FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block
|
||||
* in the horizontal direction to produce the filtered output
|
||||
* block. Used to implement first-pass of 2-D separable filter.
|
||||
*
|
||||
* SPECIAL NOTES : Produces UINT16 output to retain precision for next pass.
|
||||
* Two filter taps should sum to VP8_FILTER_WEIGHT.
|
||||
*
|
||||
****************************************************************************/
|
||||
static void filter_block2d_bil_first_pass
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned short *dst_ptr,
|
||||
unsigned int src_stride,
|
||||
unsigned int height,
|
||||
unsigned int width,
|
||||
const short *vp8_filter
|
||||
)
|
||||
{
|
||||
unsigned int i, j;
|
||||
|
||||
for (i = 0; i < height; i++)
|
||||
{
|
||||
for (j = 0; j < width; j++)
|
||||
{
|
||||
/* Apply bilinear filter */
|
||||
dst_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) +
|
||||
((int)src_ptr[1] * vp8_filter[1]) +
|
||||
(VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT;
|
||||
src_ptr++;
|
||||
}
|
||||
|
||||
/* Next row... */
|
||||
src_ptr += src_stride - width;
|
||||
dst_ptr += width;
|
||||
}
|
||||
}
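/* Illustration, using the tap values from vp8_bilinear_filters above: at the
 * half-pel offset the taps are { 64, 64 }, so each output is
 *   dst_ptr[j] = (64 * src_ptr[0] + 64 * src_ptr[1] + 64) >> 7,
 * i.e. a rounded average of the two neighbouring pixels, while offset 0 uses
 * { 128, 0 } and simply copies the source. */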
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : filter_block2d_bil_second_pass
|
||||
*
|
||||
* INPUTS : UINT16 *src_ptr : Pointer to source block.
|
||||
* UINT32 dst_pitch : Destination block pitch.
|
||||
* UINT32 height : Block height.
|
||||
* UINT32 width : Block width.
|
||||
* INT32 *vp8_filter : Array of 2 bi-linear filter taps.
|
||||
*
|
||||
* OUTPUTS : UINT8 *dst_ptr : Pointer to filtered block.
|
||||
*
|
||||
* RETURNS : void
|
||||
*
|
||||
* FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block
|
||||
* in the vertical direction to produce the filtered output
|
||||
* block. Used to implement second-pass of 2-D separable filter.
|
||||
*
|
||||
* SPECIAL NOTES : Requires 16-bit input as produced by filter_block2d_bil_first_pass.
|
||||
* Two filter taps should sum to VP8_FILTER_WEIGHT.
|
||||
*
|
||||
****************************************************************************/
|
||||
static void filter_block2d_bil_second_pass
|
||||
(
|
||||
unsigned short *src_ptr,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch,
|
||||
unsigned int height,
|
||||
unsigned int width,
|
||||
const short *vp8_filter
|
||||
)
|
||||
{
|
||||
unsigned int i, j;
|
||||
int Temp;
|
||||
|
||||
for (i = 0; i < height; i++)
|
||||
{
|
||||
for (j = 0; j < width; j++)
|
||||
{
|
||||
/* Apply filter */
|
||||
Temp = ((int)src_ptr[0] * vp8_filter[0]) +
|
||||
((int)src_ptr[width] * vp8_filter[1]) +
|
||||
(VP8_FILTER_WEIGHT / 2);
|
||||
dst_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT);
|
||||
src_ptr++;
|
||||
}
|
||||
|
||||
/* Next row... */
|
||||
dst_ptr += dst_pitch;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : filter_block2d_bil
|
||||
*
|
||||
* INPUTS : UINT8 *src_ptr : Pointer to source block.
|
||||
* UINT32 src_pitch : Stride of source block.
|
||||
* UINT32 dst_pitch : Stride of destination block.
|
||||
* INT32 *HFilter : Array of 2 horizontal filter taps.
|
||||
* INT32 *VFilter : Array of 2 vertical filter taps.
|
||||
* INT32 Width : Block width
|
||||
* INT32 Height : Block height
|
||||
*
|
||||
* OUTPUTS : UINT8 *dst_ptr : Pointer to filtered block.
|
||||
*
|
||||
* RETURNS : void
|
||||
*
|
||||
* FUNCTION : 2-D filters an input block by applying a 2-tap
|
||||
* bi-linear filter horizontally followed by a 2-tap
|
||||
* bi-linear filter vertically on the result.
|
||||
*
|
||||
* SPECIAL NOTES : The largest block size that can be handled here is 16x16
|
||||
*
|
||||
****************************************************************************/
|
||||
static void filter_block2d_bil
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned char *dst_ptr,
|
||||
unsigned int src_pitch,
|
||||
unsigned int dst_pitch,
|
||||
const short *HFilter,
|
||||
const short *VFilter,
|
||||
int Width,
|
||||
int Height
|
||||
)
|
||||
{
|
||||
|
||||
unsigned short FData[17*16]; /* Temp data buffer used in filtering */
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, Height + 1, Width, HFilter);
|
||||
|
||||
/* then 1-D vertically... */
|
||||
filter_block2d_bil_second_pass(FData, dst_ptr, dst_pitch, Height, Width, VFilter);
|
||||
}
|
||||
|
||||
|
||||
void vp8_bilinear_predict4x4_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
#if 0
|
||||
{
|
||||
int i;
|
||||
unsigned char temp1[16];
|
||||
unsigned char temp2[16];
|
||||
|
||||
bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4);
|
||||
filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
if (temp1[i] != temp2[i])
|
||||
{
|
||||
bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4);
|
||||
filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4);
|
||||
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict8x8_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8);
|
||||
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict8x4_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4);
|
||||
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict16x16_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16);
|
||||
}
|
|
@@ -0,0 +1,32 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_FILTER_H_
|
||||
#define VP8_COMMON_FILTER_H_
|
||||
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define BLOCK_HEIGHT_WIDTH 4
|
||||
#define VP8_FILTER_WEIGHT 128
|
||||
#define VP8_FILTER_SHIFT 7
|
||||
|
||||
extern DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]);
|
||||
extern DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_FILTER_H_
|
|
@@ -0,0 +1,193 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "findnearmv.h"
|
||||
|
||||
const unsigned char vp8_mbsplit_offset[4][16] = {
|
||||
{ 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{ 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{ 0, 2, 8, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}
|
||||
};
|
||||
|
||||
/* Predict motion vectors using those from already-decoded nearby blocks.
|
||||
Note that we only consider one 4x4 subblock from each candidate 16x16
|
||||
macroblock. */
|
||||
void vp8_find_near_mvs
|
||||
(
|
||||
MACROBLOCKD *xd,
|
||||
const MODE_INFO *here,
|
||||
int_mv *nearest,
|
||||
int_mv *nearby,
|
||||
int_mv *best_mv,
|
||||
int cnt[4],
|
||||
int refframe,
|
||||
int *ref_frame_sign_bias
|
||||
)
|
||||
{
|
||||
const MODE_INFO *above = here - xd->mode_info_stride;
|
||||
const MODE_INFO *left = here - 1;
|
||||
const MODE_INFO *aboveleft = above - 1;
|
||||
int_mv near_mvs[4];
|
||||
int_mv *mv = near_mvs;
|
||||
int *cntx = cnt;
|
||||
enum {CNT_INTRA, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV};
|
||||
|
||||
/* Zero accumulators */
|
||||
mv[0].as_int = mv[1].as_int = mv[2].as_int = 0;
|
||||
cnt[0] = cnt[1] = cnt[2] = cnt[3] = 0;
|
||||
|
||||
/* Process above */
|
||||
if (above->mbmi.ref_frame != INTRA_FRAME)
|
||||
{
|
||||
if (above->mbmi.mv.as_int)
|
||||
{
|
||||
(++mv)->as_int = above->mbmi.mv.as_int;
|
||||
mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame], refframe, mv, ref_frame_sign_bias);
|
||||
++cntx;
|
||||
}
|
||||
|
||||
*cntx += 2;
|
||||
}
|
||||
|
||||
/* Process left */
|
||||
if (left->mbmi.ref_frame != INTRA_FRAME)
|
||||
{
|
||||
if (left->mbmi.mv.as_int)
|
||||
{
|
||||
int_mv this_mv;
|
||||
|
||||
this_mv.as_int = left->mbmi.mv.as_int;
|
||||
mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame], refframe, &this_mv, ref_frame_sign_bias);
|
||||
|
||||
if (this_mv.as_int != mv->as_int)
|
||||
{
|
||||
(++mv)->as_int = this_mv.as_int;
|
||||
++cntx;
|
||||
}
|
||||
|
||||
*cntx += 2;
|
||||
}
|
||||
else
|
||||
cnt[CNT_INTRA] += 2;
|
||||
}
|
||||
|
||||
/* Process above left */
|
||||
if (aboveleft->mbmi.ref_frame != INTRA_FRAME)
|
||||
{
|
||||
if (aboveleft->mbmi.mv.as_int)
|
||||
{
|
||||
int_mv this_mv;
|
||||
|
||||
this_mv.as_int = aboveleft->mbmi.mv.as_int;
|
||||
mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame], refframe, &this_mv, ref_frame_sign_bias);
|
||||
|
||||
if (this_mv.as_int != mv->as_int)
|
||||
{
|
||||
(++mv)->as_int = this_mv.as_int;
|
||||
++cntx;
|
||||
}
|
||||
|
||||
*cntx += 1;
|
||||
}
|
||||
else
|
||||
cnt[CNT_INTRA] += 1;
|
||||
}
|
||||
|
||||
/* If we have three distinct MV's ... */
|
||||
if (cnt[CNT_SPLITMV])
|
||||
{
|
||||
/* See if above-left MV can be merged with NEAREST */
|
||||
if (mv->as_int == near_mvs[CNT_NEAREST].as_int)
|
||||
cnt[CNT_NEAREST] += 1;
|
||||
}
|
||||
|
||||
cnt[CNT_SPLITMV] = ((above->mbmi.mode == SPLITMV)
|
||||
+ (left->mbmi.mode == SPLITMV)) * 2
|
||||
+ (aboveleft->mbmi.mode == SPLITMV);
|
||||
|
||||
/* Swap near and nearest if necessary */
|
||||
if (cnt[CNT_NEAR] > cnt[CNT_NEAREST])
|
||||
{
|
||||
int tmp;
|
||||
tmp = cnt[CNT_NEAREST];
|
||||
cnt[CNT_NEAREST] = cnt[CNT_NEAR];
|
||||
cnt[CNT_NEAR] = tmp;
|
||||
tmp = near_mvs[CNT_NEAREST].as_int;
|
||||
near_mvs[CNT_NEAREST].as_int = near_mvs[CNT_NEAR].as_int;
|
||||
near_mvs[CNT_NEAR].as_int = tmp;
|
||||
}
|
||||
|
||||
/* Use near_mvs[0] to store the "best" MV */
|
||||
if (cnt[CNT_NEAREST] >= cnt[CNT_INTRA])
|
||||
near_mvs[CNT_INTRA] = near_mvs[CNT_NEAREST];
|
||||
|
||||
/* Set up return values */
|
||||
best_mv->as_int = near_mvs[0].as_int;
|
||||
nearest->as_int = near_mvs[CNT_NEAREST].as_int;
|
||||
nearby->as_int = near_mvs[CNT_NEAR].as_int;
|
||||
}
|
||||
|
||||
|
||||
static void invert_and_clamp_mvs(int_mv *inv, int_mv *src, MACROBLOCKD *xd)
|
||||
{
|
||||
inv->as_mv.row = src->as_mv.row * -1;
|
||||
inv->as_mv.col = src->as_mv.col * -1;
|
||||
vp8_clamp_mv2(inv, xd);
|
||||
vp8_clamp_mv2(src, xd);
|
||||
}
|
||||
|
||||
|
||||
int vp8_find_near_mvs_bias
|
||||
(
|
||||
MACROBLOCKD *xd,
|
||||
const MODE_INFO *here,
|
||||
int_mv mode_mv_sb[2][MB_MODE_COUNT],
|
||||
int_mv best_mv_sb[2],
|
||||
int cnt[4],
|
||||
int refframe,
|
||||
int *ref_frame_sign_bias
|
||||
)
|
||||
{
|
||||
int sign_bias = ref_frame_sign_bias[refframe];
|
||||
|
||||
vp8_find_near_mvs(xd,
|
||||
here,
|
||||
&mode_mv_sb[sign_bias][NEARESTMV],
|
||||
&mode_mv_sb[sign_bias][NEARMV],
|
||||
&best_mv_sb[sign_bias],
|
||||
cnt,
|
||||
refframe,
|
||||
ref_frame_sign_bias);
|
||||
|
||||
invert_and_clamp_mvs(&mode_mv_sb[!sign_bias][NEARESTMV],
|
||||
&mode_mv_sb[sign_bias][NEARESTMV], xd);
|
||||
invert_and_clamp_mvs(&mode_mv_sb[!sign_bias][NEARMV],
|
||||
&mode_mv_sb[sign_bias][NEARMV], xd);
|
||||
invert_and_clamp_mvs(&best_mv_sb[!sign_bias],
|
||||
&best_mv_sb[sign_bias], xd);
|
||||
|
||||
return sign_bias;
|
||||
}
|
||||
|
||||
|
||||
vp8_prob *vp8_mv_ref_probs(
|
||||
vp8_prob p[VP8_MVREFS-1], const int near_mv_ref_ct[4]
|
||||
)
|
||||
{
|
||||
p[0] = vp8_mode_contexts [near_mv_ref_ct[0]] [0];
|
||||
p[1] = vp8_mode_contexts [near_mv_ref_ct[1]] [1];
|
||||
p[2] = vp8_mode_contexts [near_mv_ref_ct[2]] [2];
|
||||
p[3] = vp8_mode_contexts [near_mv_ref_ct[3]] [3];
|
||||
/*p[3] = vp8_mode_contexts [near_mv_ref_ct[1] + near_mv_ref_ct[2] + near_mv_ref_ct[3]] [3];*/
|
||||
return p;
|
||||
}
|
||||
|
|
@@ -0,0 +1,195 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_FINDNEARMV_H_
|
||||
#define VP8_COMMON_FINDNEARMV_H_
|
||||
|
||||
#include "./vpx_config.h"
|
||||
#include "mv.h"
|
||||
#include "blockd.h"
|
||||
#include "modecont.h"
|
||||
#include "treecoder.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
static INLINE void mv_bias(int refmb_ref_frame_sign_bias, int refframe,
|
||||
int_mv *mvp, const int *ref_frame_sign_bias)
|
||||
{
|
||||
if (refmb_ref_frame_sign_bias != ref_frame_sign_bias[refframe])
|
||||
{
|
||||
mvp->as_mv.row *= -1;
|
||||
mvp->as_mv.col *= -1;
|
||||
}
|
||||
}
|
||||
|
||||
#define LEFT_TOP_MARGIN (16 << 3)
|
||||
#define RIGHT_BOTTOM_MARGIN (16 << 3)
|
||||
static INLINE void vp8_clamp_mv2(int_mv *mv, const MACROBLOCKD *xd)
|
||||
{
|
||||
if (mv->as_mv.col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN))
|
||||
mv->as_mv.col = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
|
||||
else if (mv->as_mv.col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN)
|
||||
mv->as_mv.col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN;
|
||||
|
||||
if (mv->as_mv.row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN))
|
||||
mv->as_mv.row = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
|
||||
else if (mv->as_mv.row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN)
|
||||
mv->as_mv.row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;
|
||||
}
|
||||
|
||||
static INLINE void vp8_clamp_mv(int_mv *mv, int mb_to_left_edge,
|
||||
int mb_to_right_edge, int mb_to_top_edge,
|
||||
int mb_to_bottom_edge)
|
||||
{
|
||||
mv->as_mv.col = (mv->as_mv.col < mb_to_left_edge) ?
|
||||
mb_to_left_edge : mv->as_mv.col;
|
||||
mv->as_mv.col = (mv->as_mv.col > mb_to_right_edge) ?
|
||||
mb_to_right_edge : mv->as_mv.col;
|
||||
mv->as_mv.row = (mv->as_mv.row < mb_to_top_edge) ?
|
||||
mb_to_top_edge : mv->as_mv.row;
|
||||
mv->as_mv.row = (mv->as_mv.row > mb_to_bottom_edge) ?
|
||||
mb_to_bottom_edge : mv->as_mv.row;
|
||||
}
|
||||
static INLINE unsigned int vp8_check_mv_bounds(int_mv *mv, int mb_to_left_edge,
|
||||
int mb_to_right_edge,
|
||||
int mb_to_top_edge,
|
||||
int mb_to_bottom_edge)
|
||||
{
|
||||
unsigned int need_to_clamp;
|
||||
need_to_clamp = (mv->as_mv.col < mb_to_left_edge);
|
||||
need_to_clamp |= (mv->as_mv.col > mb_to_right_edge);
|
||||
need_to_clamp |= (mv->as_mv.row < mb_to_top_edge);
|
||||
need_to_clamp |= (mv->as_mv.row > mb_to_bottom_edge);
|
||||
return need_to_clamp;
|
||||
}
|
||||
|
||||
void vp8_find_near_mvs
|
||||
(
|
||||
MACROBLOCKD *xd,
|
||||
const MODE_INFO *here,
|
||||
int_mv *nearest, int_mv *nearby, int_mv *best,
|
||||
int near_mv_ref_cts[4],
|
||||
int refframe,
|
||||
int *ref_frame_sign_bias
|
||||
);
|
||||
|
||||
|
||||
int vp8_find_near_mvs_bias
|
||||
(
|
||||
MACROBLOCKD *xd,
|
||||
const MODE_INFO *here,
|
||||
int_mv mode_mv_sb[2][MB_MODE_COUNT],
|
||||
int_mv best_mv_sb[2],
|
||||
int cnt[4],
|
||||
int refframe,
|
||||
int *ref_frame_sign_bias
|
||||
);
|
||||
|
||||
|
||||
vp8_prob *vp8_mv_ref_probs(
|
||||
vp8_prob p[VP8_MVREFS-1], const int near_mv_ref_ct[4]
|
||||
);
|
||||
|
||||
extern const unsigned char vp8_mbsplit_offset[4][16];
|
||||
|
||||
|
||||
static INLINE uint32_t left_block_mv(const MODE_INFO *cur_mb, int b)
|
||||
{
|
||||
if (!(b & 3))
|
||||
{
|
||||
/* On L edge, get from MB to left of us */
|
||||
--cur_mb;
|
||||
|
||||
if(cur_mb->mbmi.mode != SPLITMV)
|
||||
return cur_mb->mbmi.mv.as_int;
|
||||
b += 4;
|
||||
}
|
||||
|
||||
return (cur_mb->bmi + b - 1)->mv.as_int;
|
||||
}
|
||||
|
||||
static INLINE uint32_t above_block_mv(const MODE_INFO *cur_mb, int b,
|
||||
int mi_stride)
|
||||
{
|
||||
if (!(b >> 2))
|
||||
{
|
||||
/* On top edge, get from MB above us */
|
||||
cur_mb -= mi_stride;
|
||||
|
||||
if(cur_mb->mbmi.mode != SPLITMV)
|
||||
return cur_mb->mbmi.mv.as_int;
|
||||
b += 16;
|
||||
}
|
||||
|
||||
return (cur_mb->bmi + (b - 4))->mv.as_int;
|
||||
}
|
||||
static INLINE B_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb, int b)
|
||||
{
|
||||
if (!(b & 3))
|
||||
{
|
||||
/* On L edge, get from MB to left of us */
|
||||
--cur_mb;
|
||||
switch (cur_mb->mbmi.mode)
|
||||
{
|
||||
case B_PRED:
|
||||
return (cur_mb->bmi + b + 3)->as_mode;
|
||||
case DC_PRED:
|
||||
return B_DC_PRED;
|
||||
case V_PRED:
|
||||
return B_VE_PRED;
|
||||
case H_PRED:
|
||||
return B_HE_PRED;
|
||||
case TM_PRED:
|
||||
return B_TM_PRED;
|
||||
default:
|
||||
return B_DC_PRED;
|
||||
}
|
||||
}
|
||||
|
||||
return (cur_mb->bmi + b - 1)->as_mode;
|
||||
}
|
||||
|
||||
static INLINE B_PREDICTION_MODE above_block_mode(const MODE_INFO *cur_mb, int b,
|
||||
int mi_stride)
|
||||
{
|
||||
if (!(b >> 2))
|
||||
{
|
||||
/* On top edge, get from MB above us */
|
||||
cur_mb -= mi_stride;
|
||||
|
||||
switch (cur_mb->mbmi.mode)
|
||||
{
|
||||
case B_PRED:
|
||||
return (cur_mb->bmi + b + 12)->as_mode;
|
||||
case DC_PRED:
|
||||
return B_DC_PRED;
|
||||
case V_PRED:
|
||||
return B_VE_PRED;
|
||||
case H_PRED:
|
||||
return B_HE_PRED;
|
||||
case TM_PRED:
|
||||
return B_TM_PRED;
|
||||
default:
|
||||
return B_DC_PRED;
|
||||
}
|
||||
}
|
||||
|
||||
return (cur_mb->bmi + b - 4)->as_mode;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_FINDNEARMV_H_
|
|
@@ -0,0 +1,106 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#if ARCH_ARM
|
||||
#include "vpx_ports/arm.h"
|
||||
#elif ARCH_X86 || ARCH_X86_64
|
||||
#include "vpx_ports/x86.h"
|
||||
#endif
|
||||
#include "vp8/common/onyxc_int.h"
|
||||
#include "vp8/common/systemdependent.h"
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
#if HAVE_UNISTD_H && !defined(__OS2__)
|
||||
#include <unistd.h>
|
||||
#elif defined(_WIN32)
|
||||
#include <windows.h>
|
||||
typedef void (WINAPI *PGNSI)(LPSYSTEM_INFO);
|
||||
#elif defined(__OS2__)
|
||||
#define INCL_DOS
|
||||
#define INCL_DOSSPINLOCK
|
||||
#include <os2.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
static int get_cpu_count()
|
||||
{
|
||||
int core_count = 16;
|
||||
|
||||
#if HAVE_UNISTD_H && !defined(__OS2__)
|
||||
#if defined(_SC_NPROCESSORS_ONLN)
|
||||
core_count = sysconf(_SC_NPROCESSORS_ONLN);
|
||||
#elif defined(_SC_NPROC_ONLN)
|
||||
core_count = sysconf(_SC_NPROC_ONLN);
|
||||
#endif
|
||||
#elif defined(_WIN32)
|
||||
{
|
||||
#if _WIN32_WINNT >= 0x0501
|
||||
SYSTEM_INFO sysinfo;
|
||||
GetNativeSystemInfo(&sysinfo);
|
||||
#else
|
||||
PGNSI pGNSI;
|
||||
SYSTEM_INFO sysinfo;
|
||||
|
||||
/* Call GetNativeSystemInfo if supported or
|
||||
* GetSystemInfo otherwise. */
|
||||
|
||||
pGNSI = (PGNSI) GetProcAddress(
|
||||
GetModuleHandle(TEXT("kernel32.dll")), "GetNativeSystemInfo");
|
||||
if (pGNSI != NULL)
|
||||
pGNSI(&sysinfo);
|
||||
else
|
||||
GetSystemInfo(&sysinfo);
|
||||
#endif
|
||||
|
||||
core_count = sysinfo.dwNumberOfProcessors;
|
||||
}
|
||||
#elif defined(__OS2__)
|
||||
{
|
||||
ULONG proc_id;
|
||||
ULONG status;
|
||||
|
||||
core_count = 0;
|
||||
for (proc_id = 1; ; proc_id++)
|
||||
{
|
||||
if (DosGetProcessorStatus(proc_id, &status))
|
||||
break;
|
||||
|
||||
if (status == PROC_ONLINE)
|
||||
core_count++;
|
||||
}
|
||||
}
|
||||
#else
|
||||
/* other platforms */
|
||||
#endif
|
||||
|
||||
return core_count > 0 ? core_count : 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
void vp8_clear_system_state_c() {};
|
||||
|
||||
void vp8_machine_specific_config(VP8_COMMON *ctx)
|
||||
{
|
||||
#if CONFIG_MULTITHREAD
|
||||
ctx->processor_core_count = get_cpu_count();
|
||||
#else
|
||||
(void)ctx;
|
||||
#endif /* CONFIG_MULTITHREAD */
|
||||
|
||||
#if ARCH_ARM
|
||||
ctx->cpu_caps = arm_cpu_caps();
|
||||
#elif ARCH_X86 || ARCH_X86_64
|
||||
ctx->cpu_caps = x86_simd_caps();
|
||||
#endif
|
||||
}
|
|
@@ -0,0 +1,51 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_HEADER_H_
|
||||
#define VP8_COMMON_HEADER_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* 24 bits total */
|
||||
typedef struct
|
||||
{
|
||||
unsigned int type: 1;
|
||||
unsigned int version: 3;
|
||||
unsigned int show_frame: 1;
|
||||
|
||||
/* Allow 2^20 bytes = 8 megabits for first partition */
|
||||
|
||||
unsigned int first_partition_length_in_bytes: 19;
|
||||
|
||||
#ifdef PACKET_TESTING
|
||||
unsigned int frame_number;
|
||||
unsigned int update_gold: 1;
|
||||
unsigned int uses_gold: 1;
|
||||
unsigned int update_last: 1;
|
||||
unsigned int uses_last: 1;
|
||||
#endif
|
||||
|
||||
} VP8_HEADER;
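/* Quick check: 1 + 3 + 1 + 19 = 24 bits, which matches the "24 bits total"
 * note above and the 3-byte VP8_HEADER_SIZE defined below for the
 * non-PACKET_TESTING case. */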
|
||||
|
||||
#ifdef PACKET_TESTING
|
||||
#define VP8_HEADER_SIZE 8
|
||||
#else
|
||||
#define VP8_HEADER_SIZE 3
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_HEADER_H_
|
|
@@ -0,0 +1,90 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
|
||||
void vp8_dequant_idct_add_c(short *input, short *dq,
|
||||
unsigned char *dest, int stride);
|
||||
void vp8_dc_only_idct_add_c(short input_dc, unsigned char * pred,
|
||||
int pred_stride, unsigned char *dst_ptr,
|
||||
int dst_stride);
|
||||
|
||||
void vp8_dequant_idct_add_y_block_c
|
||||
(short *q, short *dq,
|
||||
unsigned char *dst, int stride, char *eobs)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
for (j = 0; j < 4; j++)
|
||||
{
|
||||
if (*eobs++ > 1)
|
||||
vp8_dequant_idct_add_c (q, dq, dst, stride);
|
||||
else
|
||||
{
|
||||
vp8_dc_only_idct_add_c (q[0]*dq[0], dst, stride, dst, stride);
|
||||
memset(q, 0, 2 * sizeof(q[0]));
|
||||
}
|
||||
|
||||
q += 16;
|
||||
dst += 4;
|
||||
}
|
||||
|
||||
dst += 4*stride - 16;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_dequant_idct_add_uv_block_c
|
||||
(short *q, short *dq,
|
||||
unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
for (j = 0; j < 2; j++)
|
||||
{
|
||||
if (*eobs++ > 1)
|
||||
vp8_dequant_idct_add_c (q, dq, dstu, stride);
|
||||
else
|
||||
{
|
||||
vp8_dc_only_idct_add_c (q[0]*dq[0], dstu, stride, dstu, stride);
|
||||
memset(q, 0, 2 * sizeof(q[0]));
|
||||
}
|
||||
|
||||
q += 16;
|
||||
dstu += 4;
|
||||
}
|
||||
|
||||
dstu += 4*stride - 8;
|
||||
}
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
for (j = 0; j < 2; j++)
|
||||
{
|
||||
if (*eobs++ > 1)
|
||||
vp8_dequant_idct_add_c (q, dq, dstv, stride);
|
||||
else
|
||||
{
|
||||
vp8_dc_only_idct_add_c (q[0]*dq[0], dstv, stride, dstv, stride);
|
||||
memset(q, 0, 2 * sizeof(q[0]));
|
||||
}
|
||||
|
||||
q += 16;
|
||||
dstv += 4;
|
||||
}
|
||||
|
||||
dstv += 4*stride - 8;
|
||||
}
|
||||
}
|
|
@@ -0,0 +1,205 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "./vp8_rtcd.h"
|
||||
|
||||
/****************************************************************************
|
||||
* Notes:
|
||||
*
|
||||
* This implementation makes use of a 16 bit fixed point version of two multiply
|
||||
* constants:
|
||||
* 1. sqrt(2) * cos (pi/8)
|
||||
* 2. sqrt(2) * sin (pi/8)
|
||||
* Because the first constant is bigger than 1, to maintain the same 16 bit
|
||||
* fixed point precision as the second one, we use a trick of
|
||||
* x * a = x + x*(a-1)
|
||||
* so
|
||||
* x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1).
|
||||
**************************************************************************/
|
||||
static const int cospi8sqrt2minus1 = 20091;
|
||||
static const int sinpi8sqrt2 = 35468;
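/* Where the constants appear to come from (illustrative derivation):
 * sqrt(2) * cos(pi/8) ~= 1.30656 and sqrt(2) * sin(pi/8) ~= 0.54120, so
 * (1.30656 - 1) * 65536 ~= 20091 and 0.54120 * 65536 ~= 35468. Thus
 * "x + ((x * cospi8sqrt2minus1) >> 16)" approximates x * sqrt(2) * cos(pi/8)
 * and "(x * sinpi8sqrt2) >> 16" approximates x * sqrt(2) * sin(pi/8). */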
|
||||
|
||||
void vp8_short_idct4x4llm_c(short *input, unsigned char *pred_ptr,
|
||||
int pred_stride, unsigned char *dst_ptr,
|
||||
int dst_stride)
|
||||
{
|
||||
int i;
|
||||
int r, c;
|
||||
int a1, b1, c1, d1;
|
||||
short output[16];
|
||||
short *ip = input;
|
||||
short *op = output;
|
||||
int temp1, temp2;
|
||||
int shortpitch = 4;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
a1 = ip[0] + ip[8];
|
||||
b1 = ip[0] - ip[8];
|
||||
|
||||
temp1 = (ip[4] * sinpi8sqrt2) >> 16;
|
||||
temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1) >> 16);
|
||||
c1 = temp1 - temp2;
|
||||
|
||||
temp1 = ip[4] + ((ip[4] * cospi8sqrt2minus1) >> 16);
|
||||
temp2 = (ip[12] * sinpi8sqrt2) >> 16;
|
||||
d1 = temp1 + temp2;
|
||||
|
||||
op[shortpitch*0] = a1 + d1;
|
||||
op[shortpitch*3] = a1 - d1;
|
||||
|
||||
op[shortpitch*1] = b1 + c1;
|
||||
op[shortpitch*2] = b1 - c1;
|
||||
|
||||
ip++;
|
||||
op++;
|
||||
}
|
||||
|
||||
ip = output;
|
||||
op = output;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
a1 = ip[0] + ip[2];
|
||||
b1 = ip[0] - ip[2];
|
||||
|
||||
temp1 = (ip[1] * sinpi8sqrt2) >> 16;
|
||||
temp2 = ip[3] + ((ip[3] * cospi8sqrt2minus1) >> 16);
|
||||
c1 = temp1 - temp2;
|
||||
|
||||
temp1 = ip[1] + ((ip[1] * cospi8sqrt2minus1) >> 16);
|
||||
temp2 = (ip[3] * sinpi8sqrt2) >> 16;
|
||||
d1 = temp1 + temp2;
|
||||
|
||||
|
||||
op[0] = (a1 + d1 + 4) >> 3;
|
||||
op[3] = (a1 - d1 + 4) >> 3;
|
||||
|
||||
op[1] = (b1 + c1 + 4) >> 3;
|
||||
op[2] = (b1 - c1 + 4) >> 3;
|
||||
|
||||
ip += shortpitch;
|
||||
op += shortpitch;
|
||||
}
|
||||
|
||||
ip = output;
|
||||
for (r = 0; r < 4; r++)
|
||||
{
|
||||
for (c = 0; c < 4; c++)
|
||||
{
|
||||
int a = ip[c] + pred_ptr[c] ;
|
||||
|
||||
if (a < 0)
|
||||
a = 0;
|
||||
|
||||
if (a > 255)
|
||||
a = 255;
|
||||
|
||||
dst_ptr[c] = (unsigned char) a ;
|
||||
}
|
||||
ip += 4;
|
||||
dst_ptr += dst_stride;
|
||||
pred_ptr += pred_stride;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr,
|
||||
int pred_stride, unsigned char *dst_ptr,
|
||||
int dst_stride)
|
||||
{
|
||||
int a1 = ((input_dc + 4) >> 3);
|
||||
int r, c;
|
||||
|
||||
for (r = 0; r < 4; r++)
|
||||
{
|
||||
for (c = 0; c < 4; c++)
|
||||
{
|
||||
int a = a1 + pred_ptr[c] ;
|
||||
|
||||
if (a < 0)
|
||||
a = 0;
|
||||
|
||||
if (a > 255)
|
||||
a = 255;
|
||||
|
||||
dst_ptr[c] = (unsigned char) a ;
|
||||
}
|
||||
|
||||
dst_ptr += dst_stride;
|
||||
pred_ptr += pred_stride;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void vp8_short_inv_walsh4x4_c(short *input, short *mb_dqcoeff)
|
||||
{
|
||||
short output[16];
|
||||
int i;
|
||||
int a1, b1, c1, d1;
|
||||
int a2, b2, c2, d2;
|
||||
short *ip = input;
|
||||
short *op = output;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
a1 = ip[0] + ip[12];
|
||||
b1 = ip[4] + ip[8];
|
||||
c1 = ip[4] - ip[8];
|
||||
d1 = ip[0] - ip[12];
|
||||
|
||||
op[0] = a1 + b1;
|
||||
op[4] = c1 + d1;
|
||||
op[8] = a1 - b1;
|
||||
op[12] = d1 - c1;
|
||||
ip++;
|
||||
op++;
|
||||
}
|
||||
|
||||
ip = output;
|
||||
op = output;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
a1 = ip[0] + ip[3];
|
||||
b1 = ip[1] + ip[2];
|
||||
c1 = ip[1] - ip[2];
|
||||
d1 = ip[0] - ip[3];
|
||||
|
||||
a2 = a1 + b1;
|
||||
b2 = c1 + d1;
|
||||
c2 = a1 - b1;
|
||||
d2 = d1 - c1;
|
||||
|
||||
op[0] = (a2 + 3) >> 3;
|
||||
op[1] = (b2 + 3) >> 3;
|
||||
op[2] = (c2 + 3) >> 3;
|
||||
op[3] = (d2 + 3) >> 3;
|
||||
|
||||
ip += 4;
|
||||
op += 4;
|
||||
}
|
||||
|
||||
for(i = 0; i < 16; i++)
|
||||
{
|
||||
mb_dqcoeff[i * 16] = output[i];
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_short_inv_walsh4x4_1_c(short *input, short *mb_dqcoeff)
|
||||
{
|
||||
int i;
|
||||
int a1;
|
||||
|
||||
a1 = ((input[0] + 3) >> 3);
|
||||
for(i = 0; i < 16; i++)
|
||||
{
|
||||
mb_dqcoeff[i * 16] = a1;
|
||||
}
|
||||
}
|
|
@@ -0,0 +1,70 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_INVTRANS_H_
|
||||
#define VP8_COMMON_INVTRANS_H_
|
||||
|
||||
#include "./vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "blockd.h"
|
||||
#include "onyxc_int.h"
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
static void eob_adjust(char *eobs, short *diff)
|
||||
{
|
||||
/* eob adjust.... the idct can only skip if both the dc and eob are zero */
|
||||
int js;
|
||||
for(js = 0; js < 16; js++)
|
||||
{
|
||||
if((eobs[js] == 0) && (diff[0] != 0))
|
||||
eobs[js]++;
|
||||
diff+=16;
|
||||
}
|
||||
}
|
||||
|
||||
static INLINE void vp8_inverse_transform_mby(MACROBLOCKD *xd)
|
||||
{
|
||||
short *DQC = xd->dequant_y1;
|
||||
|
||||
if (xd->mode_info_context->mbmi.mode != SPLITMV)
|
||||
{
|
||||
/* do 2nd order transform on the dc block */
|
||||
if (xd->eobs[24] > 1)
|
||||
{
|
||||
vp8_short_inv_walsh4x4
|
||||
(&xd->block[24].dqcoeff[0], xd->qcoeff);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_short_inv_walsh4x4_1
|
||||
(&xd->block[24].dqcoeff[0], xd->qcoeff);
|
||||
}
|
||||
eob_adjust(xd->eobs, xd->qcoeff);
|
||||
|
||||
DQC = xd->dequant_y1_dc;
|
||||
}
|
||||
vp8_dequant_idct_add_y_block
|
||||
(xd->qcoeff, DQC,
|
||||
xd->dst.y_buffer,
|
||||
xd->dst.y_stride, xd->eobs);
|
||||
}
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_INVTRANS_H_
|
|
@@ -0,0 +1,113 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_LOOPFILTER_H_
|
||||
#define VP8_COMMON_LOOPFILTER_H_
|
||||
|
||||
#include "vpx_ports/mem.h"
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define MAX_LOOP_FILTER 63
|
||||
/* fraction of total macroblock rows to be used in fast filter level picking */
|
||||
/* has to be > 2 */
|
||||
#define PARTIAL_FRAME_FRACTION 8
|
||||
|
||||
typedef enum
|
||||
{
|
||||
NORMAL_LOOPFILTER = 0,
|
||||
SIMPLE_LOOPFILTER = 1
|
||||
} LOOPFILTERTYPE;
|
||||
|
||||
#if ARCH_ARM
|
||||
#define SIMD_WIDTH 1
|
||||
#else
|
||||
#define SIMD_WIDTH 16
|
||||
#endif
|
||||
|
||||
/* Need to align this structure so when it is declared and
|
||||
* passed it can be loaded into vector registers.
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, mblim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
|
||||
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, blim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
|
||||
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, lim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
|
||||
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, hev_thr[4][SIMD_WIDTH]);
|
||||
unsigned char lvl[4][4][4];
|
||||
unsigned char hev_thr_lut[2][MAX_LOOP_FILTER + 1];
|
||||
unsigned char mode_lf_lut[10];
|
||||
} loop_filter_info_n;
|
||||
|
||||
typedef struct loop_filter_info
|
||||
{
|
||||
const unsigned char * mblim;
|
||||
const unsigned char * blim;
|
||||
const unsigned char * lim;
|
||||
const unsigned char * hev_thr;
|
||||
} loop_filter_info;
|
||||
|
||||
|
||||
typedef void loop_filter_uvfunction
|
||||
(
|
||||
unsigned char *u, /* source pointer */
|
||||
int p, /* pitch */
|
||||
const unsigned char *blimit,
|
||||
const unsigned char *limit,
|
||||
const unsigned char *thresh,
|
||||
unsigned char *v
|
||||
);
|
||||
|
||||
/* assorted loopfilter functions which get used elsewhere */
|
||||
struct VP8Common;
|
||||
struct macroblockd;
|
||||
struct modeinfo;
|
||||
|
||||
void vp8_loop_filter_init(struct VP8Common *cm);
|
||||
|
||||
void vp8_loop_filter_frame_init(struct VP8Common *cm,
|
||||
struct macroblockd *mbd,
|
||||
int default_filt_lvl);
|
||||
|
||||
void vp8_loop_filter_frame(struct VP8Common *cm, struct macroblockd *mbd,
|
||||
int frame_type);
|
||||
|
||||
void vp8_loop_filter_partial_frame(struct VP8Common *cm,
|
||||
struct macroblockd *mbd,
|
||||
int default_filt_lvl);
|
||||
|
||||
void vp8_loop_filter_frame_yonly(struct VP8Common *cm,
|
||||
struct macroblockd *mbd,
|
||||
int default_filt_lvl);
|
||||
|
||||
void vp8_loop_filter_update_sharpness(loop_filter_info_n *lfi,
|
||||
int sharpness_lvl);
|
||||
|
||||
void vp8_loop_filter_row_normal(struct VP8Common *cm,
|
||||
struct modeinfo *mode_info_context,
|
||||
int mb_row, int post_ystride, int post_uvstride,
|
||||
unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr);
|
||||
|
||||
void vp8_loop_filter_row_simple(struct VP8Common *cm,
|
||||
struct modeinfo *mode_info_context,
|
||||
int mb_row, int post_ystride, int post_uvstride,
|
||||
unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr);
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_LOOPFILTER_H_
|
|
@@ -0,0 +1,430 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "loopfilter.h"
|
||||
#include "onyxc_int.h"
|
||||
|
||||
typedef unsigned char uc;
|
||||
|
||||
static signed char vp8_signed_char_clamp(int t)
|
||||
{
|
||||
t = (t < -128 ? -128 : t);
|
||||
t = (t > 127 ? 127 : t);
|
||||
return (signed char) t;
|
||||
}
|
||||
|
||||
|
||||
/* should we apply any filter at all ( 11111111 yes, 00000000 no) */
|
||||
static signed char vp8_filter_mask(uc limit, uc blimit,
|
||||
uc p3, uc p2, uc p1, uc p0,
|
||||
uc q0, uc q1, uc q2, uc q3)
|
||||
{
|
||||
signed char mask = 0;
|
||||
mask |= (abs(p3 - p2) > limit);
|
||||
mask |= (abs(p2 - p1) > limit);
|
||||
mask |= (abs(p1 - p0) > limit);
|
||||
mask |= (abs(q1 - q0) > limit);
|
||||
mask |= (abs(q2 - q1) > limit);
|
||||
mask |= (abs(q3 - q2) > limit);
|
||||
mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit);
|
||||
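/* Each comparison above contributes 0 or 1, so mask is 1 if any threshold
 * was exceeded and 0 otherwise; "mask - 1" maps that to 0x00 (skip) or
 * 0xff (filter), matching the 11111111 / 00000000 note above so the result
 * can be ANDed with the filter value. */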
return mask - 1;
|
||||
}
|
||||
|
||||
/* is there high variance internal edge ( 11111111 yes, 00000000 no) */
|
||||
static signed char vp8_hevmask(uc thresh, uc p1, uc p0, uc q0, uc q1)
|
||||
{
|
||||
signed char hev = 0;
|
||||
hev |= (abs(p1 - p0) > thresh) * -1;
|
||||
hev |= (abs(q1 - q0) > thresh) * -1;
|
||||
return hev;
|
||||
}
|
||||
|
||||
static void vp8_filter(signed char mask, uc hev, uc *op1,
|
||||
uc *op0, uc *oq0, uc *oq1)
|
||||
|
||||
{
|
||||
signed char ps0, qs0;
|
||||
signed char ps1, qs1;
|
||||
signed char filter_value, Filter1, Filter2;
|
||||
signed char u;
|
||||
|
||||
ps1 = (signed char) * op1 ^ 0x80;
|
||||
ps0 = (signed char) * op0 ^ 0x80;
|
||||
qs0 = (signed char) * oq0 ^ 0x80;
|
||||
qs1 = (signed char) * oq1 ^ 0x80;
|
||||
|
||||
/* add outer taps if we have high edge variance */
|
||||
filter_value = vp8_signed_char_clamp(ps1 - qs1);
|
||||
filter_value &= hev;
|
||||
|
||||
/* inner taps */
|
||||
filter_value = vp8_signed_char_clamp(filter_value + 3 * (qs0 - ps0));
|
||||
filter_value &= mask;
|
||||
|
||||
/* save bottom 3 bits so that we round one side +4 and the other +3
|
||||
* if it equals 4 we'll set to adjust by -1 to account for the fact
|
||||
* we'd round 3 the other way
|
||||
*/
|
||||
Filter1 = vp8_signed_char_clamp(filter_value + 4);
|
||||
Filter2 = vp8_signed_char_clamp(filter_value + 3);
|
||||
Filter1 >>= 3;
|
||||
Filter2 >>= 3;
|
||||
u = vp8_signed_char_clamp(qs0 - Filter1);
|
||||
*oq0 = u ^ 0x80;
|
||||
u = vp8_signed_char_clamp(ps0 + Filter2);
|
||||
*op0 = u ^ 0x80;
|
||||
filter_value = Filter1;
|
||||
|
||||
/* outer tap adjustments */
|
||||
filter_value += 1;
|
||||
filter_value >>= 1;
|
||||
filter_value &= ~hev;
|
||||
|
||||
u = vp8_signed_char_clamp(qs1 - filter_value);
|
||||
*oq1 = u ^ 0x80;
|
||||
u = vp8_signed_char_clamp(ps1 + filter_value);
|
||||
*op1 = u ^ 0x80;
|
||||
|
||||
}
|
||||
void vp8_loop_filter_horizontal_edge_c
|
||||
(
|
||||
unsigned char *s,
|
||||
int p, /* pitch */
|
||||
const unsigned char *blimit,
|
||||
const unsigned char *limit,
|
||||
const unsigned char *thresh,
|
||||
int count
|
||||
)
|
||||
{
|
||||
int hev = 0; /* high edge variance */
|
||||
signed char mask = 0;
|
||||
int i = 0;
|
||||
|
||||
/* loop filter designed to work using chars so that we can make maximum use
|
||||
* of 8 bit simd instructions.
|
||||
*/
|
||||
do
|
||||
{
|
||||
mask = vp8_filter_mask(limit[0], blimit[0],
|
||||
s[-4*p], s[-3*p], s[-2*p], s[-1*p],
|
||||
s[0*p], s[1*p], s[2*p], s[3*p]);
|
||||
|
||||
hev = vp8_hevmask(thresh[0], s[-2*p], s[-1*p], s[0*p], s[1*p]);
|
||||
|
||||
vp8_filter(mask, hev, s - 2 * p, s - 1 * p, s, s + 1 * p);
|
||||
|
||||
++s;
|
||||
}
|
||||
while (++i < count * 8);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_vertical_edge_c
|
||||
(
|
||||
unsigned char *s,
|
||||
int p,
|
||||
const unsigned char *blimit,
|
||||
const unsigned char *limit,
|
||||
const unsigned char *thresh,
|
||||
int count
|
||||
)
|
||||
{
|
||||
int hev = 0; /* high edge variance */
|
||||
signed char mask = 0;
|
||||
int i = 0;
|
||||
|
||||
/* loop filter designed to work using chars so that we can make maximum use
|
||||
* of 8 bit simd instructions.
|
||||
*/
|
||||
do
|
||||
{
|
||||
mask = vp8_filter_mask(limit[0], blimit[0],
|
||||
s[-4], s[-3], s[-2], s[-1], s[0], s[1], s[2], s[3]);
|
||||
|
||||
hev = vp8_hevmask(thresh[0], s[-2], s[-1], s[0], s[1]);
|
||||
|
||||
vp8_filter(mask, hev, s - 2, s - 1, s, s + 1);
|
||||
|
||||
s += p;
|
||||
}
|
||||
while (++i < count * 8);
|
||||
}
|
||||
|
||||
static void vp8_mbfilter(signed char mask, uc hev,
|
||||
uc *op2, uc *op1, uc *op0, uc *oq0, uc *oq1, uc *oq2)
|
||||
{
|
||||
signed char s, u;
|
||||
signed char filter_value, Filter1, Filter2;
|
||||
signed char ps2 = (signed char) * op2 ^ 0x80;
|
||||
signed char ps1 = (signed char) * op1 ^ 0x80;
|
||||
signed char ps0 = (signed char) * op0 ^ 0x80;
|
||||
signed char qs0 = (signed char) * oq0 ^ 0x80;
|
||||
signed char qs1 = (signed char) * oq1 ^ 0x80;
|
||||
signed char qs2 = (signed char) * oq2 ^ 0x80;
|
||||
|
||||
/* add outer taps if we have high edge variance */
|
||||
filter_value = vp8_signed_char_clamp(ps1 - qs1);
|
||||
filter_value = vp8_signed_char_clamp(filter_value + 3 * (qs0 - ps0));
|
||||
filter_value &= mask;
|
||||
|
||||
Filter2 = filter_value;
|
||||
Filter2 &= hev;
|
||||
|
||||
/* save bottom 3 bits so that we round one side +4 and the other +3 */
|
||||
Filter1 = vp8_signed_char_clamp(Filter2 + 4);
|
||||
Filter2 = vp8_signed_char_clamp(Filter2 + 3);
|
||||
Filter1 >>= 3;
|
||||
Filter2 >>= 3;
|
||||
qs0 = vp8_signed_char_clamp(qs0 - Filter1);
|
||||
ps0 = vp8_signed_char_clamp(ps0 + Filter2);
|
||||
|
||||
|
||||
/* only apply wider filter if not high edge variance */
|
||||
filter_value &= ~hev;
|
||||
Filter2 = filter_value;
|
||||
|
||||
/* roughly 3/7th difference across boundary */
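/* (Reading of the constant: the same u is subtracted from qs0 and added to
 * ps0 below, so the edge difference shrinks by about 2 * 27/128 ~= 3/7 of
 * the filter value; the 18 and 9 multipliers that follow give the ~2/7 and
 * ~1/7 steps.) */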
|
||||
u = vp8_signed_char_clamp((63 + Filter2 * 27) >> 7);
|
||||
s = vp8_signed_char_clamp(qs0 - u);
|
||||
*oq0 = s ^ 0x80;
|
||||
s = vp8_signed_char_clamp(ps0 + u);
|
||||
*op0 = s ^ 0x80;
|
||||
|
||||
/* roughly 2/7th difference across boundary */
|
||||
u = vp8_signed_char_clamp((63 + Filter2 * 18) >> 7);
|
||||
s = vp8_signed_char_clamp(qs1 - u);
|
||||
*oq1 = s ^ 0x80;
|
||||
s = vp8_signed_char_clamp(ps1 + u);
|
||||
*op1 = s ^ 0x80;
|
||||
|
||||
/* roughly 1/7th difference across boundary */
|
||||
u = vp8_signed_char_clamp((63 + Filter2 * 9) >> 7);
|
||||
s = vp8_signed_char_clamp(qs2 - u);
|
||||
*oq2 = s ^ 0x80;
|
||||
s = vp8_signed_char_clamp(ps2 + u);
|
||||
*op2 = s ^ 0x80;
|
||||
}
|
||||
|
||||
void vp8_mbloop_filter_horizontal_edge_c
|
||||
(
|
||||
unsigned char *s,
|
||||
int p,
|
||||
const unsigned char *blimit,
|
||||
const unsigned char *limit,
|
||||
const unsigned char *thresh,
|
||||
int count
|
||||
)
|
||||
{
|
||||
signed char hev = 0; /* high edge variance */
|
||||
signed char mask = 0;
|
||||
int i = 0;
|
||||
|
||||
/* loop filter designed to work using chars so that we can make maximum use
|
||||
* of 8 bit simd instructions.
|
||||
*/
|
||||
do
|
||||
{
|
||||
|
||||
mask = vp8_filter_mask(limit[0], blimit[0],
|
||||
s[-4*p], s[-3*p], s[-2*p], s[-1*p],
|
||||
s[0*p], s[1*p], s[2*p], s[3*p]);
|
||||
|
||||
hev = vp8_hevmask(thresh[0], s[-2*p], s[-1*p], s[0*p], s[1*p]);
|
||||
|
||||
vp8_mbfilter(mask, hev, s - 3 * p, s - 2 * p, s - 1 * p, s, s + 1 * p, s + 2 * p);
|
||||
|
||||
++s;
|
||||
}
|
||||
while (++i < count * 8);
|
||||
|
||||
}
|
||||
|
||||
|
||||
void vp8_mbloop_filter_vertical_edge_c
|
||||
(
|
||||
unsigned char *s,
|
||||
int p,
|
||||
const unsigned char *blimit,
|
||||
const unsigned char *limit,
|
||||
const unsigned char *thresh,
|
||||
int count
|
||||
)
|
||||
{
|
||||
signed char hev = 0; /* high edge variance */
|
||||
signed char mask = 0;
|
||||
int i = 0;
|
||||
|
||||
do
|
||||
{
|
||||
|
||||
mask = vp8_filter_mask(limit[0], blimit[0],
|
||||
s[-4], s[-3], s[-2], s[-1], s[0], s[1], s[2], s[3]);
|
||||
|
||||
hev = vp8_hevmask(thresh[0], s[-2], s[-1], s[0], s[1]);
|
||||
|
||||
vp8_mbfilter(mask, hev, s - 3, s - 2, s - 1, s, s + 1, s + 2);
|
||||
|
||||
s += p;
|
||||
}
|
||||
while (++i < count * 8);
|
||||
|
||||
}
|
||||
|
||||
/* should we apply any filter at all ( 11111111 yes, 00000000 no) */
|
||||
static signed char vp8_simple_filter_mask(uc blimit, uc p1, uc p0, uc q0, uc q1)
|
||||
{
|
||||
/* Why does this cause problems for win32?
|
||||
* error C2143: syntax error : missing ';' before 'type'
|
||||
* (void) limit;
|
||||
*/
|
||||
signed char mask = (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= blimit) * -1;
|
||||
return mask;
|
||||
}
|
||||
|
||||
static void vp8_simple_filter(signed char mask, uc *op1, uc *op0, uc *oq0, uc *oq1)
|
||||
{
|
||||
signed char filter_value, Filter1, Filter2;
|
||||
signed char p1 = (signed char) * op1 ^ 0x80;
|
||||
signed char p0 = (signed char) * op0 ^ 0x80;
|
||||
signed char q0 = (signed char) * oq0 ^ 0x80;
|
||||
signed char q1 = (signed char) * oq1 ^ 0x80;
|
||||
signed char u;
|
||||
|
||||
filter_value = vp8_signed_char_clamp(p1 - q1);
|
||||
filter_value = vp8_signed_char_clamp(filter_value + 3 * (q0 - p0));
|
||||
filter_value &= mask;
|
||||
|
||||
/* save bottom 3 bits so that we round one side +4 and the other +3 */
|
||||
Filter1 = vp8_signed_char_clamp(filter_value + 4);
|
||||
Filter1 >>= 3;
|
||||
u = vp8_signed_char_clamp(q0 - Filter1);
|
||||
*oq0 = u ^ 0x80;
|
||||
|
||||
Filter2 = vp8_signed_char_clamp(filter_value + 3);
|
||||
Filter2 >>= 3;
|
||||
u = vp8_signed_char_clamp(p0 + Filter2);
|
||||
*op0 = u ^ 0x80;
|
||||
}
|
||||
|
||||
void vp8_loop_filter_simple_horizontal_edge_c
|
||||
(
|
||||
unsigned char *s,
|
||||
int p,
|
||||
const unsigned char *blimit
|
||||
)
|
||||
{
|
||||
signed char mask = 0;
|
||||
int i = 0;
|
||||
|
||||
do
|
||||
{
|
||||
mask = vp8_simple_filter_mask(blimit[0], s[-2*p], s[-1*p], s[0*p], s[1*p]);
|
||||
vp8_simple_filter(mask, s - 2 * p, s - 1 * p, s, s + 1 * p);
|
||||
++s;
|
||||
}
|
||||
while (++i < 16);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_simple_vertical_edge_c
|
||||
(
|
||||
unsigned char *s,
|
||||
int p,
|
||||
const unsigned char *blimit
|
||||
)
|
||||
{
|
||||
signed char mask = 0;
|
||||
int i = 0;
|
||||
|
||||
do
|
||||
{
|
||||
mask = vp8_simple_filter_mask(blimit[0], s[-2], s[-1], s[0], s[1]);
|
||||
vp8_simple_filter(mask, s - 2, s - 1, s, s + 1);
|
||||
s += p;
|
||||
}
|
||||
while (++i < 16);
|
||||
|
||||
}
|
||||
|
||||
/* Horizontal MB filtering */
|
||||
void vp8_loop_filter_mbh_c(unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr, int y_stride, int uv_stride,
|
||||
loop_filter_info *lfi)
|
||||
{
|
||||
vp8_mbloop_filter_horizontal_edge_c(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_c(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_c(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
/* Vertical MB Filtering */
|
||||
void vp8_loop_filter_mbv_c(unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr, int y_stride, int uv_stride,
|
||||
loop_filter_info *lfi)
|
||||
{
|
||||
vp8_mbloop_filter_vertical_edge_c(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_c(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_c(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
/* Horizontal B Filtering */
|
||||
void vp8_loop_filter_bh_c(unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr, int y_stride, int uv_stride,
|
||||
loop_filter_info *lfi)
|
||||
{
|
||||
vp8_loop_filter_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_horizontal_edge_c(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_loop_filter_horizontal_edge_c(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bhs_c(unsigned char *y_ptr, int y_stride,
|
||||
const unsigned char *blimit)
|
||||
{
|
||||
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, blimit);
|
||||
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, blimit);
|
||||
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, blimit);
|
||||
}
|
||||
|
||||
/* Vertical B Filtering */
|
||||
void vp8_loop_filter_bv_c(unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr, int y_stride, int uv_stride,
|
||||
loop_filter_info *lfi)
|
||||
{
|
||||
vp8_loop_filter_vertical_edge_c(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_vertical_edge_c(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_vertical_edge_c(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_vertical_edge_c(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_loop_filter_vertical_edge_c(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bvs_c(unsigned char *y_ptr, int y_stride,
|
||||
const unsigned char *blimit)
|
||||
{
|
||||
vp8_loop_filter_simple_vertical_edge_c(y_ptr + 4, y_stride, blimit);
|
||||
vp8_loop_filter_simple_vertical_edge_c(y_ptr + 8, y_stride, blimit);
|
||||
vp8_loop_filter_simple_vertical_edge_c(y_ptr + 12, y_stride, blimit);
|
||||
}
|
|
@@ -0,0 +1,68 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "blockd.h"
|
||||
|
||||
void vp8_setup_block_dptrs(MACROBLOCKD *x)
|
||||
{
|
||||
int r, c;
|
||||
|
||||
for (r = 0; r < 4; r++)
|
||||
{
|
||||
for (c = 0; c < 4; c++)
|
||||
{
|
||||
x->block[r*4+c].predictor = x->predictor + r * 4 * 16 + c * 4;
|
||||
}
|
||||
}
|
||||
|
||||
for (r = 0; r < 2; r++)
|
||||
{
|
||||
for (c = 0; c < 2; c++)
|
||||
{
|
||||
x->block[16+r*2+c].predictor = x->predictor + 256 + r * 4 * 8 + c * 4;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
for (r = 0; r < 2; r++)
|
||||
{
|
||||
for (c = 0; c < 2; c++)
|
||||
{
|
||||
x->block[20+r*2+c].predictor = x->predictor + 320 + r * 4 * 8 + c * 4;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
for (r = 0; r < 25; r++)
|
||||
{
|
||||
x->block[r].qcoeff = x->qcoeff + r * 16;
|
||||
x->block[r].dqcoeff = x->dqcoeff + r * 16;
|
||||
x->block[r].eob = x->eobs + r;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_build_block_doffsets(MACROBLOCKD *x)
|
||||
{
|
||||
int block;
|
||||
|
||||
for (block = 0; block < 16; block++) /* y blocks */
|
||||
{
|
||||
x->block[block].offset =
|
||||
(block >> 2) * 4 * x->dst.y_stride + (block & 3) * 4;
|
||||
}
|
||||
|
||||
for (block = 16; block < 20; block++) /* U and V blocks */
|
||||
{
|
||||
x->block[block+4].offset =
|
||||
x->block[block].offset =
|
||||
((block - 16) >> 1) * 4 * x->dst.uv_stride + (block & 1) * 4;
|
||||
}
|
||||
}
|
|
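For reference, a minimal standalone sketch (not part of the libvpx sources above) that reproduces the predictor-buffer layout established by vp8_setup_block_dptrs(): the 16 luma 4x4 blocks tile a 16x16 area with a row stride of 16, the four U blocks tile an 8x8 area starting at offset 256, and the four V blocks tile an 8x8 area starting at offset 320, each with a row stride of 8.

```c
/* Standalone sketch (not part of libvpx): computes the same per-block
 * predictor offsets that vp8_setup_block_dptrs() assigns above. */
#include <stdio.h>

static int predictor_offset(int b)
{
    if (b < 16)          /* 4x4 luma blocks, 16x16 area, row stride 16 */
        return (b / 4) * 4 * 16 + (b % 4) * 4;
    else if (b < 20)     /* 4x4 U blocks, 8x8 area at offset 256, stride 8 */
        return 256 + ((b - 16) / 2) * 4 * 8 + ((b - 16) % 2) * 4;
    else                 /* 4x4 V blocks, 8x8 area at offset 320, stride 8 */
        return 320 + ((b - 20) / 2) * 4 * 8 + ((b - 20) % 2) * 4;
}

int main(void)
{
    int b;
    for (b = 0; b < 24; ++b)
        printf("block %2d -> predictor offset %3d\n", b, predictor_offset(b));
    return 0;
}
```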
@@ -0,0 +1,40 @@
/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "entropy.h"

const int vp8_mode_contexts[6][4] =
{
    {
        /* 0 */
        7, 1, 1, 143,
    },
    {
        /* 1 */
        14, 18, 14, 107,
    },
    {
        /* 2 */
        135, 64, 57, 68,
    },
    {
        /* 3 */
        60, 56, 128, 65,
    },
    {
        /* 4 */
        159, 134, 128, 34,
    },
    {
        /* 5 */
        234, 188, 128, 28,
    },
};
@@ -0,0 +1,25 @@
/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VP8_COMMON_MODECONT_H_
#define VP8_COMMON_MODECONT_H_

#ifdef __cplusplus
extern "C" {
#endif

extern const int vp8_mode_contexts[6][4];

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // VP8_COMMON_MODECONT_H_
@@ -0,0 +1,36 @@
/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VP8_COMMON_MV_H_
#define VP8_COMMON_MV_H_
#include "vpx/vpx_integer.h"

#ifdef __cplusplus
extern "C" {
#endif

typedef struct
{
    short row;
    short col;
} MV;

typedef union int_mv
{
    uint32_t as_int;
    MV       as_mv;
} int_mv;        /* facilitates faster equality tests and copies */

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // VP8_COMMON_MV_H_
@@ -0,0 +1,185 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_ONYXC_INT_H_
|
||||
#define VP8_COMMON_ONYXC_INT_H_
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "vpx/internal/vpx_codec_internal.h"
|
||||
#include "loopfilter.h"
|
||||
#include "entropymv.h"
|
||||
#include "entropy.h"
|
||||
#if CONFIG_POSTPROC
|
||||
#include "postproc.h"
|
||||
#endif
|
||||
|
||||
/*#ifdef PACKET_TESTING*/
|
||||
#include "header.h"
|
||||
/*#endif*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define MINQ 0
|
||||
#define MAXQ 127
|
||||
#define QINDEX_RANGE (MAXQ + 1)
|
||||
|
||||
#define NUM_YV12_BUFFERS 4
|
||||
|
||||
#define MAX_PARTITIONS 9
|
||||
|
||||
typedef struct frame_contexts
|
||||
{
|
||||
vp8_prob bmode_prob [VP8_BINTRAMODES-1];
|
||||
vp8_prob ymode_prob [VP8_YMODES-1]; /* interframe intra mode probs */
|
||||
vp8_prob uv_mode_prob [VP8_UV_MODES-1];
|
||||
vp8_prob sub_mv_ref_prob [VP8_SUBMVREFS-1];
|
||||
vp8_prob coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
|
||||
MV_CONTEXT mvc[2];
|
||||
} FRAME_CONTEXT;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
ONE_PARTITION = 0,
|
||||
TWO_PARTITION = 1,
|
||||
FOUR_PARTITION = 2,
|
||||
EIGHT_PARTITION = 3
|
||||
} TOKEN_PARTITION;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
RECON_CLAMP_REQUIRED = 0,
|
||||
RECON_CLAMP_NOTREQUIRED = 1
|
||||
} CLAMP_TYPE;
|
||||
|
||||
typedef struct VP8Common
|
||||
|
||||
{
|
||||
struct vpx_internal_error_info error;
|
||||
|
||||
DECLARE_ALIGNED(16, short, Y1dequant[QINDEX_RANGE][2]);
|
||||
DECLARE_ALIGNED(16, short, Y2dequant[QINDEX_RANGE][2]);
|
||||
DECLARE_ALIGNED(16, short, UVdequant[QINDEX_RANGE][2]);
|
||||
|
||||
int Width;
|
||||
int Height;
|
||||
int horiz_scale;
|
||||
int vert_scale;
|
||||
|
||||
CLAMP_TYPE clamp_type;
|
||||
|
||||
YV12_BUFFER_CONFIG *frame_to_show;
|
||||
|
||||
YV12_BUFFER_CONFIG yv12_fb[NUM_YV12_BUFFERS];
|
||||
int fb_idx_ref_cnt[NUM_YV12_BUFFERS];
|
||||
int new_fb_idx, lst_fb_idx, gld_fb_idx, alt_fb_idx;
|
||||
|
||||
YV12_BUFFER_CONFIG temp_scale_frame;
|
||||
|
||||
#if CONFIG_POSTPROC
|
||||
YV12_BUFFER_CONFIG post_proc_buffer;
|
||||
YV12_BUFFER_CONFIG post_proc_buffer_int;
|
||||
int post_proc_buffer_int_used;
|
||||
unsigned char *pp_limits_buffer; /* post-processing filter coefficients */
|
||||
#endif
|
||||
|
||||
FRAME_TYPE last_frame_type; /* Save last frame's frame type for motion search. */
|
||||
FRAME_TYPE frame_type;
|
||||
|
||||
int show_frame;
|
||||
|
||||
int frame_flags;
|
||||
int MBs;
|
||||
int mb_rows;
|
||||
int mb_cols;
|
||||
int mode_info_stride;
|
||||
|
||||
/* profile settings */
|
||||
int mb_no_coeff_skip;
|
||||
int no_lpf;
|
||||
int use_bilinear_mc_filter;
|
||||
int full_pixel;
|
||||
|
||||
int base_qindex;
|
||||
|
||||
int y1dc_delta_q;
|
||||
int y2dc_delta_q;
|
||||
int y2ac_delta_q;
|
||||
int uvdc_delta_q;
|
||||
int uvac_delta_q;
|
||||
|
||||
/* We allocate a MODE_INFO struct for each macroblock, together with
|
||||
an extra row on top and column on the left to simplify prediction. */
|
||||
|
||||
MODE_INFO *mip; /* Base of allocated array */
|
||||
MODE_INFO *mi; /* Corresponds to upper left visible macroblock */
|
||||
#if CONFIG_ERROR_CONCEALMENT
|
||||
MODE_INFO *prev_mip; /* MODE_INFO array 'mip' from last decoded frame */
|
||||
MODE_INFO *prev_mi; /* 'mi' from last frame (points into prev_mip) */
|
||||
#endif
|
||||
MODE_INFO *show_frame_mi; /* MODE_INFO for the last decoded frame
|
||||
to show */
|
||||
LOOPFILTERTYPE filter_type;
|
||||
|
||||
loop_filter_info_n lf_info;
|
||||
|
||||
int filter_level;
|
||||
int last_sharpness_level;
|
||||
int sharpness_level;
|
||||
|
||||
int refresh_last_frame; /* Two state 0 = NO, 1 = YES */
|
||||
int refresh_golden_frame; /* Two state 0 = NO, 1 = YES */
|
||||
int refresh_alt_ref_frame; /* Two state 0 = NO, 1 = YES */
|
||||
|
||||
int copy_buffer_to_gf; /* 0 none, 1 Last to GF, 2 ARF to GF */
|
||||
int copy_buffer_to_arf; /* 0 none, 1 Last to ARF, 2 GF to ARF */
|
||||
|
||||
int refresh_entropy_probs; /* Two state 0 = NO, 1 = YES */
|
||||
|
||||
int ref_frame_sign_bias[MAX_REF_FRAMES]; /* Two state 0, 1 */
|
||||
|
||||
/* Y,U,V,Y2 */
|
||||
ENTROPY_CONTEXT_PLANES *above_context; /* row of context for each plane */
|
||||
ENTROPY_CONTEXT_PLANES left_context; /* (up to) 4 contexts "" */
|
||||
|
||||
FRAME_CONTEXT lfc; /* last frame entropy */
|
||||
FRAME_CONTEXT fc; /* this frame entropy */
|
||||
|
||||
unsigned int current_video_frame;
|
||||
|
||||
int version;
|
||||
|
||||
TOKEN_PARTITION multi_token_partition;
|
||||
|
||||
#ifdef PACKET_TESTING
|
||||
VP8_HEADER oh;
|
||||
#endif
|
||||
#if CONFIG_POSTPROC_VISUALIZER
|
||||
double bitrate;
|
||||
double framerate;
|
||||
#endif
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
int processor_core_count;
|
||||
#endif
|
||||
#if CONFIG_POSTPROC
|
||||
struct postproc_state postproc_state;
|
||||
#endif
|
||||
int cpu_caps;
|
||||
} VP8_COMMON;
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_ONYXC_INT_H_
|
|
@@ -0,0 +1,63 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_ONYXD_H_
|
||||
#define VP8_COMMON_ONYXD_H_
|
||||
|
||||
|
||||
/* Create/destroy static data structures. */
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
#include "vpx_scale/yv12config.h"
|
||||
#include "ppflags.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
#include "vpx/vpx_codec.h"
|
||||
#include "vpx/vp8.h"
|
||||
|
||||
struct VP8D_COMP;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int Width;
|
||||
int Height;
|
||||
int Version;
|
||||
int postprocess;
|
||||
int max_threads;
|
||||
int error_concealment;
|
||||
} VP8D_CONFIG;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
VP8D_OK = 0
|
||||
} VP8D_SETTING;
|
||||
|
||||
void vp8dx_initialize(void);
|
||||
|
||||
void vp8dx_set_setting(struct VP8D_COMP* comp, VP8D_SETTING oxst, int x);
|
||||
|
||||
int vp8dx_get_setting(struct VP8D_COMP* comp, VP8D_SETTING oxst);
|
||||
|
||||
int vp8dx_receive_compressed_data(struct VP8D_COMP* comp,
|
||||
size_t size, const uint8_t *dest,
|
||||
int64_t time_stamp);
|
||||
int vp8dx_get_raw_frame(struct VP8D_COMP* comp, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp8_ppflags_t *flags);
|
||||
|
||||
vpx_codec_err_t vp8dx_get_reference(struct VP8D_COMP* comp, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd);
|
||||
vpx_codec_err_t vp8dx_set_reference(struct VP8D_COMP* comp, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#endif // VP8_COMMON_ONYXD_H_
|
|
@@ -0,0 +1,49 @@
/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VP8_COMMON_PPFLAGS_H_
#define VP8_COMMON_PPFLAGS_H_

#ifdef __cplusplus
extern "C" {
#endif
enum
{
    VP8D_NOFILTERING            = 0,
    VP8D_DEBLOCK                = 1<<0,
    VP8D_DEMACROBLOCK           = 1<<1,
    VP8D_ADDNOISE               = 1<<2,
    VP8D_DEBUG_TXT_FRAME_INFO   = 1<<3,
    VP8D_DEBUG_TXT_MBLK_MODES   = 1<<4,
    VP8D_DEBUG_TXT_DC_DIFF      = 1<<5,
    VP8D_DEBUG_TXT_RATE_INFO    = 1<<6,
    VP8D_DEBUG_DRAW_MV          = 1<<7,
    VP8D_DEBUG_CLR_BLK_MODES    = 1<<8,
    VP8D_DEBUG_CLR_FRM_REF_BLKS = 1<<9,
    VP8D_MFQE                   = 1<<10
};

typedef struct
{
    int post_proc_flag;
    int deblocking_level;
    int noise_level;
    int display_ref_frame_flag;
    int display_mb_modes_flag;
    int display_b_modes_flag;
    int display_mv_flag;
} vp8_ppflags_t;

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // VP8_COMMON_PPFLAGS_H_
@@ -0,0 +1,135 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "quant_common.h"
|
||||
|
||||
static const int dc_qlookup[QINDEX_RANGE] =
|
||||
{
|
||||
4, 5, 6, 7, 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 17,
|
||||
18, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 25, 25, 26, 27, 28,
|
||||
29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43,
|
||||
44, 45, 46, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
|
||||
59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
|
||||
75, 76, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
|
||||
91, 93, 95, 96, 98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
|
||||
122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
|
||||
};
|
||||
|
||||
static const int ac_qlookup[QINDEX_RANGE] =
|
||||
{
|
||||
4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
|
||||
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
|
||||
36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
|
||||
52, 53, 54, 55, 56, 57, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76,
|
||||
78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108,
|
||||
110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
|
||||
155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
|
||||
213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
|
||||
};
|
||||
|
||||
|
||||
int vp8_dc_quant(int QIndex, int Delta)
|
||||
{
|
||||
int retval;
|
||||
|
||||
QIndex = QIndex + Delta;
|
||||
|
||||
if (QIndex > 127)
|
||||
QIndex = 127;
|
||||
else if (QIndex < 0)
|
||||
QIndex = 0;
|
||||
|
||||
retval = dc_qlookup[ QIndex ];
|
||||
return retval;
|
||||
}
|
||||
|
||||
int vp8_dc2quant(int QIndex, int Delta)
|
||||
{
|
||||
int retval;
|
||||
|
||||
QIndex = QIndex + Delta;
|
||||
|
||||
if (QIndex > 127)
|
||||
QIndex = 127;
|
||||
else if (QIndex < 0)
|
||||
QIndex = 0;
|
||||
|
||||
retval = dc_qlookup[ QIndex ] * 2;
|
||||
return retval;
|
||||
|
||||
}
|
||||
int vp8_dc_uv_quant(int QIndex, int Delta)
|
||||
{
|
||||
int retval;
|
||||
|
||||
QIndex = QIndex + Delta;
|
||||
|
||||
if (QIndex > 127)
|
||||
QIndex = 127;
|
||||
else if (QIndex < 0)
|
||||
QIndex = 0;
|
||||
|
||||
retval = dc_qlookup[ QIndex ];
|
||||
|
||||
if (retval > 132)
|
||||
retval = 132;
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
int vp8_ac_yquant(int QIndex)
|
||||
{
|
||||
int retval;
|
||||
|
||||
if (QIndex > 127)
|
||||
QIndex = 127;
|
||||
else if (QIndex < 0)
|
||||
QIndex = 0;
|
||||
|
||||
retval = ac_qlookup[ QIndex ];
|
||||
return retval;
|
||||
}
|
||||
|
||||
int vp8_ac2quant(int QIndex, int Delta)
|
||||
{
|
||||
int retval;
|
||||
|
||||
QIndex = QIndex + Delta;
|
||||
|
||||
if (QIndex > 127)
|
||||
QIndex = 127;
|
||||
else if (QIndex < 0)
|
||||
QIndex = 0;
|
||||
|
||||
/* For all x in [0..284], x*155/100 is bitwise equal to (x*101581) >> 16.
|
||||
* The smallest precision for that is '(x*6349) >> 12' but 16 is a good
|
||||
* word size. */
|
||||
retval = (ac_qlookup[ QIndex ] * 101581) >> 16;
|
||||
|
||||
if (retval < 8)
|
||||
retval = 8;
|
||||
|
||||
return retval;
|
||||
}
|
||||
int vp8_ac_uv_quant(int QIndex, int Delta)
|
||||
{
|
||||
int retval;
|
||||
|
||||
QIndex = QIndex + Delta;
|
||||
|
||||
if (QIndex > 127)
|
||||
QIndex = 127;
|
||||
else if (QIndex < 0)
|
||||
QIndex = 0;
|
||||
|
||||
retval = ac_qlookup[ QIndex ];
|
||||
return retval;
|
||||
}
|
|
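As a quick check of the fixed-point comment in vp8_ac2quant() above, here is a standalone sketch (not part of libvpx) that verifies x * 155 / 100 == (x * 101581) >> 16 for every x in the range covered by ac_qlookup, i.e. 0 through 284.

```c
/* Standalone check (not part of libvpx): verifies the fixed-point identity
 * used for the Y2 AC quantizer scaling above. */
#include <stdio.h>

int main(void)
{
    int x, mismatches = 0;

    for (x = 0; x <= 284; ++x)
    {
        if (x * 155 / 100 != (x * 101581) >> 16)
        {
            printf("mismatch at x = %d\n", x);
            ++mismatches;
        }
    }

    printf("%d mismatches in [0, 284]\n", mismatches);
    return 0;
}
```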
@@ -0,0 +1,34 @@
/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VP8_COMMON_QUANT_COMMON_H_
#define VP8_COMMON_QUANT_COMMON_H_

#include "string.h"
#include "blockd.h"
#include "onyxc_int.h"

#ifdef __cplusplus
extern "C" {
#endif

extern int vp8_ac_yquant(int QIndex);
extern int vp8_dc_quant(int QIndex, int Delta);
extern int vp8_dc2quant(int QIndex, int Delta);
extern int vp8_ac2quant(int QIndex, int Delta);
extern int vp8_dc_uv_quant(int QIndex, int Delta);
extern int vp8_ac_uv_quant(int QIndex, int Delta);

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // VP8_COMMON_QUANT_COMMON_H_
@@ -0,0 +1,544 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include <limits.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
#include "blockd.h"
|
||||
#include "reconinter.h"
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
#include "onyxc_int.h"
|
||||
#endif
|
||||
|
||||
void vp8_copy_mem16x16_c(
|
||||
unsigned char *src,
|
||||
int src_stride,
|
||||
unsigned char *dst,
|
||||
int dst_stride)
|
||||
{
|
||||
|
||||
int r;
|
||||
|
||||
for (r = 0; r < 16; r++)
|
||||
{
|
||||
memcpy(dst, src, 16);
|
||||
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void vp8_copy_mem8x8_c(
|
||||
unsigned char *src,
|
||||
int src_stride,
|
||||
unsigned char *dst,
|
||||
int dst_stride)
|
||||
{
|
||||
int r;
|
||||
|
||||
for (r = 0; r < 8; r++)
|
||||
{
|
||||
memcpy(dst, src, 8);
|
||||
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void vp8_copy_mem8x4_c(
|
||||
unsigned char *src,
|
||||
int src_stride,
|
||||
unsigned char *dst,
|
||||
int dst_stride)
|
||||
{
|
||||
int r;
|
||||
|
||||
for (r = 0; r < 4; r++)
|
||||
{
|
||||
memcpy(dst, src, 8);
|
||||
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, unsigned char *base_pre, int pre_stride, vp8_subpix_fn_t sppf)
|
||||
{
|
||||
int r;
|
||||
unsigned char *pred_ptr = d->predictor;
|
||||
unsigned char *ptr;
|
||||
ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride + (d->bmi.mv.as_mv.col >> 3);
|
||||
|
||||
if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
|
||||
{
|
||||
sppf(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, pred_ptr, pitch);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (r = 0; r < 4; r++)
|
||||
{
|
||||
pred_ptr[0] = ptr[0];
|
||||
pred_ptr[1] = ptr[1];
|
||||
pred_ptr[2] = ptr[2];
|
||||
pred_ptr[3] = ptr[3];
|
||||
pred_ptr += pitch;
|
||||
ptr += pre_stride;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void build_inter_predictors4b(MACROBLOCKD *x, BLOCKD *d, unsigned char *dst, int dst_stride, unsigned char *base_pre, int pre_stride)
|
||||
{
|
||||
unsigned char *ptr;
|
||||
ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride + (d->bmi.mv.as_mv.col >> 3);
|
||||
|
||||
if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
|
||||
{
|
||||
x->subpixel_predict8x8(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst, dst_stride);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_copy_mem8x8(ptr, pre_stride, dst, dst_stride);
|
||||
}
|
||||
}
|
||||
|
||||
static void build_inter_predictors2b(MACROBLOCKD *x, BLOCKD *d, unsigned char *dst, int dst_stride, unsigned char *base_pre, int pre_stride)
|
||||
{
|
||||
unsigned char *ptr;
|
||||
ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride + (d->bmi.mv.as_mv.col >> 3);
|
||||
|
||||
if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
|
||||
{
|
||||
x->subpixel_predict8x4(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst, dst_stride);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_copy_mem8x4(ptr, pre_stride, dst, dst_stride);
|
||||
}
|
||||
}
|
||||
|
||||
static void build_inter_predictors_b(BLOCKD *d, unsigned char *dst, int dst_stride, unsigned char *base_pre, int pre_stride, vp8_subpix_fn_t sppf)
|
||||
{
|
||||
int r;
|
||||
unsigned char *ptr;
|
||||
ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride + (d->bmi.mv.as_mv.col >> 3);
|
||||
|
||||
if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
|
||||
{
|
||||
sppf(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst, dst_stride);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (r = 0; r < 4; r++)
|
||||
{
|
||||
dst[0] = ptr[0];
|
||||
dst[1] = ptr[1];
|
||||
dst[2] = ptr[2];
|
||||
dst[3] = ptr[3];
|
||||
dst += dst_stride;
|
||||
ptr += pre_stride;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*encoder only*/
|
||||
void vp8_build_inter16x16_predictors_mbuv(MACROBLOCKD *x)
|
||||
{
|
||||
unsigned char *uptr, *vptr;
|
||||
unsigned char *upred_ptr = &x->predictor[256];
|
||||
unsigned char *vpred_ptr = &x->predictor[320];
|
||||
|
||||
int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
|
||||
int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
|
||||
int offset;
|
||||
int pre_stride = x->pre.uv_stride;
|
||||
|
||||
/* calc uv motion vectors */
|
||||
mv_row += 1 | (mv_row >> (sizeof(int) * CHAR_BIT - 1));
|
||||
mv_col += 1 | (mv_col >> (sizeof(int) * CHAR_BIT - 1));
|
||||
mv_row /= 2;
|
||||
mv_col /= 2;
|
||||
mv_row &= x->fullpixel_mask;
|
||||
mv_col &= x->fullpixel_mask;
|
||||
|
||||
offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
|
||||
uptr = x->pre.u_buffer + offset;
|
||||
vptr = x->pre.v_buffer + offset;
|
||||
|
||||
if ((mv_row | mv_col) & 7)
|
||||
{
|
||||
x->subpixel_predict8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, upred_ptr, 8);
|
||||
x->subpixel_predict8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, vpred_ptr, 8);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_copy_mem8x8(uptr, pre_stride, upred_ptr, 8);
|
||||
vp8_copy_mem8x8(vptr, pre_stride, vpred_ptr, 8);
|
||||
}
|
||||
}
|
||||
|
||||
/*encoder only*/
|
||||
void vp8_build_inter4x4_predictors_mbuv(MACROBLOCKD *x)
|
||||
{
|
||||
int i, j;
|
||||
int pre_stride = x->pre.uv_stride;
|
||||
unsigned char *base_pre;
|
||||
|
||||
/* build uv mvs */
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
for (j = 0; j < 2; j++)
|
||||
{
|
||||
int yoffset = i * 8 + j * 2;
|
||||
int uoffset = 16 + i * 2 + j;
|
||||
int voffset = 20 + i * 2 + j;
|
||||
|
||||
int temp;
|
||||
|
||||
temp = x->block[yoffset ].bmi.mv.as_mv.row
|
||||
+ x->block[yoffset+1].bmi.mv.as_mv.row
|
||||
+ x->block[yoffset+4].bmi.mv.as_mv.row
|
||||
+ x->block[yoffset+5].bmi.mv.as_mv.row;
|
||||
|
||||
temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
|
||||
|
||||
x->block[uoffset].bmi.mv.as_mv.row = (temp / 8) & x->fullpixel_mask;
|
||||
|
||||
temp = x->block[yoffset ].bmi.mv.as_mv.col
|
||||
+ x->block[yoffset+1].bmi.mv.as_mv.col
|
||||
+ x->block[yoffset+4].bmi.mv.as_mv.col
|
||||
+ x->block[yoffset+5].bmi.mv.as_mv.col;
|
||||
|
||||
temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
|
||||
|
||||
x->block[uoffset].bmi.mv.as_mv.col = (temp / 8) & x->fullpixel_mask;
|
||||
|
||||
x->block[voffset].bmi.mv.as_int = x->block[uoffset].bmi.mv.as_int;
|
||||
}
|
||||
}
|
||||
|
||||
base_pre = x->pre.u_buffer;
|
||||
for (i = 16; i < 20; i += 2)
|
||||
{
|
||||
BLOCKD *d0 = &x->block[i];
|
||||
BLOCKD *d1 = &x->block[i+1];
|
||||
|
||||
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
|
||||
build_inter_predictors2b(x, d0, d0->predictor, 8, base_pre, pre_stride);
|
||||
else
|
||||
{
|
||||
vp8_build_inter_predictors_b(d0, 8, base_pre, pre_stride, x->subpixel_predict);
|
||||
vp8_build_inter_predictors_b(d1, 8, base_pre, pre_stride, x->subpixel_predict);
|
||||
}
|
||||
}
|
||||
|
||||
base_pre = x->pre.v_buffer;
|
||||
for (i = 20; i < 24; i += 2)
|
||||
{
|
||||
BLOCKD *d0 = &x->block[i];
|
||||
BLOCKD *d1 = &x->block[i+1];
|
||||
|
||||
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
|
||||
build_inter_predictors2b(x, d0, d0->predictor, 8, base_pre, pre_stride);
|
||||
else
|
||||
{
|
||||
vp8_build_inter_predictors_b(d0, 8, base_pre, pre_stride, x->subpixel_predict);
|
||||
vp8_build_inter_predictors_b(d1, 8, base_pre, pre_stride, x->subpixel_predict);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*encoder only*/
|
||||
void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x,
|
||||
unsigned char *dst_y,
|
||||
int dst_ystride)
|
||||
{
|
||||
unsigned char *ptr_base;
|
||||
unsigned char *ptr;
|
||||
int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
|
||||
int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
|
||||
int pre_stride = x->pre.y_stride;
|
||||
|
||||
ptr_base = x->pre.y_buffer;
|
||||
ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3);
|
||||
|
||||
if ((mv_row | mv_col) & 7)
|
||||
{
|
||||
x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7,
|
||||
dst_y, dst_ystride);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_copy_mem16x16(ptr, pre_stride, dst_y,
|
||||
dst_ystride);
|
||||
}
|
||||
}
|
||||
|
||||
static void clamp_mv_to_umv_border(MV *mv, const MACROBLOCKD *xd)
|
||||
{
|
||||
/* If the MV points so far into the UMV border that no visible pixels
|
||||
* are used for reconstruction, the subpel part of the MV can be
|
||||
* discarded and the MV limited to 16 pixels with equivalent results.
|
||||
*
|
||||
* This limit kicks in at 19 pixels for the top and left edges, for
|
||||
* the 16 pixels plus 3 taps right of the central pixel when subpel
|
||||
* filtering. The bottom and right edges use 16 pixels plus 2 pixels
|
||||
* left of the central pixel when filtering.
|
||||
*/
|
||||
if (mv->col < (xd->mb_to_left_edge - (19 << 3)))
|
||||
mv->col = xd->mb_to_left_edge - (16 << 3);
|
||||
else if (mv->col > xd->mb_to_right_edge + (18 << 3))
|
||||
mv->col = xd->mb_to_right_edge + (16 << 3);
|
||||
|
||||
if (mv->row < (xd->mb_to_top_edge - (19 << 3)))
|
||||
mv->row = xd->mb_to_top_edge - (16 << 3);
|
||||
else if (mv->row > xd->mb_to_bottom_edge + (18 << 3))
|
||||
mv->row = xd->mb_to_bottom_edge + (16 << 3);
|
||||
}
|
||||
|
||||
/* A version of the above function for chroma block MVs.*/
|
||||
static void clamp_uvmv_to_umv_border(MV *mv, const MACROBLOCKD *xd)
|
||||
{
|
||||
mv->col = (2*mv->col < (xd->mb_to_left_edge - (19 << 3))) ?
|
||||
(xd->mb_to_left_edge - (16 << 3)) >> 1 : mv->col;
|
||||
mv->col = (2*mv->col > xd->mb_to_right_edge + (18 << 3)) ?
|
||||
(xd->mb_to_right_edge + (16 << 3)) >> 1 : mv->col;
|
||||
|
||||
mv->row = (2*mv->row < (xd->mb_to_top_edge - (19 << 3))) ?
|
||||
(xd->mb_to_top_edge - (16 << 3)) >> 1 : mv->row;
|
||||
mv->row = (2*mv->row > xd->mb_to_bottom_edge + (18 << 3)) ?
|
||||
(xd->mb_to_bottom_edge + (16 << 3)) >> 1 : mv->row;
|
||||
}
|
||||
|
||||
void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x,
|
||||
unsigned char *dst_y,
|
||||
unsigned char *dst_u,
|
||||
unsigned char *dst_v,
|
||||
int dst_ystride,
|
||||
int dst_uvstride)
|
||||
{
|
||||
int offset;
|
||||
unsigned char *ptr;
|
||||
unsigned char *uptr, *vptr;
|
||||
|
||||
int_mv _16x16mv;
|
||||
|
||||
unsigned char *ptr_base = x->pre.y_buffer;
|
||||
int pre_stride = x->pre.y_stride;
|
||||
|
||||
_16x16mv.as_int = x->mode_info_context->mbmi.mv.as_int;
|
||||
|
||||
if (x->mode_info_context->mbmi.need_to_clamp_mvs)
|
||||
{
|
||||
clamp_mv_to_umv_border(&_16x16mv.as_mv, x);
|
||||
}
|
||||
|
||||
ptr = ptr_base + ( _16x16mv.as_mv.row >> 3) * pre_stride + (_16x16mv.as_mv.col >> 3);
|
||||
|
||||
if ( _16x16mv.as_int & 0x00070007)
|
||||
{
|
||||
x->subpixel_predict16x16(ptr, pre_stride, _16x16mv.as_mv.col & 7, _16x16mv.as_mv.row & 7, dst_y, dst_ystride);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_copy_mem16x16(ptr, pre_stride, dst_y, dst_ystride);
|
||||
}
|
||||
|
||||
/* calc uv motion vectors */
|
||||
_16x16mv.as_mv.row += 1 | (_16x16mv.as_mv.row >> (sizeof(int) * CHAR_BIT - 1));
|
||||
_16x16mv.as_mv.col += 1 | (_16x16mv.as_mv.col >> (sizeof(int) * CHAR_BIT - 1));
|
||||
_16x16mv.as_mv.row /= 2;
|
||||
_16x16mv.as_mv.col /= 2;
|
||||
_16x16mv.as_mv.row &= x->fullpixel_mask;
|
||||
_16x16mv.as_mv.col &= x->fullpixel_mask;
|
||||
|
||||
pre_stride >>= 1;
|
||||
offset = ( _16x16mv.as_mv.row >> 3) * pre_stride + (_16x16mv.as_mv.col >> 3);
|
||||
uptr = x->pre.u_buffer + offset;
|
||||
vptr = x->pre.v_buffer + offset;
|
||||
|
||||
if ( _16x16mv.as_int & 0x00070007)
|
||||
{
|
||||
x->subpixel_predict8x8(uptr, pre_stride, _16x16mv.as_mv.col & 7, _16x16mv.as_mv.row & 7, dst_u, dst_uvstride);
|
||||
x->subpixel_predict8x8(vptr, pre_stride, _16x16mv.as_mv.col & 7, _16x16mv.as_mv.row & 7, dst_v, dst_uvstride);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_copy_mem8x8(uptr, pre_stride, dst_u, dst_uvstride);
|
||||
vp8_copy_mem8x8(vptr, pre_stride, dst_v, dst_uvstride);
|
||||
}
|
||||
}
|
||||
|
||||
static void build_inter4x4_predictors_mb(MACROBLOCKD *x)
|
||||
{
|
||||
int i;
|
||||
unsigned char *base_dst = x->dst.y_buffer;
|
||||
unsigned char *base_pre = x->pre.y_buffer;
|
||||
|
||||
if (x->mode_info_context->mbmi.partitioning < 3)
|
||||
{
|
||||
BLOCKD *b;
|
||||
int dst_stride = x->dst.y_stride;
|
||||
|
||||
x->block[ 0].bmi = x->mode_info_context->bmi[ 0];
|
||||
x->block[ 2].bmi = x->mode_info_context->bmi[ 2];
|
||||
x->block[ 8].bmi = x->mode_info_context->bmi[ 8];
|
||||
x->block[10].bmi = x->mode_info_context->bmi[10];
|
||||
if (x->mode_info_context->mbmi.need_to_clamp_mvs)
|
||||
{
|
||||
clamp_mv_to_umv_border(&x->block[ 0].bmi.mv.as_mv, x);
|
||||
clamp_mv_to_umv_border(&x->block[ 2].bmi.mv.as_mv, x);
|
||||
clamp_mv_to_umv_border(&x->block[ 8].bmi.mv.as_mv, x);
|
||||
clamp_mv_to_umv_border(&x->block[10].bmi.mv.as_mv, x);
|
||||
}
|
||||
|
||||
b = &x->block[ 0];
|
||||
build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre, dst_stride);
|
||||
b = &x->block[ 2];
|
||||
build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre, dst_stride);
|
||||
b = &x->block[ 8];
|
||||
build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre, dst_stride);
|
||||
b = &x->block[10];
|
||||
build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre, dst_stride);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (i = 0; i < 16; i += 2)
|
||||
{
|
||||
BLOCKD *d0 = &x->block[i];
|
||||
BLOCKD *d1 = &x->block[i+1];
|
||||
int dst_stride = x->dst.y_stride;
|
||||
|
||||
x->block[i+0].bmi = x->mode_info_context->bmi[i+0];
|
||||
x->block[i+1].bmi = x->mode_info_context->bmi[i+1];
|
||||
if (x->mode_info_context->mbmi.need_to_clamp_mvs)
|
||||
{
|
||||
clamp_mv_to_umv_border(&x->block[i+0].bmi.mv.as_mv, x);
|
||||
clamp_mv_to_umv_border(&x->block[i+1].bmi.mv.as_mv, x);
|
||||
}
|
||||
|
||||
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
|
||||
build_inter_predictors2b(x, d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride);
|
||||
else
|
||||
{
|
||||
build_inter_predictors_b(d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict);
|
||||
build_inter_predictors_b(d1, base_dst + d1->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
base_dst = x->dst.u_buffer;
|
||||
base_pre = x->pre.u_buffer;
|
||||
for (i = 16; i < 20; i += 2)
|
||||
{
|
||||
BLOCKD *d0 = &x->block[i];
|
||||
BLOCKD *d1 = &x->block[i+1];
|
||||
int dst_stride = x->dst.uv_stride;
|
||||
|
||||
/* Note: uv mvs already clamped in build_4x4uvmvs() */
|
||||
|
||||
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
|
||||
build_inter_predictors2b(x, d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride);
|
||||
else
|
||||
{
|
||||
build_inter_predictors_b(d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict);
|
||||
build_inter_predictors_b(d1, base_dst + d1->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict);
|
||||
}
|
||||
}
|
||||
|
||||
base_dst = x->dst.v_buffer;
|
||||
base_pre = x->pre.v_buffer;
|
||||
for (i = 20; i < 24; i += 2)
|
||||
{
|
||||
BLOCKD *d0 = &x->block[i];
|
||||
BLOCKD *d1 = &x->block[i+1];
|
||||
int dst_stride = x->dst.uv_stride;
|
||||
|
||||
/* Note: uv mvs already clamped in build_4x4uvmvs() */
|
||||
|
||||
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
|
||||
build_inter_predictors2b(x, d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride);
|
||||
else
|
||||
{
|
||||
build_inter_predictors_b(d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict);
|
||||
build_inter_predictors_b(d1, base_dst + d1->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
void build_4x4uvmvs(MACROBLOCKD *x)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
for (j = 0; j < 2; j++)
|
||||
{
|
||||
int yoffset = i * 8 + j * 2;
|
||||
int uoffset = 16 + i * 2 + j;
|
||||
int voffset = 20 + i * 2 + j;
|
||||
|
||||
int temp;
|
||||
|
||||
temp = x->mode_info_context->bmi[yoffset + 0].mv.as_mv.row
|
||||
+ x->mode_info_context->bmi[yoffset + 1].mv.as_mv.row
|
||||
+ x->mode_info_context->bmi[yoffset + 4].mv.as_mv.row
|
||||
+ x->mode_info_context->bmi[yoffset + 5].mv.as_mv.row;
|
||||
|
||||
temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
|
||||
|
||||
x->block[uoffset].bmi.mv.as_mv.row = (temp / 8) & x->fullpixel_mask;
|
||||
|
||||
temp = x->mode_info_context->bmi[yoffset + 0].mv.as_mv.col
|
||||
+ x->mode_info_context->bmi[yoffset + 1].mv.as_mv.col
|
||||
+ x->mode_info_context->bmi[yoffset + 4].mv.as_mv.col
|
||||
+ x->mode_info_context->bmi[yoffset + 5].mv.as_mv.col;
|
||||
|
||||
temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
|
||||
|
||||
x->block[uoffset].bmi.mv.as_mv.col = (temp / 8) & x->fullpixel_mask;
|
||||
|
||||
if (x->mode_info_context->mbmi.need_to_clamp_mvs)
|
||||
clamp_uvmv_to_umv_border(&x->block[uoffset].bmi.mv.as_mv, x);
|
||||
|
||||
x->block[voffset].bmi.mv.as_int = x->block[uoffset].bmi.mv.as_int;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_build_inter_predictors_mb(MACROBLOCKD *xd)
|
||||
{
|
||||
if (xd->mode_info_context->mbmi.mode != SPLITMV)
|
||||
{
|
||||
vp8_build_inter16x16_predictors_mb(xd, xd->dst.y_buffer,
|
||||
xd->dst.u_buffer, xd->dst.v_buffer,
|
||||
xd->dst.y_stride, xd->dst.uv_stride);
|
||||
}
|
||||
else
|
||||
{
|
||||
build_4x4uvmvs(xd);
|
||||
build_inter4x4_predictors_mb(xd);
|
||||
}
|
||||
}
|
|
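The chroma motion vectors in vp8_build_inter4x4_predictors_mbuv() and build_4x4uvmvs() above are derived by summing the four covering luma sub-block MVs and rounding the average to the nearest integer with ties away from zero. A minimal standalone sketch of that rounding step, assuming an arithmetic right shift on signed int as the library code itself does:

```c
/* Standalone illustration (not part of libvpx) of the divide-by-8 rounding
 * used for the chroma MV derivation above: add 4 for non-negative sums,
 * subtract 4 for negative sums, then truncate toward zero. */
#include <limits.h>
#include <stdio.h>

static int round_div8(int sum)
{
    sum += 4 + ((sum >> (sizeof(sum) * CHAR_BIT - 1)) * 8);
    return sum / 8;  /* C division truncates toward zero */
}

int main(void)
{
    printf("%d\n", round_div8(3 + 4 + 5 + 4));    /*  16/8 ->  2 */
    printf("%d\n", round_div8(-3 - 4 - 5 - 4));   /* -16/8 -> -2 */
    printf("%d\n", round_div8(1 + 1 + 1 + 0));    /*   3/8 ->  0 */
    printf("%d\n", round_div8(-1 - 1 - 1 - 2));   /*  -5/8 -> -1 */
    return 0;
}
```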
@@ -0,0 +1,43 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_RECONINTER_H_
|
||||
#define VP8_COMMON_RECONINTER_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern void vp8_build_inter_predictors_mb(MACROBLOCKD *x);
|
||||
extern void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x,
|
||||
unsigned char *dst_y,
|
||||
unsigned char *dst_u,
|
||||
unsigned char *dst_v,
|
||||
int dst_ystride,
|
||||
int dst_uvstride);
|
||||
|
||||
|
||||
extern void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x,
|
||||
unsigned char *dst_y,
|
||||
int dst_ystride);
|
||||
extern void vp8_build_inter_predictors_b(BLOCKD *d, int pitch,
|
||||
unsigned char *base_pre,
|
||||
int pre_stride,
|
||||
vp8_subpix_fn_t sppf);
|
||||
|
||||
extern void vp8_build_inter16x16_predictors_mbuv(MACROBLOCKD *x);
|
||||
extern void vp8_build_inter4x4_predictors_mbuv(MACROBLOCKD *x);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_RECONINTER_H_
|
|
@@ -0,0 +1,117 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "./vpx_config.h"
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
#include "./vp8_rtcd.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
#include "vpx_ports/vpx_once.h"
|
||||
#include "blockd.h"
|
||||
#include "vp8/common/reconintra.h"
|
||||
#include "vp8/common/reconintra4x4.h"
|
||||
|
||||
enum {
|
||||
SIZE_16,
|
||||
SIZE_8,
|
||||
NUM_SIZES,
|
||||
};
|
||||
|
||||
typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
|
||||
const uint8_t *above, const uint8_t *left);
|
||||
|
||||
static intra_pred_fn pred[4][NUM_SIZES];
|
||||
static intra_pred_fn dc_pred[2][2][NUM_SIZES];
|
||||
|
||||
static void vp8_init_intra_predictors_internal(void)
|
||||
{
|
||||
#define INIT_SIZE(sz) \
|
||||
pred[V_PRED][SIZE_##sz] = vpx_v_predictor_##sz##x##sz; \
|
||||
pred[H_PRED][SIZE_##sz] = vpx_h_predictor_##sz##x##sz; \
|
||||
pred[TM_PRED][SIZE_##sz] = vpx_tm_predictor_##sz##x##sz; \
|
||||
\
|
||||
dc_pred[0][0][SIZE_##sz] = vpx_dc_128_predictor_##sz##x##sz; \
|
||||
dc_pred[0][1][SIZE_##sz] = vpx_dc_top_predictor_##sz##x##sz; \
|
||||
dc_pred[1][0][SIZE_##sz] = vpx_dc_left_predictor_##sz##x##sz; \
|
||||
dc_pred[1][1][SIZE_##sz] = vpx_dc_predictor_##sz##x##sz
|
||||
|
||||
INIT_SIZE(16);
|
||||
INIT_SIZE(8);
|
||||
vp8_init_intra4x4_predictors_internal();
|
||||
}
|
||||
|
||||
void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x,
|
||||
unsigned char * yabove_row,
|
||||
unsigned char * yleft,
|
||||
int left_stride,
|
||||
unsigned char * ypred_ptr,
|
||||
int y_stride)
|
||||
{
|
||||
MB_PREDICTION_MODE mode = x->mode_info_context->mbmi.mode;
|
||||
DECLARE_ALIGNED(16, uint8_t, yleft_col[16]);
|
||||
int i;
|
||||
intra_pred_fn fn;
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
yleft_col[i] = yleft[i* left_stride];
|
||||
}
|
||||
|
||||
if (mode == DC_PRED)
|
||||
{
|
||||
fn = dc_pred[x->left_available][x->up_available][SIZE_16];
|
||||
}
|
||||
else
|
||||
{
|
||||
fn = pred[mode][SIZE_16];
|
||||
}
|
||||
|
||||
fn(ypred_ptr, y_stride, yabove_row, yleft_col);
|
||||
}
|
||||
|
||||
void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x,
|
||||
unsigned char * uabove_row,
|
||||
unsigned char * vabove_row,
|
||||
unsigned char * uleft,
|
||||
unsigned char * vleft,
|
||||
int left_stride,
|
||||
unsigned char * upred_ptr,
|
||||
unsigned char * vpred_ptr,
|
||||
int pred_stride)
|
||||
{
|
||||
MB_PREDICTION_MODE uvmode = x->mode_info_context->mbmi.uv_mode;
|
||||
unsigned char uleft_col[8];
|
||||
unsigned char vleft_col[8];
|
||||
int i;
|
||||
intra_pred_fn fn;
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
{
|
||||
uleft_col[i] = uleft[i * left_stride];
|
||||
vleft_col[i] = vleft[i * left_stride];
|
||||
}
|
||||
|
||||
if (uvmode == DC_PRED)
|
||||
{
|
||||
fn = dc_pred[x->left_available][x->up_available][SIZE_8];
|
||||
}
|
||||
else
|
||||
{
|
||||
fn = pred[uvmode][SIZE_8];
|
||||
}
|
||||
|
||||
fn(upred_ptr, pred_stride, uabove_row, uleft_col);
|
||||
fn(vpred_ptr, pred_stride, vabove_row, vleft_col);
|
||||
}
|
||||
|
||||
void vp8_init_intra_predictors(void)
|
||||
{
|
||||
once(vp8_init_intra_predictors_internal);
|
||||
}
|
|
@@ -0,0 +1,44 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_RECONINTRA_H_
|
||||
#define VP8_COMMON_RECONINTRA_H_
|
||||
|
||||
#include "vp8/common/blockd.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x,
|
||||
unsigned char *yabove_row,
|
||||
unsigned char *yleft,
|
||||
int left_stride,
|
||||
unsigned char *ypred_ptr,
|
||||
int y_stride);
|
||||
|
||||
void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x,
|
||||
unsigned char * uabove_row,
|
||||
unsigned char * vabove_row,
|
||||
unsigned char * uleft,
|
||||
unsigned char * vleft,
|
||||
int left_stride,
|
||||
unsigned char * upred_ptr,
|
||||
unsigned char * vpred_ptr,
|
||||
int pred_stride);
|
||||
|
||||
void vp8_init_intra_predictors(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_RECONINTRA_H_
|
|
@@ -0,0 +1,54 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "blockd.h"
|
||||
|
||||
typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
|
||||
const uint8_t *above, const uint8_t *left);
|
||||
|
||||
static intra_pred_fn pred[10];
|
||||
|
||||
void vp8_init_intra4x4_predictors_internal(void)
|
||||
{
|
||||
pred[B_DC_PRED] = vpx_dc_predictor_4x4;
|
||||
pred[B_TM_PRED] = vpx_tm_predictor_4x4;
|
||||
pred[B_VE_PRED] = vpx_ve_predictor_4x4;
|
||||
pred[B_HE_PRED] = vpx_he_predictor_4x4;
|
||||
pred[B_LD_PRED] = vpx_d45e_predictor_4x4;
|
||||
pred[B_RD_PRED] = vpx_d135_predictor_4x4;
|
||||
pred[B_VR_PRED] = vpx_d117_predictor_4x4;
|
||||
pred[B_VL_PRED] = vpx_d63f_predictor_4x4;
|
||||
pred[B_HD_PRED] = vpx_d153_predictor_4x4;
|
||||
pred[B_HU_PRED] = vpx_d207_predictor_4x4;
|
||||
}
|
||||
|
||||
void vp8_intra4x4_predict(unsigned char *above,
|
||||
unsigned char *yleft, int left_stride,
|
||||
B_PREDICTION_MODE b_mode,
|
||||
unsigned char *dst, int dst_stride,
|
||||
unsigned char top_left)
|
||||
{
|
||||
unsigned char Left[4];
|
||||
unsigned char Aboveb[12], *Above = Aboveb + 4;
|
||||
|
||||
Left[0] = yleft[0];
|
||||
Left[1] = yleft[left_stride];
|
||||
Left[2] = yleft[2 * left_stride];
|
||||
Left[3] = yleft[3 * left_stride];
|
||||
memcpy(Above, above, 8);
|
||||
Above[-1] = top_left;
|
||||
|
||||
pred[b_mode](dst, dst_stride, Above, Left);
|
||||
}
|
|
@@ -0,0 +1,48 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_RECONINTRA4X4_H_
|
||||
#define VP8_COMMON_RECONINTRA4X4_H_
|
||||
#include "vp8/common/blockd.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
static INLINE void intra_prediction_down_copy(MACROBLOCKD *xd,
|
||||
unsigned char *above_right_src)
|
||||
{
|
||||
int dst_stride = xd->dst.y_stride;
|
||||
unsigned char *above_right_dst = xd->dst.y_buffer - dst_stride + 16;
|
||||
|
||||
unsigned int *src_ptr = (unsigned int *)above_right_src;
|
||||
unsigned int *dst_ptr0 = (unsigned int *)(above_right_dst + 4 * dst_stride);
|
||||
unsigned int *dst_ptr1 = (unsigned int *)(above_right_dst + 8 * dst_stride);
|
||||
unsigned int *dst_ptr2 = (unsigned int *)(above_right_dst + 12 * dst_stride);
|
||||
|
||||
*dst_ptr0 = *src_ptr;
|
||||
*dst_ptr1 = *src_ptr;
|
||||
*dst_ptr2 = *src_ptr;
|
||||
}
|
||||
|
||||
void vp8_intra4x4_predict(unsigned char *Above,
|
||||
unsigned char *yleft, int left_stride,
|
||||
B_PREDICTION_MODE b_mode,
|
||||
unsigned char *dst, int dst_stride,
|
||||
unsigned char top_left);
|
||||
|
||||
void vp8_init_intra4x4_predictors_internal(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_RECONINTRA4X4_H_
|
|
@@ -0,0 +1,19 @@
/*
 * Copyright (c) 2011 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
#include "./vpx_config.h"
#define RTCD_C
#include "./vp8_rtcd.h"
#include "vpx_ports/vpx_once.h"

void vp8_rtcd()
{
    once(setup_rtcd_internal);
}
@@ -0,0 +1,39 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "setupintrarecon.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
|
||||
void vp8_setup_intra_recon(YV12_BUFFER_CONFIG *ybf)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* set up frame new frame for intra coded blocks */
|
||||
memset(ybf->y_buffer - 1 - ybf->y_stride, 127, ybf->y_width + 5);
|
||||
for (i = 0; i < ybf->y_height; i++)
|
||||
ybf->y_buffer[ybf->y_stride *i - 1] = (unsigned char) 129;
|
||||
|
||||
memset(ybf->u_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
|
||||
for (i = 0; i < ybf->uv_height; i++)
|
||||
ybf->u_buffer[ybf->uv_stride *i - 1] = (unsigned char) 129;
|
||||
|
||||
memset(ybf->v_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
|
||||
for (i = 0; i < ybf->uv_height; i++)
|
||||
ybf->v_buffer[ybf->uv_stride *i - 1] = (unsigned char) 129;
|
||||
|
||||
}
|
||||
|
||||
void vp8_setup_intra_recon_top_line(YV12_BUFFER_CONFIG *ybf)
|
||||
{
|
||||
memset(ybf->y_buffer - 1 - ybf->y_stride, 127, ybf->y_width + 5);
|
||||
memset(ybf->u_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
|
||||
memset(ybf->v_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
|
||||
}
|
|
@@ -0,0 +1,45 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef VP8_COMMON_SETUPINTRARECON_H_
|
||||
#define VP8_COMMON_SETUPINTRARECON_H_
|
||||
|
||||
#include "./vpx_config.h"
|
||||
#include "vpx_scale/yv12config.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
extern void vp8_setup_intra_recon(YV12_BUFFER_CONFIG *ybf);
|
||||
extern void vp8_setup_intra_recon_top_line(YV12_BUFFER_CONFIG *ybf);
|
||||
|
||||
static INLINE void setup_intra_recon_left(unsigned char *y_buffer,
|
||||
unsigned char *u_buffer,
|
||||
unsigned char *v_buffer,
|
||||
int y_stride,
|
||||
int uv_stride)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
y_buffer[y_stride *i] = (unsigned char) 129;
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
u_buffer[uv_stride *i] = (unsigned char) 129;
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
v_buffer[uv_stride *i] = (unsigned char) 129;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_SETUPINTRARECON_H_
|
|
@@ -0,0 +1,34 @@
/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "swapyv12buffer.h"

void vp8_swap_yv12_buffer(YV12_BUFFER_CONFIG *new_frame, YV12_BUFFER_CONFIG *last_frame)
{
    unsigned char *temp;

    temp = last_frame->buffer_alloc;
    last_frame->buffer_alloc = new_frame->buffer_alloc;
    new_frame->buffer_alloc = temp;

    temp = last_frame->y_buffer;
    last_frame->y_buffer = new_frame->y_buffer;
    new_frame->y_buffer = temp;

    temp = last_frame->u_buffer;
    last_frame->u_buffer = new_frame->u_buffer;
    new_frame->u_buffer = temp;

    temp = last_frame->v_buffer;
    last_frame->v_buffer = new_frame->v_buffer;
    new_frame->v_buffer = temp;
}
@@ -0,0 +1,27 @@
/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VP8_COMMON_SWAPYV12BUFFER_H_
#define VP8_COMMON_SWAPYV12BUFFER_H_

#include "vpx_scale/yv12config.h"

#ifdef __cplusplus
extern "C" {
#endif

void vp8_swap_yv12_buffer(YV12_BUFFER_CONFIG *new_frame, YV12_BUFFER_CONFIG *last_frame);

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // VP8_COMMON_SWAPYV12BUFFER_H_
@@ -0,0 +1,27 @@
/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VP8_COMMON_SYSTEMDEPENDENT_H_
#define VP8_COMMON_SYSTEMDEPENDENT_H_

#include "vpx_config.h"

#ifdef __cplusplus
extern "C" {
#endif

struct VP8Common;
void vp8_machine_specific_config(struct VP8Common *);

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // VP8_COMMON_SYSTEMDEPENDENT_H_
@@ -0,0 +1,232 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_THREADING_H_
|
||||
#define VP8_COMMON_THREADING_H_
|
||||
|
||||
#include "./vpx_config.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD
|
||||
|
||||
/* Thread management macros */
|
||||
#if defined(_WIN32) && !HAVE_PTHREAD_H
|
||||
/* Win32 */
|
||||
#include <process.h>
|
||||
#include <windows.h>
|
||||
#define THREAD_FUNCTION unsigned int __stdcall
|
||||
#define THREAD_FUNCTION_RETURN DWORD
|
||||
#define THREAD_SPECIFIC_INDEX DWORD
|
||||
#define pthread_t HANDLE
|
||||
#define pthread_attr_t DWORD
|
||||
#define pthread_detach(thread) if(thread!=NULL)CloseHandle(thread)
|
||||
#define thread_sleep(nms) Sleep(nms)
|
||||
#define pthread_cancel(thread) terminate_thread(thread,0)
|
||||
#define ts_key_create(ts_key, destructor) {ts_key = TlsAlloc();};
|
||||
#define pthread_getspecific(ts_key) TlsGetValue(ts_key)
|
||||
#define pthread_setspecific(ts_key, value) TlsSetValue(ts_key, (void *)value)
|
||||
#define pthread_self() GetCurrentThreadId()
|
||||
|
||||
#elif defined(__OS2__)
|
||||
/* OS/2 */
|
||||
#define INCL_DOS
|
||||
#include <os2.h>
|
||||
|
||||
#include <stdlib.h>
|
||||
#define THREAD_FUNCTION void *
|
||||
#define THREAD_FUNCTION_RETURN void *
|
||||
#define THREAD_SPECIFIC_INDEX PULONG
|
||||
#define pthread_t TID
|
||||
#define pthread_attr_t ULONG
|
||||
#define pthread_detach(thread) 0
|
||||
#define thread_sleep(nms) DosSleep(nms)
|
||||
#define pthread_cancel(thread) DosKillThread(thread)
|
||||
#define ts_key_create(ts_key, destructor) \
|
||||
DosAllocThreadLocalMemory(1, &(ts_key));
|
||||
#define pthread_getspecific(ts_key) ((void *)(*(ts_key)))
|
||||
#define pthread_setspecific(ts_key, value) (*(ts_key)=(ULONG)(value))
|
||||
#define pthread_self() _gettid()
|
||||
#else
|
||||
#ifdef __APPLE__
|
||||
#include <mach/mach_init.h>
|
||||
#include <mach/semaphore.h>
|
||||
#include <mach/task.h>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#else
|
||||
#include <semaphore.h>
|
||||
#endif
|
||||
|
||||
#include <pthread.h>
|
||||
/* pthreads */
|
||||
/* Nearly everything is already defined */
|
||||
#define THREAD_FUNCTION void *
|
||||
#define THREAD_FUNCTION_RETURN void *
|
||||
#define THREAD_SPECIFIC_INDEX pthread_key_t
|
||||
#define ts_key_create(ts_key, destructor) pthread_key_create (&(ts_key), destructor);
|
||||
#endif
|
||||
|
||||
/* Synchronization macros: Win32 and Pthreads */
|
||||
#if defined(_WIN32) && !HAVE_PTHREAD_H
|
||||
#define sem_t HANDLE
|
||||
#define pause(voidpara) __asm PAUSE
|
||||
#define sem_init(sem, sem_attr1, sem_init_value) (int)((*sem = CreateSemaphore(NULL,0,32768,NULL))==NULL)
|
||||
#define sem_wait(sem) (int)(WAIT_OBJECT_0 != WaitForSingleObject(*sem,INFINITE))
|
||||
#define sem_post(sem) ReleaseSemaphore(*sem,1,NULL)
|
||||
#define sem_destroy(sem) if(*sem)((int)(CloseHandle(*sem))==TRUE)
|
||||
#define thread_sleep(nms) Sleep(nms)
|
||||
|
||||
#elif defined(__OS2__)
|
||||
typedef struct
|
||||
{
|
||||
HEV event;
|
||||
HMTX wait_mutex;
|
||||
HMTX count_mutex;
|
||||
int count;
|
||||
} sem_t;
|
||||
|
||||
static inline int sem_init(sem_t *sem, int pshared, unsigned int value)
|
||||
{
|
||||
DosCreateEventSem(NULL, &sem->event, pshared ? DC_SEM_SHARED : 0,
|
||||
value > 0 ? TRUE : FALSE);
|
||||
DosCreateMutexSem(NULL, &sem->wait_mutex, 0, FALSE);
|
||||
DosCreateMutexSem(NULL, &sem->count_mutex, 0, FALSE);
|
||||
|
||||
sem->count = value;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int sem_wait(sem_t * sem)
|
||||
{
|
||||
DosRequestMutexSem(sem->wait_mutex, -1);
|
||||
|
||||
DosWaitEventSem(sem->event, -1);
|
||||
|
||||
DosRequestMutexSem(sem->count_mutex, -1);
|
||||
|
||||
sem->count--;
|
||||
if (sem->count == 0)
|
||||
{
|
||||
ULONG post_count;
|
||||
|
||||
DosResetEventSem(sem->event, &post_count);
|
||||
}
|
||||
|
||||
DosReleaseMutexSem(sem->count_mutex);
|
||||
|
||||
DosReleaseMutexSem(sem->wait_mutex);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int sem_post(sem_t * sem)
|
||||
{
|
||||
DosRequestMutexSem(sem->count_mutex, -1);
|
||||
|
||||
if (sem->count < 32768)
|
||||
{
|
||||
sem->count++;
|
||||
DosPostEventSem(sem->event);
|
||||
}
|
||||
|
||||
DosReleaseMutexSem(sem->count_mutex);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int sem_destroy(sem_t * sem)
|
||||
{
|
||||
DosCloseEventSem(sem->event);
|
||||
DosCloseMutexSem(sem->wait_mutex);
|
||||
DosCloseMutexSem(sem->count_mutex);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define thread_sleep(nms) DosSleep(nms)
|
||||
|
||||
#else
|
||||
|
||||
#ifdef __APPLE__
|
||||
#define sem_t semaphore_t
|
||||
#define sem_init(X,Y,Z) semaphore_create(mach_task_self(), X, SYNC_POLICY_FIFO, Z)
|
||||
#define sem_wait(sem) (semaphore_wait(*sem) )
|
||||
#define sem_post(sem) semaphore_signal(*sem)
|
||||
#define sem_destroy(sem) semaphore_destroy(mach_task_self(),*sem)
|
||||
#define thread_sleep(nms) /* { struct timespec ts;ts.tv_sec=0; ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);} */
|
||||
#else
|
||||
#include <unistd.h>
|
||||
#include <sched.h>
|
||||
#define thread_sleep(nms) sched_yield();/* {struct timespec ts;ts.tv_sec=0; ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);} */
|
||||
#endif
|
||||
/* Not Windows. Assume pthreads */
|
||||
|
||||
#endif
|
||||
|
||||
#if ARCH_X86 || ARCH_X86_64
|
||||
#include "vpx_ports/x86.h"
|
||||
#else
|
||||
#define x86_pause_hint()
|
||||
#endif
|
||||
|
||||
#include "vpx_util/vpx_thread.h"
|
||||
|
||||
static INLINE void mutex_lock(pthread_mutex_t *const mutex) {
|
||||
const int kMaxTryLocks = 4000;
|
||||
int locked = 0;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < kMaxTryLocks; ++i) {
|
||||
if (!pthread_mutex_trylock(mutex)) {
|
||||
locked = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!locked)
|
||||
pthread_mutex_lock(mutex);
|
||||
}
|
||||
|
||||
static INLINE int protected_read(pthread_mutex_t *const mutex, const int *p) {
|
||||
int ret;
|
||||
mutex_lock(mutex);
|
||||
ret = *p;
|
||||
pthread_mutex_unlock(mutex);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static INLINE void sync_read(pthread_mutex_t *const mutex, int mb_col,
|
||||
const int *last_row_current_mb_col,
|
||||
const int nsync) {
|
||||
while (mb_col > (protected_read(mutex, last_row_current_mb_col) - nsync)) {
|
||||
x86_pause_hint();
|
||||
thread_sleep(0);
|
||||
}
|
||||
}
|
||||
|
||||
static INLINE void protected_write(pthread_mutex_t *mutex, int *p, int v) {
|
||||
mutex_lock(mutex);
|
||||
*p = v;
|
||||
pthread_mutex_unlock(mutex);
|
||||
}
|
||||
|
||||
#endif /* CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD */
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_THREADING_H_
|
|
@@ -0,0 +1,143 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#if CONFIG_DEBUG
|
||||
#include <assert.h>
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
|
||||
#include "treecoder.h"
|
||||
|
||||
static void tree2tok(
|
||||
struct vp8_token_struct *const p,
|
||||
vp8_tree t,
|
||||
int i,
|
||||
int v,
|
||||
int L
|
||||
)
|
||||
{
|
||||
v += v;
|
||||
++L;
|
||||
|
||||
do
|
||||
{
|
||||
const vp8_tree_index j = t[i++];
|
||||
|
||||
if (j <= 0)
|
||||
{
|
||||
p[-j].value = v;
|
||||
p[-j].Len = L;
|
||||
}
|
||||
else
|
||||
tree2tok(p, t, j, v, L);
|
||||
}
|
||||
while (++v & 1);
|
||||
}
|
||||
|
||||
void vp8_tokens_from_tree(struct vp8_token_struct *p, vp8_tree t)
|
||||
{
|
||||
tree2tok(p, t, 0, 0, 0);
|
||||
}
|
||||
|
||||
void vp8_tokens_from_tree_offset(struct vp8_token_struct *p, vp8_tree t,
|
||||
int offset)
|
||||
{
|
||||
tree2tok(p - offset, t, 0, 0, 0);
|
||||
}
|
||||
|
||||
static void branch_counts(
|
||||
int n, /* n = size of alphabet */
|
||||
vp8_token tok [ /* n */ ],
|
||||
vp8_tree tree,
|
||||
unsigned int branch_ct [ /* n-1 */ ] [2],
|
||||
const unsigned int num_events[ /* n */ ]
|
||||
)
|
||||
{
|
||||
const int tree_len = n - 1;
|
||||
int t = 0;
|
||||
|
||||
#if CONFIG_DEBUG
|
||||
assert(tree_len);
|
||||
#endif
|
||||
|
||||
do
|
||||
{
|
||||
branch_ct[t][0] = branch_ct[t][1] = 0;
|
||||
}
|
||||
while (++t < tree_len);
|
||||
|
||||
t = 0;
|
||||
|
||||
do
|
||||
{
|
||||
int L = tok[t].Len;
|
||||
const int enc = tok[t].value;
|
||||
const unsigned int ct = num_events[t];
|
||||
|
||||
vp8_tree_index i = 0;
|
||||
|
||||
do
|
||||
{
|
||||
const int b = (enc >> --L) & 1;
|
||||
const int j = i >> 1;
|
||||
#if CONFIG_DEBUG
|
||||
assert(j < tree_len && 0 <= L);
|
||||
#endif
|
||||
|
||||
branch_ct [j] [b] += ct;
|
||||
i = tree[ i + b];
|
||||
}
|
||||
while (i > 0);
|
||||
|
||||
#if CONFIG_DEBUG
|
||||
assert(!L);
|
||||
#endif
|
||||
}
|
||||
while (++t < n);
|
||||
|
||||
}
|
||||
|
||||
|
||||
void vp8_tree_probs_from_distribution(
|
||||
int n, /* n = size of alphabet */
|
||||
vp8_token tok [ /* n */ ],
|
||||
vp8_tree tree,
|
||||
vp8_prob probs [ /* n-1 */ ],
|
||||
unsigned int branch_ct [ /* n-1 */ ] [2],
|
||||
const unsigned int num_events[ /* n */ ],
|
||||
unsigned int Pfac,
|
||||
int rd
|
||||
)
|
||||
{
|
||||
const int tree_len = n - 1;
|
||||
int t = 0;
|
||||
|
||||
branch_counts(n, tok, tree, branch_ct, num_events);
|
||||
|
||||
do
|
||||
{
|
||||
const unsigned int *const c = branch_ct[t];
|
||||
const unsigned int tot = c[0] + c[1];
|
||||
|
||||
#if CONFIG_DEBUG
|
||||
assert(tot < (1 << 24)); /* no overflow below */
|
||||
#endif
|
||||
|
||||
if (tot)
|
||||
{
|
||||
const unsigned int p = ((c[0] * Pfac) + (rd ? tot >> 1 : 0)) / tot;
|
||||
probs[t] = p < 256 ? (p ? p : 1) : 255; /* agree w/old version for now */
|
||||
}
|
||||
else
|
||||
probs[t] = vp8_prob_half;
|
||||
}
|
||||
while (++t < tree_len);
|
||||
}
|
|
@@ -0,0 +1,98 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP8_COMMON_TREECODER_H_
|
||||
#define VP8_COMMON_TREECODER_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef unsigned char vp8bc_index_t; /* probability index */
|
||||
|
||||
|
||||
typedef unsigned char vp8_prob;
|
||||
|
||||
#define vp8_prob_half ( (vp8_prob) 128)
|
||||
|
||||
typedef signed char vp8_tree_index;
|
||||
struct bool_coder_spec;
|
||||
|
||||
typedef struct bool_coder_spec bool_coder_spec;
|
||||
typedef struct bool_writer bool_writer;
|
||||
typedef struct bool_reader bool_reader;
|
||||
|
||||
typedef const bool_coder_spec c_bool_coder_spec;
|
||||
typedef const bool_writer c_bool_writer;
|
||||
typedef const bool_reader c_bool_reader;
|
||||
|
||||
|
||||
|
||||
# define vp8_complement( x) (255 - x)
|
||||
|
||||
|
||||
/* We build coding trees compactly in arrays.
|
||||
Each node of the tree is a pair of vp8_tree_indices.
|
||||
Array index often references a corresponding probability table.
|
||||
Index <= 0 means done encoding/decoding and value = -Index,
|
||||
Index > 0 means need another bit, specification at index.
|
||||
Nonnegative indices are always even; processing begins at node 0. */
|
||||
|
||||
typedef const vp8_tree_index vp8_tree[], *vp8_tree_p;
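/* Hypothetical illustration (not part of the upstream header): a
three-symbol alphabet {A=0, B=1, C=2} could be coded with

    static const vp8_tree_index example_tree[4] = { 0, 2, -1, -2 };

A 0 bit at node 0 terminates with value -0 = 0 (A); a 1 bit moves to the
node starting at index 2, where a 0 bit yields B (value 1) and a 1 bit
yields C (value 2). */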
|
||||
|
||||
|
||||
typedef const struct vp8_token_struct
|
||||
{
|
||||
int value;
|
||||
int Len;
|
||||
} vp8_token;
|
||||
|
||||
/* Construct encoding array from tree. */
|
||||
|
||||
void vp8_tokens_from_tree(struct vp8_token_struct *, vp8_tree);
|
||||
void vp8_tokens_from_tree_offset(struct vp8_token_struct *, vp8_tree,
|
||||
int offset);
|
||||
|
||||
|
||||
/* Convert array of token occurrence counts into a table of probabilities
|
||||
for the associated binary encoding tree. Also writes count of branches
|
||||
taken for each node on the tree; this facilitates decisions as to
|
||||
probability updates. */
|
||||
|
||||
void vp8_tree_probs_from_distribution(
|
||||
int n, /* n = size of alphabet */
|
||||
vp8_token tok [ /* n */ ],
|
||||
vp8_tree tree,
|
||||
vp8_prob probs [ /* n-1 */ ],
|
||||
unsigned int branch_ct [ /* n-1 */ ] [2],
|
||||
const unsigned int num_events[ /* n */ ],
|
||||
unsigned int Pfactor,
|
||||
int Round
|
||||
);
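/* Sketch of the computation performed (see the implementation in
treecoder.c above): for each tree node with branch counts c[0], c[1] and
tot = c[0] + c[1],

    probs[t] = tot ? clamp((c[0] * Pfactor + (Round ? tot / 2 : 0)) / tot,
                           1, 255)
                   : vp8_prob_half;
*/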
|
||||
|
||||
/* Variant of above using coder spec rather than hardwired 8-bit probs. */
|
||||
|
||||
void vp8bc_tree_probs_from_distribution(
|
||||
int n, /* n = size of alphabet */
|
||||
vp8_token tok [ /* n */ ],
|
||||
vp8_tree tree,
|
||||
vp8_prob probs [ /* n-1 */ ],
|
||||
unsigned int branch_ct [ /* n-1 */ ] [2],
|
||||
const unsigned int num_events[ /* n */ ],
|
||||
c_bool_coder_spec *s
|
||||
);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_TREECODER_H_
|
|
@@ -0,0 +1,254 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef VP8_COMMON_VP8_ENTROPYMODEDATA_H_
|
||||
#define VP8_COMMON_VP8_ENTROPYMODEDATA_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*Generated file, included by entropymode.c*/
|
||||
|
||||
|
||||
const struct vp8_token_struct vp8_bmode_encodings[VP8_BINTRAMODES] =
|
||||
{
|
||||
{ 0, 1 },
|
||||
{ 2, 2 },
|
||||
{ 6, 3 },
|
||||
{ 28, 5 },
|
||||
{ 30, 5 },
|
||||
{ 58, 6 },
|
||||
{ 59, 6 },
|
||||
{ 62, 6 },
|
||||
{ 126, 7 },
|
||||
{ 127, 7 }
|
||||
};
|
||||
|
||||
const struct vp8_token_struct vp8_ymode_encodings[VP8_YMODES] =
|
||||
{
|
||||
{ 0, 1 },
|
||||
{ 4, 3 },
|
||||
{ 5, 3 },
|
||||
{ 6, 3 },
|
||||
{ 7, 3 }
|
||||
};
|
||||
|
||||
const struct vp8_token_struct vp8_kf_ymode_encodings[VP8_YMODES] =
|
||||
{
|
||||
{ 4, 3 },
|
||||
{ 5, 3 },
|
||||
{ 6, 3 },
|
||||
{ 7, 3 },
|
||||
{ 0, 1 }
|
||||
};
|
||||
|
||||
const struct vp8_token_struct vp8_uv_mode_encodings[VP8_UV_MODES] =
|
||||
{
|
||||
{ 0, 1 },
|
||||
{ 2, 2 },
|
||||
{ 6, 3 },
|
||||
{ 7, 3 }
|
||||
};
|
||||
|
||||
const struct vp8_token_struct vp8_mbsplit_encodings[VP8_NUMMBSPLITS] =
|
||||
{
|
||||
{ 6, 3 },
|
||||
{ 7, 3 },
|
||||
{ 2, 2 },
|
||||
{ 0, 1 }
|
||||
};
|
||||
|
||||
const struct vp8_token_struct vp8_mv_ref_encoding_array[VP8_MVREFS] =
|
||||
{
|
||||
{ 2, 2 },
|
||||
{ 6, 3 },
|
||||
{ 0, 1 },
|
||||
{ 14, 4 },
|
||||
{ 15, 4 }
|
||||
};
|
||||
|
||||
const struct vp8_token_struct vp8_sub_mv_ref_encoding_array[VP8_SUBMVREFS] =
|
||||
{
|
||||
{ 0, 1 },
|
||||
{ 2, 2 },
|
||||
{ 6, 3 },
|
||||
{ 7, 3 }
|
||||
};
|
||||
|
||||
const struct vp8_token_struct vp8_small_mvencodings[8] =
|
||||
{
|
||||
{ 0, 3 },
|
||||
{ 1, 3 },
|
||||
{ 2, 3 },
|
||||
{ 3, 3 },
|
||||
{ 4, 3 },
|
||||
{ 5, 3 },
|
||||
{ 6, 3 },
|
||||
{ 7, 3 }
|
||||
};
|
||||
|
||||
const vp8_prob vp8_ymode_prob[VP8_YMODES-1] =
|
||||
{
|
||||
112, 86, 140, 37
|
||||
};
|
||||
|
||||
const vp8_prob vp8_kf_ymode_prob[VP8_YMODES-1] =
|
||||
{
|
||||
145, 156, 163, 128
|
||||
};
|
||||
|
||||
const vp8_prob vp8_uv_mode_prob[VP8_UV_MODES-1] =
|
||||
{
|
||||
162, 101, 204
|
||||
};
|
||||
|
||||
const vp8_prob vp8_kf_uv_mode_prob[VP8_UV_MODES-1] =
|
||||
{
|
||||
142, 114, 183
|
||||
};
|
||||
|
||||
const vp8_prob vp8_bmode_prob[VP8_BINTRAMODES-1] =
|
||||
{
|
||||
120, 90, 79, 133, 87, 85, 80, 111, 151
|
||||
};
|
||||
|
||||
|
||||
|
||||
const vp8_prob vp8_kf_bmode_prob
|
||||
[VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES-1] =
|
||||
{
|
||||
{
|
||||
{ 231, 120, 48, 89, 115, 113, 120, 152, 112 },
|
||||
{ 152, 179, 64, 126, 170, 118, 46, 70, 95 },
|
||||
{ 175, 69, 143, 80, 85, 82, 72, 155, 103 },
|
||||
{ 56, 58, 10, 171, 218, 189, 17, 13, 152 },
|
||||
{ 144, 71, 10, 38, 171, 213, 144, 34, 26 },
|
||||
{ 114, 26, 17, 163, 44, 195, 21, 10, 173 },
|
||||
{ 121, 24, 80, 195, 26, 62, 44, 64, 85 },
|
||||
{ 170, 46, 55, 19, 136, 160, 33, 206, 71 },
|
||||
{ 63, 20, 8, 114, 114, 208, 12, 9, 226 },
|
||||
{ 81, 40, 11, 96, 182, 84, 29, 16, 36 }
|
||||
},
|
||||
{
|
||||
{ 134, 183, 89, 137, 98, 101, 106, 165, 148 },
|
||||
{ 72, 187, 100, 130, 157, 111, 32, 75, 80 },
|
||||
{ 66, 102, 167, 99, 74, 62, 40, 234, 128 },
|
||||
{ 41, 53, 9, 178, 241, 141, 26, 8, 107 },
|
||||
{ 104, 79, 12, 27, 217, 255, 87, 17, 7 },
|
||||
{ 74, 43, 26, 146, 73, 166, 49, 23, 157 },
|
||||
{ 65, 38, 105, 160, 51, 52, 31, 115, 128 },
|
||||
{ 87, 68, 71, 44, 114, 51, 15, 186, 23 },
|
||||
{ 47, 41, 14, 110, 182, 183, 21, 17, 194 },
|
||||
{ 66, 45, 25, 102, 197, 189, 23, 18, 22 }
|
||||
},
|
||||
{
|
||||
{ 88, 88, 147, 150, 42, 46, 45, 196, 205 },
|
||||
{ 43, 97, 183, 117, 85, 38, 35, 179, 61 },
|
||||
{ 39, 53, 200, 87, 26, 21, 43, 232, 171 },
|
||||
{ 56, 34, 51, 104, 114, 102, 29, 93, 77 },
|
||||
{ 107, 54, 32, 26, 51, 1, 81, 43, 31 },
|
||||
{ 39, 28, 85, 171, 58, 165, 90, 98, 64 },
|
||||
{ 34, 22, 116, 206, 23, 34, 43, 166, 73 },
|
||||
{ 68, 25, 106, 22, 64, 171, 36, 225, 114 },
|
||||
{ 34, 19, 21, 102, 132, 188, 16, 76, 124 },
|
||||
{ 62, 18, 78, 95, 85, 57, 50, 48, 51 }
|
||||
},
|
||||
{
|
||||
{ 193, 101, 35, 159, 215, 111, 89, 46, 111 },
|
||||
{ 60, 148, 31, 172, 219, 228, 21, 18, 111 },
|
||||
{ 112, 113, 77, 85, 179, 255, 38, 120, 114 },
|
||||
{ 40, 42, 1, 196, 245, 209, 10, 25, 109 },
|
||||
{ 100, 80, 8, 43, 154, 1, 51, 26, 71 },
|
||||
{ 88, 43, 29, 140, 166, 213, 37, 43, 154 },
|
||||
{ 61, 63, 30, 155, 67, 45, 68, 1, 209 },
|
||||
{ 142, 78, 78, 16, 255, 128, 34, 197, 171 },
|
||||
{ 41, 40, 5, 102, 211, 183, 4, 1, 221 },
|
||||
{ 51, 50, 17, 168, 209, 192, 23, 25, 82 }
|
||||
},
|
||||
{
|
||||
{ 125, 98, 42, 88, 104, 85, 117, 175, 82 },
|
||||
{ 95, 84, 53, 89, 128, 100, 113, 101, 45 },
|
||||
{ 75, 79, 123, 47, 51, 128, 81, 171, 1 },
|
||||
{ 57, 17, 5, 71, 102, 57, 53, 41, 49 },
|
||||
{ 115, 21, 2, 10, 102, 255, 166, 23, 6 },
|
||||
{ 38, 33, 13, 121, 57, 73, 26, 1, 85 },
|
||||
{ 41, 10, 67, 138, 77, 110, 90, 47, 114 },
|
||||
{ 101, 29, 16, 10, 85, 128, 101, 196, 26 },
|
||||
{ 57, 18, 10, 102, 102, 213, 34, 20, 43 },
|
||||
{ 117, 20, 15, 36, 163, 128, 68, 1, 26 }
|
||||
},
|
||||
{
|
||||
{ 138, 31, 36, 171, 27, 166, 38, 44, 229 },
|
||||
{ 67, 87, 58, 169, 82, 115, 26, 59, 179 },
|
||||
{ 63, 59, 90, 180, 59, 166, 93, 73, 154 },
|
||||
{ 40, 40, 21, 116, 143, 209, 34, 39, 175 },
|
||||
{ 57, 46, 22, 24, 128, 1, 54, 17, 37 },
|
||||
{ 47, 15, 16, 183, 34, 223, 49, 45, 183 },
|
||||
{ 46, 17, 33, 183, 6, 98, 15, 32, 183 },
|
||||
{ 65, 32, 73, 115, 28, 128, 23, 128, 205 },
|
||||
{ 40, 3, 9, 115, 51, 192, 18, 6, 223 },
|
||||
{ 87, 37, 9, 115, 59, 77, 64, 21, 47 }
|
||||
},
|
||||
{
|
||||
{ 104, 55, 44, 218, 9, 54, 53, 130, 226 },
|
||||
{ 64, 90, 70, 205, 40, 41, 23, 26, 57 },
|
||||
{ 54, 57, 112, 184, 5, 41, 38, 166, 213 },
|
||||
{ 30, 34, 26, 133, 152, 116, 10, 32, 134 },
|
||||
{ 75, 32, 12, 51, 192, 255, 160, 43, 51 },
|
||||
{ 39, 19, 53, 221, 26, 114, 32, 73, 255 },
|
||||
{ 31, 9, 65, 234, 2, 15, 1, 118, 73 },
|
||||
{ 88, 31, 35, 67, 102, 85, 55, 186, 85 },
|
||||
{ 56, 21, 23, 111, 59, 205, 45, 37, 192 },
|
||||
{ 55, 38, 70, 124, 73, 102, 1, 34, 98 }
|
||||
},
|
||||
{
|
||||
{ 102, 61, 71, 37, 34, 53, 31, 243, 192 },
|
||||
{ 69, 60, 71, 38, 73, 119, 28, 222, 37 },
|
||||
{ 68, 45, 128, 34, 1, 47, 11, 245, 171 },
|
||||
{ 62, 17, 19, 70, 146, 85, 55, 62, 70 },
|
||||
{ 75, 15, 9, 9, 64, 255, 184, 119, 16 },
|
||||
{ 37, 43, 37, 154, 100, 163, 85, 160, 1 },
|
||||
{ 63, 9, 92, 136, 28, 64, 32, 201, 85 },
|
||||
{ 86, 6, 28, 5, 64, 255, 25, 248, 1 },
|
||||
{ 56, 8, 17, 132, 137, 255, 55, 116, 128 },
|
||||
{ 58, 15, 20, 82, 135, 57, 26, 121, 40 }
|
||||
},
|
||||
{
|
||||
{ 164, 50, 31, 137, 154, 133, 25, 35, 218 },
|
||||
{ 51, 103, 44, 131, 131, 123, 31, 6, 158 },
|
||||
{ 86, 40, 64, 135, 148, 224, 45, 183, 128 },
|
||||
{ 22, 26, 17, 131, 240, 154, 14, 1, 209 },
|
||||
{ 83, 12, 13, 54, 192, 255, 68, 47, 28 },
|
||||
{ 45, 16, 21, 91, 64, 222, 7, 1, 197 },
|
||||
{ 56, 21, 39, 155, 60, 138, 23, 102, 213 },
|
||||
{ 85, 26, 85, 85, 128, 128, 32, 146, 171 },
|
||||
{ 18, 11, 7, 63, 144, 171, 4, 4, 246 },
|
||||
{ 35, 27, 10, 146, 174, 171, 12, 26, 128 }
|
||||
},
|
||||
{
|
||||
{ 190, 80, 35, 99, 180, 80, 126, 54, 45 },
|
||||
{ 85, 126, 47, 87, 176, 51, 41, 20, 32 },
|
||||
{ 101, 75, 128, 139, 118, 146, 116, 128, 85 },
|
||||
{ 56, 41, 15, 176, 236, 85, 37, 9, 62 },
|
||||
{ 146, 36, 19, 30, 171, 255, 97, 27, 20 },
|
||||
{ 71, 30, 17, 119, 118, 255, 17, 18, 138 },
|
||||
{ 101, 38, 60, 138, 55, 70, 43, 26, 142 },
|
||||
{ 138, 45, 61, 62, 219, 1, 81, 188, 64 },
|
||||
{ 32, 41, 20, 117, 151, 142, 20, 21, 163 },
|
||||
{ 112, 19, 12, 61, 195, 128, 48, 4, 24 }
|
||||
}
|
||||
};
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_VP8_ENTROPYMODEDATA_H_
|
|
@@ -0,0 +1,661 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "loopfilter.h"
|
||||
#include "onyxc_int.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
|
||||
|
||||
static void lf_init_lut(loop_filter_info_n *lfi)
|
||||
{
|
||||
int filt_lvl;
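/* Map each filter level to a high edge variance threshold index (0..3);
key frames get slightly lower thresholds than inter frames at the same
level. */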
|
||||
|
||||
for (filt_lvl = 0; filt_lvl <= MAX_LOOP_FILTER; filt_lvl++)
|
||||
{
|
||||
if (filt_lvl >= 40)
|
||||
{
|
||||
lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 2;
|
||||
lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 3;
|
||||
}
|
||||
else if (filt_lvl >= 20)
|
||||
{
|
||||
lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 1;
|
||||
lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 2;
|
||||
}
|
||||
else if (filt_lvl >= 15)
|
||||
{
|
||||
lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 1;
|
||||
lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 0;
|
||||
lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
lfi->mode_lf_lut[DC_PRED] = 1;
|
||||
lfi->mode_lf_lut[V_PRED] = 1;
|
||||
lfi->mode_lf_lut[H_PRED] = 1;
|
||||
lfi->mode_lf_lut[TM_PRED] = 1;
|
||||
lfi->mode_lf_lut[B_PRED] = 0;
|
||||
|
||||
lfi->mode_lf_lut[ZEROMV] = 1;
|
||||
lfi->mode_lf_lut[NEARESTMV] = 2;
|
||||
lfi->mode_lf_lut[NEARMV] = 2;
|
||||
lfi->mode_lf_lut[NEWMV] = 2;
|
||||
lfi->mode_lf_lut[SPLITMV] = 3;
|
||||
|
||||
}
|
||||
|
||||
void vp8_loop_filter_update_sharpness(loop_filter_info_n *lfi,
|
||||
int sharpness_lvl)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* For each possible value for the loop filter fill out limits */
|
||||
for (i = 0; i <= MAX_LOOP_FILTER; i++)
|
||||
{
|
||||
int filt_lvl = i;
|
||||
int block_inside_limit = 0;
|
||||
|
||||
/* Set loop filter parameters that control sharpness. */
|
||||
block_inside_limit = filt_lvl >> (sharpness_lvl > 0);
|
||||
block_inside_limit = block_inside_limit >> (sharpness_lvl > 4);
|
||||
|
||||
if (sharpness_lvl > 0)
|
||||
{
|
||||
if (block_inside_limit > (9 - sharpness_lvl))
|
||||
block_inside_limit = (9 - sharpness_lvl);
|
||||
}
|
||||
|
||||
if (block_inside_limit < 1)
|
||||
block_inside_limit = 1;
|
||||
|
||||
memset(lfi->lim[i], block_inside_limit, SIMD_WIDTH);
|
||||
memset(lfi->blim[i], (2 * filt_lvl + block_inside_limit), SIMD_WIDTH);
|
||||
memset(lfi->mblim[i], (2 * (filt_lvl + 2) + block_inside_limit),
|
||||
SIMD_WIDTH);
|
||||
}
|
||||
}
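/* Illustrative example (not from upstream): with filt_lvl = 32 and
sharpness_lvl = 5, block_inside_limit = (32 >> 1) >> 1 = 8, capped at
9 - 5 = 4, giving lim = 4, blim = 2*32 + 4 = 68 and
mblim = 2*(32 + 2) + 4 = 72 for that filter level. */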
|
||||
|
||||
void vp8_loop_filter_init(VP8_COMMON *cm)
|
||||
{
|
||||
loop_filter_info_n *lfi = &cm->lf_info;
|
||||
int i;
|
||||
|
||||
/* init limits for given sharpness*/
|
||||
vp8_loop_filter_update_sharpness(lfi, cm->sharpness_level);
|
||||
cm->last_sharpness_level = cm->sharpness_level;
|
||||
|
||||
/* init LUT for lvl and hev thr picking */
|
||||
lf_init_lut(lfi);
|
||||
|
||||
/* init hev threshold const vectors */
|
||||
for(i = 0; i < 4 ; i++)
|
||||
{
|
||||
memset(lfi->hev_thr[i], i, SIMD_WIDTH);
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_loop_filter_frame_init(VP8_COMMON *cm,
|
||||
MACROBLOCKD *mbd,
|
||||
int default_filt_lvl)
|
||||
{
|
||||
int seg, /* segment number */
|
||||
ref, /* index in ref_lf_deltas */
|
||||
mode; /* index in mode_lf_deltas */
|
||||
|
||||
loop_filter_info_n *lfi = &cm->lf_info;
|
||||
|
||||
/* update limits if sharpness has changed */
|
||||
if(cm->last_sharpness_level != cm->sharpness_level)
|
||||
{
|
||||
vp8_loop_filter_update_sharpness(lfi, cm->sharpness_level);
|
||||
cm->last_sharpness_level = cm->sharpness_level;
|
||||
}
|
||||
|
||||
for(seg = 0; seg < MAX_MB_SEGMENTS; seg++)
|
||||
{
|
||||
int lvl_seg = default_filt_lvl;
|
||||
int lvl_ref, lvl_mode;
|
||||
|
||||
/* Note the baseline filter values for each segment */
|
||||
if (mbd->segmentation_enabled)
|
||||
{
|
||||
/* Abs value */
|
||||
if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
|
||||
{
|
||||
lvl_seg = mbd->segment_feature_data[MB_LVL_ALT_LF][seg];
|
||||
}
|
||||
else /* Delta Value */
|
||||
{
|
||||
lvl_seg += mbd->segment_feature_data[MB_LVL_ALT_LF][seg];
|
||||
}
|
||||
lvl_seg = (lvl_seg > 0) ? ((lvl_seg > 63) ? 63: lvl_seg) : 0;
|
||||
}
|
||||
|
||||
if (!mbd->mode_ref_lf_delta_enabled)
|
||||
{
|
||||
/* we could get rid of this if we assume that deltas are set to
|
||||
* zero when not in use; encoder always uses deltas
|
||||
*/
|
||||
memset(lfi->lvl[seg][0], lvl_seg, 4 * 4 );
|
||||
continue;
|
||||
}
|
||||
|
||||
/* INTRA_FRAME */
|
||||
ref = INTRA_FRAME;
|
||||
|
||||
/* Apply delta for reference frame */
|
||||
lvl_ref = lvl_seg + mbd->ref_lf_deltas[ref];
|
||||
|
||||
/* Apply delta for Intra modes */
|
||||
mode = 0; /* B_PRED */
|
||||
/* Only the split mode BPRED has a further special case */
|
||||
lvl_mode = lvl_ref + mbd->mode_lf_deltas[mode];
|
||||
/* clamp */
|
||||
lvl_mode = (lvl_mode > 0) ? (lvl_mode > 63 ? 63 : lvl_mode) : 0;
|
||||
|
||||
lfi->lvl[seg][ref][mode] = lvl_mode;
|
||||
|
||||
mode = 1; /* all the rest of Intra modes */
|
||||
/* clamp */
|
||||
lvl_mode = (lvl_ref > 0) ? (lvl_ref > 63 ? 63 : lvl_ref) : 0;
|
||||
lfi->lvl[seg][ref][mode] = lvl_mode;
|
||||
|
||||
/* LAST, GOLDEN, ALT */
|
||||
for(ref = 1; ref < MAX_REF_FRAMES; ref++)
|
||||
{
|
||||
/* Apply delta for reference frame */
|
||||
lvl_ref = lvl_seg + mbd->ref_lf_deltas[ref];
|
||||
|
||||
/* Apply delta for Inter modes */
|
||||
for (mode = 1; mode < 4; mode++)
|
||||
{
|
||||
lvl_mode = lvl_ref + mbd->mode_lf_deltas[mode];
|
||||
/* clamp */
|
||||
lvl_mode = (lvl_mode > 0) ? (lvl_mode > 63 ? 63 : lvl_mode) : 0;
|
||||
|
||||
lfi->lvl[seg][ref][mode] = lvl_mode;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void vp8_loop_filter_row_normal(VP8_COMMON *cm, MODE_INFO *mode_info_context,
|
||||
int mb_row, int post_ystride, int post_uvstride,
|
||||
unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr)
|
||||
{
|
||||
int mb_col;
|
||||
int filter_level;
|
||||
loop_filter_info_n *lfi_n = &cm->lf_info;
|
||||
loop_filter_info lfi;
|
||||
FRAME_TYPE frame_type = cm->frame_type;
|
||||
|
||||
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
|
||||
{
|
||||
int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
|
||||
mode_info_context->mbmi.mode != SPLITMV &&
|
||||
mode_info_context->mbmi.mb_skip_coeff);
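/* Block-internal (non-macroblock-edge) filtering is skipped when the
macroblock has no residual coefficients and is not sub-partitioned
(B_PRED/SPLITMV); the macroblock edges are still filtered. */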
|
||||
|
||||
const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode];
|
||||
const int seg = mode_info_context->mbmi.segment_id;
|
||||
const int ref_frame = mode_info_context->mbmi.ref_frame;
|
||||
|
||||
filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
|
||||
|
||||
if (filter_level)
|
||||
{
|
||||
const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
|
||||
lfi.mblim = lfi_n->mblim[filter_level];
|
||||
lfi.blim = lfi_n->blim[filter_level];
|
||||
lfi.lim = lfi_n->lim[filter_level];
|
||||
lfi.hev_thr = lfi_n->hev_thr[hev_index];
|
||||
|
||||
if (mb_col > 0)
|
||||
vp8_loop_filter_mbv
|
||||
(y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi);
|
||||
|
||||
if (!skip_lf)
|
||||
vp8_loop_filter_bv
|
||||
(y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi);
|
||||
|
||||
/* don't apply across umv border */
|
||||
if (mb_row > 0)
|
||||
vp8_loop_filter_mbh
|
||||
(y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi);
|
||||
|
||||
if (!skip_lf)
|
||||
vp8_loop_filter_bh
|
||||
(y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi);
|
||||
}
|
||||
|
||||
y_ptr += 16;
|
||||
u_ptr += 8;
|
||||
v_ptr += 8;
|
||||
|
||||
mode_info_context++; /* step to next MB */
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void vp8_loop_filter_row_simple(VP8_COMMON *cm, MODE_INFO *mode_info_context,
|
||||
int mb_row, int post_ystride, int post_uvstride,
|
||||
unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr)
|
||||
{
|
||||
int mb_col;
|
||||
int filter_level;
|
||||
loop_filter_info_n *lfi_n = &cm->lf_info;
|
||||
(void)post_uvstride;
|
||||
|
||||
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
|
||||
{
|
||||
int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
|
||||
mode_info_context->mbmi.mode != SPLITMV &&
|
||||
mode_info_context->mbmi.mb_skip_coeff);
|
||||
|
||||
const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode];
|
||||
const int seg = mode_info_context->mbmi.segment_id;
|
||||
const int ref_frame = mode_info_context->mbmi.ref_frame;
|
||||
|
||||
filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
|
||||
|
||||
if (filter_level)
|
||||
{
|
||||
if (mb_col > 0)
|
||||
vp8_loop_filter_simple_mbv
|
||||
(y_ptr, post_ystride, lfi_n->mblim[filter_level]);
|
||||
|
||||
if (!skip_lf)
|
||||
vp8_loop_filter_simple_bv
|
||||
(y_ptr, post_ystride, lfi_n->blim[filter_level]);
|
||||
|
||||
/* don't apply across umv border */
|
||||
if (mb_row > 0)
|
||||
vp8_loop_filter_simple_mbh
|
||||
(y_ptr, post_ystride, lfi_n->mblim[filter_level]);
|
||||
|
||||
if (!skip_lf)
|
||||
vp8_loop_filter_simple_bh
|
||||
(y_ptr, post_ystride, lfi_n->blim[filter_level]);
|
||||
}
|
||||
|
||||
y_ptr += 16;
|
||||
u_ptr += 8;
|
||||
v_ptr += 8;
|
||||
|
||||
mode_info_context++; /* step to next MB */
|
||||
}
|
||||
|
||||
}
|
||||
void vp8_loop_filter_frame(VP8_COMMON *cm,
|
||||
MACROBLOCKD *mbd,
|
||||
int frame_type)
|
||||
{
|
||||
YV12_BUFFER_CONFIG *post = cm->frame_to_show;
|
||||
loop_filter_info_n *lfi_n = &cm->lf_info;
|
||||
loop_filter_info lfi;
|
||||
|
||||
int mb_row;
|
||||
int mb_col;
|
||||
int mb_rows = cm->mb_rows;
|
||||
int mb_cols = cm->mb_cols;
|
||||
|
||||
int filter_level;
|
||||
|
||||
unsigned char *y_ptr, *u_ptr, *v_ptr;
|
||||
|
||||
/* Point at base of Mb MODE_INFO list */
|
||||
const MODE_INFO *mode_info_context = cm->mi;
|
||||
int post_y_stride = post->y_stride;
|
||||
int post_uv_stride = post->uv_stride;
|
||||
|
||||
/* Initialize the loop filter for this frame. */
|
||||
vp8_loop_filter_frame_init(cm, mbd, cm->filter_level);
|
||||
|
||||
/* Set up the buffer pointers */
|
||||
y_ptr = post->y_buffer;
|
||||
u_ptr = post->u_buffer;
|
||||
v_ptr = post->v_buffer;
|
||||
|
||||
/* vp8_filter each macro block */
|
||||
if (cm->filter_type == NORMAL_LOOPFILTER)
|
||||
{
|
||||
for (mb_row = 0; mb_row < mb_rows; mb_row++)
|
||||
{
|
||||
for (mb_col = 0; mb_col < mb_cols; mb_col++)
|
||||
{
|
||||
int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
|
||||
mode_info_context->mbmi.mode != SPLITMV &&
|
||||
mode_info_context->mbmi.mb_skip_coeff);
|
||||
|
||||
const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode];
|
||||
const int seg = mode_info_context->mbmi.segment_id;
|
||||
const int ref_frame = mode_info_context->mbmi.ref_frame;
|
||||
|
||||
filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
|
||||
|
||||
if (filter_level)
|
||||
{
|
||||
const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
|
||||
lfi.mblim = lfi_n->mblim[filter_level];
|
||||
lfi.blim = lfi_n->blim[filter_level];
|
||||
lfi.lim = lfi_n->lim[filter_level];
|
||||
lfi.hev_thr = lfi_n->hev_thr[hev_index];
|
||||
|
||||
if (mb_col > 0)
|
||||
vp8_loop_filter_mbv
|
||||
(y_ptr, u_ptr, v_ptr, post_y_stride, post_uv_stride, &lfi);
|
||||
|
||||
if (!skip_lf)
|
||||
vp8_loop_filter_bv
|
||||
(y_ptr, u_ptr, v_ptr, post_y_stride, post_uv_stride, &lfi);
|
||||
|
||||
/* don't apply across umv border */
|
||||
if (mb_row > 0)
|
||||
vp8_loop_filter_mbh
|
||||
(y_ptr, u_ptr, v_ptr, post_y_stride, post_uv_stride, &lfi);
|
||||
|
||||
if (!skip_lf)
|
||||
vp8_loop_filter_bh
|
||||
(y_ptr, u_ptr, v_ptr, post_y_stride, post_uv_stride, &lfi);
|
||||
}
|
||||
|
||||
y_ptr += 16;
|
||||
u_ptr += 8;
|
||||
v_ptr += 8;
|
||||
|
||||
mode_info_context++; /* step to next MB */
|
||||
}
|
||||
y_ptr += post_y_stride * 16 - post->y_width;
|
||||
u_ptr += post_uv_stride * 8 - post->uv_width;
|
||||
v_ptr += post_uv_stride * 8 - post->uv_width;
|
||||
|
||||
mode_info_context++; /* Skip border mb */
|
||||
|
||||
}
|
||||
}
|
||||
else /* SIMPLE_LOOPFILTER */
|
||||
{
|
||||
for (mb_row = 0; mb_row < mb_rows; mb_row++)
|
||||
{
|
||||
for (mb_col = 0; mb_col < mb_cols; mb_col++)
|
||||
{
|
||||
int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
|
||||
mode_info_context->mbmi.mode != SPLITMV &&
|
||||
mode_info_context->mbmi.mb_skip_coeff);
|
||||
|
||||
const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode];
|
||||
const int seg = mode_info_context->mbmi.segment_id;
|
||||
const int ref_frame = mode_info_context->mbmi.ref_frame;
|
||||
|
||||
filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
|
||||
if (filter_level)
|
||||
{
|
||||
const unsigned char * mblim = lfi_n->mblim[filter_level];
|
||||
const unsigned char * blim = lfi_n->blim[filter_level];
|
||||
|
||||
if (mb_col > 0)
|
||||
vp8_loop_filter_simple_mbv
|
||||
(y_ptr, post_y_stride, mblim);
|
||||
|
||||
if (!skip_lf)
|
||||
vp8_loop_filter_simple_bv
|
||||
(y_ptr, post_y_stride, blim);
|
||||
|
||||
/* don't apply across umv border */
|
||||
if (mb_row > 0)
|
||||
vp8_loop_filter_simple_mbh
|
||||
(y_ptr, post_y_stride, mblim);
|
||||
|
||||
if (!skip_lf)
|
||||
vp8_loop_filter_simple_bh
|
||||
(y_ptr, post_y_stride, blim);
|
||||
}
|
||||
|
||||
y_ptr += 16;
|
||||
u_ptr += 8;
|
||||
v_ptr += 8;
|
||||
|
||||
mode_info_context++; /* step to next MB */
|
||||
}
|
||||
y_ptr += post_y_stride * 16 - post->y_width;
|
||||
u_ptr += post_uv_stride * 8 - post->uv_width;
|
||||
v_ptr += post_uv_stride * 8 - post->uv_width;
|
||||
|
||||
mode_info_context++; /* Skip border mb */
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_loop_filter_frame_yonly
|
||||
(
|
||||
VP8_COMMON *cm,
|
||||
MACROBLOCKD *mbd,
|
||||
int default_filt_lvl
|
||||
)
|
||||
{
|
||||
YV12_BUFFER_CONFIG *post = cm->frame_to_show;
|
||||
|
||||
unsigned char *y_ptr;
|
||||
int mb_row;
|
||||
int mb_col;
|
||||
|
||||
loop_filter_info_n *lfi_n = &cm->lf_info;
|
||||
loop_filter_info lfi;
|
||||
|
||||
int filter_level;
|
||||
FRAME_TYPE frame_type = cm->frame_type;
|
||||
|
||||
/* Point at base of Mb MODE_INFO list */
|
||||
const MODE_INFO *mode_info_context = cm->mi;
|
||||
|
||||
#if 0
|
||||
if(default_filt_lvl == 0) /* no filter applied */
|
||||
return;
|
||||
#endif
|
||||
|
||||
/* Initialize the loop filter for this frame. */
|
||||
vp8_loop_filter_frame_init( cm, mbd, default_filt_lvl);
|
||||
|
||||
/* Set up the buffer pointers */
|
||||
y_ptr = post->y_buffer;
|
||||
|
||||
/* vp8_filter each macro block */
|
||||
for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
|
||||
{
|
||||
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
|
||||
{
|
||||
int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
|
||||
mode_info_context->mbmi.mode != SPLITMV &&
|
||||
mode_info_context->mbmi.mb_skip_coeff);
|
||||
|
||||
const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode];
|
||||
const int seg = mode_info_context->mbmi.segment_id;
|
||||
const int ref_frame = mode_info_context->mbmi.ref_frame;
|
||||
|
||||
filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
|
||||
|
||||
if (filter_level)
|
||||
{
|
||||
if (cm->filter_type == NORMAL_LOOPFILTER)
|
||||
{
|
||||
const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
|
||||
lfi.mblim = lfi_n->mblim[filter_level];
|
||||
lfi.blim = lfi_n->blim[filter_level];
|
||||
lfi.lim = lfi_n->lim[filter_level];
|
||||
lfi.hev_thr = lfi_n->hev_thr[hev_index];
|
||||
|
||||
if (mb_col > 0)
|
||||
vp8_loop_filter_mbv
|
||||
(y_ptr, 0, 0, post->y_stride, 0, &lfi);
|
||||
|
||||
if (!skip_lf)
|
||||
vp8_loop_filter_bv
|
||||
(y_ptr, 0, 0, post->y_stride, 0, &lfi);
|
||||
|
||||
/* don't apply across umv border */
|
||||
if (mb_row > 0)
|
||||
vp8_loop_filter_mbh
|
||||
(y_ptr, 0, 0, post->y_stride, 0, &lfi);
|
||||
|
||||
if (!skip_lf)
|
||||
vp8_loop_filter_bh
|
||||
(y_ptr, 0, 0, post->y_stride, 0, &lfi);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (mb_col > 0)
|
||||
vp8_loop_filter_simple_mbv
|
||||
(y_ptr, post->y_stride, lfi_n->mblim[filter_level]);
|
||||
|
||||
if (!skip_lf)
|
||||
vp8_loop_filter_simple_bv
|
||||
(y_ptr, post->y_stride, lfi_n->blim[filter_level]);
|
||||
|
||||
/* don't apply across umv border */
|
||||
if (mb_row > 0)
|
||||
vp8_loop_filter_simple_mbh
|
||||
(y_ptr, post->y_stride, lfi_n->mblim[filter_level]);
|
||||
|
||||
if (!skip_lf)
|
||||
vp8_loop_filter_simple_bh
|
||||
(y_ptr, post->y_stride, lfi_n->blim[filter_level]);
|
||||
}
|
||||
}
|
||||
|
||||
y_ptr += 16;
|
||||
mode_info_context ++; /* step to next MB */
|
||||
|
||||
}
|
||||
|
||||
y_ptr += post->y_stride * 16 - post->y_width;
|
||||
mode_info_context ++; /* Skip border mb */
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void vp8_loop_filter_partial_frame
|
||||
(
|
||||
VP8_COMMON *cm,
|
||||
MACROBLOCKD *mbd,
|
||||
int default_filt_lvl
|
||||
)
|
||||
{
|
||||
YV12_BUFFER_CONFIG *post = cm->frame_to_show;
|
||||
|
||||
unsigned char *y_ptr;
|
||||
int mb_row;
|
||||
int mb_col;
|
||||
int mb_cols = post->y_width >> 4;
|
||||
int mb_rows = post->y_height >> 4;
|
||||
|
||||
int linestocopy;
|
||||
|
||||
loop_filter_info_n *lfi_n = &cm->lf_info;
|
||||
loop_filter_info lfi;
|
||||
|
||||
int filter_level;
|
||||
FRAME_TYPE frame_type = cm->frame_type;
|
||||
|
||||
const MODE_INFO *mode_info_context;
|
||||
|
||||
#if 0
|
||||
if(default_filt_lvl == 0) /* no filter applied */
|
||||
return;
|
||||
#endif
|
||||
|
||||
/* Initialize the loop filter for this frame. */
|
||||
vp8_loop_filter_frame_init( cm, mbd, default_filt_lvl);
|
||||
|
||||
/* number of MB rows to use in partial filtering */
|
||||
linestocopy = mb_rows / PARTIAL_FRAME_FRACTION;
|
||||
linestocopy = linestocopy ? linestocopy << 4 : 16; /* 16 lines per MB */
|
||||
|
||||
/* Set up the buffer pointers; partial image starts at ~middle of frame */
|
||||
y_ptr = post->y_buffer + ((post->y_height >> 5) * 16) * post->y_stride;
|
||||
mode_info_context = cm->mi + (post->y_height >> 5) * (mb_cols + 1);
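/* (y_height >> 5) is the macroblock row roughly halfway down the frame;
the MODE_INFO stride is mb_cols + 1 because each row carries one extra
border entry (hence the "Skip border mb" steps below). */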
|
||||
|
||||
/* vp8_filter each macro block */
|
||||
for (mb_row = 0; mb_row<(linestocopy >> 4); mb_row++)
|
||||
{
|
||||
for (mb_col = 0; mb_col < mb_cols; mb_col++)
|
||||
{
|
||||
int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
|
||||
mode_info_context->mbmi.mode != SPLITMV &&
|
||||
mode_info_context->mbmi.mb_skip_coeff);
|
||||
|
||||
const int mode_index =
|
||||
lfi_n->mode_lf_lut[mode_info_context->mbmi.mode];
|
||||
const int seg = mode_info_context->mbmi.segment_id;
|
||||
const int ref_frame = mode_info_context->mbmi.ref_frame;
|
||||
|
||||
filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
|
||||
|
||||
if (filter_level)
|
||||
{
|
||||
if (cm->filter_type == NORMAL_LOOPFILTER)
|
||||
{
|
||||
const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
|
||||
lfi.mblim = lfi_n->mblim[filter_level];
|
||||
lfi.blim = lfi_n->blim[filter_level];
|
||||
lfi.lim = lfi_n->lim[filter_level];
|
||||
lfi.hev_thr = lfi_n->hev_thr[hev_index];
|
||||
|
||||
if (mb_col > 0)
|
||||
vp8_loop_filter_mbv
|
||||
(y_ptr, 0, 0, post->y_stride, 0, &lfi);
|
||||
|
||||
if (!skip_lf)
|
||||
vp8_loop_filter_bv
|
||||
(y_ptr, 0, 0, post->y_stride, 0, &lfi);
|
||||
|
||||
vp8_loop_filter_mbh
|
||||
(y_ptr, 0, 0, post->y_stride, 0, &lfi);
|
||||
|
||||
if (!skip_lf)
|
||||
vp8_loop_filter_bh
|
||||
(y_ptr, 0, 0, post->y_stride, 0, &lfi);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (mb_col > 0)
|
||||
vp8_loop_filter_simple_mbv
|
||||
(y_ptr, post->y_stride, lfi_n->mblim[filter_level]);
|
||||
|
||||
if (!skip_lf)
|
||||
vp8_loop_filter_simple_bv
|
||||
(y_ptr, post->y_stride, lfi_n->blim[filter_level]);
|
||||
|
||||
vp8_loop_filter_simple_mbh
|
||||
(y_ptr, post->y_stride, lfi_n->mblim[filter_level]);
|
||||
|
||||
if (!skip_lf)
|
||||
vp8_loop_filter_simple_bh
|
||||
(y_ptr, post->y_stride, lfi_n->blim[filter_level]);
|
||||
}
|
||||
}
|
||||
|
||||
y_ptr += 16;
|
||||
mode_info_context += 1; /* step to next MB */
|
||||
}
|
||||
|
||||
y_ptr += post->y_stride * 16 - post->y_width;
|
||||
mode_info_context += 1; /* Skip border mb */
|
||||
}
|
||||
}
|
|
@@ -0,0 +1,93 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
|
||||
;void vp8_copy32xn_sse2(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *dst_ptr,
|
||||
; int dst_stride,
|
||||
; int height);
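; The main loop below copies the 32-byte-wide block four rows per
; iteration (unaligned loads from src_ptr, aligned stores to dst_ptr);
; any remaining rows are then copied one at a time by the tail loop.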
|
||||
global sym(vp8_copy32xn_sse2) PRIVATE
|
||||
sym(vp8_copy32xn_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
SAVE_XMM 7
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
mov rdi, arg(2) ;dst_ptr
|
||||
|
||||
movsxd rax, dword ptr arg(1) ;src_stride
|
||||
movsxd rdx, dword ptr arg(3) ;dst_stride
|
||||
movsxd rcx, dword ptr arg(4) ;height
|
||||
|
||||
.block_copy_sse2_loopx4:
|
||||
movdqu xmm0, XMMWORD PTR [rsi]
|
||||
movdqu xmm1, XMMWORD PTR [rsi + 16]
|
||||
movdqu xmm2, XMMWORD PTR [rsi + rax]
|
||||
movdqu xmm3, XMMWORD PTR [rsi + rax + 16]
|
||||
|
||||
lea rsi, [rsi+rax*2]
|
||||
|
||||
movdqu xmm4, XMMWORD PTR [rsi]
|
||||
movdqu xmm5, XMMWORD PTR [rsi + 16]
|
||||
movdqu xmm6, XMMWORD PTR [rsi + rax]
|
||||
movdqu xmm7, XMMWORD PTR [rsi + rax + 16]
|
||||
|
||||
lea rsi, [rsi+rax*2]
|
||||
|
||||
movdqa XMMWORD PTR [rdi], xmm0
|
||||
movdqa XMMWORD PTR [rdi + 16], xmm1
|
||||
movdqa XMMWORD PTR [rdi + rdx], xmm2
|
||||
movdqa XMMWORD PTR [rdi + rdx + 16], xmm3
|
||||
|
||||
lea rdi, [rdi+rdx*2]
|
||||
|
||||
movdqa XMMWORD PTR [rdi], xmm4
|
||||
movdqa XMMWORD PTR [rdi + 16], xmm5
|
||||
movdqa XMMWORD PTR [rdi + rdx], xmm6
|
||||
movdqa XMMWORD PTR [rdi + rdx + 16], xmm7
|
||||
|
||||
lea rdi, [rdi+rdx*2]
|
||||
|
||||
sub rcx, 4
|
||||
cmp rcx, 4
|
||||
jge .block_copy_sse2_loopx4
|
||||
|
||||
cmp rcx, 0
|
||||
je .copy_is_done
|
||||
|
||||
.block_copy_sse2_loop:
|
||||
movdqu xmm0, XMMWORD PTR [rsi]
|
||||
movdqu xmm1, XMMWORD PTR [rsi + 16]
|
||||
lea rsi, [rsi+rax]
|
||||
|
||||
movdqa XMMWORD PTR [rdi], xmm0
|
||||
movdqa XMMWORD PTR [rdi + 16], xmm1
|
||||
lea rdi, [rdi+rdx]
|
||||
|
||||
sub rcx, 1
|
||||
jne .block_copy_sse2_loop
|
||||
|
||||
.copy_is_done:
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
|
@@ -0,0 +1,146 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
%macro STACK_FRAME_CREATE_X3 0
|
||||
%if ABI_IS_32BIT
|
||||
%define src_ptr rsi
|
||||
%define src_stride rax
|
||||
%define ref_ptr rdi
|
||||
%define ref_stride rdx
|
||||
%define end_ptr rcx
|
||||
%define ret_var rbx
|
||||
%define result_ptr arg(4)
|
||||
%define max_sad arg(4)
|
||||
%define height dword ptr arg(4)
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
push rsi
|
||||
push rdi
|
||||
push rbx
|
||||
|
||||
mov rsi, arg(0) ; src_ptr
|
||||
mov rdi, arg(2) ; ref_ptr
|
||||
|
||||
movsxd rax, dword ptr arg(1) ; src_stride
|
||||
movsxd rdx, dword ptr arg(3) ; ref_stride
|
||||
%else
|
||||
%if LIBVPX_YASM_WIN64
|
||||
SAVE_XMM 7, u
|
||||
%define src_ptr rcx
|
||||
%define src_stride rdx
|
||||
%define ref_ptr r8
|
||||
%define ref_stride r9
|
||||
%define end_ptr r10
|
||||
%define ret_var r11
|
||||
%define result_ptr [rsp+xmm_stack_space+8+4*8]
|
||||
%define max_sad [rsp+xmm_stack_space+8+4*8]
|
||||
%define height dword ptr [rsp+xmm_stack_space+8+4*8]
|
||||
%else
|
||||
%define src_ptr rdi
|
||||
%define src_stride rsi
|
||||
%define ref_ptr rdx
|
||||
%define ref_stride rcx
|
||||
%define end_ptr r9
|
||||
%define ret_var r10
|
||||
%define result_ptr r8
|
||||
%define max_sad r8
|
||||
%define height r8
|
||||
%endif
|
||||
%endif
|
||||
|
||||
%endmacro
|
||||
|
||||
%macro STACK_FRAME_DESTROY_X3 0
|
||||
%define src_ptr
|
||||
%define src_stride
|
||||
%define ref_ptr
|
||||
%define ref_stride
|
||||
%define end_ptr
|
||||
%define ret_var
|
||||
%define result_ptr
|
||||
%define max_sad
|
||||
%define height
|
||||
|
||||
%if ABI_IS_32BIT
|
||||
pop rbx
|
||||
pop rdi
|
||||
pop rsi
|
||||
pop rbp
|
||||
%else
|
||||
%if LIBVPX_YASM_WIN64
|
||||
RESTORE_XMM
|
||||
%endif
|
||||
%endif
|
||||
ret
|
||||
%endmacro
|
||||
|
||||
|
||||
;void vp8_copy32xn_sse3(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_stride,
|
||||
; unsigned char *dst_ptr,
|
||||
; int dst_stride,
|
||||
; int height);
|
||||
global sym(vp8_copy32xn_sse3) PRIVATE
|
||||
sym(vp8_copy32xn_sse3):
|
||||
|
||||
STACK_FRAME_CREATE_X3
|
||||
|
||||
.block_copy_sse3_loopx4:
|
||||
lea end_ptr, [src_ptr+src_stride*2]
|
||||
|
||||
movdqu xmm0, XMMWORD PTR [src_ptr]
|
||||
movdqu xmm1, XMMWORD PTR [src_ptr + 16]
|
||||
movdqu xmm2, XMMWORD PTR [src_ptr + src_stride]
|
||||
movdqu xmm3, XMMWORD PTR [src_ptr + src_stride + 16]
|
||||
movdqu xmm4, XMMWORD PTR [end_ptr]
|
||||
movdqu xmm5, XMMWORD PTR [end_ptr + 16]
|
||||
movdqu xmm6, XMMWORD PTR [end_ptr + src_stride]
|
||||
movdqu xmm7, XMMWORD PTR [end_ptr + src_stride + 16]
|
||||
|
||||
lea src_ptr, [src_ptr+src_stride*4]
|
||||
|
||||
lea end_ptr, [ref_ptr+ref_stride*2]
|
||||
|
||||
movdqa XMMWORD PTR [ref_ptr], xmm0
|
||||
movdqa XMMWORD PTR [ref_ptr + 16], xmm1
|
||||
movdqa XMMWORD PTR [ref_ptr + ref_stride], xmm2
|
||||
movdqa XMMWORD PTR [ref_ptr + ref_stride + 16], xmm3
|
||||
movdqa XMMWORD PTR [end_ptr], xmm4
|
||||
movdqa XMMWORD PTR [end_ptr + 16], xmm5
|
||||
movdqa XMMWORD PTR [end_ptr + ref_stride], xmm6
|
||||
movdqa XMMWORD PTR [end_ptr + ref_stride + 16], xmm7
|
||||
|
||||
lea ref_ptr, [ref_ptr+ref_stride*4]
|
||||
|
||||
sub height, 4
|
||||
cmp height, 4
|
||||
jge .block_copy_sse3_loopx4
|
||||
|
||||
;Check to see if there are more rows that need to be copied.
|
||||
cmp height, 0
|
||||
je .copy_is_done
|
||||
|
||||
.block_copy_sse3_loop:
|
||||
movdqu xmm0, XMMWORD PTR [src_ptr]
|
||||
movdqu xmm1, XMMWORD PTR [src_ptr + 16]
|
||||
lea src_ptr, [src_ptr+src_stride]
|
||||
|
||||
movdqa XMMWORD PTR [ref_ptr], xmm0
|
||||
movdqa XMMWORD PTR [ref_ptr + 16], xmm1
|
||||
lea ref_ptr, [ref_ptr+ref_stride]
|
||||
|
||||
sub height, 1
|
||||
jne .block_copy_sse3_loop
|
||||
|
||||
.copy_is_done:
|
||||
STACK_FRAME_DESTROY_X3
|
|
@@ -0,0 +1,258 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
|
||||
;void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q)
|
||||
global sym(vp8_dequantize_b_impl_mmx) PRIVATE
|
||||
sym(vp8_dequantize_b_impl_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 3
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;sq
|
||||
mov rdi, arg(1) ;dq
|
||||
mov rax, arg(2) ;q
|
||||
|
||||
movq mm1, [rsi]
|
||||
pmullw mm1, [rax+0] ; mm1 *= dequantization factors
|
||||
movq [rdi], mm1
|
||||
|
||||
movq mm1, [rsi+8]
|
||||
pmullw mm1, [rax+8] ; mm1 *= dequantization factors
|
||||
movq [rdi+8], mm1
|
||||
|
||||
movq mm1, [rsi+16]
|
||||
pmullw mm1, [rax+16] ; mm1 *= dequantization factors
|
||||
movq [rdi+16], mm1
|
||||
|
||||
movq mm1, [rsi+24]
|
||||
pmullw mm1, [rax+24] ; mm1 *= dequantization factors
|
||||
movq [rdi+24], mm1
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
;void dequant_idct_add_mmx(
|
||||
;short *input, 0
|
||||
;short *dq, 1
|
||||
;unsigned char *dest, 2
|
||||
;int stride) 3
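; Dequantizes the 16 coefficients (input * dq), zeroes the coefficient
; block, runs the 4x4 inverse DCT in two passes using the
; sin(pi/8)*sqrt(2) and cos(pi/8)*sqrt(2)-1 constants at the end of this
; file (with +4 rounding and a >>3 shift on the second pass), then adds
; the result to the destination pixels with saturation.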
|
||||
global sym(vp8_dequant_idct_add_mmx) PRIVATE
|
||||
sym(vp8_dequant_idct_add_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 4
|
||||
GET_GOT rbx
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rax, arg(0) ;input
|
||||
mov rdx, arg(1) ;dq
|
||||
|
||||
|
||||
movq mm0, [rax ]
|
||||
pmullw mm0, [rdx]
|
||||
|
||||
movq mm1, [rax +8]
|
||||
pmullw mm1, [rdx +8]
|
||||
|
||||
movq mm2, [rax+16]
|
||||
pmullw mm2, [rdx+16]
|
||||
|
||||
movq mm3, [rax+24]
|
||||
pmullw mm3, [rdx+24]
|
||||
|
||||
mov rdx, arg(2) ;dest
|
||||
|
||||
pxor mm7, mm7
|
||||
|
||||
|
||||
movq [rax], mm7
|
||||
movq [rax+8], mm7
|
||||
|
||||
movq [rax+16],mm7
|
||||
movq [rax+24],mm7
|
||||
|
||||
|
||||
movsxd rdi, dword ptr arg(3) ;stride
|
||||
|
||||
psubw mm0, mm2 ; b1= 0-2
|
||||
paddw mm2, mm2 ;
|
||||
|
||||
movq mm5, mm1
|
||||
paddw mm2, mm0 ; a1 =0+2
|
||||
|
||||
pmulhw mm5, [GLOBAL(x_s1sqr2)];
|
||||
paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2)
|
||||
|
||||
movq mm7, mm3 ;
|
||||
pmulhw mm7, [GLOBAL(x_c1sqr2less1)];
|
||||
|
||||
paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2)
|
||||
psubw mm7, mm5 ; c1
|
||||
|
||||
movq mm5, mm1
|
||||
movq mm4, mm3
|
||||
|
||||
pmulhw mm5, [GLOBAL(x_c1sqr2less1)]
|
||||
paddw mm5, mm1
|
||||
|
||||
pmulhw mm3, [GLOBAL(x_s1sqr2)]
|
||||
paddw mm3, mm4
|
||||
|
||||
paddw mm3, mm5 ; d1
|
||||
movq mm6, mm2 ; a1
|
||||
|
||||
movq mm4, mm0 ; b1
|
||||
paddw mm2, mm3 ;0
|
||||
|
||||
paddw mm4, mm7 ;1
|
||||
psubw mm0, mm7 ;2
|
||||
|
||||
psubw mm6, mm3 ;3
|
||||
|
||||
movq mm1, mm2 ; 03 02 01 00
|
||||
movq mm3, mm4 ; 23 22 21 20
|
||||
|
||||
punpcklwd mm1, mm0 ; 11 01 10 00
|
||||
punpckhwd mm2, mm0 ; 13 03 12 02
|
||||
|
||||
punpcklwd mm3, mm6 ; 31 21 30 20
|
||||
punpckhwd mm4, mm6 ; 33 23 32 22
|
||||
|
||||
movq mm0, mm1 ; 11 01 10 00
|
||||
movq mm5, mm2 ; 13 03 12 02
|
||||
|
||||
punpckldq mm0, mm3 ; 30 20 10 00
|
||||
punpckhdq mm1, mm3 ; 31 21 11 01
|
||||
|
||||
punpckldq mm2, mm4 ; 32 22 12 02
|
||||
punpckhdq mm5, mm4 ; 33 23 13 03
|
||||
|
||||
movq mm3, mm5 ; 33 23 13 03
|
||||
|
||||
psubw mm0, mm2 ; b1= 0-2
|
||||
paddw mm2, mm2 ;
|
||||
|
||||
movq mm5, mm1
|
||||
paddw mm2, mm0 ; a1 =0+2
|
||||
|
||||
pmulhw mm5, [GLOBAL(x_s1sqr2)];
|
||||
paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2)
|
||||
|
||||
movq mm7, mm3 ;
|
||||
pmulhw mm7, [GLOBAL(x_c1sqr2less1)];
|
||||
|
||||
paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2)
|
||||
psubw mm7, mm5 ; c1
|
||||
|
||||
movq mm5, mm1
|
||||
movq mm4, mm3
|
||||
|
||||
pmulhw mm5, [GLOBAL(x_c1sqr2less1)]
|
||||
paddw mm5, mm1
|
||||
|
||||
pmulhw mm3, [GLOBAL(x_s1sqr2)]
|
||||
paddw mm3, mm4
|
||||
|
||||
paddw mm3, mm5 ; d1
|
||||
paddw mm0, [GLOBAL(fours)]
|
||||
|
||||
paddw mm2, [GLOBAL(fours)]
|
||||
movq mm6, mm2 ; a1
|
||||
|
||||
movq mm4, mm0 ; b1
|
||||
paddw mm2, mm3 ;0
|
||||
|
||||
paddw mm4, mm7 ;1
|
||||
psubw mm0, mm7 ;2
|
||||
|
||||
psubw mm6, mm3 ;3
|
||||
psraw mm2, 3
|
||||
|
||||
psraw mm0, 3
|
||||
psraw mm4, 3
|
||||
|
||||
psraw mm6, 3
|
||||
|
||||
movq mm1, mm2 ; 03 02 01 00
|
||||
movq mm3, mm4 ; 23 22 21 20
|
||||
|
||||
punpcklwd mm1, mm0 ; 11 01 10 00
|
||||
punpckhwd mm2, mm0 ; 13 03 12 02
|
||||
|
||||
punpcklwd mm3, mm6 ; 31 21 30 20
|
||||
punpckhwd mm4, mm6 ; 33 23 32 22
|
||||
|
||||
movq mm0, mm1 ; 11 01 10 00
|
||||
movq mm5, mm2 ; 13 03 12 02
|
||||
|
||||
punpckldq mm0, mm3 ; 30 20 10 00
|
||||
punpckhdq mm1, mm3 ; 31 21 11 01
|
||||
|
||||
punpckldq mm2, mm4 ; 32 22 12 02
|
||||
punpckhdq mm5, mm4 ; 33 23 13 03
|
||||
|
||||
pxor mm7, mm7
|
||||
|
||||
movd mm4, [rdx]
|
||||
punpcklbw mm4, mm7
|
||||
paddsw mm0, mm4
|
||||
packuswb mm0, mm7
|
||||
movd [rdx], mm0
|
||||
|
||||
movd mm4, [rdx+rdi]
|
||||
punpcklbw mm4, mm7
|
||||
paddsw mm1, mm4
|
||||
packuswb mm1, mm7
|
||||
movd [rdx+rdi], mm1
|
||||
|
||||
movd mm4, [rdx+2*rdi]
|
||||
punpcklbw mm4, mm7
|
||||
paddsw mm2, mm4
|
||||
packuswb mm2, mm7
|
||||
movd [rdx+rdi*2], mm2
|
||||
|
||||
add rdx, rdi
|
||||
|
||||
movd mm4, [rdx+2*rdi]
|
||||
punpcklbw mm4, mm7
|
||||
paddsw mm5, mm4
|
||||
packuswb mm5, mm7
|
||||
movd [rdx+rdi*2], mm5
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
RESTORE_GOT
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
SECTION_RODATA
|
||||
align 16
|
||||
x_s1sqr2:
|
||||
times 4 dw 0x8A8C
|
||||
align 16
|
||||
x_c1sqr2less1:
|
||||
times 4 dw 0x4E7B
|
||||
align 16
|
||||
fours:
|
||||
times 4 dw 0x0004
|
|
@@ -0,0 +1,35 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "vp8/common/x86/filter_x86.h"
|
||||
|
||||
DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_4[8][8]) =
|
||||
{
|
||||
{ 128, 128, 128, 128, 0, 0, 0, 0 },
|
||||
{ 112, 112, 112, 112, 16, 16, 16, 16 },
|
||||
{ 96, 96, 96, 96, 32, 32, 32, 32 },
|
||||
{ 80, 80, 80, 80, 48, 48, 48, 48 },
|
||||
{ 64, 64, 64, 64, 64, 64, 64, 64 },
|
||||
{ 48, 48, 48, 48, 80, 80, 80, 80 },
|
||||
{ 32, 32, 32, 32, 96, 96, 96, 96 },
|
||||
{ 16, 16, 16, 16, 112, 112, 112, 112 }
|
||||
};
|
||||
|
||||
DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_8[8][16]) =
|
||||
{
|
||||
{ 128, 128, 128, 128, 128, 128, 128, 128, 0, 0, 0, 0, 0, 0, 0, 0 },
|
||||
{ 112, 112, 112, 112, 112, 112, 112, 112, 16, 16, 16, 16, 16, 16, 16, 16 },
|
||||
{ 96, 96, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32 },
|
||||
{ 80, 80, 80, 80, 80, 80, 80, 80, 48, 48, 48, 48, 48, 48, 48, 48 },
|
||||
{ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 },
|
||||
{ 48, 48, 48, 48, 48, 48, 48, 48, 80, 80, 80, 80, 80, 80, 80, 80 },
|
||||
{ 32, 32, 32, 32, 32, 32, 32, 32, 96, 96, 96, 96, 96, 96, 96, 96 },
|
||||
{ 16, 16, 16, 16, 16, 16, 16, 16, 112, 112, 112, 112, 112, 112, 112, 112 }
|
||||
};
|
|
@@ -0,0 +1,33 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef VP8_COMMON_X86_FILTER_X86_H_
|
||||
#define VP8_COMMON_X86_FILTER_X86_H_
|
||||
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* x86 assembly specific copy of vp8/common/filter.c:vp8_bilinear_filters with
|
||||
* duplicated values */
|
||||
|
||||
/* duplicated 4x */
|
||||
extern DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_4[8][8]);
|
||||
|
||||
/* duplicated 8x */
|
||||
extern DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_8[8][16]);
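/* For example, the bilinear tap pair (112, 16) is stored as
{ 112, 112, 112, 112, 16, 16, 16, 16 } in the 4-wide table and with each
value repeated eight times in the 8-wide table, presumably so the taps can
be loaded directly as register-wide SIMD constants. */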
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP8_COMMON_X86_FILTER_X86_H_
|
|
@@ -0,0 +1,128 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "vp8/common/blockd.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
|
||||
extern void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q);
|
||||
|
||||
void vp8_dequantize_b_mmx(BLOCKD *d, short *DQC)
|
||||
{
|
||||
short *sq = (short *) d->qcoeff;
|
||||
short *dq = (short *) d->dqcoeff;
|
||||
|
||||
vp8_dequantize_b_impl_mmx(sq, dq, DQC);
|
||||
}
|
||||
|
||||
void vp8_dequant_idct_add_y_block_mmx
|
||||
(short *q, short *dq,
|
||||
unsigned char *dst, int stride, char *eobs)
|
||||
{
|
||||
int i;
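/* eobs[] holds one end-of-block value per 4x4 block: a value greater than
1 means AC coefficients are present and the full dequant + IDCT path is
taken; a value of 1 means only the DC coefficient is present, so the
cheaper DC-only add is used and the stored coefficients are cleared. */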
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
if (eobs[0] > 1)
|
||||
vp8_dequant_idct_add_mmx (q, dq, dst, stride);
|
||||
else if (eobs[0] == 1)
|
||||
{
|
||||
vp8_dc_only_idct_add_mmx (q[0]*dq[0], dst, stride, dst, stride);
|
||||
memset(q, 0, 2 * sizeof(q[0]));
|
||||
}
|
||||
|
||||
if (eobs[1] > 1)
|
||||
vp8_dequant_idct_add_mmx (q+16, dq, dst+4, stride);
|
||||
else if (eobs[1] == 1)
|
||||
{
|
||||
vp8_dc_only_idct_add_mmx (q[16]*dq[0], dst+4, stride,
|
||||
dst+4, stride);
|
||||
memset(q + 16, 0, 2 * sizeof(q[0]));
|
||||
}
|
||||
|
||||
if (eobs[2] > 1)
|
||||
vp8_dequant_idct_add_mmx (q+32, dq, dst+8, stride);
|
||||
else if (eobs[2] == 1)
|
||||
{
|
||||
vp8_dc_only_idct_add_mmx (q[32]*dq[0], dst+8, stride,
|
||||
dst+8, stride);
|
||||
memset(q + 32, 0, 2 * sizeof(q[0]));
|
||||
}
|
||||
|
||||
if (eobs[3] > 1)
|
||||
vp8_dequant_idct_add_mmx (q+48, dq, dst+12, stride);
|
||||
else if (eobs[3] == 1)
|
||||
{
|
||||
vp8_dc_only_idct_add_mmx (q[48]*dq[0], dst+12, stride,
|
||||
dst+12, stride);
|
||||
memset(q + 48, 0, 2 * sizeof(q[0]));
|
||||
}
|
||||
|
||||
q += 64;
|
||||
dst += 4*stride;
|
||||
eobs += 4;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_dequant_idct_add_uv_block_mmx
|
||||
(short *q, short *dq,
|
||||
unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
if (eobs[0] > 1)
|
||||
vp8_dequant_idct_add_mmx (q, dq, dstu, stride);
|
||||
else if (eobs[0] == 1)
|
||||
{
|
||||
vp8_dc_only_idct_add_mmx (q[0]*dq[0], dstu, stride, dstu, stride);
|
||||
memset(q, 0, 2 * sizeof(q[0]));
|
||||
}
|
||||
|
||||
if (eobs[1] > 1)
|
||||
vp8_dequant_idct_add_mmx (q+16, dq, dstu+4, stride);
|
||||
else if (eobs[1] == 1)
|
||||
{
|
||||
vp8_dc_only_idct_add_mmx (q[16]*dq[0], dstu+4, stride,
|
||||
dstu+4, stride);
|
||||
memset(q + 16, 0, 2 * sizeof(q[0]));
|
||||
}
|
||||
|
||||
q += 32;
|
||||
dstu += 4*stride;
|
||||
eobs += 2;
|
||||
}
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
if (eobs[0] > 1)
|
||||
vp8_dequant_idct_add_mmx (q, dq, dstv, stride);
|
||||
else if (eobs[0] == 1)
|
||||
{
|
||||
vp8_dc_only_idct_add_mmx (q[0]*dq[0], dstv, stride, dstv, stride);
|
||||
memset(q, 0, 2 * sizeof(q[0]));
|
||||
}
|
||||
|
||||
if (eobs[1] > 1)
|
||||
vp8_dequant_idct_add_mmx (q+16, dq, dstv+4, stride);
|
||||
else if (eobs[1] == 1)
|
||||
{
|
||||
vp8_dc_only_idct_add_mmx (q[16]*dq[0], dstv+4, stride,
|
||||
dstv+4, stride);
|
||||
memset(q + 16, 0, 2 * sizeof(q[0]));
|
||||
}
|
||||
|
||||
q += 32;
|
||||
dstv += 4*stride;
|
||||
eobs += 2;
|
||||
}
|
||||
}
|
|
@@ -0,0 +1,89 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
|
||||
void vp8_idct_dequant_0_2x_sse2
|
||||
(short *q, short *dq,
|
||||
unsigned char *dst, int dst_stride);
|
||||
void vp8_idct_dequant_full_2x_sse2
|
||||
(short *q, short *dq,
|
||||
unsigned char *dst, int dst_stride);
|
||||
|
||||
void vp8_dequant_idct_add_y_block_sse2
|
||||
(short *q, short *dq,
|
||||
unsigned char *dst, int stride, char *eobs)
|
||||
{
|
||||
int i;
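    /* eobs stores one char per 4x4 block; reading it as a short tests a
       horizontal pair of blocks at once.  A nonzero value means at least
       one block of the pair is coded, and (value & 0xfefe) != 0 means at
       least one eob is greater than 1, so the full 2-block IDCT is run
       instead of the DC-only shortcut. */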
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
if (((short *)(eobs))[0])
|
||||
{
|
||||
if (((short *)(eobs))[0] & 0xfefe)
|
||||
vp8_idct_dequant_full_2x_sse2 (q, dq, dst, stride);
|
||||
else
|
||||
vp8_idct_dequant_0_2x_sse2 (q, dq, dst, stride);
|
||||
}
|
||||
if (((short *)(eobs))[1])
|
||||
{
|
||||
if (((short *)(eobs))[1] & 0xfefe)
|
||||
vp8_idct_dequant_full_2x_sse2 (q+32, dq, dst+8, stride);
|
||||
else
|
||||
vp8_idct_dequant_0_2x_sse2 (q+32, dq, dst+8, stride);
|
||||
}
|
||||
q += 64;
|
||||
dst += stride*4;
|
||||
eobs += 4;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_dequant_idct_add_uv_block_sse2
|
||||
(short *q, short *dq,
|
||||
unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
|
||||
{
|
||||
if (((short *)(eobs))[0])
|
||||
{
|
||||
if (((short *)(eobs))[0] & 0xfefe)
|
||||
vp8_idct_dequant_full_2x_sse2 (q, dq, dstu, stride);
|
||||
else
|
||||
vp8_idct_dequant_0_2x_sse2 (q, dq, dstu, stride);
|
||||
}
|
||||
q += 32;
|
||||
dstu += stride*4;
|
||||
|
||||
if (((short *)(eobs))[1])
|
||||
{
|
||||
if (((short *)(eobs))[1] & 0xfefe)
|
||||
vp8_idct_dequant_full_2x_sse2 (q, dq, dstu, stride);
|
||||
else
|
||||
vp8_idct_dequant_0_2x_sse2 (q, dq, dstu, stride);
|
||||
}
|
||||
q += 32;
|
||||
|
||||
if (((short *)(eobs))[2])
|
||||
{
|
||||
if (((short *)(eobs))[2] & 0xfefe)
|
||||
vp8_idct_dequant_full_2x_sse2 (q, dq, dstv, stride);
|
||||
else
|
||||
vp8_idct_dequant_0_2x_sse2 (q, dq, dstv, stride);
|
||||
}
|
||||
q += 32;
|
||||
dstv += stride*4;
|
||||
|
||||
if (((short *)(eobs))[3])
|
||||
{
|
||||
if (((short *)(eobs))[3] & 0xfefe)
|
||||
vp8_idct_dequant_full_2x_sse2 (q, dq, dstv, stride);
|
||||
else
|
||||
vp8_idct_dequant_0_2x_sse2 (q, dq, dstv, stride);
|
||||
}
|
||||
}
|
|
@@ -0,0 +1,295 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
; /****************************************************************************
|
||||
; * Notes:
|
||||
; *
|
||||
; * This implementation makes use of 16 bit fixed point version of two multiply
|
||||
; * constants:
|
||||
; * 1. sqrt(2) * cos (pi/8)
|
||||
; * 2. sqrt(2) * sin (pi/8)
|
||||
; * Because the first constant is bigger than 1, to maintain the same 16 bit
|
||||
; * fixed point precision as the second one, we use a trick of
|
||||
; * x * a = x + x*(a-1)
|
||||
; * so
|
||||
; * x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1).
|
||||
; *
|
||||
; * For the second constant, because of the 16bit version is 35468, which
|
||||
; * is bigger than 32768, in signed 16 bit multiply, it becomes a negative
|
||||
; * number.
|
||||
; * (x * (unsigned)35468 >> 16) = x * (signed)35468 >> 16 + x
|
||||
; *
|
||||
; **************************************************************************/
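
A small C rendering of the two identities in the note above may make them easier to check; the Q16 constants match x_c1sqr2less1 (0x4E7B = 20091) and x_s1sqr2 (0x8A8C = 35468) defined in this file's data section, and mulhi() stands in for pmulhw (high 16 bits of a signed 16x16 multiply). This is an illustration, not part of the source.

#include <stdint.h>

/* pmulhw on one lane: high half of a signed 16x16-bit multiply */
static int16_t mulhi(int16_t a, int16_t b) {
    return (int16_t)(((int32_t)a * b) >> 16);
}

/* x * sqrt(2)*cos(pi/8): the constant is > 1, so keep 16-bit precision
   via  x*a = x + x*(a - 1),  where (a - 1) in Q16 is 20091 (0x4E7B). */
static int16_t mul_c1sqr2(int16_t x) {
    return (int16_t)(x + mulhi(x, 20091));
}

/* x * sqrt(2)*sin(pi/8): the Q16 constant 35468 (0x8A8C) becomes negative
   as a signed 16-bit value, so add x back to compensate:
   (x * 35468) >> 16  ==  mulhi(x, (int16_t)35468) + x                    */
static int16_t mul_s1sqr2(int16_t x) {
    return (int16_t)(mulhi(x, (int16_t)35468) + x);
}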
|
||||
|
||||
|
||||
;void vp8_short_idct4x4llm_mmx(short *input, unsigned char *pred,
|
||||
;int pitch, unsigned char *dest,int stride)
|
||||
global sym(vp8_short_idct4x4llm_mmx) PRIVATE
|
||||
sym(vp8_short_idct4x4llm_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rax, arg(0) ;input
|
||||
mov rsi, arg(1) ;pred
|
||||
|
||||
movq mm0, [rax ]
|
||||
movq mm1, [rax+ 8]
|
||||
movq mm2, [rax+16]
|
||||
movq mm3, [rax+24]
|
||||
|
||||
%if 0
|
||||
pxor mm7, mm7
|
||||
movq [rax], mm7
|
||||
movq [rax+8], mm7
|
||||
movq [rax+16],mm7
|
||||
movq [rax+24],mm7
|
||||
%endif
|
||||
movsxd rax, dword ptr arg(2) ;pitch
|
||||
mov rdx, arg(3) ;dest
|
||||
movsxd rdi, dword ptr arg(4) ;stride
|
||||
|
||||
|
||||
psubw mm0, mm2 ; b1= 0-2
|
||||
paddw mm2, mm2 ;
|
||||
|
||||
movq mm5, mm1
|
||||
paddw mm2, mm0 ; a1 =0+2
|
||||
|
||||
pmulhw mm5, [GLOBAL(x_s1sqr2)];
|
||||
paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2)
|
||||
|
||||
movq mm7, mm3 ;
|
||||
pmulhw mm7, [GLOBAL(x_c1sqr2less1)];
|
||||
|
||||
paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2)
|
||||
psubw mm7, mm5 ; c1
|
||||
|
||||
movq mm5, mm1
|
||||
movq mm4, mm3
|
||||
|
||||
pmulhw mm5, [GLOBAL(x_c1sqr2less1)]
|
||||
paddw mm5, mm1
|
||||
|
||||
pmulhw mm3, [GLOBAL(x_s1sqr2)]
|
||||
paddw mm3, mm4
|
||||
|
||||
paddw mm3, mm5 ; d1
|
||||
movq mm6, mm2 ; a1
|
||||
|
||||
movq mm4, mm0 ; b1
|
||||
paddw mm2, mm3 ;0
|
||||
|
||||
paddw mm4, mm7 ;1
|
||||
psubw mm0, mm7 ;2
|
||||
|
||||
psubw mm6, mm3 ;3
|
||||
|
||||
movq mm1, mm2 ; 03 02 01 00
|
||||
movq mm3, mm4 ; 23 22 21 20
|
||||
|
||||
punpcklwd mm1, mm0 ; 11 01 10 00
|
||||
punpckhwd mm2, mm0 ; 13 03 12 02
|
||||
|
||||
punpcklwd mm3, mm6 ; 31 21 30 20
|
||||
punpckhwd mm4, mm6 ; 33 23 32 22
|
||||
|
||||
movq mm0, mm1 ; 11 01 10 00
|
||||
movq mm5, mm2 ; 13 03 12 02
|
||||
|
||||
punpckldq mm0, mm3 ; 30 20 10 00
|
||||
punpckhdq mm1, mm3 ; 31 21 11 01
|
||||
|
||||
punpckldq mm2, mm4 ; 32 22 12 02
|
||||
punpckhdq mm5, mm4 ; 33 23 13 03
|
||||
|
||||
movq mm3, mm5 ; 33 23 13 03
|
||||
|
||||
psubw mm0, mm2 ; b1= 0-2
|
||||
paddw mm2, mm2 ;
|
||||
|
||||
movq mm5, mm1
|
||||
paddw mm2, mm0 ; a1 =0+2
|
||||
|
||||
pmulhw mm5, [GLOBAL(x_s1sqr2)];
|
||||
paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2)
|
||||
|
||||
movq mm7, mm3 ;
|
||||
pmulhw mm7, [GLOBAL(x_c1sqr2less1)];
|
||||
|
||||
paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2)
|
||||
psubw mm7, mm5 ; c1
|
||||
|
||||
movq mm5, mm1
|
||||
movq mm4, mm3
|
||||
|
||||
pmulhw mm5, [GLOBAL(x_c1sqr2less1)]
|
||||
paddw mm5, mm1
|
||||
|
||||
pmulhw mm3, [GLOBAL(x_s1sqr2)]
|
||||
paddw mm3, mm4
|
||||
|
||||
paddw mm3, mm5 ; d1
|
||||
paddw mm0, [GLOBAL(fours)]
|
||||
|
||||
paddw mm2, [GLOBAL(fours)]
|
||||
movq mm6, mm2 ; a1
|
||||
|
||||
movq mm4, mm0 ; b1
|
||||
paddw mm2, mm3 ;0
|
||||
|
||||
paddw mm4, mm7 ;1
|
||||
psubw mm0, mm7 ;2
|
||||
|
||||
psubw mm6, mm3 ;3
|
||||
psraw mm2, 3
|
||||
|
||||
psraw mm0, 3
|
||||
psraw mm4, 3
|
||||
|
||||
psraw mm6, 3
|
||||
|
||||
movq mm1, mm2 ; 03 02 01 00
|
||||
movq mm3, mm4 ; 23 22 21 20
|
||||
|
||||
punpcklwd mm1, mm0 ; 11 01 10 00
|
||||
punpckhwd mm2, mm0 ; 13 03 12 02
|
||||
|
||||
punpcklwd mm3, mm6 ; 31 21 30 20
|
||||
punpckhwd mm4, mm6 ; 33 23 32 22
|
||||
|
||||
movq mm0, mm1 ; 11 01 10 00
|
||||
movq mm5, mm2 ; 13 03 12 02
|
||||
|
||||
punpckldq mm0, mm3 ; 30 20 10 00
|
||||
punpckhdq mm1, mm3 ; 31 21 11 01
|
||||
|
||||
punpckldq mm2, mm4 ; 32 22 12 02
|
||||
punpckhdq mm5, mm4 ; 33 23 13 03
|
||||
|
||||
pxor mm7, mm7
|
||||
|
||||
movd mm4, [rsi]
|
||||
punpcklbw mm4, mm7
|
||||
paddsw mm0, mm4
|
||||
packuswb mm0, mm7
|
||||
movd [rdx], mm0
|
||||
|
||||
movd mm4, [rsi+rax]
|
||||
punpcklbw mm4, mm7
|
||||
paddsw mm1, mm4
|
||||
packuswb mm1, mm7
|
||||
movd [rdx+rdi], mm1
|
||||
|
||||
movd mm4, [rsi+2*rax]
|
||||
punpcklbw mm4, mm7
|
||||
paddsw mm2, mm4
|
||||
packuswb mm2, mm7
|
||||
movd [rdx+rdi*2], mm2
|
||||
|
||||
add rdx, rdi
|
||||
add rsi, rax
|
||||
|
||||
movd mm4, [rsi+2*rax]
|
||||
punpcklbw mm4, mm7
|
||||
paddsw mm5, mm4
|
||||
packuswb mm5, mm7
|
||||
movd [rdx+rdi*2], mm5
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
;void vp8_dc_only_idct_add_mmx(
|
||||
;short input_dc,
|
||||
;unsigned char *pred_ptr,
|
||||
;int pred_stride,
|
||||
;unsigned char *dst_ptr,
|
||||
;int stride)
|
||||
global sym(vp8_dc_only_idct_add_mmx) PRIVATE
|
||||
sym(vp8_dc_only_idct_add_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
GET_GOT rbx
|
||||
; end prolog
|
||||
|
||||
movd mm5, arg(0) ;input_dc
|
||||
mov rax, arg(1) ;pred_ptr
|
||||
movsxd rdx, dword ptr arg(2) ;pred_stride
|
||||
|
||||
pxor mm0, mm0
|
||||
|
||||
paddw mm5, [GLOBAL(fours)]
|
||||
lea rcx, [rdx + rdx*2]
|
||||
|
||||
psraw mm5, 3
|
||||
|
||||
punpcklwd mm5, mm5
|
||||
|
||||
punpckldq mm5, mm5
|
||||
|
||||
movd mm1, [rax]
|
||||
movd mm2, [rax+rdx]
|
||||
movd mm3, [rax+2*rdx]
|
||||
movd mm4, [rax+rcx]
|
||||
|
||||
mov rax, arg(3) ;d -- destination
|
||||
movsxd rdx, dword ptr arg(4) ;dst_stride
|
||||
|
||||
punpcklbw mm1, mm0
|
||||
paddsw mm1, mm5
|
||||
packuswb mm1, mm0 ; pack and unpack to saturate
|
||||
lea rcx, [rdx + rdx*2]
|
||||
|
||||
punpcklbw mm2, mm0
|
||||
paddsw mm2, mm5
|
||||
packuswb mm2, mm0 ; pack and unpack to saturate
|
||||
|
||||
punpcklbw mm3, mm0
|
||||
paddsw mm3, mm5
|
||||
packuswb mm3, mm0 ; pack and unpack to saturate
|
||||
|
||||
punpcklbw mm4, mm0
|
||||
paddsw mm4, mm5
|
||||
packuswb mm4, mm0 ; pack and unpack to saturate
|
||||
|
||||
movd [rax], mm1
|
||||
movd [rax+rdx], mm2
|
||||
movd [rax+2*rdx], mm3
|
||||
movd [rax+rcx], mm4
|
||||
|
||||
; begin epilog
|
||||
RESTORE_GOT
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
SECTION_RODATA
|
||||
align 16
|
||||
x_s1sqr2:
|
||||
times 4 dw 0x8A8C
|
||||
align 16
|
||||
x_c1sqr2less1:
|
||||
times 4 dw 0x4E7B
|
||||
align 16
|
||||
fours:
|
||||
times 4 dw 0x0004
|
|
@@ -0,0 +1,708 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
;void vp8_idct_dequant_0_2x_sse2
|
||||
; (
|
||||
; short *qcoeff - 0
|
||||
; short *dequant - 1
|
||||
; unsigned char *dst - 2
|
||||
; int dst_stride - 3
|
||||
; )
|
||||
|
||||
global sym(vp8_idct_dequant_0_2x_sse2) PRIVATE
|
||||
sym(vp8_idct_dequant_0_2x_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 4
|
||||
GET_GOT rbx
|
||||
; end prolog
|
||||
|
||||
mov rdx, arg(1) ; dequant
|
||||
mov rax, arg(0) ; qcoeff
|
||||
|
||||
movd xmm4, [rax]
|
||||
movd xmm5, [rdx]
|
||||
|
||||
pinsrw xmm4, [rax+32], 4
|
||||
pinsrw xmm5, [rdx], 4
|
||||
|
||||
pmullw xmm4, xmm5
|
||||
|
||||
; Zero out xmm5, for use in unpacking
|
||||
pxor xmm5, xmm5
|
||||
|
||||
; clear coeffs
|
||||
movd [rax], xmm5
|
||||
movd [rax+32], xmm5
|
||||
;pshufb
|
||||
mov rax, arg(2) ; dst
|
||||
movsxd rdx, dword ptr arg(3) ; dst_stride
|
||||
|
||||
pshuflw xmm4, xmm4, 00000000b
|
||||
pshufhw xmm4, xmm4, 00000000b
|
||||
|
||||
lea rcx, [rdx + rdx*2]
|
||||
paddw xmm4, [GLOBAL(fours)]
|
||||
|
||||
psraw xmm4, 3
|
||||
|
||||
movq xmm0, [rax]
|
||||
movq xmm1, [rax+rdx]
|
||||
movq xmm2, [rax+2*rdx]
|
||||
movq xmm3, [rax+rcx]
|
||||
|
||||
punpcklbw xmm0, xmm5
|
||||
punpcklbw xmm1, xmm5
|
||||
punpcklbw xmm2, xmm5
|
||||
punpcklbw xmm3, xmm5
|
||||
|
||||
|
||||
; Add to predict buffer
|
||||
paddw xmm0, xmm4
|
||||
paddw xmm1, xmm4
|
||||
paddw xmm2, xmm4
|
||||
paddw xmm3, xmm4
|
||||
|
||||
; pack up before storing
|
||||
packuswb xmm0, xmm5
|
||||
packuswb xmm1, xmm5
|
||||
packuswb xmm2, xmm5
|
||||
packuswb xmm3, xmm5
|
||||
|
||||
; store blocks back out
|
||||
movq [rax], xmm0
|
||||
movq [rax + rdx], xmm1
|
||||
|
||||
lea rax, [rax + 2*rdx]
|
||||
|
||||
movq [rax], xmm2
|
||||
movq [rax + rdx], xmm3
|
||||
|
||||
; begin epilog
|
||||
RESTORE_GOT
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
;void vp8_idct_dequant_full_2x_sse2
|
||||
; (
|
||||
; short *qcoeff - 0
|
||||
; short *dequant - 1
|
||||
; unsigned char *dst - 2
|
||||
; int dst_stride - 3
|
||||
; )
|
||||
global sym(vp8_idct_dequant_full_2x_sse2) PRIVATE
|
||||
sym(vp8_idct_dequant_full_2x_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 4
|
||||
SAVE_XMM 7
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
; special case when 2 blocks have 0 or 1 coeffs
|
||||
; dc is set as first coeff, so no need to load qcoeff
|
||||
mov rax, arg(0) ; qcoeff
|
||||
mov rdx, arg(1) ; dequant
|
||||
mov rdi, arg(2) ; dst
|
||||
|
||||
|
||||
; Zero out xmm7, for use in unpacking
|
||||
pxor xmm7, xmm7
|
||||
|
||||
|
||||
; note the transpose of xmm1 and xmm2, necessary for shuffle
|
||||
; to spit out sensicle data
|
||||
movdqa xmm0, [rax]
|
||||
movdqa xmm2, [rax+16]
|
||||
movdqa xmm1, [rax+32]
|
||||
movdqa xmm3, [rax+48]
|
||||
|
||||
; Clear out coeffs
|
||||
movdqa [rax], xmm7
|
||||
movdqa [rax+16], xmm7
|
||||
movdqa [rax+32], xmm7
|
||||
movdqa [rax+48], xmm7
|
||||
|
||||
; dequantize qcoeff buffer
|
||||
pmullw xmm0, [rdx]
|
||||
pmullw xmm2, [rdx+16]
|
||||
pmullw xmm1, [rdx]
|
||||
pmullw xmm3, [rdx+16]
|
||||
movsxd rdx, dword ptr arg(3) ; dst_stride
|
||||
|
||||
; repack so block 0 row x and block 1 row x are together
|
||||
movdqa xmm4, xmm0
|
||||
punpckldq xmm0, xmm1
|
||||
punpckhdq xmm4, xmm1
|
||||
|
||||
pshufd xmm0, xmm0, 11011000b
|
||||
pshufd xmm1, xmm4, 11011000b
|
||||
|
||||
movdqa xmm4, xmm2
|
||||
punpckldq xmm2, xmm3
|
||||
punpckhdq xmm4, xmm3
|
||||
|
||||
pshufd xmm2, xmm2, 11011000b
|
||||
pshufd xmm3, xmm4, 11011000b
|
||||
|
||||
; first pass
|
||||
psubw xmm0, xmm2 ; b1 = 0-2
|
||||
paddw xmm2, xmm2 ;
|
||||
|
||||
movdqa xmm5, xmm1
|
||||
paddw xmm2, xmm0 ; a1 = 0+2
|
||||
|
||||
pmulhw xmm5, [GLOBAL(x_s1sqr2)]
|
||||
lea rcx, [rdx + rdx*2] ;dst_stride * 3
|
||||
paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2)
|
||||
|
||||
movdqa xmm7, xmm3
|
||||
pmulhw xmm7, [GLOBAL(x_c1sqr2less1)]
|
||||
|
||||
paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2)
|
||||
psubw xmm7, xmm5 ; c1
|
||||
|
||||
movdqa xmm5, xmm1
|
||||
movdqa xmm4, xmm3
|
||||
|
||||
pmulhw xmm5, [GLOBAL(x_c1sqr2less1)]
|
||||
paddw xmm5, xmm1
|
||||
|
||||
pmulhw xmm3, [GLOBAL(x_s1sqr2)]
|
||||
paddw xmm3, xmm4
|
||||
|
||||
paddw xmm3, xmm5 ; d1
|
||||
movdqa xmm6, xmm2 ; a1
|
||||
|
||||
movdqa xmm4, xmm0 ; b1
|
||||
paddw xmm2, xmm3 ;0
|
||||
|
||||
paddw xmm4, xmm7 ;1
|
||||
psubw xmm0, xmm7 ;2
|
||||
|
||||
psubw xmm6, xmm3 ;3
|
||||
|
||||
; transpose for the second pass
|
||||
movdqa xmm7, xmm2 ; 103 102 101 100 003 002 001 000
|
||||
punpcklwd xmm2, xmm0 ; 007 003 006 002 005 001 004 000
|
||||
punpckhwd xmm7, xmm0 ; 107 103 106 102 105 101 104 100
|
||||
|
||||
movdqa xmm5, xmm4 ; 111 110 109 108 011 010 009 008
|
||||
punpcklwd xmm4, xmm6 ; 015 011 014 010 013 009 012 008
|
||||
punpckhwd xmm5, xmm6 ; 115 111 114 110 113 109 112 108
|
||||
|
||||
|
||||
movdqa xmm1, xmm2 ; 007 003 006 002 005 001 004 000
|
||||
punpckldq xmm2, xmm4 ; 013 009 005 001 012 008 004 000
|
||||
punpckhdq xmm1, xmm4 ; 015 011 007 003 014 010 006 002
|
||||
|
||||
movdqa xmm6, xmm7 ; 107 103 106 102 105 101 104 100
|
||||
punpckldq xmm7, xmm5 ; 113 109 105 101 112 108 104 100
|
||||
punpckhdq xmm6, xmm5 ; 115 111 107 103 114 110 106 102
|
||||
|
||||
|
||||
movdqa xmm5, xmm2 ; 013 009 005 001 012 008 004 000
|
||||
punpckldq xmm2, xmm7 ; 112 108 012 008 104 100 004 000
|
||||
punpckhdq xmm5, xmm7 ; 113 109 013 009 105 101 005 001
|
||||
|
||||
movdqa xmm7, xmm1 ; 015 011 007 003 014 010 006 002
|
||||
punpckldq xmm1, xmm6 ; 114 110 014 010 106 102 006 002
|
||||
punpckhdq xmm7, xmm6 ; 115 111 015 011 107 103 007 003
|
||||
|
||||
pshufd xmm0, xmm2, 11011000b
|
||||
pshufd xmm2, xmm1, 11011000b
|
||||
|
||||
pshufd xmm1, xmm5, 11011000b
|
||||
pshufd xmm3, xmm7, 11011000b
|
||||
|
||||
; second pass
|
||||
psubw xmm0, xmm2 ; b1 = 0-2
|
||||
paddw xmm2, xmm2
|
||||
|
||||
movdqa xmm5, xmm1
|
||||
paddw xmm2, xmm0 ; a1 = 0+2
|
||||
|
||||
pmulhw xmm5, [GLOBAL(x_s1sqr2)]
|
||||
paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2)
|
||||
|
||||
movdqa xmm7, xmm3
|
||||
pmulhw xmm7, [GLOBAL(x_c1sqr2less1)]
|
||||
|
||||
paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2)
|
||||
psubw xmm7, xmm5 ; c1
|
||||
|
||||
movdqa xmm5, xmm1
|
||||
movdqa xmm4, xmm3
|
||||
|
||||
pmulhw xmm5, [GLOBAL(x_c1sqr2less1)]
|
||||
paddw xmm5, xmm1
|
||||
|
||||
pmulhw xmm3, [GLOBAL(x_s1sqr2)]
|
||||
paddw xmm3, xmm4
|
||||
|
||||
paddw xmm3, xmm5 ; d1
|
||||
paddw xmm0, [GLOBAL(fours)]
|
||||
|
||||
paddw xmm2, [GLOBAL(fours)]
|
||||
movdqa xmm6, xmm2 ; a1
|
||||
|
||||
movdqa xmm4, xmm0 ; b1
|
||||
paddw xmm2, xmm3 ;0
|
||||
|
||||
paddw xmm4, xmm7 ;1
|
||||
psubw xmm0, xmm7 ;2
|
||||
|
||||
psubw xmm6, xmm3 ;3
|
||||
psraw xmm2, 3
|
||||
|
||||
psraw xmm0, 3
|
||||
psraw xmm4, 3
|
||||
|
||||
psraw xmm6, 3
|
||||
|
||||
; transpose to save
|
||||
movdqa xmm7, xmm2 ; 103 102 101 100 003 002 001 000
|
||||
punpcklwd xmm2, xmm0 ; 007 003 006 002 005 001 004 000
|
||||
punpckhwd xmm7, xmm0 ; 107 103 106 102 105 101 104 100
|
||||
|
||||
movdqa xmm5, xmm4 ; 111 110 109 108 011 010 009 008
|
||||
punpcklwd xmm4, xmm6 ; 015 011 014 010 013 009 012 008
|
||||
punpckhwd xmm5, xmm6 ; 115 111 114 110 113 109 112 108
|
||||
|
||||
|
||||
movdqa xmm1, xmm2 ; 007 003 006 002 005 001 004 000
|
||||
punpckldq xmm2, xmm4 ; 013 009 005 001 012 008 004 000
|
||||
punpckhdq xmm1, xmm4 ; 015 011 007 003 014 010 006 002
|
||||
|
||||
movdqa xmm6, xmm7 ; 107 103 106 102 105 101 104 100
|
||||
punpckldq xmm7, xmm5 ; 113 109 105 101 112 108 104 100
|
||||
punpckhdq xmm6, xmm5 ; 115 111 107 103 114 110 106 102
|
||||
|
||||
|
||||
movdqa xmm5, xmm2 ; 013 009 005 001 012 008 004 000
|
||||
punpckldq xmm2, xmm7 ; 112 108 012 008 104 100 004 000
|
||||
punpckhdq xmm5, xmm7 ; 113 109 013 009 105 101 005 001
|
||||
|
||||
movdqa xmm7, xmm1 ; 015 011 007 003 014 010 006 002
|
||||
punpckldq xmm1, xmm6 ; 114 110 014 010 106 102 006 002
|
||||
punpckhdq xmm7, xmm6 ; 115 111 015 011 107 103 007 003
|
||||
|
||||
pshufd xmm0, xmm2, 11011000b
|
||||
pshufd xmm2, xmm1, 11011000b
|
||||
|
||||
pshufd xmm1, xmm5, 11011000b
|
||||
pshufd xmm3, xmm7, 11011000b
|
||||
|
||||
pxor xmm7, xmm7
|
||||
|
||||
; Load up predict blocks
|
||||
movq xmm4, [rdi]
|
||||
movq xmm5, [rdi+rdx]
|
||||
|
||||
punpcklbw xmm4, xmm7
|
||||
punpcklbw xmm5, xmm7
|
||||
|
||||
paddw xmm0, xmm4
|
||||
paddw xmm1, xmm5
|
||||
|
||||
movq xmm4, [rdi+2*rdx]
|
||||
movq xmm5, [rdi+rcx]
|
||||
|
||||
punpcklbw xmm4, xmm7
|
||||
punpcklbw xmm5, xmm7
|
||||
|
||||
paddw xmm2, xmm4
|
||||
paddw xmm3, xmm5
|
||||
|
||||
.finish:
|
||||
|
||||
; pack up before storing
|
||||
packuswb xmm0, xmm7
|
||||
packuswb xmm1, xmm7
|
||||
packuswb xmm2, xmm7
|
||||
packuswb xmm3, xmm7
|
||||
|
||||
; store blocks back out
|
||||
movq [rdi], xmm0
|
||||
movq [rdi + rdx], xmm1
|
||||
movq [rdi + rdx*2], xmm2
|
||||
movq [rdi + rcx], xmm3
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
;void vp8_idct_dequant_dc_0_2x_sse2
|
||||
; (
|
||||
; short *qcoeff - 0
|
||||
; short *dequant - 1
|
||||
; unsigned char *dst - 2
|
||||
; int dst_stride - 3
|
||||
; short *dc - 4
|
||||
; )
|
||||
global sym(vp8_idct_dequant_dc_0_2x_sse2) PRIVATE
|
||||
sym(vp8_idct_dequant_dc_0_2x_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
GET_GOT rbx
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
; special case when 2 blocks have 0 or 1 coeffs
|
||||
; dc is set as first coeff, so no need to load qcoeff
|
||||
mov rax, arg(0) ; qcoeff
|
||||
|
||||
mov rdi, arg(2) ; dst
|
||||
mov rdx, arg(4) ; dc
|
||||
|
||||
; Zero out xmm5, for use in unpacking
|
||||
pxor xmm5, xmm5
|
||||
|
||||
; load up 2 dc words here == 2*16 = doubleword
|
||||
movd xmm4, [rdx]
|
||||
|
||||
movsxd rdx, dword ptr arg(3) ; dst_stride
|
||||
lea rcx, [rdx + rdx*2]
|
||||
; Load up predict blocks
|
||||
movq xmm0, [rdi]
|
||||
movq xmm1, [rdi+rdx*1]
|
||||
movq xmm2, [rdi+rdx*2]
|
||||
movq xmm3, [rdi+rcx]
|
||||
|
||||
; Duplicate and expand dc across
|
||||
punpcklwd xmm4, xmm4
|
||||
punpckldq xmm4, xmm4
|
||||
|
||||
; Rounding to dequant and downshift
|
||||
paddw xmm4, [GLOBAL(fours)]
|
||||
psraw xmm4, 3
|
||||
|
||||
; Predict buffer needs to be expanded from bytes to words
|
||||
punpcklbw xmm0, xmm5
|
||||
punpcklbw xmm1, xmm5
|
||||
punpcklbw xmm2, xmm5
|
||||
punpcklbw xmm3, xmm5
|
||||
|
||||
; Add to predict buffer
|
||||
paddw xmm0, xmm4
|
||||
paddw xmm1, xmm4
|
||||
paddw xmm2, xmm4
|
||||
paddw xmm3, xmm4
|
||||
|
||||
; pack up before storing
|
||||
packuswb xmm0, xmm5
|
||||
packuswb xmm1, xmm5
|
||||
packuswb xmm2, xmm5
|
||||
packuswb xmm3, xmm5
|
||||
|
||||
; store blocks back out
|
||||
movq [rdi], xmm0
|
||||
movq [rdi + rdx], xmm1
|
||||
movq [rdi + rdx*2], xmm2
|
||||
movq [rdi + rcx], xmm3
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
RESTORE_GOT
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
;void vp8_idct_dequant_dc_full_2x_sse2
|
||||
; (
|
||||
; short *qcoeff - 0
|
||||
; short *dequant - 1
|
||||
; unsigned char *dst - 2
|
||||
; int dst_stride - 3
|
||||
; short *dc - 4
|
||||
; )
|
||||
global sym(vp8_idct_dequant_dc_full_2x_sse2) PRIVATE
|
||||
sym(vp8_idct_dequant_dc_full_2x_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
SAVE_XMM 7
|
||||
GET_GOT rbx
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
; special case when 2 blocks have 0 or 1 coeffs
|
||||
; dc is set as first coeff, so no need to load qcoeff
|
||||
mov rax, arg(0) ; qcoeff
|
||||
mov rdx, arg(1) ; dequant
|
||||
|
||||
mov rdi, arg(2) ; dst
|
||||
|
||||
; Zero out xmm7, for use in unpacking
|
||||
pxor xmm7, xmm7
|
||||
|
||||
|
||||
; note the transpose of xmm1 and xmm2, necessary for shuffle
|
||||
; to spit out sensible data
|
||||
movdqa xmm0, [rax]
|
||||
movdqa xmm2, [rax+16]
|
||||
movdqa xmm1, [rax+32]
|
||||
movdqa xmm3, [rax+48]
|
||||
|
||||
; Clear out coeffs
|
||||
movdqa [rax], xmm7
|
||||
movdqa [rax+16], xmm7
|
||||
movdqa [rax+32], xmm7
|
||||
movdqa [rax+48], xmm7
|
||||
|
||||
; dequantize qcoeff buffer
|
||||
pmullw xmm0, [rdx]
|
||||
pmullw xmm2, [rdx+16]
|
||||
pmullw xmm1, [rdx]
|
||||
pmullw xmm3, [rdx+16]
|
||||
|
||||
; DC component
|
||||
mov rdx, arg(4)
|
||||
|
||||
; repack so block 0 row x and block 1 row x are together
|
||||
movdqa xmm4, xmm0
|
||||
punpckldq xmm0, xmm1
|
||||
punpckhdq xmm4, xmm1
|
||||
|
||||
pshufd xmm0, xmm0, 11011000b
|
||||
pshufd xmm1, xmm4, 11011000b
|
||||
|
||||
movdqa xmm4, xmm2
|
||||
punpckldq xmm2, xmm3
|
||||
punpckhdq xmm4, xmm3
|
||||
|
||||
pshufd xmm2, xmm2, 11011000b
|
||||
pshufd xmm3, xmm4, 11011000b
|
||||
|
||||
; insert DC component
|
||||
pinsrw xmm0, [rdx], 0
|
||||
pinsrw xmm0, [rdx+2], 4
|
||||
|
||||
; first pass
|
||||
psubw xmm0, xmm2 ; b1 = 0-2
|
||||
paddw xmm2, xmm2 ;
|
||||
|
||||
movdqa xmm5, xmm1
|
||||
paddw xmm2, xmm0 ; a1 = 0+2
|
||||
|
||||
pmulhw xmm5, [GLOBAL(x_s1sqr2)]
|
||||
paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2)
|
||||
|
||||
movdqa xmm7, xmm3
|
||||
pmulhw xmm7, [GLOBAL(x_c1sqr2less1)]
|
||||
|
||||
paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2)
|
||||
psubw xmm7, xmm5 ; c1
|
||||
|
||||
movdqa xmm5, xmm1
|
||||
movdqa xmm4, xmm3
|
||||
|
||||
pmulhw xmm5, [GLOBAL(x_c1sqr2less1)]
|
||||
paddw xmm5, xmm1
|
||||
|
||||
pmulhw xmm3, [GLOBAL(x_s1sqr2)]
|
||||
paddw xmm3, xmm4
|
||||
|
||||
paddw xmm3, xmm5 ; d1
|
||||
movdqa xmm6, xmm2 ; a1
|
||||
|
||||
movdqa xmm4, xmm0 ; b1
|
||||
paddw xmm2, xmm3 ;0
|
||||
|
||||
paddw xmm4, xmm7 ;1
|
||||
psubw xmm0, xmm7 ;2
|
||||
|
||||
psubw xmm6, xmm3 ;3
|
||||
|
||||
; transpose for the second pass
|
||||
movdqa xmm7, xmm2 ; 103 102 101 100 003 002 001 000
|
||||
punpcklwd xmm2, xmm0 ; 007 003 006 002 005 001 004 000
|
||||
punpckhwd xmm7, xmm0 ; 107 103 106 102 105 101 104 100
|
||||
|
||||
movdqa xmm5, xmm4 ; 111 110 109 108 011 010 009 008
|
||||
punpcklwd xmm4, xmm6 ; 015 011 014 010 013 009 012 008
|
||||
punpckhwd xmm5, xmm6 ; 115 111 114 110 113 109 112 108
|
||||
|
||||
|
||||
movdqa xmm1, xmm2 ; 007 003 006 002 005 001 004 000
|
||||
punpckldq xmm2, xmm4 ; 013 009 005 001 012 008 004 000
|
||||
punpckhdq xmm1, xmm4 ; 015 011 007 003 014 010 006 002
|
||||
|
||||
movdqa xmm6, xmm7 ; 107 103 106 102 105 101 104 100
|
||||
punpckldq xmm7, xmm5 ; 113 109 105 101 112 108 104 100
|
||||
punpckhdq xmm6, xmm5 ; 115 111 107 103 114 110 106 102
|
||||
|
||||
|
||||
movdqa xmm5, xmm2 ; 013 009 005 001 012 008 004 000
|
||||
punpckldq xmm2, xmm7 ; 112 108 012 008 104 100 004 000
|
||||
punpckhdq xmm5, xmm7 ; 113 109 013 009 105 101 005 001
|
||||
|
||||
movdqa xmm7, xmm1 ; 015 011 007 003 014 010 006 002
|
||||
punpckldq xmm1, xmm6 ; 114 110 014 010 106 102 006 002
|
||||
punpckhdq xmm7, xmm6 ; 115 111 015 011 107 103 007 003
|
||||
|
||||
pshufd xmm0, xmm2, 11011000b
|
||||
pshufd xmm2, xmm1, 11011000b
|
||||
|
||||
pshufd xmm1, xmm5, 11011000b
|
||||
pshufd xmm3, xmm7, 11011000b
|
||||
|
||||
; second pass
|
||||
psubw xmm0, xmm2 ; b1 = 0-2
|
||||
paddw xmm2, xmm2
|
||||
|
||||
movdqa xmm5, xmm1
|
||||
paddw xmm2, xmm0 ; a1 = 0+2
|
||||
|
||||
pmulhw xmm5, [GLOBAL(x_s1sqr2)]
|
||||
paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2)
|
||||
|
||||
movdqa xmm7, xmm3
|
||||
pmulhw xmm7, [GLOBAL(x_c1sqr2less1)]
|
||||
|
||||
paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2)
|
||||
psubw xmm7, xmm5 ; c1
|
||||
|
||||
movdqa xmm5, xmm1
|
||||
movdqa xmm4, xmm3
|
||||
|
||||
pmulhw xmm5, [GLOBAL(x_c1sqr2less1)]
|
||||
paddw xmm5, xmm1
|
||||
|
||||
pmulhw xmm3, [GLOBAL(x_s1sqr2)]
|
||||
paddw xmm3, xmm4
|
||||
|
||||
paddw xmm3, xmm5 ; d1
|
||||
paddw xmm0, [GLOBAL(fours)]
|
||||
|
||||
paddw xmm2, [GLOBAL(fours)]
|
||||
movdqa xmm6, xmm2 ; a1
|
||||
|
||||
movdqa xmm4, xmm0 ; b1
|
||||
paddw xmm2, xmm3 ;0
|
||||
|
||||
paddw xmm4, xmm7 ;1
|
||||
psubw xmm0, xmm7 ;2
|
||||
|
||||
psubw xmm6, xmm3 ;3
|
||||
psraw xmm2, 3
|
||||
|
||||
psraw xmm0, 3
|
||||
psraw xmm4, 3
|
||||
|
||||
psraw xmm6, 3
|
||||
|
||||
; transpose to save
|
||||
movdqa xmm7, xmm2 ; 103 102 101 100 003 002 001 000
|
||||
punpcklwd xmm2, xmm0 ; 007 003 006 002 005 001 004 000
|
||||
punpckhwd xmm7, xmm0 ; 107 103 106 102 105 101 104 100
|
||||
|
||||
movdqa xmm5, xmm4 ; 111 110 109 108 011 010 009 008
|
||||
punpcklwd xmm4, xmm6 ; 015 011 014 010 013 009 012 008
|
||||
punpckhwd xmm5, xmm6 ; 115 111 114 110 113 109 112 108
|
||||
|
||||
|
||||
movdqa xmm1, xmm2 ; 007 003 006 002 005 001 004 000
|
||||
punpckldq xmm2, xmm4 ; 013 009 005 001 012 008 004 000
|
||||
punpckhdq xmm1, xmm4 ; 015 011 007 003 014 010 006 002
|
||||
|
||||
movdqa xmm6, xmm7 ; 107 103 106 102 105 101 104 100
|
||||
punpckldq xmm7, xmm5 ; 113 109 105 101 112 108 104 100
|
||||
punpckhdq xmm6, xmm5 ; 115 111 107 103 114 110 106 102
|
||||
|
||||
|
||||
movdqa xmm5, xmm2 ; 013 009 005 001 012 008 004 000
|
||||
punpckldq xmm2, xmm7 ; 112 108 012 008 104 100 004 000
|
||||
punpckhdq xmm5, xmm7 ; 113 109 013 009 105 101 005 001
|
||||
|
||||
movdqa xmm7, xmm1 ; 015 011 007 003 014 010 006 002
|
||||
punpckldq xmm1, xmm6 ; 114 110 014 010 106 102 006 002
|
||||
punpckhdq xmm7, xmm6 ; 115 111 015 011 107 103 007 003
|
||||
|
||||
pshufd xmm0, xmm2, 11011000b
|
||||
pshufd xmm2, xmm1, 11011000b
|
||||
|
||||
pshufd xmm1, xmm5, 11011000b
|
||||
pshufd xmm3, xmm7, 11011000b
|
||||
|
||||
pxor xmm7, xmm7
|
||||
|
||||
; Load up predict blocks
|
||||
movsxd rdx, dword ptr arg(3) ; dst_stride
|
||||
movq xmm4, [rdi]
|
||||
movq xmm5, [rdi+rdx]
|
||||
lea rcx, [rdx + rdx*2]
|
||||
|
||||
punpcklbw xmm4, xmm7
|
||||
punpcklbw xmm5, xmm7
|
||||
|
||||
paddw xmm0, xmm4
|
||||
paddw xmm1, xmm5
|
||||
|
||||
movq xmm4, [rdi+rdx*2]
|
||||
movq xmm5, [rdi+rcx]
|
||||
|
||||
punpcklbw xmm4, xmm7
|
||||
punpcklbw xmm5, xmm7
|
||||
|
||||
paddw xmm2, xmm4
|
||||
paddw xmm3, xmm5
|
||||
|
||||
.finish:
|
||||
|
||||
; pack up before storing
|
||||
packuswb xmm0, xmm7
|
||||
packuswb xmm1, xmm7
|
||||
packuswb xmm2, xmm7
|
||||
packuswb xmm3, xmm7
|
||||
|
||||
; Load destination stride before writing out,
|
||||
; doesn't need to persist
|
||||
movsxd rdx, dword ptr arg(3) ; dst_stride
|
||||
|
||||
; store blocks back out
|
||||
movq [rdi], xmm0
|
||||
movq [rdi + rdx], xmm1
|
||||
|
||||
lea rdi, [rdi + 2*rdx]
|
||||
|
||||
movq [rdi], xmm2
|
||||
movq [rdi + rdx], xmm3
|
||||
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
RESTORE_GOT
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
SECTION_RODATA
|
||||
align 16
|
||||
fours:
|
||||
times 8 dw 0x0004
|
||||
align 16
|
||||
x_s1sqr2:
|
||||
times 8 dw 0x8A8C
|
||||
align 16
|
||||
x_c1sqr2less1:
|
||||
times 8 dw 0x4E7B
|
|
@@ -0,0 +1,140 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
;void vp8_short_inv_walsh4x4_mmx(short *input, short *output)
|
||||
global sym(vp8_short_inv_walsh4x4_mmx) PRIVATE
|
||||
sym(vp8_short_inv_walsh4x4_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 2
|
||||
; end prolog
|
||||
|
||||
mov rdx, arg(0)
|
||||
mov rax, 30003h
|
||||
|
||||
movq mm0, [rdx + 0] ;ip[0]
|
||||
movq mm1, [rdx + 8] ;ip[4]
|
||||
movq mm7, rax
|
||||
|
||||
movq mm2, [rdx + 16] ;ip[8]
|
||||
movq mm3, [rdx + 24] ;ip[12]
|
||||
punpcklwd mm7, mm7 ;0003000300030003h
|
||||
mov rdx, arg(1)
|
||||
|
||||
movq mm4, mm0
|
||||
movq mm5, mm1
|
||||
|
||||
paddw mm4, mm3 ;ip[0] + ip[12] aka al
|
||||
paddw mm5, mm2 ;ip[4] + ip[8] aka bl
|
||||
|
||||
movq mm6, mm4 ;temp al
|
||||
paddw mm4, mm5 ;al + bl
|
||||
psubw mm6, mm5 ;al - bl
|
||||
|
||||
psubw mm0, mm3 ;ip[0] - ip[12] aka d1
|
||||
psubw mm1, mm2 ;ip[4] - ip[8] aka c1
|
||||
|
||||
movq mm5, mm0 ;temp dl
|
||||
paddw mm0, mm1 ;dl + cl
|
||||
psubw mm5, mm1 ;dl - cl
|
||||
|
||||
; 03 02 01 00
|
||||
; 13 12 11 10
|
||||
; 23 22 21 20
|
||||
; 33 32 31 30
|
||||
|
||||
movq mm3, mm4 ; 03 02 01 00
|
||||
punpcklwd mm4, mm0 ; 11 01 10 00
|
||||
punpckhwd mm3, mm0 ; 13 03 12 02
|
||||
|
||||
movq mm1, mm6 ; 23 22 21 20
|
||||
punpcklwd mm6, mm5 ; 31 21 30 20
|
||||
punpckhwd mm1, mm5 ; 33 23 32 22
|
||||
|
||||
movq mm0, mm4 ; 11 01 10 00
|
||||
movq mm2, mm3 ; 13 03 12 02
|
||||
|
||||
punpckldq mm0, mm6 ; 30 20 10 00 aka ip[0]
|
||||
punpckhdq mm4, mm6 ; 31 21 11 01 aka ip[4]
|
||||
|
||||
punpckldq mm2, mm1 ; 32 22 12 02 aka ip[8]
|
||||
punpckhdq mm3, mm1 ; 33 23 13 03 aka ip[12]
|
||||
;~~~~~~~~~~~~~~~~~~~~~
|
||||
movq mm1, mm0
|
||||
movq mm5, mm4
|
||||
paddw mm1, mm3 ;ip[0] + ip[12] aka al
|
||||
paddw mm5, mm2 ;ip[4] + ip[8] aka bl
|
||||
|
||||
movq mm6, mm1 ;temp al
|
||||
paddw mm1, mm5 ;al + bl
|
||||
psubw mm6, mm5 ;al - bl
|
||||
paddw mm1, mm7
|
||||
paddw mm6, mm7
|
||||
psraw mm1, 3
|
||||
psraw mm6, 3
|
||||
|
||||
psubw mm0, mm3 ;ip[0] - ip[12] aka d1
|
||||
psubw mm4, mm2 ;ip[4] - ip[8] aka c1
|
||||
|
||||
movq mm5, mm0 ;temp dl
|
||||
paddw mm0, mm4 ;dl + cl
|
||||
psubw mm5, mm4 ;dl - cl
|
||||
paddw mm0, mm7
|
||||
paddw mm5, mm7
|
||||
psraw mm0, 3
|
||||
psraw mm5, 3
|
||||
;~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
movd eax, mm1
|
||||
movd ecx, mm0
|
||||
psrlq mm0, 32
|
||||
psrlq mm1, 32
|
||||
mov word ptr[rdx+32*0], ax
|
||||
mov word ptr[rdx+32*1], cx
|
||||
shr eax, 16
|
||||
shr ecx, 16
|
||||
mov word ptr[rdx+32*4], ax
|
||||
mov word ptr[rdx+32*5], cx
|
||||
movd eax, mm1
|
||||
movd ecx, mm0
|
||||
mov word ptr[rdx+32*8], ax
|
||||
mov word ptr[rdx+32*9], cx
|
||||
shr eax, 16
|
||||
shr ecx, 16
|
||||
mov word ptr[rdx+32*12], ax
|
||||
mov word ptr[rdx+32*13], cx
|
||||
|
||||
movd eax, mm6
|
||||
movd ecx, mm5
|
||||
psrlq mm5, 32
|
||||
psrlq mm6, 32
|
||||
mov word ptr[rdx+32*2], ax
|
||||
mov word ptr[rdx+32*3], cx
|
||||
shr eax, 16
|
||||
shr ecx, 16
|
||||
mov word ptr[rdx+32*6], ax
|
||||
mov word ptr[rdx+32*7], cx
|
||||
movd eax, mm6
|
||||
movd ecx, mm5
|
||||
mov word ptr[rdx+32*10], ax
|
||||
mov word ptr[rdx+32*11], cx
|
||||
shr eax, 16
|
||||
shr ecx, 16
|
||||
mov word ptr[rdx+32*14], ax
|
||||
mov word ptr[rdx+32*15], cx
|
||||
|
||||
; begin epilog
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
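
In scalar terms the routine above applies the 4-point butterfly described in its comments twice, once per direction, adding the rounding constant 3 and shifting right by 3 only on the second pass before the results are scattered out a word at a time. A rough, illustrative C sketch of that butterfly (not the project's reference code):

/* One 4-point inverse Walsh-Hadamard butterfly, following the "al/bl/cl/dl"
   comments above.  round != 0 corresponds to the second pass, which adds 3
   and arithmetic-shifts right by 3. */
static void iwalsh4(const short in[4], short out[4], int round)
{
    int a1 = in[0] + in[3];          /* al: ip[0] + ip[12] */
    int b1 = in[1] + in[2];          /* bl: ip[4] + ip[8]  */
    int c1 = in[1] - in[2];          /* cl: ip[4] - ip[8]  */
    int d1 = in[0] - in[3];          /* dl: ip[0] - ip[12] */

    if (round) {
        out[0] = (short)((a1 + b1 + 3) >> 3);
        out[1] = (short)((d1 + c1 + 3) >> 3);
        out[2] = (short)((a1 - b1 + 3) >> 3);
        out[3] = (short)((d1 - c1 + 3) >> 3);
    } else {
        out[0] = (short)(a1 + b1);
        out[1] = (short)(d1 + c1);
        out[2] = (short)(a1 - b1);
        out[3] = (short)(d1 - c1);
    }
}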
|
||||
|
|
@@ -0,0 +1,121 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
;void vp8_short_inv_walsh4x4_sse2(short *input, short *output)
|
||||
global sym(vp8_short_inv_walsh4x4_sse2) PRIVATE
|
||||
sym(vp8_short_inv_walsh4x4_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 2
|
||||
; end prolog
|
||||
|
||||
mov rcx, arg(0)
|
||||
mov rdx, arg(1)
|
||||
mov rax, 30003h
|
||||
|
||||
movdqa xmm0, [rcx + 0] ;ip[4] ip[0]
|
||||
movdqa xmm1, [rcx + 16] ;ip[12] ip[8]
|
||||
|
||||
|
||||
pshufd xmm2, xmm1, 4eh ;ip[8] ip[12]
|
||||
movdqa xmm3, xmm0 ;ip[4] ip[0]
|
||||
|
||||
paddw xmm0, xmm2 ;ip[4]+ip[8] ip[0]+ip[12] aka b1 a1
|
||||
psubw xmm3, xmm2 ;ip[4]-ip[8] ip[0]-ip[12] aka c1 d1
|
||||
|
||||
movdqa xmm4, xmm0
|
||||
punpcklqdq xmm0, xmm3 ;d1 a1
|
||||
punpckhqdq xmm4, xmm3 ;c1 b1
|
||||
|
||||
movdqa xmm1, xmm4 ;c1 b1
|
||||
paddw xmm4, xmm0 ;dl+cl a1+b1 aka op[4] op[0]
|
||||
psubw xmm0, xmm1 ;d1-c1 a1-b1 aka op[12] op[8]
|
||||
|
||||
;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
; 13 12 11 10 03 02 01 00
|
||||
;
|
||||
; 33 32 31 30 23 22 21 20
|
||||
;
|
||||
movdqa xmm3, xmm4 ; 13 12 11 10 03 02 01 00
|
||||
punpcklwd xmm4, xmm0 ; 23 03 22 02 21 01 20 00
|
||||
punpckhwd xmm3, xmm0 ; 33 13 32 12 31 11 30 10
|
||||
movdqa xmm1, xmm4 ; 23 03 22 02 21 01 20 00
|
||||
punpcklwd xmm4, xmm3 ; 31 21 11 01 30 20 10 00
|
||||
punpckhwd xmm1, xmm3 ; 33 23 13 03 32 22 12 02
|
||||
;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
movd xmm0, eax
|
||||
pshufd xmm2, xmm1, 4eh ;ip[8] ip[12]
|
||||
movdqa xmm3, xmm4 ;ip[4] ip[0]
|
||||
|
||||
pshufd xmm0, xmm0, 0 ;03 03 03 03 03 03 03 03
|
||||
|
||||
paddw xmm4, xmm2 ;ip[4]+ip[8] ip[0]+ip[12] aka b1 a1
|
||||
psubw xmm3, xmm2 ;ip[4]-ip[8] ip[0]-ip[12] aka c1 d1
|
||||
|
||||
movdqa xmm5, xmm4
|
||||
punpcklqdq xmm4, xmm3 ;d1 a1
|
||||
punpckhqdq xmm5, xmm3 ;c1 b1
|
||||
|
||||
movdqa xmm1, xmm5 ;c1 b1
|
||||
paddw xmm5, xmm4 ;dl+cl a1+b1 aka op[4] op[0]
|
||||
psubw xmm4, xmm1 ;d1-c1 a1-b1 aka op[12] op[8]
|
||||
|
||||
paddw xmm5, xmm0
|
||||
paddw xmm4, xmm0
|
||||
psraw xmm5, 3
|
||||
psraw xmm4, 3
|
||||
|
||||
movd eax, xmm5
|
||||
movd ecx, xmm4
|
||||
psrldq xmm5, 4
|
||||
psrldq xmm4, 4
|
||||
mov word ptr[rdx+32*0], ax
|
||||
mov word ptr[rdx+32*2], cx
|
||||
shr eax, 16
|
||||
shr ecx, 16
|
||||
mov word ptr[rdx+32*4], ax
|
||||
mov word ptr[rdx+32*6], cx
|
||||
movd eax, xmm5
|
||||
movd ecx, xmm4
|
||||
psrldq xmm5, 4
|
||||
psrldq xmm4, 4
|
||||
mov word ptr[rdx+32*8], ax
|
||||
mov word ptr[rdx+32*10], cx
|
||||
shr eax, 16
|
||||
shr ecx, 16
|
||||
mov word ptr[rdx+32*12], ax
|
||||
mov word ptr[rdx+32*14], cx
|
||||
|
||||
movd eax, xmm5
|
||||
movd ecx, xmm4
|
||||
psrldq xmm5, 4
|
||||
psrldq xmm4, 4
|
||||
mov word ptr[rdx+32*1], ax
|
||||
mov word ptr[rdx+32*3], cx
|
||||
shr eax, 16
|
||||
shr ecx, 16
|
||||
mov word ptr[rdx+32*5], ax
|
||||
mov word ptr[rdx+32*7], cx
|
||||
movd eax, xmm5
|
||||
movd ecx, xmm4
|
||||
mov word ptr[rdx+32*9], ax
|
||||
mov word ptr[rdx+32*11], cx
|
||||
shr eax, 16
|
||||
shr ecx, 16
|
||||
mov word ptr[rdx+32*13], ax
|
||||
mov word ptr[rdx+32*15], cx
|
||||
|
||||
; begin epilog
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
|
@@ -0,0 +1,815 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
%macro LF_ABS 2
|
||||
; %1 value not preserved
|
||||
; %2 value preserved
|
||||
; output in %1
|
||||
movdqa scratch1, %2 ; v2
|
||||
|
||||
psubusb scratch1, %1 ; v2 - v1
|
||||
psubusb %1, %2 ; v1 - v2
|
||||
por %1, scratch1 ; abs(v2 - v1)
|
||||
%endmacro
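
The LF_ABS macro above computes |v1 - v2| for unsigned bytes without a dedicated absolute-difference instruction: both saturating differences are formed and ORed, since one of the two is always zero. A one-lane C sketch of the same idea (illustrative only):

#include <stdint.h>

/* psubusb on a single lane: unsigned subtract, saturating at 0 */
static uint8_t sat_sub_u8(uint8_t a, uint8_t b) {
    return (uint8_t)(a > b ? a - b : 0);
}

/* LF_ABS idea: |a - b| = (a -sat b) | (b -sat a); one operand is 0 */
static uint8_t abs_diff_u8(uint8_t a, uint8_t b) {
    return (uint8_t)(sat_sub_u8(a, b) | sat_sub_u8(b, a));
}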
|
||||
|
||||
%macro LF_FILTER_HEV_MASK 8-9
|
||||
|
||||
LF_ABS %1, %2 ; abs(p3 - p2)
|
||||
LF_ABS %2, %3 ; abs(p2 - p1)
|
||||
pmaxub %1, %2 ; accumulate mask
|
||||
%if %0 == 8
|
||||
movdqa scratch2, %3 ; save p1
|
||||
LF_ABS scratch2, %4 ; abs(p1 - p0)
|
||||
%endif
|
||||
LF_ABS %4, %5 ; abs(p0 - q0)
|
||||
LF_ABS %5, %6 ; abs(q0 - q1)
|
||||
%if %0 == 8
|
||||
pmaxub %5, scratch2 ; accumulate hev
|
||||
%else
|
||||
pmaxub %5, %9
|
||||
%endif
|
||||
pmaxub %1, %5 ; accumulate mask
|
||||
|
||||
LF_ABS %3, %6 ; abs(p1 - q1)
|
||||
LF_ABS %6, %7 ; abs(q1 - q2)
|
||||
pmaxub %1, %6 ; accumulate mask
|
||||
LF_ABS %7, %8 ; abs(q2 - q3)
|
||||
pmaxub %1, %7 ; accumulate mask
|
||||
|
||||
paddusb %4, %4 ; 2 * abs(p0 - q0)
|
||||
pand %3, [GLOBAL(tfe)]
|
||||
psrlw %3, 1 ; abs(p1 - q1) / 2
|
||||
paddusb %4, %3 ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
|
||||
|
||||
psubusb %1, [limit]
|
||||
psubusb %4, [blimit]
|
||||
por %1, %4
|
||||
pcmpeqb %1, zero ; mask
|
||||
|
||||
psubusb %5, [thresh]
|
||||
pcmpeqb %5, zero ; ~hev
|
||||
%endmacro
|
||||
|
||||
%macro LF_FILTER 6
|
||||
; %1-%4: p1-q1
|
||||
; %5: mask
|
||||
; %6: hev
|
||||
|
||||
movdqa scratch2, %6 ; save hev
|
||||
|
||||
pxor %1, [GLOBAL(t80)] ; ps1
|
||||
pxor %4, [GLOBAL(t80)] ; qs1
|
||||
movdqa scratch1, %1
|
||||
psubsb scratch1, %4 ; signed_char_clamp(ps1 - qs1)
|
||||
pandn scratch2, scratch1 ; vp8_filter &= hev
|
||||
|
||||
pxor %2, [GLOBAL(t80)] ; ps0
|
||||
pxor %3, [GLOBAL(t80)] ; qs0
|
||||
movdqa scratch1, %3
|
||||
psubsb scratch1, %2 ; qs0 - ps0
|
||||
paddsb scratch2, scratch1 ; vp8_filter += (qs0 - ps0)
|
||||
paddsb scratch2, scratch1 ; vp8_filter += (qs0 - ps0)
|
||||
paddsb scratch2, scratch1 ; vp8_filter += (qs0 - ps0)
|
||||
pand %5, scratch2 ; &= mask
|
||||
|
||||
movdqa scratch2, %5
|
||||
paddsb %5, [GLOBAL(t4)] ; Filter1
|
||||
paddsb scratch2, [GLOBAL(t3)] ; Filter2
|
||||
|
||||
; Filter1 >> 3
|
||||
movdqa scratch1, zero
|
||||
pcmpgtb scratch1, %5
|
||||
psrlw %5, 3
|
||||
pand scratch1, [GLOBAL(te0)]
|
||||
pand %5, [GLOBAL(t1f)]
|
||||
por %5, scratch1
|
||||
|
||||
psubsb %3, %5 ; qs0 - Filter1
|
||||
pxor %3, [GLOBAL(t80)]
|
||||
|
||||
; Filter2 >> 3
|
||||
movdqa scratch1, zero
|
||||
pcmpgtb scratch1, scratch2
|
||||
psrlw scratch2, 3
|
||||
pand scratch1, [GLOBAL(te0)]
|
||||
pand scratch2, [GLOBAL(t1f)]
|
||||
por scratch2, scratch1
|
||||
|
||||
paddsb %2, scratch2 ; ps0 + Filter2
|
||||
pxor %2, [GLOBAL(t80)]
|
||||
|
||||
; outer tap adjustments
|
||||
paddsb %5, [GLOBAL(t1)]
|
||||
movdqa scratch1, zero
|
||||
pcmpgtb scratch1, %5
|
||||
psrlw %5, 1
|
||||
pand scratch1, [GLOBAL(t80)]
|
||||
pand %5, [GLOBAL(t7f)]
|
||||
por %5, scratch1
|
||||
pand %5, %6 ; vp8_filter &= ~hev
|
||||
|
||||
psubsb %4, %5 ; qs1 - vp8_filter
|
||||
pxor %4, [GLOBAL(t80)]
|
||||
|
||||
paddsb %1, %5 ; ps1 + vp8_filter
|
||||
pxor %1, [GLOBAL(t80)]
|
||||
%endmacro
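
The "Filter1 >> 3" and "Filter2 >> 3" steps above emulate an arithmetic right shift on signed bytes, which SSE2 lacks: psrlw shifts the 16-bit lanes, the t1f mask drops the bits that leaked in from the neighbouring byte, and the pcmpgtb/te0 pair re-inserts the sign bits. A one-lane C sketch of the same trick (illustrative only):

#include <stdint.h>

/* Arithmetic >>3 of a signed byte built from logical operations,
   mirroring the psrlw / pand t1f / pcmpgtb / pand te0 / por sequence. */
static int8_t sar3_i8(int8_t v)
{
    uint8_t u = (uint8_t)((uint8_t)v >> 3) & 0x1f;  /* logical shift, keep 5 bits */
    if (v < 0)
        u |= 0xe0;                                  /* restore the sign bits */
    return (int8_t)u;
}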
|
||||
|
||||
;void vp8_loop_filter_bh_y_sse2
|
||||
;(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_pixel_step,
|
||||
; const char *blimit,
|
||||
; const char *limit,
|
||||
; const char *thresh
|
||||
;)
|
||||
global sym(vp8_loop_filter_bh_y_sse2) PRIVATE
|
||||
sym(vp8_loop_filter_bh_y_sse2):
|
||||
|
||||
%if LIBVPX_YASM_WIN64
|
||||
%define src rcx ; src_ptr
|
||||
%define stride rdx ; src_pixel_step
|
||||
%define blimit r8
|
||||
%define limit r9
|
||||
%define thresh r10
|
||||
|
||||
%define spp rax
|
||||
%define stride3 r11
|
||||
%define stride5 r12
|
||||
%define stride7 r13
|
||||
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SAVE_XMM 11
|
||||
push r12
|
||||
push r13
|
||||
mov thresh, arg(4)
|
||||
%else
|
||||
%define src rdi ; src_ptr
|
||||
%define stride rsi ; src_pixel_step
|
||||
%define blimit rdx
|
||||
%define limit rcx
|
||||
%define thresh r8
|
||||
|
||||
%define spp rax
|
||||
%define stride3 r9
|
||||
%define stride5 r10
|
||||
%define stride7 r11
|
||||
%endif
|
||||
|
||||
%define scratch1 xmm5
|
||||
%define scratch2 xmm6
|
||||
%define zero xmm7
|
||||
|
||||
%define i0 [src]
|
||||
%define i1 [spp]
|
||||
%define i2 [src + 2 * stride]
|
||||
%define i3 [spp + 2 * stride]
|
||||
%define i4 [src + 4 * stride]
|
||||
%define i5 [spp + 4 * stride]
|
||||
%define i6 [src + 2 * stride3]
|
||||
%define i7 [spp + 2 * stride3]
|
||||
%define i8 [src + 8 * stride]
|
||||
%define i9 [spp + 8 * stride]
|
||||
%define i10 [src + 2 * stride5]
|
||||
%define i11 [spp + 2 * stride5]
|
||||
%define i12 [src + 4 * stride3]
|
||||
%define i13 [spp + 4 * stride3]
|
||||
%define i14 [src + 2 * stride7]
|
||||
%define i15 [spp + 2 * stride7]
|
||||
|
||||
; prep work
|
||||
lea spp, [src + stride]
|
||||
lea stride3, [stride + 2 * stride]
|
||||
lea stride5, [stride3 + 2 * stride]
|
||||
lea stride7, [stride3 + 4 * stride]
|
||||
pxor zero, zero
|
||||
|
||||
; load the first set into registers
|
||||
movdqa xmm0, i0
|
||||
movdqa xmm1, i1
|
||||
movdqa xmm2, i2
|
||||
movdqa xmm3, i3
|
||||
movdqa xmm4, i4
|
||||
movdqa xmm8, i5
|
||||
movdqa xmm9, i6 ; q2, will contain abs(p1-p0)
|
||||
movdqa xmm10, i7
|
||||
LF_FILTER_HEV_MASK xmm0, xmm1, xmm2, xmm3, xmm4, xmm8, xmm9, xmm10
|
||||
|
||||
movdqa xmm1, i2
|
||||
movdqa xmm2, i3
|
||||
movdqa xmm3, i4
|
||||
movdqa xmm8, i5
|
||||
LF_FILTER xmm1, xmm2, xmm3, xmm8, xmm0, xmm4
|
||||
movdqa i2, xmm1
|
||||
movdqa i3, xmm2
|
||||
|
||||
; second set
|
||||
movdqa i4, xmm3
|
||||
movdqa i5, xmm8
|
||||
|
||||
movdqa xmm0, i6
|
||||
movdqa xmm1, i7
|
||||
movdqa xmm2, i8
|
||||
movdqa xmm4, i9
|
||||
movdqa xmm10, i10 ; q2, will contain abs(p1-p0)
|
||||
movdqa xmm11, i11
|
||||
LF_FILTER_HEV_MASK xmm3, xmm8, xmm0, xmm1, xmm2, xmm4, xmm10, xmm11, xmm9
|
||||
|
||||
movdqa xmm0, i6
|
||||
movdqa xmm1, i7
|
||||
movdqa xmm4, i8
|
||||
movdqa xmm8, i9
|
||||
LF_FILTER xmm0, xmm1, xmm4, xmm8, xmm3, xmm2
|
||||
movdqa i6, xmm0
|
||||
movdqa i7, xmm1
|
||||
|
||||
; last set
|
||||
movdqa i8, xmm4
|
||||
movdqa i9, xmm8
|
||||
|
||||
movdqa xmm0, i10
|
||||
movdqa xmm1, i11
|
||||
movdqa xmm2, i12
|
||||
movdqa xmm3, i13
|
||||
movdqa xmm9, i14 ; q2, will contain abs(p1-p0)
|
||||
movdqa xmm11, i15
|
||||
LF_FILTER_HEV_MASK xmm4, xmm8, xmm0, xmm1, xmm2, xmm3, xmm9, xmm11, xmm10
|
||||
|
||||
movdqa xmm0, i10
|
||||
movdqa xmm1, i11
|
||||
movdqa xmm3, i12
|
||||
movdqa xmm8, i13
|
||||
LF_FILTER xmm0, xmm1, xmm3, xmm8, xmm4, xmm2
|
||||
movdqa i10, xmm0
|
||||
movdqa i11, xmm1
|
||||
movdqa i12, xmm3
|
||||
movdqa i13, xmm8
|
||||
|
||||
%if LIBVPX_YASM_WIN64
|
||||
pop r13
|
||||
pop r12
|
||||
RESTORE_XMM
|
||||
pop rbp
|
||||
%endif
|
||||
|
||||
ret
|
||||
|
||||
|
||||
;void vp8_loop_filter_bv_y_sse2
|
||||
;(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_pixel_step,
|
||||
; const char *blimit,
|
||||
; const char *limit,
|
||||
; const char *thresh
|
||||
;)
|
||||
|
||||
global sym(vp8_loop_filter_bv_y_sse2) PRIVATE
|
||||
sym(vp8_loop_filter_bv_y_sse2):
|
||||
|
||||
%if LIBVPX_YASM_WIN64
|
||||
%define src rcx ; src_ptr
|
||||
%define stride rdx ; src_pixel_step
|
||||
%define blimit r8
|
||||
%define limit r9
|
||||
%define thresh r10
|
||||
|
||||
%define spp rax
|
||||
%define stride3 r11
|
||||
%define stride5 r12
|
||||
%define stride7 r13
|
||||
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SAVE_XMM 15
|
||||
push r12
|
||||
push r13
|
||||
mov thresh, arg(4)
|
||||
%else
|
||||
%define src rdi
|
||||
%define stride rsi
|
||||
%define blimit rdx
|
||||
%define limit rcx
|
||||
%define thresh r8
|
||||
|
||||
%define spp rax
|
||||
%define stride3 r9
|
||||
%define stride5 r10
|
||||
%define stride7 r11
|
||||
%endif
|
||||
|
||||
%define scratch1 xmm5
|
||||
%define scratch2 xmm6
|
||||
%define zero xmm7
|
||||
|
||||
%define s0 [src]
|
||||
%define s1 [spp]
|
||||
%define s2 [src + 2 * stride]
|
||||
%define s3 [spp + 2 * stride]
|
||||
%define s4 [src + 4 * stride]
|
||||
%define s5 [spp + 4 * stride]
|
||||
%define s6 [src + 2 * stride3]
|
||||
%define s7 [spp + 2 * stride3]
|
||||
%define s8 [src + 8 * stride]
|
||||
%define s9 [spp + 8 * stride]
|
||||
%define s10 [src + 2 * stride5]
|
||||
%define s11 [spp + 2 * stride5]
|
||||
%define s12 [src + 4 * stride3]
|
||||
%define s13 [spp + 4 * stride3]
|
||||
%define s14 [src + 2 * stride7]
|
||||
%define s15 [spp + 2 * stride7]
|
||||
|
||||
%define i0 [rsp]
|
||||
%define i1 [rsp + 16]
|
||||
%define i2 [rsp + 32]
|
||||
%define i3 [rsp + 48]
|
||||
%define i4 [rsp + 64]
|
||||
%define i5 [rsp + 80]
|
||||
%define i6 [rsp + 96]
|
||||
%define i7 [rsp + 112]
|
||||
%define i8 [rsp + 128]
|
||||
%define i9 [rsp + 144]
|
||||
%define i10 [rsp + 160]
|
||||
%define i11 [rsp + 176]
|
||||
%define i12 [rsp + 192]
|
||||
%define i13 [rsp + 208]
|
||||
%define i14 [rsp + 224]
|
||||
%define i15 [rsp + 240]
|
||||
|
||||
ALIGN_STACK 16, rax
|
||||
|
||||
; reserve stack space
|
||||
%define temp_storage 0 ; size is 256 (16*16)
|
||||
%define stack_size 256
|
||||
sub rsp, stack_size
|
||||
|
||||
; prep work
|
||||
lea spp, [src + stride]
|
||||
lea stride3, [stride + 2 * stride]
|
||||
lea stride5, [stride3 + 2 * stride]
|
||||
lea stride7, [stride3 + 4 * stride]
|
||||
|
||||
; 8-f
|
||||
movdqa xmm0, s8
|
||||
movdqa xmm1, xmm0
|
||||
punpcklbw xmm0, s9 ; 80 90
|
||||
punpckhbw xmm1, s9 ; 88 98
|
||||
|
||||
movdqa xmm2, s10
|
||||
movdqa xmm3, xmm2
|
||||
punpcklbw xmm2, s11 ; a0 b0
|
||||
punpckhbw xmm3, s11 ; a8 b8
|
||||
|
||||
movdqa xmm4, xmm0
|
||||
punpcklwd xmm0, xmm2 ; 80 90 a0 b0
|
||||
punpckhwd xmm4, xmm2 ; 84 94 a4 b4
|
||||
|
||||
movdqa xmm2, xmm1
|
||||
punpcklwd xmm1, xmm3 ; 88 98 a8 b8
|
||||
punpckhwd xmm2, xmm3 ; 8c 9c ac bc
|
||||
|
||||
; using xmm[0124]
|
||||
; work on next 4 rows
|
||||
|
||||
movdqa xmm3, s12
|
||||
movdqa xmm5, xmm3
|
||||
punpcklbw xmm3, s13 ; c0 d0
|
||||
punpckhbw xmm5, s13 ; c8 d8
|
||||
|
||||
movdqa xmm6, s14
|
||||
movdqa xmm7, xmm6
|
||||
punpcklbw xmm6, s15 ; e0 f0
|
||||
punpckhbw xmm7, s15 ; e8 f8
|
||||
|
||||
movdqa xmm8, xmm3
|
||||
punpcklwd xmm3, xmm6 ; c0 d0 e0 f0
|
||||
punpckhwd xmm8, xmm6 ; c4 d4 e4 f4
|
||||
|
||||
movdqa xmm6, xmm5
|
||||
punpcklwd xmm5, xmm7 ; c8 d8 e8 f8
|
||||
punpckhwd xmm6, xmm7 ; cc dc ec fc
|
||||
|
||||
; pull the third and fourth sets together
|
||||
|
||||
movdqa xmm7, xmm0
|
||||
punpckldq xmm0, xmm3 ; 80 90 a0 b0 c0 d0 e0 f0
|
||||
punpckhdq xmm7, xmm3 ; 82 92 a2 b2 c2 d2 e2 f2
|
||||
|
||||
movdqa xmm3, xmm4
|
||||
punpckldq xmm4, xmm8 ; 84 94 a4 b4 c4 d4 e4 f4
|
||||
punpckhdq xmm3, xmm8 ; 86 96 a6 b6 c6 d6 e6 f6
|
||||
|
||||
movdqa xmm8, xmm1
|
||||
punpckldq xmm1, xmm5 ; 88 98 a8 b8 c8 d8 e8 f8
|
||||
punpckhdq xmm8, xmm5 ; 8a 9a aa ba ca da ea fa
|
||||
|
||||
movdqa xmm5, xmm2
|
||||
punpckldq xmm2, xmm6 ; 8c 9c ac bc cc dc ec fc
|
||||
punpckhdq xmm5, xmm6 ; 8e 9e ae be ce de ee fe
|
||||
|
||||
; save the calculations. we only have 15 registers ...
|
||||
movdqa i0, xmm0
|
||||
movdqa i1, xmm7
|
||||
movdqa i2, xmm4
|
||||
movdqa i3, xmm3
|
||||
movdqa i4, xmm1
|
||||
movdqa i5, xmm8
|
||||
movdqa i6, xmm2
|
||||
movdqa i7, xmm5
|
||||
|
||||
; 0-7
|
||||
movdqa xmm0, s0
|
||||
movdqa xmm1, xmm0
|
||||
punpcklbw xmm0, s1 ; 00 10
|
||||
punpckhbw xmm1, s1 ; 08 18
|
||||
|
||||
movdqa xmm2, s2
|
||||
movdqa xmm3, xmm2
|
||||
punpcklbw xmm2, s3 ; 20 30
|
||||
punpckhbw xmm3, s3 ; 28 38
|
||||
|
||||
movdqa xmm4, xmm0
|
||||
punpcklwd xmm0, xmm2 ; 00 10 20 30
|
||||
punpckhwd xmm4, xmm2 ; 04 14 24 34
|
||||
|
||||
movdqa xmm2, xmm1
|
||||
punpcklwd xmm1, xmm3 ; 08 18 28 38
|
||||
punpckhwd xmm2, xmm3 ; 0c 1c 2c 3c
|
||||
|
||||
; using xmm[0124]
|
||||
; work on next 4 rows
|
||||
|
||||
movdqa xmm3, s4
|
||||
movdqa xmm5, xmm3
|
||||
punpcklbw xmm3, s5 ; 40 50
|
||||
punpckhbw xmm5, s5 ; 48 58
|
||||
|
||||
movdqa xmm6, s6
|
||||
movdqa xmm7, xmm6
|
||||
punpcklbw xmm6, s7 ; 60 70
|
||||
punpckhbw xmm7, s7 ; 68 78
|
||||
|
||||
movdqa xmm8, xmm3
|
||||
punpcklwd xmm3, xmm6 ; 40 50 60 70
|
||||
punpckhwd xmm8, xmm6 ; 44 54 64 74
|
||||
|
||||
movdqa xmm6, xmm5
|
||||
punpcklwd xmm5, xmm7 ; 48 58 68 78
|
||||
punpckhwd xmm6, xmm7 ; 4c 5c 6c 7c
|
||||
|
||||
; pull the first two sets together
|
||||
|
||||
movdqa xmm7, xmm0
|
||||
punpckldq xmm0, xmm3 ; 00 10 20 30 40 50 60 70
|
||||
punpckhdq xmm7, xmm3 ; 02 12 22 32 42 52 62 72
|
||||
|
||||
movdqa xmm3, xmm4
|
||||
punpckldq xmm4, xmm8 ; 04 14 24 34 44 54 64 74
|
||||
punpckhdq xmm3, xmm8 ; 06 16 26 36 46 56 66 76
|
||||
|
||||
movdqa xmm8, xmm1
|
||||
punpckldq xmm1, xmm5 ; 08 18 28 38 48 58 68 78
|
||||
punpckhdq xmm8, xmm5 ; 0a 1a 2a 3a 4a 5a 6a 7a
|
||||
|
||||
movdqa xmm5, xmm2
|
||||
punpckldq xmm2, xmm6 ; 0c 1c 2c 3c 4c 5c 6c 7c
|
||||
punpckhdq xmm5, xmm6 ; 0e 1e 2e 3e 4e 5e 6e 7e
|
||||
; final combination
|
||||
|
||||
movdqa xmm6, xmm0
|
||||
punpcklqdq xmm0, i0
|
||||
punpckhqdq xmm6, i0
|
||||
|
||||
movdqa xmm9, xmm7
|
||||
punpcklqdq xmm7, i1
|
||||
punpckhqdq xmm9, i1
|
||||
|
||||
movdqa xmm10, xmm4
|
||||
punpcklqdq xmm4, i2
|
||||
punpckhqdq xmm10, i2
|
||||
|
||||
movdqa xmm11, xmm3
|
||||
punpcklqdq xmm3, i3
|
||||
punpckhqdq xmm11, i3
|
||||
|
||||
movdqa xmm12, xmm1
|
||||
punpcklqdq xmm1, i4
|
||||
punpckhqdq xmm12, i4
|
||||
|
||||
movdqa xmm13, xmm8
|
||||
punpcklqdq xmm8, i5
|
||||
punpckhqdq xmm13, i5
|
||||
|
||||
movdqa xmm14, xmm2
|
||||
punpcklqdq xmm2, i6
|
||||
punpckhqdq xmm14, i6
|
||||
|
||||
movdqa xmm15, xmm5
|
||||
punpcklqdq xmm5, i7
|
||||
punpckhqdq xmm15, i7
|
||||
|
||||
movdqa i0, xmm0
|
||||
movdqa i1, xmm6
|
||||
movdqa i2, xmm7
|
||||
movdqa i3, xmm9
|
||||
movdqa i4, xmm4
|
||||
movdqa i5, xmm10
|
||||
movdqa i6, xmm3
|
||||
movdqa i7, xmm11
|
||||
movdqa i8, xmm1
|
||||
movdqa i9, xmm12
|
||||
movdqa i10, xmm8
|
||||
movdqa i11, xmm13
|
||||
movdqa i12, xmm2
|
||||
movdqa i13, xmm14
|
||||
movdqa i14, xmm5
|
||||
movdqa i15, xmm15
|
||||
|
||||
; TRANSPOSED DATA AVAILABLE ON THE STACK
|
||||
|
||||
movdqa xmm12, xmm6
|
||||
movdqa xmm13, xmm7
|
||||
|
||||
pxor zero, zero
|
||||
|
||||
LF_FILTER_HEV_MASK xmm0, xmm12, xmm13, xmm9, xmm4, xmm10, xmm3, xmm11
|
||||
|
||||
movdqa xmm1, i2
|
||||
movdqa xmm2, i3
|
||||
movdqa xmm8, i4
|
||||
movdqa xmm9, i5
|
||||
LF_FILTER xmm1, xmm2, xmm8, xmm9, xmm0, xmm4
|
||||
movdqa i2, xmm1
|
||||
movdqa i3, xmm2
|
||||
|
||||
; second set
|
||||
movdqa i4, xmm8
|
||||
movdqa i5, xmm9
|
||||
|
||||
movdqa xmm0, i6
|
||||
movdqa xmm1, i7
|
||||
movdqa xmm2, i8
|
||||
movdqa xmm4, i9
|
||||
movdqa xmm10, i10 ; q2, will contain abs(p1-p0)
|
||||
movdqa xmm11, i11
|
||||
LF_FILTER_HEV_MASK xmm8, xmm9, xmm0, xmm1, xmm2, xmm4, xmm10, xmm11, xmm3
|
||||
|
||||
movdqa xmm0, i6
|
||||
movdqa xmm1, i7
|
||||
movdqa xmm3, i8
|
||||
movdqa xmm4, i9
|
||||
LF_FILTER xmm0, xmm1, xmm3, xmm4, xmm8, xmm2
|
||||
movdqa i6, xmm0
|
||||
movdqa i7, xmm1
|
||||
|
||||
; last set
|
||||
movdqa i8, xmm3
|
||||
movdqa i9, xmm4
|
||||
|
||||
movdqa xmm0, i10
|
||||
movdqa xmm1, i11
|
||||
movdqa xmm2, i12
|
||||
movdqa xmm8, i13
|
||||
movdqa xmm9, i14 ; q2, will contain abs(p1-p0)
|
||||
movdqa xmm11, i15
|
||||
LF_FILTER_HEV_MASK xmm3, xmm4, xmm0, xmm1, xmm2, xmm8, xmm9, xmm11, xmm10
|
||||
|
||||
movdqa xmm0, i10
|
||||
movdqa xmm1, i11
|
||||
movdqa xmm4, i12
|
||||
movdqa xmm8, i13
|
||||
LF_FILTER xmm0, xmm1, xmm4, xmm8, xmm3, xmm2
|
||||
movdqa i10, xmm0
|
||||
movdqa i11, xmm1
|
||||
movdqa i12, xmm4
|
||||
movdqa i13, xmm8
|
||||
|
||||
|
||||
; RESHUFFLE AND WRITE OUT
|
||||
; 8-f
|
||||
movdqa xmm0, i8
|
||||
movdqa xmm1, xmm0
|
||||
punpcklbw xmm0, i9 ; 80 90
|
||||
punpckhbw xmm1, i9 ; 88 98
|
||||
|
||||
movdqa xmm2, i10
|
||||
movdqa xmm3, xmm2
|
||||
punpcklbw xmm2, i11 ; a0 b0
|
||||
punpckhbw xmm3, i11 ; a8 b8
|
||||
|
||||
movdqa xmm4, xmm0
|
||||
punpcklwd xmm0, xmm2 ; 80 90 a0 b0
|
||||
punpckhwd xmm4, xmm2 ; 84 94 a4 b4
|
||||
|
||||
movdqa xmm2, xmm1
|
||||
punpcklwd xmm1, xmm3 ; 88 98 a8 b8
|
||||
punpckhwd xmm2, xmm3 ; 8c 9c ac bc
|
||||
|
||||
; using xmm[0124]
|
||||
; work on next 4 rows
|
||||
|
||||
movdqa xmm3, i12
|
||||
movdqa xmm5, xmm3
|
||||
punpcklbw xmm3, i13 ; c0 d0
|
||||
punpckhbw xmm5, i13 ; c8 d8
|
||||
|
||||
movdqa xmm6, i14
|
||||
movdqa xmm7, xmm6
|
||||
punpcklbw xmm6, i15 ; e0 f0
|
||||
punpckhbw xmm7, i15 ; e8 f8
|
||||
|
||||
movdqa xmm8, xmm3
|
||||
punpcklwd xmm3, xmm6 ; c0 d0 e0 f0
|
||||
punpckhwd xmm8, xmm6 ; c4 d4 e4 f4
|
||||
|
||||
movdqa xmm6, xmm5
|
||||
punpcklwd xmm5, xmm7 ; c8 d8 e8 f8
|
||||
punpckhwd xmm6, xmm7 ; cc dc ec fc
|
||||
|
||||
; pull the third and fourth sets together
|
||||
|
||||
movdqa xmm7, xmm0
|
||||
punpckldq xmm0, xmm3 ; 80 90 a0 b0 c0 d0 e0 f0
|
||||
punpckhdq xmm7, xmm3 ; 82 92 a2 b2 c2 d2 e2 f2
|
||||
|
||||
movdqa xmm3, xmm4
|
||||
punpckldq xmm4, xmm8 ; 84 94 a4 b4 c4 d4 e4 f4
|
||||
punpckhdq xmm3, xmm8 ; 86 96 a6 b6 c6 d6 e6 f6
|
||||
|
||||
movdqa xmm8, xmm1
|
||||
punpckldq xmm1, xmm5 ; 88 98 a8 b8 c8 d8 e8 f8
|
||||
punpckhdq xmm8, xmm5 ; 8a 9a aa ba ca da ea fa
|
||||
|
||||
movdqa xmm5, xmm2
|
||||
punpckldq xmm2, xmm6 ; 8c 9c ac bc cc dc ec fc
|
||||
punpckhdq xmm5, xmm6 ; 8e 9e ae be ce de ee fe
|
||||
|
||||
; save the calculations. we only have 15 registers ...
|
||||
movdqa i8, xmm0
|
||||
movdqa i9, xmm7
|
||||
movdqa i10, xmm4
|
||||
movdqa i11, xmm3
|
||||
movdqa i12, xmm1
|
||||
movdqa i13, xmm8
|
||||
movdqa i14, xmm2
|
||||
movdqa i15, xmm5
|
||||
|
||||
; 0-7
|
||||
movdqa xmm0, i0
|
||||
movdqa xmm1, xmm0
|
||||
punpcklbw xmm0, i1 ; 00 10
|
||||
punpckhbw xmm1, i1 ; 08 18
|
||||
|
||||
movdqa xmm2, i2
|
||||
movdqa xmm3, xmm2
|
||||
punpcklbw xmm2, i3 ; 20 30
|
||||
punpckhbw xmm3, i3 ; 28 38
|
||||
|
||||
movdqa xmm4, xmm0
|
||||
punpcklwd xmm0, xmm2 ; 00 10 20 30
|
||||
punpckhwd xmm4, xmm2 ; 04 14 24 34
|
||||
|
||||
movdqa xmm2, xmm1
|
||||
punpcklwd xmm1, xmm3 ; 08 18 28 38
|
||||
punpckhwd xmm2, xmm3 ; 0c 1c 2c 3c
|
||||
|
||||
; using xmm[0124]
|
||||
; work on next 4 rows
|
||||
|
||||
movdqa xmm3, i4
|
||||
movdqa xmm5, xmm3
|
||||
punpcklbw xmm3, i5 ; 40 50
|
||||
punpckhbw xmm5, i5 ; 48 58
|
||||
|
||||
movdqa xmm6, i6
|
||||
movdqa xmm7, xmm6
|
||||
punpcklbw xmm6, i7 ; 60 70
|
||||
punpckhbw xmm7, i7 ; 68 78
|
||||
|
||||
movdqa xmm8, xmm3
|
||||
punpcklwd xmm3, xmm6 ; 40 50 60 70
|
||||
punpckhwd xmm8, xmm6 ; 44 54 64 74
|
||||
|
||||
movdqa xmm6, xmm5
|
||||
punpcklwd xmm5, xmm7 ; 48 58 68 78
|
||||
punpckhwd xmm6, xmm7 ; 4c 5c 6c 7c
|
||||
|
||||
; pull the first two sets together
|
||||
|
||||
movdqa xmm7, xmm0
|
||||
punpckldq xmm0, xmm3 ; 00 10 20 30 40 50 60 70
|
||||
punpckhdq xmm7, xmm3 ; 02 12 22 32 42 52 62 72
|
||||
|
||||
movdqa xmm3, xmm4
|
||||
punpckldq xmm4, xmm8 ; 04 14 24 34 44 54 64 74
|
||||
punpckhdq xmm3, xmm8 ; 06 16 26 36 46 56 66 76
|
||||
|
||||
movdqa xmm8, xmm1
|
||||
punpckldq xmm1, xmm5 ; 08 18 28 38 48 58 68 78
|
||||
punpckhdq xmm8, xmm5 ; 0a 1a 2a 3a 4a 5a 6a 7a
|
||||
|
||||
movdqa xmm5, xmm2
|
||||
punpckldq xmm2, xmm6 ; 0c 1c 2c 3c 4c 5c 6c 7c
|
||||
punpckhdq xmm5, xmm6 ; 0e 1e 2e 3e 4e 5e 6e 7e
|
||||
; final combination
|
||||
|
||||
movdqa xmm6, xmm0
|
||||
punpcklqdq xmm0, i8
|
||||
punpckhqdq xmm6, i8
|
||||
|
||||
movdqa xmm9, xmm7
|
||||
punpcklqdq xmm7, i9
|
||||
punpckhqdq xmm9, i9
|
||||
|
||||
movdqa xmm10, xmm4
|
||||
punpcklqdq xmm4, i10
|
||||
punpckhqdq xmm10, i10
|
||||
|
||||
movdqa xmm11, xmm3
|
||||
punpcklqdq xmm3, i11
|
||||
punpckhqdq xmm11, i11
|
||||
|
||||
movdqa xmm12, xmm1
|
||||
punpcklqdq xmm1, i12
|
||||
punpckhqdq xmm12, i12
|
||||
|
||||
movdqa xmm13, xmm8
|
||||
punpcklqdq xmm8, i13
|
||||
punpckhqdq xmm13, i13
|
||||
|
||||
movdqa xmm14, xmm2
|
||||
punpcklqdq xmm2, i14
|
||||
punpckhqdq xmm14, i14
|
||||
|
||||
movdqa xmm15, xmm5
|
||||
punpcklqdq xmm5, i15
|
||||
punpckhqdq xmm15, i15
|
||||
|
||||
movdqa s0, xmm0
|
||||
movdqa s1, xmm6
|
||||
movdqa s2, xmm7
|
||||
movdqa s3, xmm9
|
||||
movdqa s4, xmm4
|
||||
movdqa s5, xmm10
|
||||
movdqa s6, xmm3
|
||||
movdqa s7, xmm11
|
||||
movdqa s8, xmm1
|
||||
movdqa s9, xmm12
|
||||
movdqa s10, xmm8
|
||||
movdqa s11, xmm13
|
||||
movdqa s12, xmm2
|
||||
movdqa s13, xmm14
|
||||
movdqa s14, xmm5
|
||||
movdqa s15, xmm15
|
||||
|
||||
; free stack space
|
||||
add rsp, stack_size
|
||||
|
||||
; un-ALIGN_STACK
|
||||
pop rsp
|
||||
|
||||
%if LIBVPX_YASM_WIN64
|
||||
pop r13
|
||||
pop r12
|
||||
RESTORE_XMM
|
||||
pop rbp
|
||||
%endif
|
||||
|
||||
ret
|
||||
|
||||
SECTION_RODATA
|
||||
align 16
|
||||
te0:
|
||||
times 16 db 0xe0
|
||||
align 16
|
||||
t7f:
|
||||
times 16 db 0x7f
|
||||
align 16
|
||||
tfe:
|
||||
times 16 db 0xfe
|
||||
align 16
|
||||
t1f:
|
||||
times 16 db 0x1f
|
||||
align 16
|
||||
t80:
|
||||
times 16 db 0x80
|
||||
align 16
|
||||
t1:
|
||||
times 16 db 0x01
|
||||
align 16
|
||||
t3:
|
||||
times 16 db 0x03
|
||||
align 16
|
||||
t4:
|
||||
times 16 db 0x04
|
File diff suppressed because it is too large
@ -0,0 +1,198 @@
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8/common/loopfilter.h"
|
||||
|
||||
#define prototype_loopfilter(sym) \
|
||||
void sym(unsigned char *src, int pitch, const unsigned char *blimit,\
|
||||
const unsigned char *limit, const unsigned char *thresh, int count)
|
||||
|
||||
#define prototype_loopfilter_nc(sym) \
|
||||
void sym(unsigned char *src, int pitch, const unsigned char *blimit,\
|
||||
const unsigned char *limit, const unsigned char *thresh)
|
||||
|
||||
#define prototype_simple_loopfilter(sym) \
|
||||
void sym(unsigned char *y, int ystride, const unsigned char *blimit)
|
||||
|
||||
prototype_loopfilter(vp8_mbloop_filter_vertical_edge_mmx);
|
||||
prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_mmx);
|
||||
prototype_loopfilter(vp8_loop_filter_vertical_edge_mmx);
|
||||
prototype_loopfilter(vp8_loop_filter_horizontal_edge_mmx);
|
||||
prototype_simple_loopfilter(vp8_loop_filter_simple_horizontal_edge_mmx);
|
||||
prototype_simple_loopfilter(vp8_loop_filter_simple_vertical_edge_mmx);
|
||||
|
||||
#if HAVE_SSE2 && ARCH_X86_64
|
||||
prototype_loopfilter(vp8_loop_filter_bv_y_sse2);
|
||||
prototype_loopfilter(vp8_loop_filter_bh_y_sse2);
|
||||
#else
|
||||
prototype_loopfilter_nc(vp8_loop_filter_vertical_edge_sse2);
|
||||
prototype_loopfilter_nc(vp8_loop_filter_horizontal_edge_sse2);
|
||||
#endif
|
||||
prototype_loopfilter_nc(vp8_mbloop_filter_vertical_edge_sse2);
|
||||
prototype_loopfilter_nc(vp8_mbloop_filter_horizontal_edge_sse2);
|
||||
|
||||
extern loop_filter_uvfunction vp8_loop_filter_horizontal_edge_uv_sse2;
|
||||
extern loop_filter_uvfunction vp8_loop_filter_vertical_edge_uv_sse2;
|
||||
extern loop_filter_uvfunction vp8_mbloop_filter_horizontal_edge_uv_sse2;
|
||||
extern loop_filter_uvfunction vp8_mbloop_filter_vertical_edge_uv_sse2;
|
||||
|
||||
#if HAVE_MMX
|
||||
/* Horizontal MB filtering */
|
||||
void vp8_loop_filter_mbh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
vp8_mbloop_filter_horizontal_edge_mmx(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_mmx(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_mmx(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
|
||||
/* Vertical MB Filtering */
|
||||
void vp8_loop_filter_mbv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
vp8_mbloop_filter_vertical_edge_mmx(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_mmx(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_mmx(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
|
||||
/* Horizontal B Filtering */
|
||||
void vp8_loop_filter_bh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_horizontal_edge_mmx(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_loop_filter_horizontal_edge_mmx(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
|
||||
void vp8_loop_filter_bhs_mmx(unsigned char *y_ptr, int y_stride, const unsigned char *blimit)
|
||||
{
|
||||
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, blimit);
|
||||
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, blimit);
|
||||
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, blimit);
|
||||
}
|
||||
|
||||
|
||||
/* Vertical B Filtering */
|
||||
void vp8_loop_filter_bv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
vp8_loop_filter_vertical_edge_mmx(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_vertical_edge_mmx(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_vertical_edge_mmx(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_vertical_edge_mmx(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_loop_filter_vertical_edge_mmx(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
|
||||
void vp8_loop_filter_bvs_mmx(unsigned char *y_ptr, int y_stride, const unsigned char *blimit)
|
||||
{
|
||||
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 4, y_stride, blimit);
|
||||
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 8, y_stride, blimit);
|
||||
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 12, y_stride, blimit);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/* Horizontal MB filtering */
|
||||
#if HAVE_SSE2
|
||||
void vp8_loop_filter_mbh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr);
|
||||
}
|
||||
|
||||
|
||||
/* Vertical MB Filtering */
|
||||
void vp8_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr);
|
||||
}
|
||||
|
||||
|
||||
/* Horizontal B Filtering */
|
||||
void vp8_loop_filter_bh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
#if ARCH_X86_64
|
||||
vp8_loop_filter_bh_y_sse2(y_ptr, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
#else
|
||||
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr);
|
||||
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr);
|
||||
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr);
|
||||
#endif
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_horizontal_edge_uv_sse2(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, v_ptr + 4 * uv_stride);
|
||||
}
|
||||
|
||||
|
||||
void vp8_loop_filter_bhs_sse2(unsigned char *y_ptr, int y_stride, const unsigned char *blimit)
|
||||
{
|
||||
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, blimit);
|
||||
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, blimit);
|
||||
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, blimit);
|
||||
}
|
||||
|
||||
|
||||
/* Vertical B Filtering */
|
||||
void vp8_loop_filter_bv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
#if ARCH_X86_64
|
||||
vp8_loop_filter_bv_y_sse2(y_ptr, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
#else
|
||||
vp8_loop_filter_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr);
|
||||
vp8_loop_filter_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr);
|
||||
vp8_loop_filter_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr);
|
||||
#endif
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_vertical_edge_uv_sse2(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, v_ptr + 4);
|
||||
}
|
||||
|
||||
|
||||
void vp8_loop_filter_bvs_sse2(unsigned char *y_ptr, int y_stride, const unsigned char *blimit)
|
||||
{
|
||||
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 4, y_stride, blimit);
|
||||
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 8, y_stride, blimit);
|
||||
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 12, y_stride, blimit);
|
||||
}
|
||||
|
||||
#endif
@ -0,0 +1,274 @@
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
|
||||
;void copy_mem8x8_mmx(
|
||||
; unsigned char *src,
|
||||
; int src_stride,
|
||||
; unsigned char *dst,
|
||||
; int dst_stride
|
||||
; )
|
||||
global sym(vp8_copy_mem8x8_mmx) PRIVATE
|
||||
sym(vp8_copy_mem8x8_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 4
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;src;
|
||||
movq mm0, [rsi]
|
||||
|
||||
movsxd rax, dword ptr arg(1) ;src_stride;
|
||||
mov rdi, arg(2) ;dst;
|
||||
|
||||
movq mm1, [rsi+rax]
|
||||
movq mm2, [rsi+rax*2]
|
||||
|
||||
movsxd rcx, dword ptr arg(3) ;dst_stride
|
||||
lea rsi, [rsi+rax*2]
|
||||
|
||||
movq [rdi], mm0
|
||||
add rsi, rax
|
||||
|
||||
movq [rdi+rcx], mm1
|
||||
movq [rdi+rcx*2], mm2
|
||||
|
||||
|
||||
lea rdi, [rdi+rcx*2]
|
||||
movq mm3, [rsi]
|
||||
|
||||
add rdi, rcx
|
||||
movq mm4, [rsi+rax]
|
||||
|
||||
movq mm5, [rsi+rax*2]
|
||||
movq [rdi], mm3
|
||||
|
||||
lea rsi, [rsi+rax*2]
|
||||
movq [rdi+rcx], mm4
|
||||
|
||||
movq [rdi+rcx*2], mm5
|
||||
lea rdi, [rdi+rcx*2]
|
||||
|
||||
movq mm0, [rsi+rax]
|
||||
movq mm1, [rsi+rax*2]
|
||||
|
||||
movq [rdi+rcx], mm0
|
||||
movq [rdi+rcx*2],mm1
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
;void copy_mem8x4_mmx(
|
||||
; unsigned char *src,
|
||||
; int src_stride,
|
||||
; unsigned char *dst,
|
||||
; int dst_stride
|
||||
; )
|
||||
global sym(vp8_copy_mem8x4_mmx) PRIVATE
|
||||
sym(vp8_copy_mem8x4_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 4
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;src;
|
||||
movq mm0, [rsi]
|
||||
|
||||
movsxd rax, dword ptr arg(1) ;src_stride;
|
||||
mov rdi, arg(2) ;dst;
|
||||
|
||||
movq mm1, [rsi+rax]
|
||||
movq mm2, [rsi+rax*2]
|
||||
|
||||
movsxd rcx, dword ptr arg(3) ;dst_stride
|
||||
lea rsi, [rsi+rax*2]
|
||||
|
||||
movq [rdi], mm0
|
||||
movq [rdi+rcx], mm1
|
||||
|
||||
movq [rdi+rcx*2], mm2
|
||||
lea rdi, [rdi+rcx*2]
|
||||
|
||||
movq mm3, [rsi+rax]
|
||||
movq [rdi+rcx], mm3
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
;void copy_mem16x16_mmx(
|
||||
; unsigned char *src,
|
||||
; int src_stride,
|
||||
; unsigned char *dst,
|
||||
; int dst_stride
|
||||
; )
|
||||
global sym(vp8_copy_mem16x16_mmx) PRIVATE
|
||||
sym(vp8_copy_mem16x16_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 4
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;src;
|
||||
movsxd rax, dword ptr arg(1) ;src_stride;
|
||||
|
||||
mov rdi, arg(2) ;dst;
|
||||
movsxd rcx, dword ptr arg(3) ;dst_stride
|
||||
|
||||
movq mm0, [rsi]
|
||||
movq mm3, [rsi+8];
|
||||
|
||||
movq mm1, [rsi+rax]
|
||||
movq mm4, [rsi+rax+8]
|
||||
|
||||
movq mm2, [rsi+rax*2]
|
||||
movq mm5, [rsi+rax*2+8]
|
||||
|
||||
lea rsi, [rsi+rax*2]
|
||||
add rsi, rax
|
||||
|
||||
movq [rdi], mm0
|
||||
movq [rdi+8], mm3
|
||||
|
||||
movq [rdi+rcx], mm1
|
||||
movq [rdi+rcx+8], mm4
|
||||
|
||||
movq [rdi+rcx*2], mm2
|
||||
movq [rdi+rcx*2+8], mm5
|
||||
|
||||
lea rdi, [rdi+rcx*2]
|
||||
add rdi, rcx
|
||||
|
||||
movq mm0, [rsi]
|
||||
movq mm3, [rsi+8];
|
||||
|
||||
movq mm1, [rsi+rax]
|
||||
movq mm4, [rsi+rax+8]
|
||||
|
||||
movq mm2, [rsi+rax*2]
|
||||
movq mm5, [rsi+rax*2+8]
|
||||
|
||||
lea rsi, [rsi+rax*2]
|
||||
add rsi, rax
|
||||
|
||||
movq [rdi], mm0
|
||||
movq [rdi+8], mm3
|
||||
|
||||
movq [rdi+rcx], mm1
|
||||
movq [rdi+rcx+8], mm4
|
||||
|
||||
movq [rdi+rcx*2], mm2
|
||||
movq [rdi+rcx*2+8], mm5
|
||||
|
||||
lea rdi, [rdi+rcx*2]
|
||||
add rdi, rcx
|
||||
|
||||
movq mm0, [rsi]
|
||||
movq mm3, [rsi+8];
|
||||
|
||||
movq mm1, [rsi+rax]
|
||||
movq mm4, [rsi+rax+8]
|
||||
|
||||
movq mm2, [rsi+rax*2]
|
||||
movq mm5, [rsi+rax*2+8]
|
||||
|
||||
lea rsi, [rsi+rax*2]
|
||||
add rsi, rax
|
||||
|
||||
movq [rdi], mm0
|
||||
movq [rdi+8], mm3
|
||||
|
||||
movq [rdi+rcx], mm1
|
||||
movq [rdi+rcx+8], mm4
|
||||
|
||||
movq [rdi+rcx*2], mm2
|
||||
movq [rdi+rcx*2+8], mm5
|
||||
|
||||
lea rdi, [rdi+rcx*2]
|
||||
add rdi, rcx
|
||||
|
||||
movq mm0, [rsi]
|
||||
movq mm3, [rsi+8];
|
||||
|
||||
movq mm1, [rsi+rax]
|
||||
movq mm4, [rsi+rax+8]
|
||||
|
||||
movq mm2, [rsi+rax*2]
|
||||
movq mm5, [rsi+rax*2+8]
|
||||
|
||||
lea rsi, [rsi+rax*2]
|
||||
add rsi, rax
|
||||
|
||||
movq [rdi], mm0
|
||||
movq [rdi+8], mm3
|
||||
|
||||
movq [rdi+rcx], mm1
|
||||
movq [rdi+rcx+8], mm4
|
||||
|
||||
movq [rdi+rcx*2], mm2
|
||||
movq [rdi+rcx*2+8], mm5
|
||||
|
||||
lea rdi, [rdi+rcx*2]
|
||||
add rdi, rcx
|
||||
|
||||
movq mm0, [rsi]
|
||||
movq mm3, [rsi+8];
|
||||
|
||||
movq mm1, [rsi+rax]
|
||||
movq mm4, [rsi+rax+8]
|
||||
|
||||
movq mm2, [rsi+rax*2]
|
||||
movq mm5, [rsi+rax*2+8]
|
||||
|
||||
lea rsi, [rsi+rax*2]
|
||||
add rsi, rax
|
||||
|
||||
movq [rdi], mm0
|
||||
movq [rdi+8], mm3
|
||||
|
||||
movq [rdi+rcx], mm1
|
||||
movq [rdi+rcx+8], mm4
|
||||
|
||||
movq [rdi+rcx*2], mm2
|
||||
movq [rdi+rcx*2+8], mm5
|
||||
|
||||
lea rdi, [rdi+rcx*2]
|
||||
add rdi, rcx
|
||||
|
||||
movq mm0, [rsi]
|
||||
movq mm3, [rsi+8];
|
||||
|
||||
movq [rdi], mm0
|
||||
movq [rdi+8], mm3
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
@ -0,0 +1,116 @@
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
;void copy_mem16x16_sse2(
|
||||
; unsigned char *src,
|
||||
; int src_stride,
|
||||
; unsigned char *dst,
|
||||
; int dst_stride
|
||||
; )
|
||||
global sym(vp8_copy_mem16x16_sse2) PRIVATE
|
||||
sym(vp8_copy_mem16x16_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 4
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;src;
|
||||
movdqu xmm0, [rsi]
|
||||
|
||||
movsxd rax, dword ptr arg(1) ;src_stride;
|
||||
mov rdi, arg(2) ;dst;
|
||||
|
||||
movdqu xmm1, [rsi+rax]
|
||||
movdqu xmm2, [rsi+rax*2]
|
||||
|
||||
movsxd rcx, dword ptr arg(3) ;dst_stride
|
||||
lea rsi, [rsi+rax*2]
|
||||
|
||||
movdqa [rdi], xmm0
|
||||
add rsi, rax
|
||||
|
||||
movdqa [rdi+rcx], xmm1
|
||||
movdqa [rdi+rcx*2],xmm2
|
||||
|
||||
lea rdi, [rdi+rcx*2]
|
||||
movdqu xmm3, [rsi]
|
||||
|
||||
add rdi, rcx
|
||||
movdqu xmm4, [rsi+rax]
|
||||
|
||||
movdqu xmm5, [rsi+rax*2]
|
||||
lea rsi, [rsi+rax*2]
|
||||
|
||||
movdqa [rdi], xmm3
|
||||
add rsi, rax
|
||||
|
||||
movdqa [rdi+rcx], xmm4
|
||||
movdqa [rdi+rcx*2],xmm5
|
||||
|
||||
lea rdi, [rdi+rcx*2]
|
||||
movdqu xmm0, [rsi]
|
||||
|
||||
add rdi, rcx
|
||||
movdqu xmm1, [rsi+rax]
|
||||
|
||||
movdqu xmm2, [rsi+rax*2]
|
||||
lea rsi, [rsi+rax*2]
|
||||
|
||||
movdqa [rdi], xmm0
|
||||
add rsi, rax
|
||||
|
||||
movdqa [rdi+rcx], xmm1
|
||||
|
||||
movdqa [rdi+rcx*2], xmm2
|
||||
movdqu xmm3, [rsi]
|
||||
|
||||
movdqu xmm4, [rsi+rax]
|
||||
lea rdi, [rdi+rcx*2]
|
||||
|
||||
add rdi, rcx
|
||||
movdqu xmm5, [rsi+rax*2]
|
||||
|
||||
lea rsi, [rsi+rax*2]
|
||||
movdqa [rdi], xmm3
|
||||
|
||||
add rsi, rax
|
||||
movdqa [rdi+rcx], xmm4
|
||||
|
||||
movdqa [rdi+rcx*2],xmm5
|
||||
movdqu xmm0, [rsi]
|
||||
|
||||
lea rdi, [rdi+rcx*2]
|
||||
movdqu xmm1, [rsi+rax]
|
||||
|
||||
add rdi, rcx
|
||||
movdqu xmm2, [rsi+rax*2]
|
||||
|
||||
lea rsi, [rsi+rax*2]
|
||||
movdqa [rdi], xmm0
|
||||
|
||||
movdqa [rdi+rcx], xmm1
|
||||
movdqa [rdi+rcx*2],xmm2
|
||||
|
||||
movdqu xmm3, [rsi+rax]
|
||||
lea rdi, [rdi+rcx*2]
|
||||
|
||||
movdqa [rdi+rcx], xmm3
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
@ -0,0 +1,702 @@
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
extern sym(vp8_bilinear_filters_x86_8)
|
||||
|
||||
|
||||
%define BLOCK_HEIGHT_WIDTH 4
|
||||
%define vp8_filter_weight 128
|
||||
%define VP8_FILTER_SHIFT 7
|
||||
|
||||
|
||||
;void vp8_filter_block1d_h6_mmx
|
||||
;(
|
||||
; unsigned char *src_ptr,
|
||||
; unsigned short *output_ptr,
|
||||
; unsigned int src_pixels_per_line,
|
||||
; unsigned int pixel_step,
|
||||
; unsigned int output_height,
|
||||
; unsigned int output_width,
|
||||
; short * vp8_filter
|
||||
;)
|
||||
global sym(vp8_filter_block1d_h6_mmx) PRIVATE
|
||||
sym(vp8_filter_block1d_h6_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 7
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rdx, arg(6) ;vp8_filter
|
||||
|
||||
movq mm1, [rdx + 16] ; do both the negative taps first!!!
|
||||
movq mm2, [rdx + 32] ;
|
||||
movq mm6, [rdx + 48] ;
|
||||
movq mm7, [rdx + 64] ;
|
||||
|
||||
mov rdi, arg(1) ;output_ptr
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
movsxd rcx, dword ptr arg(4) ;output_height
|
||||
movsxd rax, dword ptr arg(5) ;output_width ; destination pitch?
|
||||
pxor mm0, mm0 ; mm0 = 00000000
|
||||
|
||||
.nextrow:
|
||||
movq mm3, [rsi-2] ; mm3 = p-2..p5
|
||||
movq mm4, mm3 ; mm4 = p-2..p5
|
||||
psrlq mm3, 8 ; mm3 = p-1..p5
|
||||
punpcklbw mm3, mm0 ; mm3 = p-1..p2
|
||||
pmullw mm3, mm1 ; mm3 *= kernel 1 modifiers.
|
||||
|
||||
movq mm5, mm4 ; mm5 = p-2..p5
|
||||
punpckhbw mm4, mm0 ; mm5 = p2..p5
|
||||
pmullw mm4, mm7 ; mm5 *= kernel 4 modifiers
|
||||
paddsw mm3, mm4 ; mm3 += mm5
|
||||
|
||||
movq mm4, mm5 ; mm4 = p-2..p5;
|
||||
psrlq mm5, 16 ; mm5 = p0..p5;
|
||||
punpcklbw mm5, mm0 ; mm5 = p0..p3
|
||||
pmullw mm5, mm2 ; mm5 *= kernel 2 modifiers
|
||||
paddsw mm3, mm5 ; mm3 += mm5
|
||||
|
||||
movq mm5, mm4 ; mm5 = p-2..p5
|
||||
psrlq mm4, 24 ; mm4 = p1..p5
|
||||
punpcklbw mm4, mm0 ; mm4 = p1..p4
|
||||
pmullw mm4, mm6 ; mm5 *= kernel 3 modifiers
|
||||
paddsw mm3, mm4 ; mm3 += mm5
|
||||
|
||||
; do outer positive taps
|
||||
movd mm4, [rsi+3]
|
||||
punpcklbw mm4, mm0 ; mm5 = p3..p6
|
||||
pmullw mm4, [rdx+80] ; mm5 *= kernel 0 modifiers
|
||||
paddsw mm3, mm4 ; mm3 += mm5
|
||||
|
||||
punpcklbw mm5, mm0 ; mm5 = p-2..p1
|
||||
pmullw mm5, [rdx] ; mm5 *= kernel 5 modifiers
|
||||
paddsw mm3, mm5 ; mm3 += mm5
|
||||
|
||||
paddsw mm3, [GLOBAL(rd)] ; mm3 += round value
|
||||
psraw mm3, VP8_FILTER_SHIFT ; mm3 /= 128
|
||||
packuswb mm3, mm0 ; pack and unpack to saturate
|
||||
punpcklbw mm3, mm0 ;
|
||||
|
||||
movq [rdi], mm3 ; store the results in the destination
|
||||
|
||||
%if ABI_IS_32BIT
|
||||
add rsi, dword ptr arg(2) ;src_pixels_per_line ; next line
|
||||
add rdi, rax;
|
||||
%else
|
||||
movsxd r8, dword ptr arg(2) ;src_pixels_per_line
|
||||
add rdi, rax;
|
||||
|
||||
add rsi, r8 ; next line
|
||||
%endif
|
||||
|
||||
dec rcx ; decrement count
|
||||
jnz .nextrow ; next row
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
;void vp8_filter_block1dc_v6_mmx
|
||||
;(
|
||||
; short *src_ptr,
|
||||
; unsigned char *output_ptr,
|
||||
; int output_pitch,
|
||||
; unsigned int pixels_per_line,
|
||||
; unsigned int pixel_step,
|
||||
; unsigned int output_height,
|
||||
; unsigned int output_width,
|
||||
; short * vp8_filter
|
||||
;)
|
||||
global sym(vp8_filter_block1dc_v6_mmx) PRIVATE
|
||||
sym(vp8_filter_block1dc_v6_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 8
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
movq mm5, [GLOBAL(rd)]
|
||||
push rbx
|
||||
mov rbx, arg(7) ;vp8_filter
|
||||
movq mm1, [rbx + 16] ; do both the negative taps first!!!
|
||||
movq mm2, [rbx + 32] ;
|
||||
movq mm6, [rbx + 48] ;
|
||||
movq mm7, [rbx + 64] ;
|
||||
|
||||
movsxd rdx, dword ptr arg(3) ;pixels_per_line
|
||||
mov rdi, arg(1) ;output_ptr
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
sub rsi, rdx
|
||||
sub rsi, rdx
|
||||
movsxd rcx, DWORD PTR arg(5) ;output_height
|
||||
movsxd rax, DWORD PTR arg(2) ;output_pitch ; destination pitch?
|
||||
pxor mm0, mm0 ; mm0 = 00000000
|
||||
|
||||
|
||||
.nextrow_cv:
|
||||
movq mm3, [rsi+rdx] ; mm3 = p0..p8 = row -1
|
||||
pmullw mm3, mm1 ; mm3 *= kernel 1 modifiers.
|
||||
|
||||
|
||||
movq mm4, [rsi + 4*rdx] ; mm4 = p0..p3 = row 2
|
||||
pmullw mm4, mm7 ; mm4 *= kernel 4 modifiers.
|
||||
paddsw mm3, mm4 ; mm3 += mm4
|
||||
|
||||
movq mm4, [rsi + 2*rdx] ; mm4 = p0..p3 = row 0
|
||||
pmullw mm4, mm2 ; mm4 *= kernel 2 modifiers.
|
||||
paddsw mm3, mm4 ; mm3 += mm4
|
||||
|
||||
movq mm4, [rsi] ; mm4 = p0..p3 = row -2
|
||||
pmullw mm4, [rbx] ; mm4 *= kernel 0 modifiers.
|
||||
paddsw mm3, mm4 ; mm3 += mm4
|
||||
|
||||
|
||||
add rsi, rdx ; move source forward 1 line to avoid 3 * pitch
|
||||
movq mm4, [rsi + 2*rdx] ; mm4 = p0..p3 = row 1
|
||||
pmullw mm4, mm6 ; mm4 *= kernel 3 modifiers.
|
||||
paddsw mm3, mm4 ; mm3 += mm4
|
||||
|
||||
movq mm4, [rsi + 4*rdx] ; mm4 = p0..p3 = row 3
|
||||
pmullw mm4, [rbx +80] ; mm4 *= kernel 5 modifiers.
|
||||
paddsw mm3, mm4 ; mm3 += mm4
|
||||
|
||||
|
||||
paddsw mm3, mm5 ; mm3 += round value
|
||||
psraw mm3, VP8_FILTER_SHIFT ; mm3 /= 128
|
||||
packuswb mm3, mm0 ; pack and saturate
|
||||
|
||||
movd [rdi],mm3 ; store the results in the destination
|
||||
; the subsequent iterations repeat 3 out of 4 of these reads. Since the
; recon block should be in cache, this shouldn't cost much. It's obviously
; avoidable!!!
|
||||
lea rdi, [rdi+rax] ;
|
||||
dec rcx ; decrement count
|
||||
jnz .nextrow_cv ; next row
|
||||
|
||||
pop rbx
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
;void bilinear_predict8x8_mmx
|
||||
;(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_pixels_per_line,
|
||||
; int xoffset,
|
||||
; int yoffset,
|
||||
; unsigned char *dst_ptr,
|
||||
; int dst_pitch
|
||||
;)
|
||||
global sym(vp8_bilinear_predict8x8_mmx) PRIVATE
|
||||
sym(vp8_bilinear_predict8x8_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 6
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
;const short *HFilter = vp8_bilinear_filters_x86_8[xoffset];
|
||||
;const short *VFilter = vp8_bilinear_filters_x86_8[yoffset];
|
||||
|
||||
movsxd rax, dword ptr arg(2) ;xoffset
|
||||
mov rdi, arg(4) ;dst_ptr ;
|
||||
|
||||
shl rax, 5 ; offset * 32
|
||||
lea rcx, [GLOBAL(sym(vp8_bilinear_filters_x86_8))]
|
||||
|
||||
add rax, rcx ; HFilter
|
||||
mov rsi, arg(0) ;src_ptr ;
|
||||
|
||||
movsxd rdx, dword ptr arg(5) ;dst_pitch
|
||||
movq mm1, [rax] ;
|
||||
|
||||
movq mm2, [rax+16] ;
|
||||
movsxd rax, dword ptr arg(3) ;yoffset
|
||||
|
||||
pxor mm0, mm0 ;
|
||||
|
||||
shl rax, 5 ; offset*32
|
||||
add rax, rcx ; VFilter
|
||||
|
||||
lea rcx, [rdi+rdx*8] ;
|
||||
movsxd rdx, dword ptr arg(1) ;src_pixels_per_line ;
|
||||
|
||||
|
||||
|
||||
; get the first horizontal line done ;
|
||||
movq mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
|
||||
movq mm4, mm3 ; make a copy of current line
|
||||
|
||||
punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06
|
||||
punpckhbw mm4, mm0 ;
|
||||
|
||||
pmullw mm3, mm1 ;
|
||||
pmullw mm4, mm1 ;
|
||||
|
||||
movq mm5, [rsi+1] ;
|
||||
movq mm6, mm5 ;
|
||||
|
||||
punpcklbw mm5, mm0 ;
|
||||
punpckhbw mm6, mm0 ;
|
||||
|
||||
pmullw mm5, mm2 ;
|
||||
pmullw mm6, mm2 ;
|
||||
|
||||
paddw mm3, mm5 ;
|
||||
paddw mm4, mm6 ;
|
||||
|
||||
paddw mm3, [GLOBAL(rd)] ; xmm3 += round value
|
||||
psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128
|
||||
|
||||
paddw mm4, [GLOBAL(rd)] ;
|
||||
psraw mm4, VP8_FILTER_SHIFT ;
|
||||
|
||||
movq mm7, mm3 ;
|
||||
packuswb mm7, mm4 ;
|
||||
|
||||
add rsi, rdx ; next line
|
||||
.next_row_8x8:
|
||||
movq mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
|
||||
movq mm4, mm3 ; make a copy of current line
|
||||
|
||||
punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06
|
||||
punpckhbw mm4, mm0 ;
|
||||
|
||||
pmullw mm3, mm1 ;
|
||||
pmullw mm4, mm1 ;
|
||||
|
||||
movq mm5, [rsi+1] ;
|
||||
movq mm6, mm5 ;
|
||||
|
||||
punpcklbw mm5, mm0 ;
|
||||
punpckhbw mm6, mm0 ;
|
||||
|
||||
pmullw mm5, mm2 ;
|
||||
pmullw mm6, mm2 ;
|
||||
|
||||
paddw mm3, mm5 ;
|
||||
paddw mm4, mm6 ;
|
||||
|
||||
movq mm5, mm7 ;
|
||||
movq mm6, mm7 ;
|
||||
|
||||
punpcklbw mm5, mm0 ;
|
||||
punpckhbw mm6, mm0
|
||||
|
||||
pmullw mm5, [rax] ;
|
||||
pmullw mm6, [rax] ;
|
||||
|
||||
paddw mm3, [GLOBAL(rd)] ; xmm3 += round value
|
||||
psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128
|
||||
|
||||
paddw mm4, [GLOBAL(rd)] ;
|
||||
psraw mm4, VP8_FILTER_SHIFT ;
|
||||
|
||||
movq mm7, mm3 ;
|
||||
packuswb mm7, mm4 ;
|
||||
|
||||
|
||||
pmullw mm3, [rax+16] ;
|
||||
pmullw mm4, [rax+16] ;
|
||||
|
||||
paddw mm3, mm5 ;
|
||||
paddw mm4, mm6 ;
|
||||
|
||||
|
||||
paddw mm3, [GLOBAL(rd)] ; xmm3 += round value
|
||||
psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128
|
||||
|
||||
paddw mm4, [GLOBAL(rd)] ;
|
||||
psraw mm4, VP8_FILTER_SHIFT ;
|
||||
|
||||
packuswb mm3, mm4
|
||||
|
||||
movq [rdi], mm3 ; store the results in the destination
|
||||
|
||||
%if ABI_IS_32BIT
|
||||
add rsi, rdx ; next line
|
||||
add rdi, dword ptr arg(5) ;dst_pitch ;
|
||||
%else
|
||||
movsxd r8, dword ptr arg(5) ;dst_pitch
|
||||
add rsi, rdx ; next line
|
||||
add rdi, r8 ;dst_pitch
|
||||
%endif
|
||||
cmp rdi, rcx ;
|
||||
jne .next_row_8x8
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
;void bilinear_predict8x4_mmx
|
||||
;(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_pixels_per_line,
|
||||
; int xoffset,
|
||||
; int yoffset,
|
||||
; unsigned char *dst_ptr,
|
||||
; int dst_pitch
|
||||
;)
|
||||
global sym(vp8_bilinear_predict8x4_mmx) PRIVATE
|
||||
sym(vp8_bilinear_predict8x4_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 6
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
;const short *HFilter = vp8_bilinear_filters_x86_8[xoffset];
|
||||
;const short *VFilter = vp8_bilinear_filters_x86_8[yoffset];
|
||||
|
||||
movsxd rax, dword ptr arg(2) ;xoffset
|
||||
mov rdi, arg(4) ;dst_ptr ;
|
||||
|
||||
lea rcx, [GLOBAL(sym(vp8_bilinear_filters_x86_8))]
|
||||
shl rax, 5
|
||||
|
||||
mov rsi, arg(0) ;src_ptr ;
|
||||
add rax, rcx
|
||||
|
||||
movsxd rdx, dword ptr arg(5) ;dst_pitch
|
||||
movq mm1, [rax] ;
|
||||
|
||||
movq mm2, [rax+16] ;
|
||||
movsxd rax, dword ptr arg(3) ;yoffset
|
||||
|
||||
pxor mm0, mm0 ;
|
||||
shl rax, 5
|
||||
|
||||
add rax, rcx
|
||||
lea rcx, [rdi+rdx*4] ;
|
||||
|
||||
movsxd rdx, dword ptr arg(1) ;src_pixels_per_line ;
|
||||
|
||||
; get the first horizontal line done ;
|
||||
movq mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
|
||||
movq mm4, mm3 ; make a copy of current line
|
||||
|
||||
punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06
|
||||
punpckhbw mm4, mm0 ;
|
||||
|
||||
pmullw mm3, mm1 ;
|
||||
pmullw mm4, mm1 ;
|
||||
|
||||
movq mm5, [rsi+1] ;
|
||||
movq mm6, mm5 ;
|
||||
|
||||
punpcklbw mm5, mm0 ;
|
||||
punpckhbw mm6, mm0 ;
|
||||
|
||||
pmullw mm5, mm2 ;
|
||||
pmullw mm6, mm2 ;
|
||||
|
||||
paddw mm3, mm5 ;
|
||||
paddw mm4, mm6 ;
|
||||
|
||||
paddw mm3, [GLOBAL(rd)] ; xmm3 += round value
|
||||
psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128
|
||||
|
||||
paddw mm4, [GLOBAL(rd)] ;
|
||||
psraw mm4, VP8_FILTER_SHIFT ;
|
||||
|
||||
movq mm7, mm3 ;
|
||||
packuswb mm7, mm4 ;
|
||||
|
||||
add rsi, rdx ; next line
|
||||
.next_row_8x4:
|
||||
movq mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
|
||||
movq mm4, mm3 ; make a copy of current line
|
||||
|
||||
punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06
|
||||
punpckhbw mm4, mm0 ;
|
||||
|
||||
pmullw mm3, mm1 ;
|
||||
pmullw mm4, mm1 ;
|
||||
|
||||
movq mm5, [rsi+1] ;
|
||||
movq mm6, mm5 ;
|
||||
|
||||
punpcklbw mm5, mm0 ;
|
||||
punpckhbw mm6, mm0 ;
|
||||
|
||||
pmullw mm5, mm2 ;
|
||||
pmullw mm6, mm2 ;
|
||||
|
||||
paddw mm3, mm5 ;
|
||||
paddw mm4, mm6 ;
|
||||
|
||||
movq mm5, mm7 ;
|
||||
movq mm6, mm7 ;
|
||||
|
||||
punpcklbw mm5, mm0 ;
|
||||
punpckhbw mm6, mm0
|
||||
|
||||
pmullw mm5, [rax] ;
|
||||
pmullw mm6, [rax] ;
|
||||
|
||||
paddw mm3, [GLOBAL(rd)] ; xmm3 += round value
|
||||
psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128
|
||||
|
||||
paddw mm4, [GLOBAL(rd)] ;
|
||||
psraw mm4, VP8_FILTER_SHIFT ;
|
||||
|
||||
movq mm7, mm3 ;
|
||||
packuswb mm7, mm4 ;
|
||||
|
||||
|
||||
pmullw mm3, [rax+16] ;
|
||||
pmullw mm4, [rax+16] ;
|
||||
|
||||
paddw mm3, mm5 ;
|
||||
paddw mm4, mm6 ;
|
||||
|
||||
|
||||
paddw mm3, [GLOBAL(rd)] ; xmm3 += round value
|
||||
psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128
|
||||
|
||||
paddw mm4, [GLOBAL(rd)] ;
|
||||
psraw mm4, VP8_FILTER_SHIFT ;
|
||||
|
||||
packuswb mm3, mm4
|
||||
|
||||
movq [rdi], mm3 ; store the results in the destination
|
||||
|
||||
%if ABI_IS_32BIT
|
||||
add rsi, rdx ; next line
|
||||
add rdi, dword ptr arg(5) ;dst_pitch ;
|
||||
%else
|
||||
movsxd r8, dword ptr arg(5) ;dst_pitch
|
||||
add rsi, rdx ; next line
|
||||
add rdi, r8
|
||||
%endif
|
||||
cmp rdi, rcx ;
|
||||
jne .next_row_8x4
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
;void bilinear_predict4x4_mmx
|
||||
;(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_pixels_per_line,
|
||||
; int xoffset,
|
||||
; int yoffset,
|
||||
; unsigned char *dst_ptr,
|
||||
; int dst_pitch
|
||||
;)
|
||||
global sym(vp8_bilinear_predict4x4_mmx) PRIVATE
|
||||
sym(vp8_bilinear_predict4x4_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 6
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
;const short *HFilter = vp8_bilinear_filters_x86_8[xoffset];
|
||||
;const short *VFilter = vp8_bilinear_filters_x86_8[yoffset];
|
||||
|
||||
movsxd rax, dword ptr arg(2) ;xoffset
|
||||
mov rdi, arg(4) ;dst_ptr ;
|
||||
|
||||
lea rcx, [GLOBAL(sym(vp8_bilinear_filters_x86_8))]
|
||||
shl rax, 5
|
||||
|
||||
add rax, rcx ; HFilter
|
||||
mov rsi, arg(0) ;src_ptr ;
|
||||
|
||||
movsxd rdx, dword ptr arg(5) ;dst_pitch
|
||||
movq mm1, [rax] ;
|
||||
|
||||
movq mm2, [rax+16] ;
|
||||
movsxd rax, dword ptr arg(3) ;yoffset
|
||||
|
||||
pxor mm0, mm0 ;
|
||||
shl rax, 5
|
||||
|
||||
add rax, rcx
|
||||
lea rcx, [rdi+rdx*4] ;
|
||||
|
||||
movsxd rdx, dword ptr arg(1) ;src_pixels_per_line ;
|
||||
|
||||
; get the first horizontal line done ;
|
||||
movd mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
|
||||
punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06
|
||||
|
||||
pmullw mm3, mm1 ;
|
||||
movd mm5, [rsi+1] ;
|
||||
|
||||
punpcklbw mm5, mm0 ;
|
||||
pmullw mm5, mm2 ;
|
||||
|
||||
paddw mm3, mm5 ;
|
||||
paddw mm3, [GLOBAL(rd)] ; xmm3 += round value
|
||||
|
||||
psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128
|
||||
|
||||
movq mm7, mm3 ;
|
||||
packuswb mm7, mm0 ;
|
||||
|
||||
add rsi, rdx ; next line
|
||||
.next_row_4x4:
|
||||
movd mm3, [rsi] ; xx 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14
|
||||
punpcklbw mm3, mm0 ; xx 00 01 02 03 04 05 06
|
||||
|
||||
pmullw mm3, mm1 ;
|
||||
movd mm5, [rsi+1] ;
|
||||
|
||||
punpcklbw mm5, mm0 ;
|
||||
pmullw mm5, mm2 ;
|
||||
|
||||
paddw mm3, mm5 ;
|
||||
|
||||
movq mm5, mm7 ;
|
||||
punpcklbw mm5, mm0 ;
|
||||
|
||||
pmullw mm5, [rax] ;
|
||||
paddw mm3, [GLOBAL(rd)] ; xmm3 += round value
|
||||
|
||||
psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128
|
||||
movq mm7, mm3 ;
|
||||
|
||||
packuswb mm7, mm0 ;
|
||||
|
||||
pmullw mm3, [rax+16] ;
|
||||
paddw mm3, mm5 ;
|
||||
|
||||
|
||||
paddw mm3, [GLOBAL(rd)] ; xmm3 += round value
|
||||
psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128
|
||||
|
||||
packuswb mm3, mm0
|
||||
movd [rdi], mm3 ; store the results in the destination
|
||||
|
||||
%if ABI_IS_32BIT
|
||||
add rsi, rdx ; next line
|
||||
add rdi, dword ptr arg(5) ;dst_pitch ;
|
||||
%else
|
||||
movsxd r8, dword ptr arg(5) ;dst_pitch ;
|
||||
add rsi, rdx ; next line
|
||||
add rdi, r8
|
||||
%endif
|
||||
|
||||
cmp rdi, rcx ;
|
||||
jne .next_row_4x4
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
|
||||
SECTION_RODATA
|
||||
align 16
|
||||
rd:
|
||||
times 4 dw 0x40
|
||||
|
||||
align 16
|
||||
global HIDDEN_DATA(sym(vp8_six_tap_mmx))
|
||||
sym(vp8_six_tap_mmx):
|
||||
times 8 dw 0
|
||||
times 8 dw 0
|
||||
times 8 dw 128
|
||||
times 8 dw 0
|
||||
times 8 dw 0
|
||||
times 8 dw 0
|
||||
|
||||
times 8 dw 0
|
||||
times 8 dw -6
|
||||
times 8 dw 123
|
||||
times 8 dw 12
|
||||
times 8 dw -1
|
||||
times 8 dw 0
|
||||
|
||||
times 8 dw 2
|
||||
times 8 dw -11
|
||||
times 8 dw 108
|
||||
times 8 dw 36
|
||||
times 8 dw -8
|
||||
times 8 dw 1
|
||||
|
||||
times 8 dw 0
|
||||
times 8 dw -9
|
||||
times 8 dw 93
|
||||
times 8 dw 50
|
||||
times 8 dw -6
|
||||
times 8 dw 0
|
||||
|
||||
times 8 dw 3
|
||||
times 8 dw -16
|
||||
times 8 dw 77
|
||||
times 8 dw 77
|
||||
times 8 dw -16
|
||||
times 8 dw 3
|
||||
|
||||
times 8 dw 0
|
||||
times 8 dw -6
|
||||
times 8 dw 50
|
||||
times 8 dw 93
|
||||
times 8 dw -9
|
||||
times 8 dw 0
|
||||
|
||||
times 8 dw 1
|
||||
times 8 dw -8
|
||||
times 8 dw 36
|
||||
times 8 dw 108
|
||||
times 8 dw -11
|
||||
times 8 dw 2
|
||||
|
||||
times 8 dw 0
|
||||
times 8 dw -1
|
||||
times 8 dw 12
|
||||
times 8 dw 123
|
||||
times 8 dw -6
|
||||
times 8 dw 0
|
||||
|
||||
|
File diff suppressed because it is too large
File diff suppressed because it is too large
@ -0,0 +1,625 @@
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
#include "filter_x86.h"
|
||||
|
||||
extern const short vp8_six_tap_mmx[8][6*8];
|
||||
|
||||
extern void vp8_filter_block1d_h6_mmx
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned short *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int pixel_step,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const short *vp8_filter
|
||||
);
|
||||
extern void vp8_filter_block1dc_v6_mmx
|
||||
(
|
||||
unsigned short *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
int output_pitch,
|
||||
unsigned int pixels_per_line,
|
||||
unsigned int pixel_step,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const short *vp8_filter
|
||||
);
|
||||
extern void vp8_filter_block1d8_h6_sse2
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned short *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int pixel_step,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const short *vp8_filter
|
||||
);
|
||||
extern void vp8_filter_block1d16_h6_sse2
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned short *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int pixel_step,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const short *vp8_filter
|
||||
);
|
||||
extern void vp8_filter_block1d8_v6_sse2
|
||||
(
|
||||
unsigned short *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
int dst_ptich,
|
||||
unsigned int pixels_per_line,
|
||||
unsigned int pixel_step,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const short *vp8_filter
|
||||
);
|
||||
extern void vp8_filter_block1d16_v6_sse2
|
||||
(
|
||||
unsigned short *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
int dst_ptich,
|
||||
unsigned int pixels_per_line,
|
||||
unsigned int pixel_step,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const short *vp8_filter
|
||||
);
|
||||
extern void vp8_unpack_block1d16_h6_sse2
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned short *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width
|
||||
);
|
||||
extern void vp8_filter_block1d8_h6_only_sse2
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned char *output_ptr,
|
||||
int dst_ptich,
|
||||
unsigned int output_height,
|
||||
const short *vp8_filter
|
||||
);
|
||||
extern void vp8_filter_block1d16_h6_only_sse2
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned char *output_ptr,
|
||||
int dst_ptich,
|
||||
unsigned int output_height,
|
||||
const short *vp8_filter
|
||||
);
|
||||
extern void vp8_filter_block1d8_v6_only_sse2
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned char *output_ptr,
|
||||
int dst_ptich,
|
||||
unsigned int output_height,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
|
||||
#if HAVE_MMX
|
||||
void vp8_sixtap_predict4x4_mmx
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
DECLARE_ALIGNED(16, unsigned short, FData2[16*16]); /* Temp data buffer used in filtering */
|
||||
const short *HFilter, *VFilter;
|
||||
HFilter = vp8_six_tap_mmx[xoffset];
|
||||
vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 9, 8, HFilter);
|
||||
VFilter = vp8_six_tap_mmx[yoffset];
|
||||
vp8_filter_block1dc_v6_mmx(FData2 + 8, dst_ptr, dst_pitch, 8, 4 , 4, 4, VFilter);
|
||||
|
||||
}
|
||||
|
||||
|
||||
void vp8_sixtap_predict16x16_mmx
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
|
||||
DECLARE_ALIGNED(16, unsigned short, FData2[24*24]); /* Temp data buffer used in filtering */
|
||||
|
||||
const short *HFilter, *VFilter;
|
||||
|
||||
|
||||
HFilter = vp8_six_tap_mmx[xoffset];
|
||||
|
||||
vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 21, 32, HFilter);
|
||||
vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 4, FData2 + 4, src_pixels_per_line, 1, 21, 32, HFilter);
|
||||
vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 8, FData2 + 8, src_pixels_per_line, 1, 21, 32, HFilter);
|
||||
vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 12, FData2 + 12, src_pixels_per_line, 1, 21, 32, HFilter);
|
||||
|
||||
VFilter = vp8_six_tap_mmx[yoffset];
|
||||
vp8_filter_block1dc_v6_mmx(FData2 + 32, dst_ptr, dst_pitch, 32, 16 , 16, 16, VFilter);
|
||||
vp8_filter_block1dc_v6_mmx(FData2 + 36, dst_ptr + 4, dst_pitch, 32, 16 , 16, 16, VFilter);
|
||||
vp8_filter_block1dc_v6_mmx(FData2 + 40, dst_ptr + 8, dst_pitch, 32, 16 , 16, 16, VFilter);
|
||||
vp8_filter_block1dc_v6_mmx(FData2 + 44, dst_ptr + 12, dst_pitch, 32, 16 , 16, 16, VFilter);
|
||||
|
||||
}
|
||||
|
||||
|
||||
void vp8_sixtap_predict8x8_mmx
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
|
||||
DECLARE_ALIGNED(16, unsigned short, FData2[256]); /* Temp data buffer used in filtering */
|
||||
|
||||
const short *HFilter, *VFilter;
|
||||
|
||||
HFilter = vp8_six_tap_mmx[xoffset];
|
||||
vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 13, 16, HFilter);
|
||||
vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 4, FData2 + 4, src_pixels_per_line, 1, 13, 16, HFilter);
|
||||
|
||||
VFilter = vp8_six_tap_mmx[yoffset];
|
||||
vp8_filter_block1dc_v6_mmx(FData2 + 16, dst_ptr, dst_pitch, 16, 8 , 8, 8, VFilter);
|
||||
vp8_filter_block1dc_v6_mmx(FData2 + 20, dst_ptr + 4, dst_pitch, 16, 8 , 8, 8, VFilter);
|
||||
|
||||
}
|
||||
|
||||
|
||||
void vp8_sixtap_predict8x4_mmx
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
|
||||
DECLARE_ALIGNED(16, unsigned short, FData2[256]); /* Temp data buffer used in filtering */
|
||||
|
||||
const short *HFilter, *VFilter;
|
||||
|
||||
HFilter = vp8_six_tap_mmx[xoffset];
|
||||
vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 9, 16, HFilter);
|
||||
vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line) + 4, FData2 + 4, src_pixels_per_line, 1, 9, 16, HFilter);
|
||||
|
||||
VFilter = vp8_six_tap_mmx[yoffset];
|
||||
vp8_filter_block1dc_v6_mmx(FData2 + 16, dst_ptr, dst_pitch, 16, 8 , 4, 8, VFilter);
|
||||
vp8_filter_block1dc_v6_mmx(FData2 + 20, dst_ptr + 4, dst_pitch, 16, 8 , 4, 8, VFilter);
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
void vp8_bilinear_predict16x16_mmx
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
vp8_bilinear_predict8x8_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pitch);
|
||||
vp8_bilinear_predict8x8_mmx(src_ptr + 8, src_pixels_per_line, xoffset, yoffset, dst_ptr + 8, dst_pitch);
|
||||
vp8_bilinear_predict8x8_mmx(src_ptr + 8 * src_pixels_per_line, src_pixels_per_line, xoffset, yoffset, dst_ptr + dst_pitch * 8, dst_pitch);
|
||||
vp8_bilinear_predict8x8_mmx(src_ptr + 8 * src_pixels_per_line + 8, src_pixels_per_line, xoffset, yoffset, dst_ptr + dst_pitch * 8 + 8, dst_pitch);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#if HAVE_SSE2
|
||||
void vp8_sixtap_predict16x16_sse2
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
|
||||
)
|
||||
{
|
||||
DECLARE_ALIGNED(16, unsigned short, FData2[24*24]); /* Temp data buffer used in filtering */
|
||||
|
||||
const short *HFilter, *VFilter;
|
||||
|
||||
if (xoffset)
|
||||
{
|
||||
if (yoffset)
|
||||
{
|
||||
HFilter = vp8_six_tap_mmx[xoffset];
|
||||
vp8_filter_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 21, 32, HFilter);
|
||||
VFilter = vp8_six_tap_mmx[yoffset];
|
||||
vp8_filter_block1d16_v6_sse2(FData2 + 32, dst_ptr, dst_pitch, 32, 16 , 16, dst_pitch, VFilter);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* First-pass only */
|
||||
HFilter = vp8_six_tap_mmx[xoffset];
|
||||
vp8_filter_block1d16_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 16, HFilter);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Second-pass only */
|
||||
VFilter = vp8_six_tap_mmx[yoffset];
|
||||
vp8_unpack_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 21, 32);
|
||||
vp8_filter_block1d16_v6_sse2(FData2 + 32, dst_ptr, dst_pitch, 32, 16 , 16, dst_pitch, VFilter);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void vp8_sixtap_predict8x8_sse2
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
DECLARE_ALIGNED(16, unsigned short, FData2[256]); /* Temp data buffer used in filtering */
|
||||
const short *HFilter, *VFilter;
|
||||
|
||||
if (xoffset)
|
||||
{
|
||||
if (yoffset)
|
||||
{
|
||||
HFilter = vp8_six_tap_mmx[xoffset];
|
||||
vp8_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 13, 16, HFilter);
|
||||
VFilter = vp8_six_tap_mmx[yoffset];
|
||||
vp8_filter_block1d8_v6_sse2(FData2 + 16, dst_ptr, dst_pitch, 16, 8 , 8, dst_pitch, VFilter);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* First-pass only */
|
||||
HFilter = vp8_six_tap_mmx[xoffset];
|
||||
vp8_filter_block1d8_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 8, HFilter);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Second-pass only */
|
||||
VFilter = vp8_six_tap_mmx[yoffset];
|
||||
vp8_filter_block1d8_v6_only_sse2(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 8, VFilter);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void vp8_sixtap_predict8x4_sse2
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
DECLARE_ALIGNED(16, unsigned short, FData2[256]); /* Temp data buffer used in filtering */
|
||||
const short *HFilter, *VFilter;
|
||||
|
||||
if (xoffset)
|
||||
{
|
||||
if (yoffset)
|
||||
{
|
||||
HFilter = vp8_six_tap_mmx[xoffset];
|
||||
vp8_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 9, 16, HFilter);
|
||||
VFilter = vp8_six_tap_mmx[yoffset];
|
||||
vp8_filter_block1d8_v6_sse2(FData2 + 16, dst_ptr, dst_pitch, 16, 8 , 4, dst_pitch, VFilter);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* First-pass only */
|
||||
HFilter = vp8_six_tap_mmx[xoffset];
|
||||
vp8_filter_block1d8_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, HFilter);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Second-pass only */
|
||||
VFilter = vp8_six_tap_mmx[yoffset];
|
||||
vp8_filter_block1d8_v6_only_sse2(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, VFilter);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if HAVE_SSSE3
|
||||
|
||||
extern void vp8_filter_block1d8_h6_ssse3
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned char *output_ptr,
|
||||
unsigned int output_pitch,
|
||||
unsigned int output_height,
|
||||
unsigned int vp8_filter_index
|
||||
);
|
||||
|
||||
extern void vp8_filter_block1d16_h6_ssse3
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned char *output_ptr,
|
||||
unsigned int output_pitch,
|
||||
unsigned int output_height,
|
||||
unsigned int vp8_filter_index
|
||||
);
|
||||
|
||||
extern void vp8_filter_block1d16_v6_ssse3
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned int src_pitch,
|
||||
unsigned char *output_ptr,
|
||||
unsigned int out_pitch,
|
||||
unsigned int output_height,
|
||||
unsigned int vp8_filter_index
|
||||
);
|
||||
|
||||
extern void vp8_filter_block1d8_v6_ssse3
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned int src_pitch,
|
||||
unsigned char *output_ptr,
|
||||
unsigned int out_pitch,
|
||||
unsigned int output_height,
|
||||
unsigned int vp8_filter_index
|
||||
);
|
||||
|
||||
extern void vp8_filter_block1d4_h6_ssse3
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned char *output_ptr,
|
||||
unsigned int output_pitch,
|
||||
unsigned int output_height,
|
||||
unsigned int vp8_filter_index
|
||||
);
|
||||
|
||||
extern void vp8_filter_block1d4_v6_ssse3
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned int src_pitch,
|
||||
unsigned char *output_ptr,
|
||||
unsigned int out_pitch,
|
||||
unsigned int output_height,
|
||||
unsigned int vp8_filter_index
|
||||
);
|
||||
|
||||
void vp8_sixtap_predict16x16_ssse3
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
|
||||
)
|
||||
{
|
||||
DECLARE_ALIGNED(16, unsigned char, FData2[24*24]);
|
||||
|
||||
if (xoffset)
|
||||
{
|
||||
if (yoffset)
|
||||
{
|
||||
vp8_filter_block1d16_h6_ssse3(src_ptr - (2 * src_pixels_per_line),
|
||||
src_pixels_per_line, FData2,
|
||||
16, 21, xoffset);
|
||||
vp8_filter_block1d16_v6_ssse3(FData2 , 16, dst_ptr, dst_pitch,
|
||||
16, yoffset);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* First-pass only */
|
||||
vp8_filter_block1d16_h6_ssse3(src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pitch, 16, xoffset);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (yoffset)
|
||||
{
|
||||
/* Second-pass only */
|
||||
vp8_filter_block1d16_v6_ssse3(src_ptr - (2 * src_pixels_per_line),
|
||||
src_pixels_per_line,
|
||||
dst_ptr, dst_pitch, 16, yoffset);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* ssse3 second-pass only function couldn't handle (xoffset==0 &&
|
||||
* yoffset==0) case correctly. Add copy function here to guarantee
|
||||
* six-tap function handles all possible offsets. */
|
||||
vp8_copy_mem16x16(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_sixtap_predict8x8_ssse3
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
DECLARE_ALIGNED(16, unsigned char, FData2[256]);
|
||||
|
||||
if (xoffset)
|
||||
{
|
||||
if (yoffset)
|
||||
{
|
||||
vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line),
|
||||
src_pixels_per_line, FData2,
|
||||
8, 13, xoffset);
|
||||
vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch,
|
||||
8, yoffset);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pitch, 8, xoffset);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (yoffset)
|
||||
{
|
||||
/* Second-pass only */
|
||||
vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line),
|
||||
src_pixels_per_line,
|
||||
dst_ptr, dst_pitch, 8, yoffset);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* ssse3 second-pass only function couldn't handle (xoffset==0 &&
|
||||
* yoffset==0) case correctly. Add copy function here to guarantee
|
||||
* six-tap function handles all possible offsets. */
|
||||
vp8_copy_mem8x8(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void vp8_sixtap_predict8x4_ssse3
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
DECLARE_ALIGNED(16, unsigned char, FData2[256]);
|
||||
|
||||
if (xoffset)
|
||||
{
|
||||
if (yoffset)
|
||||
{
|
||||
vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line),
|
||||
src_pixels_per_line, FData2,
|
||||
8, 9, xoffset);
|
||||
vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch,
|
||||
4, yoffset);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* First-pass only */
|
||||
vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pitch, 4, xoffset);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (yoffset)
|
||||
{
|
||||
/* Second-pass only */
|
||||
vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line),
|
||||
src_pixels_per_line,
|
||||
dst_ptr, dst_pitch, 4, yoffset);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* ssse3 second-pass only function couldn't handle (xoffset==0 &&
|
||||
* yoffset==0) case correctly. Add copy function here to guarantee
|
||||
* six-tap function handles all possible offsets. */
|
||||
vp8_copy_mem8x4(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_sixtap_predict4x4_ssse3
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
DECLARE_ALIGNED(16, unsigned char, FData2[4*9]);
|
||||
|
||||
if (xoffset)
|
||||
{
|
||||
if (yoffset)
|
||||
{
|
||||
vp8_filter_block1d4_h6_ssse3(src_ptr - (2 * src_pixels_per_line),
|
||||
src_pixels_per_line,
|
||||
FData2, 4, 9, xoffset);
|
||||
vp8_filter_block1d4_v6_ssse3(FData2, 4, dst_ptr, dst_pitch,
|
||||
4, yoffset);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_filter_block1d4_h6_ssse3(src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pitch, 4, xoffset);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (yoffset)
|
||||
{
|
||||
vp8_filter_block1d4_v6_ssse3(src_ptr - (2 * src_pixels_per_line),
|
||||
src_pixels_per_line,
|
||||
dst_ptr, dst_pitch, 4, yoffset);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* ssse3 second-pass only function couldn't handle (xoffset==0 &&
|
||||
* yoffset==0) case correctly. Add copy function here to guarantee
|
||||
* six-tap function handles all possible offsets. */
|
||||
int r;
|
||||
|
||||
for (r = 0; r < 4; r++)
|
||||
{
|
||||
dst_ptr[0] = src_ptr[0];
|
||||
dst_ptr[1] = src_ptr[1];
|
||||
dst_ptr[2] = src_ptr[2];
|
||||
dst_ptr[3] = src_ptr[3];
|
||||
dst_ptr += dst_pitch;
|
||||
src_ptr += src_pixels_per_line;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
File diff suppressed because it is too large
Some files were not shown because too many files have changed in this diff