Merge pull request #81077 from akien-mga/3.5-cherrypicks

Cherry-picks for the 3.5 branch (future 3.5.3) - 2nd batch
This commit is contained in:
Rémi Verschelde 2023-08-29 09:58:34 +02:00 committed by GitHub
commit 08f4e2b835
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
278 changed files with 38503 additions and 37112 deletions

4
.gitignore vendored
View File

@ -62,6 +62,10 @@ tests/data/*.translation
# Binutils tmp linker output of the form "stXXXXXX" where "X" is alphanumeric
st[A-Za-z0-9][A-Za-z0-9][A-Za-z0-9][A-Za-z0-9][A-Za-z0-9][A-Za-z0-9]
# Python development
.venv
venv
# Python generated
__pycache__/
*.pyc

View File

@ -117,16 +117,25 @@ bool Array::operator==(const Array &p_array) const {
}
uint32_t Array::hash() const {
return recursive_hash(0);
}
uint32_t Array::recursive_hash(int p_recursion_count) const {
ERR_FAIL_COND_V_MSG(p_recursion_count > MAX_RECURSION, 0, "Max recursion reached");
p_recursion_count++;
uint32_t h = hash_djb2_one_32(0);
for (int i = 0; i < _p->array.size(); i++) {
h = hash_djb2_one_32(_p->array[i].hash(), h);
h = hash_djb2_one_32(_p->array[i].recursive_hash(p_recursion_count), h);
}
return h;
}
void Array::operator=(const Array &p_array) {
_ref(p_array);
}
void Array::push_back(const Variant &p_value) {
_p->array.push_back(p_value);
}

View File

@ -60,6 +60,7 @@ public:
bool operator==(const Array &p_array) const;
uint32_t hash() const;
uint32_t recursive_hash(int p_recursion_count) const;
void operator=(const Array &p_array);
void push_back(const Variant &p_value);

View File

@ -212,12 +212,20 @@ void Dictionary::_unref() const {
}
_p = nullptr;
}
uint32_t Dictionary::hash() const {
return recursive_hash(0);
}
uint32_t Dictionary::recursive_hash(int p_recursion_count) const {
ERR_FAIL_COND_V_MSG(p_recursion_count > MAX_RECURSION, 0, "Max recursion reached");
p_recursion_count++;
uint32_t h = hash_djb2_one_32(Variant::DICTIONARY);
for (OrderedHashMap<Variant, Variant, VariantHasher, VariantComparator>::Element E = _p->variant_map.front(); E; E = E.next()) {
h = hash_djb2_one_32(E.key().hash(), h);
h = hash_djb2_one_32(E.value().hash(), h);
h = hash_djb2_one_32(E.key().recursive_hash(p_recursion_count), h);
h = hash_djb2_one_32(E.value().recursive_hash(p_recursion_count), h);
}
return h;

View File

@ -74,6 +74,7 @@ public:
bool operator!=(const Dictionary &p_dictionary) const;
uint32_t hash() const;
uint32_t recursive_hash(int p_recursion_count) const;
void operator=(const Dictionary &p_dictionary);
const Variant *next(const Variant *p_key = nullptr) const;

View File

@ -148,7 +148,7 @@ uint64_t FileAccessMemory::get_buffer(uint8_t *p_dst, uint64_t p_length) const {
}
memcpy(p_dst, &data[pos], read);
pos += p_length;
pos += read;
return read;
}
@ -176,7 +176,7 @@ void FileAccessMemory::store_buffer(const uint8_t *p_src, uint64_t p_length) {
}
memcpy(&data[pos], p_src, write);
pos += p_length;
pos += write;
}
FileAccessMemory::FileAccessMemory() {

View File

@ -2508,6 +2508,10 @@ Variant::~Variant() {
}*/
uint32_t Variant::hash() const {
return recursive_hash(0);
}
uint32_t Variant::recursive_hash(int p_recursion_count) const {
switch (type) {
case NIL: {
return 0;
@ -2622,13 +2626,10 @@ uint32_t Variant::hash() const {
return reinterpret_cast<const NodePath *>(_data._mem)->hash();
} break;
case DICTIONARY: {
return reinterpret_cast<const Dictionary *>(_data._mem)->hash();
return reinterpret_cast<const Dictionary *>(_data._mem)->recursive_hash(p_recursion_count);
} break;
case ARRAY: {
const Array &arr = *reinterpret_cast<const Array *>(_data._mem);
return arr.hash();
return reinterpret_cast<const Array *>(_data._mem)->recursive_hash(p_recursion_count);
} break;
case POOL_BYTE_ARRAY: {
const PoolVector<uint8_t> &arr = *reinterpret_cast<const PoolVector<uint8_t> *>(_data._mem);

View File

@ -413,6 +413,7 @@ public:
bool operator!=(const Variant &p_variant) const;
bool operator<(const Variant &p_variant) const;
uint32_t hash() const;
uint32_t recursive_hash(int p_recursion_count) const;
bool hash_compare(const Variant &p_variant) const;
bool booleanize() const;

View File

@ -40,13 +40,13 @@
// Defines the main "branch" version. Patch versions in this branch should be
// forward-compatible.
// Example: "3.1"
#define VERSION_BRANCH "" _MKSTR(VERSION_MAJOR) "." _MKSTR(VERSION_MINOR)
#define VERSION_BRANCH _MKSTR(VERSION_MAJOR) "." _MKSTR(VERSION_MINOR)
#if VERSION_PATCH
// Example: "3.1.4"
#define VERSION_NUMBER "" VERSION_BRANCH "." _MKSTR(VERSION_PATCH)
#define VERSION_NUMBER VERSION_BRANCH "." _MKSTR(VERSION_PATCH)
#else // patch is 0, we don't include it in the "pretty" version number.
// Example: "3.1" instead of "3.1.0"
#define VERSION_NUMBER "" VERSION_BRANCH
#define VERSION_NUMBER VERSION_BRANCH
#endif // VERSION_PATCH
// Version number encoded as hexadecimal int with one byte for each number,
@ -57,16 +57,16 @@
// Describes the full configuration of that Godot version, including the version number,
// the status (beta, stable, etc.) and potential module-specific features (e.g. mono).
// Example: "3.1.4.stable.mono"
#define VERSION_FULL_CONFIG "" VERSION_NUMBER "." VERSION_STATUS VERSION_MODULE_CONFIG
#define VERSION_FULL_CONFIG VERSION_NUMBER "." VERSION_STATUS VERSION_MODULE_CONFIG
// Similar to VERSION_FULL_CONFIG, but also includes the (potentially custom) VERSION_BUILD
// description (e.g. official, custom_build, etc.).
// Example: "3.1.4.stable.mono.official"
#define VERSION_FULL_BUILD "" VERSION_FULL_CONFIG "." VERSION_BUILD
#define VERSION_FULL_BUILD VERSION_FULL_CONFIG "." VERSION_BUILD
// Same as above, but prepended with Godot's name and a cosmetic "v" for "version".
// Example: "Godot v3.1.4.stable.official.mono"
#define VERSION_FULL_NAME "" VERSION_NAME " v" VERSION_FULL_BUILD
#define VERSION_FULL_NAME VERSION_NAME " v" VERSION_FULL_BUILD
// Git commit hash, generated at build time in `core/version_hash.gen.cpp`.
extern const char *const VERSION_HASH;

View File

@ -166,7 +166,7 @@
The desired frames per second. If the hardware cannot keep up, this setting may not be respected. A value of 0 means no limit.
</member>
<member name="time_scale" type="float" setter="set_time_scale" getter="get_time_scale" default="1.0">
Controls how fast or slow the in-game clock ticks versus the real life one. It defaults to 1.0. A value of 2.0 means the game moves twice as fast as real life, whilst a value of 0.5 means the game moves at half the regular speed.
Controls how fast or slow the in-game clock ticks versus the real life one. It defaults to 1.0. A value of 2.0 means the game moves twice as fast as real life, whilst a value of 0.5 means the game moves at half the regular speed. This also affects [Timer] and [SceneTreeTimer] (see [method SceneTree.create_timer] for how to control this).
</member>
</members>
<constants>

View File

@ -145,14 +145,14 @@
<return type="String" />
<argument index="0" name="device" type="int" />
<description>
Returns a SDL2-compatible device GUID on platforms that use gamepad remapping. Returns [code]"Default Gamepad"[/code] otherwise.
Returns a SDL2-compatible device GUID on platforms that use gamepad remapping, e.g. [code]030000004c050000c405000000010000[/code]. Returns [code]"Default Gamepad"[/code] otherwise. Godot uses the [url=https://github.com/gabomdq/SDL_GameControllerDB]SDL2 game controller database[/url] to determine gamepad names and mappings based on this GUID.
</description>
</method>
<method name="get_joy_name">
<return type="String" />
<argument index="0" name="device" type="int" />
<description>
Returns the name of the joypad at the specified device index.
Returns the name of the joypad at the specified device index, e.g. [code]PS4 Controller[/code]. Godot uses the [url=https://github.com/gabomdq/SDL_GameControllerDB]SDL2 game controller database[/url] to determine gamepad names.
</description>
</method>
<method name="get_joy_vibration_duration">
@ -318,7 +318,7 @@
<argument index="2" name="hotspot" type="Vector2" default="Vector2( 0, 0 )" />
<description>
Sets a custom mouse cursor image, which is only visible inside the game window. The hotspot can also be specified. Passing [code]null[/code] to the image parameter resets to the system cursor. See [enum CursorShape] for the list of shapes.
[code]image[/code]'s size must be lower than 256×256.
[code]image[/code]'s size must be lower than or equal to 256×256. To avoid rendering issues, sizes lower than or equal to 128×128 are recommended.
[code]hotspot[/code] must be within [code]image[/code]'s size.
[b]Note:[/b] [AnimatedTexture]s aren't supported as custom mouse cursors. If using an [AnimatedTexture], only the first frame will be displayed.
[b]Note:[/b] Only images imported with the [b]Lossless[/b], [b]Lossy[/b] or [b]Uncompressed[/b] compression modes are supported. The [b]Video RAM[/b] compression mode can't be used for custom cursors.

View File

@ -47,12 +47,22 @@
</member>
<member name="scroll_horizontal" type="int" setter="set_h_scroll" getter="get_h_scroll" default="0">
The current horizontal scroll value.
[b]Note:[/b] If you are setting this value in the [method Node._ready] function or earlier, it needs to be wrapped with [method Object.set_deferred], since scroll bar's [member Range.max_value] is not initialized yet.
[codeblock]
func _ready():
set_deferred("scroll_horizontal", 600)
[/codeblock]
</member>
<member name="scroll_horizontal_enabled" type="bool" setter="set_enable_h_scroll" getter="is_h_scroll_enabled" default="true">
If [code]true[/code], enables horizontal scrolling.
</member>
<member name="scroll_vertical" type="int" setter="set_v_scroll" getter="get_v_scroll" default="0">
The current vertical scroll value.
[b]Note:[/b] Setting it early needs to be deferred, just like in [member scroll_horizontal].
[codeblock]
func _ready():
set_deferred("scroll_vertical", 600)
[/codeblock]
</member>
<member name="scroll_vertical_enabled" type="bool" setter="set_enable_v_scroll" getter="is_v_scroll_enabled" default="true">
If [code]true[/code], enables vertical scrolling.

View File

@ -5,6 +5,7 @@
</brief_description>
<description>
Counts down a specified interval and emits a signal on reaching 0. Can be set to repeat or "one-shot" mode.
[b]Note:[/b] Timers are affected by [member Engine.time_scale], a higher scale means quicker timeouts, and vice versa.
[b]Note:[/b] To create a one-shot timer without instantiating a node, use [method SceneTree.create_timer].
</description>
<tutorials>
@ -52,7 +53,7 @@
</member>
<member name="wait_time" type="float" setter="set_wait_time" getter="get_wait_time" default="1.0">
The wait time in seconds.
[b]Note:[/b] Timers can only emit once per rendered frame at most (or once per physics frame if [member process_mode] is [constant TIMER_PROCESS_PHYSICS]). This means very low wait times (lower than 0.05 seconds) will behave in significantly different ways depending on the rendered framerate. For very low wait times, it is recommended to use a process loop in a script instead of using a Timer node.
[b]Note:[/b] Timers can only emit once per rendered frame at most (or once per physics frame if [member process_mode] is [constant TIMER_PROCESS_PHYSICS]). This means very low wait times (lower than 0.05 seconds) will behave in significantly different ways depending on the rendered framerate. For very low wait times, it is recommended to use a process loop in a script instead of using a Timer node. Timers are affected by [member Engine.time_scale], a higher scale means quicker timeouts, and vice versa.
</member>
</members>
<signals>

View File

@ -317,6 +317,14 @@ Error AudioDriverWASAPI::init_render_device(bool reinit) {
return err;
switch (audio_output.channels) {
case 1: // Mono
case 3: // Surround 2.1
case 5: // Surround 5.0
case 7: // Surround 7.0
// We will downmix as required.
channels = audio_output.channels + 1;
break;
case 2: // Stereo
case 4: // Surround 3.1
case 6: // Surround 5.1
@ -343,7 +351,7 @@ Error AudioDriverWASAPI::init_render_device(bool reinit) {
input_position = 0;
input_size = 0;
print_verbose("WASAPI: detected " + itos(channels) + " channels");
print_verbose("WASAPI: detected " + itos(audio_output.channels) + " channels");
print_verbose("WASAPI: audio buffer frames: " + itos(buffer_frames) + " calculated latency: " + itos(buffer_frames * 1000 / mix_rate) + "ms");
return OK;
@ -584,6 +592,19 @@ void AudioDriverWASAPI::thread_func(void *p_udata) {
for (unsigned int i = 0; i < write_frames * ad->channels; i++) {
ad->write_sample(ad->audio_output.format_tag, ad->audio_output.bits_per_sample, buffer, i, ad->samples_in.write[write_ofs++]);
}
} else if (ad->channels == ad->audio_output.channels + 1) {
// Pass all channels except the last two as-is, and then mix the last two
// together as one channel. E.g. stereo -> mono, or 3.1 -> 2.1.
unsigned int last_chan = ad->audio_output.channels - 1;
for (unsigned int i = 0; i < write_frames; i++) {
for (unsigned int j = 0; j < last_chan; j++) {
ad->write_sample(ad->audio_output.format_tag, ad->audio_output.bits_per_sample, buffer, i * ad->audio_output.channels + j, ad->samples_in.write[write_ofs++]);
}
int32_t l = ad->samples_in.write[write_ofs++];
int32_t r = ad->samples_in.write[write_ofs++];
int32_t c = (int32_t)(((int64_t)l + (int64_t)r) / 2);
ad->write_sample(ad->audio_output.format_tag, ad->audio_output.bits_per_sample, buffer, i * ad->audio_output.channels + last_chan, c);
}
} else {
for (unsigned int i = 0; i < write_frames; i++) {
for (unsigned int j = 0; j < MIN(ad->channels, ad->audio_output.channels); j++) {

File diff suppressed because one or more lines are too long

Before

Width:  |  Height:  |  Size: 13 KiB

After

Width:  |  Height:  |  Size: 13 KiB

View File

@ -229,8 +229,8 @@ void MeshLibraryEditor::_menu_cbk(int p_option) {
} break;
case MENU_OPTION_REMOVE_ITEM: {
String p = editor->get_inspector()->get_selected_path();
if (p.begins_with("/MeshLibrary/item") && p.get_slice_count("/") >= 3) {
to_erase = p.get_slice("/", 3).to_int();
if (p.begins_with("item") && p.get_slice_count("/") >= 2) {
to_erase = p.get_slice("/", 1).to_int();
cd_remove->set_text(vformat(TTR("Remove item %d?"), to_erase));
cd_remove->popup_centered(Size2(300, 60));
}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 18 KiB

After

Width:  |  Height:  |  Size: 25 KiB

File diff suppressed because one or more lines are too long

Before

Width:  |  Height:  |  Size: 24 KiB

After

Width:  |  Height:  |  Size: 41 KiB

View File

@ -282,8 +282,9 @@ void InputDefault::joy_connection_changed(int p_idx, bool p_connected, String p_
};
joy_names[p_idx] = js;
emit_signal("joy_connection_changed", p_idx, p_connected);
};
// Ensure this signal is emitted on the main thread, as some platforms (e.g. Linux) call this from a different thread.
call_deferred("emit_signal", "joy_connection_changed", p_idx, p_connected);
}
Vector3 InputDefault::get_gravity() const {
_THREAD_SAFE_METHOD_

View File

@ -751,7 +751,7 @@ def generate_vs_project(env, num_jobs):
for plat_id in ModuleConfigs.PLATFORM_IDS
]
self.arg_dict["cpppaths"] += ModuleConfigs.for_every_variant(env["CPPPATH"] + [includes])
self.arg_dict["cppdefines"] += ModuleConfigs.for_every_variant(env["CPPDEFINES"] + defines)
self.arg_dict["cppdefines"] += ModuleConfigs.for_every_variant(list(env["CPPDEFINES"]) + defines)
self.arg_dict["cmdargs"] += ModuleConfigs.for_every_variant(cli_args)
def build_commandline(self, commands):

View File

@ -556,8 +556,8 @@
<return type="float" />
<argument index="0" name="s" type="float" />
<description>
Natural logarithm. The amount of time needed to reach a certain level of continuous growth.
[b]Note:[/b] This is not the same as the "log" function on most calculators, which uses a base 10 logarithm.
Returns the [url=https://en.wikipedia.org/wiki/Natural_logarithm]natural logarithm[/url] of [code]s[/code] (base [url=https://en.wikipedia.org/wiki/E_(mathematical_constant)][i]e[/i][/url], with [i]e[/i] being approximately 2.71828). This is the amount of time needed to reach a certain level of continuous growth.
[b]Note:[/b] This is not the same as the "log" function on most calculators, which uses a base 10 logarithm. To use base 10 logarithm, use [code]log(x) / log(10)[/code].
[codeblock]
log(10) # Returns 2.302585
[/codeblock]

View File

@ -13,6 +13,7 @@ if env["builtin_libwebp"]:
thirdparty_dir = "#thirdparty/libwebp/"
thirdparty_sources = [
"sharpyuv/sharpyuv.c",
"sharpyuv/sharpyuv_cpu.c",
"sharpyuv/sharpyuv_csp.c",
"sharpyuv/sharpyuv_dsp.c",
"sharpyuv/sharpyuv_gamma.c",

View File

@ -334,13 +334,13 @@ static const LoadingScreenInfo loading_screen_infos[] = {
{ PNAME("landscape_launch_screens/ipad_1024x768"), "Default-Landscape.png", 1024, 768, false },
{ PNAME("landscape_launch_screens/ipad_2048x1536"), "Default-Landscape@2x.png", 2048, 1536, false },
{ PNAME("portrait_launch_screens/iphone_640x960"), "Default-480h@2x.png", 640, 960, true },
{ PNAME("portrait_launch_screens/iphone_640x1136"), "Default-568h@2x.png", 640, 1136, true },
{ PNAME("portrait_launch_screens/iphone_750x1334"), "Default-667h@2x.png", 750, 1334, true },
{ PNAME("portrait_launch_screens/iphone_1125x2436"), "Default-Portrait-X.png", 1125, 2436, true },
{ PNAME("portrait_launch_screens/ipad_768x1024"), "Default-Portrait.png", 768, 1024, true },
{ PNAME("portrait_launch_screens/ipad_1536x2048"), "Default-Portrait@2x.png", 1536, 2048, true },
{ PNAME("portrait_launch_screens/iphone_1242x2208"), "Default-Portrait-736h@3x.png", 1242, 2208, true }
{ PNAME("portrait_launch_screens/iphone_640x960"), "Default-480h@2x.png", 640, 960, false },
{ PNAME("portrait_launch_screens/iphone_640x1136"), "Default-568h@2x.png", 640, 1136, false },
{ PNAME("portrait_launch_screens/iphone_750x1334"), "Default-667h@2x.png", 750, 1334, false },
{ PNAME("portrait_launch_screens/iphone_1125x2436"), "Default-Portrait-X.png", 1125, 2436, false },
{ PNAME("portrait_launch_screens/ipad_768x1024"), "Default-Portrait.png", 768, 1024, false },
{ PNAME("portrait_launch_screens/ipad_1536x2048"), "Default-Portrait@2x.png", 1536, 2048, false },
{ PNAME("portrait_launch_screens/iphone_1242x2208"), "Default-Portrait-736h@3x.png", 1242, 2208, false }
};
void EditorExportPlatformIOS::get_export_options(List<ExportOption> *r_options) {

View File

@ -216,6 +216,9 @@ def configure_msvc(env, manual_msvc_config):
else:
env.AppendUnique(CCFLAGS=["/MD"])
# MSVC incremental linking is broken and _increases_ link time (GH-77968).
env.Append(LINKFLAGS=["/INCREMENTAL:NO"])
env.AppendUnique(CCFLAGS=["/Gd", "/GR", "/nologo"])
env.AppendUnique(CCFLAGS=["/utf-8"]) # Force to use Unicode encoding.
env.AppendUnique(CXXFLAGS=["/TP"]) # assume all sources are C++

View File

@ -100,11 +100,25 @@ bool JoypadWindows::is_xinput_device(const GUID *p_guid) {
static GUID IID_X360WiredGamepad = { MAKELONG(0x045E, 0x02A1), 0x0000, 0x0000, { 0x00, 0x00, 0x50, 0x49, 0x44, 0x56, 0x49, 0x44 } };
static GUID IID_X360WirelessGamepad = { MAKELONG(0x045E, 0x028E), 0x0000, 0x0000, { 0x00, 0x00, 0x50, 0x49, 0x44, 0x56, 0x49, 0x44 } };
static GUID IID_XSWirelessGamepad = { MAKELONG(0x045E, 0x0B13), 0x0000, 0x0000, { 0x00, 0x00, 0x50, 0x49, 0x44, 0x56, 0x49, 0x44 } };
static GUID IID_XEliteWirelessGamepad = { MAKELONG(0x045E, 0x0B05), 0x0000, 0x0000, { 0x00, 0x00, 0x50, 0x49, 0x44, 0x56, 0x49, 0x44 } };
static GUID IID_XOneWiredGamepad = { MAKELONG(0x045E, 0x02FF), 0x0000, 0x0000, { 0x00, 0x00, 0x50, 0x49, 0x44, 0x56, 0x49, 0x44 } };
static GUID IID_XOneWirelessGamepad = { MAKELONG(0x045E, 0x02DD), 0x0000, 0x0000, { 0x00, 0x00, 0x50, 0x49, 0x44, 0x56, 0x49, 0x44 } };
static GUID IID_XOneNewWirelessGamepad = { MAKELONG(0x045E, 0x02D1), 0x0000, 0x0000, { 0x00, 0x00, 0x50, 0x49, 0x44, 0x56, 0x49, 0x44 } };
static GUID IID_XOneSWirelessGamepad = { MAKELONG(0x045E, 0x02EA), 0x0000, 0x0000, { 0x00, 0x00, 0x50, 0x49, 0x44, 0x56, 0x49, 0x44 } };
static GUID IID_XOneSBluetoothGamepad = { MAKELONG(0x045E, 0x02E0), 0x0000, 0x0000, { 0x00, 0x00, 0x50, 0x49, 0x44, 0x56, 0x49, 0x44 } };
static GUID IID_XOneEliteWirelessGamepad = { MAKELONG(0x045E, 0x02E3), 0x0000, 0x0000, { 0x00, 0x00, 0x50, 0x49, 0x44, 0x56, 0x49, 0x44 } };
if (memcmp(p_guid, &IID_ValveStreamingGamepad, sizeof(*p_guid)) == 0 ||
memcmp(p_guid, &IID_X360WiredGamepad, sizeof(*p_guid)) == 0 ||
memcmp(p_guid, &IID_X360WirelessGamepad, sizeof(*p_guid)) == 0 ||
memcmp(p_guid, &IID_XSWirelessGamepad, sizeof(*p_guid)) == 0)
memcmp(p_guid, &IID_XSWirelessGamepad, sizeof(*p_guid)) == 0 ||
memcmp(p_guid, &IID_XEliteWirelessGamepad, sizeof(*p_guid)) == 0 ||
memcmp(p_guid, &IID_XOneWiredGamepad, sizeof(*p_guid)) == 0 ||
memcmp(p_guid, &IID_XOneWirelessGamepad, sizeof(*p_guid)) == 0 ||
memcmp(p_guid, &IID_XOneNewWirelessGamepad, sizeof(*p_guid)) == 0 ||
memcmp(p_guid, &IID_XOneSWirelessGamepad, sizeof(*p_guid)) == 0 ||
memcmp(p_guid, &IID_XOneSBluetoothGamepad, sizeof(*p_guid)) == 0 ||
memcmp(p_guid, &IID_XOneEliteWirelessGamepad, sizeof(*p_guid)) == 0)
return true;
PRAWINPUTDEVICELIST dev_list = NULL;

View File

@ -79,6 +79,7 @@ void JoypadLinux::Joypad::reset() {
events.clear();
}
#ifdef UDEV_ENABLED
// This function is derived from SDL:
// https://github.com/libsdl-org/SDL/blob/main/src/core/linux/SDL_sandbox.c#L28-L45
static bool detect_sandbox() {
@ -98,6 +99,7 @@ static bool detect_sandbox() {
return false;
}
#endif // UDEV_ENABLED
JoypadLinux::JoypadLinux(InputDefault *in) {
#ifdef UDEV_ENABLED

View File

@ -62,6 +62,7 @@ Rect2 BackBufferCopy::get_anchorable_rect() const {
void BackBufferCopy::set_rect(const Rect2 &p_rect) {
rect = p_rect;
_update_copy_mode();
item_rect_changed();
}
Rect2 BackBufferCopy::get_rect() const {

View File

@ -54,12 +54,14 @@ void ScrollBar::_gui_input(Ref<InputEvent> p_event) {
accept_event();
if (b->get_button_index() == BUTTON_WHEEL_DOWN && b->is_pressed()) {
set_value(get_value() + get_page() / 4.0);
double change = get_page() != 0.0 ? get_page() / 4.0 : (get_max() - get_min()) / 16.0;
set_value(get_value() + MAX(change, get_step()));
accept_event();
}
if (b->get_button_index() == BUTTON_WHEEL_UP && b->is_pressed()) {
set_value(get_value() - get_page() / 4.0);
double change = get_page() != 0.0 ? get_page() / 4.0 : (get_max() - get_min()) / 16.0;
set_value(get_value() - MAX(change, get_step()));
accept_event();
}
@ -98,7 +100,8 @@ void ScrollBar::_gui_input(Ref<InputEvent> p_event) {
if (scrolling) {
target_scroll = CLAMP(target_scroll - get_page(), get_min(), get_max() - get_page());
} else {
target_scroll = CLAMP(get_value() - get_page(), get_min(), get_max() - get_page());
double change = get_page() != 0.0 ? get_page() : (get_max() - get_min()) / 16.0;
target_scroll = CLAMP(get_value() - change, get_min(), get_max() - get_page());
}
if (smooth_scroll_enabled) {
@ -121,7 +124,8 @@ void ScrollBar::_gui_input(Ref<InputEvent> p_event) {
if (scrolling) {
target_scroll = CLAMP(target_scroll + get_page(), get_min(), get_max() - get_page());
} else {
target_scroll = CLAMP(get_value() + get_page(), get_min(), get_max() - get_page());
double change = get_page() != 0.0 ? get_page() : (get_max() - get_min()) / 16.0;
target_scroll = CLAMP(get_value() + change, get_min(), get_max() - get_page());
}
if (smooth_scroll_enabled) {

10
thirdparty/README.md vendored
View File

@ -35,7 +35,7 @@ Includes some patches in the `patches` folder which have been sent upstream.
## certs
- Upstream: Mozilla, via https://github.com/bagder/ca-bundle
- Version: git (b2f7415648411b6fd7c298c6c92d6552f0165f60, 2022)
- Version: git (3aaca635bad074a0ce5c15fa8aa0dff47f5c639a, 2023)
- License: MPL 2.0
@ -267,7 +267,7 @@ from the Android NDK r18.
## libwebp
- Upstream: https://chromium.googlesource.com/webm/libwebp/
- Version: 1.2.4 (0d1f12546bd803099a60c070517a552483f3790e, 2022)
- Version: 1.3.1 (fd7bb21c0cb56e8a82e9bfa376164b842f433f3b, 2023)
- License: BSD-3-Clause
Files extracted from upstream source:
@ -279,7 +279,7 @@ Files extracted from upstream source:
## mbedtls
- Upstream: https://github.com/Mbed-TLS/mbedtls
- Version: 2.18.2 (89f040a5c938985c5f30728baed21e49d0846a53, 2022)
- Version: 2.28.4 (aeb97a18913a86f051afab11b2c92c6be0c2eb83, 2023)
- License: Apache 2.0
File extracted from upstream release tarball:
@ -287,7 +287,7 @@ File extracted from upstream release tarball:
- All `*.h` from `include/mbedtls/` to `thirdparty/mbedtls/include/mbedtls/` except `config_psa.h` and `psa_util.h`.
- All `*.c` and `*.h` from `library/` to `thirdparty/mbedtls/library/` except those starting with `psa_*`.
- The `LICENSE` file.
- Applied the patch in `patches/1453.diff` (upstream PR:
- Applied the patch in `patches/1453.diff` to fix UWP build (upstream PR:
https://github.com/ARMmbed/mbedtls/pull/1453).
Applied the patch in `patches/windows-arm64-hardclock.diff`
- Added 2 files `godot_core_mbedtls_platform.c` and `godot_core_mbedtls_config.h`
@ -538,7 +538,7 @@ comments and a patch is provided in the squish/ folder.
## tinyexr
- Upstream: https://github.com/syoyo/tinyexr
- Version: 1.0.1 (67010eae802211202d0797f4df2b809f4ba7442c, 2021)
- Version: 1.0.5 (3627ab3060592468d49547b4cdf5353e9e2b50dc, 2023)
- License: BSD-3-Clause
Files extracted from upstream source:

View File

@ -1,7 +1,7 @@
##
## Bundle of CA Root Certificates
##
## Certificate data from Mozilla as of: Fri Oct 21 16:14:43 2022 GMT
## Certificate data from Mozilla as of: Fri Jun 2 21:47:52 2023 GMT
##
## This is a bundle of X.509 certificates of public Certificate Authorities
## (CA). These were automatically extracted from Mozilla's root certificates
@ -14,7 +14,7 @@
## Just configure this file as the SSLCACertificateFile.
##
## Conversion done with mk-ca-bundle.pl version 1.29.
## SHA256: 3ff8bd209b5f2e739b9f2b96eacb694a774114685b02978257824f37ff528f71
## SHA256: c47475103fb05bb562bbadff0d1e72346b03236154e1448a6ca191b740f83507
##
@ -489,29 +489,6 @@ IGfE7vmLV2H0knZ9P4SNVbfo5azV8fUZVqZa+5Acr5Pr5RzUZ5ddBA6+C4OmF4O5MBKgxTMVBbkN
+8cFduPYSo38NBejxiEovjBFMR7HeL5YYTisO+IBZQ==
-----END CERTIFICATE-----
Network Solutions Certificate Authority
=======================================
-----BEGIN CERTIFICATE-----
MIID5jCCAs6gAwIBAgIQV8szb8JcFuZHFhfjkDFo4DANBgkqhkiG9w0BAQUFADBiMQswCQYDVQQG
EwJVUzEhMB8GA1UEChMYTmV0d29yayBTb2x1dGlvbnMgTC5MLkMuMTAwLgYDVQQDEydOZXR3b3Jr
IFNvbHV0aW9ucyBDZXJ0aWZpY2F0ZSBBdXRob3JpdHkwHhcNMDYxMjAxMDAwMDAwWhcNMjkxMjMx
MjM1OTU5WjBiMQswCQYDVQQGEwJVUzEhMB8GA1UEChMYTmV0d29yayBTb2x1dGlvbnMgTC5MLkMu
MTAwLgYDVQQDEydOZXR3b3JrIFNvbHV0aW9ucyBDZXJ0aWZpY2F0ZSBBdXRob3JpdHkwggEiMA0G
CSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDkvH6SMG3G2I4rC7xGzuAnlt7e+foS0zwzc7MEL7xx
jOWftiJgPl9dzgn/ggwbmlFQGiaJ3dVhXRncEg8tCqJDXRfQNJIg6nPPOCwGJgl6cvf6UDL4wpPT
aaIjzkGxzOTVHzbRijr4jGPiFFlp7Q3Tf2vouAPlT2rlmGNpSAW+Lv8ztumXWWn4Zxmuk2GWRBXT
crA/vGp97Eh/jcOrqnErU2lBUzS1sLnFBgrEsEX1QV1uiUV7PTsmjHTC5dLRfbIR1PtYMiKagMnc
/Qzpf14Dl847ABSHJ3A4qY5usyd2mFHgBeMhqxrVhSI8KbWaFsWAqPS7azCPL0YCorEMIuDTAgMB
AAGjgZcwgZQwHQYDVR0OBBYEFCEwyfsA106Y2oeqKtCnLrFAMadMMA4GA1UdDwEB/wQEAwIBBjAP
BgNVHRMBAf8EBTADAQH/MFIGA1UdHwRLMEkwR6BFoEOGQWh0dHA6Ly9jcmwubmV0c29sc3NsLmNv
bS9OZXR3b3JrU29sdXRpb25zQ2VydGlmaWNhdGVBdXRob3JpdHkuY3JsMA0GCSqGSIb3DQEBBQUA
A4IBAQC7rkvnt1frf6ott3NHhWrB5KUd5Oc86fRZZXe1eltajSU24HqXLjjAV2CDmAaDn7l2em5Q
4LqILPxFzBiwmZVRDuwduIj/h1AcgsLj4DKAv6ALR8jDMe+ZZzKATxcheQxpXN5eNK4CtSbqUN9/
GGUsyfJj4akH/nxxH2szJGoeBfcFaMBqEssuXmHLrijTfsK0ZpEmXzwuJF/LWA/rKOyvEZbz3Htv
wKeI8lN3s2Berq4o2jUsbzRF0ybh3uxbTydrFny9RAQYgrOJeRcQcT16ohZO9QHNpGxlaKFJdlxD
ydi8NmdspZS11My5vWo1ViHe2MPr+8ukYEywVaCge1ey
-----END CERTIFICATE-----
COMODO ECC Certification Authority
==================================
-----BEGIN CERTIFICATE-----
@ -626,26 +603,6 @@ NwUASZQDhETnv0Mxz3WLJdH0pmT1kvarBes96aULNmLazAZfNou2XjG4Kvte9nHfRCaexOYNkbQu
dZWAUWpLMKawYqGT8ZvYzsRjdT9ZR7E=
-----END CERTIFICATE-----
Hongkong Post Root CA 1
=======================
-----BEGIN CERTIFICATE-----
MIIDMDCCAhigAwIBAgICA+gwDQYJKoZIhvcNAQEFBQAwRzELMAkGA1UEBhMCSEsxFjAUBgNVBAoT
DUhvbmdrb25nIFBvc3QxIDAeBgNVBAMTF0hvbmdrb25nIFBvc3QgUm9vdCBDQSAxMB4XDTAzMDUx
NTA1MTMxNFoXDTIzMDUxNTA0NTIyOVowRzELMAkGA1UEBhMCSEsxFjAUBgNVBAoTDUhvbmdrb25n
IFBvc3QxIDAeBgNVBAMTF0hvbmdrb25nIFBvc3QgUm9vdCBDQSAxMIIBIjANBgkqhkiG9w0BAQEF
AAOCAQ8AMIIBCgKCAQEArP84tulmAknjorThkPlAj3n54r15/gK97iSSHSL22oVyaf7XPwnU3ZG1
ApzQjVrhVcNQhrkpJsLj2aDxaQMoIIBFIi1WpztUlVYiWR8o3x8gPW2iNr4joLFutbEnPzlTCeqr
auh0ssJlXI6/fMN4hM2eFvz1Lk8gKgifd/PFHsSaUmYeSF7jEAaPIpjhZY4bXSNmO7ilMlHIhqqh
qZ5/dpTCpmy3QfDVyAY45tQM4vM7TG1QjMSDJ8EThFk9nnV0ttgCXjqQesBCNnLsak3c78QA3xMY
V18meMjWCnl3v/evt3a5pQuEF10Q6m/hq5URX208o1xNg1vysxmKgIsLhwIDAQABoyYwJDASBgNV
HRMBAf8ECDAGAQH/AgEDMA4GA1UdDwEB/wQEAwIBxjANBgkqhkiG9w0BAQUFAAOCAQEADkbVPK7i
h9legYsCmEEIjEy82tvuJxuC52pF7BaLT4Wg87JwvVqWuspube5Gi27nKi6Wsxkz67SfqLI37pio
l7Yutmcn1KZJ/RyTZXaeQi/cImyaT/JaFTmxcdcrUehtHJjA2Sr0oYJ71clBoiMBdDhViw+5Lmei
IAQ32pwL0xch4I+XeTRvhEgCIDMb5jREn5Fw9IBehEPCKdJsEhTkYY2sEJCehFC78JZvRZ+K88ps
T/oROhUVRsPNH4NbLUES7VBnQRM9IauUiqpOfMGx+6fWtScvl6tu4B3i0RwsH0Ti/L6RoZz71ilT
c4afU9hDDl3WY4JxHYB0yvbiAmvZWg==
-----END CERTIFICATE-----
SecureSign RootCA11
===================
-----BEGIN CERTIFICATE-----
@ -1284,40 +1241,6 @@ Y2XQ8xwOFvVrhlhNGNTkDY6lnVuR3HYkUD/GKvvZt5y11ubQ2egZixVxSK236thZiNSQvxaz2ems
WWFUyBy6ysHK4bkgTI86k4mloMy/0/Z1pHWWbVY=
-----END CERTIFICATE-----
E-Tugra Certification Authority
===============================
-----BEGIN CERTIFICATE-----
MIIGSzCCBDOgAwIBAgIIamg+nFGby1MwDQYJKoZIhvcNAQELBQAwgbIxCzAJBgNVBAYTAlRSMQ8w
DQYDVQQHDAZBbmthcmExQDA+BgNVBAoMN0UtVHXEn3JhIEVCRyBCaWxpxZ9pbSBUZWtub2xvamls
ZXJpIHZlIEhpem1ldGxlcmkgQS7Fni4xJjAkBgNVBAsMHUUtVHVncmEgU2VydGlmaWthc3lvbiBN
ZXJrZXppMSgwJgYDVQQDDB9FLVR1Z3JhIENlcnRpZmljYXRpb24gQXV0aG9yaXR5MB4XDTEzMDMw
NTEyMDk0OFoXDTIzMDMwMzEyMDk0OFowgbIxCzAJBgNVBAYTAlRSMQ8wDQYDVQQHDAZBbmthcmEx
QDA+BgNVBAoMN0UtVHXEn3JhIEVCRyBCaWxpxZ9pbSBUZWtub2xvamlsZXJpIHZlIEhpem1ldGxl
cmkgQS7Fni4xJjAkBgNVBAsMHUUtVHVncmEgU2VydGlmaWthc3lvbiBNZXJrZXppMSgwJgYDVQQD
DB9FLVR1Z3JhIENlcnRpZmljYXRpb24gQXV0aG9yaXR5MIICIjANBgkqhkiG9w0BAQEFAAOCAg8A
MIICCgKCAgEA4vU/kwVRHoViVF56C/UYB4Oufq9899SKa6VjQzm5S/fDxmSJPZQuVIBSOTkHS0vd
hQd2h8y/L5VMzH2nPbxHD5hw+IyFHnSOkm0bQNGZDbt1bsipa5rAhDGvykPL6ys06I+XawGb1Q5K
CKpbknSFQ9OArqGIW66z6l7LFpp3RMih9lRozt6Plyu6W0ACDGQXwLWTzeHxE2bODHnv0ZEoq1+g
ElIwcxmOj+GMB6LDu0rw6h8VqO4lzKRG+Bsi77MOQ7osJLjFLFzUHPhdZL3Dk14opz8n8Y4e0ypQ
BaNV2cvnOVPAmJ6MVGKLJrD3fY185MaeZkJVgkfnsliNZvcHfC425lAcP9tDJMW/hkd5s3kc91r0
E+xs+D/iWR+V7kI+ua2oMoVJl0b+SzGPWsutdEcf6ZG33ygEIqDUD13ieU/qbIWGvaimzuT6w+Gz
rt48Ue7LE3wBf4QOXVGUnhMMti6lTPk5cDZvlsouDERVxcr6XQKj39ZkjFqzAQqptQpHF//vkUAq
jqFGOjGY5RH8zLtJVor8udBhmm9lbObDyz51Sf6Pp+KJxWfXnUYTTjF2OySznhFlhqt/7x3U+Lzn
rFpct1pHXFXOVbQicVtbC/DP3KBhZOqp12gKY6fgDT+gr9Oq0n7vUaDmUStVkhUXU8u3Zg5mTPj5
dUyQ5xJwx0UCAwEAAaNjMGEwHQYDVR0OBBYEFC7j27JJ0JxUeVz6Jyr+zE7S6E5UMA8GA1UdEwEB
/wQFMAMBAf8wHwYDVR0jBBgwFoAULuPbsknQnFR5XPonKv7MTtLoTlQwDgYDVR0PAQH/BAQDAgEG
MA0GCSqGSIb3DQEBCwUAA4ICAQAFNzr0TbdF4kV1JI+2d1LoHNgQk2Xz8lkGpD4eKexd0dCrfOAK
kEh47U6YA5n+KGCRHTAduGN8qOY1tfrTYXbm1gdLymmasoR6d5NFFxWfJNCYExL/u6Au/U5Mh/jO
XKqYGwXgAEZKgoClM4so3O0409/lPun++1ndYYRP0lSWE2ETPo+Aab6TR7U1Q9Jauz1c77NCR807
VRMGsAnb/WP2OogKmW9+4c4bU2pEZiNRCHu8W1Ki/QY3OEBhj0qWuJA3+GbHeJAAFS6LrVE1Uweo
a2iu+U48BybNCAVwzDk/dr2l02cmAYamU9JgO3xDf1WKvJUawSg5TB9D0pH0clmKuVb8P7Sd2nCc
dlqMQ1DujjByTd//SffGqWfZbawCEeI6FiWnWAjLb1NBnEg4R2gz0dfHj9R0IdTDBZB6/86WiLEV
KV0jq9BgoRJP3vQXzTLlyb/IQ639Lo7xr+L0mPoSHyDYwKcMhcWQ9DstliaxLL5Mq+ux0orJ23gT
Dx4JnW2PAJ8C2sH6H3p6CcRK5ogql5+Ji/03X186zjhZhkuvcQu02PJwT58yE+Owp1fl2tpDy4Q0
8ijE6m30Ku/Ba3ba+367hTzSU8JNvnHhRdH9I2cNE3X7z2VnIp2usAnRCf8dNL/+I5c30jn6PQ0G
C7TbO6Orb1wdtn7os4I07QZcJA==
-----END CERTIFICATE-----
T-TeleSec GlobalRoot Class 2
============================
-----BEGIN CERTIFICATE-----
@ -1654,36 +1577,6 @@ uglB4Zf4+/2a4n0Sye18ZNPLBSWLVtmg515dTguDnFt2KaAJJiFqYgIwcdK1j1zqO+F4CYWodZI7
yFz9SO8NdCKoCOJuxUnOxwy8p2Fp8fc74SrL+SvzZpA3
-----END CERTIFICATE-----
Staat der Nederlanden EV Root CA
================================
-----BEGIN CERTIFICATE-----
MIIFcDCCA1igAwIBAgIEAJiWjTANBgkqhkiG9w0BAQsFADBYMQswCQYDVQQGEwJOTDEeMBwGA1UE
CgwVU3RhYXQgZGVyIE5lZGVybGFuZGVuMSkwJwYDVQQDDCBTdGFhdCBkZXIgTmVkZXJsYW5kZW4g
RVYgUm9vdCBDQTAeFw0xMDEyMDgxMTE5MjlaFw0yMjEyMDgxMTEwMjhaMFgxCzAJBgNVBAYTAk5M
MR4wHAYDVQQKDBVTdGFhdCBkZXIgTmVkZXJsYW5kZW4xKTAnBgNVBAMMIFN0YWF0IGRlciBOZWRl
cmxhbmRlbiBFViBSb290IENBMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEA48d+ifkk
SzrSM4M1LGns3Amk41GoJSt5uAg94JG6hIXGhaTK5skuU6TJJB79VWZxXSzFYGgEt9nCUiY4iKTW
O0Cmws0/zZiTs1QUWJZV1VD+hq2kY39ch/aO5ieSZxeSAgMs3NZmdO3dZ//BYY1jTw+bbRcwJu+r
0h8QoPnFfxZpgQNH7R5ojXKhTbImxrpsX23Wr9GxE46prfNeaXUmGD5BKyF/7otdBwadQ8QpCiv8
Kj6GyzyDOvnJDdrFmeK8eEEzduG/L13lpJhQDBXd4Pqcfzho0LKmeqfRMb1+ilgnQ7O6M5HTp5gV
XJrm0w912fxBmJc+qiXbj5IusHsMX/FjqTf5m3VpTCgmJdrV8hJwRVXj33NeN/UhbJCONVrJ0yPr
08C+eKxCKFhmpUZtcALXEPlLVPxdhkqHz3/KRawRWrUgUY0viEeXOcDPusBCAUCZSCELa6fS/ZbV
0b5GnUngC6agIk440ME8MLxwjyx1zNDFjFE7PZQIZCZhfbnDZY8UnCHQqv0XcgOPvZuM5l5Tnrmd
74K74bzickFbIZTTRTeU0d8JOV3nI6qaHcptqAqGhYqCvkIH1vI4gnPah1vlPNOePqc7nvQDs/nx
fRN0Av+7oeX6AHkcpmZBiFxgV6YuCcS6/ZrPpx9Aw7vMWgpVSzs4dlG4Y4uElBbmVvMCAwEAAaNC
MEAwDwYDVR0TAQH/BAUwAwEB/zAOBgNVHQ8BAf8EBAMCAQYwHQYDVR0OBBYEFP6rAJCYniT8qcwa
ivsnuL8wbqg7MA0GCSqGSIb3DQEBCwUAA4ICAQDPdyxuVr5Os7aEAJSrR8kN0nbHhp8dB9O2tLsI
eK9p0gtJ3jPFrK3CiAJ9Brc1AsFgyb/E6JTe1NOpEyVa/m6irn0F3H3zbPB+po3u2dfOWBfoqSmu
c0iH55vKbimhZF8ZE/euBhD/UcabTVUlT5OZEAFTdfETzsemQUHSv4ilf0X8rLiltTMMgsT7B/Zq
5SWEXwbKwYY5EdtYzXc7LMJMD16a4/CrPmEbUCTCwPTxGfARKbalGAKb12NMcIxHowNDXLldRqAN
b/9Zjr7dn3LDWyvfjFvO5QxGbJKyCqNMVEIYFRIYvdr8unRu/8G2oGTYqV9Vrp9canaW2HNnh/tN
f1zuacpzEPuKqf2evTY4SUmH9A4U8OmHuD+nT3pajnnUk+S7aFKErGzp85hwVXIy+TSrK0m1zSBi
5Dp6Z2Orltxtrpfs/J92VoguZs9btsmksNcFuuEnL5O7Jiqik7Ab846+HUCjuTaPPoIaGl6I6lD4
WeKDRikL40Rc4ZW2aZCaFG+XroHPaO+Zmr615+F/+PoTRxZMzG0IQOeLeG9QgkRQP2YGiqtDhFZK
DyAthg710tvSeopLzaXoTvFeJiUBWSOgftL2fiFX1ye8FVdMpEbB4IMeDExNH08GGeL5qPQ6gqGy
eUN51q1veieQA6TqJIc/2b3Z6fJfUEkc7uzXLg==
-----END CERTIFICATE-----
IdenTrust Commercial Root CA 1
==============================
-----BEGIN CERTIFICATE-----
@ -2135,87 +2028,6 @@ F8Io2c9Si1vIY9RCPqAzekYu9wogRlR+ak8x8YF+QnQ4ZXMn7sZ8uI7XpTrXmKGcjBBV09tL7ECQ
aaApJUqlyyvdimYHFngVV3Eb7PVHhPOeMTd61X8kreS8/f3MboPoDKi3QWwH3b08hpcv0g==
-----END CERTIFICATE-----
TrustCor RootCert CA-1
======================
-----BEGIN CERTIFICATE-----
MIIEMDCCAxigAwIBAgIJANqb7HHzA7AZMA0GCSqGSIb3DQEBCwUAMIGkMQswCQYDVQQGEwJQQTEP
MA0GA1UECAwGUGFuYW1hMRQwEgYDVQQHDAtQYW5hbWEgQ2l0eTEkMCIGA1UECgwbVHJ1c3RDb3Ig
U3lzdGVtcyBTLiBkZSBSLkwuMScwJQYDVQQLDB5UcnVzdENvciBDZXJ0aWZpY2F0ZSBBdXRob3Jp
dHkxHzAdBgNVBAMMFlRydXN0Q29yIFJvb3RDZXJ0IENBLTEwHhcNMTYwMjA0MTIzMjE2WhcNMjkx
MjMxMTcyMzE2WjCBpDELMAkGA1UEBhMCUEExDzANBgNVBAgMBlBhbmFtYTEUMBIGA1UEBwwLUGFu
YW1hIENpdHkxJDAiBgNVBAoMG1RydXN0Q29yIFN5c3RlbXMgUy4gZGUgUi5MLjEnMCUGA1UECwwe
VHJ1c3RDb3IgQ2VydGlmaWNhdGUgQXV0aG9yaXR5MR8wHQYDVQQDDBZUcnVzdENvciBSb290Q2Vy
dCBDQS0xMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAv463leLCJhJrMxnHQFgKq1mq
jQCj/IDHUHuO1CAmujIS2CNUSSUQIpidRtLByZ5OGy4sDjjzGiVoHKZaBeYei0i/mJZ0PmnK6bV4
pQa81QBeCQryJ3pS/C3Vseq0iWEk8xoT26nPUu0MJLq5nux+AHT6k61sKZKuUbS701e/s/OojZz0
JEsq1pme9J7+wH5COucLlVPat2gOkEz7cD+PSiyU8ybdY2mplNgQTsVHCJCZGxdNuWxu72CVEY4h
gLW9oHPY0LJ3xEXqWib7ZnZ2+AYfYW0PVcWDtxBWcgYHpfOxGgMFZA6dWorWhnAbJN7+KIor0Gqw
/Hqi3LJ5DotlDwIDAQABo2MwYTAdBgNVHQ4EFgQU7mtJPHo/DeOxCbeKyKsZn3MzUOcwHwYDVR0j
BBgwFoAU7mtJPHo/DeOxCbeKyKsZn3MzUOcwDwYDVR0TAQH/BAUwAwEB/zAOBgNVHQ8BAf8EBAMC
AYYwDQYJKoZIhvcNAQELBQADggEBACUY1JGPE+6PHh0RU9otRCkZoB5rMZ5NDp6tPVxBb5UrJKF5
mDo4Nvu7Zp5I/5CQ7z3UuJu0h3U/IJvOcs+hVcFNZKIZBqEHMwwLKeXx6quj7LUKdJDHfXLy11yf
ke+Ri7fc7Waiz45mO7yfOgLgJ90WmMCV1Aqk5IGadZQ1nJBfiDcGrVmVCrDRZ9MZyonnMlo2HD6C
qFqTvsbQZJG2z9m2GM/bftJlo6bEjhcxwft+dtvTheNYsnd6djtsL1Ac59v2Z3kf9YKVmgenFK+P
3CghZwnS1k1aHBkcjndcw5QkPTJrS37UeJSDvjdNzl/HHk484IkzlQsPpTLWPFp5LBk=
-----END CERTIFICATE-----
TrustCor RootCert CA-2
======================
-----BEGIN CERTIFICATE-----
MIIGLzCCBBegAwIBAgIIJaHfyjPLWQIwDQYJKoZIhvcNAQELBQAwgaQxCzAJBgNVBAYTAlBBMQ8w
DQYDVQQIDAZQYW5hbWExFDASBgNVBAcMC1BhbmFtYSBDaXR5MSQwIgYDVQQKDBtUcnVzdENvciBT
eXN0ZW1zIFMuIGRlIFIuTC4xJzAlBgNVBAsMHlRydXN0Q29yIENlcnRpZmljYXRlIEF1dGhvcml0
eTEfMB0GA1UEAwwWVHJ1c3RDb3IgUm9vdENlcnQgQ0EtMjAeFw0xNjAyMDQxMjMyMjNaFw0zNDEy
MzExNzI2MzlaMIGkMQswCQYDVQQGEwJQQTEPMA0GA1UECAwGUGFuYW1hMRQwEgYDVQQHDAtQYW5h
bWEgQ2l0eTEkMCIGA1UECgwbVHJ1c3RDb3IgU3lzdGVtcyBTLiBkZSBSLkwuMScwJQYDVQQLDB5U
cnVzdENvciBDZXJ0aWZpY2F0ZSBBdXRob3JpdHkxHzAdBgNVBAMMFlRydXN0Q29yIFJvb3RDZXJ0
IENBLTIwggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQCnIG7CKqJiJJWQdsg4foDSq8Gb
ZQWU9MEKENUCrO2fk8eHyLAnK0IMPQo+QVqedd2NyuCb7GgypGmSaIwLgQ5WoD4a3SwlFIIvl9Nk
RvRUqdw6VC0xK5mC8tkq1+9xALgxpL56JAfDQiDyitSSBBtlVkxs1Pu2YVpHI7TYabS3OtB0PAx1
oYxOdqHp2yqlO/rOsP9+aij9JxzIsekp8VduZLTQwRVtDr4uDkbIXvRR/u8OYzo7cbrPb1nKDOOb
XUm4TOJXsZiKQlecdu/vvdFoqNL0Cbt3Nb4lggjEFixEIFapRBF37120Hapeaz6LMvYHL1cEksr1
/p3C6eizjkxLAjHZ5DxIgif3GIJ2SDpxsROhOdUuxTTCHWKF3wP+TfSvPd9cW436cOGlfifHhi5q
jxLGhF5DUVCcGZt45vz27Ud+ez1m7xMTiF88oWP7+ayHNZ/zgp6kPwqcMWmLmaSISo5uZk3vFsQP
eSghYA2FFn3XVDjxklb9tTNMg9zXEJ9L/cb4Qr26fHMC4P99zVvh1Kxhe1fVSntb1IVYJ12/+Ctg
rKAmrhQhJ8Z3mjOAPF5GP/fDsaOGM8boXg25NSyqRsGFAnWAoOsk+xWq5Gd/bnc/9ASKL3x74xdh
8N0JqSDIvgmk0H5Ew7IwSjiqqewYmgeCK9u4nBit2uBGF6zPXQIDAQABo2MwYTAdBgNVHQ4EFgQU
2f4hQG6UnrybPZx9mCAZ5YwwYrIwHwYDVR0jBBgwFoAU2f4hQG6UnrybPZx9mCAZ5YwwYrIwDwYD
VR0TAQH/BAUwAwEB/zAOBgNVHQ8BAf8EBAMCAYYwDQYJKoZIhvcNAQELBQADggIBAJ5Fngw7tu/h
Osh80QA9z+LqBrWyOrsGS2h60COXdKcs8AjYeVrXWoSK2BKaG9l9XE1wxaX5q+WjiYndAfrs3fnp
kpfbsEZC89NiqpX+MWcUaViQCqoL7jcjx1BRtPV+nuN79+TMQjItSQzL/0kMmx40/W5ulop5A7Zv
2wnL/V9lFDfhOPXzYRZY5LVtDQsEGz9QLX+zx3oaFoBg+Iof6Rsqxvm6ARppv9JYx1RXCI/hOWB3
S6xZhBqI8d3LT3jX5+EzLfzuQfogsL7L9ziUwOHQhQ+77Sxzq+3+knYaZH9bDTMJBzN7Bj8RpFxw
PIXAz+OQqIN3+tvmxYxoZxBnpVIt8MSZj3+/0WvitUfW2dCFmU2Umw9Lje4AWkcdEQOsQRivh7dv
DDqPys/cA8GiCcjl/YBeyGBCARsaU1q7N6a3vLqE6R5sGtRk2tRD/pOLS/IseRYQ1JMLiI+h2IYU
RpFHmygk71dSTlxCnKr3Sewn6EAes6aJInKc9Q0ztFijMDvd1GpUk74aTfOTlPf8hAs/hCBcNANE
xdqtvArBAs8e5ZTZ845b2EzwnexhF7sUMlQMAimTHpKG9n/v55IFDlndmQguLvqcAFLTxWYp5KeX
RKQOKIETNcX2b2TmQcTVL8w0RSXPQQCWPUouwpaYT05KnJe32x+SMsj/D1Fu1uwJ
-----END CERTIFICATE-----
TrustCor ECA-1
==============
-----BEGIN CERTIFICATE-----
MIIEIDCCAwigAwIBAgIJAISCLF8cYtBAMA0GCSqGSIb3DQEBCwUAMIGcMQswCQYDVQQGEwJQQTEP
MA0GA1UECAwGUGFuYW1hMRQwEgYDVQQHDAtQYW5hbWEgQ2l0eTEkMCIGA1UECgwbVHJ1c3RDb3Ig
U3lzdGVtcyBTLiBkZSBSLkwuMScwJQYDVQQLDB5UcnVzdENvciBDZXJ0aWZpY2F0ZSBBdXRob3Jp
dHkxFzAVBgNVBAMMDlRydXN0Q29yIEVDQS0xMB4XDTE2MDIwNDEyMzIzM1oXDTI5MTIzMTE3Mjgw
N1owgZwxCzAJBgNVBAYTAlBBMQ8wDQYDVQQIDAZQYW5hbWExFDASBgNVBAcMC1BhbmFtYSBDaXR5
MSQwIgYDVQQKDBtUcnVzdENvciBTeXN0ZW1zIFMuIGRlIFIuTC4xJzAlBgNVBAsMHlRydXN0Q29y
IENlcnRpZmljYXRlIEF1dGhvcml0eTEXMBUGA1UEAwwOVHJ1c3RDb3IgRUNBLTEwggEiMA0GCSqG
SIb3DQEBAQUAA4IBDwAwggEKAoIBAQDPj+ARtZ+odnbb3w9U73NjKYKtR8aja+3+XzP4Q1HpGjOR
MRegdMTUpwHmspI+ap3tDvl0mEDTPwOABoJA6LHip1GnHYMma6ve+heRK9jGrB6xnhkB1Zem6g23
xFUfJ3zSCNV2HykVh0A53ThFEXXQmqc04L/NyFIduUd+Dbi7xgz2c1cWWn5DkR9VOsZtRASqnKmc
p0yJF4OuowReUoCLHhIlERnXDH19MURB6tuvsBzvgdAsxZohmz3tQjtQJvLsznFhBmIhVE5/wZ0+
fyCMgMsq2JdiyIMzkX2woloPV+g7zPIlstR8L+xNxqE6FXrntl019fZISjZFZtS6mFjBAgMBAAGj
YzBhMB0GA1UdDgQWBBREnkj1zG1I1KBLf/5ZJC+Dl5mahjAfBgNVHSMEGDAWgBREnkj1zG1I1KBL
f/5ZJC+Dl5mahjAPBgNVHRMBAf8EBTADAQH/MA4GA1UdDwEB/wQEAwIBhjANBgkqhkiG9w0BAQsF
AAOCAQEABT41XBVwm8nHc2FvcivUwo/yQ10CzsSUuZQRg2dd4mdsdXa/uwyqNsatR5Nj3B5+1t4u
/ukZMjgDfxT2AHMsWbEhBuH7rBiVDKP/mZb3Kyeb1STMHd3BOuCYRLDE5D53sXOpZCz2HAF8P11F
hcCF5yWPldwX8zyfGm6wyuMdKulMY/okYWLW2n62HGz1Ah3UKt1VkOsqEUc8Ll50soIipX1TH0Xs
J5F95yIW6MBoNtjG8U+ARDL54dHRHareqKucBK+tIA5kmE2la8BIWJZpTdwHjFGTot+fDz2LYLSC
jaoITmJF4PkL0uDgPFveXHEnJcLmA4GLEFPjx1WitJ/X5g==
-----END CERTIFICATE-----
SSL.com Root Certification Authority RSA
========================================
-----BEGIN CERTIFICATE-----
@ -3504,3 +3316,48 @@ BggqhkjOPQQDAwNoADBlAjAVXUI9/Lbu9zuxNuie9sRGKEkz0FhDKmMpzE2xtHqiuQ04pV1IKv3L
snNdo4gIxwwCMQDAqy0Obe0YottT6SXbVQjgUMzfRGEWgqtJsLKB7HOHeLRMsmIbEvoWTSVLY70e
N9k=
-----END CERTIFICATE-----
BJCA Global Root CA1
====================
-----BEGIN CERTIFICATE-----
MIIFdDCCA1ygAwIBAgIQVW9l47TZkGobCdFsPsBsIDANBgkqhkiG9w0BAQsFADBUMQswCQYDVQQG
EwJDTjEmMCQGA1UECgwdQkVJSklORyBDRVJUSUZJQ0FURSBBVVRIT1JJVFkxHTAbBgNVBAMMFEJK
Q0EgR2xvYmFsIFJvb3QgQ0ExMB4XDTE5MTIxOTAzMTYxN1oXDTQ0MTIxMjAzMTYxN1owVDELMAkG
A1UEBhMCQ04xJjAkBgNVBAoMHUJFSUpJTkcgQ0VSVElGSUNBVEUgQVVUSE9SSVRZMR0wGwYDVQQD
DBRCSkNBIEdsb2JhbCBSb290IENBMTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAPFm
CL3ZxRVhy4QEQaVpN3cdwbB7+sN3SJATcmTRuHyQNZ0YeYjjlwE8R4HyDqKYDZ4/N+AZspDyRhyS
sTphzvq3Rp4Dhtczbu33RYx2N95ulpH3134rhxfVizXuhJFyV9xgw8O558dnJCNPYwpj9mZ9S1Wn
P3hkSWkSl+BMDdMJoDIwOvqfwPKcxRIqLhy1BDPapDgRat7GGPZHOiJBhyL8xIkoVNiMpTAK+BcW
yqw3/XmnkRd4OJmtWO2y3syJfQOcs4ll5+M7sSKGjwZteAf9kRJ/sGsciQ35uMt0WwfCyPQ10WRj
eulumijWML3mG90Vr4TqnMfK9Q7q8l0ph49pczm+LiRvRSGsxdRpJQaDrXpIhRMsDQa4bHlW/KNn
MoH1V6XKV0Jp6VwkYe/iMBhORJhVb3rCk9gZtt58R4oRTklH2yiUAguUSiz5EtBP6DF+bHq/pj+b
OT0CFqMYs2esWz8sgytnOYFcuX6U1WTdno9uruh8W7TXakdI136z1C2OVnZOz2nxbkRs1CTqjSSh
GL+9V/6pmTW12xB3uD1IutbB5/EjPtffhZ0nPNRAvQoMvfXnjSXWgXSHRtQpdaJCbPdzied9v3pK
H9MiyRVVz99vfFXQpIsHETdfg6YmV6YBW37+WGgHqel62bno/1Afq8K0wM7o6v0PvY1NuLxxAgMB
AAGjQjBAMB0GA1UdDgQWBBTF7+3M2I0hxkjk49cULqcWk+WYATAPBgNVHRMBAf8EBTADAQH/MA4G
A1UdDwEB/wQEAwIBBjANBgkqhkiG9w0BAQsFAAOCAgEAUoKsITQfI/Ki2Pm4rzc2IInRNwPWaZ+4
YRC6ojGYWUfo0Q0lHhVBDOAqVdVXUsv45Mdpox1NcQJeXyFFYEhcCY5JEMEE3KliawLwQ8hOnThJ
dMkycFRtwUf8jrQ2ntScvd0g1lPJGKm1Vrl2i5VnZu69mP6u775u+2D2/VnGKhs/I0qUJDAnyIm8
60Qkmss9vk/Ves6OF8tiwdneHg56/0OGNFK8YT88X7vZdrRTvJez/opMEi4r89fO4aL/3Xtw+zuh
TaRjAv04l5U/BXCga99igUOLtFkNSoxUnMW7gZ/NfaXvCyUeOiDbHPwfmGcCCtRzRBPbUYQaVQNW
4AB+dAb/OMRyHdOoP2gxXdMJxy6MW2Pg6Nwe0uxhHvLe5e/2mXZgLR6UcnHGCyoyx5JO1UbXHfmp
GQrI+pXObSOYqgs4rZpWDW+N8TEAiMEXnM0ZNjX+VVOg4DwzX5Ze4jLp3zO7Bkqp2IRzznfSxqxx
4VyjHQy7Ct9f4qNx2No3WqB4K/TUfet27fJhcKVlmtOJNBir+3I+17Q9eVzYH6Eze9mCUAyTF6ps
3MKCuwJXNq+YJyo5UOGwifUll35HaBC07HPKs5fRJNz2YqAo07WjuGS3iGJCz51TzZm+ZGiPTx4S
SPfSKcOYKMryMguTjClPPGAyzQWWYezyr/6zcCwupvI=
-----END CERTIFICATE-----
BJCA Global Root CA2
====================
-----BEGIN CERTIFICATE-----
MIICJTCCAaugAwIBAgIQLBcIfWQqwP6FGFkGz7RK6zAKBggqhkjOPQQDAzBUMQswCQYDVQQGEwJD
TjEmMCQGA1UECgwdQkVJSklORyBDRVJUSUZJQ0FURSBBVVRIT1JJVFkxHTAbBgNVBAMMFEJKQ0Eg
R2xvYmFsIFJvb3QgQ0EyMB4XDTE5MTIxOTAzMTgyMVoXDTQ0MTIxMjAzMTgyMVowVDELMAkGA1UE
BhMCQ04xJjAkBgNVBAoMHUJFSUpJTkcgQ0VSVElGSUNBVEUgQVVUSE9SSVRZMR0wGwYDVQQDDBRC
SkNBIEdsb2JhbCBSb290IENBMjB2MBAGByqGSM49AgEGBSuBBAAiA2IABJ3LgJGNU2e1uVCxA/jl
SR9BIgmwUVJY1is0j8USRhTFiy8shP8sbqjV8QnjAyEUxEM9fMEsxEtqSs3ph+B99iK++kpRuDCK
/eHeGBIK9ke35xe/J4rUQUyWPGCWwf0VHKNCMEAwHQYDVR0OBBYEFNJKsVF/BvDRgh9Obl+rg/xI
1LCRMA8GA1UdEwEB/wQFMAMBAf8wDgYDVR0PAQH/BAQDAgEGMAoGCCqGSM49BAMDA2gAMGUCMBq8
W9f+qdJUDkpd0m2xQNz0Q9XSSpkZElaA94M04TVOSG0ED1cxMDAtsaqdAzjbBgIxAMvMh1PLet8g
UXOQwKhbYdDFUDn9hf7B43j4ptZLvZuHjw/l1lOWqzzIQNph91Oj9w==
-----END CERTIFICATE-----

View File

@ -11,11 +11,13 @@ Contributors:
- Djordje Pesut (djordje dot pesut at imgtec dot com)
- Frank Barchard (fbarchard at google dot com)
- Hui Su (huisu at google dot com)
- H. Vetinari (h dot vetinari at gmx dot com)
- Ilya Kurdyukov (jpegqs at gmail dot com)
- Ingvar Stepanyan (rreverser at google dot com)
- James Zern (jzern at google dot com)
- Jan Engelhardt (jengelh at medozas dot de)
- Jehan (jehan at girinstud dot io)
- Jeremy Maitin-Shepard (jbms at google dot com)
- Johann Koenig (johann dot koenig at duck dot com)
- Jovan Zelincevic (jovan dot zelincevic at imgtec dot com)
- Jyrki Alakuijala (jyrki at google dot com)
@ -30,6 +32,7 @@ Contributors:
- Mislav Bradac (mislavm at google dot com)
- Nico Weber (thakis at chromium dot org)
- Noel Chromium (noel at chromium dot org)
- Nozomi Isozaki (nontan at pixiv dot co dot jp)
- Oliver Wolff (oliver dot wolff at qt dot io)
- Owen Rodley (orodley at google dot com)
- Parag Salasakar (img dot mips1 at gmail dot com)
@ -45,6 +48,7 @@ Contributors:
- Somnath Banerjee (somnath dot banerjee at gmail dot com)
- Sriraman Tallam (tmsriram at google dot com)
- Tamar Levy (tamar dot levy at intel dot com)
- Thiago Perrotta (tperrotta at google dot com)
- Timothy Gu (timothygu99 at gmail dot com)
- Urvang Joshi (urvang at google dot com)
- Vikas Arora (vikasa at google dot com)

View File

@ -15,15 +15,21 @@
#include <assert.h>
#include <limits.h>
#include <math.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include "src/webp/types.h"
#include "src/dsp/cpu.h"
#include "sharpyuv/sharpyuv_cpu.h"
#include "sharpyuv/sharpyuv_dsp.h"
#include "sharpyuv/sharpyuv_gamma.h"
//------------------------------------------------------------------------------
int SharpYuvGetVersion(void) {
return SHARPYUV_VERSION;
}
//------------------------------------------------------------------------------
// Sharp RGB->YUV conversion
@ -414,24 +420,46 @@ static int DoSharpArgbToYuv(const uint8_t* r_ptr, const uint8_t* g_ptr,
}
#undef SAFE_ALLOC
#if defined(WEBP_USE_THREAD) && !defined(_WIN32)
#include <pthread.h> // NOLINT
#define LOCK_ACCESS \
static pthread_mutex_t sharpyuv_lock = PTHREAD_MUTEX_INITIALIZER; \
if (pthread_mutex_lock(&sharpyuv_lock)) return
#define UNLOCK_ACCESS_AND_RETURN \
do { \
(void)pthread_mutex_unlock(&sharpyuv_lock); \
return; \
} while (0)
#else // !(defined(WEBP_USE_THREAD) && !defined(_WIN32))
#define LOCK_ACCESS do {} while (0)
#define UNLOCK_ACCESS_AND_RETURN return
#endif // defined(WEBP_USE_THREAD) && !defined(_WIN32)
// Hidden exported init function.
// By default SharpYuvConvert calls it with NULL. If needed, users can declare
// it as extern and call it with a VP8CPUInfo function.
extern void SharpYuvInit(VP8CPUInfo cpu_info_func);
// By default SharpYuvConvert calls it with SharpYuvGetCPUInfo. If needed,
// users can declare it as extern and call it with an alternate VP8CPUInfo
// function.
extern VP8CPUInfo SharpYuvGetCPUInfo;
SHARPYUV_EXTERN void SharpYuvInit(VP8CPUInfo cpu_info_func);
void SharpYuvInit(VP8CPUInfo cpu_info_func) {
static volatile VP8CPUInfo sharpyuv_last_cpuinfo_used =
(VP8CPUInfo)&sharpyuv_last_cpuinfo_used;
const int initialized =
(sharpyuv_last_cpuinfo_used != (VP8CPUInfo)&sharpyuv_last_cpuinfo_used);
if (cpu_info_func == NULL && initialized) return;
if (sharpyuv_last_cpuinfo_used == cpu_info_func) return;
SharpYuvInitDsp(cpu_info_func);
if (!initialized) {
SharpYuvInitGammaTables();
LOCK_ACCESS;
// Only update SharpYuvGetCPUInfo when called from external code to avoid a
// race on reading the value in SharpYuvConvert().
if (cpu_info_func != (VP8CPUInfo)&SharpYuvGetCPUInfo) {
SharpYuvGetCPUInfo = cpu_info_func;
}
if (sharpyuv_last_cpuinfo_used == SharpYuvGetCPUInfo) {
UNLOCK_ACCESS_AND_RETURN;
}
sharpyuv_last_cpuinfo_used = cpu_info_func;
SharpYuvInitDsp();
SharpYuvInitGammaTables();
sharpyuv_last_cpuinfo_used = SharpYuvGetCPUInfo;
UNLOCK_ACCESS_AND_RETURN;
}
int SharpYuvConvert(const void* r_ptr, const void* g_ptr,
@ -467,7 +495,8 @@ int SharpYuvConvert(const void* r_ptr, const void* g_ptr,
// Stride should be even for uint16_t buffers.
return 0;
}
SharpYuvInit(NULL);
// The address of the function pointer is used to avoid a read race.
SharpYuvInit((VP8CPUInfo)&SharpYuvGetCPUInfo);
// Add scaling factor to go from rgb_bit_depth to yuv_bit_depth, to the
// rgb->yuv conversion matrix.

View File

@ -12,16 +12,32 @@
#ifndef WEBP_SHARPYUV_SHARPYUV_H_
#define WEBP_SHARPYUV_SHARPYUV_H_
#include <inttypes.h>
#ifdef __cplusplus
extern "C" {
#endif
#ifndef SHARPYUV_EXTERN
#ifdef WEBP_EXTERN
#define SHARPYUV_EXTERN WEBP_EXTERN
#else
// This explicitly marks library functions and allows for changing the
// signature for e.g., Windows DLL builds.
#if defined(__GNUC__) && __GNUC__ >= 4
#define SHARPYUV_EXTERN extern __attribute__((visibility("default")))
#else
#if defined(_MSC_VER) && defined(WEBP_DLL)
#define SHARPYUV_EXTERN __declspec(dllexport)
#else
#define SHARPYUV_EXTERN extern
#endif /* _MSC_VER && WEBP_DLL */
#endif /* __GNUC__ >= 4 */
#endif /* WEBP_EXTERN */
#endif /* SHARPYUV_EXTERN */
// SharpYUV API version following the convention from semver.org
#define SHARPYUV_VERSION_MAJOR 0
#define SHARPYUV_VERSION_MINOR 1
#define SHARPYUV_VERSION_PATCH 0
#define SHARPYUV_VERSION_MINOR 2
#define SHARPYUV_VERSION_PATCH 1
// Version as a uint32_t. The major number is the high 8 bits.
// The minor number is the middle 8 bits. The patch number is the low 16 bits.
#define SHARPYUV_MAKE_VERSION(MAJOR, MINOR, PATCH) \
@ -30,6 +46,10 @@ extern "C" {
SHARPYUV_MAKE_VERSION(SHARPYUV_VERSION_MAJOR, SHARPYUV_VERSION_MINOR, \
SHARPYUV_VERSION_PATCH)
// Returns the library's version number, packed in hexadecimal. See
// SHARPYUV_VERSION.
SHARPYUV_EXTERN int SharpYuvGetVersion(void);
// RGB to YUV conversion matrix, in 16 bit fixed point.
// y = rgb_to_y[0] * r + rgb_to_y[1] * g + rgb_to_y[2] * b + rgb_to_y[3]
// u = rgb_to_u[0] * r + rgb_to_u[1] * g + rgb_to_u[2] * b + rgb_to_u[3]
@ -65,11 +85,13 @@ typedef struct {
// adjacent pixels on the y, u and v channels. If yuv_bit_depth > 8, they
// should be multiples of 2.
// width, height: width and height of the image in pixels
int SharpYuvConvert(const void* r_ptr, const void* g_ptr, const void* b_ptr,
int rgb_step, int rgb_stride, int rgb_bit_depth,
void* y_ptr, int y_stride, void* u_ptr, int u_stride,
void* v_ptr, int v_stride, int yuv_bit_depth, int width,
int height, const SharpYuvConversionMatrix* yuv_matrix);
SHARPYUV_EXTERN int SharpYuvConvert(const void* r_ptr, const void* g_ptr,
const void* b_ptr, int rgb_step,
int rgb_stride, int rgb_bit_depth,
void* y_ptr, int y_stride, void* u_ptr,
int u_stride, void* v_ptr, int v_stride,
int yuv_bit_depth, int width, int height,
const SharpYuvConversionMatrix* yuv_matrix);
// TODO(b/194336375): Add YUV444 to YUV420 conversion. Maybe also add 422
// support (it's rarely used in practice, especially for images).

View File

@ -0,0 +1,14 @@
// Copyright 2022 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
#include "sharpyuv/sharpyuv_cpu.h"
// Include src/dsp/cpu.c to create SharpYuvGetCPUInfo from VP8GetCPUInfo. The
// function pointer is renamed in sharpyuv_cpu.h.
#include "src/dsp/cpu.c"

View File

@ -0,0 +1,22 @@
// Copyright 2022 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
#ifndef WEBP_SHARPYUV_SHARPYUV_CPU_H_
#define WEBP_SHARPYUV_SHARPYUV_CPU_H_
#include "sharpyuv/sharpyuv.h"
// Avoid exporting SharpYuvGetCPUInfo in shared object / DLL builds.
// SharpYuvInit() replaces the use of the function pointer.
#undef WEBP_EXTERN
#define WEBP_EXTERN extern
#define VP8GetCPUInfo SharpYuvGetCPUInfo
#include "src/dsp/cpu.h"
#endif // WEBP_SHARPYUV_SHARPYUV_CPU_H_

View File

@ -13,7 +13,7 @@
#include <assert.h>
#include <math.h>
#include <string.h>
#include <stddef.h>
static int ToFixed16(float f) { return (int)floor(f * (1 << 16) + 0.5f); }

View File

@ -35,8 +35,9 @@ typedef struct {
} SharpYuvColorSpace;
// Fills in 'matrix' for the given YUVColorSpace.
void SharpYuvComputeConversionMatrix(const SharpYuvColorSpace* yuv_color_space,
SharpYuvConversionMatrix* matrix);
SHARPYUV_EXTERN void SharpYuvComputeConversionMatrix(
const SharpYuvColorSpace* yuv_color_space,
SharpYuvConversionMatrix* matrix);
// Enums for precomputed conversion matrices.
typedef enum {
@ -49,7 +50,7 @@ typedef enum {
} SharpYuvMatrixType;
// Returns a pointer to a matrix for one of the predefined colorspaces.
const SharpYuvConversionMatrix* SharpYuvGetConversionMatrix(
SHARPYUV_EXTERN const SharpYuvConversionMatrix* SharpYuvGetConversionMatrix(
SharpYuvMatrixType matrix_type);
#ifdef __cplusplus

View File

@ -16,7 +16,7 @@
#include <assert.h>
#include <stdlib.h>
#include "src/dsp/cpu.h"
#include "sharpyuv/sharpyuv_cpu.h"
//-----------------------------------------------------------------------------
@ -72,26 +72,28 @@ void (*SharpYuvFilterRow)(const int16_t* A, const int16_t* B, int len,
const uint16_t* best_y, uint16_t* out,
int bit_depth);
extern VP8CPUInfo SharpYuvGetCPUInfo;
extern void InitSharpYuvSSE2(void);
extern void InitSharpYuvNEON(void);
void SharpYuvInitDsp(VP8CPUInfo cpu_info_func) {
(void)cpu_info_func;
void SharpYuvInitDsp(void) {
#if !WEBP_NEON_OMIT_C_CODE
SharpYuvUpdateY = SharpYuvUpdateY_C;
SharpYuvUpdateRGB = SharpYuvUpdateRGB_C;
SharpYuvFilterRow = SharpYuvFilterRow_C;
#endif
if (SharpYuvGetCPUInfo != NULL) {
#if defined(WEBP_HAVE_SSE2)
if (cpu_info_func == NULL || cpu_info_func(kSSE2)) {
InitSharpYuvSSE2();
}
if (SharpYuvGetCPUInfo(kSSE2)) {
InitSharpYuvSSE2();
}
#endif // WEBP_HAVE_SSE2
}
#if defined(WEBP_HAVE_NEON)
if (WEBP_NEON_OMIT_C_CODE || cpu_info_func == NULL || cpu_info_func(kNEON)) {
if (WEBP_NEON_OMIT_C_CODE ||
(SharpYuvGetCPUInfo != NULL && SharpYuvGetCPUInfo(kNEON))) {
InitSharpYuvNEON();
}
#endif // WEBP_HAVE_NEON

View File

@ -12,9 +12,8 @@
#ifndef WEBP_SHARPYUV_SHARPYUV_DSP_H_
#define WEBP_SHARPYUV_SHARPYUV_DSP_H_
#include <stdint.h>
#include "src/dsp/cpu.h"
#include "sharpyuv/sharpyuv_cpu.h"
#include "src/webp/types.h"
extern uint64_t (*SharpYuvUpdateY)(const uint16_t* src, const uint16_t* ref,
uint16_t* dst, int len, int bit_depth);
@ -24,6 +23,6 @@ extern void (*SharpYuvFilterRow)(const int16_t* A, const int16_t* B, int len,
const uint16_t* best_y, uint16_t* out,
int bit_depth);
void SharpYuvInitDsp(VP8CPUInfo cpu_info_func);
void SharpYuvInitDsp(void);
#endif // WEBP_SHARPYUV_SHARPYUV_DSP_H_

View File

@ -13,7 +13,6 @@
#include <assert.h>
#include <math.h>
#include <stdint.h>
#include "src/webp/types.h"

View File

@ -12,7 +12,7 @@
#ifndef WEBP_SHARPYUV_SHARPYUV_GAMMA_H_
#define WEBP_SHARPYUV_SHARPYUV_GAMMA_H_
#include <stdint.h>
#include "src/webp/types.h"
#ifdef __cplusplus
extern "C" {

View File

@ -17,11 +17,6 @@
#include <assert.h>
#include <stdlib.h>
#include <arm_neon.h>
#endif
extern void InitSharpYuvNEON(void);
#if defined(WEBP_USE_NEON)
static uint16_t clip_NEON(int v, int max) {
return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v;
@ -169,6 +164,8 @@ static void SharpYuvFilterRow_NEON(const int16_t* A, const int16_t* B, int len,
//------------------------------------------------------------------------------
extern void InitSharpYuvNEON(void);
WEBP_TSAN_IGNORE_FUNCTION void InitSharpYuvNEON(void) {
SharpYuvUpdateY = SharpYuvUpdateY_NEON;
SharpYuvUpdateRGB = SharpYuvUpdateRGB_NEON;
@ -177,6 +174,8 @@ WEBP_TSAN_IGNORE_FUNCTION void InitSharpYuvNEON(void) {
#else // !WEBP_USE_NEON
extern void InitSharpYuvNEON(void);
void InitSharpYuvNEON(void) {}
#endif // WEBP_USE_NEON

View File

@ -16,11 +16,6 @@
#if defined(WEBP_USE_SSE2)
#include <stdlib.h>
#include <emmintrin.h>
#endif
extern void InitSharpYuvSSE2(void);
#if defined(WEBP_USE_SSE2)
static uint16_t clip_SSE2(int v, int max) {
return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v;
@ -199,6 +194,8 @@ WEBP_TSAN_IGNORE_FUNCTION void InitSharpYuvSSE2(void) {
}
#else // !WEBP_USE_SSE2
extern void InitSharpYuvSSE2(void);
void InitSharpYuvSSE2(void) {}
#endif // WEBP_USE_SSE2

View File

@ -12,10 +12,11 @@
// Author: Skal (pascal.massimino@gmail.com)
#include "src/dec/vp8i_dec.h"
#include "src/dsp/cpu.h"
#include "src/utils/bit_reader_inl_utils.h"
#if !defined(USE_GENERIC_TREE)
#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__)
#if !defined(__arm__) && !defined(_M_ARM) && !WEBP_AARCH64
// using a table is ~1-2% slower on ARM. Prefer the coded-tree approach then.
#define USE_GENERIC_TREE 1 // ALTERNATE_CODE
#else

View File

@ -494,6 +494,8 @@ static int GetCoeffsAlt(VP8BitReader* const br,
return 16;
}
extern VP8CPUInfo VP8GetCPUInfo;
WEBP_DSP_INIT_FUNC(InitGetCoeffs) {
if (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kSlowSSSE3)) {
GetCoeffs = GetCoeffsAlt;

View File

@ -31,8 +31,8 @@ extern "C" {
// version numbers
#define DEC_MAJ_VERSION 1
#define DEC_MIN_VERSION 2
#define DEC_REV_VERSION 4
#define DEC_MIN_VERSION 3
#define DEC_REV_VERSION 1
// YUV-cache parameters. Cache is 32-bytes wide (= one cacheline).
// Constraints are: We need to store one 16x16 block of luma samples (y),

View File

@ -1336,7 +1336,7 @@ static int ReadTransform(int* const xsize, int const* ysize,
ok = ok && ExpandColorMap(num_colors, transform);
break;
}
case SUBTRACT_GREEN:
case SUBTRACT_GREEN_TRANSFORM:
break;
default:
assert(0); // can't happen

View File

@ -179,7 +179,7 @@ static VP8StatusCode ParseOptionalChunks(const uint8_t** const data,
return VP8_STATUS_BITSTREAM_ERROR; // Not a valid chunk size.
}
// For odd-sized chunk-payload, there's one byte padding at the end.
disk_chunk_size = (CHUNK_HEADER_SIZE + chunk_size + 1) & ~1;
disk_chunk_size = (CHUNK_HEADER_SIZE + chunk_size + 1) & ~1u;
total_size += disk_chunk_size;
// Check that total bytes skipped so far does not exceed riff_size.
@ -658,19 +658,26 @@ uint8_t* WebPDecodeBGRA(const uint8_t* data, size_t data_size,
uint8_t* WebPDecodeYUV(const uint8_t* data, size_t data_size,
int* width, int* height, uint8_t** u, uint8_t** v,
int* stride, int* uv_stride) {
WebPDecBuffer output; // only to preserve the side-infos
uint8_t* const out = Decode(MODE_YUV, data, data_size,
width, height, &output);
if (out != NULL) {
const WebPYUVABuffer* const buf = &output.u.YUVA;
*u = buf->u;
*v = buf->v;
*stride = buf->y_stride;
*uv_stride = buf->u_stride;
assert(buf->u_stride == buf->v_stride);
// data, width and height are checked by Decode().
if (u == NULL || v == NULL || stride == NULL || uv_stride == NULL) {
return NULL;
}
{
WebPDecBuffer output; // only to preserve the side-infos
uint8_t* const out = Decode(MODE_YUV, data, data_size,
width, height, &output);
if (out != NULL) {
const WebPYUVABuffer* const buf = &output.u.YUVA;
*u = buf->u;
*v = buf->v;
*stride = buf->y_stride;
*uv_stride = buf->u_stride;
assert(buf->u_stride == buf->v_stride);
}
return out;
}
return out;
}
static void DefaultFeatures(WebPBitstreamFeatures* const features) {

View File

@ -24,8 +24,8 @@
#include "src/webp/format_constants.h"
#define DMUX_MAJ_VERSION 1
#define DMUX_MIN_VERSION 2
#define DMUX_REV_VERSION 4
#define DMUX_MIN_VERSION 3
#define DMUX_REV_VERSION 1
typedef struct {
size_t start_; // start location of the data

View File

@ -425,6 +425,7 @@ void (*WebPAlphaReplace)(uint32_t* src, int length, uint32_t color);
//------------------------------------------------------------------------------
// Init function
extern VP8CPUInfo VP8GetCPUInfo;
extern void WebPInitAlphaProcessingMIPSdspR2(void);
extern void WebPInitAlphaProcessingSSE2(void);
extern void WebPInitAlphaProcessingSSE41(void);

View File

@ -26,8 +26,8 @@ static int DispatchAlpha_SSE2(const uint8_t* WEBP_RESTRICT alpha,
uint32_t alpha_and = 0xff;
int i, j;
const __m128i zero = _mm_setzero_si128();
const __m128i rgb_mask = _mm_set1_epi32(0xffffff00u); // to preserve RGB
const __m128i all_0xff = _mm_set_epi32(0, 0, ~0u, ~0u);
const __m128i rgb_mask = _mm_set1_epi32((int)0xffffff00); // to preserve RGB
const __m128i all_0xff = _mm_set_epi32(0, 0, ~0, ~0);
__m128i all_alphas = all_0xff;
// We must be able to access 3 extra bytes after the last written byte
@ -106,8 +106,8 @@ static int ExtractAlpha_SSE2(const uint8_t* WEBP_RESTRICT argb, int argb_stride,
// value is not 0xff if any of the alpha[] is not equal to 0xff.
uint32_t alpha_and = 0xff;
int i, j;
const __m128i a_mask = _mm_set1_epi32(0xffu); // to preserve alpha
const __m128i all_0xff = _mm_set_epi32(0, 0, ~0u, ~0u);
const __m128i a_mask = _mm_set1_epi32(0xff); // to preserve alpha
const __m128i all_0xff = _mm_set_epi32(0, 0, ~0, ~0);
__m128i all_alphas = all_0xff;
// We must be able to access 3 extra bytes after the last written byte
@ -178,7 +178,7 @@ static int ExtractAlpha_SSE2(const uint8_t* WEBP_RESTRICT argb, int argb_stride,
static void ApplyAlphaMultiply_SSE2(uint8_t* rgba, int alpha_first,
int w, int h, int stride) {
const __m128i zero = _mm_setzero_si128();
const __m128i kMult = _mm_set1_epi16(0x8081u);
const __m128i kMult = _mm_set1_epi16((short)0x8081);
const __m128i kMask = _mm_set_epi16(0, 0xff, 0xff, 0, 0, 0xff, 0xff, 0);
const int kSpan = 4;
while (h-- > 0) {
@ -267,7 +267,7 @@ static int HasAlpha32b_SSE2(const uint8_t* src, int length) {
}
static void AlphaReplace_SSE2(uint32_t* src, int length, uint32_t color) {
const __m128i m_color = _mm_set1_epi32(color);
const __m128i m_color = _mm_set1_epi32((int)color);
const __m128i zero = _mm_setzero_si128();
int i = 0;
for (; i + 8 <= length; i += 8) {

View File

@ -26,7 +26,7 @@ static int ExtractAlpha_SSE41(const uint8_t* WEBP_RESTRICT argb,
// value is not 0xff if any of the alpha[] is not equal to 0xff.
uint32_t alpha_and = 0xff;
int i, j;
const __m128i all_0xff = _mm_set1_epi32(~0u);
const __m128i all_0xff = _mm_set1_epi32(~0);
__m128i all_alphas = all_0xff;
// We must be able to access 3 extra bytes after the last written byte

View File

@ -374,6 +374,7 @@ static void SetResidualCoeffs_C(const int16_t* const coeffs,
VP8GetResidualCostFunc VP8GetResidualCost;
VP8SetResidualCoeffsFunc VP8SetResidualCoeffs;
extern VP8CPUInfo VP8GetCPUInfo;
extern void VP8EncDspCostInitMIPS32(void);
extern void VP8EncDspCostInitMIPSdspR2(void);
extern void VP8EncDspCostInitSSE2(void);

View File

@ -29,7 +29,7 @@ static void SetResidualCoeffs_NEON(const int16_t* const coeffs,
const uint8x16_t eob = vcombine_u8(vqmovn_u16(eob_0), vqmovn_u16(eob_1));
const uint8x16_t masked = vandq_u8(eob, vld1q_u8(position));
#ifdef __aarch64__
#if WEBP_AARCH64
res->last = vmaxvq_u8(masked) - 1;
#else
const uint8x8_t eob_8x8 = vmax_u8(vget_low_u8(masked), vget_high_u8(masked));
@ -43,7 +43,7 @@ static void SetResidualCoeffs_NEON(const int16_t* const coeffs,
vst1_lane_s32(&res->last, vreinterpret_s32_u32(eob_32x2), 0);
--res->last;
#endif // __aarch64__
#endif // WEBP_AARCH64
res->coeffs = coeffs;
}

View File

@ -173,6 +173,7 @@ static int x86CPUInfo(CPUFeature feature) {
}
return 0;
}
WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
VP8CPUInfo VP8GetCPUInfo = x86CPUInfo;
#elif defined(WEBP_ANDROID_NEON) // NB: needs to be before generic NEON test.
static int AndroidCPUInfo(CPUFeature feature) {
@ -184,6 +185,7 @@ static int AndroidCPUInfo(CPUFeature feature) {
}
return 0;
}
WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
VP8CPUInfo VP8GetCPUInfo = AndroidCPUInfo;
#elif defined(EMSCRIPTEN) // also needs to be before generic NEON test
// Use compile flags as an indicator of SIMD support instead of a runtime check.
@ -208,11 +210,12 @@ static int wasmCPUInfo(CPUFeature feature) {
}
return 0;
}
WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
VP8CPUInfo VP8GetCPUInfo = wasmCPUInfo;
#elif defined(WEBP_HAVE_NEON)
// In most cases this function doesn't check for NEON support (it's assumed by
// the configuration), but enables turning off NEON at runtime, for testing
// purposes, by setting VP8DecGetCPUInfo = NULL.
// purposes, by setting VP8GetCPUInfo = NULL.
static int armCPUInfo(CPUFeature feature) {
if (feature != kNEON) return 0;
#if defined(__linux__) && defined(WEBP_HAVE_NEON_RTCD)
@ -236,6 +239,7 @@ static int armCPUInfo(CPUFeature feature) {
return 1;
#endif
}
WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
VP8CPUInfo VP8GetCPUInfo = armCPUInfo;
#elif defined(WEBP_USE_MIPS32) || defined(WEBP_USE_MIPS_DSP_R2) || \
defined(WEBP_USE_MSA)
@ -247,7 +251,9 @@ static int mipsCPUInfo(CPUFeature feature) {
}
}
WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
VP8CPUInfo VP8GetCPUInfo = mipsCPUInfo;
#else
WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
VP8CPUInfo VP8GetCPUInfo = NULL;
#endif

View File

@ -14,6 +14,8 @@
#ifndef WEBP_DSP_CPU_H_
#define WEBP_DSP_CPU_H_
#include <stddef.h>
#ifdef HAVE_CONFIG_H
#include "src/webp/config.h"
#endif
@ -41,6 +43,9 @@
#define __has_builtin(x) 0
#endif
//------------------------------------------------------------------------------
// x86 defines.
#if !defined(HAVE_CONFIG_H)
#if defined(_MSC_VER) && _MSC_VER > 1310 && \
(defined(_M_X64) || defined(_M_IX86))
@ -78,6 +83,9 @@
#undef WEBP_MSC_SSE41
#undef WEBP_MSC_SSE2
//------------------------------------------------------------------------------
// Arm defines.
// The intrinsics currently cause compiler errors with arm-nacl-gcc and the
// inline assembly would need to be modified for use with Native Client.
#if ((defined(__ARM_NEON__) || defined(__aarch64__)) && \
@ -96,16 +104,26 @@
// inclusion of arm64_neon.h; Visual Studio 2019 includes this file in
// arm_neon.h. Compile errors were seen with Visual Studio 2019 16.4 with
// vtbl4_u8(); a fix was made in 16.6.
#if defined(_MSC_VER) && ((_MSC_VER >= 1700 && defined(_M_ARM)) || \
(_MSC_VER >= 1926 && defined(_M_ARM64)))
#if defined(_MSC_VER) && \
((_MSC_VER >= 1700 && defined(_M_ARM)) || \
(_MSC_VER >= 1926 && (defined(_M_ARM64) || defined(_M_ARM64EC))))
#define WEBP_USE_NEON
#define WEBP_USE_INTRINSICS
#endif
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
#define WEBP_AARCH64 1
#else
#define WEBP_AARCH64 0
#endif
#if defined(WEBP_USE_NEON) && !defined(WEBP_HAVE_NEON)
#define WEBP_HAVE_NEON
#endif
//------------------------------------------------------------------------------
// MIPS defines.
#if defined(__mips__) && !defined(__mips64) && defined(__mips_isa_rev) && \
(__mips_isa_rev >= 1) && (__mips_isa_rev < 6)
#define WEBP_USE_MIPS32
@ -121,6 +139,8 @@
#define WEBP_USE_MSA
#endif
//------------------------------------------------------------------------------
#ifndef WEBP_DSP_OMIT_C_CODE
#define WEBP_DSP_OMIT_C_CODE 1
#endif
@ -131,13 +151,14 @@
#define WEBP_NEON_OMIT_C_CODE 0
#endif
#if !(LOCAL_CLANG_PREREQ(3, 8) || LOCAL_GCC_PREREQ(4, 8) || \
defined(__aarch64__))
#if !(LOCAL_CLANG_PREREQ(3, 8) || LOCAL_GCC_PREREQ(4, 8) || WEBP_AARCH64)
#define WEBP_NEON_WORK_AROUND_GCC 1
#else
#define WEBP_NEON_WORK_AROUND_GCC 0
#endif
//------------------------------------------------------------------------------
// This macro prevents thread_sanitizer from reporting known concurrent writes.
#define WEBP_TSAN_IGNORE_FUNCTION
#if defined(__has_feature)
@ -239,16 +260,7 @@ typedef enum {
kMSA
} CPUFeature;
#ifdef __cplusplus
extern "C" {
#endif
// returns true if the CPU supports the feature.
typedef int (*VP8CPUInfo)(CPUFeature feature);
WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
#ifdef __cplusplus
} // extern "C"
#endif
#endif // WEBP_DSP_CPU_H_

View File

@ -734,6 +734,7 @@ VP8SimpleFilterFunc VP8SimpleHFilter16i;
void (*VP8DitherCombine8x8)(const uint8_t* dither, uint8_t* dst,
int dst_stride);
extern VP8CPUInfo VP8GetCPUInfo;
extern void VP8DspInitSSE2(void);
extern void VP8DspInitSSE41(void);
extern void VP8DspInitNEON(void);

View File

@ -1428,7 +1428,7 @@ static WEBP_INLINE void DC8_NEON(uint8_t* dst, int do_top, int do_left) {
if (do_top) {
const uint8x8_t A = vld1_u8(dst - BPS); // top row
#if defined(__aarch64__)
#if WEBP_AARCH64
const uint16_t p2 = vaddlv_u8(A);
sum_top = vdupq_n_u16(p2);
#else
@ -1511,7 +1511,7 @@ static WEBP_INLINE void DC16_NEON(uint8_t* dst, int do_top, int do_left) {
if (do_top) {
const uint8x16_t A = vld1q_u8(dst - BPS); // top row
#if defined(__aarch64__)
#if WEBP_AARCH64
const uint16_t p3 = vaddlvq_u8(A);
sum_top = vdupq_n_u16(p3);
#else

View File

@ -158,10 +158,10 @@ static void Transform_SSE2(const int16_t* in, uint8_t* dst, int do_two) {
dst3 = _mm_loadl_epi64((__m128i*)(dst + 3 * BPS));
} else {
// Load four bytes/pixels per line.
dst0 = _mm_cvtsi32_si128(WebPMemToUint32(dst + 0 * BPS));
dst1 = _mm_cvtsi32_si128(WebPMemToUint32(dst + 1 * BPS));
dst2 = _mm_cvtsi32_si128(WebPMemToUint32(dst + 2 * BPS));
dst3 = _mm_cvtsi32_si128(WebPMemToUint32(dst + 3 * BPS));
dst0 = _mm_cvtsi32_si128(WebPMemToInt32(dst + 0 * BPS));
dst1 = _mm_cvtsi32_si128(WebPMemToInt32(dst + 1 * BPS));
dst2 = _mm_cvtsi32_si128(WebPMemToInt32(dst + 2 * BPS));
dst3 = _mm_cvtsi32_si128(WebPMemToInt32(dst + 3 * BPS));
}
// Convert to 16b.
dst0 = _mm_unpacklo_epi8(dst0, zero);
@ -187,10 +187,10 @@ static void Transform_SSE2(const int16_t* in, uint8_t* dst, int do_two) {
_mm_storel_epi64((__m128i*)(dst + 3 * BPS), dst3);
} else {
// Store four bytes/pixels per line.
WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(dst0));
WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(dst1));
WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(dst2));
WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(dst3));
WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(dst0));
WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(dst1));
WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(dst2));
WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(dst3));
}
}
}
@ -213,10 +213,10 @@ static void TransformAC3(const int16_t* in, uint8_t* dst) {
const __m128i m3 = _mm_subs_epi16(B, d4);
const __m128i zero = _mm_setzero_si128();
// Load the source pixels.
__m128i dst0 = _mm_cvtsi32_si128(WebPMemToUint32(dst + 0 * BPS));
__m128i dst1 = _mm_cvtsi32_si128(WebPMemToUint32(dst + 1 * BPS));
__m128i dst2 = _mm_cvtsi32_si128(WebPMemToUint32(dst + 2 * BPS));
__m128i dst3 = _mm_cvtsi32_si128(WebPMemToUint32(dst + 3 * BPS));
__m128i dst0 = _mm_cvtsi32_si128(WebPMemToInt32(dst + 0 * BPS));
__m128i dst1 = _mm_cvtsi32_si128(WebPMemToInt32(dst + 1 * BPS));
__m128i dst2 = _mm_cvtsi32_si128(WebPMemToInt32(dst + 2 * BPS));
__m128i dst3 = _mm_cvtsi32_si128(WebPMemToInt32(dst + 3 * BPS));
// Convert to 16b.
dst0 = _mm_unpacklo_epi8(dst0, zero);
dst1 = _mm_unpacklo_epi8(dst1, zero);
@ -233,10 +233,10 @@ static void TransformAC3(const int16_t* in, uint8_t* dst) {
dst2 = _mm_packus_epi16(dst2, dst2);
dst3 = _mm_packus_epi16(dst3, dst3);
// Store the results.
WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(dst0));
WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(dst1));
WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(dst2));
WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(dst3));
WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(dst0));
WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(dst1));
WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(dst2));
WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(dst3));
}
#undef MUL
#endif // USE_TRANSFORM_AC3
@ -477,11 +477,11 @@ static WEBP_INLINE void Load8x4_SSE2(const uint8_t* const b, int stride,
// A0 = 63 62 61 60 23 22 21 20 43 42 41 40 03 02 01 00
// A1 = 73 72 71 70 33 32 31 30 53 52 51 50 13 12 11 10
const __m128i A0 = _mm_set_epi32(
WebPMemToUint32(&b[6 * stride]), WebPMemToUint32(&b[2 * stride]),
WebPMemToUint32(&b[4 * stride]), WebPMemToUint32(&b[0 * stride]));
WebPMemToInt32(&b[6 * stride]), WebPMemToInt32(&b[2 * stride]),
WebPMemToInt32(&b[4 * stride]), WebPMemToInt32(&b[0 * stride]));
const __m128i A1 = _mm_set_epi32(
WebPMemToUint32(&b[7 * stride]), WebPMemToUint32(&b[3 * stride]),
WebPMemToUint32(&b[5 * stride]), WebPMemToUint32(&b[1 * stride]));
WebPMemToInt32(&b[7 * stride]), WebPMemToInt32(&b[3 * stride]),
WebPMemToInt32(&b[5 * stride]), WebPMemToInt32(&b[1 * stride]));
// B0 = 53 43 52 42 51 41 50 40 13 03 12 02 11 01 10 00
// B1 = 73 63 72 62 71 61 70 60 33 23 32 22 31 21 30 20
@ -540,7 +540,7 @@ static WEBP_INLINE void Store4x4_SSE2(__m128i* const x,
uint8_t* dst, int stride) {
int i;
for (i = 0; i < 4; ++i, dst += stride) {
WebPUint32ToMem(dst, _mm_cvtsi128_si32(*x));
WebPInt32ToMem(dst, _mm_cvtsi128_si32(*x));
*x = _mm_srli_si128(*x, 4);
}
}
@ -908,10 +908,10 @@ static void VE4_SSE2(uint8_t* dst) { // vertical
const __m128i lsb = _mm_and_si128(_mm_xor_si128(ABCDEFGH, CDEFGH00), one);
const __m128i b = _mm_subs_epu8(a, lsb);
const __m128i avg = _mm_avg_epu8(b, BCDEFGH0);
const uint32_t vals = _mm_cvtsi128_si32(avg);
const int vals = _mm_cvtsi128_si32(avg);
int i;
for (i = 0; i < 4; ++i) {
WebPUint32ToMem(dst + i * BPS, vals);
WebPInt32ToMem(dst + i * BPS, vals);
}
}
@ -925,10 +925,10 @@ static void LD4_SSE2(uint8_t* dst) { // Down-Left
const __m128i lsb = _mm_and_si128(_mm_xor_si128(ABCDEFGH, CDEFGHH0), one);
const __m128i avg2 = _mm_subs_epu8(avg1, lsb);
const __m128i abcdefg = _mm_avg_epu8(avg2, BCDEFGH0);
WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( abcdefg ));
WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1)));
WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2)));
WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( abcdefg ));
WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1)));
WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2)));
WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
}
static void VR4_SSE2(uint8_t* dst) { // Vertical-Right
@ -946,10 +946,10 @@ static void VR4_SSE2(uint8_t* dst) { // Vertical-Right
const __m128i lsb = _mm_and_si128(_mm_xor_si128(IXABCD, ABCD0), one);
const __m128i avg2 = _mm_subs_epu8(avg1, lsb);
const __m128i efgh = _mm_avg_epu8(avg2, XABCD);
WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( abcd ));
WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32( efgh ));
WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(abcd, 1)));
WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(efgh, 1)));
WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( abcd ));
WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32( efgh ));
WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(abcd, 1)));
WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(efgh, 1)));
// these two are hard to implement in SSE2, so we keep the C-version:
DST(0, 2) = AVG3(J, I, X);
@ -970,11 +970,12 @@ static void VL4_SSE2(uint8_t* dst) { // Vertical-Left
const __m128i abbc = _mm_or_si128(ab, bc);
const __m128i lsb2 = _mm_and_si128(abbc, lsb1);
const __m128i avg4 = _mm_subs_epu8(avg3, lsb2);
const uint32_t extra_out = _mm_cvtsi128_si32(_mm_srli_si128(avg4, 4));
WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( avg1 ));
WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32( avg4 ));
WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg1, 1)));
WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg4, 1)));
const uint32_t extra_out =
(uint32_t)_mm_cvtsi128_si32(_mm_srli_si128(avg4, 4));
WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( avg1 ));
WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32( avg4 ));
WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg1, 1)));
WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg4, 1)));
// these two are hard to get and irregular
DST(3, 2) = (extra_out >> 0) & 0xff;
@ -990,7 +991,7 @@ static void RD4_SSE2(uint8_t* dst) { // Down-right
const uint32_t K = dst[-1 + 2 * BPS];
const uint32_t L = dst[-1 + 3 * BPS];
const __m128i LKJI_____ =
_mm_cvtsi32_si128(L | (K << 8) | (J << 16) | (I << 24));
_mm_cvtsi32_si128((int)(L | (K << 8) | (J << 16) | (I << 24)));
const __m128i LKJIXABCD = _mm_or_si128(LKJI_____, ____XABCD);
const __m128i KJIXABCD_ = _mm_srli_si128(LKJIXABCD, 1);
const __m128i JIXABCD__ = _mm_srli_si128(LKJIXABCD, 2);
@ -998,10 +999,10 @@ static void RD4_SSE2(uint8_t* dst) { // Down-right
const __m128i lsb = _mm_and_si128(_mm_xor_si128(JIXABCD__, LKJIXABCD), one);
const __m128i avg2 = _mm_subs_epu8(avg1, lsb);
const __m128i abcdefg = _mm_avg_epu8(avg2, KJIXABCD_);
WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32( abcdefg ));
WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1)));
WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2)));
WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32( abcdefg ));
WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1)));
WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2)));
WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
}
#undef DST
@ -1015,13 +1016,13 @@ static WEBP_INLINE void TrueMotion_SSE2(uint8_t* dst, int size) {
const __m128i zero = _mm_setzero_si128();
int y;
if (size == 4) {
const __m128i top_values = _mm_cvtsi32_si128(WebPMemToUint32(top));
const __m128i top_values = _mm_cvtsi32_si128(WebPMemToInt32(top));
const __m128i top_base = _mm_unpacklo_epi8(top_values, zero);
for (y = 0; y < 4; ++y, dst += BPS) {
const int val = dst[-1] - top[-1];
const __m128i base = _mm_set1_epi16(val);
const __m128i out = _mm_packus_epi16(_mm_add_epi16(base, top_base), zero);
WebPUint32ToMem(dst, _mm_cvtsi128_si32(out));
WebPInt32ToMem(dst, _mm_cvtsi128_si32(out));
}
} else if (size == 8) {
const __m128i top_values = _mm_loadl_epi64((const __m128i*)top);
@ -1062,7 +1063,7 @@ static void VE16_SSE2(uint8_t* dst) {
static void HE16_SSE2(uint8_t* dst) { // horizontal
int j;
for (j = 16; j > 0; --j) {
const __m128i values = _mm_set1_epi8(dst[-1]);
const __m128i values = _mm_set1_epi8((char)dst[-1]);
_mm_storeu_si128((__m128i*)dst, values);
dst += BPS;
}
@ -1070,7 +1071,7 @@ static void HE16_SSE2(uint8_t* dst) { // horizontal
static WEBP_INLINE void Put16_SSE2(uint8_t v, uint8_t* dst) {
int j;
const __m128i values = _mm_set1_epi8(v);
const __m128i values = _mm_set1_epi8((char)v);
for (j = 0; j < 16; ++j) {
_mm_storeu_si128((__m128i*)(dst + j * BPS), values);
}
@ -1130,7 +1131,7 @@ static void VE8uv_SSE2(uint8_t* dst) { // vertical
// helper for chroma-DC predictions
static WEBP_INLINE void Put8x8uv_SSE2(uint8_t v, uint8_t* dst) {
int j;
const __m128i values = _mm_set1_epi8(v);
const __m128i values = _mm_set1_epi8((char)v);
for (j = 0; j < 8; ++j) {
_mm_storel_epi64((__m128i*)(dst + j * BPS), values);
}

View File

@ -23,7 +23,7 @@ static void HE16_SSE41(uint8_t* dst) { // horizontal
int j;
const __m128i kShuffle3 = _mm_set1_epi8(3);
for (j = 16; j > 0; --j) {
const __m128i in = _mm_cvtsi32_si128(WebPMemToUint32(dst - 4));
const __m128i in = _mm_cvtsi32_si128(WebPMemToInt32(dst - 4));
const __m128i values = _mm_shuffle_epi8(in, kShuffle3);
_mm_storeu_si128((__m128i*)dst, values);
dst += BPS;

View File

@ -732,6 +732,7 @@ VP8QuantizeBlockWHT VP8EncQuantizeBlockWHT;
VP8BlockCopy VP8Copy4x4;
VP8BlockCopy VP8Copy16x8;
extern VP8CPUInfo VP8GetCPUInfo;
extern void VP8EncDspInitSSE2(void);
extern void VP8EncDspInitSSE41(void);
extern void VP8EncDspInitNEON(void);

View File

@ -764,9 +764,14 @@ static WEBP_INLINE void AccumulateSSE16_NEON(const uint8_t* const a,
// Horizontal sum of all four uint32_t values in 'sum'.
static int SumToInt_NEON(uint32x4_t sum) {
#if WEBP_AARCH64
return (int)vaddvq_u32(sum);
#else
const uint64x2_t sum2 = vpaddlq_u32(sum);
const uint64_t sum3 = vgetq_lane_u64(sum2, 0) + vgetq_lane_u64(sum2, 1);
return (int)sum3;
const uint32x2_t sum3 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(sum2)),
vreinterpret_u32_u64(vget_high_u64(sum2)));
return (int)vget_lane_u32(sum3, 0);
#endif
}
static int SSE16x16_NEON(const uint8_t* a, const uint8_t* b) {
@ -860,7 +865,7 @@ static int QuantizeBlock_NEON(int16_t in[16], int16_t out[16],
uint8x8x4_t shuffles;
// vtbl?_u8 are marked unavailable for iOS arm64 with Xcode < 6.3, use
// non-standard versions there.
#if defined(__APPLE__) && defined(__aarch64__) && \
#if defined(__APPLE__) && WEBP_AARCH64 && \
defined(__apple_build_version__) && (__apple_build_version__< 6020037)
uint8x16x2_t all_out;
INIT_VECTOR2(all_out, vreinterpretq_u8_s16(out0), vreinterpretq_u8_s16(out1));

View File

@ -25,9 +25,160 @@
//------------------------------------------------------------------------------
// Transforms (Paragraph 14.4)
// Does one or two inverse transforms.
static void ITransform_SSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst,
int do_two) {
// Does one inverse transform.
static void ITransform_One_SSE2(const uint8_t* ref, const int16_t* in,
uint8_t* dst) {
// This implementation makes use of 16-bit fixed point versions of two
// multiply constants:
// K1 = sqrt(2) * cos (pi/8) ~= 85627 / 2^16
// K2 = sqrt(2) * sin (pi/8) ~= 35468 / 2^16
//
// To be able to use signed 16-bit integers, we use the following trick to
// have constants within range:
// - Associated constants are obtained by subtracting the 16-bit fixed point
// version of one:
// k = K - (1 << 16) => K = k + (1 << 16)
// K1 = 85267 => k1 = 20091
// K2 = 35468 => k2 = -30068
// - The multiplication of a variable by a constant become the sum of the
// variable and the multiplication of that variable by the associated
// constant:
// (x * K) >> 16 = (x * (k + (1 << 16))) >> 16 = ((x * k ) >> 16) + x
const __m128i k1k2 = _mm_set_epi16(-30068, -30068, -30068, -30068,
20091, 20091, 20091, 20091);
const __m128i k2k1 = _mm_set_epi16(20091, 20091, 20091, 20091,
-30068, -30068, -30068, -30068);
const __m128i zero = _mm_setzero_si128();
const __m128i zero_four = _mm_set_epi16(0, 0, 0, 0, 4, 4, 4, 4);
__m128i T01, T23;
// Load and concatenate the transform coefficients.
const __m128i in01 = _mm_loadu_si128((const __m128i*)&in[0]);
const __m128i in23 = _mm_loadu_si128((const __m128i*)&in[8]);
// a00 a10 a20 a30 a01 a11 a21 a31
// a02 a12 a22 a32 a03 a13 a23 a33
// Vertical pass and subsequent transpose.
{
const __m128i in1 = _mm_unpackhi_epi64(in01, in01);
const __m128i in3 = _mm_unpackhi_epi64(in23, in23);
// First pass, c and d calculations are longer because of the "trick"
// multiplications.
// c = MUL(in1, K2) - MUL(in3, K1) = MUL(in1, k2) - MUL(in3, k1) + in1 - in3
// d = MUL(in1, K1) + MUL(in3, K2) = MUL(in1, k1) + MUL(in3, k2) + in1 + in3
const __m128i a_d3 = _mm_add_epi16(in01, in23);
const __m128i b_c3 = _mm_sub_epi16(in01, in23);
const __m128i c1d1 = _mm_mulhi_epi16(in1, k2k1);
const __m128i c2d2 = _mm_mulhi_epi16(in3, k1k2);
const __m128i c3 = _mm_unpackhi_epi64(b_c3, b_c3);
const __m128i c4 = _mm_sub_epi16(c1d1, c2d2);
const __m128i c = _mm_add_epi16(c3, c4);
const __m128i d4u = _mm_add_epi16(c1d1, c2d2);
const __m128i du = _mm_add_epi16(a_d3, d4u);
const __m128i d = _mm_unpackhi_epi64(du, du);
// Second pass.
const __m128i comb_ab = _mm_unpacklo_epi64(a_d3, b_c3);
const __m128i comb_dc = _mm_unpacklo_epi64(d, c);
const __m128i tmp01 = _mm_add_epi16(comb_ab, comb_dc);
const __m128i tmp32 = _mm_sub_epi16(comb_ab, comb_dc);
const __m128i tmp23 = _mm_shuffle_epi32(tmp32, _MM_SHUFFLE(1, 0, 3, 2));
const __m128i transpose_0 = _mm_unpacklo_epi16(tmp01, tmp23);
const __m128i transpose_1 = _mm_unpackhi_epi16(tmp01, tmp23);
// a00 a20 a01 a21 a02 a22 a03 a23
// a10 a30 a11 a31 a12 a32 a13 a33
T01 = _mm_unpacklo_epi16(transpose_0, transpose_1);
T23 = _mm_unpackhi_epi16(transpose_0, transpose_1);
// a00 a10 a20 a30 a01 a11 a21 a31
// a02 a12 a22 a32 a03 a13 a23 a33
}
// Horizontal pass and subsequent transpose.
{
const __m128i T1 = _mm_unpackhi_epi64(T01, T01);
const __m128i T3 = _mm_unpackhi_epi64(T23, T23);
// First pass, c and d calculations are longer because of the "trick"
// multiplications.
const __m128i dc = _mm_add_epi16(T01, zero_four);
// c = MUL(T1, K2) - MUL(T3, K1) = MUL(T1, k2) - MUL(T3, k1) + T1 - T3
// d = MUL(T1, K1) + MUL(T3, K2) = MUL(T1, k1) + MUL(T3, k2) + T1 + T3
const __m128i a_d3 = _mm_add_epi16(dc, T23);
const __m128i b_c3 = _mm_sub_epi16(dc, T23);
const __m128i c1d1 = _mm_mulhi_epi16(T1, k2k1);
const __m128i c2d2 = _mm_mulhi_epi16(T3, k1k2);
const __m128i c3 = _mm_unpackhi_epi64(b_c3, b_c3);
const __m128i c4 = _mm_sub_epi16(c1d1, c2d2);
const __m128i c = _mm_add_epi16(c3, c4);
const __m128i d4u = _mm_add_epi16(c1d1, c2d2);
const __m128i du = _mm_add_epi16(a_d3, d4u);
const __m128i d = _mm_unpackhi_epi64(du, du);
// Second pass.
const __m128i comb_ab = _mm_unpacklo_epi64(a_d3, b_c3);
const __m128i comb_dc = _mm_unpacklo_epi64(d, c);
const __m128i tmp01 = _mm_add_epi16(comb_ab, comb_dc);
const __m128i tmp32 = _mm_sub_epi16(comb_ab, comb_dc);
const __m128i tmp23 = _mm_shuffle_epi32(tmp32, _MM_SHUFFLE(1, 0, 3, 2));
const __m128i shifted01 = _mm_srai_epi16(tmp01, 3);
const __m128i shifted23 = _mm_srai_epi16(tmp23, 3);
// a00 a01 a02 a03 a10 a11 a12 a13
// a20 a21 a22 a23 a30 a31 a32 a33
const __m128i transpose_0 = _mm_unpacklo_epi16(shifted01, shifted23);
const __m128i transpose_1 = _mm_unpackhi_epi16(shifted01, shifted23);
// a00 a20 a01 a21 a02 a22 a03 a23
// a10 a30 a11 a31 a12 a32 a13 a33
T01 = _mm_unpacklo_epi16(transpose_0, transpose_1);
T23 = _mm_unpackhi_epi16(transpose_0, transpose_1);
// a00 a10 a20 a30 a01 a11 a21 a31
// a02 a12 a22 a32 a03 a13 a23 a33
}
// Add inverse transform to 'ref' and store.
{
// Load the reference(s).
__m128i ref01, ref23, ref0123;
int32_t buf[4];
// Load four bytes/pixels per line.
const __m128i ref0 = _mm_cvtsi32_si128(WebPMemToInt32(&ref[0 * BPS]));
const __m128i ref1 = _mm_cvtsi32_si128(WebPMemToInt32(&ref[1 * BPS]));
const __m128i ref2 = _mm_cvtsi32_si128(WebPMemToInt32(&ref[2 * BPS]));
const __m128i ref3 = _mm_cvtsi32_si128(WebPMemToInt32(&ref[3 * BPS]));
ref01 = _mm_unpacklo_epi32(ref0, ref1);
ref23 = _mm_unpacklo_epi32(ref2, ref3);
// Convert to 16b.
ref01 = _mm_unpacklo_epi8(ref01, zero);
ref23 = _mm_unpacklo_epi8(ref23, zero);
// Add the inverse transform(s).
ref01 = _mm_add_epi16(ref01, T01);
ref23 = _mm_add_epi16(ref23, T23);
// Unsigned saturate to 8b.
ref0123 = _mm_packus_epi16(ref01, ref23);
_mm_storeu_si128((__m128i *)buf, ref0123);
// Store four bytes/pixels per line.
WebPInt32ToMem(&dst[0 * BPS], buf[0]);
WebPInt32ToMem(&dst[1 * BPS], buf[1]);
WebPInt32ToMem(&dst[2 * BPS], buf[2]);
WebPInt32ToMem(&dst[3 * BPS], buf[3]);
}
}
// Does two inverse transforms.
static void ITransform_Two_SSE2(const uint8_t* ref, const int16_t* in,
uint8_t* dst) {
// This implementation makes use of 16-bit fixed point versions of two
// multiply constants:
// K1 = sqrt(2) * cos (pi/8) ~= 85627 / 2^16
@ -49,33 +200,21 @@ static void ITransform_SSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst,
__m128i T0, T1, T2, T3;
// Load and concatenate the transform coefficients (we'll do two inverse
// transforms in parallel). In the case of only one inverse transform, the
// second half of the vectors will just contain random value we'll never
// use nor store.
// transforms in parallel).
__m128i in0, in1, in2, in3;
{
in0 = _mm_loadl_epi64((const __m128i*)&in[0]);
in1 = _mm_loadl_epi64((const __m128i*)&in[4]);
in2 = _mm_loadl_epi64((const __m128i*)&in[8]);
in3 = _mm_loadl_epi64((const __m128i*)&in[12]);
// a00 a10 a20 a30 x x x x
// a01 a11 a21 a31 x x x x
// a02 a12 a22 a32 x x x x
// a03 a13 a23 a33 x x x x
if (do_two) {
const __m128i inB0 = _mm_loadl_epi64((const __m128i*)&in[16]);
const __m128i inB1 = _mm_loadl_epi64((const __m128i*)&in[20]);
const __m128i inB2 = _mm_loadl_epi64((const __m128i*)&in[24]);
const __m128i inB3 = _mm_loadl_epi64((const __m128i*)&in[28]);
in0 = _mm_unpacklo_epi64(in0, inB0);
in1 = _mm_unpacklo_epi64(in1, inB1);
in2 = _mm_unpacklo_epi64(in2, inB2);
in3 = _mm_unpacklo_epi64(in3, inB3);
// a00 a10 a20 a30 b00 b10 b20 b30
// a01 a11 a21 a31 b01 b11 b21 b31
// a02 a12 a22 a32 b02 b12 b22 b32
// a03 a13 a23 a33 b03 b13 b23 b33
}
const __m128i tmp0 = _mm_loadu_si128((const __m128i*)&in[0]);
const __m128i tmp1 = _mm_loadu_si128((const __m128i*)&in[8]);
const __m128i tmp2 = _mm_loadu_si128((const __m128i*)&in[16]);
const __m128i tmp3 = _mm_loadu_si128((const __m128i*)&in[24]);
in0 = _mm_unpacklo_epi64(tmp0, tmp2);
in1 = _mm_unpackhi_epi64(tmp0, tmp2);
in2 = _mm_unpacklo_epi64(tmp1, tmp3);
in3 = _mm_unpackhi_epi64(tmp1, tmp3);
// a00 a10 a20 a30 b00 b10 b20 b30
// a01 a11 a21 a31 b01 b11 b21 b31
// a02 a12 a22 a32 b02 b12 b22 b32
// a03 a13 a23 a33 b03 b13 b23 b33
}
// Vertical pass and subsequent transpose.
@ -148,19 +287,11 @@ static void ITransform_SSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst,
const __m128i zero = _mm_setzero_si128();
// Load the reference(s).
__m128i ref0, ref1, ref2, ref3;
if (do_two) {
// Load eight bytes/pixels per line.
ref0 = _mm_loadl_epi64((const __m128i*)&ref[0 * BPS]);
ref1 = _mm_loadl_epi64((const __m128i*)&ref[1 * BPS]);
ref2 = _mm_loadl_epi64((const __m128i*)&ref[2 * BPS]);
ref3 = _mm_loadl_epi64((const __m128i*)&ref[3 * BPS]);
} else {
// Load four bytes/pixels per line.
ref0 = _mm_cvtsi32_si128(WebPMemToUint32(&ref[0 * BPS]));
ref1 = _mm_cvtsi32_si128(WebPMemToUint32(&ref[1 * BPS]));
ref2 = _mm_cvtsi32_si128(WebPMemToUint32(&ref[2 * BPS]));
ref3 = _mm_cvtsi32_si128(WebPMemToUint32(&ref[3 * BPS]));
}
// Load eight bytes/pixels per line.
ref0 = _mm_loadl_epi64((const __m128i*)&ref[0 * BPS]);
ref1 = _mm_loadl_epi64((const __m128i*)&ref[1 * BPS]);
ref2 = _mm_loadl_epi64((const __m128i*)&ref[2 * BPS]);
ref3 = _mm_loadl_epi64((const __m128i*)&ref[3 * BPS]);
// Convert to 16b.
ref0 = _mm_unpacklo_epi8(ref0, zero);
ref1 = _mm_unpacklo_epi8(ref1, zero);
@ -176,20 +307,21 @@ static void ITransform_SSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst,
ref1 = _mm_packus_epi16(ref1, ref1);
ref2 = _mm_packus_epi16(ref2, ref2);
ref3 = _mm_packus_epi16(ref3, ref3);
// Store the results.
if (do_two) {
// Store eight bytes/pixels per line.
_mm_storel_epi64((__m128i*)&dst[0 * BPS], ref0);
_mm_storel_epi64((__m128i*)&dst[1 * BPS], ref1);
_mm_storel_epi64((__m128i*)&dst[2 * BPS], ref2);
_mm_storel_epi64((__m128i*)&dst[3 * BPS], ref3);
} else {
// Store four bytes/pixels per line.
WebPUint32ToMem(&dst[0 * BPS], _mm_cvtsi128_si32(ref0));
WebPUint32ToMem(&dst[1 * BPS], _mm_cvtsi128_si32(ref1));
WebPUint32ToMem(&dst[2 * BPS], _mm_cvtsi128_si32(ref2));
WebPUint32ToMem(&dst[3 * BPS], _mm_cvtsi128_si32(ref3));
}
// Store eight bytes/pixels per line.
_mm_storel_epi64((__m128i*)&dst[0 * BPS], ref0);
_mm_storel_epi64((__m128i*)&dst[1 * BPS], ref1);
_mm_storel_epi64((__m128i*)&dst[2 * BPS], ref2);
_mm_storel_epi64((__m128i*)&dst[3 * BPS], ref3);
}
}
// Does one or two inverse transforms.
static void ITransform_SSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst,
int do_two) {
if (do_two) {
ITransform_Two_SSE2(ref, in, dst);
} else {
ITransform_One_SSE2(ref, in, dst);
}
}
@ -481,7 +613,7 @@ static void CollectHistogram_SSE2(const uint8_t* ref, const uint8_t* pred,
// helper for chroma-DC predictions
static WEBP_INLINE void Put8x8uv_SSE2(uint8_t v, uint8_t* dst) {
int j;
const __m128i values = _mm_set1_epi8(v);
const __m128i values = _mm_set1_epi8((char)v);
for (j = 0; j < 8; ++j) {
_mm_storel_epi64((__m128i*)(dst + j * BPS), values);
}
@ -489,7 +621,7 @@ static WEBP_INLINE void Put8x8uv_SSE2(uint8_t v, uint8_t* dst) {
static WEBP_INLINE void Put16_SSE2(uint8_t v, uint8_t* dst) {
int j;
const __m128i values = _mm_set1_epi8(v);
const __m128i values = _mm_set1_epi8((char)v);
for (j = 0; j < 16; ++j) {
_mm_store_si128((__m128i*)(dst + j * BPS), values);
}
@ -540,7 +672,7 @@ static WEBP_INLINE void VerticalPred_SSE2(uint8_t* dst,
static WEBP_INLINE void HE8uv_SSE2(uint8_t* dst, const uint8_t* left) {
int j;
for (j = 0; j < 8; ++j) {
const __m128i values = _mm_set1_epi8(left[j]);
const __m128i values = _mm_set1_epi8((char)left[j]);
_mm_storel_epi64((__m128i*)dst, values);
dst += BPS;
}
@ -549,7 +681,7 @@ static WEBP_INLINE void HE8uv_SSE2(uint8_t* dst, const uint8_t* left) {
static WEBP_INLINE void HE16_SSE2(uint8_t* dst, const uint8_t* left) {
int j;
for (j = 0; j < 16; ++j) {
const __m128i values = _mm_set1_epi8(left[j]);
const __m128i values = _mm_set1_epi8((char)left[j]);
_mm_store_si128((__m128i*)dst, values);
dst += BPS;
}
@ -722,10 +854,10 @@ static WEBP_INLINE void VE4_SSE2(uint8_t* dst,
const __m128i lsb = _mm_and_si128(_mm_xor_si128(ABCDEFGH, CDEFGH00), one);
const __m128i b = _mm_subs_epu8(a, lsb);
const __m128i avg = _mm_avg_epu8(b, BCDEFGH0);
const uint32_t vals = _mm_cvtsi128_si32(avg);
const int vals = _mm_cvtsi128_si32(avg);
int i;
for (i = 0; i < 4; ++i) {
WebPUint32ToMem(dst + i * BPS, vals);
WebPInt32ToMem(dst + i * BPS, vals);
}
}
@ -760,10 +892,10 @@ static WEBP_INLINE void LD4_SSE2(uint8_t* dst,
const __m128i lsb = _mm_and_si128(_mm_xor_si128(ABCDEFGH, CDEFGHH0), one);
const __m128i avg2 = _mm_subs_epu8(avg1, lsb);
const __m128i abcdefg = _mm_avg_epu8(avg2, BCDEFGH0);
WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( abcdefg ));
WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1)));
WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2)));
WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( abcdefg ));
WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1)));
WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2)));
WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
}
static WEBP_INLINE void VR4_SSE2(uint8_t* dst,
@ -782,10 +914,10 @@ static WEBP_INLINE void VR4_SSE2(uint8_t* dst,
const __m128i lsb = _mm_and_si128(_mm_xor_si128(IXABCD, ABCD0), one);
const __m128i avg2 = _mm_subs_epu8(avg1, lsb);
const __m128i efgh = _mm_avg_epu8(avg2, XABCD);
WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( abcd ));
WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32( efgh ));
WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(abcd, 1)));
WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(efgh, 1)));
WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( abcd ));
WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32( efgh ));
WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(abcd, 1)));
WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(efgh, 1)));
// these two are hard to implement in SSE2, so we keep the C-version:
DST(0, 2) = AVG3(J, I, X);
@ -807,11 +939,12 @@ static WEBP_INLINE void VL4_SSE2(uint8_t* dst,
const __m128i abbc = _mm_or_si128(ab, bc);
const __m128i lsb2 = _mm_and_si128(abbc, lsb1);
const __m128i avg4 = _mm_subs_epu8(avg3, lsb2);
const uint32_t extra_out = _mm_cvtsi128_si32(_mm_srli_si128(avg4, 4));
WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( avg1 ));
WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32( avg4 ));
WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg1, 1)));
WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg4, 1)));
const uint32_t extra_out =
(uint32_t)_mm_cvtsi128_si32(_mm_srli_si128(avg4, 4));
WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( avg1 ));
WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32( avg4 ));
WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg1, 1)));
WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg4, 1)));
// these two are hard to get and irregular
DST(3, 2) = (extra_out >> 0) & 0xff;
@ -829,10 +962,10 @@ static WEBP_INLINE void RD4_SSE2(uint8_t* dst,
const __m128i lsb = _mm_and_si128(_mm_xor_si128(JIXABCD__, LKJIXABCD), one);
const __m128i avg2 = _mm_subs_epu8(avg1, lsb);
const __m128i abcdefg = _mm_avg_epu8(avg2, KJIXABCD_);
WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32( abcdefg ));
WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1)));
WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2)));
WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32( abcdefg ));
WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1)));
WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2)));
WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3)));
}
static WEBP_INLINE void HU4_SSE2(uint8_t* dst, const uint8_t* top) {
@ -875,14 +1008,14 @@ static WEBP_INLINE void HD4_SSE2(uint8_t* dst, const uint8_t* top) {
static WEBP_INLINE void TM4_SSE2(uint8_t* dst, const uint8_t* top) {
const __m128i zero = _mm_setzero_si128();
const __m128i top_values = _mm_cvtsi32_si128(WebPMemToUint32(top));
const __m128i top_values = _mm_cvtsi32_si128(WebPMemToInt32(top));
const __m128i top_base = _mm_unpacklo_epi8(top_values, zero);
int y;
for (y = 0; y < 4; ++y, dst += BPS) {
const int val = top[-2 - y] - top[-1];
const __m128i base = _mm_set1_epi16(val);
const __m128i out = _mm_packus_epi16(_mm_add_epi16(base, top_base), zero);
WebPUint32ToMem(dst, _mm_cvtsi128_si32(out));
WebPInt32ToMem(dst, _mm_cvtsi128_si32(out));
}
}

View File

@ -233,6 +233,7 @@ static void GradientUnfilter_C(const uint8_t* prev, const uint8_t* in,
WebPFilterFunc WebPFilters[WEBP_FILTER_LAST];
WebPUnfilterFunc WebPUnfilters[WEBP_FILTER_LAST];
extern VP8CPUInfo VP8GetCPUInfo;
extern void VP8FiltersInitMIPSdspR2(void);
extern void VP8FiltersInitMSA(void);
extern void VP8FiltersInitNEON(void);

View File

@ -49,7 +49,7 @@ static WEBP_INLINE uint32_t Clip255(uint32_t a) {
}
static WEBP_INLINE int AddSubtractComponentFull(int a, int b, int c) {
return Clip255(a + b - c);
return Clip255((uint32_t)(a + b - c));
}
static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
@ -66,7 +66,7 @@ static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
}
static WEBP_INLINE int AddSubtractComponentHalf(int a, int b) {
return Clip255(a + (a - b) / 2);
return Clip255((uint32_t)(a + (a - b) / 2));
}
static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
@ -293,10 +293,10 @@ void VP8LTransformColorInverse_C(const VP8LMultipliers* const m,
const uint32_t red = argb >> 16;
int new_red = red & 0xff;
int new_blue = argb & 0xff;
new_red += ColorTransformDelta(m->green_to_red_, green);
new_red += ColorTransformDelta((int8_t)m->green_to_red_, green);
new_red &= 0xff;
new_blue += ColorTransformDelta(m->green_to_blue_, green);
new_blue += ColorTransformDelta(m->red_to_blue_, (int8_t)new_red);
new_blue += ColorTransformDelta((int8_t)m->green_to_blue_, green);
new_blue += ColorTransformDelta((int8_t)m->red_to_blue_, (int8_t)new_red);
new_blue &= 0xff;
dst[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
}
@ -395,7 +395,7 @@ void VP8LInverseTransform(const VP8LTransform* const transform,
assert(row_start < row_end);
assert(row_end <= transform->ysize_);
switch (transform->type_) {
case SUBTRACT_GREEN:
case SUBTRACT_GREEN_TRANSFORM:
VP8LAddGreenToBlueAndRed(in, (row_end - row_start) * width, out);
break;
case PREDICTOR_TRANSFORM:
@ -588,6 +588,7 @@ VP8LConvertFunc VP8LConvertBGRAToBGR;
VP8LMapARGBFunc VP8LMapColor32b;
VP8LMapAlphaFunc VP8LMapColor8b;
extern VP8CPUInfo VP8GetCPUInfo;
extern void VP8LDspInitSSE2(void);
extern void VP8LDspInitSSE41(void);
extern void VP8LDspInitNEON(void);

View File

@ -522,11 +522,11 @@ static void GetCombinedEntropyUnrefined_C(const uint32_t X[],
void VP8LSubtractGreenFromBlueAndRed_C(uint32_t* argb_data, int num_pixels) {
int i;
for (i = 0; i < num_pixels; ++i) {
const int argb = argb_data[i];
const int argb = (int)argb_data[i];
const int green = (argb >> 8) & 0xff;
const uint32_t new_r = (((argb >> 16) & 0xff) - green) & 0xff;
const uint32_t new_b = (((argb >> 0) & 0xff) - green) & 0xff;
argb_data[i] = (argb & 0xff00ff00u) | (new_r << 16) | new_b;
argb_data[i] = ((uint32_t)argb & 0xff00ff00u) | (new_r << 16) | new_b;
}
}
@ -547,10 +547,10 @@ void VP8LTransformColor_C(const VP8LMultipliers* const m, uint32_t* data,
const int8_t red = U32ToS8(argb >> 16);
int new_red = red & 0xff;
int new_blue = argb & 0xff;
new_red -= ColorTransformDelta(m->green_to_red_, green);
new_red -= ColorTransformDelta((int8_t)m->green_to_red_, green);
new_red &= 0xff;
new_blue -= ColorTransformDelta(m->green_to_blue_, green);
new_blue -= ColorTransformDelta(m->red_to_blue_, red);
new_blue -= ColorTransformDelta((int8_t)m->green_to_blue_, green);
new_blue -= ColorTransformDelta((int8_t)m->red_to_blue_, red);
new_blue &= 0xff;
data[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
}
@ -560,7 +560,7 @@ static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red,
uint32_t argb) {
const int8_t green = U32ToS8(argb >> 8);
int new_red = argb >> 16;
new_red -= ColorTransformDelta(green_to_red, green);
new_red -= ColorTransformDelta((int8_t)green_to_red, green);
return (new_red & 0xff);
}
@ -569,9 +569,9 @@ static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue,
uint32_t argb) {
const int8_t green = U32ToS8(argb >> 8);
const int8_t red = U32ToS8(argb >> 16);
uint8_t new_blue = argb & 0xff;
new_blue -= ColorTransformDelta(green_to_blue, green);
new_blue -= ColorTransformDelta(red_to_blue, red);
int new_blue = argb & 0xff;
new_blue -= ColorTransformDelta((int8_t)green_to_blue, green);
new_blue -= ColorTransformDelta((int8_t)red_to_blue, red);
return (new_blue & 0xff);
}
@ -791,6 +791,7 @@ VP8LBundleColorMapFunc VP8LBundleColorMap;
VP8LPredictorAddSubFunc VP8LPredictorsSub[16];
VP8LPredictorAddSubFunc VP8LPredictorsSub_C[16];
extern VP8CPUInfo VP8GetCPUInfo;
extern void VP8LEncDspInitSSE2(void);
extern void VP8LEncDspInitSSE41(void);
extern void VP8LEncDspInitNEON(void);

View File

@ -25,7 +25,7 @@
// vtbl?_u8 are marked unavailable for iOS arm64 with Xcode < 6.3, use
// non-standard versions there.
#if defined(__APPLE__) && defined(__aarch64__) && \
#if defined(__APPLE__) && WEBP_AARCH64 && \
defined(__apple_build_version__) && (__apple_build_version__< 6020037)
#define USE_VTBLQ
#endif

View File

@ -54,8 +54,8 @@ static void TransformColor_SSE2(const VP8LMultipliers* const m,
const __m128i mults_rb = MK_CST_16(CST_5b(m->green_to_red_),
CST_5b(m->green_to_blue_));
const __m128i mults_b2 = MK_CST_16(CST_5b(m->red_to_blue_), 0);
const __m128i mask_ag = _mm_set1_epi32(0xff00ff00); // alpha-green masks
const __m128i mask_rb = _mm_set1_epi32(0x00ff00ff); // red-blue masks
const __m128i mask_ag = _mm_set1_epi32((int)0xff00ff00); // alpha-green masks
const __m128i mask_rb = _mm_set1_epi32(0x00ff00ff); // red-blue masks
int i;
for (i = 0; i + 4 <= num_pixels; i += 4) {
const __m128i in = _mm_loadu_si128((__m128i*)&argb_data[i]); // argb
@ -376,7 +376,7 @@ static void BundleColorMap_SSE2(const uint8_t* const row, int width, int xbits,
break;
}
case 2: {
const __m128i mask_or = _mm_set1_epi32(0xff000000);
const __m128i mask_or = _mm_set1_epi32((int)0xff000000);
const __m128i mul_cst = _mm_set1_epi16(0x0104);
const __m128i mask_mul = _mm_set1_epi16(0x0f00);
for (x = 0; x + 16 <= width; x += 16, dst += 4) {
@ -427,7 +427,7 @@ static WEBP_INLINE void Average2_m128i(const __m128i* const a0,
static void PredictorSub0_SSE2(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int i;
const __m128i black = _mm_set1_epi32(ARGB_BLACK);
const __m128i black = _mm_set1_epi32((int)ARGB_BLACK);
for (i = 0; i + 4 <= num_pixels; i += 4) {
const __m128i src = _mm_loadu_si128((const __m128i*)&in[i]);
const __m128i res = _mm_sub_epi8(src, black);

View File

@ -498,7 +498,7 @@ static void PredictorAdd13_NEON(const uint32_t* in, const uint32_t* upper,
// vtbl?_u8 are marked unavailable for iOS arm64 with Xcode < 6.3, use
// non-standard versions there.
#if defined(__APPLE__) && defined(__aarch64__) && \
#if defined(__APPLE__) && WEBP_AARCH64 && \
defined(__apple_build_version__) && (__apple_build_version__< 6020037)
#define USE_VTBLQ
#endif

View File

@ -27,23 +27,22 @@ static WEBP_INLINE uint32_t ClampedAddSubtractFull_SSE2(uint32_t c0,
uint32_t c1,
uint32_t c2) {
const __m128i zero = _mm_setzero_si128();
const __m128i C0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c0), zero);
const __m128i C1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c1), zero);
const __m128i C2 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c2), zero);
const __m128i C0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128((int)c0), zero);
const __m128i C1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128((int)c1), zero);
const __m128i C2 = _mm_unpacklo_epi8(_mm_cvtsi32_si128((int)c2), zero);
const __m128i V1 = _mm_add_epi16(C0, C1);
const __m128i V2 = _mm_sub_epi16(V1, C2);
const __m128i b = _mm_packus_epi16(V2, V2);
const uint32_t output = _mm_cvtsi128_si32(b);
return output;
return (uint32_t)_mm_cvtsi128_si32(b);
}
static WEBP_INLINE uint32_t ClampedAddSubtractHalf_SSE2(uint32_t c0,
uint32_t c1,
uint32_t c2) {
const __m128i zero = _mm_setzero_si128();
const __m128i C0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c0), zero);
const __m128i C1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c1), zero);
const __m128i B0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c2), zero);
const __m128i C0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128((int)c0), zero);
const __m128i C1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128((int)c1), zero);
const __m128i B0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128((int)c2), zero);
const __m128i avg = _mm_add_epi16(C1, C0);
const __m128i A0 = _mm_srli_epi16(avg, 1);
const __m128i A1 = _mm_sub_epi16(A0, B0);
@ -52,16 +51,15 @@ static WEBP_INLINE uint32_t ClampedAddSubtractHalf_SSE2(uint32_t c0,
const __m128i A3 = _mm_srai_epi16(A2, 1);
const __m128i A4 = _mm_add_epi16(A0, A3);
const __m128i A5 = _mm_packus_epi16(A4, A4);
const uint32_t output = _mm_cvtsi128_si32(A5);
return output;
return (uint32_t)_mm_cvtsi128_si32(A5);
}
static WEBP_INLINE uint32_t Select_SSE2(uint32_t a, uint32_t b, uint32_t c) {
int pa_minus_pb;
const __m128i zero = _mm_setzero_si128();
const __m128i A0 = _mm_cvtsi32_si128(a);
const __m128i B0 = _mm_cvtsi32_si128(b);
const __m128i C0 = _mm_cvtsi32_si128(c);
const __m128i A0 = _mm_cvtsi32_si128((int)a);
const __m128i B0 = _mm_cvtsi32_si128((int)b);
const __m128i C0 = _mm_cvtsi32_si128((int)c);
const __m128i AC0 = _mm_subs_epu8(A0, C0);
const __m128i CA0 = _mm_subs_epu8(C0, A0);
const __m128i BC0 = _mm_subs_epu8(B0, C0);
@ -94,8 +92,8 @@ static WEBP_INLINE void Average2_uint32_SSE2(const uint32_t a0,
__m128i* const avg) {
// (a + b) >> 1 = ((a + b + 1) >> 1) - ((a ^ b) & 1)
const __m128i ones = _mm_set1_epi8(1);
const __m128i A0 = _mm_cvtsi32_si128(a0);
const __m128i A1 = _mm_cvtsi32_si128(a1);
const __m128i A0 = _mm_cvtsi32_si128((int)a0);
const __m128i A1 = _mm_cvtsi32_si128((int)a1);
const __m128i avg1 = _mm_avg_epu8(A0, A1);
const __m128i one = _mm_and_si128(_mm_xor_si128(A0, A1), ones);
*avg = _mm_sub_epi8(avg1, one);
@ -103,8 +101,8 @@ static WEBP_INLINE void Average2_uint32_SSE2(const uint32_t a0,
static WEBP_INLINE __m128i Average2_uint32_16_SSE2(uint32_t a0, uint32_t a1) {
const __m128i zero = _mm_setzero_si128();
const __m128i A0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(a0), zero);
const __m128i A1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(a1), zero);
const __m128i A0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128((int)a0), zero);
const __m128i A1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128((int)a1), zero);
const __m128i sum = _mm_add_epi16(A1, A0);
return _mm_srli_epi16(sum, 1);
}
@ -112,19 +110,18 @@ static WEBP_INLINE __m128i Average2_uint32_16_SSE2(uint32_t a0, uint32_t a1) {
static WEBP_INLINE uint32_t Average2_SSE2(uint32_t a0, uint32_t a1) {
__m128i output;
Average2_uint32_SSE2(a0, a1, &output);
return _mm_cvtsi128_si32(output);
return (uint32_t)_mm_cvtsi128_si32(output);
}
static WEBP_INLINE uint32_t Average3_SSE2(uint32_t a0, uint32_t a1,
uint32_t a2) {
const __m128i zero = _mm_setzero_si128();
const __m128i avg1 = Average2_uint32_16_SSE2(a0, a2);
const __m128i A1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(a1), zero);
const __m128i A1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128((int)a1), zero);
const __m128i sum = _mm_add_epi16(avg1, A1);
const __m128i avg2 = _mm_srli_epi16(sum, 1);
const __m128i A2 = _mm_packus_epi16(avg2, avg2);
const uint32_t output = _mm_cvtsi128_si32(A2);
return output;
return (uint32_t)_mm_cvtsi128_si32(A2);
}
static WEBP_INLINE uint32_t Average4_SSE2(uint32_t a0, uint32_t a1,
@ -134,8 +131,7 @@ static WEBP_INLINE uint32_t Average4_SSE2(uint32_t a0, uint32_t a1,
const __m128i sum = _mm_add_epi16(avg2, avg1);
const __m128i avg3 = _mm_srli_epi16(sum, 1);
const __m128i A0 = _mm_packus_epi16(avg3, avg3);
const uint32_t output = _mm_cvtsi128_si32(A0);
return output;
return (uint32_t)_mm_cvtsi128_si32(A0);
}
static uint32_t Predictor5_SSE2(const uint32_t* const left,
@ -192,7 +188,7 @@ static uint32_t Predictor13_SSE2(const uint32_t* const left,
static void PredictorAdd0_SSE2(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int i;
const __m128i black = _mm_set1_epi32(ARGB_BLACK);
const __m128i black = _mm_set1_epi32((int)ARGB_BLACK);
for (i = 0; i + 4 <= num_pixels; i += 4) {
const __m128i src = _mm_loadu_si128((const __m128i*)&in[i]);
const __m128i res = _mm_add_epi8(src, black);
@ -208,7 +204,7 @@ static void PredictorAdd0_SSE2(const uint32_t* in, const uint32_t* upper,
static void PredictorAdd1_SSE2(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int i;
__m128i prev = _mm_set1_epi32(out[-1]);
__m128i prev = _mm_set1_epi32((int)out[-1]);
for (i = 0; i + 4 <= num_pixels; i += 4) {
// a | b | c | d
const __m128i src = _mm_loadu_si128((const __m128i*)&in[i]);
@ -285,12 +281,12 @@ GENERATE_PREDICTOR_2(9, upper[i + 1])
#undef GENERATE_PREDICTOR_2
// Predictor10: average of (average of (L,TL), average of (T, TR)).
#define DO_PRED10(OUT) do { \
__m128i avgLTL, avg; \
Average2_m128i(&L, &TL, &avgLTL); \
Average2_m128i(&avgTTR, &avgLTL, &avg); \
L = _mm_add_epi8(avg, src); \
out[i + (OUT)] = _mm_cvtsi128_si32(L); \
#define DO_PRED10(OUT) do { \
__m128i avgLTL, avg; \
Average2_m128i(&L, &TL, &avgLTL); \
Average2_m128i(&avgTTR, &avgLTL, &avg); \
L = _mm_add_epi8(avg, src); \
out[i + (OUT)] = (uint32_t)_mm_cvtsi128_si32(L); \
} while (0)
#define DO_PRED10_SHIFT do { \
@ -303,7 +299,7 @@ GENERATE_PREDICTOR_2(9, upper[i + 1])
static void PredictorAdd10_SSE2(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int i;
__m128i L = _mm_cvtsi32_si128(out[-1]);
__m128i L = _mm_cvtsi32_si128((int)out[-1]);
for (i = 0; i + 4 <= num_pixels; i += 4) {
__m128i src = _mm_loadu_si128((const __m128i*)&in[i]);
__m128i TL = _mm_loadu_si128((const __m128i*)&upper[i - 1]);
@ -336,7 +332,7 @@ static void PredictorAdd10_SSE2(const uint32_t* in, const uint32_t* upper,
const __m128i B = _mm_andnot_si128(mask, T); \
const __m128i pred = _mm_or_si128(A, B); /* pred = (pa > b)? L : T*/ \
L = _mm_add_epi8(src, pred); \
out[i + (OUT)] = _mm_cvtsi128_si32(L); \
out[i + (OUT)] = (uint32_t)_mm_cvtsi128_si32(L); \
} while (0)
#define DO_PRED11_SHIFT do { \
@ -351,7 +347,7 @@ static void PredictorAdd11_SSE2(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int i;
__m128i pa;
__m128i L = _mm_cvtsi32_si128(out[-1]);
__m128i L = _mm_cvtsi32_si128((int)out[-1]);
for (i = 0; i + 4 <= num_pixels; i += 4) {
__m128i T = _mm_loadu_si128((const __m128i*)&upper[i]);
__m128i TL = _mm_loadu_si128((const __m128i*)&upper[i - 1]);
@ -384,12 +380,12 @@ static void PredictorAdd11_SSE2(const uint32_t* in, const uint32_t* upper,
#undef DO_PRED11_SHIFT
// Predictor12: ClampedAddSubtractFull.
#define DO_PRED12(DIFF, LANE, OUT) do { \
const __m128i all = _mm_add_epi16(L, (DIFF)); \
const __m128i alls = _mm_packus_epi16(all, all); \
const __m128i res = _mm_add_epi8(src, alls); \
out[i + (OUT)] = _mm_cvtsi128_si32(res); \
L = _mm_unpacklo_epi8(res, zero); \
#define DO_PRED12(DIFF, LANE, OUT) do { \
const __m128i all = _mm_add_epi16(L, (DIFF)); \
const __m128i alls = _mm_packus_epi16(all, all); \
const __m128i res = _mm_add_epi8(src, alls); \
out[i + (OUT)] = (uint32_t)_mm_cvtsi128_si32(res); \
L = _mm_unpacklo_epi8(res, zero); \
} while (0)
#define DO_PRED12_SHIFT(DIFF, LANE) do { \
@ -402,7 +398,7 @@ static void PredictorAdd12_SSE2(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int i;
const __m128i zero = _mm_setzero_si128();
const __m128i L8 = _mm_cvtsi32_si128(out[-1]);
const __m128i L8 = _mm_cvtsi32_si128((int)out[-1]);
__m128i L = _mm_unpacklo_epi8(L8, zero);
for (i = 0; i + 4 <= num_pixels; i += 4) {
// Load 4 pixels at a time.
@ -468,7 +464,7 @@ static void TransformColorInverse_SSE2(const VP8LMultipliers* const m,
const __m128i mults_b2 = MK_CST_16(CST(red_to_blue_), 0);
#undef MK_CST_16
#undef CST
const __m128i mask_ag = _mm_set1_epi32(0xff00ff00); // alpha-green masks
const __m128i mask_ag = _mm_set1_epi32((int)0xff00ff00); // alpha-green masks
int i;
for (i = 0; i + 4 <= num_pixels; i += 4) {
const __m128i in = _mm_loadu_si128((const __m128i*)&src[i]); // argb
@ -532,7 +528,7 @@ static void ConvertBGRAToRGB_SSE2(const uint32_t* src, int num_pixels,
static void ConvertBGRAToRGBA_SSE2(const uint32_t* src,
int num_pixels, uint8_t* dst) {
const __m128i red_blue_mask = _mm_set1_epi32(0x00ff00ffu);
const __m128i red_blue_mask = _mm_set1_epi32(0x00ff00ff);
const __m128i* in = (const __m128i*)src;
__m128i* out = (__m128i*)dst;
while (num_pixels >= 8) {
@ -561,7 +557,7 @@ static void ConvertBGRAToRGBA_SSE2(const uint32_t* src,
static void ConvertBGRAToRGBA4444_SSE2(const uint32_t* src,
int num_pixels, uint8_t* dst) {
const __m128i mask_0x0f = _mm_set1_epi8(0x0f);
const __m128i mask_0xf0 = _mm_set1_epi8(0xf0);
const __m128i mask_0xf0 = _mm_set1_epi8((char)0xf0);
const __m128i* in = (const __m128i*)src;
__m128i* out = (__m128i*)dst;
while (num_pixels >= 8) {
@ -596,8 +592,8 @@ static void ConvertBGRAToRGBA4444_SSE2(const uint32_t* src,
static void ConvertBGRAToRGB565_SSE2(const uint32_t* src,
int num_pixels, uint8_t* dst) {
const __m128i mask_0xe0 = _mm_set1_epi8(0xe0);
const __m128i mask_0xf8 = _mm_set1_epi8(0xf8);
const __m128i mask_0xe0 = _mm_set1_epi8((char)0xe0);
const __m128i mask_0xf8 = _mm_set1_epi8((char)0xf8);
const __m128i mask_0x07 = _mm_set1_epi8(0x07);
const __m128i* in = (const __m128i*)src;
__m128i* out = (__m128i*)dst;

View File

@ -25,11 +25,12 @@ static void TransformColorInverse_SSE41(const VP8LMultipliers* const m,
int num_pixels, uint32_t* dst) {
// sign-extended multiplying constants, pre-shifted by 5.
#define CST(X) (((int16_t)(m->X << 8)) >> 5) // sign-extend
const __m128i mults_rb = _mm_set1_epi32((uint32_t)CST(green_to_red_) << 16 |
(CST(green_to_blue_) & 0xffff));
const __m128i mults_rb =
_mm_set1_epi32((int)((uint32_t)CST(green_to_red_) << 16 |
(CST(green_to_blue_) & 0xffff)));
const __m128i mults_b2 = _mm_set1_epi32(CST(red_to_blue_));
#undef CST
const __m128i mask_ag = _mm_set1_epi32(0xff00ff00);
const __m128i mask_ag = _mm_set1_epi32((int)0xff00ff00);
const __m128i perm1 = _mm_setr_epi8(-1, 1, -1, 1, -1, 5, -1, 5,
-1, 9, -1, 9, -1, 13, -1, 13);
const __m128i perm2 = _mm_setr_epi8(-1, 2, -1, -1, -1, 6, -1, -1,

View File

@ -21,7 +21,7 @@
// Right now, some intrinsics functions seem slower, so we disable them
// everywhere except newer clang/gcc or aarch64 where the inline assembly is
// incompatible.
#if LOCAL_CLANG_PREREQ(3,8) || LOCAL_GCC_PREREQ(4,9) || defined(__aarch64__)
#if LOCAL_CLANG_PREREQ(3, 8) || LOCAL_GCC_PREREQ(4, 9) || WEBP_AARCH64
#define WEBP_USE_INTRINSICS // use intrinsics when possible
#endif
@ -46,7 +46,7 @@
// if using intrinsics, this flag avoids some functions that make gcc-4.6.3
// crash ("internal compiler error: in immed_double_const, at emit-rtl.").
// (probably similar to gcc.gnu.org/bugzilla/show_bug.cgi?id=48183)
#if !(LOCAL_CLANG_PREREQ(3,8) || LOCAL_GCC_PREREQ(4,8) || defined(__aarch64__))
#if !(LOCAL_CLANG_PREREQ(3, 8) || LOCAL_GCC_PREREQ(4, 8) || WEBP_AARCH64)
#define WORK_AROUND_GCC
#endif

View File

@ -21,10 +21,15 @@
#define IsFlat IsFlat_NEON
static uint32x2_t horizontal_add_uint32x4(const uint32x4_t a) {
static uint32_t horizontal_add_uint32x4(const uint32x4_t a) {
#if WEBP_AARCH64
return vaddvq_u32(a);
#else
const uint64x2_t b = vpaddlq_u32(a);
return vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)),
vreinterpret_u32_u64(vget_high_u64(b)));
const uint32x2_t c = vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)),
vreinterpret_u32_u64(vget_high_u64(b)));
return vget_lane_u32(c, 0);
#endif
}
static WEBP_INLINE int IsFlat(const int16_t* levels, int num_blocks,
@ -45,7 +50,7 @@ static WEBP_INLINE int IsFlat(const int16_t* levels, int num_blocks,
levels += 16;
}
return thresh >= (int32_t)vget_lane_u32(horizontal_add_uint32x4(sum), 0);
return thresh >= (int)horizontal_add_uint32x4(sum);
}
#else

View File

@ -197,6 +197,7 @@ WebPRescalerImportRowFunc WebPRescalerImportRowShrink;
WebPRescalerExportRowFunc WebPRescalerExportRowExpand;
WebPRescalerExportRowFunc WebPRescalerExportRowShrink;
extern VP8CPUInfo VP8GetCPUInfo;
extern void WebPRescalerDspInitSSE2(void);
extern void WebPRescalerDspInitMIPS32(void);
extern void WebPRescalerDspInitMIPSdspR2(void);

View File

@ -85,7 +85,7 @@ static void RescalerImportRowExpand_SSE2(WebPRescaler* const wrk,
const __m128i mult = _mm_cvtsi32_si128(((x_add - accum) << 16) | accum);
const __m128i out = _mm_madd_epi16(cur_pixels, mult);
assert(sizeof(*frow) == sizeof(uint32_t));
WebPUint32ToMem((uint8_t*)frow, _mm_cvtsi128_si32(out));
WebPInt32ToMem((uint8_t*)frow, _mm_cvtsi128_si32(out));
frow += 1;
if (frow >= frow_end) break;
accum -= wrk->x_sub;
@ -132,7 +132,7 @@ static void RescalerImportRowShrink_SSE2(WebPRescaler* const wrk,
__m128i base = zero;
accum += wrk->x_add;
while (accum > 0) {
const __m128i A = _mm_cvtsi32_si128(WebPMemToUint32(src));
const __m128i A = _mm_cvtsi32_si128(WebPMemToInt32(src));
src += 4;
base = _mm_unpacklo_epi8(A, zero);
// To avoid overflow, we need: base * x_add / x_sub < 32768
@ -198,7 +198,7 @@ static WEBP_INLINE void ProcessRow_SSE2(const __m128i* const A0,
const __m128i* const mult,
uint8_t* const dst) {
const __m128i rounder = _mm_set_epi32(0, ROUNDER, 0, ROUNDER);
const __m128i mask = _mm_set_epi32(0xffffffffu, 0, 0xffffffffu, 0);
const __m128i mask = _mm_set_epi32(~0, 0, ~0, 0);
const __m128i B0 = _mm_mul_epu32(*A0, *mult);
const __m128i B1 = _mm_mul_epu32(*A1, *mult);
const __m128i B2 = _mm_mul_epu32(*A2, *mult);

View File

@ -137,6 +137,7 @@ VP8SSIMGetClippedFunc VP8SSIMGetClipped;
VP8AccumulateSSEFunc VP8AccumulateSSE;
#endif
extern VP8CPUInfo VP8GetCPUInfo;
extern void VP8SSIMDspInitSSE2(void);
WEBP_DSP_INIT_FUNC(VP8SSIMDspInit) {

View File

@ -215,6 +215,7 @@ static void EmptyYuv444Func(const uint8_t* y,
WebPYUV444Converter WebPYUV444Converters[MODE_LAST];
extern VP8CPUInfo VP8GetCPUInfo;
extern void WebPInitYUV444ConvertersMIPSdspR2(void);
extern void WebPInitYUV444ConvertersSSE2(void);
extern void WebPInitYUV444ConvertersSSE41(void);

View File

@ -111,7 +111,7 @@ static const int16_t kCoeffs1[4] = { 19077, 26149, 6419, 13320 };
vst4_u8(out, v255_r_g_b); \
} while (0)
#if !defined(WEBP_SWAP_16BIT_CSP)
#if (WEBP_SWAP_16BIT_CSP == 0)
#define ZIP_U8(lo, hi) vzip_u8((lo), (hi))
#else
#define ZIP_U8(lo, hi) vzip_u8((hi), (lo))

View File

@ -121,7 +121,7 @@ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \
int uv_pos, pos; \
/* 16byte-aligned array to cache reconstructed u and v */ \
uint8_t uv_buf[14 * 32 + 15] = { 0 }; \
uint8_t* const r_u = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~15); \
uint8_t* const r_u = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~(uintptr_t)15); \
uint8_t* const r_v = r_u + 32; \
\
assert(top_y != NULL); \

View File

@ -70,6 +70,7 @@ void WebPSamplerProcessPlane(const uint8_t* y, int y_stride,
WebPSamplerRowFunc WebPSamplers[MODE_LAST];
extern VP8CPUInfo VP8GetCPUInfo;
extern void WebPInitSamplersSSE2(void);
extern void WebPInitSamplersSSE41(void);
extern void WebPInitSamplersMIPS32(void);

View File

@ -15,10 +15,12 @@
#if defined(WEBP_USE_SSE2)
#include "src/dsp/common_sse2.h"
#include <stdlib.h>
#include <emmintrin.h>
#include "src/dsp/common_sse2.h"
#include "src/utils/utils.h"
//-----------------------------------------------------------------------------
// Convert spans of 32 pixels to various RGB formats for the fancy upsampler.
@ -74,7 +76,7 @@ static WEBP_INLINE __m128i Load_HI_16_SSE2(const uint8_t* src) {
// Load and replicate the U/V samples
static WEBP_INLINE __m128i Load_UV_HI_8_SSE2(const uint8_t* src) {
const __m128i zero = _mm_setzero_si128();
const __m128i tmp0 = _mm_cvtsi32_si128(*(const uint32_t*)src);
const __m128i tmp0 = _mm_cvtsi32_si128(WebPMemToInt32(src));
const __m128i tmp1 = _mm_unpacklo_epi8(zero, tmp0);
return _mm_unpacklo_epi16(tmp1, tmp1); // replicate samples
}
@ -130,7 +132,7 @@ static WEBP_INLINE void PackAndStore4444_SSE2(const __m128i* const R,
const __m128i rg0 = _mm_packus_epi16(*B, *A);
const __m128i ba0 = _mm_packus_epi16(*R, *G);
#endif
const __m128i mask_0xf0 = _mm_set1_epi8(0xf0);
const __m128i mask_0xf0 = _mm_set1_epi8((char)0xf0);
const __m128i rb1 = _mm_unpacklo_epi8(rg0, ba0); // rbrbrbrbrb...
const __m128i ga1 = _mm_unpackhi_epi8(rg0, ba0); // gagagagaga...
const __m128i rb2 = _mm_and_si128(rb1, mask_0xf0);
@ -147,9 +149,10 @@ static WEBP_INLINE void PackAndStore565_SSE2(const __m128i* const R,
const __m128i r0 = _mm_packus_epi16(*R, *R);
const __m128i g0 = _mm_packus_epi16(*G, *G);
const __m128i b0 = _mm_packus_epi16(*B, *B);
const __m128i r1 = _mm_and_si128(r0, _mm_set1_epi8(0xf8));
const __m128i r1 = _mm_and_si128(r0, _mm_set1_epi8((char)0xf8));
const __m128i b1 = _mm_and_si128(_mm_srli_epi16(b0, 3), _mm_set1_epi8(0x1f));
const __m128i g1 = _mm_srli_epi16(_mm_and_si128(g0, _mm_set1_epi8(0xe0)), 5);
const __m128i g1 =
_mm_srli_epi16(_mm_and_si128(g0, _mm_set1_epi8((char)0xe0)), 5);
const __m128i g2 = _mm_slli_epi16(_mm_and_si128(g0, _mm_set1_epi8(0x1c)), 3);
const __m128i rg = _mm_or_si128(r1, g1);
const __m128i gb = _mm_or_si128(g2, b1);

View File

@ -15,10 +15,12 @@
#if defined(WEBP_USE_SSE41)
#include "src/dsp/common_sse41.h"
#include <stdlib.h>
#include <smmintrin.h>
#include "src/dsp/common_sse41.h"
#include "src/utils/utils.h"
//-----------------------------------------------------------------------------
// Convert spans of 32 pixels to various RGB formats for the fancy upsampler.
@ -74,7 +76,7 @@ static WEBP_INLINE __m128i Load_HI_16_SSE41(const uint8_t* src) {
// Load and replicate the U/V samples
static WEBP_INLINE __m128i Load_UV_HI_8_SSE41(const uint8_t* src) {
const __m128i zero = _mm_setzero_si128();
const __m128i tmp0 = _mm_cvtsi32_si128(*(const uint32_t*)src);
const __m128i tmp0 = _mm_cvtsi32_si128(WebPMemToInt32(src));
const __m128i tmp1 = _mm_unpacklo_epi8(zero, tmp0);
return _mm_unpacklo_epi16(tmp1, tmp1); // replicate samples
}

View File

@ -13,6 +13,7 @@
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "src/enc/vp8i_enc.h"
#include "src/dsp/dsp.h"
@ -140,6 +141,11 @@ static int EncodeAlphaInternal(const uint8_t* const data, int width, int height,
!reduce_levels, &tmp_bw, &result->stats);
if (ok) {
output = VP8LBitWriterFinish(&tmp_bw);
if (tmp_bw.error_) {
VP8LBitWriterWipeOut(&tmp_bw);
memset(&result->bw, 0, sizeof(result->bw));
return 0;
}
output_size = VP8LBitWriterNumBytes(&tmp_bw);
if (output_size > data_size) {
// compressed size is larger than source! Revert to uncompressed mode.
@ -148,6 +154,7 @@ static int EncodeAlphaInternal(const uint8_t* const data, int width, int height,
}
} else {
VP8LBitWriterWipeOut(&tmp_bw);
memset(&result->bw, 0, sizeof(result->bw));
return 0;
}
}
@ -162,7 +169,7 @@ static int EncodeAlphaInternal(const uint8_t* const data, int width, int height,
header = method | (filter << 2);
if (reduce_levels) header |= ALPHA_PREPROCESSED_LEVELS << 4;
VP8BitWriterInit(&result->bw, ALPHA_HEADER_LEN + output_size);
if (!VP8BitWriterInit(&result->bw, ALPHA_HEADER_LEN + output_size)) ok = 0;
ok = ok && VP8BitWriterAppend(&result->bw, &header, ALPHA_HEADER_LEN);
ok = ok && VP8BitWriterAppend(&result->bw, output, output_size);
@ -312,11 +319,11 @@ static int EncodeAlpha(VP8Encoder* const enc,
assert(filter >= WEBP_FILTER_NONE && filter <= WEBP_FILTER_FAST);
if (quality < 0 || quality > 100) {
return 0;
return WebPEncodingSetError(pic, VP8_ENC_ERROR_INVALID_CONFIGURATION);
}
if (method < ALPHA_NO_COMPRESSION || method > ALPHA_LOSSLESS_COMPRESSION) {
return 0;
return WebPEncodingSetError(pic, VP8_ENC_ERROR_INVALID_CONFIGURATION);
}
if (method == ALPHA_NO_COMPRESSION) {
@ -326,7 +333,7 @@ static int EncodeAlpha(VP8Encoder* const enc,
quant_alpha = (uint8_t*)WebPSafeMalloc(1ULL, data_size);
if (quant_alpha == NULL) {
return 0;
return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
}
// Extract alpha data (width x height) from raw_data (stride x height).
@ -346,6 +353,9 @@ static int EncodeAlpha(VP8Encoder* const enc,
ok = ApplyFiltersAndEncode(quant_alpha, width, height, data_size, method,
filter, reduce_levels, effort_level, output,
output_size, pic->stats);
if (!ok) {
WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); // imprecise
}
#if !defined(WEBP_DISABLE_STATS)
if (pic->stats != NULL) { // need stats?
pic->stats->coded_size += (int)(*output_size);
@ -405,7 +415,7 @@ int VP8EncStartAlpha(VP8Encoder* const enc) {
WebPWorker* const worker = &enc->alpha_worker_;
// Makes sure worker is good to go.
if (!WebPGetWorkerInterface()->Reset(worker)) {
return 0;
return WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY);
}
WebPGetWorkerInterface()->Launch(worker);
return 1;

View File

@ -391,12 +391,14 @@ static int DoSegmentsJob(void* arg1, void* arg2) {
return ok;
}
#ifdef WEBP_USE_THREAD
static void MergeJobs(const SegmentJob* const src, SegmentJob* const dst) {
int i;
for (i = 0; i <= MAX_ALPHA; ++i) dst->alphas[i] += src->alphas[i];
dst->alpha += src->alpha;
dst->uv_alpha += src->uv_alpha;
}
#endif
// initialize the job struct with some tasks to perform
static void InitSegmentJob(VP8Encoder* const enc, SegmentJob* const job,
@ -425,10 +427,10 @@ int VP8EncAnalyze(VP8Encoder* const enc) {
(enc->method_ <= 1); // for method 0 - 1, we need preds_[] to be filled.
if (do_segments) {
const int last_row = enc->mb_h_;
// We give a little more than a half work to the main thread.
const int split_row = (9 * last_row + 15) >> 4;
const int total_mb = last_row * enc->mb_w_;
#ifdef WEBP_USE_THREAD
// We give a little more than a half work to the main thread.
const int split_row = (9 * last_row + 15) >> 4;
const int kMinSplitRow = 2; // minimal rows needed for mt to be worth it
const int do_mt = (enc->thread_level_ > 0) && (split_row >= kMinSplitRow);
#else
@ -438,6 +440,7 @@ int VP8EncAnalyze(VP8Encoder* const enc) {
WebPGetWorkerInterface();
SegmentJob main_job;
if (do_mt) {
#ifdef WEBP_USE_THREAD
SegmentJob side_job;
// Note the use of '&' instead of '&&' because we must call the functions
// no matter what.
@ -455,6 +458,7 @@ int VP8EncAnalyze(VP8Encoder* const enc) {
}
worker_interface->End(&side_job.worker);
if (ok) MergeJobs(&side_job, &main_job); // merge results together
#endif // WEBP_USE_THREAD
} else {
// Even for single-thread case, we use the generic Worker tools.
InitSegmentJob(enc, &main_job, 0, last_row);
@ -470,6 +474,10 @@ int VP8EncAnalyze(VP8Encoder* const enc) {
} else { // Use only one default segment.
ResetAllMBInfo(enc);
}
if (!ok) {
return WebPEncodingSetError(enc->pic_,
VP8_ENC_ERROR_OUT_OF_MEMORY); // imprecise
}
return ok;
}

View File

@ -283,8 +283,7 @@ int VP8LHashChainFill(VP8LHashChain* const p, int quality,
hash_to_first_index =
(int32_t*)WebPSafeMalloc(HASH_SIZE, sizeof(*hash_to_first_index));
if (hash_to_first_index == NULL) {
WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
return 0;
return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
}
percent_range = remaining_percent / 2;
@ -1050,8 +1049,7 @@ int VP8LGetBackwardReferences(
refs_best = GetBackwardReferencesLowEffort(
width, height, argb, cache_bits_best, hash_chain, refs);
if (refs_best == NULL) {
WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
return 0;
return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
}
// Set it in first position.
BackwardRefsSwap(refs_best, &refs[0]);
@ -1059,8 +1057,7 @@ int VP8LGetBackwardReferences(
if (!GetBackwardReferences(width, height, argb, quality, lz77_types_to_try,
cache_bits_max, do_no_cache, hash_chain, refs,
cache_bits_best)) {
WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
return 0;
return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
}
}

View File

@ -689,7 +689,7 @@ static int PreLoopInitialize(VP8Encoder* const enc) {
}
if (!ok) {
VP8EncFreeBitWriters(enc); // malloc error occurred
WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY);
return WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY);
}
return ok;
}
@ -719,6 +719,7 @@ static int PostLoopFinalize(VP8EncIterator* const it, int ok) {
} else {
// Something bad happened -> need to do some memory cleanup.
VP8EncFreeBitWriters(enc);
return WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY);
}
return ok;
}
@ -754,6 +755,11 @@ int VP8EncLoop(VP8Encoder* const enc) {
// *then* decide how to code the skip decision if there's one.
if (!VP8Decimate(&it, &info, rd_opt) || dont_use_skip) {
CodeResiduals(it.bw_, &it, &info);
if (it.bw_->error_) {
// enc->pic_->error_code is set in PostLoopFinalize().
ok = 0;
break;
}
} else { // reset predictors after a skip
ResetAfterSkip(&it);
}

View File

@ -69,10 +69,12 @@ static int CheckNonOpaque(const uint8_t* alpha, int width, int height,
int WebPPictureHasTransparency(const WebPPicture* picture) {
if (picture == NULL) return 0;
if (picture->use_argb) {
const int alpha_offset = ALPHA_OFFSET;
return CheckNonOpaque((const uint8_t*)picture->argb + alpha_offset,
picture->width, picture->height,
4, picture->argb_stride * sizeof(*picture->argb));
if (picture->argb != NULL) {
return CheckNonOpaque((const uint8_t*)picture->argb + ALPHA_OFFSET,
picture->width, picture->height,
4, picture->argb_stride * sizeof(*picture->argb));
}
return 0;
}
return CheckNonOpaque(picture->a, picture->width, picture->height,
1, picture->a_stride);
@ -96,6 +98,7 @@ static int kLinearToGammaTab[GAMMA_TAB_SIZE + 1];
static uint16_t kGammaToLinearTab[256];
static volatile int kGammaTablesOk = 0;
static void InitGammaTables(void);
extern VP8CPUInfo VP8GetCPUInfo;
WEBP_DSP_INIT_FUNC(InitGammaTables) {
if (!kGammaTablesOk) {
@ -170,21 +173,6 @@ static const int kMinDimensionIterativeConversion = 4;
//------------------------------------------------------------------------------
// Main function
extern void SharpYuvInit(VP8CPUInfo cpu_info_func);
static void SafeInitSharpYuv(void) {
#if defined(WEBP_USE_THREAD) && !defined(_WIN32)
static pthread_mutex_t initsharpyuv_lock = PTHREAD_MUTEX_INITIALIZER;
if (pthread_mutex_lock(&initsharpyuv_lock)) return;
#endif
SharpYuvInit(VP8GetCPUInfo);
#if defined(WEBP_USE_THREAD) && !defined(_WIN32)
(void)pthread_mutex_unlock(&initsharpyuv_lock);
#endif
}
static int PreprocessARGB(const uint8_t* r_ptr,
const uint8_t* g_ptr,
const uint8_t* b_ptr,
@ -481,6 +469,8 @@ static WEBP_INLINE void ConvertRowsToUV(const uint16_t* rgb,
}
}
extern void SharpYuvInit(VP8CPUInfo cpu_info_func);
static int ImportYUVAFromRGBA(const uint8_t* r_ptr,
const uint8_t* g_ptr,
const uint8_t* b_ptr,
@ -516,7 +506,7 @@ static int ImportYUVAFromRGBA(const uint8_t* r_ptr,
}
if (use_iterative_conversion) {
SafeInitSharpYuv();
SharpYuvInit(VP8GetCPUInfo);
if (!PreprocessARGB(r_ptr, g_ptr, b_ptr, step, rgb_stride, picture)) {
return 0;
}
@ -545,7 +535,9 @@ static int ImportYUVAFromRGBA(const uint8_t* r_ptr,
WebPInitConvertARGBToYUV();
InitGammaTables();
if (tmp_rgb == NULL) return 0; // malloc error
if (tmp_rgb == NULL) {
return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
}
// Downsample Y/U/V planes, two rows at a time
for (y = 0; y < (height >> 1); ++y) {

View File

@ -137,7 +137,9 @@ int WebPPictureCrop(WebPPicture* pic,
PictureGrabSpecs(pic, &tmp);
tmp.width = width;
tmp.height = height;
if (!WebPPictureAlloc(&tmp)) return 0;
if (!WebPPictureAlloc(&tmp)) {
return WebPEncodingSetError(pic, tmp.error_code);
}
if (!pic->use_argb) {
const int y_offset = top * pic->y_stride + left;
@ -212,26 +214,28 @@ int WebPPictureRescale(WebPPicture* picture, int width, int height) {
prev_height = picture->height;
if (!WebPRescalerGetScaledDimensions(
prev_width, prev_height, &width, &height)) {
return 0;
return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION);
}
PictureGrabSpecs(picture, &tmp);
tmp.width = width;
tmp.height = height;
if (!WebPPictureAlloc(&tmp)) return 0;
if (!WebPPictureAlloc(&tmp)) {
return WebPEncodingSetError(picture, tmp.error_code);
}
if (!picture->use_argb) {
work = (rescaler_t*)WebPSafeMalloc(2ULL * width, sizeof(*work));
if (work == NULL) {
WebPPictureFree(&tmp);
return 0;
return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
}
// If present, we need to rescale alpha first (for AlphaMultiplyY).
if (picture->a != NULL) {
WebPInitAlphaProcessing();
if (!RescalePlane(picture->a, prev_width, prev_height, picture->a_stride,
tmp.a, width, height, tmp.a_stride, work, 1)) {
return 0;
return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION);
}
}
@ -246,14 +250,14 @@ int WebPPictureRescale(WebPPicture* picture, int width, int height) {
!RescalePlane(picture->v, HALVE(prev_width), HALVE(prev_height),
picture->uv_stride, tmp.v, HALVE(width), HALVE(height),
tmp.uv_stride, work, 1)) {
return 0;
return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION);
}
AlphaMultiplyY(&tmp, 1);
} else {
work = (rescaler_t*)WebPSafeMalloc(2ULL * width * 4, sizeof(*work));
if (work == NULL) {
WebPPictureFree(&tmp);
return 0;
return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
}
// In order to correctly interpolate colors, we need to apply the alpha
// weighting first (black-matting), scale the RGB values, and remove
@ -263,7 +267,7 @@ int WebPPictureRescale(WebPPicture* picture, int width, int height) {
if (!RescalePlane((const uint8_t*)picture->argb, prev_width, prev_height,
picture->argb_stride * 4, (uint8_t*)tmp.argb, width,
height, tmp.argb_stride * 4, work, 4)) {
return 0;
return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION);
}
AlphaMultiplyARGB(&tmp, 1);
}

View File

@ -258,7 +258,10 @@ static int EmitPartitionsSize(const VP8Encoder* const enc,
buf[3 * p + 1] = (part_size >> 8) & 0xff;
buf[3 * p + 2] = (part_size >> 16) & 0xff;
}
return p ? pic->writer(buf, 3 * p, pic) : 1;
if (p && !pic->writer(buf, 3 * p, pic)) {
return WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_WRITE);
}
return 1;
}
//------------------------------------------------------------------------------
@ -381,6 +384,7 @@ int VP8EncWrite(VP8Encoder* const enc) {
enc->coded_size_ = (int)(CHUNK_HEADER_SIZE + riff_size);
ok = ok && WebPReportProgress(pic, final_percent, &enc->percent_);
if (!ok) WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_WRITE);
return ok;
}

View File

@ -31,8 +31,8 @@ extern "C" {
// version numbers
#define ENC_MAJ_VERSION 1
#define ENC_MIN_VERSION 2
#define ENC_REV_VERSION 4
#define ENC_MIN_VERSION 3
#define ENC_REV_VERSION 1
enum { MAX_LF_LEVELS = 64, // Maximum loop filter level
MAX_VARIABLE_LEVEL = 67, // last (inclusive) level with variable cost

View File

@ -196,8 +196,7 @@ static int CoOccurrenceBuild(const WebPPicture* const pic,
uint32_t palette_sorted[MAX_PALETTE_SIZE];
lines = (uint32_t*)WebPSafeMalloc(2 * pic->width, sizeof(*lines));
if (lines == NULL) {
WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
return 0;
return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
}
line_top = &lines[0];
line_current = &lines[pic->width];
@ -255,10 +254,10 @@ static int PaletteSortModifiedZeng(
cooccurrence =
(uint32_t*)WebPSafeCalloc(num_colors * num_colors, sizeof(*cooccurrence));
if (cooccurrence == NULL) {
WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
return 0;
return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
}
if (!CoOccurrenceBuild(pic, palette_sorted, num_colors, cooccurrence)) {
WebPSafeFree(cooccurrence);
return 0;
}
@ -361,10 +360,11 @@ typedef enum {
kHistoTotal // Must be last.
} HistoIx;
static void AddSingleSubGreen(int p, uint32_t* const r, uint32_t* const b) {
const int green = p >> 8; // The upper bits are masked away later.
++r[((p >> 16) - green) & 0xff];
++b[((p >> 0) - green) & 0xff];
static void AddSingleSubGreen(uint32_t p,
uint32_t* const r, uint32_t* const b) {
const int green = (int)p >> 8; // The upper bits are masked away later.
++r[(((int)p >> 16) - green) & 0xff];
++b[(((int)p >> 0) - green) & 0xff];
}
static void AddSingle(uint32_t p,
@ -1011,8 +1011,7 @@ static int StoreImageToBitMask(
VP8LRefsCursorNext(&c);
}
if (bw->error_) {
WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
return 0;
return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
}
return 1;
}
@ -1296,7 +1295,10 @@ static int EncodeImageInternal(
}
}
tokens = (HuffmanTreeToken*)WebPSafeMalloc(max_tokens, sizeof(*tokens));
if (tokens == NULL) goto Error;
if (tokens == NULL) {
WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
goto Error;
}
for (i = 0; i < 5 * histogram_image_size; ++i) {
HuffmanTreeCode* const codes = &huffman_codes[i];
StoreHuffmanCode(bw, huff_tree, tokens, codes);
@ -1354,7 +1356,7 @@ static int EncodeImageInternal(
static void ApplySubtractGreen(VP8LEncoder* const enc, int width, int height,
VP8LBitWriter* const bw) {
VP8LPutBits(bw, TRANSFORM_PRESENT, 1);
VP8LPutBits(bw, SUBTRACT_GREEN, 2);
VP8LPutBits(bw, SUBTRACT_GREEN_TRANSFORM, 2);
VP8LSubtractGreenFromBlueAndRed(enc->argb_, width * height);
}
@ -1447,18 +1449,21 @@ static int WriteImage(const WebPPicture* const pic, VP8LBitWriter* const bw,
const size_t vp8l_size = VP8L_SIGNATURE_SIZE + webpll_size;
const size_t pad = vp8l_size & 1;
const size_t riff_size = TAG_SIZE + CHUNK_HEADER_SIZE + vp8l_size + pad;
*coded_size = 0;
if (bw->error_) {
return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
}
if (!WriteRiffHeader(pic, riff_size, vp8l_size) ||
!pic->writer(webpll_data, webpll_size, pic)) {
WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_WRITE);
return 0;
return WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_WRITE);
}
if (pad) {
const uint8_t pad_byte[1] = { 0 };
if (!pic->writer(pad_byte, 1, pic)) {
WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_WRITE);
return 0;
return WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_WRITE);
}
}
*coded_size = CHUNK_HEADER_SIZE + riff_size;
@ -1503,8 +1508,7 @@ static int AllocateTransformBuffer(VP8LEncoder* const enc, int width,
ClearTransformBuffer(enc);
mem = (uint32_t*)WebPSafeMalloc(mem_size, sizeof(*mem));
if (mem == NULL) {
WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY);
return 0;
return WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY);
}
enc->transform_mem_ = mem;
enc->transform_mem_size_ = (size_t)mem_size;
@ -1612,8 +1616,7 @@ static int ApplyPalette(const uint32_t* src, uint32_t src_stride, uint32_t* dst,
int x, y;
if (tmp_row == NULL) {
WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
return 0;
return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY);
}
if (palette_size < APPLY_PALETTE_GREEDY_MAX) {
@ -1967,9 +1970,8 @@ int VP8LEncodeStream(const WebPConfig* const config,
int ok_main;
if (enc_main == NULL || !VP8LBitWriterInit(&bw_side, 0)) {
WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
VP8LEncoderDelete(enc_main);
return 0;
return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
}
// Avoid "garbage value" error from Clang's static analysis tool.
@ -2116,8 +2118,7 @@ int VP8LEncodeImage(const WebPConfig* const config,
if (picture == NULL) return 0;
if (config == NULL || picture->argb == NULL) {
WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER);
return 0;
return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER);
}
width = picture->width;

View File

@ -307,7 +307,10 @@ int WebPEncodingSetError(const WebPPicture* const pic,
WebPEncodingError error) {
assert((int)error < VP8_ENC_ERROR_LAST);
assert((int)error >= VP8_ENC_OK);
((WebPPicture*)pic)->error_code = error;
// The oldest error reported takes precedence over the new one.
if (pic->error_code == VP8_ENC_OK) {
((WebPPicture*)pic)->error_code = error;
}
return 0;
}
@ -317,8 +320,7 @@ int WebPReportProgress(const WebPPicture* const pic,
*percent_store = percent;
if (pic->progress_hook && !pic->progress_hook(percent, pic)) {
// user abort requested
WebPEncodingSetError(pic, VP8_ENC_ERROR_USER_ABORT);
return 0;
return WebPEncodingSetError(pic, VP8_ENC_ERROR_USER_ABORT);
}
}
return 1; // ok
@ -329,7 +331,7 @@ int WebPEncode(const WebPConfig* config, WebPPicture* pic) {
int ok = 0;
if (pic == NULL) return 0;
WebPEncodingSetError(pic, VP8_ENC_OK); // all ok so far
pic->error_code = VP8_ENC_OK; // all ok so far
if (config == NULL) { // bad params
return WebPEncodingSetError(pic, VP8_ENC_ERROR_NULL_PARAMETER);
}

View File

@ -28,8 +28,8 @@ extern "C" {
// Defines and constants.
#define MUX_MAJ_VERSION 1
#define MUX_MIN_VERSION 2
#define MUX_REV_VERSION 4
#define MUX_MIN_VERSION 3
#define MUX_REV_VERSION 1
// Chunk object.
typedef struct WebPChunk WebPChunk;

View File

@ -116,9 +116,12 @@ static int MuxImageParse(const WebPChunk* const chunk, int copy_data,
// Each of ANMF chunk contain a header at the beginning. So, its size should
// be at least 'hdr_size'.
if (size < hdr_size) goto Fail;
ChunkAssignData(&subchunk, &temp, copy_data, chunk->tag_);
if (ChunkAssignData(&subchunk, &temp, copy_data,
chunk->tag_) != WEBP_MUX_OK) {
goto Fail;
}
}
ChunkSetHead(&subchunk, &wpi->header_);
if (ChunkSetHead(&subchunk, &wpi->header_) != WEBP_MUX_OK) goto Fail;
wpi->is_partial_ = 1; // Waiting for ALPH and/or VP8/VP8L chunks.
// Rest of the chunks.

View File

@ -148,9 +148,9 @@ int VP8GetSigned(VP8BitReader* WEBP_RESTRICT const br, int v,
const range_t value = (range_t)(br->value_ >> pos);
const int32_t mask = (int32_t)(split - value) >> 31; // -1 or 0
br->bits_ -= 1;
br->range_ += mask;
br->range_ += (range_t)mask;
br->range_ |= 1;
br->value_ -= (bit_t)((split + 1) & mask) << pos;
br->value_ -= (bit_t)((split + 1) & (uint32_t)mask) << pos;
BT_TRACK(br);
return (v ^ mask) - mask;
}

View File

@ -15,6 +15,7 @@
#include "src/webp/config.h"
#endif
#include "src/dsp/cpu.h"
#include "src/utils/bit_reader_inl_utils.h"
#include "src/utils/utils.h"
@ -121,7 +122,7 @@ int32_t VP8GetSignedValue(VP8BitReader* const br, int bits,
#define VP8L_LOG8_WBITS 4 // Number of bytes needed to store VP8L_WBITS bits.
#if defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || \
#if defined(__arm__) || defined(_M_ARM) || WEBP_AARCH64 || \
defined(__i386__) || defined(_M_IX86) || \
defined(__x86_64__) || defined(_M_X64)
#define VP8L_USE_FAST_LOAD

View File

@ -19,6 +19,7 @@
#ifdef _MSC_VER
#include <stdlib.h> // _byteswap_ulong
#endif
#include "src/dsp/cpu.h"
#include "src/webp/types.h"
// Warning! This macro triggers quite some MACRO wizardry around func signature!
@ -64,7 +65,7 @@ extern "C" {
#define BITS 56
#elif defined(__arm__) || defined(_M_ARM) // ARM
#define BITS 24
#elif defined(__aarch64__) // ARM 64bit
#elif WEBP_AARCH64 // ARM 64bit
#define BITS 56
#elif defined(__mips__) // MIPS
#define BITS 24

View File

@ -142,7 +142,7 @@ static int BuildHuffmanTable(HuffmanCode* const root_table, int root_bits,
{
int step; // step size to replicate values in current table
uint32_t low = -1; // low bits for current root entry
uint32_t low = 0xffffffffu; // low bits for current root entry
uint32_t mask = total_size - 1; // mask for low bits
uint32_t key = 0; // reversed prefix code
int num_nodes = 1; // number of Huffman tree nodes

View File

@ -64,7 +64,8 @@ WEBP_EXTERN void WebPSafeFree(void* const ptr);
// Alignment
#define WEBP_ALIGN_CST 31
#define WEBP_ALIGN(PTR) (((uintptr_t)(PTR) + WEBP_ALIGN_CST) & ~WEBP_ALIGN_CST)
#define WEBP_ALIGN(PTR) (((uintptr_t)(PTR) + WEBP_ALIGN_CST) & \
~(uintptr_t)WEBP_ALIGN_CST)
#include <string.h>
// memcpy() is the safe way of moving potentially unaligned 32b memory.
@ -73,10 +74,19 @@ static WEBP_INLINE uint32_t WebPMemToUint32(const uint8_t* const ptr) {
memcpy(&A, ptr, sizeof(A));
return A;
}
static WEBP_INLINE int32_t WebPMemToInt32(const uint8_t* const ptr) {
return (int32_t)WebPMemToUint32(ptr);
}
static WEBP_INLINE void WebPUint32ToMem(uint8_t* const ptr, uint32_t val) {
memcpy(ptr, &val, sizeof(val));
}
static WEBP_INLINE void WebPInt32ToMem(uint8_t* const ptr, int val) {
WebPUint32ToMem(ptr, (uint32_t)val);
}
//------------------------------------------------------------------------------
// Reading/writing data.

View File

@ -81,10 +81,11 @@ WEBP_EXTERN uint8_t* WebPDecodeBGR(const uint8_t* data, size_t data_size,
// returned is the Y samples buffer. Upon return, *u and *v will point to
// the U and V chroma data. These U and V buffers need NOT be passed to
// WebPFree(), unlike the returned Y luma one. The dimension of the U and V
// planes are both (*width + 1) / 2 and (*height + 1)/ 2.
// planes are both (*width + 1) / 2 and (*height + 1) / 2.
// Upon return, the Y buffer has a stride returned as '*stride', while U and V
// have a common stride returned as '*uv_stride'.
// Return NULL in case of error.
// 'width' and 'height' may be NULL, the other pointers must not be.
// Returns NULL in case of error.
// (*) Also named Y'CbCr. See: https://en.wikipedia.org/wiki/YCbCr
WEBP_EXTERN uint8_t* WebPDecodeYUV(const uint8_t* data, size_t data_size,
int* width, int* height,

Some files were not shown because too many files have changed in this diff Show More