Merge pull request #97325 from BlueCube3310/bcdec

Replace squish with bcdec for BC decompression
2024-09-29 00:47:02 +02:00 · 2024-09-29 00:47:02 +02:00 · 422306ef87
parent 3fbd33af85 2167157aaf
commit 422306ef87
40 changed files with 1550 additions and 5138 deletions
--- a/1
+++ b/1
@ -299,7 +299,6 @@ opts.Add(BoolVariable("builtin_pcre2_with_jit", "Use JIT compiler for the built-
 opts.Add(BoolVariable("builtin_recastnavigation", "Use the built-in Recast navigation library", True))
 opts.Add(BoolVariable("builtin_rvo2_2d", "Use the built-in RVO2 2D library", True))
 opts.Add(BoolVariable("builtin_rvo2_3d", "Use the built-in RVO2 3D library", True))
 opts.Add(BoolVariable("builtin_squish", "Use the built-in squish library", True))
 opts.Add(BoolVariable("builtin_xatlas", "Use the built-in xatlas library", True))
 opts.Add(BoolVariable("builtin_zlib", "Use the built-in zlib library", True))
 opts.Add(BoolVariable("builtin_zstd", "Use the built-in Zstd library", True))
--- a/modules/bcdec/SCsub
+++ b/modules/bcdec/SCsub
@ -0,0 +1,10 @@
 #!/usr/bin/env python
 from misc.utility.scons_hints import *
 Import("env")
 Import("env_modules")
 # Godot source files: hand the module's "*.cpp" pattern to the shared module source list.
 module_source_pattern = "*.cpp"
 env_bcdec = env_modules.Clone()
 env_bcdec.add_source_files(env.modules_sources, module_source_pattern)
--- a/modules/squish/config.py
+++ b/modules/squish/config.py
--- a/modules/bcdec/image_decompress_bcdec.cpp
+++ b/modules/bcdec/image_decompress_bcdec.cpp
@ -0,0 +1,181 @@
 /**************************************************************************/
 /*  image_decompress_bcdec.cpp                                            */
 /**************************************************************************/
 /*                         This file is part of:                          */
 /*                             GODOT ENGINE                               */
 /*                        https://godotengine.org                         */
 /**************************************************************************/
 /* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
 /* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
 /*                                                                        */
 /* Permission is hereby granted, free of charge, to any person obtaining  */
 /* a copy of this software and associated documentation files (the        */
 /* "Software"), to deal in the Software without restriction, including    */
 /* without limitation the rights to use, copy, modify, merge, publish,    */
 /* distribute, sublicense, and/or sell copies of the Software, and to     */
 /* permit persons to whom the Software is furnished to do so, subject to  */
 /* the following conditions:                                              */
 /*                                                                        */
 /* The above copyright notice and this permission notice shall be         */
 /* included in all copies or substantial portions of the Software.        */
 /*                                                                        */
 /* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
 /* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
 /* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
 /* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
 /* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
 /* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
 /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
 /**************************************************************************/
 #include "image_decompress_bcdec.h"
 #include "core/os/os.h"
 #include "core/string/print_string.h"
 #define BCDEC_IMPLEMENTATION
 #include "thirdparty/misc/bcdec.h"
 // Three-argument adapter around bcdec_bc6h_half() so it fits DECOMPRESS_LOOP's
 // func(block, out, pitch) shape; fixes the trailing flag to true (used for BCdec_BC6S).
 inline void bcdec_bc6h_half_s(const void *compressedBlock, void *decompressedBlock, int destinationPitch) {
 	const bool is_signed = true;
 	bcdec_bc6h_half(compressedBlock, decompressedBlock, destinationPitch, is_signed);
 }
 // Three-argument adapter around bcdec_bc6h_half() so it fits DECOMPRESS_LOOP's
 // func(block, out, pitch) shape; fixes the trailing flag to false (used for BCdec_BC6U).
 inline void bcdec_bc6h_half_u(const void *compressedBlock, void *decompressedBlock, int destinationPitch) {
 	const bool is_signed = false;
 	bcdec_bc6h_half(compressedBlock, decompressedBlock, destinationPitch, is_signed);
 }
 // Decodes one BC-compressed surface of `width` x `height` pixels from `src` into `dst`.
 //
 // `dst` is written as a tightly packed image (row stride = width * bytes per decoded pixel).
 // `src` is consumed as consecutive blocks in row-major block order, one block per 4x4 pixel
 // tile; tiles that only partly overlap the image still consume one whole block.
 //
 // Tiles lying fully inside the image are decoded straight into `dst`. Edge tiles (when `width`
 // or `height` is not a multiple of 4) are decoded into a scratch tile first and only the
 // in-bounds pixels are copied, so nothing is written outside the width * height destination.
 // For sizes that are multiples of 4 the output is identical to decoding every tile in place.
 static void decompress_image(BCdecFormat format, const void *src, void *dst, const uint64_t width, const uint64_t height) {
 	const uint8_t *src_blocks = reinterpret_cast<const uint8_t *>(src);
 	uint8_t *dec_blocks = reinterpret_cast<uint8_t *>(dst);
 	uint64_t src_pos = 0;
 	// Scratch for one decoded 4x4 tile. 6 bytes per pixel (the BC6H cases below) is the largest
 	// output size used here; aligned because the decoders may store through wider-than-byte pointers.
 	alignas(8) uint8_t tile[4 * 4 * 6];
 	// DECOMPRESS_LOOP(func, block_size, color_bytesize, color_components):
 	//   func             - per-block decoder, called as func(block, out, pitch).
 	//   block_size       - bytes consumed from `src` per block.
 	//   color_bytesize   - bytes per decoded pixel.
 	//   color_components - per-pixel multiplier for the pitch handed to func
 	//                      (3 for the 6-byte BC6H pixels, equal to color_bytesize otherwise).
 #define DECOMPRESS_LOOP(func, block_size, color_bytesize, color_components)                  \
 	for (uint64_t y = 0; y < height; y += 4) {                                               \
 		const uint64_t tile_rows = (height - y < 4) ? (height - y) : 4;                      \
 		for (uint64_t x = 0; x < width; x += 4) {                                            \
 			const uint64_t tile_cols = (width - x < 4) ? (width - x) : 4;                    \
 			uint8_t *tile_dst = &dec_blocks[(y * width + x) * (color_bytesize)];             \
 			if (tile_rows == 4 && tile_cols == 4) {                                          \
 				func(&src_blocks[src_pos], tile_dst, width * (color_components));            \
 			} else {                                                                         \
 				func(&src_blocks[src_pos], tile, 4 * (color_components));                    \
 				for (uint64_t row = 0; row < tile_rows; row++) {                             \
 					const uint8_t *from = &tile[row * 4 * (color_bytesize)];                 \
 					uint8_t *to = tile_dst + row * width * (color_bytesize);                 \
 					for (uint64_t b = 0; b < tile_cols * (color_bytesize); b++) {            \
 						to[b] = from[b];                                                     \
 					}                                                                        \
 				}                                                                            \
 			}                                                                                \
 			src_pos += block_size;                                                           \
 		}                                                                                    \
 	}
 	switch (format) {
 		case BCdec_BC1: {
 			DECOMPRESS_LOOP(bcdec_bc1, BCDEC_BC1_BLOCK_SIZE, 4, 4)
 		} break;
 		case BCdec_BC2: {
 			DECOMPRESS_LOOP(bcdec_bc2, BCDEC_BC2_BLOCK_SIZE, 4, 4)
 		} break;
 		case BCdec_BC3: {
 			DECOMPRESS_LOOP(bcdec_bc3, BCDEC_BC3_BLOCK_SIZE, 4, 4)
 		} break;
 		case BCdec_BC4: {
 			DECOMPRESS_LOOP(bcdec_bc4, BCDEC_BC4_BLOCK_SIZE, 1, 1)
 		} break;
 		case BCdec_BC5: {
 			DECOMPRESS_LOOP(bcdec_bc5, BCDEC_BC5_BLOCK_SIZE, 2, 2)
 		} break;
 		case BCdec_BC6U: {
 			DECOMPRESS_LOOP(bcdec_bc6h_half_u, BCDEC_BC6H_BLOCK_SIZE, 6, 3)
 		} break;
 		case BCdec_BC6S: {
 			DECOMPRESS_LOOP(bcdec_bc6h_half_s, BCDEC_BC6H_BLOCK_SIZE, 6, 3)
 		} break;
 		case BCdec_BC7: {
 			DECOMPRESS_LOOP(bcdec_bc7, BCDEC_BC7_BLOCK_SIZE, 4, 4)
 		} break;
 	}
 #undef DECOMPRESS_LOOP
 }
 // Decompresses a BC-compressed image in place.
 //
 // The image's format selects a bcdec decoder and an uncompressed target format
 // (RGBA8, R8, RG8 or RGBH). Every mipmap level is decoded into a new buffer, which then
 // replaces the image data via set_data(). FORMAT_DXT5_RA_AS_RG images additionally get
 // convert_ra_rgba8_to_rg() applied afterwards. Any other source format is reported through
 // ERR_FAIL_MSG. The elapsed time is logged with print_verbose().
 void image_decompress_bcdec(Image *p_image) {
 	uint64_t start_time = OS::get_singleton()->get_ticks_msec();
 	// Base level size; unchanged by decompression, so it is reused for all size/offset queries.
 	const int w = p_image->get_width();
 	const int h = p_image->get_height();
 	Image::Format source_format = p_image->get_format();
 	Image::Format target_format = Image::FORMAT_MAX;
 	BCdecFormat bcdec_format = BCdec_BC1;
 	switch (source_format) {
 		case Image::FORMAT_DXT1:
 			bcdec_format = BCdec_BC1;
 			target_format = Image::FORMAT_RGBA8;
 			break;
 		case Image::FORMAT_DXT3:
 			bcdec_format = BCdec_BC2;
 			target_format = Image::FORMAT_RGBA8;
 			break;
 		case Image::FORMAT_DXT5:
 		case Image::FORMAT_DXT5_RA_AS_RG:
 			bcdec_format = BCdec_BC3;
 			target_format = Image::FORMAT_RGBA8;
 			break;
 		case Image::FORMAT_RGTC_R:
 			bcdec_format = BCdec_BC4;
 			target_format = Image::FORMAT_R8;
 			break;
 		case Image::FORMAT_RGTC_RG:
 			bcdec_format = BCdec_BC5;
 			target_format = Image::FORMAT_RG8;
 			break;
 		case Image::FORMAT_BPTC_RGBFU:
 			bcdec_format = BCdec_BC6U;
 			target_format = Image::FORMAT_RGBH;
 			break;
 		case Image::FORMAT_BPTC_RGBF:
 			bcdec_format = BCdec_BC6S;
 			target_format = Image::FORMAT_RGBH;
 			break;
 		case Image::FORMAT_BPTC_RGBA:
 			bcdec_format = BCdec_BC7;
 			target_format = Image::FORMAT_RGBA8;
 			break;
 		default:
 			ERR_FAIL_MSG("bcdec: Can't decompress unknown format: " + Image::get_format_name(source_format) + ".");
 			break;
 	}
 	const bool has_mipmaps = p_image->has_mipmaps();
 	int mm_count = p_image->get_mipmap_count();
 	int64_t target_size = Image::get_image_data_size(w, h, target_format, has_mipmaps);
 	// Decompressed data for all levels is assembled here.
 	Vector<uint8_t> data;
 	data.resize(target_size);
 	const uint8_t *rb = p_image->get_data().ptr();
 	uint8_t *wb = data.ptrw();
 	// Decompress mipmaps.
 	for (int i = 0; i <= mm_count; i++) {
 		// mipmap_size is only an out-parameter of the query; the decoder does not need it.
 		int64_t src_ofs = 0, mipmap_size = 0;
 		int mipmap_w = 0, mipmap_h = 0;
 		p_image->get_mipmap_offset_size_and_dimensions(i, src_ofs, mipmap_size, mipmap_w, mipmap_h);
 		int64_t dst_ofs = Image::get_image_mipmap_offset(w, h, target_format, i);
 		// NOTE(review): mipmap_w/mipmap_h are reported for the compressed source format;
 		// confirm they match the size of the target-format level stored at dst_ofs for
 		// levels smaller than 4x4.
 		decompress_image(bcdec_format, rb + src_ofs, wb + dst_ofs, mipmap_w, mipmap_h);
 	}
 	p_image->set_data(w, h, has_mipmaps, target_format, data);
 	// Swap channels if necessary.
 	if (source_format == Image::FORMAT_DXT5_RA_AS_RG) {
 		p_image->convert_ra_rgba8_to_rg();
 	}
 	print_verbose(vformat("bcdec: Decompression of a %dx%d %s image with %d mipmaps took %d ms.",
 			w, h, Image::get_format_name(source_format), p_image->get_mipmap_count(), OS::get_singleton()->get_ticks_msec() - start_time));
 }
--- a/modules/squish/image_decompress_squish.h
+++ b/modules/squish/image_decompress_squish.h
@ -1,5 +1,5 @@
 /**************************************************************************/
-/*  image_decompress_squish.h                                             */
+/*  image_decompress_bcdec.h                                              */
 /**************************************************************************/
 /*                         This file is part of:                          */
 /*                             GODOT ENGINE                               */
@ -28,11 +28,22 @@
 /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
 /**************************************************************************/
-#ifndef IMAGE_DECOMPRESS_SQUISH_H
+#ifndef IMAGE_DECOMPRESS_BCDEC_H
-#define IMAGE_DECOMPRESS_SQUISH_H
+#define IMAGE_DECOMPRESS_BCDEC_H
 #include "core/io/image.h"
-void image_decompress_squish(Image *p_image);
+enum BCdecFormat { // Selects which bcdec block decoder decompress_image() runs.
 	BCdec_BC1, // Used for Image::FORMAT_DXT1; decoded with bcdec_bc1.
 	BCdec_BC2, // Used for Image::FORMAT_DXT3; decoded with bcdec_bc2.
 	BCdec_BC3, // Used for Image::FORMAT_DXT5 and FORMAT_DXT5_RA_AS_RG; decoded with bcdec_bc3.
 	BCdec_BC4, // Used for Image::FORMAT_RGTC_R; decoded with bcdec_bc4.
 	BCdec_BC5, // Used for Image::FORMAT_RGTC_RG; decoded with bcdec_bc5.
 	BCdec_BC6S, // Used for Image::FORMAT_BPTC_RGBF; decoded with bcdec_bc6h_half (flag true).
 	BCdec_BC6U, // Used for Image::FORMAT_BPTC_RGBFU; decoded with bcdec_bc6h_half (flag false).
 	BCdec_BC7, // Used for Image::FORMAT_BPTC_RGBA; decoded with bcdec_bc7.
 };
-#endif // IMAGE_DECOMPRESS_SQUISH_H
+void image_decompress_bcdec(Image *p_image);
 #endif // IMAGE_DECOMPRESS_BCDEC_H
--- a/modules/squish/register_types.cpp
+++ b/modules/squish/register_types.cpp
@ -30,17 +30,18 @@
 #include "register_types.h"
-#include "image_decompress_squish.h"
+#include "image_decompress_bcdec.h"
-void initialize_squish_module(ModuleInitializationLevel p_level) {
+void initialize_bcdec_module(ModuleInitializationLevel p_level) {
 	if (p_level != MODULE_INITIALIZATION_LEVEL_SCENE) {
 		return;
 	}
-	Image::_image_decompress_bc = image_decompress_squish;
+	Image::_image_decompress_bc = image_decompress_bcdec;
 	Image::_image_decompress_bptc = image_decompress_bcdec;
 }
-void uninitialize_squish_module(ModuleInitializationLevel p_level) {
+void uninitialize_bcdec_module(ModuleInitializationLevel p_level) {
 	if (p_level != MODULE_INITIALIZATION_LEVEL_SCENE) {
 		return;
 	}
--- a/modules/squish/register_types.h
+++ b/modules/squish/register_types.h
@ -28,12 +28,12 @@
 /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
 /**************************************************************************/
-#ifndef SQUISH_REGISTER_TYPES_H
+#ifndef BCDEC_REGISTER_TYPES_H
-#define SQUISH_REGISTER_TYPES_H
+#define BCDEC_REGISTER_TYPES_H
 #include "modules/register_module_types.h"
-void initialize_squish_module(ModuleInitializationLevel p_level);
+void initialize_bcdec_module(ModuleInitializationLevel p_level);
-void uninitialize_squish_module(ModuleInitializationLevel p_level);
+void uninitialize_bcdec_module(ModuleInitializationLevel p_level);
-#endif // SQUISH_REGISTER_TYPES_H
+#endif // BCDEC_REGISTER_TYPES_H
--- a/modules/cvtt/register_types.cpp
+++ b/modules/cvtt/register_types.cpp
@ -40,7 +40,6 @@ void initialize_cvtt_module(ModuleInitializationLevel p_level) {
 	}
 	Image::set_compress_bptc_func(image_compress_cvtt);
 	Image::_image_decompress_bptc = image_decompress_cvtt;
 }
 void uninitialize_cvtt_module(ModuleInitializationLevel p_level) {
--- a/modules/squish/SCsub
+++ b/modules/squish/SCsub
@ -1,45 +0,0 @@
 #!/usr/bin/env python
 from misc.utility.scons_hints import *
 Import("env")
 Import("env_modules")
 # Module-private environment, cloned so the settings below stay local to squish.
 env_squish = env_modules.Clone()
 # Thirdparty source files
 thirdparty_obj = []
 # The bundled squish sources are only compiled when the builtin_squish option is enabled.
 if env["builtin_squish"]:
    thirdparty_dir = "#thirdparty/squish/"
    thirdparty_sources = [
        "alpha.cpp",
        "clusterfit.cpp",
        "colourblock.cpp",
        "colourfit.cpp",
        "colourset.cpp",
        "maths.cpp",
        "rangefit.cpp",
        "singlecolourfit.cpp",
        "squish.cpp",
    ]
    # Prefix every file name with the thirdparty directory.
    thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources]
    # Added to env_squish before it is cloned, so both the module and the thirdparty
    # environment get the squish directory on their include path.
    env_squish.Prepend(CPPPATH=[thirdparty_dir])
    # Separate clone so that disabling warnings only affects the thirdparty sources.
    env_thirdparty = env_squish.Clone()
    env_thirdparty.disable_warnings()
    env_thirdparty.add_source_files(thirdparty_obj, thirdparty_sources)
    env.modules_sources += thirdparty_obj
 # Godot source files
 module_obj = []
 env_squish.add_source_files(module_obj, "*.cpp")
 env.modules_sources += module_obj
 # Needed to force rebuilding the module files when the thirdparty library is updated.
 env.Depends(module_obj, thirdparty_obj)
--- a/modules/squish/image_decompress_squish.cpp
+++ b/modules/squish/image_decompress_squish.cpp
@ -1,96 +0,0 @@
 /**************************************************************************/
 /*  image_decompress_squish.cpp                                           */
 /**************************************************************************/
 /*                         This file is part of:                          */
 /*                             GODOT ENGINE                               */
 /*                        https://godotengine.org                         */
 /**************************************************************************/
 /* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
 /* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
 /*                                                                        */
 /* Permission is hereby granted, free of charge, to any person obtaining  */
 /* a copy of this software and associated documentation files (the        */
 /* "Software"), to deal in the Software without restriction, including    */
 /* without limitation the rights to use, copy, modify, merge, publish,    */
 /* distribute, sublicense, and/or sell copies of the Software, and to     */
 /* permit persons to whom the Software is furnished to do so, subject to  */
 /* the following conditions:                                              */
 /*                                                                        */
 /* The above copyright notice and this permission notice shall be         */
 /* included in all copies or substantial portions of the Software.        */
 /*                                                                        */
 /* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
 /* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
 /* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
 /* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
 /* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
 /* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
 /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
 /**************************************************************************/
 #include "image_decompress_squish.h"
 #include <squish.h>
 // Decompresses a DXT1/DXT3/DXT5/RGTC image in place to RGBA8 using squish.
 // Each mipmap level is decoded into a freshly allocated buffer that then replaces the image
 // data; FORMAT_DXT5_RA_AS_RG images additionally get convert_ra_rgba8_to_rg() applied.
 // Any other source format is reported through ERR_FAIL_MSG.
 void image_decompress_squish(Image *p_image) {
 	const Image::Format source_format = p_image->get_format();
 	const Image::Format target_format = Image::FORMAT_RGBA8;
 	// Size handed to squish for the current level; halved after every level.
 	int level_w = p_image->get_width();
 	int level_h = p_image->get_height();
 	const int mm_count = p_image->get_mipmap_count();
 	Vector<uint8_t> data;
 	data.resize(Image::get_image_data_size(level_w, level_h, target_format, p_image->has_mipmaps()));
 	const uint8_t *rb = p_image->get_data().ptr();
 	uint8_t *wb = data.ptrw();
 	// Translate the image format into the matching squish decoder flag.
 	int squish_flags = 0;
 	switch (source_format) {
 		case Image::FORMAT_DXT1: {
 			squish_flags = squish::kDxt1;
 		} break;
 		case Image::FORMAT_DXT3: {
 			squish_flags = squish::kDxt3;
 		} break;
 		case Image::FORMAT_DXT5:
 		case Image::FORMAT_DXT5_RA_AS_RG: {
 			squish_flags = squish::kDxt5;
 		} break;
 		case Image::FORMAT_RGTC_R: {
 			squish_flags = squish::kBc4;
 		} break;
 		case Image::FORMAT_RGTC_RG: {
 			squish_flags = squish::kBc5;
 		} break;
 		default: {
 			ERR_FAIL_MSG("Squish: Can't decompress unknown format: " + itos(p_image->get_format()) + ".");
 		} break;
 	}
 	for (int level = 0; level <= mm_count; level++, level_w >>= 1, level_h >>= 1) {
 		int64_t src_ofs = 0, mipmap_size = 0;
 		int mipmap_w = 0, mipmap_h = 0;
 		p_image->get_mipmap_offset_size_and_dimensions(level, src_ofs, mipmap_size, mipmap_w, mipmap_h);
 		const int64_t dst_ofs = Image::get_image_mipmap_offset(p_image->get_width(), p_image->get_height(), target_format, level);
 		squish::DecompressImage(wb + dst_ofs, level_w, level_h, rb + src_ofs, squish_flags);
 	}
 	p_image->set_data(p_image->get_width(), p_image->get_height(), p_image->has_mipmaps(), target_format, data);
 	if (source_format == Image::FORMAT_DXT5_RA_AS_RG) {
 		p_image->convert_ra_rgba8_to_rg();
 	}
 }
--- a/platform/linuxbsd/detect.py
+++ b/platform/linuxbsd/detect.py
@ -256,10 +256,6 @@ def configure(env: "SConsEnvironment"):
    if not env["builtin_enet"]:
        env.ParseConfig("pkg-config libenet --cflags --libs")
    if not env["builtin_squish"]:
        # libsquish doesn't reliably install its .pc file, so some distros lack it.
        env.Append(LIBS=["libsquish"])
    if not env["builtin_zstd"]:
        env.ParseConfig("pkg-config libzstd --cflags --libs")
--- a/thirdparty/README.md
+++ b/thirdparty/README.md
@ -650,6 +650,10 @@ comments and a patch is provided in the `patches` folder.
 Collection of single-file libraries used in Godot components.
 - `bcdec.h`
  * Upstream: https://github.com/iOrange/bcdec
  * Version: git (026acf98ea271045cb10713daa96ba98528badb7, 2022)
  * License: MIT
 - `clipper.{cpp,hpp}`
  * Upstream: https://sourceforge.net/projects/polyclipping
  * Version: 6.4.2 (2017) + Godot changes (added optional exceptions handling)
@ -873,23 +877,6 @@ They can be reapplied using the patches included in the `patches`
 folder, in order.
 ## squish
 - Upstream: https://sourceforge.net/projects/libsquish
 - Version: 1.15 (r104, 2017)
 - License: MIT
 Files extracted from upstream source:
 - `LICENSE.txt`
 - All `.cpp`, `.h` and `.inl` files
 Some downstream changes have been made and are identified by
 `// -- GODOT begin --` and `// -- GODOT end --` comments.
 They can be reapplied using the patches included in the `patches`
 folder.
 ## tinyexr
 - Upstream: https://github.com/syoyo/tinyexr
--- a/thirdparty/misc/bcdec.h
+++ b/thirdparty/misc/bcdec.h
--- a/thirdparty/squish/LICENSE.txt
+++ b/thirdparty/squish/LICENSE.txt
@ -1,20 +0,0 @@
 Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:
 The above copyright notice and this permission notice shall be included
 in all copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--- a/thirdparty/squish/alpha.cpp
+++ b/thirdparty/squish/alpha.cpp
@ -1,350 +0,0 @@
 /* -----------------------------------------------------------------------------
    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
    Permission is hereby granted, free of charge, to any person obtaining
    a copy of this software and associated documentation files (the
    "Software"), to deal in the Software without restriction, including
    without limitation the rights to use, copy, modify, merge, publish,
    distribute, sublicense, and/or sell copies of the Software, and to
    permit persons to whom the Software is furnished to do so, subject to
    the following conditions:
    The above copyright notice and this permission notice shall be included
    in all copies or substantial portions of the Software.
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
   -------------------------------------------------------------------------- */
 #include "alpha.h"
 #include <climits>
 #include <algorithm>
 namespace squish {
 // Adds 0.5 and truncates toward zero (round-to-nearest for non-negative input),
 // then clamps the result to the range [0, limit].
 static int FloatToInt( float a, int limit )
 {
    const int rounded = ( int )( a + 0.5f );
    if( rounded < 0 )
        return 0;
    return ( rounded > limit ) ? limit : rounded;
 }
 // Packs the alpha bytes of 16 RGBA pixels into 8 bytes at `block`, 4 bits per pixel
 // (even pixel in the low nibble, odd pixel in the high nibble). A pixel whose bit in
 // `mask` is clear is stored as 0.
 void CompressAlphaDxt3( u8 const* rgba, int mask, void* block )
 {
    u8* out = reinterpret_cast< u8* >( block );
    for( int pair = 0; pair < 8; ++pair )
    {
        // alpha of the two pixels sharing this output byte
        u8 const* texels = rgba + 8*pair;
        int low = 0;
        int high = 0;
        // quantise to 4 bits, but only for pixels enabled in the mask
        if( ( mask & ( 1 << ( 2*pair ) ) ) != 0 )
            low = FloatToInt( ( float )texels[3] * ( 15.0f/255.0f ), 15 );
        if( ( mask & ( 1 << ( 2*pair + 1 ) ) ) != 0 )
            high = FloatToInt( ( float )texels[7] * ( 15.0f/255.0f ), 15 );
        out[pair] = ( u8 )( low | ( high << 4 ) );
    }
 }
 // Expands the 8 bytes of 4-bit alpha at `block` into the alpha byte of 16 RGBA pixels.
 // Each nibble n becomes the byte (n << 4) | n, i.e. n * 17.
 void DecompressAlphaDxt3( u8* rgba, void const* block )
 {
    u8 const* packed = reinterpret_cast< u8 const* >( block );
    for( int pair = 0; pair < 8; ++pair )
    {
        const int both = packed[pair];
        const int first = both & 0x0f;   // even pixel
        const int second = both >> 4;    // odd pixel
        rgba[8*pair + 3] = ( u8 )( first*17 );
        rgba[8*pair + 7] = ( u8 )( second*17 );
    }
 }
 // Widens [min, max] in place until it spans at least `steps`: first by raising max
 // (capped at 255), then, if that was not enough, by lowering min (floored at 0).
 static void FixRange( int& min, int& max, int steps )
 {
    if( max - min >= steps )
        return;
    const int raised = min + steps;
    max = ( raised < 255 ) ? raised : 255;
    if( max - min >= steps )
        return;
    const int lowered = max - steps;
    min = ( lowered > 0 ) ? lowered : 0;
 }
 // For each of the 16 pixels, picks the entry of the 8-value `codes` table closest to the
 // pixel's alpha (first entry wins ties) and stores its position in `indices`.
 // Pixels disabled in `mask` get index 0 and add no error.
 // Returns the summed squared error of the enabled pixels.
 static int FitCodes( u8 const* rgba, int mask, u8 const* codes, u8* indices )
 {
    int total = 0;
    for( int pixel = 0; pixel < 16; ++pixel )
    {
        const bool enabled = ( mask & ( 1 << pixel ) ) != 0;
        if( enabled )
        {
            const int alpha = rgba[4*pixel + 3];
            int bestCode = 0;
            int bestError = INT_MAX;
            for( int code = 0; code < 8; ++code )
            {
                const int delta = alpha - ( int )codes[code];
                const int squared = delta*delta;
                if( squared < bestError )
                {
                    bestError = squared;
                    bestCode = code;
                }
            }
            indices[pixel] = ( u8 )bestCode;
            total += bestError;
        }
        else
        {
            // masked-out pixel: use the first code
            indices[pixel] = 0;
        }
    }
    return total;
 }
 static void WriteAlphaBlock( int alpha0, int alpha1, u8 const* indices, void* block )
 {
    u8* bytes = reinterpret_cast< u8* >( block );
    // write the first two bytes
    bytes[0] = ( u8 )alpha0;
    bytes[1] = ( u8 )alpha1;
    // pack the indices with 3 bits each
    u8* dest = bytes + 2;
    u8 const* src = indices;
    for( int i = 0; i < 2; ++i )
    {
        // pack 8 3-bit values
        int value = 0;
        for( int j = 0; j < 8; ++j )
        {
            int index = *src++;
            value |= ( index << 3*j );
        }
        // store in 3 bytes
        for( int j = 0; j < 3; ++j )
        {
            int byte = ( value >> 8*j ) & 0xff;
            *dest++ = ( u8 )byte;
        }
    }
 }
 static void WriteAlphaBlock5( int alpha0, int alpha1, u8 const* indices, void* block )
 {
    // check the relative values of the endpoints
    if( alpha0 > alpha1 )
    {
        // swap the indices
        u8 swapped[16];
        for( int i = 0; i < 16; ++i )
        {
            u8 index = indices[i];
            if( index == 0 )
                swapped[i] = 1;
            else if( index == 1 )
                swapped[i] = 0;
            else if( index <= 5 )
                swapped[i] = 7 - index;
            else
                swapped[i] = index;
        }
        // write the block
        WriteAlphaBlock( alpha1, alpha0, swapped, block );
    }
    else
    {
        // write the block
        WriteAlphaBlock( alpha0, alpha1, indices, block );
    }
 }
 static void WriteAlphaBlock7( int alpha0, int alpha1, u8 const* indices, void* block )
 {
    // check the relative values of the endpoints
    if( alpha0 < alpha1 )
    {
        // swap the indices
        u8 swapped[16];
        for( int i = 0; i < 16; ++i )
        {
            u8 index = indices[i];
            if( index == 0 )
                swapped[i] = 1;
            else if( index == 1 )
                swapped[i] = 0;
            else
                swapped[i] = 9 - index;
        }
        // write the block
        WriteAlphaBlock( alpha1, alpha0, swapped, block );
    }
    else
    {
        // write the block
        WriteAlphaBlock( alpha0, alpha1, indices, block );
    }
 }
 // Encodes the alpha of 16 RGBA pixels (those enabled in `mask`) into an 8-byte block.
 // Two candidate codebooks are built - a 5-value one (plus explicit 0 and 255) and a
 // 7-value one - the pixels are fitted to both, and the one with the smaller total
 // squared error is written (the 5-value one on a tie).
 void CompressAlphaDxt5( u8 const* rgba, int mask, void* block )
 {
    // alpha range for each mode; the 5-value mode ignores 0 (for min) and 255 (for max)
    int min5 = 255, max5 = 0;
    int min7 = 255, max7 = 0;
    for( int i = 0; i < 16; ++i )
    {
        if( ( mask & ( 1 << i ) ) == 0 )
            continue;
        const int alpha = rgba[4*i + 3];
        min7 = std::min( min7, alpha );
        max7 = std::max( max7, alpha );
        if( alpha != 0 )
            min5 = std::min( min5, alpha );
        if( alpha != 255 )
            max5 = std::max( max5, alpha );
    }
    // no valid range found: collapse it onto max
    min5 = std::min( min5, max5 );
    min7 = std::min( min7, max7 );
    // make each range at least as wide as its number of steps
    FixRange( min5, max5, 5 );
    FixRange( min7, max7, 7 );
    // 5-value codebook: endpoints, 4 interpolated values, then 0 and 255
    u8 codes5[8];
    codes5[0] = ( u8 )min5;
    codes5[1] = ( u8 )max5;
    for( int slot = 2; slot <= 5; ++slot )
        codes5[slot] = ( u8 )( ( ( 6 - slot )*min5 + ( slot - 1 )*max5 )/5 );
    codes5[6] = 0;
    codes5[7] = 255;
    // 7-value codebook: endpoints and 6 interpolated values
    u8 codes7[8];
    codes7[0] = ( u8 )min7;
    codes7[1] = ( u8 )max7;
    for( int slot = 2; slot <= 7; ++slot )
        codes7[slot] = ( u8 )( ( ( 8 - slot )*min7 + ( slot - 1 )*max7 )/7 );
    // fit against both and keep the better one
    u8 indices5[16];
    u8 indices7[16];
    const int err5 = FitCodes( rgba, mask, codes5, indices5 );
    const int err7 = FitCodes( rgba, mask, codes7, indices7 );
    if( err7 < err5 )
        WriteAlphaBlock7( min7, max7, indices7, block );
    else
        WriteAlphaBlock5( min5, max5, indices5, block );
 }
 // Decodes an 8-byte alpha block into the alpha byte of 16 RGBA pixels.
 // Bytes 0 and 1 are the endpoints; alpha0 <= alpha1 selects the 5-value codebook
 // (4 interpolated values plus 0 and 255), otherwise the 7-value codebook is used.
 // The remaining 6 bytes hold sixteen 3-bit codebook indices.
 void DecompressAlphaDxt5( u8* rgba, void const* block )
 {
    u8 const* in = reinterpret_cast< u8 const* >( block );
    const int alpha0 = in[0];
    const int alpha1 = in[1];
    // build the codebook
    u8 codes[8];
    codes[0] = ( u8 )alpha0;
    codes[1] = ( u8 )alpha1;
    if( alpha0 > alpha1 )
    {
        // 7-value codebook
        for( int slot = 2; slot <= 7; ++slot )
            codes[slot] = ( u8 )( ( ( 8 - slot )*alpha0 + ( slot - 1 )*alpha1 )/7 );
    }
    else
    {
        // 5-value codebook
        for( int slot = 2; slot <= 5; ++slot )
            codes[slot] = ( u8 )( ( ( 6 - slot )*alpha0 + ( slot - 1 )*alpha1 )/5 );
        codes[6] = 0;
        codes[7] = 255;
    }
    // unpack all 16 indices before touching the output
    u8 indices[16];
    for( int group = 0; group < 2; ++group )
    {
        // 3 bytes, lowest first, form one 24-bit word holding 8 indices
        u8 const* src = in + 2 + 3*group;
        const int word = ( int )src[0] | ( ( int )src[1] << 8 ) | ( ( int )src[2] << 16 );
        for( int k = 0; k < 8; ++k )
            indices[8*group + k] = ( u8 )( ( word >> ( 3*k ) ) & 0x7 );
    }
    // look every index up in the codebook
    for( int pixel = 0; pixel < 16; ++pixel )
        rgba[4*pixel + 3] = codes[indices[pixel]];
 }
 } // namespace squish
--- a/thirdparty/squish/alpha.h
+++ b/thirdparty/squish/alpha.h
@ -1,41 +0,0 @@
 /* -----------------------------------------------------------------------------
    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
    Permission is hereby granted, free of charge, to any person obtaining
    a copy of this software and associated documentation files (the
    "Software"), to deal in the Software without restriction, including
    without limitation the rights to use, copy, modify, merge, publish,
    distribute, sublicense, and/or sell copies of the Software, and to
    permit persons to whom the Software is furnished to do so, subject to
    the following conditions:
    The above copyright notice and this permission notice shall be included
    in all copies or substantial portions of the Software.
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
   -------------------------------------------------------------------------- */
 #ifndef SQUISH_ALPHA_H
 #define SQUISH_ALPHA_H
 #include "squish.h"
 namespace squish {
 // Packs the alpha bytes of 16 RGBA pixels into 8 bytes at `block`, 4 bits per pixel;
 // pixels whose bit in `mask` is clear are stored as 0.
 void CompressAlphaDxt3( u8 const* rgba, int mask, void* block );
 // Encodes the alpha of 16 RGBA pixels into 8 bytes at `block` (two endpoint bytes plus
 // sixteen 3-bit indices), choosing the 5- or 7-value codebook by least squared error.
 void CompressAlphaDxt5( u8 const* rgba, int mask, void* block );
 // Expands 8 bytes of 4-bit alpha at `block` into the alpha byte of 16 RGBA pixels.
 void DecompressAlphaDxt3( u8* rgba, void const* block );
 // Decodes an 8-byte endpoint/index alpha block at `block` into the alpha byte of
 // 16 RGBA pixels.
 void DecompressAlphaDxt5( u8* rgba, void const* block );
 } // namespace squish
 #endif // ndef SQUISH_ALPHA_H
--- a/thirdparty/squish/clusterfit.cpp
+++ b/thirdparty/squish/clusterfit.cpp
@ -1,392 +0,0 @@
 /* -----------------------------------------------------------------------------
    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
    Copyright (c) 2007 Ignacio Castano                   icastano@nvidia.com
    Permission is hereby granted, free of charge, to any person obtaining
    a copy of this software and associated documentation files (the
    "Software"), to deal in the Software without restriction, including
    without limitation the rights to use, copy, modify, merge, publish,
    distribute, sublicense, and/or sell copies of the Software, and to
    permit persons to whom the Software is furnished to do so, subject to
    the following conditions:
    The above copyright notice and this permission notice shall be included
    in all copies or substantial portions of the Software.
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
   -------------------------------------------------------------------------- */
 #include "clusterfit.h"
 #include "colourset.h"
 #include "colourblock.h"
 #include <cfloat>
 namespace squish {
 // Prepares a cluster fit over `colours`. `flags` decides between a single
 // ordering pass and the iterative variant; `metric`, when non-null, provides
 // three per-channel error weights (metric[0..2]), otherwise every channel is
 // weighted equally.
 ClusterFit::ClusterFit( ColourSet const* colours, int flags, float* metric )
  : ColourFit( colours, flags )
 {
    // nothing has been fitted yet, so any real error must beat this
    m_besterror = VEC4_CONST( FLT_MAX );
    // per-channel error weights (old perceptual = 0.2126f, 0.7152f, 0.0722f)
    if( !metric )
        m_metric = VEC4_CONST( 1.0f );
    else
        m_metric = Vec4( metric[0], metric[1], metric[2], 1.0f );
    // only the iterative flag allows more than one ordering pass
    bool const iterative = ( m_flags & kColourIterativeClusterFit ) != 0;
    m_iterationCount = iterative ? kMaxIterations : 1;
    // the first ordering axis is the principal component of the weighted covariance
    Sym3x3 covariance = ComputeWeightedCovariance(
        m_colours->GetCount(), m_colours->GetPoints(), m_colours->GetWeights() );
    m_principle = ComputePrincipleComponent( covariance );
 }
 // Sorts the colour set along `axis` and stores the resulting permutation in
 // the m_order slice belonging to `iteration`. Returns false, leaving the
 // weighted-point cache untouched, when an earlier iteration already produced
 // the same permutation; otherwise refreshes m_points_weights / m_xsum_wsum in
 // sorted order and returns true.
 bool ClusterFit::ConstructOrdering( Vec3 const& axis, int iteration )
 {
    int const pointCount = m_colours->GetCount();
    Vec3 const* points = m_colours->GetPoints();
    // every iteration owns its own 16-entry slice of m_order
    u8* current = ( u8* )m_order + 16*iteration;
    // start from the identity permutation and project each point onto the axis
    float projection[16];
    for( int n = 0; n < pointCount; ++n )
    {
        current[n] = ( u8 )n;
        projection[n] = Dot( points[n], axis );
    }
    // insertion sort on the projections; equal keys never swap, so it is stable
    for( int n = 1; n < pointCount; ++n )
    {
        int slot = n;
        while( slot > 0 && projection[slot] < projection[slot - 1] )
        {
            std::swap( projection[slot], projection[slot - 1] );
            std::swap( current[slot], current[slot - 1] );
            --slot;
        }
    }
    // reject the ordering if any earlier iteration already tried it
    for( int earlier = 0; earlier < iteration; ++earlier )
    {
        u8 const* previous = ( u8* )m_order + 16*earlier;
        int agree = 0;
        while( agree < pointCount && current[agree] == previous[agree] )
            ++agree;
        if( agree == pointCount )
            return false;
    }
    // cache the points in sorted order, each scaled by its weight, plus their sum
    float const* pointWeights = m_colours->GetWeights();
    m_xsum_wsum = VEC4_CONST( 0.0f );
    for( int n = 0; n < pointCount; ++n )
    {
        int const source = current[n];
        Vec3 const& point = points[source];
        // w starts at 1 so that after scaling it carries the weight itself
        Vec4 const position( point.X(), point.Y(), point.Z(), 1.0f );
        Vec4 const scale( pointWeights[source] );
        Vec4 const weighted = position*scale;
        m_points_weights[n] = weighted;
        m_xsum_wsum += weighted;
    }
    return true;
 }
 // Fits the block in 3-colour mode. Every contiguous split of the ordered
 // points into three clusters (start, midpoint, end) is tried; for each split
 // the least-squares endpoints are computed, snapped to the 5:6:5 grid and
 // scored with the metric-weighted error. The best split is written to `block`
 // through WriteColourBlock3, but only if it beats the error already held in
 // m_besterror (which is then updated).
 void ClusterFit::Compress3( void* block )
 {
    // declare variables
    int const count = m_colours->GetCount();
    Vec4 const two = VEC4_CONST( 2.0 );
    Vec4 const one = VEC4_CONST( 1.0f );
    // xyz scale a midpoint contribution by 1/2; w holds the squared factor 1/4
    Vec4 const half_half2( 0.5f, 0.5f, 0.5f, 0.25f );
    Vec4 const zero = VEC4_CONST( 0.0f );
    Vec4 const half = VEC4_CONST( 0.5f );
    // 5:6:5 quantisation grid and its reciprocal (w is unused)
    Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
    Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
    // prepare an ordering using the principle axis
    ConstructOrdering( m_principle, 0 );
    // check all possible clusters and iterate on the total order
    Vec4 beststart = VEC4_CONST( 0.0f );
    Vec4 bestend = VEC4_CONST( 0.0f );
    Vec4 besterror = m_besterror;
    u8 bestindices[16];
    int bestiteration = 0;
    int besti = 0, bestj = 0;
    // loop over iterations (we avoid the case that all points in first or last cluster)
    for( int iterationIndex = 0;; )
    {
        // first cluster [0,i) is at the start
        Vec4 part0 = VEC4_CONST( 0.0f );
        for( int i = 0; i < count; ++i )
        {
            // second cluster [i,j) is half along
            // (when i == 0 the middle cluster is seeded with point 0 and j starts at 1)
            Vec4 part1 = ( i == 0 ) ? m_points_weights[0] : VEC4_CONST( 0.0f );
            int jmin = ( i == 0 ) ? 1 : i;
            for( int j = jmin;; )
            {
                // last cluster [j,count) is at the end
                Vec4 part2 = m_xsum_wsum - part1 - part0;
                // compute least squares terms directly
                // (the w lane of each sum supplies the matching squared-weight term)
                Vec4 alphax_sum = MultiplyAdd( part1, half_half2, part0 );
                Vec4 alpha2_sum = alphax_sum.SplatW();
                Vec4 betax_sum = MultiplyAdd( part1, half_half2, part2 );
                Vec4 beta2_sum = betax_sum.SplatW();
                Vec4 alphabeta_sum = ( part1*half_half2 ).SplatW();
                // compute the least-squares optimal points
                Vec4 factor = Reciprocal( NegativeMultiplySubtract( alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum ) );
                Vec4 a = NegativeMultiplySubtract( betax_sum, alphabeta_sum, alphax_sum*beta2_sum )*factor;
                Vec4 b = NegativeMultiplySubtract( alphax_sum, alphabeta_sum, betax_sum*alpha2_sum )*factor;
                // clamp to the grid
                a = Min( one, Max( zero, a ) );
                b = Min( one, Max( zero, b ) );
                a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp;
                b = Truncate( MultiplyAdd( grid, b, half ) )*gridrcp;
                // compute the error (we skip the constant xxsum)
                Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
                Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
                Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
                Vec4 e4 = MultiplyAdd( two, e3, e1 );
                // apply the metric to the error term
                Vec4 e5 = e4*m_metric;
                Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
                // keep the solution if it wins
                if( CompareAnyLessThan( error, besterror ) )
                {
                    beststart = a;
                    bestend = b;
                    besti = i;
                    bestj = j;
                    besterror = error;
                    bestiteration = iterationIndex;
                }
                // advance
                if( j == count )
                    break;
                part1 += m_points_weights[j];
                ++j;
            }
            // advance
            part0 += m_points_weights[i];
        }
        // stop if we didn't improve in this iteration
        if( bestiteration != iterationIndex )
            break;
        // advance if possible
        ++iterationIndex;
        if( iterationIndex == m_iterationCount )
            break;
        // stop if a new iteration is an ordering that has already been tried
        Vec3 axis = ( bestend - beststart ).GetVec3();
        if( !ConstructOrdering( axis, iterationIndex ) )
            break;
    }
    // save the block if necessary
    if( CompareAnyLessThan( besterror, m_besterror ) )
    {
        // remap the indices
        // (selector 0 = start, 2 = midpoint, 1 = end, matching the cluster order above)
        u8 const* order = ( u8* )m_order + 16*bestiteration;
        u8 unordered[16];
        for( int m = 0; m < besti; ++m )
            unordered[order[m]] = 0;
        for( int m = besti; m < bestj; ++m )
            unordered[order[m]] = 2;
        for( int m = bestj; m < count; ++m )
            unordered[order[m]] = 1;
        m_colours->RemapIndices( unordered, bestindices );
        // save the block
        WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
        // save the error
        m_besterror = besterror;
    }
 }
 // Fits the block in 4-colour mode. Every contiguous split of the ordered
 // points into four clusters (start, one third, two thirds, end) is tried; for
 // each split the least-squares endpoints are computed, snapped to the 5:6:5
 // grid and scored with the metric-weighted error. The best split is written
 // to `block` through WriteColourBlock4, but only if it beats the error already
 // held in m_besterror (which is then updated).
 void ClusterFit::Compress4( void* block )
 {
    // declare variables
    int const count = m_colours->GetCount();
    Vec4 const two = VEC4_CONST( 2.0f );
    Vec4 const one = VEC4_CONST( 1.0f );
    // xyz hold the interpolation factor, w holds its square
    Vec4 const onethird_onethird2( 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/9.0f );
    Vec4 const twothirds_twothirds2( 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 4.0f/9.0f );
    // cross term (1/3)*(2/3) shared by both interior clusters
    Vec4 const twonineths = VEC4_CONST( 2.0f/9.0f );
    Vec4 const zero = VEC4_CONST( 0.0f );
    Vec4 const half = VEC4_CONST( 0.5f );
    // 5:6:5 quantisation grid and its reciprocal (w is unused)
    Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
    Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
    // prepare an ordering using the principle axis
    ConstructOrdering( m_principle, 0 );
    // check all possible clusters and iterate on the total order
    Vec4 beststart = VEC4_CONST( 0.0f );
    Vec4 bestend = VEC4_CONST( 0.0f );
    Vec4 besterror = m_besterror;
    u8 bestindices[16];
    int bestiteration = 0;
    int besti = 0, bestj = 0, bestk = 0;
    // loop over iterations (we avoid the case that all points in first or last cluster)
    for( int iterationIndex = 0;; )
    {
        // first cluster [0,i) is at the start
        Vec4 part0 = VEC4_CONST( 0.0f );
        for( int i = 0; i < count; ++i )
        {
            // second cluster [i,j) is one third along
            Vec4 part1 = VEC4_CONST( 0.0f );
            for( int j = i;; )
            {
                // third cluster [j,k) is two thirds along
                // (when j == 0 this cluster is seeded with point 0 and k starts at 1)
                Vec4 part2 = ( j == 0 ) ? m_points_weights[0] : VEC4_CONST( 0.0f );
                int kmin = ( j == 0 ) ? 1 : j;
                for( int k = kmin;; )
                {
                    // last cluster [k,count) is at the end
                    Vec4 part3 = m_xsum_wsum - part2 - part1 - part0;
                    // compute least squares terms directly
                    // (the w lane of each sum supplies the matching squared-weight term)
                    Vec4 const alphax_sum = MultiplyAdd( part2, onethird_onethird2, MultiplyAdd( part1, twothirds_twothirds2, part0 ) );
                    Vec4 const alpha2_sum = alphax_sum.SplatW();
                    Vec4 const betax_sum = MultiplyAdd( part1, onethird_onethird2, MultiplyAdd( part2, twothirds_twothirds2, part3 ) );
                    Vec4 const beta2_sum = betax_sum.SplatW();
                    Vec4 const alphabeta_sum = twonineths*( part1 + part2 ).SplatW();
                    // compute the least-squares optimal points
                    Vec4 factor = Reciprocal( NegativeMultiplySubtract( alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum ) );
                    Vec4 a = NegativeMultiplySubtract( betax_sum, alphabeta_sum, alphax_sum*beta2_sum )*factor;
                    Vec4 b = NegativeMultiplySubtract( alphax_sum, alphabeta_sum, betax_sum*alpha2_sum )*factor;
                    // clamp to the grid
                    a = Min( one, Max( zero, a ) );
                    b = Min( one, Max( zero, b ) );
                    a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp;
                    b = Truncate( MultiplyAdd( grid, b, half ) )*gridrcp;
                    // compute the error (we skip the constant xxsum)
                    Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
                    Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
                    Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
                    Vec4 e4 = MultiplyAdd( two, e3, e1 );
                    // apply the metric to the error term
                    Vec4 e5 = e4*m_metric;
                    Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
                    // keep the solution if it wins
                    if( CompareAnyLessThan( error, besterror ) )
                    {
                        beststart = a;
                        bestend = b;
                        besterror = error;
                        besti = i;
                        bestj = j;
                        bestk = k;
                        bestiteration = iterationIndex;
                    }
                    // advance
                    if( k == count )
                        break;
                    part2 += m_points_weights[k];
                    ++k;
                }
                // advance
                if( j == count )
                    break;
                part1 += m_points_weights[j];
                ++j;
            }
            // advance
            part0 += m_points_weights[i];
        }
        // stop if we didn't improve in this iteration
        if( bestiteration != iterationIndex )
            break;
        // advance if possible
        ++iterationIndex;
        if( iterationIndex == m_iterationCount )
            break;
        // stop if a new iteration is an ordering that has already been tried
        Vec3 axis = ( bestend - beststart ).GetVec3();
        if( !ConstructOrdering( axis, iterationIndex ) )
            break;
    }
    // save the block if necessary
    if( CompareAnyLessThan( besterror, m_besterror ) )
    {
        // remap the indices
        // (selector 0 = start, 2 = one third, 3 = two thirds, 1 = end)
        u8 const* order = ( u8* )m_order + 16*bestiteration;
        u8 unordered[16];
        for( int m = 0; m < besti; ++m )
            unordered[order[m]] = 0;
        for( int m = besti; m < bestj; ++m )
            unordered[order[m]] = 2;
        for( int m = bestj; m < bestk; ++m )
            unordered[order[m]] = 3;
        for( int m = bestk; m < count; ++m )
            unordered[order[m]] = 1;
        m_colours->RemapIndices( unordered, bestindices );
        // save the block
        WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
        // save the error
        m_besterror = besterror;
    }
 }
 } // namespace squish
--- a/thirdparty/squish/clusterfit.h
+++ b/thirdparty/squish/clusterfit.h
@ -1,61 +0,0 @@
 /* -----------------------------------------------------------------------------
    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
    Copyright (c) 2007 Ignacio Castano                   icastano@nvidia.com
    Permission is hereby granted, free of charge, to any person obtaining
    a copy of this software and associated documentation files (the
    "Software"), to deal in the Software without restriction, including
    without limitation the rights to use, copy, modify, merge, publish,
    distribute, sublicense, and/or sell copies of the Software, and to
    permit persons to whom the Software is furnished to do so, subject to
    the following conditions:
    The above copyright notice and this permission notice shall be included
    in all copies or substantial portions of the Software.
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
   -------------------------------------------------------------------------- */
 #ifndef SQUISH_CLUSTERFIT_H
 #define SQUISH_CLUSTERFIT_H
 #include "squish.h"
 #include "maths.h"
 #include "simd.h"
 #include "colourfit.h"
 namespace squish {
 // Colour fitter that orders the colour set along an axis and searches the
 // contiguous cluster splits of that ordering for the lowest-error endpoints.
 class ClusterFit : public ColourFit
 {
 public:
    // `metric` may be null; otherwise its first three floats weight the
    // per-channel error.
    ClusterFit( ColourSet const* colours, int flags, float* metric );
 private:
    // Sorts the points along `axis` for the given iteration; returns false if
    // an earlier iteration already produced the same ordering.
    bool ConstructOrdering( Vec3 const& axis, int iteration );
    virtual void Compress3( void* block );
    virtual void Compress4( void* block );
    // upper bound on ordering passes when iterative fitting is enabled
    enum { kMaxIterations = 8 };
    // number of ordering passes actually allowed (1 or kMaxIterations)
    int m_iterationCount;
    // principal component of the weighted covariance; axis of the first ordering
    Vec3 m_principle;
    // one 16-entry permutation per iteration
    u8 m_order[16*kMaxIterations];
    // points in sorted order, each multiplied by its weight (w lane = weight)
    Vec4 m_points_weights[16];
    // sum of all entries of m_points_weights
    Vec4 m_xsum_wsum;
    // per-channel weights applied to the error term
    Vec4 m_metric;
    // lowest error written to the block so far (starts at FLT_MAX)
    Vec4 m_besterror;
 };
 } // namespace squish
 #endif // ndef SQUISH_CLUSTERFIT_H
--- a/thirdparty/squish/colourblock.cpp
+++ b/thirdparty/squish/colourblock.cpp
@ -1,247 +0,0 @@
 /* -----------------------------------------------------------------------------
    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
    Permission is hereby granted, free of charge, to any person obtaining
    a copy of this software and associated documentation files (the
    "Software"), to deal in the Software without restriction, including
    without limitation the rights to use, copy, modify, merge, publish,
    distribute, sublicense, and/or sell copies of the Software, and to
    permit persons to whom the Software is furnished to do so, subject to
    the following conditions:
    The above copyright notice and this permission notice shall be included
    in all copies or substantial portions of the Software.
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
   -------------------------------------------------------------------------- */
 #include "colourblock.h"
 // -- GODOT start --
 #include "alpha.h"
 // -- GODOT end --
 namespace squish {
 // Rounds `a` to the nearest integer and clamps the result to [0, limit].
 //
 // The range check happens in the float domain *before* the conversion:
 // converting a NaN or an out-of-range float to int is undefined behaviour, and
 // the endpoint solver can produce such values for degenerate blocks. NaN maps
 // to 0. `limit` is expected to be small (the callers here pass 31 and 63), so
 // it converts to float exactly.
 static int FloatToInt( float a, int limit )
 {
    // NaN fails every ordered comparison, so this also catches it
    if( !( a > 0.0f ) )
        return 0;
    // anything at or above the limit saturates without being converted
    if( a >= ( float )limit )
        return limit;
    // use ANSI round-to-zero behaviour to get round-to-nearest
    int i = ( int )( a + 0.5f );
    // rounding can still step one past the limit
    return ( i > limit ) ? limit : i;
 }
 // Quantises a colour to a packed 16-bit 5:6:5 value: X goes to the top five
 // bits, Y to the middle six and Z to the bottom five. Each component is scaled
 // by its channel maximum and clamped by FloatToInt.
 static int FloatTo565( Vec3::Arg colour )
 {
    // quantise each channel to its own bit depth
    int const red = FloatToInt( 31.0f*colour.X(), 31 );
    int const green = FloatToInt( 63.0f*colour.Y(), 63 );
    int const blue = FloatToInt( 31.0f*colour.Z(), 31 );
    // assemble the word from the low bits upwards
    int packed = blue;
    packed |= green << 5;
    packed |= red << 11;
    return packed;
 }
 static void WriteColourBlock( int a, int b, u8* indices, void* block )
 {
    // get the block as bytes
    u8* bytes = ( u8* )block;
    // write the endpoints
    bytes[0] = ( u8 )( a & 0xff );
    bytes[1] = ( u8 )( a >> 8 );
    bytes[2] = ( u8 )( b & 0xff );
    bytes[3] = ( u8 )( b >> 8 );
    // write the indices
    for( int i = 0; i < 4; ++i )
    {
        u8 const* ind = indices + 4*i;
        bytes[4 + i] = ind[0] | ( ind[1] << 2 ) | ( ind[2] << 4 ) | ( ind[3] << 6 );
    }
 }
 // Writes a 3-colour block. The encoding requires the first packed endpoint to
 // be no greater than the second, so when quantisation yields the opposite
 // order the endpoints trade places and selectors 0 and 1 are exchanged to
 // match; selectors 2 and 3 are order-independent and pass through.
 void WriteColourBlock3( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block )
 {
    int first = FloatTo565( start );
    int second = FloatTo565( end );
    // decide once whether the endpoints have to be exchanged
    bool const flip = first > second;
    if( flip )
        std::swap( first, second );
    // build the selectors that go with the final endpoint order
    u8 remapped[16];
    for( int i = 0; i < 16; ++i )
    {
        u8 const index = indices[i];
        remapped[i] = ( flip && index < 2 ) ? ( u8 )( 1 - index ) : index;
    }
    WriteColourBlock( first, second, remapped, block );
 }
 // Writes a 4-colour block. The encoding requires the first packed endpoint to
 // be greater than the second: if quantisation yields the opposite order the
 // endpoints trade places and every selector has its low bit flipped
 // (0<->1, 2<->3); if both endpoints quantise to the same value every pixel
 // simply uses selector 0.
 void WriteColourBlock4( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block )
 {
    int first = FloatTo565( start );
    int second = FloatTo565( end );
    u8 remapped[16];
    if( first == second )
    {
        // degenerate block: a single colour
        for( int i = 0; i < 16; ++i )
            remapped[i] = 0;
    }
    else if( first > second )
    {
        // already in the required order
        for( int i = 0; i < 16; ++i )
            remapped[i] = indices[i];
    }
    else
    {
        // wrong order: exchange the endpoints and mirror the selectors
        std::swap( first, second );
        for( int i = 0; i < 16; ++i )
            remapped[i] = ( indices[i] ^ 0x1 ) & 0x3;
    }
    WriteColourBlock( first, second, remapped, block );
 }
 // Reads a little-endian 5:6:5 value from `packed`, expands it into an opaque
 // 8-bit RGBA colour at `colour[0..3]` and returns the raw 16-bit value.
 static int Unpack565( u8 const* packed, u8* colour )
 {
    int const value = ( ( int )packed[1] << 8 ) | ( int )packed[0];
    // isolate the stored fields
    int const red5 = ( value >> 11 ) & 0x1f;
    int const green6 = ( value >> 5 ) & 0x3f;
    int const blue5 = value & 0x1f;
    // widen to 8 bits by replicating the top bits into the low bits
    colour[0] = ( u8 )( ( red5 << 3 ) | ( red5 >> 2 ) );
    colour[1] = ( u8 )( ( green6 << 2 ) | ( green6 >> 4 ) );
    colour[2] = ( u8 )( ( blue5 << 3 ) | ( blue5 >> 2 ) );
    colour[3] = 255;
    return value;
 }
 // Decodes one 8-byte colour block into sixteen RGBA pixels (64 bytes) at
 // `rgba`. With `isDxt1` set and the first packed endpoint not greater than the
 // second, the palette is the 3-colour one: entry 2 is the endpoint average and
 // entry 3 is transparent black. Otherwise entries 2 and 3 are the one-third
 // and two-thirds blends and every entry is opaque.
 void DecompressColour( u8* rgba, void const* block, bool isDxt1 )
 {
    u8 const* src = reinterpret_cast< u8 const* >( block );
    // four RGBA palette entries; the two endpoints fill entries 0 and 1
    u8 palette[16];
    int const first = Unpack565( src, palette );
    int const second = Unpack565( src + 2, palette + 4 );
    bool const threeColour = isDxt1 && first <= second;
    // derive entries 2 and 3 from the endpoints, channel by channel
    if( threeColour )
    {
        for( int ch = 0; ch < 3; ++ch )
        {
            int const lo = palette[ch];
            int const hi = palette[4 + ch];
            palette[8 + ch] = ( u8 )( ( lo + hi )/2 );
            palette[12 + ch] = 0;
        }
    }
    else
    {
        for( int ch = 0; ch < 3; ++ch )
        {
            int const lo = palette[ch];
            int const hi = palette[4 + ch];
            palette[8 + ch] = ( u8 )( ( 2*lo + hi )/3 );
            palette[12 + ch] = ( u8 )( ( lo + 2*hi )/3 );
        }
    }
    // alpha of the derived entries
    palette[11] = 255;
    palette[15] = threeColour ? 0 : 255;
    // pull out all sixteen 2-bit selectors before touching the output
    u8 selectors[16];
    for( int px = 0; px < 16; ++px )
        selectors[px] = ( src[4 + px/4] >> ( 2*( px%4 ) ) ) & 0x3;
    // copy the selected palette entry into each pixel
    for( int px = 0; px < 16; ++px )
    {
        u8 const* entry = palette + 4*selectors[px];
        u8* out = rgba + 4*px;
        out[0] = entry[0];
        out[1] = entry[1];
        out[2] = entry[2];
        out[3] = entry[3];
    }
 }
 // -- GODOT start --
 // Decodes a BC4 block into sixteen RGBA pixels at `rgba`: the single channel
 // is decoded with the DXT5 alpha decoder, then moved into red, with green and
 // blue cleared and alpha forced opaque.
 void DecompressColourBc4( u8* rgba, void const* block)
 {
    // the decoder leaves the channel in each pixel's alpha slot
    DecompressAlphaDxt5( rgba, block );
    u8* const last = rgba + 4*16;
    for( u8* px = rgba; px != last; px += 4 )
    {
        px[0] = px[3];
        px[1] = 0;
        px[2] = 0;
        px[3] = 255;
    }
 }
 // Decodes a BC5 block (two consecutive 8-byte single-channel blocks) into
 // sixteen RGBA pixels at `rgba`: the first block becomes red, the second
 // becomes green, blue is cleared and alpha is forced opaque.
 void DecompressColourBc5( u8* rgba, void const* block)
 {
    u8 const* raw = reinterpret_cast< u8 const* >( block );
    u8* const last = rgba + 4*16;
    // first channel: decode into the alpha slots, then move it to red
    DecompressAlphaDxt5( rgba, raw );
    for( u8* px = rgba; px != last; px += 4 )
        px[0] = px[3];
    // second channel: decode into the alpha slots again, then move it to green
    DecompressAlphaDxt5( rgba, raw + 8 );
    for( u8* px = rgba; px != last; px += 4 )
    {
        px[1] = px[3];
        px[2] = 0;
        px[3] = 255;
    }
 }
 // -- GODOT end --
 } // namespace squish
--- a/thirdparty/squish/colourblock.h
+++ b/thirdparty/squish/colourblock.h
@ -1,45 +0,0 @@
 /* -----------------------------------------------------------------------------
    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
    Permission is hereby granted, free of charge, to any person obtaining
    a copy of this software and associated documentation files (the
    "Software"), to deal in the Software without restriction, including
    without limitation the rights to use, copy, modify, merge, publish,
    distribute, sublicense, and/or sell copies of the Software, and to
    permit persons to whom the Software is furnished to do so, subject to
    the following conditions:
    The above copyright notice and this permission notice shall be included
    in all copies or substantial portions of the Software.
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
   -------------------------------------------------------------------------- */
 #ifndef SQUISH_COLOURBLOCK_H
 #define SQUISH_COLOURBLOCK_H
 #include "squish.h"
 #include "maths.h"
 namespace squish {
 // Quantise `start` / `end` to 5:6:5, fix up the endpoint order and the sixteen
 // selectors in `indices` for the 3-colour or 4-colour encoding, and write the
 // resulting 8-byte colour block to `block`.
 void WriteColourBlock3( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block );
 void WriteColourBlock4( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block );
 // Decode one 8-byte colour block into sixteen RGBA pixels at `rgba`; `isDxt1`
 // enables the 3-colour palette with a transparent fourth entry.
 void DecompressColour( u8* rgba, void const* block, bool isDxt1 );
 // -- GODOT start --
 // Decode a BC4 (red only) or BC5 (red and green) block into sixteen RGBA
 // pixels at `rgba`; unused colour channels are set to 0 and alpha to 255.
 void DecompressColourBc4( u8* rgba, void const* block );
 void DecompressColourBc5( u8* rgba, void const* block );
 // -- GODOT end --
 } // namespace squish
 #endif // ndef SQUISH_COLOURBLOCK_H
--- a/thirdparty/squish/colourfit.cpp
+++ b/thirdparty/squish/colourfit.cpp
@ -1,54 +0,0 @@
 /* -----------------------------------------------------------------------------
    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
    Permission is hereby granted, free of charge, to any person obtaining
    a copy of this software and associated documentation files (the
    "Software"), to deal in the Software without restriction, including
    without limitation the rights to use, copy, modify, merge, publish,
    distribute, sublicense, and/or sell copies of the Software, and to
    permit persons to whom the Software is furnished to do so, subject to
    the following conditions:
    The above copyright notice and this permission notice shall be included
    in all copies or substantial portions of the Software.
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
   -------------------------------------------------------------------------- */
 #include "colourfit.h"
 #include "colourset.h"
 namespace squish {
 // Stores the colour set and the compression flags for use by Compress();
 // the pointer is kept as-is, no copy of the set is made.
 ColourFit::ColourFit( ColourSet const* colours, int flags )
  : m_colours( colours ),
    m_flags( flags )
 {
 }
 // Nothing to release here; m_colours is not deleted.
 ColourFit::~ColourFit()
 {
 }
 void ColourFit::Compress( void* block )
 {
    bool isDxt1 = ( ( m_flags & kDxt1 ) != 0 );
    if( isDxt1 )
    {
        Compress3( block );
        if( !m_colours->IsTransparent() )
            Compress4( block );
    }
    else
        Compress4( block );
 }
 } // namespace squish
--- a/thirdparty/squish/colourfit.h
+++ b/thirdparty/squish/colourfit.h
@ -1,56 +0,0 @@
 /* -----------------------------------------------------------------------------
    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
    Permission is hereby granted, free of charge, to any person obtaining
    a copy of this software and associated documentation files (the
    "Software"), to deal in the Software without restriction, including
    without limitation the rights to use, copy, modify, merge, publish,
    distribute, sublicense, and/or sell copies of the Software, and to
    permit persons to whom the Software is furnished to do so, subject to
    the following conditions:
    The above copyright notice and this permission notice shall be included
    in all copies or substantial portions of the Software.
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
   -------------------------------------------------------------------------- */
 #ifndef SQUISH_COLOURFIT_H
 #define SQUISH_COLOURFIT_H
 #include "squish.h"
 #include "maths.h"
 #include <climits>
 namespace squish {
 class ColourSet;
 // Base class for the colour fitters: holds the colour set and flags, and
 // dispatches Compress() to the 3-colour and/or 4-colour fit implemented by
 // the derived class.
 class ColourFit
 {
 public:
    ColourFit( ColourSet const* colours, int flags );
    virtual ~ColourFit();
    // Fits the colour set and writes the colour block to `block`.
    void Compress( void* block );
 protected:
    // 3-colour fit; always run in DXT1 mode
    virtual void Compress3( void* block ) = 0;
    // 4-colour fit; run outside DXT1 mode, or in DXT1 mode when nothing is transparent
    virtual void Compress4( void* block ) = 0;
    // colour set being fitted (not owned)
    ColourSet const* m_colours;
    // compression flags passed to the constructor
    int m_flags;
 };
 } // namespace squish
 #endif // ndef SQUISH_COLOURFIT_H
--- a/thirdparty/squish/colourset.cpp
+++ b/thirdparty/squish/colourset.cpp
@ -1,121 +0,0 @@
 /* -----------------------------------------------------------------------------
    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
    Permission is hereby granted, free of charge, to any person obtaining
    a copy of this software and associated documentation files (the
    "Software"), to deal in the Software without restriction, including
    without limitation the rights to use, copy, modify, merge, publish,
    distribute, sublicense, and/or sell copies of the Software, and to
    permit persons to whom the Software is furnished to do so, subject to
    the following conditions:
    The above copyright notice and this permission notice shall be included
    in all copies or substantial portions of the Software.
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
   -------------------------------------------------------------------------- */
 #include "colourset.h"
 namespace squish {
 // Builds the minimal set of distinct colours for a block of sixteen RGBA
 // pixels. Pixels whose bit is clear in `mask` are ignored, as are pixels with
 // alpha below 128 in DXT1 mode (which also flags the set as transparent); both
 // get a remap entry of -1. Pixels with identical RGB share a single point whose
 // weight accumulates; the weights are square-rooted at the end.
 ColourSet::ColourSet( u8 const* rgba, int mask, int flags )
  : m_count( 0 ),
    m_transparent( false )
 {
    bool const dxt1Mode = ( flags & kDxt1 ) != 0;
    bool const alphaWeighted = ( flags & kWeightColourByAlpha ) != 0;
    for( int i = 0; i < 16; ++i )
    {
        // skip pixels that are disabled by the mask
        if( ( mask & ( 1 << i ) ) == 0 )
        {
            m_remap[i] = -1;
            continue;
        }
        u8 const* pixel = rgba + 4*i;
        // in dxt1 mode mostly-transparent pixels do not take part in the fit
        if( dxt1Mode && pixel[3] < 128 )
        {
            m_remap[i] = -1;
            m_transparent = true;
            continue;
        }
        // the +1 keeps the weight non-zero even for zero alpha
        float const contribution = alphaWeighted ? ( float )( pixel[3] + 1 ) / 256.0f : 1.0f;
        // search the earlier pixels for a usable one with the same rgb
        int twin = -1;
        for( int j = 0; j < i; ++j )
        {
            if( ( mask & ( 1 << j ) ) == 0 )
                continue;
            u8 const* other = rgba + 4*j;
            if( pixel[0] != other[0] || pixel[1] != other[1] || pixel[2] != other[2] )
                continue;
            if( dxt1Mode && other[3] < 128 )
                continue;
            twin = j;
            break;
        }
        if( twin >= 0 )
        {
            // share the existing point and add this pixel's weight to it
            int const index = m_remap[twin];
            m_weights[index] += contribution;
            m_remap[i] = index;
        }
        else
        {
            // new point, with coordinates normalised to [0,1]
            float const x = ( float )pixel[0] / 255.0f;
            float const y = ( float )pixel[1] / 255.0f;
            float const z = ( float )pixel[2] / 255.0f;
            m_points[m_count] = Vec3( x, y, z );
            m_weights[m_count] = contribution;
            m_remap[i] = m_count;
            ++m_count;
        }
    }
    // square root the weights
    for( int n = 0; n < m_count; ++n )
        m_weights[n] = std::sqrt( m_weights[n] );
 }
 // Expands per-point selectors (`source`, indexed by point) into per-pixel
 // selectors (`target`, sixteen entries). Pixels that were left out of the set
 // have a remap entry of -1 and receive selector 3.
 void ColourSet::RemapIndices( u8 const* source, u8* target ) const
 {
    for( int pixel = 0; pixel < 16; ++pixel )
    {
        int const point = m_remap[pixel];
        target[pixel] = ( point != -1 ) ? source[point] : ( u8 )3;
    }
 }
 } // namespace squish
--- a/thirdparty/squish/colourset.h
+++ b/thirdparty/squish/colourset.h
@ -1,58 +0,0 @@
 /* -----------------------------------------------------------------------------
    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
    Permission is hereby granted, free of charge, to any person obtaining
    a copy of this software and associated documentation files (the
    "Software"), to deal in the Software without restriction, including
    without limitation the rights to use, copy, modify, merge, publish,
    distribute, sublicense, and/or sell copies of the Software, and to
    permit persons to whom the Software is furnished to do so, subject to
    the following conditions:
    The above copyright notice and this permission notice shall be included
    in all copies or substantial portions of the Software.
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
   -------------------------------------------------------------------------- */
 #ifndef SQUISH_COLOURSET_H
 #define SQUISH_COLOURSET_H
 #include "squish.h"
 #include "maths.h"
 namespace squish {
 /*! @brief Represents a set of block colours

    Holds up to 16 points built from the 16 RGBA pixels of a block, a weight
    per point, and a table mapping every pixel to the point that stands for it.
 */
 class ColourSet
 {
 public:
    // Builds the set from 16 RGBA pixels; mask has one bit per pixel and
    // flags carries the compression options.
    ColourSet( u8 const* rgba, int mask, int flags );
    // Number of points stored in the set.
    int GetCount() const { return m_count; }
    // Point colours, with each channel normalised to [0,1].
    Vec3 const* GetPoints() const { return m_points; }
    // Per-point weights (the square root of the accumulated pixel weight).
    float const* GetWeights() const { return m_weights; }
    // NOTE(review): presumably true when the block contains transparent
    // pixels; the code that sets the flag is not visible here - confirm.
    bool IsTransparent() const { return m_transparent; }
    // Converts one index per point (source) into one index per pixel
    // (target); pixels whose remap entry is -1 receive index 3.
    void RemapIndices( u8 const* source, u8* target ) const;
 private:
    int m_count;            // number of valid entries in m_points/m_weights
    Vec3 m_points[16];      // point colours
    float m_weights[16];    // weight of each point
    int m_remap[16];        // pixel -> point index, or -1 when the pixel has no point
    bool m_transparent;
 };
 } // namespace squish
 #endif // ndef SQUISH_COLOURSET_H
--- a/thirdparty/squish/config.h
+++ b/thirdparty/squish/config.h
@ -1,69 +0,0 @@
 /* -----------------------------------------------------------------------------
    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
    Permission is hereby granted, free of charge, to any person obtaining
    a copy of this software and associated documentation files (the
    "Software"), to deal in the Software without restriction, including
    without limitation the rights to use, copy, modify, merge, publish,
    distribute, sublicense, and/or sell copies of the Software, and to
    permit persons to whom the Software is furnished to do so, subject to
    the following conditions:
    The above copyright notice and this permission notice shall be included
    in all copies or substantial portions of the Software.
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
   -------------------------------------------------------------------------- */
 #ifndef SQUISH_CONFIG_H
 #define SQUISH_CONFIG_H
 // Set to 1 when building squish to use Altivec instructions.
 #ifndef SQUISH_USE_ALTIVEC
 #define SQUISH_USE_ALTIVEC 0
 #endif
 // Set to 1 or 2 when building squish to use SSE or SSE2 instructions.
 // -- GODOT start --
 // Auto-detect the SSE level from the compiler's target macros, but only when
 // the build has not already chosen a level (e.g. -DSQUISH_USE_SSE=0).
 // Detecting unconditionally would redefine the macro and silently discard
 // the explicit setting.
 #ifndef SQUISH_USE_SSE
  #ifdef _MSC_VER
   #if defined(_M_IX86_FP)
     #if _M_IX86_FP >= 2
       #define SQUISH_USE_SSE 2
     #elif _M_IX86_FP >= 1
       #define SQUISH_USE_SSE 1
     #endif
   #elif defined(_M_X64)
     #define SQUISH_USE_SSE 2
   #endif
  #else
   #if defined(__SSE2__)
     #define SQUISH_USE_SSE 2
   #elif defined(__SSE__)
     #define SQUISH_USE_SSE 1
   #endif
  #endif
 #endif
 // -- GODOT end --
 // Fall back to the scalar code path when no SSE level was selected above.
 #ifndef SQUISH_USE_SSE
 #define SQUISH_USE_SSE 0
 #endif
 // Internally set SQUISH_USE_SIMD when either Altivec or SSE is available.
 #if SQUISH_USE_ALTIVEC && SQUISH_USE_SSE
 #error "Cannot enable both Altivec and SSE!"
 #endif
 #if SQUISH_USE_ALTIVEC || SQUISH_USE_SSE
 #define SQUISH_USE_SIMD 1
 #else
 #define SQUISH_USE_SIMD 0
 #endif
 #endif // ndef SQUISH_CONFIG_H
--- a/thirdparty/squish/maths.cpp
+++ b/thirdparty/squish/maths.cpp
@ -1,259 +0,0 @@
 /* -----------------------------------------------------------------------------
    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
    Permission is hereby granted, free of charge, to any person obtaining
    a copy of this software and associated documentation files (the
    "Software"), to deal in the Software without restriction, including
    without limitation the rights to use, copy, modify, merge, publish,
    distribute, sublicense, and/or sell copies of the Software, and to
    permit persons to whom the Software is furnished to do so, subject to
    the following conditions:
    The above copyright notice and this permission notice shall be included
    in all copies or substantial portions of the Software.
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
   -------------------------------------------------------------------------- */
 /*! @file
    The symmetric eigensystem solver algorithm is from
    http://www.geometrictools.com/Documentation/EigenSymmetric3x3.pdf
 */
 #include "maths.h"
 #include "simd.h"
 #include <cfloat>
 namespace squish {
 // Returns the weighted covariance of the n points about their weighted
 // centroid. The result holds the six unique entries of the symmetric
 // matrix in the order xx, xy, xz, yy, yz, zz.
 Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weights )
 {
    // weighted mean of the points; left as the raw weighted sum when the
    // total weight is too small to divide by
    float weightSum = 0.0f;
    Vec3 mean( 0.0f );
    for( int k = 0; k < n; ++k )
    {
        weightSum += weights[k];
        mean += weights[k]*points[k];
    }
    if( weightSum > FLT_EPSILON )
        mean /= weightSum;
    // sum the weighted outer products of the offsets from the mean
    Sym3x3 result( 0.0f );
    for( int k = 0; k < n; ++k )
    {
        Vec3 const offset = points[k] - mean;
        Vec3 const scaled = weights[k]*offset;
        result[0] += offset.X()*scaled.X();
        result[1] += offset.X()*scaled.Y();
        result[2] += offset.X()*scaled.Z();
        result[3] += offset.Y()*scaled.Y();
        result[4] += offset.Y()*scaled.Z();
        result[5] += offset.Z()*scaled.Z();
    }
    return result;
 }
 #if 0
 // Returns an (unnormalised) eigenvector of the symmetric matrix for the
 // eigenvalue evalue, taken from the entries of U computed below.
 static Vec3 GetMultiplicity1Evector( Sym3x3 const& matrix, float evalue )
 {
    // M = matrix - evalue*I: only the diagonal entries (0, 3, 5) change
    Sym3x3 m( matrix );
    m[0] -= evalue;
    m[3] -= evalue;
    m[5] -= evalue;
    // U holds the 2x2 cofactor-style products of M
    Sym3x3 u;
    u[0] = m[3]*m[5] - m[4]*m[4];
    u[1] = m[2]*m[4] - m[1]*m[5];
    u[2] = m[1]*m[4] - m[2]*m[3];
    u[3] = m[0]*m[5] - m[2]*m[2];
    u[4] = m[1]*m[2] - m[4]*m[0];
    u[5] = m[0]*m[3] - m[1]*m[1];
    // locate the entry of U with the greatest magnitude (first one wins ties)
    int largest = 0;
    float magnitude = std::fabs( u[0] );
    for( int k = 1; k < 6; ++k )
    {
        float const candidate = std::fabs( u[k] );
        if( candidate > magnitude )
        {
            magnitude = candidate;
            largest = k;
        }
    }
    // return the column of U selected by that entry
    if( largest == 0 )
        return Vec3( u[0], u[1], u[2] );
    if( largest == 1 || largest == 3 )
        return Vec3( u[1], u[3], u[4] );
    return Vec3( u[2], u[4], u[5] );
 }
 // Returns one (unnormalised) eigenvector of the symmetric matrix for the
 // repeated eigenvalue evalue, built from the entries of M = matrix - evalue*I.
 static Vec3 GetMultiplicity2Evector( Sym3x3 const& matrix, float evalue )
 {
    // M = matrix - evalue*I: only the diagonal entries (0, 3, 5) change
    Sym3x3 m( matrix );
    m[0] -= evalue;
    m[3] -= evalue;
    m[5] -= evalue;
    // locate the entry of M with the greatest magnitude (first one wins ties)
    int largest = 0;
    float magnitude = std::fabs( m[0] );
    for( int k = 1; k < 6; ++k )
    {
        float const candidate = std::fabs( m[k] );
        if( candidate > magnitude )
        {
            magnitude = candidate;
            largest = k;
        }
    }
    // build the eigenvector from the entries selected by that index
    if( largest == 0 || largest == 1 )
        return Vec3( -m[1], m[0], 0.0f );
    if( largest == 2 )
        return Vec3( m[2], 0.0f, -m[0] );
    if( largest == 3 || largest == 4 )
        return Vec3( 0.0f, -m[4], m[3] );
    return Vec3( 0.0f, -m[5], m[4] );
 }
 // Closed-form variant: finds the eigenvalue of largest magnitude from the
 // characteristic cubic of the symmetric matrix and returns a matching
 // (unnormalised) eigenvector. This variant sits in the "#if 0" branch, so
 // it is not compiled.
 Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
 {
    // compute the cubic coefficients
    // (c0 is the determinant, c1 the sum of the 2x2 principal minors and
    // c2 the trace of the matrix)
    float c0 = matrix[0]*matrix[3]*matrix[5]
        + 2.0f*matrix[1]*matrix[2]*matrix[4]
        - matrix[0]*matrix[4]*matrix[4]
        - matrix[3]*matrix[2]*matrix[2]
        - matrix[5]*matrix[1]*matrix[1];
    float c1 = matrix[0]*matrix[3] + matrix[0]*matrix[5] + matrix[3]*matrix[5]
        - matrix[1]*matrix[1] - matrix[2]*matrix[2] - matrix[4]*matrix[4];
    float c2 = matrix[0] + matrix[3] + matrix[5];
    // compute the quadratic coefficients
    float a = c1 - ( 1.0f/3.0f )*c2*c2;
    float b = ( -2.0f/27.0f )*c2*c2*c2 + ( 1.0f/3.0f )*c1*c2 - c0;
    // compute the root count check
    float Q = 0.25f*b*b + ( 1.0f/27.0f )*a*a*a;
    // test the multiplicity
    if( FLT_EPSILON < Q )
    {
        // only one root, which implies we have a multiple of the identity
        return Vec3( 1.0f );
    }
    else if( Q < -FLT_EPSILON )
    {
        // three distinct roots
        float theta = std::atan2( std::sqrt( -Q ), -0.5f*b );
        float rho = std::sqrt( 0.25f*b*b - Q );
        float rt = std::pow( rho, 1.0f/3.0f );
        float ct = std::cos( theta/3.0f );
        float st = std::sin( theta/3.0f );
        float l1 = ( 1.0f/3.0f )*c2 + 2.0f*rt*ct;
        float l2 = ( 1.0f/3.0f )*c2 - rt*( ct + ( float )sqrt( 3.0f )*st );
        float l3 = ( 1.0f/3.0f )*c2 - rt*( ct - ( float )sqrt( 3.0f )*st );
        // pick the larger (by magnitude) of the three roots
        if( std::fabs( l2 ) > std::fabs( l1 ) )
            l1 = l2;
        if( std::fabs( l3 ) > std::fabs( l1 ) )
            l1 = l3;
        // get the eigenvector
        return GetMultiplicity1Evector( matrix, l1 );
    }
    else // if( -FLT_EPSILON <= Q && Q <= FLT_EPSILON )
    {
        // two roots
        // rt is the real cube root of -b/2 with its sign kept explicit,
        // so std::pow only ever sees a non-negative base
        float rt;
        if( b < 0.0f )
            rt = -std::pow( -0.5f*b, 1.0f/3.0f );
        else
            rt = std::pow( 0.5f*b, 1.0f/3.0f );
        float l1 = ( 1.0f/3.0f )*c2 + rt;        // repeated
        float l2 = ( 1.0f/3.0f )*c2 - 2.0f*rt;
        // get the eigenvector for whichever root has the larger magnitude
        if( std::fabs( l1 ) > std::fabs( l2 ) )
            return GetMultiplicity2Evector( matrix, l1 );
        else
            return GetMultiplicity1Evector( matrix, l2 );
    }
 }
 #else
 #define POWER_ITERATION_COUNT    8
 // Estimates the principal axis of the symmetric matrix by power iteration:
 // a start vector of all ones is repeatedly multiplied by the matrix and
 // rescaled by its largest x/y/z component, POWER_ITERATION_COUNT times.
 Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
 {
    // unpack the six stored entries into three full rows (w is unused)
    Vec4 const rowX( matrix[0], matrix[1], matrix[2], 0.0f );
    Vec4 const rowY( matrix[1], matrix[3], matrix[4], 0.0f );
    Vec4 const rowZ( matrix[2], matrix[4], matrix[5], 0.0f );
    Vec4 estimate = VEC4_CONST( 1.0f );
    int remaining = POWER_ITERATION_COUNT;
    while( remaining-- > 0 )
    {
        // product = matrix * estimate, accumulated one row at a time
        Vec4 product = rowX*estimate.SplatX();
        product = MultiplyAdd(rowY, estimate.SplatY(), product);
        product = MultiplyAdd(rowZ, estimate.SplatZ(), product);
        // largest of the x, y and z components, replicated to every lane
        Vec4 scale = Max(product.SplatX(), Max(product.SplatY(), product.SplatZ()));
        // rescale by it to get the next estimate
        estimate = product*Reciprocal(scale);
    }
    return estimate.GetVec3();
 }
 #endif
 } // namespace squish
--- a/thirdparty/squish/maths.h
+++ b/thirdparty/squish/maths.h
@ -1,233 +0,0 @@
 /* -----------------------------------------------------------------------------
    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
    Permission is hereby granted, free of charge, to any person obtaining
    a copy of this software and associated documentation files (the
    "Software"), to deal in the Software without restriction, including
    without limitation the rights to use, copy, modify, merge, publish,
    distribute, sublicense, and/or sell copies of the Software, and to
    permit persons to whom the Software is furnished to do so, subject to
    the following conditions:
    The above copyright notice and this permission notice shall be included
    in all copies or substantial portions of the Software.
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
   -------------------------------------------------------------------------- */
 #ifndef SQUISH_MATHS_H
 #define SQUISH_MATHS_H
 #include <cmath>
 #include <algorithm>
 #include "config.h"
 namespace squish {
 // Three-component float vector with component-wise arithmetic.
 class Vec3
 {
 public:
    typedef Vec3 const& Arg;
    // Leaves the components uninitialised.
    Vec3()
    {
    }
    // Sets all three components to s.
    explicit Vec3( float s )
      : m_x( s ), m_y( s ), m_z( s )
    {
    }
    Vec3( float x, float y, float z )
      : m_x( x ), m_y( y ), m_z( z )
    {
    }
    float X() const { return m_x; }
    float Y() const { return m_y; }
    float Z() const { return m_z; }
    Vec3 operator-() const
    {
        return Vec3( -m_x, -m_y, -m_z );
    }
    // Compound assignment: component-wise against another vector, or
    // uniformly against a scalar.
    Vec3& operator+=( Arg v )
    {
        m_x += v.m_x; m_y += v.m_y; m_z += v.m_z;
        return *this;
    }
    Vec3& operator-=( Arg v )
    {
        m_x -= v.m_x; m_y -= v.m_y; m_z -= v.m_z;
        return *this;
    }
    Vec3& operator*=( Arg v )
    {
        m_x *= v.m_x; m_y *= v.m_y; m_z *= v.m_z;
        return *this;
    }
    Vec3& operator*=( float s )
    {
        m_x *= s; m_y *= s; m_z *= s;
        return *this;
    }
    Vec3& operator/=( Arg v )
    {
        m_x /= v.m_x; m_y /= v.m_y; m_z /= v.m_z;
        return *this;
    }
    // Scalar division multiplies by the reciprocal of s.
    Vec3& operator/=( float s )
    {
        float const inverse = 1.0f/s;
        m_x *= inverse; m_y *= inverse; m_z *= inverse;
        return *this;
    }
    // Binary operators build the result directly from the components.
    friend Vec3 operator+( Arg left, Arg right )
    {
        return Vec3( left.m_x + right.m_x, left.m_y + right.m_y, left.m_z + right.m_z );
    }
    friend Vec3 operator-( Arg left, Arg right )
    {
        return Vec3( left.m_x - right.m_x, left.m_y - right.m_y, left.m_z - right.m_z );
    }
    friend Vec3 operator*( Arg left, Arg right )
    {
        return Vec3( left.m_x*right.m_x, left.m_y*right.m_y, left.m_z*right.m_z );
    }
    friend Vec3 operator*( Arg left, float right )
    {
        return Vec3( left.m_x*right, left.m_y*right, left.m_z*right );
    }
    friend Vec3 operator*( float left, Arg right )
    {
        return Vec3( right.m_x*left, right.m_y*left, right.m_z*left );
    }
    friend Vec3 operator/( Arg left, Arg right )
    {
        return Vec3( left.m_x/right.m_x, left.m_y/right.m_y, left.m_z/right.m_z );
    }
    friend Vec3 operator/( Arg left, float right )
    {
        float const inverse = 1.0f/right;
        return Vec3( left.m_x*inverse, left.m_y*inverse, left.m_z*inverse );
    }
    friend float Dot( Arg left, Arg right )
    {
        return left.m_x*right.m_x + left.m_y*right.m_y + left.m_z*right.m_z;
    }
    // Component-wise minimum and maximum.
    friend Vec3 Min( Arg left, Arg right )
    {
        float const x = std::min( left.m_x, right.m_x );
        float const y = std::min( left.m_y, right.m_y );
        float const z = std::min( left.m_z, right.m_z );
        return Vec3( x, y, z );
    }
    friend Vec3 Max( Arg left, Arg right )
    {
        float const x = std::max( left.m_x, right.m_x );
        float const y = std::max( left.m_y, right.m_y );
        float const z = std::max( left.m_z, right.m_z );
        return Vec3( x, y, z );
    }
    // Rounds every component toward zero.
    friend Vec3 Truncate( Arg v )
    {
        return Vec3(
            Vec3::TowardZero( v.m_x ),
            Vec3::TowardZero( v.m_y ),
            Vec3::TowardZero( v.m_z )
        );
    }
 private:
    // floor for positive values, ceil for everything else
    static float TowardZero( float c )
    {
        return c > 0.0f ? std::floor( c ) : std::ceil( c );
    }
    float m_x;
    float m_y;
    float m_z;
 };
 // Squared Euclidean length of v, i.e. the dot product of v with itself.
 inline float LengthSquared( Vec3::Arg v )
 {
    float const squared = Dot( v, v );
    return squared;
 }
 // Storage for the six unique entries of a symmetric 3x3 matrix,
 // addressed by index 0..5.
 class Sym3x3
 {
 public:
    // Leaves the entries uninitialised.
    Sym3x3()
    {
    }
    // Sets every entry to s.
    Sym3x3( float s )
    {
        m_x[0] = m_x[1] = m_x[2] = m_x[3] = m_x[4] = m_x[5] = s;
    }
    // Unchecked element access.
    float operator[]( int index ) const { return m_x[index]; }
    float& operator[]( int index ) { return m_x[index]; }
 private:
    float m_x[6];
 };
 // Returns the weighted covariance of n points about their weighted centroid,
 // stored as the six unique entries (xx, xy, xz, yy, yz, zz) of a symmetric
 // 3x3 matrix.
 Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weights );
 // Returns an (unnormalised) estimate of the principal axis of the given
 // symmetric matrix.
 Vec3 ComputePrincipleComponent( Sym3x3 const& matrix );
 } // namespace squish
 #endif // ndef SQUISH_MATHS_H
--- a/thirdparty/squish/patches/config_sse.patch
+++ b/thirdparty/squish/patches/config_sse.patch
@ -1,31 +0,0 @@
 diff --git a/thirdparty/squish/config.h b/thirdparty/squish/config.h
 index 92edefe966..05f8d72598 100644
 --- a/thirdparty/squish/config.h
 +++ b/thirdparty/squish/config.h
@@ -32,6 +32,26 @@
 #endif
 // Set to 1 or 2 when building squish to use SSE or SSE2 instructions.
 +// -- GODOT start --
 +#ifdef _MSC_VER
 +  #if defined(_M_IX86_FP)
 +    #if _M_IX86_FP >= 2
 +      #define SQUISH_USE_SSE 2
 +    #elif _M_IX86_FP >= 1
 +      #define SQUISH_USE_SSE 1
 +    #endif
 +  #elif defined(_M_X64)
 +    #define SQUISH_USE_SSE 2
 +  #endif
 +#else
 +  #if defined(__SSE2__)
 +    #define SQUISH_USE_SSE 2
 +  #elif defined(__SSE__)
 +    #define SQUISH_USE_SSE 1
 +  #endif
 +#endif
 +// -- GODOT end --
 +
 #ifndef SQUISH_USE_SSE
 #define SQUISH_USE_SSE 0
 #endif
--- a/thirdparty/squish/patches/decompress_bc4_bc5.patch
+++ b/thirdparty/squish/patches/decompress_bc4_bc5.patch
@ -1,85 +0,0 @@
 diff --git a/thirdparty/squish/colourblock.cpp b/thirdparty/squish/colourblock.cpp
 index af8b980365..f14c9362bd 100644
 --- a/thirdparty/squish/colourblock.cpp
 +++ b/thirdparty/squish/colourblock.cpp
@@ -24,6 +24,9 @@
    -------------------------------------------------------------------------- */
 #include "colourblock.h"
 +// -- GODOT start --
 +#include "alpha.h"
 +// -- GODOT end --
 namespace squish {
@@ -211,4 +214,34 @@ void DecompressColour( u8* rgba, void const* block, bool isDxt1 )
     }
 }
 +// -- GODOT start --
 +void DecompressColourBc4( u8* rgba, void const* block)
 +{
 +    DecompressAlphaDxt5(rgba,block);
 +    for ( int i = 0; i < 16; ++i ) {
 +        rgba[i*4] = rgba[i*4 + 3];
 +		rgba[i*4 + 1] = 0;
 +		rgba[i*4 + 2] = 0;
 +        rgba[i*4 + 3] = 255;
 +    }
 +}
 +
 +void DecompressColourBc5( u8* rgba, void const* block)
 +{
 +    void const* rblock = block;
 +    void const* gblock = reinterpret_cast< u8 const* >( block ) + 8;
 +    DecompressAlphaDxt5(rgba,rblock);
 +    for ( int i = 0; i < 16; ++i ) {
 +        rgba[i*4] = rgba[i*4 + 3];
 +    }
 +    DecompressAlphaDxt5(rgba,gblock);
 +    for ( int i = 0; i < 16; ++i ) {
 +        rgba[i*4+1] = rgba[i*4 + 3];
 +        rgba[i*4 + 2] = 0;
 +        rgba[i*4 + 3] = 255;
 +    }
 +}
 +// -- GODOT end --
 +
 +
 } // namespace squish
 diff --git a/thirdparty/squish/colourblock.h b/thirdparty/squish/colourblock.h
 index fee2cd7c5d..e1eb9e4917 100644
 --- a/thirdparty/squish/colourblock.h
 +++ b/thirdparty/squish/colourblock.h
@@ -35,6 +35,10 @@ void WriteColourBlock3( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void*
 void WriteColourBlock4( Vec3::Arg start, Vec3::Arg end, u8 const* indices, void* block );
 void DecompressColour( u8* rgba, void const* block, bool isDxt1 );
 +// -- GODOT start --
 +void DecompressColourBc4( u8* rgba, void const* block );
 +void DecompressColourBc5( u8* rgba, void const* block );
 +// -- GODOT end --
 } // namespace squish
 diff --git a/thirdparty/squish/squish.cpp b/thirdparty/squish/squish.cpp
 index 1d22a64ad6..086ba11cd0 100644
 --- a/thirdparty/squish/squish.cpp
 +++ b/thirdparty/squish/squish.cpp
@@ -135,7 +135,15 @@ void Decompress( u8* rgba, void const* block, int flags )
         colourBlock = reinterpret_cast< u8 const* >( block ) + 8;
     // decompress colour
 -    DecompressColour( rgba, colourBlock, ( flags & kDxt1 ) != 0 );
 +    // -- GODOT start --
 +    //DecompressColour( rgba, colourBlock, ( flags & kDxt1 ) != 0 );
 +    if(( flags & ( kBc4 ) ) != 0)
 +        DecompressColourBc4( rgba, colourBlock);
 +    else if(( flags & ( kBc5 ) ) != 0)
 +        DecompressColourBc5( rgba, colourBlock);
 +    else
 +        DecompressColour( rgba, colourBlock, ( flags & kDxt1 ) != 0 );
 +    // -- GODOT end --
     // decompress alpha separately if necessary
     if( ( flags & kDxt3 ) != 0 )
--- a/thirdparty/squish/rangefit.cpp
+++ b/thirdparty/squish/rangefit.cpp
@ -1,201 +0,0 @@
 /* -----------------------------------------------------------------------------
    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
    Permission is hereby granted, free of charge, to any person obtaining
    a copy of this software and associated documentation files (the
    "Software"), to deal in the Software without restriction, including
    without limitation the rights to use, copy, modify, merge, publish,
    distribute, sublicense, and/or sell copies of the Software, and to
    permit persons to whom the Software is furnished to do so, subject to
    the following conditions:
    The above copyright notice and this permission notice shall be included
    in all copies or substantial portions of the Software.
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
   -------------------------------------------------------------------------- */
 #include "rangefit.h"
 #include "colourset.h"
 #include "colourblock.h"
 #include <cfloat>
 namespace squish {
 // Prepares a range fit: picks the two colours of the set that lie furthest
 // apart along the principal axis of the set and snaps them to the 5:6:5
 // grid, ready to be used as the block endpoints.
 RangeFit::RangeFit( ColourSet const* colours, int flags, float* metric )
  : ColourFit( colours, flags )
 {
    // per-channel error weights: the caller's metric when supplied,
    // otherwise uniform (old perceptual = 0.2126f, 0.7152f, 0.0722f)
    m_metric = metric ? Vec3( metric[0], metric[1], metric[2] ) : Vec3( 1.0f );
    // nothing has been written yet, so any candidate beats this
    m_besterror = FLT_MAX;
    // local views of the colour set
    int const pointCount = m_colours->GetCount();
    Vec3 const* pts = m_colours->GetPoints();
    float const* wts = m_colours->GetWeights();
    // principal axis of the weighted point cloud
    Sym3x3 const cov = ComputeWeightedCovariance( pointCount, pts, wts );
    Vec3 const axis = ComputePrincipleComponent( cov );
    // endpoints are the points with the smallest and largest projection
    // onto that axis (both stay black for an empty set)
    Vec3 lo( 0.0f );
    Vec3 hi( 0.0f );
    if( pointCount > 0 )
    {
        lo = hi = pts[0];
        float lowest = Dot( pts[0], axis );
        float highest = lowest;
        for( int k = 1; k < pointCount; ++k )
        {
            float const projection = Dot( pts[k], axis );
            if( projection < lowest )
            {
                lo = pts[k];
                lowest = projection;
            }
            else if( projection > highest )
            {
                hi = pts[k];
                highest = projection;
            }
        }
    }
    // keep both endpoints inside [0, 1]
    Vec3 const one( 1.0f );
    Vec3 const zero( 0.0f );
    lo = Min( one, Max( zero, lo ) );
    hi = Min( one, Max( zero, hi ) );
    // round to the nearest representable 5:6:5 value and store
    Vec3 const grid( 31.0f, 63.0f, 31.0f );
    Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
    Vec3 const half( 0.5f );
    m_start = Truncate( grid*lo + half )*gridrcp;
    m_end = Truncate( grid*hi + half )*gridrcp;
 }
 void RangeFit::Compress3( void* block )
 {
    // cache some values
    int const count = m_colours->GetCount();
    Vec3 const* values = m_colours->GetPoints();
    // create a codebook
    Vec3 codes[3];
    codes[0] = m_start;
    codes[1] = m_end;
    codes[2] = 0.5f*m_start + 0.5f*m_end;
    // match each point to the closest code
    u8 closest[16];
    float error = 0.0f;
    for( int i = 0; i < count; ++i )
    {
        // find the closest code
        float dist = FLT_MAX;
        int idx = 0;
        for( int j = 0; j < 3; ++j )
        {
            float d = LengthSquared( m_metric*( values[i] - codes[j] ) );
            if( d < dist )
            {
                dist = d;
                idx = j;
            }
        }
        // save the index
        closest[i] = ( u8 )idx;
        // accumulate the error
        error += dist;
    }
    // save this scheme if it wins
    if( error < m_besterror )
    {
        // remap the indices
        u8 indices[16];
        m_colours->RemapIndices( closest, indices );
        // save the block
        WriteColourBlock3( m_start, m_end, indices, block );
        // save the error
        m_besterror = error;
    }
 }
 void RangeFit::Compress4( void* block )
 {
    // cache some values
    int const count = m_colours->GetCount();
    Vec3 const* values = m_colours->GetPoints();
    // create a codebook
    Vec3 codes[4];
    codes[0] = m_start;
    codes[1] = m_end;
    codes[2] = ( 2.0f/3.0f )*m_start + ( 1.0f/3.0f )*m_end;
    codes[3] = ( 1.0f/3.0f )*m_start + ( 2.0f/3.0f )*m_end;
    // match each point to the closest code
    u8 closest[16];
    float error = 0.0f;
    for( int i = 0; i < count; ++i )
    {
        // find the closest code
        float dist = FLT_MAX;
        int idx = 0;
        for( int j = 0; j < 4; ++j )
        {
            float d = LengthSquared( m_metric*( values[i] - codes[j] ) );
            if( d < dist )
            {
                dist = d;
                idx = j;
            }
        }
        // save the index
        closest[i] = ( u8 )idx;
        // accumulate the error
        error += dist;
    }
    // save this scheme if it wins
    if( error < m_besterror )
    {
        // remap the indices
        u8 indices[16];
        m_colours->RemapIndices( closest, indices );
        // save the block
        WriteColourBlock4( m_start, m_end, indices, block );
        // save the error
        m_besterror = error;
    }
 }
 } // namespace squish
--- a/thirdparty/squish/rangefit.h
+++ b/thirdparty/squish/rangefit.h
@ -1,54 +0,0 @@
 /* -----------------------------------------------------------------------------
    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
    Permission is hereby granted, free of charge, to any person obtaining
    a copy of this software and associated documentation files (the
    "Software"), to deal in the Software without restriction, including
    without limitation the rights to use, copy, modify, merge, publish,
    distribute, sublicense, and/or sell copies of the Software, and to
    permit persons to whom the Software is furnished to do so, subject to
    the following conditions:
    The above copyright notice and this permission notice shall be included
    in all copies or substantial portions of the Software.
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
   -------------------------------------------------------------------------- */
 #ifndef SQUISH_RANGEFIT_H
 #define SQUISH_RANGEFIT_H
 #include "squish.h"
 #include "colourfit.h"
 #include "maths.h"
 namespace squish {
 class ColourSet;
 // Colour fit that uses two colours of the set as the block endpoints and
 // maps every colour to the nearest palette entry derived from them.
 class RangeFit : public ColourFit
 {
 public:
    // metric points at three per-channel error weights, or is null to
    // weight all channels equally.
    RangeFit( ColourSet const* colours, int flags, float* metric );
 private:
    // Try the three-colour and four-colour palettes respectively; each
    // writes to block only when it lowers m_besterror.
    virtual void Compress3( void* block );
    virtual void Compress4( void* block );
    Vec3 m_metric;      // per-channel error weights
    Vec3 m_start;       // first endpoint, snapped to the 5:6:5 grid
    Vec3 m_end;         // second endpoint, snapped to the 5:6:5 grid
    float m_besterror;  // lowest error written so far (starts at FLT_MAX)
 };
 } // namespace squish
 #endif // ndef SQUISH_RANGEFIT_H
--- a/thirdparty/squish/simd.h
+++ b/thirdparty/squish/simd.h
@ -1,40 +0,0 @@
 /* -----------------------------------------------------------------------------
    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
    Permission is hereby granted, free of charge, to any person obtaining
    a copy of this software and associated documentation files (the
    "Software"), to deal in the Software without restriction, including
    without limitation the rights to use, copy, modify, merge, publish,
    distribute, sublicense, and/or sell copies of the Software, and to
    permit persons to whom the Software is furnished to do so, subject to
    the following conditions:
    The above copyright notice and this permission notice shall be included
    in all copies or substantial portions of the Software.
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
   -------------------------------------------------------------------------- */
 #ifndef SQUISH_SIMD_H
 #define SQUISH_SIMD_H
 #include "maths.h"
 #if SQUISH_USE_ALTIVEC
 #include "simd_ve.h"
 #elif SQUISH_USE_SSE
 #include "simd_sse.h"
 #else
 #include "simd_float.h"
 #endif
 #endif // ndef SQUISH_SIMD_H
--- a/thirdparty/squish/simd_float.h
+++ b/thirdparty/squish/simd_float.h
@ -1,183 +0,0 @@
 /* -----------------------------------------------------------------------------
    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
    Permission is hereby granted, free of charge, to any person obtaining
    a copy of this software and associated documentation files (the
    "Software"), to deal in the Software without restriction, including
    without limitation the rights to use, copy, modify, merge, publish,
    distribute, sublicense, and/or sell copies of the Software, and to
    permit persons to whom the Software is furnished to do so, subject to
    the following conditions:
    The above copyright notice and this permission notice shall be included
    in all copies or substantial portions of the Software.
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
   -------------------------------------------------------------------------- */
 #ifndef SQUISH_SIMD_FLOAT_H
 #define SQUISH_SIMD_FLOAT_H
 #include <algorithm>
 namespace squish {
 #define VEC4_CONST( X ) Vec4( X )
 //! Scalar four-lane float vector used when no SIMD back end is selected.
 //! Every operation is carried out lane by lane on plain floats.
 class Vec4
 {
 public:
    typedef Vec4 const& Arg;
    //! Leaves all four lanes uninitialised.
    Vec4() {}
    //! Copies s into every lane.
    explicit Vec4( float s ) : m_x( s ), m_y( s ), m_z( s ), m_w( s ) {}
    //! Builds a vector from four individual lanes.
    Vec4( float x, float y, float z, float w ) : m_x( x ), m_y( y ), m_z( z ), m_w( w ) {}
    //! Returns the first three lanes as a Vec3.
    Vec3 GetVec3() const { return Vec3( m_x, m_y, m_z ); }
    //! The Splat functions replicate one lane across the whole vector.
    Vec4 SplatX() const { return Vec4( m_x, m_x, m_x, m_x ); }
    Vec4 SplatY() const { return Vec4( m_y, m_y, m_y, m_y ); }
    Vec4 SplatZ() const { return Vec4( m_z, m_z, m_z, m_z ); }
    Vec4 SplatW() const { return Vec4( m_w, m_w, m_w, m_w ); }
    //! Lane-wise in-place addition.
    Vec4& operator+=( Arg v )
    {
        m_x += v.m_x; m_y += v.m_y; m_z += v.m_z; m_w += v.m_w;
        return *this;
    }
    //! Lane-wise in-place subtraction.
    Vec4& operator-=( Arg v )
    {
        m_x -= v.m_x; m_y -= v.m_y; m_z -= v.m_z; m_w -= v.m_w;
        return *this;
    }
    //! Lane-wise in-place multiplication.
    Vec4& operator*=( Arg v )
    {
        m_x *= v.m_x; m_y *= v.m_y; m_z *= v.m_z; m_w *= v.m_w;
        return *this;
    }
    //! Lane-wise sum.
    friend Vec4 operator+( Vec4::Arg left, Vec4::Arg right  )
    {
        Vec4 sum( left );
        sum += right;
        return sum;
    }
    //! Lane-wise difference.
    friend Vec4 operator-( Vec4::Arg left, Vec4::Arg right  )
    {
        Vec4 difference( left );
        difference -= right;
        return difference;
    }
    //! Lane-wise product.
    friend Vec4 operator*( Vec4::Arg left, Vec4::Arg right  )
    {
        Vec4 product( left );
        product *= right;
        return product;
    }
    //! Returns a*b + c
    friend Vec4 MultiplyAdd( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c )
    {
        Vec4 const product = a*b;
        return product + c;
    }
    //! Returns -( a*b - c )
    friend Vec4 NegativeMultiplySubtract( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c )
    {
        Vec4 const product = a*b;
        return c - product;
    }
    //! Lane-wise 1/v using an ordinary float division.
    friend Vec4 Reciprocal( Vec4::Arg v )
    {
        float const one = 1.0f;
        return Vec4( one/v.m_x, one/v.m_y, one/v.m_z, one/v.m_w );
    }
    //! Lane-wise minimum.
    friend Vec4 Min( Vec4::Arg left, Vec4::Arg right )
    {
        float const x = std::min( left.m_x, right.m_x );
        float const y = std::min( left.m_y, right.m_y );
        float const z = std::min( left.m_z, right.m_z );
        float const w = std::min( left.m_w, right.m_w );
        return Vec4( x, y, z, w );
    }
    //! Lane-wise maximum.
    friend Vec4 Max( Vec4::Arg left, Vec4::Arg right )
    {
        float const x = std::max( left.m_x, right.m_x );
        float const y = std::max( left.m_y, right.m_y );
        float const z = std::max( left.m_z, right.m_z );
        float const w = std::max( left.m_w, right.m_w );
        return Vec4( x, y, z, w );
    }
    //! Drops the fractional part of every lane (rounds towards zero).
    friend Vec4 Truncate( Vec4::Arg v )
    {
        return Vec4(
            Vec4::TowardsZero( v.m_x ),
            Vec4::TowardsZero( v.m_y ),
            Vec4::TowardsZero( v.m_z ),
            Vec4::TowardsZero( v.m_w )
        );
    }
    //! True as soon as any lane of left is strictly below the same lane of right.
    friend bool CompareAnyLessThan( Vec4::Arg left, Vec4::Arg right )
    {
        if( left.m_x < right.m_x )
            return true;
        if( left.m_y < right.m_y )
            return true;
        if( left.m_z < right.m_z )
            return true;
        return left.m_w < right.m_w;
    }
 private:
    // positive values are floored, everything else goes through ceil
    static float TowardsZero( float f )
    {
        return f > 0.0f ? std::floor( f ) : std::ceil( f );
    }
    float m_x;
    float m_y;
    float m_z;
    float m_w;
 };
 } // namespace squish
 #endif // ndef SQUISH_SIMD_FLOAT_H
--- a/thirdparty/squish/simd_sse.h
+++ b/thirdparty/squish/simd_sse.h
@ -1,180 +0,0 @@
 /* -----------------------------------------------------------------------------
    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
    Permission is hereby granted, free of charge, to any person obtaining
    a copy of this software and associated documentation files (the
    "Software"), to deal in the Software without restriction, including
    without limitation the rights to use, copy, modify, merge, publish,
    distribute, sublicense, and/or sell copies of the Software, and to
    permit persons to whom the Software is furnished to do so, subject to
    the following conditions:
    The above copyright notice and this permission notice shall be included
    in all copies or substantial portions of the Software.
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
   -------------------------------------------------------------------------- */
 #ifndef SQUISH_SIMD_SSE_H
 #define SQUISH_SIMD_SSE_H
 #include <xmmintrin.h>
 #if ( SQUISH_USE_SSE > 1 )
 #include <emmintrin.h>
 #endif
 // Packs the same 2-bit lane selector into all four fields of an 8-bit
 // immediate; the Vec4 Splat functions pass it to _mm_shuffle_ps.
 #define SQUISH_SSE_SPLAT( a )                                        \
    ( ( a ) | ( ( a ) << 2 ) | ( ( a ) << 4 ) | ( ( a ) << 6 ) )
 // Packs four independent 2-bit selectors (x in the lowest bits, w in the
 // highest) into one 8-bit immediate.
 #define SQUISH_SSE_SHUF( x, y, z, w )                                \
    ( ( x ) | ( ( y ) << 2 ) | ( ( z ) << 4 ) | ( ( w ) << 6 ) )
 namespace squish {
 #define VEC4_CONST( X ) Vec4( X )
 //! Four-lane float vector backed by a single SSE __m128 register value.
 class Vec4
 {
 public:
    typedef Vec4 const& Arg;
    //! Leaves the register contents uninitialised.
    Vec4() {}
    //! Wraps an existing SSE value.
    explicit Vec4( __m128 v ) : m_v( v ) {}
    Vec4( Vec4 const& arg ) : m_v( arg.m_v ) {}
    Vec4& operator=( Vec4 const& arg )
    {
        m_v = arg.m_v;
        return *this;
    }
    //! Copies s into every lane.
    explicit Vec4( float s ) : m_v( _mm_set1_ps( s ) ) {}
    //! Builds a vector from four individual lanes (x ends up in lane 0).
    Vec4( float x, float y, float z, float w ) : m_v( _mm_setr_ps( x, y, z, w ) ) {}
    //! Returns the first three lanes as a Vec3.
    Vec3 GetVec3() const
    {
        // spill the register into a 16-byte aligned scratch array, using the
        // alignment syntax of whichever compiler family is in use
 #ifdef __GNUC__
        __attribute__ ((__aligned__ (16))) float c[4];
 #else
        __declspec(align(16)) float c[4];
 #endif
        _mm_store_ps( c, m_v );
        return Vec3( c[0], c[1], c[2] );
    }
    //! The Splat functions replicate one lane across the whole vector.
    Vec4 SplatX() const { return Vec4( _mm_shuffle_ps( m_v, m_v, SQUISH_SSE_SPLAT( 0 ) ) ); }
    Vec4 SplatY() const { return Vec4( _mm_shuffle_ps( m_v, m_v, SQUISH_SSE_SPLAT( 1 ) ) ); }
    Vec4 SplatZ() const { return Vec4( _mm_shuffle_ps( m_v, m_v, SQUISH_SSE_SPLAT( 2 ) ) ); }
    Vec4 SplatW() const { return Vec4( _mm_shuffle_ps( m_v, m_v, SQUISH_SSE_SPLAT( 3 ) ) ); }
    //! Lane-wise in-place addition.
    Vec4& operator+=( Arg v )
    {
        m_v = _mm_add_ps( m_v, v.m_v );
        return *this;
    }
    //! Lane-wise in-place subtraction.
    Vec4& operator-=( Arg v )
    {
        m_v = _mm_sub_ps( m_v, v.m_v );
        return *this;
    }
    //! Lane-wise in-place multiplication.
    Vec4& operator*=( Arg v )
    {
        m_v = _mm_mul_ps( m_v, v.m_v );
        return *this;
    }
    //! Lane-wise sum.
    friend Vec4 operator+( Vec4::Arg left, Vec4::Arg right  )
    {
        return Vec4( _mm_add_ps( left.m_v, right.m_v ) );
    }
    //! Lane-wise difference.
    friend Vec4 operator-( Vec4::Arg left, Vec4::Arg right  )
    {
        return Vec4( _mm_sub_ps( left.m_v, right.m_v ) );
    }
    //! Lane-wise product.
    friend Vec4 operator*( Vec4::Arg left, Vec4::Arg right  )
    {
        return Vec4( _mm_mul_ps( left.m_v, right.m_v ) );
    }
    //! Returns a*b + c
    friend Vec4 MultiplyAdd( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c )
    {
        return Vec4( _mm_add_ps( _mm_mul_ps( a.m_v, b.m_v ), c.m_v ) );
    }
    //! Returns -( a*b - c )
    friend Vec4 NegativeMultiplySubtract( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c )
    {
        return Vec4( _mm_sub_ps( c.m_v, _mm_mul_ps( a.m_v, b.m_v ) ) );
    }
    //! Lane-wise reciprocal: hardware estimate plus one refinement step.
    friend Vec4 Reciprocal( Vec4::Arg v )
    {
        // get the reciprocal estimate
        __m128 estimate = _mm_rcp_ps( v.m_v );
        // one round of Newton-Raphson refinement:
        // result = estimate + ( 1 - estimate*v )*estimate
        __m128 diff = _mm_sub_ps( _mm_set1_ps( 1.0f ), _mm_mul_ps( estimate, v.m_v ) );
        return Vec4( _mm_add_ps( _mm_mul_ps( diff, estimate ), estimate ) );
    }
    //! Lane-wise minimum.
    friend Vec4 Min( Vec4::Arg left, Vec4::Arg right )
    {
        return Vec4( _mm_min_ps( left.m_v, right.m_v ) );
    }
    //! Lane-wise maximum.
    friend Vec4 Max( Vec4::Arg left, Vec4::Arg right )
    {
        return Vec4( _mm_max_ps( left.m_v, right.m_v ) );
    }
    //! Converts every lane to a 32-bit integer and back to float.
    friend Vec4 Truncate( Vec4::Arg v )
    {
 #if ( SQUISH_USE_SSE == 1 )
        // SSE1 only: the conversion goes through the 64-bit MMX registers,
        // two lanes at a time (low pair, then high pair)
        // convert to ints
        __m128 input = v.m_v;
        __m64 lo = _mm_cvttps_pi32( input );
        __m64 hi = _mm_cvttps_pi32( _mm_movehl_ps( input, input ) );
        // convert to floats
        __m128 part = _mm_movelh_ps( input, _mm_cvtpi32_ps( input, hi ) );
        __m128 truncated = _mm_cvtpi32_ps( part, lo );
        // clear out the MMX multimedia state to allow FP calls later
        _mm_empty();
        return Vec4( truncated );
 #else
        // use SSE2 instructions
        return Vec4( _mm_cvtepi32_ps( _mm_cvttps_epi32( v.m_v ) ) );
 #endif
    }
    //! True when at least one lane of left compares less than the same lane of right.
    friend bool CompareAnyLessThan( Vec4::Arg left, Vec4::Arg right )
    {
        __m128 bits = _mm_cmplt_ps( left.m_v, right.m_v );
        // gather one bit per lane; any set bit means some lane compared less
        int value = _mm_movemask_ps( bits );
        return value != 0;
    }
 private:
    __m128 m_v;
 };
 } // namespace squish
 #endif // ndef SQUISH_SIMD_SSE_H
--- a/thirdparty/squish/simd_ve.h
+++ b/thirdparty/squish/simd_ve.h
@ -1,166 +0,0 @@
 /* -----------------------------------------------------------------------------
    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
    Permission is hereby granted, free of charge, to any person obtaining
    a copy of this software and associated documentation files (the
    "Software"), to deal in the Software without restriction, including
    without limitation the rights to use, copy, modify, merge, publish,
    distribute, sublicense, and/or sell copies of the Software, and to
    permit persons to whom the Software is furnished to do so, subject to
    the following conditions:
    The above copyright notice and this permission notice shall be included
    in all copies or substantial portions of the Software.
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
   -------------------------------------------------------------------------- */
 #ifndef SQUISH_SIMD_VE_H
 #define SQUISH_SIMD_VE_H
 #include <altivec.h>
 #undef bool
 namespace squish {
 // NOTE(review): every one-element ( vector float ){ ... } literal in this file
 // relies on how the compiler fills the remaining lanes (splat vs. zero) --
 // confirm for the target toolchain.
 #define VEC4_CONST( X ) Vec4( ( vector float ){ X } )
 //! Four-lane float vector backed by an AltiVec "vector float".
 class Vec4
 {
 public:
    // arguments are taken by value in this back end
    typedef Vec4 Arg;
    //! Leaves the vector contents uninitialised.
    Vec4() {}
    //! Wraps an existing AltiVec value.
    explicit Vec4( vector float v ) : m_v( v ) {}
    Vec4( Vec4 const& arg ) : m_v( arg.m_v ) {}
    Vec4& operator=( Vec4 const& arg )
    {
        m_v = arg.m_v;
        return *this;
    }
    //! Copies s into every lane.
    explicit Vec4( float s )
    {
        // fill a float array and read it back as a vector through the union
        union { vector float v; float c[4]; } u;
        u.c[0] = s;
        u.c[1] = s;
        u.c[2] = s;
        u.c[3] = s;
        m_v = u.v;
    }
    //! Builds a vector from four individual lanes.
    Vec4( float x, float y, float z, float w )
    {
        union { vector float v; float c[4]; } u;
        u.c[0] = x;
        u.c[1] = y;
        u.c[2] = z;
        u.c[3] = w;
        m_v = u.v;
    }
    //! Returns the first three lanes as a Vec3.
    Vec3 GetVec3() const
    {
        // read the lanes back out through the same union layout
        union { vector float v; float c[4]; } u;
        u.v = m_v;
        return Vec3( u.c[0], u.c[1], u.c[2] );
    }
    //! The Splat functions replicate one lane across the whole vector.
    Vec4 SplatX() const { return Vec4( vec_splat( m_v, 0 ) ); }
    Vec4 SplatY() const { return Vec4( vec_splat( m_v, 1 ) ); }
    Vec4 SplatZ() const { return Vec4( vec_splat( m_v, 2 ) ); }
    Vec4 SplatW() const { return Vec4( vec_splat( m_v, 3 ) ); }
    //! Lane-wise in-place addition.
    Vec4& operator+=( Arg v )
    {
        m_v = vec_add( m_v, v.m_v );
        return *this;
    }
    //! Lane-wise in-place subtraction.
    Vec4& operator-=( Arg v )
    {
        m_v = vec_sub( m_v, v.m_v );
        return *this;
    }
    //! Lane-wise in-place multiplication.
    Vec4& operator*=( Arg v )
    {
        // the product is expressed as a multiply-add with a -0.0f addend;
        // presumably so the addend cannot change the sign of a zero product
        m_v = vec_madd( m_v, v.m_v, ( vector float ){ -0.0f } );
        return *this;
    }
    //! Lane-wise sum.
    friend Vec4 operator+( Vec4::Arg left, Vec4::Arg right  )
    {
        return Vec4( vec_add( left.m_v, right.m_v ) );
    }
    //! Lane-wise difference.
    friend Vec4 operator-( Vec4::Arg left, Vec4::Arg right  )
    {
        return Vec4( vec_sub( left.m_v, right.m_v ) );
    }
    //! Lane-wise product (same multiply-add form as operator*=).
    friend Vec4 operator*( Vec4::Arg left, Vec4::Arg right  )
    {
        return Vec4( vec_madd( left.m_v, right.m_v, ( vector float ){ -0.0f } ) );
    }
    //! Returns a*b + c
    friend Vec4 MultiplyAdd( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c )
    {
        return Vec4( vec_madd( a.m_v, b.m_v, c.m_v ) );
    }
    //! Returns -( a*b - c )
    friend Vec4 NegativeMultiplySubtract( Vec4::Arg a, Vec4::Arg b, Vec4::Arg c )
    {
        return Vec4( vec_nmsub( a.m_v, b.m_v, c.m_v ) );
    }
    //! Lane-wise reciprocal: hardware estimate plus one refinement step.
    friend Vec4 Reciprocal( Vec4::Arg v )
    {
        // get the reciprocal estimate
        vector float estimate = vec_re( v.m_v );
        // one round of Newton-Raphson refinement
        // NOTE(review): the { 1.0f } literal must hold 1.0f in every lane for
        // the refinement to be correct in all lanes -- confirm
        vector float diff = vec_nmsub( estimate, v.m_v, ( vector float ){ 1.0f } );
        return Vec4( vec_madd( diff, estimate, estimate ) );
    }
    //! Lane-wise minimum.
    friend Vec4 Min( Vec4::Arg left, Vec4::Arg right )
    {
        return Vec4( vec_min( left.m_v, right.m_v ) );
    }
    //! Lane-wise maximum.
    friend Vec4 Max( Vec4::Arg left, Vec4::Arg right )
    {
        return Vec4( vec_max( left.m_v, right.m_v ) );
    }
    //! Lane-wise truncation via vec_trunc.
    friend Vec4 Truncate( Vec4::Arg v )
    {
        return Vec4( vec_trunc( v.m_v ) );
    }
    //! True when vec_any_lt reports at least one lane of left below right.
    friend bool CompareAnyLessThan( Vec4::Arg left, Vec4::Arg right )
    {
        return vec_any_lt( left.m_v, right.m_v ) != 0;
    }
 private:
    vector float m_v;
 };
 } // namespace squish
 #endif // ndef SQUISH_SIMD_VE_H
--- a/thirdparty/squish/singlecolourfit.cpp
+++ b/thirdparty/squish/singlecolourfit.cpp
@ -1,172 +0,0 @@
 /* -----------------------------------------------------------------------------
    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
    Permission is hereby granted, free of charge, to any person obtaining
    a copy of this software and associated documentation files (the
    "Software"), to deal in the Software without restriction, including
    without limitation the rights to use, copy, modify, merge, publish,
    distribute, sublicense, and/or sell copies of the Software, and to
    permit persons to whom the Software is furnished to do so, subject to
    the following conditions:
    The above copyright notice and this permission notice shall be included
    in all copies or substantial portions of the Software.
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
   -------------------------------------------------------------------------- */
 #include "singlecolourfit.h"
 #include "colourset.h"
 #include "colourblock.h"
 namespace squish {
 // One candidate encoding of a single 8-bit channel value, as read by
 // SingleColourFit::ComputeEndPoints.
 // NOTE(review): field order is presumably fixed by the aggregate initialisers
 // in singlecolourlookup.inl -- do not reorder without checking.
 struct SourceBlock
 {
    u8 start; // quantised start endpoint (divided by 31 or 63 when used)
    u8 end;   // quantised end endpoint (divided by 31 or 63 when used)
    u8 error; // per-channel error; squared and summed by ComputeEndPoints
 };
 // Lookup entry for one 8-bit channel value (the tables are indexed by that
 // value in ComputeEndPoints).
 struct SingleColourLookup
 {
    // the two candidates ComputeEndPoints compares; candidate i is emitted
    // with block index 2*i
    SourceBlock sources[2];
 };
 #include "singlecolourlookup.inl"
 // Rounds a to the nearest integer and clamps the result to [0, limit].
 static int FloatToInt( float a, int limit )
 {
    // the cast truncates towards zero, so adding a half first rounds to nearest
    int const rounded = ( int )( a + 0.5f );
    // anything below zero snaps to zero
    if( rounded < 0 )
        return 0;
    // anything above the limit snaps to the limit
    return rounded > limit ? limit : rounded;
 }
 // Captures the first point of the colour set as an 8-bit-per-channel colour
 // and starts with the worst possible best-error so the first fit always wins.
 SingleColourFit::SingleColourFit( ColourSet const* colours, int flags )
  : ColourFit( colours, flags ),
    m_besterror( INT_MAX )
 {
    // quantise the x, y and z components of the first point to 0..255
    Vec3 const& colour = *m_colours->GetPoints();
    m_colour[0] = ( u8 )FloatToInt( 255.0f*colour.X(), 255 );
    m_colour[1] = ( u8 )FloatToInt( 255.0f*colour.Y(), 255 );
    m_colour[2] = ( u8 )FloatToInt( 255.0f*colour.Z(), 255 );
 }
 // Fits the single colour with the *_3 lookup tables and writes the block
 // through WriteColourBlock3 when it beats the best error seen so far.
 void SingleColourFit::Compress3( void* block )
 {
    // channels 0 and 2 share the 5-bit table, channel 1 uses the 6-bit table
    SingleColourLookup const* const lookups[] = { lookup_5_3, lookup_6_3, lookup_5_3 };
    // pick the end-points and index with the lowest error
    ComputeEndPoints( lookups );
    // nothing to do unless this result is a strict improvement
    if( m_error >= m_besterror )
        return;
    // expand the single index to one index per pixel
    u8 indices[16];
    m_colours->RemapIndices( &m_index, indices );
    // emit the block and remember the error it achieved
    WriteColourBlock3( m_start, m_end, indices, block );
    m_besterror = m_error;
 }
 // Fits the single colour with the *_4 lookup tables and writes the block
 // through WriteColourBlock4 when it beats the best error seen so far.
 void SingleColourFit::Compress4( void* block )
 {
    // channels 0 and 2 share the 5-bit table, channel 1 uses the 6-bit table
    SingleColourLookup const* const lookups[] = { lookup_5_4, lookup_6_4, lookup_5_4 };
    // pick the end-points and index with the lowest error
    ComputeEndPoints( lookups );
    // nothing to do unless this result is a strict improvement
    if( m_error >= m_besterror )
        return;
    // expand the single index to one index per pixel
    u8 indices[16];
    m_colours->RemapIndices( &m_index, indices );
    // emit the block and remember the error it achieved
    WriteColourBlock4( m_start, m_end, indices, block );
    m_besterror = m_error;
 }
 // Tries both candidates stored in the per-channel lookup tables and keeps the
 // one with the smallest summed squared error in m_start, m_end, m_index and
 // m_error. lookups holds one table per channel.
 void SingleColourFit::ComputeEndPoints( SingleColourLookup const* const* lookups )
 {
    m_error = INT_MAX;
    for( int candidate = 0; candidate < 2; ++candidate )
    {
        // look up this candidate for each of the three channels
        SourceBlock const* picked[3];
        int total = 0;
        for( int c = 0; c < 3; ++c )
        {
            picked[c] = &lookups[c][m_colour[c]].sources[candidate];
            int const channelError = picked[c]->error;
            total += channelError*channelError;
        }
        // only a strictly lower error replaces the current choice
        if( total >= m_error )
            continue;
        // rescale the quantised end-points: 31 steps for channels 0 and 2,
        // 63 steps for channel 1
        m_start = Vec3(
            ( float )picked[0]->start/31.0f,
            ( float )picked[1]->start/63.0f,
            ( float )picked[2]->start/31.0f
        );
        m_end = Vec3(
            ( float )picked[0]->end/31.0f,
            ( float )picked[1]->end/63.0f,
            ( float )picked[2]->end/31.0f
        );
        // candidate 0 maps to block index 0, candidate 1 to block index 2
        m_index = ( u8 )( 2*candidate );
        m_error = total;
    }
 }
 } // namespace squish
--- a/thirdparty/squish/singlecolourfit.h
+++ b/thirdparty/squish/singlecolourfit.h
@ -1,58 +0,0 @@
 /* -----------------------------------------------------------------------------
    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
    Permission is hereby granted, free of charge, to any person obtaining
    a copy of this software and associated documentation files (the
    "Software"), to deal in the Software without restriction, including
    without limitation the rights to use, copy, modify, merge, publish,
    distribute, sublicense, and/or sell copies of the Software, and to
    permit persons to whom the Software is furnished to do so, subject to
    the following conditions:
    The above copyright notice and this permission notice shall be included
    in all copies or substantial portions of the Software.
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
   -------------------------------------------------------------------------- */
 #ifndef SQUISH_SINGLECOLOURFIT_H
 #define SQUISH_SINGLECOLOURFIT_H
 #include "squish.h"
 #include "colourfit.h"
 namespace squish {
 class ColourSet;
 struct SingleColourLookup;
 //! Colour fit for a set whose first point is the only colour of interest:
 //! the end-points come from precomputed per-channel lookup tables.
 class SingleColourFit : public ColourFit
 {
 public:
    //! Quantises the first point of colours to 8 bits per channel.
    SingleColourFit( ColourSet const* colours, int flags );
 private:
    //! Fit using the 3-colour lookup tables; writes the block on improvement.
    virtual void Compress3( void* block );
    //! Fit using the 4-colour lookup tables; writes the block on improvement.
    virtual void Compress4( void* block );
    //! Selects the lowest-error candidate from the given per-channel tables.
    void ComputeEndPoints( SingleColourLookup const* const* lookups );
    u8 m_colour[3];  // the colour being fitted, 0..255 per channel
    Vec3 m_start;    // start end-point chosen by ComputeEndPoints
    Vec3 m_end;      // end end-point chosen by ComputeEndPoints
    u8 m_index;      // block index chosen by ComputeEndPoints (0 or 2)
    int m_error;     // error of the most recent ComputeEndPoints result
    int m_besterror; // lowest error written to a block so far
 };
 } // namespace squish
 #endif // ndef SQUISH_SINGLECOLOURFIT_H
--- a/thirdparty/squish/singlecolourlookup.inl
+++ b/thirdparty/squish/singlecolourlookup.inl
--- a/thirdparty/squish/squish.cpp
+++ b/thirdparty/squish/squish.cpp
@ -1,411 +0,0 @@
 /* -----------------------------------------------------------------------------
    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
    Permission is hereby granted, free of charge, to any person obtaining
    a copy of this software and associated documentation files (the
    "Software"), to deal in the Software without restriction, including
    without limitation the rights to use, copy, modify, merge, publish,
    distribute, sublicense, and/or sell copies of the Software, and to
    permit persons to whom the Software is furnished to do so, subject to
    the following conditions:
    The above copyright notice and this permission notice shall be included
    in all copies or substantial portions of the Software.
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
   -------------------------------------------------------------------------- */
 #include <string.h>
 #include "squish.h"
 #include "colourset.h"
 #include "maths.h"
 #include "rangefit.h"
 #include "clusterfit.h"
 #include "colourblock.h"
 #include "alpha.h"
 #include "singlecolourfit.h"
 namespace squish {
 static int FixFlags( int flags )
 {
    // grab the flag bits
    int method = flags & ( kDxt1 | kDxt3 | kDxt5 | kBc4 | kBc5 );
    int fit = flags & ( kColourIterativeClusterFit | kColourClusterFit | kColourRangeFit );
    int extra = flags & kWeightColourByAlpha;
    // set defaults
    if ( method != kDxt3
    &&   method != kDxt5
    &&   method != kBc4
    &&   method != kBc5 )
    {
        method = kDxt1;
    }
    if( fit != kColourRangeFit && fit != kColourIterativeClusterFit )
        fit = kColourClusterFit;
    // done
    return method | fit | extra;
 }
 // Compresses one 4x4 block of pixels (16 RGBA quadruplets in rgba) into
 // block. mask is forwarded unchanged to the compressors; the image-level
 // callers set one bit per pixel that lies inside the image.
 void CompressMasked( u8 const* rgba, int mask, void* block, int flags, float* metric )
 {
    // fix any bad flags
    flags = FixFlags( flags );
    u8* const bytes = reinterpret_cast< u8* >( block );

    // BC4/BC5: each stored channel is compressed with the DXT5 alpha coder
    if( ( flags & ( kBc4 | kBc5 ) ) != 0 )
    {
        // BC4 stores the first source channel only, BC5 the first two
        int const channelCount = ( ( flags & kBc5 ) != 0 ) ? 2 : 1;
        u8 staging[16*4];
        for( int channel = 0; channel < channelCount; ++channel )
        {
            // move the channel into the alpha slot the coder reads
            for( int i = 0; i < 16; ++i )
                staging[i*4 + 3] = rgba[i*4 + channel];
            // each channel occupies its own 8 bytes of the output
            CompressAlphaDxt5( staging, mask, bytes + 8*channel );
        }
        return;
    }

    // DXT3/DXT5 put the alpha data first and the colour data 8 bytes later
    bool const hasAlphaBlock = ( flags & ( kDxt3 | kDxt5 ) ) != 0;
    void* const alphaBlock = block;
    void* const colourBlock = hasAlphaBlock ? static_cast< void* >( bytes + 8 ) : block;

    // create the minimal point set
    ColourSet colours( rgba, mask, flags );
    int const count = colours.GetCount();

    // choose the colour compressor
    if( count == 1 )
    {
        // a lone colour always uses the lookup-table fit
        SingleColourFit fit( &colours, flags );
        fit.Compress( colourBlock );
    }
    else if( count == 0 || ( flags & kColourRangeFit ) != 0 )
    {
        // range fit on request, or when there are no colours at all
        RangeFit fit( &colours, flags, metric );
        fit.Compress( colourBlock );
    }
    else
    {
        // otherwise a cluster fit (iterative or not, depending on flags)
        ClusterFit fit( &colours, flags, metric );
        fit.Compress( colourBlock );
    }

    // alpha is compressed separately for DXT3 and DXT5
    if( ( flags & kDxt3 ) != 0 )
    {
        CompressAlphaDxt3( rgba, mask, alphaBlock );
    }
    else if( ( flags & kDxt5 ) != 0 )
    {
        CompressAlphaDxt5( rgba, mask, alphaBlock );
    }
 }
 // Decompresses a single compressed block into 16 RGBA pixels stored in rgba.
 void Decompress( u8* rgba, void const* block, int flags )
 {
    // fix any bad flags
    flags = FixFlags( flags );

    // DXT3/DXT5 put the alpha data first and the colour data 8 bytes later
    bool const hasAlphaBlock = ( flags & ( kDxt3 | kDxt5 ) ) != 0;
    void const* const alphaBlock = block;
    void const* const colourBlock = hasAlphaBlock
        ? static_cast< void const* >( reinterpret_cast< u8 const* >( block ) + 8 )
        : block;

    // decompress colour
    // -- GODOT start --
    // (upstream called DecompressColour unconditionally; BC4 and BC5 get
    // their own decoders here)
    if( ( flags & kBc4 ) != 0 )
    {
        DecompressColourBc4( rgba, colourBlock );
    }
    else if( ( flags & kBc5 ) != 0 )
    {
        DecompressColourBc5( rgba, colourBlock );
    }
    else
    {
        bool const isDxt1 = ( flags & kDxt1 ) != 0;
        DecompressColour( rgba, colourBlock, isDxt1 );
    }
    // -- GODOT end --

    // alpha is decompressed separately for DXT3 and DXT5
    if( ( flags & kDxt3 ) != 0 )
    {
        DecompressAlphaDxt3( rgba, alphaBlock );
    }
    else if( ( flags & kDxt5 ) != 0 )
    {
        DecompressAlphaDxt5( rgba, alphaBlock );
    }
 }
 // Returns the number of bytes needed to store a width x height image in the
 // format selected by flags.
 int GetStorageRequirements( int width, int height, int flags )
 {
    // fix any bad flags
    flags = FixFlags( flags );
    // the image is tiled with 4x4 blocks, partial blocks included
    int const blocksAcross = ( width + 3 )/4;
    int const blocksDown = ( height + 3 )/4;
    int const blockCount = blocksAcross * blocksDown;
    // DXT1 and BC4 blocks take 8 bytes, every other format 16
    bool const smallBlocks = ( flags & ( kDxt1 | kBc4 ) ) != 0;
    return blockCount*( smallBlocks ? 8 : 16 );
 }
 // Copies one four-byte pixel from source to dest, swapping the first and
 // third bytes when kSourceBGRA is set in flags.
 void CopyRGBA( u8 const* source, u8* dest, int flags )
 {
    if( ( flags & kSourceBGRA ) == 0 )
    {
        // plain byte-for-byte copy, in ascending order
        for( int i = 0; i < 4; ++i )
            dest[i] = source[i];
        return;
    }
    // convert from bgra to rgba
    dest[0] = source[2];
    dest[1] = source[1];
    dest[2] = source[0];
    dest[3] = source[3];
 }
 // Compresses a whole image. rgba points at the first pixel, pitch is the
 // distance in bytes between rows, and blocks receives the compressed 4x4
 // blocks row by row.
 void CompressImage( u8 const* rgba, int width, int height, int pitch, void* blocks, int flags, float* metric )
 {
    // fix any bad flags
    flags = FixFlags( flags );
    // walk the image one row of blocks at a time
 #ifdef SQUISH_USE_OPENMP
 #   pragma omp parallel for
 #endif
    for( int y = 0; y < height; y += 4 )
    {
        // locate the first output block of this row; each row computes its
        // own position so rows do not depend on one another
        int const bytesPerBlock = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16;
        int const blocksPerRow = ( width + 3 )/4;
        u8* targetBlock = reinterpret_cast< u8* >( blocks )
            + ( ( y/4 )*blocksPerRow )*bytesPerBlock;
        for( int x = 0; x < width; x += 4, targetBlock += bytesPerBlock )
        {
            // gather the 4x4 block; texel i sits at column i%4, row i/4
            u8 sourceRgba[16*4];
            int mask = 0;
            for( int texel = 0; texel < 16; ++texel )
            {
                int const sx = x + ( texel & 3 );
                int const sy = y + ( texel >> 2 );
                // texels outside the image are left unset and unmasked
                if( sx >= width || sy >= height )
                    continue;
                CopyRGBA( rgba + pitch*sy + 4*sx, sourceRgba + 4*texel, flags );
                mask |= ( 1 << texel );
            }
            // compress it into the output
            CompressMasked( sourceRgba, mask, targetBlock, flags, metric );
        }
    }
 }
 // Convenience overload for tightly packed images: rows are width*4 bytes apart.
 void CompressImage( u8 const* rgba, int width, int height, void* blocks, int flags, float* metric )
 {
    int const packedPitch = width*4;
    CompressImage( rgba, width, height, packedPitch, blocks, flags, metric );
 }
 // Decompresses a whole image. blocks holds the compressed 4x4 blocks row by
 // row, rgba receives the pixels and pitch is the distance in bytes between
 // output rows.
 void DecompressImage( u8* rgba, int width, int height, int pitch, void const* blocks, int flags )
 {
    // fix any bad flags
    flags = FixFlags( flags );
    // walk the image one row of blocks at a time
 #ifdef SQUISH_USE_OPENMP
 #   pragma omp parallel for
 #endif
    for( int y = 0; y < height; y += 4 )
    {
        // locate the first input block of this row; each row computes its
        // own position so rows do not depend on one another
        int const bytesPerBlock = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16;
        int const blocksPerRow = ( width + 3 )/4;
        u8 const* sourceBlock = reinterpret_cast< u8 const* >( blocks )
            + ( ( y/4 )*blocksPerRow )*bytesPerBlock;
        for( int x = 0; x < width; x += 4, sourceBlock += bytesPerBlock )
        {
            // decompress the block
            u8 targetRgba[4*16];
            Decompress( targetRgba, sourceBlock, flags );
            // scatter the texels; texel i belongs at column i%4, row i/4
            for( int texel = 0; texel < 16; ++texel )
            {
                int const sx = x + ( texel & 3 );
                int const sy = y + ( texel >> 2 );
                // texels that fall outside the image are dropped
                if( sx < width && sy < height )
                    CopyRGBA( targetRgba + 4*texel, rgba + pitch*sy + 4*sx, flags );
            }
        }
    }
 }
 // Convenience overload for tightly packed images: rows are width*4 bytes apart.
 void DecompressImage( u8* rgba, int width, int height, void const* blocks, int flags )
 {
    int const packedPitch = width*4;
    DecompressImage( rgba, width, height, packedPitch, blocks, flags );
 }
 // Squared difference between x and y.
 static double ErrorSq(double x, double y)
 {
    double const delta = x - y;
    return delta * delta;
 }
 // Accumulates the squared colour error (cmse) and alpha error (amse) of one
 // 4x4 block, considering only the w x h texels in its top-left corner.
 //
 // The errors are weighted by the variance of the original block. When that
 // variance is below 4 (a standard deviation of 1 per channel) the block is
 // nearly a single colour. Quantisation errors are easier to see in such
 // blocks, especially across large flat areas where they cause banding, so
 // both errors are multiplied by 5. Images with large single-colour areas can
 // therefore reach a higher WMSE than images full of detail.
 static void ComputeBlockWMSE(u8 const *original, u8 const *compressed, unsigned int w, unsigned int h, double &cmse, double &amse)
 {
    cmse = amse = 0;
    unsigned int sum_p[4] = { 0, 0, 0, 0 };  // per channel sum of pixels
    unsigned int sum_p2[4] = { 0, 0, 0, 0 }; // per channel sum of pixels squared
    // texel i sits at column i%4, row i/4; both pointers advance every texel
    for( unsigned int texel = 0; texel < 16; ++texel, original += 4, compressed += 4 )
    {
        if( ( texel & 3 ) >= w || ( texel >> 2 ) >= h )
            continue;
        double pixelCMSE = 0;
        for( int c = 0; c < 3; ++c )
            pixelCMSE += ErrorSq(original[c], compressed[c]);
        // transparent in both, so colour is inconsequential
        if( original[3] == 0 && compressed[3] == 0 )
            pixelCMSE = 0;
        cmse += pixelCMSE;
        amse += ErrorSq(original[3], compressed[3]);
        // the statistics always include all four channels
        for( int c = 0; c < 4; ++c )
        {
            sum_p[c] += original[c];
            sum_p2[c] += (unsigned int)original[c]*original[c];
        }
    }
    // variance summed over the channels, scaled by area squared so that the
    // whole test stays in integer arithmetic
    unsigned int const area = w*h;
    unsigned int variance = 0;
    for( int c = 0; c < 4; ++c )
        variance += area*sum_p2[c] - sum_p[c]*sum_p[c];
    if( variance < 4 * area * area )
    {
        amse *= 5;
        cmse *= 5;
    }
 }
 // Computes the weighted mean squared error between an original image and its
 // compressed form.
 //
 //   rgba       original pixels, four bytes each
 //   width      image width in pixels
 //   height     image height in pixels
 //   pitch      distance in bytes between rows of rgba
 //   dxt        compressed blocks, stored row by row
 //   flags      compression flags (sanitised with FixFlags)
 //   colourMSE  receives the colour error averaged over width*height*3
 //   alphaMSE   receives the alpha error averaged over width*height
 //
 // Both outputs are 0 when the image has no pixels.
 void ComputeMSE( u8 const *rgba, int width, int height, int pitch, u8 const *dxt, int flags, double &colourMSE, double &alphaMSE )
 {
    // fix any bad flags
    flags = FixFlags( flags );
    colourMSE = alphaMSE = 0;
    // an empty image has no error; leave before the averages divide by zero
    if( width <= 0 || height <= 0 )
        return;
    // initialise the block input
    u8 const* sourceBlock = dxt;
    // DXT1 and BC4 blocks are 8 bytes, everything else 16; this has to match
    // the stride used by CompressImage and DecompressImage
    int const bytesPerBlock = ( ( flags & ( kDxt1 | kBc4 ) ) != 0 ) ? 8 : 16;
    // loop over blocks
    for( int y = 0; y < height; y += 4 )
    {
        for( int x = 0; x < width; x += 4 )
        {
            // decompress the block
            u8 targetRgba[4*16];
            Decompress( targetRgba, sourceBlock, flags );
            // copy the matching original pixels into a similar pixel block;
            // texels outside the image stay unset and are never read because
            // ComputeBlockWMSE is given the clipped block size
            u8 originalRgba[4*16];
            u8* originalPixel = originalRgba;
            for( int py = 0; py < 4; ++py )
            {
                for( int px = 0; px < 4; ++px )
                {
                    int sx = x + px;
                    int sy = y + py;
                    if( sx < width && sy < height )
                    {
                        u8 const* targetPixel = rgba + pitch*sy + 4*sx;
                        CopyRGBA(targetPixel, originalPixel, flags);
                    }
                    originalPixel += 4;
                }
            }
            // compute the weighted MSE of the block
            double blockCMSE, blockAMSE;
            ComputeBlockWMSE(originalRgba, targetRgba, std::min(4, width - x), std::min(4, height - y), blockCMSE, blockAMSE);
            colourMSE += blockCMSE;
            alphaMSE += blockAMSE;
            // advance
            sourceBlock += bytesPerBlock;
        }
    }
    // average in floating point so large images cannot overflow an int
    double const pixelCount = ( double )width * ( double )height;
    colourMSE /= pixelCount * 3.0;
    alphaMSE /= pixelCount;
 }
 // Convenience overload for tightly packed images: rows are width*4 bytes apart.
 void ComputeMSE( u8 const *rgba, int width, int height, u8 const *dxt, int flags, double &colourMSE, double &alphaMSE )
 {
    int const packedPitch = width*4;
    ComputeMSE( rgba, width, height, packedPitch, dxt, flags, colourMSE, alphaMSE );
 }
 } // namespace squish
--- a/thirdparty/squish/squish.h
+++ b/thirdparty/squish/squish.h
@ -1,309 +0,0 @@
 /* -----------------------------------------------------------------------------
    Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
    Permission is hereby granted, free of charge, to any person obtaining
    a copy of this software and associated documentation files (the
    "Software"), to deal in the Software without restriction, including
    without limitation the rights to use, copy, modify, merge, publish,
    distribute, sublicense, and/or sell copies of the Software, and to
    permit persons to whom the Software is furnished to do so, subject to
    the following conditions:
    The above copyright notice and this permission notice shall be included
    in all copies or substantial portions of the Software.
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
    IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
    TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
    SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
   -------------------------------------------------------------------------- */
 #ifndef SQUISH_H
 #define SQUISH_H
 //! All squish API functions live in this namespace.
 namespace squish {
 // -----------------------------------------------------------------------------
 //! A single unsigned byte; the API uses it for pixel component values
 //! (e.g. the rgba arrays) and for compressed block data.
 typedef unsigned char u8;
 // -----------------------------------------------------------------------------
 enum
 {
    //! Select DXT1 compression.
    kDxt1 = 0x001,
    //! Select DXT3 compression.
    kDxt3 = 0x002,
    //! Select DXT5 compression.
    kDxt5 = 0x004,
    //! Select BC4 compression.
    kBc4 = 0x008,
    //! Select BC5 compression.
    kBc5 = 0x010,
    //! Colour compressor: slow but high quality (the default).
    kColourClusterFit = 0x020,
    //! Colour compressor: fast but low quality.
    kColourRangeFit = 0x040,
    //! During cluster fit, weight each colour by its alpha (off by default).
    kWeightColourByAlpha = 0x080,
    //! Colour compressor: very slow but very high quality.
    kColourIterativeClusterFit = 0x100,
    //! The source pixels are BGRA rather than RGBA.
    kSourceBGRA = 0x200
 };
 // -----------------------------------------------------------------------------
 /*! @brief Compresses a 4x4 block of pixels.
    @param rgba   The rgba values of the 16 source pixels.
    @param mask   The valid pixel mask.
    @param block  Storage for the compressed DXT block.
    @param flags  Compression flags.
    @param metric An optional perceptual metric.
    The source pixels should be presented as a contiguous array of 16 rgba
    values, with each component as 1 byte each. In memory this should be:
        { r1, g1, b1, a1, .... , r16, g16, b16, a16 }
    The mask parameter enables only certain pixels within the block. The lowest
    bit enables the first pixel and so on up to the 16th bit. Bits beyond the
    16th bit are ignored. Pixels that are not enabled are allowed to take
    arbitrary colours in the output block. An example of how this can be used
    is in the CompressImage function to disable pixels outside the bounds of
    the image when the width or height is not divisible by 4.
    The flags parameter should specify kDxt1, kDxt3, kDxt5, kBc4, or kBc5 compression;
    however, DXT1 will be used by default if none is specified. DXT1 and BC4
    compression require 8 bytes of storage for the compressed block; DXT3, DXT5
    and BC5 compression require 16 bytes of storage per block.
    The flags parameter can also specify a preferred colour compressor to use
    when fitting the RGB components of the data. Possible colour compressors
    are: kColourClusterFit (the default), kColourRangeFit (very fast, low
    quality) or kColourIterativeClusterFit (slowest, best quality).
    When using kColourClusterFit or kColourIterativeClusterFit, an additional
    flag can be specified to weight the importance of each pixel by its alpha
    value. For images that are rendered using alpha blending, this can
    significantly increase the perceived quality.
    The metric parameter can be used to weight the relative importance of each
    colour channel, or pass NULL to use the default uniform weight of
    { 1.0f, 1.0f, 1.0f }. This replaces the previous flag-based control that
    allowed either uniform or "perceptual" weights with the fixed values
    { 0.2126f, 0.7152f, 0.0722f }. If non-NULL, the metric should point to a
    contiguous array of 3 floats.
 */
 void CompressMasked( u8 const* rgba, int mask, void* block, int flags, float* metric = 0 );
 // -----------------------------------------------------------------------------
 /*! @brief Compresses a 4x4 block of pixels.
    @param rgba   The rgba values of the 16 source pixels.
    @param block  Storage for the compressed DXT block.
    @param flags  Compression flags.
    @param metric An optional perceptual metric.
    The source pixels should be presented as a contiguous array of 16 rgba
    values, with each component as 1 byte each. In memory this should be:
        { r1, g1, b1, a1, .... , r16, g16, b16, a16 }
    The flags parameter should specify kDxt1, kDxt3, kDxt5, kBc4, or kBc5 compression;
    however, DXT1 will be used by default if none is specified. DXT1 and BC4
    compression require 8 bytes of storage for the compressed block; DXT3, DXT5
    and BC5 compression require 16 bytes of storage per block.
    The flags parameter can also specify a preferred colour compressor to use
    when fitting the RGB components of the data. Possible colour compressors
    are: kColourClusterFit (the default), kColourRangeFit (very fast, low
    quality) or kColourIterativeClusterFit (slowest, best quality).
    When using kColourClusterFit or kColourIterativeClusterFit, an additional
    flag can be specified to weight the importance of each pixel by its alpha
    value. For images that are rendered using alpha blending, this can
    significantly increase the perceived quality.
    The metric parameter can be used to weight the relative importance of each
    colour channel, or pass NULL to use the default uniform weight of
    { 1.0f, 1.0f, 1.0f }. This replaces the previous flag-based control that
    allowed either uniform or "perceptual" weights with the fixed values
    { 0.2126f, 0.7152f, 0.0722f }. If non-NULL, the metric should point to a
    contiguous array of 3 floats.
    This method is an inline that calls CompressMasked with a mask of 0xffff,
    provided for compatibility with older versions of squish.
 */
 inline void Compress( u8 const* rgba, void* block, int flags, float* metric = 0 )
 {
    // low 16 bits set: one mask bit per pixel of the 4x4 block, all enabled
    int const allPixelsMask = 0xffff;
    CompressMasked( rgba, allPixelsMask, block, flags, metric );
 }
 // -----------------------------------------------------------------------------
 /*! @brief Decompresses a 4x4 block of pixels.
    @param rgba  Storage for the 16 decompressed pixels.
    @param block The compressed DXT block.
    @param flags Compression flags.
    The decompressed pixels will be written as a contiguous array of 16 rgba
    values, with each component as 1 byte each. In memory this is:
        { r1, g1, b1, a1, .... , r16, g16, b16, a16 }
    The flags parameter should specify kDxt1, kDxt3, kDxt5, kBc4, or kBc5 compression,
    however, DXT1 will be used by default if none is specified. All other flags
    are ignored.
 */
 void Decompress( u8* rgba, void const* block, int flags );
 // -----------------------------------------------------------------------------
 /*! @brief Computes the amount of compressed storage required.
    @param width  The width of the image.
    @param height The height of the image.
    @param flags  Compression flags.
    The flags parameter should specify kDxt1, kDxt3, kDxt5, kBc4, or kBc5 compression,
    however, DXT1 will be used by default if none is specified. All other flags
    are ignored.
    Most DXT images will be a multiple of 4 in each dimension, but this
    function supports arbitrary size images by allowing the outer blocks to
    be only partially used.
 */
 int GetStorageRequirements( int width, int height, int flags );
 // -----------------------------------------------------------------------------
 /*! @brief Compresses an image in memory.
    @param rgba   The pixels of the source.
    @param width  The width of the source image.
    @param height The height of the source image.
    @param pitch  The pitch of the source image.
    @param blocks Storage for the compressed output.
    @param flags  Compression flags.
    @param metric An optional perceptual metric.
    The source pixels should be presented as a contiguous array of width*height
    rgba values, with each component as 1 byte each. In memory this should be:
        { r1, g1, b1, a1, .... , rn, gn, bn, an } for n = width*height
    The flags parameter should specify kDxt1, kDxt3, kDxt5, kBc4, or kBc5 compression;
    however, DXT1 will be used by default if none is specified. DXT1 and BC4
    compression require 8 bytes of storage for each compressed block; DXT3, DXT5
    and BC5 compression require 16 bytes of storage per block.
    The flags parameter can also specify a preferred colour compressor to use
    when fitting the RGB components of the data. Possible colour compressors
    are: kColourClusterFit (the default), kColourRangeFit (very fast, low
    quality) or kColourIterativeClusterFit (slowest, best quality).
    When using kColourClusterFit or kColourIterativeClusterFit, an additional
    flag can be specified to weight the importance of each pixel by its alpha
    value. For images that are rendered using alpha blending, this can
    significantly increase the perceived quality.
    The metric parameter can be used to weight the relative importance of each
    colour channel, or pass NULL to use the default uniform weight of
    { 1.0f, 1.0f, 1.0f }. This replaces the previous flag-based control that
    allowed either uniform or "perceptual" weights with the fixed values
    { 0.2126f, 0.7152f, 0.0722f }. If non-NULL, the metric should point to a
    contiguous array of 3 floats.
    Internally this function calls squish::CompressMasked for each block, which
    allows for pixels outside the image to take arbitrary values. The function
    squish::GetStorageRequirements can be called to compute the amount of memory
    to allocate for the compressed output.
    Note on compression quality: When compressing textures with
    libsquish it is recommended to apply a gamma-correction
    beforehand. This will reduce the blockiness in dark areas. The
    level of necessary gamma-correction is platform dependent. For
    example, a gamma correction with gamma = 0.5 before compression
    and gamma = 2.0 after decompression yields good results on the
    Windows platform but for other platforms like MacOS X a different
    gamma value may be more suitable.
 */
 void CompressImage( u8 const* rgba, int width, int height, int pitch, void* blocks, int flags, float* metric = 0 );
 void CompressImage( u8 const* rgba, int width, int height, void* blocks, int flags, float* metric = 0 );
 // -----------------------------------------------------------------------------
 /*! @brief Decompresses an image in memory.
    @param rgba   Storage for the decompressed pixels.
    @param width  The width of the source image.
    @param height The height of the source image.
    @param pitch  The pitch of the decompressed pixels.
    @param blocks The compressed DXT blocks.
    @param flags  Compression flags.
    The decompressed pixels will be written as a contiguous array of width*height
    rgba values, with each component as 1 byte each. In memory this is:
        { r1, g1, b1, a1, .... , rn, gn, bn, an } for n = width*height
    The flags parameter should specify kDxt1, kDxt3, kDxt5, kBc4, or kBc5 compression,
    however, DXT1 will be used by default if none is specified. All other flags
    are ignored.
    Internally this function calls squish::Decompress for each block.
 */
 void DecompressImage( u8* rgba, int width, int height, int pitch, void const* blocks, int flags );
 void DecompressImage( u8* rgba, int width, int height, void const* blocks, int flags );
 // -----------------------------------------------------------------------------
 /*! @brief Computes the MSE of a compressed image in memory.
    @param rgba      The original image pixels.
    @param width     The width of the source image.
    @param height    The height of the source image.
    @param pitch     The pitch of the source image.
    @param dxt       The compressed dxt blocks
    @param flags     Compression flags.
    @param colourMSE The MSE of the colour values.
    @param alphaMSE  The MSE of the alpha values.
    The colour MSE and alpha MSE are computed across all pixels: colourMSE is the
    summed per-block colour error divided by 3*width*height (i.e. averaged over
    every r, g and b value), and alphaMSE is the summed per-block alpha error
    divided by width*height.
    The flags parameter should specify kDxt1, kDxt3, kDxt5, kBc4, or kBc5 compression,
    however, DXT1 will be used by default if none is specified. All other flags
    are ignored.
    Internally this function calls squish::Decompress for each block.
 */
 void ComputeMSE(u8 const *rgba, int width, int height, int pitch, u8 const *dxt, int flags, double &colourMSE, double &alphaMSE);
 void ComputeMSE(u8 const *rgba, int width, int height, u8 const *dxt, int flags, double &colourMSE, double &alphaMSE);
 // -----------------------------------------------------------------------------
 } // namespace squish
 #endif // ndef SQUISH_H