Use instanced array buffer instead of UBO for canvas item batching

This simplifies the generated shader code which increases both performance and compile time on low end devices
This commit is contained in:
clayjohn 2022-12-13 17:54:44 -08:00
parent aa8a899f52
commit b6a1aa15b1
6 changed files with 235 additions and 207 deletions

View File

@ -568,9 +568,8 @@ void RasterizerCanvasGLES3::_render_items(RID p_to_render_target, int p_item_cou
_new_batch(batch_broken, index);
// Override the start position and index as we want to start from where we finished off last time.
state.canvas_instance_batches[state.current_batch_index].start = r_last_index * sizeof(InstanceData);
state.canvas_instance_batches[state.current_batch_index].start = r_last_index;
index = 0;
_align_instance_data_buffer(index);
for (int i = 0; i < p_item_count; i++) {
Item *ci = items[i];
@ -630,14 +629,14 @@ void RasterizerCanvasGLES3::_render_items(RID p_to_render_target, int p_item_cou
}
// Copy over all data needed for rendering.
glBindBuffer(GL_UNIFORM_BUFFER, state.canvas_instance_data_buffers[state.current_buffer].ubo);
glBindBuffer(GL_ARRAY_BUFFER, state.canvas_instance_data_buffers[state.current_buffer].buffer);
#ifdef WEB_ENABLED
glBufferSubData(GL_UNIFORM_BUFFER, r_last_index * sizeof(InstanceData), sizeof(InstanceData) * index, state.instance_data_array);
glBufferSubData(GL_ARRAY_BUFFER, r_last_index * sizeof(InstanceData), sizeof(InstanceData) * index, state.instance_data_array);
#else
// On Desktop and mobile we map the memory without synchronizing for maximum speed.
void *ubo = glMapBufferRange(GL_UNIFORM_BUFFER, r_last_index * sizeof(InstanceData), index * sizeof(InstanceData), GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
memcpy(ubo, state.instance_data_array, index * sizeof(InstanceData));
glUnmapBuffer(GL_UNIFORM_BUFFER);
void *buffer = glMapBufferRange(GL_ARRAY_BUFFER, r_last_index * sizeof(InstanceData), index * sizeof(InstanceData), GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
memcpy(buffer, state.instance_data_array, index * sizeof(InstanceData));
glUnmapBuffer(GL_ARRAY_BUFFER);
#endif
glDisable(GL_SCISSOR_TEST);
@ -664,7 +663,17 @@ void RasterizerCanvasGLES3::_render_items(RID p_to_render_target, int p_item_cou
uint64_t specialization = 0;
specialization |= uint64_t(state.canvas_instance_batches[i].lights_disabled);
specialization |= uint64_t(!GLES3::Config::get_singleton()->float_texture_supported) << 1;
bool success = _bind_material(material_data, variant, specialization);
RID shader_version = data.canvas_shader_default_version;
if (material_data) {
if (material_data->shader_data->version.is_valid() && material_data->shader_data->valid) {
// Bind uniform buffer and textures
material_data->bind_uniforms();
shader_version = material_data->shader_data->version;
}
}
bool success = GLES3::MaterialStorage::get_singleton()->shaders.canvas_shader.version_bind_shader(shader_version, variant, specialization);
if (!success) {
continue;
}
@ -1217,26 +1226,15 @@ void RasterizerCanvasGLES3::_render_batch(Light *p_lights, uint32_t p_index) {
_bind_canvas_texture(state.canvas_instance_batches[p_index].tex, state.canvas_instance_batches[p_index].filter, state.canvas_instance_batches[p_index].repeat);
// Bind the region of the UBO used by this batch.
// If region exceeds the boundary of the UBO, just ignore.
uint32_t range_bytes = data.max_instances_per_batch * sizeof(InstanceData);
if (state.canvas_instance_batches[p_index].start >= (data.max_instances_per_ubo - 1) * sizeof(InstanceData)) {
return;
} else if (state.canvas_instance_batches[p_index].start >= (data.max_instances_per_ubo - data.max_instances_per_batch) * sizeof(InstanceData)) {
// If we have less than a full batch at the end, we can just draw it anyway.
// OpenGL will complain about the UBO being smaller than expected, but it should render fine.
range_bytes = (data.max_instances_per_ubo - 1) * sizeof(InstanceData) - state.canvas_instance_batches[p_index].start;
}
uint32_t range_start = state.canvas_instance_batches[p_index].start;
glBindBufferRange(GL_UNIFORM_BUFFER, INSTANCE_UNIFORM_LOCATION, state.canvas_instance_data_buffers[state.current_buffer].ubo, range_start, range_bytes);
switch (state.canvas_instance_batches[p_index].command_type) {
case Item::Command::TYPE_RECT:
case Item::Command::TYPE_NINEPATCH: {
glBindVertexArray(data.indexed_quad_array);
glDrawElements(GL_TRIANGLES, state.canvas_instance_batches[p_index].instance_count * 6, GL_UNSIGNED_INT, 0);
glBindBuffer(GL_UNIFORM_BUFFER, 0);
glBindBuffer(GL_ARRAY_BUFFER, state.canvas_instance_data_buffers[state.current_buffer].buffer);
uint32_t range_start = state.canvas_instance_batches[p_index].start * sizeof(InstanceData);
_enable_attributes(range_start, false);
glDrawElementsInstanced(GL_TRIANGLES, 6, GL_UNSIGNED_INT, 0, state.canvas_instance_batches[p_index].instance_count);
glBindVertexArray(0);
} break;
@ -1248,18 +1246,21 @@ void RasterizerCanvasGLES3::_render_batch(Light *p_lights, uint32_t p_index) {
ERR_FAIL_COND(!pb);
glBindVertexArray(pb->vertex_array);
glBindBuffer(GL_ARRAY_BUFFER, state.canvas_instance_data_buffers[state.current_buffer].buffer);
uint32_t range_start = state.canvas_instance_batches[p_index].start * sizeof(InstanceData);
_enable_attributes(range_start, false);
if (pb->color_disabled && pb->color != Color(1.0, 1.0, 1.0, 1.0)) {
glVertexAttrib4f(RS::ARRAY_COLOR, pb->color.r, pb->color.g, pb->color.b, pb->color.a);
}
if (pb->index_buffer != 0) {
glDrawElements(prim[polygon->primitive], pb->count, GL_UNSIGNED_INT, nullptr);
glDrawElementsInstanced(prim[polygon->primitive], pb->count, GL_UNSIGNED_INT, nullptr, 1);
} else {
glDrawArrays(prim[polygon->primitive], 0, pb->count);
glDrawArraysInstanced(prim[polygon->primitive], 0, pb->count, 1);
}
glBindVertexArray(0);
glBindBuffer(GL_UNIFORM_BUFFER, 0);
if (pb->color_disabled && pb->color != Color(1.0, 1.0, 1.0, 1.0)) {
// Reset so this doesn't pollute other draw calls.
@ -1269,14 +1270,16 @@ void RasterizerCanvasGLES3::_render_batch(Light *p_lights, uint32_t p_index) {
case Item::Command::TYPE_PRIMITIVE: {
glBindVertexArray(data.canvas_quad_array);
glBindBuffer(GL_ARRAY_BUFFER, state.canvas_instance_data_buffers[state.current_buffer].buffer);
uint32_t range_start = state.canvas_instance_batches[p_index].start * sizeof(InstanceData);
_enable_attributes(range_start, true);
const GLenum primitive[5] = { GL_POINTS, GL_POINTS, GL_LINES, GL_TRIANGLES, GL_TRIANGLES };
int instance_count = state.canvas_instance_batches[p_index].instance_count;
if (instance_count > 1) {
ERR_FAIL_COND(instance_count <= 0);
if (instance_count >= 1) {
glDrawArraysInstanced(primitive[state.canvas_instance_batches[p_index].primitive_points], 0, state.canvas_instance_batches[p_index].primitive_points, instance_count);
} else {
glDrawArrays(primitive[state.canvas_instance_batches[p_index].primitive_points], 0, state.canvas_instance_batches[p_index].primitive_points);
}
glBindBuffer(GL_UNIFORM_BUFFER, 0);
} break;
@ -1370,6 +1373,11 @@ void RasterizerCanvasGLES3::_render_batch(Light *p_lights, uint32_t p_index) {
index_array_gl = mesh_storage->mesh_surface_get_index_buffer(surface, 0);
bool use_index_buffer = false;
glBindVertexArray(vertex_array_gl);
glBindBuffer(GL_ARRAY_BUFFER, state.canvas_instance_data_buffers[state.current_buffer].buffer);
uint32_t range_start = state.canvas_instance_batches[p_index].start * sizeof(InstanceData);
_enable_attributes(range_start, false, instance_count);
if (index_array_gl != 0) {
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, index_array_gl);
use_index_buffer = true;
@ -1396,20 +1404,13 @@ void RasterizerCanvasGLES3::_render_batch(Light *p_lights, uint32_t p_index) {
}
GLenum primitive_gl = prim[int(primitive)];
if (instance_count == 1) {
if (use_index_buffer) {
glDrawElements(primitive_gl, mesh_storage->mesh_surface_get_vertices_drawn_count(surface), mesh_storage->mesh_surface_get_index_type(surface), 0);
} else {
glDrawArrays(primitive_gl, 0, mesh_storage->mesh_surface_get_vertices_drawn_count(surface));
}
} else if (instance_count > 1) {
if (use_index_buffer) {
glDrawElementsInstanced(primitive_gl, mesh_storage->mesh_surface_get_vertices_drawn_count(surface), mesh_storage->mesh_surface_get_index_type(surface), 0, instance_count);
} else {
glDrawArraysInstanced(primitive_gl, 0, mesh_storage->mesh_surface_get_vertices_drawn_count(surface), instance_count);
}
}
if (use_index_buffer) {
glDrawElementsInstanced(primitive_gl, mesh_storage->mesh_surface_get_vertices_drawn_count(surface), mesh_storage->mesh_surface_get_index_type(surface), 0, instance_count);
} else {
glDrawArraysInstanced(primitive_gl, 0, mesh_storage->mesh_surface_get_vertices_drawn_count(surface), instance_count);
}
glBindBuffer(GL_ARRAY_BUFFER, 0);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
if (instance_count > 1) {
glDisableVertexAttribArray(5);
@ -1429,7 +1430,7 @@ void RasterizerCanvasGLES3::_render_batch(Light *p_lights, uint32_t p_index) {
}
void RasterizerCanvasGLES3::_add_to_batch(uint32_t &r_index, bool &r_batch_broken) {
if (r_index >= data.max_instances_per_ubo - 1) {
if (r_index >= data.max_instances_per_buffer - 1) {
ERR_PRINT_ONCE("Trying to draw too many items. Please increase maximum number of items in the project settings 'rendering/gl_compatibility/item_buffer_size'");
return;
}
@ -1457,27 +1458,25 @@ void RasterizerCanvasGLES3::_new_batch(bool &r_batch_broken, uint32_t &r_index)
// Copy the properties of the current batch, we will manually update the things that changed.
Batch new_batch = state.canvas_instance_batches[state.current_batch_index];
new_batch.instance_count = 0;
new_batch.start = state.canvas_instance_batches[state.current_batch_index].start + state.canvas_instance_batches[state.current_batch_index].instance_count * sizeof(InstanceData);
new_batch.start = state.canvas_instance_batches[state.current_batch_index].start + state.canvas_instance_batches[state.current_batch_index].instance_count;
state.current_batch_index++;
state.canvas_instance_batches.push_back(new_batch);
_align_instance_data_buffer(r_index);
}
bool RasterizerCanvasGLES3::_bind_material(GLES3::CanvasMaterialData *p_material_data, CanvasShaderGLES3::ShaderVariant p_variant, uint64_t p_specialization) {
if (p_material_data) {
if (p_material_data->shader_data->version.is_valid() && p_material_data->shader_data->valid) {
// Bind uniform buffer and textures
p_material_data->bind_uniforms();
return GLES3::MaterialStorage::get_singleton()->shaders.canvas_shader.version_bind_shader(p_material_data->shader_data->version, p_variant, p_specialization);
} else {
return GLES3::MaterialStorage::get_singleton()->shaders.canvas_shader.version_bind_shader(data.canvas_shader_default_version, p_variant, p_specialization);
}
} else {
return GLES3::MaterialStorage::get_singleton()->shaders.canvas_shader.version_bind_shader(data.canvas_shader_default_version, p_variant, p_specialization);
void RasterizerCanvasGLES3::_enable_attributes(uint32_t p_start, bool p_primitive, uint32_t p_rate) {
uint32_t split = p_primitive ? 11 : 12;
for (uint32_t i = 6; i < split; i++) {
glEnableVertexAttribArray(i);
glVertexAttribPointer(i, 4, GL_FLOAT, GL_FALSE, sizeof(InstanceData), CAST_INT_TO_UCHAR_PTR(p_start + (i - 6) * 4 * sizeof(float)));
glVertexAttribDivisor(i, p_rate);
}
for (uint32_t i = split; i <= 13; i++) {
glEnableVertexAttribArray(i);
glVertexAttribIPointer(i, 4, GL_UNSIGNED_INT, sizeof(InstanceData), CAST_INT_TO_UCHAR_PTR(p_start + (i - 6) * 4 * sizeof(float)));
glVertexAttribDivisor(i, p_rate);
}
}
RID RasterizerCanvasGLES3::light_create() {
CanvasLight canvas_light;
return canvas_light_owner.make_rid(canvas_light);
@ -2416,8 +2415,8 @@ void RasterizerCanvasGLES3::_allocate_instance_data_buffer() {
GLuint new_buffers[3];
glGenBuffers(3, new_buffers);
// Batch UBO.
glBindBuffer(GL_UNIFORM_BUFFER, new_buffers[0]);
glBufferData(GL_UNIFORM_BUFFER, data.max_instance_buffer_size, nullptr, GL_STREAM_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, new_buffers[0]);
glBufferData(GL_ARRAY_BUFFER, data.max_instance_buffer_size, nullptr, GL_STREAM_DRAW);
// Light uniform buffer.
glBindBuffer(GL_UNIFORM_BUFFER, new_buffers[1]);
glBufferData(GL_UNIFORM_BUFFER, sizeof(LightUniform) * data.max_lights_per_render, nullptr, GL_STREAM_DRAW);
@ -2427,36 +2426,16 @@ void RasterizerCanvasGLES3::_allocate_instance_data_buffer() {
state.current_buffer = (state.current_buffer + 1);
DataBuffer db;
db.ubo = new_buffers[0];
db.buffer = new_buffers[0];
db.light_ubo = new_buffers[1];
db.state_ubo = new_buffers[2];
db.last_frame_used = RSG::rasterizer->get_frame_number();
state.canvas_instance_data_buffers.insert(state.current_buffer, db);
state.current_buffer = state.current_buffer % state.canvas_instance_data_buffers.size();
glBindBuffer(GL_ARRAY_BUFFER, 0);
glBindBuffer(GL_UNIFORM_BUFFER, 0);
}
// Batch start positions need to be aligned to the device's GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT.
// This needs to be called anytime a new batch is created.
void RasterizerCanvasGLES3::_align_instance_data_buffer(uint32_t &r_index) {
if (GLES3::Config::get_singleton()->uniform_buffer_offset_alignment > int(sizeof(InstanceData))) {
uint32_t offset = state.canvas_instance_batches[state.current_batch_index].start % GLES3::Config::get_singleton()->uniform_buffer_offset_alignment;
if (offset > 0) {
// uniform_buffer_offset_alignment can be 4, 16, 32, or 256. Our instance batches are 128 bytes.
// Accordingly, this branch is only triggered if we are 128 bytes off.
uint32_t offset_bytes = GLES3::Config::get_singleton()->uniform_buffer_offset_alignment - offset;
state.canvas_instance_batches[state.current_batch_index].start += offset_bytes;
// Offset the instance array so it stays in sync with batch start points.
// This creates gaps in the instance buffer with wasted space, but we can't help it.
r_index += offset_bytes / sizeof(InstanceData);
if (r_index > 0) {
// In this case we need to copy over the basic data.
state.instance_data_array[r_index] = state.instance_data_array[r_index - 1];
}
}
}
}
void RasterizerCanvasGLES3::set_time(double p_time) {
state.time = p_time;
}
@ -2601,12 +2580,12 @@ RasterizerCanvasGLES3::RasterizerCanvasGLES3() {
data.max_instances_per_batch = 128;
} else {
data.max_lights_per_render = 256;
data.max_instances_per_batch = 512;
data.max_instances_per_batch = 2048;
}
// Reserve 3 Uniform Buffers for instance data Frame N, N+1 and N+2
data.max_instances_per_ubo = MAX(data.max_instances_per_batch, uint32_t(GLOBAL_GET("rendering/gl_compatibility/item_buffer_size")));
data.max_instance_buffer_size = data.max_instances_per_ubo * sizeof(InstanceData); // 16,384 instances * 128 bytes = 2,097,152 bytes = 2,048 kb
data.max_instances_per_buffer = MAX(data.max_instances_per_batch, uint32_t(GLOBAL_GET("rendering/gl_compatibility/item_buffer_size")));
data.max_instance_buffer_size = data.max_instances_per_buffer * sizeof(InstanceData); // 16,384 instances * 128 bytes = 2,097,152 bytes = 2,048 kb
state.canvas_instance_data_buffers.resize(3);
state.canvas_instance_batches.reserve(200);
@ -2614,8 +2593,8 @@ RasterizerCanvasGLES3::RasterizerCanvasGLES3() {
GLuint new_buffers[3];
glGenBuffers(3, new_buffers);
// Batch UBO.
glBindBuffer(GL_UNIFORM_BUFFER, new_buffers[0]);
glBufferData(GL_UNIFORM_BUFFER, data.max_instance_buffer_size, nullptr, GL_STREAM_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, new_buffers[0]);
glBufferData(GL_ARRAY_BUFFER, data.max_instance_buffer_size, nullptr, GL_STREAM_DRAW);
// Light uniform buffer.
glBindBuffer(GL_UNIFORM_BUFFER, new_buffers[1]);
glBufferData(GL_UNIFORM_BUFFER, sizeof(LightUniform) * data.max_lights_per_render, nullptr, GL_STREAM_DRAW);
@ -2623,41 +2602,28 @@ RasterizerCanvasGLES3::RasterizerCanvasGLES3() {
glBindBuffer(GL_UNIFORM_BUFFER, new_buffers[2]);
glBufferData(GL_UNIFORM_BUFFER, sizeof(StateBuffer), nullptr, GL_STREAM_DRAW);
DataBuffer db;
db.ubo = new_buffers[0];
db.buffer = new_buffers[0];
db.light_ubo = new_buffers[1];
db.state_ubo = new_buffers[2];
db.last_frame_used = 0;
db.fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
state.canvas_instance_data_buffers[i] = db;
}
glBindBuffer(GL_ARRAY_BUFFER, 0);
glBindBuffer(GL_UNIFORM_BUFFER, 0);
state.instance_data_array = memnew_arr(InstanceData, data.max_instances_per_ubo);
state.instance_data_array = memnew_arr(InstanceData, data.max_instances_per_buffer);
state.light_uniforms = memnew_arr(LightUniform, data.max_lights_per_render);
{
const uint32_t no_of_instances = data.max_instances_per_batch;
const uint32_t indices[6] = { 0, 2, 1, 3, 2, 0 };
glGenVertexArrays(1, &data.indexed_quad_array);
glBindVertexArray(data.indexed_quad_array);
glBindBuffer(GL_ARRAY_BUFFER, data.canvas_quad_vertices);
const uint32_t num_indices = 6;
const uint32_t quad_indices[num_indices] = { 0, 2, 1, 3, 2, 0 };
const uint32_t total_indices = no_of_instances * num_indices;
uint32_t *indices = new uint32_t[total_indices];
for (uint32_t i = 0; i < total_indices; i++) {
uint32_t quad = i / num_indices;
uint32_t quad_local = i % num_indices;
indices[i] = quad_indices[quad_local] + quad * num_indices;
}
glGenBuffers(1, &data.indexed_quad_buffer);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, data.indexed_quad_buffer);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(uint32_t) * total_indices, indices, GL_STATIC_DRAW);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(uint32_t) * 6, indices, GL_STATIC_DRAW);
glBindVertexArray(0);
delete[] indices;
}
String global_defines;

View File

@ -245,7 +245,7 @@ public:
uint32_t max_lights_per_render = 256;
uint32_t max_lights_per_item = 16;
uint32_t max_instances_per_batch = 512;
uint32_t max_instances_per_ubo = 16384;
uint32_t max_instances_per_buffer = 16384;
uint32_t max_instance_buffer_size = 16384 * 128;
} data;
@ -278,7 +278,7 @@ public:
// We track them and ensure that they don't get reused until at least 2 frames have passed
// to avoid the GPU stalling to wait for a resource to become available.
struct DataBuffer {
GLuint ubo = 0;
GLuint buffer = 0;
GLuint light_ubo = 0;
GLuint state_ubo = 0;
uint64_t last_frame_used = -3;
@ -359,6 +359,7 @@ public:
void _add_to_batch(uint32_t &r_index, bool &r_batch_broken);
void _allocate_instance_data_buffer();
void _align_instance_data_buffer(uint32_t &r_index);
void _enable_attributes(uint32_t p_start, bool p_primitive, uint32_t p_rate = 1);
void set_time(double p_time);

View File

@ -11,6 +11,7 @@ mode_instanced = #define USE_ATTRIBUTES \n#define USE_INSTANCING
DISABLE_LIGHTING = false
USE_RGBA_SHADOWS = false
SINGLE_INSTANCE = false
#[vertex]
@ -25,10 +26,75 @@ layout(location = 1) in highp vec4 instance_xform0;
layout(location = 2) in highp vec4 instance_xform1;
layout(location = 5) in highp uvec4 instance_color_custom_data; // Color packed into xy, custom_data packed into zw for compatibility with 3D
#endif // USE_INSTANCING
#endif // USE_ATTRIBUTES
#include "stdlib_inc.glsl"
layout(location = 6) in highp vec4 attrib_A;
layout(location = 7) in highp vec4 attrib_B;
layout(location = 8) in highp vec4 attrib_C;
layout(location = 9) in highp vec4 attrib_D;
layout(location = 10) in highp vec4 attrib_E;
#ifdef USE_PRIMITIVE
layout(location = 11) in highp uvec4 attrib_F;
#else
layout(location = 11) in highp vec4 attrib_F;
#endif
layout(location = 12) in highp uvec4 attrib_G;
layout(location = 13) in highp uvec4 attrib_H;
#define read_draw_data_world_x attrib_A.xy
#define read_draw_data_world_y attrib_A.zw
#define read_draw_data_world_ofs attrib_B.xy
#define read_draw_data_color_texture_pixel_size attrib_B.zw
#ifdef USE_PRIMITIVE
#define read_draw_data_point_a attrib_C.xy
#define read_draw_data_point_b attrib_C.zw
#define read_draw_data_point_c attrib_D.xy
#define read_draw_data_uv_a attrib_D.zw
#define read_draw_data_uv_b attrib_E.xy
#define read_draw_data_uv_c attrib_E.zw
#define read_draw_data_color_a_rg attrib_F.x
#define read_draw_data_color_a_ba attrib_F.y
#define read_draw_data_color_b_rg attrib_F.z
#define read_draw_data_color_b_ba attrib_F.w
#define read_draw_data_color_c_rg attrib_G.x
#define read_draw_data_color_c_ba attrib_G.y
#else
#define read_draw_data_modulation attrib_C
#define read_draw_data_ninepatch_margins attrib_D
#define read_draw_data_dst_rect attrib_E
#define read_draw_data_src_rect attrib_F
#endif
#define read_draw_data_flags attrib_G.z
#define read_draw_data_specular_shininess attrib_G.w
#define read_draw_data_lights attrib_H
// Varyings so the per-instance info can be used in the fragment shader
flat out vec4 varying_A;
flat out vec2 varying_B;
#ifndef USE_PRIMITIVE
flat out vec4 varying_C;
#ifndef USE_ATTRIBUTES
#ifdef USE_NINEPATCH
flat out vec2 varying_D;
#endif
flat out vec4 varying_E;
#endif
#endif
flat out uvec2 varying_F;
flat out uvec4 varying_G;
// This needs to be outside clang-format so the ubo comment is in the right place
#ifdef MATERIAL_UNIFORMS_USED
layout(std140) uniform MaterialUniforms{ //ubo:4
@ -39,12 +105,10 @@ layout(std140) uniform MaterialUniforms{ //ubo:4
#endif
/* clang-format on */
#include "canvas_uniforms_inc.glsl"
#include "stdlib_inc.glsl"
out vec2 uv_interp;
out vec4 color_interp;
out vec2 vertex_interp;
flat out int draw_data_instance;
#ifdef USE_NINEPATCH
@ -55,35 +119,46 @@ out vec2 pixel_size_interp;
#GLOBALS
void main() {
varying_A = vec4(read_draw_data_world_x, read_draw_data_world_y);
varying_B = read_draw_data_color_texture_pixel_size;
#ifndef USE_PRIMITIVE
varying_C = read_draw_data_ninepatch_margins;
#ifndef USE_ATTRIBUTES
#ifdef USE_NINEPATCH
varying_D = vec2(read_draw_data_dst_rect.z, read_draw_data_dst_rect.w);
#endif // USE_NINEPATCH
varying_E = read_draw_data_src_rect;
#endif // !USE_ATTRIBUTES
#endif // USE_PRIMITIVE
varying_F = uvec2(read_draw_data_flags, read_draw_data_specular_shininess);
varying_G = read_draw_data_lights;
vec4 instance_custom = vec4(0.0);
#ifdef USE_PRIMITIVE
draw_data_instance = gl_InstanceID;
vec2 vertex;
vec2 uv;
vec4 color;
if (gl_VertexID % 3 == 0) {
vertex = draw_data[draw_data_instance].point_a;
uv = draw_data[draw_data_instance].uv_a;
color = vec4(unpackHalf2x16(draw_data[draw_data_instance].color_a_rg), unpackHalf2x16(draw_data[draw_data_instance].color_a_ba));
vertex = read_draw_data_point_a;
uv = read_draw_data_uv_a;
color = vec4(unpackHalf2x16(read_draw_data_color_a_rg), unpackHalf2x16(read_draw_data_color_a_ba));
} else if (gl_VertexID % 3 == 1) {
vertex = draw_data[draw_data_instance].point_b;
uv = draw_data[draw_data_instance].uv_b;
color = vec4(unpackHalf2x16(draw_data[draw_data_instance].color_b_rg), unpackHalf2x16(draw_data[draw_data_instance].color_b_ba));
vertex = read_draw_data_point_b;
uv = read_draw_data_uv_b;
color = vec4(unpackHalf2x16(read_draw_data_color_b_rg), unpackHalf2x16(read_draw_data_color_b_ba));
} else {
vertex = draw_data[draw_data_instance].point_c;
uv = draw_data[draw_data_instance].uv_c;
color = vec4(unpackHalf2x16(draw_data[draw_data_instance].color_c_rg), unpackHalf2x16(draw_data[draw_data_instance].color_c_ba));
vertex = read_draw_data_point_c;
uv = read_draw_data_uv_c;
color = vec4(unpackHalf2x16(read_draw_data_color_c_rg), unpackHalf2x16(read_draw_data_color_c_ba));
}
#elif defined(USE_ATTRIBUTES)
draw_data_instance = gl_InstanceID;
#ifdef USE_INSTANCING
draw_data_instance = 0;
#endif
vec2 vertex = vertex_attrib;
vec4 color = color_attrib * draw_data[draw_data_instance].modulation;
vec4 color = color_attrib * read_draw_data_modulation;
vec2 uv = uv_attrib;
#ifdef USE_INSTANCING
@ -93,30 +168,29 @@ void main() {
#endif
#else
draw_data_instance = gl_VertexID / 6;
vec2 vertex_base_arr[6] = vec2[](vec2(0.0, 0.0), vec2(0.0, 1.0), vec2(1.0, 1.0), vec2(1.0, 0.0), vec2(0.0, 0.0), vec2(1.0, 1.0));
vec2 vertex_base = vertex_base_arr[gl_VertexID % 6];
vec2 uv = draw_data[draw_data_instance].src_rect.xy + abs(draw_data[draw_data_instance].src_rect.zw) * ((draw_data[draw_data_instance].flags & FLAGS_TRANSPOSE_RECT) != uint(0) ? vertex_base.yx : vertex_base.xy);
vec4 color = draw_data[draw_data_instance].modulation;
vec2 vertex = draw_data[draw_data_instance].dst_rect.xy + abs(draw_data[draw_data_instance].dst_rect.zw) * mix(vertex_base, vec2(1.0, 1.0) - vertex_base, lessThan(draw_data[draw_data_instance].src_rect.zw, vec2(0.0, 0.0)));
vec2 uv = read_draw_data_src_rect.xy + abs(read_draw_data_src_rect.zw) * ((read_draw_data_flags & FLAGS_TRANSPOSE_RECT) != uint(0) ? vertex_base.yx : vertex_base.xy);
vec4 color = read_draw_data_modulation;
vec2 vertex = read_draw_data_dst_rect.xy + abs(read_draw_data_dst_rect.zw) * mix(vertex_base, vec2(1.0, 1.0) - vertex_base, lessThan(read_draw_data_src_rect.zw, vec2(0.0, 0.0)));
#endif
mat4 model_matrix = mat4(vec4(draw_data[draw_data_instance].world_x, 0.0, 0.0), vec4(draw_data[draw_data_instance].world_y, 0.0, 0.0), vec4(0.0, 0.0, 1.0, 0.0), vec4(draw_data[draw_data_instance].world_ofs, 0.0, 1.0));
mat4 model_matrix = mat4(vec4(read_draw_data_world_x, 0.0, 0.0), vec4(read_draw_data_world_y, 0.0, 0.0), vec4(0.0, 0.0, 1.0, 0.0), vec4(read_draw_data_world_ofs, 0.0, 1.0));
#ifdef USE_INSTANCING
model_matrix = model_matrix * transpose(mat4(instance_xform0, instance_xform1, vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0)));
#endif // USE_INSTANCING
#if !defined(USE_ATTRIBUTES) && !defined(USE_PRIMITIVE)
if (bool(draw_data[draw_data_instance].flags & FLAGS_USING_PARTICLES)) {
if (bool(read_draw_data_flags & FLAGS_USING_PARTICLES)) {
//scale by texture size
vertex /= draw_data[draw_data_instance].color_texture_pixel_size;
vertex /= read_draw_data_color_texture_pixel_size;
}
#endif
vec2 color_texture_pixel_size = draw_data[draw_data_instance].color_texture_pixel_size.xy;
vec2 color_texture_pixel_size = read_draw_data_color_texture_pixel_size;
#ifdef USE_POINT_SIZE
float point_size = 1.0;
@ -126,7 +200,7 @@ void main() {
}
#ifdef USE_NINEPATCH
pixel_size_interp = abs(draw_data[draw_data_instance].dst_rect.zw) * vertex_base;
pixel_size_interp = abs(read_draw_data_dst_rect.zw) * vertex_base;
#endif
#if !defined(SKIP_TRANSFORM_USED)
@ -159,6 +233,46 @@ void main() {
#include "canvas_uniforms_inc.glsl"
#include "stdlib_inc.glsl"
in vec2 uv_interp;
in vec2 vertex_interp;
in vec4 color_interp;
#ifdef USE_NINEPATCH
in vec2 pixel_size_interp;
#endif
// Can all be flat as they are the same for the whole batched instance
flat in vec4 varying_A;
flat in vec2 varying_B;
#define read_draw_data_world_x varying_A.xy
#define read_draw_data_world_y varying_A.zw
#define read_draw_data_color_texture_pixel_size varying_B
#ifndef USE_PRIMITIVE
flat in vec4 varying_C;
#define read_draw_data_ninepatch_margins varying_C
#ifndef USE_ATTRIBUTES
#ifdef USE_NINEPATCH
flat in vec2 varying_D;
#define read_draw_data_dst_rect_z varying_D.x
#define read_draw_data_dst_rect_w varying_D.y
#endif
flat in vec4 varying_E;
#define read_draw_data_src_rect varying_E
#endif // USE_ATTRIBUTES
#endif // USE_PRIMITIVE
flat in uvec2 varying_F;
flat in uvec4 varying_G;
#define read_draw_data_flags varying_F.x
#define read_draw_data_specular_shininess varying_F.y
#define read_draw_data_lights varying_G
#ifndef DISABLE_LIGHTING
uniform sampler2D atlas_texture; //texunit:-2
uniform sampler2D shadow_atlas_texture; //texunit:-3
@ -170,17 +284,6 @@ uniform sampler2D specular_texture; //texunit:-7
uniform sampler2D color_texture; //texunit:0
in vec2 uv_interp;
in vec4 color_interp;
in vec2 vertex_interp;
flat in int draw_data_instance;
#ifdef USE_NINEPATCH
in vec2 pixel_size_interp;
#endif
layout(location = 0) out vec4 frag_color;
#ifdef MATERIAL_UNIFORMS_USED
@ -366,7 +469,7 @@ float map_ninepatch_axis(float pixel, float draw_size, float tex_pixel_size, flo
} else if (pixel >= draw_size - margin_end) {
return (tex_size - (draw_size - pixel)) * tex_pixel_size;
} else {
if (!bool(draw_data[draw_data_instance].flags & FLAGS_NINEPACH_DRAW_CENTER)) {
if (!bool(read_draw_data_flags & FLAGS_NINEPACH_DRAW_CENTER)) {
draw_center--;
}
@ -414,28 +517,26 @@ void main() {
int draw_center = 2;
uv = vec2(
map_ninepatch_axis(pixel_size_interp.x, abs(draw_data[draw_data_instance].dst_rect.z), draw_data[draw_data_instance].color_texture_pixel_size.x, draw_data[draw_data_instance].ninepatch_margins.x, draw_data[draw_data_instance].ninepatch_margins.z, int(draw_data[draw_data_instance].flags >> FLAGS_NINEPATCH_H_MODE_SHIFT) & 0x3, draw_center),
map_ninepatch_axis(pixel_size_interp.y, abs(draw_data[draw_data_instance].dst_rect.w), draw_data[draw_data_instance].color_texture_pixel_size.y, draw_data[draw_data_instance].ninepatch_margins.y, draw_data[draw_data_instance].ninepatch_margins.w, int(draw_data[draw_data_instance].flags >> FLAGS_NINEPATCH_V_MODE_SHIFT) & 0x3, draw_center));
map_ninepatch_axis(pixel_size_interp.x, abs(read_draw_data_dst_rect_z), read_draw_data_color_texture_pixel_size.x, read_draw_data_ninepatch_margins.x, read_draw_data_ninepatch_margins.z, int(read_draw_data_flags >> FLAGS_NINEPATCH_H_MODE_SHIFT) & 0x3, draw_center),
map_ninepatch_axis(pixel_size_interp.y, abs(read_draw_data_dst_rect_w), read_draw_data_color_texture_pixel_size.y, read_draw_data_ninepatch_margins.y, read_draw_data_ninepatch_margins.w, int(read_draw_data_flags >> FLAGS_NINEPATCH_V_MODE_SHIFT) & 0x3, draw_center));
if (draw_center == 0) {
color.a = 0.0;
}
uv = uv * draw_data[draw_data_instance].src_rect.zw + draw_data[draw_data_instance].src_rect.xy; //apply region if needed
uv = uv * read_draw_data_src_rect.zw + read_draw_data_src_rect.xy; //apply region if needed
#endif
if (bool(draw_data[draw_data_instance].flags & FLAGS_CLIP_RECT_UV)) {
uv = clamp(uv, draw_data[draw_data_instance].src_rect.xy, draw_data[draw_data_instance].src_rect.xy + abs(draw_data[draw_data_instance].src_rect.zw));
if (bool(read_draw_data_flags & FLAGS_CLIP_RECT_UV)) {
uv = clamp(uv, read_draw_data_src_rect.xy, read_draw_data_src_rect.xy + abs(read_draw_data_src_rect.zw));
}
#endif
#ifndef USE_PRIMITIVE
if (bool(draw_data[draw_data_instance].flags & FLAGS_USE_MSDF)) {
float px_range = draw_data[draw_data_instance].ninepatch_margins.x;
float outline_thickness = draw_data[draw_data_instance].ninepatch_margins.y;
//float reserved1 = draw_data[draw_data_instance].ninepatch_margins.z;
//float reserved2 = draw_data[draw_data_instance].ninepatch_margins.w;
if (bool(read_draw_data_flags & FLAGS_USE_MSDF)) {
float px_range = read_draw_data_ninepatch_margins.x;
float outline_thickness = read_draw_data_ninepatch_margins.y;
vec4 msdf_sample = texture(color_texture, uv);
vec2 msdf_size = vec2(textureSize(color_texture, 0));
@ -451,7 +552,7 @@ void main() {
float a = clamp(d * px_size + 0.5, 0.0, 1.0);
color.a = a * color.a;
}
} else if (bool(draw_data[draw_data_instance].flags & FLAGS_USE_LCD)) {
} else if (bool(read_draw_data_flags & FLAGS_USE_LCD)) {
vec4 lcd_sample = texture(color_texture, uv);
if (lcd_sample.a == 1.0) {
color.rgb = lcd_sample.rgb * color.a;
@ -465,7 +566,7 @@ void main() {
color *= texture(color_texture, uv);
}
uint light_count = (draw_data[draw_data_instance].flags >> uint(FLAGS_LIGHT_COUNT_SHIFT)) & uint(0xF); //max 16 lights
uint light_count = (read_draw_data_flags >> uint(FLAGS_LIGHT_COUNT_SHIFT)) & uint(0xF); //max 16 lights
bool using_light = light_count > 0u || directional_light_count > 0u;
vec3 normal;
@ -476,7 +577,7 @@ void main() {
bool normal_used = false;
#endif
if (normal_used || (using_light && bool(draw_data[draw_data_instance].flags & FLAGS_DEFAULT_NORMAL_MAP_USED))) {
if (normal_used || (using_light && bool(read_draw_data_flags & FLAGS_DEFAULT_NORMAL_MAP_USED))) {
normal.xy = texture(normal_texture, uv).xy * vec2(2.0, -2.0) - vec2(1.0, -1.0);
normal.z = sqrt(1.0 - dot(normal.xy, normal.xy));
normal_used = true;
@ -493,9 +594,9 @@ void main() {
bool specular_shininess_used = false;
#endif
if (specular_shininess_used || (using_light && normal_used && bool(draw_data[draw_data_instance].flags & FLAGS_DEFAULT_SPECULAR_MAP_USED))) {
if (specular_shininess_used || (using_light && normal_used && bool(read_draw_data_flags & FLAGS_DEFAULT_SPECULAR_MAP_USED))) {
specular_shininess = texture(specular_texture, uv);
specular_shininess *= godot_unpackUnorm4x8(draw_data[draw_data_instance].specular_shininess);
specular_shininess *= godot_unpackUnorm4x8(read_draw_data_specular_shininess);
specular_shininess_used = true;
} else {
specular_shininess = vec4(1.0);
@ -507,7 +608,7 @@ void main() {
vec2 screen_uv = vec2(0.0);
#endif
vec2 color_texture_pixel_size = draw_data[draw_data_instance].color_texture_pixel_size.xy;
vec2 color_texture_pixel_size = read_draw_data_color_texture_pixel_size.xy;
vec3 light_vertex = vec3(vertex, 0.0);
vec2 shadow_vertex = vertex;
@ -529,7 +630,7 @@ void main() {
if (normal_used) {
//convert by item transform
normal.xy = mat2(normalize(draw_data[draw_data_instance].world_x), normalize(draw_data[draw_data_instance].world_y)) * normal.xy;
normal.xy = mat2(normalize(read_draw_data_world_x), normalize(read_draw_data_world_y)) * normal.xy;
//convert by canvas transform
normal = normalize((canvas_normal_transform * vec4(normal, 0.0)).xyz);
}
@ -591,15 +692,15 @@ void main() {
uint light_base;
if (i < 8u) {
if (i < 4u) {
light_base = draw_data[draw_data_instance].lights[0];
light_base = read_draw_data_lights[0];
} else {
light_base = draw_data[draw_data_instance].lights[1];
light_base = read_draw_data_lights[1];
}
} else {
if (i < 12u) {
light_base = draw_data[draw_data_instance].lights[2];
light_base = read_draw_data_lights[2];
} else {
light_base = draw_data[draw_data_instance].lights[3];
light_base = read_draw_data_lights[3];
}
}
light_base >>= (i & 3u) * 8u;

View File

@ -27,38 +27,6 @@
#define FLAGS_USE_MSDF uint(1 << 28)
#define FLAGS_USE_LCD uint(1 << 29)
// must be always 128 bytes long
struct DrawData {
vec2 world_x;
vec2 world_y;
vec2 world_ofs;
vec2 color_texture_pixel_size;
#ifdef USE_PRIMITIVE
vec2 point_a;
vec2 point_b;
vec2 point_c;
vec2 uv_a;
vec2 uv_b;
vec2 uv_c;
uint color_a_rg;
uint color_a_ba;
uint color_b_rg;
uint color_b_ba;
uint color_c_rg;
uint color_c_ba;
#else
vec4 modulation;
vec4 ninepatch_margins;
vec4 dst_rect; //for built-in rect and UV
vec4 src_rect;
uint pad;
uint pad2;
#endif
uint flags;
uint specular_shininess;
uvec4 lights;
};
layout(std140) uniform GlobalShaderUniformData { //ubo:1
vec4 global_shader_uniforms[MAX_GLOBAL_SHADER_UNIFORMS];
};
@ -116,7 +84,3 @@ layout(std140) uniform LightData { //ubo:2
Light light_array[MAX_LIGHTS];
};
#endif // DISABLE_LIGHTING
layout(std140) uniform DrawDataInstances { //ubo:3
DrawData draw_data[MAX_DRAW_DATA_INSTANCES];
};

View File

@ -90,8 +90,6 @@ Config::Config() {
glGetIntegerv(GL_MAX_UNIFORM_BLOCK_SIZE, &max_uniform_buffer_size);
glGetIntegerv(GL_MAX_VIEWPORT_DIMS, max_viewport_size);
glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_offset_alignment);
support_anisotropic_filter = extensions.has("GL_EXT_texture_filter_anisotropic");
if (support_anisotropic_filter) {
glGetFloatv(_GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT, &anisotropic_level);

View File

@ -67,8 +67,6 @@ public:
int max_renderable_lights = 0;
int max_lights_per_object = 0;
int uniform_buffer_offset_alignment = 0;
// TODO implement wireframe in OpenGL
// bool generate_wireframes;