Add 2D lights to OpenGL3 canvas renderer
This is an initial implementation using the same single-pass approach as the RenderingDevice.
This commit is contained in:
@ -114,7 +114,7 @@ CopyEffects::~CopyEffects() {
void CopyEffects::copy_to_rect(const Rect2i &p_rect) {
void CopyEffects::copy_to_rect(const Rect2 &p_rect) {
copy.shader.version_bind_shader(copy.shader_version, CopyShaderGLES3::MODE_COPY_SECTION);
copy.shader.version_set_uniform(CopyShaderGLES3::COPY_SECTION, p_rect.position.x, p_rect.position.y, p_rect.size.x, p_rect.size.y, copy.shader_version, CopyShaderGLES3::MODE_COPY_SECTION);
@ -61,7 +61,7 @@ public:
// These functions assume that a framebuffer and texture are bound already. They only manage the shader, uniforms, and vertex array.
void copy_to_rect(const Rect2i &p_rect);
void copy_to_rect(const Rect2 &p_rect);
void copy_screen();
void bilinear_blur(GLuint p_source_texture, int p_mipmap_count, const Rect2i &p_region);
void set_color(const Color &p_color, const Rect2i &p_region);
@ -115,7 +115,7 @@ void RasterizerCanvasGLES3::_update_transform_to_mat4(const Transform3D &p_trans
p_mat4[15] = 1;
void RasterizerCanvasGLES3::canvas_render_items(RID p_to_render_target, Item *p_item_list, const Color &p_modulate, Light *p_light_list, Light *p_directional_list, const Transform2D &p_canvas_transform, RS::CanvasItemTextureFilter p_default_filter, RS::CanvasItemTextureRepeat p_default_repeat, bool p_snap_2d_vertices_to_pixel, bool &r_sdf_used) {
void RasterizerCanvasGLES3::canvas_render_items(RID p_to_render_target, Item *p_item_list, const Color &p_modulate, Light *p_light_list, Light *p_directional_light_list, const Transform2D &p_canvas_transform, RS::CanvasItemTextureFilter p_default_filter, RS::CanvasItemTextureRepeat p_default_repeat, bool p_snap_2d_vertices_to_pixel, bool &r_sdf_used) {
GLES3::TextureStorage *texture_storage = GLES3::TextureStorage::get_singleton();
GLES3::MaterialStorage *material_storage = GLES3::MaterialStorage::get_singleton();
@ -144,9 +144,173 @@ void RasterizerCanvasGLES3::canvas_render_items(RID p_to_render_target, Item *p_
// TODO: Setup Directional Lights
//setup directional lights if exist
// TODO: Setup lights
uint32_t light_count = 0;
uint32_t directional_light_count = 0;
Light *l = p_directional_light_list;
uint32_t index = 0;
while (l) {
if (index == data.max_lights_per_render) {
l->render_index_cache = -1;
l = l->next_ptr;
CanvasLight *clight = canvas_light_owner.get_or_null(l->light_internal);
if (!clight) { //unused or invalid texture
l->render_index_cache = -1;
l = l->next_ptr;
Vector2 canvas_light_dir = l->xform_cache.columns[1].normalized();
state.light_uniforms[index].position[0] = -canvas_light_dir.x;
state.light_uniforms[index].position[1] = -canvas_light_dir.y;
//_update_transform_2d_to_mat2x4(clight->shadow.directional_xform, state.light_uniforms[index].shadow_matrix);
state.light_uniforms[index].height = l->height; //0..1 here
for (int i = 0; i < 4; i++) {
state.light_uniforms[index].shadow_color[i] = uint8_t(CLAMP(int32_t(l->shadow_color[i] * 255.0), 0, 255));
state.light_uniforms[index].color[i] = l->color[i];
state.light_uniforms[index].color[3] = l->energy; //use alpha for energy, so base color can go separate
if (state.shadow_fb.is_valid()) {
state.light_uniforms[index].shadow_pixel_size = (1.0 / state.shadow_texture_size) * (1.0 + l->shadow_smooth);
state.light_uniforms[index].shadow_z_far_inv = 1.0 / clight->shadow.z_far;
state.light_uniforms[index].shadow_y_ofs = clight->shadow.y_offset;
} else {
state.light_uniforms[index].shadow_pixel_size = 1.0;
state.light_uniforms[index].shadow_z_far_inv = 1.0;
state.light_uniforms[index].shadow_y_ofs = 0;
state.light_uniforms[index].flags = l->blend_mode << LIGHT_FLAGS_BLEND_SHIFT;
state.light_uniforms[index].flags |= l->shadow_filter << LIGHT_FLAGS_FILTER_SHIFT;
if (clight->shadow.enabled) {
state.light_uniforms[index].flags |= LIGHT_FLAGS_HAS_SHADOW;
l->render_index_cache = index;
l = l->next_ptr;
light_count = index;
directional_light_count = light_count;
state.using_directional_lights = directional_light_count > 0;
//setup lights if exist
Light *l = p_light_list;
uint32_t index = light_count;
while (l) {
if (index == data.max_lights_per_render) {
l->render_index_cache = -1;
l = l->next_ptr;
CanvasLight *clight = canvas_light_owner.get_or_null(l->light_internal);
if (!clight) { //unused or invalid texture
l->render_index_cache = -1;
l = l->next_ptr;
Transform2D to_light_xform = (p_canvas_transform * l->light_shader_xform).affine_inverse();
Vector2 canvas_light_pos = p_canvas_transform.xform(l->xform.get_origin()); //convert light position to canvas coordinates, as all computation is done in canvas coords to avoid precision loss
state.light_uniforms[index].position[0] = canvas_light_pos.x;
state.light_uniforms[index].position[1] = canvas_light_pos.y;
_update_transform_2d_to_mat2x4(to_light_xform, state.light_uniforms[index].matrix);
_update_transform_2d_to_mat2x4(l->xform_cache.affine_inverse(), state.light_uniforms[index].shadow_matrix);
state.light_uniforms[index].height = l->height * (p_canvas_transform.columns[0].length() + p_canvas_transform.columns[1].length()) * 0.5; //approximate height conversion to the canvas size, since all calculations are done in canvas coords to avoid precision loss
for (int i = 0; i < 4; i++) {
state.light_uniforms[index].shadow_color[i] = uint8_t(CLAMP(int32_t(l->shadow_color[i] * 255.0), 0, 255));
state.light_uniforms[index].color[i] = l->color[i];
state.light_uniforms[index].color[3] = l->energy; //use alpha for energy, so base color can go separate
if (state.shadow_fb.is_valid()) {
state.light_uniforms[index].shadow_pixel_size = (1.0 / state.shadow_texture_size) * (1.0 + l->shadow_smooth);
state.light_uniforms[index].shadow_z_far_inv = 1.0 / clight->shadow.z_far;
state.light_uniforms[index].shadow_y_ofs = clight->shadow.y_offset;
} else {
state.light_uniforms[index].shadow_pixel_size = 1.0;
state.light_uniforms[index].shadow_z_far_inv = 1.0;
state.light_uniforms[index].shadow_y_ofs = 0;
state.light_uniforms[index].flags = l->blend_mode << LIGHT_FLAGS_BLEND_SHIFT;
state.light_uniforms[index].flags |= l->shadow_filter << LIGHT_FLAGS_FILTER_SHIFT;
if (clight->shadow.enabled) {
state.light_uniforms[index].flags |= LIGHT_FLAGS_HAS_SHADOW;
if (clight->texture.is_valid()) {
Rect2 atlas_rect = GLES3::TextureStorage::get_singleton()->texture_atlas_get_texture_rect(clight->texture);
state.light_uniforms[index].atlas_rect[0] = atlas_rect.position.x;
state.light_uniforms[index].atlas_rect[1] = atlas_rect.position.y;
state.light_uniforms[index].atlas_rect[2] = atlas_rect.size.width;
state.light_uniforms[index].atlas_rect[3] = atlas_rect.size.height;
} else {
state.light_uniforms[index].atlas_rect[0] = 0;
state.light_uniforms[index].atlas_rect[1] = 0;
state.light_uniforms[index].atlas_rect[2] = 0;
state.light_uniforms[index].atlas_rect[3] = 0;
l->render_index_cache = index;
l = l->next_ptr;
light_count = index;
if (light_count > 0) {
glBindBufferBase(GL_UNIFORM_BUFFER, LIGHT_UNIFORM_LOCATION, state.canvas_instance_data_buffers[state.current_buffer].light_ubo);
glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(LightUniform) * light_count, state.light_uniforms);
// On Desktop and mobile we map the memory without synchronizing for maximum speed.
void *ubo = glMapBufferRange(GL_UNIFORM_BUFFER, 0, sizeof(LightUniform) * light_count, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
memcpy(ubo, state.light_uniforms, sizeof(LightUniform) * light_count);
GLuint texture_atlas = texture_storage->texture_atlas_get_texture();
if (texture_atlas == 0) {
GLES3::Texture *tex = texture_storage->get_texture(texture_storage->texture_gl_get_default(GLES3::DEFAULT_GL_TEXTURE_WHITE));
texture_atlas = tex->tex_id;
glActiveTexture(GL_TEXTURE0 + GLES3::Config::get_singleton()->max_texture_image_units - 2);
glBindTexture(GL_TEXTURE_2D, texture_atlas);
//update canvas state uniform buffer
@ -175,13 +339,12 @@ void RasterizerCanvasGLES3::canvas_render_items(RID p_to_render_target, Item *p_
state_buffer.screen_pixel_size[0] = 1.0 / render_target_size.x;
state_buffer.screen_pixel_size[1] = 1.0 / render_target_size.y;
// TODO: temporary, this should be set at the top of this function
glViewport(0, 0, render_target_size.x, render_target_size.y);
state_buffer.time = state.time;
state_buffer.use_pixel_snap = p_snap_2d_vertices_to_pixel;
state_buffer.directional_light_count = 0; //directional_light_count;
state_buffer.directional_light_count = directional_light_count;
Vector2 canvas_scale = p_canvas_transform.get_scale();
@ -200,7 +363,7 @@ void RasterizerCanvasGLES3::canvas_render_items(RID p_to_render_target, Item *p_
state_buffer.sdf_to_tex[3] = -sdf_tex_rect.position.y / sdf_tex_rect.size.height;
state_buffer.tex_to_sdf = 1.0 / ((canvas_scale.x + canvas_scale.y) * 0.5);
glBindBufferBase(GL_UNIFORM_BUFFER, BASE_UNIFORM_LOCATION, state.canvas_state_buffer);
glBindBufferBase(GL_UNIFORM_BUFFER, BASE_UNIFORM_LOCATION, state.canvas_instance_data_buffers[state.current_buffer].state_ubo);
glBufferData(GL_UNIFORM_BUFFER, sizeof(StateBuffer), &state_buffer, GL_STREAM_DRAW);
GLuint global_buffer = material_storage->global_shader_parameters_get_uniform_buffer();
@ -440,7 +603,9 @@ void RasterizerCanvasGLES3::_render_items(RID p_to_render_target, int p_item_cou
GLES3::CanvasMaterialData *material_data = state.canvas_instance_batches[i].material_data;
CanvasShaderGLES3::ShaderVariant variant = state.canvas_instance_batches[i].shader_variant;
_bind_material(material_data, variant);
uint64_t specialization = 0;
specialization |= uint64_t(state.canvas_instance_batches[i].lights_disabled);
_bind_material(material_data, variant, specialization);
GLES3::CanvasShaderData::BlendMode blend_mode = state.canvas_instance_batches[i].blend_mode;
@ -552,6 +717,38 @@ void RasterizerCanvasGLES3::_record_item_commands(const Item *p_item, const Tran
bool skipping = false;
// TODO: consider making lights a per-batch property and then baking light operations in the shader for better performance.
uint32_t lights[4] = { 0, 0, 0, 0 };
uint16_t light_count = 0;
Light *light = p_lights;
while (light) {
if (light->render_index_cache >= 0 && p_item->light_mask & light->item_mask && p_item->z_final >= light->z_min && p_item->z_final <= light->z_max && p_item->global_rect_cache.intersects_transformed(light->xform_cache, light->rect_cache)) {
uint32_t light_index = light->render_index_cache;
lights[light_count >> 2] |= light_index << ((light_count & 3) * 8);
if (light_count == data.max_lights_per_item) {
light = light->next_ptr;
base_flags |= light_count << FLAGS_LIGHT_COUNT_SHIFT;
bool lights_disabled = light_count == 0 && !state.using_directional_lights;
if (lights_disabled != state.canvas_instance_batches[state.current_batch_index].lights_disabled) {
_new_batch(r_batch_broken, r_index);
state.canvas_instance_batches[state.current_batch_index].lights_disabled = lights_disabled;
const Item::Command *c = p_item->commands;
while (c) {
if (skipping && c->type != Item::Command::TYPE_ANIMATION_SLICE) {
@ -578,6 +775,11 @@ void RasterizerCanvasGLES3::_record_item_commands(const Item *p_item, const Tran
state.instance_data_array[r_index].pad[0] = 0.0;
state.instance_data_array[r_index].pad[1] = 0.0;
state.instance_data_array[r_index].lights[0] = lights[0];
state.instance_data_array[r_index].lights[1] = lights[1];
state.instance_data_array[r_index].lights[2] = lights[2];
state.instance_data_array[r_index].lights[3] = lights[3];
state.instance_data_array[r_index].flags = base_flags | (state.instance_data_array[r_index == 0 ? 0 : r_index - 1].flags & (FLAGS_DEFAULT_NORMAL_MAP_USED | FLAGS_DEFAULT_SPECULAR_MAP_USED)); //reset on each command for sanity, keep canvastexture binding config
Color blend_color;
@ -1138,25 +1340,41 @@ void RasterizerCanvasGLES3::_new_batch(bool &r_batch_broken, uint32_t &r_index)
void RasterizerCanvasGLES3::_bind_material(GLES3::CanvasMaterialData *p_material_data, CanvasShaderGLES3::ShaderVariant p_variant) {
void RasterizerCanvasGLES3::_bind_material(GLES3::CanvasMaterialData *p_material_data, CanvasShaderGLES3::ShaderVariant p_variant, uint64_t p_specialization) {
if (p_material_data) {
if (p_material_data->shader_data->version.is_valid() && p_material_data->shader_data->valid) {
// Bind uniform buffer and textures
GLES3::MaterialStorage::get_singleton()->shaders.canvas_shader.version_bind_shader(p_material_data->shader_data->version, p_variant);
GLES3::MaterialStorage::get_singleton()->shaders.canvas_shader.version_bind_shader(p_material_data->shader_data->version, p_variant, p_specialization);
} else {
GLES3::MaterialStorage::get_singleton()->shaders.canvas_shader.version_bind_shader(data.canvas_shader_default_version, p_variant);
GLES3::MaterialStorage::get_singleton()->shaders.canvas_shader.version_bind_shader(data.canvas_shader_default_version, p_variant, p_specialization);
} else {
GLES3::MaterialStorage::get_singleton()->shaders.canvas_shader.version_bind_shader(data.canvas_shader_default_version, p_variant);
GLES3::MaterialStorage::get_singleton()->shaders.canvas_shader.version_bind_shader(data.canvas_shader_default_version, p_variant, p_specialization);
RID RasterizerCanvasGLES3::light_create() {
return RID();
CanvasLight canvas_light;
return canvas_light_owner.make_rid(canvas_light);
void RasterizerCanvasGLES3::light_set_texture(RID p_rid, RID p_texture) {
GLES3::TextureStorage *texture_storage = GLES3::TextureStorage::get_singleton();
CanvasLight *cl = canvas_light_owner.get_or_null(p_rid);
if (cl->texture == p_texture) {
if (cl->texture.is_valid()) {
cl->texture = p_texture;
if (cl->texture.is_valid()) {
void RasterizerCanvasGLES3::light_set_use_shadow(RID p_rid, bool p_enable) {
@ -1185,6 +1403,14 @@ void RasterizerCanvasGLES3::set_shadow_texture_size(int p_size) {
bool RasterizerCanvasGLES3::free(RID p_rid) {
if (canvas_light_owner.owns(p_rid)) {
CanvasLight *cl = canvas_light_owner.get_or_null(p_rid);
ERR_FAIL_COND_V(!cl, false);
} else {
return false;
return true;
@ -1355,7 +1581,7 @@ void RasterizerCanvasGLES3::_prepare_canvas_texture(RID p_texture, RS::CanvasIte
state.instance_data_array[r_index].flags &= ~FLAGS_DEFAULT_SPECULAR_MAP_USED;
if (!normal_map) {
if (normal_map) {
state.instance_data_array[r_index].flags |= FLAGS_DEFAULT_NORMAL_MAP_USED;
} else {
state.instance_data_array[r_index].flags &= ~FLAGS_DEFAULT_NORMAL_MAP_USED;
@ -1565,13 +1791,23 @@ void RasterizerCanvasGLES3::free_polygon(PolygonID p_polygon) {
// In theory allocations can reach as high as number of windows * 3 frames
// because OpenGL can start rendering subsequent frames before finishing the current one
void RasterizerCanvasGLES3::_allocate_instance_data_buffer() {
GLuint new_buffer;
glGenBuffers(1, &new_buffer);
glBindBuffer(GL_UNIFORM_BUFFER, new_buffer);
glBufferData(GL_UNIFORM_BUFFER, data.max_instance_buffer_size, nullptr, GL_DYNAMIC_DRAW);
GLuint new_buffers[3];
glGenBuffers(3, new_buffers);
// Batch UBO.
glBindBuffer(GL_UNIFORM_BUFFER, new_buffers[0]);
glBufferData(GL_UNIFORM_BUFFER, data.max_instance_buffer_size, nullptr, GL_STREAM_DRAW);
// Light uniform buffer.
glBindBuffer(GL_UNIFORM_BUFFER, new_buffers[1]);
glBufferData(GL_UNIFORM_BUFFER, sizeof(LightUniform) * data.max_lights_per_render, nullptr, GL_STREAM_DRAW);
// State buffer.
glBindBuffer(GL_UNIFORM_BUFFER, new_buffers[2]);
glBufferData(GL_UNIFORM_BUFFER, sizeof(StateBuffer), nullptr, GL_STREAM_DRAW);
state.current_buffer = (state.current_buffer + 1);
DataBuffer db;
db.ubo = new_buffer;
db.ubo = new_buffers[0];
db.light_ubo = new_buffers[1];
db.state_ubo = new_buffers[2];
db.last_frame_used = RSG::rasterizer->get_frame_number();
state.canvas_instance_data_buffers.insert(state.current_buffer, db);
state.current_buffer = state.current_buffer % state.canvas_instance_data_buffers.size();
@ -1753,12 +1989,21 @@ RasterizerCanvasGLES3::RasterizerCanvasGLES3() {
for (int i = 0; i < 3; i++) {
GLuint new_buffer;
glGenBuffers(1, &new_buffer);
glBindBuffer(GL_UNIFORM_BUFFER, new_buffer);
glBufferData(GL_UNIFORM_BUFFER, data.max_instance_buffer_size, nullptr, GL_DYNAMIC_DRAW);
GLuint new_buffers[3];
glGenBuffers(3, new_buffers);
// Batch UBO.
glBindBuffer(GL_UNIFORM_BUFFER, new_buffers[0]);
glBufferData(GL_UNIFORM_BUFFER, data.max_instance_buffer_size, nullptr, GL_STREAM_DRAW);
// Light uniform buffer.
glBindBuffer(GL_UNIFORM_BUFFER, new_buffers[1]);
glBufferData(GL_UNIFORM_BUFFER, sizeof(LightUniform) * data.max_lights_per_render, nullptr, GL_STREAM_DRAW);
// State buffer.
glBindBuffer(GL_UNIFORM_BUFFER, new_buffers[2]);
glBufferData(GL_UNIFORM_BUFFER, sizeof(StateBuffer), nullptr, GL_STREAM_DRAW);
DataBuffer db;
db.ubo = new_buffer;
db.ubo = new_buffers[0];
db.light_ubo = new_buffers[1];
db.state_ubo = new_buffers[2];
db.last_frame_used = 0;
db.fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
state.canvas_instance_data_buffers[i] = db;
@ -1766,6 +2011,7 @@ RasterizerCanvasGLES3::RasterizerCanvasGLES3() {
glBindBuffer(GL_UNIFORM_BUFFER, 0);
state.instance_data_array = memnew_arr(InstanceData, data.max_instances_per_ubo);
state.light_uniforms = memnew_arr(LightUniform, data.max_lights_per_render);
const uint32_t no_of_instances = data.max_instances_per_batch;
@ -1792,14 +2038,9 @@ RasterizerCanvasGLES3::RasterizerCanvasGLES3() {
delete[] indices;
glGenBuffers(1, &state.canvas_state_buffer);
glBindBuffer(GL_UNIFORM_BUFFER, state.canvas_state_buffer);
glBufferData(GL_UNIFORM_BUFFER, sizeof(StateBuffer), nullptr, GL_STREAM_DRAW);
glBindBuffer(GL_UNIFORM_BUFFER, 0);
String global_defines;
global_defines += "#define MAX_GLOBAL_SHADER_UNIFORMS 256\n"; // TODO: this is arbitrary for now
global_defines += "#define MAX_LIGHTS " + itos(data.max_instances_per_batch) + "\n";
global_defines += "#define MAX_LIGHTS " + itos(data.max_lights_per_render) + "\n";
global_defines += "#define MAX_DRAW_DATA_INSTANCES " + itos(data.max_instances_per_batch) + "\n";
@ -1852,7 +2093,8 @@ RasterizerCanvasGLES3::~RasterizerCanvasGLES3() {
glDeleteVertexArrays(1, &data.canvas_quad_array);
#endif // GLES3_ENABLED
@ -96,6 +96,33 @@ class RasterizerCanvasGLES3 : public RendererCanvasRender {
/**** LIGHTING ****/
struct CanvasLight {
RID texture;
RID_Owner<CanvasLight> canvas_light_owner;
struct LightUniform {
float matrix[8]; //light to texture coordinate matrix
float shadow_matrix[8]; //light to shadow coordinate matrix
float color[4];
uint8_t shadow_color[4];
uint32_t flags; //index to light texture
float shadow_pixel_size;
float height;
float position[2];
float shadow_z_far_inv;
float shadow_y_ofs;
float atlas_rect[4];
enum {
@ -184,8 +211,8 @@ public:
RID canvas_shader_default_version;
uint32_t max_lights_per_render;
uint32_t max_lights_per_item;
uint32_t max_lights_per_render = 256;
uint32_t max_lights_per_item = 16;
uint32_t max_instances_per_batch = 512;
uint32_t max_instances_per_ubo = 16384;
uint32_t max_instance_buffer_size = 16384 * 128;
@ -212,16 +239,22 @@ public:
const Item::Command *command = nullptr;
Item::Command::Type command_type = Item::Command::TYPE_ANIMATION_SLICE; // Can default to any type that doesn't form a batch.
uint32_t primitive_points = 0;
bool lights_disabled = false;
// DataBuffer contains our per-frame data. I.e. the resources that are updated each frame.
// We track them and ensure that they don't get reused until at least 2 frames have passed
// to avoid the GPU stalling to wait for a resource to become available.
struct DataBuffer {
GLuint ubo = 0;
GLuint light_ubo = 0;
GLuint state_ubo = 0;
uint64_t last_frame_used = -3;
GLsync fence = GLsync();
struct State {
GLuint canvas_state_buffer;
LocalVector<DataBuffer> canvas_instance_data_buffers;
LocalVector<Batch> canvas_instance_batches;
uint32_t current_buffer = 0;
@ -230,6 +263,10 @@ public:
InstanceData *instance_data_array = nullptr;
LightUniform *light_uniforms = nullptr;
bool using_directional_lights = false;
RID current_tex = RID();
RS::CanvasItemTextureFilter current_filter_mode = RS::CANVAS_ITEM_TEXTURE_FILTER_MAX;
RS::CanvasItemTextureRepeat current_repeat_mode = RS::CANVAS_ITEM_TEXTURE_REPEAT_MAX;
@ -282,7 +319,7 @@ public:
void _render_items(RID p_to_render_target, int p_item_count, const Transform2D &p_canvas_transform_inverse, Light *p_lights, uint32_t &r_last_index, bool p_to_backbuffer = false);
void _record_item_commands(const Item *p_item, const Transform2D &p_canvas_transform_inverse, Item *&current_clip, GLES3::CanvasShaderData::BlendMode p_blend_mode, Light *p_lights, uint32_t &r_index, bool &r_break_batch);
void _render_batch(Light *p_lights, uint32_t p_index);
void _bind_material(GLES3::CanvasMaterialData *p_material_data, CanvasShaderGLES3::ShaderVariant p_variant);
void _bind_material(GLES3::CanvasMaterialData *p_material_data, CanvasShaderGLES3::ShaderVariant p_variant, uint64_t p_specialization);
void _new_batch(bool &r_batch_broken, uint32_t &r_index);
void _add_to_batch(uint32_t &r_index, bool &r_batch_broken);
void _allocate_instance_data_buffer();
@ -211,7 +211,7 @@ void main() {
#include "canvas_uniforms_inc.glsl"
#include "stdlib_inc.glsl"
//uniform sampler2D atlas_texture; //texunit:-2
uniform sampler2D atlas_texture; //texunit:-2
//uniform sampler2D shadow_atlas_texture; //texunit:-3
uniform sampler2D screen_texture; //texunit:-4
uniform sampler2D sdf_texture; //texunit:-5
@ -243,6 +243,77 @@ layout(std140) uniform MaterialUniforms{
vec4 light_compute(
vec3 light_vertex,
vec3 light_position,
vec3 normal,
vec4 light_color,
float light_energy,
vec4 specular_shininess,
inout vec4 shadow_modulate,
vec2 screen_uv,
vec2 uv,
vec4 color, bool is_directional) {
vec4 light = vec4(0.0);
vec3 light_direction = vec3(0.0);
if (is_directional) {
light_direction = normalize(mix(vec3(light_position.xy, 0.0), vec3(0, 0, 1), light_position.z));
light_position = vec3(0.0);
} else {
light_direction = normalize(light_position - light_vertex);
return light;
vec3 light_normal_compute(vec3 light_vec, vec3 normal, vec3 base_color, vec3 light_color, vec4 specular_shininess, bool specular_shininess_used) {
float cNdotL = max(0.0, dot(normal, light_vec));
if (specular_shininess_used) {
vec3 view = vec3(0.0, 0.0, 1.0); // not great but good enough
vec3 half_vec = normalize(view + light_vec);
float cNdotV = max(dot(normal, view), 0.0);
float cNdotH = max(dot(normal, half_vec), 0.0);
float cVdotH = max(dot(view, half_vec), 0.0);
float cLdotH = max(dot(light_vec, half_vec), 0.0);
float shininess = exp2(15.0 * specular_shininess.a + 1.0) * 0.25;
float blinn = pow(cNdotH, shininess);
blinn *= (shininess + 8.0) * (1.0 / (8.0 * M_PI));
float s = (blinn) / max(4.0 * cNdotV * cNdotL, 0.75);
return specular_shininess.rgb * light_color * s + light_color * base_color * cNdotL;
} else {
return light_color * base_color * cNdotL;
void light_blend_compute(uint light_base, vec4 light_color, inout vec3 color) {
uint blend_mode = light_array[light_base].flags & LIGHT_FLAGS_BLEND_MASK;
switch (blend_mode) {
color.rgb += light_color.rgb * light_color.a;
} break;
color.rgb -= light_color.rgb * light_color.a;
} break;
color.rgb = mix(color.rgb, light_color.rgb, light_color.a);
} break;
@ -353,7 +424,8 @@ void main() {
color *= texture(color_texture, uv);
bool using_light = false;
uint light_count = (draw_data[draw_data_instance].flags >> uint(FLAGS_LIGHT_COUNT_SHIFT)) & uint(0xF); //max 16 lights
bool using_light = light_count > 0u || directional_light_count > 0u;
vec3 normal;
@ -414,11 +486,105 @@ void main() {
if (normal_used) {
//convert by item transform
normal.xy = mat2(normalize(draw_data[draw_data_instance].world_x), normalize(draw_data[draw_data_instance].world_y)) * normal.xy;
//convert by canvas transform
normal = normalize((canvas_normal_transform * vec4(normal, 0.0)).xyz);
vec4 base_color = color;
color = vec4(0.0);
color *= canvas_modulation;
#if !defined(DISABLE_LIGHTING) && !defined(MODE_UNSHADED)
// Directional Lights
for (uint i = 0u; i < directional_light_count; i++) {
uint light_base = i;
vec2 direction = light_array[light_base].position;
vec4 light_color = light_array[light_base].color;
vec4 shadow_modulate = vec4(1.0);
light_color = light_compute(light_vertex, vec3(direction, light_array[light_base].height), normal, light_color, light_color.a, specular_shininess, shadow_modulate, screen_uv, uv, base_color, true);
if (normal_used) {
vec3 light_vec = normalize(mix(vec3(direction, 0.0), vec3(0, 0, 1), light_array[light_base].height));
light_color.rgb = light_normal_compute(light_vec, normal, base_color.rgb, light_color.rgb, specular_shininess, specular_shininess_used);
} else {
light_color.rgb *= base_color.rgb;
light_blend_compute(light_base, light_color, color.rgb);
// Positional Lights
for (uint i = 0u; i < MAX_LIGHTS_PER_ITEM; i++) {
if (i >= light_count) {
uint light_base;
if (i < 8u) {
if (i < 4u) {
light_base = draw_data[draw_data_instance].lights[0];
} else {
light_base = draw_data[draw_data_instance].lights[1];
} else {
if (i < 12u) {
light_base = draw_data[draw_data_instance].lights[2];
} else {
light_base = draw_data[draw_data_instance].lights[3];
light_base >>= (i & 3u) * 8u;
light_base &= uint(0xFF);
vec2 tex_uv = (vec4(vertex, 0.0, 1.0) * mat4(light_array[light_base].texture_matrix[0], light_array[light_base].texture_matrix[1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0))).xy; //multiply inverse given its transposed. Optimizer removes useless operations.
vec2 tex_uv_atlas = tex_uv * light_array[light_base].atlas_rect.zw + light_array[light_base].atlas_rect.xy;
vec4 light_color = textureLod(atlas_texture, tex_uv_atlas, 0.0);
vec4 light_base_color = light_array[light_base].color;
vec4 shadow_modulate = vec4(1.0);
vec3 light_position = vec3(light_array[light_base].position, light_array[light_base].height);
light_color.rgb *= light_base_color.rgb;
light_color = light_compute(light_vertex, light_position, normal, light_color, light_base_color.a, specular_shininess, shadow_modulate, screen_uv, uv, base_color, false);
light_color.rgb *= light_base_color.rgb * light_base_color.a;
if (normal_used) {
vec3 light_pos = vec3(light_array[light_base].position, light_array[light_base].height);
vec3 pos = light_vertex;
vec3 light_vec = normalize(light_pos - pos);
light_color.rgb = light_normal_compute(light_vec, normal, base_color.rgb, light_color.rgb, specular_shininess, specular_shininess_used);
} else {
light_color.rgb *= base_color.rgb;
if (any(lessThan(tex_uv, vec2(0.0, 0.0))) || any(greaterThanEqual(tex_uv, vec2(1.0, 1.0)))) {
//if outside the light texture, light color is zero
light_color.a = 0.0;
light_blend_compute(light_base, light_color, color.rgb);
frag_color = color;
@ -94,6 +94,27 @@ layout(std140) uniform CanvasData { //ubo:0
#define LIGHT_FLAGS_SHADOW_PCF5 uint(1 << 22)
#define LIGHT_FLAGS_SHADOW_PCF13 uint(2 << 22)
struct Light {
mat2x4 texture_matrix; //light to texture coordinate matrix (transposed)
mat2x4 shadow_matrix; //light to shadow coordinate matrix (transposed)
vec4 color;
uint shadow_color; // packed
uint flags; //index to light texture
float shadow_pixel_size;
float height;
vec2 position;
float shadow_zfar_inv;
float shadow_y_ofs;
vec4 atlas_rect;
layout(std140) uniform LightData { //ubo:2
Light light_array[MAX_LIGHTS];
layout(std140) uniform DrawDataInstances { //ubo:3
DrawData draw_data[MAX_DRAW_DATA_INSTANCES];
@ -2,7 +2,7 @@
mode_default = #define MODE_SIMPLE_COPY
mode_copy_section = #define USE_COPY_SECTION
mode_copy_section = #define USE_COPY_SECTION \n#define MODE_SIMPLE_COPY
mode_gaussian_blur = #define MODE_GAUSSIAN_BLUR
mode_mipmap = #define MODE_MIPMAP
mode_simple_color = #define MODE_SIMPLE_COLOR \n#define USE_COPY_SECTION
@ -25,8 +25,7 @@ void main() {
gl_Position = vec4(vertex_attrib, 1.0, 1.0);
gl_Position.xy = (copy_section.xy + (uv_interp.xy * 0.5 + 0.5) * copy_section.zw) * 2.0 - 1.0;
uv_interp = copy_section.xy + uv_interp * copy_section.zw;
gl_Position.xy = (copy_section.xy + uv_interp.xy * copy_section.zw) * 2.0 - 1.0;
@ -197,6 +197,22 @@ TextureStorage::TextureStorage() {
glBindTexture(GL_TEXTURE_2D, 0);
{ // Atlas Texture initialize.
uint8_t pixel_data[4 * 4 * 4];
for (int i = 0; i < 16; i++) {
pixel_data[i * 4 + 0] = 0;
pixel_data[i * 4 + 1] = 0;
pixel_data[i * 4 + 2] = 0;
pixel_data[i * 4 + 3] = 255;
glGenTextures(1, &texture_atlas.texture);
glBindTexture(GL_TEXTURE_2D, texture_atlas.texture);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, 4, 4, 0, GL_RGBA, GL_UNSIGNED_BYTE, pixel_data);
glBindTexture(GL_TEXTURE_2D, 0);
@ -207,6 +223,11 @@ TextureStorage::~TextureStorage() {
for (int i = 0; i < DEFAULT_GL_TEXTURE_MAX; i++) {
glDeleteTextures(1, &texture_atlas.texture);
texture_atlas.texture = 0;
glDeleteFramebuffers(1, &texture_atlas.framebuffer);
texture_atlas.framebuffer = 0;
//TODO, move back to storage
@ -653,7 +674,7 @@ void TextureStorage::texture_free(RID p_texture) {
for (int i = 0; i < t->proxies.size(); i++) {
Texture *p = texture_owner.get_or_null(t->proxies[i]);
@ -875,7 +896,7 @@ void TextureStorage::texture_replace(RID p_texture, RID p_by_texture) {
//delete last, so proxies can be updated
void TextureStorage::texture_set_size_override(RID p_texture, int p_width, int p_height) {
@ -1143,6 +1164,217 @@ RID TextureStorage::texture_create_radiance_cubemap(RID p_source, int p_resoluti
return RID();
void TextureStorage::texture_add_to_texture_atlas(RID p_texture) {
if (!texture_atlas.textures.has(p_texture)) {
TextureAtlas::Texture t;
t.users = 1;
texture_atlas.textures[p_texture] = t;
texture_atlas.dirty = true;
} else {
TextureAtlas::Texture *t = texture_atlas.textures.getptr(p_texture);
void TextureStorage::texture_remove_from_texture_atlas(RID p_texture) {
TextureAtlas::Texture *t = texture_atlas.textures.getptr(p_texture);
if (t->users == 0) {
// Do not mark it dirty, there is no need to since it remains working.
void TextureStorage::texture_atlas_mark_dirty_on_texture(RID p_texture) {
if (texture_atlas.textures.has(p_texture)) {
texture_atlas.dirty = true; // Mark it dirty since it was most likely modified.
void TextureStorage::texture_atlas_remove_texture(RID p_texture) {
if (texture_atlas.textures.has(p_texture)) {
// There is not much a point of making it dirty, texture can be removed next time the atlas is updated.
GLuint TextureStorage::texture_atlas_get_texture() const {
return texture_atlas.texture;
// Rebuilds the texture atlas if it has been marked dirty.
//
// Steps, in order:
//   1. Return early when the atlas is not dirty; otherwise clear the flag.
//   2. Delete the previous atlas texture and framebuffer, if any.
//   3. When textures are registered, pack them into a grid and store each
//      texture's normalized rectangle in its uv_rect; otherwise fall back to a
//      4x4 atlas.
//   4. Create the RGBA8 atlas texture and a framebuffer targeting it.
//   5. Visit every registered texture and bind its GL texture.
//   6. Restore framebuffer binding 0.
//
// NOTE(review): this excerpt has no closing braces and appears to be missing
// several statements (flagged inline below) - confirm against the full file.
void TextureStorage::update_texture_atlas() {
// NOTE(review): copy_effects is not used by any line visible in this excerpt;
// presumably it performs the per-texture copy in the final loop - confirm.
CopyEffects *copy_effects = CopyEffects::get_singleton();
if (!texture_atlas.dirty) {
return; //nothing to do
texture_atlas.dirty = false;
// Drop the GL objects of the previous atlas before building a new one.
if (texture_atlas.texture != 0) {
glDeleteTextures(1, &texture_atlas.texture);
texture_atlas.texture = 0;
glDeleteFramebuffers(1, &texture_atlas.framebuffer);
texture_atlas.framebuffer = 0;
// Packing works on a grid whose cells are `border` pixels wide. Every item
// reserves (dimension / border) + 1 cells, and its final pixel position is
// shifted by border / 2 inside the reserved area.
const int border = 2;
if (texture_atlas.textures.size()) {
//generate atlas
// NOTE(review): itemsv is indexed through write[idx] below, but no resize of
// itemsv and no increment of idx are visible in this excerpt - confirm.
Vector<TextureAtlas::SortItem> itemsv;
// Atlas width in grid cells; starts at 8 and grows to fit the widest item.
int base_size = 8;
int idx = 0;
for (const KeyValue<RID, TextureAtlas::Texture> &E : texture_atlas.textures) {
TextureAtlas::SortItem &si = itemsv.write[idx];
Texture *src_tex = get_texture(E.key);
// Size in grid cells, with one extra cell per axis.
si.size.width = (src_tex->width / border) + 1;
si.size.height = (src_tex->height / border) + 1;
// Exact size of the source texture in pixels.
si.pixel_size = Size2i(src_tex->width, src_tex->height);
if (base_size < si.size.width) {
base_size = nearest_power_of_2_templated(si.size.width);
si.texture = E.key;
//sort items by size
// NOTE(review): no sort call is visible after the comment above - confirm.
//attempt to create atlas
int item_count = itemsv.size();
TextureAtlas::SortItem *items = itemsv.ptrw();
int atlas_height = 0;
// Try to pack at the current width; double the width and retry until the
// resulting height is at most twice the width.
while (true) {
// v_offsets[c] holds the height (in cells) already occupied in column c.
// NOTE(review): v_offsetsv is never sized in the visible lines before the
// memset writes base_size ints through its pointer - confirm.
Vector<int> v_offsetsv;
int *v_offsets = v_offsetsv.ptrw();
memset(v_offsets, 0, sizeof(int) * base_size);
int max_height = 0;
for (int i = 0; i < item_count; i++) {
//best fit
TextureAtlas::SortItem &si = items[i];
int best_idx = -1;
int best_height = 0x7FFFFFFF;
// Test every starting column j at which the item still fits horizontally.
for (int j = 0; j <= base_size - si.size.width; j++) {
// Highest occupied column underneath the item if it were placed at j.
int height = 0;
for (int k = 0; k < si.size.width; k++) {
int h = v_offsets[k + j];
if (h > height) {
height = h;
if (height > best_height) {
break; //already bad
// Keep the starting column that gives the lowest resting height.
if (height < best_height) {
best_height = height;
best_idx = j;
// Raise the occupied height of every column the item covers.
for (int k = 0; k < si.size.width; k++) {
v_offsets[k + best_idx] = best_height + si.size.height;
si.pos.x = best_idx;
si.pos.y = best_height;
if (si.pos.y + si.size.height > max_height) {
max_height = si.pos.y + si.size.height;
if (max_height <= base_size * 2) {
atlas_height = max_height;
break; //good ratio, break;
base_size *= 2;
// Convert the grid dimensions back to pixels; the height is rounded with
// nearest_power_of_2_templated().
texture_atlas.size.width = base_size * border;
texture_atlas.size.height = nearest_power_of_2_templated(atlas_height * border);
// Store each texture's rectangle, first in pixels and then divided by the
// atlas size so it ends up as a fraction of the atlas.
for (int i = 0; i < item_count; i++) {
TextureAtlas::Texture *t = texture_atlas.textures.getptr(items[i].texture);
t->uv_rect.position = items[i].pos * border + Vector2i(border / 2, border / 2);
t->uv_rect.size = items[i].pixel_size;
t->uv_rect.position /= Size2(texture_atlas.size);
t->uv_rect.size /= Size2(texture_atlas.size);
} else {
// No textures registered: use a small 4x4 atlas.
texture_atlas.size.width = 4;
texture_atlas.size.height = 4;
{ // Atlas Texture initialize.
// TODO validate texture atlas size with maximum texture size
glGenTextures(1, &texture_atlas.texture);
glBindTexture(GL_TEXTURE_2D, texture_atlas.texture);
// Allocate RGBA8 storage without uploading any pixel data (nullptr).
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, texture_atlas.size.width, texture_atlas.size.height, 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr);
glGenFramebuffers(1, &texture_atlas.framebuffer);
glBindFramebuffer(GL_FRAMEBUFFER, texture_atlas.framebuffer);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, texture_atlas.texture, 0);
GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER);
// NOTE(review): the cleanup and warning below presumably run only when status
// is not GL_FRAMEBUFFER_COMPLETE; the guarding condition is not visible in
// this excerpt - confirm.
glDeleteFramebuffers(1, &texture_atlas.framebuffer);
texture_atlas.framebuffer = 0;
glDeleteTextures(1, &texture_atlas.texture);
texture_atlas.texture = 0;
WARN_PRINT("Could not create texture atlas, status: " + get_framebuffer_error(status));
glViewport(0, 0, texture_atlas.size.width, texture_atlas.size.height);
glClearColor(0.0, 0.0, 0.0, 0.0);
// NOTE(review): only the clear color is set in the visible lines; no clear
// call is visible - confirm.
glBindTexture(GL_TEXTURE_2D, 0);
if (texture_atlas.textures.size()) {
for (const KeyValue<RID, TextureAtlas::Texture> &E : texture_atlas.textures) {
// NOTE(review): t is fetched but not used in the visible lines, and no draw or
// copy call follows the bind below - confirm against the full file.
TextureAtlas::Texture *t = texture_atlas.textures.getptr(E.key);
Texture *src_tex = get_texture(E.key);
glBindTexture(GL_TEXTURE_2D, src_tex->tex_id);
glBindFramebuffer(GL_FRAMEBUFFER, 0);
RID TextureStorage::decal_allocate() {
@ -371,6 +371,38 @@ private:
Ref<Image> _get_gl_image_and_format(const Ref<Image> &p_image, Image::Format p_format, Image::Format &r_real_format, GLenum &r_gl_format, GLenum &r_gl_internal_format, GLenum &r_gl_type, bool &r_compressed, bool p_force_decompress) const;
// State of the shared texture atlas: the registered textures, the GL objects
// backing the atlas, and the helper record used while packing.
struct TextureAtlas {
// Per-texture atlas entry.
struct Texture {
// NOTE(review): presumably a count of users referencing this entry - confirm against the code that updates it.
int users;
// Rectangle of the texture inside the atlas; update_texture_atlas() stores it divided by the atlas size.
Rect2 uv_rect;
// Temporary record used by update_texture_atlas() while packing.
struct SortItem {
RID texture; // Texture this record describes.
Size2i pixel_size; // Source texture size in pixels.
Size2i size; // Size in packing-grid cells.
Point2i pos; // Assigned position in packing-grid cells.
// Orders items from larger to smaller: taller items first, and among
// items of equal height, wider items first.
bool operator<(const SortItem &p_item) const {
//sort larger to smaller
if (size.height == p_item.size.height) {
return size.width > p_item.size.width;
} else {
return size.height > p_item.size.height;
// Registered textures, keyed by texture RID.
HashMap<RID, Texture> textures;
// Starts as true; update_texture_atlas() returns early while this is false.
bool dirty = true;
// GL names of the atlas texture and its framebuffer; 0 means not created.
GLuint texture = 0;
GLuint framebuffer = 0;
// Atlas dimensions in pixels, as passed to glTexImage2D when the atlas is created.
Size2i size;
} texture_atlas;
/* Render Target API */
mutable RID_Owner<RenderTarget> render_target_owner;
@ -473,6 +505,25 @@ public:
void texture_bind(RID p_texture, uint32_t p_texture_no);
RID texture_create_radiance_cubemap(RID p_source, int p_resolution = -1) const;
void update_texture_atlas();
GLuint texture_atlas_get_texture() const;
// Returns the stored uv_rect of p_texture within the atlas, or a
// default-constructed Rect2 when the texture is not registered in
// texture_atlas.textures.
_FORCE_INLINE_ Rect2 texture_atlas_get_texture_rect(RID p_texture) {
TextureAtlas::Texture *t = texture_atlas.textures.getptr(p_texture);
if (!t) {
return Rect2();
return t->uv_rect;
void texture_add_to_texture_atlas(RID p_texture);
void texture_remove_from_texture_atlas(RID p_texture);
void texture_atlas_mark_dirty_on_texture(RID p_texture);
void texture_atlas_remove_texture(RID p_texture);
virtual RID decal_allocate() override;
@ -302,6 +302,7 @@ void Utilities::update_dirty_resources() {
void Utilities::set_debug_generate_wireframes(bool p_generate) {
@ -598,13 +598,11 @@ void main() {
normal = normalize((canvas_data.canvas_normal_transform * vec4(normal, 0.0)).xyz);
vec3 base_color = color.rgb;
vec4 base_color = color;
if (bool(draw_data.flags & FLAGS_USING_LIGHT_MASK)) {
color = vec4(0.0); //invisible by default due to using light mask
vec4 original_color = color;
color = vec4(0.0);
#elif !defined(MODE_UNSHADED)
@ -624,12 +622,14 @@ void main() {
vec4 shadow_modulate = vec4(1.0);
light_color = light_compute(light_vertex, vec3(direction,[light_base].height), normal, light_color, light_color.a, specular_shininess, shadow_modulate, screen_uv, uv, color, true);
light_color = light_compute(light_vertex, vec3(direction,[light_base].height), normal, light_color, light_color.a, specular_shininess, shadow_modulate, screen_uv, uv, base_color, true);
if (normal_used) {
vec3 light_vec = normalize(mix(vec3(direction, 0.0), vec3(0, 0, 1),[light_base].height));
light_color.rgb = light_normal_compute(light_vec, normal, base_color, light_color.rgb, specular_shininess, specular_shininess_used);
light_color.rgb = light_normal_compute(light_vec, normal, base_color.rgb, light_color.rgb, specular_shininess, specular_shininess_used);
} else {
light_color.rgb *= base_color.rgb;
@ -646,8 +646,6 @@ void main() {
light_color.rgb *= original_color.rgb;
light_blend_compute(light_base, light_color, color.rgb);
@ -685,7 +683,7 @@ void main() {
vec3 light_position = vec3([light_base].position,[light_base].height);
light_color.rgb *= light_base_color.rgb;
light_color = light_compute(light_vertex, light_position, normal, light_color, light_base_color.a, specular_shininess, shadow_modulate, screen_uv, uv, color, false);
light_color = light_compute(light_vertex, light_position, normal, light_color, light_base_color.a, specular_shininess, shadow_modulate, screen_uv, uv, base_color, false);
light_color.rgb *= light_base_color.rgb * light_base_color.a;
@ -695,7 +693,9 @@ void main() {
vec3 pos = light_vertex;
vec3 light_vec = normalize(light_pos - pos);
light_color.rgb = light_normal_compute(light_vec, normal, base_color, light_color.rgb, specular_shininess, specular_shininess_used);
light_color.rgb = light_normal_compute(light_vec, normal, base_color.rgb, light_color.rgb, specular_shininess, specular_shininess_used);
} else {
light_color.rgb *= base_color.rgb;
if (any(lessThan(tex_uv, vec2(0.0, 0.0))) || any(greaterThanEqual(tex_uv, vec2(1.0, 1.0)))) {
@ -743,8 +743,6 @@ void main() {
light_color.rgb *= original_color.rgb;
light_blend_compute(light_base, light_color, color.rgb);
Reference in New Issue
Block a user