diff --git a/core/core_bind.cpp b/core/core_bind.cpp index 8fa7aad0acf..8afca5a2df4 100644 --- a/core/core_bind.cpp +++ b/core/core_bind.cpp @@ -1178,14 +1178,30 @@ void Thread::_start_func(void *ud) { String func_name = t->target_callable.is_custom() ? t->target_callable.get_custom()->get_as_text() : String(t->target_callable.get_method()); ::Thread::set_name(func_name); + // To avoid a circular reference between the thread and the script which can possibly contain a reference + // to the thread, we will do the call (keeping a reference up to that point) and then break chains with it. + // When the call returns, we will reference the thread again if possible. + ObjectID th_instance_id = t->get_instance_id(); + Callable target_callable = t->target_callable; + t = Ref(); + Callable::CallError ce; - t->target_callable.callp(nullptr, 0, t->ret, ce); - if (ce.error != Callable::CallError::CALL_OK) { + Variant ret; + target_callable.callp(nullptr, 0, ret, ce); + // If script properly kept a reference to the thread, we should be able to re-reference it now + // (well, or if the call failed, since we had to break chains anyway because the outcome isn't known upfront). + t = Ref(ObjectDB::get_instance(th_instance_id)); + if (t.is_valid()) { + t->ret = ret; t->running.clear(); - ERR_FAIL_MSG("Could not call function '" + func_name + "' to start thread " + t->get_id() + ": " + Variant::get_callable_error_text(t->target_callable, nullptr, 0, ce) + "."); + } else { + // We could print a warning here, but the Thread object will be eventually destroyed + // noticing wait_to_finish() hasn't been called on it, and it will print a warning itself. } - t->running.clear(); + if (ce.error != Callable::CallError::CALL_OK) { + ERR_FAIL_MSG("Could not call function '" + func_name + "' to start thread " + t->get_id() + ": " + Variant::get_callable_error_text(t->target_callable, nullptr, 0, ce) + "."); + } } Error Thread::start(const Callable &p_callable, Priority p_priority) { diff --git a/core/os/semaphore.h b/core/os/semaphore.h index a992a4587d3..66dfb3ee021 100644 --- a/core/os/semaphore.h +++ b/core/os/semaphore.h @@ -33,6 +33,9 @@ #include "core/error/error_list.h" #include "core/typedefs.h" +#ifdef DEBUG_ENABLED +#include "core/error/error_macros.h" +#endif #include #include @@ -42,6 +45,9 @@ private: mutable std::mutex mutex; mutable std::condition_variable condition; mutable uint32_t count = 0; // Initialized as locked. +#ifdef DEBUG_ENABLED + mutable uint32_t awaiters = 0; +#endif public: _ALWAYS_INLINE_ void post() const { @@ -52,10 +58,16 @@ public: _ALWAYS_INLINE_ void wait() const { std::unique_lock lock(mutex); +#ifdef DEBUG_ENABLED + ++awaiters; +#endif while (!count) { // Handle spurious wake-ups. condition.wait(lock); } - count--; + --count; +#ifdef DEBUG_ENABLED + --awaiters; +#endif } _ALWAYS_INLINE_ bool try_wait() const { @@ -67,6 +79,47 @@ public: return false; } } + +#ifdef DEBUG_ENABLED + ~Semaphore() { + // Destroying an std::condition_variable when not all threads waiting on it have been notified + // invokes undefined behavior (e.g., it may be nicely destroyed or it may be awaited forever.) + // That means other threads could still be running the body of std::condition_variable::wait() + // but already past the safety checkpoint. That's the case for instance if that function is already + // waiting to lock again. + // + // We will make the rule a bit more restrictive and simpler to understand at the same time: there + // should not be any threads at any stage of the waiting by the time the semaphore is destroyed. + // + // We do so because of the following reasons: + // - We have the guideline that threads must be awaited (i.e., completed), so the waiting thread + // must be completely done by the time the thread controlling it finally destroys the semaphore. + // Therefore, only a coding mistake could make the program run into such a attempt at premature + // destruction of the semaphore. + // - In scripting, given that Semaphores are wrapped by RefCounted classes, in general it can't + // happen that a thread is trying to destroy a Semaphore while another is still doing whatever with + // it, so the simplification is mostly transparent to script writers. + // - The redefined rule can be checked for failure to meet it, which is what this implementation does. + // This is useful to detect a few cases of potential misuse; namely: + // a) In scripting: + // * The coder is naughtily dealing with the reference count causing a semaphore to die prematurely. + // * The coder is letting the project reach its termination without having cleanly finished threads + // that await on semaphores (or at least, let the usual semaphore-controlled loop exit). + // b) In the native side, where Semaphore is not a ref-counted beast and certain coding mistakes can + // lead to its premature destruction as well. + // + // Let's let users know they are doing it wrong, but apply a, somewhat hacky, countermeasure against UB + // in debug builds. + std::lock_guard lock(mutex); + if (awaiters) { + WARN_PRINT( + "A Semaphore object is being destroyed while one or more threads are still waiting on it.\n" + "Please call post() on it as necessary to prevent such a situation and so ensure correct cleanup."); + // And now, the hacky countermeasure (i.e., leak the condition variable). + new (&condition) std::condition_variable(); + } + } +#endif }; #endif // SEMAPHORE_H diff --git a/core/os/thread.cpp b/core/os/thread.cpp index c067ad1a6ac..03e2c5409d9 100644 --- a/core/os/thread.cpp +++ b/core/os/thread.cpp @@ -101,7 +101,9 @@ Thread::Thread() { Thread::~Thread() { if (id != UNASSIGNED_ID) { #ifdef DEBUG_ENABLED - WARN_PRINT("A Thread object has been destroyed without wait_to_finish() having been called on it. Please do so to ensure correct cleanup of the thread."); + WARN_PRINT( + "A Thread object is being destroyed without its completion having been realized.\n" + "Please call wait_to_finish() on it to ensure correct cleanup."); #endif thread.detach(); } diff --git a/doc/classes/Mutex.xml b/doc/classes/Mutex.xml index 78202229ed4..ab1fad96246 100644 --- a/doc/classes/Mutex.xml +++ b/doc/classes/Mutex.xml @@ -5,6 +5,11 @@ A synchronization mutex (mutual exclusion). This is used to synchronize multiple [Thread]s, and is equivalent to a binary [Semaphore]. It guarantees that only one thread can ever acquire the lock at a time. A mutex can be used to protect a critical section; however, be careful to avoid deadlocks. + It's of the recursive kind, so it can be locked multiple times by one thread, provided it also unlocks it as many times. + [b]Warning:[/b] + To guarantee that the operating system is able to perform proper cleanup (no crashes, no deadlocks), these conditions must be met: + - By the time a [Mutex]'s reference count reaches zero and therefore it is destroyed, no threads (including the one on which the destruction will happen) must have it locked. + - By the time a [Thread]'s reference count reaches zero and therefore it is destroyed, it must not have any mutex locked. $DOCS_URL/tutorials/performance/using_multiple_threads.html @@ -29,6 +34,7 @@ Unlocks this [Mutex], leaving it to other threads. [b]Note:[/b] If a thread called [method lock] or [method try_lock] multiple times while already having ownership of the mutex, it must also call [method unlock] the same number of times in order to unlock it correctly. + [b]Warning:[/b] Calling [method unlock] more times that [method lock] on a given thread, thus ending up trying to unlock a non-locked mutex, is wrong and may causes crashes or deadlocks. diff --git a/doc/classes/Semaphore.xml b/doc/classes/Semaphore.xml index c95917b7bb4..32eb69fe8cf 100644 --- a/doc/classes/Semaphore.xml +++ b/doc/classes/Semaphore.xml @@ -5,6 +5,10 @@ A synchronization semaphore which can be used to synchronize multiple [Thread]s. Initialized to zero on creation. Be careful to avoid deadlocks. For a binary version, see [Mutex]. + [b]Warning:[/b] + To guarantee that the operating system is able to perform proper cleanup (no crashes, no deadlocks), these conditions must be met: + - By the time a [Semaphore]'s reference count reaches zero and therefore it is destroyed, no threads must be waiting on it. + - By the time a [Thread]'s reference count reaches zero and therefore it is destroyed, it must not be waiting on any semaphore. $DOCS_URL/tutorials/performance/using_multiple_threads.html diff --git a/doc/classes/Thread.xml b/doc/classes/Thread.xml index 2b4ef65f0c2..fcb803b15a9 100644 --- a/doc/classes/Thread.xml +++ b/doc/classes/Thread.xml @@ -6,6 +6,11 @@ A unit of execution in a process. Can run methods on [Object]s simultaneously. The use of synchronization via [Mutex] or [Semaphore] is advised if working with shared objects. [b]Note:[/b] Breakpoints won't break on code if it's running in a thread. This is a current limitation of the GDScript debugger. + [b]Warning:[/b] + To guarantee that the operating system is able to perform proper cleanup (no crashes, no deadlocks), these conditions must be met by the time a [Thread]'s reference count reaches zero and therefore it is destroyed: + - It must not have any [Mutex] objects locked. + - It must not be waiting on any [Semaphore] objects. + - [method wait_to_finish] should have been called on it. $DOCS_URL/tutorials/performance/using_multiple_threads.html