Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor Node Processing to allow Scene Multithreading #75901

Merged
merged 1 commit into from
May 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions core/object/message_queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,17 @@ bool CallQueue::is_flushing() const {
return flushing;
}

bool CallQueue::has_messages() const {
if (pages_used == 0) {
return false;
}
if (pages_used == 1 && page_messages[0] == 0) {
return false;
}

return true;
}

int CallQueue::get_max_buffer_usage() const {
return pages.size() * PAGE_SIZE_BYTES;
}
Expand Down
15 changes: 10 additions & 5 deletions core/object/message_queue.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,14 @@ class CallQueue {
PAGE_SIZE_BYTES = 4096
};

struct Page {
uint8_t data[PAGE_SIZE_BYTES];
};

// Needs to be public to be able to define it outside the class.
// Needs to lock because there can be multiple of these allocators in several threads.
typedef PagedAllocator<Page, true> Allocator;

private:
enum {
TYPE_CALL,
Expand All @@ -56,12 +64,7 @@ class CallQueue {
FLAG_MASK = FLAG_NULL_IS_OK - 1,
};

struct Page {
uint8_t data[PAGE_SIZE_BYTES];
};

Mutex mutex;
typedef PagedAllocator<Page, false> Allocator;

Allocator *allocator = nullptr;
bool allocator_is_custom = false;
Expand Down Expand Up @@ -140,6 +143,8 @@ class CallQueue {
void clear();
void statistics();

bool has_messages() const;

bool is_flushing() const;
int get_max_buffer_usage() const;

Expand Down
5 changes: 5 additions & 0 deletions core/object/object.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,10 @@ bool Object::_predelete() {
return _predelete_ok;
}

void Object::cancel_free() {
_predelete_ok = false;
}

void Object::_postinitialize() {
_class_name_ptr = _get_class_namev(); // Set the direct pointer, which is much faster to obtain, but can only happen after postinitialize.
_initialize_classv();
Expand Down Expand Up @@ -1561,6 +1565,7 @@ void Object::_bind_methods() {
ClassDB::bind_method(D_METHOD("tr_n", "message", "plural_message", "n", "context"), &Object::tr_n, DEFVAL(""));

ClassDB::bind_method(D_METHOD("is_queued_for_deletion"), &Object::is_queued_for_deletion);
ClassDB::bind_method(D_METHOD("cancel_free"), &Object::cancel_free);

ClassDB::add_virtual_method("Object", MethodInfo("free"), false);

Expand Down
2 changes: 2 additions & 0 deletions core/object/object.h
Original file line number Diff line number Diff line change
Expand Up @@ -923,6 +923,8 @@ class Object {

_ALWAYS_INLINE_ bool is_ref_counted() const { return type_is_reference; }

void cancel_free();

Object();
virtual ~Object();
};
Expand Down
2 changes: 2 additions & 0 deletions core/os/thread.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,8 @@ class Thread {
// get the ID of the main thread
_FORCE_INLINE_ static ID get_main_id() { return MAIN_ID; }

_FORCE_INLINE_ static bool is_main_thread() { return caller_id == MAIN_ID; } // Gain a tiny bit of perf here because there is no need to validate caller_id here, because only main thread will be set as 1.

static Error set_name(const String &p_name);

void start(Thread::Callback p_callback, void *p_user, const Settings &p_settings = Settings());
Expand Down
4 changes: 2 additions & 2 deletions core/templates/hash_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ class HashMap {
}

bool _lookup_pos(const TKey &p_key, uint32_t &r_pos) const {
if (elements == nullptr) {
if (elements == nullptr || num_elements == 0) {
return false; // Failed lookups, no elements
}

Expand Down Expand Up @@ -252,7 +252,7 @@ class HashMap {
}

void clear() {
if (elements == nullptr) {
if (elements == nullptr || num_elements == 0) {
return;
}
uint32_t capacity = hash_table_size_primes[capacity_index];
Expand Down
4 changes: 2 additions & 2 deletions core/templates/hash_set.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ class HashSet {
}

bool _lookup_pos(const TKey &p_key, uint32_t &r_pos) const {
if (keys == nullptr) {
if (keys == nullptr || num_elements == 0) {
return false; // Failed lookups, no elements
}

Expand Down Expand Up @@ -237,7 +237,7 @@ class HashSet {
}

void clear() {
if (keys == nullptr) {
if (keys == nullptr || num_elements == 0) {
return;
}
uint32_t capacity = hash_table_size_primes[capacity_index];
Expand Down
4 changes: 3 additions & 1 deletion core/templates/local_vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,11 +95,13 @@ class LocalVector {
}
}

void erase(const T &p_val) {
_FORCE_INLINE_ bool erase(const T &p_val) {
int64_t idx = find(p_val);
if (idx >= 0) {
remove_at(idx);
return true;
}
return false;
}

void invert() {
Expand Down
5 changes: 4 additions & 1 deletion core/templates/vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,12 +71,15 @@ class Vector {
void fill(T p_elem);

void remove_at(int p_index) { _cowdata.remove_at(p_index); }
void erase(const T &p_val) {
_FORCE_INLINE_ bool erase(const T &p_val) {
int idx = find(p_val);
if (idx >= 0) {
remove_at(idx);
return true;
}
return false;
}

void reverse();

_FORCE_INLINE_ T *ptrw() { return _cowdata.ptrw(); }
Expand Down
74 changes: 74 additions & 0 deletions doc/classes/Node.xml
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,20 @@
[b]Note:[/b] For performance reasons, the order of node groups is [i]not[/i] guaranteed. The order of node groups should not be relied upon as it can vary across project runs.
</description>
</method>
<method name="call_deferred_thread_group" qualifiers="vararg">
<return type="Variant" />
<param index="0" name="method" type="StringName" />
<description>
This function is similar to [method Object.call_deferred] except that the call will take place when the node thread group is processed. If the node thread group processes in sub-threads, then the call will be done on that thread, right before [constant NOTIFICATION_PROCESS] or [constant NOTIFICATION_PHYSICS_PROCESS], the [method _process] or [method _physics_process] or their internal versions are called.
</description>
</method>
<method name="call_thread_safe" qualifiers="vararg">
<return type="Variant" />
<param index="0" name="method" type="StringName" />
<description>
This function ensures that the calling of this function will succeed, no matter whether it's being done from a thread or not. If called from a thread that is not allowed to call the function, the call will become deferred. Otherwise, the call will go through directly.
</description>
</method>
<method name="can_process" qualifiers="const">
<return type="bool" />
<description>
Expand Down Expand Up @@ -564,6 +578,20 @@
[b]Note:[/b] Internal children can only be moved within their expected "internal range" (see [code]internal[/code] parameter in [method add_child]).
</description>
</method>
<method name="notify_deferred_thread_group">
<return type="void" />
<param index="0" name="what" type="int" />
<description>
Similar to [method call_deferred_thread_group], but for notifications.
</description>
</method>
<method name="notify_thread_safe">
<return type="void" />
<param index="0" name="what" type="int" />
<description>
Similar to [method call_thread_safe], but for notifications.
</description>
</method>
<method name="print_orphan_nodes" qualifiers="static">
<return type="void" />
<description>
Expand Down Expand Up @@ -697,6 +725,14 @@
Sends a [method rpc] to a specific peer identified by [param peer_id] (see [method MultiplayerPeer.set_target_peer]). Returns [code]null[/code].
</description>
</method>
<method name="set_deferred_thread_group">
<return type="void" />
<param index="0" name="property" type="StringName" />
<param index="1" name="value" type="Variant" />
<description>
Similar to [method call_deferred_thread_group], but for setting properties.
</description>
</method>
<method name="set_display_folded">
<return type="void" />
<param index="0" name="fold" type="bool" />
Expand Down Expand Up @@ -785,6 +821,14 @@
Sets whether this is an instance load placeholder. See [InstancePlaceholder].
</description>
</method>
<method name="set_thread_safe">
<return type="void" />
<param index="0" name="property" type="StringName" />
<param index="1" name="value" type="Variant" />
<description>
Similar to [method call_thread_safe], but for setting properties.
</description>
</method>
<method name="update_configuration_warnings">
<return type="void" />
<description>
Expand All @@ -811,9 +855,24 @@
<member name="process_mode" type="int" setter="set_process_mode" getter="get_process_mode" enum="Node.ProcessMode" default="0">
Can be used to pause or unpause the node, or make the node paused based on the [SceneTree], or make it inherit the process mode from its parent (default).
</member>
<member name="process_physics_priority" type="int" setter="set_physics_process_priority" getter="get_physics_process_priority" default="0">
Similar to [member process_priority] but for [constant NOTIFICATION_PHYSICS_PROCESS], [method _physics_process] or the internal version.
</member>
<member name="process_priority" type="int" setter="set_process_priority" getter="get_process_priority" default="0">
The node's priority in the execution order of the enabled processing callbacks (i.e. [constant NOTIFICATION_PROCESS], [constant NOTIFICATION_PHYSICS_PROCESS] and their internal counterparts). Nodes whose process priority value is [i]lower[/i] will have their processing callbacks executed first.
</member>
<member name="process_thread_group" type="int" setter="set_process_thread_group" getter="get_process_thread_group" enum="Node.ProcessThreadGroup" default="0">
Set the process thread group for this node (basically, whether it receives [constant NOTIFICATION_PROCESS], [constant NOTIFICATION_PHYSICS_PROCESS], [method _process] or [method _physics_process] (and the internal versions) on the main thread or in a sub-thread.
By default, the thread group is [constant PROCESS_THREAD_GROUP_INHERIT], which means that this node belongs to the same thread group as the parent node. The thread groups means that nodes in a specific thread group will process together, separate to other thread groups (depending on [member process_thread_group_order]). If the value is set is [constant PROCESS_THREAD_GROUP_SUB_THREAD], this thread group will occur on a sub thread (not the main thread), otherwise if set to [constant PROCESS_THREAD_GROUP_MAIN_THREAD] it will process on the main thread. If there is not a parent or grandparent node set to something other than inherit, the node will belong to the [i]default thread group[/i]. This default group will process on the main thread and its group order is 0.
During processing in a sub-thread, accessing most functions in nodes outside the thread group is forbidden (and it will result in an error in debug mode). Use [method Object.call_deferred], [method call_thread_safe], [method call_deferred_thread_group] and the likes in order to communicate from the thread groups to the main thread (or to other thread groups).
To better understand process thread groups, the idea is that any node set to any other value than [constant PROCESS_THREAD_GROUP_INHERIT] will include any children (and grandchildren) nodes set to inherit into its process thread group. this means that the processing of all the nodes in the group will happen together, at the same time as the node including them.
</member>
<member name="process_thread_group_order" type="int" setter="set_process_thread_group_order" getter="get_process_thread_group_order">
Change the process thread group order. Groups with a lesser order will process before groups with a greater order. This is useful when a large amount of nodes process in sub thread and, afterwards, another group wants to collect their result in the main thread, as an example.
</member>
<member name="process_thread_messages" type="int" setter="set_process_thread_messages" getter="get_process_thread_messages" enum="Node.ProcessThreadMessages">
Set whether the current thread group will process messages (calls to [method call_deferred_thread_group] on threads, and whether it wants to receive them during regular process or physics process callbacks.
</member>
<member name="scene_file_path" type="String" setter="set_scene_file_path" getter="get_scene_file_path">
If a scene is instantiated from a file, its topmost node contains the absolute file path from which it was loaded in [member scene_file_path] (e.g. [code]res://levels/1.tscn[/code]). Otherwise, [member scene_file_path] is set to an empty string.
</member>
Expand Down Expand Up @@ -1033,6 +1092,21 @@
<constant name="PROCESS_MODE_DISABLED" value="4" enum="ProcessMode">
Never process. Completely disables processing, ignoring the [SceneTree]'s paused property. This is the inverse of [constant PROCESS_MODE_ALWAYS].
</constant>
<constant name="PROCESS_THREAD_GROUP_INHERIT" value="0" enum="ProcessThreadGroup">
If the [member process_thread_group] property is sent to this, the node will belong to any parent (or grandparent) node that has a thread group mode that is not inherit. See [member process_thread_group] for more information.
</constant>
<constant name="PROCESS_THREAD_GROUP_MAIN_THREAD" value="1" enum="ProcessThreadGroup">
Process this node (and children nodes set to inherit) on the main thread. See [member process_thread_group] for more information.
</constant>
<constant name="PROCESS_THREAD_GROUP_SUB_THREAD" value="2" enum="ProcessThreadGroup">
Process this node (and children nodes set to inherit) on a sub-thread. See [member process_thread_group] for more information.
</constant>
<constant name="FLAG_PROCESS_THREAD_MESSAGES" value="1" enum="ProcessThreadMessages">
</constant>
<constant name="FLAG_PROCESS_THREAD_MESSAGES_PHYSICS" value="2" enum="ProcessThreadMessages">
</constant>
<constant name="FLAG_PROCESS_THREAD_MESSAGES_ALL" value="3" enum="ProcessThreadMessages">
</constant>
<constant name="DUPLICATE_SIGNALS" value="1" enum="DuplicateFlags">
Duplicate the node's signals.
</constant>
Expand Down
6 changes: 6 additions & 0 deletions doc/classes/Object.xml
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,12 @@
Returns [code]true[/code] if the object is allowed to translate messages with [method tr] and [method tr_n]. See also [method set_message_translation].
</description>
</method>
<method name="cancel_free">
<return type="void" />
<description>
If this method is called during [constant NOTIFICATION_PREDELETE], this object will reject being freed and will remain allocated. This is mostly an internal function used for error handling to avoid the user from freeing objects when they are not intended to.
</description>
reduz marked this conversation as resolved.
Show resolved Hide resolved
</method>
<method name="connect">
<return type="int" enum="Error" />
<param index="0" name="signal" type="StringName" />
Expand Down
2 changes: 2 additions & 0 deletions editor/editor_node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -636,6 +636,8 @@ void EditorNode::_notification(int p_what) {
} break;

case NOTIFICATION_ENTER_TREE: {
get_tree()->set_disable_node_threading(true); // No node threading while running editor.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this should be an (advanced) editor setting so node threading for the editor can be toggled on/off.

Given the editor uses a wide surface of the engine apis, this would help with identifying bugs and issues with the node threading implementation. As an example, the Android port of the editor has already allowed us to identify bugs and issues with the engine on Android platforms.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Keep in mind that the way threading works at scene level means that it has to be manually used and configured by the developer to better fit the games, and most of the editor is pretty much just GUI (which will not support threading), so ultimately it does not make much sense to make the editor support this.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What do you mean by GUI will not support threading?

Keep in mind that the way threading works at scene level

From the proposal and the screenshot above, it looks like threading can be configured within a scene granularly at the node level.
So just based off the screenshot, it would seem possible to configure different sections/nodes within the EditorNode to run on different threads. I agree there may not be a reason to do so, but nonetheless the capability is there.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

GUI will not support threading, because it does a lot of interactions at the tree level across nodes that can easily exit your thread workgroups and go somewhere else (like container resizing). The engine can catch this fine and throw an error, but ultimately if you have a large connected GUI you can't easily split it up in threads. It can be implemented at some point, but it would be a lot of work for very little benefit. Add to this that the GUI does very little amount of work in the process callbacks (which is what gets parallelized) so even if it could be parallelized the benefit would be very minimum.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, that makes sense at the moment.
Although it's worth pointing out that the use of Godot as a UI framework to develop applications is likely to increase over time, and most current UI frameworks also have the concept of splitting work across multiple threads, so that may be something worth considering in the future.

At the moment, yes that is not much of a concern.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If GUI elements are isolated into separate viewports, could each viewport be in its own thread?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@lyuma From what I understand, Control nodes will be have their "thread group" stuck to main_thread and "group order" to 0. So their parent settings will not influence their process.


Engine::get_singleton()->set_editor_hint(true);

Window *window = get_window();
Expand Down
8 changes: 8 additions & 0 deletions main/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ static int converter_max_line_length = 100000;

HashMap<Main::CLIScope, Vector<String>> forwardable_cli_arguments;
#endif
static bool single_threaded_scene = false;
bool use_startup_benchmark = false;
String startup_benchmark_file;

Expand Down Expand Up @@ -423,6 +424,7 @@ void Main::print_help(const char *p_binary) {
OS::get_singleton()->print(" --gpu-abort Abort on graphics API usage errors (usually validation layer errors). May help see the problem if your system freezes.\n");
#endif
OS::get_singleton()->print(" --remote-debug <uri> Remote debug (<protocol>://<host/IP>[:<port>], e.g. tcp://127.0.0.1:6007).\n");
OS::get_singleton()->print(" --single-threaded-scene Scene tree runs in single-threaded mode. Sub-thread groups are disabled and run on the main thread.\n");
#if defined(DEBUG_ENABLED)
OS::get_singleton()->print(" --debug-collisions Show collision shapes when running the scene.\n");
OS::get_singleton()->print(" --debug-paths Show path lines when running the scene.\n");
Expand Down Expand Up @@ -1108,6 +1110,8 @@ Error Main::setup(const char *execpath, int argc, char *argv[], bool p_second_ph
OS::get_singleton()->print("Missing remote debug server uri, aborting.\n");
goto error;
}
} else if (I->get() == "--single-threaded-scene") {
single_threaded_scene = true;
} else if (I->get() == "--build-solutions") { // Build the scripting solution such C#

auto_build_solutions = true;
Expand Down Expand Up @@ -2780,6 +2784,10 @@ bool Main::start() {
}
#endif

if (single_threaded_scene) {
sml->set_disable_node_threading(true);
}

bool embed_subwindows = GLOBAL_GET("display/window/subwindows/embed_subwindows");

if (single_window || (!project_manager && !editor && embed_subwindows) || !DisplayServer::get_singleton()->has_feature(DisplayServer::Feature::FEATURE_SUBWINDOWS)) {
Expand Down
9 changes: 8 additions & 1 deletion scene/3d/skeleton_3d.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,11 @@ void Skeleton3D::_update_process_order() {

void Skeleton3D::_notification(int p_what) {
switch (p_what) {
case NOTIFICATION_ENTER_TREE: {
if (dirty) {
notification(NOTIFICATION_UPDATE_SKELETON);
}
} break;
case NOTIFICATION_UPDATE_SKELETON: {
RenderingServer *rs = RenderingServer::get_singleton();
Bone *bonesptr = bones.ptrw();
Expand Down Expand Up @@ -629,7 +634,9 @@ void Skeleton3D::_make_dirty() {
return;
}

MessageQueue::get_singleton()->push_notification(this, NOTIFICATION_UPDATE_SKELETON);
if (is_inside_tree()) {
notify_deferred_thread_group(NOTIFICATION_UPDATE_SKELETON);
}
dirty = true;
}

Expand Down
Loading