Skip to content

Commit

Permalink
join default_tree_scope only when a limit_depth is given
Browse files Browse the repository at this point in the history
Although the query issued by the closure_tree_class.has_tree call has an optional argument
`limit_depth` to reduce the load, the cost remains excessively high when
dealing with the hierarchy table containing millions of records.
Below is the execution plan of the query.

```sql
MySQL [app_db]> EXPLAIN
SELECT
  `tags`.*
FROM
  `tags`
INNER JOIN `tag_hierarchies`
  ON `tags`.`id` = `tag_hierarchies`.`descendant_id`
INNER JOIN (
  SELECT
    descendant_id,
    MAX(generations) AS depth
  FROM
    `tag_hierarchies`
  GROUP BY
    descendant_id
) AS generation_depth
  ON `tags`.id = generation_depth.descendant_id
WHERE
  `tag_hierarchies`.`ancestor_id` = 2
  AND (
    `tags`.`id` != '2'
  )
ORDER BY
  `tag_hierarchies`.generations ASC,
  sort_order,
  generation_depth.depth;

+----+-------------+-----------------+------------+--------+-------------------------------+------------------+---------+--------------------------------------+---------+----------+----------------------------------------------+
| id | select_type | table           | partitions | type   | possible_keys                 | key              | key_len | ref                                  | rows    | filtered | Extra                                        |
+----+-------------+-----------------+------------+--------+-------------------------------+------------------+---------+--------------------------------------+---------+----------+----------------------------------------------+
|  1 | PRIMARY     | tag_hierarchies | NULL       | ref    | tag_anc_desc_idx,tag_desc_idx | tag_anc_desc_idx | 4       | const                                |      14 |   100.00 | Using index; Using temporary; Using filesort |
|  1 | PRIMARY     | tags            | NULL       | eq_ref | PRIMARY                       | PRIMARY          | 8       | app_db.tag_hierarchies.descendant_id |       1 |   100.00 | Using where                                  |
|  1 | PRIMARY     | <derived2>      | NULL       | ref    | <auto_key0>                   | <auto_key0>      | 4       | app_db.tags.id                       |      10 |   100.00 | Using where                                  |
|  2 | DERIVED     | tag_hierarchies | NULL       | index  | tag_anc_desc_idx,tag_desc_idx | tag_desc_idx     | 4       | NULL                                 | 970,482 |   100.00 | Using index                                  |
+----+-------------+-----------------+------------+--------+-------------------------------+------------------+---------+--------------------------------------+---------+----------+----------------------------------------------+
4 rows in set (0.003 sec)
```

The default_tree_scope is only meaningful when limit_depth is specified (though
it's questionable whether it actually reduces the load). I have confirmed that
even without the join, the load is not significantly higher.
  • Loading branch information
kakubin committed Oct 20, 2024
1 parent 53f4dc5 commit 7210369
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 3 deletions.
6 changes: 5 additions & 1 deletion lib/closure_tree/hash_tree.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,11 @@ def hash_tree(options = {})
# There is no default depth limit. This might be crazy-big, depending
# on your tree shape. Hash huge trees at your own peril!
def hash_tree(options = {})
_ct.hash_tree(_ct.default_tree_scope(all, options[:limit_depth]))
if options[:limit_depth]
_ct.hash_tree(_ct.default_tree_scope(all, options[:limit_depth]))
else
_ct.hash_tree(all)
end
end
end
end
Expand Down
22 changes: 20 additions & 2 deletions test/support/tag_examples.rb
Original file line number Diff line number Diff line change
Expand Up @@ -761,9 +761,19 @@ def assert_parent_and_children
assert_equal @full_tree, @tag_class.hash_tree(limit_depth: 4)
end

it 'joins the default scope when a limit_depth is given' do
queries = sql_queries { @tag_class.hash_tree(limit_depth: 2) }
assert queries.any? { |query| query.match?(%r{INNER JOIN \( SELECT descendant_id, MAX\(generations\) as depth}) }
end

it 'no limit' do
assert_equal @full_tree, @tag_class.hash_tree
end

it 'does not join the default scope when there is no limit' do
queries = sql_queries { @tag_class.hash_tree }
assert_equal queries.any? { |query| query.match?(%r{INNER JOIN \( SELECT descendant_id, MAX\(generations\) as depth}) }, false
end
end

describe '.hash_tree' do
Expand Down Expand Up @@ -805,6 +815,16 @@ def assert_parent_and_children
assert_equal @full_tree[@a], @a.children.hash_tree
end

it 'joins the default scope when a limit_depth is given' do
queries = sql_queries { @a.self_and_descendants.hash_tree(limit_depth: 2) }
assert queries.any? { |query| query.match?(%r{INNER JOIN \( SELECT descendant_id, MAX\(generations\) as depth}) }
end

it 'does not join the default scope when there is no limit' do
queries = sql_queries { @a.self_and_descendants.hash_tree }
assert_equal queries.any? { |query| query.match?(%r{INNER JOIN \( SELECT descendant_id, MAX\(generations\) as depth}) }, false
end

it 'limit_depth 3 from b.parent' do
assert_equal @three_tree.slice(@a), @b.parent.hash_tree(limit_depth: 3)
end
Expand Down Expand Up @@ -899,8 +919,6 @@ def assert_parent_and_children
@c3 = @tag_class.find_or_create_by_path %w[a3 b3 c3]
@b3 = @c3.parent
@a3 = @b3.parent


end

it 'should return 0 for root' do
Expand Down

0 comments on commit 7210369

Please sign in to comment.