Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Deduplicate Regexp literals (take 2) #11

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 39 additions & 5 deletions re.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "ruby/encoding.h"
#include "ruby/re.h"
#include "ruby/util.h"
#include "vm_core.h"

VALUE rb_eRegexpError;

Expand Down Expand Up @@ -2956,19 +2957,50 @@ rb_reg_new(const char *s, long len, int options)
return rb_enc_reg_new(s, len, rb_ascii8bit_encoding(), options);
}

/*
 * Fetch a previously cached Regexp literal from the VM-wide cache.
 *
 * The cache is two-level: the outer hash is keyed by the Fixnum-wrapped
 * +options+, and each entry is an inner hash keyed by +str+.
 *
 * Returns the result of the inner lookup for +str+, or Qnil when there is
 * no (truthy) inner table for +options+.
 */
static VALUE
rb_reg_lookup_literal(VALUE str, int options)
{
    VALUE per_options = rb_hash_lookup(GET_VM()->regexp_literals_cache,
                                       INT2FIX(options));

    return RTEST(per_options) ? rb_hash_lookup(per_options, str) : Qnil;
}

/*
 * Record +re+ in the VM-wide Regexp literal cache under (+options+, +str+).
 *
 * The outer hash maps the Fixnum-wrapped +options+ to an inner table; the
 * inner table is created on demand with rb_ident_hash_new() and maps +str+
 * to +re+.
 * NOTE(review): the inner table is an identity hash, so lookups presumably
 * rely on callers passing the same interned string object -- confirm.
 */
static void
rb_reg_cache_literal(VALUE str, int options, VALUE re)
{
    VALUE top = GET_VM()->regexp_literals_cache;
    VALUE key = INT2FIX(options);
    VALUE bucket = rb_hash_lookup(top, key);

    if (RTEST(bucket)) {
        rb_hash_aset(bucket, str, re);
        return;
    }

    /* First literal seen with these options: create its table and register it. */
    bucket = rb_ident_hash_new();
    rb_hash_aset(top, key, bucket);
    rb_hash_aset(bucket, str, re);
}

/*
 * Compile the Regexp literal +str+ with +options+, reusing a cached object
 * when an identical literal was compiled before.
 *
 * str        - pattern source; a null VALUE is replaced by an empty string.
 * options    - Regexp option bits; also part of the cache key.
 * sourcefile - forwarded to rb_reg_initialize_str().
 * sourceline - forwarded to rb_reg_initialize_str().
 *
 * Returns the frozen Regexp flagged REG_LITERAL, or Qnil after storing an
 * error description via rb_set_errinfo() when initialization fails.
 * Failed compilations are not cached.
 */
VALUE
rb_reg_compile(VALUE str, int options, const char *sourcefile, int sourceline)
{
    VALUE re;
    onig_errmsg_buffer err = "";

    if (!str) str = rb_str_new(0,0);
    /* The cache's inner tables are identity hashes, so use the fstring object as the key. */
    str = rb_fstring(str);

    re = rb_reg_lookup_literal(str, options);
    if (RTEST(re)) {
        return re;
    }

    re = rb_reg_alloc();
    if (rb_reg_initialize_str(re, str, options, err, sourcefile, sourceline) != 0) {
        rb_set_errinfo(rb_reg_error_desc(str, options, err));
        return Qnil;
    }
    FL_SET(re, REG_LITERAL);
    /* Freeze before caching: the same object is handed back for later lookups. */
    rb_obj_freeze(re);
    rb_reg_cache_literal(str, options, re);

    return re;
}

Expand Down Expand Up @@ -4111,4 +4143,6 @@ Init_Regexp(void)
rb_define_method(rb_cMatch, "hash", match_hash, 0);
rb_define_method(rb_cMatch, "eql?", match_equal, 1);
rb_define_method(rb_cMatch, "==", match_equal, 1);

rb_gc_register_mark_object(GET_VM()->regexp_literals_cache = rb_hash_new());
}
7 changes: 7 additions & 0 deletions test/ruby/test_regexp.rb
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,13 @@ def test_assert_normal_exit
Regexp.union("a", "a")
end

# Checks that Regexp literals are deduplicated: separate literal sites with
# the same source and options must yield the very same object.
def test_literal_deduplication
# Two distinct literal sites, same source and options: same object.
assert_same(/a/, /a/)
# Different options must not share an object.
refute_same(/a/, /a/m)
# Regexp.new results are not the literal's object...
refute_same(/a/, Regexp.new('a'))
# ...but they still compare equal to the literal.
assert_equal(/a/, Regexp.new('a'))
end

def test_to_s
assert_equal '(?-mix:\x00)', Regexp.new("\0").to_s

Expand Down
1 change: 1 addition & 0 deletions vm_core.h
Original file line number Diff line number Diff line change
Expand Up @@ -634,6 +634,7 @@ typedef struct rb_vm_struct {

VALUE *defined_strings;
st_table *frozen_strings;
VALUE regexp_literals_cache;

const struct rb_builtin_function *builtin_function_table;
int builtin_inline_index;
Expand Down