Skip to content

Commit

Permalink
Use validate_utf8_fast in coderange_scan
Browse files — browse the repository at this point in the history
  • Loading branch information
byroot committed Dec 11, 2019
1 parent 629653a commit 7056c04
Showing 1 changed file with 13 additions and 6 deletions.
19 changes: 13 additions & 6 deletions string.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "debug_counter.h"
#include "ruby/util.h"
#include "simdasciicheck.h"
#include "simdutf8check.h"

#define BEG(no) (regs->beg[(no)])
#define END(no) (regs->end[(no)])
Expand Down Expand Up @@ -534,21 +535,26 @@ search_nonascii(const char *p, const char *e)
static int
coderange_scan(const char *p, long len, rb_encoding *enc)
{
const char *e = p + len;
const char *e;

switch (rb_enc_to_index(enc)) {
case ENCINDEX_ASCII:
/* enc is ASCII-8BIT. ASCII-8BIT string never be broken. */
return validate_ascii_fast(p, len) ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID;
case ENCINDEX_US_ASCII:
return validate_ascii_fast(p, len) ? ENC_CODERANGE_7BIT : ENC_CODERANGE_BROKEN;
case ENCINDEX_ASCII:
/* enc is ASCII-8BIT. ASCII-8BIT string never be broken. */
return validate_ascii_fast(p, len) ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID;
case ENCINDEX_US_ASCII:
return validate_ascii_fast(p, len) ? ENC_CODERANGE_7BIT : ENC_CODERANGE_BROKEN;
case RUBY_ENCINDEX_UTF_8:
if (validate_ascii_fast(p, len)) return ENC_CODERANGE_7BIT;
if (validate_utf8_fast(p, len)) return ENC_CODERANGE_VALID;
return ENC_CODERANGE_BROKEN;
}

if (rb_enc_asciicompat(enc)) {
if (validate_ascii_fast(p, len)) {
return ENC_CODERANGE_7BIT;
}

e = p + len;
p = search_nonascii(p, e);
if (!p) return ENC_CODERANGE_7BIT;
for (;;) {
Expand All @@ -561,6 +567,7 @@ coderange_scan(const char *p, long len, rb_encoding *enc)
}
}
else {
e = p + len;
while (p < e) {
int ret = rb_enc_precise_mbclen(p, e, enc);
if (!MBCLEN_CHARFOUND_P(ret)) return ENC_CODERANGE_BROKEN;
Expand Down

0 comments on commit 7056c04

Please sign in to comment.