Skip to content

Commit

Permalink
pcre -> pcre2
Browse files Browse the repository at this point in the history
  • Loading branch information
adsr committed Jun 2, 2022
1 parent e4dc431 commit 8d8673c
Show file tree
Hide file tree
Showing 14 changed files with 101 additions and 110 deletions.
6 changes: 3 additions & 3 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@
[submodule "lua"]
path = vendor/lua
url = https://github.com/lua/lua.git
[submodule "pcre"]
path = vendor/pcre
url = https://github.com/adsr/pcre.git
[submodule "pcre2"]
path = vendor/pcre2
url = https://github.com/PhilipHazel/pcre2.git
8 changes: 4 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
prefix?=/usr/local

mle_cflags:=-std=c99 -Wall -Wextra -pedantic -Wno-pointer-arith -Wno-unused-result -Wno-unused-parameter -g -O0 -D_GNU_SOURCE -I. $(CFLAGS)
mle_cflags:=-std=c99 -Wall -Wextra -pedantic -Wno-pointer-arith -Wno-unused-result -Wno-unused-parameter -g -O0 -D_GNU_SOURCE -DPCRE2_CODE_UNIT_WIDTH=8 -I. $(CFLAGS)
mle_ldflags:=$(LDFLAGS)
mle_dynamic_libs:=-lpcre -llua5.4
mle_static_libs:=vendor/pcre/.libs/libpcre.a vendor/lua/liblua5.4.a
mle_dynamic_libs:=-lpcre2-8 -llua5.4
mle_static_libs:=vendor/pcre2/.libs/libpcre2-8.a vendor/lua/liblua5.4.a
mle_ldlibs:=-lm $(LDLIBS)
mle_objects:=$(patsubst %.c,%.o,$(wildcard *.c))
mle_objects_no_main:=$(filter-out main.o,$(mle_objects))
Expand All @@ -20,7 +20,7 @@ endif

ifdef mle_vendor
mle_ldlibs:=$(mle_static_libs) $(mle_ldlibs)
mle_cflags:=-Ivendor/pcre -Ivendor -Ivendor/uthash/src $(mle_cflags)
mle_cflags:=-Ivendor/pcre2/src -Ivendor -Ivendor/uthash/src $(mle_cflags)
mle_vendor_deps:=$(mle_static_libs)
else
mle_ldlibs:=$(mle_dynamic_libs) $(mle_ldlibs)
Expand Down
41 changes: 18 additions & 23 deletions buffer.c
Original file line number Diff line number Diff line change
Expand Up @@ -1441,16 +1441,17 @@ static int _buffer_apply_styles_multis(bline_t *start_line, bint_t min_nlines) {

static int _buffer_bline_apply_style_single(srule_t *srule, bline_t *bline) {
int rc;
int substrs[3];
PCRE2_SIZE substrs[3];
bint_t start;
bint_t stop;
bint_t look_offset;
look_offset = 0;

MLBUF_BLINE_ENSURE_CHARS(bline);
while (look_offset < bline->data_len) {
if ((rc = pcre_exec(srule->cre, srule->crex, bline->data, bline->data_len, look_offset, 0, substrs, 3)) >= 0) {
if (substrs[1] < 0) {
if ((rc = pcre2_match(srule->cre, (PCRE2_SPTR)bline->data, (PCRE2_SIZE)bline->data_len, (PCRE2_SIZE)look_offset, 0, pcre2_md, NULL)) >= 0) {
memcpy(substrs, pcre2_get_ovector_pointer(pcre2_md), 3 * sizeof(PCRE2_SIZE));
if (substrs[1] == PCRE2_UNSET) {
// substrs[0..1] can be unset sometimes (-1 in PCRE, PCRE2_UNSET in PCRE2). See http://pcre.org/pcre.txt
break;
}
Expand All @@ -1459,7 +1460,7 @@ static int _buffer_bline_apply_style_single(srule_t *srule, bline_t *bline) {
for (; start < stop; start++) {
bline->chars[start].style = srule->style;
}
look_offset = MLBUF_MAX(substrs[1], look_offset + 1);
look_offset = MLBUF_MAX(substrs[1], (PCRE2_SIZE)(look_offset + 1));
} else {
break;
}
Expand Down Expand Up @@ -1815,29 +1816,28 @@ static int _buffer_munmap(buffer_t *self) {
// Make a new single-line style rule
srule_t *srule_new_single(char *re, bint_t re_len, int caseless, uint16_t fg, uint16_t bg) {
srule_t *rule;
const char *re_error;
int re_erroffset;
int re_errcode;
PCRE2_SIZE re_erroffset;
rule = calloc(1, sizeof(srule_t));
rule->type = MLBUF_SRULE_TYPE_SINGLE;
rule->style.fg = fg;
rule->style.bg = bg;
rule->re = malloc((re_len + 1) * sizeof(char));
snprintf(rule->re, re_len + 1, "%.*s", (int)re_len, re);
rule->cre = pcre_compile((const char*)rule->re, PCRE_NO_AUTO_CAPTURE | (caseless ? PCRE_CASELESS : 0), &re_error, &re_erroffset, NULL);
rule->cre = pcre2_compile((PCRE2_SPTR)rule->re, (PCRE2_SIZE)strlen(rule->re), PCRE2_NO_AUTO_CAPTURE | (caseless ? PCRE2_CASELESS : 0), &re_errcode, &re_erroffset, NULL);
if (!rule->cre) {
// TODO log error
srule_destroy(rule);
return NULL;
}
rule->crex = pcre_study(rule->cre, PCRE_STUDY_JIT_COMPILE, &re_error);
return rule;
}

// Make a new multi-line style rule
srule_t *srule_new_multi(char *re, bint_t re_len, char *re_end, bint_t re_end_len, uint16_t fg, uint16_t bg) {
srule_t *rule;
const char *re_error;
int re_erroffset;
int re_errcode;;
PCRE2_SIZE re_erroffset;
rule = calloc(1, sizeof(srule_t));
rule->type = MLBUF_SRULE_TYPE_MULTI;
rule->style.fg = fg;
Expand All @@ -1846,15 +1846,13 @@ srule_t *srule_new_multi(char *re, bint_t re_len, char *re_end, bint_t re_end_le
rule->re_end = malloc((re_end_len + 1) * sizeof(char));
snprintf(rule->re, re_len + 1, "%.*s", (int)re_len, re);
snprintf(rule->re_end, re_end_len + 1, "%.*s", (int)re_end_len, re_end);
rule->cre = pcre_compile((const char*)rule->re, PCRE_NO_AUTO_CAPTURE, &re_error, &re_erroffset, NULL);
rule->cre_end = pcre_compile((const char*)rule->re_end, PCRE_NO_AUTO_CAPTURE, &re_error, &re_erroffset, NULL);
rule->cre = pcre2_compile((PCRE2_SPTR)rule->re, (PCRE2_SIZE)strlen(rule->re), PCRE2_NO_AUTO_CAPTURE, &re_errcode, &re_erroffset, NULL);
rule->cre_end = pcre2_compile((PCRE2_SPTR)rule->re_end, (PCRE2_SIZE)strlen(rule->re_end), PCRE2_NO_AUTO_CAPTURE, &re_errcode, &re_erroffset, NULL);
if (!rule->cre || !rule->cre_end) {
// TODO log error
srule_destroy(rule);
return NULL;
}
rule->crex = pcre_study(rule->cre, PCRE_STUDY_JIT_COMPILE, &re_error);
rule->crex_end = pcre_study(rule->cre_end, PCRE_STUDY_JIT_COMPILE, &re_error);
return rule;
}

Expand All @@ -1874,19 +1872,16 @@ srule_t *srule_new_range(mark_t *range_a, mark_t *range_b, uint16_t fg, uint16_t
int srule_destroy(srule_t *srule) {
if (srule->re) free(srule->re);
if (srule->re_end) free(srule->re_end);
if (srule->cre) pcre_free(srule->cre);
if (srule->cre_end) pcre_free(srule->cre_end);
if (srule->crex) pcre_free_study_ex(srule->crex);
if (srule->crex_end) pcre_free_study_ex(srule->crex_end);
if (srule->cre) pcre2_code_free(srule->cre);
if (srule->cre_end) pcre2_code_free(srule->cre_end);
free(srule);
return MLBUF_OK;
}

static int _srule_multi_find(srule_t *rule, int find_end, bline_t *bline, bint_t start_offset, bint_t *ret_start, bint_t *ret_stop) {
int rc;
pcre *cre;
pcre_extra *crex;
int substrs[3];
pcre2_code *cre;
PCRE2_SIZE substrs[3];
bint_t start_index;
mark_t *mark;

Expand All @@ -1904,9 +1899,9 @@ static int _srule_multi_find(srule_t *rule, int find_end, bline_t *bline, bint_t

// MLBUF_SRULE_TYPE_MULTI
cre = find_end ? rule->cre_end : rule->cre;
crex = find_end ? rule->crex_end : rule->crex;
start_index = _buffer_bline_col_to_index(bline, start_offset);
if ((rc = pcre_exec(cre, crex, bline->data, bline->data_len, start_index, 0, substrs, 3)) >= 0) {
if ((rc = pcre2_match(cre, (PCRE2_SPTR)bline->data, (PCRE2_SIZE)bline->data_len, (PCRE2_SIZE)start_index, 0, pcre2_md, NULL)) >= 0) {
memcpy(substrs, pcre2_get_ovector_pointer(pcre2_md), 3 * sizeof(PCRE2_SIZE));
*ret_start = _buffer_bline_index_to_col(bline, substrs[0]);
*ret_stop = _buffer_bline_index_to_col(bline, substrs[1]);
return 1;
Expand Down
2 changes: 1 addition & 1 deletion cursor.c
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ int cursor_replace(cursor_t *cursor, int interactive, char *opt_regex, char *opt
bint_t char_count;
bint_t orig_viewport_y;
int pcre_rc;
int pcre_ovector[30];
PCRE2_SIZE pcre_ovector[30];
str_t repl_backref = {0};
int num_replacements;

Expand Down
12 changes: 11 additions & 1 deletion editor.c
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,13 @@ int editor_init(editor_t *editor, int argc, char **argv) {
char *home_rc;
rv = MLE_OK;
do {
// Create a reusable PCRE2 match data block. This is hacky / lazy. PCRE
// is used all over the place, even in places where there's no easy way
// to get at a shared state, e.g., mark.c. Also feels wasteful to keep
// reallocating this thing, so let's just create one with 10 match
// slots which is the most we ever use. Free in editor_deinit.
pcre2_md = pcre2_match_data_create(10, NULL);

// Set editor defaults
editor->is_in_init = 1;
editor->tab_width = MLE_DEFAULT_TAB_WIDTH;
Expand Down Expand Up @@ -224,6 +231,9 @@ int editor_deinit(editor_t *editor) {
if (editor->cut_buffer) free(editor->cut_buffer);
if (editor->ttyfd) close(editor->ttyfd);
if (editor->startup_macro_name) free(editor->startup_macro_name);

pcre2_match_data_free(pcre2_md);

return MLE_OK;
}

Expand Down Expand Up @@ -823,7 +833,7 @@ static int _editor_prompt_isearch_viewport_down(cmd_context_t *ctx) {
static int _editor_prompt_isearch_drop_cursors(cmd_context_t *ctx) {
bview_t *bview;
mark_t *mark;
pcre *cre;
pcre2_code *cre;
cursor_t *orig_cursor;
cursor_t *last_cursor;
bint_t nchars;
Expand Down
39 changes: 20 additions & 19 deletions mark.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <pcre.h>
#include <pcre2.h>
#include "mlbuf.h"

typedef char* (*mark_find_match_fn)(char *haystack, bint_t haystack_len, bint_t look_offset, bint_t max_offset, void *u1, void *u2, bint_t *ret_needle_len);
Expand All @@ -13,7 +13,8 @@ static char *mark_find_prev_str_matchfn(char *haystack, bint_t haystack_len, bin
static char *mark_find_next_cre_matchfn(char *haystack, bint_t haystack_len, bint_t look_offset, bint_t max_offset, void *cre, void *unused, bint_t *ret_needle_len);
static char *mark_find_prev_cre_matchfn(char *haystack, bint_t haystack_len, bint_t look_offset, bint_t max_offset, void *cre, void *unused, bint_t *ret_needle_len);

static int *pcre_ovector = NULL;
pcre2_match_data *pcre2_md = NULL;
static PCRE2_SIZE *pcre_ovector = NULL;
static int pcre_ovector_size = 0;
static int *pcre_rc;
static char bracket_pairs[8] = {
Expand Down Expand Up @@ -179,12 +180,12 @@ int mark_find_prev_str(mark_t *self, char *str, bint_t str_len, bline_t **ret_li
}

// Find next occurrence of regex from mark
int mark_find_next_cre(mark_t *self, pcre *cre, bline_t **ret_line, bint_t *ret_col, bint_t *ret_num_chars) {
int mark_find_next_cre(mark_t *self, pcre2_code *cre, bline_t **ret_line, bint_t *ret_col, bint_t *ret_num_chars) {
return mark_find_match(self, mark_find_next_cre_matchfn, (void*)cre, NULL, 0, ret_line, ret_col, ret_num_chars);
}

// Find prev occurrence of regex from mark
int mark_find_prev_cre(mark_t *self, pcre *cre, bline_t **ret_line, bint_t *ret_col, bint_t *ret_num_chars) {
int mark_find_prev_cre(mark_t *self, pcre2_code *cre, bline_t **ret_line, bint_t *ret_col, bint_t *ret_num_chars) {
return mark_find_match(self, mark_find_prev_cre_matchfn, (void*)cre, NULL, 1, ret_line, ret_col, ret_num_chars);
}

Expand Down Expand Up @@ -500,11 +501,11 @@ int mark_move_prev_str(mark_t *self, char *str, bint_t str_len) {
MLBUF_MARK_IMPLEMENT_MOVE_VIA_FIND(self, mark_find_prev_str, str, str_len);
}

int mark_move_next_cre(mark_t *self, pcre *cre) {
int mark_move_next_cre(mark_t *self, pcre2_code *cre) {
MLBUF_MARK_IMPLEMENT_MOVE_VIA_FIND(self, mark_find_next_cre, cre);
}

int mark_move_prev_cre(mark_t *self, pcre *cre) {
int mark_move_prev_cre(mark_t *self, pcre2_code *cre) {
MLBUF_MARK_IMPLEMENT_MOVE_VIA_FIND(self, mark_find_prev_cre, cre);
}

Expand All @@ -520,7 +521,7 @@ int mark_move_next_str_nudge(mark_t *self, char *str, bint_t str_len) {
MLBUF_MARK_IMPLEMENT_NUDGE_VIA_FIND(self, mark_find_next_str, str, str_len);
}

int mark_move_next_cre_nudge(mark_t *self, pcre *cre) {
int mark_move_next_cre_nudge(mark_t *self, pcre2_code *cre) {
MLBUF_MARK_IMPLEMENT_NUDGE_VIA_FIND(self, mark_find_next_cre, cre);
}

Expand All @@ -544,11 +545,11 @@ int mark_move_prev_str_ex(mark_t *self, char *str, bint_t str_len, bline_t **opt
MLBUF_MARK_IMPLEMENT_MOVE_VIA_FIND_EX(self, mark_find_prev_str, str, str_len);
}

int mark_move_next_cre_ex(mark_t *self, pcre *cre, bline_t **optret_line, bint_t *optret_col, bint_t *optret_char_count) {
int mark_move_next_cre_ex(mark_t *self, pcre2_code *cre, bline_t **optret_line, bint_t *optret_col, bint_t *optret_char_count) {
MLBUF_MARK_IMPLEMENT_MOVE_VIA_FIND_EX(self, mark_find_next_cre, cre);
}

int mark_move_prev_cre_ex(mark_t *self, pcre *cre, bline_t **optret_line, bint_t *optret_col, bint_t *optret_char_count) {
int mark_move_prev_cre_ex(mark_t *self, pcre2_code *cre, bline_t **optret_line, bint_t *optret_col, bint_t *optret_char_count) {
MLBUF_MARK_IMPLEMENT_MOVE_VIA_FIND_EX(self, mark_find_prev_cre, cre);
}

Expand Down Expand Up @@ -596,7 +597,7 @@ int mark_is_at_word_bound(mark_t *self, int side) {
}

// Set ovector for capturing substrs
int mark_set_pcre_capture(int *rc, int *ovector, int ovector_size) {
int mark_set_pcre_capture(int *rc, PCRE2_SIZE *ovector, int ovector_size) {
if (rc == NULL || ovector == NULL || ovector_size == 0) {
rc = NULL;
pcre_ovector = NULL;
Expand Down Expand Up @@ -775,13 +776,13 @@ static char *mark_find_match_prev(char *haystack, bint_t haystack_len, bint_t lo
static int mark_find_re(mark_t *self, char *re, bint_t re_len, int reverse, bline_t **ret_line, bint_t *ret_col, bint_t *ret_num_chars) {
int rc;
char *regex;
pcre *cre;
const char *error;
int erroffset;
pcre2_code *cre;
int errcode;
PCRE2_SIZE erroffset;
MLBUF_MAKE_GT_EQ0(re_len);
regex = malloc(re_len + 1);
snprintf(regex, re_len + 1, "%s", re);
cre = pcre_compile((const char*)regex, PCRE_CASELESS, &error, &erroffset, NULL);
cre = pcre2_compile((PCRE2_SPTR)regex, (PCRE2_SIZE)strlen(regex), PCRE2_CASELESS, &errcode, &erroffset, NULL);
if (cre == NULL) {
// TODO log error
free(regex);
Expand All @@ -792,7 +793,7 @@ static int mark_find_re(mark_t *self, char *re, bint_t re_len, int reverse, blin
} else {
rc = mark_find_next_cre(self, cre, ret_line, ret_col, ret_num_chars);
}
pcre_free(cre);
pcre2_code_free(cre);
free(regex);
return rc;
}
Expand All @@ -809,9 +810,9 @@ static char *mark_find_prev_str_matchfn(char *haystack, bint_t haystack_len, bin

static char *mark_find_next_cre_matchfn(char *haystack, bint_t haystack_len, bint_t look_offset, bint_t max_offset, void *cre, void *unused, bint_t *ret_needle_len) {
int rc;
int substrs[3];
PCRE2_SIZE substrs[3];
int *use_rc;
int *use_substrs;
PCRE2_SIZE *use_substrs;
int use_substrs_size;
if (!haystack || haystack_len == 0) {
haystack = "";
Expand All @@ -826,8 +827,8 @@ static char *mark_find_next_cre_matchfn(char *haystack, bint_t haystack_len, bin
use_substrs_size = 3;
use_rc = &rc;
}
MLBUF_INIT_PCRE_EXTRA(pcre_extra);
if ((*use_rc = pcre_exec((pcre*)cre, &pcre_extra, haystack, haystack_len, look_offset, 0, use_substrs, use_substrs_size)) >= 0) {
if ((*use_rc = pcre2_match((pcre2_code *)cre, (PCRE2_SPTR)haystack, (PCRE2_SIZE)haystack_len, (PCRE2_SIZE)look_offset, 0, pcre2_md, NULL)) >= 0) {
memcpy(use_substrs, pcre2_get_ovector_pointer(pcre2_md), use_substrs_size * sizeof(PCRE2_SIZE));
if (ret_needle_len) *ret_needle_len = (bint_t)(use_substrs[1] - use_substrs[0]);
return haystack + use_substrs[0];
}
Expand Down
Loading

0 comments on commit 8d8673c

Please sign in to comment.