Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix sourcepos, add missing extension tests and add sourcepos tests #223

Open
wants to merge 27 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
937fb16
Add `cmark_strbuf_remove` that removes a subrange of characters.
chriszielinski Apr 26, 2019
0fea1dd
Fix source positions for inlines inside inconsistently indented blocks.
chriszielinski Apr 26, 2019
e523c73
Add three additional source position tests.
chriszielinski Apr 26, 2019
6df4f1f
Fix outdated expected test result.
chriszielinski Apr 26, 2019
891db70
Fix autolink source position.
chriszielinski May 1, 2019
9160d60
Fix expected autolink test fixture, and add an additional autolink so…
chriszielinski May 1, 2019
469031e
Fix source position of setext headings.
chriszielinski May 3, 2019
51cd4f5
Add setext heading test case.
chriszielinski May 3, 2019
4ef6064
Fix source position for ATX-style headings.
chriszielinski May 3, 2019
035c392
Add ATX-style heading source position test case.
chriszielinski May 3, 2019
92444d7
Fix HTML block source position.
chriszielinski May 6, 2019
2157196
Add HTML block source position test case.
chriszielinski May 6, 2019
79bf93b
Fix thematic break source position.
chriszielinski May 6, 2019
53041c3
Add thematic break source position test case.
chriszielinski May 6, 2019
be7f9c0
Fix ending source position for lists and list items.
chriszielinski May 8, 2019
d2721bf
Add list/list item source position test case.
chriszielinski May 8, 2019
94dba88
Remove commented out code.
chriszielinski May 8, 2019
99621a3
Correct list source position.
chriszielinski May 8, 2019
2570911
Fix source position for HTML blocks without a matching end condition.
chriszielinski May 15, 2019
062d461
Add test case for source position of a HTML block without a matching …
chriszielinski May 15, 2019
a4222f5
Fix line length.
chriszielinski May 15, 2019
538ff21
Fix sourcepos for strikethrough.
martincizek Apr 9, 2020
ce43592
Add test for broken multiline strikethrough. Restore adapted extensio…
martincizek Apr 10, 2020
de09b9c
Add test catching sourcepos bugs related to tables with a paragraph r…
martincizek Apr 10, 2020
27ff048
Fix sourcepos bugs related to table headers with a rejected paragraph.
martincizek Apr 10, 2020
a2792cf
Add sourcepos to softbreak and linebreak nodes if SOURCEPOS requested…
martincizek Apr 11, 2020
80548df
Fix shifted paragraph table offset introduced in ddf21bb.
martincizek May 4, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
528 changes: 515 additions & 13 deletions api_test/main.c
100644 → 100755

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion extensions/strikethrough.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ static cmark_node *match(cmark_syntax_extension *self, cmark_parser *parser,

res = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem);
cmark_node_set_literal(res, buffer);
res->start_line = res->end_line = cmark_inline_parser_get_line(inline_parser);
res->start_line = cmark_inline_parser_get_line(inline_parser);
res->start_column = cmark_inline_parser_get_column(inline_parser) - delims;

if ((left_flanking || right_flanking) &&
Expand Down Expand Up @@ -64,6 +64,7 @@ static delimiter *insert(cmark_syntax_extension *self, cmark_parser *parser,
tmp = next;
}

strikethrough->end_line = closer->inl_text->start_line;
strikethrough->end_column = closer->inl_text->start_column + closer->inl_text->as.literal.len - 1;
cmark_node_free(closer->inl_text);

Expand Down
114 changes: 105 additions & 9 deletions extensions/table.c
Original file line number Diff line number Diff line change
Expand Up @@ -202,14 +202,95 @@ static table_row *row_from_string(cmark_syntax_extension *self,
return row;
}

static void try_inserting_table_header_paragraph(cmark_parser *parser,
cmark_node *parent_container,
unsigned char *parent_string,
int paragraph_offset) {
/*
 * Splits the first `size` bytes of `content` into three regions -- leading
 * whitespace, the trimmed body, and trailing whitespace -- and reports
 * line/column counts for each of them.
 *
 *   lpad_nls                   newlines found in the leading whitespace
 *   lpad_last_line_columns     non-newline bytes of leading whitespace that
 *                              follow its last newline (or all of them when
 *                              it contains no newline)
 *   trimmed_nls                newlines inside the trimmed body
 *   trimmed_last_line_columns  bytes of the trimmed body that follow its last
 *                              newline (or all of them when it has none)
 *   rpad_nls                   newlines found in the trailing whitespace
 *
 * Columns are counted in bytes. Every output is reset to zero before its
 * region is scanned.
 */
static void count_header_paragraph_newlines(unsigned char *content,
                                            bufsize_t size,
                                            int *lpad_nls,
                                            int *lpad_last_line_columns,
                                            int *trimmed_nls,
                                            int *trimmed_last_line_columns,
                                            int *rpad_nls) {
  bufsize_t head;
  bufsize_t tail = size;

  /* Region 1: walk forward over the leading whitespace. */
  *lpad_nls = 0;
  *lpad_last_line_columns = 0;
  for (head = 0; head < tail && cmark_isspace(content[head]); head++) {
    if (content[head] != '\n') {
      *lpad_last_line_columns += 1;
      continue;
    }
    /* A newline starts a fresh line, so the column count restarts. */
    *lpad_nls += 1;
    *lpad_last_line_columns = 0;
  }

  /* Region 3: walk backward over the trailing whitespace, never crossing
   * the point where the forward scan stopped. */
  *rpad_nls = 0;
  for (; tail > head && cmark_isspace(content[tail - 1]); tail--) {
    if (content[tail - 1] == '\n') {
      *rpad_nls += 1;
    }
  }

  /* Region 2: whatever lies between the two cursors is the trimmed body. */
  *trimmed_nls = 0;
  *trimmed_last_line_columns = 0;
  for (; head < tail; head++) {
    if (content[head] != '\n') {
      *trimmed_last_line_columns += 1;
      continue;
    }
    *trimmed_nls += 1;
    *trimmed_last_line_columns = 0;
  }
}

/*
 * Moves a (line, column) source position forward to the first '|' found in
 * the NUL-terminated string `p`.
 *
 * On entry *start_line / *start_column describe the position of p[0]. Each
 * byte before the pipe advances the column by one; a '\n' advances the line
 * and resets the column to 1. The outputs are only overwritten when a '|' is
 * actually found -- if the string ends first, both are left untouched.
 */
static void advance_sourcepos_to_table_start(unsigned char *p,
                                             int *start_line, int *start_column) {
  const unsigned char *cursor = p;
  int cur_line = *start_line;
  int cur_column = *start_column;

  for (; *cursor != '\0' && *cursor != '|'; cursor++) {
    if (*cursor == '\n') {
      cur_line += 1;
      cur_column = 1;
    } else {
      cur_column += 1;
    }
  }

  /* Ran off the end of the string without seeing a pipe: nothing to report. */
  if (*cursor != '|') {
    return;
  }

  *start_line = cur_line;
  *start_column = cur_column;
}

static cmark_node *try_inserting_table_header_paragraph(cmark_parser *parser,
cmark_node *parent_container,
unsigned char *parent_string,
int paragraph_offset) {
cmark_node *paragraph;
cmark_strbuf *paragraph_content;
int lpad_nls = 0;
int lpad_last_line_columns = 0;
int trimmed_nls = 0;
int trimmed_last_line_columns = 0;
int rpad_nls = 0;

paragraph = cmark_node_new_with_mem(CMARK_NODE_PARAGRAPH, parser->mem);
if (parser->options & CMARK_OPT_SOURCEPOS) {
count_header_paragraph_newlines(parent_string, paragraph_offset,
&lpad_nls, &lpad_last_line_columns,
&trimmed_nls, &trimmed_last_line_columns,
&rpad_nls);
paragraph->start_line = parent_container->start_line + lpad_nls;
paragraph->start_column = lpad_nls > 0
? lpad_last_line_columns
: parent_container->start_column + lpad_last_line_columns;
paragraph->end_line = parent_container->start_line + lpad_nls + trimmed_nls;
paragraph->end_column = trimmed_last_line_columns;
}

paragraph_content = unescape_pipes(parser->mem, parent_string, paragraph_offset);
cmark_strbuf_trim(paragraph_content);
Expand All @@ -219,7 +300,15 @@ static void try_inserting_table_header_paragraph(cmark_parser *parser,

if (!cmark_node_insert_before(parent_container, paragraph)) {
parser->mem->free(paragraph);
return NULL;
}
if (parser->options & CMARK_OPT_SOURCEPOS) {
parent_container->start_line += lpad_nls + trimmed_nls + rpad_nls;
parent_container->start_column = 1;
advance_sourcepos_to_table_start(parent_string + paragraph_offset,
&parent_container->start_line, &parent_container->start_column);
}
return paragraph;
}

static cmark_node *try_opening_table_header(cmark_syntax_extension *self,
Expand All @@ -231,6 +320,7 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self,
table_row *marker_row = NULL;
node_table_row *ntr;
const char *parent_string;
int start_column_offset = 0;
uint16_t i;

if (!scan_table_start(input, len, cmark_parser_get_first_nonspace(parser))) {
Expand Down Expand Up @@ -273,8 +363,11 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self,
}

if (header_row->paragraph_offset) {
try_inserting_table_header_paragraph(parser, parent_container, (unsigned char *)parent_string,
header_row->paragraph_offset);
cmark_node *paragraph = try_inserting_table_header_paragraph(parser,
parent_container, (unsigned char *)parent_string, header_row->paragraph_offset);
if (paragraph) {
start_column_offset = header_row->paragraph_offset;
}
}

cmark_node_set_syntax_extension(parent_container, self);
Expand All @@ -301,7 +394,8 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self,
cmark_parser_add_child(parser, parent_container, CMARK_NODE_TABLE_ROW,
parent_container->start_column);
cmark_node_set_syntax_extension(table_header, self);
table_header->end_column = parent_container->start_column + (int)strlen(parent_string) - 2;
table_header->end_column =
parent_container->start_column + (int)strlen(parent_string) - 2 - start_column_offset;
table_header->start_line = table_header->end_line = parent_container->start_line;

table_header->as.opaque = ntr = (node_table_row *)parser->mem->calloc(1, sizeof(node_table_row));
Expand All @@ -313,10 +407,12 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension *self,
for (tmp = header_row->cells; tmp; tmp = tmp->next) {
node_cell *cell = (node_cell *) tmp->data;
cmark_node *header_cell = cmark_parser_add_child(parser, table_header,
CMARK_NODE_TABLE_CELL, parent_container->start_column + cell->start_offset);
CMARK_NODE_TABLE_CELL,
parent_container->start_column + cell->start_offset - start_column_offset);
header_cell->start_line = header_cell->end_line = parent_container->start_line;
header_cell->internal_offset = cell->internal_offset;
header_cell->end_column = parent_container->start_column + cell->end_offset;
header_cell->end_column =
parent_container->start_column + cell->end_offset - start_column_offset;
cmark_node_set_string_content(header_cell, (char *) cell->buf->ptr);
cmark_node_set_syntax_extension(header_cell, self);
}
Expand Down
72 changes: 70 additions & 2 deletions src/blocks.c
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -207,8 +207,37 @@ static void add_line(cmark_node *node, cmark_chunk *ch, cmark_parser *parser) {
cmark_strbuf_putc(&node->content, ' ');
}
}

// If inserting the initial line to the node...
if (node->content.size == 0
// OR the node is a code block...
|| node->type == CMARK_NODE_CODE_BLOCK
// OR the node is a HTML block.
|| node->type == CMARK_NODE_HTML_BLOCK) {

// Then do not insert the leading trivia.
cmark_strbuf_put(&node->content, ch->data + parser->offset,
ch->len - parser->offset);
} else {
// Special case for maintaining the source position of block quotes
// as they can be lazy (i.e. the block quote marker can be omitted).
//
// The simple solution is to replace any block quote markers (">")
// present in the leading trivia with whitespace.
//
// Note: Using `parser->offset` and not `parser->first_nonspace`
// because the latter encompasses the former with the addition of
// whitespace (which we are not interested in).
assert(parser->offset <= parser->first_nonspace);
for (int i = 0; i < parser->offset; i++) {
if (peek_at(ch, i) == '>')
ch->data[i] = ' ';
}

// Otherwise, do not remove leading trivia for appends (i.e. lines
// other than the first).
cmark_strbuf_put(&node->content, ch->data, ch->len);
}
}

static void remove_trailing_blank_lines(cmark_strbuf *ln) {
Expand Down Expand Up @@ -266,6 +295,12 @@ static bool resolve_reference_link_definitions(

chunk.data += pos;
chunk.len -= pos;

// Leading whitespace is not stripped.
while (cmark_isspace(peek_at(&chunk, 0))) {
chunk.data += 1;
chunk.len -= 1;
}
}
cmark_strbuf_drop(node_content, (node_content->size - chunk.len));
return !is_blank(&b->content, 0);
Expand All @@ -283,13 +318,33 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
CMARK_NODE__OPEN); // shouldn't call finalize on closed blocks
b->flags &= ~CMARK_NODE__OPEN;

if (parser->curline.size == 0) {
if (S_type(b) == CMARK_NODE_THEMATIC_BREAK) {
// Already been "finalized".
return parent;
}

if (S_type(b) == CMARK_NODE_HEADING && !b->as.heading.setext) {
parser->last_line_length += b->end_column;
}

if ((S_type(b) == CMARK_NODE_ITEM || S_type(b) == CMARK_NODE_LIST)
&& b->last_child) {
b->end_line = b->last_child->end_line;
b->end_column = b->last_child->end_column;

if (S_type(b) == CMARK_NODE_ITEM && b->parent) {
// The finalization order is not deterministic...
b->parent->end_line = b->end_line;
b->parent->end_column = b->end_column;
}
} else if (parser->curline.size == 0) {
// end of input - line number has not been incremented
b->end_line = parser->line_number;
b->end_column = parser->last_line_length;
} else if (S_type(b) == CMARK_NODE_DOCUMENT ||
(S_type(b) == CMARK_NODE_CODE_BLOCK && b->as.code.fenced) ||
(S_type(b) == CMARK_NODE_HEADING && b->as.heading.setext)) {
(S_type(b) == CMARK_NODE_HTML_BLOCK
&& b->end_line == b->start_line && b->end_column == 0)) {
b->end_line = parser->line_number;
b->end_column = parser->curline.size;
if (b->end_column && parser->curline.ptr[b->end_column - 1] == '\n')
Expand Down Expand Up @@ -1181,6 +1236,10 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
// it's only now that we know the line is not part of a setext heading:
*container = add_child(parser, *container, CMARK_NODE_THEMATIC_BREAK,
parser->first_nonspace + 1);
// A thematic break can only be on a single line, so we can set the
// end source position here.
(*container)->end_line = parser->line_number;
(*container)->end_column = input->len - 1;
S_advance_offset(parser, input, input->len - 1 - parser->offset, false);
} else if (!indented &&
parser->options & CMARK_OPT_FOOTNOTES &&
Expand Down Expand Up @@ -1342,6 +1401,12 @@ static void add_text_to_container(cmark_parser *parser, cmark_node *container,
} else { // not a lazy continuation
// Finalize any blocks that were not matched and set cur to container:
while (parser->current != last_matched_container) {
if (S_type(parser->current) == CMARK_NODE_HTML_BLOCK) {
// Edge case: Closing an HTML block without a matching end condition.
parser->current->end_line = parser->line_number - 1;
parser->current->end_column = parser->last_line_length;
}

parser->current = finalize(parser, parser->current);
assert(parser->current != NULL);
}
Expand Down Expand Up @@ -1392,7 +1457,10 @@ static void add_text_to_container(cmark_parser *parser, cmark_node *container,
} else if (accepts_lines(S_type(container))) {
if (S_type(container) == CMARK_NODE_HEADING &&
container->as.heading.setext == false) {
bufsize_t original_len = input->len;
chop_trailing_hashtags(input);
// Subtract one to exclude the trailing newline.
container->end_column += original_len - input->len - 1;
}
S_advance_offset(parser, input, parser->first_nonspace - parser->offset,
false);
Expand Down
5 changes: 5 additions & 0 deletions src/buffer.c
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,11 @@ void cmark_strbuf_trim(cmark_strbuf *buf) {
cmark_strbuf_rtrim(buf);
}

/*
 * Removes `len` bytes starting at byte offset `start_offset` from `buf`,
 * shifting any bytes that follow the removed range down to close the gap.
 *
 * Out-of-range requests are handled defensively instead of being handed to
 * memmove with a negative (i.e. huge, once converted to size_t) length:
 *   - a negative `start_offset`, a non-positive `len`, or a `start_offset`
 *     at or past the end of the buffer is a no-op;
 *   - a `len` that runs past the end of the buffer is clamped so that the
 *     removal stops at the end.
 */
void cmark_strbuf_remove(cmark_strbuf *buf, bufsize_t start_offset, bufsize_t len) {
  bufsize_t tail_offset;

  if (start_offset < 0 || len <= 0 || start_offset >= buf->size) {
    return;
  }

  /* Compare against the remaining length rather than computing
   * start_offset + len first, so the check itself cannot overflow. */
  if (len > buf->size - start_offset) {
    len = buf->size - start_offset;
  }

  tail_offset = start_offset + len;
  memmove(buf->ptr + start_offset, buf->ptr + tail_offset,
          (size_t)(buf->size - tail_offset));
  buf->size -= len;

  /* Re-terminate at the new end. The index is strictly below the previous
   * size, so this write is always inside the existing data.
   * NOTE(review): assumes strbuf contents are expected to stay
   * NUL-terminated at ptr[size], as with the other mutators -- confirm. */
  buf->ptr[buf->size] = '\0';
}

// Destructively modify string, collapsing consecutive
// space and newline characters into a single space.
void cmark_strbuf_normalize_whitespace(cmark_strbuf *s) {
Expand Down
10 changes: 10 additions & 0 deletions src/buffer.h
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,16 @@ void cmark_strbuf_rtrim(cmark_strbuf *buf);
CMARK_GFM_EXPORT
void cmark_strbuf_trim(cmark_strbuf *buf);

/**
Removes a contiguous range of bytes from the buffer.

Bytes located after the removed range are shifted down to close the gap,
and the buffer's size shrinks by the number of bytes removed.

Callers should ensure that `0 <= start_offset`, `0 <= len`, and
`start_offset + len <= buf->size`, i.e. that the whole range lies within
the buffer's current contents.

@param buf The string buffer.
@param start_offset The byte offset of the first character to remove.
@param len The number of bytes to remove.
*/
CMARK_GFM_EXPORT
void cmark_strbuf_remove(cmark_strbuf *buf, bufsize_t start_offset, bufsize_t len);

CMARK_GFM_EXPORT
void cmark_strbuf_normalize_whitespace(cmark_strbuf *s);

Expand Down
Loading