From b8af8792d1aba480b68baa36a2d687355f6f6bd1 Mon Sep 17 00:00:00 2001 From: Mathieu Duponchelle Date: Sun, 28 Feb 2016 20:47:26 +0100 Subject: [PATCH 01/20] Implement and expose block parsing API. + Document the parser structure for internal usage. --- src/CMakeLists.txt | 2 + src/blocks.c | 36 ++++++++++ src/cmark_extension_api.h | 145 ++++++++++++++++++++++++++++++++++++++ src/parser.h | 15 ++++ 4 files changed, 198 insertions(+) create mode 100644 src/cmark_extension_api.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2de501aa1..f4fda4386 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -3,6 +3,7 @@ set(LIBRARY "libcmark") set(STATICLIBRARY "libcmark_static") set(HEADERS cmark.h + cmark_extension_api.h parser.h buffer.h node.h @@ -133,6 +134,7 @@ install(TARGETS ${PROGRAM} ${LIBRARY} ${STATICLIBRARY} install(FILES cmark.h + cmark_extension_api.h ${CMAKE_CURRENT_BINARY_DIR}/cmark_export.h ${CMAKE_CURRENT_BINARY_DIR}/cmark_version.h DESTINATION include diff --git a/src/blocks.c b/src/blocks.c index 2386bb184..904a6ffc6 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -1185,3 +1185,39 @@ cmark_node *cmark_parser_finish(cmark_parser *parser) { #endif return parser->root; } + +int cmark_parser_get_line_number(cmark_parser *parser) { + return parser->line_number; +} + +bufsize_t cmark_parser_get_offset(cmark_parser *parser) { + return parser->offset; +} + +bufsize_t cmark_parser_get_column(cmark_parser *parser) { + return parser->column; +} + +int cmark_parser_get_first_nonspace(cmark_parser *parser) { + return parser->first_nonspace; +} + +int cmark_parser_get_first_nonspace_column(cmark_parser *parser) { + return parser->first_nonspace_column; +} + +int cmark_parser_get_indent(cmark_parser *parser) { + return parser->indent; +} + +int cmark_parser_is_blank(cmark_parser *parser) { + return parser->blank; +} + +int cmark_parser_has_partially_consumed_tab(cmark_parser *parser) { + return parser->partially_consumed_tab; +} + +int 
cmark_parser_get_last_line_length(cmark_parser *parser) { + return parser->last_line_length; +} diff --git a/src/cmark_extension_api.h b/src/cmark_extension_api.h new file mode 100644 index 000000000..ca45adffa --- /dev/null +++ b/src/cmark_extension_api.h @@ -0,0 +1,145 @@ +#ifndef CMARK_EXTENSION_API_H +#define CMARK_EXTENSION_API_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +/** Return the index of the line currently being parsed, starting with 1. + */ +CMARK_EXPORT +int cmark_parser_get_line_number(cmark_parser *parser); + +/** Return the offset in bytes in the line being processed. + * + * Example: + * + * ### foo + * + * Here, offset will first be 0, then 5 (the index of the 'f' character). + */ +CMARK_EXPORT +int cmark_parser_get_offset(cmark_parser *parser); + +/** + * Return the offset in 'columns' in the line being processed. + * + * This value may differ from the value returned by + * cmark_parser_get_offset() in that it accounts for tabs, + * and as such should not be used as an index in the current line's + * buffer. + * + * Example: + * + * cmark_parser_advance_offset() can be called to advance the + * offset by a number of columns, instead of a number of bytes. + * + * In that case, if offset falls "in the middle" of a tab + * character, 'column' and offset will differ. + * + * ``` + * foo \t bar + * ^ ^^ + * offset (0) 20 + * ``` + * + * If cmark_parser_advance_offset is called here with 'columns' + * set to 'true' and 'offset' set to 22, cmark_parser_get_offset() + * will return 20, whereas cmark_parser_get_column() will return + * 22. + * + * Additionally, as tabs expand to the next multiple of 4 column, + * cmark_parser_has_partially_consumed_tab() will now return + * 'true'. + */ +CMARK_EXPORT +int cmark_parser_get_column(cmark_parser *parser); + +/** Return the absolute index in bytes of the first nonspace + * character coming after the offset as returned by + * cmark_parser_get_offset() in the line currently being processed. 
+ * + * Example: + * + * ``` + * foo bar baz \n + * ^ ^ ^ + * 0 offset (16) first_nonspace (28) + * ``` + */ +CMARK_EXPORT +int cmark_parser_get_first_nonspace(cmark_parser *parser); + +/** Return the absolute index of the first nonspace column coming after 'offset' + * in the line currently being processed, counting tabs as multiple + * columns as appropriate. + * + * See the documentation for cmark_parser_get_first_nonspace() and + * cmark_parser_get_column() for more information. + */ +CMARK_EXPORT +int cmark_parser_get_first_nonspace_column(cmark_parser *parser); + +/** Return the difference between the values returned by + * cmark_parser_get_first_nonspace_column() and + * cmark_parser_get_column(). + * + * This is not a byte offset, as it can count one tab as multiple + * characters. + */ +CMARK_EXPORT +int cmark_parser_get_indent(cmark_parser *parser); + +/** Return 'true' if the line currently being processed has been entirely + * consumed, 'false' otherwise. + * + * Example: + * + * ``` + * foo bar baz \n + * ^ + * offset + * ``` + * + * This function will return 'false' here. + * + * ``` + * foo bar baz \n + * ^ + * offset + * ``` + * This function will still return 'false'. + * + * ``` + * foo bar baz \n + * ^ + * offset + * ``` + * + * At this point, this function will now return 'true'. + */ +CMARK_EXPORT +int cmark_parser_is_blank(cmark_parser *parser); + +/** Return 'true' if the value returned by cmark_parser_get_offset() + * is 'inside' an expanded tab. + * + * See the documentation for cmark_parser_get_column() for more + * information. + */ +CMARK_EXPORT +int cmark_parser_has_partially_consumed_tab(cmark_parser *parser); + +/** Return the length in bytes of the previously processed line, excluding potential + * newline (\n) and carriage return (\r) trailing characters. 
+ */ +CMARK_EXPORT +int cmark_parser_get_last_line_length(cmark_parser *parser); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/parser.h b/src/parser.h index 0c5033bd2..ef2e747ec 100644 --- a/src/parser.h +++ b/src/parser.h @@ -14,20 +14,35 @@ extern "C" { struct cmark_parser { struct cmark_mem *mem; + /* A hashtable of urls in the current document for cross-references */ struct cmark_reference_map *refmap; + /* The root node of the parser, always a CMARK_NODE_DOCUMENT */ struct cmark_node *root; + /* The last open block after a line is fully processed */ struct cmark_node *current; + /* See the documentation for cmark_parser_get_line_number() in cmark_extension_api.h */ int line_number; + /* See the documentation for cmark_parser_get_offset() in cmark_extension_api.h */ bufsize_t offset; + /* See the documentation for cmark_parser_get_column() in cmark_extension_api.h */ bufsize_t column; + /* See the documentation for cmark_parser_get_first_nonspace() in cmark_extension_api.h */ bufsize_t first_nonspace; + /* See the documentation for cmark_parser_get_first_nonspace_column() in cmark_extension_api.h */ bufsize_t first_nonspace_column; + /* See the documentation for cmark_parser_get_indent() in cmark_extension_api.h */ int indent; + /* See the documentation for cmark_parser_is_blank() in cmark_extension_api.h */ bool blank; + /* See the documentation for cmark_parser_has_partially_consumed_tab() in cmark_extension_api.h */ bool partially_consumed_tab; + /* Contains the currently processed line */ cmark_strbuf curline; + /* See the documentation for cmark_parser_get_last_line_length() in cmark_extension_api.h */ bufsize_t last_line_length; + /* FIXME: not sure about the difference with curline */ cmark_strbuf linebuf; + /* Options set by the user, see the Options section in cmark.h */ int options; bool last_buffer_ended_with_cr; }; From f2e9eff2eb9aafc31b472029694f87e94f5bb0fc Mon Sep 17 00:00:00 2001 From: Mathieu Duponchelle Date: Sun, 28 Feb 2016 05:49:46 +0100 Subject: [PATCH 02/20] [API]: parser: Expose 'setter' methods in cmark_parser.
As opposed to the previous commit, where I exposed getters for the private parser structure, this exposes two methods that will influence parsing, cmark_parser_add_child and cmark_parser_advance_offset. --- src/blocks.c | 16 ++++++++++++++++ src/cmark_extension_api.h | 23 +++++++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/src/blocks.c b/src/blocks.c index 904a6ffc6..94434752f 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -1221,3 +1221,19 @@ int cmark_parser_has_partially_consumed_tab(cmark_parser *parser) { int cmark_parser_get_last_line_length(cmark_parser *parser) { return parser->last_line_length; } + +cmark_node *cmark_parser_add_child(cmark_parser *parser, + cmark_node *parent, + cmark_node_type block_type, + int start_column) { + return add_child(parser, parent, block_type, start_column); +} + +void cmark_parser_advance_offset(cmark_parser *parser, + const char *input, + int count, + int columns) { + cmark_chunk input_chunk = cmark_chunk_literal(input); + + S_advance_offset(parser, &input_chunk, count, columns); +} diff --git a/src/cmark_extension_api.h b/src/cmark_extension_api.h index ca45adffa..bf0d97f0d 100644 --- a/src/cmark_extension_api.h +++ b/src/cmark_extension_api.h @@ -138,6 +138,29 @@ int cmark_parser_has_partially_consumed_tab(cmark_parser *parser); CMARK_EXPORT int cmark_parser_get_last_line_length(cmark_parser *parser); +/** Add a child to 'parent' during the parsing process. + * + * If 'parent' isn't the kind of node that can accept this child, + * this function will back up till it hits a node that can, closing + * blocks as appropriate. + */ +CMARK_EXPORT +cmark_node*cmark_parser_add_child(cmark_parser *parser, + cmark_node *parent, + cmark_node_type block_type, + int start_column); + +/** Advance the 'offset' of the parser in the current line. + * + * See the documentation of cmark_parser_get_offset() and + * cmark_parser_get_column() for more information. 
+ */ +CMARK_EXPORT +void cmark_parser_advance_offset(cmark_parser *parser, + const char *input, + int count, + int columns); + #ifdef __cplusplus } #endif From 96f966e1d730a85b38465a0843bf0c04edc20af1 Mon Sep 17 00:00:00 2001 From: Mathieu Duponchelle Date: Sun, 28 Feb 2016 06:53:23 +0100 Subject: [PATCH 03/20] [API]: node: implement and expose cmark_node_set_type. This is necessary for extensions implementing vertical rules. An example is setext headings: A heading --------- When cmark parses 'A heading', it first creates a paragraph block to contain it, it's only when cmark parses the second line that the type of the block is changed to the CMARK_NODE_TYPE_HEADING type. --- src/cmark_extension_api.h | 6 +++ src/node.c | 77 +++++++++++++++++++++++++++------------ 2 files changed, 59 insertions(+), 24 deletions(-) diff --git a/src/cmark_extension_api.h b/src/cmark_extension_api.h index bf0d97f0d..40101a4c8 100644 --- a/src/cmark_extension_api.h +++ b/src/cmark_extension_api.h @@ -161,6 +161,12 @@ void cmark_parser_advance_offset(cmark_parser *parser, int count, int columns); +/** Change the type of 'node'. + * + * Return 1 if the type could be changed, 0 otherwise.
+ */ +CMARK_EXPORT int cmark_node_set_type(cmark_node *node, cmark_node_type type); + #ifdef __cplusplus } #endif diff --git a/src/node.c b/src/node.c index 62acf14b9..d5e09d239 100644 --- a/src/node.c +++ b/src/node.c @@ -105,35 +105,41 @@ cmark_node *cmark_node_new(cmark_node_type type) { return cmark_node_new_with_mem(type, &DEFAULT_MEM_ALLOCATOR); } +static void free_node_as(cmark_node *node) { + switch (node->type) { + case CMARK_NODE_CODE_BLOCK: + cmark_chunk_free(NODE_MEM(node), &node->as.code.info); + cmark_chunk_free(NODE_MEM(node), &node->as.code.literal); + break; + case CMARK_NODE_TEXT: + case CMARK_NODE_HTML_INLINE: + case CMARK_NODE_CODE: + case CMARK_NODE_HTML_BLOCK: + cmark_chunk_free(NODE_MEM(node), &node->as.literal); + break; + case CMARK_NODE_LINK: + case CMARK_NODE_IMAGE: + cmark_chunk_free(NODE_MEM(node), &node->as.link.url); + cmark_chunk_free(NODE_MEM(node), &node->as.link.title); + break; + case CMARK_NODE_CUSTOM_BLOCK: + case CMARK_NODE_CUSTOM_INLINE: + cmark_chunk_free(NODE_MEM(node), &node->as.custom.on_enter); + cmark_chunk_free(NODE_MEM(node), &node->as.custom.on_exit); + break; + default: + break; + } +} + // Free a cmark_node list and any children. 
static void S_free_nodes(cmark_node *e) { cmark_node *next; while (e != NULL) { cmark_strbuf_free(&e->content); - switch (e->type) { - case CMARK_NODE_CODE_BLOCK: - cmark_chunk_free(NODE_MEM(e), &e->as.code.info); - cmark_chunk_free(NODE_MEM(e), &e->as.code.literal); - break; - case CMARK_NODE_TEXT: - case CMARK_NODE_HTML_INLINE: - case CMARK_NODE_CODE: - case CMARK_NODE_HTML_BLOCK: - cmark_chunk_free(NODE_MEM(e), &e->as.literal); - break; - case CMARK_NODE_LINK: - case CMARK_NODE_IMAGE: - cmark_chunk_free(NODE_MEM(e), &e->as.link.url); - cmark_chunk_free(NODE_MEM(e), &e->as.link.title); - break; - case CMARK_NODE_CUSTOM_BLOCK: - case CMARK_NODE_CUSTOM_INLINE: - cmark_chunk_free(NODE_MEM(e), &e->as.custom.on_enter); - cmark_chunk_free(NODE_MEM(e), &e->as.custom.on_exit); - break; - default: - break; - } + + free_node_as(e); + if (e->last_child) { // Splice children into list e->last_child->next = e->next; @@ -159,6 +165,29 @@ cmark_node_type cmark_node_get_type(cmark_node *node) { } } +int cmark_node_set_type(cmark_node * node, cmark_node_type type) { + cmark_node_type initial_type; + + if (type == node->type) + return 1; + + initial_type = node->type; + node->type = type; + + if (!S_can_contain(node->parent, node)) { + node->type = initial_type; + return 0; + } + + /* We rollback the type to free the union members appropriately */ + node->type = initial_type; + free_node_as(node); + + node->type = type; + + return 1; +} + const char *cmark_node_get_type_string(cmark_node *node) { if (node == NULL) { return "NONE"; From 8075d6411a8baeb5d11a2dcfdfa460a2c7c74d8a Mon Sep 17 00:00:00 2001 From: Mathieu Duponchelle Date: Sun, 28 Feb 2016 08:05:54 +0100 Subject: [PATCH 04/20] [API]: node: implement and expose cmark_node_set_user_data_free_func. Ideally, this would be passed in set_user_data, but this would break API. 
--- src/cmark.h | 7 +++++++ src/node.c | 12 ++++++++++++ src/node.h | 1 + 3 files changed, 20 insertions(+) diff --git a/src/cmark.h b/src/cmark.h index 45d7bc324..5c03b9e87 100644 --- a/src/cmark.h +++ b/src/cmark.h @@ -87,6 +87,8 @@ typedef struct cmark_node cmark_node; typedef struct cmark_parser cmark_parser; typedef struct cmark_iter cmark_iter; +typedef void (*cmark_free_func) (void *user_data); + /** * ## Custom memory allocator support */ @@ -253,6 +255,11 @@ CMARK_EXPORT void *cmark_node_get_user_data(cmark_node *node); */ CMARK_EXPORT int cmark_node_set_user_data(cmark_node *node, void *user_data); +/** Set free function for user data */ +CMARK_EXPORT +int cmark_node_set_user_data_free_func(cmark_node *node, + cmark_free_func free_func); + /** Returns the type of 'node', or `CMARK_NODE_NONE` on error. */ CMARK_EXPORT cmark_node_type cmark_node_get_type(cmark_node *node); diff --git a/src/node.c b/src/node.c index d5e09d239..081e533a8 100644 --- a/src/node.c +++ b/src/node.c @@ -138,6 +138,9 @@ static void S_free_nodes(cmark_node *e) { while (e != NULL) { cmark_strbuf_free(&e->content); + if (e->user_data && e->user_data_free_func) + e->user_data_free_func(e->user_data); + free_node_as(e); if (e->last_child) { @@ -297,6 +300,15 @@ int cmark_node_set_user_data(cmark_node *node, void *user_data) { return 1; } +int cmark_node_set_user_data_free_func(cmark_node *node, + cmark_free_func free_func) { + if (node == NULL) { + return 0; + } + node->user_data_free_func = free_func; + return 1; +} + const char *cmark_node_get_literal(cmark_node *node) { if (node == NULL) { return NULL; diff --git a/src/node.h b/src/node.h index 65d857f0b..bafa730fd 100644 --- a/src/node.h +++ b/src/node.h @@ -61,6 +61,7 @@ struct cmark_node { struct cmark_node *last_child; void *user_data; + cmark_free_func user_data_free_func; int start_line; int start_column; From 0839b4bc274a5337d71bbcd8be4273aeac794857 Mon Sep 17 00:00:00 2001 From: Mathieu Duponchelle Date: Sun, 28 Feb 2016 
08:21:02 +0100 Subject: [PATCH 05/20] [API]: node: Expose string_content. By implementing cmark_node_get_string_content and cmark_node_set_string_content. This is useful for vertical rules in extensions, as they may need to access it in order to decide whether to update the block. Unfortunately, this overlaps with get_literal and set_literal. As far as I can tell we should deprecate these functions, and have them follow the get_string_content code path and set_string_content for the while. --- src/cmark_extension_api.h | 10 ++++++++++ src/node.c | 9 +++++++++ 2 files changed, 19 insertions(+) diff --git a/src/cmark_extension_api.h b/src/cmark_extension_api.h index 40101a4c8..c097bb185 100644 --- a/src/cmark_extension_api.h +++ b/src/cmark_extension_api.h @@ -167,6 +167,16 @@ void cmark_parser_advance_offset(cmark_parser *parser, */ CMARK_EXPORT int cmark_node_set_type(cmark_node *node, cmark_node_type type); +/** Return the string content for all types of 'node'. + * The pointer stays valid as long as 'node' isn't freed. + */ +CMARK_EXPORT const char *cmark_node_get_string_content(cmark_node *node); + +/** Set the string 'content' for all types of 'node'. + * Copies 'content'. 
+ */ +CMARK_EXPORT int cmark_node_set_string_content(cmark_node *node, const char *content); + #ifdef __cplusplus } #endif diff --git a/src/node.c b/src/node.c index 081e533a8..8e7c3dec2 100644 --- a/src/node.c +++ b/src/node.c @@ -355,6 +355,15 @@ int cmark_node_set_literal(cmark_node *node, const char *content) { return 0; } +const char *cmark_node_get_string_content(cmark_node *node) { + return (char *) node->content.ptr; +} + +int cmark_node_set_string_content(cmark_node *node, const char *content) { + cmark_strbuf_sets(&node->content, content); + return true; +} + int cmark_node_get_heading_level(cmark_node *node) { if (node == NULL) { return 0; From 19ffd43cae38663ebe255545f019672d694271ed Mon Sep 17 00:00:00 2001 From: Mathieu Duponchelle Date: Sun, 6 Mar 2016 01:30:12 +0100 Subject: [PATCH 06/20] [API]: node: set and get code blocks fenced state. --- src/cmark.h | 9 +++++++++ src/node.c | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/src/cmark.h b/src/cmark.h index 5c03b9e87..318db498f 100644 --- a/src/cmark.h +++ b/src/cmark.h @@ -339,6 +339,15 @@ CMARK_EXPORT const char *cmark_node_get_fence_info(cmark_node *node); */ CMARK_EXPORT int cmark_node_set_fence_info(cmark_node *node, const char *info); +/** Sets code blocks fencing details + */ +CMARK_EXPORT int cmark_node_set_fenced(cmark_node * node, int fenced, + int length, int offset, char character); + +/** Returns code blocks fencing details + */ +CMARK_EXPORT int cmark_node_get_fenced(cmark_node *node, int *length, int *offset, char *character); + /** Returns the URL of a link or image 'node', or an empty string if no URL is set. 
*/ diff --git a/src/node.c b/src/node.c index 8e7c3dec2..00e599125 100644 --- a/src/node.c +++ b/src/node.c @@ -530,6 +530,38 @@ int cmark_node_set_fence_info(cmark_node *node, const char *info) { } } +int cmark_node_get_fenced(cmark_node *node, int *length, int *offset, char *character) { + if (node == NULL) { + return 0; + } + + if (node->type == CMARK_NODE_CODE_BLOCK) { + *length = node->as.code.fence_length; + *offset = node->as.code.fence_offset; + *character = node->as.code.fence_char; + return node->as.code.fenced; + } else { + return 0; + } +} + +int cmark_node_set_fenced(cmark_node * node, int fenced, + int length, int offset, char character) { + if (node == NULL) { + return 0; + } + + if (node->type == CMARK_NODE_CODE_BLOCK) { + node->as.code.fenced = fenced; + node->as.code.fence_length = length; + node->as.code.fence_offset = offset; + node->as.code.fence_char = character; + return 1; + } else { + return 0; + } +} + const char *cmark_node_get_url(cmark_node *node) { if (node == NULL) { return NULL; From 9d8a9dc9a677755e10633120ca7d0d7d3ef7d1f6 Mon Sep 17 00:00:00 2001 From: Mathieu Duponchelle Date: Sat, 27 Feb 2016 02:22:24 +0100 Subject: [PATCH 07/20] Check in and expose a linked list. 
--- src/CMakeLists.txt | 1 + src/cmark.h | 37 +++++++++++++++++++++++++++++++++++++ src/linked_list.c | 37 +++++++++++++++++++++++++++++++++++++ 3 files changed, 75 insertions(+) create mode 100644 src/linked_list.c diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f4fda4386..8baf7ed23 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -38,6 +38,7 @@ set(LIBRARY_SOURCES houdini_html_e.c houdini_html_u.c cmark_ctype.c + linked_list.c ${HEADERS} ) diff --git a/src/cmark.h b/src/cmark.h index 318db498f..5e04f0c81 100644 --- a/src/cmark.h +++ b/src/cmark.h @@ -102,6 +102,43 @@ typedef struct cmark_mem { void (*free)(void *); } cmark_mem; +/* + * ## Basic data structures + * + * To keep dependencies to the strict minimum, libcmark implements + * its own versions of "classic" data structures. + */ + +/** + * ### Linked list + */ + +/** A generic singly linked list. + */ +typedef struct _cmark_llist +{ + struct _cmark_llist *next; + void *data; +} cmark_llist; + +/** Append an element to the linked list, return the possibly modified + * head of the list. 
+ */ +CMARK_EXPORT +cmark_llist * cmark_llist_append (cmark_llist * head, + void * data); + +/** Free the list starting with 'head', calling 'free_func' with the + * data pointer of each of its elements + */ +CMARK_EXPORT +void cmark_llist_free_full (cmark_llist * head, + cmark_free_func free_func); + +/** Free the list starting with 'head' + */ +CMARK_EXPORT +void cmark_llist_free (cmark_llist * head); /** * ## Creating and Destroying Nodes diff --git a/src/linked_list.c b/src/linked_list.c new file mode 100644 index 000000000..7d6690dae --- /dev/null +++ b/src/linked_list.c @@ -0,0 +1,37 @@ +#include + +#include "cmark.h" + +cmark_llist *cmark_llist_append(cmark_llist *head, void *data) { + cmark_llist *tmp; + cmark_llist *new_node = (cmark_llist *) malloc(sizeof(cmark_llist)); + + new_node->data = data; + new_node->next = NULL; + + if (!head) + return new_node; + + for (tmp = head; tmp->next; tmp=tmp->next); + + tmp->next = new_node; + + return head; +} + +void cmark_llist_free_full(cmark_llist *head, cmark_free_func free_func) { + cmark_llist *tmp, *prev; + + for (tmp = head; tmp;) { + if (free_func) + free_func(tmp->data); + + prev = tmp; + tmp = tmp->next; + free(prev); + } +} + +void cmark_llist_free(cmark_llist *head) { + cmark_llist_free_full(head, NULL); +} From d2f829ff2b44e40b843a107db56da87833cbdd87 Mon Sep 17 00:00:00 2001 From: Mathieu Duponchelle Date: Thu, 28 Apr 2016 00:46:22 +0200 Subject: [PATCH 08/20] Define syntax extensions And expose and implement block parsing hooks --- src/CMakeLists.txt | 2 + src/blocks.c | 52 ++++++++++++++++- src/cmark_extension_api.h | 120 ++++++++++++++++++++++++++++++++++++++ src/node.c | 17 ++++++ src/node.h | 3 + src/parser.h | 1 + src/syntax_extension.c | 27 +++++++++ src/syntax_extension.h | 13 +++++ 8 files changed, 233 insertions(+), 2 deletions(-) create mode 100644 src/syntax_extension.c create mode 100644 src/syntax_extension.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8baf7ed23..9f01ef4f6 
100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -16,6 +16,7 @@ set(HEADERS houdini.h cmark_ctype.h render.h + syntax_extension.h ) set(LIBRARY_SOURCES cmark.c @@ -39,6 +40,7 @@ set(LIBRARY_SOURCES houdini_html_u.c cmark_ctype.c linked_list.c + syntax_extension.c ${HEADERS} ) diff --git a/src/blocks.c b/src/blocks.c index 94434752f..7d057efed 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -10,6 +10,7 @@ #include #include "cmark_ctype.h" +#include "syntax_extension.h" #include "config.h" #include "parser.h" #include "cmark.h" @@ -80,6 +81,12 @@ static cmark_node *make_document(cmark_mem *mem) { return e; } +int cmark_parser_attach_syntax_extension(cmark_parser *parser, + cmark_syntax_extension *extension) { + parser->syntax_extensions = cmark_llist_append(parser->syntax_extensions, extension); + return 1; +} + cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem) { cmark_parser *parser = (cmark_parser *)mem->calloc(1, sizeof(cmark_parser)); parser->mem = mem; @@ -103,6 +110,7 @@ cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem) { parser->last_line_length = 0; parser->options = options; parser->last_buffer_ended_with_cr = false; + parser->syntax_extensions = NULL; return parser; } @@ -117,6 +125,7 @@ void cmark_parser_free(cmark_parser *parser) { cmark_strbuf_free(&parser->curline); cmark_strbuf_free(&parser->linebuf); cmark_reference_map_free(parser->refmap); + cmark_llist_free(parser->syntax_extensions); mem->free(parser); } @@ -765,6 +774,21 @@ static bool parse_html_block_prefix(cmark_parser *parser, return res; } +static bool parse_extension_block(cmark_parser *parser, + cmark_node *container, + cmark_chunk *input) +{ + bool res = false; + + if (container->extension->last_block_matches) { + if (container->extension->last_block_matches( + container->extension, parser, input->data, input->len, container)) + res = true; + } + + return res; +} + /** * For each containing node, try to parse the associated line start. 
* @@ -786,6 +810,12 @@ static cmark_node *check_open_blocks(cmark_parser *parser, cmark_chunk *input, S_find_first_nonspace(parser, input); + if (container->extension) { + if (!parse_extension_block(parser, container, input)) + goto done; + continue; + } + switch (cont_type) { case CMARK_NODE_BLOCK_QUOTE: if (!parse_block_quote_prefix(parser, input)) @@ -981,9 +1011,27 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container, (*container)->as.code.fence_length = 0; (*container)->as.code.fence_offset = 0; (*container)->as.code.info = cmark_chunk_literal(""); - } else { - break; + cmark_llist *tmp; + cmark_node *new_container = NULL; + + for (tmp = parser->syntax_extensions; tmp; tmp=tmp->next) { + cmark_syntax_extension *ext = (cmark_syntax_extension *) tmp->data; + + if (ext->try_opening_block) { + new_container = ext->try_opening_block( + ext, indented, parser, *container, input->data, input->len); + + if (new_container) { + *container = new_container; + break; + } + } + } + + if (!new_container) { + break; + } } if (accepts_lines(S_type(*container))) { diff --git a/src/cmark_extension_api.h b/src/cmark_extension_api.h index c097bb185..8a2164a99 100644 --- a/src/cmark_extension_api.h +++ b/src/cmark_extension_api.h @@ -7,6 +7,106 @@ extern "C" { #include +/** + * ## Extension Support + * + * While the "core" of libcmark is strictly compliant with the + * specification, an API is provided for extension writers to + * hook into the parsing process. + * + * It should be noted that the cmark_node API already offers + * room for customization, with methods offered to traverse and + * modify the AST, and even define custom blocks. + * When the desired customization is achievable in an error-proof + * way using that API, it should be the preferred method. + * + * The following API requires a more in-depth understanding + * of libcmark's parsing strategy, which is exposed + * [here](http://spec.commonmark.org/0.24/#appendix-a-parsing-strategy). 
+ * + * It should be used when "a posteriori" modification of the AST + * proves to be too difficult / impossible to implement correctly. + * + * It can also serve as an intermediary step before extending + * the specification, as an extension implemented using this API + * will be trivially integrated in the core if it proves to be + * desirable. + */ + + +/** A syntax extension that can be attached to a cmark_parser + * with cmark_parser_attach_syntax_extension(). + * + * Extension writers should assign functions matching + * the signature of the following 'virtual methods' to + * implement new functionality. + * + * Their calling order and expected behaviour match the procedure outlined + * at http://spec.commonmark.org/0.24/#phase-1-block-structure : + * + * During step 1, cmark will call the function provided through + * 'cmark_syntax_extension_set_match_block_func' when it + * iterates over an open block created by this extension, + * to determine whether it could contain the new line. + * If no function was provided, cmark will close the block. + * + * During step 2, if and only if the new line doesn't match any + * of the standard syntax rules, cmark will call the function + * provided through 'cmark_syntax_extension_set_open_block_func' + * to let the extension determine whether that new line matches + * one of its syntax rules. + * It is the responsibility of the extension to create and add the + * new block with cmark_parser_make_block and cmark_parser_add_child. + * If no function was provided, the extension will have + * no effect at all on the final block structure of the AST. + */ +typedef struct cmark_syntax_extension cmark_syntax_extension; + +/** Should create and add a new open block to 'parent_container' if + * 'input' matches a syntax rule for that block type. It is allowed + * to modify the type of 'parent_container'. + * + * Should return the newly created block if there is one, or + * 'parent_container' if its type was modified, or NULL.
+ */ +typedef cmark_node * (*cmark_open_block_func) (cmark_syntax_extension *extension, + int indented, + cmark_parser *parser, + cmark_node *parent_container, + unsigned char *input, + int len); + +/** Should return 'true' if 'input' can be contained in 'container', + * 'false' otherwise. + */ +typedef int (*cmark_match_block_func) (cmark_syntax_extension *extension, + cmark_parser *parser, + unsigned char *input, + int len, + cmark_node *container); + +/** Free a cmark_syntax_extension. + */ +CMARK_EXPORT +void cmark_syntax_extension_free (cmark_syntax_extension *extension); + +/** Return a newly-constructed cmark_syntax_extension, named 'name'. + */ +CMARK_EXPORT +cmark_syntax_extension *cmark_syntax_extension_new (const char *name); + +/** See the documentation for 'cmark_syntax_extension' + */ +CMARK_EXPORT +void cmark_syntax_extension_set_open_block_func(cmark_syntax_extension *extension, + cmark_open_block_func func); + +/** See the documentation for 'cmark_syntax_extension' + */ +CMARK_EXPORT +void cmark_syntax_extension_set_match_block_func(cmark_syntax_extension *extension, + cmark_match_block_func func); + /** Return the index of the line currently being parsed, starting with 1. */ CMARK_EXPORT @@ -161,6 +261,16 @@ void cmark_parser_advance_offset(cmark_parser *parser, int count, int columns); +/** Attach the syntax 'extension' to the 'parser', to provide extra syntax + * rules. + * See the documentation for cmark_syntax_extension for more information. + * + * Returns 'true' if the 'extension' was successfully attached, + * 'false' otherwise. + */ +CMARK_EXPORT +int cmark_parser_attach_syntax_extension(cmark_parser *parser, cmark_syntax_extension *extension); + /** Change the type of 'node'. * * Return 1 if the type could be changed, 0 otherwise.
@@ -177,6 +287,16 @@ CMARK_EXPORT const char *cmark_node_get_string_content(cmark_node *node); */ CMARK_EXPORT int cmark_node_set_string_content(cmark_node *node, const char *content); +/** Get the syntax extension responsible for the creation of 'node'. + * Return NULL if 'node' was created because it matched standard syntax rules. + */ +CMARK_EXPORT cmark_syntax_extension *cmark_node_get_syntax_extension(cmark_node *node); + +/** Set the syntax extension responsible for creating 'node'. + */ +CMARK_EXPORT int cmark_node_set_syntax_extension(cmark_node *node, + cmark_syntax_extension *extension); + #ifdef __cplusplus } #endif diff --git a/src/node.c b/src/node.c index 00e599125..7cfa7147e 100644 --- a/src/node.c +++ b/src/node.c @@ -694,6 +694,23 @@ int cmark_node_set_on_exit(cmark_node *node, const char *on_exit) { return 0; } +cmark_syntax_extension *cmark_node_get_syntax_extension(cmark_node *node) { + if (node == NULL) { + return NULL; + } + + return node->extension; +} + +int cmark_node_set_syntax_extension(cmark_node *node, cmark_syntax_extension *extension) { + if (node == NULL) { + return 0; + } + + node->extension = extension; + return 1; +} + int cmark_node_get_start_line(cmark_node *node) { if (node == NULL) { return 0; diff --git a/src/node.h b/src/node.h index bafa730fd..93932aae0 100644 --- a/src/node.h +++ b/src/node.h @@ -9,6 +9,7 @@ extern "C" { #include #include "cmark.h" +#include "cmark_extension_api.h" #include "buffer.h" #include "chunk.h" @@ -70,6 +71,8 @@ struct cmark_node { uint16_t type; uint16_t flags; + cmark_syntax_extension *extension; + union { cmark_chunk literal; cmark_list list; diff --git a/src/parser.h b/src/parser.h index ef2e747ec..1129f3bae 100644 --- a/src/parser.h +++ b/src/parser.h @@ -45,6 +45,7 @@ struct cmark_parser { /* Options set by the user, see the Options section in cmark.h */ int options; bool last_buffer_ended_with_cr; + cmark_llist *syntax_extensions; }; #ifdef __cplusplus diff --git a/src/syntax_extension.c 
b/src/syntax_extension.c new file mode 100644 index 000000000..11a92c020 --- /dev/null +++ b/src/syntax_extension.c @@ -0,0 +1,27 @@ +#include + +#include "cmark.h" +#include "syntax_extension.h" +#include "buffer.h" + +void cmark_syntax_extension_free(cmark_syntax_extension *extension) { + free(extension->name); + free(extension); +} + +cmark_syntax_extension *cmark_syntax_extension_new(const char *name) { + cmark_syntax_extension *res = (cmark_syntax_extension *) calloc(1, sizeof(cmark_syntax_extension)); + res->name = (char *) malloc(sizeof(char) * (strlen(name)) + 1); + strcpy(res->name, name); + return res; +} + +void cmark_syntax_extension_set_open_block_func(cmark_syntax_extension *extension, + cmark_open_block_func func) { + extension->try_opening_block = func; +} + +void cmark_syntax_extension_set_match_block_func(cmark_syntax_extension *extension, + cmark_match_block_func func) { + extension->last_block_matches = func; +} diff --git a/src/syntax_extension.h b/src/syntax_extension.h new file mode 100644 index 000000000..5565b4523 --- /dev/null +++ b/src/syntax_extension.h @@ -0,0 +1,13 @@ +#ifndef SYNTAX_EXTENSION_H +#define SYNTAX_EXTENSION_H + +#include "cmark.h" +#include "cmark_extension_api.h" + +struct cmark_syntax_extension { + cmark_match_block_func last_block_matches; + cmark_open_block_func try_opening_block; + char * name; +}; + +#endif From 1bc58d0938b5f4b4f206e997d2c6710028a90237 Mon Sep 17 00:00:00 2001 From: Mathieu Duponchelle Date: Thu, 28 Apr 2016 00:52:02 +0200 Subject: [PATCH 09/20] Implement plugin loading and discovery Linux-only support --- api_test/CMakeLists.txt | 2 +- src/CMakeLists.txt | 8 +++ src/cmark.c | 11 +++ src/cmark.h | 16 +++++ src/cmark_extension_api.h | 62 +++++++++++++++++ src/config.h.in | 2 + src/libcmark.pc.in | 2 +- src/plugin.c | 33 +++++++++ src/plugin.h | 34 +++++++++ src/registry.c | 141 ++++++++++++++++++++++++++++++++++++++ src/registry.h | 18 +++++ 11 files changed, 327 insertions(+), 2 deletions(-) create 
mode 100644 src/plugin.c create mode 100644 src/plugin.h create mode 100644 src/registry.c create mode 100644 src/registry.h diff --git a/api_test/CMakeLists.txt b/api_test/CMakeLists.txt index 3151ccccb..5c247aba0 100644 --- a/api_test/CMakeLists.txt +++ b/api_test/CMakeLists.txt @@ -8,7 +8,7 @@ include_directories( ${PROJECT_SOURCE_DIR}/src ${PROJECT_BINARY_DIR}/src ) -target_link_libraries(api_test libcmark) +target_link_libraries(api_test libcmark ${CMAKE_DL_LIBS}) # Compiler flags if(MSVC) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 9f01ef4f6..a4f673bd0 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -16,7 +16,9 @@ set(HEADERS houdini.h cmark_ctype.h render.h + registry.h syntax_extension.h + plugin.h ) set(LIBRARY_SOURCES cmark.c @@ -41,6 +43,8 @@ set(LIBRARY_SOURCES cmark_ctype.c linked_list.c syntax_extension.c + registry.c + plugin.c ${HEADERS} ) @@ -85,6 +89,8 @@ set(CMAKE_LINKER_DEBUG "${CMAKE_LINKER_FLAGS_DEBUG}") set(CMAKE_C_FLAGS_PROFILE "${CMAKE_C_FLAGS_RELEASE} -pg") set(CMAKE_LINKER_PROFILE "${CMAKE_LINKER_FLAGS_RELEASE} -pg") +add_definitions(-DLIBDIR=\"${CMAKE_BINARY_DIR}\") + if (${CMAKE_MAJOR_VERSION} GREATER 1 AND ${CMAKE_MINOR_VERSION} GREATER 8) set(CMAKE_C_VISIBILITY_PRESET hidden) set(CMAKE_VISIBILITY_INLINES_HIDDEN 1) @@ -113,6 +119,8 @@ else() VERSION ${PROJECT_VERSION}) endif(MSVC) +target_link_libraries(cmark ${CMAKE_DL_LIBS}) + set_property(TARGET ${LIBRARY} APPEND PROPERTY MACOSX_RPATH true) diff --git a/src/cmark.c b/src/cmark.c index d5fef6382..ad2554cf8 100644 --- a/src/cmark.c +++ b/src/cmark.c @@ -1,6 +1,7 @@ #include #include #include +#include "registry.h" #include "node.h" #include "houdini.h" #include "cmark.h" @@ -35,3 +36,13 @@ char *cmark_markdown_to_html(const char *text, size_t len, int options) { return result; } + +int cmark_init(void) { + cmark_discover_plugins(); + return 1; +} + +int cmark_deinit(void) { + cmark_release_plugins(); + return 1; +} diff --git a/src/cmark.h b/src/cmark.h 
index 5e04f0c81..6322b5094 100644 --- a/src/cmark.h +++ b/src/cmark.h @@ -140,6 +140,22 @@ void cmark_llist_free_full (cmark_llist * head, CMARK_EXPORT void cmark_llist_free (cmark_llist * head); +/** + * ## Initialization + */ + +/** Initialize the cmark library. This will discover available plugins. + * Returns 'true' if initialization was successful, 'false' otherwise. + */ +CMARK_EXPORT +int cmark_init(void); + +/** Deinitialize the cmark library. This will release all plugins. + * Returns true if deinitialization was successful, 'false' otherwise. + */ +CMARK_EXPORT +int cmark_deinit(void); + /** * ## Creating and Destroying Nodes */ diff --git a/src/cmark_extension_api.h b/src/cmark_extension_api.h index 8a2164a99..e74f5676b 100644 --- a/src/cmark_extension_api.h +++ b/src/cmark_extension_api.h @@ -33,6 +33,7 @@ extern "C" { * desirable. */ +typedef struct cmark_plugin cmark_plugin; /** A syntax extension that can be attached to a cmark_parser * with cmark_parser_attach_syntax_extension(). @@ -62,6 +63,67 @@ extern "C" { */ typedef struct cmark_syntax_extension cmark_syntax_extension; +/** + * ### Plugin API. + * + * Extensions should be distributed as dynamic libraries, + * with a single exported function named after the distributed + * filename. + * + * When discovering extensions (see cmark_init), cmark will + * try to load a symbol named "init_{{filename}}" in all the + * dynamic libraries it encounters. + * + * For example, given a dynamic library named myextension.so + * (or myextension.dll), cmark will try to load the symbol + * named "init_myextension". This means that the filename + * must lend itself to forming a valid C identifier, with + * the notable exception of dashes, which will be translated + * to underscores, which means cmark will look for a function + * named "init_my_extension" if it encounters a dynamic library + * named "my-extension.so". 
+ * + * See the 'cmark_plugin_init_func' typedef for the exact prototype + * this function should follow. + * + * For now the extensibility of cmark is not complete, as + * it only offers API to hook into the block parsing phase + * (). + * + * See 'cmark_plugin_register_syntax_extension' for more information. + */ + +/** The prototype plugins' init function should follow. + */ +typedef int (*cmark_plugin_init_func)(cmark_plugin *plugin); + +/** Register a syntax 'extension' with the 'plugin', it will be made + * available as an extension and, if attached to a cmark_parser + * with 'cmark_parser_attach_syntax_extension', it will contribute + * to the block parsing process. + * + * See the documentation for 'cmark_syntax_extension' for information + * on how to implement one. + * + * This function will typically be called from the init function + * of external modules. + * + * This takes ownership of 'extension', one should not call + * 'cmark_syntax_extension_free' on a registered extension. + */ +CMARK_EXPORT +int cmark_plugin_register_syntax_extension(cmark_plugin *plugin, + cmark_syntax_extension *extension); + +/** This will search for the syntax extension named 'name' among the + * registered syntax extensions. + * + * It can then be attached to a cmark_parser + * with the cmark_parser_attach_syntax_extension method. + */ +CMARK_EXPORT +cmark_syntax_extension *cmark_find_syntax_extension(const char *name); + /** Should create and add a new open block to 'parent_container' if * 'input' matches a syntax rule for that block type. It is allowed * to modify the type of 'parent_container'. diff --git a/src/config.h.in b/src/config.h.in index 0f72d6a1f..06c74c305 100644 --- a/src/config.h.in +++ b/src/config.h.in @@ -77,6 +77,8 @@ CMARK_INLINE int c99_snprintf(char *outBuf, size_t size, const char *format, ... 
# include #endif +#define EXTENSION_DIR LIBDIR "/extensions" + #ifdef __cplusplus } #endif diff --git a/src/libcmark.pc.in b/src/libcmark.pc.in index 9c3a9a9e6..8419df378 100644 --- a/src/libcmark.pc.in +++ b/src/libcmark.pc.in @@ -6,5 +6,5 @@ includedir=@CMAKE_INSTALL_PREFIX@/include Name: libcmark Description: CommonMark parsing, rendering, and manipulation Version: @PROJECT_VERSION@ -Libs: -L${libdir} -lcmark +Libs: -L${libdir} -lcmark -ldl Cflags: -I${includedir} diff --git a/src/plugin.c b/src/plugin.c new file mode 100644 index 000000000..39c361ac7 --- /dev/null +++ b/src/plugin.c @@ -0,0 +1,33 @@ +#include + +#include "plugin.h" + +int cmark_plugin_register_syntax_extension(cmark_plugin * plugin, + cmark_syntax_extension * extension) { + plugin->syntax_extensions = cmark_llist_append(plugin->syntax_extensions, extension); + return 1; +} + +cmark_plugin * +cmark_plugin_new(void) { + cmark_plugin *res = malloc(sizeof(cmark_plugin)); + + res->syntax_extensions = NULL; + + return res; +} + +void +cmark_plugin_free(cmark_plugin *plugin) { + cmark_llist_free_full(plugin->syntax_extensions, + (cmark_free_func) cmark_syntax_extension_free); + free(plugin); +} + +cmark_llist * +cmark_plugin_steal_syntax_extensions(cmark_plugin *plugin) { + cmark_llist *res = plugin->syntax_extensions; + + plugin->syntax_extensions = NULL; + return res; +} diff --git a/src/plugin.h b/src/plugin.h new file mode 100644 index 000000000..b9e9d2994 --- /dev/null +++ b/src/plugin.h @@ -0,0 +1,34 @@ +#ifndef CMARK_PLUGIN_H +#define CMARK_PLUGIN_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "cmark.h" +#include "cmark_extension_api.h" + +/** + * cmark_plugin: + * + * A plugin structure, which should be filled by plugin's + * init functions. 
+ */ +struct cmark_plugin { + cmark_llist *syntax_extensions; +}; + +cmark_llist * +cmark_plugin_steal_syntax_extensions(cmark_plugin *plugin); + +cmark_plugin * +cmark_plugin_new(void); + +void +cmark_plugin_free(cmark_plugin *plugin); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/registry.c b/src/registry.c new file mode 100644 index 000000000..8f7b9c4e1 --- /dev/null +++ b/src/registry.c @@ -0,0 +1,141 @@ +#include +#include +#include +#include +#include + +#include "config.h" +#include "cmark.h" +#include "syntax_extension.h" +#include "registry.h" +#include "plugin.h" + + +static cmark_llist *syntax_extensions = NULL; +static cmark_llist *plugin_handles = NULL; + +static cmark_plugin *scan_file(char* filename) { + char* last_slash = strrchr(filename, '/'); + char* name_start = last_slash ? last_slash + 1 : filename; + char* last_dot = strrchr(filename, '.'); + cmark_plugin *plugin = NULL; + char *init_func_name = NULL; + int i; + void *libhandle; + char *libname = NULL; + + if (!last_dot || strcmp(last_dot, ".so")) + goto done; + + libname = malloc(sizeof(char) * (strlen(EXTENSION_DIR) + strlen(filename) + 2)); + snprintf(libname, strlen(EXTENSION_DIR) + strlen(filename) + 2, "%s/%s", + EXTENSION_DIR, filename); + libhandle = dlopen(libname, RTLD_NOW); + free(libname); + + if (!libhandle) { + fprintf(stderr, "Error loading DSO: %s\n", dlerror()); /* diagnostics go to stderr, as in main.c, so stdout carries only rendered output */ + goto done; + } + + name_start[last_dot - name_start] = '\0'; + + for (i = 0; name_start[i]; i++) { + if (name_start[i] == '-') + name_start[i] = '_'; + } + + init_func_name = malloc(sizeof(char) * (strlen(name_start) + 6)); + + snprintf(init_func_name, strlen(name_start) + 6, "init_%s", name_start); + + cmark_plugin_init_func initfunc = (cmark_plugin_init_func) + (intptr_t) dlsym(libhandle, init_func_name); + free(init_func_name); + + plugin = cmark_plugin_new(); + + if (initfunc) { + if (initfunc(plugin)) { + plugin_handles = cmark_llist_append(plugin_handles, libhandle); + } else { +
cmark_plugin_free(plugin); + fprintf(stderr, "Error Initializing plugin %s\n", name_start); + plugin = NULL; + dlclose(libhandle); + } + } else { + fprintf(stderr, "Error loading init function: %s\n", dlerror()); + dlclose(libhandle); + } + +done: + return plugin; +} + +static void scan_path(char *path) { + DIR *dir = opendir(path); + struct dirent* direntry; + + if (!dir) + return; + + while ((direntry = readdir(dir))) { + cmark_plugin *plugin = scan_file(direntry->d_name); + if (plugin) { + cmark_llist *syntax_extensions_list = cmark_plugin_steal_syntax_extensions(plugin); + cmark_llist *tmp; + + for (tmp = syntax_extensions_list; tmp; tmp=tmp->next) { + syntax_extensions = cmark_llist_append(syntax_extensions, tmp->data); + } + + cmark_llist_free(syntax_extensions_list); + cmark_plugin_free(plugin); + } + } + + closedir(dir); +} + +void cmark_discover_plugins(void) { + cmark_release_plugins(); + scan_path(EXTENSION_DIR); +} + +static void +release_plugin_handle(void *libhandle) { + dlclose(libhandle); +} + +void cmark_release_plugins(void) { + if (syntax_extensions) { + cmark_llist_free_full(syntax_extensions, + (cmark_free_func) cmark_syntax_extension_free); + syntax_extensions = NULL; + } + + cmark_llist_free_full(plugin_handles, release_plugin_handle); + plugin_handles = NULL; +} + +cmark_llist *cmark_list_syntax_extensions(void) { + cmark_llist *tmp; + cmark_llist *res = NULL; + + for (tmp = syntax_extensions; tmp; tmp = tmp->next) { + res = cmark_llist_append(res, tmp->data); + } + return res; +} + +cmark_syntax_extension *cmark_find_syntax_extension(const char *name) { + cmark_llist *tmp; + + for (tmp = syntax_extensions; tmp; tmp = tmp->next) { + cmark_syntax_extension *ext = (cmark_syntax_extension *) tmp->data; + if (!strcmp(ext->name, name)) + return ext; + } + return NULL; +} diff --git a/src/registry.h b/src/registry.h new file mode 100644 index 000000000..bc566e010 --- /dev/null +++ b/src/registry.h @@ -0,0 +1,18 @@ +#ifndef CMARK_REGISTRY_H +#define CMARK_REGISTRY_H
+ +#ifdef __cplusplus +extern "C" { +#endif + +#include "cmark.h" + +void cmark_discover_plugins(void); +void cmark_release_plugins(void); +cmark_llist *cmark_list_syntax_extensions(void); + +#ifdef __cplusplus +} +#endif + +#endif From e7075293f660148811a4e6c15f2273b70653d148 Mon Sep 17 00:00:00 2001 From: Mathieu Duponchelle Date: Sat, 27 Feb 2016 02:31:43 +0100 Subject: [PATCH 10/20] cmark executable: add extension switches Allow listing and attaching extensions. Also cleanup valgrind a little by removing exits and using cleanup gotos --- src/main.c | 110 +++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 85 insertions(+), 25 deletions(-) diff --git a/src/main.c b/src/main.c index 88a4f322f..c6e2508c0 100644 --- a/src/main.c +++ b/src/main.c @@ -5,6 +5,9 @@ #include "config.h" #include "memory.h" #include "cmark.h" +#include "cmark_extension_api.h" +#include "syntax_extension.h" +#include "registry.h" #if defined(_WIN32) && !defined(__CYGWIN__) #include @@ -23,20 +26,22 @@ typedef enum { void print_usage() { printf("Usage: cmark [FILE*]\n"); printf("Options:\n"); - printf(" --to, -t FORMAT Specify output format (html, xml, man, " + printf(" --to, -t FORMAT Specify output format (html, xml, man, " "commonmark, latex)\n"); - printf(" --width WIDTH Specify wrap width (default 0 = nowrap)\n"); - printf(" --sourcepos Include source position attribute\n"); - printf(" --hardbreaks Treat newlines as hard line breaks\n"); + printf(" --width WIDTH Specify wrap width (default 0 = nowrap)\n"); + printf(" --sourcepos Include source position attribute\n"); + printf(" --hardbreaks Treat newlines as hard line breaks\n"); printf(" --nobreaks Render soft line breaks as spaces\n"); - printf(" --safe Suppress raw HTML and dangerous URLs\n"); - printf(" --smart Use smart punctuation\n"); - printf(" --normalize Consolidate adjacent text nodes\n"); - printf(" --help, -h Print usage information\n"); - printf(" --version Print version\n"); + printf(" --safe Suppress 
raw HTML and dangerous URLs\n"); + printf(" --smart Use smart punctuation\n"); + printf(" --normalize Consolidate adjacent text nodes\n"); + printf(" -e, --extension EXTENSION_NAME Specify an extension name to use\n"); + printf(" --list-extensions List available extensions and quit\n"); + printf(" --help, -h Print usage information\n"); + printf(" --version Print version\n"); } -static void print_document(cmark_node *document, writer_format writer, +static bool print_document(cmark_node *document, writer_format writer, int options, int width) { char *result; @@ -58,23 +63,44 @@ static void print_document(cmark_node *document, writer_format writer, break; default: fprintf(stderr, "Unknown format %d\n", writer); - exit(1); + return false; } + printf("%s", result); free(result); + + return true; +} + +static void print_extensions(void) { + cmark_llist *syntax_extensions; + cmark_llist *tmp; + + printf ("Available extensions:\n"); + + syntax_extensions = cmark_list_syntax_extensions(); + for (tmp = syntax_extensions; tmp; tmp=tmp->next) { + cmark_syntax_extension *ext = (cmark_syntax_extension *) tmp->data; + printf("%s\n", ext->name); + } + + cmark_llist_free(syntax_extensions); } int main(int argc, char *argv[]) { int i, numfps = 0; int *files; char buffer[4096]; - cmark_parser *parser; + cmark_parser *parser = NULL; size_t bytes; - cmark_node *document; + cmark_node *document = NULL; int width = 0; char *unparsed; writer_format writer = FORMAT_HTML; int options = CMARK_OPT_DEFAULT; + int res = 1; + + cmark_init(); #if defined(_WIN32) && !defined(__CYGWIN__) _setmode(_fileno(stdin), _O_BINARY); @@ -87,7 +113,10 @@ int main(int argc, char *argv[]) { if (strcmp(argv[i], "--version") == 0) { printf("cmark %s", CMARK_VERSION_STRING); printf(" - CommonMark converter\n(C) 2014-2016 John MacFarlane\n"); - exit(0); + goto success; + } else if (strcmp(argv[i], "--list-extensions") == 0) { + print_extensions(); + goto success; } else if (strcmp(argv[i], "--sourcepos") == 0) { 
options |= CMARK_OPT_SOURCEPOS; } else if (strcmp(argv[i], "--hardbreaks") == 0) { @@ -105,7 +134,7 @@ int main(int argc, char *argv[]) { } else if ((strcmp(argv[i], "--help") == 0) || (strcmp(argv[i], "-h") == 0)) { print_usage(); - exit(0); + goto success; } else if (strcmp(argv[i], "--width") == 0) { i += 1; if (i < argc) { @@ -113,11 +142,11 @@ int main(int argc, char *argv[]) { if (unparsed && strlen(unparsed) > 0) { fprintf(stderr, "failed parsing width '%s' at '%s'\n", argv[i], unparsed); - exit(1); + goto failure; } } else { fprintf(stderr, "--width requires an argument\n"); - exit(1); + goto failure; } } else if ((strcmp(argv[i], "-t") == 0) || (strcmp(argv[i], "--to") == 0)) { i += 1; @@ -134,27 +163,48 @@ int main(int argc, char *argv[]) { writer = FORMAT_LATEX; } else { fprintf(stderr, "Unknown format %s\n", argv[i]); - exit(1); + goto failure; } } else { fprintf(stderr, "No argument provided for %s\n", argv[i - 1]); - exit(1); + goto failure; } + } else if ((strcmp(argv[i], "-e") == 0) || (strcmp(argv[i], "--extension") == 0)) { + i += 1; // Simpler to handle extensions in a second pass, as we can directly register + // them with the parser. 
} else if (*argv[i] == '-') { print_usage(); - exit(1); + goto failure; } else { // treat as file argument files[numfps++] = i; } } parser = cmark_parser_new(options); + + for (i = 1; i < argc; i++) { + if ((strcmp(argv[i], "-e") == 0) || (strcmp(argv[i], "--extension") == 0)) { + i += 1; + if (i < argc) { + cmark_syntax_extension *syntax_extension = cmark_find_syntax_extension(argv[i]); + if (!syntax_extension) { + fprintf(stderr, "Unknown extension %s\n", argv[i]); + goto failure; + } + cmark_parser_attach_syntax_extension(parser, syntax_extension); + } else { + fprintf(stderr, "No argument provided for %s\n", argv[i - 1]); + goto failure; + } + } + } + for (i = 0; i < numfps; i++) { FILE *fp = fopen(argv[files[i]], "rb"); if (fp == NULL) { fprintf(stderr, "Error opening file %s: %s\n", argv[files[i]], strerror(errno)); - exit(1); + goto failure; } while ((bytes = fread(buffer, 1, sizeof(buffer), fp)) > 0) { @@ -178,13 +228,23 @@ int main(int argc, char *argv[]) { } document = cmark_parser_finish(parser); - cmark_parser_free(parser); - print_document(document, writer, options, width); + if (!print_document(document, writer, options, width)) + goto failure; + + +success: + res = 0; + +failure: + if (parser) + cmark_parser_free(parser); - cmark_node_free(document); + if (document) + cmark_node_free(document); free(files); + cmark_deinit(); - return 0; + return res; } From fe08f83202c61abcd09041280f1874cc74babe25 Mon Sep 17 00:00:00 2001 From: Mathieu Duponchelle Date: Sun, 28 Feb 2016 18:07:34 +0100 Subject: [PATCH 11/20] Define blocks constituting a table. We have no syntax rules yet for creating them natively, but future extensions may provide some. 
--- src/blocks.c | 12 +++++-- src/cmark.h | 12 ++++++- src/commonmark.c | 27 ++++++++++++++++ src/html.c | 63 ++++++++++++++++++++++++++++++++++++- src/latex.c | 38 ++++++++++++++++++++++ src/man.c | 34 ++++++++++++++++++++ src/node.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++++ src/node.h | 10 ++++++ 8 files changed, 274 insertions(+), 4 deletions(-) diff --git a/src/blocks.c b/src/blocks.c index 7d057efed..d0e958575 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -153,7 +153,14 @@ static bool is_blank(cmark_strbuf *s, bufsize_t offset) { } static CMARK_INLINE bool can_contain(cmark_node_type parent_type, - cmark_node_type child_type) { + cmark_node_type child_type) { + if (parent_type == CMARK_NODE_TABLE) { + return child_type == CMARK_NODE_TABLE_ROW; + } + + if (parent_type == CMARK_NODE_TABLE_ROW) + return child_type == CMARK_NODE_TABLE_CELL; + return (parent_type == CMARK_NODE_DOCUMENT || parent_type == CMARK_NODE_BLOCK_QUOTE || parent_type == CMARK_NODE_ITEM || @@ -168,7 +175,8 @@ static CMARK_INLINE bool accepts_lines(cmark_node_type block_type) { static CMARK_INLINE bool contains_inlines(cmark_node_type block_type) { return (block_type == CMARK_NODE_PARAGRAPH || - block_type == CMARK_NODE_HEADING); + block_type == CMARK_NODE_HEADING || + block_type == CMARK_NODE_TABLE_CELL); } static void add_line(cmark_node *node, cmark_chunk *ch, cmark_parser *parser) { diff --git a/src/cmark.h b/src/cmark.h index 6322b5094..4076176a2 100644 --- a/src/cmark.h +++ b/src/cmark.h @@ -46,8 +46,13 @@ typedef enum { CMARK_NODE_HEADING, CMARK_NODE_THEMATIC_BREAK, + /* blocks with no syntax rules in the current specification */ + CMARK_NODE_TABLE, + CMARK_NODE_TABLE_ROW, + CMARK_NODE_TABLE_CELL, + CMARK_NODE_FIRST_BLOCK = CMARK_NODE_DOCUMENT, - CMARK_NODE_LAST_BLOCK = CMARK_NODE_THEMATIC_BREAK, + CMARK_NODE_LAST_BLOCK = CMARK_NODE_TABLE_CELL, /* Inline */ CMARK_NODE_TEXT, @@ -460,6 +465,11 @@ CMARK_EXPORT int cmark_node_get_end_line(cmark_node *node); */ CMARK_EXPORT int 
cmark_node_get_end_column(cmark_node *node); +CMARK_EXPORT int cmark_node_get_n_table_columns(cmark_node *node); +CMARK_EXPORT int cmark_node_set_n_table_columns(cmark_node *node, int n_columns); +CMARK_EXPORT int cmark_node_is_table_header(cmark_node *node); +CMARK_EXPORT int cmark_node_set_is_table_header(cmark_node *node, int is_table_header); + /** * ## Tree Manipulation */ diff --git a/src/commonmark.c b/src/commonmark.c index f1589f576..486dc441c 100644 --- a/src/commonmark.c +++ b/src/commonmark.c @@ -333,6 +333,33 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, } break; + case CMARK_NODE_TABLE: + BLANKLINE(); + break; + + case CMARK_NODE_TABLE_ROW: + if (entering) { + CR(); + LIT("|"); + } + break; + case CMARK_NODE_TABLE_CELL: + if (entering) { + } else { + LIT(" |"); + if (node->parent->as.table_row.is_header && !node->next) { + int i; + int n_cols = node->parent->parent->as.table.n_columns; + CR(); + LIT("|"); + for (i = 0; i < n_cols; i++) { + LIT(" --- |"); + } + CR(); + } + } + break; + case CMARK_NODE_TEXT: OUT(cmark_node_get_literal(node), allow_wrap, NORMAL); break; diff --git a/src/html.c b/src/html.c index a680e4a50..f8f51026c 100644 --- a/src/html.c +++ b/src/html.c @@ -27,6 +27,8 @@ static CMARK_INLINE void cr(cmark_strbuf *html) { struct render_state { cmark_strbuf *html; cmark_node *plain; + bool need_closing_table_body; + bool in_table_header; }; static void S_render_sourcepos(cmark_node *node, cmark_strbuf *html, @@ -217,6 +219,65 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type, } break; + case CMARK_NODE_TABLE: + if (entering) { + cr(html); + cmark_strbuf_puts(html, "'); + state->need_closing_table_body = false; + } else { + if (state->need_closing_table_body) + cmark_strbuf_puts(html, ""); + state->need_closing_table_body = false; + cmark_strbuf_puts(html, ""); + } + break; + + case CMARK_NODE_TABLE_ROW: + if (entering) { + cr(html); + if (node->as.table_row.is_header) { + 
state->in_table_header = true; + cmark_strbuf_puts(html, ""); + cr(html); + } + cmark_strbuf_puts(html, "'); + } else { + cr(html); + cmark_strbuf_puts(html, ""); + if (node->as.table_row.is_header) { + cr(html); + cmark_strbuf_puts(html, ""); + cr(html); + cmark_strbuf_puts(html, ""); + state->need_closing_table_body = true; + state->in_table_header = false; + } + } + break; + + case CMARK_NODE_TABLE_CELL: + if (entering) { + cr(html); + if (state->in_table_header) { + cmark_strbuf_puts(html, "'); + } else { + if (state->in_table_header) { + cmark_strbuf_puts(html, ""); + } else { + cmark_strbuf_puts(html, ""); + } + } + break; + case CMARK_NODE_TEXT: escape_html(html, node->as.literal.data, node->as.literal.len); break; @@ -327,7 +388,7 @@ char *cmark_render_html(cmark_node *root, int options) { cmark_strbuf html = CMARK_BUF_INIT(cmark_node_mem(root)); cmark_event_type ev_type; cmark_node *cur; - struct render_state state = {&html, NULL}; + struct render_state state = {&html, NULL, false, false}; cmark_iter *iter = cmark_iter_new(root); while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { diff --git a/src/latex.c b/src/latex.c index 7c3decda0..306a508de 100644 --- a/src/latex.c +++ b/src/latex.c @@ -332,6 +332,44 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, } break; + case CMARK_NODE_TABLE: + if (entering) { + int i, n_cols; + CR(); + LIT("\\begin{table}"); + CR(); + LIT("\\begin{tabular}{"); + + n_cols = node->as.table.n_columns; + for (i = 0; i < n_cols; i++) { + LIT("l"); + } + LIT("}"); + CR(); + } else { + LIT("\\end{tabular}"); + CR(); + LIT("\\end{table}"); + CR(); + } + break; + + case CMARK_NODE_TABLE_ROW: + if (!entering) { + CR(); + } + break; + + case CMARK_NODE_TABLE_CELL: + if (!entering) { + if (node->next) { + LIT(" & "); + } else { + LIT(" \\\\"); + } + } + break; + case CMARK_NODE_TEXT: OUT(cmark_node_get_literal(node), allow_wrap, NORMAL); break; diff --git a/src/man.c b/src/man.c index 1c76f68bb..a2691b063 
100644 --- a/src/man.c +++ b/src/man.c @@ -173,6 +173,40 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, } break; + case CMARK_NODE_TABLE: + if (entering) { + int i, n_cols; + CR(); + LIT(".TS"); + CR(); + LIT("tab(@);"); + CR(); + + n_cols = node->as.table.n_columns; + + for (i = 0; i < n_cols; i++) { + LIT("c"); + } + + if (n_cols) { + LIT("."); + CR(); + } + } else { + LIT(".TE"); + CR(); + } + break; + case CMARK_NODE_TABLE_ROW: + if (!entering) { + CR(); + } + break; + case CMARK_NODE_TABLE_CELL: + if (!entering && node->next) { + LIT("@"); + } + break; case CMARK_NODE_TEXT: OUT(cmark_node_get_literal(node), allow_wrap, NORMAL); break; diff --git a/src/node.c b/src/node.c index 7cfa7147e..7743ae19b 100644 --- a/src/node.c +++ b/src/node.c @@ -67,6 +67,17 @@ static bool S_can_contain(cmark_node *node, cmark_node *child) { case CMARK_NODE_IMAGE: case CMARK_NODE_CUSTOM_INLINE: return S_is_inline(child); + case CMARK_NODE_TABLE: + return child->type == CMARK_NODE_TABLE_ROW; + case CMARK_NODE_TABLE_ROW: + return child->type == CMARK_NODE_TABLE_CELL; + case CMARK_NODE_TABLE_CELL: + return child->type == CMARK_NODE_TEXT || + child->type == CMARK_NODE_CODE || + child->type == CMARK_NODE_EMPH || + child->type == CMARK_NODE_STRONG || + child->type == CMARK_NODE_LINK || + child->type == CMARK_NODE_IMAGE; default: break; @@ -213,6 +224,15 @@ const char *cmark_node_get_type_string(cmark_node *node) { return "html_block"; case CMARK_NODE_CUSTOM_BLOCK: return "custom_block"; + case CMARK_NODE_TABLE: + return "table"; + case CMARK_NODE_TABLE_ROW: + if (node->as.table_row.is_header) + return "table_header"; + else + return "table_row"; + case CMARK_NODE_TABLE_CELL: + return "table_cell"; case CMARK_NODE_PARAGRAPH: return "paragraph"; case CMARK_NODE_HEADING: @@ -739,6 +759,68 @@ int cmark_node_get_end_column(cmark_node *node) { return node->end_column; } +int cmark_node_get_n_table_columns(cmark_node *node) { + if (node == NULL) { + return -1; + } + + 
switch (node->type) { + case CMARK_NODE_TABLE: + return node->as.table.n_columns; + default: + break; + } + + return -1; +} + +int cmark_node_set_n_table_columns(cmark_node *node, int n_columns) { + if (node == NULL) { + return 0; + } + + switch (node->type) { + case CMARK_NODE_TABLE: + node->as.table.n_columns = n_columns; + return 1; + default: + break; + } + + return 0; +} + +int cmark_node_is_table_header(cmark_node *node) { + if (node == NULL) { + return 0; + } + + switch (node->type) { + case CMARK_NODE_TABLE_ROW: + return node->as.table_row.is_header; + default: + break; + } + + return 0; /* only a table row can be a header; any other node type is not one */ +} + +int cmark_node_set_is_table_header(cmark_node *node, int is_table_header) { + if (node == NULL) { + return 0; + } + + switch (node->type) { + case CMARK_NODE_TABLE_ROW: + node->as.table_row.is_header = is_table_header; + return 1; + default: + break; + } + + return 0; +} + // Unlink a node without adjusting its next, prev, and parent pointers. static void S_node_unlink(cmark_node *node) { if (node == NULL) { diff --git a/src/node.h b/src/node.h index 93932aae0..cbb0e551b 100644 --- a/src/node.h +++ b/src/node.h @@ -47,6 +47,14 @@ typedef struct { cmark_chunk on_exit; } cmark_custom; +typedef struct { + int n_columns; +} cmark_table; + +typedef struct { + bool is_header; +} cmark_table_row; + enum cmark_node__internal_flags { CMARK_NODE__OPEN = (1 << 0), CMARK_NODE__LAST_LINE_BLANK = (1 << 1), @@ -80,6 +88,8 @@ struct cmark_node { cmark_heading heading; cmark_link link; cmark_custom custom; + cmark_table table; + cmark_table_row table_row; int html_block_type; } as; }; From 634833ca5edb015685bd656e73935f46974187ca Mon Sep 17 00:00:00 2001 From: Mathieu Duponchelle Date: Sat, 27 Feb 2016 02:41:24 +0100 Subject: [PATCH 12/20] Extensions: implement an example "core extension" --- CMakeLists.txt | 1 + Makefile | 14 +++ extensions/CMakeLists.txt | 32 +++++ extensions/core-extensions.c | 237 +++++++++++++++++++++++++++++++++++ extensions/ext_scanners.h | 20 +++
extensions/ext_scanners.re | 65 ++++++++++ 6 files changed, 369 insertions(+) create mode 100644 extensions/CMakeLists.txt create mode 100644 extensions/core-extensions.c create mode 100644 extensions/ext_scanners.h create mode 100644 extensions/ext_scanners.re diff --git a/CMakeLists.txt b/CMakeLists.txt index 2ab6a7257..b9abddc55 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,6 +16,7 @@ set(PROJECT_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_ option(CMARK_TESTS "Build cmark tests and enable testing" ON) add_subdirectory(src) +add_subdirectory(extensions) if(CMARK_TESTS) add_subdirectory(api_test) endif() diff --git a/Makefile b/Makefile index 039089e19..5bb1fd2db 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,5 @@ SRCDIR=src +EXTDIR=extensions DATADIR=data BUILDDIR?=build GENERATOR?=Unix Makefiles @@ -115,6 +116,19 @@ $(SRCDIR)/scanners.c: $(SRCDIR)/scanners.re --encoding-policy substitute -o $@ $< clang-format -style llvm -i $@ +# We include ext_scanners.c in the repository, so this shouldn't +# normally need to be generated. 
+$(EXTDIR)/ext_scanners.c: $(EXTDIR)/ext_scanners.re + @case "$$(re2c -v)" in \ + *\ 0.13.*|*\ 0.14|*\ 0.14.1) \ + echo "re2c >= 0.14.2 is required"; \ + false; \ + ;; \ + esac + re2c --case-insensitive -b -i --no-generation-date -8 \ + --encoding-policy substitute -o $@ $< + clang-format -style llvm -i $@ + # We include entities.inc in the repository, so normally this # doesn't need to be regenerated: $(SRCDIR)/entities.inc: tools/make_entities_inc.py diff --git a/extensions/CMakeLists.txt b/extensions/CMakeLists.txt new file mode 100644 index 000000000..85d9e4450 --- /dev/null +++ b/extensions/CMakeLists.txt @@ -0,0 +1,32 @@ +cmake_minimum_required(VERSION 2.8) +set(LIBRARY "cmarkextensions") +set(LIBRARY_SOURCES + ${PROJECT_SOURCE_DIR}/src/buffer.c + ${PROJECT_SOURCE_DIR}/src/cmark_ctype.c + core-extensions.c + ext_scanners.c + ext_scanners.h + ) + +include_directories( + ${PROJECT_SOURCE_DIR}/src + ${PROJECT_BINARY_DIR}/src +) + +# We make LIB_INSTALL_DIR configurable rather than +# hard-coding lib, because on some OSes different locations +# are used for different architectures (e.g. /usr/lib64 on +# 64-bit Fedora). +if(NOT LIB_INSTALL_DIR) + set(LIB_INSTALL_DIR "lib" CACHE STRING + "Set the installation directory for libraries." FORCE) +endif(NOT LIB_INSTALL_DIR) + +include_directories(. 
${CMAKE_CURRENT_BINARY_DIR}) + +set(CMAKE_C_FLAGS_PROFILE "${CMAKE_C_FLAGS_RELEASE} -pg") +set(CMAKE_LINKER_PROFILE "${CMAKE_LINKER_FLAGS_RELEASE} -pg") + +add_library(${LIBRARY} SHARED ${LIBRARY_SOURCES}) + +target_link_libraries(cmarkextensions libcmark) diff --git a/extensions/core-extensions.c b/extensions/core-extensions.c new file mode 100644 index 000000000..6b650dd4b --- /dev/null +++ b/extensions/core-extensions.c @@ -0,0 +1,237 @@ +#include +#include + +#include +#include + +#include "buffer.h" +#include "ext_scanners.h" + +typedef struct { + int n_columns; + cmark_llist *cells; +} table_row; + +static void free_table_cell(void *data) { + cmark_strbuf_free((cmark_strbuf *) data); + free(data); +} + +static void free_table_row(table_row *row) { + + if (!row) + return; + + cmark_llist_free_full(row->cells, (cmark_free_func) free_table_cell); + + free(row); +} + +static cmark_strbuf *unescape_pipes(unsigned char *string, bufsize_t len) +{ + cmark_strbuf *res = (cmark_strbuf *)malloc(sizeof(cmark_strbuf)); + bufsize_t r, w; + + cmark_strbuf_init(res, len + 1); + cmark_strbuf_put(res, string, len); + cmark_strbuf_putc(res, '\0'); + + for (r = 0, w = 0; r < len; ++r) { + if (res->ptr[r] == '\\' && res->ptr[r + 1] == '|') + r++; + + res->ptr[w++] = res->ptr[r]; + } + + cmark_strbuf_truncate(res, w); + + return res; +} + +static table_row *row_from_string(unsigned char *string, int len) { + table_row *row = NULL; + bufsize_t cell_matched = 0; + bufsize_t cell_offset = 0; + + row = malloc(sizeof(table_row)); + row->n_columns = 0; + row->cells = NULL; + + do { + cell_matched = scan_table_cell(string, len, cell_offset); + if (cell_matched) { + cmark_strbuf *cell_buf = unescape_pipes(string + cell_offset + 1, + cell_matched - 1); + row->n_columns += 1; + row->cells = cmark_llist_append(row->cells, cell_buf); + } + cell_offset += cell_matched; + } while (cell_matched); + + cell_matched = scan_table_row_end(string, len, cell_offset); + cell_offset += cell_matched; + + 
if (!cell_matched || cell_offset != len) {
+    free_table_row(row);
+    row = NULL;
+  }
+
+  return row;
+}
+
+static cmark_node *try_opening_table_header(cmark_syntax_extension *self,
+                                            cmark_parser * parser,
+                                            cmark_node * parent_container,
+                                            unsigned char * input,
+                                            int len) {
+  bufsize_t matched = scan_table_start(input, len, cmark_parser_get_first_nonspace(parser));
+  cmark_node *table_header;
+  table_row *header_row = NULL;
+  table_row *marker_row = NULL;
+  const char *parent_string;
+
+  if (!matched)
+    goto done;
+
+  parent_string = cmark_node_get_string_content(parent_container);
+
+  header_row = row_from_string((unsigned char *) parent_string, strlen(parent_string));
+
+  if (!header_row) {
+    goto done;
+  }
+
+  marker_row = row_from_string(input + cmark_parser_get_first_nonspace(parser),
+                               len - cmark_parser_get_first_nonspace(parser));
+
+  assert(marker_row);
+
+  if (!marker_row || header_row->n_columns != marker_row->n_columns) { // assert() is compiled out under NDEBUG
+    goto done;
+  }
+
+  if (!cmark_node_set_type(parent_container, CMARK_NODE_TABLE)) {
+    goto done;
+  }
+
+  cmark_node_set_syntax_extension(parent_container, self);
+  cmark_node_set_n_table_columns(parent_container, header_row->n_columns);
+
+  table_header = cmark_parser_add_child(parser, parent_container,
+      CMARK_NODE_TABLE_ROW, cmark_parser_get_offset(parser));
+  cmark_node_set_syntax_extension(table_header, self);
+  cmark_node_set_is_table_header(table_header, true);
+
+  {
+    cmark_llist *tmp;
+
+    for (tmp = header_row->cells; tmp; tmp = tmp->next) {
+      cmark_strbuf *cell_buf = (cmark_strbuf *) tmp->data;
+      cmark_node *header_cell = cmark_parser_add_child(parser, table_header,
+          CMARK_NODE_TABLE_CELL, cmark_parser_get_offset(parser));
+      cmark_node_set_string_content(header_cell, (char *) cell_buf->ptr);
+      cmark_node_set_syntax_extension(header_cell, self);
+    }
+  }
+
+  cmark_parser_advance_offset(parser, input,
+                   len - 1 - cmark_parser_get_offset(parser), // use the passed-in length, as try_opening_table_row does
+                   false);
+done:
+  free_table_row(header_row);
+  free_table_row(marker_row);
+  return parent_container;
+}
+
+static cmark_node *try_opening_table_row(cmark_syntax_extension *self,
+                                         cmark_parser * parser,
+                                         cmark_node * parent_container,
+                                         unsigned char * input,
+                                         int len) {
+  cmark_node *table_row_block;
+  table_row *row;
+
+  if (cmark_parser_is_blank(parser))
+    return NULL;
+
+  table_row_block = cmark_parser_add_child(parser, parent_container,
+      CMARK_NODE_TABLE_ROW, cmark_parser_get_offset(parser));
+
+  cmark_node_set_syntax_extension(table_row_block, self);
+
+  /* We don't advance the offset here */
+
+  row = row_from_string(input + cmark_parser_get_first_nonspace(parser),
+                        len - cmark_parser_get_first_nonspace(parser));
+
+  {
+    cmark_llist *tmp;
+
+    for (tmp = row ? row->cells : NULL; tmp; tmp = tmp->next) { // row_from_string() returns NULL when the line does not parse
+      cmark_strbuf *cell_buf = (cmark_strbuf *) tmp->data;
+      cmark_node *cell = cmark_parser_add_child(parser, table_row_block,
+          CMARK_NODE_TABLE_CELL, cmark_parser_get_offset(parser));
+      cmark_node_set_string_content(cell, (char *) cell_buf->ptr);
+      cmark_node_set_syntax_extension(cell, self);
+    }
+  }
+
+  free_table_row(row);
+
+  cmark_parser_advance_offset(parser, input,
+                   len - 1 - cmark_parser_get_offset(parser),
+                   false);
+
+  return table_row_block;
+}
+
+static cmark_node *try_opening_table_block(cmark_syntax_extension * syntax_extension,
+                                           int indented,
+                                           cmark_parser * parser,
+                                           cmark_node * parent_container,
+                                           unsigned char * input,
+                                           int len) {
+  cmark_node_type parent_type = cmark_node_get_type(parent_container);
+
+  if (!indented && parent_type == CMARK_NODE_PARAGRAPH) {
+    return try_opening_table_header(syntax_extension, parser, parent_container, input, len);
+  } else if (!indented && parent_type == CMARK_NODE_TABLE) {
+    return try_opening_table_row(syntax_extension, parser, parent_container, input, len);
+  }
+
+  return NULL;
+}
+
+static int table_matches(cmark_syntax_extension *self,
+                         cmark_parser * parser,
+                         unsigned char * input,
+                         int len,
+                         cmark_node * parent_container) {
+  int res = 0;
+
+  if (cmark_node_get_type(parent_container)
== CMARK_NODE_TABLE) { + table_row *new_row = row_from_string(input + cmark_parser_get_first_nonspace(parser), + len - cmark_parser_get_first_nonspace(parser)); + if (new_row) { + if (new_row->n_columns == cmark_node_get_n_table_columns(parent_container)) + res = 1; + } + free_table_row(new_row); + } + + return res; +} + +static cmark_syntax_extension *register_table_syntax_extension(void) { + cmark_syntax_extension *ext = cmark_syntax_extension_new("piped-tables"); + + cmark_syntax_extension_set_match_block_func(ext, table_matches); + cmark_syntax_extension_set_open_block_func(ext, try_opening_table_block); + + return ext; +} + +int init_libcmarkextensions(cmark_plugin *plugin) { + cmark_plugin_register_syntax_extension(plugin, register_table_syntax_extension()); + return 1; +} diff --git a/extensions/ext_scanners.h b/extensions/ext_scanners.h new file mode 100644 index 000000000..c96b18490 --- /dev/null +++ b/extensions/ext_scanners.h @@ -0,0 +1,20 @@ +#include "cmark.h" +#include "chunk.h" + +#ifdef __cplusplus +extern "C" { +#endif + +bufsize_t _ext_scan_at(bufsize_t (*scanner)(const unsigned char *), unsigned char *ptr, + int len, bufsize_t offset); +bufsize_t _scan_table_start(const unsigned char *p); +bufsize_t _scan_table_cell(const unsigned char *p); +bufsize_t _scan_table_row_end(const unsigned char *p); + +#define scan_table_start(c, l, n) _ext_scan_at(&_scan_table_start, c, l, n) +#define scan_table_cell(c, l, n) _ext_scan_at(&_scan_table_cell, c, l, n) +#define scan_table_row_end(c, l, n) _ext_scan_at(&_scan_table_row_end, c, l, n) + +#ifdef __cplusplus +} +#endif diff --git a/extensions/ext_scanners.re b/extensions/ext_scanners.re new file mode 100644 index 000000000..7ad561f51 --- /dev/null +++ b/extensions/ext_scanners.re @@ -0,0 +1,65 @@ +#include +#include "ext_scanners.h" + +bufsize_t _ext_scan_at(bufsize_t (*scanner)(const unsigned char *), unsigned char *ptr, int len, bufsize_t offset) +{ + bufsize_t res; + + if (ptr == NULL || offset > len) { 
+ return 0; + } else { + unsigned char lim = ptr[len]; + + ptr[len] = '\0'; + res = scanner(ptr + offset); + ptr[len] = lim; + } + + return res; +} + +/*!re2c + re2c:define:YYCTYPE = "unsigned char"; + re2c:define:YYCURSOR = p; + re2c:define:YYMARKER = marker; + re2c:define:YYCTXMARKER = marker; + re2c:yyfill:enable = 0; + + spacechar = [ \t\v\f]; + newline = [\r]?[\n]; + + escaped_char = [\\][|!"#$%&'()*+,./:;<=>?@[\\\]^_`{}~-]; + + table_marker = [|](spacechar*[-]+spacechar*); + table_cell = [|](escaped_char|[^|\r\n])+; +*/ + +bufsize_t _scan_table_cell(const unsigned char *p) +{ + const unsigned char *marker = NULL; + const unsigned char *start = p; +/*!re2c + table_cell { return (bufsize_t)(p - start); } + .? { return 0; } +*/ +} + +bufsize_t _scan_table_row_end(const unsigned char *p) +{ + const unsigned char *marker = NULL; + const unsigned char *start = p; +/*!re2c + [|]newline { return (bufsize_t)(p - start); } + .? { return 0; } +*/ +} + +bufsize_t _scan_table_start(const unsigned char *p) +{ + const unsigned char *marker = NULL; + const unsigned char *start = p; +/*!re2c + (table_marker)+ [|]newline { return (bufsize_t)(p - start); } + .? 
{ return 0; } +*/ +} From 228be72ef294f35788cd18d9cbbf19653a2eb7bb Mon Sep 17 00:00:00 2001 From: Mathieu Duponchelle Date: Sat, 27 Feb 2016 02:42:23 +0100 Subject: [PATCH 13/20] Check in extension scanners separately --- extensions/core-extensions.c | 17 +- extensions/ext_scanners.c | 585 +++++++++++++++++++++++++++++++++++ 2 files changed, 594 insertions(+), 8 deletions(-) create mode 100644 extensions/ext_scanners.c diff --git a/extensions/core-extensions.c b/extensions/core-extensions.c index 6b650dd4b..9b904ed86 100644 --- a/extensions/core-extensions.c +++ b/extensions/core-extensions.c @@ -4,6 +4,7 @@ #include #include +#include "parser.h" #include "buffer.h" #include "ext_scanners.h" @@ -27,12 +28,12 @@ static void free_table_row(table_row *row) { free(row); } -static cmark_strbuf *unescape_pipes(unsigned char *string, bufsize_t len) +static cmark_strbuf *unescape_pipes(cmark_mem *mem, unsigned char *string, bufsize_t len) { cmark_strbuf *res = (cmark_strbuf *)malloc(sizeof(cmark_strbuf)); bufsize_t r, w; - cmark_strbuf_init(res, len + 1); + cmark_strbuf_init(mem, res, len + 1); cmark_strbuf_put(res, string, len); cmark_strbuf_putc(res, '\0'); @@ -48,7 +49,7 @@ static cmark_strbuf *unescape_pipes(unsigned char *string, bufsize_t len) return res; } -static table_row *row_from_string(unsigned char *string, int len) { +static table_row *row_from_string(cmark_mem *mem, unsigned char *string, int len) { table_row *row = NULL; bufsize_t cell_matched = 0; bufsize_t cell_offset = 0; @@ -60,7 +61,7 @@ static table_row *row_from_string(unsigned char *string, int len) { do { cell_matched = scan_table_cell(string, len, cell_offset); if (cell_matched) { - cmark_strbuf *cell_buf = unescape_pipes(string + cell_offset + 1, + cmark_strbuf *cell_buf = unescape_pipes(mem, string + cell_offset + 1, cell_matched - 1); row->n_columns += 1; row->cells = cmark_llist_append(row->cells, cell_buf); @@ -95,13 +96,13 @@ static cmark_node *try_opening_table_header(cmark_syntax_extension 
*self, parent_string = cmark_node_get_string_content(parent_container); - header_row = row_from_string((unsigned char *) parent_string, strlen(parent_string)); + header_row = row_from_string(parser->mem, (unsigned char *) parent_string, strlen(parent_string)); if (!header_row) { goto done; } - marker_row = row_from_string(input + cmark_parser_get_first_nonspace(parser), + marker_row = row_from_string(parser->mem, input + cmark_parser_get_first_nonspace(parser), len - cmark_parser_get_first_nonspace(parser)); assert(marker_row); @@ -161,7 +162,7 @@ static cmark_node *try_opening_table_row(cmark_syntax_extension *self, /* We don't advance the offset here */ - row = row_from_string(input + cmark_parser_get_first_nonspace(parser), + row = row_from_string(parser->mem, input + cmark_parser_get_first_nonspace(parser), len - cmark_parser_get_first_nonspace(parser)); { @@ -210,7 +211,7 @@ static int table_matches(cmark_syntax_extension *self, int res = 0; if (cmark_node_get_type(parent_container) == CMARK_NODE_TABLE) { - table_row *new_row = row_from_string(input + cmark_parser_get_first_nonspace(parser), + table_row *new_row = row_from_string(parser->mem, input + cmark_parser_get_first_nonspace(parser), len - cmark_parser_get_first_nonspace(parser)); if (new_row) { if (new_row->n_columns == cmark_node_get_n_table_columns(parent_container)) diff --git a/extensions/ext_scanners.c b/extensions/ext_scanners.c new file mode 100644 index 000000000..78df8d242 --- /dev/null +++ b/extensions/ext_scanners.c @@ -0,0 +1,585 @@ +/* Generated by re2c 0.16 */ +#include +#include "ext_scanners.h" + +bufsize_t _ext_scan_at(bufsize_t (*scanner)(const unsigned char *), + unsigned char *ptr, int len, bufsize_t offset) { + bufsize_t res; + + if (ptr == NULL || offset > len) { + return 0; + } else { + unsigned char lim = ptr[len]; + + ptr[len] = '\0'; + res = scanner(ptr + offset); + ptr[len] = lim; + } + + return res; +} + +bufsize_t _scan_table_cell(const unsigned char *p) { + const unsigned 
char *marker = NULL; + const unsigned char *start = p; + + { + unsigned char yych; + unsigned int yyaccept = 0; + static const unsigned char yybm[] = { + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 0, 64, 64, 0, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 128, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 0, 64, + 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, + }; + yych = *(marker = p); + if (yych <= 0xDF) { + if (yych <= '{') { + if (yych != '\n') + goto yy3; + } else { + if (yych <= '|') + goto yy4; + if (yych <= 0x7F) + goto yy3; + if (yych >= 0xC2) + goto yy5; + } + } else { + if (yych <= 0xEF) { + if (yych <= 0xE0) + goto yy7; + if (yych == 0xED) + goto yy9; + goto yy8; + } else { + if (yych <= 0xF0) + goto yy10; + if (yych <= 0xF3) + goto yy11; + if (yych <= 0xF4) + goto yy12; + } + } + yy2 : { return 0; } + yy3: + yych = *++p; + goto yy2; + yy4: + yyaccept = 0; + yych = *(marker = ++p); + if (yych <= '{') { + if (yych <= '\n') { + if (yych <= '\t') + goto yy14; + goto yy2; + } else { + if (yych == '\r') + goto yy2; + goto yy14; + } + } else { + if (yych <= 0x7F) { + if (yych <= '|') + goto yy2; + goto yy14; + } else { + if (yych <= 0xC1) + goto yy2; + if (yych <= 0xF4) + goto yy14; + goto yy2; + } + } + yy5: + yych = *++p; + if (yych <= 0x7F) + goto yy6; + 
if (yych <= 0xBF) + goto yy3; + yy6: + p = marker; + if (yyaccept == 0) { + goto yy2; + } else { + goto yy15; + } + yy7: + yych = *++p; + if (yych <= 0x9F) + goto yy6; + if (yych <= 0xBF) + goto yy5; + goto yy6; + yy8: + yych = *++p; + if (yych <= 0x7F) + goto yy6; + if (yych <= 0xBF) + goto yy5; + goto yy6; + yy9: + yych = *++p; + if (yych <= 0x7F) + goto yy6; + if (yych <= 0x9F) + goto yy5; + goto yy6; + yy10: + yych = *++p; + if (yych <= 0x8F) + goto yy6; + if (yych <= 0xBF) + goto yy8; + goto yy6; + yy11: + yych = *++p; + if (yych <= 0x7F) + goto yy6; + if (yych <= 0xBF) + goto yy8; + goto yy6; + yy12: + yych = *++p; + if (yych <= 0x7F) + goto yy6; + if (yych <= 0x8F) + goto yy8; + goto yy6; + yy13: + yyaccept = 1; + marker = ++p; + yych = *p; + yy14: + if (yybm[0 + yych] & 64) { + goto yy13; + } + if (yych <= 0xEC) { + if (yych <= 0xC1) { + if (yych <= '\r') + goto yy15; + if (yych <= '\\') + goto yy16; + } else { + if (yych <= 0xDF) + goto yy18; + if (yych <= 0xE0) + goto yy19; + goto yy20; + } + } else { + if (yych <= 0xF0) { + if (yych <= 0xED) + goto yy21; + if (yych <= 0xEF) + goto yy20; + goto yy22; + } else { + if (yych <= 0xF3) + goto yy23; + if (yych <= 0xF4) + goto yy24; + } + } + yy15 : { return (bufsize_t)(p - start); } + yy16: + yyaccept = 1; + marker = ++p; + yych = *p; + if (yybm[0 + yych] & 128) { + goto yy16; + } + if (yych <= 0xDF) { + if (yych <= '\f') { + if (yych == '\n') + goto yy15; + goto yy13; + } else { + if (yych <= '\r') + goto yy15; + if (yych <= 0x7F) + goto yy13; + if (yych <= 0xC1) + goto yy15; + } + } else { + if (yych <= 0xEF) { + if (yych <= 0xE0) + goto yy19; + if (yych == 0xED) + goto yy21; + goto yy20; + } else { + if (yych <= 0xF0) + goto yy22; + if (yych <= 0xF3) + goto yy23; + if (yych <= 0xF4) + goto yy24; + goto yy15; + } + } + yy18: + ++p; + yych = *p; + if (yych <= 0x7F) + goto yy6; + if (yych <= 0xBF) + goto yy13; + goto yy6; + yy19: + ++p; + yych = *p; + if (yych <= 0x9F) + goto yy6; + if (yych <= 0xBF) + goto 
yy18; + goto yy6; + yy20: + ++p; + yych = *p; + if (yych <= 0x7F) + goto yy6; + if (yych <= 0xBF) + goto yy18; + goto yy6; + yy21: + ++p; + yych = *p; + if (yych <= 0x7F) + goto yy6; + if (yych <= 0x9F) + goto yy18; + goto yy6; + yy22: + ++p; + yych = *p; + if (yych <= 0x8F) + goto yy6; + if (yych <= 0xBF) + goto yy20; + goto yy6; + yy23: + ++p; + yych = *p; + if (yych <= 0x7F) + goto yy6; + if (yych <= 0xBF) + goto yy20; + goto yy6; + yy24: + ++p; + yych = *p; + if (yych <= 0x7F) + goto yy6; + if (yych <= 0x8F) + goto yy20; + goto yy6; + } +} + +bufsize_t _scan_table_row_end(const unsigned char *p) { + const unsigned char *marker = NULL; + const unsigned char *start = p; + + { + unsigned char yych; + yych = *(marker = p); + if (yych <= 0xDF) { + if (yych <= '{') { + if (yych != '\n') + goto yy28; + } else { + if (yych <= '|') + goto yy29; + if (yych <= 0x7F) + goto yy28; + if (yych >= 0xC2) + goto yy30; + } + } else { + if (yych <= 0xEF) { + if (yych <= 0xE0) + goto yy32; + if (yych == 0xED) + goto yy34; + goto yy33; + } else { + if (yych <= 0xF0) + goto yy35; + if (yych <= 0xF3) + goto yy36; + if (yych <= 0xF4) + goto yy37; + } + } + yy27 : { return 0; } + yy28: + yych = *++p; + goto yy27; + yy29: + yych = *(marker = ++p); + if (yych == '\n') + goto yy38; + if (yych == '\r') + goto yy40; + goto yy27; + yy30: + yych = *++p; + if (yych <= 0x7F) + goto yy31; + if (yych <= 0xBF) + goto yy28; + yy31: + p = marker; + goto yy27; + yy32: + yych = *++p; + if (yych <= 0x9F) + goto yy31; + if (yych <= 0xBF) + goto yy30; + goto yy31; + yy33: + yych = *++p; + if (yych <= 0x7F) + goto yy31; + if (yych <= 0xBF) + goto yy30; + goto yy31; + yy34: + yych = *++p; + if (yych <= 0x7F) + goto yy31; + if (yych <= 0x9F) + goto yy30; + goto yy31; + yy35: + yych = *++p; + if (yych <= 0x8F) + goto yy31; + if (yych <= 0xBF) + goto yy33; + goto yy31; + yy36: + yych = *++p; + if (yych <= 0x7F) + goto yy31; + if (yych <= 0xBF) + goto yy33; + goto yy31; + yy37: + yych = *++p; + if (yych <= 
0x7F) + goto yy31; + if (yych <= 0x8F) + goto yy33; + goto yy31; + yy38: + ++p; + { return (bufsize_t)(p - start); } + yy40: + ++p; + if ((yych = *p) == '\n') + goto yy38; + goto yy31; + } +} + +bufsize_t _scan_table_start(const unsigned char *p) { + const unsigned char *marker = NULL; + const unsigned char *start = p; + + { + unsigned char yych; + static const unsigned char yybm[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + }; + yych = *(marker = p); + if (yych <= 0xDF) { + if (yych <= '{') { + if (yych != '\n') + goto yy44; + } else { + if (yych <= '|') + goto yy45; + if (yych <= 0x7F) + goto yy44; + if (yych >= 0xC2) + goto yy46; + } + } else { + if (yych <= 0xEF) { + if (yych <= 0xE0) + goto yy48; + if (yych == 0xED) + goto yy50; + goto yy49; + } else { + if (yych <= 0xF0) + goto yy51; + if (yych <= 0xF3) + goto yy52; + if (yych <= 0xF4) + goto yy53; + } + } + yy43 : { return 0; } + yy44: + yych = *++p; + goto yy43; + yy45: + yych = *(marker = ++p); + if (yybm[0 + yych] & 64) { + goto yy54; + } + if (yych == '-') + goto yy56; + goto yy43; + yy46: + yych = *++p; + if (yych <= 0x7F) + goto yy47; + if (yych <= 0xBF) + goto yy44; + yy47: + p = marker; + goto yy43; + yy48: + yych = *++p; + if (yych <= 0x9F) + goto yy47; 
+ if (yych <= 0xBF) + goto yy46; + goto yy47; + yy49: + yych = *++p; + if (yych <= 0x7F) + goto yy47; + if (yych <= 0xBF) + goto yy46; + goto yy47; + yy50: + yych = *++p; + if (yych <= 0x7F) + goto yy47; + if (yych <= 0x9F) + goto yy46; + goto yy47; + yy51: + yych = *++p; + if (yych <= 0x8F) + goto yy47; + if (yych <= 0xBF) + goto yy49; + goto yy47; + yy52: + yych = *++p; + if (yych <= 0x7F) + goto yy47; + if (yych <= 0xBF) + goto yy49; + goto yy47; + yy53: + yych = *++p; + if (yych <= 0x7F) + goto yy47; + if (yych <= 0x8F) + goto yy49; + goto yy47; + yy54: + ++p; + yych = *p; + if (yybm[0 + yych] & 64) { + goto yy54; + } + if (yych != '-') + goto yy47; + yy56: + ++p; + yych = *p; + if (yybm[0 + yych] & 128) { + goto yy56; + } + if (yych <= '\f') { + if (yych == '\t') + goto yy58; + if (yych <= '\n') + goto yy47; + } else { + if (yych <= ' ') { + if (yych <= 0x1F) + goto yy47; + } else { + if (yych == '|') + goto yy60; + goto yy47; + } + } + yy58: + ++p; + yych = *p; + if (yych <= '\f') { + if (yych == '\t') + goto yy58; + if (yych <= '\n') + goto yy47; + goto yy58; + } else { + if (yych <= ' ') { + if (yych <= 0x1F) + goto yy47; + goto yy58; + } else { + if (yych != '|') + goto yy47; + } + } + yy60: + ++p; + yych = *p; + if (yybm[0 + yych] & 64) { + goto yy54; + } + if (yych <= '\r') { + if (yych <= 0x08) + goto yy47; + if (yych >= '\v') + goto yy63; + } else { + if (yych == '-') + goto yy56; + goto yy47; + } + yy61: + ++p; + { return (bufsize_t)(p - start); } + yy63: + ++p; + if ((yych = *p) == '\n') + goto yy61; + goto yy47; + } +} From c174e30acc9a49969406e1ab3dfcf247efe4a938 Mon Sep 17 00:00:00 2001 From: Mathieu Duponchelle Date: Fri, 4 Mar 2016 20:49:01 +0100 Subject: [PATCH 14/20] inlines: Expose inline parser --- src/cmark_extension_api.h | 96 +++++++++++++++++++++++++++ src/inlines.c | 132 ++++++++++++++++++++++++++++++++++---- 2 files changed, 216 insertions(+), 12 deletions(-) diff --git a/src/cmark_extension_api.h b/src/cmark_extension_api.h index 
e74f5676b..fb3264e91 100644 --- a/src/cmark_extension_api.h +++ b/src/cmark_extension_api.h @@ -63,6 +63,21 @@ typedef struct cmark_plugin cmark_plugin; */ typedef struct cmark_syntax_extension cmark_syntax_extension; +typedef struct subject cmark_inline_parser; + +/** Exposed raw for now */ + +typedef struct delimiter { + struct delimiter *previous; + struct delimiter *next; + cmark_node *inl_text; + int position; + unsigned char delim_char; + int can_open; + int can_close; + int active; +} delimiter; + /** * ### Plugin API. * @@ -359,6 +374,87 @@ CMARK_EXPORT cmark_syntax_extension *cmark_node_get_syntax_extension(cmark_node CMARK_EXPORT int cmark_node_set_syntax_extension(cmark_node *node, cmark_syntax_extension *extension); +/** + * ## Inline syntax extension helpers + * + * The inline parsing process is described in detail at + * + */ + +/** Should return 'true' if the predicate matches 'c', 'false' otherwise + */ +typedef int (*cmark_inline_predicate)(int c); + +/** Advance the current inline parsing offset */ +CMARK_EXPORT +void cmark_inline_parser_advance_offset(cmark_inline_parser *parser); + +/** Get the current inline parsing offset */ +CMARK_EXPORT +int cmark_inline_parser_get_offset(cmark_inline_parser *parser); + +/** Get the character located at the current inline parsing offset + */ +CMARK_EXPORT +unsigned char cmark_inline_parser_peek_char(cmark_inline_parser *parser); + +/** Get the character located 'pos' bytes in the current line. + */ +CMARK_EXPORT +unsigned char cmark_inline_parser_peek_at(cmark_inline_parser *parser, int pos); + +/** Whether the inline parser has reached the end of the current line + */ +CMARK_EXPORT +int cmark_inline_parser_is_eof(cmark_inline_parser *parser); + +/** Get the characters located after the current inline parsing offset + * while 'pred' matches. Free after usage. 
+ */ +CMARK_EXPORT +char *cmark_inline_parser_take_while(cmark_inline_parser *parser, cmark_inline_predicate pred); + +/** Push a delimiter on the delimiter stack. + * See < for + * more information on the parameters + */ +CMARK_EXPORT +void cmark_inline_parser_push_delimiter(cmark_inline_parser *parser, + unsigned char c, + int can_open, + int can_close, + cmark_node *inl_text); + +/** Remove 'delim' from the delimiter stack + */ +CMARK_EXPORT +void cmark_inline_parser_remove_delimiter(cmark_inline_parser *parser, delimiter *delim); + +CMARK_EXPORT +delimiter *cmark_inline_parser_get_last_delimiter(cmark_inline_parser *parser); + +/** Convenience function to scan a given delimiter. + * + * 'left_flanking' and 'right_flanking' will be set to true if they + * respectively precede and follow a non-space, non-punctuation + * character. + * + * Additionally, 'punct_before' and 'punct_after' will respectively be set + * if the preceding or following character is a punctuation character. + * + * Note that 'left_flanking' and 'right_flanking' can both be 'true'. + * + * Returns the number of delimiters encountered, in the limit + * of 'max_delims', and advances the inline parsing offset. 
+ */ +CMARK_EXPORT +int cmark_inline_parser_scan_delimiters(cmark_inline_parser *parser, + int max_delims, + unsigned char c, + int *left_flanking, + int *right_flanking, + int *punct_before, + int *punct_after); #ifdef __cplusplus } #endif diff --git a/src/inlines.c b/src/inlines.c index 8f18e6c96..999a9cfca 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -30,18 +30,7 @@ static const char *RIGHTSINGLEQUOTE = "\xE2\x80\x99"; #define make_emph(mem) make_simple(mem, CMARK_NODE_EMPH) #define make_strong(mem) make_simple(mem, CMARK_NODE_STRONG) -typedef struct delimiter { - struct delimiter *previous; - struct delimiter *next; - cmark_node *inl_text; - bufsize_t position; - unsigned char delim_char; - bool can_open; - bool can_close; - bool active; -} delimiter; - -typedef struct { +typedef struct subject{ cmark_mem *mem; cmark_chunk input; bufsize_t pos; @@ -1127,3 +1116,122 @@ bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input, cmark_reference_create(refmap, &lab, &url, &title); return subj.pos; } + +unsigned char cmark_inline_parser_peek_char(cmark_inline_parser *parser) { + return peek_char(parser); +} + +unsigned char cmark_inline_parser_peek_at(cmark_inline_parser *parser, bufsize_t pos) { + return peek_at(parser, pos); +} + +int cmark_inline_parser_is_eof(cmark_inline_parser *parser) { + return is_eof(parser); +} + +static char * +my_strndup (const char *s, size_t n) +{ + char *result; + size_t len = strlen (s); + + if (n < len) + len = n; + + result = (char *) malloc (len + 1); + if (!result) + return 0; + + result[len] = '\0'; + return (char *) memcpy (result, s, len); +} + +char *cmark_inline_parser_take_while(cmark_inline_parser *parser, cmark_inline_predicate pred) { + unsigned char c; + bufsize_t startpos = parser->pos; + bufsize_t len = 0; + + while ((c = peek_char(parser)) && (*pred)(c)) { + advance(parser); + len++; + } + + return my_strndup((const char *) parser->input.data + startpos, len); +} + +void 
cmark_inline_parser_push_delimiter(cmark_inline_parser *parser,
+                                  unsigned char c,
+                                  int can_open,
+                                  int can_close,
+                                  cmark_node *inl_text) {
+  push_delimiter(parser, c, can_open, can_close, inl_text);
+}
+
+void cmark_inline_parser_remove_delimiter(cmark_inline_parser *parser, delimiter *delim) {
+  remove_delimiter(parser, delim);
+}
+
+int cmark_inline_parser_scan_delimiters(cmark_inline_parser *parser,
+                                  int max_delims,
+                                  unsigned char c,
+                                  int *left_flanking,
+                                  int *right_flanking,
+                                  int *punct_before,
+                                  int *punct_after) {
+  int numdelims = 0;
+  bufsize_t before_char_pos;
+  int32_t after_char = 0;
+  int32_t before_char = 0;
+  int len;
+  bool space_before, space_after;
+
+  if (parser->pos == 0) {
+    before_char = 10;
+  } else {
+    before_char_pos = parser->pos - 1;
+    // walk back to the beginning of the UTF_8 sequence:
+    while (peek_at(parser, before_char_pos) >> 6 == 2 && before_char_pos > 0) {
+      before_char_pos -= 1;
+    }
+    len = cmark_utf8proc_iterate(parser->input.data + before_char_pos,
+                                 parser->pos - before_char_pos, &before_char);
+    if (len == -1) {
+      before_char = 10;
+    }
+  }
+
+  while (peek_char(parser) == c && numdelims < max_delims) { // consume and count at most max_delims delimiters
+    numdelims++;
+    advance(parser);
+  }
+
+  len = cmark_utf8proc_iterate(parser->input.data + parser->pos,
+                               parser->input.len - parser->pos, &after_char);
+  if (len == -1) {
+    after_char = 10;
+  }
+
+  *punct_before = cmark_utf8proc_is_punctuation(before_char);
+  *punct_after = cmark_utf8proc_is_punctuation(after_char);
+  space_before = cmark_utf8proc_is_space(before_char);
+  space_after = cmark_utf8proc_is_space(after_char);
+
+  *left_flanking = numdelims > 0 && !cmark_utf8proc_is_space(after_char) &&
+                  !(*punct_after && !space_before && !*punct_before);
+  *right_flanking = numdelims > 0 && !cmark_utf8proc_is_space(before_char) &&
+                  !(*punct_before && !space_after && !*punct_after);
+
+  return numdelims;
+}
+
+void cmark_inline_parser_advance_offset(cmark_inline_parser *parser) {
+  advance(parser);
+}
+
+int
cmark_inline_parser_get_offset(cmark_inline_parser *parser) { + return parser->pos; +} + +delimiter *cmark_inline_parser_get_last_delimiter(cmark_inline_parser *parser) { + return parser->last_delim; +} From 315f477305e75a284bcd6f2ed9b565fc1d790253 Mon Sep 17 00:00:00 2001 From: Mathieu Duponchelle Date: Sat, 5 Mar 2016 02:27:57 +0100 Subject: [PATCH 15/20] inline extension draft --- src/blocks.c | 37 +++++++-- src/cmark_extension_api.h | 67 ++++++++++++++++ src/inlines.c | 160 ++++++++++++++++++++++++++++---------- src/inlines.h | 7 +- src/parser.h | 1 + src/syntax_extension.c | 16 ++++ src/syntax_extension.h | 9 ++- 7 files changed, 245 insertions(+), 52 deletions(-) diff --git a/src/blocks.c b/src/blocks.c index d0e958575..562658d69 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -84,6 +84,11 @@ static cmark_node *make_document(cmark_mem *mem) { int cmark_parser_attach_syntax_extension(cmark_parser *parser, cmark_syntax_extension *extension) { parser->syntax_extensions = cmark_llist_append(parser->syntax_extensions, extension); + if (extension->match_inline && extension->insert_inline_from_delim) { + parser->inline_syntax_extensions = cmark_llist_append( + parser->inline_syntax_extensions, extension); + } + return 1; } @@ -111,6 +116,7 @@ cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem) { parser->options = options; parser->last_buffer_ended_with_cr = false; parser->syntax_extensions = NULL; + parser->inline_syntax_extensions = NULL; return parser; } @@ -126,6 +132,7 @@ void cmark_parser_free(cmark_parser *parser) { cmark_strbuf_free(&parser->linebuf); cmark_reference_map_free(parser->refmap); cmark_llist_free(parser->syntax_extensions); + cmark_llist_free(parser->inline_syntax_extensions); mem->free(parser); } @@ -392,23 +399,43 @@ static cmark_node *add_child(cmark_parser *parser, cmark_node *parent, return child; } +static void manage_extensions_special_characters(cmark_parser *parser, bool add) { + cmark_llist *tmp_ext; + + for (tmp_ext = 
parser->inline_syntax_extensions; tmp_ext; tmp_ext=tmp_ext->next) { + cmark_syntax_extension *ext = (cmark_syntax_extension *) tmp_ext->data; + cmark_llist *tmp_char; + for (tmp_char = ext->special_inline_chars; tmp_char; tmp_char=tmp_char->next) { + unsigned char c = (unsigned char) (unsigned long) tmp_char->data; + if (add) + cmark_inlines_add_special_character(c); + else + cmark_inlines_remove_special_character(c); + } + } +} + // Walk through node and all children, recursively, parsing // string content into inline content where appropriate. -static void process_inlines(cmark_mem *mem, cmark_node *root, cmark_reference_map *refmap, +static void process_inlines(cmark_parser *parser, cmark_reference_map *refmap, int options) { - cmark_iter *iter = cmark_iter_new(root); + cmark_iter *iter = cmark_iter_new(parser->root); cmark_node *cur; cmark_event_type ev_type; + manage_extensions_special_characters(parser, true); + while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { cur = cmark_iter_get_node(iter); if (ev_type == CMARK_EVENT_ENTER) { - if (contains_inlines(S_type(cur))) { - cmark_parse_inlines(mem, cur, refmap, options); + if (contains_inlines(cur->type)) { + cmark_parse_inlines(parser, cur, refmap, options); } } } + manage_extensions_special_characters(parser, false); + cmark_iter_free(iter); } @@ -487,7 +514,7 @@ static cmark_node *finalize_document(cmark_parser *parser) { } finalize(parser, parser->root); - process_inlines(parser->mem, parser->root, parser->refmap, parser->options); + process_inlines(parser, parser->refmap, parser->options); return parser->root; } diff --git a/src/cmark_extension_api.h b/src/cmark_extension_api.h index fb3264e91..636df7a12 100644 --- a/src/cmark_extension_api.h +++ b/src/cmark_extension_api.h @@ -60,6 +60,43 @@ typedef struct cmark_plugin cmark_plugin; * new block with cmark_parser_make_block and cmark_parser_add_child. 
* If no function was provided is NULL, the extension will have * no effect at all on the final block structure of the AST. + * + * #### Inline parsing phase hooks + * + * For each character provided by the extension through + * 'cmark_syntax_extension_set_special_inline_chars', + * the function provided by the extension through + * 'cmark_syntax_extension_set_match_inline_func' + * will get called; it is the responsibility of the extension + * to scan the characters located at the current inline parsing offset + * with the cmark_inline_parser API. + * + * Depending on the type of the extension, it can either: + * + * * Scan forward, determine that the syntax matches and return + * a newly-created inline node with the appropriate type. + * This is the technique that would be used if inline code + * (with backticks) was implemented as an extension. + * * Scan only the character(s) that its syntax rules require + * for opening and closing nodes, push a delimiter on the + * delimiter stack, and return a simple text node with its + * contents set to the character(s) consumed. + * This is the technique that would be used if emphasis + * inlines were implemented as an extension. + * + * When an extension has pushed delimiters on the stack, + * the function provided through + * 'cmark_syntax_extension_set_inline_from_delim_func' + * will get called in a later phase, + * when the inline parser has matched opener and closer delimiters + * created by the extension together. + * + * It is then the responsibility of the extension to modify + * and populate the opener inline text node, and to remove + * the necessary delimiters from the delimiter stack. + * + * Finally, the extension should return NULL if its scan didn't + * match its syntax rules.
*/ typedef struct cmark_syntax_extension cmark_syntax_extension; @@ -153,6 +190,18 @@ typedef cmark_node * (*cmark_open_block_func) (cmark_syntax_extension *extension unsigned char *input, int len); +typedef cmark_node *(*cmark_match_inline_func)(cmark_syntax_extension *extension, + cmark_parser *parser, + cmark_node *parent, + unsigned char character, + cmark_inline_parser *inline_parser); + +typedef delimiter *(*cmark_inline_from_delim_func)(cmark_syntax_extension *extension, + cmark_parser *parser, + cmark_inline_parser *inline_parser, + delimiter *opener, + delimiter *closer); + /** Should return 'true' if 'input' can be contained in 'container', * 'false' otherwise. */ @@ -184,6 +233,24 @@ CMARK_EXPORT void cmark_syntax_extension_set_match_block_func(cmark_syntax_extension *extension, cmark_match_block_func func); +/** See the documentation for 'cmark_syntax_extension' + */ +CMARK_EXPORT +void cmark_syntax_extension_set_match_inline_func(cmark_syntax_extension *extension, + cmark_match_inline_func func); + +/** See the documentation for 'cmark_syntax_extension' + */ +CMARK_EXPORT +void cmark_syntax_extension_set_inline_from_delim_func(cmark_syntax_extension *extension, + cmark_inline_from_delim_func func); + +/** See the documentation for 'cmark_syntax_extension' + */ +CMARK_EXPORT +void cmark_syntax_extension_set_special_inline_chars(cmark_syntax_extension *extension, + cmark_llist *special_chars); + /** Return the index of the line currently being parsed, starting with 1. 
*/ CMARK_EXPORT diff --git a/src/inlines.c b/src/inlines.c index 999a9cfca..72f4cfd75 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -12,6 +12,7 @@ #include "utf8.h" #include "scanners.h" #include "inlines.h" +#include "syntax_extension.h" static const char *EMDASH = "\xE2\x80\x94"; static const char *ENDASH = "\xE2\x80\x93"; @@ -45,7 +46,7 @@ static CMARK_INLINE bool S_is_line_end_char(char c) { static delimiter *S_insert_emph(subject *subj, delimiter *opener, delimiter *closer); -static int parse_inline(subject *subj, cmark_node *parent, int options); +static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent, int options); static void subject_from_buf(cmark_mem *mem, subject *e, cmark_strbuf *buffer, cmark_reference_map *refmap); @@ -434,7 +435,40 @@ static cmark_node *handle_period(subject *subj, bool smart) { } } -static void process_emphasis(subject *subj, delimiter *stack_bottom) { +static void add_extensions_openers_bottom(cmark_parser *parser, + delimiter **openers_bottom, delimiter *stack_bottom) { + cmark_llist *tmp_ext; + + for (tmp_ext = parser->inline_syntax_extensions; tmp_ext; tmp_ext=tmp_ext->next) { + cmark_syntax_extension *ext = (cmark_syntax_extension *) tmp_ext->data; + cmark_llist *tmp_char; + for (tmp_char = ext->special_inline_chars; tmp_char; tmp_char=tmp_char->next) { + unsigned char c = (unsigned char) (unsigned long) tmp_char->data; + + openers_bottom[c] = stack_bottom; + } + } +} + +static cmark_syntax_extension *get_extension_for_special_char(cmark_parser *parser, unsigned char c) { + cmark_llist *tmp_ext; + + for (tmp_ext = parser->inline_syntax_extensions; tmp_ext; tmp_ext=tmp_ext->next) { + cmark_syntax_extension *ext = (cmark_syntax_extension *) tmp_ext->data; + cmark_llist *tmp_char; + for (tmp_char = ext->special_inline_chars; tmp_char; tmp_char=tmp_char->next) { + unsigned char tmp_c = (unsigned char) (unsigned long) tmp_char->data; + + if (tmp_c == c) { + return ext; + } + } + } + + return NULL; +} + 
+static void process_emphasis(cmark_parser *parser, subject *subj, delimiter *stack_bottom) { delimiter *closer = subj->last_delim; delimiter *opener; delimiter *old_closer; @@ -446,6 +480,7 @@ static void process_emphasis(subject *subj, delimiter *stack_bottom) { openers_bottom['_'] = stack_bottom; openers_bottom['\''] = stack_bottom; openers_bottom['"'] = stack_bottom; + add_extensions_openers_bottom(parser, openers_bottom, stack_bottom); // move back to first relevant delim. while (closer != NULL && closer->previous != stack_bottom) { @@ -454,8 +489,10 @@ static void process_emphasis(subject *subj, delimiter *stack_bottom) { // now move forward, looking for closers, and handling each while (closer != NULL) { + cmark_syntax_extension *extension = get_extension_for_special_char(parser, closer->delim_char); if (closer->can_close && - (closer->delim_char == '*' || closer->delim_char == '_' || + (extension != NULL || + closer->delim_char == '*' || closer->delim_char == '_' || closer->delim_char == '"' || closer->delim_char == '\'')) { // Now look backwards for first matching opener: opener = closer->previous; @@ -469,7 +506,13 @@ static void process_emphasis(subject *subj, delimiter *stack_bottom) { opener = opener->previous; } old_closer = closer; - if (closer->delim_char == '*' || closer->delim_char == '_') { + + if (extension) { + if (opener_found) + closer = extension->insert_inline_from_delim(extension, parser, subj, opener, closer); + else + closer = closer->next; + } else if (closer->delim_char == '*' || closer->delim_char == '_') { if (opener_found) { closer = S_insert_emph(subj, opener, closer); } else { @@ -737,9 +780,7 @@ static int link_label(subject *subj, cmark_chunk *raw_label) { subj->pos = startpos; // rewind return 0; } - -// Return a link, an image, or a literal close bracket. 
-static cmark_node *handle_close_bracket(subject *subj) { +static cmark_node *handle_close_bracket(cmark_parser *parser, subject *subj) { bufsize_t initial_pos; bufsize_t starturl, endurl, starttitle, endtitle, endall; bufsize_t n; @@ -864,7 +905,7 @@ static cmark_node *handle_close_bracket(subject *subj) { // Free the bracket [: cmark_node_free(opener->inl_text); - process_emphasis(subj, opener); + process_emphasis(parser, subj, opener); // Now, if we have a link, we also want to deactivate earlier link // delimiters. (This code can be removed if we decide to allow links @@ -908,36 +949,36 @@ static cmark_node *handle_newline(subject *subj) { } } -static bufsize_t subject_find_special_char(subject *subj, int options) { - // "\r\n\\`&_*[]pos + 1; while (n < subj->input.len) { @@ -951,9 +992,36 @@ static bufsize_t subject_find_special_char(subject *subj, int options) { return subj->input.len; } +void cmark_inlines_add_special_character(unsigned char c) { + SPECIAL_CHARS[c] = 1; +} + +void cmark_inlines_remove_special_character(unsigned char c) { + SPECIAL_CHARS[c] = 0; +} + +static cmark_node *try_extensions(cmark_parser *parser, + cmark_node *parent, + unsigned char c, + subject *subj) { + cmark_node *res = NULL; + cmark_llist *tmp; + + for (tmp = parser->inline_syntax_extensions; tmp; tmp = tmp->next) { + cmark_syntax_extension *ext = (cmark_syntax_extension *) tmp->data; + + res = ext->match_inline(ext, parser, parent, c, subj); + + if (res) + break; + } + + return res; +} + // Parse an inline, advancing subject, and add it as a child of parent. // Return 0 if no inline can be parsed, 1 otherwise. 
-static int parse_inline(subject *subj, cmark_node *parent, int options) { +static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent, int options) { cmark_node *new_inl = NULL; cmark_chunk contents; unsigned char c; @@ -997,7 +1065,7 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) { push_delimiter(subj, '[', true, false, new_inl); break; case ']': - new_inl = handle_close_bracket(subj); + new_inl = handle_close_bracket(parser, subj); break; case '!': advance(subj); @@ -1010,6 +1078,10 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) { } break; default: + new_inl = try_extensions(parser, parent, c, subj); + if (new_inl != NULL) + break; + endpos = subject_find_special_char(subj, options); contents = cmark_chunk_dup(&subj->input, subj->pos, endpos - subj->pos); subj->pos = endpos; @@ -1029,16 +1101,18 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) { } // Parse inlines from parent's string_content, adding as children of parent. -extern void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent, cmark_reference_map *refmap, +extern void cmark_parse_inlines(cmark_parser *parser, + cmark_node *parent, + cmark_reference_map *refmap, int options) { subject subj; - subject_from_buf(mem, &subj, &parent->content, refmap); + subject_from_buf(parser->mem, &subj, &parent->content, refmap); cmark_chunk_rtrim(&subj.input); - while (!is_eof(&subj) && parse_inline(&subj, parent, options)) + while (!is_eof(&subj) && parse_inline(parser, &subj, parent, options)) ; - process_emphasis(&subj, NULL); + process_emphasis(parser, &subj, NULL); } // Parse zero or more space characters, including at most one newline. 
diff --git a/src/inlines.h b/src/inlines.h index cbe783032..586b53fa7 100644 --- a/src/inlines.h +++ b/src/inlines.h @@ -8,12 +8,17 @@ extern "C" { cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url); cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title); -void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent, cmark_reference_map *refmap, +void cmark_parse_inlines(cmark_parser *parser, + cmark_node *parent, + cmark_reference_map *refmap, int options); bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input, cmark_reference_map *refmap); +void cmark_inlines_add_special_character(unsigned char c); +void cmark_inlines_remove_special_character(unsigned char c); + #ifdef __cplusplus } #endif diff --git a/src/parser.h b/src/parser.h index 1129f3bae..247423a76 100644 --- a/src/parser.h +++ b/src/parser.h @@ -46,6 +46,7 @@ struct cmark_parser { int options; bool last_buffer_ended_with_cr; cmark_llist *syntax_extensions; + cmark_llist *inline_syntax_extensions; }; #ifdef __cplusplus diff --git a/src/syntax_extension.c b/src/syntax_extension.c index 11a92c020..bad984adb 100644 --- a/src/syntax_extension.c +++ b/src/syntax_extension.c @@ -5,6 +5,7 @@ #include "buffer.h" void cmark_syntax_extension_free(cmark_syntax_extension *extension) { + cmark_llist_free(extension->special_inline_chars); free(extension->name); free(extension); } @@ -25,3 +26,18 @@ void cmark_syntax_extension_set_match_block_func(cmark_syntax_extension *extensi cmark_match_block_func func) { extension->last_block_matches = func; } + +void cmark_syntax_extension_set_match_inline_func(cmark_syntax_extension *extension, + cmark_match_inline_func func) { + extension->match_inline = func; +} + +void cmark_syntax_extension_set_inline_from_delim_func(cmark_syntax_extension *extension, + cmark_inline_from_delim_func func) { + extension->insert_inline_from_delim = func; +} + +void cmark_syntax_extension_set_special_inline_chars(cmark_syntax_extension *extension, + 
cmark_llist *special_chars) { + extension->special_inline_chars = special_chars; +} diff --git a/src/syntax_extension.h b/src/syntax_extension.h index 5565b4523..f9af1f12c 100644 --- a/src/syntax_extension.h +++ b/src/syntax_extension.h @@ -5,9 +5,12 @@ #include "cmark_extension_api.h" struct cmark_syntax_extension { - cmark_match_block_func last_block_matches; - cmark_open_block_func try_opening_block; - char * name; + cmark_match_block_func last_block_matches; + cmark_open_block_func try_opening_block; + cmark_match_inline_func match_inline; + cmark_inline_from_delim_func insert_inline_from_delim; + cmark_llist * special_inline_chars; + char * name; }; #endif From 5f64582e14b529d28647474ba616a1d7ac6445bd Mon Sep 17 00:00:00 2001 From: Mathieu Duponchelle Date: Sat, 5 Mar 2016 03:44:25 +0100 Subject: [PATCH 16/20] Define a strikethrough inline node type. --- src/cmark.h | 5 ++++- src/commonmark.c | 4 ++++ src/html.c | 8 ++++++++ src/latex.c | 9 +++++++++ src/man.c | 23 +++++++++++++++++++++++ src/node.c | 6 +++++- 6 files changed, 53 insertions(+), 2 deletions(-) diff --git a/src/cmark.h b/src/cmark.h index 4076176a2..6616ee593 100644 --- a/src/cmark.h +++ b/src/cmark.h @@ -66,8 +66,11 @@ typedef enum { CMARK_NODE_LINK, CMARK_NODE_IMAGE, + /* inlines with no syntax rules in the current specification */ + CMARK_NODE_STRIKETHROUGH, + CMARK_NODE_FIRST_INLINE = CMARK_NODE_TEXT, - CMARK_NODE_LAST_INLINE = CMARK_NODE_IMAGE, + CMARK_NODE_LAST_INLINE = CMARK_NODE_STRIKETHROUGH, } cmark_node_type; /* For backwards compatibility: */ diff --git a/src/commonmark.c b/src/commonmark.c index 486dc441c..f5d3af1b3 100644 --- a/src/commonmark.c +++ b/src/commonmark.c @@ -483,6 +483,10 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, } break; + case CMARK_NODE_STRIKETHROUGH: + OUT(cmark_node_get_string_content(node), false, LITERAL); + break; + default: assert(false); break; diff --git a/src/html.c b/src/html.c index f8f51026c..f710c2428 100644 --- 
a/src/html.c +++ b/src/html.c @@ -374,6 +374,14 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type, } break; + case CMARK_NODE_STRIKETHROUGH: + if (entering) { + cmark_strbuf_puts(html, ""); + } else { + cmark_strbuf_puts(html, ""); + } + break; + default: assert(false); break; diff --git a/src/latex.c b/src/latex.c index 306a508de..f85e46689 100644 --- a/src/latex.c +++ b/src/latex.c @@ -463,6 +463,15 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, } break; + case CMARK_NODE_STRIKETHROUGH: + /* requires \usepackage{ulem} */ + if (entering) { + LIT("\\sout{"); + } else { + LIT("}"); + } + break; + default: assert(false); break; diff --git a/src/man.c b/src/man.c index a2691b063..7242927bd 100644 --- a/src/man.c +++ b/src/man.c @@ -82,6 +82,19 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, switch (node->type) { case CMARK_NODE_DOCUMENT: + if (entering) { + /* Define a strikethrough macro */ + /* Commenting out because this makes tests fail + LIT(".de ST"); + CR(); + LIT(".nr ww \\w'\\\\$1'"); + CR(); + LIT("\\Z@\\v'-.25m'\\l'\\\\n[ww]u'@\\\\$1"); + CR(); + LIT(".."); + CR(); + */ + } break; case CMARK_NODE_BLOCK_QUOTE: @@ -273,6 +286,16 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node, } break; + case CMARK_NODE_STRIKETHROUGH: + if (entering) { + CR(); + LIT(".ST \""); + } else { + LIT("\""); + CR(); + } + break; + default: assert(false); break; diff --git a/src/node.c b/src/node.c index 7743ae19b..0e03210d0 100644 --- a/src/node.c +++ b/src/node.c @@ -65,6 +65,7 @@ static bool S_can_contain(cmark_node *node, cmark_node *child) { case CMARK_NODE_STRONG: case CMARK_NODE_LINK: case CMARK_NODE_IMAGE: + case CMARK_NODE_STRIKETHROUGH: case CMARK_NODE_CUSTOM_INLINE: return S_is_inline(child); case CMARK_NODE_TABLE: @@ -77,7 +78,8 @@ static bool S_can_contain(cmark_node *node, cmark_node *child) { child->type == CMARK_NODE_EMPH || child->type == CMARK_NODE_STRONG || child->type == 
CMARK_NODE_LINK || - child->type == CMARK_NODE_IMAGE; + child->type == CMARK_NODE_IMAGE || + child->type == CMARK_NODE_STRIKETHROUGH; default: break; @@ -259,6 +261,8 @@ const char *cmark_node_get_type_string(cmark_node *node) { return "link"; case CMARK_NODE_IMAGE: return "image"; + case CMARK_NODE_STRIKETHROUGH: + return "strikethrough"; } return ""; From 7fbb2e65fdd50e8c98107fa59b50822eda448e37 Mon Sep 17 00:00:00 2001 From: Mathieu Duponchelle Date: Fri, 4 Mar 2016 20:54:03 +0100 Subject: [PATCH 17/20] Example inline extension: strikethrough text --- extensions/core-extensions.c | 87 ++++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/extensions/core-extensions.c b/extensions/core-extensions.c index 9b904ed86..ad9715636 100644 --- a/extensions/core-extensions.c +++ b/extensions/core-extensions.c @@ -232,7 +232,94 @@ static cmark_syntax_extension *register_table_syntax_extension(void) { return ext; } +static cmark_node *strikethrough_match(cmark_syntax_extension *self, + cmark_parser *parser, + cmark_node *parent, + unsigned char character, + cmark_inline_parser *inline_parser) +{ + cmark_node *res = NULL; + int left_flanking, right_flanking, punct_before, punct_after; + int num_delims; + + /* Exit early */ + if (character != '~') + return NULL; + + num_delims = cmark_inline_parser_scan_delimiters(inline_parser, 1, '~', + &left_flanking, &right_flanking, &punct_before, &punct_after); + + if (num_delims > 0) { /* Should not be needed */ + int can_open, can_close; + + res = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem); + cmark_node_set_literal(res, "~"); + + can_open = left_flanking; + can_close = right_flanking; + if (can_open || can_close) + cmark_inline_parser_push_delimiter(inline_parser, character, can_open, can_close, res); + } + + return res; +} + +static delimiter *strikethrough_insert(cmark_syntax_extension *self, + cmark_parser *parser, + cmark_inline_parser *inline_parser, + delimiter *opener, + delimiter *closer) 
+{ + cmark_node *strikethrough; + cmark_node *tmp, *next; + delimiter *delim, *tmp_delim; + delimiter *res = closer->next; + + strikethrough = opener->inl_text; + + if (!cmark_node_set_type(strikethrough, CMARK_NODE_STRIKETHROUGH)) + goto done; + + cmark_node_set_string_content(strikethrough, "~"); + tmp = cmark_node_next(opener->inl_text); + + while (tmp) { + if (tmp == closer->inl_text) + break; + next = cmark_node_next(tmp); + cmark_node_append_child(strikethrough, tmp); + tmp = next; + } + + cmark_node_free(closer->inl_text); + + delim = closer; + while (delim != NULL && delim != opener) { + tmp_delim = delim->previous; + cmark_inline_parser_remove_delimiter(inline_parser, delim); + delim = tmp_delim; + } + + cmark_inline_parser_remove_delimiter(inline_parser, opener); + +done: + return res; +} + +static cmark_syntax_extension *create_strikethrough_extension(void) { + cmark_syntax_extension *ext = cmark_syntax_extension_new("tilde_strikethrough"); + cmark_llist *special_chars = NULL; + + cmark_syntax_extension_set_match_inline_func(ext, strikethrough_match); + cmark_syntax_extension_set_inline_from_delim_func(ext, strikethrough_insert); + special_chars = cmark_llist_append(special_chars, (void *) '~'); + cmark_syntax_extension_set_special_inline_chars(ext, special_chars); + + return ext; +} + int init_libcmarkextensions(cmark_plugin *plugin) { cmark_plugin_register_syntax_extension(plugin, register_table_syntax_extension()); + cmark_plugin_register_syntax_extension(plugin, create_strikethrough_extension()); return 1; } From 44b2d76a4762b02ffbd6b18be95004beae394ed7 Mon Sep 17 00:00:00 2001 From: Mathieu Duponchelle Date: Sun, 6 Mar 2016 10:17:19 +0100 Subject: [PATCH 18/20] cmark_parser: improve lifecycle management. This slightly breaks API as finish will now return NULL after it has been called once, but I believe that's for the best. 
This fixes potential leaks of root when a parser was fed but not finished, and makes reusing a parser instance multiple times possible. --- src/blocks.c | 69 ++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 48 insertions(+), 21 deletions(-) diff --git a/src/blocks.c b/src/blocks.c index 562658d69..14cee3ba9 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -92,32 +92,46 @@ int cmark_parser_attach_syntax_extension(cmark_parser *parser, return 1; } -cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem) { - cmark_parser *parser = (cmark_parser *)mem->calloc(1, sizeof(cmark_parser)); - parser->mem = mem; +static void cmark_parser_dispose(cmark_parser *parser) { + if (parser->root) + cmark_node_free(parser->root); + + if (parser->refmap) + cmark_reference_map_free(parser->refmap); +} + +static void cmark_parser_reset(cmark_parser *parser) { + cmark_llist *saved_exts = parser->syntax_extensions; + cmark_llist *saved_inline_exts = parser->inline_syntax_extensions; + int saved_options = parser->options; + cmark_mem *saved_mem = parser->mem; + + cmark_parser_dispose(parser); - cmark_node *document = make_document(mem); + memset(parser, 0, sizeof(cmark_parser)); + parser->mem = saved_mem; - cmark_strbuf_init(mem, &parser->curline, 256); - cmark_strbuf_init(mem, &parser->linebuf, 0); + cmark_strbuf_init(parser->mem, &parser->curline, 256); + cmark_strbuf_init(parser->mem, &parser->linebuf, 0); - parser->refmap = cmark_reference_map_new(mem); + cmark_node *document = make_document(parser->mem); + + parser->refmap = cmark_reference_map_new(parser->mem); parser->root = document; parser->current = document; - parser->line_number = 0; - parser->offset = 0; - parser->column = 0; - parser->first_nonspace = 0; - parser->first_nonspace_column = 0; - parser->indent = 0; - parser->blank = false; - parser->partially_consumed_tab = false; - parser->last_line_length = 0; - parser->options = options; + parser->last_buffer_ended_with_cr = false; - 
parser->syntax_extensions = NULL; - parser->inline_syntax_extensions = NULL; + parser->syntax_extensions = saved_exts; + parser->inline_syntax_extensions = saved_inline_exts; + parser->options = saved_options; +} + +cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem) { + cmark_parser *parser = (cmark_parser *)mem->calloc(1, sizeof(cmark_parser)); + parser->mem = mem; + parser->options = options; + cmark_parser_reset(parser); return parser; } @@ -128,9 +142,9 @@ cmark_parser *cmark_parser_new(int options) { void cmark_parser_free(cmark_parser *parser) { cmark_mem *mem = parser->mem; + cmark_parser_dispose(parser); cmark_strbuf_free(&parser->curline); cmark_strbuf_free(&parser->linebuf); - cmark_reference_map_free(parser->refmap); cmark_llist_free(parser->syntax_extensions); cmark_llist_free(parser->inline_syntax_extensions); mem->free(parser); @@ -1248,6 +1262,12 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer, } cmark_node *cmark_parser_finish(cmark_parser *parser) { + cmark_node *res; + + /* Parser was already finished once */ + if (parser->root == NULL) + return NULL; + if (parser->linebuf.size) { S_process_line(parser, parser->linebuf.ptr, parser->linebuf.size); cmark_strbuf_clear(&parser->linebuf); @@ -1260,13 +1280,20 @@ cmark_node *cmark_parser_finish(cmark_parser *parser) { } cmark_strbuf_free(&parser->curline); + cmark_strbuf_free(&parser->linebuf); #if CMARK_DEBUG_NODES if (cmark_node_check(parser->root, stderr)) { abort(); } #endif - return parser->root; + + res = parser->root; + parser->root = NULL; + + cmark_parser_reset(parser); + + return res; } int cmark_parser_get_line_number(cmark_parser *parser) { From cf19502bdae4967f5117c9d90860def8896caeb8 Mon Sep 17 00:00:00 2001 From: Mathieu Duponchelle Date: Sun, 3 Apr 2016 00:59:11 +0200 Subject: [PATCH 19/20] syntax-extension: add "priv" field. + And pointer to a free function as well. 
--- src/cmark_extension_api.h | 11 +++++++++++ src/syntax_extension.c | 11 +++++++++++ src/syntax_extension.h | 2 ++ 3 files changed, 24 insertions(+) diff --git a/src/cmark_extension_api.h b/src/cmark_extension_api.h index 636df7a12..db6f4cd01 100644 --- a/src/cmark_extension_api.h +++ b/src/cmark_extension_api.h @@ -97,6 +97,10 @@ typedef struct cmark_plugin cmark_plugin; * * Finally, the extension should return NULL if its scan didn't * match its syntax rules. + * + * The extension can store whatever private data it might need + * with 'cmark_syntax_extension_set_private', + * and optionally define a free function for this data. */ typedef struct cmark_syntax_extension cmark_syntax_extension; @@ -251,6 +255,13 @@ CMARK_EXPORT void cmark_syntax_extension_set_special_inline_chars(cmark_syntax_extension *extension, cmark_llist *special_chars); +/** See the documentation for 'cmark_syntax_extension' + */ +CMARK_EXPORT +void cmark_syntax_extension_set_private(cmark_syntax_extension *extension, + void *priv, + cmark_free_func free_func); + /** Return the index of the line currently being parsed, starting with 1. 
*/ CMARK_EXPORT diff --git a/src/syntax_extension.c b/src/syntax_extension.c index bad984adb..d8c4459d9 100644 --- a/src/syntax_extension.c +++ b/src/syntax_extension.c @@ -5,6 +5,10 @@ #include "buffer.h" void cmark_syntax_extension_free(cmark_syntax_extension *extension) { + if (extension->free_function && extension->priv) { + extension->free_function(extension->priv); + } + cmark_llist_free(extension->special_inline_chars); free(extension->name); free(extension); @@ -41,3 +45,10 @@ void cmark_syntax_extension_set_special_inline_chars(cmark_syntax_extension *ext cmark_llist *special_chars) { extension->special_inline_chars = special_chars; } + +void cmark_syntax_extension_set_private(cmark_syntax_extension *extension, + void *priv, + cmark_free_func free_func) { + extension->priv = priv; + extension->free_function = free_func; +} diff --git a/src/syntax_extension.h b/src/syntax_extension.h index f9af1f12c..f46a7d2fe 100644 --- a/src/syntax_extension.h +++ b/src/syntax_extension.h @@ -11,6 +11,8 @@ struct cmark_syntax_extension { cmark_inline_from_delim_func insert_inline_from_delim; cmark_llist * special_inline_chars; char * name; + void * priv; + cmark_free_func free_function; }; #endif From 389216b027fa2c27834a119a863ff9e1a99d77a6 Mon Sep 17 00:00:00 2001 From: Mathieu Duponchelle Date: Sun, 3 Apr 2016 00:59:45 +0200 Subject: [PATCH 20/20] cmark_parser: implement and expose reentrant feed function. 
This can be useful for transclusion extensions for example, http://talk.commonmark.org/t/transclusion-or-including-sub-documents-for-reuse/270 --- src/blocks.c | 22 +++++++++++++++++++++- src/cmark_extension_api.h | 4 ++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/src/blocks.c b/src/blocks.c index 14cee3ba9..637e1536c 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -567,6 +567,19 @@ void cmark_parser_feed(cmark_parser *parser, const char *buffer, size_t len) { S_parser_feed(parser, (const unsigned char *)buffer, len, false); } +void cmark_parser_feed_reentrant(cmark_parser *parser, const char *buffer, size_t len) { + cmark_strbuf saved_linebuf; + + cmark_strbuf_init(parser->mem, &saved_linebuf, 0); + cmark_strbuf_puts(&saved_linebuf, cmark_strbuf_cstr(&parser->linebuf)); + cmark_strbuf_clear(&parser->linebuf); + + S_parser_feed(parser, (const unsigned char *)buffer, len, true); + + cmark_strbuf_sets(&parser->linebuf, cmark_strbuf_cstr(&saved_linebuf)); + cmark_strbuf_free(&saved_linebuf); +} + static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer, size_t len, bool eof) { const unsigned char *end = buffer + len; @@ -1214,6 +1227,9 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer, bool all_matched = true; cmark_node *container; cmark_chunk input; + cmark_node *current; + + cmark_strbuf_clear(&parser->curline); if (parser->options & CMARK_OPT_VALIDATE_UTF8) cmark_utf8proc_check(&parser->curline, buffer, bytes); @@ -1245,9 +1261,13 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer, if (parser->blank && S_last_line_blank(container)) break_out_of_lists(parser, &container); + current = parser->current; + open_new_blocks(parser, &container, &input, all_matched); - add_text_to_container(parser, container, last_matched_container, &input); + /* parser->current might have changed if feed_reentrant was called */ + if (current == parser->current) + add_text_to_container(parser, 
container, last_matched_container, &input); finished: parser->last_line_length = input.len; diff --git a/src/cmark_extension_api.h b/src/cmark_extension_api.h index db6f4cd01..18e08142c 100644 --- a/src/cmark_extension_api.h +++ b/src/cmark_extension_api.h @@ -416,6 +416,10 @@ void cmark_parser_advance_offset(cmark_parser *parser, int count, int columns); + +CMARK_EXPORT +void cmark_parser_feed_reentrant(cmark_parser *parser, const char *buffer, size_t len); + /** Attach the syntax 'extension' to the 'parser', to provide extra syntax * rules. * See the documentation for cmark_syntax_extension for more information.