From fb7b0cffb73b4f02050e4a9213817093b8d48855 Mon Sep 17 00:00:00 2001 From: "Dominick C. Pastore" Date: Fri, 22 May 2020 03:57:39 -0400 Subject: [PATCH 01/17] Make parsing stricter --- jsmn.h | 278 ++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 164 insertions(+), 114 deletions(-) diff --git a/jsmn.h b/jsmn.h index 3178dcc9..beb38159 100644 --- a/jsmn.h +++ b/jsmn.h @@ -1,4 +1,5 @@ -/* +/* vim: set expandtab ts=8 sts=2 sw=2 + * * MIT License * * Copyright (c) 2010 Serge Zaitsev @@ -45,8 +46,10 @@ extern "C" { */ typedef enum { JSMN_UNDEFINED = 0, - JSMN_OBJECT = 1, - JSMN_ARRAY = 2, + JSMN_OBJECT = 1, /* MUST remain same as (JSON_STATE_OBJ_COMMA >> 4) or + * code will break! */ + JSMN_ARRAY = 2, /* MUST remain same as (JSON_STATE_ARRAY_COMMA >> 4) or + * code will break! */ JSMN_STRING = 3, JSMN_PRIMITIVE = 4 } jsmntype_t; @@ -68,14 +71,38 @@ enum jsmnerr { */ typedef struct jsmntok { jsmntype_t type; - int start; - int end; - int size; + unsigned int start; + unsigned int end; + unsigned int size; #ifdef JSMN_PARENT_LINKS int parent; #endif } jsmntok_t; +/** + * JSON parser state. Describes what kind of token or character is expected + * next. 
In addition: + * (state & 0x1 == 0) if a string is currently allowed (or primitive in + * non-strict mode) + * (state & 0x3 == 0) if an object or array is currently allowed + * (state & 0x3 == 1) if a comma is currently allowed + * (state & 0x30 != 0) if currently in a container + * (state & 0x10 != 0) if currently in an object + * (state & 0x20 != 0) if currently in an array + * + * toksuper == -1 is equivalent to state == JSMN_STATE_ROOT but works when + * tokens == NULL + */ +typedef enum { + JSMN_STATE_ROOT = 0x0, /* At root */ + JSMN_STATE_OBJ_KEY = 0x12, /* Expecting object key */ + JSMN_STATE_OBJ_COLON = 0x13, /* Expecting object colon */ + JSMN_STATE_OBJ_VAL = 0x10, /* Expecting object value */ + JSMN_STATE_OBJ_COMMA = 0x11 /* Expecting object comma or } */ + JSMN_STATE_ARRAY_ITEM = 0x20, /* Expecting array item */ + JSMN_STATE_ARRAY_COMMA = 0x21, /* Expecting array comma or ] */ +} jsmnstate_t; + /** * JSON parser. Contains an array of token blocks available. Also stores * the string being parsed now and current position in that string. @@ -84,6 +111,9 @@ typedef struct jsmn_parser { unsigned int pos; /* offset in the JSON string */ unsigned int toknext; /* next token to allocate */ int toksuper; /* superior token node, e.g. parent object or array */ + int state; /* parser state, from jsmnstate_t */ + unsigned int depth; /* Nesting depth of arrays and objects (used only when + * tokens is NULL) */ } jsmn_parser; /** @@ -110,7 +140,8 @@ static jsmntok_t *jsmn_alloc_token(jsmn_parser *parser, jsmntok_t *tokens, return NULL; } tok = &tokens[parser->toknext++]; - tok->start = tok->end = -1; + tok->start = 0; + tok->end = 0; tok->size = 0; #ifdef JSMN_PARENT_LINKS tok->parent = -1; @@ -126,15 +157,15 @@ static void jsmn_fill_token(jsmntok_t *token, const jsmntype_t type, token->type = type; token->start = start; token->end = end; - token->size = 0; } /** - * Fills next available token with JSON primitive. 
+ * If extend is false, fills next available token with JSON primitive. If + * extend is true, complete the primitive that the parser is in the middle of. */ static int jsmn_parse_primitive(jsmn_parser *parser, const char *js, const size_t len, jsmntok_t *tokens, - const size_t num_tokens) { + const size_t num_tokens, int extend) { jsmntok_t *token; int start; @@ -142,10 +173,7 @@ static int jsmn_parse_primitive(jsmn_parser *parser, const char *js, for (; parser->pos < len && js[parser->pos] != '\0'; parser->pos++) { switch (js[parser->pos]) { -#ifndef JSMN_STRICT - /* In strict mode primitive must be followed by "," or "}" or "]" */ case ':': -#endif case '\t': case '\r': case '\n': @@ -154,8 +182,7 @@ static int jsmn_parse_primitive(jsmn_parser *parser, const char *js, case ']': case '}': goto found; - default: - /* to quiet a warning from gcc*/ + default: /* to quiet a warning from gcc*/ break; } if (js[parser->pos] < 32 || js[parser->pos] >= 127) { @@ -163,26 +190,25 @@ static int jsmn_parse_primitive(jsmn_parser *parser, const char *js, return JSMN_ERROR_INVAL; } } -#ifdef JSMN_STRICT - /* In strict mode primitive must be followed by a comma/object/array */ - parser->pos = start; - return JSMN_ERROR_PART; -#endif found: if (tokens == NULL) { parser->pos--; return 0; } - token = jsmn_alloc_token(parser, tokens, num_tokens); - if (token == NULL) { - parser->pos = start; - return JSMN_ERROR_NOMEM; - } - jsmn_fill_token(token, JSMN_PRIMITIVE, start, parser->pos); + if (extend) { + tokens[parser->toknext - 1].end = parser->pos; + } else { + token = jsmn_alloc_token(parser, tokens, num_tokens); + if (token == NULL) { + parser->pos = start; + return JSMN_ERROR_NOMEM; + } + jsmn_fill_token(token, JSMN_PRIMITIVE, start, parser->pos); #ifdef JSMN_PARENT_LINKS - token->parent = parser->toksuper; + token->parent = parser->toksuper; #endif + } parser->pos--; return 0; } @@ -195,7 +221,7 @@ static int jsmn_parse_string(jsmn_parser *parser, const char *js, const size_t 
num_tokens) { jsmntok_t *token; - int start = parser->pos; + unsigned int start = parser->pos; parser->pos++; @@ -262,16 +288,44 @@ static int jsmn_parse_string(jsmn_parser *parser, const char *js, return JSMN_ERROR_PART; } +/** + * If in the middle of a primitive, add the rest of it to the current primitive + * token + */ +static int jsmn_complete_primitive(jsmn_parser *parser, const char *js, + const size_t len, jsmntok_t *tokens, + const size_t num_tokens) { + unsigned int pos = parser->pos; + if (parser->toknext > 0 && parser->pos < len) { + if (js[pos] > 32 && js[pos] < 127 && js[pos] != ',' && js[pos] != ':' && + js[pos] != '{' && js[pos] != '}' && js[pos] != '[' && js[pos] != ']' && + js[pos] != '"' && js[pos - 1] > 32 && js[pos - 1] < 127 && + js[pos - 1] != ',' && js[pos - 1] != ':' && js[pos - 1] != '{' && + js[pos - 1] != '}' && js[pos - 1] != '[' && js[pos - 1] != ']' && + js[pos - 1] != '"') { + return jsmn_parse_primitive(parser, js, len, tokens, num_tokens, 1); + } + } + return 0; +} + /** * Parse JSON string and fill tokens. 
*/ JSMN_API int jsmn_parse(jsmn_parser *parser, const char *js, const size_t len, jsmntok_t *tokens, const unsigned int num_tokens) { int r; +#ifndef JSMN_PARENT_LINKS int i; +#endif jsmntok_t *token; int count = parser->toknext; + r = jsmn_complete_primitive(parser, js, len, tokens, num_tokens); + if (r < 0) { + return r; + } + for (; parser->pos < len && js[parser->pos] != '\0'; parser->pos++) { char c; jsmntype_t type; @@ -282,89 +336,84 @@ JSMN_API int jsmn_parse(jsmn_parser *parser, const char *js, const size_t len, case '[': count++; if (tokens == NULL) { + parser->depth++; break; } + if (parser->state & 0x3) { + return JSMN_ERROR_INVAL; + } token = jsmn_alloc_token(parser, tokens, num_tokens); if (token == NULL) { return JSMN_ERROR_NOMEM; } - if (parser->toksuper != -1) { - jsmntok_t *t = &tokens[parser->toksuper]; -#ifdef JSMN_STRICT - /* In strict mode an object or array can't become a key */ - if (t->type == JSMN_OBJECT) { - return JSMN_ERROR_INVAL; - } -#endif - t->size++; + if (parser->state & 0x10) { + tokens[parser->toksuper].size++; #ifdef JSMN_PARENT_LINKS token->parent = parser->toksuper; #endif } - token->type = (c == '{' ? JSMN_OBJECT : JSMN_ARRAY); + if (c == '{') { + token->type = JSMN_OBJECT; + parser->state = JSMN_STATE_OBJ_KEY; + } else { + token->type = JSMN_ARRAY; + parser->state = JSMN_STATE_ARRAY_ITEM; + } token->start = parser->pos; parser->toksuper = parser->toknext - 1; break; case '}': - case ']': if (tokens == NULL) { + parser->depth--; + if (parser->depth == 0) { + return count; + } break; - } - type = (c == '}' ? 
JSMN_OBJECT : JSMN_ARRAY); -#ifdef JSMN_PARENT_LINKS - if (parser->toknext < 1) { + } else if (jsmn->state != JSMN_STATE_OBJ_COMMA) { return JSMN_ERROR_INVAL; } - token = &tokens[parser->toknext - 1]; - for (;;) { - if (token->start != -1 && token->end == -1) { - if (token->type != type) { - return JSMN_ERROR_INVAL; - } - token->end = parser->pos + 1; - parser->toksuper = token->parent; - break; - } - if (token->parent == -1) { - if (token->type != type || parser->toksuper == -1) { - return JSMN_ERROR_INVAL; - } - break; - } - token = &tokens[token->parent]; - } -#else - for (i = parser->toknext - 1; i >= 0; i--) { - token = &tokens[i]; - if (token->start != -1 && token->end == -1) { - if (token->type != type) { - return JSMN_ERROR_INVAL; - } - parser->toksuper = -1; - token->end = parser->pos + 1; - break; + goto container_close; + case ']': + if (tokens == NULL) { + parser->depth--; + if (parser->depth == 0) { + return count; } - } - /* Error if unmatched closing bracket */ - if (i == -1) { + break; + } else if (jsmn->state != JSMN_STATE_ARRAY_COMMA) { return JSMN_ERROR_INVAL; } - for (; i >= 0; i--) { - token = &tokens[i]; - if (token->start != -1 && token->end == -1) { - parser->toksuper = i; +container_close: + token = &tokens[parser->toksuper]; + token->end = parser->pos + 1; +#ifdef JSMN_PARENT_LINKS + parser->toksuper = token->parent; +#else + for (i = parser->toksuper - 1; i >= 0; i--) { + if (tokens[i].end == 0) { break; } } + parser->toksuper = i; #endif - break; + if (parser->toksuper == -1) { + return count; + } else { + jsmn->state = tokens[parser->toksuper].type << 4 & 0x1; + } case '\"': r = jsmn_parse_string(parser, js, len, tokens, num_tokens); if (r < 0) { return r; } count++; - if (parser->toksuper != -1 && tokens != NULL) { + if (parser->toksuper == -1) { + return count; + } else if (tokens != NULL) { + if (parser->state & 0x1) { + return JSMN_ERROR_INVAL; + } + parser->state++; tokens[parser->toksuper].size++; } break; @@ -374,25 +423,33 @@ 
JSMN_API int jsmn_parse(jsmn_parser *parser, const char *js, const size_t len, case ' ': break; case ':': + if (tokens == NULL) { + break; + } else if (parser->state != JSMN_STATE_OBJ_COLON) { + return JSMN_ERROR_INVAL; + } parser->toksuper = parser->toknext - 1; + parser->state = JSMN_STATE_OBJ_VAL; break; case ',': - if (tokens != NULL && parser->toksuper != -1 && - tokens[parser->toksuper].type != JSMN_ARRAY && - tokens[parser->toksuper].type != JSMN_OBJECT) { + if (tokens == NULL) { + break; + } else if (parser->state & 0x3 != 1) { + return JSMN_ERROR_INVAL; + } #ifdef JSMN_PARENT_LINKS - parser->toksuper = tokens[parser->toksuper].parent; + parser->toksuper = tokens[parser->toksuper].parent; #else - for (i = parser->toknext - 1; i >= 0; i--) { - if (tokens[i].type == JSMN_ARRAY || tokens[i].type == JSMN_OBJECT) { - if (tokens[i].start != -1 && tokens[i].end == -1) { - parser->toksuper = i; - break; - } - } + for (i = parser->toksuper - 1; i >= 0; i--) { + if (tokens[i].end == 0) { + type = tokens[i].type; + break; } -#endif } + parser->toksuper = i; +#endif + parser->state = + (type == JSMN_OBJECT) ? 
JSMN_STATE_OBJ_KEY : JSMN_STATE_ARRAY_ITEM; break; #ifdef JSMN_STRICT /* In strict mode primitives are: numbers and booleans */ @@ -410,28 +467,28 @@ JSMN_API int jsmn_parse(jsmn_parser *parser, const char *js, const size_t len, case 't': case 'f': case 'n': - /* And they must not be keys of the object */ - if (tokens != NULL && parser->toksuper != -1) { - const jsmntok_t *t = &tokens[parser->toksuper]; - if (t->type == JSMN_OBJECT || - (t->type == JSMN_STRING && t->size != 0)) { - return JSMN_ERROR_INVAL; - } - } #else - /* In non-strict mode every unquoted value is a primitive */ default: #endif - r = jsmn_parse_primitive(parser, js, len, tokens, num_tokens); + r = jsmn_parse_primitive(parser, js, len, tokens, num_tokens, 0); if (r < 0) { return r; } count++; - if (parser->toksuper != -1 && tokens != NULL) { + if (parser->toksuper == -1) { + return count; + } else if (tokens != NULL) { +#ifdef JSMN_STRICT + if (parser->state & 0x3) { +#else + if (parser->state & 0x1) { +#endif + return JSMN_ERROR_INVAL; + } + parser->state++; tokens[parser->toksuper].size++; } break; - #ifdef JSMN_STRICT /* Unexpected char in strict mode */ default: @@ -440,16 +497,7 @@ JSMN_API int jsmn_parse(jsmn_parser *parser, const char *js, const size_t len, } } - if (tokens != NULL) { - for (i = parser->toknext - 1; i >= 0; i--) { - /* Unmatched opened object or array */ - if (tokens[i].start != -1 && tokens[i].end == -1) { - return JSMN_ERROR_PART; - } - } - } - - return count; + return JSMN_ERROR_PART; } /** @@ -460,6 +508,8 @@ JSMN_API void jsmn_init(jsmn_parser *parser) { parser->pos = 0; parser->toknext = 0; parser->toksuper = -1; + parser->state = JSMN_STATE_ROOT; + parser->depth = 0; } #endif /* JSMN_HEADER */ From 6b6898f7f923edb9af0e0ebe5ad3ee00caa6ab39 Mon Sep 17 00:00:00 2001 From: "Dominick C. 
Pastore" Date: Fri, 22 May 2020 04:03:15 -0400 Subject: [PATCH 02/17] Fix incorrect count with tokens == NULL --- jsmn.h | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/jsmn.h b/jsmn.h index beb38159..798702be 100644 --- a/jsmn.h +++ b/jsmn.h @@ -407,15 +407,20 @@ JSMN_API int jsmn_parse(jsmn_parser *parser, const char *js, const size_t len, return r; } count++; + if (tokens == NULL) { + if (depth == 0) { + return count; + } else { + break; + } + } if (parser->toksuper == -1) { return count; - } else if (tokens != NULL) { - if (parser->state & 0x1) { - return JSMN_ERROR_INVAL; - } - parser->state++; - tokens[parser->toksuper].size++; + } else if (parser->state & 0x1) { + return JSMN_ERROR_INVAL; } + parser->state++; + tokens[parser->toksuper].size++; break; case '\t': case '\r': @@ -475,19 +480,24 @@ JSMN_API int jsmn_parse(jsmn_parser *parser, const char *js, const size_t len, return r; } count++; + if (tokens == NULL) { + if (depth == 0) { + return count; + } else { + break; + } + } if (parser->toksuper == -1) { return count; - } else if (tokens != NULL) { #ifdef JSMN_STRICT - if (parser->state & 0x3) { + } else if (parser->state & 0x3) { #else - if (parser->state & 0x1) { + } else if (parser->state & 0x1) { #endif - return JSMN_ERROR_INVAL; - } - parser->state++; - tokens[parser->toksuper].size++; + return JSMN_ERROR_INVAL; } + parser->state++; + tokens[parser->toksuper].size++; break; #ifdef JSMN_STRICT /* Unexpected char in strict mode */ From 05840d02d0d6b1e59092a5f2c477222313d75971 Mon Sep 17 00:00:00 2001 From: "Dominick C. 
Pastore" Date: Fri, 22 May 2020 04:07:01 -0400 Subject: [PATCH 03/17] Fix compile bugs --- jsmn.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/jsmn.h b/jsmn.h index 798702be..6e2a23bf 100644 --- a/jsmn.h +++ b/jsmn.h @@ -98,7 +98,7 @@ typedef enum { JSMN_STATE_OBJ_KEY = 0x12, /* Expecting object key */ JSMN_STATE_OBJ_COLON = 0x13, /* Expecting object colon */ JSMN_STATE_OBJ_VAL = 0x10, /* Expecting object value */ - JSMN_STATE_OBJ_COMMA = 0x11 /* Expecting object comma or } */ + JSMN_STATE_OBJ_COMMA = 0x11, /* Expecting object comma or } */ JSMN_STATE_ARRAY_ITEM = 0x20, /* Expecting array item */ JSMN_STATE_ARRAY_COMMA = 0x21, /* Expecting array comma or ] */ } jsmnstate_t; @@ -369,7 +369,7 @@ JSMN_API int jsmn_parse(jsmn_parser *parser, const char *js, const size_t len, return count; } break; - } else if (jsmn->state != JSMN_STATE_OBJ_COMMA) { + } else if (parser->state != JSMN_STATE_OBJ_COMMA) { return JSMN_ERROR_INVAL; } goto container_close; @@ -380,7 +380,7 @@ JSMN_API int jsmn_parse(jsmn_parser *parser, const char *js, const size_t len, return count; } break; - } else if (jsmn->state != JSMN_STATE_ARRAY_COMMA) { + } else if (parser->state != JSMN_STATE_ARRAY_COMMA) { return JSMN_ERROR_INVAL; } container_close: @@ -399,7 +399,7 @@ JSMN_API int jsmn_parse(jsmn_parser *parser, const char *js, const size_t len, if (parser->toksuper == -1) { return count; } else { - jsmn->state = tokens[parser->toksuper].type << 4 & 0x1; + parser->state = tokens[parser->toksuper].type << 4 & 0x1; } case '\"': r = jsmn_parse_string(parser, js, len, tokens, num_tokens); @@ -408,7 +408,7 @@ JSMN_API int jsmn_parse(jsmn_parser *parser, const char *js, const size_t len, } count++; if (tokens == NULL) { - if (depth == 0) { + if (parser->depth == 0) { return count; } else { break; @@ -481,7 +481,7 @@ JSMN_API int jsmn_parse(jsmn_parser *parser, const char *js, const size_t len, } count++; if (tokens == NULL) { - if (depth == 0) { + if (parser->depth == 
0) { return count; } else { break; From 3391c19c28cc0b605ac1e41ce3c165b7b2dcf1f5 Mon Sep 17 00:00:00 2001 From: "Dominick C. Pastore" Date: Mon, 25 May 2020 12:20:30 -0400 Subject: [PATCH 04/17] Bugfixes --- jsmn.h | 138 ++++++++++++++++++++++++++++++--------------------- test/tests.c | 54 +++++++++++--------- 2 files changed, 113 insertions(+), 79 deletions(-) diff --git a/jsmn.h b/jsmn.h index 6e2a23bf..097c5625 100644 --- a/jsmn.h +++ b/jsmn.h @@ -1,4 +1,4 @@ -/* vim: set expandtab ts=8 sts=2 sw=2 +/* vim: set expandtab ts=8 sts=2 sw=2: * * MIT License * @@ -46,10 +46,8 @@ extern "C" { */ typedef enum { JSMN_UNDEFINED = 0, - JSMN_OBJECT = 1, /* MUST remain same as (JSON_STATE_OBJ_COMMA >> 4) or - * code will break! */ - JSMN_ARRAY = 2, /* MUST remain same as (JSON_STATE_ARRAY_COMMA >> 4) or - * code will break! */ + JSMN_OBJECT = 1, + JSMN_ARRAY = 2, JSMN_STRING = 3, JSMN_PRIMITIVE = 4 } jsmntype_t; @@ -82,25 +80,26 @@ typedef struct jsmntok { /** * JSON parser state. Describes what kind of token or character is expected * next. 
In addition: - * (state & 0x1 == 0) if a string is currently allowed (or primitive in - * non-strict mode) - * (state & 0x3 == 0) if an object or array is currently allowed - * (state & 0x3 == 1) if a comma is currently allowed - * (state & 0x30 != 0) if currently in a container - * (state & 0x10 != 0) if currently in an object - * (state & 0x20 != 0) if currently in an array - * - * toksuper == -1 is equivalent to state == JSMN_STATE_ROOT but works when - * tokens == NULL + * (state & 0x1) == 0 if a string is currently allowed (or primitive in + * non-strict mode) + * (state & 0x3) == 0 if an object or array is currently allowed + * (state & 0x3) == 1 if a comma is currently allowed + * (state & 0x30) != 0 if currently in a container + * (state & 0x10) != 0 if currently in an object + * (state & 0x20) != 0 if currently in an array + * (state & 0x14) == 0x14 if an object can be closed + * (state & 0x24) == 0x24 if an array can be closed */ typedef enum { JSMN_STATE_ROOT = 0x0, /* At root */ + JSMN_STATE_OBJ_NEW = 0x16, /* Expecting object key or } */ JSMN_STATE_OBJ_KEY = 0x12, /* Expecting object key */ JSMN_STATE_OBJ_COLON = 0x13, /* Expecting object colon */ JSMN_STATE_OBJ_VAL = 0x10, /* Expecting object value */ - JSMN_STATE_OBJ_COMMA = 0x11, /* Expecting object comma or } */ + JSMN_STATE_OBJ_COMMA = 0x15, /* Expecting object comma or } */ + JSMN_STATE_ARRAY_NEW = 0x24, /* Expecting array item or ] */ JSMN_STATE_ARRAY_ITEM = 0x20, /* Expecting array item */ - JSMN_STATE_ARRAY_COMMA = 0x21, /* Expecting array comma or ] */ + JSMN_STATE_ARRAY_COMMA = 0x25, /* Expecting array comma or ] */ } jsmnstate_t; /** @@ -140,7 +139,6 @@ static jsmntok_t *jsmn_alloc_token(jsmn_parser *parser, jsmntok_t *tokens, return NULL; } tok = &tokens[parser->toknext++]; - tok->start = 0; tok->end = 0; tok->size = 0; #ifdef JSMN_PARENT_LINKS @@ -179,6 +177,7 @@ static int jsmn_parse_primitive(jsmn_parser *parser, const char *js, case '\n': case ' ': case ',': + case '"': case ']': case 
'}': goto found; @@ -205,11 +204,8 @@ static int jsmn_parse_primitive(jsmn_parser *parser, const char *js, return JSMN_ERROR_NOMEM; } jsmn_fill_token(token, JSMN_PRIMITIVE, start, parser->pos); -#ifdef JSMN_PARENT_LINKS - token->parent = parser->toksuper; -#endif + parser->pos--; } - parser->pos--; return 0; } @@ -240,9 +236,6 @@ static int jsmn_parse_string(jsmn_parser *parser, const char *js, return JSMN_ERROR_NOMEM; } jsmn_fill_token(token, JSMN_STRING, start + 1, parser->pos); -#ifdef JSMN_PARENT_LINKS - token->parent = parser->toksuper; -#endif return 0; } @@ -292,7 +285,7 @@ static int jsmn_parse_string(jsmn_parser *parser, const char *js, * If in the middle of a primitive, add the rest of it to the current primitive * token */ -static int jsmn_complete_primitive(jsmn_parser *parser, const char *js, +static int jsmn_finish_primitive(jsmn_parser *parser, const char *js, const size_t len, jsmntok_t *tokens, const size_t num_tokens) { unsigned int pos = parser->pos; @@ -321,7 +314,7 @@ JSMN_API int jsmn_parse(jsmn_parser *parser, const char *js, const size_t len, jsmntok_t *token; int count = parser->toknext; - r = jsmn_complete_primitive(parser, js, len, tokens, num_tokens); + r = jsmn_finish_primitive(parser, js, len, tokens, num_tokens); if (r < 0) { return r; } @@ -346,21 +339,21 @@ JSMN_API int jsmn_parse(jsmn_parser *parser, const char *js, const size_t len, if (token == NULL) { return JSMN_ERROR_NOMEM; } - if (parser->state & 0x10) { + if (parser->toksuper != -1) { tokens[parser->toksuper].size++; #ifdef JSMN_PARENT_LINKS token->parent = parser->toksuper; #endif } + parser->toksuper = parser->toknext - 1; + token->start = parser->pos; if (c == '{') { token->type = JSMN_OBJECT; - parser->state = JSMN_STATE_OBJ_KEY; + parser->state = JSMN_STATE_OBJ_NEW; } else { token->type = JSMN_ARRAY; - parser->state = JSMN_STATE_ARRAY_ITEM; + parser->state = JSMN_STATE_ARRAY_NEW; } - token->start = parser->pos; - parser->toksuper = parser->toknext - 1; break; case '}': 
if (tokens == NULL) { @@ -369,9 +362,21 @@ JSMN_API int jsmn_parse(jsmn_parser *parser, const char *js, const size_t len, return count; } break; - } else if (parser->state != JSMN_STATE_OBJ_COMMA) { + } else if ((parser->state & 0x14) != 0x14) { return JSMN_ERROR_INVAL; } + if (parser->state & 0x1) { +#ifdef JSMN_PARENT_LINKS + parser->toksuper = tokens[parser->toksuper].parent; +#else + for (i = parser->toksuper - 1; i >= 0; i--) { + if (tokens[i].end == 0) { + break; + } + } + parser->toksuper = i; +#endif + } goto container_close; case ']': if (tokens == NULL) { @@ -380,7 +385,7 @@ JSMN_API int jsmn_parse(jsmn_parser *parser, const char *js, const size_t len, return count; } break; - } else if (parser->state != JSMN_STATE_ARRAY_COMMA) { + } else if ((parser->state & 0x24) != 0x24) { return JSMN_ERROR_INVAL; } container_close: @@ -389,24 +394,30 @@ JSMN_API int jsmn_parse(jsmn_parser *parser, const char *js, const size_t len, #ifdef JSMN_PARENT_LINKS parser->toksuper = token->parent; #else - for (i = parser->toksuper - 1; i >= 0; i--) { - if (tokens[i].end == 0) { - break; + if (parser->toksuper - 1 == -1 || token[-1].size > 0) { + parser->toksuper--; + } else { + for (i = parser->toksuper - 2; i >= 0; i--) { + if (tokens[i].end == 0) { + break; + } } + parser->toksuper = i; } - parser->toksuper = i; #endif if (parser->toksuper == -1) { return count; } else { - parser->state = tokens[parser->toksuper].type << 4 & 0x1; + parser->state = (tokens[parser->toksuper].type == JSMN_ARRAY) ? 
+ JSMN_STATE_ARRAY_COMMA : JSMN_STATE_OBJ_COMMA; } + break; case '\"': + count++; r = jsmn_parse_string(parser, js, len, tokens, num_tokens); if (r < 0) { return r; } - count++; if (tokens == NULL) { if (parser->depth == 0) { return count; @@ -419,8 +430,15 @@ JSMN_API int jsmn_parse(jsmn_parser *parser, const char *js, const size_t len, } else if (parser->state & 0x1) { return JSMN_ERROR_INVAL; } - parser->state++; tokens[parser->toksuper].size++; +#ifdef JSMN_PARENT_LINKS + tokens[parser->toknext - 1].parent = parser->toksuper; +#endif + if ((parser->state & 0x3) == 0x2) { + parser->state = JSMN_STATE_OBJ_COLON; + } else { + parser->state |= 0x5; + } break; case '\t': case '\r': @@ -439,22 +457,19 @@ JSMN_API int jsmn_parse(jsmn_parser *parser, const char *js, const size_t len, case ',': if (tokens == NULL) { break; - } else if (parser->state & 0x3 != 1) { - return JSMN_ERROR_INVAL; - } + } else if (parser->state == JSMN_STATE_OBJ_COMMA) { #ifdef JSMN_PARENT_LINKS - parser->toksuper = tokens[parser->toksuper].parent; + parser->toksuper = tokens[parser->toksuper].parent; #else - for (i = parser->toksuper - 1; i >= 0; i--) { - if (tokens[i].end == 0) { - type = tokens[i].type; - break; - } - } - parser->toksuper = i; + for (i = parser->toksuper - 1; tokens[i].end != 0; i--); + parser->toksuper = i; #endif - parser->state = - (type == JSMN_OBJECT) ? 
JSMN_STATE_OBJ_KEY : JSMN_STATE_ARRAY_ITEM; + parser->state = JSMN_STATE_OBJ_KEY; + } else if (parser->state == JSMN_STATE_ARRAY_COMMA) { + parser->state = JSMN_STATE_ARRAY_ITEM; + } else { + return JSMN_ERROR_INVAL; + } break; #ifdef JSMN_STRICT /* In strict mode primitives are: numbers and booleans */ @@ -475,11 +490,11 @@ JSMN_API int jsmn_parse(jsmn_parser *parser, const char *js, const size_t len, #else default: #endif + count++; r = jsmn_parse_primitive(parser, js, len, tokens, num_tokens, 0); if (r < 0) { return r; } - count++; if (tokens == NULL) { if (parser->depth == 0) { return count; @@ -496,8 +511,19 @@ JSMN_API int jsmn_parse(jsmn_parser *parser, const char *js, const size_t len, #endif return JSMN_ERROR_INVAL; } - parser->state++; tokens[parser->toksuper].size++; +#ifdef JSMN_PARENT_LINKS + tokens[parser->toknext - 1].parent = parser->toksuper; +#endif +#ifdef JSMN_STRICT + parser->state |= 0x5; +#else + if ((parser->state & 0x3) == 0x2) { + parser->state = JSMN_STATE_OBJ_COLON; + } else { + parser->state |= 0x5; + } +#endif break; #ifdef JSMN_STRICT /* Unexpected char in strict mode */ diff --git a/test/tests.c b/test/tests.c index d8a4d922..51d45880 100644 --- a/test/tests.c +++ b/test/tests.c @@ -33,28 +33,34 @@ int test_object(void) { #ifdef JSMN_STRICT check(parse("{\"a\"\n0}", JSMN_ERROR_INVAL, 3)); check(parse("{\"a\", 0}", JSMN_ERROR_INVAL, 3)); - check(parse("{\"a\": {2}}", JSMN_ERROR_INVAL, 3)); - check(parse("{\"a\": {2: 3}}", JSMN_ERROR_INVAL, 3)); - check(parse("{\"a\": {\"a\": 2 3}}", JSMN_ERROR_INVAL, 5)); -/* FIXME */ -/*check(parse("{\"a\"}", JSMN_ERROR_INVAL, 2));*/ -/*check(parse("{\"a\": 1, \"b\"}", JSMN_ERROR_INVAL, 4));*/ -/*check(parse("{\"a\",\"b\":1}", JSMN_ERROR_INVAL, 4));*/ -/*check(parse("{\"a\":1,}", JSMN_ERROR_INVAL, 4));*/ -/*check(parse("{\"a\":\"b\":\"c\"}", JSMN_ERROR_INVAL, 4));*/ -/*check(parse("{,}", JSMN_ERROR_INVAL, 4));*/ + check(parse("{\"a\": {2}}", JSMN_ERROR_INVAL, 4)); + check(parse("{\"a\": {2: 3}}", 
JSMN_ERROR_INVAL, 5)); + check(parse("{\"a\": {\"a\": 2 3}}", JSMN_ERROR_INVAL, 6)); + check(parse("{\"a\"}", JSMN_ERROR_INVAL, 2)); + check(parse("{\"a\": 1, \"b\"}", JSMN_ERROR_INVAL, 4)); + check(parse("{\"a\",\"b\":1}", JSMN_ERROR_INVAL, 4)); + check(parse("{\"a\":1,}", JSMN_ERROR_INVAL, 3)); + check(parse("{\"a\":\"b\":\"c\"}", JSMN_ERROR_INVAL, 4)); + check(parse("{,}", JSMN_ERROR_INVAL, 1)); + check(parse("{\"a\":}", JSMN_ERROR_INVAL, 2)); + check(parse("{\"a\" \"b\"}", JSMN_ERROR_INVAL, 3)); + check(parse("{\"a\" ::::: \"b\"}", JSMN_ERROR_INVAL, 3)); + check(parse("{\"a\": [1 \"b\"]}", JSMN_ERROR_INVAL, 5)); + check(parse("{\"a\"\"\"}", JSMN_ERROR_INVAL, 3)); + check(parse("{\"a\":1\"\"}", JSMN_ERROR_INVAL, 4)); + check(parse("{\"a\":1\"b\":1}", JSMN_ERROR_INVAL, 5)); + check(parse("{\"a\":\"b\", \"c\":\"d\", {\"e\": \"f\"}}", + JSMN_ERROR_INVAL, 8)); #endif return 0; } int test_array(void) { - /* FIXME */ - /*check(parse("[10}", JSMN_ERROR_INVAL, 3));*/ - /*check(parse("[1,,3]", JSMN_ERROR_INVAL, 3)*/ + check(parse("[10}", JSMN_ERROR_INVAL, 3)); + check(parse("[1,,3]", JSMN_ERROR_INVAL, 3)); check(parse("[10]", 2, 2, JSMN_ARRAY, -1, -1, 1, JSMN_PRIMITIVE, "10")); check(parse("{\"a\": 1]", JSMN_ERROR_INVAL, 3)); - /* FIXME */ - /*check(parse("[\"a\": 1]", JSMN_ERROR_INVAL, 3));*/ + check(parse("[\"a\": 1]", JSMN_ERROR_INVAL, 3)); return 0; } @@ -177,12 +183,13 @@ int test_unquoted_keys(void) { const char *js; jsmn_init(&p); - js = "key1: \"value\"\nkey2 : 123"; + js = "{key1: \"value\", key2 : 123}"; r = jsmn_parse(&p, js, strlen(js), tok, 10); check(r >= 0); - check(tokeq(js, tok, 4, JSMN_PRIMITIVE, "key1", JSMN_STRING, "value", 0, - JSMN_PRIMITIVE, "key2", JSMN_PRIMITIVE, "123")); + check(tokeq(js, tok, 5, JSMN_OBJECT, -1, -1, 2, JSMN_PRIMITIVE, "key1", + JSMN_STRING, "value", 0, JSMN_PRIMITIVE, "key2", + JSMN_PRIMITIVE, "123")); #endif return 0; } @@ -301,14 +308,15 @@ int test_nonstrict(void) { int test_unmatched_brackets(void) { const char *js; - js = 
"\"key 1\": 1234}"; - check(parse(js, JSMN_ERROR_INVAL, 2)); + /* js = "\"key 1\": 1234}"; + check(parse(js, JSMN_ERROR_INVAL, 2)); */ js = "{\"key 1\": 1234"; check(parse(js, JSMN_ERROR_PART, 3)); + /* js = "{\"key 1\": 1234}}"; check(parse(js, JSMN_ERROR_INVAL, 3)); js = "\"key 1\"}: 1234"; - check(parse(js, JSMN_ERROR_INVAL, 3)); + check(parse(js, JSMN_ERROR_INVAL, 3));*/ js = "{\"key {1\": 1234}"; check(parse(js, 3, 3, JSMN_OBJECT, 0, 16, 1, JSMN_STRING, "key {1", 1, JSMN_PRIMITIVE, "1234")); @@ -349,9 +357,9 @@ int main(void) { test(test_unquoted_keys, "test unquoted keys (like in JavaScript)"); test(test_input_length, "test strings that are not null-terminated"); test(test_issue_22, "test issue #22"); - test(test_issue_27, "test issue #27"); + /* test(test_issue_27, "test issue #27"); */ test(test_count, "test tokens count estimation"); - test(test_nonstrict, "test for non-strict mode"); + /* test(test_nonstrict, "test for non-strict mode"); */ test(test_unmatched_brackets, "test for unmatched brackets"); test(test_object_key, "test for key type"); printf("\nPASSED: %d\nFAILED: %d\n", test_passed, test_failed); From cedd37718ee5e5db882bf5df8c4950b0a461c0de Mon Sep 17 00:00:00 2001 From: "Dominick C. Pastore" Date: Mon, 25 May 2020 16:28:05 -0400 Subject: [PATCH 05/17] Enable partial array test for non-strict mode --- test/tests.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/test/tests.c b/test/tests.c index 51d45880..cddd076c 100644 --- a/test/tests.c +++ b/test/tests.c @@ -125,7 +125,6 @@ int test_partial_string(void) { } int test_partial_array(void) { -#ifdef JSMN_STRICT int r; unsigned long i; jsmn_parser p; @@ -144,7 +143,6 @@ int test_partial_array(void) { check(r == JSMN_ERROR_PART); } } -#endif return 0; } From 5cdfe0d2c191568cae77218b11043464749034fe Mon Sep 17 00:00:00 2001 From: "Dominick C. 
Pastore" Date: Tue, 26 May 2020 01:00:00 -0400 Subject: [PATCH 06/17] Clean test executables with make clean --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index dcbdd89d..9ff174e3 100644 --- a/Makefile +++ b/Makefile @@ -31,6 +31,7 @@ clean: rm -f *.o example/*.o rm -f simple_example rm -f jsondump + rm -f test/test_default test/test_strict test/test_links test/test_strict_links .PHONY: clean test From 2cf8b77f20c5dc9041b0a805a59ea936fbd438a0 Mon Sep 17 00:00:00 2001 From: "Dominick C. Pastore" Date: Tue, 26 May 2020 01:00:48 -0400 Subject: [PATCH 07/17] Parse multiple JSON objects at once --- jsmn.h | 58 ++++++++++++++++++++++++++-------------------------- test/tests.c | 10 ++++----- 2 files changed, 34 insertions(+), 34 deletions(-) diff --git a/jsmn.h b/jsmn.h index 097c5625..09a15218 100644 --- a/jsmn.h +++ b/jsmn.h @@ -111,8 +111,10 @@ typedef struct jsmn_parser { unsigned int toknext; /* next token to allocate */ int toksuper; /* superior token node, e.g. 
parent object or array */ int state; /* parser state, from jsmnstate_t */ - unsigned int depth; /* Nesting depth of arrays and objects (used only when + unsigned int depth; /* nesting depth of arrays and objects (used only when * tokens is NULL) */ + int tokbefore; /* token immediately preceding the first token in the + current JSON object */ } jsmn_parser; /** @@ -321,7 +323,6 @@ JSMN_API int jsmn_parse(jsmn_parser *parser, const char *js, const size_t len, for (; parser->pos < len && js[parser->pos] != '\0'; parser->pos++) { char c; - jsmntype_t type; c = js[parser->pos]; switch (c) { @@ -339,7 +340,7 @@ JSMN_API int jsmn_parse(jsmn_parser *parser, const char *js, const size_t len, if (token == NULL) { return JSMN_ERROR_NOMEM; } - if (parser->toksuper != -1) { + if (parser->toksuper != parser->tokbefore) { tokens[parser->toksuper].size++; #ifdef JSMN_PARENT_LINKS token->parent = parser->toksuper; @@ -358,9 +359,6 @@ JSMN_API int jsmn_parse(jsmn_parser *parser, const char *js, const size_t len, case '}': if (tokens == NULL) { parser->depth--; - if (parser->depth == 0) { - return count; - } break; } else if ((parser->state & 0x14) != 0x14) { return JSMN_ERROR_INVAL; @@ -369,7 +367,7 @@ JSMN_API int jsmn_parse(jsmn_parser *parser, const char *js, const size_t len, #ifdef JSMN_PARENT_LINKS parser->toksuper = tokens[parser->toksuper].parent; #else - for (i = parser->toksuper - 1; i >= 0; i--) { + for (i = parser->toksuper - 1; i != parser->tokbefore; i--) { if (tokens[i].end == 0) { break; } @@ -381,9 +379,6 @@ JSMN_API int jsmn_parse(jsmn_parser *parser, const char *js, const size_t len, case ']': if (tokens == NULL) { parser->depth--; - if (parser->depth == 0) { - return count; - } break; } else if ((parser->state & 0x24) != 0x24) { return JSMN_ERROR_INVAL; @@ -394,10 +389,10 @@ JSMN_API int jsmn_parse(jsmn_parser *parser, const char *js, const size_t len, #ifdef JSMN_PARENT_LINKS parser->toksuper = token->parent; #else - if (parser->toksuper - 1 == -1 || 
token[-1].size > 0) { + if (parser->toksuper - 1 == parser->tokbefore || token[-1].size > 0) { parser->toksuper--; } else { - for (i = parser->toksuper - 2; i >= 0; i--) { + for (i = parser->toksuper - 2; i != parser->tokbefore; i--) { if (tokens[i].end == 0) { break; } @@ -405,8 +400,10 @@ JSMN_API int jsmn_parse(jsmn_parser *parser, const char *js, const size_t len, parser->toksuper = i; } #endif - if (parser->toksuper == -1) { - return count; + if (parser->toksuper == parser->tokbefore) { + parser->tokbefore = parser->toknext - 1; + parser->toksuper = parser->toknext - 1; + parser->state = JSMN_STATE_ROOT; } else { parser->state = (tokens[parser->toksuper].type == JSMN_ARRAY) ? JSMN_STATE_ARRAY_COMMA : JSMN_STATE_OBJ_COMMA; @@ -419,14 +416,13 @@ JSMN_API int jsmn_parse(jsmn_parser *parser, const char *js, const size_t len, return r; } if (tokens == NULL) { - if (parser->depth == 0) { - return count; - } else { - break; - } + break; } - if (parser->toksuper == -1) { - return count; + if (parser->toksuper == parser->tokbefore) { + parser->tokbefore = parser->toknext - 1; + parser->toksuper = parser->toknext - 1; + parser->state = JSMN_STATE_ROOT; + break; } else if (parser->state & 0x1) { return JSMN_ERROR_INVAL; } @@ -496,14 +492,13 @@ JSMN_API int jsmn_parse(jsmn_parser *parser, const char *js, const size_t len, return r; } if (tokens == NULL) { - if (parser->depth == 0) { - return count; - } else { - break; - } + break; } - if (parser->toksuper == -1) { - return count; + if (parser->toksuper == parser->tokbefore) { + parser->tokbefore = parser->toknext - 1; + parser->toksuper = parser->toknext - 1; + parser->state = JSMN_STATE_ROOT; + break; #ifdef JSMN_STRICT } else if (parser->state & 0x3) { #else @@ -533,7 +528,11 @@ JSMN_API int jsmn_parse(jsmn_parser *parser, const char *js, const size_t len, } } - return JSMN_ERROR_PART; + if (parser->depth == 0 && parser->state == JSMN_STATE_ROOT) { + return count; + } else { + return JSMN_ERROR_PART; + } } /** @@ -546,6 
+545,7 @@ JSMN_API void jsmn_init(jsmn_parser *parser) { parser->toksuper = -1; parser->state = JSMN_STATE_ROOT; parser->depth = 0; + parser->tokbefore = -1; } #endif /* JSMN_HEADER */ diff --git a/test/tests.c b/test/tests.c index cddd076c..333a2847 100644 --- a/test/tests.c +++ b/test/tests.c @@ -306,15 +306,15 @@ int test_nonstrict(void) { int test_unmatched_brackets(void) { const char *js; - /* js = "\"key 1\": 1234}"; - check(parse(js, JSMN_ERROR_INVAL, 2)); */ + js = "\"key 1\": 1234}"; + check(parse(js, JSMN_ERROR_INVAL, 2)); js = "{\"key 1\": 1234"; check(parse(js, JSMN_ERROR_PART, 3)); - /* + js = "{\"key 1\": 1234}}"; check(parse(js, JSMN_ERROR_INVAL, 3)); js = "\"key 1\"}: 1234"; - check(parse(js, JSMN_ERROR_INVAL, 3));*/ + check(parse(js, JSMN_ERROR_INVAL, 3)); js = "{\"key {1\": 1234}"; check(parse(js, 3, 3, JSMN_OBJECT, 0, 16, 1, JSMN_STRING, "key {1", 1, JSMN_PRIMITIVE, "1234")); @@ -355,7 +355,7 @@ int main(void) { test(test_unquoted_keys, "test unquoted keys (like in JavaScript)"); test(test_input_length, "test strings that are not null-terminated"); test(test_issue_22, "test issue #22"); - /* test(test_issue_27, "test issue #27"); */ + test(test_issue_27, "test issue #27"); test(test_count, "test tokens count estimation"); /* test(test_nonstrict, "test for non-strict mode"); */ test(test_unmatched_brackets, "test for unmatched brackets"); From 8eea1aac94542892cf73c08ed21864fcdf4bc177 Mon Sep 17 00:00:00 2001 From: "Dominick C. Pastore" Date: Tue, 26 May 2020 14:23:54 -0400 Subject: [PATCH 08/17] Enforce RFC 8259 on primitives in strict mode Ensure primitives are "true", "false", "null", or an RFC 8259 compliant number. (Still need to add test cases.) 
--- jsmn.h | 175 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 173 insertions(+), 2 deletions(-) diff --git a/jsmn.h b/jsmn.h index 09a15218..38897271 100644 --- a/jsmn.h +++ b/jsmn.h @@ -159,28 +159,198 @@ static void jsmn_fill_token(jsmntok_t *token, const jsmntype_t type, token->end = end; } +#ifdef JSMN_STRICT +static char true_str[] = "true"; +static char false_str[] = "false"; +static char null_str[] = "null"; + +typedef enum { + JSMN_NUM_INT_START, + JSMN_NUM_INT_HAVE_SIGN, + JSMN_NUM_INT_ZERO, + JSMN_NUM_INT, + JSMN_NUM_FRAC_START, + JSMN_NUM_FRAC, + JSMN_NUM_EXP_START, + JSMN_NUM_EXP_HAVE_SIGN, + JSMN_NUM_EXP +} jsmn_numberstate; +#endif + /** * If extend is false, fills next available token with JSON primitive. If * extend is true, complete the primitive that the parser is in the middle of. + * If extend is false, it is assumed that the current char is a valid start of + * a primitive. */ static int jsmn_parse_primitive(jsmn_parser *parser, const char *js, const size_t len, jsmntok_t *tokens, const size_t num_tokens, int extend) { jsmntok_t *token; int start; - +#ifdef JSMN_STRICT + int i; + if (extend) { + parser->pos = tokens[parser->toknext - 1].start; + } +#endif start = parser->pos; +#ifdef JSMN_STRICT + if (js[parser->pos] == 't') { + for (i = 1; + i < strlen(true_str); + i++) { + if (parser->pos + i >= len || js[parser->pos + i] == '\0') { + return JSMN_ERROR_PART; + } else if (js[parser->pos + i] != true_str[i]) { + return JSMN_ERROR_INVAL; + } + } + parser->pos += 4; + } else if (js[parser->pos] == 'f') { + for (i = 1; + i < strlen(false_str); + i++) { + if (parser->pos + i >= len || js[parser->pos + i] == '\0') { + return JSMN_ERROR_PART; + } else if (js[parser->pos + i] != false_str[i]) { + return JSMN_ERROR_INVAL; + } + } + parser->pos += 5; + } else if (js[parser->pos] == 'n') { + for (i = 1; + i < strlen(null_str); + i++) { + if (parser->pos + i >= len || js[parser->pos + i] == '\0') { + return JSMN_ERROR_PART; + 
} else if (js[parser->pos + i] != null_str[i]) { + return JSMN_ERROR_INVAL; + } + } + parser->pos += 4; + } else { + jsmn_numberstate numstate = JSMN_NUMBER_MINUS; + for (; parser->pos < len && js[parser->pos] != '\0'; parser->pos++) { + switch (js[parser->pos]) { + case '-': + if (numstate == JSMN_NUM_INT_START || + numstate == JSMN_NUM_EXP_START) { + numstate++; + } else { + return JSMN_ERROR_INVAL; + } + break; + case '0': + if (numstate == JSMN_NUM_INT_START || + numstate == JSMN_NUM_INT_HAVE_SIGN) { + numstate = JSMN_NUM_INT_ZERO; + } else if (numstate == JSMN_NUM_INT) { + numstate = JSMN_NUM_INT; + } else if (numstate == JSMN_NUM_FRAC || + numstate == JSMN_NUM_FRAC_START) { + numstate = JSMN_NUM_FRAC; + } else if (numstate == JSMN_NUM_EXP || + numstate == JSMN_NUM_EXP_START || + numstate == JSMN_NUM_EXP_HAVE_SIGN) { + numstate = JSMN_NUM_EXP; + } else { + return JSMN_ERROR_INVAL; + } + break; + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if (numstate == JSMN_NUM_INT || + numstate == JSMN_NUM_INT_START || + numstate == JSMN_NUM_INT_HAVE_SIGN) { + numstate = JSMN_NUM_INT; + } else if (numstate == JSMN_NUM_FRAC || + numstate == JSMN_NUM_FRAC_START) { + numstate = JSMN_NUM_FRAC; + } else if (numstate == JSMN_NUM_EXP || + numstate == JSMN_NUM_EXP_START || + numstate == JSMN_NUM_EXP_HAVE_SIGN) { + numstate = JSMN_NUM_EXP; + } else { + return JSMN_ERROR_INVAL; + } + break; + case '+': + if (numstate == JSMN_NUM_EXP_START) { + numstate++; + } else { + return JSMN_ERROR_INVAL; + } + break; + case 'e': + case 'E': + if (numstate == JSMN_NUM_INT_ZERO || + numstate == JSMN_NUM_INT || + numstate == JSMN_NUM_FRAC) { + numstate = JSMN_NUM_EXP_START; + } else { + return JSMN_ERROR_INVAL; + } + break; + case '.': + if (numstate == JSMN_NUM_INT_ZERO || + numstate == JSMN_NUM_INT) { + numstate = JSMN_NUM_FRAC_START; + } else { + return JSMN_ERROR_INVAL; + } + break; + default: + goto check_number_complete; + } + } 
+check_number_complete: + if (numstate != JSMN_NUM_INT && + numstate != JSMN_NUM_INT_ZERO && + numstate != JSMN_NUM_FRAC && + numstate != JSMN_NUM_EXP) { + return JSMN_ERROR_INVAL; + } + } + + /* Verify that what comes after the primitive is a non-primitive character */ + switch (js[parser->pos]) { + case '\t': + case '\r': + case '\n': + case ' ': + case ',': + case ':': + case '"': + case '[': + case ']': + case '{': + case '}': + break; + default: + return JSMN_ERROR_INVAL; + } +#else for (; parser->pos < len && js[parser->pos] != '\0'; parser->pos++) { switch (js[parser->pos]) { - case ':': case '\t': case '\r': case '\n': case ' ': case ',': + case ':': case '"': + case '[': case ']': + case '{': case '}': goto found; default: /* to quiet a warning from gcc*/ @@ -191,6 +361,7 @@ static int jsmn_parse_primitive(jsmn_parser *parser, const char *js, return JSMN_ERROR_INVAL; } } +#endif found: if (tokens == NULL) { From 30f2b7c670d8c08921dbaa512cabdc2180696492 Mon Sep 17 00:00:00 2001 From: "Dominick C. Pastore" Date: Tue, 26 May 2020 14:27:22 -0400 Subject: [PATCH 09/17] Nesting depth didn't need to be part of jsmn_parser --- jsmn.h | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/jsmn.h b/jsmn.h index 38897271..a6772ca6 100644 --- a/jsmn.h +++ b/jsmn.h @@ -111,8 +111,6 @@ typedef struct jsmn_parser { unsigned int toknext; /* next token to allocate */ int toksuper; /* superior token node, e.g. parent object or array */ int state; /* parser state, from jsmnstate_t */ - unsigned int depth; /* nesting depth of arrays and objects (used only when - * tokens is NULL) */ int tokbefore; /* token immediately preceding the first token in the current JSON object */ } jsmn_parser; @@ -485,6 +483,7 @@ JSMN_API int jsmn_parse(jsmn_parser *parser, const char *js, const size_t len, int i; #endif jsmntok_t *token; + int depth = 0; /* Obj/array nesting depth. 
Used only when tokens == NULL */ int count = parser->toknext; r = jsmn_finish_primitive(parser, js, len, tokens, num_tokens); @@ -501,7 +500,7 @@ JSMN_API int jsmn_parse(jsmn_parser *parser, const char *js, const size_t len, case '[': count++; if (tokens == NULL) { - parser->depth++; + depth++; break; } if (parser->state & 0x3) { @@ -529,7 +528,7 @@ JSMN_API int jsmn_parse(jsmn_parser *parser, const char *js, const size_t len, break; case '}': if (tokens == NULL) { - parser->depth--; + depth--; break; } else if ((parser->state & 0x14) != 0x14) { return JSMN_ERROR_INVAL; @@ -549,7 +548,7 @@ JSMN_API int jsmn_parse(jsmn_parser *parser, const char *js, const size_t len, goto container_close; case ']': if (tokens == NULL) { - parser->depth--; + depth--; break; } else if ((parser->state & 0x24) != 0x24) { return JSMN_ERROR_INVAL; @@ -699,7 +698,7 @@ JSMN_API int jsmn_parse(jsmn_parser *parser, const char *js, const size_t len, } } - if (parser->depth == 0 && parser->state == JSMN_STATE_ROOT) { + if (depth == 0 && parser->state == JSMN_STATE_ROOT) { return count; } else { return JSMN_ERROR_PART; @@ -715,7 +714,6 @@ JSMN_API void jsmn_init(jsmn_parser *parser) { parser->toknext = 0; parser->toksuper = -1; parser->state = JSMN_STATE_ROOT; - parser->depth = 0; parser->tokbefore = -1; } From 7ed82a3ee5e64add5375bfa60a498abe494cf969 Mon Sep 17 00:00:00 2001 From: "Dominick C. 
Pastore" Date: Tue, 26 May 2020 14:53:16 -0400 Subject: [PATCH 10/17] Bugfixes for strict primitive parsing --- jsmn.h | 53 ++++++++++++++++++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 19 deletions(-) diff --git a/jsmn.h b/jsmn.h index a6772ca6..0a7f027c 100644 --- a/jsmn.h +++ b/jsmn.h @@ -229,7 +229,7 @@ static int jsmn_parse_primitive(jsmn_parser *parser, const char *js, } parser->pos += 4; } else { - jsmn_numberstate numstate = JSMN_NUMBER_MINUS; + jsmn_numberstate numstate = JSMN_NUM_INT_START; for (; parser->pos < len && js[parser->pos] != '\0'; parser->pos++) { switch (js[parser->pos]) { case '-': @@ -237,6 +237,7 @@ static int jsmn_parse_primitive(jsmn_parser *parser, const char *js, numstate == JSMN_NUM_EXP_START) { numstate++; } else { + parser->pos = start; return JSMN_ERROR_INVAL; } break; @@ -254,6 +255,7 @@ static int jsmn_parse_primitive(jsmn_parser *parser, const char *js, numstate == JSMN_NUM_EXP_HAVE_SIGN) { numstate = JSMN_NUM_EXP; } else { + parser->pos = start; return JSMN_ERROR_INVAL; } break; @@ -278,6 +280,7 @@ static int jsmn_parse_primitive(jsmn_parser *parser, const char *js, numstate == JSMN_NUM_EXP_HAVE_SIGN) { numstate = JSMN_NUM_EXP; } else { + parser->pos = start; return JSMN_ERROR_INVAL; } break; @@ -285,6 +288,7 @@ static int jsmn_parse_primitive(jsmn_parser *parser, const char *js, if (numstate == JSMN_NUM_EXP_START) { numstate++; } else { + parser->pos = start; return JSMN_ERROR_INVAL; } break; @@ -295,6 +299,7 @@ static int jsmn_parse_primitive(jsmn_parser *parser, const char *js, numstate == JSMN_NUM_FRAC) { numstate = JSMN_NUM_EXP_START; } else { + parser->pos = start; return JSMN_ERROR_INVAL; } break; @@ -303,6 +308,7 @@ static int jsmn_parse_primitive(jsmn_parser *parser, const char *js, numstate == JSMN_NUM_INT) { numstate = JSMN_NUM_FRAC_START; } else { + parser->pos = start; return JSMN_ERROR_INVAL; } break; @@ -315,26 +321,36 @@ static int jsmn_parse_primitive(jsmn_parser *parser, const char 
*js, numstate != JSMN_NUM_INT_ZERO && numstate != JSMN_NUM_FRAC && numstate != JSMN_NUM_EXP) { - return JSMN_ERROR_INVAL; + if (parser->pos >= len || js[parser->pos] == '\0') { + parser->pos = start; + return JSMN_ERROR_PART; + } else { + parser->pos = start; + return JSMN_ERROR_INVAL; + } } } /* Verify that what comes after the primitive is a non-primitive character */ - switch (js[parser->pos]) { - case '\t': - case '\r': - case '\n': - case ' ': - case ',': - case ':': - case '"': - case '[': - case ']': - case '{': - case '}': - break; - default: - return JSMN_ERROR_INVAL; + if (parser->pos < len) { + switch (js[parser->pos]) { + case '\t': + case '\r': + case '\n': + case ' ': + case ',': + case ':': + case '"': + case '[': + case ']': + case '{': + case '}': + case '\0': + break; + default: + parser->pos = start; + return JSMN_ERROR_INVAL; + } } #else for (; parser->pos < len && js[parser->pos] != '\0'; parser->pos++) { @@ -387,12 +403,11 @@ static int jsmn_parse_string(jsmn_parser *parser, const char *js, const size_t len, jsmntok_t *tokens, const size_t num_tokens) { jsmntok_t *token; - unsigned int start = parser->pos; + /* Skip starting quote */ parser->pos++; - /* Skip starting quote */ for (; parser->pos < len && js[parser->pos] != '\0'; parser->pos++) { char c = js[parser->pos]; From a3168f0cab0a6375861d478aa9f3dc35938e6ae6 Mon Sep 17 00:00:00 2001 From: "Dominick C. Pastore" Date: Tue, 26 May 2020 15:49:13 -0400 Subject: [PATCH 11/17] Make strict string parsing conform to RFC 8259 String parsing previously did not differ between strict and non-strict modes, but was not fully compliant with RFC 8259. RFC 8259 requires that control characters (code points < 0x20) be escaped. This is now enforced in strict mode. In addition, non-strict mode now does *no* validations on string contents, much like primitives in non-strict mode. 
--- jsmn.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/jsmn.h b/jsmn.h index 0a7f027c..2756fb40 100644 --- a/jsmn.h +++ b/jsmn.h @@ -429,6 +429,7 @@ static int jsmn_parse_string(jsmn_parser *parser, const char *js, if (c == '\\' && parser->pos + 1 < len) { int i; parser->pos++; +#ifndef JSMN_STRICT switch (js[parser->pos]) { /* Allowed escaped symbols */ case '\"': @@ -461,7 +462,16 @@ static int jsmn_parse_string(jsmn_parser *parser, const char *js, parser->pos = start; return JSMN_ERROR_INVAL; } +#endif } + +#ifdef JSMN_STRICT + /* No control characters allowed in strict mode */ + if (c >= 0 && c < 32) { + parser->pos = start; + return JSMN_ERROR_INVAL; + } +#endif } parser->pos = start; return JSMN_ERROR_PART; From f04273eafba8eea1adee43262f4a4bc0c51b40c7 Mon Sep 17 00:00:00 2001 From: "Dominick C. Pastore" Date: Tue, 26 May 2020 21:32:59 -0400 Subject: [PATCH 12/17] Add test cases for the new functionality --- jsmn.h | 4 +- test/tests.c | 163 +++++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 152 insertions(+), 15 deletions(-) diff --git a/jsmn.h b/jsmn.h index 2756fb40..617712d3 100644 --- a/jsmn.h +++ b/jsmn.h @@ -142,7 +142,7 @@ static jsmntok_t *jsmn_alloc_token(jsmn_parser *parser, jsmntok_t *tokens, tok->end = 0; tok->size = 0; #ifdef JSMN_PARENT_LINKS - tok->parent = -1; + tok->parent = parser->tokbefore; #endif return tok; } @@ -429,7 +429,7 @@ static int jsmn_parse_string(jsmn_parser *parser, const char *js, if (c == '\\' && parser->pos + 1 < len) { int i; parser->pos++; -#ifndef JSMN_STRICT +#ifdef JSMN_STRICT switch (js[parser->pos]) { /* Allowed escaped symbols */ case '\"': diff --git a/test/tests.c b/test/tests.c index 333a2847..7af17777 100644 --- a/test/tests.c +++ b/test/tests.c @@ -73,8 +73,54 @@ int test_primitive(void) { JSMN_STRING, "nullVar", 1, JSMN_PRIMITIVE, "null")); check(parse("{\"intVar\" : 12}", 3, 3, JSMN_OBJECT, -1, -1, 1, JSMN_STRING, "intVar", 1, JSMN_PRIMITIVE, "12")); + 
check(parse("{\"intVar\" : 0}", 3, 3, JSMN_OBJECT, -1, -1, 1, JSMN_STRING, + "intVar", 1, JSMN_PRIMITIVE, "0")); check(parse("{\"floatVar\" : 12.345}", 3, 3, JSMN_OBJECT, -1, -1, 1, JSMN_STRING, "floatVar", 1, JSMN_PRIMITIVE, "12.345")); + check(parse("{\"floatVar\" : -12.345}", 3, 3, JSMN_OBJECT, -1, -1, 1, + JSMN_STRING, "floatVar", 1, JSMN_PRIMITIVE, "-12.345")); + check(parse("{\"floatVar\" : 0.345}", 3, 3, JSMN_OBJECT, -1, -1, 1, + JSMN_STRING, "floatVar", 1, JSMN_PRIMITIVE, "0.345")); + check(parse("{\"floatVar\" : 12.345e6}", 3, 3, JSMN_OBJECT, -1, -1, 1, + JSMN_STRING, "floatVar", 1, JSMN_PRIMITIVE, "12.345e6")); + check(parse("{\"floatVar\" : 12.345E+6}", 3, 3, JSMN_OBJECT, -1, -1, 1, + JSMN_STRING, "floatVar", 1, JSMN_PRIMITIVE, "12.345E+6")); + check(parse("{\"floatVar\" : 12e+6}", 3, 3, JSMN_OBJECT, -1, -1, 1, + JSMN_STRING, "floatVar", 1, JSMN_PRIMITIVE, "12e+6")); + +#ifdef JSMN_STRICT + check(parse("{\"boolVar\" : tru }", JSMN_ERROR_INVAL, 3)); + check(parse("{\"boolVar\" : falsee }", JSMN_ERROR_INVAL, 3)); + check(parse("{\"nullVar\" : nulm }", JSMN_ERROR_INVAL, 3)); + check(parse("{\"intVar\" : 01}", JSMN_ERROR_INVAL, 3)); + check(parse("{\"floatVar\" : .345}", JSMN_ERROR_INVAL, 3)); + check(parse("{\"floatVar\" : -}", JSMN_ERROR_INVAL, 3)); + check(parse("{\"floatVar\" : 12.}", JSMN_ERROR_INVAL, 3)); + check(parse("{\"floatVar\" : 12.}", JSMN_ERROR_INVAL, 3)); + check(parse("{\"floatVar\" : +12}", JSMN_ERROR_INVAL, 3)); + check(parse("{\"floatVar\" : 12.345e-}", JSMN_ERROR_INVAL, 3)); +#else + check(parse("{\"boolVar\" : tru }", 3, 3, JSMN_OBJECT, -1, -1, 1, + JSMN_STRING, "boolVar", 1, JSMN_PRIMITIVE, "tru")); + check(parse("{\"boolVar\" : falsee }", 3, 3, JSMN_OBJECT, -1, -1, 1, + JSMN_STRING, "boolVar", 1, JSMN_PRIMITIVE, "falsee")); + check(parse("{\"nullVar\" : nulm }", 3, 3, JSMN_OBJECT, -1, -1, 1, + JSMN_STRING, "nullVar", 1, JSMN_PRIMITIVE, "nulm")); + check(parse("{\"intVar\" : 01}", 3, 3, JSMN_OBJECT, -1, -1, 1, JSMN_STRING, + "intVar", 
1, JSMN_PRIMITIVE, "01")); + check(parse("{\"floatVar\" : .345}", 3, 3, JSMN_OBJECT, -1, -1, 1, + JSMN_STRING, "floatVar", 1, JSMN_PRIMITIVE, ".345")); + check(parse("{\"floatVar\" : -}", 3, 3, JSMN_OBJECT, -1, -1, 1, + JSMN_STRING, "floatVar", 1, JSMN_PRIMITIVE, "-")); + check(parse("{\"floatVar\" : 12.}", 3, 3, JSMN_OBJECT, -1, -1, 1, + JSMN_STRING, "floatVar", 1, JSMN_PRIMITIVE, "12.")); + check(parse("{\"floatVar\" : 12.}", 3, 3, JSMN_OBJECT, -1, -1, 1, + JSMN_STRING, "floatVar", 1, JSMN_PRIMITIVE, "12.")); + check(parse("{\"floatVar\" : +12}", 3, 3, JSMN_OBJECT, -1, -1, 1, + JSMN_STRING, "floatVar", 1, JSMN_PRIMITIVE, "+12")); + check(parse("{\"floatVar\" : 12.345e-}", 3, 3, JSMN_OBJECT, -1, -1, 1, + JSMN_STRING, "floatVar", 1, JSMN_PRIMITIVE, "12.345e-")); +#endif return 0; } @@ -95,10 +141,26 @@ int test_string(void) { check(parse("{\"a\":[\"\\u0280\"]}", 4, 4, JSMN_OBJECT, -1, -1, 1, JSMN_STRING, "a", 1, JSMN_ARRAY, -1, -1, 1, JSMN_STRING, "\\u0280", 0)); + /* \xc2\xa9 is the copyright symbol in UTF-8 */ + check(parse("{\"a\":\"str\xc2\xa9\"}", 3, 3, JSMN_OBJECT, -1, -1, 1, + JSMN_STRING, "a", 1, JSMN_STRING, "str\xc2\xa9", 0)); +#ifdef JSMN_STRICT + check(parse("{\"a\":\"str\nstr\"}", JSMN_ERROR_INVAL, 3)); check(parse("{\"a\":\"str\\uFFGFstr\"}", JSMN_ERROR_INVAL, 3)); check(parse("{\"a\":\"str\\u@FfF\"}", JSMN_ERROR_INVAL, 3)); - check(parse("{{\"a\":[\"\\u028\"]}", JSMN_ERROR_INVAL, 4)); + check(parse("{\"a\":[\"\\u028\"]}", JSMN_ERROR_INVAL, 4)); +#else + check(parse("{\"a\":\"str\nstr\"}", 3, 3, JSMN_OBJECT, -1, -1, 1, + JSMN_STRING, "a", 1, JSMN_STRING, "str\nstr", 0)); + check(parse("{\"a\":\"str\\uFFGFstr\"}", 3, 3, JSMN_OBJECT, -1, -1, 1, + JSMN_STRING, "a", 1, JSMN_STRING, "str\\uFFGFstr", 0)); + check(parse("{\"a\":\"str\\u@FfF\"}", 3, 3, JSMN_OBJECT, -1, -1, 1, + JSMN_STRING, "a", 1, JSMN_STRING, "str\\u@FfF", 0)); + check(parse("{\"a\":[\"\\u028\"]}", 4, 4, JSMN_OBJECT, -1, -1, 1, + JSMN_STRING, "a", 1, JSMN_ARRAY, -1, -1, 1, + JSMN_STRING, 
"\\u028", 0)); +#endif return 0; } @@ -215,13 +277,6 @@ int test_issue_22(void) { return 0; } -int test_issue_27(void) { - const char *js = - "{ \"name\" : \"Jack\", \"age\" : 27 } { \"name\" : \"Anna\", "; - check(parse(js, JSMN_ERROR_PART, 8)); - return 0; -} - int test_input_length(void) { const char *js; int r; @@ -285,9 +340,64 @@ int test_count(void) { return 0; } -int test_nonstrict(void) { -#ifndef JSMN_STRICT +int test_unenclosed(void) { const char *js; + js = "1234"; + check(parse(js, 1, 1, JSMN_PRIMITIVE, "1234")); + + js = "false"; + check(parse(js, 1, 1, JSMN_PRIMITIVE, "false")); + + js = "0garbage"; +#ifdef JSMN_STRICT + check(parse(js, JSMN_ERROR_INVAL, 1)); +#else + check(parse(js, 1, 1, JSMN_PRIMITIVE, "0garbage")); +#endif + + js = "\"0garbage\""; + check(parse(js, 1, 1, JSMN_STRING, "0garbage", 0)); + + js = "\"0garbage"; + check(parse(js, JSMN_ERROR_PART, 1)); + + js = "1234\"0garbage\""; + check(parse(js, 2, 2, JSMN_PRIMITIVE, "1234", JSMN_STRING, "0garbage", 0)); + + js = "\"0garbage\"1234"; + check(parse(js, 2, 2, JSMN_STRING, "0garbage", 0, JSMN_PRIMITIVE, "1234")); + + js = " 1234"; + check(parse(js, 1, 1, JSMN_PRIMITIVE, "1234")); + + js = "1234 "; + check(parse(js, 1, 1, JSMN_PRIMITIVE, "1234")); + + js = " 1234 "; + check(parse(js, 1, 1, JSMN_PRIMITIVE, "1234")); + +#ifdef JSMN_STRICT + js = "fal"; + check(parse(js, JSMN_ERROR_PART, 1)); + + js = "fal "; + check(parse(js, JSMN_ERROR_INVAL, 1)); +#else + js = "fal"; + check(parse(js, 1, 1, JSMN_PRIMITIVE, "fal")); + + js = "fal "; + check(parse(js, 1, 1, JSMN_PRIMITIVE, "fal ")); +#endif + + js = "\"a\": 0"; + check(parse(js, JSMN_ERROR_INVAL, 2)); + + js = "\"a\", 0"; + check(parse(js, JSMN_ERROR_INVAL, 2)); + + /* XXX No longer valid after RFC 8259 fixes +#ifndef JSMN_STRICT js = "a: 0garbage"; check(parse(js, 2, 2, JSMN_PRIMITIVE, "a", JSMN_PRIMITIVE, "0garbage")); @@ -295,12 +405,14 @@ int test_nonstrict(void) { check(parse(js, 6, 6, JSMN_PRIMITIVE, "Day", JSMN_PRIMITIVE, "26", 
JSMN_PRIMITIVE, "Month", JSMN_PRIMITIVE, "Sep", JSMN_PRIMITIVE, "Year", JSMN_PRIMITIVE, "12")); + */ /* nested {s don't cause a parse error. */ + /* XXX No longer valid after RFC 8259 fixes js = "\"key {1\": 1234"; check(parse(js, 2, 2, JSMN_STRING, "key {1", 1, JSMN_PRIMITIVE, "1234")); - #endif + */ return 0; } @@ -342,6 +454,31 @@ int test_object_key(void) { return 0; } +int test_multiple_objects(void) { + const char *js; + js = "true {\"def\": 123}"; + check(parse(js, 4, 4, JSMN_PRIMITIVE, "true", JSMN_OBJECT, -1, -1, 1, + JSMN_STRING, "def", 1, JSMN_PRIMITIVE, "123")); + + js = "{\"def\": 123} true"; + check(parse(js, 4, 4, JSMN_OBJECT, -1, -1, 1, JSMN_STRING, "def", 1, + JSMN_PRIMITIVE, "123", JSMN_PRIMITIVE, "true")); + + js = "true{\"def\": 123}"; + check(parse(js, 4, 4, JSMN_PRIMITIVE, "true", JSMN_OBJECT, -1, -1, 1, + JSMN_STRING, "def", 1, JSMN_PRIMITIVE, "123")); + + js = "[true, \"abc\"]{\"def\": 123}"; + check(parse(js, 6, 6, JSMN_ARRAY, -1, -1, 2, JSMN_PRIMITIVE, "true", + JSMN_STRING, "abc", 0, JSMN_OBJECT, -1, -1, 1, + JSMN_STRING, "def", 1, JSMN_PRIMITIVE, "123")); + + /* Issue #27 */ + js = "{ \"name\" : \"Jack\", \"age\" : 27 } { \"name\" : \"Anna\", "; + check(parse(js, JSMN_ERROR_PART, 8)); + return 0; +} + int main(void) { test(test_empty, "test for a empty JSON objects/arrays"); test(test_object, "test for a JSON objects"); @@ -355,11 +492,11 @@ int main(void) { test(test_unquoted_keys, "test unquoted keys (like in JavaScript)"); test(test_input_length, "test strings that are not null-terminated"); test(test_issue_22, "test issue #22"); - test(test_issue_27, "test issue #27"); test(test_count, "test tokens count estimation"); - /* test(test_nonstrict, "test for non-strict mode"); */ + /* test(test_unenclosed, "test for non-strict mode"); */ test(test_unmatched_brackets, "test for unmatched brackets"); test(test_object_key, "test for key type"); + test(test_multiple_objects, "test parsing multiple items at once"); printf("\nPASSED: 
%d\nFAILED: %d\n", test_passed, test_failed); return (test_failed > 0); } From 140def95e458ae14709b3a1dde72f07625d7257c Mon Sep 17 00:00:00 2001 From: "Dominick C. Pastore" Date: Wed, 27 May 2020 15:26:36 -0400 Subject: [PATCH 13/17] Uncomment test that was mistakenly commented --- test/tests.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/tests.c b/test/tests.c index 7af17777..4ce90af7 100644 --- a/test/tests.c +++ b/test/tests.c @@ -387,7 +387,7 @@ int test_unenclosed(void) { check(parse(js, 1, 1, JSMN_PRIMITIVE, "fal")); js = "fal "; - check(parse(js, 1, 1, JSMN_PRIMITIVE, "fal ")); + check(parse(js, 1, 1, JSMN_PRIMITIVE, "fal")); #endif js = "\"a\": 0"; @@ -493,7 +493,7 @@ int main(void) { test(test_input_length, "test strings that are not null-terminated"); test(test_issue_22, "test issue #22"); test(test_count, "test tokens count estimation"); - /* test(test_unenclosed, "test for non-strict mode"); */ + test(test_unenclosed, "test for non-strict mode"); test(test_unmatched_brackets, "test for unmatched brackets"); test(test_object_key, "test for key type"); test(test_multiple_objects, "test parsing multiple items at once"); From 9690485809931d0e9fc0daa2103bcb1f7c16abaa Mon Sep 17 00:00:00 2001 From: "Dominick C. 
Pastore" Date: Thu, 28 May 2020 02:03:12 -0400 Subject: [PATCH 14/17] Conform to C89 and small optimization --- jsmn.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/jsmn.h b/jsmn.h index 617712d3..49077cee 100644 --- a/jsmn.h +++ b/jsmn.h @@ -99,7 +99,7 @@ typedef enum { JSMN_STATE_OBJ_COMMA = 0x15, /* Expecting object comma or } */ JSMN_STATE_ARRAY_NEW = 0x24, /* Expecting array item or ] */ JSMN_STATE_ARRAY_ITEM = 0x20, /* Expecting array item */ - JSMN_STATE_ARRAY_COMMA = 0x25, /* Expecting array comma or ] */ + JSMN_STATE_ARRAY_COMMA = 0x25 /* Expecting array comma or ] */ } jsmnstate_t; /** @@ -158,9 +158,9 @@ static void jsmn_fill_token(jsmntok_t *token, const jsmntype_t type, } #ifdef JSMN_STRICT -static char true_str[] = "true"; -static char false_str[] = "false"; -static char null_str[] = "null"; +static const char true_str[] = "true"; +static const char false_str[] = "false"; +static const char null_str[] = "null"; typedef enum { JSMN_NUM_INT_START, From 44ee942e0c4fa50be504dc9948c072881db406f9 Mon Sep 17 00:00:00 2001 From: "Dominick C. Pastore" Date: Mon, 1 Jun 2020 13:47:49 -0400 Subject: [PATCH 15/17] Name the bits in jsmnstate_t --- jsmn.h | 77 ++++++++++++++++++++++++++++++++++------------------------ 1 file changed, 45 insertions(+), 32 deletions(-) diff --git a/jsmn.h b/jsmn.h index 49077cee..e1b07973 100644 --- a/jsmn.h +++ b/jsmn.h @@ -79,27 +79,38 @@ typedef struct jsmntok { /** * JSON parser state. Describes what kind of token or character is expected - * next. 
In addition: - * (state & 0x1) == 0 if a string is currently allowed (or primitive in - * non-strict mode) - * (state & 0x3) == 0 if an object or array is currently allowed - * (state & 0x3) == 1 if a comma is currently allowed - * (state & 0x30) != 0 if currently in a container - * (state & 0x10) != 0 if currently in an object - * (state & 0x20) != 0 if currently in an array - * (state & 0x14) == 0x14 if an array can be closed - * (state & 0x24) == 0x24 if an object can be closed + * next. */ typedef enum { - JSMN_STATE_ROOT = 0x0, /* At root */ - JSMN_STATE_OBJ_NEW = 0x16, /* Expecting object key or } */ - JSMN_STATE_OBJ_KEY = 0x12, /* Expecting object key */ - JSMN_STATE_OBJ_COLON = 0x13, /* Expecting object colon */ - JSMN_STATE_OBJ_VAL = 0x10, /* Expecting object value */ - JSMN_STATE_OBJ_COMMA = 0x15, /* Expecting object comma or } */ - JSMN_STATE_ARRAY_NEW = 0x24, /* Expecting array item or ] */ - JSMN_STATE_ARRAY_ITEM = 0x20, /* Expecting array item */ - JSMN_STATE_ARRAY_COMMA = 0x25 /* Expecting array comma or ] */ + JSMN_DELIMITER = 0x1, /* Expecting colon if JSMN_KEY, comma otherwise */ + JSMN_CAN_CLOSE = 0x2, /* Object or array can close here */ + + JSMN_KEY = 0x10, /* Expecting an object key or the delimiter following */ + JSMN_VALUE = 0x20, /* Expecting an object value or the delimiter following */ + JSMN_IN_ARRAY = 0x40, /* Expecting array item or the delimiter following */ + /* In an object if (state & JSMN_IN_OBJECT). Otherwise, in an array or at + * the root. */ + JSMN_IN_OBJECT = JSMN_KEY | JSMN_VALUE, + + /* Actual values for parser->state */ + /* Root: not in any array or object */ + JSMN_STATE_ROOT = 0x0, + /* Just saw object opening ({). Expecting key or close. */ + JSMN_STATE_OBJ_NEW = (JSMN_KEY | JSMN_CAN_CLOSE), + /* Just saw a comma in an object. Expecting key. */ + JSMN_STATE_OBJ_KEY = (JSMN_KEY), + /* Just saw a key in an object. Expecting colon or maybe close/comma. 
*/ + JSMN_STATE_OBJ_COLON = (JSMN_KEY | JSMN_DELIMITER), + /* Just saw a colon in an object. Expecting value. */ + JSMN_STATE_OBJ_VAL = (JSMN_VALUE), + /* Just saw a value in an object. Expecting comma or close. */ + JSMN_STATE_OBJ_COMMA = (JSMN_VALUE | JSMN_DELIMITER | JSMN_CAN_CLOSE), + /* Just saw an array opening ([). Expecting item or close. */ + JSMN_STATE_ARRAY_NEW = (JSMN_IN_ARRAY | JSMN_CAN_CLOSE), + /* Just saw a comma in an array. Expecting item. */ + JSMN_STATE_ARRAY_ITEM = (JSMN_IN_ARRAY), + /* Just saw an item in an array. Expecting comma or close. */ + JSMN_STATE_ARRAY_COMMA = (JSMN_DELIMITER | JSMN_IN_ARRAY | JSMN_CAN_CLOSE) } jsmnstate_t; /** @@ -110,7 +121,7 @@ typedef struct jsmn_parser { unsigned int pos; /* offset in the JSON string */ unsigned int toknext; /* next token to allocate */ int toksuper; /* superior token node, e.g. parent object or array */ - int state; /* parser state, from jsmnstate_t */ + jsmnstate_t state; /* parser state, from jsmnstate_t */ int tokbefore; /* token immediately preceding the first token in the current JSON object */ } jsmn_parser; @@ -528,7 +539,7 @@ JSMN_API int jsmn_parse(jsmn_parser *parser, const char *js, const size_t len, depth++; break; } - if (parser->state & 0x3) { + if (parser->state & JSMN_KEY) { return JSMN_ERROR_INVAL; } token = jsmn_alloc_token(parser, tokens, num_tokens); @@ -555,10 +566,11 @@ JSMN_API int jsmn_parse(jsmn_parser *parser, const char *js, const size_t len, if (tokens == NULL) { depth--; break; - } else if ((parser->state & 0x14) != 0x14) { + } else if (!(parser->state & JSMN_IN_OBJECT) || + !(parser->state & JSMN_CAN_CLOSE)) { return JSMN_ERROR_INVAL; } - if (parser->state & 0x1) { + if (parser->state & JSMN_VALUE) { #ifdef JSMN_PARENT_LINKS parser->toksuper = tokens[parser->toksuper].parent; #else @@ -575,7 +587,8 @@ JSMN_API int jsmn_parse(jsmn_parser *parser, const char *js, const size_t len, if (tokens == NULL) { depth--; break; - } else if ((parser->state & 0x24) != 0x24) { + 
} else if (!(parser->state & JSMN_IN_ARRAY) || + !(parser->state & JSMN_CAN_CLOSE)) { return JSMN_ERROR_INVAL; } container_close: @@ -618,17 +631,17 @@ JSMN_API int jsmn_parse(jsmn_parser *parser, const char *js, const size_t len, parser->toksuper = parser->toknext - 1; parser->state = JSMN_STATE_ROOT; break; - } else if (parser->state & 0x1) { + } else if (parser->state & JSMN_DELIMITER) { return JSMN_ERROR_INVAL; } tokens[parser->toksuper].size++; #ifdef JSMN_PARENT_LINKS tokens[parser->toknext - 1].parent = parser->toksuper; #endif - if ((parser->state & 0x3) == 0x2) { + if (parser->state & JSMN_KEY) { parser->state = JSMN_STATE_OBJ_COLON; } else { - parser->state |= 0x5; + parser->state |= JSMN_DELIMITER | JSMN_CAN_CLOSE; } break; case '\t': @@ -695,9 +708,9 @@ JSMN_API int jsmn_parse(jsmn_parser *parser, const char *js, const size_t len, parser->state = JSMN_STATE_ROOT; break; #ifdef JSMN_STRICT - } else if (parser->state & 0x3) { + } else if (parser->state & (JSMN_DELIMITER | JSMN_KEY)) { #else - } else if (parser->state & 0x1) { + } else if (parser->state & JSMN_DELIMITER) { #endif return JSMN_ERROR_INVAL; } @@ -706,12 +719,12 @@ JSMN_API int jsmn_parse(jsmn_parser *parser, const char *js, const size_t len, tokens[parser->toknext - 1].parent = parser->toksuper; #endif #ifdef JSMN_STRICT - parser->state |= 0x5; + parser->state |= JSMN_DELIMITER | JSMN_CAN_CLOSE; #else - if ((parser->state & 0x3) == 0x2) { + if (parser->state & JSMN_KEY) { parser->state = JSMN_STATE_OBJ_COLON; } else { - parser->state |= 0x5; + parser->state |= JSMN_DELIMITER | JSMN_CAN_CLOSE; } #endif break; From 6bc659404f8c91a9f3c150f248302d406d470d23 Mon Sep 17 00:00:00 2001 From: "Dominick C. 
Pastore" Date: Thu, 4 Jun 2020 14:45:07 -0400 Subject: [PATCH 16/17] Remove use of strlen libc call --- jsmn.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/jsmn.h b/jsmn.h index e1b07973..b557fea5 100644 --- a/jsmn.h +++ b/jsmn.h @@ -208,7 +208,7 @@ static int jsmn_parse_primitive(jsmn_parser *parser, const char *js, #ifdef JSMN_STRICT if (js[parser->pos] == 't') { for (i = 1; - i < strlen(true_str); + i < sizeof(true_str) - 1; i++) { if (parser->pos + i >= len || js[parser->pos + i] == '\0') { return JSMN_ERROR_PART; @@ -219,7 +219,7 @@ static int jsmn_parse_primitive(jsmn_parser *parser, const char *js, parser->pos += 4; } else if (js[parser->pos] == 'f') { for (i = 1; - i < strlen(false_str); + i < sizeof(false_str) - 1; i++) { if (parser->pos + i >= len || js[parser->pos + i] == '\0') { return JSMN_ERROR_PART; @@ -230,7 +230,7 @@ static int jsmn_parse_primitive(jsmn_parser *parser, const char *js, parser->pos += 5; } else if (js[parser->pos] == 'n') { for (i = 1; - i < strlen(null_str); + i < sizeof(null_str) - 1; i++) { if (parser->pos + i >= len || js[parser->pos + i] == '\0') { return JSMN_ERROR_PART; From 90a2a4cf6a485a2864a2b783fa2ce78b07c687c6 Mon Sep 17 00:00:00 2001 From: "Dominick C. Pastore" Date: Fri, 5 Jun 2020 10:10:11 -0400 Subject: [PATCH 17/17] Fix misleading comment --- jsmn.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jsmn.h b/jsmn.h index b557fea5..4e1ebbb7 100644 --- a/jsmn.h +++ b/jsmn.h @@ -99,7 +99,7 @@ typedef enum { JSMN_STATE_OBJ_NEW = (JSMN_KEY | JSMN_CAN_CLOSE), /* Just saw a comma in an object. Expecting key. */ JSMN_STATE_OBJ_KEY = (JSMN_KEY), - /* Just saw a key in an object. Expecting colon or maybe close/comma. */ + /* Just saw a key in an object. Expecting colon. */ JSMN_STATE_OBJ_COLON = (JSMN_KEY | JSMN_DELIMITER), /* Just saw a colon in an object. Expecting value. */ JSMN_STATE_OBJ_VAL = (JSMN_VALUE),