/** * @file NCDConfigTokenizer.c * @author Ambroz Bizjak * * @section LICENSE * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the author nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include static int is_name_char (char c) { return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_'); } static int is_name_first_char (char c) { return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'); } static int is_space_char (char c) { return (c == ' ' || c == '\t' || c == '\n' || c == '\r'); } static int string_equals (char *str, int str_len, char *needle) { return (str_len == strlen(needle) && !memcmp(str, needle, str_len)); } void NCDConfigTokenizer_Tokenize (char *str, size_t left, NCDConfigTokenizer_output output, void *user) { size_t line = 1; size_t line_char = 1; while (left > 0) { size_t l; int error = 0; int token; void *token_val = NULL; size_t token_len = 0; if (*str == '#') { l = 1; while (l < left && str[l] != '\n') { l++; } token = 0; } else if (l = data_begins_with(str, left, "{")) { token = NCD_TOKEN_CURLY_OPEN; } else if (l = data_begins_with(str, left, "}")) { token = NCD_TOKEN_CURLY_CLOSE; } else if (l = data_begins_with(str, left, "(")) { token = NCD_TOKEN_ROUND_OPEN; } else if (l = data_begins_with(str, left, ")")) { token = NCD_TOKEN_ROUND_CLOSE; } else if (l = data_begins_with(str, left, ";")) { token = NCD_TOKEN_SEMICOLON; } else if (l = data_begins_with(str, left, ".")) { token = NCD_TOKEN_DOT; } else if (l = data_begins_with(str, left, ",")) { token = NCD_TOKEN_COMMA; } else if (l = data_begins_with(str, left, ":")) { token = NCD_TOKEN_COLON; } else if (l = data_begins_with(str, left, "[")) { token = NCD_TOKEN_BRACKET_OPEN; } else if (l = data_begins_with(str, left, "]")) { token = NCD_TOKEN_BRACKET_CLOSE; } else if (l = data_begins_with(str, left, "->")) { token = NCD_TOKEN_ARROW; } else if (l = data_begins_with(str, left, "If")) { token = NCD_TOKEN_IF; } else if (l = data_begins_with(str, left, "Elif")) { token = NCD_TOKEN_ELIF; } else if (l = data_begins_with(str, left, "elif")) { token = NCD_TOKEN_ELIF; } else if (l = data_begins_with(str, left, "Else")) { token = NCD_TOKEN_ELSE; } else if (l = data_begins_with(str, left, "else")) { token = NCD_TOKEN_ELSE; } else if (l = data_begins_with(str, left, "Foreach")) { token = NCD_TOKEN_FOREACH; } else if (l = data_begins_with(str, left, "As")) { token = NCD_TOKEN_AS; } else if (l = data_begins_with(str, left, "include_guard")) { token = NCD_TOKEN_INCLUDE_GUARD; } else if (l = data_begins_with(str, left, "include")) { token = NCD_TOKEN_INCLUDE; } else if (is_name_first_char(*str)) { l = 1; while (l < left && is_name_char(str[l])) { l++; } // allocate buffer bsize_t bufsize = bsize_add(bsize_fromsize(l), bsize_fromint(1)); char *buf; if (bufsize.is_overflow || !(buf = malloc(bufsize.value))) { BLog(BLOG_ERROR, "malloc failed"); error = 1; goto out; } // copy and terminate memcpy(buf, str, l); buf[l] = '\0'; if (!strcmp(buf, "process")) { token = NCD_TOKEN_PROCESS; free(buf); } else if (!strcmp(buf, "template")) { token = NCD_TOKEN_TEMPLATE; free(buf); } else { token = NCD_TOKEN_NAME; token_val = buf; token_len = l; } } else if (*str == '"') do { // init string ExpString estr; if (!ExpString_Init(&estr)) { BLog(BLOG_ERROR, "ExpString_Init failed"); goto string_fail0; } // skip start quote l = 1; // decode string while (l < left) { uint8_t dec_ch; // get character if (str[l] == '\\') { if (left - l < 2) { BLog(BLOG_ERROR, "escape character found in string but nothing follows"); goto string_fail1; } size_t extra = 0; switch (str[l + 1]) { case '\'': case '\"': case '\\': case '\?': dec_ch = str[l + 1]; break; case 'a': dec_ch = '\a'; break; case 'b': dec_ch = '\b'; break; case 'f': dec_ch = '\f'; break; case 'n': dec_ch = '\n'; break; case 'r': dec_ch = '\r'; break; case 't': dec_ch = '\t'; break; case 'v': dec_ch = '\v'; break; case '0': dec_ch = 0; break; case 'x': { if (left - l < 4) { BLog(BLOG_ERROR, "hexadecimal escape found in string but too little characters follow"); goto string_fail1; } uintmax_t hex_val; if (!parse_unsigned_hex_integer_bin(&str[l + 2], 2, &hex_val)) { BLog(BLOG_ERROR, "hexadecimal escape found in string but two hex characters don't follow"); goto string_fail1; } dec_ch = hex_val; extra = 2; } break; default: BLog(BLOG_ERROR, "bad escape sequence in string"); goto string_fail1; } l += 2 + extra; } else if (str[l] == '"') { break; } else { dec_ch = str[l]; l++; } // append character to string if (!ExpString_AppendByte(&estr, dec_ch)) { BLog(BLOG_ERROR, "ExpString_AppendChar failed"); goto string_fail1; } } // make sure ending quote was found if (l == left) { BLog(BLOG_ERROR, "missing ending quote for string"); goto string_fail1; } // skip ending quote l++; token = NCD_TOKEN_STRING; token_val = ExpString_Get(&estr); token_len = ExpString_Length(&estr); break; string_fail1: ExpString_Free(&estr); string_fail0: error = 1; } while (0); else if (is_space_char(*str)) { token = 0; l = 1; } else { BLog(BLOG_ERROR, "unrecognized character"); error = 1; } out: // report error if (error) { output(user, NCD_ERROR, NULL, 0, line, line_char); return; } // output token if (token) { if (!output(user, token, token_val, token_len, line, line_char)) { return; } } // update line/char counters for (size_t i = 0; i < l; i++) { if (str[i] == '\n') { line++; line_char = 1; } else { line_char++; } } str += l; left -= l; } output(user, NCD_EOF, NULL, 0, line, line_char); }