322 lines
11 KiB
C
322 lines
11 KiB
C
|
/**
 * @file NCDConfigTokenizer.c
 * @author Ambroz Bizjak <ambrop7@gmail.com>
 *
 * @section LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the author nor the
 *    names of its contributors may be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
|
||
|
|
||
|
#include <string.h>
|
||
|
#include <stddef.h>
|
||
|
#include <stdlib.h>
|
||
|
|
||
|
#include <misc/debug.h>
|
||
|
#include <misc/string_begins_with.h>
|
||
|
#include <misc/balloc.h>
|
||
|
#include <misc/expstring.h>
|
||
|
#include <misc/parse_number.h>
|
||
|
#include <base/BLog.h>
|
||
|
|
||
|
#include <ncd/NCDConfigTokenizer.h>
|
||
|
|
||
|
#include <generated/blog_channel_NCDConfigTokenizer.h>
|
||
|
|
||
|
/**
 * Tells whether a character may appear inside a name
 * (at any position after the first one).
 *
 * Accepted characters are the letters a-z and A-Z, the digits 0-9
 * and the underscore.
 *
 * @param c character to classify
 * @return 1 if the character is a name character, 0 otherwise
 */
static int is_name_char (char c)
{
    if (c == '_') {
        return 1;
    }
    if (c >= '0' && c <= '9') {
        return 1;
    }
    if (c >= 'a' && c <= 'z') {
        return 1;
    }
    return (c >= 'A' && c <= 'Z');
}
|
||
|
|
||
|
/**
 * Tells whether a character may start a name.
 *
 * Accepted characters are the letters a-z and A-Z and the underscore;
 * unlike the remaining characters of a name, digits are not accepted here.
 *
 * @param c character to classify
 * @return 1 if the character may begin a name, 0 otherwise
 */
static int is_name_first_char (char c)
{
    if (c == '_') {
        return 1;
    }
    if (c >= 'a' && c <= 'z') {
        return 1;
    }
    return (c >= 'A' && c <= 'Z');
}
|
||
|
|
||
|
/**
 * Tells whether a character is whitespace for the tokenizer.
 *
 * Only space, horizontal tab, line feed and carriage return count;
 * vertical tab and form feed do not.
 *
 * @param c character to classify
 * @return 1 if the character is whitespace, 0 otherwise
 */
static int is_space_char (char c)
{
    switch (c) {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
            return 1;
        default:
            return 0;
    }
}
|
||
|
|
||
|
/**
 * Compares a length-delimited byte sequence with a null-terminated string.
 *
 * @param str start of the data to compare; need not be null-terminated
 * @param str_len number of bytes available at str. A negative value never
 *                matches.
 * @param needle null-terminated string to compare against
 * @return 1 if str_len equals the length of needle and the first str_len
 *         bytes of str are identical to needle, 0 otherwise
 */
static int string_equals (const char *str, int str_len, const char *needle)
{
    // reject negative lengths explicitly instead of relying on the
    // implicit conversion to size_t in the comparison below
    if (str_len < 0) {
        return 0;
    }

    size_t len = (size_t)str_len;

    return (len == strlen(needle) && memcmp(str, needle, len) == 0);
}
|
||
|
|
||
|
void NCDConfigTokenizer_Tokenize (char *str, size_t left, NCDConfigTokenizer_output output, void *user)
|
||
|
{
|
||
|
size_t line = 1;
|
||
|
size_t line_char = 1;
|
||
|
|
||
|
while (left > 0) {
|
||
|
size_t l;
|
||
|
int error = 0;
|
||
|
int token;
|
||
|
void *token_val = NULL;
|
||
|
size_t token_len = 0;
|
||
|
|
||
|
if (*str == '#') {
|
||
|
l = 1;
|
||
|
while (l < left && str[l] != '\n') {
|
||
|
l++;
|
||
|
}
|
||
|
token = 0;
|
||
|
}
|
||
|
else if (l = data_begins_with(str, left, "{")) {
|
||
|
token = NCD_TOKEN_CURLY_OPEN;
|
||
|
}
|
||
|
else if (l = data_begins_with(str, left, "}")) {
|
||
|
token = NCD_TOKEN_CURLY_CLOSE;
|
||
|
}
|
||
|
else if (l = data_begins_with(str, left, "(")) {
|
||
|
token = NCD_TOKEN_ROUND_OPEN;
|
||
|
}
|
||
|
else if (l = data_begins_with(str, left, ")")) {
|
||
|
token = NCD_TOKEN_ROUND_CLOSE;
|
||
|
}
|
||
|
else if (l = data_begins_with(str, left, ";")) {
|
||
|
token = NCD_TOKEN_SEMICOLON;
|
||
|
}
|
||
|
else if (l = data_begins_with(str, left, ".")) {
|
||
|
token = NCD_TOKEN_DOT;
|
||
|
}
|
||
|
else if (l = data_begins_with(str, left, ",")) {
|
||
|
token = NCD_TOKEN_COMMA;
|
||
|
}
|
||
|
else if (l = data_begins_with(str, left, ":")) {
|
||
|
token = NCD_TOKEN_COLON;
|
||
|
}
|
||
|
else if (l = data_begins_with(str, left, "[")) {
|
||
|
token = NCD_TOKEN_BRACKET_OPEN;
|
||
|
}
|
||
|
else if (l = data_begins_with(str, left, "]")) {
|
||
|
token = NCD_TOKEN_BRACKET_CLOSE;
|
||
|
}
|
||
|
else if (l = data_begins_with(str, left, "->")) {
|
||
|
token = NCD_TOKEN_ARROW;
|
||
|
}
|
||
|
else if (l = data_begins_with(str, left, "If")) {
|
||
|
token = NCD_TOKEN_IF;
|
||
|
}
|
||
|
else if (l = data_begins_with(str, left, "Elif")) {
|
||
|
token = NCD_TOKEN_ELIF;
|
||
|
}
|
||
|
else if (l = data_begins_with(str, left, "elif")) {
|
||
|
token = NCD_TOKEN_ELIF;
|
||
|
}
|
||
|
else if (l = data_begins_with(str, left, "Else")) {
|
||
|
token = NCD_TOKEN_ELSE;
|
||
|
}
|
||
|
else if (l = data_begins_with(str, left, "else")) {
|
||
|
token = NCD_TOKEN_ELSE;
|
||
|
}
|
||
|
else if (l = data_begins_with(str, left, "Foreach")) {
|
||
|
token = NCD_TOKEN_FOREACH;
|
||
|
}
|
||
|
else if (l = data_begins_with(str, left, "As")) {
|
||
|
token = NCD_TOKEN_AS;
|
||
|
}
|
||
|
else if (l = data_begins_with(str, left, "include_guard")) {
|
||
|
token = NCD_TOKEN_INCLUDE_GUARD;
|
||
|
}
|
||
|
else if (l = data_begins_with(str, left, "include")) {
|
||
|
token = NCD_TOKEN_INCLUDE;
|
||
|
}
|
||
|
else if (is_name_first_char(*str)) {
|
||
|
l = 1;
|
||
|
while (l < left && is_name_char(str[l])) {
|
||
|
l++;
|
||
|
}
|
||
|
|
||
|
// allocate buffer
|
||
|
bsize_t bufsize = bsize_add(bsize_fromsize(l), bsize_fromint(1));
|
||
|
char *buf;
|
||
|
if (bufsize.is_overflow || !(buf = malloc(bufsize.value))) {
|
||
|
BLog(BLOG_ERROR, "malloc failed");
|
||
|
error = 1;
|
||
|
goto out;
|
||
|
}
|
||
|
|
||
|
// copy and terminate
|
||
|
memcpy(buf, str, l);
|
||
|
buf[l] = '\0';
|
||
|
|
||
|
if (!strcmp(buf, "process")) {
|
||
|
token = NCD_TOKEN_PROCESS;
|
||
|
free(buf);
|
||
|
}
|
||
|
else if (!strcmp(buf, "template")) {
|
||
|
token = NCD_TOKEN_TEMPLATE;
|
||
|
free(buf);
|
||
|
}
|
||
|
else {
|
||
|
token = NCD_TOKEN_NAME;
|
||
|
token_val = buf;
|
||
|
token_len = l;
|
||
|
}
|
||
|
}
|
||
|
else if (*str == '"') do {
|
||
|
// init string
|
||
|
ExpString estr;
|
||
|
if (!ExpString_Init(&estr)) {
|
||
|
BLog(BLOG_ERROR, "ExpString_Init failed");
|
||
|
goto string_fail0;
|
||
|
}
|
||
|
|
||
|
// skip start quote
|
||
|
l = 1;
|
||
|
|
||
|
// decode string
|
||
|
while (l < left) {
|
||
|
uint8_t dec_ch;
|
||
|
|
||
|
// get character
|
||
|
if (str[l] == '\\') {
|
||
|
if (left - l < 2) {
|
||
|
BLog(BLOG_ERROR, "escape character found in string but nothing follows");
|
||
|
goto string_fail1;
|
||
|
}
|
||
|
|
||
|
size_t extra = 0;
|
||
|
|
||
|
switch (str[l + 1]) {
|
||
|
case '\'':
|
||
|
case '\"':
|
||
|
case '\\':
|
||
|
case '\?':
|
||
|
dec_ch = str[l + 1]; break;
|
||
|
|
||
|
case 'a':
|
||
|
dec_ch = '\a'; break;
|
||
|
case 'b':
|
||
|
dec_ch = '\b'; break;
|
||
|
case 'f':
|
||
|
dec_ch = '\f'; break;
|
||
|
case 'n':
|
||
|
dec_ch = '\n'; break;
|
||
|
case 'r':
|
||
|
dec_ch = '\r'; break;
|
||
|
case 't':
|
||
|
dec_ch = '\t'; break;
|
||
|
case 'v':
|
||
|
dec_ch = '\v'; break;
|
||
|
|
||
|
case '0':
|
||
|
dec_ch = 0; break;
|
||
|
|
||
|
case 'x': {
|
||
|
if (left - l < 4) {
|
||
|
BLog(BLOG_ERROR, "hexadecimal escape found in string but too little characters follow");
|
||
|
goto string_fail1;
|
||
|
}
|
||
|
|
||
|
uintmax_t hex_val;
|
||
|
if (!parse_unsigned_hex_integer_bin(&str[l + 2], 2, &hex_val)) {
|
||
|
BLog(BLOG_ERROR, "hexadecimal escape found in string but two hex characters don't follow");
|
||
|
goto string_fail1;
|
||
|
}
|
||
|
|
||
|
dec_ch = hex_val;
|
||
|
extra = 2;
|
||
|
} break;
|
||
|
|
||
|
default:
|
||
|
BLog(BLOG_ERROR, "bad escape sequence in string");
|
||
|
goto string_fail1;
|
||
|
}
|
||
|
|
||
|
l += 2 + extra;
|
||
|
}
|
||
|
else if (str[l] == '"') {
|
||
|
break;
|
||
|
}
|
||
|
else {
|
||
|
dec_ch = str[l];
|
||
|
l++;
|
||
|
}
|
||
|
|
||
|
// append character to string
|
||
|
if (!ExpString_AppendByte(&estr, dec_ch)) {
|
||
|
BLog(BLOG_ERROR, "ExpString_AppendChar failed");
|
||
|
goto string_fail1;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// make sure ending quote was found
|
||
|
if (l == left) {
|
||
|
BLog(BLOG_ERROR, "missing ending quote for string");
|
||
|
goto string_fail1;
|
||
|
}
|
||
|
|
||
|
// skip ending quote
|
||
|
l++;
|
||
|
|
||
|
token = NCD_TOKEN_STRING;
|
||
|
token_val = ExpString_Get(&estr);
|
||
|
token_len = ExpString_Length(&estr);
|
||
|
break;
|
||
|
|
||
|
string_fail1:
|
||
|
ExpString_Free(&estr);
|
||
|
string_fail0:
|
||
|
error = 1;
|
||
|
} while (0);
|
||
|
else if (is_space_char(*str)) {
|
||
|
token = 0;
|
||
|
l = 1;
|
||
|
}
|
||
|
else {
|
||
|
BLog(BLOG_ERROR, "unrecognized character");
|
||
|
error = 1;
|
||
|
}
|
||
|
|
||
|
out:
|
||
|
// report error
|
||
|
if (error) {
|
||
|
output(user, NCD_ERROR, NULL, 0, line, line_char);
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
// output token
|
||
|
if (token) {
|
||
|
if (!output(user, token, token_val, token_len, line, line_char)) {
|
||
|
return;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// update line/char counters
|
||
|
for (size_t i = 0; i < l; i++) {
|
||
|
if (str[i] == '\n') {
|
||
|
line++;
|
||
|
line_char = 1;
|
||
|
} else {
|
||
|
line_char++;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
str += l;
|
||
|
left -= l;
|
||
|
}
|
||
|
|
||
|
output(user, NCD_EOF, NULL, 0, line, line_char);
|
||
|
}
|