tor-android/external/badvpn_dns/ncd/NCDConfigTokenizer.c

/**
 * @file NCDConfigTokenizer.c
 * @author Ambroz Bizjak <ambrop7@gmail.com>
 * 
 * @section LICENSE
 * 
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the author nor the
 *    names of its contributors may be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <string.h>
#include <stddef.h>
#include <stdlib.h>

#include <misc/debug.h>
#include <misc/string_begins_with.h>
#include <misc/balloc.h>
#include <misc/expstring.h>
#include <misc/parse_number.h>
#include <base/BLog.h>

#include <ncd/NCDConfigTokenizer.h>

#include <generated/blog_channel_NCDConfigTokenizer.h>

/**
 * Tells whether a character may appear inside a name token
 * (at any position after the first).
 *
 * @param c character to classify
 * @return 1 for an ASCII letter, an ASCII digit or '_', otherwise 0
 */
static int is_name_char (char c)
{
    if (c == '_') {
        return 1;
    }
    
    if (c >= '0' && c <= '9') {
        return 1;
    }
    
    if (c >= 'a' && c <= 'z') {
        return 1;
    }
    
    return (c >= 'A' && c <= 'Z');
}

/**
 * Tells whether a character may start a name token.
 * Unlike later positions of a name, digits are not accepted here.
 *
 * @param c character to classify
 * @return 1 for an ASCII letter or '_', otherwise 0
 */
static int is_name_first_char (char c)
{
    if (c == '_') {
        return 1;
    }
    
    if (c >= 'a' && c <= 'z') {
        return 1;
    }
    
    return (c >= 'A' && c <= 'Z');
}

/**
 * Tells whether a character is whitespace for the tokenizer.
 *
 * @param c character to classify
 * @return 1 for space, tab, line feed or carriage return, otherwise 0
 */
static int is_space_char (char c)
{
    switch (c) {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
            return 1;
        
        default:
            return 0;
    }
}

/**
 * Compares a length-delimited byte sequence against a NUL-terminated string.
 *
 * @param str start of the data to compare; it need not be NUL-terminated,
 *            only the first str_len bytes are read
 * @param str_len number of bytes of str to compare; a negative value never matches
 * @param needle NUL-terminated string to compare against
 * @return 1 if str_len equals the length of needle and the bytes are identical,
 *         otherwise 0
 */
static int string_equals (const char *str, int str_len, const char *needle)
{
    // Reject negative lengths explicitly rather than relying on the implicit
    // int -> size_t conversion in the comparison with strlen().
    if (str_len < 0) {
        return 0;
    }
    
    size_t len = (size_t)str_len;
    
    return (len == strlen(needle) && memcmp(str, needle, len) == 0);
}

/**
 * Splits NCD configuration text into tokens and reports each one through a callback.
 *
 * Scans the `left` bytes starting at `str`. Comments ('#' up to, but not
 * including, the next newline) and single whitespace characters are consumed
 * without being reported. Every other token is passed to `output` together
 * with the line and column (both starting at 1, column counted in bytes)
 * at which the token begins.
 *
 * Token values:
 *  - NCD_TOKEN_NAME: a malloc'ed, NUL-terminated copy of the name, with its length.
 *  - NCD_TOKEN_STRING: the buffer returned by ExpString_Get for the decoded
 *    string (escape sequences resolved), with the length from ExpString_Length.
 *  - all other tokens: NULL value and zero length.
 * This function does not free a name or string value after handing it to
 * `output`. NOTE(review): presumably the callback takes ownership - confirm
 * against the callers.
 *
 * Termination:
 *  - On any error (allocation failure, malformed string, unrecognized
 *    character) a message is logged, `output` is called once with NCD_ERROR
 *    and the function returns.
 *  - If `output` returns zero for a token, the function returns immediately;
 *    no NCD_EOF is reported in that case.
 *  - Otherwise `output` is called with NCD_EOF once all input is consumed.
 *
 * @param str start of the text to tokenize; it is read by length, not as a
 *            NUL-terminated string
 * @param left number of bytes available at str
 * @param output callback receiving (user, token, value, value length, line, column)
 * @param user opaque pointer passed through to output unchanged
 */
void NCDConfigTokenizer_Tokenize (char *str, size_t left, NCDConfigTokenizer_output output, void *user)
{
    // position of the token currently being scanned
    size_t line = 1;
    size_t line_char = 1;
    
    // each iteration consumes exactly one token, comment or whitespace character
    while (left > 0) {
        size_t l; // number of input bytes consumed by this iteration
        int error = 0;
        int token; // token code to report; 0 means nothing is reported
        void *token_val = NULL;
        size_t token_len = 0;
        
        if (*str == '#') {
            // comment: consume up to, but not including, the newline; the
            // newline is handled as whitespace by the next iteration
            l = 1;
            while (l < left && str[l] != '\n') {
                l++;
            }
            token = 0;
        }
        // Fixed tokens. The result of data_begins_with is used both as the
        // match test and as the number of bytes consumed.
        else if (l = data_begins_with(str, left, "{")) {
            token = NCD_TOKEN_CURLY_OPEN;
        }
        else if (l = data_begins_with(str, left, "}")) {
            token = NCD_TOKEN_CURLY_CLOSE;
        }
        else if (l = data_begins_with(str, left, "(")) {
            token = NCD_TOKEN_ROUND_OPEN;
        }
        else if (l = data_begins_with(str, left, ")")) {
            token = NCD_TOKEN_ROUND_CLOSE;
        }
        else if (l = data_begins_with(str, left, ";")) {
            token = NCD_TOKEN_SEMICOLON;
        }
        else if (l = data_begins_with(str, left, ".")) {
            token = NCD_TOKEN_DOT;
        }
        else if (l = data_begins_with(str, left, ",")) {
            token = NCD_TOKEN_COMMA;
        }
        else if (l = data_begins_with(str, left, ":")) {
            token = NCD_TOKEN_COLON;
        }
        else if (l = data_begins_with(str, left, "[")) {
            token = NCD_TOKEN_BRACKET_OPEN;
        }
        else if (l = data_begins_with(str, left, "]")) {
            token = NCD_TOKEN_BRACKET_CLOSE;
        }
        else if (l = data_begins_with(str, left, "->")) {
            token = NCD_TOKEN_ARROW;
        }
        // Keywords.
        // NOTE(review): these are tested as plain prefixes and come before the
        // name branch, so an identifier that merely starts with a keyword
        // (e.g. "elsewhere") would presumably be split into a keyword token
        // followed by a name - confirm whether that is intended.
        else if (l = data_begins_with(str, left, "If")) {
            token = NCD_TOKEN_IF;
        }
        else if (l = data_begins_with(str, left, "Elif")) {
            token = NCD_TOKEN_ELIF;
        }
        else if (l = data_begins_with(str, left, "elif")) {
            token = NCD_TOKEN_ELIF;
        }
        else if (l = data_begins_with(str, left, "Else")) {
            token = NCD_TOKEN_ELSE;
        }
        else if (l = data_begins_with(str, left, "else")) {
            token = NCD_TOKEN_ELSE;
        }
        else if (l = data_begins_with(str, left, "Foreach")) {
            token = NCD_TOKEN_FOREACH;
        }
        else if (l = data_begins_with(str, left, "As")) {
            token = NCD_TOKEN_AS;
        }
        // "include_guard" has to be tested before its own prefix "include"
        else if (l = data_begins_with(str, left, "include_guard")) {
            token = NCD_TOKEN_INCLUDE_GUARD;
        }
        else if (l = data_begins_with(str, left, "include")) {
            token = NCD_TOKEN_INCLUDE;
        }
        else if (is_name_first_char(*str)) {
            // name: take the longest run of name characters
            l = 1;
            while (l < left && is_name_char(str[l])) {
                l++;
            }
            
            // allocate buffer (name length plus terminator, overflow-checked)
            bsize_t bufsize = bsize_add(bsize_fromsize(l), bsize_fromint(1));
            char *buf;
            if (bufsize.is_overflow || !(buf = malloc(bufsize.value))) {
                BLog(BLOG_ERROR, "malloc failed");
                error = 1;
                goto out;
            }
            
            // copy and terminate
            memcpy(buf, str, l);
            buf[l] = '\0';
            
            // "process" and "template" are keywords recognized by whole-name
            // comparison; they carry no value, so the copy is released
            if (!strcmp(buf, "process")) {
                token = NCD_TOKEN_PROCESS;
                free(buf);
            }
            else if (!strcmp(buf, "template")) {
                token = NCD_TOKEN_TEMPLATE;
                free(buf);
            }
            else {
                // ordinary name: the copy is handed to the callback as the token value
                token = NCD_TOKEN_NAME;
                token_val = buf;
                token_len = l;
            }
        }
        // Quoted string. The do/while(0) wrapper lets the success path leave
        // with 'break', skipping the failure labels at the end of the block.
        else if (*str == '"') do {
            // init string
            ExpString estr;
            if (!ExpString_Init(&estr)) {
                BLog(BLOG_ERROR, "ExpString_Init failed");
                goto string_fail0;
            }
            
            // skip start quote
            l = 1;
            
            // decode string
            while (l < left) {
                uint8_t dec_ch;
                
                // get character
                if (str[l] == '\\') {
                    // an escape needs at least the backslash and one more byte
                    if (left - l < 2) {
                        BLog(BLOG_ERROR, "escape character found in string but nothing follows");
                        goto string_fail1;
                    }
                    
                    // bytes consumed beyond the two-byte "\c" form
                    size_t extra = 0;
                    
                    switch (str[l + 1]) {
                        // escapes that stand for the escaped character itself
                        case '\'':
                        case '\"':
                        case '\\':
                        case '\?':
                            dec_ch = str[l + 1]; break;
                        
                        // control-character escapes
                        case 'a':
                            dec_ch = '\a'; break;
                        case 'b':
                            dec_ch = '\b'; break;
                        case 'f':
                            dec_ch = '\f'; break;
                        case 'n':
                            dec_ch = '\n'; break;
                        case 'r':
                            dec_ch = '\r'; break;
                        case 't':
                            dec_ch = '\t'; break;
                        case 'v':
                            dec_ch = '\v'; break;
                        
                        // "\0" yields a zero byte; no further digits are consumed
                        case '0':
                            dec_ch = 0; break;
                        
                        case 'x': {
                            // backslash, 'x' and exactly two hex digits: four bytes in total
                            if (left - l < 4) {
                                BLog(BLOG_ERROR, "hexadecimal escape found in string but too little characters follow");
                                goto string_fail1;
                            }
                            
                            uintmax_t hex_val;
                            if (!parse_unsigned_hex_integer_bin(&str[l + 2], 2, &hex_val)) {
                                BLog(BLOG_ERROR, "hexadecimal escape found in string but two hex characters don't follow");
                                goto string_fail1;
                            }
                            
                            dec_ch = hex_val;
                            extra = 2;
                        } break;
                        
                        default:
                            BLog(BLOG_ERROR, "bad escape sequence in string");
                            goto string_fail1;
                    }
                    
                    l += 2 + extra;
                }
                else if (str[l] == '"') {
                    // unescaped quote ends the string; l stays on the quote
                    break;
                }
                else {
                    // ordinary byte, copied as is
                    dec_ch = str[l];
                    l++;
                }
                
                // append character to string
                if (!ExpString_AppendByte(&estr, dec_ch)) {
                    BLog(BLOG_ERROR, "ExpString_AppendChar failed");
                    goto string_fail1;
                }
            }
            
            // make sure ending quote was found
            // (the loop above ends with l == left only when input ran out)
            if (l == left) {
                BLog(BLOG_ERROR, "missing ending quote for string");
                goto string_fail1;
            }
            
            // skip ending quote
            l++;
            
            // success: estr is not freed here, its buffer becomes the token value
            token = NCD_TOKEN_STRING;
            token_val = ExpString_Get(&estr);
            token_len = ExpString_Length(&estr);
            break;
            
            // failure paths: release the partial string if it was initialized
        string_fail1:
            ExpString_Free(&estr);
        string_fail0:
            error = 1;
        } while (0);
        else if (is_space_char(*str)) {
            // whitespace is consumed one character at a time and not reported
            token = 0;
            l = 1;
        }
        else {
            BLog(BLOG_ERROR, "unrecognized character");
            error = 1;
        }
        
    out:
        // report error
        // (token and l may be unset on this path; neither is read before returning)
        if (error) {
            output(user, NCD_ERROR, NULL, 0, line, line_char);
            return;
        }
        
        // output token
        // (a zero return from the callback stops tokenizing without reporting NCD_EOF)
        if (token) {
            if (!output(user, token, token_val, token_len, line, line_char)) {
                return;
            }
        }
        
        // update line/char counters
        // (done after reporting, so the callback saw the position of the token's first byte)
        for (size_t i = 0; i < l; i++) {
            if (str[i] == '\n') {
                line++;
                line_char = 1;
            } else {
                line_char++;
            }
        }
        
        // advance past the consumed bytes
        str += l;
        left -= l;
    }
    
    // all input consumed without error
    output(user, NCD_EOF, NULL, 0, line, line_char);
}
added badvpn as local folder 2015-01-25 11:08:34 +00:00			`/**`
			`* @file NCDConfigTokenizer.c`
			`* @author Ambroz Bizjak <ambrop7@gmail.com>`
			`*`
			`* @section LICENSE`
			`*`
			`* Redistribution and use in source and binary forms, with or without`
			`* modification, are permitted provided that the following conditions are met:`
			`* 1. Redistributions of source code must retain the above copyright`
			`* notice, this list of conditions and the following disclaimer.`
			`* 2. Redistributions in binary form must reproduce the above copyright`
			`* notice, this list of conditions and the following disclaimer in the`
			`* documentation and/or other materials provided with the distribution.`
			`* 3. Neither the name of the author nor the`
			`* names of its contributors may be used to endorse or promote products`
			`* derived from this software without specific prior written permission.`
			`*`
			`* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND`
			`* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED`
			`* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE`
			`* DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY`
			`* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES`
			`* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;`
			`* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND`
			`* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT`
			`* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS`
			`* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.`
			`*/`

			`#include <string.h>`
			`#include <stddef.h>`
			`#include <stdlib.h>`

			`#include <misc/debug.h>`
			`#include <misc/string_begins_with.h>`
			`#include <misc/balloc.h>`
			`#include <misc/expstring.h>`
			`#include <misc/parse_number.h>`
			`#include <base/BLog.h>`

			`#include <ncd/NCDConfigTokenizer.h>`

			`#include <generated/blog_channel_NCDConfigTokenizer.h>`

			`static int is_name_char (char c)`
			`{`
			`return ((c >= 'a' && c <= 'z') \|\| (c >= 'A' && c <= 'Z') \|\| (c >= '0' && c <= '9') \|\| c == '_');`
			`}`

			`static int is_name_first_char (char c)`
			`{`
			`return ((c >= 'a' && c <= 'z') \|\| (c >= 'A' && c <= 'Z') \|\| c == '_');`
			`}`

			`static int is_space_char (char c)`
			`{`
			`return (c == ' ' \|\| c == '\t' \|\| c == '\n' \|\| c == '\r');`
			`}`

			`static int string_equals (char str, int str_len, char needle)`
			`{`
			`return (str_len == strlen(needle) && !memcmp(str, needle, str_len));`
			`}`

			`void NCDConfigTokenizer_Tokenize (char str, size_t left, NCDConfigTokenizer_output output, void user)`
			`{`
			`size_t line = 1;`
			`size_t line_char = 1;`

			`while (left > 0) {`
			`size_t l;`
			`int error = 0;`
			`int token;`
			`void *token_val = NULL;`
			`size_t token_len = 0;`

			`if (*str == '#') {`
			`l = 1;`
			`while (l < left && str[l] != '\n') {`
			`l++;`
			`}`
			`token = 0;`
			`}`
			`else if (l = data_begins_with(str, left, "{")) {`
			`token = NCD_TOKEN_CURLY_OPEN;`
			`}`
			`else if (l = data_begins_with(str, left, "}")) {`
			`token = NCD_TOKEN_CURLY_CLOSE;`
			`}`
			`else if (l = data_begins_with(str, left, "(")) {`
			`token = NCD_TOKEN_ROUND_OPEN;`
			`}`
			`else if (l = data_begins_with(str, left, ")")) {`
			`token = NCD_TOKEN_ROUND_CLOSE;`
			`}`
			`else if (l = data_begins_with(str, left, ";")) {`
			`token = NCD_TOKEN_SEMICOLON;`
			`}`
			`else if (l = data_begins_with(str, left, ".")) {`
			`token = NCD_TOKEN_DOT;`
			`}`
			`else if (l = data_begins_with(str, left, ",")) {`
			`token = NCD_TOKEN_COMMA;`
			`}`
			`else if (l = data_begins_with(str, left, ":")) {`
			`token = NCD_TOKEN_COLON;`
			`}`
			`else if (l = data_begins_with(str, left, "[")) {`
			`token = NCD_TOKEN_BRACKET_OPEN;`
			`}`
			`else if (l = data_begins_with(str, left, "]")) {`
			`token = NCD_TOKEN_BRACKET_CLOSE;`
			`}`
			`else if (l = data_begins_with(str, left, "->")) {`
			`token = NCD_TOKEN_ARROW;`
			`}`
			`else if (l = data_begins_with(str, left, "If")) {`
			`token = NCD_TOKEN_IF;`
			`}`
			`else if (l = data_begins_with(str, left, "Elif")) {`
			`token = NCD_TOKEN_ELIF;`
			`}`
			`else if (l = data_begins_with(str, left, "elif")) {`
			`token = NCD_TOKEN_ELIF;`
			`}`
			`else if (l = data_begins_with(str, left, "Else")) {`
			`token = NCD_TOKEN_ELSE;`
			`}`
			`else if (l = data_begins_with(str, left, "else")) {`
			`token = NCD_TOKEN_ELSE;`
			`}`
			`else if (l = data_begins_with(str, left, "Foreach")) {`
			`token = NCD_TOKEN_FOREACH;`
			`}`
			`else if (l = data_begins_with(str, left, "As")) {`
			`token = NCD_TOKEN_AS;`
			`}`
			`else if (l = data_begins_with(str, left, "include_guard")) {`
			`token = NCD_TOKEN_INCLUDE_GUARD;`
			`}`
			`else if (l = data_begins_with(str, left, "include")) {`
			`token = NCD_TOKEN_INCLUDE;`
			`}`
			`else if (is_name_first_char(*str)) {`
			`l = 1;`
			`while (l < left && is_name_char(str[l])) {`
			`l++;`
			`}`

			`// allocate buffer`
			`bsize_t bufsize = bsize_add(bsize_fromsize(l), bsize_fromint(1));`
			`char *buf;`
			`if (bufsize.is_overflow \|\| !(buf = malloc(bufsize.value))) {`
			`BLog(BLOG_ERROR, "malloc failed");`
			`error = 1;`
			`goto out;`
			`}`

			`// copy and terminate`
			`memcpy(buf, str, l);`
			`buf[l] = '\0';`

			`if (!strcmp(buf, "process")) {`
			`token = NCD_TOKEN_PROCESS;`
			`free(buf);`
			`}`
			`else if (!strcmp(buf, "template")) {`
			`token = NCD_TOKEN_TEMPLATE;`
			`free(buf);`
			`}`
			`else {`
			`token = NCD_TOKEN_NAME;`
			`token_val = buf;`
			`token_len = l;`
			`}`
			`}`
			`else if (*str == '"') do {`
			`// init string`
			`ExpString estr;`
			`if (!ExpString_Init(&estr)) {`
			`BLog(BLOG_ERROR, "ExpString_Init failed");`
			`goto string_fail0;`
			`}`

			`// skip start quote`
			`l = 1;`

			`// decode string`
			`while (l < left) {`
			`uint8_t dec_ch;`

			`// get character`
			`if (str[l] == '\\') {`
			`if (left - l < 2) {`
			`BLog(BLOG_ERROR, "escape character found in string but nothing follows");`
			`goto string_fail1;`
			`}`

			`size_t extra = 0;`

			`switch (str[l + 1]) {`
			`case '\'':`
			`case '\"':`
			`case '\\':`
			`case '\?':`
			`dec_ch = str[l + 1]; break;`

			`case 'a':`
			`dec_ch = '\a'; break;`
			`case 'b':`
			`dec_ch = '\b'; break;`
			`case 'f':`
			`dec_ch = '\f'; break;`
			`case 'n':`
			`dec_ch = '\n'; break;`
			`case 'r':`
			`dec_ch = '\r'; break;`
			`case 't':`
			`dec_ch = '\t'; break;`
			`case 'v':`
			`dec_ch = '\v'; break;`

			`case '0':`
			`dec_ch = 0; break;`

			`case 'x': {`
			`if (left - l < 4) {`
			`BLog(BLOG_ERROR, "hexadecimal escape found in string but too little characters follow");`
			`goto string_fail1;`
			`}`

			`uintmax_t hex_val;`
			`if (!parse_unsigned_hex_integer_bin(&str[l + 2], 2, &hex_val)) {`
			`BLog(BLOG_ERROR, "hexadecimal escape found in string but two hex characters don't follow");`
			`goto string_fail1;`
			`}`

			`dec_ch = hex_val;`
			`extra = 2;`
			`} break;`

			`default:`
			`BLog(BLOG_ERROR, "bad escape sequence in string");`
			`goto string_fail1;`
			`}`

			`l += 2 + extra;`
			`}`
			`else if (str[l] == '"') {`
			`break;`
			`}`
			`else {`
			`dec_ch = str[l];`
			`l++;`
			`}`

			`// append character to string`
			`if (!ExpString_AppendByte(&estr, dec_ch)) {`
			`BLog(BLOG_ERROR, "ExpString_AppendChar failed");`
			`goto string_fail1;`
			`}`
			`}`

			`// make sure ending quote was found`
			`if (l == left) {`
			`BLog(BLOG_ERROR, "missing ending quote for string");`
			`goto string_fail1;`
			`}`

			`// skip ending quote`
			`l++;`

			`token = NCD_TOKEN_STRING;`
			`token_val = ExpString_Get(&estr);`
			`token_len = ExpString_Length(&estr);`
			`break;`

			`string_fail1:`
			`ExpString_Free(&estr);`
			`string_fail0:`
			`error = 1;`
			`} while (0);`
			`else if (is_space_char(*str)) {`
			`token = 0;`
			`l = 1;`
			`}`
			`else {`
			`BLog(BLOG_ERROR, "unrecognized character");`
			`error = 1;`
			`}`

			`out:`
			`// report error`
			`if (error) {`
			`output(user, NCD_ERROR, NULL, 0, line, line_char);`
			`return;`
			`}`

			`// output token`
			`if (token) {`
			`if (!output(user, token, token_val, token_len, line, line_char)) {`
			`return;`
			`}`
			`}`

			`// update line/char counters`
			`for (size_t i = 0; i < l; i++) {`
			`if (str[i] == '\n') {`
			`line++;`
			`line_char = 1;`
			`} else {`
			`line_char++;`
			`}`
			`}`

			`str += l;`
			`left -= l;`
			`}`

			`output(user, NCD_EOF, NULL, 0, line, line_char);`
			`}`