From 99911873a196975f5221aad89ae5eac42e1330e0 Mon Sep 17 00:00:00 2001 From: Maria Matejka Date: Mon, 25 Feb 2019 17:19:47 +0100 Subject: [PATCH] Conf: Lexer parses quoted strings in a more descriptive way --- conf/cf-lex.l | 137 +++++++++++++++++++++++++++++++++----------------- conf/conf.h | 4 +- 2 files changed, 92 insertions(+), 49 deletions(-) diff --git a/conf/cf-lex.l b/conf/cf-lex.l index f1b402a0..45327cd9 100644 --- a/conf/cf-lex.l +++ b/conf/cf-lex.l @@ -65,7 +65,7 @@ struct keyword { #endif -static uint cf_hash(byte *c); +static uint cf_hash(const byte *c); #define KW_KEY(n) n->name #define KW_NEXT(n) n->next @@ -96,6 +96,15 @@ int (*cf_read_hook)(byte *buf, unsigned int max, int fd); struct include_file_stack *ifs; static struct include_file_stack *ifs_head; +#define QUOTED_BUFFER_SIZE 4096 +static BUFFER_(char) quoted_buffer; +static char quoted_buffer_data[QUOTED_BUFFER_SIZE]; +static inline void quoted_buffer_init(void) { + quoted_buffer.used = 0; + quoted_buffer.size = QUOTED_BUFFER_SIZE; + quoted_buffer.data = quoted_buffer_data; +} + #define MAX_INCLUDE_DEPTH 8 #define YY_INPUT(buf,result,max) result = cf_read_hook(buf, max, ifs->fd); @@ -106,6 +115,8 @@ static struct include_file_stack *ifs_head; static void cf_include(char *arg, int alen); static int check_eof(void); +static enum yytokentype cf_lex_symbol(const char *data); + %} %option noyywrap @@ -113,24 +124,26 @@ static int check_eof(void); %option nounput %option noreject -%x COMMENT CCOMM CLI +%x COMMENT CCOMM CLI QUOTED APOSTROPHED INCLUDE ALPHA [a-zA-Z_] DIGIT [0-9] XIGIT [0-9a-fA-F] ALNUM [a-zA-Z_0-9] WHITE [ \t] -include ^{WHITE}*include{WHITE}*\".*\"{WHITE}*; %% -{include} { - char *start, *end; - +^{WHITE}*include{WHITE}*\" { if (!ifs->depth) cf_error("Include not allowed in CLI"); - start = strchr(yytext, '"'); - start++; + BEGIN(INCLUDE); +} + +[^"\n]+["]{WHITE}*; { + char *start, *end; + + start = yytext; end = strchr(start, '"'); *end = 0; @@ -139,8 +152,16 @@ include ^{WHITE}*include{WHITE}*\".*\"{WHITE}*; cf_error("Include with empty argument"); cf_include(start, end-start); + + BEGIN(INITIAL); } +["] cf_error("Include with empty argument"); +. cf_error("Unterminated include"); +\n cf_error("Unterminated include"); +<> cf_error("Unterminated include"); + + {DIGIT}+:{DIGIT}+ { uint len1 UNUSED, len2; u64 l; @@ -267,37 +288,23 @@ else: { return ELSECOL; } -({ALPHA}{ALNUM}*|[']({ALNUM}|[-]|[\.]|[:])*[']) { - if(*yytext == '\'') { - yytext[yyleng-1] = 0; - yytext++; - } +['] { + BEGIN(APOSTROPHED); + quoted_buffer_init(); +} - struct keyword *k = HASH_FIND(kw_hash, KW, yytext); - if (k) - { - if (k->value > 0) - return k->value; - else - { - cf_lval.i = -k->value; - return ENUM; - } - } +{ALNUM}|[-]|[.:] BUFFER_PUSH(quoted_buffer) = yytext[0]; +\n cf_error("Unterminated symbol"); +<> cf_error("Unterminated symbol"); +['] { + BEGIN(INITIAL); + BUFFER_PUSH(quoted_buffer) = 0; + return cf_lex_symbol(quoted_buffer_data); +} +. cf_error("Invalid character in apostrophed symbol"); - cf_lval.s = cf_get_symbol(yytext); - switch (cf_lval.s->class) { - case SYM_VOID: return CF_SYM_VOID; - case SYM_PROTO: return CF_SYM_PROTO; - case SYM_TEMPLATE: return CF_SYM_TEMPLATE; - case SYM_FUNCTION: return CF_SYM_FUNCTION; - case SYM_FILTER: return CF_SYM_FILTER; - case SYM_TABLE: return CF_SYM_TABLE; - case SYM_ATTRIBUTE: return CF_SYM_ATTRIBUTE; - case SYM_VARIABLE_RANGE: return CF_SYM_VARIABLE; - case SYM_CONSTANT_RANGE: return CF_SYM_CONSTANT; - default: bug("Unknown symbol class %d", cf_lval.s->class); - } +({ALPHA}{ALNUM}*) { + return cf_lex_symbol(yytext); } (.|\n) { @@ -313,14 +320,21 @@ else: { return yytext[0]; } -["][^"\n]*["] { - yytext[yyleng-1] = 0; - cf_lval.t = cfg_strdup(yytext+1); - yytext[yyleng-1] = '"'; +["] { + BEGIN(QUOTED); + quoted_buffer_init(); +} + +\n cf_error("Unterminated string"); +<> cf_error("Unterminated string"); +["] { + BEGIN(INITIAL); + BUFFER_PUSH(quoted_buffer) = 0; + cf_lval.t = cfg_strdup(quoted_buffer_data); return TEXT; } -["][^"\n]*\n cf_error("Unterminated string"); +. BUFFER_PUSH(quoted_buffer) = yytext[0]; <> { if (check_eof()) return END; } @@ -361,7 +375,7 @@ else: { %% static uint -cf_hash(byte *c) +cf_hash(const byte *c) { uint h = 13 << 24; @@ -370,7 +384,6 @@ cf_hash(byte *c) return h; } - /* * IFS stack - it contains structures needed for recursive processing * of include in config files. On the top of the stack is a structure @@ -531,7 +544,7 @@ check_eof(void) } static struct symbol * -cf_new_symbol(byte *c) +cf_new_symbol(const byte *c) { struct symbol *s; @@ -563,7 +576,7 @@ cf_new_symbol(byte *c) * signify no match. */ struct symbol * -cf_find_symbol(struct config *cfg, byte *c) +cf_find_symbol(struct config *cfg, const byte *c) { struct symbol *s; @@ -590,7 +603,7 @@ cf_find_symbol(struct config *cfg, byte *c) * existing symbol is found. */ struct symbol * -cf_get_symbol(byte *c) +cf_get_symbol(const byte *c) { return cf_find_symbol(new_config, c) ?: cf_new_symbol(c); } @@ -636,6 +649,36 @@ cf_default_name(char *template, int *counter) cf_error("Unable to generate default name"); } +static enum yytokentype +cf_lex_symbol(const char *data) +{ + struct keyword *k = HASH_FIND(kw_hash, KW, data); + if (k) + { + if (k->value > 0) + return k->value; + else + { + cf_lval.i = -k->value; + return ENUM; + } + } + + cf_lval.s = cf_get_symbol(data); + switch (cf_lval.s->class) { + case SYM_VOID: return CF_SYM_VOID; + case SYM_PROTO: return CF_SYM_PROTO; + case SYM_TEMPLATE: return CF_SYM_TEMPLATE; + case SYM_FUNCTION: return CF_SYM_FUNCTION; + case SYM_FILTER: return CF_SYM_FILTER; + case SYM_TABLE: return CF_SYM_TABLE; + case SYM_ATTRIBUTE: return CF_SYM_ATTRIBUTE; + case SYM_VARIABLE_RANGE: return CF_SYM_VARIABLE; + case SYM_CONSTANT_RANGE: return CF_SYM_CONSTANT; + default: bug("Unknown symbol class %d", cf_lval.s->class); + } +} + static void cf_lex_init_kh(void) { diff --git a/conf/conf.h b/conf/conf.h index 21d4f1e2..acb3413b 100644 --- a/conf/conf.h +++ b/conf/conf.h @@ -167,9 +167,9 @@ int cf_lex(void); void cf_lex_init(int is_cli, struct config *c); void cf_lex_unwind(void); -struct symbol *cf_find_symbol(struct config *cfg, byte *c); +struct symbol *cf_find_symbol(struct config *cfg, const byte *c); -struct symbol *cf_get_symbol(byte *c); +struct symbol *cf_get_symbol(const byte *c); struct symbol *cf_default_name(char *template, int *counter); struct symbol *cf_localize_symbol(struct symbol *sym);