Conf: Lexer parses quoted strings in a more descriptive way

This commit is contained in:
Maria Matejka 2019-02-25 17:19:47 +01:00
parent 412614c700
commit 99911873a1
2 changed files with 92 additions and 49 deletions

View file

@ -65,7 +65,7 @@ struct keyword {
#endif #endif
static uint cf_hash(byte *c); static uint cf_hash(const byte *c);
#define KW_KEY(n) n->name #define KW_KEY(n) n->name
#define KW_NEXT(n) n->next #define KW_NEXT(n) n->next
@ -96,6 +96,15 @@ int (*cf_read_hook)(byte *buf, unsigned int max, int fd);
struct include_file_stack *ifs; struct include_file_stack *ifs;
static struct include_file_stack *ifs_head; static struct include_file_stack *ifs_head;
/*
 * Scratch storage shared by the QUOTED and APOSTROPHED start conditions.
 * The lexer rules append one character at a time via BUFFER_PUSH() and
 * terminate the text with a zero byte before handing it on.
 *
 * NOTE(review): what BUFFER_PUSH() does once `used` reaches `size` is not
 * visible here -- confirm that input longer than QUOTED_BUFFER_SIZE cannot
 * write past, or try to reallocate, this statically allocated array.
 */
#define QUOTED_BUFFER_SIZE 4096
static BUFFER_(char) quoted_buffer;
static char quoted_buffer_data[QUOTED_BUFFER_SIZE];

/* Point quoted_buffer at the static backing array and mark it empty. */
static inline void
quoted_buffer_init(void)
{
  quoted_buffer.data = quoted_buffer_data;
  quoted_buffer.size = QUOTED_BUFFER_SIZE;
  quoted_buffer.used = 0;
}
#define MAX_INCLUDE_DEPTH 8 #define MAX_INCLUDE_DEPTH 8
#define YY_INPUT(buf,result,max) result = cf_read_hook(buf, max, ifs->fd); #define YY_INPUT(buf,result,max) result = cf_read_hook(buf, max, ifs->fd);
@ -106,6 +115,8 @@ static struct include_file_stack *ifs_head;
static void cf_include(char *arg, int alen); static void cf_include(char *arg, int alen);
static int check_eof(void); static int check_eof(void);
static enum yytokentype cf_lex_symbol(const char *data);
%} %}
%option noyywrap %option noyywrap
@ -113,24 +124,26 @@ static int check_eof(void);
%option nounput %option nounput
%option noreject %option noreject
%x COMMENT CCOMM CLI %x COMMENT CCOMM CLI QUOTED APOSTROPHED INCLUDE
ALPHA [a-zA-Z_] ALPHA [a-zA-Z_]
DIGIT [0-9] DIGIT [0-9]
XIGIT [0-9a-fA-F] XIGIT [0-9a-fA-F]
ALNUM [a-zA-Z_0-9] ALNUM [a-zA-Z_0-9]
WHITE [ \t] WHITE [ \t]
include ^{WHITE}*include{WHITE}*\".*\"{WHITE}*;
%% %%
{include} { ^{WHITE}*include{WHITE}*\" {
char *start, *end;
if (!ifs->depth) if (!ifs->depth)
cf_error("Include not allowed in CLI"); cf_error("Include not allowed in CLI");
start = strchr(yytext, '"'); BEGIN(INCLUDE);
start++; }
<INCLUDE>[^"\n]+["]{WHITE}*; {
char *start, *end;
start = yytext;
end = strchr(start, '"'); end = strchr(start, '"');
*end = 0; *end = 0;
@ -139,8 +152,16 @@ include ^{WHITE}*include{WHITE}*\".*\"{WHITE}*;
cf_error("Include with empty argument"); cf_error("Include with empty argument");
cf_include(start, end-start); cf_include(start, end-start);
BEGIN(INITIAL);
} }
<INCLUDE>["] cf_error("Include with empty argument");
<INCLUDE>. cf_error("Unterminated include");
<INCLUDE>\n cf_error("Unterminated include");
<INCLUDE><<EOF>> cf_error("Unterminated include");
{DIGIT}+:{DIGIT}+ { {DIGIT}+:{DIGIT}+ {
uint len1 UNUSED, len2; uint len1 UNUSED, len2;
u64 l; u64 l;
@ -267,37 +288,23 @@ else: {
return ELSECOL; return ELSECOL;
} }
({ALPHA}{ALNUM}*|[']({ALNUM}|[-]|[\.]|[:])*[']) { ['] {
if(*yytext == '\'') { BEGIN(APOSTROPHED);
yytext[yyleng-1] = 0; quoted_buffer_init();
yytext++;
} }
struct keyword *k = HASH_FIND(kw_hash, KW, yytext); <APOSTROPHED>{ALNUM}|[-]|[.:] BUFFER_PUSH(quoted_buffer) = yytext[0];
if (k) <APOSTROPHED>\n cf_error("Unterminated symbol");
{ <APOSTROPHED><<EOF>> cf_error("Unterminated symbol");
if (k->value > 0) <APOSTROPHED>['] {
return k->value; BEGIN(INITIAL);
else BUFFER_PUSH(quoted_buffer) = 0;
{ return cf_lex_symbol(quoted_buffer_data);
cf_lval.i = -k->value;
return ENUM;
}
} }
<APOSTROPHED>. cf_error("Invalid character in apostrophed symbol");
cf_lval.s = cf_get_symbol(yytext); ({ALPHA}{ALNUM}*) {
switch (cf_lval.s->class) { return cf_lex_symbol(yytext);
case SYM_VOID: return CF_SYM_VOID;
case SYM_PROTO: return CF_SYM_PROTO;
case SYM_TEMPLATE: return CF_SYM_TEMPLATE;
case SYM_FUNCTION: return CF_SYM_FUNCTION;
case SYM_FILTER: return CF_SYM_FILTER;
case SYM_TABLE: return CF_SYM_TABLE;
case SYM_ATTRIBUTE: return CF_SYM_ATTRIBUTE;
case SYM_VARIABLE_RANGE: return CF_SYM_VARIABLE;
case SYM_CONSTANT_RANGE: return CF_SYM_CONSTANT;
default: bug("Unknown symbol class %d", cf_lval.s->class);
}
} }
<CLI>(.|\n) { <CLI>(.|\n) {
@ -313,14 +320,21 @@ else: {
return yytext[0]; return yytext[0];
} }
["][^"\n]*["] { ["] {
yytext[yyleng-1] = 0; BEGIN(QUOTED);
cf_lval.t = cfg_strdup(yytext+1); quoted_buffer_init();
yytext[yyleng-1] = '"'; }
<QUOTED>\n cf_error("Unterminated string");
<QUOTED><<EOF>> cf_error("Unterminated string");
<QUOTED>["] {
BEGIN(INITIAL);
BUFFER_PUSH(quoted_buffer) = 0;
cf_lval.t = cfg_strdup(quoted_buffer_data);
return TEXT; return TEXT;
} }
["][^"\n]*\n cf_error("Unterminated string"); <QUOTED>. BUFFER_PUSH(quoted_buffer) = yytext[0];
<INITIAL,COMMENT><<EOF>> { if (check_eof()) return END; } <INITIAL,COMMENT><<EOF>> { if (check_eof()) return END; }
@ -361,7 +375,7 @@ else: {
%% %%
static uint static uint
cf_hash(byte *c) cf_hash(const byte *c)
{ {
uint h = 13 << 24; uint h = 13 << 24;
@ -370,7 +384,6 @@ cf_hash(byte *c)
return h; return h;
} }
/* /*
* IFS stack - it contains structures needed for recursive processing * IFS stack - it contains structures needed for recursive processing
* of include in config files. On the top of the stack is a structure * of include in config files. On the top of the stack is a structure
@ -531,7 +544,7 @@ check_eof(void)
} }
static struct symbol * static struct symbol *
cf_new_symbol(byte *c) cf_new_symbol(const byte *c)
{ {
struct symbol *s; struct symbol *s;
@ -563,7 +576,7 @@ cf_new_symbol(byte *c)
* signify no match. * signify no match.
*/ */
struct symbol * struct symbol *
cf_find_symbol(struct config *cfg, byte *c) cf_find_symbol(struct config *cfg, const byte *c)
{ {
struct symbol *s; struct symbol *s;
@ -590,7 +603,7 @@ cf_find_symbol(struct config *cfg, byte *c)
* existing symbol is found. * existing symbol is found.
*/ */
struct symbol * struct symbol *
cf_get_symbol(byte *c) cf_get_symbol(const byte *c)
{ {
return cf_find_symbol(new_config, c) ?: cf_new_symbol(c); return cf_find_symbol(new_config, c) ?: cf_new_symbol(c);
} }
@ -636,6 +649,36 @@ cf_default_name(char *template, int *counter)
cf_error("Unable to generate default name"); cf_error("Unable to generate default name");
} }
static enum yytokentype
cf_lex_symbol(const char *data)
{
struct keyword *k = HASH_FIND(kw_hash, KW, data);
if (k)
{
if (k->value > 0)
return k->value;
else
{
cf_lval.i = -k->value;
return ENUM;
}
}
cf_lval.s = cf_get_symbol(data);
switch (cf_lval.s->class) {
case SYM_VOID: return CF_SYM_VOID;
case SYM_PROTO: return CF_SYM_PROTO;
case SYM_TEMPLATE: return CF_SYM_TEMPLATE;
case SYM_FUNCTION: return CF_SYM_FUNCTION;
case SYM_FILTER: return CF_SYM_FILTER;
case SYM_TABLE: return CF_SYM_TABLE;
case SYM_ATTRIBUTE: return CF_SYM_ATTRIBUTE;
case SYM_VARIABLE_RANGE: return CF_SYM_VARIABLE;
case SYM_CONSTANT_RANGE: return CF_SYM_CONSTANT;
default: bug("Unknown symbol class %d", cf_lval.s->class);
}
}
static void static void
cf_lex_init_kh(void) cf_lex_init_kh(void)
{ {

View file

@ -167,9 +167,9 @@ int cf_lex(void);
void cf_lex_init(int is_cli, struct config *c); void cf_lex_init(int is_cli, struct config *c);
void cf_lex_unwind(void); void cf_lex_unwind(void);
struct symbol *cf_find_symbol(struct config *cfg, byte *c); struct symbol *cf_find_symbol(struct config *cfg, const byte *c);
struct symbol *cf_get_symbol(byte *c); struct symbol *cf_get_symbol(const byte *c);
struct symbol *cf_default_name(char *template, int *counter); struct symbol *cf_default_name(char *template, int *counter);
struct symbol *cf_localize_symbol(struct symbol *sym); struct symbol *cf_localize_symbol(struct symbol *sym);