Skip to content

Commit 3beea59

Browse files
committed
add new parameters for quoted-string parser
1 parent 2053745 commit 3beea59

File tree

8 files changed

+147
-14
lines changed

8 files changed

+147
-14
lines changed

doc/configuration.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -757,6 +757,9 @@ Zero or more characters, possibly surrounded by double quote marks.
757757
If the first character is a quote mark, operates like quoted-string. Otherwise, operates like "word".
758758
Quote marks are stripped from the match.
759759

760+
Note: this is a "historical" type. The same can be achieved in 2.0.7 and above by using
761+
"quoted-string" with parameter "option.quotesOptional".
762+
760763
date-iso
761764
########
762765
Date in ISO format ('YYYY-MM-DD').

src/parser.c

Lines changed: 72 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
* liblognorm - a fast samples-based log normalization library
3-
* Copyright 2010-2018 by Rainer Gerhards and Adiscon GmbH.
3+
* Copyright 2010-2021 by Rainer Gerhards and Adiscon GmbH.
44
*
55
* Modified by Pavel Levshin ([email protected]) in 2013
66
*
@@ -1644,6 +1644,12 @@ PARSER_Parse(OpQuotedString)
16441644
}
16451645

16461646

1647+
1648+
/* Option block of the "quoted-string" parser. It is allocated zero-filled and
 * populated from the "option.*" rule parameters by PARSER_Construct(QuotedString),
 * and read by PARSER_Parse(QuotedString). All members are used as booleans. */
struct data_QuotedString {
1649+
int dashIsEmpty; /* non-zero: a match that is exactly "-" (quotes included) yields an empty string */
1650+
int quotesOptional; /* non-zero: input not starting with a double quote is matched up to the next space */
1651+
int supportEscape; /* non-zero: a backslash makes the scanner skip the character that follows it */
1652+
};
16471653
/**
16481654
* Parse a quoted string. Escaping of the quote char is only supported when the
16491655
* "option.supportEscape" parameter is set. A quoted string is one that starts with a double quote,
@@ -1653,38 +1659,94 @@ PARSER_Parse(OpQuotedString)
16531659
*/
16541660
PARSER_Parse(QuotedString)
16551661
const char *c;
1662+
struct data_QuotedString *const data = (struct data_QuotedString*) pdata;
16561663
size_t i;
1664+
int hadQuote = 0;
16571665

16581666
assert(npb->str != NULL);
16591667
assert(offs != NULL);
16601668
assert(parsed != NULL);
16611669
c = npb->str;
16621670
i = *offs;
1663-
if(i + 2 > npb->strLen)
1664-
goto done; /* needs at least 2 characters */
1671+
if(i + 1 > npb->strLen)
1672+
goto done; /* needs at least 1 characters (with quotesQptional...) */
16651673

1666-
if(c[i] != '"')
1667-
goto done;
1668-
++i;
1674+
if(c[i] == '"') {
1675+
hadQuote = 1;
1676+
++i;
1677+
} else {
1678+
if(!data->quotesOptional) {
1679+
goto done;
1680+
}
1681+
}
16691682

16701683
/* search end of string */
1671-
while(i < npb->strLen && c[i] != '"')
1684+
while(i < npb->strLen &&
1685+
( (hadQuote && c[i] != '"') || (!hadQuote && c[i] != ' ') )
1686+
) {
1687+
if(data->supportEscape && c[i] == '\\' && (i < npb->strLen)) {
1688+
i++; /* next char is escaped */
1689+
}
16721690
i++;
1691+
}
16731692

1674-
if(i == npb->strLen || c[i] != '"')
1693+
if(hadQuote && (i == npb->strLen || c[i] != '"'))
16751694
goto done;
16761695

16771696
/* success, persist */
1678-
*parsed = i + 1 - *offs; /* "eat" terminal double quote */
1697+
const size_t charsFound = i - *offs + (hadQuote ? 1 : 0);
1698+
*parsed = charsFound; /* "eat" terminal double quote */
16791699
/* create JSON value to save quoted string contents */
16801700
if(value != NULL) {
1681-
*value = json_object_new_string_len(npb->str+(*offs), *parsed);
1701+
if(charsFound == 3 && data->dashIsEmpty && !strncmp(npb->str+(*offs), "\"-\"", 3)) {
1702+
*value = json_object_new_string_len("", 0);
1703+
} else {
1704+
*value = json_object_new_string_len(npb->str+(*offs), *parsed);
1705+
}
16821706
}
16831707
r = 0; /* success */
16841708
done:
16851709
return r;
16861710
}
16871711

1712+
PARSER_Construct(QuotedString)
1713+
{
1714+
int r = 0;
1715+
struct data_QuotedString *data = (struct data_QuotedString*) calloc(1, sizeof(struct data_QuotedString));
1716+
1717+
if(json == NULL)
1718+
goto done;
1719+
1720+
struct json_object_iterator it = json_object_iter_begin(json);
1721+
struct json_object_iterator itEnd = json_object_iter_end(json);
1722+
while (!json_object_iter_equal(&it, &itEnd)) {
1723+
const char *key = json_object_iter_peek_name(&it);
1724+
struct json_object *const val = json_object_iter_peek_value(&it);
1725+
if(!strcasecmp(key, "option.quotesOptional")) {
1726+
data->quotesOptional = json_object_get_boolean(val);
1727+
} else if(!strcasecmp(key, "option.dashIsEmpty")) {
1728+
data->dashIsEmpty = json_object_get_boolean(val);
1729+
} else if(!strcasecmp(key, "option.supportEscape")) {
1730+
data->supportEscape = json_object_get_boolean(val);
1731+
} else {
1732+
ln_errprintf(ctx, 0, "invalid param for QuotedString: %s",
1733+
json_object_to_json_string(val));
1734+
}
1735+
json_object_iter_next(&it);
1736+
}
1737+
1738+
done:
1739+
*pdata = data;
1740+
if(r != 0)
1741+
free(data);
1742+
return r;
1743+
}
1744+
/* Releases the parser's option block: frees pdata, presumably the
 * struct data_QuotedString that PARSER_Construct(QuotedString) stored
 * in *pdata. */
PARSER_Destruct(QuotedString)
1745+
{
1746+
free(pdata);
1747+
}
1748+
1749+
16881750

16891751
/**
16901752
* Parse an ISO date, that is YYYY-MM-DD (exactly this format).

src/parser.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
* liblognorm - a fast samples-based log normalization library
3-
* Copyright 2010-2015 by Rainer Gerhards and Adiscon GmbH.
3+
* Copyright 2010-2021 by Rainer Gerhards and Adiscon GmbH.
44
*
55
* Modified by Pavel Levshin ([email protected]) in 2013
66
*
@@ -63,7 +63,7 @@ PARSERDEF(Repeat);
6363
PARSERDEF(String);
6464
PARSERDEF_NO_DATA(Rest);
6565
PARSERDEF_NO_DATA(OpQuotedString);
66-
PARSERDEF_NO_DATA(QuotedString);
66+
PARSERDEF(QuotedString);
6767
PARSERDEF_NO_DATA(ISODate);
6868
PARSERDEF_NO_DATA(Time12hr);
6969
PARSERDEF_NO_DATA(Time24hr);

src/pdag.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
* @brief Implementation of the parse dag object.
44
* @class ln_pdag pdag.h
55
*//*
6-
* Copyright 2015 by Rainer Gerhards and Adiscon GmbH.
6+
* Copyright 2015-2021 by Rainer Gerhards and Adiscon GmbH.
77
*
88
* Released under ASL 2.0.
99
*/
@@ -83,7 +83,7 @@ static struct ln_parser_info parser_lookup_table[] = {
8383
PARSER_ENTRY_NO_DATA("alpha", Alpha, 32),
8484
PARSER_ENTRY_NO_DATA("rest", Rest, 255),
8585
PARSER_ENTRY_NO_DATA("op-quoted-string", OpQuotedString, 64),
86-
PARSER_ENTRY_NO_DATA("quoted-string", QuotedString, 64),
86+
PARSER_ENTRY("quoted-string", QuotedString, 64),
8787
PARSER_ENTRY_NO_DATA("date-iso", ISODate, 8),
8888
PARSER_ENTRY_NO_DATA("time-24hr", Time24hr, 8),
8989
PARSER_ENTRY_NO_DATA("time-12hr", Time12hr, 8),

tests/Makefile.am

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,9 @@ TESTS_SHELLSCRIPTS = \
6363
strict_prefix_actual_sample1.sh \
6464
strict_prefix_matching_1.sh \
6565
strict_prefix_matching_2.sh \
66+
quote-string-escape.sh \
67+
quote-string-dash-empty.sh \
68+
quote-string-quote-optional.sh \
6669
field_string.sh \
6770
field_string_perm_chars.sh \
6871
field_string_lazy_matching.sh \

tests/quote-string-dash-empty.sh

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
#!/bin/bash
2+
# added 2021-06-07 by Rainer Gerhards
3+
# This file is part of the liblognorm project, released under ASL 2.0
4+
# Test for the "option.dashIsEmpty" parameter of the quoted-string parser.
# The helper commands used below are expected to come from exec.sh.
. $srcdir/exec.sh
5+
no_solaris10
6+
test_def $0 "quoted string with dash"
7+
8+
# Case 1: option.dashIsEmpty is set, so the input "-" must yield an empty string.
add_rule 'version=2'
9+
add_rule 'rule=:%
10+
{"type":"quoted-string", "name":"str", "option.dashIsEmpty":True}
11+
%'
12+
13+
execute '"-"'
14+
assert_output_json_eq '{ "str": ""}'
15+
16+
# Case 2: same input without the option; the match is kept verbatim, quotes included.
reset_rules
17+
add_rule 'version=2'
18+
add_rule 'rule=:%
19+
{"type":"quoted-string", "name":"str"}
20+
%'
21+
22+
execute '"-"'
23+
assert_output_json_eq '{ "str": "\"-\""}'
24+
25+
26+
cleanup_tmp_files
27+

tests/quote-string-escape.sh

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
#!/bin/bash
2+
# added 2021-05-15 by Rainer Gerhards
3+
# This file is part of the liblognorm project, released under ASL 2.0
4+
# Test for the "option.supportEscape" parameter of the quoted-string parser.
# The helper commands used below are expected to come from exec.sh.
. $srcdir/exec.sh
5+
no_solaris10
6+
7+
test_def $0 "quoted string with escapes"
8+
add_rule 'version=2'
9+
add_rule 'rule=:%
10+
{"type":"quoted-string", "name":"str", "option.supportEscape":True}
11+
%'
12+
13+
# The backslash-escaped quote in the middle must not end the string; the
# expected value keeps the surrounding quotes and the backslash verbatim.
execute '"word1\"word2"'
14+
assert_output_json_eq '{ "str": "\"word1\\\"word2\""}'
15+
16+
17+
cleanup_tmp_files
18+
tests/quote-string-quote-optional.sh

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
#!/bin/bash
2+
# added 2021-06-07 by Rainer Gerhards
3+
# This file is part of the liblognorm project, released under ASL 2.0
4+
# Test for the "option.quotesOptional" parameter of the quoted-string parser.
# The helper commands used below are expected to come from exec.sh.
. $srcdir/exec.sh
5+
no_solaris10
6+
test_def $0 "quoted string with quotesOptional"
7+
8+
add_rule 'version=2'
9+
add_rule 'rule=:%
10+
{"type":"quoted-string", "name":"str", "option.quotesOptional":True}
11+
%'
12+
13+
# Quoted input still matches; the quotes stay part of the extracted value.
execute '"line 1"'
14+
assert_output_json_eq '{ "str": "\"line 1\""}'
15+
16+
# Unquoted input is accepted as well when quotes are optional.
execute 'line2'
17+
assert_output_json_eq '{ "str": "line2"}'
18+
19+
20+
cleanup_tmp_files

0 commit comments

Comments
 (0)