Skip to content

Commit 8f62857

Browse files
committed
implement new "dashIsEmpty" parameter for string parser
1 parent 2053745 commit 8f62857

File tree

4 files changed

+50
-1
lines changed

4 files changed

+50
-1
lines changed

doc/configuration.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -682,6 +682,16 @@ it becomes resolvable, and ``f`` will contain "12" and ``r`` will contain ":34 5
682682
This also shows the risk associated, as the result obtained may not necessarily be
683683
what was intended.
684684

685+
option.dashIsEmpty
686+
~~~~~~~~~~~~~~~~~~
687+
688+
This parameter, if True, causes a string consisting of just a single dash ('-'), quoted or not, to be treated as
689+
being empty. The default value is False.
690+
691+
This parameter is meant for e.g. processing web log data where a dash indicates a missing
692+
value but the user does not want to populate an analysis backend with dashes where "empty value" is
693+
meant.
694+
685695

686696
word
687697
####

src/parser.c

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
* liblognorm - a fast samples-based log normalization library
3-
* Copyright 2010-2018 by Rainer Gerhards and Adiscon GmbH.
3+
* Copyright 2010-2021 by Rainer Gerhards and Adiscon GmbH.
44
*
55
* Modified by Pavel Levshin ([email protected]) in 2013
66
*
@@ -3204,6 +3204,7 @@ struct data_String {
32043204
unsigned esc_md : 2;
32053205
} flags;
32063206
enum { ST_MATCH_EXACT = 0, ST_MATCH_LAZY = 1} matching;
3207+
int dashIsEmpty;
32073208
char qchar_begin;
32083209
char qchar_end;
32093210
char perm_chars[256]; // TODO: make this bit-wise, so we need only 32 bytes
@@ -3373,6 +3374,14 @@ PARSER_Parse(String)
33733374
if(value != NULL) {
33743375
size_t strt;
33753376
size_t len;
3377+
if(data->dashIsEmpty) {
3378+
if( (bHaveQuotes && *parsed == 3 && !strncmp(npb->str+(*offs), "\"-\"", 3))
3379+
|| (!bHaveQuotes && *parsed == 1 && npb->str[*offs] == '-') ) {
3380+
*value = json_object_new_string_len("", 0);
3381+
r = 0;
3382+
goto done; /* shortcut exit */
3383+
}
3384+
}
33763385
if(bHaveQuotes && data->flags.strip_quotes) {
33773386
strt = *offs + 1;
33783387
len = *parsed - 2; /* del begin AND end quote! */
@@ -3498,6 +3507,8 @@ PARSER_Construct(String)
34983507
r = LN_BADCONFIG;
34993508
goto done;
35003509
}
3510+
} else if(!strcasecmp(key, "option.dashIsEmpty")) {
3511+
data->dashIsEmpty = json_object_get_boolean(val);
35013512
} else {
35023513
ln_errprintf(ctx, 0, "invalid param for hexnumber: %s",
35033514
json_object_to_json_string(val));

tests/Makefile.am

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ TESTS_SHELLSCRIPTS = \
6767
field_string_perm_chars.sh \
6868
field_string_lazy_matching.sh \
6969
field_string_doc_sample_lazy.sh \
70+
field_string_dashIsEmpty.sh \
7071
field_number.sh \
7172
field_number-fmt_number.sh \
7273
field_number_maxval.sh \

tests/field_string_dashIsEmpty.sh

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
#!/bin/bash
2+
# added 2021-06-07 by Rainer Gerhards
3+
# This file is part of the liblognorm project, released under ASL 2.0
4+
. $srcdir/exec.sh
5+
no_solaris10
6+
test_def $0 "quoted string with dash"
7+
8+
add_rule 'version=2'
9+
add_rule 'rule=:%
10+
{"type":"string", "name":"str", "option.dashIsEmpty":True}
11+
%'
12+
13+
execute '"-"'
14+
assert_output_json_eq '{ "str": ""}'
15+
16+
reset_rules
17+
add_rule 'version=2'
18+
add_rule 'rule=:%
19+
{"type":"quoted-string", "name":"str"}
20+
%'
21+
22+
execute '"-"'
23+
assert_output_json_eq '{ "str": "\"-\""}'
24+
25+
26+
cleanup_tmp_files
27+

0 commit comments

Comments
 (0)