Skip to content

Commit e750d02

Browse files
authored
Merge pull request #2575 from haxtibal/tdmg/fix_lark_newline
fix(backend/sdoc_source_code): let lark scan CR to NEWLINE, not to NODE_STRING_VALUE
2 parents: 2d2f4e9 + 066f5e2; commit: e750d02

File tree

2 files changed

+42
-2
lines changed

2 files changed

+42
-2
lines changed

strictdoc/backend/sdoc_source_code/comment_parser/marker_lexer.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ class GrammarTemplate(Template):
2424
node_name: /##CUSTOM_TAGS/
2525
node_multiline_value: (_WS_INLINE? | (_WS_INLINE NODE_STRING_VALUE)) NEWLINE (NODE_STRING_VALUE NEWLINE)*
2626
27-
NODE_STRING_VALUE.2: /(?![ ]*##RELATION_MARKER_START)(?!\\s*(##CUSTOM_TAGS):\\s)(?!\\s*##NODE_FIELD_END_MARKER)[^\n\r]+/x
27+
NODE_STRING_VALUE.2: /(?![ ]*##RELATION_MARKER_START)(?!\\s*(##CUSTOM_TAGS):\\s)(?!\\s*##NODE_FIELD_END_MARKER)[^\\n\\r]+/
2828
2929
_NORMAL_STRING_NO_MARKER_NO_NODE: /(?!\\s*##RELATION_MARKER_START)((?!\\s*(##CUSTOM_TAGS):\\s)|(##RESERVED_KEYWORDS)).+/
3030
""")

tests/unit/strictdoc/backend/sdoc_source_code/test_marker_lexer.py

Lines changed: 41 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44

55
from typing import Any, List, Optional
66

7-
from lark import Tree
7+
from lark import Token, Tree
88

99
from strictdoc.backend.sdoc_source_code.comment_parser.marker_lexer import (
1010
MarkerLexer,
@@ -489,6 +489,46 @@ def test_34_node_text_starting_below() -> None:
489489
)
490490

491491

492+
def test_35a_node_value_newline_lf() -> None:
493+
"""Verify that LF goes into a separate NEWLINE token."""
494+
input_string = "FIELD: value1\nvalue2\n"
495+
tree = MarkerLexer.parse(input_string, custom_tags={"FIELD"})
496+
497+
node_fields = list(tree.find_data("node_field"))
498+
499+
assert_node_field(
500+
node_fields[0],
501+
"FIELD",
502+
[
503+
Token("NODE_STRING_VALUE", "value1"),
504+
Token("NEWLINE", "\n"),
505+
Token("NODE_STRING_VALUE", "value2"),
506+
Token("NEWLINE", "\n"),
507+
],
508+
)
509+
510+
511+
def test_35b_node_value_newline_crlf() -> None:
512+
"""Verify that CR LF goes into a separate NEWLINE token."""
513+
input_string = "FIELD: value1\r\nvalue2\r\n"
514+
tree = MarkerLexer.parse(input_string, custom_tags={"FIELD"})
515+
516+
node_fields = list(tree.find_data("node_field"))
517+
518+
assert_node_field(
519+
node_fields[0],
520+
"FIELD",
521+
[
522+
Token("NODE_STRING_VALUE", "value1"),
523+
Token("NEWLINE", "\r\n"),
524+
Token("NODE_STRING_VALUE", "value2"),
525+
# The implicit \r\n => \n conversion at EOF is not nice, but doesn't hurt (yet).
526+
# We need to improve EOF handling in lark grammar to get rid of it.
527+
Token("NEWLINE", "\n"),
528+
],
529+
)
530+
531+
492532
def test_60_exclude_reserved_keywords() -> None:
493533
input_string = """
494534
FIXME: This can likely replace _weak below with no problem.

0 commit comments

Comments
 (0)