@@ -39,11 +39,10 @@ def map_lookback(
39
39
40
40
"""
41
41
results = [initial ]
42
- if len (items ) > 1 :
43
- for item in items [1 :]:
44
- result = func (results [- 1 ], item )
45
- results .append (result )
46
- return results
42
+ for item in items :
43
+ result = func (results [- 1 ], item )
44
+ results .append (result )
45
+ return results [1 :]
47
46
48
47
49
48
# ======================================================================================
@@ -59,6 +58,8 @@ def map_lookback(
59
58
class Syntax (Enum ):
60
59
"""Non-standard line types."""
61
60
61
+ CODE_BULLETED = "CODE_BULLETED"
62
+ CODE_NUMBERED = "CODE_NUMBERED"
62
63
LIST_BULLETED = "LIST_BULLETED"
63
64
LIST_NUMBERED = "LIST_NUMBERED"
64
65
START_MARKED = "START_MARKED"
@@ -74,11 +75,10 @@ def from_content(cls, content: str) -> Syntax | None:
74
75
75
76
"""
76
77
if match := RE_LIST_ITEM .fullmatch (content ):
77
- return (
78
- cls .LIST_NUMBERED
79
- if match ["bullet" ] not in {"-" , "*" }
80
- else cls .LIST_BULLETED
81
- )
78
+ is_numbered = match ["bullet" ] not in {"-" , "*" }
79
+ if match ["item" ].startswith ("```" ):
80
+ return cls .CODE_NUMBERED if is_numbered else cls .CODE_BULLETED
81
+ return cls .LIST_NUMBERED if is_numbered else cls .LIST_BULLETED
82
82
if any (content .startswith (f"{ marker } " ) for marker in MARKERS ):
83
83
return cls .START_MARKED
84
84
if content .startswith ("```" ):
@@ -88,6 +88,10 @@ def from_content(cls, content: str) -> Syntax | None:
88
88
return None
89
89
90
90
91
+ SYNTAX_CODE_LIST = {Syntax .CODE_BULLETED , Syntax .CODE_NUMBERED }
92
+ """The start of a code block, which is also the start of a list."""
93
+
94
+
91
95
class ParsedLine (NamedTuple ):
92
96
"""Parsed Line of text."""
93
97
@@ -114,7 +118,11 @@ def _is_parent_line(prev_line: LineResult, parsed: ParsedLine) -> bool:
114
118
115
119
def _is_peer_list_line (prev_line : LineResult , parsed : ParsedLine ) -> bool :
116
120
"""Return True if two list items share the same scope and level."""
117
- list_types = {Syntax .LIST_BULLETED , Syntax .LIST_NUMBERED }
121
+ list_types = {
122
+ * SYNTAX_CODE_LIST ,
123
+ Syntax .LIST_BULLETED ,
124
+ Syntax .LIST_NUMBERED ,
125
+ }
118
126
return (
119
127
parsed .syntax in list_types
120
128
and prev_line .parsed .syntax in list_types
@@ -204,7 +212,10 @@ class BlockIndent(NamedTuple):
204
212
def _parse_code_block (last : BlockIndent | None , line : LineResult ) -> BlockIndent | None :
205
213
"""Identify fenced or indented sections internally referred to as 'code blocks'."""
206
214
result = last
207
- if line .parsed .syntax == Syntax .EDGE_CODE :
215
+ if line .parsed .syntax in {
216
+ * SYNTAX_CODE_LIST ,
217
+ Syntax .EDGE_CODE ,
218
+ }:
208
219
# On first edge, start tracking a code block
209
220
# on the second edge, stop tracking
210
221
result = (
@@ -260,7 +271,11 @@ def _parse_semantic_indent(
260
271
# PLANNED: This works, but is very confusing
261
272
line , code_indent = tin
262
273
263
- if not line .parsed .content or code_indent is not None :
274
+ if (
275
+ not line .parsed .content
276
+ or code_indent is not None
277
+ or line .parsed .syntax in SYNTAX_CODE_LIST
278
+ ):
264
279
result = SemanticIndent .EMPTY
265
280
266
281
elif line .parsed .syntax == Syntax .LIST_BULLETED :
@@ -305,6 +320,12 @@ def _format_new_indent(line: LineResult, block_indent: BlockIndent | None) -> st
305
320
line_indent = line .parsed .indent ,
306
321
)
307
322
result = DEFAULT_INDENT * depth + extra_indent
323
+ elif line .parents and line .parents [- 1 ].syntax in SYNTAX_CODE_LIST :
324
+ depth = len (line .parents ) - 1
325
+ match = RE_LIST_ITEM .fullmatch (line .parents [- 1 ].content )
326
+ assert match # for pyright
327
+ extra_indent = " " * (len (match ["bullet" ]) + 1 )
328
+ result = DEFAULT_INDENT * depth + extra_indent
308
329
else :
309
330
result = DEFAULT_INDENT * len (line .parents )
310
331
return result
@@ -313,9 +334,9 @@ def _format_new_indent(line: LineResult, block_indent: BlockIndent | None) -> st
313
334
class ParsedText (NamedTuple ):
314
335
"""Intermediary result of parsing the text."""
315
336
316
- lines : list [LineResult ]
317
337
new_lines : list [tuple [str , str ]]
318
338
# Used only for debugging purposes
339
+ debug_original_lines : list [LineResult ]
319
340
debug_block_indents : list [BlockIndent | None ]
320
341
321
342
@@ -327,7 +348,7 @@ def _format_new_content(line: LineResult, inc_numbers: bool, is_code: bool) -> s
327
348
Syntax .LIST_NUMBERED ,
328
349
}:
329
350
list_match = RE_LIST_ITEM .fullmatch (line .parsed .content )
330
- assert list_match is not None # for pyright
351
+ assert list_match # for pyright
331
352
new_bullet = "-"
332
353
if line .parsed .syntax == Syntax .LIST_NUMBERED :
333
354
first_peer = (
@@ -341,6 +362,25 @@ def _format_new_content(line: LineResult, inc_numbers: bool, is_code: bool) -> s
341
362
return new_content
342
363
343
364
365
def _insert_newlines(
    parsed_lines: list[LineResult],
    zipped_lines: list[tuple[str, str]],
) -> list[tuple[str, str]]:
    """Return `zipped_lines` with a blank entry added after list-owned code fences.

    Each element of `zipped_lines` is paired with the element of `parsed_lines`
    at the same position (the caller builds `zipped_lines` by zipping the new
    indents with the new contents). Every pair is copied to the output in
    order. When the parsed line is a code fence edge (`Syntax.EDGE_CODE`) whose
    nearest parent is a list item that opened a code block (a member of
    `SYNTAX_CODE_LIST`), an empty `("", "")` entry is appended right after it.

    Args:
        parsed_lines: Parsed lines, one per entry in `zipped_lines`.
        zipped_lines: The `(indent, content)` pair produced for each parsed line.

    Returns:
        A new list; neither input list is modified.

    """
    # NOTE(review): `zip_equal` is defined/imported outside this block; it is
    # presumably the strict zip that raises on a length mismatch - confirm.
    blank_entry = ("", "")
    padded: list[tuple[str, str]] = []
    for parsed_line, indent_and_content in zip_equal(parsed_lines, zipped_lines):
        padded.append(indent_and_content)
        if parsed_line.parsed.syntax != Syntax.EDGE_CODE:
            continue
        # Only fences nested directly under a "code list" item get the extra
        # blank entry; fences anywhere else are left untouched.
        ancestors = parsed_line.parents
        if ancestors and ancestors[-1].syntax in SYNTAX_CODE_LIST:
            padded.append(blank_entry)
    return padded
382
+
383
+
344
384
def parse_text (* , text : str , inc_numbers : bool , use_sem_break : bool ) -> ParsedText :
345
385
"""Post-processor to normalize lists.
346
386
@@ -354,7 +394,7 @@ def parse_text(*, text: str, inc_numbers: bool, use_sem_break: bool) -> ParsedTe
354
394
code_indents = map_lookback (_parse_code_block , lines , None )
355
395
html_indents = [
356
396
# Any indents initiated from within a `code_block_indents` should be ignored
357
- indent if indent and code_indents [indent .start_line ] is None else None
397
+ indent if ( indent and code_indents [indent .start_line ] is None ) else None
358
398
for indent in map_lookback (_parse_html_line , lines , None )
359
399
]
360
400
# When both, code_indents take precedence
@@ -379,9 +419,10 @@ def parse_text(*, text: str, inc_numbers: bool, use_sem_break: bool) -> ParsedTe
379
419
),
380
420
]
381
421
422
+ new_lines = _insert_newlines (lines , [* zip_equal (new_indents , new_contents )])
382
423
return ParsedText (
383
- lines = lines ,
384
- new_lines = [ * zip_equal ( new_indents , new_contents )] ,
424
+ new_lines = new_lines ,
425
+ debug_original_lines = lines ,
385
426
debug_block_indents = block_indents ,
386
427
)
387
428
@@ -390,9 +431,9 @@ def parse_text(*, text: str, inc_numbers: bool, use_sem_break: bool) -> ParsedTe
390
431
# Outputs string result
391
432
392
433
393
- def _join (parsed_text : ParsedText ) -> str :
434
+ def _join (* , new_lines : list [ tuple [ str , str ]] ) -> str :
394
435
"""Join ParsedText into a single string representation."""
395
- new_indents , new_contents = unzip (parsed_text . new_lines )
436
+ new_indents , new_contents = unzip (new_lines )
396
437
397
438
new_indents_iter = new_indents
398
439
@@ -434,4 +475,4 @@ def normalize_list(
434
475
inc_numbers = inc_numbers ,
435
476
use_sem_break = check_if_align_semantic_breaks_in_lists (),
436
477
)
437
- return _join (parsed_text = parsed_text )
478
+ return _join (new_lines = parsed_text . new_lines )
0 commit comments