@@ -39,11 +39,10 @@ def map_lookback(
3939
4040 """
4141 results = [initial ]
42- if len (items ) > 1 :
43- for item in items [1 :]:
44- result = func (results [- 1 ], item )
45- results .append (result )
46- return results
42+ for item in items :
43+ result = func (results [- 1 ], item )
44+ results .append (result )
45+ return results [1 :]
4746
4847
4948# ======================================================================================
@@ -59,6 +58,8 @@ def map_lookback(
5958class Syntax (Enum ):
6059 """Non-standard line types."""
6160
61+ CODE_BULLETED = "CODE_BULLETED"
62+ CODE_NUMBERED = "CODE_NUMBERED"
6263 LIST_BULLETED = "LIST_BULLETED"
6364 LIST_NUMBERED = "LIST_NUMBERED"
6465 START_MARKED = "START_MARKED"
@@ -74,11 +75,10 @@ def from_content(cls, content: str) -> Syntax | None:
7475
7576 """
7677 if match := RE_LIST_ITEM .fullmatch (content ):
77- return (
78- cls .LIST_NUMBERED
79- if match ["bullet" ] not in {"-" , "*" }
80- else cls .LIST_BULLETED
81- )
78+ is_numbered = match ["bullet" ] not in {"-" , "*" }
79+ if match ["item" ].startswith ("```" ):
80+ return cls .CODE_NUMBERED if is_numbered else cls .CODE_BULLETED
81+ return cls .LIST_NUMBERED if is_numbered else cls .LIST_BULLETED
8282 if any (content .startswith (f"{ marker } " ) for marker in MARKERS ):
8383 return cls .START_MARKED
8484 if content .startswith ("```" ):
@@ -88,6 +88,10 @@ def from_content(cls, content: str) -> Syntax | None:
8888 return None
8989
9090
91+ SYNTAX_CODE_LIST = {Syntax .CODE_BULLETED , Syntax .CODE_NUMBERED }
92+ """The start of a code block, which is also the start of a list."""
93+
94+
9195class ParsedLine (NamedTuple ):
9296 """Parsed Line of text."""
9397
@@ -114,7 +118,11 @@ def _is_parent_line(prev_line: LineResult, parsed: ParsedLine) -> bool:
114118
115119def _is_peer_list_line (prev_line : LineResult , parsed : ParsedLine ) -> bool :
116120 """Return True if two list items share the same scope and level."""
117- list_types = {Syntax .LIST_BULLETED , Syntax .LIST_NUMBERED }
121+ list_types = {
122+ * SYNTAX_CODE_LIST ,
123+ Syntax .LIST_BULLETED ,
124+ Syntax .LIST_NUMBERED ,
125+ }
118126 return (
119127 parsed .syntax in list_types
120128 and prev_line .parsed .syntax in list_types
@@ -204,7 +212,10 @@ class BlockIndent(NamedTuple):
204212def _parse_code_block (last : BlockIndent | None , line : LineResult ) -> BlockIndent | None :
205213 """Identify fenced or indented sections internally referred to as 'code blocks'."""
206214 result = last
207- if line .parsed .syntax == Syntax .EDGE_CODE :
215+ if line .parsed .syntax in {
216+ * SYNTAX_CODE_LIST ,
217+ Syntax .EDGE_CODE ,
218+ }:
208219 # On first edge, start tracking a code block
209220 # on the second edge, stop tracking
210221 result = (
@@ -260,7 +271,11 @@ def _parse_semantic_indent(
260271 # PLANNED: This works, but is very confusing
261272 line , code_indent = tin
262273
263- if not line .parsed .content or code_indent is not None :
274+ if (
275+ not line .parsed .content
276+ or code_indent is not None
277+ or line .parsed .syntax in SYNTAX_CODE_LIST
278+ ):
264279 result = SemanticIndent .EMPTY
265280
266281 elif line .parsed .syntax == Syntax .LIST_BULLETED :
@@ -305,6 +320,12 @@ def _format_new_indent(line: LineResult, block_indent: BlockIndent | None) -> st
305320 line_indent = line .parsed .indent ,
306321 )
307322 result = DEFAULT_INDENT * depth + extra_indent
323+ elif line .parents and line .parents [- 1 ].syntax in SYNTAX_CODE_LIST :
324+ depth = len (line .parents ) - 1
325+ match = RE_LIST_ITEM .fullmatch (line .parents [- 1 ].content )
326+ assert match # for pyright
327+ extra_indent = " " * (len (match ["bullet" ]) + 1 )
328+ result = DEFAULT_INDENT * depth + extra_indent
308329 else :
309330 result = DEFAULT_INDENT * len (line .parents )
310331 return result
@@ -313,9 +334,9 @@ def _format_new_indent(line: LineResult, block_indent: BlockIndent | None) -> st
313334class ParsedText (NamedTuple ):
314335 """Intermediary result of parsing the text."""
315336
316- lines : list [LineResult ]
317337 new_lines : list [tuple [str , str ]]
318338 # Used only for debugging purposes
339+ debug_original_lines : list [LineResult ]
319340 debug_block_indents : list [BlockIndent | None ]
320341
321342
@@ -327,7 +348,7 @@ def _format_new_content(line: LineResult, inc_numbers: bool, is_code: bool) -> s
327348 Syntax .LIST_NUMBERED ,
328349 }:
329350 list_match = RE_LIST_ITEM .fullmatch (line .parsed .content )
330- assert list_match is not None # for pyright
351+ assert list_match # for pyright
331352 new_bullet = "-"
332353 if line .parsed .syntax == Syntax .LIST_NUMBERED :
333354 first_peer = (
@@ -341,6 +362,25 @@ def _format_new_content(line: LineResult, inc_numbers: bool, is_code: bool) -> s
341362 return new_content
342363
343364
def _insert_newlines(
    parsed_lines: list[LineResult],
    zipped_lines: list[tuple[str, str]],
) -> list[tuple[str, str]]:
    """Return a copy of `zipped_lines` with blank entries added after code edges in lists.

    Each entry of `zipped_lines` is kept in order. An extra `("", "")` pair is
    placed directly after an entry when the matching parsed line is an
    `EDGE_CODE` line whose nearest parent has a `SYNTAX_CODE_LIST` syntax
    (a list item that also opens a code block).

    Args:
        parsed_lines: parsed lines, consumed in lockstep with `zipped_lines`.
        zipped_lines: `(indent, content)` pairs, one per parsed line.

    Returns:
        A new list; neither input is modified.

    """
    # NOTE(review): presumably `zip_equal` rejects inputs of different lengths — confirm
    blank_pair = ("", "")
    output: list[tuple[str, str]] = []
    for parsed_line, pair in zip_equal(parsed_lines, zipped_lines):
        output.append(pair)
        if parsed_line.parsed.syntax != Syntax.EDGE_CODE:
            continue
        ancestors = parsed_line.parents
        # Only the innermost parent decides whether a blank entry follows
        if ancestors and ancestors[-1].syntax in SYNTAX_CODE_LIST:
            output.append(blank_pair)
    return output
382+
383+
344384def parse_text (* , text : str , inc_numbers : bool , use_sem_break : bool ) -> ParsedText :
345385 """Post-processor to normalize lists.
346386
@@ -354,7 +394,7 @@ def parse_text(*, text: str, inc_numbers: bool, use_sem_break: bool) -> ParsedTe
354394 code_indents = map_lookback (_parse_code_block , lines , None )
355395 html_indents = [
356396 # Any indents initiated from within a `code_block_indents` should be ignored
357- indent if indent and code_indents [indent .start_line ] is None else None
397+ indent if ( indent and code_indents [indent .start_line ] is None ) else None
358398 for indent in map_lookback (_parse_html_line , lines , None )
359399 ]
360400 # When both, code_indents take precedence
@@ -379,9 +419,10 @@ def parse_text(*, text: str, inc_numbers: bool, use_sem_break: bool) -> ParsedTe
379419 ),
380420 ]
381421
422+ new_lines = _insert_newlines (lines , [* zip_equal (new_indents , new_contents )])
382423 return ParsedText (
383- lines = lines ,
384- new_lines = [ * zip_equal ( new_indents , new_contents )] ,
424+ new_lines = new_lines ,
425+ debug_original_lines = lines ,
385426 debug_block_indents = block_indents ,
386427 )
387428
@@ -390,9 +431,9 @@ def parse_text(*, text: str, inc_numbers: bool, use_sem_break: bool) -> ParsedTe
390431# Outputs string result
391432
392433
393- def _join (parsed_text : ParsedText ) -> str :
434+ def _join (* , new_lines : list [ tuple [ str , str ]] ) -> str :
394435 """Join ParsedText into a single string representation."""
395- new_indents , new_contents = unzip (parsed_text . new_lines )
436+ new_indents , new_contents = unzip (new_lines )
396437
397438 new_indents_iter = new_indents
398439
@@ -434,4 +475,4 @@ def normalize_list(
434475 inc_numbers = inc_numbers ,
435476 use_sem_break = check_if_align_semantic_breaks_in_lists (),
436477 )
437- return _join (parsed_text = parsed_text )
478+ return _join (new_lines = parsed_text . new_lines )
0 commit comments