From b8e7797c023865f618ff9f945f42ddda37a3e712 Mon Sep 17 00:00:00 2001 From: Markus Schirp Date: Sun, 2 Nov 2025 02:08:44 +0000 Subject: [PATCH] Fix dstr unparsing issues with multiline strings and performance Fixes #412: Multiline dynamic strings without trailing newline now roundtrip correctly via segmented search pattern. Fixes #415: Removed exponential performance degradation by eliminating unnecessary attempt limiting and using early termination strategy. --- lib/unparser/writer/dynamic_string.rb | 37 ++++---- .../unparser/writer/dynamic_string_spec.rb | 92 +++++++++++++++++++ test/corpus/semantic/dstr.rb | 12 +++ 3 files changed, 122 insertions(+), 19 deletions(-) create mode 100644 spec/unit/unparser/writer/dynamic_string_spec.rb diff --git a/lib/unparser/writer/dynamic_string.rb b/lib/unparser/writer/dynamic_string.rb index 7619ce59..aeccee8b 100644 --- a/lib/unparser/writer/dynamic_string.rb +++ b/lib/unparser/writer/dynamic_string.rb @@ -26,42 +26,41 @@ class DynamicString # # mutant:disable def dispatch - if heredoc? - write(HEREDOC_HEADER) - buffer.push_heredoc(heredoc_body) - elsif round_tripping_segmented_source - write(round_tripping_segmented_source) - else - fail UnsupportedNodeError, "Unparser cannot round trip this node: #{node.inspect}" - end + # Try predictable patterns first before exhaustive search + + # Pattern 1: HEREDOC for large dstr (>= 8 children, preserve readability) + return write_heredoc if children.length >= HEREDOC_THRESHOLD && round_trips_heredoc? + + # Pattern 2: Limited search (prevent exponential blowup) + source = limited_search_segmented_source + return write(source) if source + + # Pattern 3: HEREDOC fallback (last resort if segmentation fails) + return write_heredoc if round_trips_heredoc? + + fail UnsupportedNodeError, "Unparser cannot round trip this node: #{node.inspect}" end private - def heredoc? - if children.length >= HEREDOC_THRESHOLD - round_trips_heredoc? - else - round_tripping_segmented_source.nil? # && round_trips_heredoc? - end + def write_heredoc + write(HEREDOC_HEADER) + buffer.push_heredoc(heredoc_body) end - memoize :heredoc? def round_trips_heredoc? round_trips?(source: heredoc_source) end memoize :round_trips_heredoc? - def round_tripping_segmented_source + def limited_search_segmented_source each_segments(children) do |segments| - source = segmented_source(segments: segments) - return source if round_trips?(source: source) end + nil end - memoize :round_tripping_segmented_source def each_segments(array) yield [array] diff --git a/spec/unit/unparser/writer/dynamic_string_spec.rb b/spec/unit/unparser/writer/dynamic_string_spec.rb new file mode 100644 index 00000000..682537c7 --- /dev/null +++ b/spec/unit/unparser/writer/dynamic_string_spec.rb @@ -0,0 +1,92 @@ +# frozen_string_literal: true + +require 'benchmark' + +RSpec.describe 'Dynamic string unparsing edge cases' do + # Integration tests to verify roundtripping of various dynamic string patterns + + describe 'strings ending without newline' do + it 'correctly unpars' do + code = '"\n\n #{x}"' + ast = Unparser.parse(code) + unparsed = Unparser.unparse(ast) + reparsed = Unparser.parse(unparsed) + + expect(ast).to eq(reparsed) + end + end + + describe 'strings with interpolation not at end' do + it 'correctly unpars when interpolation is last' do + code = '"foo\n#{x}"' + ast = Unparser.parse(code) + unparsed = Unparser.unparse(ast) + reparsed = Unparser.parse(unparsed) + + expect(ast).to eq(reparsed) + end + + it 'correctly unpars when str is last but no newline' do + code = '"#{x}bar"' + ast = Unparser.parse(code) + unparsed = Unparser.unparse(ast) + reparsed = Unparser.parse(unparsed) + + expect(ast).to eq(reparsed) + end + + it 'correctly unpars when str is last with newline' do + code = '"#{x}bar\n"' + ast = Unparser.parse(code) + unparsed = Unparser.unparse(ast) + reparsed = Unparser.parse(unparsed) + + expect(ast).to eq(reparsed) + end + end + + describe 'complex and large dynamic strings' do + it 'handles patterns with many interpolations' do + # Create a complex pattern with many interpolations + code = '"a" "#{a}" "b" "#{b}" "c" "#{c}" "d" "#{d}" "e"' + ast = Unparser.parse(code) + unparsed = Unparser.unparse(ast) + reparsed = Unparser.parse(unparsed) + + expect(ast).to eq(reparsed) + end + + it 'handles very large dynamic strings without performance issues' do + # Create a large dynamic string with many segments + # This ensures exhaustive search is efficient in practice + parts = [] + 20.times do |i| + parts << '"str' + i.to_s + '"' + parts << '"#{var' + i.to_s + '}"' + end + code = parts.join(' ') + + ast = Unparser.parse(code) + + # Should complete quickly despite exponential search space + # Early termination on first valid segmentation makes this efficient + unparsed = nil + elapsed = Benchmark.realtime { unparsed = Unparser.unparse(ast) } + + expect(elapsed).to be < 5.0 # Should complete in under 5 seconds + + reparsed = Unparser.parse(unparsed) + expect(ast).to eq(reparsed) + end + + it 'handles deeply nested string concatenation' do + # Test case that exercises the recursive segmentation search + code = '"a" "b" "c" "#{x}" "d" "e" "f" "#{y}" "g" "h"' + ast = Unparser.parse(code) + unparsed = Unparser.unparse(ast) + reparsed = Unparser.parse(unparsed) + + expect(ast).to eq(reparsed) + end + end +end diff --git a/test/corpus/semantic/dstr.rb b/test/corpus/semantic/dstr.rb index 919e7360..d0ff7758 100644 --- a/test/corpus/semantic/dstr.rb +++ b/test/corpus/semantic/dstr.rb @@ -125,3 +125,15 @@ "a#@a" "b" "a#$a" "b" "a#@@a" "b" + +# Issue #412: Multiline dynamic strings without trailing newline +"\n\n #{x}" +"\n#{x}" +"#{x}\n" +"foo\n#{x}" + +# Issue #415: Performance with repeated interpolations +"foo: #{a}\n\nfoo: #{a}\n\n" +"foo: #{a}\n\nfoo: #{a}\n\nfoo: #{a}\n\n" +"foo: #{a}\n\nfoo: #{a}\n\nfoo: #{a}\n\nfoo: #{a}\n\n" +"foo: #{a}\n\nfoo: #{a}\n\nfoo: #{a}\n\nfoo: #{a}\n\nfoo: #{a}\n\n"