Skip to content

Commit d6931fd

Browse files
committed
Fix dstr unparsing
* This is an entirely new approach. * Instead to find the "correct" dstr segments we simply try all and unparse the first one that round trips. * This so far guarantees we always get good concrete syntax, but it can be time intensive as the combinatoric space of possible dynamic string sequence is quadratic with the dstr children size. * For this reason we try above (currently) dstr children to unparse as heredoc first. * Passes the entire corpus and fixes bugs. [fix #249]
1 parent 2163503 commit d6931fd

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

56 files changed

+1274
-844
lines changed

Gemfile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,6 @@
22

33
source 'https://rubygems.org'
44

5+
gem 'mutant', path: '../mutant'
6+
57
gemspec

Gemfile.lock

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,13 @@
1+
PATH
2+
remote: ../mutant
3+
specs:
4+
mutant (0.12.4)
5+
diff-lcs (~> 1.3)
6+
parser (~> 3.3.0)
7+
regexp_parser (~> 2.9.0)
8+
sorbet-runtime (~> 0.5.0)
9+
unparser (~> 0.6.14)
10+
111
PATH
212
remote: .
313
specs:
@@ -12,14 +22,8 @@ GEM
1222
diff-lcs (1.5.1)
1323
json (2.7.2)
1424
language_server-protocol (3.17.0.3)
15-
mutant (0.12.3)
16-
diff-lcs (~> 1.3)
17-
parser (~> 3.3.0)
18-
regexp_parser (~> 2.9.0)
19-
sorbet-runtime (~> 0.5.0)
20-
unparser (~> 0.6.14)
21-
mutant-rspec (0.12.3)
22-
mutant (= 0.12.3)
25+
mutant-rspec (0.12.4)
26+
mutant (= 0.12.4)
2327
rspec-core (>= 3.8.0, < 4.0.0)
2428
parallel (1.25.1)
2529
parser (3.3.2.0)
@@ -70,7 +74,7 @@ PLATFORMS
7074
ruby
7175

7276
DEPENDENCIES
73-
mutant (~> 0.12.2)
77+
mutant!
7478
mutant-rspec (~> 0.12.2)
7579
rspec (~> 3.9)
7680
rspec-core (~> 3.9)

bin/corpus

Lines changed: 93 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,15 @@
11
#!/usr/bin/env ruby
22
# frozen_string_literal: true
33

4+
require 'etc'
45
require 'mutant'
56
require 'optparse'
7+
require 'pathname'
68
require 'unparser'
79

10+
Thread.abort_on_exception = true
11+
Thread.report_on_exception = true
12+
813
module Unparser
914
module Corpus
1015
ROOT = Pathname.new(__dir__).parent
@@ -17,16 +22,85 @@ module Unparser
1722
#
1823
# @return [Boolean]
1924
def verify
25+
puts("Verifiying: #{name}")
2026
checkout
21-
command = %W[unparser #{repo_path}]
22-
exclude.each do |name|
23-
command.push('--ignore', repo_path.join(name).to_s)
27+
28+
paths = Pathname.glob(Pathname.new(repo_path).join('**/*.rb'))
29+
30+
driver = Mutant::Parallel.async(
31+
config: Mutant::Parallel::Config.new(
32+
block: method(:verify_path),
33+
jobs: Etc.nprocessors,
34+
on_process_start: ->(*) {},
35+
process_name: 'unparser-corpus-test',
36+
sink: Sink.new,
37+
source: Mutant::Parallel::Source::Array.new(jobs: paths),
38+
thread_name: 'unparser-corpus-test',
39+
timeout: nil
40+
),
41+
world: Mutant::WORLD
42+
)
43+
44+
loop do
45+
status = driver.wait_timeout(1)
46+
47+
puts("Processed: #{status.payload.total}")
48+
49+
status.payload.errors.each do |report|
50+
puts report
51+
fail
52+
end
53+
54+
break if status.done?
2455
end
25-
Kernel.system(*command)
56+
57+
true
2658
end
2759

2860
private
2961

62+
class Sink
63+
include Mutant::Parallel::Sink
64+
65+
attr_reader :errors, :total
66+
67+
def initialize
68+
@errors = []
69+
@total = 0
70+
end
71+
72+
def stop?
73+
!@errors.empty?
74+
end
75+
76+
def status
77+
self
78+
end
79+
80+
def response(response)
81+
if response.error
82+
Mutant::WORLD.stderr.puts(response.log)
83+
fail response.error
84+
end
85+
86+
@total += 1
87+
88+
if response.result
89+
@errors << response.result
90+
end
91+
end
92+
end
93+
94+
def verify_path(path)
95+
validation = Validation.from_path(path)
96+
97+
if original_syntax_error?(validation) || generated_encoding_error?(validation) || validation.success?
98+
return
99+
end
100+
101+
validation.report
102+
end
103+
30104
def checkout
31105
TMP.mkdir unless TMP.directory?
32106

@@ -50,6 +124,21 @@ module Unparser
50124
TMP.join(name)
51125
end
52126

127+
private
128+
129+
# This happens if the original source contained a non UTF charset meta comment.
130+
# These are not exposed to the AST in a way unparser could know about to generate a non UTF-8
131+
# target and emit that meta comment itself.
132+
# For the purpose of corpus testing these cases are ignored.
133+
def generated_encoding_error?(validation)
134+
exception = validation.generated_node.from_left { return false }
135+
exception.instance_of?(Parser::SyntaxError) && exception.message.eql?('literal contains escape sequences incompatible with UTF-8')
136+
end
137+
138+
def original_syntax_error?(validation)
139+
validation.original_node.from_left { return false }.instance_of?(Parser::SyntaxError)
140+
end
141+
53142
def system(arguments)
54143
return if Kernel.system(*arguments)
55144

bin/parser-round-trip-test

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,8 @@ class Test
4040
:rubies
4141
)
4242

43-
EXPECT_FAILURE = {}.freeze
43+
STATIC_LOCAL_VARIABLES = %i[foo bar baz].freeze
44+
EXPECT_FAILURE = {}.freeze
4445

4546
def legacy_attributes
4647
default_builder_attributes.reject do |attribute_name, value|
@@ -77,9 +78,9 @@ class Test
7778

7879
# rubocop:disable Metrics/AbcSize
7980
def validation
80-
identification = name.to_s
81+
identification = name.to_s
8182

82-
generated_source = Unparser.unparse_either(node)
83+
generated_source = Unparser.unparse_either(node, static_local_variables: STATIC_LOCAL_VARIABLES)
8384
.fmap { |string| string.dup.force_encoding(parser_source.encoding).freeze }
8485

8586
generated_node = generated_source.bind do |source|
@@ -99,7 +100,7 @@ class Test
99100

100101
def parser
101102
Unparser.parser.tap do |parser|
102-
%w[foo bar baz].each(&parser.static_env.method(:declare))
103+
STATIC_LOCAL_VARIABLES.each(&parser.static_env.method(:declare))
103104
end
104105
end
105106

lib/unparser.rb

Lines changed: 81 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,11 @@ def initialize
3030
end
3131
end
3232

33+
# Parsed AST with additional details
34+
class AST
35+
include Anima.new(:comments, :explicit_encoding, :node, :static_local_variables)
36+
end
37+
3338
EMPTY_STRING = ''.freeze
3439
EMPTY_ARRAY = [].freeze
3540

@@ -44,39 +49,85 @@ def initialize(message, node)
4449
@node = node
4550
freeze
4651
end
47-
end
52+
end # InvalidNodeError
53+
54+
# Error raised when unparser encounders AST it cannot generate source for that would parse to the same AST.
55+
class UnsupportedNodeError < RuntimeError
56+
end # UnsupportedNodeError
4857

4958
# Unparse an AST (and, optionally, comments) into a string
5059
#
5160
# @param [Parser::AST::Node, nil] node
52-
# @param [Array] comment_array
61+
# @param [Array] comments
62+
# @param [Encoding, nil] explicit_encoding
63+
# @param [Set<Symbol>] static_local_variables
5364
#
5465
# @return [String]
5566
#
5667
# @raise InvalidNodeError
5768
# if the node passed is invalid
5869
#
5970
# @api public
60-
def self.unparse(node, comment_array = [])
61-
return '' if node.nil?
71+
def self.unparse(node, comments: EMPTY_ARRAY, explicit_encoding: nil, static_local_variables: Set.new)
72+
unparse_ast(
73+
AST.new(
74+
comments: comments,
75+
explicit_encoding: explicit_encoding,
76+
node: node,
77+
static_local_variables: static_local_variables
78+
)
79+
)
80+
end
81+
82+
# Unparse an AST
83+
#
84+
# @param [AST] ast
85+
#
86+
# @return [String]
87+
#
88+
# @raise InvalidNodeError
89+
# if the node passed is invalid
90+
#
91+
# @raise UnsupportedNodeError
92+
# if the node passed is valid but unparser cannot unparse it
93+
#
94+
# @api public
95+
def self.unparse_ast(ast)
96+
return EMPTY_STRING if ast.node.nil?
97+
98+
local_variable_scope = AST::LocalVariableScope.new(
99+
node: ast.node,
100+
static_local_variables: ast.static_local_variables
101+
)
62102

63103
Buffer.new.tap do |buffer|
64104
Emitter::Root.new(
65-
buffer,
66-
node,
67-
Comments.new(comment_array)
105+
buffer: buffer,
106+
comments: Comments.new(ast.comments),
107+
explicit_encoding: ast.explicit_encoding,
108+
local_variable_scope: local_variable_scope,
109+
node: ast.node
68110
).write_to_buffer
69111
end.content
70112
end
71113

114+
# Unparse AST either
115+
#
116+
# @param [AST] ast
117+
#
118+
# @return [Either<Exception,String>]
119+
def self.unparse_ast_either(ast)
120+
Either.wrap_error(Exception) { unparse_ast(ast) }
121+
end
122+
72123
# Unparse with validation
73124
#
74125
# @param [Parser::AST::Node, nil] node
75-
# @param [Array] comment_array
126+
# @param [Array] comments
76127
#
77128
# @return [Either<Validation,String>]
78-
def self.unparse_validate(node, comment_array = [])
79-
generated = unparse(node, comment_array)
129+
def self.unparse_validate(node, comments: EMPTY_ARRAY)
130+
generated = unparse(node, comments)
80131
validation = Validation.from_string(generated)
81132

82133
if validation.success?
@@ -86,44 +137,41 @@ def self.unparse_validate(node, comment_array = [])
86137
end
87138
end
88139

89-
# Unparse capturing errors
90-
#
91-
# This is mostly useful for writing testing tools against unparser.
92-
#
93-
# @param [Parser::AST::Node, nil] node
94-
#
95-
# @return [Either<Exception, String>]
96-
def self.unparse_either(node)
97-
Either.wrap_error(Exception) { unparse(node) }
98-
end
99-
100140
# Parse string into AST
101141
#
102142
# @param [String] source
103143
#
104144
# @return [Parser::AST::Node, nil]
105-
def self.parse(source)
106-
parser.parse(buffer(source))
145+
def self.parse(source, identification: '(string)')
146+
parse_ast(source, identification: identification).node
107147
end
108148

109149
# Parse string into either syntax error or AST
110150
#
111151
# @param [String] source
112152
#
113-
# @return [Either<Parser::SyntaxError, (Parser::ASTNode, nil)>]
114-
def self.parse_either(source)
115-
Either.wrap_error(Parser::SyntaxError) do
116-
parser.parse(buffer(source))
153+
# @return [Either<Exception, (Parser::ASTNode, nil)>]
154+
def self.parse_ast_either(source, identification: '(source)')
155+
Either.wrap_error(Exception) do
156+
parse_ast(source, identification: identification)
117157
end
118158
end
119159

120-
# Parse string into AST, with comments
160+
# Parse source with ast details
121161
#
122162
# @param [String] source
123163
#
124-
# @return [Parser::AST::Node]
125-
def self.parse_with_comments(source)
126-
parser.parse_with_comments(buffer(source))
164+
# @return [AST]
165+
def self.parse_ast(source, identification: '(source)', static_local_variables: Set.new)
166+
explicit_encoding = Parser::Source::Buffer.recognize_encoding(source.dup.force_encoding(Encoding::BINARY))
167+
node, comments = parser.parse_with_comments(buffer(source, identification: identification))
168+
169+
AST.new(
170+
comments: comments,
171+
explicit_encoding: explicit_encoding,
172+
node: node,
173+
static_local_variables: static_local_variables
174+
)
127175
end
128176

129177
# Parser instance that produces AST unparser understands
@@ -210,6 +258,7 @@ def self.buffer(source, identification = '(string)')
210258
require 'unparser/emitter/root'
211259
require 'unparser/emitter/send'
212260
require 'unparser/emitter/simple'
261+
require 'unparser/emitter/string'
213262
require 'unparser/emitter/splat'
214263
require 'unparser/emitter/super'
215264
require 'unparser/emitter/undef'
@@ -224,6 +273,7 @@ def self.buffer(source, identification = '(string)')
224273
require 'unparser/writer'
225274
require 'unparser/writer/binary'
226275
require 'unparser/writer/dynamic_string'
276+
require 'unparser/writer/regexp'
227277
require 'unparser/writer/resbody'
228278
require 'unparser/writer/rescue'
229279
require 'unparser/writer/send'

0 commit comments

Comments
 (0)