@@ -82,11 +82,7 @@ function Base.read!(io::IO, model::Model{T}) where {T}
8282 " No file contents are allowed after `end`." ,
8383 )
8484 else
85- _throw_parse_error (
86- state,
87- token,
88- " Parsing this section is not supported by the current reader." ,
89- )
85+ _expect (state, token, _TOKEN_KEYWORD)
9086 end
9187 end
9288 # if keyword != :END
@@ -190,8 +186,8 @@ This dictionary makes `_TokenKind` to a string that is used when printing error
190186messages. The string must complete the sentence "We expected this token to be ".
191187"""
192188const _KIND_TO_MSG = Dict {_TokenKind,String} (
193- _TOKEN_KEYWORD => " a keyword" ,
194- _TOKEN_IDENTIFIER => " a variable name " ,
189+ _TOKEN_KEYWORD => " a keyword defining a new section " ,
190+ _TOKEN_IDENTIFIER => " an identifier " ,
195191 _TOKEN_NUMBER => " a number" ,
196192 _TOKEN_ADDITION => " the symbol `+`" ,
197193 _TOKEN_SUBTRACTION => " the symbol `-`" ,
@@ -206,7 +202,7 @@ const _KIND_TO_MSG = Dict{_TokenKind,String}(
206202 _TOKEN_COLON => " the symbol `:`" ,
207203 _TOKEN_IMPLIES => " the symbol `->`" ,
208204 _TOKEN_NEWLINE => " a new line" ,
209- _TOKEN_UNKNOWN => " some unknown symbol " ,
205+ _TOKEN_UNKNOWN => " a token " ,
210206)
211207
212208"""
@@ -296,27 +292,66 @@ struct ParseError <: Exception
296292 msg:: String
297293end
298294
295+ _is_utf8_start (b:: UInt8 ) = b < 0x80 || (0xC0 <= b <= 0xF7 )
296+
297+ function _get_line_about_pos (io:: IO , pos:: Int , width:: Int )
298+ seek (io, max (0 , pos - width))
299+ # This byte might be an invalid or continuation byte. We need to seek
300+ # forward until we reach a new valid byte.
301+ while ! _is_utf8_start (peek (io, UInt8))
302+ read (io, UInt8)
303+ end
304+ char = Char[]
305+ mark = 0
306+ while ! eof (io) && position (io) <= pos + width
307+ c = read (io, Char)
308+ if c == ' \n '
309+ if position (io) < pos
310+ empty! (char)
311+ else
312+ break
313+ end
314+ elseif c != ' \r '
315+ push! (char, c)
316+ end
317+ if position (io) == pos
318+ mark = length (char)
319+ end
320+ end
321+ if mark == 0
322+ mark = length (char) + 1
323+ end
324+ return String (char), mark
325+ end
326+
299327function _throw_parse_error (state:: _LexerState , token:: _Token , msg:: String )
300- offset = min (40 , token. pos)
301- seek (state. io, token. pos - offset)
302- line = String (read (state. io, 2 * offset))
303- i = something (findprev (' \n ' , line, offset- 1 ), 0 )
304- j = something (findnext (' \n ' , line, offset), length (line) + 1 )
305- extract = replace (line[(i+ 1 ): (j- 1 )], " \r " => " " )
306- help = string (extract, " \n " , " " ^ (offset - i + - 1 ), " ^\n " , msg)
328+ line, mark = _get_line_about_pos (state. io, token. pos, 40 )
329+ help = string (
330+ line,
331+ " \n " ,
332+ " " ^ (mark - 1 ),
333+ " ^\n Got " ,
334+ _KIND_TO_MSG[token. kind],
335+ _with_value (token. value),
336+ " . " ,
337+ msg,
338+ )
307339 return throw (ParseError (state. line, help))
308340end
309341
310342function Base. showerror (io:: IO , err:: ParseError )
311343 return print (io, " Error parsing LP file on line $(err. line) :\n " , err. msg)
312344end
313345
346+ _with_value (:: Nothing ) = " "
347+ _with_value (x:: String ) = string (" with value `" , x, " `" )
348+
314349function _expect (state:: _LexerState , token:: _Token , kind:: _TokenKind )
315350 if token. kind != kind
316351 _throw_parse_error (
317352 state,
318353 token,
319- string ( " We expected this token to be " , _KIND_TO_MSG[kind]),
354+ " We expected this token to be $( _KIND_TO_MSG[kind]). " ,
320355 )
321356 end
322357 return token
@@ -346,6 +381,9 @@ function Base.read(state::_LexerState, ::Type{_Token})
346381 end
347382 popfirst! (state. peek_tokens)
348383 state. current_token = token
384+ if token. kind == _TOKEN_NEWLINE
385+ state. line += 1
386+ end
349387 return token
350388end
351389
@@ -445,7 +483,6 @@ function _peek_inner(state::_LexerState)
445483 while (c = peek (state, Char)) != = nothing
446484 pos = position (state. io)
447485 if c == ' \n '
448- state. line += 1
449486 _ = read (state, Char)
450487 return _Token (_TOKEN_NEWLINE, nothing , pos)
451488 elseif isspace (c) # Whitespace
@@ -480,11 +517,17 @@ function _peek_inner(state::_LexerState)
480517 _ = read (state, Char) # Allow <=, >=, and ==
481518 end
482519 return _Token (op, nothing , pos)
520+ elseif _is_identifier (c) && ! _is_starting_identifier (c)
521+ _throw_parse_error (
522+ state,
523+ _Token (_TOKEN_UNKNOWN, " $c " , pos),
524+ " This character is not supported at the start of an identifier." ,
525+ )
483526 else
484527 _throw_parse_error (
485528 state,
486529 _Token (_TOKEN_UNKNOWN, " $c " , pos),
487- " This character is not supported an LP file." ,
530+ " This character is not supported in an LP file." ,
488531 )
489532 end
490533 end
@@ -574,7 +617,11 @@ function _parse_number(state::_LexerState, cache::_ReadCache{T})::T where {T}
574617 _expect (state, token, _TOKEN_NUMBER)
575618 ret = tryparse (T, token. value)
576619 if ret === nothing
577- _throw_parse_error (state, token, " We expected this to be a number." )
620+ _throw_parse_error (
621+ state,
622+ _Token (_TOKEN_IDENTIFIER, token. value, token. pos),
623+ " We were unable to parse this as a number." ,
624+ )
578625 end
579626 return ret
580627end
@@ -751,7 +798,7 @@ function _parse_term(
751798 return _throw_parse_error (
752799 state,
753800 token,
754- " Got $(_KIND_TO_MSG[token . kind]) , but we expected this to be a new term in the expression." ,
801+ " We expected this to be a new term in the expression." ,
755802 )
756803end
757804
@@ -815,9 +862,7 @@ function _parse_set_suffix(state, cache)
815862 p = read (state, _Token)
816863 if _compare_case_insenstive (p, " free" )
817864 return nothing
818- end
819- _skip_newlines (state)
820- if p. kind == _TOKEN_GREATER_THAN
865+ elseif p. kind == _TOKEN_GREATER_THAN
821866 rhs = _parse_number (state, cache)
822867 return MOI. GreaterThan (rhs)
823868 elseif p. kind == _TOKEN_LESS_THAN
@@ -830,7 +875,7 @@ function _parse_set_suffix(state, cache)
830875 _throw_parse_error (
831876 state,
832877 p,
833- " We expected this to be an inequality like `>=`, `<=` , or `==`." ,
878+ " We expected this to be an inequality like `>=`, `<=`, or `==`." ,
834879 )
835880 end
836881end
@@ -856,7 +901,7 @@ function _parse_set_prefix(state, cache)
856901 _throw_parse_error (
857902 state,
858903 p,
859- " We expected this to be an inequality like `>=`, `<=` , or `==`." ,
904+ " We expected this to be an inequality like `>=`, `<=`, or `==`." ,
860905 )
861906 end
862907end
0 commit comments