Skip to content

Commit d86eea1

Browse files
authored
Merge pull request #3 from JuliaData/td-preallocate-common-patterns
Preallocate common patterns
2 parents 8f1d9d2 + 7d48783 commit d86eea1

File tree

1 file changed

+20
-7
lines changed

1 file changed

+20
-7
lines changed

src/NewlineLexers.jl

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,21 @@ end
8080
end
8181
@inline _internal_memchr(ptr::Ptr{UInt8}, len::UInt, byte::UInt8) = ScanByte.memchr(ScanByte.SizedMemory(Ptr{UInt8}(ptr), len), byte)
8282

83+
const _DOUBLEQUOTE64 = Vec(ntuple(_->VecElement(UInt8('"')), 64))
84+
const _SINGLEQUOTE64 = Vec(ntuple(_->VecElement(UInt8('\'')), 64))
85+
const _BACKSLASH64 = Vec(ntuple(_->VecElement(UInt8('\\')), 64))
86+
const _LINEFEED64 = Vec(ntuple(_->VecElement(UInt8('\n')), 64))
87+
const _CARRIAGERETURN64 = Vec(ntuple(_->VecElement(UInt8('\r')), 64))
88+
89+
function _get_char_vec64(c::UInt8)
90+
c == UInt8('"') && return _DOUBLEQUOTE64
91+
c == UInt8('\'') && return _SINGLEQUOTE64
92+
c == UInt8('\\') && return _BACKSLASH64
93+
c == UInt8('\n') && return _LINEFEED64
94+
c == UInt8('\r') && return _CARRIAGERETURN64
95+
return Vec(ntuple(_->VecElement(c), 64))
96+
end
97+
8398
# Rules for Lexer{Q,Q,Q} when there is ambiguity between quotechar and escapechar:
8499
# we use `prev_escaped` and `prev_in_string` to disambiguate the 4 cases:
85100
# ---------+--------------------+--------------------+--------------------------------------
@@ -109,23 +124,21 @@ mutable struct Lexer{E,OQ,CQ,NL,IO_t}
109124
closequotechar::Union{Char,UInt8}=UInt8('"'),
110125
newline::Union{Char,UInt8}=UInt8('\n'),
111126
) where {IO_t}
112-
NL = Vec(ntuple(_->VecElement(UInt8(newline)), 64))
113-
E = Vec(ntuple(_->VecElement(UInt8(escapechar)), 64))
127+
NL = _get_char_vec64(UInt8(newline))
128+
E = _get_char_vec64(UInt8(escapechar))
114129
if escapechar == openquotechar
115130
Q = E
116131
else
117-
Q = Vec(ntuple(_->VecElement(UInt8(openquotechar)), 64))
132+
Q = _get_char_vec64(UInt8(openquotechar))
118133
end
119134
return new{UInt8(escapechar), UInt8(openquotechar), UInt8(closequotechar), UInt8(newline), IO_t}(
120135
io, E, Q, NL, UInt(0), UInt(0), false
121136
)
122137
end
123138

124139
function Lexer(io::IO_t, ::Nothing, newline::Union{Char,UInt8}=UInt8('\n')) where {IO_t}
125-
NL = Vec(ntuple(_->VecElement(UInt8(newline)), 64))
126-
E = Vec(ntuple(_->VecElement(0xff), 64))
127-
Q = E
128-
return new{Nothing, Nothing, Nothing, UInt8(newline), IO_t}(io, E, Q, NL, UInt(0), UInt(0), false)
140+
NL = _get_char_vec64(UInt8(newline))
141+
return new{Nothing, Nothing, Nothing, UInt8(newline), IO_t}(io, NL, NL, NL, UInt(0), UInt(0), false)
129142
end
130143
end
131144

0 commit comments

Comments
 (0)