From a6c08b1f41226d0a44a2210a5031529818f114e7 Mon Sep 17 00:00:00 2001 From: davidanthoff Date: Fri, 14 Oct 2022 13:39:37 +0000 Subject: [PATCH] Format files using DocumentFormat --- benchmark/benchmarks.jl | 18 +- docs/make.jl | 10 +- src/VectorBackedStrings.jl | 7 +- src/csv.jl | 168 ++-- src/field.jl | 184 ++-- src/guesstype.jl | 19 +- src/lib/compat.jl | 1 - src/lib/date-tryparse-internal.jl | 14 +- src/lib/result.jl | 4 +- src/record.jl | 28 +- src/utf8optimizations.jl | 146 ++-- src/util.jl | 137 +-- test/runtests.jl | 1356 ++++++++++++++--------------- test/test_vectorbackedstrings.jl | 58 +- 14 files changed, 1086 insertions(+), 1064 deletions(-) diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl index 0839a4c..ced4067 100644 --- a/benchmark/benchmarks.jl +++ b/benchmark/benchmarks.jl @@ -53,7 +53,7 @@ percentagestring = "35.35%" percentagestringlen = our_lastindex(percentagestring) somestring = "foo something," somestringlen = our_lastindex(somestring) -somequotedstring = "\"Owner 2 ”Vicepresident\"\"\"" +somequotedstring = "\"Owner 2 ”Vicepresident\"\"\"" somequotedstringlen = our_lastindex(somequotedstring) longfloat64str = "2344345.1232353459389238738435" @@ -65,14 +65,14 @@ datetimestr = "1970-02-02 02:20:20" datetimestrlen = our_lastindex(datetimestr) SUITE["util"]["tryparsenext"] = BenchmarkGroup() -SUITE["util"]["tryparsenext"]["NumericFloat64"] = @benchmarkable TextParse.tryparsenext($(TextParse.Numeric(Float64)), $float64str,1,$float64strlen) -SUITE["util"]["tryparsenext"]["LongNumericFloat64"] = @benchmarkable TextParse.tryparsenext($(TextParse.Numeric(Float64)), $longfloat64str,1,$longfloat64strlen) -SUITE["util"]["tryparsenext"]["UInt64"] = @benchmarkable TextParse.tryparsenext($(TextParse.Numeric(UInt64)), $intstr,1,$intstrlen) -SUITE["util"]["tryparsenext"]["NegInt64"] = @benchmarkable TextParse.tryparsenext($(TextParse.Numeric(Int64)), $negintstr,1,$negintstrlen) -SUITE["util"]["tryparsenext"]["Percentage"] = @benchmarkable TextParse.tryparsenext($(TextParse.Percentage()), $percentagestring,1,$percentagestringlen, TextParse.default_opts) -SUITE["util"]["tryparsenext"]["StringToken"] = @benchmarkable TextParse.tryparsenext($(TextParse.StringToken(String)), $somestring,1,$somestringlen, TextParse.default_opts) -SUITE["util"]["tryparsenext"]["DateTimeToken"] = @benchmarkable TextParse.tryparsenext($tok, $datetimestr,1,$datetimestrlen, $opts) -SUITE["util"]["tryparsenext"]["QuotedStringToken"] = @benchmarkable TextParse.tryparsenext($(Quoted(String,quotechar='"', escapechar='"')), $somequotedstring) +SUITE["util"]["tryparsenext"]["NumericFloat64"] = @benchmarkable TextParse.tryparsenext($(TextParse.Numeric(Float64)), $float64str, 1, $float64strlen) +SUITE["util"]["tryparsenext"]["LongNumericFloat64"] = @benchmarkable TextParse.tryparsenext($(TextParse.Numeric(Float64)), $longfloat64str, 1, $longfloat64strlen) +SUITE["util"]["tryparsenext"]["UInt64"] = @benchmarkable TextParse.tryparsenext($(TextParse.Numeric(UInt64)), $intstr, 1, $intstrlen) +SUITE["util"]["tryparsenext"]["NegInt64"] = @benchmarkable TextParse.tryparsenext($(TextParse.Numeric(Int64)), $negintstr, 1, $negintstrlen) +SUITE["util"]["tryparsenext"]["Percentage"] = @benchmarkable TextParse.tryparsenext($(TextParse.Percentage()), $percentagestring, 1, $percentagestringlen, TextParse.default_opts) +SUITE["util"]["tryparsenext"]["StringToken"] = @benchmarkable TextParse.tryparsenext($(TextParse.StringToken(String)), $somestring, 1, $somestringlen, TextParse.default_opts) +SUITE["util"]["tryparsenext"]["DateTimeToken"] = @benchmarkable TextParse.tryparsenext($tok, $datetimestr, 1, $datetimestrlen, $opts) +SUITE["util"]["tryparsenext"]["QuotedStringToken"] = @benchmarkable TextParse.tryparsenext($(Quoted(String, quotechar='"', escapechar='"')), $somequotedstring) somefieldstring = " 12,3" f = TextParse.fromtype(Int) diff --git a/docs/make.jl b/docs/make.jl index dcaf5b4..7858509 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -1,12 +1,12 @@ using Documenter, TextParse makedocs( - modules = [TextParse], - sitename = "TextParse.jl", - authors = "Shashi Gowda", - pages = Any["Home" => "index.md"], + modules=[TextParse], + sitename="TextParse.jl", + authors="Shashi Gowda", + pages=Any["Home"=>"index.md"], ) deploydocs( - repo = "github.com/JuliaComputing/TextParse.jl.git" + repo="github.com/JuliaComputing/TextParse.jl.git" ) diff --git a/src/VectorBackedStrings.jl b/src/VectorBackedStrings.jl index 3b26e39..ec202dc 100644 --- a/src/VectorBackedStrings.jl +++ b/src/VectorBackedStrings.jl @@ -36,7 +36,7 @@ Base.@propagate_inbounds function Base.iterate(s::VectorBackedUTF8String, i::Int i > ncodeunits(s) && return nothing b = codeunit(s, i) u = UInt32(b) << 24 - Base.between(b, 0x80, 0xf7) || return reinterpret(Char, u), i+1 + Base.between(b, 0x80, 0xf7) || return reinterpret(Char, u), i + 1 return our_next_continued(s, i, u) end @@ -57,8 +57,9 @@ function our_next_continued(s::VectorBackedUTF8String, i::Int, u::UInt32) ((i += 1) > n) | (u < 0xf0000000) && @goto ret @inbounds b = codeunit(s, i) b & 0xc0 == 0x80 || @goto ret - u |= UInt32(b); i += 1 -@label ret + u |= UInt32(b) + i += 1 + @label ret return reinterpret(Char, u), i end diff --git a/src/csv.jl b/src/csv.jl index 76a496f..2d21fe9 100644 --- a/src/csv.jl +++ b/src/csv.jl @@ -4,13 +4,13 @@ using Mmap ismissingtype(T) = Missing <: T ismissingeltype(T) = missingtype(eltype(T)) -const UnionMissing{T} = Union{Missing, T} +const UnionMissing{T} = Union{Missing,T} export csvread const current_record = Ref{Any}() const debug = Ref{Bool}(false) -const StringLike = Union{AbstractString, StrRange} +const StringLike = Union{AbstractString,StrRange} optionsiter(opts::AbstractDict) = opts optionsiter(opts::AbstractVector) = enumerate(opts) @@ -21,7 +21,7 @@ function getbyheader(opts, header, i::AbstractString) if !(i in header) throw(ArgumentError("Unknown column $i")) end - getbyheader(opts, header, something(findfirst(isequal(i), header), length(header)+1)) + getbyheader(opts, header, something(findfirst(isequal(i), header), length(header) + 1)) end function optionsiter(opts::AbstractDict, header) @@ -79,7 +79,7 @@ Read CSV from `file`. Returns a tuple of 2 elements: function csvread(file::String, delim=','; kwargs...) cols, canonnames, parsers, finalrows = _csvread_f(file, delim; kwargs...) - return ((col for col in cols if col!==nothing)...,), [colname for (col, colname) in zip(cols, canonnames) if col!==nothing] + return ((col for col in cols if col !== nothing)...,), [colname for (col, colname) in zip(cols, canonnames) if col !== nothing] end function csvread(file::IOStream, delim=','; kwargs...) @@ -98,7 +98,7 @@ end function _csvread(str::AbstractString, delim=','; kwargs...) cols, canonnames, parsers, finalrows = _csvread_internal(str, delim; kwargs...) - return ((col for col in cols if col!==nothing)...,), [colname for (col, colname) in zip(cols, canonnames) if col!==nothing] + return ((col for col in cols if col !== nothing)...,), [colname for (col, colname) in zip(cols, canonnames) if col !== nothing] end function _csvread_f(file::AbstractString, delim=','; kwargs...) @@ -122,17 +122,17 @@ function _csvread_f(file::AbstractString, delim=','; kwargs...) end end -const ColsPool = OrderedDict{Union{Int, String}, Union{AbstractVector, Nothing}} +const ColsPool = OrderedDict{Union{Int,String},Union{AbstractVector,Nothing}} function csvread(files::AbstractVector{T}, - delim=','; kwargs...) where {T<:AbstractString} + delim=','; kwargs...) where {T<:AbstractString} @assert !isempty(files) colspool = ColsPool() cols, headers, parsers, nrows = try _csvread_f(files[1], delim; - noresize=true, - colspool=colspool, - kwargs...) + noresize=true, + colspool=colspool, + kwargs...) catch err println(stderr, "Error parsing $(files[1])") rethrow(err) @@ -141,13 +141,13 @@ function csvread(files::AbstractVector{T}, count = Int[nrows] prev = nrows for f in files[2:end] - if !isempty(cols) && length(cols[findfirst(i->i!==nothing, cols)]) == nrows + if !isempty(cols) && length(cols[findfirst(i -> i !== nothing, cols)]) == nrows n = ceil(Int, nrows * sqrt(2)) resizecols(colspool, n) end cols, headers, parsers, nrows = try - _csvread_f(f, delim; rowno=nrows+1, colspool=colspool, - prevheaders=headers, noresize=true, prev_parsers=parsers, kwargs...) + _csvread_f(f, delim; rowno=nrows + 1, colspool=colspool, + prevheaders=headers, noresize=true, prev_parsers=parsers, kwargs...) catch err println(stderr, "Error parsing $(f)") rethrow(err) @@ -157,34 +157,34 @@ function csvread(files::AbstractVector{T}, end resizecols(colspool, nrows) - ((i[2] for i in colspool if i[2]!==nothing)...,), [i[1] for i in colspool if i[2]!==nothing], count + ((i[2] for i in colspool if i[2] !== nothing)...,), [i[1] for i in colspool if i[2] !== nothing], count end # read CSV in a string function _csvread_internal(str::AbstractString, delim=','; - spacedelim=false, - quotechar='"', - escapechar='"', - commentchar=nothing, - stringtype=String, - stringarraytype=StringArray, - noresize=false, - rowno::Int=1, - prevheaders=nothing, - pooledstrings=nothing, - skiplines_begin=0, - samecols=nothing, - header_exists=true, - nastrings=NA_STRINGS, - colnames=String[], - #ignore_empty_rows=true, - colspool = ColsPool(), - row_estimate = !isempty(colspool) ? - length(first(i for i in colspool if i[2]!==nothing)[2]) : 0, - prev_parsers = nothing, - colparsers=[], - filename=nothing, - type_detect_rows=20) + spacedelim=false, + quotechar='"', + escapechar='"', + commentchar=nothing, + stringtype=String, + stringarraytype=StringArray, + noresize=false, + rowno::Int=1, + prevheaders=nothing, + pooledstrings=nothing, + skiplines_begin=0, + samecols=nothing, + header_exists=true, + nastrings=NA_STRINGS, + colnames=String[], + #ignore_empty_rows=true, + colspool=ColsPool(), + row_estimate=!isempty(colspool) ? + length(first(i for i in colspool if i[2] !== nothing)[2]) : 0, + prev_parsers=nothing, + colparsers=[], + filename=nothing, + type_detect_rows=20) if pooledstrings === true @warn("pooledstrings argument has been removed") @@ -198,8 +198,9 @@ function _csvread_internal(str::AbstractString, delim=','; lineno = 0 y = iterate(str, pos) - if y!==nothing - c = y[1]; i = y[2] + if y !== nothing + c = y[1] + i = y[2] if c == '\ufeff' pos = i end @@ -210,7 +211,7 @@ function _csvread_internal(str::AbstractString, delim=','; while lineno < skiplines_begin pos = getlineend(str, pos) y2 = iterate(str, pos) - y2===nothing && error("Internal error.") + y2 === nothing && error("Internal error.") pos = y2[2] pos, lines = eatnewlines(str, pos, len) lineno += lines @@ -249,8 +250,8 @@ function _csvread_internal(str::AbstractString, delim=','; # seed guesses using those from previous file guess, pos1 = guesscolparsers(str, len, canonnames, opts, - pos, type_detect_rows, colparsers, stringarraytype, - commentchar, nastrings, prev_parsers) + pos, type_detect_rows, colparsers, stringarraytype, + commentchar, nastrings, prev_parsers) if isempty(canonnames) canonnames = Any["Column$i" for i in 1:length(guess)] end @@ -268,12 +269,12 @@ function _csvread_internal(str::AbstractString, delim=','; # the last field is delimited by line end if !isempty(guess) - guess[end] = swapinner(guess[end], guess[end]; eoldelim = true) + guess[end] = swapinner(guess[end], guess[end]; eoldelim=true) rec = Record((guess...,)) else parsers = prev_parsers === nothing ? Dict() : copy(prev_parsers) rec = Record(()) - return (), String[], parsers, rowno-1 + return (), String[], parsers, rowno - 1 end if isempty(canonnames) @@ -284,7 +285,7 @@ function _csvread_internal(str::AbstractString, delim=','; if row_estimate == 0 # just an estimate, with some margin - row_estimate = ceil(Int, (len-pos) / ((pos1-pos)/max(1, type_detect_rows)) * sqrt(2)) + row_estimate = ceil(Int, (len - pos) / ((pos1 - pos) / max(1, type_detect_rows)) * sqrt(2)) end if isempty(colspool) @@ -298,13 +299,13 @@ function _csvread_internal(str::AbstractString, delim=','; c = get(canonnames, i, i) f = rec.fields[i] if haskey(colspool, c) - if eltype(colspool[c]) == fieldtype(f) || (fieldtype(f) <: StrRange && eltype(colspool[c]) <: AbstractString) || colspool[c]===nothing + if eltype(colspool[c]) == fieldtype(f) || (fieldtype(f) <: StrRange && eltype(colspool[c]) <: AbstractString) || colspool[c] === nothing return colspool[c] else try return colspool[c] = promote_column(colspool[c], - rowno-1, - fieldtype(f), stringtype, stringarraytype) + rowno - 1, + fieldtype(f), stringtype, stringarraytype) catch err error("Could not convert column $c of eltype $(eltype(colspool[c])) to eltype $(fieldtype(f))") end @@ -318,14 +319,14 @@ function _csvread_internal(str::AbstractString, delim=','; for k in missingcols if !ismissingtype(eltype(colspool[k])) && !(eltype(colspool[k]) <: StringLike) colspool[k] = promote_column(colspool[k], - rowno-1, - UnionMissing{eltype(colspool[k])}, stringtype, stringarraytype) + rowno - 1, + UnionMissing{eltype(colspool[k])}, stringtype, stringarraytype) end end cols = (_cols...,) end - if any(c->c!==nothing && length(c) != row_estimate, cols) + if any(c -> c !== nothing && length(c) != row_estimate, cols) resizecols(colspool, row_estimate) end @@ -336,7 +337,7 @@ function _csvread_internal(str::AbstractString, delim=','; @label retry try finalrows = parsefill!(str, opts, rec, row_estimate, cols, colspool, - pos, lineno, rowno, len, commentchar) + pos, lineno, rowno, len, commentchar) if !noresize resizecols(colspool, finalrows) end @@ -380,9 +381,9 @@ function _csvread_internal(str::AbstractString, delim=','; if length(failed_strs) != length(cols[err.colno:end]) fn = err.filename === nothing ? "" : "In $(err.filename) " @warn("$(fn)line $(err.lineno) has $(length(err.colno) + length(failed_strs) - 1) fields but $(length(cols)) fields are expected. Skipping row.") - pos = last(rng)+1 + pos = last(rng) + 1 rowno = err.rowno - lineno = err.lineno+1 + lineno = err.lineno + 1 @goto retry end @@ -392,7 +393,7 @@ function _csvread_internal(str::AbstractString, delim=','; f = rec.fields[colidx] name = get(canonnames, colidx, colidx) c = promote_field(s, f, col, err, nastrings, stringtype, stringarraytype, opts) - if c[2]==:reparserequired + if c[2] == :reparserequired reparse_needed[colidx] = true c = c[1], stringarraytype{stringtype,1}(undef, row_estimate) end @@ -407,7 +408,7 @@ function _csvread_internal(str::AbstractString, delim=','; cols2 = makeoutputvecs(rec2, row_estimate, stringtype, stringarraytype) for (iii, val) in enumerate(cols2) - if val!==nothing + if val !== nothing colspool[iii] = val end end @@ -415,7 +416,7 @@ function _csvread_internal(str::AbstractString, delim=','; finalrows2 = parsefill!(str, opts, rec2, row_estimate, cols2, colspool, pos_start_of_data, lineno_start_of_data, 1, l, commentchar) - for iii=err.colno:length(cols) + for iii = err.colno:length(cols) if reparse_needed[iii] promoted[iii-err.colno+1] = (promoted[iii-err.colno+1][1], cols2[iii]) end @@ -470,7 +471,7 @@ function promote_field(failed_str, field, col, err, nastrings, stringtype, strin return swapinner(field, newtoken), :reparserequired end newcol = try - promote_column(col, err.rowno-1, fieldtype(newtoken), stringtype, stringarraytype) + promote_column(col, err.rowno - 1, fieldtype(newtoken), stringtype, stringarraytype) catch err2 # TODO Should this really be shown? Base.showerror(stderr, err2) @@ -495,13 +496,13 @@ function promote_column(col, rowno, T, stringtype, stringarraytype, inner=false) end elseif ismissingtype(T) arr = convert(Array{UnionMissing{T}}, col) - for i=rowno+1:length(arr) + for i = rowno+1:length(arr) # if we convert an Array{Int} to be missing-friendly, we will not have missing in here by default arr[i] = missing end return arr else - newcol = Array{T, 1}(undef, length(col)) + newcol = Array{T,1}(undef, length(col)) copyto!(newcol, 1, col, 1, rowno) newcol end @@ -520,17 +521,17 @@ function readcolnames(str, opts, pos, colnames, len=lastindex(str)) for (i, v) in optionsiter(colnames, colnames_inferred) colnames_inferred[i] = v end - colnames_inferred, lineend+1 + colnames_inferred, lineend + 1 end function guesscolparsers(str::AbstractString, len, header, opts::LocalOpts, pos::Int, - nrows::Int, colparsers, stringarraytype, commentchar=nothing, nastrings=NA_STRINGS, prevs=nothing) + nrows::Int, colparsers, stringarraytype, commentchar=nothing, nastrings=NA_STRINGS, prevs=nothing) # Field type guesses guess = [] prevfields = String[] givenkeys = !isempty(colparsers) ? first.(collect(optionsiter(colparsers, header))) : [] - for i2=1:nrows + for i2 = 1:nrows pos, _ = eatnewlines(str, pos, len) # Move past commented lines before guessing. @@ -542,7 +543,7 @@ function guesscolparsers(str::AbstractString, len, header, opts::LocalOpts, pos: fields = quotedsplit(str, opts, true, pos, lineend) if i2 == 1 - guess = Any[Unknown() for i3=1:length(fields)] # idk + guess = Any[Unknown() for i3 = 1:length(fields)] # idk if prevs !== nothing && !isempty(header) # sometimes length(fields) can be != length(header). # this sucks! @@ -574,7 +575,7 @@ function guesscolparsers(str::AbstractString, len, header, opts::LocalOpts, pos: end end prevfields = fields - pos = lineend+1 + pos = lineend + 1 end # override guesses with user request @@ -585,7 +586,7 @@ function guesscolparsers(str::AbstractString, len, header, opts::LocalOpts, pos: end function parsefill!(str::AbstractString, opts, rec::RecN{N}, nrecs, cols, colspool, - pos, lineno, rowno, l=lastindex(str), commentchar=nothing) where {N} + pos, lineno, rowno, l=lastindex(str), commentchar=nothing) where {N} pos, lines = eatnewlines(str, pos, l) lineno += lines @@ -596,13 +597,13 @@ function parsefill!(str::AbstractString, opts, rec::RecN{N}, nrecs, cols, colspo # Do not try to parse commented lines. pos, lines = eatcommentlines(str, pos, l, commentchar) lineno += lines - pos > l && return rowno-1 + pos > l && return rowno - 1 res = tryparsesetindex(rec, str, pos, l, cols, rowno, opts) if !issuccess(res) pos, fieldpos, colno, err_code = geterror(res) - throw(CSVParseError(err_code, str, rec, lineno+1, rowno, - colno, pos, fieldpos)) + throw(CSVParseError(err_code, str, rec, lineno + 1, rowno, + colno, pos, fieldpos)) else pos = value(res) end @@ -627,33 +628,33 @@ end function resizecols(colspool, nrecs) for (h, c) in colspool - if c!==nothing + if c !== nothing l = length(c) resize!(c, nrecs) if eltype(c) <: AbstractString # fill with blanks c[l+1:nrecs] .= "" elseif eltype(c) <: StrRange - c[l+1:nrecs] .= StrRange(1,0) + c[l+1:nrecs] .= StrRange(1, 0) end end end end function makeoutputvecs(rec, N, stringtype, stringarraytype) - map(f->makeoutputvec(f, N, stringtype, stringarraytype), rec.fields) + map(f -> makeoutputvec(f, N, stringtype, stringarraytype), rec.fields) end function makeoutputvec(eltyp, N, stringtype, stringarraytype) - if fieldtype(eltyp)===Nothing + if fieldtype(eltyp) === Nothing return nothing elseif fieldtype(eltyp) == Missing # we weren't able to detect the type, - # all cells were blank + # all cells were blank Array{Missing}(undef, N) elseif fieldtype(eltyp) == StrRange stringarraytype{stringtype,1}(undef, N) elseif ismissingtype(fieldtype(eltyp)) && fieldtype(eltyp) <: StrRange - stringarraytype{Union{Missing, String},1}(undef, N) + stringarraytype{Union{Missing,String},1}(undef, N) else Array{fieldtype(eltyp)}(undef, N) end @@ -694,23 +695,23 @@ function Base.showerror(io::IO, err::CSVParseError) end function showerrorchar(str, pos, maxchar) - hmaxchar = round(Int, maxchar/2) + hmaxchar = round(Int, maxchar / 2) rng = getlineat(str, pos) substr = strip(str[rng]) - pointer = String(['_' for i=1:(pos-first(rng)-1)]) * "^" + pointer = String(['_' for i = 1:(pos-first(rng)-1)]) * "^" if length(substr) > maxchar # center the error char - lst = thisind(str, min(pos+ceil(Int, hmaxchar), last(rng))) - fst = thisind(str, max(first(rng), pos-hmaxchar)) + lst = thisind(str, min(pos + ceil(Int, hmaxchar), last(rng))) + fst = thisind(str, max(first(rng), pos - hmaxchar)) substr = "..." * strip(str[fst:lst]) * "..." - pointer = String(['_' for i=1:(pos-fst+2)]) * "^" + pointer = String(['_' for i = 1:(pos-fst+2)]) * "^" end substr * "\n" * pointer end function quotedsplit(str, opts, includequotes, i=firstindex(str), l=lastindex(str)) strtok = Quoted(StringToken(String), opts.quotechar, opts.escapechar, required=false, - includequotes=includequotes) + includequotes=includequotes) f = Field(strtok, eoldelim=true) strs = String[] @@ -722,8 +723,9 @@ function quotedsplit(str, opts, includequotes, i=firstindex(str), l=lastindex(st push!(strs, x) end y1 = iterate(str, prevind(str, i)) - y1===nothing && error("Internal error.") - c = y1[1]; i = y1[2] + y1 === nothing && error("Internal error.") + c = y1[1] + i = y1[2] if c == Char(opts.endchar) # edge case where there's a delim at the end of the string push!(strs, "") diff --git a/src/field.jl b/src/field.jl index 1ec8a20..e7e44e3 100644 --- a/src/field.jl +++ b/src/field.jl @@ -29,7 +29,7 @@ Options local to the token currently being parsed. - `includequotes`: whether to include quotes while parsing - `includenewlines`: whether to include newlines while parsing """ -struct LocalOpts{T_ENDCHAR<:Union{Char,UInt8}, T_QUOTECHAR<:Union{Char,UInt8}, T_ESCAPECHAR<:Union{Char,UInt8}} +struct LocalOpts{T_ENDCHAR<:Union{Char,UInt8},T_QUOTECHAR<:Union{Char,UInt8},T_ESCAPECHAR<:Union{Char,UInt8}} endchar::T_ENDCHAR # End parsing at this char spacedelim::Bool quotechar::T_QUOTECHAR # Quote char @@ -49,12 +49,12 @@ end tryparsenext(tok, str, i, len) end -struct WrapLocalOpts{T, X<:AbstractToken} <: AbstractToken{T} +struct WrapLocalOpts{T,X<:AbstractToken} <: AbstractToken{T} opts::LocalOpts inner::X end -WrapLocalOpts(opts, inner) = WrapLocalOpts{fieldtype(inner), typeof(inner)}(opts, inner) +WrapLocalOpts(opts, inner) = WrapLocalOpts{fieldtype(inner),typeof(inner)}(opts, inner) @inline function tryparsenext(tok::WrapLocalOpts, str, i, len, opts::LocalOpts=default_opts) tryparsenext(tok.inner, str, i, len, tok.opts) @@ -69,7 +69,7 @@ function tryparsenext(::Unknown, str, i, len, opts) nullableNA, i end show(io::IO, ::Unknown) = print(io, "") -struct CustomParser{T, F} <: AbstractToken{T} +struct CustomParser{T,F} <: AbstractToken{T} f::Function end @@ -124,23 +124,23 @@ function tryparsenext(::Numeric{T}, str, i, len) where {T<:Signed} @chk2 x, i = tryparsenext_base10(T, str, i, len) @label done - return R(convert(T, sign*x)), i + return R(convert(T, sign * x)), i @label error return R(), i end @inline function tryparsenext(::Numeric{T}, str, i, len) where {T<:Unsigned} - tryparsenext_base10(T,str, i, len) + tryparsenext_base10(T, str, i, len) end -@inline _is_e(str, i) = str[i]=='e' || str[i]=='E' +@inline _is_e(str, i) = str[i] == 'e' || str[i] == 'E' -@inline _is_negative(str, i) = str[i]=='-' +@inline _is_negative(str, i) = str[i] == '-' -@inline _is_positive(str, i) = str[i]=='+' +@inline _is_positive(str, i) = str[i] == '+' -const pre_comp_exp_double = Double64[Double64(10.0)^i for i=0:308] +const pre_comp_exp_double = Double64[Double64(10.0)^i for i = 0:308] @inline function convert_to_double(f1::Int64, exp::Int) f = Float64(f1) @@ -155,7 +155,7 @@ const pre_comp_exp_double = Double64[Double64(10.0)^i for i=0:308] else if exp < minexp # not sure why this is a good choice, but it seems to be! x /= pre_comp_exp_double[-minexp+1] - x /= pre_comp_exp_double[-exp + minexp + 1] + x /= pre_comp_exp_double[-exp+minexp+1] else x /= pre_comp_exp_double[-exp+1] end @@ -167,14 +167,14 @@ end R = Nullable{F} y1 = iterate(str, i) - y1===nothing && @goto error + y1 === nothing && @goto error negate = false c = y1[1] - if c=='-' + if c == '-' negate = true i = y1[2] - elseif c=='+' + elseif c == '+' i = y1[2] end @@ -190,8 +190,8 @@ end # next thing must be dec pt. y2 = iterate(str, i) - if y2!==nothing && y2[1]=='.' - i =y2[2] + if y2 !== nothing && y2[1] == '.' + i = y2[2] f1, rval2, ie = parse_uint_and_stop(str, i, len, f1) # TODO This is incorrect for string types where a digit takes up # more than one codeunit, we need to return the number of digits @@ -209,11 +209,11 @@ end eval::Int32 = 0 y3 = iterate(str, i) - if y3!==nothing && _is_e(str, i) + if y3 !== nothing && _is_e(str, i) i = y3[2] y4 = iterate(str, i) - if y4!==nothing + if y4 !== nothing enegate = false if _is_negative(str, i) enegate = true @@ -235,12 +235,12 @@ end if frac_digits <= 15 && -22 <= exp <= 22 if exp >= 0 - f = F(f1)*10.0^exp + f = F(f1) * 10.0^exp else - f = F(f1)/10.0^(-exp) + f = F(f1) / 10.0^(-exp) end else - f = convert_to_double(f1, exp) + f = convert_to_double(f1, exp) end if negate @@ -266,10 +266,11 @@ function tryparsenext(::Percentage, str, i, len, opts) # parse away the % char ii = eatwhitespaces(str, ii, len) y = iterate(str, ii) - if y===nothing + if y === nothing return Nullable{Float64}(), ii # failed to parse % else - c = y[1]; k = y[2] + c = y[1] + k = y[2] if c != '%' return Nullable{Float64}(), ii # failed to parse % else @@ -302,20 +303,22 @@ function tryparsenext(s::StringToken{T}, str, i, len, opts) where {T} i0 = i if opts.includequotes y = iterate(str, i) - if y!==nothing - c = y[1]; ii = y[2] + if y !== nothing + c = y[1] + ii = y[2] if c == Char(opts.quotechar) i = ii # advance counter so that - # the while loop doesn't react to opening quote + # the while loop doesn't react to opening quote end end end y2 = iterate(str, i) - while y2!==nothing - c = y2[1]; ii = y2[2] + while y2 !== nothing + c = y2[1] + ii = y2[2] - if inside_quoted_strong && p==Char(opts.escapechar) + if inside_quoted_strong && p == Char(opts.escapechar) escapecount += 1 end @@ -328,13 +331,14 @@ function tryparsenext(s::StringToken{T}, str, i, len, opts) where {T} # sometimes the quotechar is the escapechar # in that case we need to see the next char y3 = iterate(str, ii) - if y3===nothing + if y3 === nothing if opts.includequotes - i=ii + i = ii end break else - nxt = y3[1]; j = y3[2] + nxt = y3[1] + j = y3[2] if nxt == Char(opts.quotechar) # the current character is escaping the # next one @@ -365,15 +369,15 @@ function tryparsenext(s::StringToken{T}, str, i, len, opts) where {T} y2 = iterate(str, i) end - return R(_substring(T, str, i0, i-1, escapecount, opts)), i + return R(_substring(T, str, i0, i - 1, escapecount, opts)), i end @inline function _substring(::Type{String}, str, i, j, escapecount, opts) if escapecount > 0 - buf = IOBuffer(sizehint=j-i+1-escapecount) + buf = IOBuffer(sizehint=j - i + 1 - escapecount) cur_i = i c = str[cur_i] - if opts.includequotes && c==Char(opts.quotechar) + if opts.includequotes && c == Char(opts.quotechar) print(buf, c) cur_i = nextind(str, cur_i) end @@ -394,7 +398,7 @@ end end return String(take!(buf)) else - return unsafe_string(pointer(str, i), j-i+1) + return unsafe_string(pointer(str, i), j - i + 1) end end @@ -420,7 +424,7 @@ end export Quoted -struct Quoted{T, S<:AbstractToken, T_QUOTECHAR<:Union{Char,UInt8}, T_ESCAPECHAR<:Union{Char,UInt8}} <: AbstractToken{T} +struct Quoted{T,S<:AbstractToken,T_QUOTECHAR<:Union{Char,UInt8},T_ESCAPECHAR<:Union{Char,UInt8}} <: AbstractToken{T} inner::S required::Bool stripwhitespaces::Bool @@ -451,26 +455,27 @@ end function Quoted(inner::S, quotechar::T_QUOTECHAR, escapechar::T_ESCAPECHAR; required=false, - stripwhitespaces=fieldtype(S)<:Number, + stripwhitespaces=fieldtype(S) <: Number, includequotes=false, includenewlines=true) where {S<:AbstractToken,T_QUOTECHAR,T_ESCAPECHAR} T = fieldtype(S) Quoted{T,S,T_QUOTECHAR,T_ESCAPECHAR}(inner, required, stripwhitespaces, includequotes, - includenewlines, quotechar, escapechar) + includenewlines, quotechar, escapechar) end Quoted(t::Type, quotechar, escapechar; kwargs...) = Quoted(fromtype(t), quotechar, escapechar; kwargs...) function tryparsenext(q::Quoted{T,S,T_QUOTECHAR,T_ESCAPECHAR}, str, i, len, opts) where {T,S,T_QUOTECHAR,T_ESCAPECHAR} y1 = iterate(str, i) - if y1===nothing + if y1 === nothing q.required && @goto error # check to see if inner thing is ok with an empty field @chk2 x, i = tryparsenext(q.inner, str, i, len, opts) error @goto done end - c = y1[1]; ii = y1[2] + c = y1[1] + ii = y1[2] quotestarted = false if Char(q.quotechar) == c quotestarted = true @@ -487,7 +492,7 @@ function tryparsenext(q::Quoted{T,S,T_QUOTECHAR,T_ESCAPECHAR}, str, i, len, opts if quotestarted qopts = LocalOpts(q.quotechar, false, q.quotechar, q.escapechar, - q.includequotes, q.includenewlines) + q.includequotes, q.includenewlines) @chk2 x, i = tryparsenext(q.inner, str, i, len, qopts) else @chk2 x, i = tryparsenext(q.inner, str, i, len, opts) @@ -504,8 +509,9 @@ function tryparsenext(q::Quoted{T,S,T_QUOTECHAR,T_ESCAPECHAR}, str, i, len, opts i = eatwhitespaces(str, i, len) end y2 = iterate(str, i) - y2===nothing && error("Internal error.") - c = y2[1]; ii = y2[2] + y2 === nothing && error("Internal error.") + c = y2[1] + ii = y2[2] if quotestarted && !q.includequotes c != Char(q.quotechar) && @goto error @@ -531,8 +537,8 @@ end Parse a date time string of format `fmt` into type `T` which is either `Date`, `Time` or `DateTime`. """ -DateTimeToken(T::Type, df::S) where {S<:DateFormat} = DateTimeToken{T, S}(df) -DateTimeToken(df::S) where {S<:DateFormat} = DateTimeToken{DateTime, S}(df) +DateTimeToken(T::Type, df::S) where {S<:DateFormat} = DateTimeToken{T,S}(df) +DateTimeToken(df::S) where {S<:DateFormat} = DateTimeToken{DateTime,S}(df) fromtype(df::DateFormat) = DateTimeToken(DateTime, df) fromtype(::Type{DateTime}) = DateTimeToken(DateTime, ISODateTimeFormat) fromtype(::Type{Date}) = DateTimeToken(Date, ISODateFormat) @@ -549,13 +555,13 @@ end ### Missing -const nastrings_upcase = ["NA", "NULL", "N/A","#N/A", "#N/A N/A", "#NA", - "-1.#IND", "-1.#QNAN", "-NaN", "-nan", - "1.#IND", "1.#QNAN", "N/A", "NA", "NaN", "nan"] +const nastrings_upcase = ["NA", "NULL", "N/A", "#N/A", "#N/A N/A", "#NA", + "-1.#IND", "-1.#QNAN", "-NaN", "-nan", + "1.#IND", "1.#QNAN", "N/A", "NA", "NaN", "nan"] const NA_STRINGS = sort!(vcat(nastrings_upcase, map(lowercase, nastrings_upcase))) -struct NAToken{T, S<:AbstractToken} <: AbstractToken{T} +struct NAToken{T,S<:AbstractToken} <: AbstractToken{T} inner::S emptyisna::Bool nastrings::Vector{String} @@ -573,11 +579,10 @@ Parses a Nullable item. """ function NAToken( inner::S - ; emptyisna=true - , nastrings=NA_STRINGS) where S + ; emptyisna=true, nastrings=NA_STRINGS) where S T = fieldtype(inner) - NAToken{UnionMissing{T}, S}(inner, emptyisna, nastrings) + NAToken{UnionMissing{T},S}(inner, emptyisna, nastrings) end function show(io::IO, na::NAToken) @@ -588,8 +593,8 @@ end function tryparsenext(na::NAToken{T}, str, i, len, opts) where {T} R = Nullable{T} i = eatwhitespaces(str, i, len) - y1 = iterate(str,i) - if y1===nothing + y1 = iterate(str, i) + if y1 === nothing if na.emptyisna @goto null else @@ -597,25 +602,26 @@ function tryparsenext(na::NAToken{T}, str, i, len, opts) where {T} end end - c = y1[1]; ii=y1[2] + c = y1[1] + ii = y1[2] if (c == Char(opts.endchar) || isnewline(c)) && na.emptyisna - @goto null + @goto null end if isa(na.inner, Unknown) @goto maybe_null end - @chk2 x,ii = tryparsenext(na.inner, str, i, len, opts) maybe_null + @chk2 x, ii = tryparsenext(na.inner, str, i, len, opts) maybe_null @label done return R(convert(T, x)), ii @label maybe_null naopts = LocalOpts(opts.endchar, opts.spacedelim, opts.quotechar, - opts.escapechar, false, opts.includenewlines) + opts.escapechar, false, opts.includenewlines) @chk2 nastr, ii = tryparsenext(StringToken(WeakRefString{UInt8}), str, i, len, naopts) if !isempty(searchsorted(na.nastrings, nastr)) - i=ii + i = ii i = eatwhitespaces(str, i, len) @goto null end @@ -661,23 +667,19 @@ function Field(inner::S; ignore_init_whitespace=true, ignore_end_whitespace=true end function Field(f::Field; inner=f.inner, ignore_init_whitespace=f.ignore_init_whitespace, - ignore_end_whitespace=f.ignore_end_whitespace, - eoldelim=f.eoldelim) + ignore_end_whitespace=f.ignore_end_whitespace, + eoldelim=f.eoldelim) T = fieldtype(inner) Field{T,typeof(inner)}(inner, ignore_init_whitespace, - ignore_end_whitespace, eoldelim) + ignore_end_whitespace, eoldelim) end function swapinner(f::Field, inner::AbstractToken; - ignore_init_whitespace= f.ignore_end_whitespace - , ignore_end_whitespace=f.ignore_end_whitespace - , eoldelim=f.eoldelim - ) + ignore_init_whitespace=f.ignore_end_whitespace, ignore_end_whitespace=f.ignore_end_whitespace, eoldelim=f.eoldelim +) Field(inner; - ignore_init_whitespace=ignore_end_whitespace - , ignore_end_whitespace=ignore_end_whitespace - , eoldelim=eoldelim - ) + ignore_init_whitespace=ignore_end_whitespace, ignore_end_whitespace=ignore_end_whitespace, eoldelim=eoldelim + ) end @@ -686,8 +688,9 @@ function tryparsenext(f::Field{T}, str, i, len, opts) where {T} i > len && @goto error if f.ignore_init_whitespace y1 = iterate(str, i) - while y1!==nothing - c = y1[1]; ii = y1[2] + while y1 !== nothing + c = y1[1] + ii = y1[2] !isspace(c) && break i = ii y1 = iterate(str, i) @@ -698,9 +701,10 @@ function tryparsenext(f::Field{T}, str, i, len, opts) where {T} if f.ignore_end_whitespace i0 = i y2 = iterate(str, i) - while y2!==nothing - c = y2[1]; ii = y2[2] - !opts.spacedelim && Char(opts.endchar) == '\t' && c == '\t' && (i =ii; @goto done) + while y2 !== nothing + c = y2[1] + ii = y2[2] + !opts.spacedelim && Char(opts.endchar) == '\t' && c == '\t' && (i = ii; @goto done) !isspace(c) && c != '\t' && break i = ii y2 = iterate(str, i) @@ -719,29 +723,32 @@ function tryparsenext(f::Field{T}, str, i, len, opts) where {T} end y3 = iterate(str, i) - y3===nothing && error("Internal error.") - c = y3[1]; ii = y3[2] - opts.spacedelim && (isspace(c) || c == '\t') && (i=ii; @goto done) - !opts.spacedelim && Char(opts.endchar) == c && (i=ii; @goto done) + y3 === nothing && error("Internal error.") + c = y3[1] + ii = y3[2] + opts.spacedelim && (isspace(c) || c == '\t') && (i = ii; @goto done) + !opts.spacedelim && Char(opts.endchar) == c && (i = ii; @goto done) if f.eoldelim if c == '\r' - i=ii + i = ii y4 = iterate(str, i) - if y4!==nothing - c = y4[1]; ii = y4[2] + if y4 !== nothing + c = y4[1] + ii = y4[2] if c == '\n' - i=ii + i = ii end end @goto done elseif c == '\n' - i=ii + i = ii y5 = iterate(str, i) - if y5!==nothing - c = y5[1]; ii = y5[2] + if y5 !== nothing + c = y5[1] + ii = y5[2] if c == '\r' - i=ii + i = ii end end @goto done @@ -754,4 +761,3 @@ function tryparsenext(f::Field{T}, str, i, len, opts) where {T} @label done return R(convert(T, res)), i end - diff --git a/src/guesstype.jl b/src/guesstype.jl index f1d1beb..9d3f7c7 100644 --- a/src/guesstype.jl +++ b/src/guesstype.jl @@ -4,7 +4,7 @@ const common_date_formats = Any[ dateformat"yyyy-mm-dd", dateformat"yyyy/mm/dd", dateformat"mm-dd-yyyy", dateformat"mm/dd/yyyy", dateformat"dd-mm-yyyy", dateformat"dd/mm/yyyy", - dateformat"dd u yyyy", dateformat"e, dd u yyyy" + dateformat"dd u yyyy", dateformat"e, dd u yyyy" ] const common_datetime_formats = Any[ @@ -36,7 +36,8 @@ function guessdateformat(str, len=lastindex(str)) if l > len return DateTimeToken(typ, df) end - catch err; end + catch err + end end end return nothing @@ -52,7 +53,7 @@ function getquotechar(x) return '\0' end -function guesstoken(x, opts, prevent_quote_wrap, @nospecialize(prev_guess=Unknown()), nastrings=NA_STRINGS, stringarraytype=StringArray) +function guesstoken(x, opts, prevent_quote_wrap, @nospecialize(prev_guess = Unknown()), nastrings=NA_STRINGS, stringarraytype=StringArray) q = getquotechar(x) if isa(prev_guess, StringToken) @@ -66,9 +67,9 @@ function guesstoken(x, opts, prevent_quote_wrap, @nospecialize(prev_guess=Unknow prev_inner = prev_guess end inner_string = strip(strip(x, q)) - if inner_string=="" + if inner_string == "" # If we come across a "", we classify it as a string column no matter what - return Quoted(StringToken(stringarraytype<:StringArray ? StrRange : String), opts.quotechar, opts.escapechar) + return Quoted(StringToken(stringarraytype <: StringArray ? StrRange : String), opts.quotechar, opts.escapechar) else inner_token = guesstoken(inner_string, opts, true, prev_inner, nastrings, stringarraytype) return Quoted(inner_token, opts.quotechar, opts.escapechar) @@ -114,19 +115,19 @@ function guesstoken(x, opts, prevent_quote_wrap, @nospecialize(prev_guess=Unknow return Numeric(promote_type(T, fieldtype(prev_guess))) else # something like a date turned into a single number? - y1 = StringToken(stringarraytype<:StringArray ? StrRange : String) + y1 = StringToken(stringarraytype <: StringArray ? StrRange : String) return prevent_quote_wrap ? y1 : Quoted(y1, opts.quotechar, opts.escapechar) end else # fast-path if length(filter(isnumeric, x)) < 4 - y2 = StringToken(stringarraytype<:StringArray ? StrRange : String) + y2 = StringToken(stringarraytype <: StringArray ? StrRange : String) return prevent_quote_wrap ? y2 : Quoted(y2, opts.quotechar, opts.escapechar) end maybedate = guessdateformat(x) if maybedate === nothing - y3 = StringToken(stringarraytype<:StringArray ? StrRange : String) + y3 = StringToken(stringarraytype <: StringArray ? StrRange : String) return prevent_quote_wrap ? y3 : Quoted(y3, opts.quotechar, opts.escapechar) else return maybedate @@ -134,5 +135,3 @@ function guesstoken(x, opts, prevent_quote_wrap, @nospecialize(prev_guess=Unknow end end end - - diff --git a/src/lib/compat.jl b/src/lib/compat.jl index d7eef04..c0a47f5 100644 --- a/src/lib/compat.jl +++ b/src/lib/compat.jl @@ -11,4 +11,3 @@ include("date-tryparse-internal.jl") const ISODateFormat = Dates.ISODateFormat const ISODateTimeFormat = Dates.ISODateTimeFormat const RFC1123Format = Dates.RFC1123Format - diff --git a/src/lib/date-tryparse-internal.jl b/src/lib/date-tryparse-internal.jl index 0831293..3199c02 100644 --- a/src/lib/date-tryparse-internal.jl +++ b/src/lib/date-tryparse-internal.jl @@ -15,7 +15,7 @@ Returns a 2-element tuple `(values, pos)`: * `pos::Int`: The character index at which parsing stopped. """ @generated function tryparsenext_internal( - ::Type{T}, str::AbstractString, pos::Int, len::Int, df::DateFormat, endchar=UInt('\0'), raise::Bool=false, + ::Type{T}, str::AbstractString, pos::Int, len::Int, df::DateFormat, endchar=UInt('\0'), raise::Bool=false, ) where {T<:TimeType} letters = character_codes(df) @@ -41,9 +41,9 @@ Returns a 2-element tuple `(values, pos)`: value_tuple = Expr(:tuple, value_names...) assign_value_till = Expr[ - quote - ($i <= num_parsed) && ($name = unsafe_val[$i]) - end for (i,name) in enumerate(value_names)] + quote + ($i <= num_parsed) && ($name = unsafe_val[$i]) + end for (i, name) in enumerate(value_names)] quote values, pos, num_parsed = tryparsenext_core(str, pos, len, df, raise) @@ -52,7 +52,7 @@ Returns a 2-element tuple `(values, pos)`: $(assign_value_till...) if isnull(values) if (pos <= len && str[pos] == Char(endchar)) || - num_parsed == $(length(value_names)) + num_parsed == $(length(value_names)) # finished parsing and found an extra char, # or parsing was terminated by a delimiter return Nullable{$R}($(Expr(:tuple, output_names...))), pos @@ -108,7 +108,7 @@ Returns a 3-element tuple `(values, pos, num_parsed)`: quote pos > len && @goto done nothingable_tuple = tryparsenext(directives[$i], str, pos, len, locale) - nothingable_tuple===nothing && @goto error + nothingable_tuple === nothing && @goto error $name = nothingable_tuple[1] next_pos = nothingable_tuple[2] pos = next_pos @@ -119,7 +119,7 @@ Returns a 3-element tuple `(values, pos, num_parsed)`: quote pos > len && @goto done nothingable_tuple = tryparsenext(directives[$i], str, pos, len, locale) - nothingable_tuple===nothing && @goto error + nothingable_tuple === nothing && @goto error nullable_delim = nothingable_tuple[1] next_pos = nothingable_tuple[2] pos = next_pos diff --git a/src/lib/result.jl b/src/lib/result.jl index 6179324..ef5686d 100644 --- a/src/lib/result.jl +++ b/src/lib/result.jl @@ -18,8 +18,8 @@ struct Result{T,S} error::IRef{S} function Result{T,S}(issuccess, val) where {T,S} issuccess ? - new{T,S}(issuccess, IRef{T}(val), IRef{S}()) : - new{T,S}(issuccess, IRef{T}(), IRef{S}(val)) + new{T,S}(issuccess, IRef{T}(val), IRef{S}()) : + new{T,S}(issuccess, IRef{T}(), IRef{S}(val)) end end diff --git a/src/record.jl b/src/record.jl index 05939e4..f8a2220 100644 --- a/src/record.jl +++ b/src/record.jl @@ -1,22 +1,22 @@ -struct Record{Tf<:Tuple, To} +struct Record{Tf<:Tuple,To} fields::Tf end function Record(t::T) where T<:Tuple To = Tuple{map(fieldtype, t)...} #Tov = Tuple{map(s->Vector{s},map(fieldtype, t))...} - Record{T, To}(t) + Record{T,To}(t) end # for dispatch on N include_string(TextParse, "const RecN{N,U} = Record{T,U} where T<:NTuple{N, Any}") -@generated function tryparsenext(r::RecN{N, To}, str, i, len, opts=default_opts) where {N, To} +@generated function tryparsenext(r::RecN{N,To}, str, i, len, opts=default_opts) where {N,To} quote R = Nullable{To} i > len && @goto error - Base.@nexprs $N j->begin + Base.@nexprs $N j -> begin @chk2 (val_j, i) = tryparsenext(r.fields[j], str, i, len, opts) end @@ -29,7 +29,7 @@ include_string(TextParse, "const RecN{N,U} = Record{T,U} where T<:NTuple{N, Any} end const PARSE_SUCCESS = 0x00 -const PARSE_ERROR = 0x01 +const PARSE_ERROR = 0x01 function gen_1parsesetindex(j, fieldexpr, colexpr) val_j = Symbol(:val, j) @@ -64,16 +64,18 @@ end if rl > 2 body = gen_1parsesetindex(:jj, :(r.fields[jj]::($ft)), :(columns[jj]::($ct))) push!(fieldparsers, - quote - for jj = $j:$(j+rl-1); $body; end - end) + quote + for jj = $j:$(j + rl - 1) + $body + end + end) j += rl else push!(fieldparsers, gen_1parsesetindex(j, :(r.fields[$j]), :(columns[$j]))) j += 1 end end - R = Result{Int, Tuple{Int,Int,Int,UInt8}} + R = Result{Int,Tuple{Int,Int,Int,UInt8}} quote err_field = 1 ii = i @@ -115,7 +117,7 @@ fieldtype(::UseOne{T}) where {T} = T function UseOne(fields::Tuple, use) r = Record(fields) - UseOne{fieldtype(fields[use]), typeof(r), use}(r) + UseOne{fieldtype(fields[use]),typeof(r),use}(r) end getthing(x, ::Type{Val{n}}) where {n} = x[n] function tryparsenext(f::UseOne{T,S,use}, str, i, len, opts=default_opts) where {T,S,use} @@ -130,11 +132,11 @@ function tryparsenext(f::UseOne{T,S,use}, str, i, len, opts=default_opts) where end -struct Repeated{F, T, N} +struct Repeated{F,T,N} field::F end -Repeated(f::F, n) where {F} = Repeated{F, fieldtype(f), n}(f) +Repeated(f::F, n) where {F} = Repeated{F,fieldtype(f),n}(f) fieldtype(::Repeated{F,T,N}) where {F,T,N} = NTuple{N,T} @@ -144,7 +146,7 @@ fieldtype(::Repeated{F,T,N}) where {F,T,N} = NTuple{N,T} i > len && @goto error # pefect candidate for #11902 - Base.@nexprs $N j->begin + Base.@nexprs $N j -> begin @chk2 (val_j, i) = tryparsenext(f.field, str, i, len, opts) end diff --git a/src/utf8optimizations.jl b/src/utf8optimizations.jl index 59dda15..68984f0 100644 --- a/src/utf8optimizations.jl +++ b/src/utf8optimizations.jl @@ -1,8 +1,8 @@ -@inline function eatwhitespaces(str::Union{VectorBackedUTF8String, String}, i=1, len=lastindex(str)) - while i<=len +@inline function eatwhitespaces(str::Union{VectorBackedUTF8String,String}, i=1, len=lastindex(str)) + while i <= len @inbounds b = codeunit(str, i) - if b==0x20 # This is ' ' + if b == 0x20 # This is ' ' i += 1 else break @@ -11,13 +11,13 @@ return i end -@inline function eatnewlines(str::Union{VectorBackedUTF8String, String}, i=1, len=lastindex(str)) +@inline function eatnewlines(str::Union{VectorBackedUTF8String,String}, i=1, len=lastindex(str)) count = 0 - while i<=len + while i <= len @inbounds b = codeunit(str, i) if b == 0xd # '\r' i += 1 - if i<=len + if i <= len @inbounds b = codeunit(str, i) if b == 0xa # '\n' i += 1 @@ -26,7 +26,7 @@ end count += 1 elseif b == 0xa i += 1 - if i<=len + if i <= len @inbounds b = codeunit(str, i) if b == 0xd i += 1 @@ -41,40 +41,40 @@ end return i, count end -@inline function tryparsenext_sign(str::Union{VectorBackedUTF8String, String}, i, len) +@inline function tryparsenext_sign(str::Union{VectorBackedUTF8String,String}, i, len) i > len && return Nullable{Int}(), i @inbounds b = codeunit(str, i) - if b==0x2d - return Nullable{Int}(-1), i+1 - elseif b==0x2b - return Nullable{Int}(1), i+1 + if b == 0x2d + return Nullable{Int}(-1), i + 1 + elseif b == 0x2b + return Nullable{Int}(1), i + 1 else return Nullable{Int}(1), i end end -@inline function tryparsenext_base10_digit(T,str::Union{VectorBackedUTF8String, String},i, len) +@inline function tryparsenext_base10_digit(T, str::Union{VectorBackedUTF8String,String}, i, len) i > len && @goto error - @inbounds b = codeunit(str,i) - diff = b-0x30 + @inbounds b = codeunit(str, i) + diff = b - 0x30 diff >= UInt8(10) && @goto error - return convert(T, diff), i+1 + return convert(T, diff), i + 1 @label error return nothing end -@inline _isdigit(b::UInt8) = ( (0x30 ≤ b) & (b ≤ 0x39) ) +@inline _isdigit(b::UInt8) = ((0x30 ≤ b) & (b ≤ 0x39)) -@inline function parse_uint_and_stop(str::Union{VectorBackedUTF8String, String}, i, len, n::T) where {T <: Integer} +@inline function parse_uint_and_stop(str::Union{VectorBackedUTF8String,String}, i, len, n::T) where {T<:Integer} ten = T(10) # specialize handling of the first digit so we can return an error - max_without_overflow = div(typemax(T)-9,10) # the larg + max_without_overflow = div(typemax(T) - 9, 10) # the larg i <= len || return n, false, i @inbounds b = codeunit(str, i) - diff = b-0x30 + diff = b - 0x30 if diff < UInt8(10) && n <= max_without_overflow n *= ten n += T(diff) @@ -85,7 +85,7 @@ end while i <= len && n <= max_without_overflow @inbounds b = codeunit(str, i) - diff = b-0x30 + diff = b - 0x30 if diff < UInt8(10) n *= ten n += T(diff) @@ -97,7 +97,7 @@ end return n, true, i end -@inline function read_digits(str::Union{VectorBackedUTF8String, String}, i, len) +@inline function read_digits(str::Union{VectorBackedUTF8String,String}, i, len) # slurp up extra digits while i <= len @inbounds b = codeunit(str, i) @@ -109,35 +109,35 @@ end return i end -@inline function _is_e(str::Union{VectorBackedUTF8String, String}, i) +@inline function _is_e(str::Union{VectorBackedUTF8String,String}, i) @inbounds b = codeunit(str, i) - return (b==0x65) | (b==0x45) + return (b == 0x65) | (b == 0x45) end -@inline function _is_negative(str::Union{VectorBackedUTF8String, String}, i) +@inline function _is_negative(str::Union{VectorBackedUTF8String,String}, i) @inbounds b = codeunit(str, i) - return b==0x2d + return b == 0x2d end -@inline function _is_positive(str::Union{VectorBackedUTF8String, String}, i) +@inline function _is_positive(str::Union{VectorBackedUTF8String,String}, i) @inbounds b = codeunit(str, i) - return b==0x2b + return b == 0x2b end -const pre_comp_exp = Float64[10.0^i for i=0:22] +const pre_comp_exp = Float64[10.0^i for i = 0:22] -@inline function tryparsenext(::Numeric{F}, str::Union{VectorBackedUTF8String, String}, i, len) where {F<:AbstractFloat} +@inline function tryparsenext(::Numeric{F}, str::Union{VectorBackedUTF8String,String}, i, len) where {F<:AbstractFloat} R = Nullable{F} - i>len && @goto error + i > len && @goto error negate = false @inbounds b = codeunit(str, i) - if b==0x2d # '-' + if b == 0x2d # '-' negate = true i += 1 - elseif b==0x2b # '+' - i +=1 + elseif b == 0x2b # '+' + i += 1 end f1::Int64 = 0 @@ -169,7 +169,7 @@ const pre_comp_exp = Float64[10.0^i for i=0:22] i += 1 enegate = false - if i<=len + if i <= len if _is_negative(str, i) enegate = true i += 1 @@ -190,12 +190,12 @@ const pre_comp_exp = Float64[10.0^i for i=0:22] if frac_digits <= 15 && -22 <= exp <= 22 if exp >= 0 - f = F(f1)*pre_comp_exp[exp+1] + f = F(f1) * pre_comp_exp[exp+1] else - f = F(f1)/pre_comp_exp[-exp+1] + f = F(f1) / pre_comp_exp[-exp+1] end else - f = convert_to_double(f1, exp) + f = convert_to_double(f1, exp) end if negate @@ -209,7 +209,7 @@ const pre_comp_exp = Float64[10.0^i for i=0:22] return R(), i end -function tryparsenext(f::Field{T}, str::Union{VectorBackedUTF8String, String}, i, len, opts::LocalOpts{T_ENDCHAR}) where {T, T_ENDCHAR<:UInt8} +function tryparsenext(f::Field{T}, str::Union{VectorBackedUTF8String,String}, i, len, opts::LocalOpts{T_ENDCHAR}) where {T,T_ENDCHAR<:UInt8} R = Nullable{T} i > len && @goto error if f.ignore_init_whitespace @@ -220,13 +220,13 @@ function tryparsenext(f::Field{T}, str::Union{VectorBackedUTF8String, String}, i if f.ignore_end_whitespace i0 = i - while i<=len + while i <= len @inbounds b = codeunit(str, i) - !opts.spacedelim && opts.endchar == 0x09 && b == 0x09 && (i = i+1; @goto done) # 0x09 is \t + !opts.spacedelim && opts.endchar == 0x09 && b == 0x09 && (i = i + 1; @goto done) # 0x09 is \t - b!=0x20 && b!=0x09 && break - i=i+1 + b != 0x20 && b != 0x09 && break + i = i + 1 end opts.spacedelim && i > i0 && @goto done @@ -241,27 +241,27 @@ function tryparsenext(f::Field{T}, str::Union{VectorBackedUTF8String, String}, i end end - i>len && error("Internal error.") + i > len && error("Internal error.") @inbounds b = codeunit(str, i) - opts.spacedelim && (b!=0x20 || b!=0x09) && (i+=1; @goto done) - !opts.spacedelim && opts.endchar == b && (i+=1; @goto done) + opts.spacedelim && (b != 0x20 || b != 0x09) && (i += 1; @goto done) + !opts.spacedelim && opts.endchar == b && (i += 1; @goto done) if f.eoldelim if b == 0x0d # '\r' - i+=1 - if i<=len + i += 1 + if i <= len @inbounds b = codeunit(str, i) if b == 0x0a # '\n' - i+=1 + i += 1 end end @goto done elseif b == 0x0a # '\n' - i+=1 - if i<=len + i += 1 + if i <= len @inbounds b = codeunit(str, i) if b == 0x0d # '\r' - i+=1 + i += 1 end end @goto done @@ -275,15 +275,15 @@ function tryparsenext(f::Field{T}, str::Union{VectorBackedUTF8String, String}, i return R(convert(T, res)), i end -function tryparsenext(q::Quoted{T,S,<:UInt8,<:UInt8}, str::Union{VectorBackedUTF8String, String}, i, len, opts::LocalOpts{<:UInt8,<:UInt8,<:UInt8}) where {T,S} - if i>len +function tryparsenext(q::Quoted{T,S,<:UInt8,<:UInt8}, str::Union{VectorBackedUTF8String,String}, i, len, opts::LocalOpts{<:UInt8,<:UInt8,<:UInt8}) where {T,S} + if i > len q.required && @goto error # check to see if inner thing is ok with an empty field @chk2 x, i = tryparsenext(q.inner, str, i, len, opts) error @goto done end @inbounds b = codeunit(str, i) - ii = i+1 + ii = i + 1 quotestarted = false if q.quotechar == b quotestarted = true @@ -300,7 +300,7 @@ function tryparsenext(q::Quoted{T,S,<:UInt8,<:UInt8}, str::Union{VectorBackedUTF if quotestarted qopts = LocalOpts(q.quotechar, false, q.quotechar, q.escapechar, - q.includequotes, q.includenewlines) + q.includequotes, q.includenewlines) @chk2 x, i = tryparsenext(q.inner, str, i, len, qopts) else @chk2 x, i = tryparsenext(q.inner, str, i, len, opts) @@ -316,7 +316,7 @@ function tryparsenext(q::Quoted{T,S,<:UInt8,<:UInt8}, str::Union{VectorBackedUTF if q.stripwhitespaces i = eatwhitespaces(str, i, len) end - i>len && error("Internal error.") + i > len && error("Internal error.") @inbounds b = codeunit(str, i) ii = i + 1 @@ -337,7 +337,7 @@ end b == UInt8(10) || b == UInt8(13) end -function tryparsenext(s::StringToken{T}, str::Union{VectorBackedUTF8String, String}, i, len, opts::LocalOpts{<:UInt8,<:UInt8,<:UInt8}) where {T} +function tryparsenext(s::StringToken{T}, str::Union{VectorBackedUTF8String,String}, i, len, opts::LocalOpts{<:UInt8,<:UInt8,<:UInt8}) where {T} len = ncodeunits(str) inside_quoted_strong = opts.endchar == opts.quotechar escapecount = 0 @@ -345,9 +345,9 @@ function tryparsenext(s::StringToken{T}, str::Union{VectorBackedUTF8String, Stri p = UInt8(0) i0 = i if opts.includequotes - if i<=len + if i <= len @inbounds b = codeunit(str, i) - if b==opts.quotechar + if b == opts.quotechar # advance counter so that # the while loop doesn't react to opening quote i += 1 @@ -355,11 +355,11 @@ function tryparsenext(s::StringToken{T}, str::Union{VectorBackedUTF8String, Stri end end - while i<=len + while i <= len @inbounds b = codeunit(str, i) ii = i + 1 - if inside_quoted_strong && p==opts.escapechar + if inside_quoted_strong && p == opts.escapechar escapecount += 1 end @@ -373,7 +373,7 @@ function tryparsenext(s::StringToken{T}, str::Union{VectorBackedUTF8String, Stri # in that case we need to see the next char if ii > len if opts.includequotes - i=ii + i = ii end break else @@ -404,16 +404,16 @@ function tryparsenext(s::StringToken{T}, str::Union{VectorBackedUTF8String, Stri p = b end - return R(_substring(T, str, i0, i-1, escapecount, opts)), i + return R(_substring(T, str, i0, i - 1, escapecount, opts)), i end -@inline function _substring(::Type{String}, str::Union{VectorBackedUTF8String, String}, i, j, escapecount, opts::LocalOpts{<:UInt8,<:UInt8,<:UInt8}) +@inline function _substring(::Type{String}, str::Union{VectorBackedUTF8String,String}, i, j, escapecount, opts::LocalOpts{<:UInt8,<:UInt8,<:UInt8}) if escapecount > 0 - buffer = Vector{UInt8}(undef, j-i+1-escapecount) + buffer = Vector{UInt8}(undef, j - i + 1 - escapecount) cur_i = i cur_buffer_i = 1 @inbounds c = codeunit(str, cur_i) - if opts.includequotes && c==opts.quotechar + if opts.includequotes && c == opts.quotechar @inbounds buffer[cur_buffer_i] = c cur_i += 1 cur_buffer_i += 1 @@ -441,11 +441,11 @@ end end return String(buffer) else - return unsafe_string(pointer(str, i), j-i+1) + return unsafe_string(pointer(str, i), j - i + 1) end end -function tryparsenext(na::NAToken{T}, str::Union{VectorBackedUTF8String, String}, i, len, opts::LocalOpts{<:UInt8,<:UInt8,<:UInt8}) where {T} +function tryparsenext(na::NAToken{T}, str::Union{VectorBackedUTF8String,String}, i, len, opts::LocalOpts{<:UInt8,<:UInt8,<:UInt8}) where {T} R = Nullable{T} i = eatwhitespaces(str, i, len) if i > len @@ -459,23 +459,23 @@ function tryparsenext(na::NAToken{T}, str::Union{VectorBackedUTF8String, String} @inbounds b = codeunit(str, i) ii = i + 1 if (b == opts.endchar || isnewline(b)) && na.emptyisna - @goto null + @goto null end if isa(na.inner, Unknown) @goto maybe_null end - @chk2 x,ii = tryparsenext(na.inner, str, i, len, opts) maybe_null + @chk2 x, ii = tryparsenext(na.inner, str, i, len, opts) maybe_null @label done return R(convert(T, x)), ii @label maybe_null naopts = LocalOpts(opts.endchar, opts.spacedelim, opts.quotechar, - opts.escapechar, false, opts.includenewlines) + opts.escapechar, false, opts.includenewlines) @chk2 nastr, ii = tryparsenext(StringToken(WeakRefString{UInt8}), str, i, len, naopts) if !isempty(searchsorted(na.nastrings, nastr)) - i=ii + i = ii i = eatwhitespaces(str, i, len) @goto null end diff --git a/src/util.jl b/src/util.jl index 4199d20..03b0eb2 100644 --- a/src/util.jl +++ b/src/util.jl @@ -17,7 +17,7 @@ macro chk1(expr,label=:error) end =# -macro chk2(expr,label=:error) +macro chk2(expr, label=:error) @assert expr.head == :(=) lhs, rhs = expr.args @@ -36,27 +36,27 @@ end @inline _isdigit(c::Char) = isdigit(c) -@inline function parse_uint_and_stop(str, i, len, n::T) where {T <: Integer} +@inline function parse_uint_and_stop(str, i, len, n::T) where {T<:Integer} ten = T(10) # specialize handling of the first digit so we can return an error - max_without_overflow = div(typemax(T)-9,10) # the larg + max_without_overflow = div(typemax(T) - 9, 10) # the larg y1 = iterate(str, i) - y1===nothing && return n, false, i + y1 === nothing && return n, false, i c = y1[1] if _isdigit(c) && n <= max_without_overflow n *= ten - n += T(c-'0') + n += T(c - '0') else return n, false, i end i = y1[2] y2 = iterate(str, i) - while y2!==nothing && n <= max_without_overflow + while y2 !== nothing && n <= max_without_overflow c = y2[1] if _isdigit(c) n *= ten - n += T(c-'0') + n += T(c - '0') else return n, true, i end @@ -70,7 +70,7 @@ end # slurp up extra digits @inline function read_digits(str, i, len) y = iterate(str, i) - while y!==nothing + while y !== nothing c = y[1] if !_isdigit(c) # do nothing return i @@ -81,42 +81,46 @@ end return i end -@inline function tryparsenext_base10_digit(T,str,i, len) - y = iterate(str,i) - y===nothing && @goto error - c = y[1]; ii = y[2] +@inline function tryparsenext_base10_digit(T, str, i, len) + y = iterate(str, i) + y === nothing && @goto error + c = y[1] + ii = y[2] '0' <= c <= '9' || @goto error - return convert(T, c-'0'), ii + return convert(T, c - '0'), ii @label error return nothing end Base.@pure maxdigits(::Type{T}) where {T} = ndigits(typemax(T)) -Base.@pure min_with_max_digits(::Type{T}) where {T} = convert(T, T(10)^(maxdigits(T)-1)) +Base.@pure min_with_max_digits(::Type{T}) where {T} = convert(T, T(10)^(maxdigits(T) - 1)) -@inline function tryparsenext_base10(T, str,i,len) +@inline function tryparsenext_base10(T, str, i, len) i0 = i R = Nullable{T} - y = tryparsenext_base10_digit(T,str,i, len) - y===nothing && return R(), i - r = y[1]; i = y[2] + y = tryparsenext_base10_digit(T, str, i, len) + y === nothing && return R(), i + r = y[1] + i = y[2] # Eat zeros - while r==0 - y2 = tryparsenext_base10_digit(T,str,i, len) + while r == 0 + y2 = tryparsenext_base10_digit(T, str, i, len) y2 === nothing && return R(convert(T, 0)), i - r = y2[1]; i = y2[2] + r = y2[1] + i = y2[2] end digits = 1 ten = T(10) while true - y2 = tryparsenext_base10_digit(T,str,i,len) - y2===nothing && break + y2 = tryparsenext_base10_digit(T, str, i, len) + y2 === nothing && break digits += 1 - d = y2[1]; i = y2[2] - r = r*ten + d + d = y2[1] + i = y2[2] + r = r * ten + d end max_digits = maxdigits(T) @@ -141,10 +145,11 @@ end R = Nullable{Int} y = iterate(str, i) - if y===nothing + if y === nothing return return R(), i else - c = y[1]; ii = y[2] + c = y[1] + ii = y[2] if c == '-' return R(-1), ii elseif c == '+' @@ -165,10 +170,11 @@ end @inline function eatwhitespaces(str, i=1, l=lastindex(str)) y = iterate(str, i) - while y!==nothing - c = y[1]; ii = y[2] + while y !== nothing + c = y[1] + ii = y[2] if isspace(c) - i=ii + i = ii else break end @@ -181,27 +187,28 @@ end function eatnewlines(str, i=1, l=lastindex(str)) count = 0 y = iterate(str, i) - while y!==nothing - c = y[1]; ii = y[2] + while y !== nothing + c = y[1] + ii = y[2] if c == '\r' - i=ii + i = ii y2 = iterate(str, i) - if y2!==nothing + if y2 !== nothing c = y2[1] ii = y2[2] if c == '\n' - i=ii + i = ii end end count += 1 elseif c == '\n' - i=ii + i = ii y3 = iterate(str, i) - if y3!==nothing + if y3 !== nothing c = y3[1] ii = y3[2] if c == '\r' - i=ii + i = ii end end count += 1 @@ -216,7 +223,7 @@ end # Move past consecutive lines that start with commentchar. # Return a tuple of the new pos in str and the amount of comment lines moved past. -function eatcommentlines(str, i=1, l=lastindex(str), commentchar::Union{Char, Nothing}=nothing) +function eatcommentlines(str, i=1, l=lastindex(str), commentchar::Union{Char,Nothing}=nothing) commentchar === nothing && return i, 0 count = 0 @@ -233,13 +240,14 @@ end function stripquotes(x) x[1] in ('\'', '"') && x[1] == x[end] ? - strip(x, x[1]) : x + strip(x, x[1]) : x end function getlineend(str, i=1, l=lastindex(str)) y = iterate(str, i) - while y!==nothing - c = y[1]; ii = y[2] + while y !== nothing + c = y[1] + ii = y[2] isnewline(c) && break i = ii y = iterate(str, i) @@ -254,44 +262,48 @@ function getrowend(str, i, len, opts, delim) i0 = i i = eatwhitespaces(str, i, len) y = iterate(str, i) - while y!==nothing - c = y[1]; i = y[2] - if c==Char(opts.quotechar) + while y !== nothing + c = y[1] + i = y[2] + if c == Char(opts.quotechar) # We are now inside a quoted field y2 = iterate(str, i) - while y2!==nothing - c = y2[1]; i = y2[2] - if c==Char(opts.escapechar) + while y2 !== nothing + c = y2[1] + i = y2[2] + if c == Char(opts.escapechar) y3 = iterate(str, i) - if y3===nothing - if c==Char(opts.quotechar) + if y3 === nothing + if c == Char(opts.quotechar) return prevind(str, i) else error("Parsing error, quoted string never terminated.") end else - c2 = y3[1]; ii = y3[2] - if c2==Char(opts.quotechar) + c2 = y3[1] + ii = y3[2] + if c2 == Char(opts.quotechar) i = ii - elseif c==Char(opts.quotechar) + elseif c == Char(opts.quotechar) break end end - elseif c==Char(opts.quotechar) - break; + elseif c == Char(opts.quotechar) + break end y2 = iterate(str, i) - if y2===nothing + if y2 === nothing error("Parsing error, quoted string never terminated.") end end i = eatwhitespaces(str, i, len) y4 = iterate(str, i) - if y4!==nothing - c = y4[1]; i4 = y4[2] + if y4 !== nothing + c = y4[1] + i4 = y4[2] if isnewline(c) return prevind(str, i) - elseif c!=Char(delim) + elseif c != Char(delim) error("Invalid line") end else @@ -299,9 +311,10 @@ function getrowend(str, i, len, opts, delim) end else # We are now inside a non quoted field - while y!==nothing - c = y[1]; i = y[2] - if c==Char(delim) + while y !== nothing + c = y[1] + i = y[2] + if c == Char(delim) i = eatwhitespaces(str, i, len) break elseif isnewline(c) diff --git a/test/runtests.jl b/test/runtests.jl index 4bdf90f..581819b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -11,736 +11,736 @@ Base.:(==)(a::T, b::T) where {T<:AbstractToken} = string(a) == string(b) @testset "TextParse" begin -import TextParse: eatnewlines -@testset "eatnewlines" begin - @test eatnewlines("\n\r\nx") == (4, 2) - @test eatnewlines("x\n\r\nx") == (1, 0) - - # Also test the AbstractString variant - @test eatnewlines(SubString("\n\r\nx", 1)) == (4, 2) - @test eatnewlines(SubString("x\n\r\nx", 1)) == (1, 0) -end + import TextParse: eatnewlines + @testset "eatnewlines" begin + @test eatnewlines("\n\r\nx") == (4, 2) + @test eatnewlines("x\n\r\nx") == (1, 0) + + # Also test the AbstractString variant + @test eatnewlines(SubString("\n\r\nx", 1)) == (4, 2) + @test eatnewlines(SubString("x\n\r\nx", 1)) == (1, 0) + end -import TextParse: eatwhitespaces -@testset "eatwhitespaces" begin - @test eatwhitespaces(" x") == 3 - @test eatwhitespaces("x x") == 1 + import TextParse: eatwhitespaces + @testset "eatwhitespaces" begin + @test eatwhitespaces(" x") == 3 + @test eatwhitespaces("x x") == 1 - # Also test the AbstractString variant - @test eatwhitespaces(SubString(" x", 1)) == 3 - @test eatwhitespaces(SubString("x x", 1)) == 1 -end + # Also test the AbstractString variant + @test eatwhitespaces(SubString(" x", 1)) == 3 + @test eatwhitespaces(SubString("x x", 1)) == 1 + end -import TextParse: getlineend -@testset "getlineend" begin - @test getlineend("\nx") == 0 - @test getlineend("x\nx") == 1 - @test getlineend("x\ny", 2) == 1 - @test getlineend("x\nyz", 3) == 4 - @test getlineend("xβ\nyz") == 2 -end + import TextParse: getlineend + @testset "getlineend" begin + @test getlineend("\nx") == 0 + @test getlineend("x\nx") == 1 + @test getlineend("x\ny", 2) == 1 + @test getlineend("x\nyz", 3) == 4 + @test getlineend("xβ\nyz") == 2 + end -import TextParse: getrowend -@testset "getrowend" begin - opts = LocalOpts(',', false, '"', '"', true, true) - - @test getrowend("\nx", 1, lastindex("\nx"), opts, ',') == 0 - @test getrowend("x\nx", 1, lastindex("x\nx"), opts, ',') == 1 - @test getrowend("x\ny", 2, lastindex("x\ny"), opts, ',') == 1 - @test getrowend("x\nyz", 3, lastindex("x\nyz"), opts, ',') == 4 - @test getrowend("\"x\"\nyz", 1, lastindex("x\nyz"), opts, ',') == 3 - @test_throws ErrorException getrowend("\"x\" er", 1, lastindex("\"x\" er"), opts, ',') - @test getrowend("", 1, lastindex(""), opts, ',') == 0 - @test getrowend(" ", 1, lastindex(" "), opts, ',') == 2 - @test getrowend("a,b,c\nd,e,f", 1, lastindex("a,b,c\nd,e,f"), opts, ',') == 5 - @test getrowend("a,\"b\"\"ef\",c\nd,e,f", 1, lastindex("a,\"b\"\"ef\",c\nd,e,f"), opts, ',') == 11 - @test getrowend("a,\"b\"\"ef\"", 1, lastindex("a,\"b\"\"ef\""), opts, ',') == 9 - @test_throws ErrorException getrowend("\"xy", 1, lastindex("\"xy"), opts, ',') - - opts = LocalOpts(',', false, '"', '\\', true, true) - @test getrowend("a,\"bef\",f\na,b", 1, lastindex("a,\"bef\",f\na,b"), opts, ',') == 9 - @test_throws ErrorException getrowend("\"xy\\", 1, lastindex("\"xy"), opts, ',') -end + import TextParse: getrowend + @testset "getrowend" begin + opts = LocalOpts(',', false, '"', '"', true, true) + + @test getrowend("\nx", 1, lastindex("\nx"), opts, ',') == 0 + @test getrowend("x\nx", 1, lastindex("x\nx"), opts, ',') == 1 + @test getrowend("x\ny", 2, lastindex("x\ny"), opts, ',') == 1 + @test getrowend("x\nyz", 3, lastindex("x\nyz"), opts, ',') == 4 + @test getrowend("\"x\"\nyz", 1, lastindex("x\nyz"), opts, ',') == 3 + @test_throws ErrorException getrowend("\"x\" er", 1, lastindex("\"x\" er"), opts, ',') + @test getrowend("", 1, lastindex(""), opts, ',') == 0 + @test getrowend(" ", 1, lastindex(" "), opts, ',') == 2 + @test getrowend("a,b,c\nd,e,f", 1, lastindex("a,b,c\nd,e,f"), opts, ',') == 5 + @test getrowend("a,\"b\"\"ef\",c\nd,e,f", 1, lastindex("a,\"b\"\"ef\",c\nd,e,f"), opts, ',') == 11 + @test getrowend("a,\"b\"\"ef\"", 1, lastindex("a,\"b\"\"ef\""), opts, ',') == 9 + @test_throws ErrorException getrowend("\"xy", 1, lastindex("\"xy"), opts, ',') + + opts = LocalOpts(',', false, '"', '\\', true, true) + @test getrowend("a,\"bef\",f\na,b", 1, lastindex("a,\"bef\",f\na,b"), opts, ',') == 9 + @test_throws ErrorException getrowend("\"xy\\", 1, lastindex("\"xy"), opts, ',') + end -import TextParse: fromtype, Percentage -@testset "Float parsing" begin - - @test tryparsenext(fromtype(Float64), "1", 1, 1) |> unwrap == (1.0, 2) - @test tryparsenext(fromtype(Float64), "12", 1, 2) |> unwrap == (12.0, 3) - @test tryparsenext(fromtype(Float64), ".1", 1, 2) |> unwrap == (0.1, 3) - @test tryparsenext(fromtype(Float64), "1.1", 1, 3) |> unwrap == (1.1, 4) - @test tryparsenext(fromtype(Float32), "1.", 1, 2) |> unwrap == (1f0,3) - @test tryparsenext(fromtype(Float64), "-1.1", 1, 4) |> unwrap == (-1.1,5) - @test tryparsenext(fromtype(Float64), "-1.0e-12", 1, 8) |> unwrap == (-1.0e-12,9) - @test tryparsenext(fromtype(Float64), "-1e-12") |> unwrap == (-1.0e-12,7) - @test tryparsenext(fromtype(Float64), "-1.0E-12", 1, 8) |> unwrap == (-1.0e-12,9) - @test tryparsenext(fromtype(Float64), "5.e-3", 1, 5) |> unwrap == (5.0e-3,6) # 32 - @test tryparsenext(Percentage(), "33%") |> unwrap == (.33,4) - @test tryparsenext(Percentage(), "3.3%") |> unwrap == (.033,5) - - # Also test AbstractString variant - @test tryparsenext(fromtype(Float64), SubString("1", 1), 1, 1) |> unwrap == (1.0, 2) - @test tryparsenext(fromtype(Float64), SubString("12", 1), 1, 2) |> unwrap == (12.0, 3) - @test tryparsenext(fromtype(Float64), SubString(".1", 1), 1, 2) |> unwrap == (0.1, 3) - @test tryparsenext(fromtype(Float64), SubString("1.1", 1), 1, 3) |> unwrap == (1.1, 4) - @test tryparsenext(fromtype(Float32), SubString("1.", 1), 1, 2) |> unwrap == (1f0,3) - @test tryparsenext(fromtype(Float64), SubString("-1.1", 1), 1, 4) |> unwrap == (-1.1,5) - @test tryparsenext(fromtype(Float64), SubString("-1.0e-12", 1), 1, 8) |> unwrap == (-1.0e-12,9) - @test tryparsenext(fromtype(Float64), SubString("-1e-12", 1)) |> unwrap == (-1.0e-12,7) - @test tryparsenext(fromtype(Float64), SubString("-1.0E-12", 1), 1, 8) |> unwrap == (-1.0e-12,9) - @test tryparsenext(fromtype(Float64), SubString("5.e-3", 1), 1, 5) |> unwrap == (5.0e-3,6) # 32 - @test tryparsenext(Percentage(), SubString("33%", 1)) |> unwrap == (.33,4) - @test tryparsenext(Percentage(), SubString("3.3%", 1)) |> unwrap == (.033,5) - - rng = MersenneTwister(0) - floats = rand(1_000) - parsed_floats = map(i->get(tryparsenext(fromtype(Float64), i, 1, lastindex(i))[1]), string.(floats)) - @test parsed_floats == floats - - # Also test AbstractString variant - parsed_floats = map(i->get(tryparsenext(fromtype(Float64), SubString(i,1), 1, lastindex(i))[1]), string.(floats)) - @test parsed_floats == floats -end + import TextParse: fromtype, Percentage + @testset "Float parsing" begin + + @test tryparsenext(fromtype(Float64), "1", 1, 1) |> unwrap == (1.0, 2) + @test tryparsenext(fromtype(Float64), "12", 1, 2) |> unwrap == (12.0, 3) + @test tryparsenext(fromtype(Float64), ".1", 1, 2) |> unwrap == (0.1, 3) + @test tryparsenext(fromtype(Float64), "1.1", 1, 3) |> unwrap == (1.1, 4) + @test tryparsenext(fromtype(Float32), "1.", 1, 2) |> unwrap == (1.0f0, 3) + @test tryparsenext(fromtype(Float64), "-1.1", 1, 4) |> unwrap == (-1.1, 5) + @test tryparsenext(fromtype(Float64), "-1.0e-12", 1, 8) |> unwrap == (-1.0e-12, 9) + @test tryparsenext(fromtype(Float64), "-1e-12") |> unwrap == (-1.0e-12, 7) + @test tryparsenext(fromtype(Float64), "-1.0E-12", 1, 8) |> unwrap == (-1.0e-12, 9) + @test tryparsenext(fromtype(Float64), "5.e-3", 1, 5) |> unwrap == (5.0e-3, 6) # 32 + @test tryparsenext(Percentage(), "33%") |> unwrap == (0.33, 4) + @test tryparsenext(Percentage(), "3.3%") |> unwrap == (0.033, 5) + + # Also test AbstractString variant + @test tryparsenext(fromtype(Float64), SubString("1", 1), 1, 1) |> unwrap == (1.0, 2) + @test tryparsenext(fromtype(Float64), SubString("12", 1), 1, 2) |> unwrap == (12.0, 3) + @test tryparsenext(fromtype(Float64), SubString(".1", 1), 1, 2) |> unwrap == (0.1, 3) + @test tryparsenext(fromtype(Float64), SubString("1.1", 1), 1, 3) |> unwrap == (1.1, 4) + @test tryparsenext(fromtype(Float32), SubString("1.", 1), 1, 2) |> unwrap == (1.0f0, 3) + @test tryparsenext(fromtype(Float64), SubString("-1.1", 1), 1, 4) |> unwrap == (-1.1, 5) + @test tryparsenext(fromtype(Float64), SubString("-1.0e-12", 1), 1, 8) |> unwrap == (-1.0e-12, 9) + @test tryparsenext(fromtype(Float64), SubString("-1e-12", 1)) |> unwrap == (-1.0e-12, 7) + @test tryparsenext(fromtype(Float64), SubString("-1.0E-12", 1), 1, 8) |> unwrap == (-1.0e-12, 9) + @test tryparsenext(fromtype(Float64), SubString("5.e-3", 1), 1, 5) |> unwrap == (5.0e-3, 6) # 32 + @test tryparsenext(Percentage(), SubString("33%", 1)) |> unwrap == (0.33, 4) + @test tryparsenext(Percentage(), SubString("3.3%", 1)) |> unwrap == (0.033, 5) + + rng = MersenneTwister(0) + floats = rand(1_000) + parsed_floats = map(i -> get(tryparsenext(fromtype(Float64), i, 1, lastindex(i))[1]), string.(floats)) + @test parsed_floats == floats + + # Also test AbstractString variant + parsed_floats = map(i -> get(tryparsenext(fromtype(Float64), SubString(i, 1), 1, lastindex(i))[1]), string.(floats)) + @test parsed_floats == floats + end -@testset "Int parsing" begin - @test tryparsenext(fromtype(Int64), "1", 1, 1) |> unwrap == (1, 2) - @test tryparsenext(fromtype(Int64), "01", 1, 2) |> unwrap == (1, 3) - @test tryparsenext(fromtype(Int64), "0001", 1, 4) |> unwrap == (1, 5) - @test tryparsenext(fromtype(Int64), "123", 1, 3) |> unwrap == (123, 4) - @test tryparsenext(fromtype(Int64), "00123", 1, 5) |> unwrap == (123, 6) - @test tryparsenext(fromtype(Int64), "-1", 1, 2) |> unwrap == (-1, 3) - @test tryparsenext(fromtype(Int64), "-01", 1, 3) |> unwrap == (-1, 4) - @test tryparsenext(fromtype(Int64), "-0001", 1, 5) |> unwrap == (-1, 6) - @test tryparsenext(fromtype(Int64), "-123", 1, 4) |> unwrap == (-123, 5) - @test tryparsenext(fromtype(Int64), "-00123", 1, 6) |> unwrap == (-123, 7) - @test tryparsenext(fromtype(Int64), "+1", 1, 2) |> unwrap == (1, 3) - @test tryparsenext(fromtype(Int64), "+01", 1, 3) |> unwrap == (1, 4) - @test tryparsenext(fromtype(Int64), "+0001", 1, 5) |> unwrap == (1, 6) - @test tryparsenext(fromtype(Int64), "+123", 1, 4) |> unwrap == (123, 5) - @test tryparsenext(fromtype(Int64), "+00123", 1, 6) |> unwrap == (123, 7) - @test tryparsenext(fromtype(Int64), "9223372036854775807", 1, 19) |> unwrap == (9223372036854775807, 20) - @test tryparsenext(fromtype(Int64), "9223372036854775808", 1, 19) |> failedat == 1 - @test tryparsenext(fromtype(Int64), "19223372036854775808", 1, 20) |> failedat == 1 - @test tryparsenext(fromtype(UInt64), "18446744073709551615", 1, 20) |> unwrap == (0xffffffffffffffff, 21) - @test tryparsenext(fromtype(UInt64), "18446744073709551616", 1, 20) |> failedat == 1 - @test tryparsenext(fromtype(Int128), "170141183460469231731687303715884105727", 1, 39) |> unwrap == (170141183460469231731687303715884105727, 40) - @test tryparsenext(fromtype(Int128), "170141183460469231731687303715884105728", 1, 39) |> failedat == 1 - @test tryparsenext(fromtype(UInt128), "340282366920938463463374607431768211455", 1, 39) |> unwrap == (0xffffffffffffffffffffffffffffffff, 40) - @test tryparsenext(fromtype(UInt128), "340282366920938463463374607431768211456", 1, 39) |> failedat == 1 -end + @testset "Int parsing" begin + @test tryparsenext(fromtype(Int64), "1", 1, 1) |> unwrap == (1, 2) + @test tryparsenext(fromtype(Int64), "01", 1, 2) |> unwrap == (1, 3) + @test tryparsenext(fromtype(Int64), "0001", 1, 4) |> unwrap == (1, 5) + @test tryparsenext(fromtype(Int64), "123", 1, 3) |> unwrap == (123, 4) + @test tryparsenext(fromtype(Int64), "00123", 1, 5) |> unwrap == (123, 6) + @test tryparsenext(fromtype(Int64), "-1", 1, 2) |> unwrap == (-1, 3) + @test tryparsenext(fromtype(Int64), "-01", 1, 3) |> unwrap == (-1, 4) + @test tryparsenext(fromtype(Int64), "-0001", 1, 5) |> unwrap == (-1, 6) + @test tryparsenext(fromtype(Int64), "-123", 1, 4) |> unwrap == (-123, 5) + @test tryparsenext(fromtype(Int64), "-00123", 1, 6) |> unwrap == (-123, 7) + @test tryparsenext(fromtype(Int64), "+1", 1, 2) |> unwrap == (1, 3) + @test tryparsenext(fromtype(Int64), "+01", 1, 3) |> unwrap == (1, 4) + @test tryparsenext(fromtype(Int64), "+0001", 1, 5) |> unwrap == (1, 6) + @test tryparsenext(fromtype(Int64), "+123", 1, 4) |> unwrap == (123, 5) + @test tryparsenext(fromtype(Int64), "+00123", 1, 6) |> unwrap == (123, 7) + @test tryparsenext(fromtype(Int64), "9223372036854775807", 1, 19) |> unwrap == (9223372036854775807, 20) + @test tryparsenext(fromtype(Int64), "9223372036854775808", 1, 19) |> failedat == 1 + @test tryparsenext(fromtype(Int64), "19223372036854775808", 1, 20) |> failedat == 1 + @test tryparsenext(fromtype(UInt64), "18446744073709551615", 1, 20) |> unwrap == (0xffffffffffffffff, 21) + @test tryparsenext(fromtype(UInt64), "18446744073709551616", 1, 20) |> failedat == 1 + @test tryparsenext(fromtype(Int128), "170141183460469231731687303715884105727", 1, 39) |> unwrap == (170141183460469231731687303715884105727, 40) + @test tryparsenext(fromtype(Int128), "170141183460469231731687303715884105728", 1, 39) |> failedat == 1 + @test tryparsenext(fromtype(UInt128), "340282366920938463463374607431768211455", 1, 39) |> unwrap == (0xffffffffffffffffffffffffffffffff, 40) + @test tryparsenext(fromtype(UInt128), "340282366920938463463374607431768211456", 1, 39) |> failedat == 1 + end -import TextParse: StringToken -using WeakRefStrings -@testset "String parsing" begin - - # default options - @test tryparsenext(StringToken(String), "") |> unwrap == ("", 1) - x = "x" - @test tryparsenext(StringToken(WeakRefString), WeakRefString(pointer(x), 1)) |> unwrap == ("x", 2) - @test tryparsenext(StringToken(String), "x") |> unwrap == ("x", 2) - @test tryparsenext(StringToken(String), "x ") |> unwrap == ("x ", 3) - @test tryparsenext(StringToken(String), " x") |> unwrap == (" x", 3) - @test tryparsenext(StringToken(String), "x\ny") |> unwrap == ("x", 2) - @test tryparsenext(StringToken(String), "x,y") |> unwrap == ("x", 2) # test escape - - opts = LocalOpts(',', false, '"', '"', true, true) - @test tryparsenext(StringToken(String), "", opts) |> unwrap == ("", 1) - @test tryparsenext(StringToken(String), "\"\"", opts) |> unwrap == ("\"\"", 3) - @test tryparsenext(StringToken(String), "x", opts) |> unwrap == ("x", 2) - # test including new lines - @test tryparsenext(StringToken(String), "x\ny", opts) |> unwrap == ("x\ny", 4) - @test tryparsenext(StringToken(String), "\"x\ny\"", opts) |> unwrap == ("\"x\ny\"", 6) - - opts = LocalOpts(',', false, '"', '"', false, true) - # test that includequotes option doesn't affect string - @test tryparsenext(StringToken(String), "\"\"", opts) |> unwrap == ("\"\"", 3) - - opts = LocalOpts(',', false, '"', '\\', false, false) - str = "Owner 2 ”Vicepresident\"\"" - @test tryparsenext(Quoted(String, '"', '\\'), str, opts) |> unwrap == (str, lastindex(str)+1) - str1 = "\"Owner 2 ”Vicepresident\"\"\"" - @test tryparsenext(Quoted(String, '"', '"'), str1) |> unwrap == ("Owner 2 ”Vicepresident\"", lastindex(str1)+1) - @test tryparsenext(Quoted(String, '"', '"'), "\"\tx\"") |> unwrap == ("\tx", 5) - opts = LocalOpts(',', true, '"', '\\', false, false) - @test tryparsenext(StringToken(String), "x y",1,3, opts) |> unwrap == ("x", 2) - - @test tryparsenext(StringToken(String), "abcβ") |> unwrap == ("abcβ", 6) -end + import TextParse: StringToken + using WeakRefStrings + @testset "String parsing" begin + + # default options + @test tryparsenext(StringToken(String), "") |> unwrap == ("", 1) + x = "x" + @test tryparsenext(StringToken(WeakRefString), WeakRefString(pointer(x), 1)) |> unwrap == ("x", 2) + @test tryparsenext(StringToken(String), "x") |> unwrap == ("x", 2) + @test tryparsenext(StringToken(String), "x ") |> unwrap == ("x ", 3) + @test tryparsenext(StringToken(String), " x") |> unwrap == (" x", 3) + @test tryparsenext(StringToken(String), "x\ny") |> unwrap == ("x", 2) + @test tryparsenext(StringToken(String), "x,y") |> unwrap == ("x", 2) # test escape + + opts = LocalOpts(',', false, '"', '"', true, true) + @test tryparsenext(StringToken(String), "", opts) |> unwrap == ("", 1) + @test tryparsenext(StringToken(String), "\"\"", opts) |> unwrap == ("\"\"", 3) + @test tryparsenext(StringToken(String), "x", opts) |> unwrap == ("x", 2) + # test including new lines + @test tryparsenext(StringToken(String), "x\ny", opts) |> unwrap == ("x\ny", 4) + @test tryparsenext(StringToken(String), "\"x\ny\"", opts) |> unwrap == ("\"x\ny\"", 6) + + opts = LocalOpts(',', false, '"', '"', false, true) + # test that includequotes option doesn't affect string + @test tryparsenext(StringToken(String), "\"\"", opts) |> unwrap == ("\"\"", 3) + + opts = LocalOpts(',', false, '"', '\\', false, false) + str = "Owner 2 ”Vicepresident\"\"" + @test tryparsenext(Quoted(String, '"', '\\'), str, opts) |> unwrap == (str, lastindex(str) + 1) + str1 = "\"Owner 2 ”Vicepresident\"\"\"" + @test tryparsenext(Quoted(String, '"', '"'), str1) |> unwrap == ("Owner 2 ”Vicepresident\"", lastindex(str1) + 1) + @test tryparsenext(Quoted(String, '"', '"'), "\"\tx\"") |> unwrap == ("\tx", 5) + opts = LocalOpts(',', true, '"', '\\', false, false) + @test tryparsenext(StringToken(String), "x y", 1, 3, opts) |> unwrap == ("x", 2) + + @test tryparsenext(StringToken(String), "abcβ") |> unwrap == ("abcβ", 6) + end -import TextParse: Quoted, NAToken, Unknown -@testset "Quoted string parsing" begin - opts = LocalOpts(',', false, '"', '"', true, true) - - @test tryparsenext(Quoted(String, '"', '"'), "\"\"") |> unwrap == ("", 3) - @test tryparsenext(Quoted(String, '"', '"'), "\"\" ", opts) |> unwrap == ("", 3) - @test tryparsenext(Quoted(String, '"', '"'), "\"x\"") |> unwrap == ("x", 4) - @test tryparsenext(Quoted(String, '"', '"', includequotes=true), "\"x\"") |> unwrap == ("\"x\"", 4) - str2 = "\"\"\"\"" - @test tryparsenext(Quoted(String, '"', '"'), str2, opts) |> unwrap == ("\"", lastindex(str2)+1) - str1 = "\"x”y\"\"\"" - @test tryparsenext(Quoted(StringToken(String), '"', '"', required=true), "x\"y\"") |> failedat == 1 - - @test tryparsenext(Quoted(String, '"', '"'), str1) |> unwrap == ("x”y\"", lastindex(str1)+1) - @test tryparsenext(Quoted(StringToken(String), '"', '\\'), "\"x\\\"yz\"") |> unwrap == ("x\"yz", 8) - @test tryparsenext(Quoted(NAToken(fromtype(Int)), '"', '"'), "1") |> unwrap == (1,2) - - t = tryparsenext(Quoted(NAToken(fromtype(Int)), '"', '"'), "") |> unwrap - @test ismissing(t[1]) - @test t[2] == 1 - - t = tryparsenext(Quoted(NAToken(fromtype(Int)), '"', '"'), "\"\"") |> unwrap - @test ismissing(t[1]) - @test t[2] == 3 - @test tryparsenext(Quoted(NAToken(fromtype(Int)), '"', '"'), "\"1\"") |> unwrap == (1, 4) - - - @test tryparsenext(Quoted(StringToken(String), '"', '"'), "\"abc\"") |> unwrap == ("abc", 6) - @test tryparsenext(Quoted(StringToken(String), '"', '"'), "x\"abc\"") |> unwrap == ("x\"abc\"", 7) - @test tryparsenext(Quoted(StringToken(String), '"', '"'), "\"a\nbc\"") |> unwrap == ("a\nbc", 7) - @test tryparsenext(Quoted(StringToken(String), '"', '"', required=true), "x\"abc\"") |> failedat == 1 - @test tryparsenext(Quoted(fromtype(Int), '"', '"'), "21") |> unwrap == (21,3) - @test tryparsenext(Quoted(NAToken(fromtype(Int)), '"', '"'), "21") |> unwrap == (21,3) - - t = tryparsenext(Quoted(NAToken(fromtype(Int)), '"', '"'), "") |> unwrap - @test ismissing(t[1]) - @test t[2] == 1 - - t = tryparsenext(Quoted(NAToken(fromtype(Int)), '"', '"'), "\"\"") |> unwrap - @test ismissing(t[1]) - @test t[2] == 3 - - @test tryparsenext(Quoted(NAToken(fromtype(Int)), '"', '"'), "\"21\"") |> unwrap == (21, 5) - @test ismissing(tryparsenext(Quoted(NAToken(Unknown()), '"', '"'), " ") |> unwrap |> first) - opts = LocalOpts(',', false,'"', '"', false, false) - @test tryparsenext(Quoted(StringToken(String), '"', '"'), "x,", opts) |> unwrap == ("x", 2) - - # stripspaces - @test tryparsenext(Quoted(Percentage(), '"', '"'), "\" 10%\",", opts) |> unwrap == (0.1, 7) - @test tryparsenext(Quoted(String, '"', '"'), "\" 10%\",", opts) |> unwrap == (" 10%", 7) - opts = LocalOpts(',', true,'"', '"', false, false) - @test tryparsenext(Quoted(StringToken(String), '"', '"'), "\"x y\" y", opts) |> unwrap == ("x y", 6) - @test tryparsenext(Quoted(StringToken(String), '"', '"'), "x y", opts) |> unwrap == ("x", 2) -end + import TextParse: Quoted, NAToken, Unknown + @testset "Quoted string parsing" begin + opts = LocalOpts(',', false, '"', '"', true, true) + + @test tryparsenext(Quoted(String, '"', '"'), "\"\"") |> unwrap == ("", 3) + @test tryparsenext(Quoted(String, '"', '"'), "\"\" ", opts) |> unwrap == ("", 3) + @test tryparsenext(Quoted(String, '"', '"'), "\"x\"") |> unwrap == ("x", 4) + @test tryparsenext(Quoted(String, '"', '"', includequotes=true), "\"x\"") |> unwrap == ("\"x\"", 4) + str2 = "\"\"\"\"" + @test tryparsenext(Quoted(String, '"', '"'), str2, opts) |> unwrap == ("\"", lastindex(str2) + 1) + str1 = "\"x”y\"\"\"" + @test tryparsenext(Quoted(StringToken(String), '"', '"', required=true), "x\"y\"") |> failedat == 1 + + @test tryparsenext(Quoted(String, '"', '"'), str1) |> unwrap == ("x”y\"", lastindex(str1) + 1) + @test tryparsenext(Quoted(StringToken(String), '"', '\\'), "\"x\\\"yz\"") |> unwrap == ("x\"yz", 8) + @test tryparsenext(Quoted(NAToken(fromtype(Int)), '"', '"'), "1") |> unwrap == (1, 2) + + t = tryparsenext(Quoted(NAToken(fromtype(Int)), '"', '"'), "") |> unwrap + @test ismissing(t[1]) + @test t[2] == 1 + + t = tryparsenext(Quoted(NAToken(fromtype(Int)), '"', '"'), "\"\"") |> unwrap + @test ismissing(t[1]) + @test t[2] == 3 + @test tryparsenext(Quoted(NAToken(fromtype(Int)), '"', '"'), "\"1\"") |> unwrap == (1, 4) + + + @test tryparsenext(Quoted(StringToken(String), '"', '"'), "\"abc\"") |> unwrap == ("abc", 6) + @test tryparsenext(Quoted(StringToken(String), '"', '"'), "x\"abc\"") |> unwrap == ("x\"abc\"", 7) + @test tryparsenext(Quoted(StringToken(String), '"', '"'), "\"a\nbc\"") |> unwrap == ("a\nbc", 7) + @test tryparsenext(Quoted(StringToken(String), '"', '"', required=true), "x\"abc\"") |> failedat == 1 + @test tryparsenext(Quoted(fromtype(Int), '"', '"'), "21") |> unwrap == (21, 3) + @test tryparsenext(Quoted(NAToken(fromtype(Int)), '"', '"'), "21") |> unwrap == (21, 3) + + t = tryparsenext(Quoted(NAToken(fromtype(Int)), '"', '"'), "") |> unwrap + @test ismissing(t[1]) + @test t[2] == 1 + + t = tryparsenext(Quoted(NAToken(fromtype(Int)), '"', '"'), "\"\"") |> unwrap + @test ismissing(t[1]) + @test t[2] == 3 + + @test tryparsenext(Quoted(NAToken(fromtype(Int)), '"', '"'), "\"21\"") |> unwrap == (21, 5) + @test ismissing(tryparsenext(Quoted(NAToken(Unknown()), '"', '"'), " ") |> unwrap |> first) + opts = LocalOpts(',', false, '"', '"', false, false) + @test tryparsenext(Quoted(StringToken(String), '"', '"'), "x,", opts) |> unwrap == ("x", 2) + + # stripspaces + @test tryparsenext(Quoted(Percentage(), '"', '"'), "\" 10%\",", opts) |> unwrap == (0.1, 7) + @test tryparsenext(Quoted(String, '"', '"'), "\" 10%\",", opts) |> unwrap == (" 10%", 7) + opts = LocalOpts(',', true, '"', '"', false, false) + @test tryparsenext(Quoted(StringToken(String), '"', '"'), "\"x y\" y", opts) |> unwrap == ("x y", 6) + @test tryparsenext(Quoted(StringToken(String), '"', '"'), "x y", opts) |> unwrap == ("x", 2) + end -@testset "NA parsing" begin - t = tryparsenext(NAToken(fromtype(Float64)), ",") |> unwrap - @test ismissing(t[1]) - @test t[2] == 1 + @testset "NA parsing" begin + t = tryparsenext(NAToken(fromtype(Float64)), ",") |> unwrap + @test ismissing(t[1]) + @test t[2] == 1 - t = tryparsenext(NAToken(fromtype(Float64)), "NA,") |> unwrap - @test ismissing(t[1]) - @test t[2] == 3 + t = tryparsenext(NAToken(fromtype(Float64)), "NA,") |> unwrap + @test ismissing(t[1]) + @test t[2] == 3 - @test tryparsenext(NAToken(fromtype(Float64)), "X,") |> failedat == 1 - @test tryparsenext(NAToken(fromtype(Float64)), "1.212,") |> unwrap == (1.212, 6) -end + @test tryparsenext(NAToken(fromtype(Float64)), "X,") |> failedat == 1 + @test tryparsenext(NAToken(fromtype(Float64)), "1.212,") |> unwrap == (1.212, 6) + end -import TextParse: Field -@testset "Field parsing" begin - f = fromtype(Int) - @test tryparsenext(Field(f), "12,3") |> unwrap == (12, 4) - @test tryparsenext(Field(f), "12 ,3") |> unwrap == (12, 5) - @test tryparsenext(Field(f), " 12 ,3") |> unwrap == (12, 6) - opts = LocalOpts('\t', false, 'x','x',true,false) - @test tryparsenext(Field(f), "12\t3", 1, 4, opts) |> unwrap == (12, 4) - @test tryparsenext(Field(f), "12 \t3", 1, 5, opts) |> unwrap == (12, 5) - @test tryparsenext(Field(f), " 12 \t 3", 1, 6, opts) |> unwrap == (12, 6) - opts = LocalOpts('\t', true, 'x','x',true,false) - @test tryparsenext(Field(f), " 12 3", 1, 5, opts) |> unwrap == (12, 5) - @test tryparsenext(Field(f, ignore_end_whitespace=false), " 12 \t 3", 1,6, opts) |> unwrap == (12, 5) - opts = LocalOpts(' ', false, 'x','x',false, false) - @test tryparsenext(Field(f,ignore_end_whitespace=false), "12 3", 1,4,opts) |> unwrap == (12, 4) -# @test tryparsenext(Field(f,ignore_end_whitespace=false), "12 \t3", 1,5,opts) |> failedat == 3 - opts = LocalOpts('\t', false, 'x','x',false, false) - @test tryparsenext(Field(f,ignore_end_whitespace=false), " 12\t 3", 1, 6, opts) |> unwrap == (12,5) - @test tryparsenext(Field(f,eoldelim=true), " 12\n", 1, 4, opts) |> unwrap == (12,5) - @test tryparsenext(Field(f,eoldelim=true), " 12\n\r\n", 1, 5, opts) |> unwrap == (12,6) - @test tryparsenext(Field(f,eoldelim=true), " 12") |> unwrap == (12,4) - - # Also test AbstractString variant - @test tryparsenext(Field(f), SubString("12,3",1)) |> unwrap == (12, 4) - @test tryparsenext(Field(f), SubString("12 ,3",1)) |> unwrap == (12, 5) - @test tryparsenext(Field(f), SubString(" 12 ,3",1)) |> unwrap == (12, 6) - opts = LocalOpts('\t', false, 'x','x',true,false) - @test tryparsenext(Field(f), SubString("12\t3",1), 1, 4, opts) |> unwrap == (12, 4) - @test tryparsenext(Field(f), SubString("12 \t3",1), 1, 5, opts) |> unwrap == (12, 5) - @test tryparsenext(Field(f), SubString(" 12 \t 3",1), 1, 6, opts) |> unwrap == (12, 6) - opts = LocalOpts('\t', true, 'x','x',true,false) - @test tryparsenext(Field(f), SubString(" 12 3",1), 1, 5, opts) |> unwrap == (12, 5) - @test tryparsenext(Field(f, ignore_end_whitespace=false), SubString(" 12 \t 3",1), 1,6, opts) |> unwrap == (12, 5) - opts = LocalOpts(' ', false, 'x','x',false, false) - @test tryparsenext(Field(f,ignore_end_whitespace=false), SubString("12 3",1), 1,4,opts) |> unwrap == (12, 4) -# @test tryparsenext(Field(f,ignore_end_whitespace=false), "12 \t3", 1,5,opts) |> failedat == 3 - opts = LocalOpts('\t', false, 'x','x',false, false) - @test tryparsenext(Field(f,ignore_end_whitespace=false), SubString(" 12\t 3",1), 1, 6, opts) |> unwrap == (12,5) - @test tryparsenext(Field(f,eoldelim=true), SubString(" 12\n",1), 1, 4, opts) |> unwrap == (12,5) - @test tryparsenext(Field(f,eoldelim=true), SubString(" 12\n\r\n",1), 1, 5, opts) |> unwrap == (12,6) - @test tryparsenext(Field(f,eoldelim=true), SubString(" 12",1)) |> unwrap == (12,4) -end + import TextParse: Field + @testset "Field parsing" begin + f = fromtype(Int) + @test tryparsenext(Field(f), "12,3") |> unwrap == (12, 4) + @test tryparsenext(Field(f), "12 ,3") |> unwrap == (12, 5) + @test tryparsenext(Field(f), " 12 ,3") |> unwrap == (12, 6) + opts = LocalOpts('\t', false, 'x', 'x', true, false) + @test tryparsenext(Field(f), "12\t3", 1, 4, opts) |> unwrap == (12, 4) + @test tryparsenext(Field(f), "12 \t3", 1, 5, opts) |> unwrap == (12, 5) + @test tryparsenext(Field(f), " 12 \t 3", 1, 6, opts) |> unwrap == (12, 6) + opts = LocalOpts('\t', true, 'x', 'x', true, false) + @test tryparsenext(Field(f), " 12 3", 1, 5, opts) |> unwrap == (12, 5) + @test tryparsenext(Field(f, ignore_end_whitespace=false), " 12 \t 3", 1, 6, opts) |> unwrap == (12, 5) + opts = LocalOpts(' ', false, 'x', 'x', false, false) + @test tryparsenext(Field(f, ignore_end_whitespace=false), "12 3", 1, 4, opts) |> unwrap == (12, 4) + # @test tryparsenext(Field(f,ignore_end_whitespace=false), "12 \t3", 1,5,opts) |> failedat == 3 + opts = LocalOpts('\t', false, 'x', 'x', false, false) + @test tryparsenext(Field(f, ignore_end_whitespace=false), " 12\t 3", 1, 6, opts) |> unwrap == (12, 5) + @test tryparsenext(Field(f, eoldelim=true), " 12\n", 1, 4, opts) |> unwrap == (12, 5) + @test tryparsenext(Field(f, eoldelim=true), " 12\n\r\n", 1, 5, opts) |> unwrap == (12, 6) + @test tryparsenext(Field(f, eoldelim=true), " 12") |> unwrap == (12, 4) + + # Also test AbstractString variant + @test tryparsenext(Field(f), SubString("12,3", 1)) |> unwrap == (12, 4) + @test tryparsenext(Field(f), SubString("12 ,3", 1)) |> unwrap == (12, 5) + @test tryparsenext(Field(f), SubString(" 12 ,3", 1)) |> unwrap == (12, 6) + opts = LocalOpts('\t', false, 'x', 'x', true, false) + @test tryparsenext(Field(f), SubString("12\t3", 1), 1, 4, opts) |> unwrap == (12, 4) + @test tryparsenext(Field(f), SubString("12 \t3", 1), 1, 5, opts) |> unwrap == (12, 5) + @test tryparsenext(Field(f), SubString(" 12 \t 3", 1), 1, 6, opts) |> unwrap == (12, 6) + opts = LocalOpts('\t', true, 'x', 'x', true, false) + @test tryparsenext(Field(f), SubString(" 12 3", 1), 1, 5, opts) |> unwrap == (12, 5) + @test tryparsenext(Field(f, ignore_end_whitespace=false), SubString(" 12 \t 3", 1), 1, 6, opts) |> unwrap == (12, 5) + opts = LocalOpts(' ', false, 'x', 'x', false, false) + @test tryparsenext(Field(f, ignore_end_whitespace=false), SubString("12 3", 1), 1, 4, opts) |> unwrap == (12, 4) + # @test tryparsenext(Field(f,ignore_end_whitespace=false), "12 \t3", 1,5,opts) |> failedat == 3 + opts = LocalOpts('\t', false, 'x', 'x', false, false) + @test tryparsenext(Field(f, ignore_end_whitespace=false), SubString(" 12\t 3", 1), 1, 6, opts) |> unwrap == (12, 5) + @test tryparsenext(Field(f, eoldelim=true), SubString(" 12\n", 1), 1, 4, opts) |> unwrap == (12, 5) + @test tryparsenext(Field(f, eoldelim=true), SubString(" 12\n\r\n", 1), 1, 5, opts) |> unwrap == (12, 6) + @test tryparsenext(Field(f, eoldelim=true), SubString(" 12", 1)) |> unwrap == (12, 4) + end -import TextParse: Record -@testset "Record parsing" begin - r=Record((Field(fromtype(Int)), Field(fromtype(UInt)), Field(fromtype(Float64)))) - @test tryparsenext(r, "12,21,21,", 1, 9) |> unwrap == ((12, UInt(21), 21.0), 10) - @test tryparsenext(r, "12,21.0,21,", 1, 9) |> failedat == 6 - s = "12 , 21, 21.23," - @test tryparsenext(r, s, 1, length(s)) |> unwrap == ((12, UInt(21), 21.23), length(s)+1) -end + import TextParse: Record + @testset "Record parsing" begin + r = Record((Field(fromtype(Int)), Field(fromtype(UInt)), Field(fromtype(Float64)))) + @test tryparsenext(r, "12,21,21,", 1, 9) |> unwrap == ((12, UInt(21), 21.0), 10) + @test tryparsenext(r, "12,21.0,21,", 1, 9) |> failedat == 6 + s = "12 , 21, 21.23," + @test tryparsenext(r, s, 1, length(s)) |> unwrap == ((12, UInt(21), 21.23), length(s) + 1) + end -import TextParse: UseOne -@testset "UseOne" begin - f = UseOne((Field(fromtype(Int)), Field(fromtype(Float64)), Field(fromtype(Int), eoldelim=true)), 3) - @test tryparsenext(f, "1, 33.21, 45", 1, 12) |> unwrap == (45, 13) -end + import TextParse: UseOne + @testset "UseOne" begin + f = UseOne((Field(fromtype(Int)), Field(fromtype(Float64)), Field(fromtype(Int), eoldelim=true)), 3) + @test tryparsenext(f, "1, 33.21, 45", 1, 12) |> unwrap == (45, 13) + end -import TextParse: Repeated -@testset "Repeated" begin - f = Repeated(Field(fromtype(Int)), 3) - @test tryparsenext(f, "1, 33, 45,", 1, 12) |> unwrap == ((1,33,45), 11) + import TextParse: Repeated + @testset "Repeated" begin + f = Repeated(Field(fromtype(Int)), 3) + @test tryparsenext(f, "1, 33, 45,", 1, 12) |> unwrap == ((1, 33, 45), 11) - inp = join(map(string, [1:45;]), ", ") * ", " - out = ntuple(identity, 45) - f2 = Repeated(Field(fromtype(Int)), 45) - @test tryparsenext(f2, inp, 1, length(inp)) |> unwrap == (out, length(inp)) - #@benchmark tryparsenext($f2, $inp, 1, length($inp)) -end + inp = join(map(string, [1:45;]), ", ") * ", " + out = ntuple(identity, 45) + f2 = Repeated(Field(fromtype(Int)), 45) + @test tryparsenext(f2, inp, 1, length(inp)) |> unwrap == (out, length(inp)) + #@benchmark tryparsenext($f2, $inp, 1, length($inp)) + end -import TextParse: quotedsplit -@testset "quotedsplit" begin - opts = LocalOpts(',', false, '"', '\\', false, false) - @test quotedsplit("x", opts, false, 1, 1) == ["x"] - @test quotedsplit("x, y", opts, false, 1, 4) == ["x", "y"] - @test quotedsplit("\"x\", \"y\"", opts,false, 1, 8) == ["x", "y"] - @test quotedsplit("\"x\", \"y\"", opts,true, 1, 8) == ["\"x\"", "\"y\""] - str = """x\nx,"s,", "\\",x" """ - @test quotedsplit(str, opts, false, 3, length(str)) == ["x", "s,", "\",x"] - @test quotedsplit(",", opts, true, 1, 1) == ["", ""] - @test quotedsplit(", ", opts, false, 1, 2) == ["", ""] - str = "1, \"x \"\"y\"\" z\", 1" - qopts = LocalOpts(',', false,'"', '"', false, false) - @test quotedsplit(str, qopts,true, 1, lastindex(str)) == ["1", "\"x \"y\" z\"", "1"] -end + import TextParse: quotedsplit + @testset "quotedsplit" begin + opts = LocalOpts(',', false, '"', '\\', false, false) + @test quotedsplit("x", opts, false, 1, 1) == ["x"] + @test quotedsplit("x, y", opts, false, 1, 4) == ["x", "y"] + @test quotedsplit("\"x\", \"y\"", opts, false, 1, 8) == ["x", "y"] + @test quotedsplit("\"x\", \"y\"", opts, true, 1, 8) == ["\"x\"", "\"y\""] + str = """x\nx,"s,", "\\",x" """ + @test quotedsplit(str, opts, false, 3, length(str)) == ["x", "s,", "\",x"] + @test quotedsplit(",", opts, true, 1, 1) == ["", ""] + @test quotedsplit(", ", opts, false, 1, 2) == ["", ""] + str = "1, \"x \"\"y\"\" z\", 1" + qopts = LocalOpts(',', false, '"', '"', false, false) + @test quotedsplit(str, qopts, true, 1, lastindex(str)) == ["1", "\"x \"y\" z\"", "1"] + end -import TextParse: LocalOpts, readcolnames -@testset "CSV column names" begin - str1 = """ - a, b,c d, e - x,1,1,1 - ,1,1,1 - x,1,1.,1 - x y,1.0,1, - x,1.0,,1 - """ - - str2 = """ - a, " b", "c", "d\\" e " - """ - opts = LocalOpts(',', false, '"', '\\', false, false) - @test readcolnames(str1, opts, 1, String[]) == (["a", "b", "c d", "e"], 13) - @test readcolnames("\n\r$str1", opts, 3, Dict(3=>"x")) == (["a", "b", "x", "e"], 15) - #@test readcolnames("$str2", opts, 3, Dict(3=>"x")) == (["a", "b", "x", "d\" e"], 24) -end + import TextParse: LocalOpts, readcolnames + @testset "CSV column names" begin + str1 = """ + a, b,c d, e + x,1,1,1 + ,1,1,1 + x,1,1.,1 + x y,1.0,1, + x,1.0,,1 + """ -import TextParse: guesstoken, Unknown, Numeric, DateTimeToken, StrRange -@testset "guesstoken" begin - opts = LocalOpts(UInt8(','), false, UInt8('"'), UInt8('"'), false, false) - # Test null values - @test guesstoken("", opts, false, Unknown()) == NAToken(Unknown()) - @test guesstoken("null", opts, false, Unknown()) == NAToken(Unknown()) - @test guesstoken("", opts, false, NAToken(Unknown())) == NAToken(Unknown()) - @test guesstoken("null", opts, false, NAToken(Unknown())) == NAToken(Unknown()) - - # Test NA - @test guesstoken("1", opts, false, NAToken(Unknown())) == NAToken(Numeric(Int)) - @test guesstoken("1", opts, false, NAToken(Numeric(Int))) == NAToken(Numeric(Int)) - @test guesstoken("", opts, false, NAToken(Numeric(Int))) == NAToken(Numeric(Int)) - @test guesstoken("1%", opts, false, NAToken(Unknown())) == NAToken(Percentage()) - - # Test non-null numeric - @test guesstoken("1", opts, false, Unknown()) == Numeric(Int) - @test guesstoken("1", opts, false, Numeric(Int)) == Numeric(Int) - @test guesstoken("", opts, false, Numeric(Int)) == NAToken(Numeric(Int)) - @test guesstoken("1.0", opts, false, Numeric(Int)) == Numeric(Float64) - - # Test strings - @test guesstoken("x", opts, false, Unknown()) == Quoted(StringToken(StrRange), opts.quotechar, opts.escapechar) - - # Test nullable to string - @test guesstoken("x", opts, false, NAToken(Unknown())) == Quoted(StringToken(StrRange), opts.quotechar, opts.escapechar) - - # Test string to non-null (short circuit) - @test guesstoken("1", opts, false, StringToken(StrRange)) == StringToken(StrRange) - - # Test quoting - @test guesstoken("\"1\"", opts, false, Unknown()) == Quoted(Numeric(Int), opts.quotechar, opts.escapechar) - @test guesstoken("\"1\"", opts, false, Quoted(Numeric(Int), opts.quotechar, opts.escapechar)) == Quoted(Numeric(Int), opts.quotechar, opts.escapechar) - - # Test quoting with Nullable tokens - @test guesstoken("\"\"", opts, false, Quoted(Unknown(), opts.quotechar, opts.escapechar)) == Quoted(StringToken(StrRange), opts.quotechar, opts.escapechar) - @test guesstoken("\"\"", opts, false, Quoted(NAToken(Unknown()), opts.quotechar, opts.escapechar)) == Quoted(StringToken(StrRange), opts.quotechar, opts.escapechar) - @test guesstoken("\"\"", opts, false, Quoted(Numeric(Int), opts.quotechar, opts.escapechar)) == Quoted(StringToken(StrRange), opts.quotechar, opts.escapechar) - @test guesstoken("\"\"", opts, false, Unknown()) == Quoted(StringToken(StrRange), opts.quotechar, opts.escapechar) - @test guesstoken("\"\"", opts, false, Numeric(Int)) == Quoted(StringToken(StrRange), opts.quotechar, opts.escapechar) - @test guesstoken("", opts, false, Quoted(Numeric(Int), opts.quotechar, opts.escapechar)) == Quoted(NAToken(Numeric(Int)), opts.quotechar, opts.escapechar) - @test guesstoken("", opts, false, Quoted(NAToken(Numeric(Int)), opts.quotechar, opts.escapechar)) == Quoted(NAToken(Numeric(Int)), opts.quotechar, opts.escapechar) - @test guesstoken("1", opts, false, Quoted(NAToken(Numeric(Int)), opts.quotechar, opts.escapechar)) == Quoted(NAToken(Numeric(Int)), opts.quotechar, opts.escapechar) - @test guesstoken("\"1\"", opts, false, Quoted(NAToken(Numeric(Int)), opts.quotechar, opts.escapechar)) == Quoted(NAToken(Numeric(Int)), opts.quotechar, opts.escapechar) - - # Test DateTime detection: - tok = guesstoken("2016-01-01 10:10:10.10", opts, false, Unknown()) - @test tok == DateTimeToken(DateTime, dateformat"yyyy-mm-dd HH:MM:SS.s") - @test guesstoken("2016-01-01 10:10:10.10", opts, false, tok) == tok - @test guesstoken("2016-01-01 10:10:10.10", opts, false, Quoted(NAToken(Unknown()), opts.quotechar, opts.escapechar)) == Quoted(NAToken(tok), opts.quotechar, opts.escapechar) -end + str2 = """ + a, " b", "c", "d\\" e " + """ + opts = LocalOpts(',', false, '"', '\\', false, false) + @test readcolnames(str1, opts, 1, String[]) == (["a", "b", "c d", "e"], 13) + @test readcolnames("\n\r$str1", opts, 3, Dict(3 => "x")) == (["a", "b", "x", "e"], 15) + #@test readcolnames("$str2", opts, 3, Dict(3=>"x")) == (["a", "b", "x", "d\" e"], 24) + end -import TextParse: guesscolparsers -@testset "CSV type detect" begin - str1 = """ - a, b,c d, e - x,1,1,1 - x,1,1,1 - x,1,1.,1 - x y,1.0,1, - ,1.0,,1 - """ - opts = LocalOpts(',', false, '"', '\\', false, false) - _, pos = readcolnames(str1, opts, 1, String[]) - testtill(i, colparsers=[]) = guesscolparsers(str1, lastindex(str1), String[], opts, pos, i, colparsers, StringArray) - @test testtill(0) |> first == Any[] - @test testtill(1) |> first == Any[Quoted(StringToken(StrRange), '"', '"'), fromtype(Int), fromtype(Int), fromtype(Int)] - @test testtill(2) |> first == Any[Quoted(StringToken(StrRange), '"', '"'), fromtype(Int), fromtype(Int), fromtype(Int)] - @test testtill(3) |> first == Any[Quoted(StringToken(StrRange), '"', '"'), fromtype(Int), fromtype(Float64), fromtype(Int)] - @test testtill(4) |> first == Any[Quoted(StringToken(StrRange), '"', '"'), fromtype(Float64), fromtype(Float64), NAToken(fromtype(Int))] - @test testtill(5) |> first == Any[Quoted(StringToken(StrRange), '"', '"'), fromtype(Float64), NAToken(fromtype(Float64)), NAToken(fromtype(Int))] -end + import TextParse: guesstoken, Unknown, Numeric, DateTimeToken, StrRange + @testset "guesstoken" begin + opts = LocalOpts(UInt8(','), false, UInt8('"'), UInt8('"'), false, false) + # Test null values + @test guesstoken("", opts, false, Unknown()) == NAToken(Unknown()) + @test guesstoken("null", opts, false, Unknown()) == NAToken(Unknown()) + @test guesstoken("", opts, false, NAToken(Unknown())) == NAToken(Unknown()) + @test guesstoken("null", opts, false, NAToken(Unknown())) == NAToken(Unknown()) + + # Test NA + @test guesstoken("1", opts, false, NAToken(Unknown())) == NAToken(Numeric(Int)) + @test guesstoken("1", opts, false, NAToken(Numeric(Int))) == NAToken(Numeric(Int)) + @test guesstoken("", opts, false, NAToken(Numeric(Int))) == NAToken(Numeric(Int)) + @test guesstoken("1%", opts, false, NAToken(Unknown())) == NAToken(Percentage()) + + # Test non-null numeric + @test guesstoken("1", opts, false, Unknown()) == Numeric(Int) + @test guesstoken("1", opts, false, Numeric(Int)) == Numeric(Int) + @test guesstoken("", opts, false, Numeric(Int)) == NAToken(Numeric(Int)) + @test guesstoken("1.0", opts, false, Numeric(Int)) == Numeric(Float64) + + # Test strings + @test guesstoken("x", opts, false, Unknown()) == Quoted(StringToken(StrRange), opts.quotechar, opts.escapechar) + + # Test nullable to string + @test guesstoken("x", opts, false, NAToken(Unknown())) == Quoted(StringToken(StrRange), opts.quotechar, opts.escapechar) + + # Test string to non-null (short circuit) + @test guesstoken("1", opts, false, StringToken(StrRange)) == StringToken(StrRange) + + # Test quoting + @test guesstoken("\"1\"", opts, false, Unknown()) == Quoted(Numeric(Int), opts.quotechar, opts.escapechar) + @test guesstoken("\"1\"", opts, false, Quoted(Numeric(Int), opts.quotechar, opts.escapechar)) == Quoted(Numeric(Int), opts.quotechar, opts.escapechar) + + # Test quoting with Nullable tokens + @test guesstoken("\"\"", opts, false, Quoted(Unknown(), opts.quotechar, opts.escapechar)) == Quoted(StringToken(StrRange), opts.quotechar, opts.escapechar) + @test guesstoken("\"\"", opts, false, Quoted(NAToken(Unknown()), opts.quotechar, opts.escapechar)) == Quoted(StringToken(StrRange), opts.quotechar, opts.escapechar) + @test guesstoken("\"\"", opts, false, Quoted(Numeric(Int), opts.quotechar, opts.escapechar)) == Quoted(StringToken(StrRange), opts.quotechar, opts.escapechar) + @test guesstoken("\"\"", opts, false, Unknown()) == Quoted(StringToken(StrRange), opts.quotechar, opts.escapechar) + @test guesstoken("\"\"", opts, false, Numeric(Int)) == Quoted(StringToken(StrRange), opts.quotechar, opts.escapechar) + @test guesstoken("", opts, false, Quoted(Numeric(Int), opts.quotechar, opts.escapechar)) == Quoted(NAToken(Numeric(Int)), opts.quotechar, opts.escapechar) + @test guesstoken("", opts, false, Quoted(NAToken(Numeric(Int)), opts.quotechar, opts.escapechar)) == Quoted(NAToken(Numeric(Int)), opts.quotechar, opts.escapechar) + @test guesstoken("1", opts, false, Quoted(NAToken(Numeric(Int)), opts.quotechar, opts.escapechar)) == Quoted(NAToken(Numeric(Int)), opts.quotechar, opts.escapechar) + @test guesstoken("\"1\"", opts, false, Quoted(NAToken(Numeric(Int)), opts.quotechar, opts.escapechar)) == Quoted(NAToken(Numeric(Int)), opts.quotechar, opts.escapechar) + + # Test DateTime detection: + tok = guesstoken("2016-01-01 10:10:10.10", opts, false, Unknown()) + @test tok == DateTimeToken(DateTime, dateformat"yyyy-mm-dd HH:MM:SS.s") + @test guesstoken("2016-01-01 10:10:10.10", opts, false, tok) == tok + @test guesstoken("2016-01-01 10:10:10.10", opts, false, Quoted(NAToken(Unknown()), opts.quotechar, opts.escapechar)) == Quoted(NAToken(tok), opts.quotechar, opts.escapechar) + end + import TextParse: guesscolparsers + @testset "CSV type detect" begin + str1 = """ + a, b,c d, e + x,1,1,1 + x,1,1,1 + x,1,1.,1 + x y,1.0,1, + ,1.0,,1 + """ + opts = LocalOpts(',', false, '"', '\\', false, false) + _, pos = readcolnames(str1, opts, 1, String[]) + testtill(i, colparsers=[]) = guesscolparsers(str1, lastindex(str1), String[], opts, pos, i, colparsers, StringArray) + @test testtill(0) |> first == Any[] + @test testtill(1) |> first == Any[Quoted(StringToken(StrRange), '"', '"'), fromtype(Int), fromtype(Int), fromtype(Int)] + @test testtill(2) |> first == Any[Quoted(StringToken(StrRange), '"', '"'), fromtype(Int), fromtype(Int), fromtype(Int)] + @test testtill(3) |> first == Any[Quoted(StringToken(StrRange), '"', '"'), fromtype(Int), fromtype(Float64), fromtype(Int)] + @test testtill(4) |> first == Any[Quoted(StringToken(StrRange), '"', '"'), fromtype(Float64), fromtype(Float64), NAToken(fromtype(Int))] + @test testtill(5) |> first == Any[Quoted(StringToken(StrRange), '"', '"'), fromtype(Float64), NAToken(fromtype(Float64)), NAToken(fromtype(Int))] + end -import TextParse: getlineat -@testset "getlineat" begin - str = "abc\ndefg" - @test str[getlineat(str,1)] == "abc\n" - @test str[getlineat(str,4)] == "abc\n" - @test str[getlineat(str,5)] == "defg" - @test str[getlineat(str,lastindex(str))] == "defg" - @test getlineat("x", 5) == 1:1 -end + import TextParse: getlineat + @testset "getlineat" begin + str = "abc\ndefg" + @test str[getlineat(str, 1)] == "abc\n" + @test str[getlineat(str, 4)] == "abc\n" + @test str[getlineat(str, 5)] == "defg" + @test str[getlineat(str, lastindex(str))] == "defg" + @test getlineat("x", 5) == 1:1 + end -import TextParse: guessdateformat -@testset "date detection" begin - @test guessdateformat("2016") |> typeof == DateTimeToken(Date, dateformat"yyyy-mm-dd") |> typeof - @test guessdateformat("09/09/2016") |> typeof == DateTimeToken(Date, dateformat"mm/dd/yyyy") |> typeof - @test guessdateformat("24/09/2016") |> typeof == DateTimeToken(Date, dateformat"dd/mm/yyyy") |> typeof -end -@testset "date parsing" begin - tok = DateTimeToken(DateTime, dateformat"yyyy-mm-dd HH:MM:SS") - opts = LocalOpts('y', false, '"', '\\', false, false) - str = "1970-02-02 02:20:20" - @test tryparsenext(tok, str, 1, length(str), opts) |> unwrap == (DateTime("1970-02-02T02:20:20"), length(str)+1) - @test tryparsenext(tok, str*"x", 1, length(str)+1, opts) |> unwrap == (DateTime("1970-02-02T02:20:20"), length(str)+1) - @test tryparsenext(tok, str[1:end-3]*"x", 1, length(str)-2, opts) |> failedat == length(str)-2 - @test tryparsenext(tok, str[1:end-3]*"y", 1, length(str)-2, opts) |> unwrap == (DateTime("1970-02-02T02:20"), length(str)-2) -end + import TextParse: guessdateformat + @testset "date detection" begin + @test guessdateformat("2016") |> typeof == DateTimeToken(Date, dateformat"yyyy-mm-dd") |> typeof + @test guessdateformat("09/09/2016") |> typeof == DateTimeToken(Date, dateformat"mm/dd/yyyy") |> typeof + @test guessdateformat("24/09/2016") |> typeof == DateTimeToken(Date, dateformat"dd/mm/yyyy") |> typeof + end + + @testset "date parsing" begin + tok = DateTimeToken(DateTime, dateformat"yyyy-mm-dd HH:MM:SS") + opts = LocalOpts('y', false, '"', '\\', false, false) + str = "1970-02-02 02:20:20" + @test tryparsenext(tok, str, 1, length(str), opts) |> unwrap == (DateTime("1970-02-02T02:20:20"), length(str) + 1) + @test tryparsenext(tok, str * "x", 1, length(str) + 1, opts) |> unwrap == (DateTime("1970-02-02T02:20:20"), length(str) + 1) + @test tryparsenext(tok, str[1:end-3] * "x", 1, length(str) - 2, opts) |> failedat == length(str) - 2 + @test tryparsenext(tok, str[1:end-3] * "y", 1, length(str) - 2, opts) |> unwrap == (DateTime("1970-02-02T02:20"), length(str) - 2) + end + + import TextParse: _csvread + @testset "csvread" begin -import TextParse: _csvread -@testset "csvread" begin - - str1 = """ - a, b,c d, e - x,1,1,1 - ,1,,1 - x,1,1.,1 - x y,1.0,1, - x,1.0,,1 - """ - data = ((["x", "","x","x y","x"], - ones(5), - [1,missing,1,1,missing], - [1,1,1,missing,1]), - ["a", "b", "c d", "e"]) - @test isequal(_csvread(str1, ','), data) - coltype_test1 = _csvread(str1, - colparsers=Dict("b"=>Union{Missing, Float64}, - "e"=>Union{Missing,Float64})) - coltype_test2 = _csvread(str1, - colparsers=Dict(2=>Union{Missing, Float64}, - 4=>Union{Missing,Float64})) - - str2 = """ - x,1,1,1 - ,1,,1 - x,1,1.,1 - x y,1.0,1, - x,1.0,,1 - """ - coltype_test3 = _csvread(str2, header_exists=false, - colparsers=Dict(2=>Union{Missing,Float64}, - 4=>Union{Missing,Float64})) - @test eltype(coltype_test1[1][2]) == Union{Missing, Float64} - @test eltype(coltype_test1[1][4]) == Union{Missing, Float64} - @test eltype(coltype_test2[1][2]) == Union{Missing, Float64} - @test eltype(coltype_test2[1][4]) == Union{Missing, Float64} - @test eltype(coltype_test3[1][2]) == Union{Missing, Float64} - @test eltype(coltype_test3[1][4]) == Union{Missing, Float64} - - @test isequal(data, _csvread(str1, type_detect_rows=1)) - @test isequal(data, _csvread(str1, type_detect_rows=2)) - @test isequal(data, _csvread(str1, type_detect_rows=3)) - @test isequal(data, _csvread(str1, type_detect_rows=4)) - - # Test reparsing as a string column - str3 = """ - a, b,c d, e - 1,1,1,1 - x,1,1,1 - """ - coltype_test4 = _csvread(str3, type_detect_rows=1) - @test isequal(((["1","x"], [1,1], [1,1], [1,1]),["a","b","c d","e"]), coltype_test4) - - str4 = """ - a, b,c d, e - 1,1,01,1 - 2,1,x,1 - y,2,3,8 - """ - coltype_test4 = _csvread(str4, type_detect_rows=1) - @test isequal(((["1","2", "y"], [1,1,2], ["01","x", "3"], [1,1,8]),["a","b","c d","e"]), coltype_test4) - - str5 = """ - a, b,c d, e - 1,1,4,01.1 - 02,1,3,x - y,2,3,8 - """ - coltype_test4 = _csvread(str5, type_detect_rows=1) - @test isequal(((["1","02", "y"], [1,1,2], [4,3,3], ["01.1","x","8"]),["a","b","c d","e"]), coltype_test4) - - # test growing of columns if prediction is too low - @test _csvread("x,y\nabcd, defg\n,\n,\n", type_detect_rows=1) == - ((String["abcd", "", ""], String["defg", "", ""]), String["x", "y"]) - - # #19 - s=""" - x,y,z - 1,1,x - "2",2,x - 1,2,"x \"\"y\"\"" - """ - - res = (([1, 2, 1], [1, 2, 2], String["x", "x", "x \"y\""]), String["x", "y", "z"]) - @test_broken _csvread(s, type_detect_rows=1, escapechar='"') == res - @test_broken _csvread(s, type_detect_rows=2, escapechar='"') == res - @test _csvread(s, type_detect_rows=1, escapechar='"', stringarraytype=Array) == res - @test _csvread(s, type_detect_rows=2, escapechar='"', stringarraytype=Array) == res - - @test csvread(IOBuffer("x\n1")) == (([1],),["x"]) - - @test _csvread("x\n1\n") == (([1],),["x"]) - - # test detection of newlines in fields - s = """x, y - abc, def - g - hi,jkl - mno,pqr + str1 = """ + a, b,c d, e + x,1,1,1 + ,1,,1 + x,1,1.,1 + x y,1.0,1, + x,1.0,,1 """ + data = ((["x", "", "x", "x y", "x"], + ones(5), + [1, missing, 1, 1, missing], + [1, 1, 1, missing, 1]), + ["a", "b", "c d", "e"]) + @test isequal(_csvread(str1, ','), data) + coltype_test1 = _csvread(str1, + colparsers=Dict("b" => Union{Missing,Float64}, + "e" => Union{Missing,Float64})) + coltype_test2 = _csvread(str1, + colparsers=Dict(2 => Union{Missing,Float64}, + 4 => Union{Missing,Float64})) + + str2 = """ + x,1,1,1 + ,1,,1 + x,1,1.,1 + x y,1.0,1, + x,1.0,,1 + """ + coltype_test3 = _csvread(str2, header_exists=false, + colparsers=Dict(2 => Union{Missing,Float64}, + 4 => Union{Missing,Float64})) + @test eltype(coltype_test1[1][2]) == Union{Missing,Float64} + @test eltype(coltype_test1[1][4]) == Union{Missing,Float64} + @test eltype(coltype_test2[1][2]) == Union{Missing,Float64} + @test eltype(coltype_test2[1][4]) == Union{Missing,Float64} + @test eltype(coltype_test3[1][2]) == Union{Missing,Float64} + @test eltype(coltype_test3[1][4]) == Union{Missing,Float64} + + @test isequal(data, _csvread(str1, type_detect_rows=1)) + @test isequal(data, _csvread(str1, type_detect_rows=2)) + @test isequal(data, _csvread(str1, type_detect_rows=3)) + @test isequal(data, _csvread(str1, type_detect_rows=4)) + + # Test reparsing as a string column + str3 = """ + a, b,c d, e + 1,1,1,1 + x,1,1,1 + """ + coltype_test4 = _csvread(str3, type_detect_rows=1) + @test isequal(((["1", "x"], [1, 1], [1, 1], [1, 1]), ["a", "b", "c d", "e"]), coltype_test4) + + str4 = """ + a, b,c d, e + 1,1,01,1 + 2,1,x,1 + y,2,3,8 + """ + coltype_test4 = _csvread(str4, type_detect_rows=1) + @test isequal(((["1", "2", "y"], [1, 1, 2], ["01", "x", "3"], [1, 1, 8]), ["a", "b", "c d", "e"]), coltype_test4) + + str5 = """ + a, b,c d, e + 1,1,4,01.1 + 02,1,3,x + y,2,3,8 + """ + coltype_test4 = _csvread(str5, type_detect_rows=1) + @test isequal(((["1", "02", "y"], [1, 1, 2], [4, 3, 3], ["01.1", "x", "8"]), ["a", "b", "c d", "e"]), coltype_test4) + + # test growing of columns if prediction is too low + @test _csvread("x,y\nabcd, defg\n,\n,\n", type_detect_rows=1) == + ((String["abcd", "", ""], String["defg", "", ""]), String["x", "y"]) + + # #19 + s = """ + x,y,z + 1,1,x + "2",2,x + 1,2,"x \"\"y\"\"" + """ + + res = (([1, 2, 1], [1, 2, 2], String["x", "x", "x \"y\""]), String["x", "y", "z"]) + @test_broken _csvread(s, type_detect_rows=1, escapechar='"') == res + @test_broken _csvread(s, type_detect_rows=2, escapechar='"') == res + @test _csvread(s, type_detect_rows=1, escapechar='"', stringarraytype=Array) == res + @test _csvread(s, type_detect_rows=2, escapechar='"', stringarraytype=Array) == res + + @test csvread(IOBuffer("x\n1")) == (([1],), ["x"]) + + @test _csvread("x\n1\n") == (([1],), ["x"]) + + # test detection of newlines in fields + s = """x, y + abc, def + g + hi,jkl + mno,pqr + """ + + @test _csvread(s, type_detect_rows=1) == ((["abc", "g\nhi", "mno"], ["def", "jkl", "pqr"]), ["x", "y"]) + # test custom na strings + s = """ + x,y + 1,2 + ?,3 + 4,* + """ + nullness = ([false, true, false], [false, false, true]) + @test map(x -> map(ismissing, x), first(_csvread(s, nastrings=["?", "*"]))) == nullness + @test map(x -> map(ismissing, x), first(_csvread(s, nastrings=["?", "*"], type_detect_rows=1))) == nullness - @test _csvread(s, type_detect_rows=1) == ((["abc", "g\nhi", "mno"], ["def", "jkl", "pqr"]), ["x", "y"]) - # test custom na strings - s = """ - x,y - 1,2 - ?,3 - 4,* - """ - nullness = ([false, true, false], [false, false, true]) - @test map(x->map(ismissing, x), first(_csvread(s, nastrings=["?","*"]))) == nullness - @test map(x->map(ismissing, x), first(_csvread(s, nastrings=["?","*"], type_detect_rows=1))) == nullness - - @test isequal(csvread(["data/a.csv", "data/b.csv"]), - (([1.0, 2.0, 1.0, 2.0, 3.0], [2, 2, missing, missing, missing], + @test isequal(csvread(["data/a.csv", "data/b.csv"]), + (([1.0, 2.0, 1.0, 2.0, 3.0], [2, 2, missing, missing, missing], [missing, missing, missing, 2, 1]), String["x", "y", "z"], [2, 3])) - @test isequal(csvread(["data/a.csv", "data/b.csv"], samecols=[("y","z")]), - (([1.0, 2.0, 1.0, 2.0, 3.0], [2, 2, missing, 2, 1]), String["x", "y"], [2,3])) + @test isequal(csvread(["data/a.csv", "data/b.csv"], samecols=[("y", "z")]), + (([1.0, 2.0, 1.0, 2.0, 3.0], [2, 2, missing, 2, 1]), String["x", "y"], [2, 3])) - # shouldn't fail because y doesn't exist - @test _csvread("x\n1", colparsers=Dict("y"=>String)) == (([1],), ["x"]) + # shouldn't fail because y doesn't exist + @test _csvread("x\n1", colparsers=Dict("y" => String)) == (([1],), ["x"]) - # Don't try to guess type if it's provided by user. Issue JuliaDB.jl#109 - s=""" - time,value - "2017-11-09T07:00:07.391101180",0 - """ - @test _csvread(s) == ((String["2017-11-09T07:00:07.391101180"], [0]), String["time", "value"]) - @test _csvread(s, colparsers=Dict(:time=>String)) == ((String["2017-11-09T07:00:07.391101180"], [0]), String["time", "value"]) + # Don't try to guess type if it's provided by user. Issue JuliaDB.jl#109 + s = """ + time,value + "2017-11-09T07:00:07.391101180",0 + """ + @test _csvread(s) == ((String["2017-11-09T07:00:07.391101180"], [0]), String["time", "value"]) + @test _csvread(s, colparsers=Dict(:time => String)) == ((String["2017-11-09T07:00:07.391101180"], [0]), String["time", "value"]) - @test _csvread("") == ((), String[]) + @test _csvread("") == ((), String[]) - @test _csvread("""x""y"", z - a""b"", 1""", stringarraytype=Array) == ((["a\"\"b\"\""], [1]), ["x\"\"y\"\"", "z"]) -end + @test _csvread("""x""y"", z + a""b"", 1""", stringarraytype=Array) == ((["a\"\"b\"\""], [1]), ["x\"\"y\"\"", "z"]) + end -import TextParse: _csvread -@testset "commentchar" begin - - # First line a comment. - str1 = """ - x,y,z - #1,1,1 - 2,2,2 - """ - - @test _csvread(str1, commentchar='#') == (([2], [2], [2]), String["x", "y","z"]) - - # Last line a comment. - str2 = """ - x,y,z - 1,1,1 - #2,2,2 - """ - - @test _csvread(str2, commentchar='#') == (([1], [1], [1]), String["x", "y","z"]) - - # Multiple comments. - str3 = """ - x,y,z - 1,1,1 - #2,2,2 - #3,3,3 - #4,4,4 - 5,5,5 - #6,6,6 - """ - - @test _csvread(str3, commentchar='#') == (([1, 5], [1, 5], [1, 5]), String["x", "y","z"]) - - # Comments before headers. - str4 = """ - #foo - #bar - x,y,z - 1,1,1 - #2,2,2 - """ - - @test _csvread(str4, commentchar='#') == (([1], [1], [1]), String["x", "y","z"]) - - # No comments. - str5 = """ - x,y,z - 1,1,1 - 2,2,2 - """ - - @test _csvread(str5, commentchar='#') == (([1, 2], [1, 2], [1, 2]), String["x", "y","z"]) - - # Non-default comment. - str6 = """ - %test - x,y,z - 1,1,1 - %2,2,2 - 2,2,2 - """ - - @test _csvread(str6, commentchar='%') == (([1, 2], [1, 2], [1, 2]), String["x", "y","z"]) - - # Do not skip commented lines (commentchar=nothing). - str7 = """ - x,y,z - 1,1,1 - #2,2,2 - """ - - # Since we are not skipping commented lines the '#' character is considered - # data. This will force parsing to treat columns with '#'s as String columns. - # Here, we verify this behavior. - result = _csvread(str7) - @test eltype(result[1][1]) == String - @test result == ((["1", "#2"], [1, 2], [1, 2]), String["x", "y","z"]) -end + import TextParse: _csvread + @testset "commentchar" begin -@testset "skiplines_begin" begin - str1 = """ - hello - - world - x,y,z - 1,1,1 - """ - @test _csvread(str1, skiplines_begin=3) == (([1], [1], [1]), String["x", "y","z"]) - - s = """ - x,y z - a,b 1 - e 3 - """ - @test _csvread(s, spacedelim=true) == ((["a,b", "e"],[1,3]), ["x,y","z"]) -end + # First line a comment. + str1 = """ + x,y,z + #1,1,1 + 2,2,2 + """ -@testset "skipfield" begin - str1 = """ - x,y,z - 1,2.1,"John" - 4,5.2,"Sally" - """ + @test _csvread(str1, commentchar='#') == (([2], [2], [2]), String["x", "y", "z"]) - @test _csvread(str1, colparsers=Dict(1=>nothing)) == (([2.1,5.2], ["John", "Sally"]), String["y","z"]) - @test _csvread(str1, colparsers=Dict(2=>nothing)) == (([1,4], ["John", "Sally"]), String["x","z"]) - @test _csvread(str1, colparsers=Dict(3=>nothing)) == (([1,4], [2.1,5.2]), String["x","y"]) + # Last line a comment. + str2 = """ + x,y,z + 1,1,1 + #2,2,2 + """ - @test _csvread(str1, colparsers=Dict(1=>nothing,2=>nothing)) == ((["John", "Sally"],), String["z"]) - @test _csvread(str1, colparsers=Dict(1=>nothing,3=>nothing)) == (([2.1,5.2],), String["y"]) - @test _csvread(str1, colparsers=Dict(2=>nothing,3=>nothing)) == (([1,4],), String["x"]) + @test _csvread(str2, commentchar='#') == (([1], [1], [1]), String["x", "y", "z"]) + + # Multiple comments. + str3 = """ + x,y,z + 1,1,1 + #2,2,2 + #3,3,3 + #4,4,4 + 5,5,5 + #6,6,6 + """ - @test _csvread(str1, colparsers=Dict(1=>nothing,2=>nothing,3=>nothing)) == ((), String[]) -end + @test _csvread(str3, commentchar='#') == (([1, 5], [1, 5], [1, 5]), String["x", "y", "z"]) + + # Comments before headers. + str4 = """ + #foo + #bar + x,y,z + 1,1,1 + #2,2,2 + """ + + @test _csvread(str4, commentchar='#') == (([1], [1], [1]), String["x", "y", "z"]) + + # No comments. + str5 = """ + x,y,z + 1,1,1 + 2,2,2 + """ -import TextParse: eatwhitespaces -@testset "custom parser" begin - floatparser = Numeric(Float64) - percentparser = CustomParser(Float64) do str, i, len, opts - num, ii = tryparsenext(floatparser, str, i, len, opts) - if num === nothing - return num, ii - else - # parse away the % char - ii = eatwhitespaces(str, ii, len) - c, k = iterate(str, ii) - if c != '%' - return Nullable{Float64}(), ii # failed to parse % + @test _csvread(str5, commentchar='#') == (([1, 2], [1, 2], [1, 2]), String["x", "y", "z"]) + + # Non-default comment. + str6 = """ + %test + x,y,z + 1,1,1 + %2,2,2 + 2,2,2 + """ + + @test _csvread(str6, commentchar='%') == (([1, 2], [1, 2], [1, 2]), String["x", "y", "z"]) + + # Do not skip commented lines (commentchar=nothing). + str7 = """ + x,y,z + 1,1,1 + #2,2,2 + """ + + # Since we are not skipping commented lines the '#' character is considered + # data. This will force parsing to treat columns with '#'s as String columns. + # Here, we verify this behavior. + result = _csvread(str7) + @test eltype(result[1][1]) == String + @test result == ((["1", "#2"], [1, 2], [1, 2]), String["x", "y", "z"]) + end + + @testset "skiplines_begin" begin + str1 = """ + hello + + world + x,y,z + 1,1,1 + """ + @test _csvread(str1, skiplines_begin=3) == (([1], [1], [1]), String["x", "y", "z"]) + + s = """ + x,y z + a,b 1 + e 3 + """ + @test _csvread(s, spacedelim=true) == ((["a,b", "e"], [1, 3]), ["x,y", "z"]) + end + + @testset "skipfield" begin + str1 = """ + x,y,z + 1,2.1,"John" + 4,5.2,"Sally" + """ + + @test _csvread(str1, colparsers=Dict(1 => nothing)) == (([2.1, 5.2], ["John", "Sally"]), String["y", "z"]) + @test _csvread(str1, colparsers=Dict(2 => nothing)) == (([1, 4], ["John", "Sally"]), String["x", "z"]) + @test _csvread(str1, colparsers=Dict(3 => nothing)) == (([1, 4], [2.1, 5.2]), String["x", "y"]) + + @test _csvread(str1, colparsers=Dict(1 => nothing, 2 => nothing)) == ((["John", "Sally"],), String["z"]) + @test _csvread(str1, colparsers=Dict(1 => nothing, 3 => nothing)) == (([2.1, 5.2],), String["y"]) + @test _csvread(str1, colparsers=Dict(2 => nothing, 3 => nothing)) == (([1, 4],), String["x"]) + + @test _csvread(str1, colparsers=Dict(1 => nothing, 2 => nothing, 3 => nothing)) == ((), String[]) + end + + import TextParse: eatwhitespaces + @testset "custom parser" begin + floatparser = Numeric(Float64) + percentparser = CustomParser(Float64) do str, i, len, opts + num, ii = tryparsenext(floatparser, str, i, len, opts) + if num === nothing + return num, ii else - return num, k # the point after % + # parse away the % char + ii = eatwhitespaces(str, ii, len) + c, k = iterate(str, ii) + if c != '%' + return Nullable{Float64}(), ii # failed to parse % + else + return num, k # the point after % + end end end - end - @test tryparsenext(percentparser, "10%") |> unwrap == (10.0, 4) - @test tryparsenext(percentparser, "10.32 %") |> unwrap == (10.32, 8) - @test tryparsenext(percentparser, "2k%") |> failedat == 2 -end + @test tryparsenext(percentparser, "10%") |> unwrap == (10.0, 4) + @test tryparsenext(percentparser, "10.32 %") |> unwrap == (10.32, 8) + @test tryparsenext(percentparser, "2k%") |> failedat == 2 + end -@testset "read gzipped files" begin - fn = joinpath(@__DIR__, "data", "a.csv") - fngz = fn*".gz" - open(fn, "r") do ior - open(GzipCompressorStream, fngz, "w") do iow - write(iow, ior) + @testset "read gzipped files" begin + fn = joinpath(@__DIR__, "data", "a.csv") + fngz = fn * ".gz" + open(fn, "r") do ior + open(GzipCompressorStream, fngz, "w") do iow + write(iow, ior) + end + end + @test csvread(fn) == csvread(fngz) + @test csvread([fn]) == csvread([fngz]) + if isfile(fngz) + rm(fngz) end end - @test csvread(fn) == csvread(fngz) - @test csvread([fn]) == csvread([fngz]) - if isfile(fngz) - rm(fngz) - end -end -include("test_vectorbackedstrings.jl") + include("test_vectorbackedstrings.jl") end diff --git a/test/test_vectorbackedstrings.jl b/test/test_vectorbackedstrings.jl index 4f7bc85..46348f2 100644 --- a/test/test_vectorbackedstrings.jl +++ b/test/test_vectorbackedstrings.jl @@ -2,50 +2,50 @@ using Test using TextParse: VectorBackedUTF8String @testset "VectorBackedStrings" begin - -buffer = UInt8['T', 'e', 's', 't'] -s = VectorBackedUTF8String(buffer) + buffer = UInt8['T', 'e', 's', 't'] -@test s == VectorBackedUTF8String(copy(buffer)) + s = VectorBackedUTF8String(buffer) -@test pointer(s) == pointer(buffer) + @test s == VectorBackedUTF8String(copy(buffer)) -@test pointer(s, 2) == pointer(buffer, 2) + @test pointer(s) == pointer(buffer) -@test ncodeunits(s) == length(buffer) + @test pointer(s, 2) == pointer(buffer, 2) -@test codeunit(s) <: UInt8 + @test ncodeunits(s) == length(buffer) -@test codeunit(s, 2) == UInt8('e') + @test codeunit(s) <: UInt8 -@test thisind(s, 2) == 2 + @test codeunit(s, 2) == UInt8('e') -@test isvalid(s, 2) == true + @test thisind(s, 2) == 2 -@test iterate(s) == ('T', 2) + @test isvalid(s, 2) == true -@test iterate(s, 2) == ('e', 3) + @test iterate(s) == ('T', 2) -@test iterate(s, 5) == nothing + @test iterate(s, 2) == ('e', 3) -@test string(s) == "Test" + @test iterate(s, 5) == nothing -sub_s = SubString(s, 2:3) + @test string(s) == "Test" -@test sub_s == "es" + sub_s = SubString(s, 2:3) -@test pointer(sub_s, 1) == pointer(s, 2) -@test pointer(sub_s, 2) == pointer(s, 3) + @test sub_s == "es" + + @test pointer(sub_s, 1) == pointer(s, 2) + @test pointer(sub_s, 2) == pointer(s, 3) + + @test_throws ErrorException s == "Test" + @test_throws ErrorException "Test" == s + @test_throws ErrorException hash(s, UInt(1)) + @test_throws ErrorException print(s) + @test_throws ErrorException textwidth(s) + @test_throws ErrorException convert(VectorBackedUTF8String, "foo") + @test_throws ErrorException convert(String, s) + @test_throws ErrorException String(s) + @test_throws ErrorException Symbol(s) -@test_throws ErrorException s == "Test" -@test_throws ErrorException "Test" == s -@test_throws ErrorException hash(s, UInt(1)) -@test_throws ErrorException print(s) -@test_throws ErrorException textwidth(s) -@test_throws ErrorException convert(VectorBackedUTF8String, "foo") -@test_throws ErrorException convert(String, s) -@test_throws ErrorException String(s) -@test_throws ErrorException Symbol(s) - end