Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Writing of DateTime columns #34

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions src/datetime.jl
Original file line number Diff line number Diff line change
Expand Up @@ -85,3 +85,11 @@
end
return out
end

"""
    datetimes_to_doubles(col, ext, format)

Convert a column of `DateTime` values into numeric offsets suitable for writing.

The pair `(epoch, delta)` is looked up as `dt_formats[ext][format]`. Each non-missing
value is converted with `(value - epoch) / delta`; `missing` entries are passed through
unchanged.

# Arguments
- `col`: vector whose elements are `DateTime` or `missing`.
- `ext`: file extension used as the outer key into `dt_formats`.
- `format`: datetime format name used as the inner key into `dt_formats`.

# Returns
A new collection produced by `map` over `col`; `col` itself is not modified.

# Throws
The lookup fails if `ext` or `format` is not present in `dt_formats`
(presumably a `KeyError`, assuming `dt_formats` is a nested `Dict` -- TODO confirm).
"""
function datetimes_to_doubles(col::AbstractVector{<:Union{Missing,DateTime}}, ext::String, format::String)
    # TODO: make this more robust than just a double dict lookup
    epoch, delta = dt_formats[ext][format]
    return map(col) do value
        # Keep missing entries as-is; only actual timestamps are rescaled.
        ismissing(value) ? missing : (value - epoch) / delta
    end
end
35 changes: 33 additions & 2 deletions src/writestat.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,17 @@
".sav" => 2,
)

# Default datetime format written for each supported file extension.
# Presumably each file type has a format offering the most accuracy, and that one should
# be chosen here. However, the SAS docs mention that DATETIME only resolves to seconds,
# whereas e.g. DATETIME21.2 keeps two decimal places for the seconds, so these defaults
# may need to be revisited in this code base.
const default_datetime_format = Dict{String, String}(
    # Only Stata (.dta) uses "%tc"; every other extension uses "DATETIME".
    ext => (ext == ".dta" ? "%tc" : "DATETIME")
    for ext in (".sav", ".por", ".sas7bdat", ".xpt", ".dta")
)

# Accepted maximum length for strings varies by the file format and version
function _readstat_string_width(col)
if eltype(col) <: Union{InlineString, Missing}
Expand Down Expand Up @@ -115,6 +126,11 @@
# Only used for .xpt files
meta.table_name = metadata(table, "table_name", "")
end

# This is just a hack so columns with DateTime values can be replaced with Float64
# further down; we can't modify the input table directly.
cols_dict = Dict{Symbol,AbstractVector}(pairs(cols))

Check warning on line 132 in src/writestat.jl

View check run for this annotation

Codecov / codecov/patch

src/writestat.jl#L132

Added line #L132 was not covered by tests

# Assume colmeta is manually specified if the length matches
# Otherwise, any value in colmeta is overwritten
# The metadata interface is absent before DataFrames.jl v1.4 which requires Julia v1.6
Expand All @@ -124,7 +140,18 @@
col = Tables.getcolumn(cols, i)
colmeta.label[i] = colmetadata(table, i, "label", "")
#! To do: handle format for DateTime columns
colmeta.format[i] = colmetadata(table, i, "format", "")
if nonmissingtype(eltype(col)) === DateTime

Check warning on line 143 in src/writestat.jl

View check run for this annotation

Codecov / codecov/patch

src/writestat.jl#L143

Added line #L143 was not covered by tests
# pick a default datetime format given the file extension.
# what should happen when there is already format metadata for that column?
# then we should probably use that but check that it's also correctly applicable
datetime_format = default_datetime_format[ext]
colmeta.format[i] = datetime_format

Check warning on line 148 in src/writestat.jl

View check run for this annotation

Codecov / codecov/patch

src/writestat.jl#L147-L148

Added lines #L147 - L148 were not covered by tests
# overwrite the original column in the table copy with a Float64 column where
# the conversion depends on the datetime format we picked
cols_dict[names[i]] = datetimes_to_doubles(col, ext, datetime_format)

Check warning on line 151 in src/writestat.jl

View check run for this annotation

Codecov / codecov/patch

src/writestat.jl#L151

Added line #L151 was not covered by tests
else
colmeta.format[i] = colmetadata(table, i, "format", "")

Check warning on line 153 in src/writestat.jl

View check run for this annotation

Codecov / codecov/patch

src/writestat.jl#L153

Added line #L153 was not covered by tests
end
if col isa LabeledArrOrSubOrReshape || refpool(col) !== nothing && refpoolaslabel
type = rstype(nonmissingtype(eltype(refarray(col))))
else
Expand All @@ -150,7 +177,11 @@
colmeta.alignment[i] = READSTAT_ALIGNMENT_UNKNOWN
end
end
return ReadStatTable(cols, names, vallabels, hasmissing, meta, colmeta, styles)

# can't use a dict here because it loses order, but which other base Julia data structure can be used as an ordered table with integer and symbol access?
namedtupletable = NamedTuple(names .=> [cols_dict[n] for n in names])

Check warning on line 182 in src/writestat.jl

View check run for this annotation

Codecov / codecov/patch

src/writestat.jl#L182

Added line #L182 was not covered by tests

return ReadStatTable(namedtupletable, names, vallabels, hasmissing, meta, colmeta, styles)

Check warning on line 184 in src/writestat.jl

View check run for this annotation

Codecov / codecov/patch

src/writestat.jl#L184

Added line #L184 was not covered by tests
end

"""
Expand Down
Loading