diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ccfee3f..10c0a20 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -39,7 +39,7 @@ Please proceed with a Pull Request only after you're assigned. It'd be sad if yo 4. Add any gem dependencies required for the Format Importer to the `:optional` group of the Gemfile. -5. Add code and YARD documentation to `lib/daru/io/importers/format.rb`, consistent with other IO modules. +5. Add code and YARD documentation to `lib/daru/io/importers/format.rb`, consistent with other IO modules. Update the `README.md` if required. 6. Add tests to `spec/daru/io/importers/format_spec.rb`. Add any `.format` files required for importer in `spec/fixtures/format/` directory. diff --git a/Gemfile b/Gemfile index 3889d5a..cea7dd1 100644 --- a/Gemfile +++ b/Gemfile @@ -12,6 +12,7 @@ group :optional do gem 'redis' gem 'roo', '~> 2.7.0' gem 'rsruby' + gem 'rubyXL' gem 'snappy' gem 'spreadsheet', '~> 1.1.1' gem 'sqlite3' diff --git a/README.md b/README.md index 132a4c2..e77c667 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ While supporting various IO modules, daru-io also provides an easier way of addi - [Installation](#installation) - *[Importers](#importers): [ActiveRecord](#activerecord-importer), [Avro](#avro-importer), [CSV](#csv-importer), [Excel](#excel-importer), [Excelx](#excelx-importer), [HTML](#html-importer), [JSON](#json-importer), [Mongo](#mongo-importer), [Plaintext](#plaintext-importer), [RData](#rdata-importer), [RDS](#rds-importer), [Redis](#redis-importer), [SQL](#sql-importer)* -- *[Exporters](#exporters): [Avro](#avro-exporter), [CSV](#csv-exporter), [Excel](#excel-exporter), [JSON](#json-exporter), [RData](#rdata-exporter), [RDS](#rds-exporter), [SQL](#sql-exporter)* +- *[Exporters](#exporters): [Avro](#avro-exporter), [CSV](#csv-exporter), [Excel](#excel-exporter), [Excelx](#excelx-exporter), [JSON](#json-exporter), [RData](#rdata-exporter), [RDS](#rds-exporter), [SQL](#sql-exporter)* - [Creating your own 
IO modules](#creating-your-own-io-modules) - [Contributing](#contributing) - [License](#license) @@ -458,6 +458,28 @@ Exports a **Daru::DataFrame** into a **.xls** file. df.write_excel('path/to/file.xls', header: {color: :red, weight: :bold}, data: {color: :blue }, index: false) ``` +### Excelx Exporter + +[(Go to Table of Contents)](#table-of-contents) + +Exports a **Daru::DataFrame** into a **.xlsx** file. + +- **Docs**: [rubydoc.info](http://www.rubydoc.info/github/athityakumar/daru-io/master/Daru/IO/Exporters/Excelx) +- **Gem Dependencies**: `rubyXL` gem +- **Usage**: + ```ruby + #! Partially require just Excelx Exporter + require 'daru/io/exporters/excelx' + + #! Usage from Daru::IO + string = Daru::IO::Exporters::Excelx.new(df, index: false).to_s + Daru::IO::Exporters::Excelx.new(df, index: false).write('path/to/file.xlsx') + + #! Usage from Daru::DataFrame + string = df.to_excelx_string(index: false) + df.write_excelx('path/to/file.xlsx', index: false) + ``` + ### JSON Exporter [(Go to Table of Contents)](#table-of-contents) diff --git a/daru-io.gemspec b/daru-io.gemspec index ff5f4ab..f4c5c9e 100644 --- a/daru-io.gemspec +++ b/daru-io.gemspec @@ -33,5 +33,6 @@ Gem::Specification.new do |spec| spec.add_development_dependency 'simplecov' spec.add_development_dependency 'webmock' spec.add_development_dependency 'yard' + spec.add_development_dependency 'guard-rspec' if RUBY_VERSION >= '2.2.5' end diff --git a/lib/daru/io/exporters/avro.rb b/lib/daru/io/exporters/avro.rb index ed0ca7f..d7ae874 100644 --- a/lib/daru/io/exporters/avro.rb +++ b/lib/daru/io/exporters/avro.rb @@ -78,6 +78,7 @@ def write(path) @writer.close File.open(path, 'w') { |file| file.write(@buffer.string) } + true end private diff --git a/lib/daru/io/exporters/base.rb b/lib/daru/io/exporters/base.rb index 33ab203..5cafd28 100644 --- a/lib/daru/io/exporters/base.rb +++ b/lib/daru/io/exporters/base.rb @@ -41,8 +41,8 @@ def initialize(dataframe) # # instance = Daru::IO::Exporters::Format.new(opts) 
# instance.to_s #! same as df.to_format_string(opts) - def to_s - tempfile = Tempfile.new('tempfile') + def to_s(file_extension: '') + tempfile = Tempfile.new(['filename', file_extension]) path = tempfile.path write(path) diff --git a/lib/daru/io/exporters/csv.rb b/lib/daru/io/exporters/csv.rb index 3227ff1..897fc77 100644 --- a/lib/daru/io/exporters/csv.rb +++ b/lib/daru/io/exporters/csv.rb @@ -80,6 +80,7 @@ def write(path) contents.each { |content| csv << content } csv.close end + true end private diff --git a/lib/daru/io/exporters/excel.rb b/lib/daru/io/exporters/excel.rb index bc7662e..bc2c742 100755 --- a/lib/daru/io/exporters/excel.rb +++ b/lib/daru/io/exporters/excel.rb @@ -62,7 +62,7 @@ def initialize(dataframe, header: true, data: true, index: true) # @return [String] A file-writable string # # @example Getting a file-writable string from Excel Exporter instance - # simple_instance.to_s #! same as df.to_avro_string(schema) + # simple_instance.to_s # # #=> "\xD0\xCF\u0011\u0871\u001A\xE1\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000..." # @@ -92,6 +92,7 @@ def write(path) end @book.write(path) + true end private diff --git a/lib/daru/io/exporters/excelx.rb b/lib/daru/io/exporters/excelx.rb new file mode 100644 index 0000000..d0b088e --- /dev/null +++ b/lib/daru/io/exporters/excelx.rb @@ -0,0 +1,114 @@ +require 'daru/io/exporters/base' + +module Daru + module IO + module Exporters + # Excelx Exporter Class, that extends `to_excelx_string` and `write_excelx` methods to + # `Daru::DataFrame` instance variables + class Excelx < Base + Daru::DataFrame.register_io_module :to_excelx_string, self + Daru::DataFrame.register_io_module :write_excelx, self + + # Initializes an Excelx Exporter instance. + # + # @param dataframe [Daru::DataFrame] A dataframe to export. Supports even dataframes + # with multi-index. + # @param sheet [String] A sheet name, to export the dataframe into. Defaults to + # 'Sheet0'. + # @param header [Boolean] Defaults to true. 
When set to false or nil, + # headers are not written. + # @param data [Boolean] Defaults to true. When set to false or nil, + # data values are not written. + # @param index [Boolean] Defaults to true. When set to false or nil, + # index values are not written. + # + # @example Initializing an Excelx Exporter instance + # df = Daru::DataFrame.new([[1,2],[3,4]], order: [:a, :b]) + # + # #=> #<Daru::DataFrame(2x2)> + # # a b + # # 0 1 3 + # # 1 2 4 + # + # instance = Daru::IO::Exporters::Excelx.new(df) + def initialize(dataframe, sheet: 'Sheet0', header: true, data: true, index: true) + optional_gem 'rubyXL' + + super(dataframe) + @data = data + @index = index + @sheet = sheet + @header = header + end + + # Exports an Excelx Exporter instance to a file-writable String. + # + # @return [String] A file-writable string + # + # @example Getting a file-writable string from Excelx Exporter instance + # instance.to_s + # + # #=> "PK\u0003\u0004\u0014\u0000\u0000\u0000\b\u0000X\xA5YK\u0018\x87\xFC\u0017..." + def to_s + super(file_extension: '.xlsx') + end + + # Exports an Excelx Exporter instance to an xlsx file. + # + # @param path [String] Path of excelx file where the dataframe is to be saved + # + # @example Writing an Excelx Exporter instance to an xlsx file + # instance.write('filename.xlsx') + def write(path) + @workbook = RubyXL::Workbook.new + @sheet = @workbook.add_worksheet(@sheet) + process_offsets + + write_row(@header ? 0 : 1, fetch_headers) + + @dataframe.each_row_with_index.with_index do |(row, idx), i| + write_row(@row_offset+i, fetch_index(idx) + fetch_data(row)) + end + + @workbook.write(path) + true + end + + private + + def process_offsets + @row_offset = @header ? 1 : 0 + @col_offset = 0 unless @index + @col_offset ||= @dataframe.index.is_a?(Daru::MultiIndex) ? 
@dataframe.index.width : 1 + end + + def fetch_headers + formatting([' '] * @col_offset + @dataframe.vectors.map(&:to_s), @header) + end + + def fetch_index(idx) + formatting(idx, @index) + end + + def fetch_data(row) + formatting(row, @data) + end + + def formatting(idx, format) + return [] unless format + + case idx + when Daru::Vector, Daru::MultiIndex, Array then idx.map(&:to_s) + else [idx.to_s] + end + end + + def write_row(row_index, row_array) + row_array.each_with_index do |element, col_index| + @sheet.insert_cell(row_index, col_index, element.to_s) + end + end + end + end + end +end diff --git a/lib/daru/io/exporters/json.rb b/lib/daru/io/exporters/json.rb index 1832f5a..5454be9 100644 --- a/lib/daru/io/exporters/json.rb +++ b/lib/daru/io/exporters/json.rb @@ -487,6 +487,7 @@ def write(path) File.open(path, 'w') do |file| file.write(::JSON.send(@pretty ? :pretty_generate : :generate, to)) end + true end private diff --git a/lib/daru/io/exporters/r_data.rb b/lib/daru/io/exporters/r_data.rb index f9d3eec..434a28f 100644 --- a/lib/daru/io/exporters/r_data.rb +++ b/lib/daru/io/exporters/r_data.rb @@ -59,6 +59,7 @@ def write(path) end.flatten @statements << "save(#{@options.keys.map(&:to_s).join(', ')}, file='#{path}')" @statements.each { |statement| @instance.eval_R(statement) } + true end end end diff --git a/lib/daru/io/exporters/rds.rb b/lib/daru/io/exporters/rds.rb index f744b16..faa08b8 100644 --- a/lib/daru/io/exporters/rds.rb +++ b/lib/daru/io/exporters/rds.rb @@ -53,6 +53,7 @@ def write(path) @statements = process_statements(@r_variable, @dataframe) @statements << "saveRDS(#{@r_variable}, file='#{path}')" @statements.each { |statement| @instance.eval_R(statement) } + true end private diff --git a/lib/daru/io/importers/excel.rb b/lib/daru/io/importers/excel.rb index e5bf2ff..7465541 100644 --- a/lib/daru/io/importers/excel.rb +++ b/lib/daru/io/importers/excel.rb @@ -10,9 +10,9 @@ class Excel < Base Daru::DataFrame.register_io_module :read_excel do 
|*args, &io_block| if args.first.end_with?('.xlsx') require 'daru/io/importers/excelx' - Daru::IO::Importers::Excelx.new(*args[1..-1], &io_block).read(*args[0]) + Daru::IO::Importers::Excelx.new.read(*args[0]).call(*args[1..-1], &io_block) else - Daru::IO::Importers::Excel.new(*args[1..-1], &io_block).read(*args[0]) + Daru::IO::Importers::Excel.new.read(*args[0]).call(*args[1..-1], &io_block) end end diff --git a/lib/daru/io/link.rb b/lib/daru/io/link.rb index 7e44aa4..63239f3 100644 --- a/lib/daru/io/link.rb +++ b/lib/daru/io/link.rb @@ -25,20 +25,22 @@ class << self # # #### Exporters # - # | `Daru::DataFrame` instance method | `Daru::IO::Exporters` class | - # | --------------------------------- | -----------------------------------| - # | `Daru::DataFrame.to_avro_string` | {Daru::IO::Exporters::Avro#to_s} | - # | `Daru::DataFrame.write_avro` | {Daru::IO::Exporters::Avro#write} | - # | `Daru::DataFrame.to_csv_string` | {Daru::IO::Exporters::CSV#to_s} | - # | `Daru::DataFrame.write_csv` | {Daru::IO::Exporters::CSV#write} | - # | `Daru::DataFrame.to_excel_string` | {Daru::IO::Exporters::Excel#to_s} | - # | `Daru::DataFrame.write_excel` | {Daru::IO::Exporters::Excel#write} | - # | `Daru::DataFrame.to_json` | {Daru::IO::Exporters::JSON#to} | - # | `Daru::DataFrame.to_json_string` | {Daru::IO::Exporters::JSON#to_s} | - # | `Daru::DataFrame.write_json` | {Daru::IO::Exporters::JSON#write} | - # | `Daru::DataFrame.to_rds_string` | {Daru::IO::Exporters::RDS#to_s} | - # | `Daru::DataFrame.write_rds` | {Daru::IO::Exporters::RDS#write} | - # | `Daru::DataFrame.to_sql` | {Daru::IO::Exporters::SQL#to} | + # | `Daru::DataFrame` instance method | `Daru::IO::Exporters` class | + # | -----------------------------------| ------------------------------------| + # | `Daru::DataFrame.to_avro_string` | {Daru::IO::Exporters::Avro#to_s} | + # | `Daru::DataFrame.write_avro` | {Daru::IO::Exporters::Avro#write} | + # | `Daru::DataFrame.to_csv_string` | {Daru::IO::Exporters::CSV#to_s} | + # 
| `Daru::DataFrame.write_csv` | {Daru::IO::Exporters::CSV#write} | + # | `Daru::DataFrame.to_excel_string` | {Daru::IO::Exporters::Excel#to_s} | + # | `Daru::DataFrame.write_excel` | {Daru::IO::Exporters::Excel#write} | + # | `Daru::DataFrame.to_excelx_string` | {Daru::IO::Exporters::Excelx#to_s} | + # | `Daru::DataFrame.write_excelx` | {Daru::IO::Exporters::Excelx#write} | + # | `Daru::DataFrame.to_json` | {Daru::IO::Exporters::JSON#to} | + # | `Daru::DataFrame.to_json_string` | {Daru::IO::Exporters::JSON#to_s} | + # | `Daru::DataFrame.write_json` | {Daru::IO::Exporters::JSON#write} | + # | `Daru::DataFrame.to_rds_string` | {Daru::IO::Exporters::RDS#to_s} | + # | `Daru::DataFrame.write_rds` | {Daru::IO::Exporters::RDS#write} | + # | `Daru::DataFrame.to_sql` | {Daru::IO::Exporters::SQL#to} | # # @param function [Symbol] Functon name to be monkey-patched into +Daru::DataFrame+ # @param instance [Class] The Daru-IO class to be linked to monkey-patched function @@ -62,7 +64,7 @@ def register_exporter(function, instance) case function.to_s when /\Ato_.*_string\Z/ then instance.new(self, *args, &io_block).to_s when /\Ato_/ then instance.new(self, *args, &io_block).to - when /Awrite_/ then instance.new(self, *args[1..-1], &io_block).write(*args[0]) + when /\Awrite_/ then instance.new(self, *args[1..-1], &io_block).write(*args[0]) end end end diff --git a/spec/daru/io/exporters/csv_spec.rb b/spec/daru/io/exporters/csv_spec.rb index e3dad6b..1a78da4 100644 --- a/spec/daru/io/exporters/csv_spec.rb +++ b/spec/daru/io/exporters/csv_spec.rb @@ -8,7 +8,7 @@ before { described_class.new(df, opts).write(tempfile.path) } context 'writes DataFrame to a CSV file' do - subject { Daru::DataFrame.rows content[1..-1].map { |x| x.map { |y| convert(y) } }, order: content[0] } + subject { Daru::DataFrame.rows(content[1..-1].map { |x| x.map { |y| convert(y) } }, order: content[0]) } let(:opts) { {} } let(:content) { CSV.read(tempfile.path) } diff --git a/spec/daru/io/exporters/excel_spec.rb 
b/spec/daru/io/exporters/excel_spec.rb index eacc037..0944101 100644 --- a/spec/daru/io/exporters/excel_spec.rb +++ b/spec/daru/io/exporters/excel_spec.rb @@ -2,7 +2,7 @@ include_context 'exporter setup' let(:filename) { 'test_write.xls' } - let(:content) { Spreadsheet.open tempfile.path } + let(:content) { Spreadsheet.open(tempfile.path) } let(:opts) { {header: {color: :blue}, data: {color: :red}, index: {color: :green}} } before { described_class.new(df, **opts).write(tempfile.path) } diff --git a/spec/daru/io/exporters/excelx_spec.rb b/spec/daru/io/exporters/excelx_spec.rb new file mode 100644 index 0000000..c554c5e --- /dev/null +++ b/spec/daru/io/exporters/excelx_spec.rb @@ -0,0 +1,39 @@ +RSpec.describe Daru::IO::Exporters::Excelx do + include_context 'exporter setup' + + let(:filename) { ['test_write', '.xlsx'] } + let(:content) { Roo::Excelx.new(tempfile.path).sheet('Sheet0').to_a } + + before { described_class.new(df, **opts).write(tempfile.path) } + + context 'writes to excelx worksheet without index' do + subject { Daru::DataFrame.rows(content[1..-1].map { |x| x.map { |y| convert(y) } }, order: content[0]) } + + let(:opts) { {index: false} } + + it_behaves_like 'exact daru dataframe', + ncols: 4, + nrows: 5, + order: %w[a b c d], + data: [ + [1,2,3,4,5], + [11,22,33,44,55], + ['a', 'g', 4, 5,'addadf'], + ['', 23, 4,'a','ff'] + ] + end + + context 'writes to excelx worksheet with multi-index' do + subject { content.map { |x| x.map { |y| convert(y) } } } + + let(:df) do + Daru::DataFrame.new( + [[1,2],[3,4]], + order: %i[x y], + index: [%i[a b c], %i[d e f]] + ) + end + + it { is_expected.to eq([[' ', ' ', ' ', 'x', 'y'], ['a', 'b', 'c', 1, 3], ['d', 'e', 'f', 2, 4]]) } + end +end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index c5b5745..6775045 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -17,6 +17,7 @@ require 'dbd/SQLite3' require 'active_record' require 'redis' +require 'roo' require 'dbi' require 'jsonpath' require 
'nokogiri'