Merge pull request #1522 from zazuko/1495
CSV handling improvements
Showing 14 changed files with 349 additions and 308 deletions.
@@ -0,0 +1,5 @@
---
"@cube-creator/core-api": patch
---

Updates `csv-parse` to v5
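For context, `csv-parse` v5 moved to named exports for its entry points, so callers import `parse` rather than using the old default export. A minimal sketch of the v5 callback API, with an illustrative input string that is not taken from this PR:

import { parse } from 'csv-parse'

// csv-parse v5: "parse" is a named export; with "columns: true" each record
// becomes an object keyed by the header row.
parse('a,b\n1,2\n', { columns: true }, (err, records) => {
  if (err) throw err
  console.log(records) // [{ a: '1', b: '2' }]
})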
@@ -0,0 +1,5 @@
---
"@cube-creator/cli": patch
---

Empty lines will be ignored when parsing CSVs (fixes #1495)
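csv-parse has a built-in option for this; a minimal sketch of skipping blank lines with the library (the sample input is illustrative, and whether the project uses exactly this option is an assumption):

import { parse } from 'csv-parse/sync'

// "skip_empty_lines" is a standard csv-parse option: blank lines in the input
// produce no records instead of causing a column-count error.
const records = parse('a,b\n1,2\n\n3,4\n', { skip_empty_lines: true })
console.log(records) // [['a', 'b'], ['1', '2'], ['3', '4']]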
@@ -0,0 +1,5 @@
---
"@cube-creator/cli": patch
---

Whitespace will be trimmed from CSV headers. A message will be displayed to the user in that case. (fixes #1232)
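The tests further down exercise this behaviour through a `parse` helper that returns a `headerTrimmed` flag alongside the cleaned header. A minimal sketch of that idea (the `trimHeader` function below is illustrative, not the project's actual implementation):

// Illustrative sketch: trim whitespace from header cells and report whether
// anything had to be trimmed, so the caller can show a message to the user.
function trimHeader(header: string[]): { header: string[]; headerTrimmed: boolean } {
  const trimmed = header.map(cell => cell.trim())
  const headerTrimmed = trimmed.some((cell, i) => cell !== header[i])
  return { header: trimmed, headerTrimmed }
}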
@@ -0,0 +1,62 @@
import { createReadStream, promises as fs } from 'fs'
import { resolve } from 'path'
import { Readable } from 'stream'
import { describe, it } from 'mocha'
import { expect } from 'chai'
import { sniffParse } from '../../../lib/domain/csv'
import { loadFileHeadString } from '../../../lib/domain/csv/file-head'

describe('domain/csv/file-head', () => {
  it('sniffs and parses', async () => {
    const path = resolve(__dirname, '../../fixtures/CH_yearly_air_immission_aggregation_id.csv')
    const input = await fs.readFile(path)
    const { dialect, header, rows } = await sniffParse(input.toString())
    const [lastRow] = rows.slice(-1)

    expect(dialect).to.contain({ delimiter: ',', quote: '"' })
    expect(header).to.deep.eq(['aggregation_id', 'aggregation_name_de', 'aggregation_name_fr', 'aggregation_name_it', 'aggregation_name_en'])
    expect(lastRow).to.deep.eq(['dosisaot40f', 'Dosis AOT40f', 'Dose AOT40f', 'Dose AOT40f', 'Dosis AOT40f'])
  })

  it('reads parts of a file ', async () => {
    const path = resolve(__dirname, '../../fixtures/CH_yearly_air_immission_basetable.csv')
    const fileContent = await fs.readFile(path)

    const input1 = fileContent.toString()
    const input2 = await loadFileHeadString(createReadStream(path))

    expect(input1).not.to.eq(input2)

    const lines1 = input1?.split('\n') || []
    const lines2 = input2?.split('\n') || []
    const firstLine2 = lines2[0]
    expect(lines1[0]).to.eq(firstLine2)
    expect(lines2.length).to.eq(21)
  })

  it('reads parts of a file with CRLF line endings', async () => {
    const input = `"station_id","pollutant_id","aggregation_id","limitvalue","year","value","unit_id","value_remark"\r
"blBAS","so2","annualmean",30,1984,31.9,"µg/m3","incomplete series"\r
"blBAS","so2","annualmean",30,1985,40.2,"µg/m3","incomplete series"\r
"blBAS","so2","annualmean",30,1985,40.2,"µg/m3","incomplete series"\r
"blBAS","so2","annualmean",30,1985,40.2,"µg/m3","incomplete series"\r
"blBAS","so2","annualmean",30,1986,33.6,"µg/m3","incomplete series"\r
"blBAS","so2","annualmean",30,1987,33,"µg/m3","incomplete series"`
    const stream = new Readable()
    stream.push(input)
    stream.push(null)
    const head = await loadFileHeadString(stream)

    const lines = head.split('\n')
    expect(lines[0]).to.eq('"station_id","pollutant_id","aggregation_id","limitvalue","year","value","unit_id","value_remark"')
    expect(lines.length).to.eq(5)
  })

  it('parses all lines on short file', async () => {
    const path = resolve(__dirname, '../../fixtures/CH_yearly_air_immission_unit_id.csv')

    const input = await loadFileHeadString(createReadStream(path))
    const lines = input?.split('\n') || []
    expect(lines.length).be.eq(11)
  })
})
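These tests pin down the observable behaviour of `loadFileHeadString`: it returns only the beginning of a larger file, cut at complete lines, and normalises CRLF input. A rough sketch of that kind of helper, assuming a byte-limited head (the `loadHead` name and the `maxBytes` value are illustrative, not the project's code):

import { Readable } from 'stream'

// Illustrative sketch, not the project's implementation: buffer roughly the
// first `maxBytes` of the stream, stop reading, and keep only complete lines
// so a truncated final line is dropped.
async function loadHead(stream: Readable, maxBytes = 1024): Promise<string> {
  let buffered = ''
  for await (const chunk of stream) {
    buffered += chunk.toString()
    if (buffered.length >= maxBytes) break // leaving the loop destroys the stream
  }
  const lines = buffered.replace(/\r\n/g, '\n').split('\n')
  if (buffered.length >= maxBytes) lines.pop() // drop the potentially partial last line
  return lines.join('\n')
}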
@@ -1,62 +1,28 @@

The `sniffParse` and `loadFileHeadString` tests previously in this file were removed here (they now live in the new file-head test above); after the change the file tests `parse` directly:

import { describe, it } from 'mocha'
import { expect } from 'chai'
import { parse } from '../../../lib/domain/csv'

describe('domain/csv/parse', () => {
  it('trims headers', async () => {
    // given
    const input = '" station_id ","\tpollutant_id\t","aggregation_id\t","\tlimitvalue","year"'

    // when
    const { header, headerTrimmed } = await parse(input, {})

    // then
    expect(headerTrimmed).to.be.true
    expect(header).to.contain.ordered.members(['station_id', 'pollutant_id', 'aggregation_id', 'limitvalue', 'year'])
  })

  it('parses header', async () => {
    // given
    const input = '"station_id","pollutant_id","aggregation_id","limitvalue","year"'

    // when
    const { header, headerTrimmed } = await parse(input, {})

    // then
    expect(headerTrimmed).to.be.false
    expect(header).to.contain.ordered.members(['station_id', 'pollutant_id', 'aggregation_id', 'limitvalue', 'year'])
  })
})
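On the caller side, the `headerTrimmed` flag is what backs the "a message will be displayed to the user" part of the changelog entry above. A hypothetical usage sketch (the `describeCsv` function, the import path, and the warning text are illustrative, not code from this PR):

import { parse } from '../../../lib/domain/csv' // same helper the tests above use

// Hypothetical caller: surface a message when whitespace had to be trimmed
// from the CSV header.
async function describeCsv(csvContent: string): Promise<void> {
  const { header, headerTrimmed } = await parse(csvContent, {})
  if (headerTrimmed) {
    console.warn(`Whitespace was trimmed from CSV headers: ${header.join(', ')}`)
  }
}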