Skip to content

Commit

Permalink
Merge pull request #141 from share-research/gh-137-update-normed-person
Browse files Browse the repository at this point in the history
Expanding NormedPerson interface to class
  • Loading branch information
rickjohnson authored Feb 10, 2021
2 parents 5b2b1bc + 70d0afe commit 1d31ad6
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 79 deletions.
70 changes: 61 additions & 9 deletions ingest/modules/normedPerson.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,63 @@
interface NormedPerson {
id: Number,
familyName: string,
givenNameInitial: string,
givenName: string,
startDate: Date,
endDate: Date,
sourceIds: {
scopusAffiliationId?: string
import _ from 'lodash'
import { getDateObject } from '../units/dateRange'
import { command as loadCsv } from '../units/loadCsv'

/**
 * A person record in normalized form.
 *
 * The class doubles as a plain data shape (its instance properties can be
 * used like an interface) and as a home for the static helpers that build
 * such records from CSV input.
 */
export default class NormedPerson {
  // ------ begin declare properties used when using NormedPerson like an interface
  // NOTE(review): `Number` is the boxed wrapper type; the primitive `number`
  // would be the idiomatic choice. Left unchanged here so that existing
  // callers typed against `Number` keep compiling.
  id: Number
  familyName: string
  givenNameInitial: string
  givenName: string
  startDate: Date
  endDate: Date
  sourceIds: {
    scopusAffiliationId?: string
  }
  // ------ end declare properties used when using NormedPerson like an interface

  /**
   * Loads every row of the CSV at `csvPath` and normalizes each one via
   * `getNormedPersonObjectFromCSVRow`.
   *
   * @param csvPath the path to a CSV that contains the flat
   * non-normalized person records
   *
   * @returns an array of normalized people (e.g. NormedPerson)
   *
   * @throws rethrows any error raised while loading or normalizing, after
   * logging it together with the offending path
   */
  public static async loadFromCSV (csvPath: string): Promise<NormedPerson[]> {
    console.log(`Loading People from path: ${csvPath}`)
    try {
      // `lowerCaseColumns: true` is requested because the row normalizer below
      // reads lower-case keys such as 'given_name' -- presumably the loader
      // lower-cases the CSV header names; confirm against loadCsv.
      const peopleFromCsv: any = await loadCsv({
        path: csvPath,
        lowerCaseColumns: true
      })

      return _.map(peopleFromCsv, (personRow) => NormedPerson.getNormedPersonObjectFromCSVRow(personRow))
    } catch (error) {
      console.log(`Error on people load for path ${csvPath}, error: ${error}`)
      throw error
    }
  }

  /**
   * This function normalizes the given `personRow` into a NormedPerson.
   *
   * Columns read: `id`, `family_name`, `given_name`, `given_name_initial`,
   * `start_date`, `end_date` and `scopus_affiliation_id`.
   *
   * Note that the result is a plain object literal with the NormedPerson
   * shape, not an instance created with `new`.
   *
   * @param personRow a single row, in a key/value pair form
   *
   * @returns the normalized person; `id` is `undefined` when the row has no
   * (truthy) `id` value, and `givenNameInitial` is `undefined` when neither
   * an explicit initial nor a given name is present
   **/
  public static getNormedPersonObjectFromCSVRow (personRow: Record<string, any>): NormedPerson {
    const rawId = personRow['id']
    const givenName = personRow['given_name']
    // Prefer an explicitly supplied initial; otherwise derive it from the
    // first character of the given name when there is one.
    const givenNameInitial = personRow['given_name_initial'] ||
      (givenName ? givenName.charAt(0) : undefined)

    return {
      // Always parse as base 10 so a value is never interpreted as hex
      id: rawId ? Number.parseInt(rawId, 10) : undefined,
      familyName: personRow['family_name'],
      givenNameInitial: givenNameInitial,
      givenName: givenName,
      // getDateObject is a project helper; presumably it converts the raw
      // date string into a Date -- confirm its handling of empty values.
      startDate: getDateObject(personRow['start_date']),
      endDate: getDateObject(personRow['end_date']),
      sourceIds: {
        scopusAffiliationId: personRow['scopus_affiliation_id']
      }
    }
  }
}
10 changes: 5 additions & 5 deletions ingest/modules/test/harvester.test.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { Harvester, HarvestOperation } from '../harvester'
import { ScopusDataSource } from '../scopusDataSource'
import NormedPublication from '../normedPublication'
import { loadPersons} from '../../units/loadPersons'
import NormedPerson from '../normedPerson'
import { randomWait } from '../../units/randomWait'
import { getDateObject } from '../../units/dateRange'

Expand All @@ -22,9 +22,9 @@ if (!fs.existsSync(filePath)) {

dotenv.config({
path: filePath
})
})



// environment variables
process.env.NODE_ENV = 'development';

Expand Down Expand Up @@ -128,7 +128,7 @@ test('test Scopus harvester.harvest by author name', async () => {
const combinedNormedPubs = _.mapValues(results, (result:HarvestSet) => {
return result.normedPublications
})

let resultNormedPubs = []
_.each(combinedNormedPubs, (pubs, index) => {
resultNormedPubs = _.concat(resultNormedPubs, pubs)
Expand All @@ -146,7 +146,7 @@ test('test Scopus harvester.harvest by author name', async () => {
// ignore sourcemetadata since things like citedby-count often change over time
const expectedPub = _.omit(expectedNormedPubsByDoi[doi], 'sourceMetadata')
const receivedPub = _.omit(resultNormedPubsByDoi[doi], 'sourceMetadata')

expect(expectedPub).toEqual(receivedPub)
// finally just check that source metadata is defined
expect(resultNormedPubsByDoi[doi]['sourceMetadata']).toBeDefined()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { loadPersons } from '../loadPersons'
import _ from 'lodash'
import { getDateObject } from '../dateRange'
import NormedPerson from '../normedPerson'
import { getDateObject } from '../../units/dateRange'

const testCSVPath = './test/fixtures/persons_2020.csv'

Expand All @@ -9,14 +9,14 @@ const propMapNoLowercase = {
'Given_name': 'givenName',
'Family_Name': 'familyName',
'Start_Date': 'startDate',
'End_Date': 'endDate'
'End_Date': 'endDate'
}
const propMapLowercaseKeysOnly = {
id: 'ID',
'given_name': 'givenName',
'family_name': 'familyName',
'start_date': 'startDate',
'end_date': 'endDate'
'end_date': 'endDate'
}
const propMapAllLowercase = {
'id': 'id',
Expand Down Expand Up @@ -61,19 +61,8 @@ const defaultExpectedPersons = [
}
]

test('test load persons works with property map with no lowerCase keys or values', async () => {
test('test loadFromCSV', async () => {
expect.hasAssertions()
const persons: NormedPerson[] = await loadPersons(testCSVPath, propMapNoLowercase)
expect(persons).toEqual(expect.arrayContaining(defaultExpectedPersons))
})

test('test load persons works with property map with lowerCase propertyNames and non lowerCase mapped column names', async () => {
expect.hasAssertions()
const persons: NormedPerson[] = await loadPersons(testCSVPath, propMapLowercaseKeysOnly)
expect(persons).toEqual(expect.arrayContaining(defaultExpectedPersons))
})
test('test load persons works with property map with lowerCase propertyNames and lowerCase mapped column names', async () => {
expect.hasAssertions()
const persons: NormedPerson[] = await loadPersons(testCSVPath, propMapAllLowercase)
expect(persons).toEqual(expect.arrayContaining(defaultExpectedPersons))
const persons: NormedPerson[] = await NormedPerson.loadFromCSV(testCSVPath)
expect(persons[0]).toEqual(defaultExpectedPersons[0])
})
47 changes: 0 additions & 47 deletions ingest/units/loadPersons.ts

This file was deleted.

0 comments on commit 1d31ad6

Please sign in to comment.