Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GRAMEX-97 ⁃ ENH: Update multiple rows in files and DBs with data.update #456

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 102 additions & 0 deletions testlib/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -680,6 +680,108 @@ def test_update(self):
gramex.data.update(data, args=args, id=['देश', 'city', 'product'])
ase(types_original, data.dtypes)

def test_update_multiple_file(self):
    """Update three rows of a CSV copy in one ``gramex.data.update`` call.

    A throwaway copy of ``tests/actors.csv`` is updated with ``id=['name']``
    and equal-length ``name``, ``category`` and ``rating`` lists. The rows
    read back for those names must carry the new categories and ratings.
    """
    source_csv = os.path.join(folder, '..', 'tests/actors.csv')
    target_csv = os.path.join(folder, 'actors.update.csv')
    shutil.copy(source_csv, target_csv)
    # Registered so the class-level teardown can remove the copy.
    self.tmpfiles.append(target_csv)

    actor_names = ['Humphrey Bogart', 'James Stewart', 'Audrey Hepburn']
    new_categories = ['Stars', 'Thespians', 'Heartthrobs']
    new_ratings = [1, 0.99, 1.11]
    update_args = dict(name=actor_names, category=new_categories, rating=new_ratings)
    gramex.data.update(target_csv, args=update_args, id=['name'])

    updated = gramex.data.filter(target_csv, args={'name': actor_names})
    self.assertEqual(updated['category'].tolist(), new_categories)
    self.assertEqual(updated['rating'].tolist(), new_ratings)

def test_update_multiple_file_uneven_args(self):
    """Update a CSV copy with argument lists of unequal length.

    Scenario 1: three names and categories but only two ratings. All three
    categories must change, and the third actor keeps the rating already in
    the file.

    Scenario 2: two names but three categories and ratings. The surplus
    third category and rating must be ignored, so the third actor's row is
    expected to be unchanged.
    """
    source_csv = os.path.join(folder, '..', 'tests/actors.csv')
    target_csv = os.path.join(folder, 'actors.update.csv')
    # Rating expected for Audrey Hepburn whenever her row's rating is not updated.
    untouched_rating = 0.120196561

    shutil.copy(source_csv, target_csv)
    self.tmpfiles.append(target_csv)

    # Scenario 1: one rating fewer than names/categories.
    actor_names = ['Humphrey Bogart', 'James Stewart', 'Audrey Hepburn']
    new_categories = ['Stars', 'Thespians', 'Heartthrobs']
    new_ratings = [1, 0.99]
    update_args = dict(name=actor_names, category=new_categories, rating=new_ratings)
    gramex.data.update(target_csv, args=update_args, id=['name'])
    updated = gramex.data.filter(target_csv, args={'name': actor_names})
    self.assertEqual(updated['category'].tolist(), new_categories)
    # Check that the original rating is not updated for Audrey Hepburn
    self.assertEqual(updated['rating'].tolist(), new_ratings + [untouched_rating])

    # Scenario 2: start again from a pristine copy, with only two values in
    # the index but three in the other columns. Check that extras are ignored.
    shutil.copy(source_csv, target_csv)
    actor_names = ['Humphrey Bogart', 'James Stewart']
    new_ratings = [1, 0.99, 1.11]
    update_args = dict(name=actor_names, category=new_categories, rating=new_ratings)
    gramex.data.update(target_csv, args=update_args, id=['name'])
    updated = gramex.data.filter(
        target_csv, args={'name': actor_names + ['Audrey Hepburn']}
    )
    # The third row is expected to read 'Actresses' with the untouched rating.
    expected_categories = new_categories[:-1] + ['Actresses']
    self.assertEqual(updated['category'].tolist(), expected_categories)
    expected_ratings = new_ratings[:-1] + [untouched_rating]
    self.assertEqual(updated['rating'].tolist(), expected_ratings)

def test_update_multiple_db(self):
# Multi-row update against a SQLite table, using equal-length argument lists.
# Load the actors CSV and write it to the 'actors' table of a scratch SQLite
# file, replacing any existing table; the file path is recorded in self.tmpfiles.
actors = gramex.cache.open(os.path.join(folder, '../tests/actors.csv'))
temp_db = f'sqlite:///{folder}/actors.db'
self.tmpfiles.append(os.path.join(folder, 'actors.db'))
actors.to_sql('actors', sa.create_engine(temp_db), index=False, if_exists='replace')

names = ['Humphrey Bogart', 'James Stewart', 'Audrey Hepburn']
categories = ['Stars', 'Thespians', 'Heartthrobs']
ratings = [1, 0.99, 1.11]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add a test case where

  • ratings has only 2 values [1, 0.99] and the third is missing. Audrey's rating should not get updated, but her categories should be.
  • names has only 2 values -- in which case, the 3rd category and rating are ignored

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sanand0 This works for files, but for sqlalchemy, to deal with uneven args, we might have to do column-wise update queries. Is this acceptable?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jaidevd -- yes, column-wise update queries are fine. I don't anticipate this to be used often and we can optimize later

# Update category and rating for all three names in a single call, passing
# id=['name'] (presumably the key column used to match rows — TODO confirm).
gramex.data.update(
temp_db,
args={
'name': names,
'category': categories,
'rating': ratings
}, id=['name'], table='actors'
)
# Read the three rows back and compare with the values just written.
# NOTE(review): assumes rows come back in the same order as `names` — confirm.
df = gramex.data.filter(temp_db, args={'name': names}, table='actors')
self.assertEqual(df['category'].tolist(), categories)
self.assertEqual(df['rating'].tolist(), ratings)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add test cases for MySQL and PostgreSQL as well.

def test_update_multiple_db_uneven_args(self):
    """Update a SQLite table when ``rating`` has fewer values than ``name``.

    The actors CSV is loaded into the ``actors`` table of a scratch SQLite
    file. Three names and categories are sent with only two ratings. All
    three categories must change, and the third actor keeps the rating
    already stored.
    """
    actors_frame = gramex.cache.open(os.path.join(folder, '../tests/actors.csv'))
    temp_db = f'sqlite:///{folder}/actors.db'
    # Registered so the class-level teardown can remove the database file.
    self.tmpfiles.append(os.path.join(folder, 'actors.db'))
    engine = sa.create_engine(temp_db)
    actors_frame.to_sql('actors', engine, index=False, if_exists='replace')

    actor_names = ['Humphrey Bogart', 'James Stewart', 'Audrey Hepburn']
    new_categories = ['Stars', 'Thespians', 'Heartthrobs']
    new_ratings = [1, 0.99]
    # Rating expected for Audrey Hepburn, who has no replacement rating.
    untouched_rating = 0.120196561

    update_args = dict(name=actor_names, category=new_categories, rating=new_ratings)
    gramex.data.update(temp_db, args=update_args, id=['name'], table='actors')

    updated = gramex.data.filter(temp_db, args={'name': actor_names}, table='actors')
    self.assertEqual(updated['category'].tolist(), new_categories)
    # Check that the original rating is not updated for Audrey Hepburn
    self.assertEqual(updated['rating'].tolist(), new_ratings + [untouched_rating])

@classmethod
def tearDownClass(cls):
for path in cls.tmpfiles:
Expand Down