Skip to content

Commit

Permalink
changelog: Internal, Reporting, stale data check updates
Browse files Browse the repository at this point in the history
  • Loading branch information
colter-nattrass committed Jan 24, 2025
1 parent bfc5404 commit 4201da4
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 5 deletions.
19 changes: 16 additions & 3 deletions app/jobs/data_warehouse/table_summary_stats_export_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ module DataWarehouse
class TableSummaryStatsExportJob < BaseJob
REPORT_NAME = 'table_summary_stats'

# Names of tables that max_ids_and_counts skips when collecting summary stats.
TABLE_EXCLUSION_LIST = %w[agency_identities].freeze

def perform(timestamp)
return if data_warehouse_disabled?

Expand All @@ -22,6 +26,8 @@ def fetch_table_max_ids_and_counts(timestamp)
def max_ids_and_counts(timestamp)
active_tables = {}
ActiveRecord::Base.connection.tables.each do |table|
next if TABLE_EXCLUSION_LIST.include?(table)

if table_has_id_column?(table)
active_tables[table] = fetch_max_id_and_count(table, timestamp)
end
Expand All @@ -39,15 +45,22 @@ def table_has_id_column?(table)
# Fetches the highest id and the row count for a single table.
#
# When the table has a created_at column, only rows with
# created_at <= timestamp are counted; otherwise every row is counted.
#
# @param table [String] unquoted table name; quoted here before interpolation
# @param timestamp [Object] cutoff value; quoted by the connection before use
# @return [Hash] first row of the query result with 'max_id' and 'row_count',
#   plus 'timestamp_column' ('created_at' when the filter was applied, else nil)
def fetch_max_id_and_count(table, timestamp)
  connection = ActiveRecord::Base.connection
  # Look the column up once; the answer drives both the WHERE clause and the
  # 'timestamp_column' value reported back to the caller.
  timestamp_column = 'created_at' if table_has_column?(table, 'created_at')

  query = <<-SQL
    SELECT COALESCE(MAX(id), 0) AS max_id, COUNT(*) AS row_count
    FROM #{connection.quote_table_name(table)}
  SQL
  query += " WHERE created_at <= #{connection.quote(timestamp)}" if timestamp_column

  # Run the statement exactly once and annotate the single aggregate row.
  result = connection.execute(query).first
  result['timestamp_column'] = timestamp_column
  result
end

def table_has_column?(table, column_name)
Expand Down
66 changes: 64 additions & 2 deletions spec/jobs/data_warehouse/table_summary_stats_export_job_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,21 @@
# Cutoff passed to the job in the examples: end of day on 2024-10-10, UTC.
let(:timestamp) { Date.new(2024, 10, 10).in_time_zone('UTC').end_of_day }
let(:job) { described_class.new }
let(:expected_bucket) { 'login-gov-analytics-export-test-1234-us-west-2' }
# Default table names for the examples; nested contexts override this.
# NOTE(review): presumably fed to a stub of ActiveRecord::Base.connection.tables
# in a hook outside this view - confirm.
let(:test_on_tables) { ['agencies', 'users'] }
let(:s3_data_warehouse_bucket_prefix) { 'login-gov-analytics-export' }
let(:data_warehouse_enabled) { true }

# Serialized stats expected for the default tables: agencies first, then users.
let(:expected_json) do
  agencies_stats = { 'max_id' => 19, 'row_count' => 19, 'timestamp_column' => nil }
  users_stats = { 'max_id' => 2, 'row_count' => 2, 'timestamp_column' => 'created_at' }

  { 'agencies' => agencies_stats, 'users' => users_stats }.to_json
end
Expand Down Expand Up @@ -66,7 +72,12 @@
end

context 'when tables are empty' do
let(:expected_empty_json) { { 'users' => { 'max_id' => 0, 'row_count' => 0 } }.to_json }
let(:test_on_tables) { ['users'] }
let(:expected_empty_json) do
{ 'users' => { 'max_id' => 0,
'row_count' => 0,
'timestamp_column' => 'created_at' } }.to_json
end

before do
User.delete_all # Clear the User table to simulate emptiness
Expand Down Expand Up @@ -97,6 +108,57 @@
end
end

context 'when tables are missing the timestamp column' do
  # users is expected to report 'created_at'; agencies is expected to report nil.
  let(:expected_json) do
    users_stats = { 'max_id' => 2, 'row_count' => 2, 'timestamp_column' => 'created_at' }
    agencies_stats = { 'max_id' => 19, 'row_count' => 19, 'timestamp_column' => nil }

    { 'users' => users_stats, 'agencies' => agencies_stats }.to_json
  end

  before do
    # The example compares serialized JSON strings, so the stubbed table order
    # is kept the same as the key order in expected_json.
    table_names = ['users', 'agencies']
    allow(ActiveRecord::Base.connection).to receive(:tables).and_return(table_names)
  end

  it 'generates correct values without timestamp column' do
    stats = job.fetch_table_max_ids_and_counts(timestamp)

    expect(stats.to_json).to eq(expected_json)
  end
end

context 'when tables should be excluded' do
  # agency_identities is offered to the job but must not appear in its output.
  let(:test_on_tables) { ['agency_identities', 'users'] }
  let(:expected_json) do
    users_stats = { 'max_id' => 2, 'row_count' => 2, 'timestamp_column' => 'created_at' }

    { 'users' => users_stats }.to_json
  end

  before do
    connection = ActiveRecord::Base.connection
    allow(connection).to receive(:tables).and_return(test_on_tables)
  end

  it 'excludes tables in the exclusion list' do
    stats = job.fetch_table_max_ids_and_counts(timestamp)

    expect(stats.to_json).to eq(expected_json)
    expect(stats.keys).not_to include('agency_identities')
  end
end

context 'when uploading to S3' do
it 'uploads a file to S3 based on the report date' do
expect(job).to receive(:upload_file_to_s3_bucket).with(
Expand Down

0 comments on commit 4201da4

Please sign in to comment.