Skip to content

Commit

Permalink
RHINENG-5394: new migration for procedure change
Browse files Browse the repository at this point in the history
  • Loading branch information
psegedy committed Dec 7, 2023
1 parent b7ab92b commit 997b6bc
Show file tree
Hide file tree
Showing 4 changed files with 186 additions and 12 deletions.
14 changes: 3 additions & 11 deletions database_admin/migrations/117_migrate_system_package2.up.sql
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
Loops through the PK(account_id, system_id, package_id) in chunks of 5000 and inserts it into the new table.
Loops through the PK(account_id, system_id, package_id) in chunks of 1000 and inserts it into the new table.
Assumptions:
1. the system_packages2 table is empty.
2. No new system_packages comes in when this is running.
Expand All @@ -14,8 +14,6 @@ DECLARE
account_idx integer := 0;
system_idx bigint := 0;
package_idx bigint := 0;
cnt bigint := 0;
prev_cnt bigint := 0;
BEGIN
LOOP
INSERT INTO system_package2
Expand All @@ -36,7 +34,7 @@ BEGIN
WHERE (rh_account_id = account_idx AND system_id = system_idx AND package_id > package_idx)
OR (rh_account_id = account_idx AND system_id > system_idx)
ORDER BY rh_account_id, system_id, package_id
LIMIT 5000;
LIMIT 1000;

GET DIAGNOSTICS rows_inserted = ROW_COUNT;

Expand Down Expand Up @@ -67,7 +65,7 @@ BEGIN
FROM system_package
WHERE rh_account_id > account_idx
ORDER BY rh_account_id, system_id, package_id
LIMIT 5000;
LIMIT 1000;

GET DIAGNOSTICS rows_inserted = ROW_COUNT;

Expand All @@ -84,12 +82,6 @@ BEGIN
ORDER BY rh_account_id DESC, system_id DESC, package_id DESC
LIMIT 1;

cnt := cnt + rows_inserted;
IF (cnt/1000000)::int > (prev_cnt/1000000)::int THEN
RAISE NOTICE 'inserted % rows, account: %, system: %, partition: %', cnt, account_idx, system_idx, hash_partition_id(account_idx, 128);
prev_cnt := cnt;
END IF;

END LOOP;
END
$$;
87 changes: 87 additions & 0 deletions database_admin/migrations/118_migrate_system_package2.down.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
/*
Loops through the PK(account_id, system_id, package_id) in chunks of 1000 and inserts it into the new table.
Assumptions:
1. the system_packages2 table is empty.
2. No new system_packages comes in when this is running.
*/

CREATE OR REPLACE PROCEDURE copy_system_packages()
LANGUAGE plpgsql
AS
$$
DECLARE
rows_inserted INTEGER := 0;
account_idx integer := 0;
system_idx bigint := 0;
package_idx bigint := 0;
BEGIN
LOOP
INSERT INTO system_package2
SELECT rh_account_id,
system_id,
name_id,
package_id,
(SELECT id
FROM package
WHERE package.name_id = system_package.name_id
AND evra =
JSONB_PATH_QUERY_ARRAY(update_data, '$[*] ? (@.status== "Installable").evra') ->> 0),
(SELECT id
FROM package
WHERE package.name_id = system_package.name_id
AND evra = JSONB_PATH_QUERY_ARRAY(update_data, '$[*] ? (@.status== "Applicable").evra') ->> 0)
FROM system_package
WHERE (rh_account_id = account_idx AND system_id = system_idx AND package_id > package_idx)
OR (rh_account_id = account_idx AND system_id > system_idx)
ORDER BY rh_account_id, system_id, package_id
LIMIT 1000;

GET DIAGNOSTICS rows_inserted = ROW_COUNT;

COMMIT;

/* I Originally did not include this if statement but I couldn't get the select to use the PK index when I had all 3 where clauses
1. rh_account_id = account_idx AND system_id = system_idx AND package_id > package_idx
2. rh_account_id = account_idx AND system_id > system_idx
3. rh_account_id > account_idx
So I just copied and pasted the same select / insert and used the final clause when the first two return 0 rows
*/
IF rows_inserted = 0 THEN
INSERT INTO system_package2
SELECT rh_account_id,
system_id,
name_id,
package_id,
(SELECT id
FROM package
WHERE package.name_id = system_package.name_id
AND evra = JSONB_PATH_QUERY_ARRAY(update_data, '$[*] ? (@.status== "Installable").evra') ->> 0),
(SELECT id
FROM package
WHERE package.name_id = system_package.name_id
AND evra = JSONB_PATH_QUERY_ARRAY(update_data, '$[*] ? (@.status== "Applicable").evra') ->> 0)
FROM system_package
WHERE rh_account_id > account_idx
ORDER BY rh_account_id, system_id, package_id
LIMIT 1000;

GET DIAGNOSTICS rows_inserted = ROW_COUNT;

COMMIT;
END IF;

EXIT WHEN rows_inserted = 0;

/* Store the highest values of our account/system/package ids are up to.
Should be O(1) because it can just look at the tail of the PK index*/
SELECT rh_account_id, system_id, package_id
INTO account_idx, system_idx, package_idx
FROM system_package2
ORDER BY rh_account_id DESC, system_id DESC, package_id DESC
LIMIT 1;

END LOOP;
END
$$;
95 changes: 95 additions & 0 deletions database_admin/migrations/118_migrate_system_package2.up.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
/*
Loops through the PK(account_id, system_id, package_id) in chunks of 5000 and inserts it into the new table.
Assumptions:
1. the system_packages2 table is empty.
2. No new system_packages comes in when this is running.
*/

CREATE OR REPLACE PROCEDURE copy_system_packages()
LANGUAGE plpgsql
AS
$$
DECLARE
rows_inserted INTEGER := 0;
account_idx integer := 0;
system_idx bigint := 0;
package_idx bigint := 0;
cnt bigint := 0;
prev_cnt bigint := 0;
BEGIN
LOOP
INSERT INTO system_package2
SELECT rh_account_id,
system_id,
name_id,
package_id,
(SELECT id
FROM package
WHERE package.name_id = system_package.name_id
AND evra =
JSONB_PATH_QUERY_ARRAY(update_data, '$[*] ? (@.status== "Installable").evra') ->> 0),
(SELECT id
FROM package
WHERE package.name_id = system_package.name_id
AND evra = JSONB_PATH_QUERY_ARRAY(update_data, '$[*] ? (@.status== "Applicable").evra') ->> 0)
FROM system_package
WHERE (rh_account_id = account_idx AND system_id = system_idx AND package_id > package_idx)
OR (rh_account_id = account_idx AND system_id > system_idx)
ORDER BY rh_account_id, system_id, package_id
LIMIT 5000;

GET DIAGNOSTICS rows_inserted = ROW_COUNT;

COMMIT;

/* I Originally did not include this if statement but I couldn't get the select to use the PK index when I had all 3 where clauses
1. rh_account_id = account_idx AND system_id = system_idx AND package_id > package_idx
2. rh_account_id = account_idx AND system_id > system_idx
3. rh_account_id > account_idx
So I just copied and pasted the same select / insert and used the final clause when the first two return 0 rows
*/
IF rows_inserted = 0 THEN
INSERT INTO system_package2
SELECT rh_account_id,
system_id,
name_id,
package_id,
(SELECT id
FROM package
WHERE package.name_id = system_package.name_id
AND evra = JSONB_PATH_QUERY_ARRAY(update_data, '$[*] ? (@.status== "Installable").evra') ->> 0),
(SELECT id
FROM package
WHERE package.name_id = system_package.name_id
AND evra = JSONB_PATH_QUERY_ARRAY(update_data, '$[*] ? (@.status== "Applicable").evra') ->> 0)
FROM system_package
WHERE rh_account_id > account_idx
ORDER BY rh_account_id, system_id, package_id
LIMIT 5000;

GET DIAGNOSTICS rows_inserted = ROW_COUNT;

COMMIT;
END IF;

EXIT WHEN rows_inserted = 0;

/* Store the highest values of our account/system/package ids are up to.
Should be O(1) because it can just look at the tail of the PK index*/
SELECT rh_account_id, system_id, package_id
INTO account_idx, system_idx, package_idx
FROM system_package2
ORDER BY rh_account_id DESC, system_id DESC, package_id DESC
LIMIT 1;

cnt := cnt + rows_inserted;
IF (cnt/1000000)::int > (prev_cnt/1000000)::int THEN
RAISE NOTICE 'inserted % rows, account: %, system: %, partition: %', cnt, account_idx, system_idx, hash_partition_id(account_idx, 128);
prev_cnt := cnt;
END IF;

END LOOP;
END
$$;
2 changes: 1 addition & 1 deletion database_admin/schema/create_schema.sql
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ CREATE TABLE IF NOT EXISTS schema_migrations


INSERT INTO schema_migrations
VALUES (117, false);
VALUES (118, false);

-- ---------------------------------------------------------------------------
-- Functions
Expand Down

0 comments on commit 997b6bc

Please sign in to comment.