Skip to content

Commit

Permalink
Merge pull request #20 from arenadata/6.3.1-sync
Browse files Browse the repository at this point in the history
ADBDEV-2799 6.3.1 sync
  • Loading branch information
deart2k authored Jul 13, 2022
2 parents ac49d8f + 21dea1a commit d6eb6bc
Show file tree
Hide file tree
Showing 74 changed files with 1,180 additions and 465 deletions.
14 changes: 3 additions & 11 deletions .github/workflows/create-release-on-tag.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,19 +22,11 @@ jobs:
tag_name: ${{ github.ref }}
release_name: PXF Version ${{ github.ref }}
body: |
## 6.3.0 (03/16/2022)
### Enhancements:
- [#703](https://github.com/greenplum-db/pxf/pull/703) Added Support for Avro Logical Types for Readable External Tables
- [#707](https://github.com/greenplum-db/pxf/pull/707) Enabled Kerberos Constrained Delegation impersonation for secure clusters
- [#752](https://github.com/greenplum-db/pxf/pull/752) Add support for GPDB6 on RHEL 8
- [#754](https://github.com/greenplum-db/pxf/pull/754) Add scripts for modifying PXF extension to support gpupgrade
## 6.3.1 (04/27/2022)
### Bug Fixes:
- [#738](https://github.com/greenplum-db/pxf/pull/738) Fix: For reading the records correctly from a MultiLine JSON file
- [#756](https://github.com/greenplum-db/pxf/pull/756) Fixed HiveDataFragmenter not closing connections to Hive Metastore
- [#760](https://github.com/greenplum-db/pxf/pull/760) Update bundled postgresql to 42.3.3
- [#788](https://github.com/greenplum-db/pxf/pull/788) Replace prefix macro with environment variable in scriptlets
- [#794](https://github.com/greenplum-db/pxf/pull/794) Fix NPE in Hive ORC vectorized query execution
draft: false
prerelease: false
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# Changelog

## 6.3.1 (04/27/2022)

### Bug Fixes:

- [#788](https://github.com/greenplum-db/pxf/pull/788) Replace prefix macro with environment variable in scriptlets
- [#794](https://github.com/greenplum-db/pxf/pull/794) Fix NPE in Hive ORC vectorized query execution

## 6.3.0 (03/16/2022)

### Enhancements:
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ all: external-table cli server
external-table:
make -C external-table

fwd:
fdw:
make -C fdw

cli:
Expand Down
3 changes: 3 additions & 0 deletions automation/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ ifeq "$(PXF_TEST_DEBUG)" "true"
endif
endif

# disables credentials check by artifactregistry-maven-wagon plugin when run inside GCE VM.
export NO_GCE_CHECK := true

MVN=mvn

.PHONY: all test
Expand Down
24 changes: 23 additions & 1 deletion automation/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,19 @@
<repositories>
<repository>
<id>test-dependencies</id>
<url>https://repo.pivotal.io/artifactory/gpdb-ud/</url>
<url>artifactregistry://us-central1-maven.pkg.dev/data-gpdb-ud/pxf-automation-test</url>
</repository>
</repositories>

<build>
<extensions>
<extension>
<groupId>com.google.cloud.artifactregistry</groupId>
<artifactId>artifactregistry-maven-wagon</artifactId>
<version>2.1.4</version>
</extension>
</extensions>

<plugins>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
Expand Down Expand Up @@ -151,12 +159,26 @@
<groupId>org.jsystemtest</groupId>
<artifactId>jsystemCore</artifactId>
<version>6.0.01</version>
<exclusions>
<!-- javax.comm provides applications access to RS-232 hardware - not needed -->
<exclusion>
<groupId>javax.comm</groupId>
<artifactId>comm</artifactId>
</exclusion>
</exclusions>
</dependency>

<dependency>
<groupId>org.jsystemtest.systemobjects</groupId>
<artifactId>cli</artifactId>
<version>6.0.01</version>
<exclusions>
<!-- javax.comm provides applications access to RS-232 hardware - not needed -->
<exclusion>
<groupId>javax.comm</groupId>
<artifactId>comm</artifactId>
</exclusion>
</exclusions>
</dependency>

<dependency>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,44 @@

public class HiveVectorizedOrcTest extends HiveBaseTest {

/**
 * Hive column definitions ("name HIVE_TYPE") for the tables created by
 * prepareOrcDataWithRepeatingData(). The list deliberately contains no
 * TIMESTAMP column. Entries are positional: they line up one-to-one with
 * PXF_HIVE_TYPES_NO_TIMESTAMP_COLS.
 */
static final String[] HIVE_TYPES_NO_TIMESTAMP_COLS = {
"t1 STRING",
"t2 STRING",
"num1 INT",
"dub1 DOUBLE",
"dec1 DECIMAL(38,18)",
"r FLOAT",
"bg BIGINT",
"b BOOLEAN",
"tn TINYINT",
"sml SMALLINT",
"dt DATE",
"vc1 VARCHAR(5)",
"c1 CHAR(3)",
"bin BINARY"
};

/**
 * Column definitions ("name TYPE") for the PXF external tables that read the
 * Hive tables described by HIVE_TYPES_NO_TIMESTAMP_COLS. Same column names,
 * same order, no TIMESTAMP column. Types that differ from the Hive side in
 * this list: STRING -> TEXT, INT -> INTEGER, DOUBLE -> DOUBLE PRECISION,
 * DECIMAL(38,18) -> NUMERIC, FLOAT -> REAL, TINYINT -> SMALLINT,
 * BINARY -> BYTEA.
 */
static final String[] PXF_HIVE_TYPES_NO_TIMESTAMP_COLS = {
"t1 TEXT",
"t2 TEXT",
"num1 INTEGER",
"dub1 DOUBLE PRECISION",
"dec1 NUMERIC",
"r REAL",
"bg BIGINT",
"b BOOLEAN",
"tn SMALLINT",
"sml SMALLINT",
"dt DATE",
"vc1 VARCHAR(5)",
"c1 CHAR(3)",
"bin BYTEA"
};

private HiveTable hiveRepeatingCsvTable;
private HiveTable hiveRepeatingNoNullsOrcTable;
private HiveTable hiveRepeatingNullsOrcTable;

ArrayList<String> hiveTypesNoTMCols = new ArrayList<>(Arrays.asList(HIVE_TYPES_COLS));
ArrayList<String> gpdbTypesNoTMCols = new ArrayList<>(Arrays.asList(PXF_HIVE_TYPES_COLS));

Expand Down Expand Up @@ -44,6 +82,26 @@ private void preparePxfHiveOrcTypes() throws Exception {
createTable(exTable);
}

/**
 * Creates the two Hive ORC tables used by the repeating-data test.
 *
 * For each data file a CSV-backed Hive staging table is prepared via
 * prepareTableData(), an ORC table with the same columns is created, and the
 * staging rows are inserted into it. The results are stored in
 * hiveRepeatingNoNullsOrcTable and hiveRepeatingNullsOrcTable.
 *
 * @throws Exception if any of the HDFS/Hive helper calls fails
 */
private void prepareOrcDataWithRepeatingData() throws Exception {
    // timestamp conversion is not supported by HiveORCVectorizedResolver,
    // so every table below uses the column list without a TIMESTAMP column
    String csvTableName = "hive_types_all_columns_repeating_csv";
    String dataFileName = "hive_types_all_columns_repeating.txt";
    String nullsDataFileName = "hive_types_all_columns_repeating_nulls.txt";

    // Pass 1: rows where every column repeats the same non-null value.
    hiveRepeatingCsvTable = prepareTableData(hdfs, hive, hiveRepeatingCsvTable, csvTableName, HIVE_TYPES_NO_TIMESTAMP_COLS, dataFileName);

    hiveRepeatingNoNullsOrcTable = new HiveTable("hive_types_all_columns_repeating_no_nulls_orc", HIVE_TYPES_NO_TIMESTAMP_COLS);
    hiveRepeatingNoNullsOrcTable.setStoredAs(ORC);
    hive.createTableAndVerify(hiveRepeatingNoNullsOrcTable);
    hive.insertData(hiveRepeatingCsvTable, hiveRepeatingNoNullsOrcTable);

    // Pass 2: rows where every column is NULL.
    // NOTE(review): the staging table name is reused and null is passed as the
    // existing table; presumably prepareTableData() recreates the table so no
    // rows from pass 1 leak into the nulls ORC table - confirm.
    hiveRepeatingCsvTable = prepareTableData(hdfs, hive, null, csvTableName, HIVE_TYPES_NO_TIMESTAMP_COLS, nullsDataFileName);

    hiveRepeatingNullsOrcTable = new HiveTable("hive_types_all_columns_repeating_nulls_orc", HIVE_TYPES_NO_TIMESTAMP_COLS);
    hiveRepeatingNullsOrcTable.setStoredAs(ORC);
    hive.createTableAndVerify(hiveRepeatingNullsOrcTable);
    hive.insertData(hiveRepeatingCsvTable, hiveRepeatingNullsOrcTable);
}

@Override
void prepareData() throws Exception {

Expand Down Expand Up @@ -77,4 +135,27 @@ public void storeAsOrcAllTypes() throws Exception {
runTincTest("pxf.features.hive.orc_primitive_types_no_timestamp.runTest");
}

/**
 * Reads the repeating-data Hive ORC tables through four PXF external tables
 * and runs the "orc_repeating" Tinc test against them.
 *
 * Two external tables keep the profile chosen by
 * TableFactory.getPxfHiveVectorizedOrcReadableTable(); the other two override
 * the profile to "hive:orc" and add the user parameter VECTORIZE=true. Each
 * variant is created once for the no-nulls table and once for the all-nulls
 * table.
 *
 * NOTE(review): the method name is misspelled ("columsn"); it is left as-is
 * here because it is the public test identifier.
 *
 * @throws Exception if data preparation, table creation or the Tinc run fails
 */
@Test(groups = { "hive", "features", "gpdb", "security" })
public void columsnWithRepeating() throws Exception {
// Builds hiveRepeatingNoNullsOrcTable and hiveRepeatingNullsOrcTable.
prepareOrcDataWithRepeatingData();

// Factory-default profile, non-null repeating values.
exTable = TableFactory.getPxfHiveVectorizedOrcReadableTable("pxf_hivevectorizedorc_repeating_no_nulls", PXF_HIVE_TYPES_NO_TIMESTAMP_COLS, hiveRepeatingNoNullsOrcTable, true);
createTable(exTable);

// Factory-default profile, all-NULL rows.
exTable = TableFactory.getPxfHiveVectorizedOrcReadableTable("pxf_hivevectorizedorc_repeating_nulls", PXF_HIVE_TYPES_NO_TIMESTAMP_COLS, hiveRepeatingNullsOrcTable, true);
createTable(exTable);

// Same source table as the first case, but read via hive:orc with VECTORIZE=true.
exTable = TableFactory.getPxfHiveVectorizedOrcReadableTable("pxf_hive_orc_vectorize_repeating_no_nulls", PXF_HIVE_TYPES_NO_TIMESTAMP_COLS, hiveRepeatingNoNullsOrcTable, true);
exTable.setProfile("hive:orc");
exTable.setUserParameters(new String[] { "VECTORIZE=true" });
createTable(exTable);

// Same source table as the second case, but read via hive:orc with VECTORIZE=true.
exTable = TableFactory.getPxfHiveVectorizedOrcReadableTable("pxf_hive_orc_vectorize_repeating_nulls", PXF_HIVE_TYPES_NO_TIMESTAMP_COLS, hiveRepeatingNullsOrcTable, true);
exTable.setProfile("hive:orc");
exTable.setUserParameters(new String[] { "VECTORIZE=true" });
createTable(exTable);

// All four external tables must exist before the SQL test suite runs.
runTincTest("pxf.features.hive.orc_repeating.runTest");
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
col1,col2,1,6,1.23456,7.7,23456789,false,1,10,2015-03-06,abcd,abc,1
col1,col2,1,6,1.23456,7.7,23456789,false,1,10,2015-03-06,abcd,abc,1
col1,col2,1,6,1.23456,7.7,23456789,false,1,10,2015-03-06,abcd,abc,1
col1,col2,1,6,1.23456,7.7,23456789,false,1,10,2015-03-06,abcd,abc,1
col1,col2,1,6,1.23456,7.7,23456789,false,1,10,2015-03-06,abcd,abc,1
col1,col2,1,6,1.23456,7.7,23456789,false,1,10,2015-03-06,abcd,abc,1
col1,col2,1,6,1.23456,7.7,23456789,false,1,10,2015-03-06,abcd,abc,1
col1,col2,1,6,1.23456,7.7,23456789,false,1,10,2015-03-06,abcd,abc,1
col1,col2,1,6,1.23456,7.7,23456789,false,1,10,2015-03-06,abcd,abc,1
col1,col2,1,6,1.23456,7.7,23456789,false,1,10,2015-03-06,abcd,abc,1
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N
\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N
\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N
\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N
\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N
\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N
\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N
\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N
\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N
\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
-- start_ignore
-- end_ignore
-- @description query01 for PXF Hive ORC vectorized with repeating data cases
\pset null 'NIL'
Null display is "NIL".
\d pxf_hivevectorizedorc_repeating_no_nulls
External table "public.pxf_hivevectorizedorc_repeating_no_nulls"
Column | Type | Modifiers
--------+----------------------+-----------
t1 | text |
t2 | text |
num1 | integer |
dub1 | double precision |
dec1 | numeric |
r | real |
bg | bigint |
b | boolean |
tn | smallint |
sml | smallint |
dt | date |
vc1 | character varying(5) |
c1 | character(3) |
bin | bytea |
Type: readable
Encoding: UTF8
Format type: custom
Format options: formatter 'pxfwritable_import'
External options: {}
External location: pxf://hive_types_all_columns_repeating_no_nulls_orc?PROFILE=HiveVectorizedORC
Execute on: all segments

SELECT * FROM pxf_hivevectorizedorc_repeating_no_nulls ORDER BY t1;
t1 | t2 | num1 | dub1 | dec1 | r | bg | b | tn | sml | dt | vc1 | c1 | bin
------+------+------+------+---------+-----+----------+---+----+-----+------------+------+-----+-----
col1 | col2 | 1 | 6 | 1.23456 | 7.7 | 23456789 | f | 1 | 10 | 2015-03-06 | abcd | abc | 1
col1 | col2 | 1 | 6 | 1.23456 | 7.7 | 23456789 | f | 1 | 10 | 2015-03-06 | abcd | abc | 1
col1 | col2 | 1 | 6 | 1.23456 | 7.7 | 23456789 | f | 1 | 10 | 2015-03-06 | abcd | abc | 1
col1 | col2 | 1 | 6 | 1.23456 | 7.7 | 23456789 | f | 1 | 10 | 2015-03-06 | abcd | abc | 1
col1 | col2 | 1 | 6 | 1.23456 | 7.7 | 23456789 | f | 1 | 10 | 2015-03-06 | abcd | abc | 1
col1 | col2 | 1 | 6 | 1.23456 | 7.7 | 23456789 | f | 1 | 10 | 2015-03-06 | abcd | abc | 1
col1 | col2 | 1 | 6 | 1.23456 | 7.7 | 23456789 | f | 1 | 10 | 2015-03-06 | abcd | abc | 1
col1 | col2 | 1 | 6 | 1.23456 | 7.7 | 23456789 | f | 1 | 10 | 2015-03-06 | abcd | abc | 1
col1 | col2 | 1 | 6 | 1.23456 | 7.7 | 23456789 | f | 1 | 10 | 2015-03-06 | abcd | abc | 1
col1 | col2 | 1 | 6 | 1.23456 | 7.7 | 23456789 | f | 1 | 10 | 2015-03-06 | abcd | abc | 1
(10 rows)

\d pxf_hivevectorizedorc_repeating_nulls
External table "public.pxf_hivevectorizedorc_repeating_nulls"
Column | Type | Modifiers
--------+----------------------+-----------
t1 | text |
t2 | text |
num1 | integer |
dub1 | double precision |
dec1 | numeric |
r | real |
bg | bigint |
b | boolean |
tn | smallint |
sml | smallint |
dt | date |
vc1 | character varying(5) |
c1 | character(3) |
bin | bytea |
Type: readable
Encoding: UTF8
Format type: custom
Format options: formatter 'pxfwritable_import'
External options: {}
External location: pxf://hive_types_all_columns_repeating_nulls_orc?PROFILE=HiveVectorizedORC
Execute on: all segments

SELECT * FROM pxf_hivevectorizedorc_repeating_nulls ORDER BY t1;
t1 | t2 | num1 | dub1 | dec1 | r | bg | b | tn | sml | dt | vc1 | c1 | bin
-----+-----+------+------+------+-----+-----+-----+-----+-----+-----+-----+-----+-----
NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL
NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL
NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL
NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL
NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL
NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL
NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL
NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL
NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL
NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL
(10 rows)
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
-- start_ignore
-- end_ignore
-- @description query01 for PXF Hive ORC vectorized with repeating data cases
\pset null 'NIL'
Null display is "NIL".
\d pxf_hive_orc_vectorize_repeating_no_nulls
External table "public.pxf_hive_orc_vectorize_repeating_no_nulls"
Column | Type | Modifiers
--------+----------------------+-----------
t1 | text |
t2 | text |
num1 | integer |
dub1 | double precision |
dec1 | numeric |
r | real |
bg | bigint |
b | boolean |
tn | smallint |
sml | smallint |
dt | date |
vc1 | character varying(5) |
c1 | character(3) |
bin | bytea |
Type: readable
Encoding: UTF8
Format type: custom
Format options: formatter 'pxfwritable_import'
External options: {}
External location: pxf://hive_types_all_columns_repeating_no_nulls_orc?PROFILE=hive:orc&VECTORIZE=true
Execute on: all segments

SELECT * FROM pxf_hive_orc_vectorize_repeating_no_nulls ORDER BY t1;
t1 | t2 | num1 | dub1 | dec1 | r | bg | b | tn | sml | dt | vc1 | c1 | bin
------+------+------+------+---------+-----+----------+---+----+-----+------------+------+-----+-----
col1 | col2 | 1 | 6 | 1.23456 | 7.7 | 23456789 | f | 1 | 10 | 2015-03-06 | abcd | abc | 1
col1 | col2 | 1 | 6 | 1.23456 | 7.7 | 23456789 | f | 1 | 10 | 2015-03-06 | abcd | abc | 1
col1 | col2 | 1 | 6 | 1.23456 | 7.7 | 23456789 | f | 1 | 10 | 2015-03-06 | abcd | abc | 1
col1 | col2 | 1 | 6 | 1.23456 | 7.7 | 23456789 | f | 1 | 10 | 2015-03-06 | abcd | abc | 1
col1 | col2 | 1 | 6 | 1.23456 | 7.7 | 23456789 | f | 1 | 10 | 2015-03-06 | abcd | abc | 1
col1 | col2 | 1 | 6 | 1.23456 | 7.7 | 23456789 | f | 1 | 10 | 2015-03-06 | abcd | abc | 1
col1 | col2 | 1 | 6 | 1.23456 | 7.7 | 23456789 | f | 1 | 10 | 2015-03-06 | abcd | abc | 1
col1 | col2 | 1 | 6 | 1.23456 | 7.7 | 23456789 | f | 1 | 10 | 2015-03-06 | abcd | abc | 1
col1 | col2 | 1 | 6 | 1.23456 | 7.7 | 23456789 | f | 1 | 10 | 2015-03-06 | abcd | abc | 1
col1 | col2 | 1 | 6 | 1.23456 | 7.7 | 23456789 | f | 1 | 10 | 2015-03-06 | abcd | abc | 1
(10 rows)

\d pxf_hive_orc_vectorize_repeating_nulls
External table "public.pxf_hive_orc_vectorize_repeating_nulls"
Column | Type | Modifiers
--------+----------------------+-----------
t1 | text |
t2 | text |
num1 | integer |
dub1 | double precision |
dec1 | numeric |
r | real |
bg | bigint |
b | boolean |
tn | smallint |
sml | smallint |
dt | date |
vc1 | character varying(5) |
c1 | character(3) |
bin | bytea |
Type: readable
Encoding: UTF8
Format type: custom
Format options: formatter 'pxfwritable_import'
External options: {}
External location: pxf://hive_types_all_columns_repeating_nulls_orc?PROFILE=hive:orc&VECTORIZE=true
Execute on: all segments

SELECT * FROM pxf_hive_orc_vectorize_repeating_nulls ORDER BY t1;
t1 | t2 | num1 | dub1 | dec1 | r | bg | b | tn | sml | dt | vc1 | c1 | bin
-----+-----+------+------+------+-----+-----+-----+-----+-----+-----+-----+-----+-----
NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL
NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL
NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL
NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL
NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL
NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL
NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL
NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL
NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL
NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL | NIL
(10 rows)
Loading

0 comments on commit d6eb6bc

Please sign in to comment.