Skip to content

Commit

Permalink
Implement smoke test for vector cql data type support
Browse files Browse the repository at this point in the history
  • Loading branch information
msmygit committed Sep 20, 2023
1 parent 93dce93 commit ec1c449
Show file tree
Hide file tree
Showing 21 changed files with 130 additions and 144 deletions.
32 changes: 0 additions & 32 deletions .classpath

This file was deleted.

3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,6 @@ dependency-reduced-pom.xml
cassandra-data-migrator.iml
SIT/local
*.DS_Store
.classpath
.project
.settings/*
23 changes: 0 additions & 23 deletions .project

This file was deleted.

5 changes: 0 additions & 5 deletions .settings/org.eclipse.core.resources.prefs

This file was deleted.

8 changes: 0 additions & 8 deletions .settings/org.eclipse.jdt.core.prefs

This file was deleted.

4 changes: 0 additions & 4 deletions .settings/org.eclipse.m2e.core.prefs

This file was deleted.

8 changes: 4 additions & 4 deletions SIT/cdm-assert.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,17 @@ assertCmd="egrep 'JobSession.* Final ' \${OUTPUT_FILE} | sed 's/^.*Final //'"
_usage() {
cat <<EOF
usage: $0 -f output_file -a assertFile [-d directory]
usage: $0 -f output_file -a assert_file [-d directory]
Required
-f output_file : a file with list of scenarios, same format as cdm.sh
-a assertFile : a file with the assertions
-a assert_file : a file with the assertions
Optional
-d directory : directory in which output_file and assertFile may be found
==================
assertFile Format
assert_file Format
==================
Expected to contain the "Final" job session summary information, generated similar to
${assertCmd}
Expand Down Expand Up @@ -66,7 +66,7 @@ else
fi

if [[ -z "$ASSERT_FILENAME" ]]; then
echo "missing -a assertFile"
echo "missing -a assert_file"
argErrors=1
else
if [[ -z "${CONFIG_DIR}" ]]; then
Expand Down
2 changes: 1 addition & 1 deletion SIT/smoke/04_counters/breakData.cql
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ DELETE FROM target.smoke_counters where key='record3';
UPDATE origin.smoke_counters SET col2=col2+1 WHERE key='record4';
UPDATE target.smoke_counters SET col2=col2+1 WHERE key='record4';

-- change by different amounts, they shouuld be brought into alignment with
-- change by different amounts, they should be brought into alignment with
-- origin, even if that means going down
UPDATE origin.smoke_counters SET col2=col2+1 WHERE key='record5';
UPDATE target.smoke_counters SET col2=col2+2 WHERE key='record5';
Expand Down
4 changes: 4 additions & 0 deletions SIT/smoke_inflight/06_vector/cdm.migrateData.assert
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Read Record Count: 2
Skipped Record Count: 0
Write Record Count: 2
Error Record Count: 0
2 changes: 2 additions & 0 deletions SIT/smoke_inflight/06_vector/cdm.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
migrateData com.datastax.cdm.job.Migrate /smoke/06_vector/migrate.properties
validateData com.datastax.cdm.job.DiffData /smoke/06_vector/migrate.properties
7 changes: 7 additions & 0 deletions SIT/smoke_inflight/06_vector/cdm.validateData.assert
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Read Record Count: 2
Mismatch Record Count: 0
Corrected Mismatch Record Count: 0
Missing Record Count: 0
Corrected Missing Record Count: 0
Valid Record Count: 2
Skipped Record Count: 0
9 changes: 9 additions & 0 deletions SIT/smoke_inflight/06_vector/execute.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/bin/bash -e
# Runs every CDM scenario listed in cdm.txt and asserts its "Final" job
# summary against the matching cdm.<scenario>.assert file.
#
# Usage: execute.sh <workingDir>
#   workingDir : directory containing cdm.txt and the per-scenario .assert files
#
# Exits non-zero (via -e) as soon as any scenario run or assertion fails.

workingDir="$1"
cd "$workingDir"

# First whitespace-separated column of each cdm.txt line is the scenario name.
# read -r avoids backslash mangling; quoting "$scenario" prevents word
# splitting/globbing if a name ever contains special characters.
while read -r scenario _; do
    [[ -z "$scenario" ]] && continue
    /local/cdm.sh -f cdm.txt -s "$scenario" -d "$workingDir" > "cdm.$scenario.out" 2> "cdm.$scenario.err"
    /local/cdm-assert.sh -f "cdm.$scenario.out" -a "cdm.$scenario.assert" -d "$workingDir"
done < cdm.txt
1 change: 1 addition & 0 deletions SIT/smoke_inflight/06_vector/expected.cql
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
SELECT * FROM target.smoke_vector;
7 changes: 7 additions & 0 deletions SIT/smoke_inflight/06_vector/expected.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@

col1 | embedding
------+--------------------
1 | [1.1, 2.2, 3.3]
2 | [4.4, -0.01, 0.99]

(2 rows)
10 changes: 10 additions & 0 deletions SIT/smoke_inflight/06_vector/migrate.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Connection: origin and target both point at the single SIT Cassandra node.
spark.cdm.connect.origin.host cdm-sit-cass
spark.cdm.connect.target.host cdm-sit-cass

# Source and destination tables for the vector smoke test (see setup.cql).
spark.cdm.schema.origin.keyspaceTable origin.smoke_vector
spark.cdm.schema.target.keyspaceTable target.smoke_vector
# One partition split is sufficient for the tiny 2-row dataset.
spark.cdm.perfops.numParts 1

# Allow the validation job (DiffData) to auto-correct missing/mismatched rows.
spark.cdm.autocorrect.missing true
spark.cdm.autocorrect.mismatch true

7 changes: 7 additions & 0 deletions SIT/smoke_inflight/06_vector/setup.cql
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
-- Origin table: seeded with two rows carrying 3-dimension float vectors,
-- the input data for the vector-type migration smoke test.
DROP TABLE IF EXISTS origin.smoke_vector;
CREATE TABLE origin.smoke_vector (col1 int PRIMARY KEY, embedding vector<float, 3>);
INSERT INTO origin.smoke_vector(col1, embedding) VALUES (1,[1.1,2.2,3.3]);
INSERT INTO origin.smoke_vector(col1, embedding) VALUES (2,[4.4,-0.01,0.99]);

-- Target table: created empty with the identical schema; the migrate job
-- is expected to populate it (see expected.out for the post-migration rows).
DROP TABLE IF EXISTS target.smoke_vector;
CREATE TABLE target.smoke_vector (col1 int PRIMARY KEY, embedding vector<float, 3>);
130 changes: 65 additions & 65 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
<cassandra.version>5.0-alpha1</cassandra.version>
<junit.version>5.9.1</junit.version>
<mockito.version>4.11.0</mockito.version>
<java-driver.version>4.17.0</java-driver.version>
<java-driver.version>4.17.0</java-driver.version>
</properties>

<distributionManagement>
Expand Down Expand Up @@ -75,24 +75,24 @@
<groupId>com.datastax.spark</groupId>
<artifactId>spark-cassandra-connector_${scala.main.version}</artifactId>
<version>${connector.version}</version>
<exclusions>
<exclusion>
<groupId>com.datastax.oss</groupId>
<artifactId>java-driver-core-shaded</artifactId>
</exclusion>
<exclusion>
<groupId>com.datastax.oss</groupId>
<artifactId>java-driver-mapper-runtime</artifactId>
</exclusion>
<exclusion>
<groupId>com.datastax.oss</groupId>
<artifactId>java-driver-query-builder</artifactId>
</exclusion>
<exclusion>
<groupId>com.datastax.oss</groupId>
<artifactId>java-driver-shaded-guava</artifactId>
</exclusion>
</exclusions>
<exclusions>
<exclusion>
<groupId>com.datastax.oss</groupId>
<artifactId>java-driver-core-shaded</artifactId>
</exclusion>
<exclusion>
<groupId>com.datastax.oss</groupId>
<artifactId>java-driver-mapper-runtime</artifactId>
</exclusion>
<exclusion>
<groupId>com.datastax.oss</groupId>
<artifactId>java-driver-query-builder</artifactId>
</exclusion>
<exclusion>
<groupId>com.datastax.oss</groupId>
<artifactId>java-driver-shaded-guava</artifactId>
</exclusion>
</exclusions>
</dependency>

<!-- Adding Java Driver latest version explicitly -->
Expand Down Expand Up @@ -270,54 +270,54 @@

<!-- Integrate JaCoCo code coverage plugin from https://www.eclemma.org/jacoco/trunk/doc/index.html -->
<plugin>
<groupId>org.jacoco</groupId>
<artifactId>jacoco-maven-plugin</artifactId>
<version>0.8.10</version>
<executions>
<execution>
<goals>
<goal>prepare-agent</goal>
</goals>
</execution>
<execution>
<id>report</id>
<phase>prepare-package</phase>
<goals>
<goal>report</goal>
</goals>
</execution>
<groupId>org.jacoco</groupId>
<artifactId>jacoco-maven-plugin</artifactId>
<version>0.8.10</version>
<executions>
<execution>
<goals>
<goal>prepare-agent</goal>
</goals>
</execution>
<execution>
<id>report</id>
<phase>prepare-package</phase>
<goals>
<goal>report</goal>
</goals>
</execution>
<execution>
<id>jacoco-check</id>
<goals>
<goal>check</goal>
</goals>
<configuration>
<rules>
<rule>
<element>BUNDLE</element>
<limits>
<limit>
<id>jacoco-check</id>
<goals>
<goal>check</goal>
</goals>
<configuration>
<rules>
<rule>
<element>BUNDLE</element>
<limits>
<limit>
<counter>COMPLEXITY</counter>
<value>COVEREDRATIO</value>
<minimum>0.33</minimum>
</limit>
<limit>
<counter>INSTRUCTION</counter>
<value>COVEREDRATIO</value>
<minimum>42%</minimum>
</limit>
<limit>
<counter>LINE</counter>
<value>MISSEDCOUNT</value>
<maximum>1544</maximum>
</limit>
</limits>
</rule>
</rules>
</configuration>
</execution>
</executions>
</plugin>
<value>COVEREDRATIO</value>
<minimum>0.33</minimum>
</limit>
<limit>
<counter>INSTRUCTION</counter>
<value>COVEREDRATIO</value>
<minimum>42%</minimum>
</limit>
<limit>
<counter>LINE</counter>
<value>MISSEDCOUNT</value>
<maximum>1544</maximum>
</limit>
</limits>
</rule>
</rules>
</configuration>
</execution>
</executions>
</plugin>

</plugins>
</build>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ public ByteBuffer encode(Double value, @NotNull ProtocolVersion protocolVersion)
@Override
public Double decode(ByteBuffer bytes, @NotNull ProtocolVersion protocolVersion) {
String stringValue = TypeCodecs.TEXT.decode(bytes, protocolVersion);
return new Double(stringValue);
return Double.valueOf(stringValue);
}

@Override
Expand Down
1 change: 1 addition & 0 deletions src/main/java/com/datastax/cdm/data/CqlData.java
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ public static String getFormattedContent(Type type, Object value) {
case UDT:
return ((UdtValue) value).getFormattedContents();
case LIST:
case VECTOR:
openBracket = "[";
closeBracket = "]";
break;
Expand Down
6 changes: 5 additions & 1 deletion src/main/java/com/datastax/cdm/job/DiffJobSession.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@

import com.datastax.cdm.cql.statement.OriginSelectByPartitionRangeStatement;
import com.datastax.cdm.cql.statement.TargetSelectByPKStatement;
import com.datastax.cdm.data.*;
import com.datastax.cdm.data.CqlData;
import com.datastax.cdm.data.DataUtility;
import com.datastax.cdm.data.EnhancedPK;
import com.datastax.cdm.data.PKFactory;
import com.datastax.cdm.data.Record;
import com.datastax.cdm.feature.ConstantColumns;
import com.datastax.cdm.feature.ExplodeMap;
import com.datastax.cdm.feature.Featureset;
Expand Down
3 changes: 3 additions & 0 deletions src/test/java/com/datastax/cdm/cql/CommonMocks.java
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
package com.datastax.cdm.cql;

import com.datastax.cdm.data.*;
import com.datastax.cdm.data.Record;
import com.datastax.cdm.feature.*;
import com.datastax.cdm.properties.IPropertyHelper;
import com.datastax.cdm.properties.KnownProperties;
import com.datastax.cdm.schema.CqlTable;
import com.datastax.oss.driver.api.core.ConsistencyLevel;
import com.datastax.oss.driver.api.core.CqlSession;
import com.datastax.oss.driver.api.core.cql.*;
import com.datastax.oss.driver.api.core.data.CqlVector;
import com.datastax.oss.driver.api.core.type.DataType;
import com.datastax.oss.driver.api.core.type.DataTypes;
import com.datastax.oss.driver.api.core.type.codec.TypeCodec;
Expand Down Expand Up @@ -492,6 +494,7 @@ public static Object getSampleData(DataType type) {
case LIST: return Arrays.asList("1","2","3");
case SET: return new HashSet(Arrays.asList("1","2","3"));
case MAP: return new HashMap<String,String>() {{put("1","one");put("2","two");put("3","three");}};
case VECTOR: return CqlVector.newInstance(1.1,2.2,3.3);
}
return "DataType "+type+" is not supported, so returning a String";
}
Expand Down

0 comments on commit ec1c449

Please sign in to comment.