Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Return statistics for faker tables #24863

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions plugin/trino-faker/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,12 @@
<scope>test</scope>
</dependency>

<dependency>
<groupId>io.trino</groupId>
<artifactId>trino-exchange-filesystem</artifactId>
<scope>test</scope>
</dependency>

<dependency>
<groupId>io.trino</groupId>
<artifactId>trino-main</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@

package io.trino.plugin.faker;

import com.google.common.collect.ImmutableList;
import io.airlift.units.Duration;
import io.trino.spi.TrinoException;
import io.trino.spi.connector.ColumnHandle;
import io.trino.spi.connector.ColumnMetadata;
Expand All @@ -32,17 +30,15 @@
import io.trino.spi.type.VarcharType;

import java.util.Collection;
import java.util.List;
import java.util.concurrent.TimeUnit;

import static com.google.common.base.Preconditions.checkState;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static io.trino.plugin.faker.ColumnInfo.ALLOWED_VALUES_PROPERTY;
import static io.trino.plugin.faker.ColumnInfo.GENERATOR_PROPERTY;
import static io.trino.plugin.faker.ColumnInfo.MAX_PROPERTY;
import static io.trino.plugin.faker.ColumnInfo.MIN_PROPERTY;
import static io.trino.plugin.faker.ColumnInfo.NULL_PROBABILITY_PROPERTY;
import static io.trino.plugin.faker.ColumnInfo.STEP_PROPERTY;
import static io.trino.plugin.faker.PropertyValues.propertyValue;
import static io.trino.spi.StandardErrorCode.INVALID_COLUMN_PROPERTY;
import static io.trino.spi.type.BigintType.BIGINT;
import static io.trino.spi.type.DateType.DATE;
Expand Down Expand Up @@ -86,20 +82,13 @@ public static FakerColumnHandle of(int columnId, ColumnMetadata column, double d
}
domain = Domain.create(ValueSet.ofRanges(range(column.getType(), min, max)), false);
}
if (column.getProperties().containsKey(ALLOWED_VALUES_PROPERTY)) {
Object allowedValues = propertyValue(column, ALLOWED_VALUES_PROPERTY);

if (allowedValues != null) {
if (min != null || max != null || generator != null) {
throw new TrinoException(INVALID_COLUMN_PROPERTY, "The `%s` property cannot be set together with `%s`, `%s`, and `%s` properties".formatted(ALLOWED_VALUES_PROPERTY, MIN_PROPERTY, MAX_PROPERTY, GENERATOR_PROPERTY));
}
ImmutableList.Builder<Object> builder = ImmutableList.builder();
for (String value : strings((List<?>) column.getProperties().get(ALLOWED_VALUES_PROPERTY))) {
try {
builder.add(Literal.parse(value, column.getType()));
}
catch (IllegalArgumentException | ClassCastException e) {
throw new TrinoException(INVALID_COLUMN_PROPERTY, "The `%s` property must only contain valid %s literals, failed to parse `%s`".formatted(ALLOWED_VALUES_PROPERTY, column.getType().getDisplayName(), value), e);
}
}
domain = Domain.create(ValueSet.copyOf(column.getType(), builder.build()), false);
domain = Domain.create(ValueSet.copyOf(column.getType(), (Collection<?>) allowedValues), false);
}

return new FakerColumnHandle(
Expand All @@ -117,40 +106,21 @@ private static boolean isCharacterColumn(ColumnMetadata column)
return column.getType() instanceof CharType || column.getType() instanceof VarcharType || column.getType() instanceof VarbinaryType;
}

/**
 * Reads the raw text stored under {@code property} in the column's properties
 * and parses it as a literal of the column's type.
 *
 * @throws TrinoException with {@code INVALID_COLUMN_PROPERTY} when the text is not a valid literal
 */
private static Object propertyValue(ColumnMetadata column, String property)
{
    String rawValue = (String) column.getProperties().get(property);
    Type columnType = column.getType();
    try {
        return Literal.parse(rawValue, columnType);
    }
    catch (IllegalArgumentException e) {
        throw new TrinoException(INVALID_COLUMN_PROPERTY, "The `%s` property must be a valid %s literal".formatted(property, columnType.getDisplayName()), e);
    }
}

private static ValueSet stepValue(ColumnMetadata column)
{
Type type = column.getType();
String rawStep = (String) column.getProperties().get(STEP_PROPERTY);
if (rawStep == null) {
Object step = propertyValue(column, STEP_PROPERTY);
if (step == null) {
return ValueSet.none(type);
}
if (isCharacterColumn(column)) {
throw new TrinoException(INVALID_COLUMN_PROPERTY, "The `%s` property cannot be set for CHAR, VARCHAR or VARBINARY columns".formatted(STEP_PROPERTY));
}
if (DATE.equals(column.getType()) || type instanceof TimestampType || type instanceof TimestampWithTimeZoneType || type instanceof TimeType || type instanceof TimeWithTimeZoneType) {
try {
return ValueSet.of(BIGINT, Duration.valueOf(rawStep).roundTo(TimeUnit.NANOSECONDS));
}
catch (IllegalArgumentException e) {
throw new TrinoException(INVALID_COLUMN_PROPERTY, "The `%s` property for a %s column must be a valid duration literal".formatted(STEP_PROPERTY, column.getType().getDisplayName()), e);
}
}
try {
return ValueSet.of(type, Literal.parse(rawStep, type));
}
catch (IllegalArgumentException e) {
throw new TrinoException(INVALID_COLUMN_PROPERTY, "The `%s` property for a %s column must be a valid %s literal".formatted(STEP_PROPERTY, column.getType().getDisplayName(), type.getDisplayName()), e);
Type stepType = type;
if (DATE.equals(type) || type instanceof TimestampType || type instanceof TimestampWithTimeZoneType || type instanceof TimeType || type instanceof TimeWithTimeZoneType) {
stepType = BIGINT;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we only support long steps, maybe no need to check the types here

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it was failing without that

}
return ValueSet.of(stepType, step);
}

private static Range range(Type type, Object min, Object max)
Expand All @@ -168,13 +138,6 @@ private static Range range(Type type, Object min, Object max)
return Range.range(type, min, true, max, true);
}

/**
 * Casts every element of {@code values} to {@link String} and returns them,
 * in iteration order, as an immutable list.
 *
 * @throws ClassCastException if an element is not a {@code String}
 */
private static List<String> strings(Collection<?> values)
{
    ImmutableList.Builder<String> result = ImmutableList.builder();
    for (Object value : values) {
        result.add(String.class.cast(value));
    }
    return result.build();
}

public FakerColumnHandle withNullProbability(double nullProbability)
{
return new FakerColumnHandle(columnIndex, name, type, nullProbability, generator, domain, step);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,10 @@
import io.trino.spi.predicate.ValueSet;
import io.trino.spi.security.TrinoPrincipal;
import io.trino.spi.statistics.ColumnStatisticMetadata;
import io.trino.spi.statistics.ColumnStatistics;
import io.trino.spi.statistics.ComputedStatistics;
import io.trino.spi.statistics.Estimate;
import io.trino.spi.statistics.TableStatistics;
import io.trino.spi.statistics.TableStatisticsMetadata;
import io.trino.spi.type.CharType;
import io.trino.spi.type.Type;
Expand Down Expand Up @@ -759,4 +762,41 @@ public FunctionDependencyDeclaration getFunctionDependencies(ConnectorSession se
{
return FunctionDependencyDeclaration.NO_DEPENDENCIES;
}

/**
 * Builds estimated statistics for a faker table from its column properties.
 *
 * <p>The row count is the table handle's limit. For every column the nulls
 * fraction is the configured null probability. The distinct values count is
 * the number of allowed values when that property is set; otherwise, for
 * columns whose Java type is {@code long} and that have both {@code min} and
 * {@code max}, it is the number of values in the inclusive range, optionally
 * reduced by {@code step}. Columns that match neither case get no distinct
 * values estimate.
 *
 * @throws IllegalStateException if a column declares allowed values together with min or max
 */
@Override
public synchronized TableStatistics getTableStatistics(ConnectorSession session, ConnectorTableHandle tableHandle)
{
    FakerTableHandle fakerTableHandle = (FakerTableHandle) tableHandle;
    TableInfo info = tables.get(fakerTableHandle.schemaTableName());

    TableStatistics.Builder tableStatistics = TableStatistics.builder();
    tableStatistics.setRowCount(Estimate.of(fakerTableHandle.limit()));

    info.columns().forEach(columnInfo -> {
        Object min = PropertyValues.propertyValue(columnInfo.metadata(), MIN_PROPERTY);
        Object max = PropertyValues.propertyValue(columnInfo.metadata(), MAX_PROPERTY);
        Object step = PropertyValues.propertyValue(columnInfo.metadata(), STEP_PROPERTY);
        Collection<?> allowedValues = (Collection<?>) columnInfo.metadata().getProperties().get(ALLOWED_VALUES_PROPERTY); // skip parsing as we don't need the values

        // Template form: the message is only formatted when the check fails
        checkState(
                allowedValues == null || (min == null && max == null),
                "The `%s` property cannot be set together with `%s` and `%s` properties",
                ALLOWED_VALUES_PROPERTY,
                MIN_PROPERTY,
                MAX_PROPERTY);

        ColumnStatistics.Builder columnStatistics = ColumnStatistics.builder();
        if (allowedValues != null) {
            columnStatistics.setDistinctValuesCount(Estimate.of(allowedValues.size()));
        }
        else {
            Type type = columnInfo.metadata().getType();
            if (min != null && max != null && type.getJavaType() == long.class) {
                // Subtract in double precision: the estimate is a double anyway,
                // and a long subtraction can overflow for very wide ranges.
                double span = (double) (long) max - (double) (long) min;
                // A negative span (min > max) has no meaningful estimate; leave it unset.
                if (span >= 0) {
                    double intervals = span;
                    // NOTE(review): for temporal columns `step` is parsed as a duration in
                    // nanoseconds, while min/max are presumably in the type's own unit;
                    // confirm the division is meaningful for those types.
                    if (step != null && (long) step > 0) {
                        intervals = Math.floor(span / (long) step);
                    }
                    // Both range bounds are inclusive, so n intervals hold n + 1 distinct values.
                    columnStatistics.setDistinctValuesCount(Estimate.of(intervals + 1));
                }
            }
        }
        columnStatistics.setNullsFraction(Estimate.of(columnInfo.handle().nullProbability()));
        tableStatistics.setColumnStatistics(columnInfo.handle(), columnStatistics.build());
    });
    return tableStatistics.build();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,23 @@
import io.trino.spi.connector.ConnectorSplit;

import static com.google.common.base.Preconditions.checkArgument;
import static io.airlift.slice.SizeOf.instanceSize;

/**
 * Connector split carrying a split number, a row offset and a row count.
 * All three components must be non-negative.
 */
public record FakerSplit(long splitNumber, long rowsOffset, long rowsCount)
        implements ConnectorSplit
{
    private static final int INSTANCE_SIZE = instanceSize(FakerSplit.class);

    /**
     * Validates the components in declaration order, so the first negative
     * one determines the error message.
     */
    public FakerSplit
    {
        requireNonNegative(splitNumber, "splitNumber is negative");
        requireNonNegative(rowsOffset, "rowsOffset is negative");
        requireNonNegative(rowsCount, "rowsCount is negative");
    }

    /**
     * Returns the size computed once by {@code instanceSize(FakerSplit.class)}.
     */
    @Override
    public long getRetainedSizeInBytes()
    {
        return INSTANCE_SIZE;
    }

    // Throws IllegalArgumentException carrying `message` when `value` is below zero
    private static void requireNonNegative(long value, String message)
    {
        checkArgument(value >= 0, message);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.trino.plugin.faker;

import io.airlift.units.Duration;
import io.trino.spi.TrinoException;
import io.trino.spi.connector.ColumnMetadata;
import io.trino.spi.type.TimeType;
import io.trino.spi.type.TimeWithTimeZoneType;
import io.trino.spi.type.TimestampType;
import io.trino.spi.type.TimestampWithTimeZoneType;
import io.trino.spi.type.Type;

import java.util.Collection;
import java.util.List;
import java.util.concurrent.TimeUnit;

import static com.google.common.collect.ImmutableList.toImmutableList;
import static io.trino.spi.StandardErrorCode.INVALID_COLUMN_PROPERTY;
import static io.trino.spi.type.DateType.DATE;

/**
 * Static helpers that turn the raw string values of faker column properties
 * into typed values.
 */
public final class PropertyValues
{
    private PropertyValues() {}

    /**
     * Reads {@code property} from the column's properties and parses it.
     *
     * <ul>
     * <li>A missing property yields {@code null}.</li>
     * <li>A collection value is parsed element by element into literals of the
     * column's type and returned as an immutable list.</li>
     * <li>The step property of a DATE, TIME or TIMESTAMP (with or without time
     * zone) column is parsed as a duration and returned as a number of
     * nanoseconds.</li>
     * <li>Any other value is parsed as a single literal of the column's type.</li>
     * </ul>
     *
     * @param column the column whose properties are read
     * @param property the name of the property to parse
     * @return the parsed value, a list of parsed values, or {@code null} when the property is not set
     * @throws TrinoException with {@code INVALID_COLUMN_PROPERTY} when a value cannot be parsed
     */
    public static Object propertyValue(ColumnMetadata column, String property)
    {
        Object rawValue = column.getProperties().get(property);
        if (rawValue == null) {
            return null;
        }

        Type type = column.getType();
        if (rawValue instanceof Collection<?> rawValues) {
            return strings(rawValues).stream()
                    .map(value -> parseElement(value, type, property))
                    .collect(toImmutableList());
        }

        if (property.equals(ColumnInfo.STEP_PROPERTY) && hasDurationStep(type)) {
            try {
                return Duration.valueOf((String) rawValue).roundTo(TimeUnit.NANOSECONDS);
            }
            catch (IllegalArgumentException e) {
                throw new TrinoException(INVALID_COLUMN_PROPERTY, "The `%s` property for a %s column must be a valid duration literal".formatted(property, type.getDisplayName()), e);
            }
        }

        try {
            return Literal.parse((String) rawValue, type);
        }
        catch (IllegalArgumentException e) {
            throw new TrinoException(INVALID_COLUMN_PROPERTY, "The `%s` property must be a valid %s literal".formatted(property, type.getDisplayName()), e);
        }
    }

    // Parses one element of a list-valued property, naming the offending element on failure
    private static Object parseElement(String value, Type type, String property)
    {
        try {
            return Literal.parse(value, type);
        }
        catch (IllegalArgumentException | ClassCastException e) {
            throw new TrinoException(INVALID_COLUMN_PROPERTY, "The `%s` property must only contain valid %s literals, failed to parse `%s`".formatted(property, type.getDisplayName(), value), e);
        }
    }

    // Tells whether the step of a column of this type is written as a duration rather than a literal of the type
    private static boolean hasDurationStep(Type type)
    {
        return DATE.equals(type)
                || type instanceof TimestampType
                || type instanceof TimestampWithTimeZoneType
                || type instanceof TimeType
                || type instanceof TimeWithTimeZoneType;
    }

    // Casts every element to String; throws ClassCastException for any other element type
    private static List<String> strings(Collection<?> values)
    {
        return values.stream()
                .map(String.class::cast)
                .collect(toImmutableList());
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,17 @@
import io.airlift.log.Level;
import io.airlift.log.Logger;
import io.airlift.log.Logging;
import io.trino.plugin.exchange.filesystem.FileSystemExchangePlugin;
import io.trino.plugin.tpch.TpchPlugin;
import io.trino.testing.DistributedQueryRunner;
import io.trino.testing.QueryRunner;

import java.io.File;
import java.util.Map;

import static io.airlift.testing.Closeables.closeAllSuppress;
import static io.trino.testing.TestingSession.testSessionBuilder;
import static java.nio.file.Files.createTempDirectory;
import static java.util.Objects.requireNonNullElse;

public class FakerQueryRunner
Expand Down Expand Up @@ -95,4 +98,37 @@ public static void main(String[] args)
log.info("======== SERVER STARTED ========");
log.info("\n====\n%s\n====", queryRunner.getCoordinator().getBaseUrl());
}

/**
 * Entry point that starts a faker query runner with the TASK retry policy,
 * backed by a filesystem exchange manager writing to a temporary directory.
 */
public static final class FakerQueryRunnerWithTaskRetries
{
    private FakerQueryRunnerWithTaskRetries() {}

    /**
     * Starts the server and logs its base URL. The HTTP port comes from the
     * {@code TRINO_PORT} environment variable and falls back to 8080.
     */
    public static void main(String[] args)
            throws Exception
    {
        Logger log = Logger.get(FakerQueryRunnerWithTaskRetries.class);

        // Temporary base directory handed to the filesystem exchange manager
        File exchangeDirectory = createTempDirectory("exchange_manager").toFile();
        exchangeDirectory.deleteOnExit();
        Map<String, String> exchangeProperties = ImmutableMap.of("exchange.base-directories", exchangeDirectory.getAbsolutePath());

        String httpPort = requireNonNullElse(System.getenv("TRINO_PORT"), "8080");
        Map<String, String> serverProperties = ImmutableMap.of(
                "http-server.http.port", httpPort,
                "retry-policy", "TASK",
                "fault-tolerant-execution-task-memory", "1GB");

        @SuppressWarnings("resource")
        QueryRunner queryRunner = builder()
                .setExtraProperties(serverProperties)
                .setAdditionalSetup(runner -> {
                    runner.installPlugin(new FileSystemExchangePlugin());
                    runner.loadExchangeManager("filesystem", exchangeProperties);
                })
                .build();

        log.info("======== SERVER STARTED ========");
        log.info("\n====\n%s\n====", queryRunner.getCoordinator().getBaseUrl());
    }
}
}