Skip to content

Commit

Permalink
Merge pull request #16 from ClickHouse/speed-up
Browse files Browse the repository at this point in the history
Speed up data loading
  • Loading branch information
alexey-milovidov authored Apr 27, 2024
2 parents 9bda758 + b0fdf4b commit 2e8fca4
Showing 1 changed file with 135 additions and 129 deletions.
264 changes: 135 additions & 129 deletions process-file.sh
Original file line number Diff line number Diff line change
@@ -1,133 +1,139 @@
#!/usr/bin/bash

# Splits one gzip-compressed input file into two temporary files that the
# clickhouse-local query further down reads:
#   ${TMPFILE}.meta.jsonl - every line before the first line starting with
#                           "trace", joined into one line, with the trailing
#                           comma replaced by a closing brace
#   ${TMPFILE}.data.jsonl - every line starting with "[", with a trailing " ]"
#                           rewritten to ","
#
# Usage: process-file.sh <input.gz>

INPUT=$1
if [ -z "$INPUT" ]; then
    echo "usage: $0 <input.gz>" >&2
    exit 1
fi

# mktemp creates a uniquely named file, so concurrent runs cannot collide and
# the path cannot be guessed in advance (the previous /tmp/<pid> name could).
TMPFILE=$(mktemp /tmp/process-file.XXXXXX) || exit 1

# Remove the temporary files on every exit path, including the early
# "exit 1" taken when the load fails; -f keeps this quiet if they are
# already gone.
trap 'rm -f -- "$TMPFILE" "$TMPFILE.meta.jsonl" "$TMPFILE.data.jsonl"' EXIT

# SOURCE is interpolated into the query below. This default is disabled, so
# the value presumably comes from the caller's environment (confirm).
#SOURCE='adsb.lol'

# sed 'Q' (GNU extension) stops without printing at the first line that begins
# with "trace"; tr joins what came before into a single line.
gzip -cd "$INPUT" | sed '/^"trace"/Q' | tr -d '\n' | sed -r -e 's/,$/}\n/' > "${TMPFILE}.meta.jsonl"
gzip -cd "$INPUT" | grep '^\[' | sed -r -e 's/ ]$/,/' > "${TMPFILE}.data.jsonl"

# Earlier form of the loader (this appears to be the removed side of the diff;
# the double-quoted query opened on the next line is not closed within this
# span). It reads the two temporary files written above: the per-point rows
# file (alias d, JSONCompactEachRow) and the metadata file (alias m,
# JSONEachRow). The two are combined with a comma in FROM, i.e. a cross join,
# so every data row is paired with the metadata columns. The row time is
# computed as timestamp + time_offset.
# NOTE(review): host, table and user are hard-coded here and the password is
# an empty string; the later command in this file takes them from environment
# variables instead.
clickhouse-local --query "
INSERT INTO FUNCTION remoteSecure('kvzqttvc2n.eu-west-1.aws.clickhouse-staging.com', default.planes_mercator, 'default', '')
SELECT
CAST(timestamp + time_offset AS DateTime64(3)) AS time, time::Date AS date,
icao, r, t, dbFlags, noRegData, ownOp, year, desc,
lat, lon,
toInt32OrZero(altitude),
ground_speed,
track_degrees,
flags,
vertical_rate,
aircraft.alert,
aircraft.alt_geom,
aircraft.gva,
aircraft.nac_p,
aircraft.nac_v,
aircraft.nic,
aircraft.nic_baro,
aircraft.rc,
aircraft.sda,
aircraft.sil,
aircraft.sil_type,
aircraft.spi,
aircraft.track,
aircraft.type,
aircraft.version,
aircraft.category,
aircraft.emergency,
trimRight(aircraft.flight),
aircraft.squawk,
aircraft.baro_rate,
aircraft.nav_altitude_fms,
aircraft.nav_altitude_mcp,
aircraft.nav_modes,
aircraft.nav_qnh,
aircraft.geom_rate,
aircraft.ias,
aircraft.mach,
aircraft.mag_heading,
aircraft.oat,
aircraft.roll,
aircraft.tas,
aircraft.tat,
aircraft.true_heading,
aircraft.wd,
aircraft.ws,
aircraft.track_rate,
aircraft.nav_heading,
source,
geometric_altitude,
geometric_vertical_rate,
indicated_airspeed,
roll_angle,
'${SOURCE}'
FROM
file('${TMPFILE}.data.jsonl', JSONCompactEachRow, '
time_offset Decimal64(3),
lat Float64,
lon Float64,
altitude String,
ground_speed Float32,
track_degrees Float32,
flags UInt32,
vertical_rate Int32,
aircraft Tuple(
alert Int64,
alt_geom Int64,
gva Int64,
nac_p Int64,
nac_v Int64,
nic Int64,
nic_baro Int64,
rc Int64,
sda Int64,
sil Int64,
sil_type String,
spi Int64,
track Float64,
type String,
version Int64,
category String,
emergency String,
flight String,
squawk String,
baro_rate Int64,
nav_altitude_fms Int64,
nav_altitude_mcp Int64,
nav_modes Array(String),
nav_qnh Float64,
geom_rate Int64,
ias Int64,
mach Float64,
mag_heading Float64,
oat Int64,
roll Float64,
tas Int64,
tat Int64,
true_heading Float64,
wd Int64,
ws Int64,
track_rate Float64,
nav_heading Float64
),
source LowCardinality(String),
geometric_altitude Int32,
geometric_vertical_rate Int32,
indicated_airspeed Int32,
roll_angle Float32
/* , hex String */
') AS d,
file('${TMPFILE}.meta.jsonl', JSONEachRow, '
icao String,
r String,
t String,
dbFlags Int32,
noRegData Bool,
ownOp String,
year UInt16,
timestamp Decimal64(3),
desc String
') AS m
# Loads one gzip-compressed JSON file (path given as $1) into ClickHouse in a
# single pass: clickhouse-local reads the file directly (format JSONLines,
# compression 'gz'), arrayJoin() turns each element of the "trace" array into
# its own row, and the positional tuple members elem.1 .. elem.14 are given
# names in the WITH clause before being selected.
#
# Environment used by the query:
#   CLICKHOUSE_PLANES_HOST      destination host for remoteSecure()  (required)
#   TABLE                       destination table                    (required)
#   CLICKHOUSE_PLANES_USER      user name passed to remoteSecure()
#   CLICKHOUSE_PLANES_PASSWORD  password passed to remoteSecure()
#   SOURCE                      literal selected as the last column
#
# Exits with status 1 if the argument or a required variable is missing, or if
# clickhouse-local fails.
#
# NOTE(review): $1 and the variables above are pasted into the query text
# verbatim, so a value containing a single quote breaks the query, and the
# password becomes part of the clickhouse-local command line.
# NOTE(review): the --engine_file_skip_empty_files and
# --optimize_trivial_insert_select settings are kept exactly as they were;
# presumably they were chosen for load speed (commit title) - confirm against
# the ClickHouse settings reference before changing them.
if [ -z "$1" ]; then
    echo "usage: $0 <input.gz>" >&2
    exit 1
fi

# Fail fast with a clear message instead of a late connection or parse error.
for required in CLICKHOUSE_PLANES_HOST TABLE; do
    if [ -z "${!required}" ]; then
        echo "$0: environment variable $required is not set" >&2
        exit 1
    fi
done

clickhouse-local --time --engine_file_skip_empty_files 1 --optimize_trivial_insert_select 0 --query "
INSERT INTO FUNCTION remoteSecure('${CLICKHOUSE_PLANES_HOST}', '${TABLE}', '${CLICKHOUSE_PLANES_USER}', '${CLICKHOUSE_PLANES_PASSWORD}')
WITH arrayJoin(trace) AS elem,
elem.1 AS time_offset,
elem.2 AS lat,
elem.3 AS lon,
elem.4 AS altitude,
elem.5 AS ground_speed,
elem.6 AS track_degrees,
elem.7 AS flags,
elem.8 AS vertical_rate,
elem.9 AS aircraft,
elem.10 AS source,
elem.11 AS geometric_altitude,
elem.12 AS geometric_vertical_rate,
elem.13 AS indicated_airspeed,
elem.14 AS roll_angle
SELECT
CAST(timestamp + time_offset AS DateTime64(3)) AS time, time::Date AS date,
icao, r, t, dbFlags, noRegData, ownOp, year, desc,
lat, lon,
toInt32OrZero(altitude),
ground_speed,
track_degrees,
flags,
vertical_rate,
aircraft.alert,
aircraft.alt_geom,
aircraft.gva,
aircraft.nac_p,
aircraft.nac_v,
aircraft.nic,
aircraft.nic_baro,
aircraft.rc,
aircraft.sda,
aircraft.sil,
aircraft.sil_type,
aircraft.spi,
aircraft.track,
aircraft.type,
aircraft.version,
aircraft.category,
aircraft.emergency,
trimRight(aircraft.flight),
aircraft.squawk,
aircraft.baro_rate,
aircraft.nav_altitude_fms,
aircraft.nav_altitude_mcp,
aircraft.nav_modes,
aircraft.nav_qnh,
aircraft.geom_rate,
aircraft.ias,
aircraft.mach,
aircraft.mag_heading,
aircraft.oat,
aircraft.roll,
aircraft.tas,
aircraft.tat,
aircraft.true_heading,
aircraft.wd,
aircraft.ws,
aircraft.track_rate,
aircraft.nav_heading,
source,
geometric_altitude,
geometric_vertical_rate,
indicated_airspeed,
roll_angle,
'${SOURCE}'
FROM file('$1', JSONLines, '
icao String,
r String,
t String,
dbFlags Int32,
noRegData Bool,
ownOp String,
year UInt16,
timestamp Decimal64(3),
desc String,
trace Array(Tuple(
Decimal64(3),
Float64,
Float64,
String,
Float32,
Float32,
UInt32,
Int32,
Tuple(
alert Int64,
alt_geom Int64,
gva Int64,
nac_p Int64,
nac_v Int64,
nic Int64,
nic_baro Int64,
rc Int64,
sda Int64,
sil Int64,
sil_type String,
spi Int64,
track Float64,
type String,
version Int64,
category String,
emergency String,
flight String,
squawk String,
baro_rate Int64,
nav_altitude_fms Int64,
nav_altitude_mcp Int64,
nav_modes Array(String),
nav_qnh Float64,
geom_rate Int64,
ias Int64,
mach Float64,
mag_heading Float64,
oat Int64,
roll Float64,
tas Int64,
tat Int64,
true_heading Float64,
wd Int64,
ws Int64,
track_rate Float64,
nav_heading Float64
),
LowCardinality(String),
Int32,
Int32,
Int32,
Float32
))
', 'gz')
" || exit 1

# Delete the two temporary files derived from TMPFILE. The expansion is quoted
# so the path is never word-split or glob-expanded, and "--" stops a path that
# starts with "-" from being read as an option.
rm -- "${TMPFILE}.meta.jsonl" "${TMPFILE}.data.jsonl"
# Print one dot with no trailing newline (presumably a per-file progress mark).
printf '.'

0 comments on commit 2e8fca4

Please sign in to comment.