-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #16 from ClickHouse/speed-up
Speed up data loading
- Loading branch information
Showing 1 changed file with 135 additions and 129 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,133 +1,139 @@ | ||
#!/usr/bin/bash | ||
|
||
INPUT=$1 | ||
TMPFILE=/tmp/$$ | ||
#SOURCE='adsb.lol' | ||
|
||
gzip -cd "$INPUT" | sed '/^"trace"/Q' | tr -d '\n' | sed -r -e 's/,$/}\n/' > ${TMPFILE}.meta.jsonl | ||
gzip -cd "$INPUT" | grep '^\[' | sed -r -e 's/ ]$/,/' > ${TMPFILE}.data.jsonl | ||
|
||
clickhouse-local --query " | ||
INSERT INTO FUNCTION remoteSecure('kvzqttvc2n.eu-west-1.aws.clickhouse-staging.com', default.planes_mercator, 'default', '') | ||
SELECT | ||
CAST(timestamp + time_offset AS DateTime64(3)) AS time, time::Date AS date, | ||
icao, r, t, dbFlags, noRegData, ownOp, year, desc, | ||
lat, lon, | ||
toInt32OrZero(altitude), | ||
ground_speed, | ||
track_degrees, | ||
flags, | ||
vertical_rate, | ||
aircraft.alert, | ||
aircraft.alt_geom, | ||
aircraft.gva, | ||
aircraft.nac_p, | ||
aircraft.nac_v, | ||
aircraft.nic, | ||
aircraft.nic_baro, | ||
aircraft.rc, | ||
aircraft.sda, | ||
aircraft.sil, | ||
aircraft.sil_type, | ||
aircraft.spi, | ||
aircraft.track, | ||
aircraft.type, | ||
aircraft.version, | ||
aircraft.category, | ||
aircraft.emergency, | ||
trimRight(aircraft.flight), | ||
aircraft.squawk, | ||
aircraft.baro_rate, | ||
aircraft.nav_altitude_fms, | ||
aircraft.nav_altitude_mcp, | ||
aircraft.nav_modes, | ||
aircraft.nav_qnh, | ||
aircraft.geom_rate, | ||
aircraft.ias, | ||
aircraft.mach, | ||
aircraft.mag_heading, | ||
aircraft.oat, | ||
aircraft.roll, | ||
aircraft.tas, | ||
aircraft.tat, | ||
aircraft.true_heading, | ||
aircraft.wd, | ||
aircraft.ws, | ||
aircraft.track_rate, | ||
aircraft.nav_heading, | ||
source, | ||
geometric_altitude, | ||
geometric_vertical_rate, | ||
indicated_airspeed, | ||
roll_angle, | ||
'${SOURCE}' | ||
FROM | ||
file('${TMPFILE}.data.jsonl', JSONCompactEachRow, ' | ||
time_offset Decimal64(3), | ||
lat Float64, | ||
lon Float64, | ||
altitude String, | ||
ground_speed Float32, | ||
track_degrees Float32, | ||
flags UInt32, | ||
vertical_rate Int32, | ||
aircraft Tuple( | ||
alert Int64, | ||
alt_geom Int64, | ||
gva Int64, | ||
nac_p Int64, | ||
nac_v Int64, | ||
nic Int64, | ||
nic_baro Int64, | ||
rc Int64, | ||
sda Int64, | ||
sil Int64, | ||
sil_type String, | ||
spi Int64, | ||
track Float64, | ||
type String, | ||
version Int64, | ||
category String, | ||
emergency String, | ||
flight String, | ||
squawk String, | ||
baro_rate Int64, | ||
nav_altitude_fms Int64, | ||
nav_altitude_mcp Int64, | ||
nav_modes Array(String), | ||
nav_qnh Float64, | ||
geom_rate Int64, | ||
ias Int64, | ||
mach Float64, | ||
mag_heading Float64, | ||
oat Int64, | ||
roll Float64, | ||
tas Int64, | ||
tat Int64, | ||
true_heading Float64, | ||
wd Int64, | ||
ws Int64, | ||
track_rate Float64, | ||
nav_heading Float64 | ||
), | ||
source LowCardinality(String), | ||
geometric_altitude Int32, | ||
geometric_vertical_rate Int32, | ||
indicated_airspeed Int32, | ||
roll_angle Float32 | ||
/* , hex String */ | ||
') AS d, | ||
file('${TMPFILE}.meta.jsonl', JSONEachRow, ' | ||
icao String, | ||
r String, | ||
t String, | ||
dbFlags Int32, | ||
noRegData Bool, | ||
ownOp String, | ||
year UInt16, | ||
timestamp Decimal64(3), | ||
desc String | ||
') AS m | ||
clickhouse-local --time --engine_file_skip_empty_files 1 --optimize_trivial_insert_select 0 --query " | ||
INSERT INTO FUNCTION remoteSecure('${CLICKHOUSE_PLANES_HOST}', '${TABLE}', '${CLICKHOUSE_PLANES_USER}', '${CLICKHOUSE_PLANES_PASSWORD}') | ||
WITH arrayJoin(trace) AS elem, | ||
elem.1 AS time_offset, | ||
elem.2 AS lat, | ||
elem.3 AS lon, | ||
elem.4 AS altitude, | ||
elem.5 AS ground_speed, | ||
elem.6 AS track_degrees, | ||
elem.7 AS flags, | ||
elem.8 AS vertical_rate, | ||
elem.9 AS aircraft, | ||
elem.10 AS source, | ||
elem.11 AS geometric_altitude, | ||
elem.12 AS geometric_vertical_rate, | ||
elem.13 AS indicated_airspeed, | ||
elem.14 AS roll_angle | ||
SELECT | ||
CAST(timestamp + time_offset AS DateTime64(3)) AS time, time::Date AS date, | ||
icao, r, t, dbFlags, noRegData, ownOp, year, desc, | ||
lat, lon, | ||
toInt32OrZero(altitude), | ||
ground_speed, | ||
track_degrees, | ||
flags, | ||
vertical_rate, | ||
aircraft.alert, | ||
aircraft.alt_geom, | ||
aircraft.gva, | ||
aircraft.nac_p, | ||
aircraft.nac_v, | ||
aircraft.nic, | ||
aircraft.nic_baro, | ||
aircraft.rc, | ||
aircraft.sda, | ||
aircraft.sil, | ||
aircraft.sil_type, | ||
aircraft.spi, | ||
aircraft.track, | ||
aircraft.type, | ||
aircraft.version, | ||
aircraft.category, | ||
aircraft.emergency, | ||
trimRight(aircraft.flight), | ||
aircraft.squawk, | ||
aircraft.baro_rate, | ||
aircraft.nav_altitude_fms, | ||
aircraft.nav_altitude_mcp, | ||
aircraft.nav_modes, | ||
aircraft.nav_qnh, | ||
aircraft.geom_rate, | ||
aircraft.ias, | ||
aircraft.mach, | ||
aircraft.mag_heading, | ||
aircraft.oat, | ||
aircraft.roll, | ||
aircraft.tas, | ||
aircraft.tat, | ||
aircraft.true_heading, | ||
aircraft.wd, | ||
aircraft.ws, | ||
aircraft.track_rate, | ||
aircraft.nav_heading, | ||
source, | ||
geometric_altitude, | ||
geometric_vertical_rate, | ||
indicated_airspeed, | ||
roll_angle, | ||
'${SOURCE}' | ||
FROM file('$1', JSONLines, ' | ||
icao String, | ||
r String, | ||
t String, | ||
dbFlags Int32, | ||
noRegData Bool, | ||
ownOp String, | ||
year UInt16, | ||
timestamp Decimal64(3), | ||
desc String, | ||
trace Array(Tuple( | ||
Decimal64(3), | ||
Float64, | ||
Float64, | ||
String, | ||
Float32, | ||
Float32, | ||
UInt32, | ||
Int32, | ||
Tuple( | ||
alert Int64, | ||
alt_geom Int64, | ||
gva Int64, | ||
nac_p Int64, | ||
nac_v Int64, | ||
nic Int64, | ||
nic_baro Int64, | ||
rc Int64, | ||
sda Int64, | ||
sil Int64, | ||
sil_type String, | ||
spi Int64, | ||
track Float64, | ||
type String, | ||
version Int64, | ||
category String, | ||
emergency String, | ||
flight String, | ||
squawk String, | ||
baro_rate Int64, | ||
nav_altitude_fms Int64, | ||
nav_altitude_mcp Int64, | ||
nav_modes Array(String), | ||
nav_qnh Float64, | ||
geom_rate Int64, | ||
ias Int64, | ||
mach Float64, | ||
mag_heading Float64, | ||
oat Int64, | ||
roll Float64, | ||
tas Int64, | ||
tat Int64, | ||
true_heading Float64, | ||
wd Int64, | ||
ws Int64, | ||
track_rate Float64, | ||
nav_heading Float64 | ||
), | ||
LowCardinality(String), | ||
Int32, | ||
Int32, | ||
Int32, | ||
Float32 | ||
)) | ||
', 'gz') | ||
" || exit 1 | ||
|
||
rm ${TMPFILE}.{meta,data}.jsonl | ||
echo -n '.' |