From b0fdf4b07bc4ae925a403b5d536647617b983a2e Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sat, 27 Apr 2024 22:15:30 +0200
Subject: [PATCH] Speed up

---
Reviewer notes (this area is not part of the commit message):
 * The old script decompressed the input twice with gzip and split it with
   sed/grep/tr into two temporary files under /tmp, then cross-joined them.
   The new script passes the input path ('$1') straight to file(...) as
   JSONLines with 'gz' compression; no temporary files are written, so the
   trailing rm is dropped as well.
 * Each element of the `trace` array is expanded with arrayJoin(), and the
   positional tuple members elem.1 .. elem.14 are aliased to the column names
   the old JSONCompactEachRow schema used, so the SELECT list is unchanged.
 * The remoteSecure() host, table, user and password are now taken from the
   shell variables CLICKHOUSE_PLANES_HOST, TABLE, CLICKHOUSE_PLANES_USER and
   CLICKHOUSE_PLANES_PASSWORD instead of being hard-coded.
 * The commented-out SOURCE assignment is removed while '${SOURCE}' is still
   expanded in the query, so SOURCE has to be provided by the caller.
 * NOTE(review): this patch was recovered from a copy whose line breaks and
   indentation had been collapsed. Record boundaries match the hunk header
   (133 old / 139 new lines) and the diffstat (135 insertions, 129 deletions),
   but leading indentation inside the hunk is reconstructed -- confirm it
   against process-file.sh, or apply with whitespace-tolerant matching.

 process-file.sh | 264 +++++++++++++++++++++++++-----------------------
 1 file changed, 135 insertions(+), 129 deletions(-)

diff --git a/process-file.sh b/process-file.sh
index d449508..d48f594 100755
--- a/process-file.sh
+++ b/process-file.sh
@@ -1,133 +1,139 @@
 #!/usr/bin/bash
 
-INPUT=$1
-TMPFILE=/tmp/$$
-#SOURCE='adsb.lol'
-
-gzip -cd "$INPUT" | sed '/^"trace"/Q' | tr -d '\n' | sed -r -e 's/,$/}\n/' > ${TMPFILE}.meta.jsonl
-gzip -cd "$INPUT" | grep '^\[' | sed -r -e 's/ ]$/,/' > ${TMPFILE}.data.jsonl
-
-clickhouse-local --query "
-    INSERT INTO FUNCTION remoteSecure('kvzqttvc2n.eu-west-1.aws.clickhouse-staging.com', default.planes_mercator, 'default', '')
-    SELECT
-        CAST(timestamp + time_offset AS DateTime64(3)) AS time, time::Date AS date,
-        icao, r, t, dbFlags, noRegData, ownOp, year, desc,
-        lat, lon,
-        toInt32OrZero(altitude),
-        ground_speed,
-        track_degrees,
-        flags,
-        vertical_rate,
-        aircraft.alert,
-        aircraft.alt_geom,
-        aircraft.gva,
-        aircraft.nac_p,
-        aircraft.nac_v,
-        aircraft.nic,
-        aircraft.nic_baro,
-        aircraft.rc,
-        aircraft.sda,
-        aircraft.sil,
-        aircraft.sil_type,
-        aircraft.spi,
-        aircraft.track,
-        aircraft.type,
-        aircraft.version,
-        aircraft.category,
-        aircraft.emergency,
-        trimRight(aircraft.flight),
-        aircraft.squawk,
-        aircraft.baro_rate,
-        aircraft.nav_altitude_fms,
-        aircraft.nav_altitude_mcp,
-        aircraft.nav_modes,
-        aircraft.nav_qnh,
-        aircraft.geom_rate,
-        aircraft.ias,
-        aircraft.mach,
-        aircraft.mag_heading,
-        aircraft.oat,
-        aircraft.roll,
-        aircraft.tas,
-        aircraft.tat,
-        aircraft.true_heading,
-        aircraft.wd,
-        aircraft.ws,
-        aircraft.track_rate,
-        aircraft.nav_heading,
-        source,
-        geometric_altitude,
-        geometric_vertical_rate,
-        indicated_airspeed,
-        roll_angle,
-        '${SOURCE}'
-    FROM
-        file('${TMPFILE}.data.jsonl', JSONCompactEachRow, '
-            time_offset Decimal64(3),
-            lat Float64,
-            lon Float64,
-            altitude String,
-            ground_speed Float32,
-            track_degrees Float32,
-            flags UInt32,
-            vertical_rate Int32,
-            aircraft Tuple(
-                alert Int64,
-                alt_geom Int64,
-                gva Int64,
-                nac_p Int64,
-                nac_v Int64,
-                nic Int64,
-                nic_baro Int64,
-                rc Int64,
-                sda Int64,
-                sil Int64,
-                sil_type String,
-                spi Int64,
-                track Float64,
-                type String,
-                version Int64,
-                category String,
-                emergency String,
-                flight String,
-                squawk String,
-                baro_rate Int64,
-                nav_altitude_fms Int64,
-                nav_altitude_mcp Int64,
-                nav_modes Array(String),
-                nav_qnh Float64,
-                geom_rate Int64,
-                ias Int64,
-                mach Float64,
-                mag_heading Float64,
-                oat Int64,
-                roll Float64,
-                tas Int64,
-                tat Int64,
-                true_heading Float64,
-                wd Int64,
-                ws Int64,
-                track_rate Float64,
-                nav_heading Float64
-            ),
-            source LowCardinality(String),
-            geometric_altitude Int32,
-            geometric_vertical_rate Int32,
-            indicated_airspeed Int32,
-            roll_angle Float32
-            /* , hex String */
-        ') AS d,
-        file('${TMPFILE}.meta.jsonl', JSONEachRow, '
-            icao String,
-            r String,
-            t String,
-            dbFlags Int32,
-            noRegData Bool,
-            ownOp String,
-            year UInt16,
-            timestamp Decimal64(3),
-            desc String
-        ') AS m
+clickhouse-local --time --engine_file_skip_empty_files 1 --optimize_trivial_insert_select 0 --query "
+INSERT INTO FUNCTION remoteSecure('${CLICKHOUSE_PLANES_HOST}', '${TABLE}', '${CLICKHOUSE_PLANES_USER}', '${CLICKHOUSE_PLANES_PASSWORD}')
+WITH arrayJoin(trace) AS elem,
+    elem.1 AS time_offset,
+    elem.2 AS lat,
+    elem.3 AS lon,
+    elem.4 AS altitude,
+    elem.5 AS ground_speed,
+    elem.6 AS track_degrees,
+    elem.7 AS flags,
+    elem.8 AS vertical_rate,
+    elem.9 AS aircraft,
+    elem.10 AS source,
+    elem.11 AS geometric_altitude,
+    elem.12 AS geometric_vertical_rate,
+    elem.13 AS indicated_airspeed,
+    elem.14 AS roll_angle
+SELECT
+    CAST(timestamp + time_offset AS DateTime64(3)) AS time, time::Date AS date,
+    icao, r, t, dbFlags, noRegData, ownOp, year, desc,
+    lat, lon,
+    toInt32OrZero(altitude),
+    ground_speed,
+    track_degrees,
+    flags,
+    vertical_rate,
+    aircraft.alert,
+    aircraft.alt_geom,
+    aircraft.gva,
+    aircraft.nac_p,
+    aircraft.nac_v,
+    aircraft.nic,
+    aircraft.nic_baro,
+    aircraft.rc,
+    aircraft.sda,
+    aircraft.sil,
+    aircraft.sil_type,
+    aircraft.spi,
+    aircraft.track,
+    aircraft.type,
+    aircraft.version,
+    aircraft.category,
+    aircraft.emergency,
+    trimRight(aircraft.flight),
+    aircraft.squawk,
+    aircraft.baro_rate,
+    aircraft.nav_altitude_fms,
+    aircraft.nav_altitude_mcp,
+    aircraft.nav_modes,
+    aircraft.nav_qnh,
+    aircraft.geom_rate,
+    aircraft.ias,
+    aircraft.mach,
+    aircraft.mag_heading,
+    aircraft.oat,
+    aircraft.roll,
+    aircraft.tas,
+    aircraft.tat,
+    aircraft.true_heading,
+    aircraft.wd,
+    aircraft.ws,
+    aircraft.track_rate,
+    aircraft.nav_heading,
+    source,
+    geometric_altitude,
+    geometric_vertical_rate,
+    indicated_airspeed,
+    roll_angle,
+    '${SOURCE}'
+FROM file('$1', JSONLines, '
+    icao String,
+    r String,
+    t String,
+    dbFlags Int32,
+    noRegData Bool,
+    ownOp String,
+    year UInt16,
+    timestamp Decimal64(3),
+    desc String,
+    trace Array(Tuple(
+        Decimal64(3),
+        Float64,
+        Float64,
+        String,
+        Float32,
+        Float32,
+        UInt32,
+        Int32,
+        Tuple(
+            alert Int64,
+            alt_geom Int64,
+            gva Int64,
+            nac_p Int64,
+            nac_v Int64,
+            nic Int64,
+            nic_baro Int64,
+            rc Int64,
+            sda Int64,
+            sil Int64,
+            sil_type String,
+            spi Int64,
+            track Float64,
+            type String,
+            version Int64,
+            category String,
+            emergency String,
+            flight String,
+            squawk String,
+            baro_rate Int64,
+            nav_altitude_fms Int64,
+            nav_altitude_mcp Int64,
+            nav_modes Array(String),
+            nav_qnh Float64,
+            geom_rate Int64,
+            ias Int64,
+            mach Float64,
+            mag_heading Float64,
+            oat Int64,
+            roll Float64,
+            tas Int64,
+            tat Int64,
+            true_heading Float64,
+            wd Int64,
+            ws Int64,
+            track_rate Float64,
+            nav_heading Float64
+        ),
+        LowCardinality(String),
+        Int32,
+        Int32,
+        Int32,
+        Float32
+    ))
+', 'gz')
 " || exit 1
 
-rm ${TMPFILE}.{meta,data}.jsonl
+echo -n '.'