-
Notifications
You must be signed in to change notification settings - Fork 0
Hunter Wu edited this page Jan 16, 2019
·
3 revisions
- count
stats count(*) by bin(min)
stats count(*) as cnt by fsym, tsym, exchange
| sort cnt desc
- filter
stats count(*) by bin(min)
| filter fsym="BNB"
stats count(tag ispresent), count(*) by bin(1h)
- filter syntax
{ $.status = 400 }
{ $.status = 4* }
{ $.request = "GET /myproject1/v1/storage/presignLogUrl*" }
{ $.request = "* /myproject1/*" }
{ ($.request = "* /myproject1/*") && ($.status = "4*") }
- partition
- 可節省掃描的資料容量(省錢)
- s3 路徑和資料來源都必須要有 partition 的欄位
- reload:
MSCK REPAIR TABLE table_name
CREATE EXTERNAL TABLE IF NOT EXISTS my_api_log (
`datetime` timestamp,
`name` string,
`mcc` string,
`locale` string,
`client_version` string,
`androidid` string,
`region` string
) PARTITIONED BY (
dt string
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES (
'serialization.format' = '1'
) LOCATION 's3://my_api_logs/daily/'
TBLPROPERTIES ('has_encrypted_data'='false');
- struct data type
CREATE EXTERNAL TABLE IF NOT EXISTS default.structdata (
name string, androidid int, mcc int, log_details struct<uid:string,invite_code:string,register_time:int,trial_quota:int,trial_time:int,gained_quota:int,gained_time:int,invited_user:int,invited_user_gained:int,last_signin_date:string,signin_days:int,signin_points:array<int>,longest_signin_days:int>
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES (
'serialization.format' = '1'
) LOCATION 's3://paramaplab1/json/'
SELECT * FROM structdata WHERE log_details.trial_time=20;
- UnmarshalMap - (golang) 如果單一屬性型態不對,可能導致後面其他屬性就不轉了!
- 沒有 batch update。BatchWriteItem 只能 Put、Delete,且批次內的 key 不能重複
- Secondary index 非 unique
docker pull amazon/dynamodb-local
- 執行時需要用
-shareDB
,這樣才能不分 region 共用
docker run -p 8000:8000 amazon/dynamodb-local -jar DynamoDBLocal.jar -inMemory -sharedDb
- 3rd party GUI
npm install dynamodb-admin -g
export DYNAMO_ENDPOINT=http://localhost:8000
dynamodb-admin
- NodeJS
AWS.config.update({
endpoint: 'http://localhost:8000',
});
- COPY FROM S3
- target table 欄位可以比較多,給 FILLRECORD 參數可填入預設值
- source 欄位比較多則會出錯,只能先存到暫存表再複製到目的表
COPY public.test1(datetime,host,mcc,uuid,cnl,dt)
FROM 's3://my-bucket/my/path/app.log'
CREDENTIALS 'aws_access_key_id=A...Q;aws_secret_access_key=0...N'
FILLRECORD TRUNCATECOLUMNS IGNOREBLANKLINES MAXERROR 100;
- UNLOAD TO S3
UNLOAD ('SELECT DISTINCT uuid FROM test1 WHERE uuid<>\\'\\' AND (mcc=\\'310\\' OR mcc=\\'311\\')')
TO 's3://my-bucket/my/path/uuid.log'
CREDENTIALS 'aws_access_key_id=A...Q;aws_secret_access_key=0...N'
GZIP NULL '' ALLOWOVERWRITE;
- COPY FROM DynamoDB
COPY public.users
FROM 'dynamodb://Users'
CREDENTIALS 'aws_access_key_id=A...Q;aws_secret_access_key=0...N'
readratio 90 MAXERROR 100;