-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcommands.txt
28 lines (23 loc) · 1.15 KB
/
commands.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
CREATE EXTERNAL TABLE json_tweet (json STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\n'
LINES TERMINATED BY '\n'
STORED AS TEXTFILE
LOCATION 's3n://bdigital-mob-dataset-twitter/CAT/';
CREATE TABLE IF NOT EXISTS tweets AS
SELECT get_json_object(json_tweet.json, '$.id') as id, get_json_object(json_tweet.json, '$.createdAt') as timestamp, get_json_object(json_tweet.json, '$.geoLocation.longitude') as longitude, get_json_object(json_tweet.json, '$.geoLocation.latitude') as latitude,
ST_Point(get_json_object(json_tweet.json, '$.geoLocation.longitude') ,get_json_object(json_tweet.json, '$.geoLocation.latitude') ) as geom ,
get_json_object(json_tweet.json, '$.text') as text FROM json_tweet;
curl -XPUT -u bdigital:gvivSjqauYLvcTnW50p8 http://54.194.28.244:8080/tweets -d '
{
"mappings" : {
"_default_" : {
"properties" : {
"loc" : {
"type" : "geo_point"
}
}
}
}
}
';
geohive -e 'select concat("{\"index\":{\"_index\":\"tweets\",\"_type\":\"tweet\",\"_id\":", rowsequence() , "}} \n") , to_json(named_struct("id", id, "loc", array(cast(longitude as double),cast(latitude as double)))) from tweets where geom is not null;' > tweets.json