Hive Paritioning
create
external table customer(custno string, firstname string, lastname string, age
int,profession string)
row
format delimited
fields
terminated by ',';
Array :
The first complex type is an array. It is nothing but
a collection of
items of similar data type. i.e, an array can contain
one or more
values of the same data type. In our Array example, we
will be using the dataset Temperature.txt
where the fields are delimited by tab and the complex
data type Array
values are delimited by the comma.
Sample DataSet…
18:43
25-07-2017 karnataka 22,52,6,52,6,4.2,475,7,54
18:44
25-07-2017 Andhraprdesh 45,65,12,65,456,65,12,54,65
18:45
25-07-2017 Assam 7,8,56,51,32,53,44,56,45
CREATE EXTERNAL TABLE IF NOT EXISTS tempset(temp_date
STRING, state STRING, temperature array<double>)
row format delimited
fields terminated by '\t'
collection items terminated by ',';
CREATE EXTERNAL TABLE IF NOT EXISTS
tempset99(temp_date
STRING, state STRING, temperature
array<double>)
row format delimited
fields terminated by '\t'
collection items terminated by ',';
LOAD
DATA LOCAL INPATH’/home/cloudera/Array_Data.txt’ OVERWRITE INTO TABLE
tempset99;
Map :
Map is a collection of
key-value pairs where fields are accessed using array notation of keys
Eg: [‘Key’]
In our Map example we will be
using the dataset Schools.txt where the fields are delimited by tab, the
complex type Map values are delimited by the comma.
CREATE TABLE IF NOT EXISTS
myschool(schooltypeSTRING,stateSTRING,gender STRING, total map<int,int>)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
COLLECTION ITEMS TERMINATED BY ','
MAP KEYS TERMINATED BY ':';
Struct :
Struct is a record type which
encapsulates a set of named fields that can be any primitive data type. An element
in STRUCT type can be accessed using the DOT (.) notation.
In our Struct example, we will
be using the dataset Bikes.txt where the fields are delimited by tab and the
complex type Array values are delimited by the comma.
CREATE EXTERNAL TABLE IF NOT EXISTS
employee(name
STRING, salary FLOAT, address
struct<city:STRING,state:STRING,zip:INT>)
row format delimited
fields terminated by '\t'
collection items terminated by ',';
jimmy 85263 Mumbai,KA,565685
krina 96523 Pune,KA,54654
Queries :
select
state,temperature[0] from tempset;
select
state,temperature[0],temperature[4] from tempest;
select
state.temperature[0] from tempset;
select
temp_date,state,temperature[3] from tempset;
select name, address[zip] from employee;
select name,address.city from employee;
select name,address.city from employee;
select name,address.city,address.state from employee;
Queries :
Array
select
state.temperature[0] from tempset;
select
date,state,temperature[3] from tempset;
Map
select
schooltype,total[2015],total[2014] from mys;
select
schooltype,total[2015] from mys;
select total[2016] from MySchools where
state=’Assam’;
select total[2017] from MySchools where
state=’Chhattisgarh’ and gender=’Female’;
Struct
select BikeFeatures.EngineType from MyBikes;
select BikeFeatures.EngineType
from MyBikes where name=’Suzuki Swish’
Partition::::::::::
DYNAMIC PARTITIONING
set
hive.exec.dynamic.partition.mode=nonstrict;
set
hive.exec.dynamic.partition=true;
set hive.enforce.bucketing=true;
create table txnByCat(txnno INT,
txndate STRING, custno INT, amount DOUBLE,
product STRING, city STRING, state
STRING, spendby STRING)
partitioned by (category STRING)
row format delimited
fields terminated by ','
stored as textfile;
from txn909 txn INSERT OVERWRITE TABLE
txnByCat PARTITION(category) select
txn.txnno,txn.txndate,txn.custno,txn.amount,txn.product,txn.city,txn.state,txn.spendby,
txn.category DISTRIBUTE BY category;
Comments
Post a Comment