Hive Paritioning


create external table customer(custno string, firstname string, lastname string, age int,profession string)
row format delimited
fields terminated by ',';

Array :
The first complex type is an array. It is nothing but a collection of
items of similar data type. i.e, an array can contain one or more
values of the same data type. In our Array example, we will be using the dataset Temperature.txt
where the fields are delimited by tab and the complex data type Array
values are delimited by the comma.
Sample DataSet…
18:43 25-07-2017        karnataka       22,52,6,52,6,4.2,475,7,54
18:44 25-07-2017        Andhraprdesh    45,65,12,65,456,65,12,54,65
18:45 25-07-2017        Assam   7,8,56,51,32,53,44,56,45
CREATE EXTERNAL TABLE IF NOT EXISTS tempset(temp_date
STRING, state STRING, temperature array<double>)
row format delimited
fields terminated by '\t'
collection items terminated by ',';


CREATE EXTERNAL TABLE IF NOT EXISTS tempset99(temp_date
STRING, state STRING, temperature array<double>)
row format delimited
fields terminated by '\t'
collection items terminated by ',';

LOAD DATA LOCAL INPATH’/home/cloudera/Array_Data.txt’ OVERWRITE INTO TABLE tempset99;

Map  :
Map is a collection of key-value pairs where fields are accessed using array notation of keys
Eg: [‘Key’]
In our Map example we will be using the dataset Schools.txt where the fields are delimited by tab, the complex type Map values are delimited by the comma.
CREATE TABLE IF NOT EXISTS myschool(schooltypeSTRING,stateSTRING,gender STRING, total map<int,int>)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
COLLECTION ITEMS TERMINATED BY ','
MAP KEYS TERMINATED BY ':';

Struct :
Struct is a record type which encapsulates a set of named fields that can be any primitive data type. An element in STRUCT type can be accessed using the DOT (.) notation.
In our Struct example, we will be using the dataset Bikes.txt where the fields are delimited by tab and the complex type Array values are delimited by the comma.

CREATE EXTERNAL TABLE IF NOT EXISTS employee(name
STRING, salary FLOAT, address
struct<city:STRING,state:STRING,zip:INT>)
row format delimited
fields terminated by '\t'
collection items terminated by ',';
jimmy  85263  Mumbai,KA,565685
krina  96523  Pune,KA,54654
Queries       :
select state,temperature[0] from tempset;
select state,temperature[0],temperature[4] from tempest;

select state.temperature[0] from tempset;
select temp_date,state,temperature[3] from tempset;

select name, address[zip] from employee;
select name,address.city from employee;
select name,address.city from employee;
select name,address.city,address.state from employee;


Queries :
Array
select state.temperature[0] from tempset;
select date,state,temperature[3] from tempset;
Map
select schooltype,total[2015],total[2014] from mys;
select schooltype,total[2015] from mys;
select total[2016] from MySchools where state=’Assam’; 
select total[2017] from MySchools where state=’Chhattisgarh’ and gender=’Female’; 

Struct
select BikeFeatures.EngineType from MyBikes; 
select BikeFeatures.EngineType from MyBikes where name=’Suzuki Swish’
Partition::::::::::
DYNAMIC PARTITIONING

set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.dynamic.partition=true;
set hive.enforce.bucketing=true;

create table txnByCat(txnno INT, txndate STRING, custno INT, amount DOUBLE,
product STRING, city STRING, state STRING, spendby STRING)
partitioned by (category STRING)
row format delimited
fields terminated by ','
stored as textfile;

from txn909 txn INSERT OVERWRITE TABLE txnByCat PARTITION(category) select txn.txnno,txn.txndate,txn.custno,txn.amount,txn.product,txn.city,txn.state,txn.spendby, txn.category DISTRIBUTE BY category;



Comments

Popular posts from this blog

Problem Statement Of Real Estate Use Cases

Problem Statement Of Bank Marketing analysis

Hadoop Comand