HBase Filter Examples

Create a table and insert some sample records. Save the commands below as sensors.txt and execute them using the HBase shell.

# Build the "sensors" table with two column families, "vol" and "amp",
# then load the sample cells used by the filter examples.
create 'sensors', 'vol', 'amp'

# Each entry is [row key, column family, [[qualifier, value], ...]].
# Entries and cells are listed in the order they are written.
[
  ['d1-2020-01-02', 'vol', [['1', '1.01'], ['4', '0.0'], ['10', '1.02'], ['24', '1.11'], ['50', '1.41']]],
  ['d1-2020-01-02', 'amp', [['1', '5.01'], ['4', '6.0'], ['10', '7.02'], ['24', '5.11'], ['50', '3.41']]],
  ['d2-2020-01-02', 'vol', [['3', '1.01'], ['4', '0.0'], ['12', '1.02'], ['16', '1.11'], ['30', '1.41']]],
  ['d2-2020-01-02', 'amp', [['3', '5.51'], ['4', '6.2'], ['12', '6.02'], ['16', '3.21'], ['30', '4.41']]]
].each do |row_key, family, cells|
  cells.each do |qualifier, value|
    # One put per cell; the column is addressed as "family:qualifier".
    put 'sensors', row_key, "#{family}:#{qualifier}", value
  end
end


Insert the sample records by feeding sensors.txt to the HBase shell:

$ hbase shell < sensors.txt




hbase(main):002:0> scan "sensors", {FILTER => "KeyOnlyFilter()"}                                                                                                                              
ROW                                              COLUMN+CELL                                                                                                                                  
 d1-2020-01-02                                   column=amp:1, timestamp=1589229099711, value=                                                                                                
 d1-2020-01-02                                   column=amp:10, timestamp=1589229099743, value=                                                                                               
 d1-2020-01-02                                   column=amp:24, timestamp=1589229099761, value=                                                                                               
 d1-2020-01-02                                   column=amp:4, timestamp=1589229099728, value=                                                                                                
 d1-2020-01-02                                   column=amp:50, timestamp=1589229099776, value=                                                                                               
 d1-2020-01-02                                   column=vol:1, timestamp=1589229099579, value=                                                                                                
 d1-2020-01-02                                   column=vol:10, timestamp=1589229099629, value=                                                                                               
 d1-2020-01-02                                   column=vol:24, timestamp=1589229099656, value=                                                                                               
 d1-2020-01-02                                   column=vol:4, timestamp=1589229099595, value=                                                                                                
 d1-2020-01-02                                   column=vol:50, timestamp=1589229099696, value=                                                                                               
 d2-2020-01-02                                   column=amp:12, timestamp=1589229099989, value=                                                                                               
 d2-2020-01-02                                   column=amp:16, timestamp=1589229100004, value=                                                                                               
 d2-2020-01-02                                   column=amp:3, timestamp=1589229099919, value=                                                                                                
 d2-2020-01-02                                   column=amp:30, timestamp=1589229100019, value=                                                                                               
 d2-2020-01-02                                   column=amp:4, timestamp=1589229099974, value=                                                                                                
 d2-2020-01-02                                   column=vol:12, timestamp=1589229099859, value=                                                                                               
 d2-2020-01-02                                   column=vol:16, timestamp=1589229099874, value=                                                                                               
 d2-2020-01-02                                   column=vol:3, timestamp=1589229099793, value=                                                                                                
 d2-2020-01-02                                   column=vol:30, timestamp=1589229099900, value=                                                                                               
 d2-2020-01-02                                   column=vol:4, timestamp=1589229099810, value=                                                                                                
2 row(s)                                                                                            



hbase(main):006:0> scan "sensors", {FILTER => "(KeyOnlyFilter() AND PrefixFilter('d2'))"}                                                                                                     
ROW                                              COLUMN+CELL                                                                                                                                  
 d2-2020-01-02                                   column=amp:12, timestamp=1589229099989, value=                                                                                               
 d2-2020-01-02                                   column=amp:16, timestamp=1589229100004, value=                                                                                               
 d2-2020-01-02                                   column=amp:3, timestamp=1589229099919, value=                                                                                                
 d2-2020-01-02                                   column=amp:30, timestamp=1589229100019, value=                                                                                               
 d2-2020-01-02                                   column=amp:4, timestamp=1589229099974, value=                                                                                                
 d2-2020-01-02                                   column=vol:12, timestamp=1589229099859, value=                                                                                               
 d2-2020-01-02                                   column=vol:16, timestamp=1589229099874, value=                                                                                               
 d2-2020-01-02                                   column=vol:3, timestamp=1589229099793, value=                                                                                                
 d2-2020-01-02                                   column=vol:30, timestamp=1589229099900, value=                                                                                               
 d2-2020-01-02                                   column=vol:4, timestamp=1589229099810, value=                                                                                                
1 row(s)                                                                                                    






hbase(main):009:0> scan "sensors", {FILTER => "(KeyOnlyFilter() AND PrefixFilter('d2') AND ColumnCountGetFilter(1))"}                                                                         
ROW                                              COLUMN+CELL                                                                                                                                  
 d2-2020-01-02                                   column=amp:12, timestamp=1589229099989, value=                                                                                               
1 row(s)                                                                                                                                                                                      
Took 0.0154 seconds                                                                                                                                                                           
hbase(main):010:0> scan "sensors", {FILTER => "(KeyOnlyFilter() AND PrefixFilter('d2') AND ColumnCountGetFilter(2))"}                                                                         
ROW                                              COLUMN+CELL                                                                                                                                  
 d2-2020-01-02                                   column=amp:12, timestamp=1589229099989, value=                                                                                               
 d2-2020-01-02                                   column=amp:16, timestamp=1589229100004, value=                                                                                               
1 row(s)                                                                                                                                                                                      
Took 0.0112 seconds                                                                                                                                                                           
hbase(main):011:0> scan "sensors", {FILTER => "(KeyOnlyFilter() AND PrefixFilter('d2') AND ColumnCountGetFilter(3))"}                                                                         
ROW                                              COLUMN+CELL                                                                                                                                  
 d2-2020-01-02                                   column=amp:12, timestamp=1589229099989, value=                                                                                               
 d2-2020-01-02                                   column=amp:16, timestamp=1589229100004, value=                                                                                               
 d2-2020-01-02                                   column=amp:3, timestamp=1589229099919, value=                                                                                                
1 row(s)                                                                                                                   






TTL Behaviour

# TTL demo: every command below operates on the same 'session_id' table.
# Column family cf1 is created with a TTL of 30 (seconds).
create 'session_id', {'NAME' => 'cf1', 'TTL' => 30}

# Create a new record.
put 'session_id', 'row123', 'cf1:desc', 'TTL Demo'

# Wait more than 30 seconds, then read the record back: it should be gone.
get 'session_id', 'row123', 'cf1:desc'

# Expiry hides the cell from reads; it is not physically deleted right away.
# NOTE(review): whether a RAW scan still lists the expired cell may depend on
# the HBase version; confirm on your cluster.
scan 'session_id', {RAW => true}

# Write the in-memory data out to an HFile so compaction has a file to rewrite.
flush 'session_id'

# A major compaction is what removes the expired record from the store files
# (plain `compact` only requests a minor compaction).
major_compact 'session_id'