I am trying to get selected columns from rows in HBase after applying some filters. Consider a table like:
ename:fname ename:lname salary:gross salary:da salary:ta
I want to get a list of all employees having gross salary > 1500, and for this I have written the following code. The problem I am facing is that when I filter on a column, I get only that column in the output, which makes sense, because that is what those filters are created for. But what if I want to get my desired columns back while filtering on only one specific column, as in the case I just mentioned: the list of all employees having gross salary > 1500?
The output should contain the following columns:
ename:lname, ename:fname, salary:gross, salary:ta
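For reference, this is roughly how I created and populated the emp table (a sketch using the same old 0.9x-style client API as the rest of my code; the exact row keys and values here are made up to match the output shown further below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;

public class CreateEmpTable {

    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();

        // Create the "emp" table with its two column families
        HBaseAdmin admin = new HBaseAdmin(conf);
        HTableDescriptor desc = new HTableDescriptor("emp");
        desc.addFamily(new HColumnDescriptor("ename"));
        desc.addFamily(new HColumnDescriptor("salary"));
        admin.createTable(desc);
        admin.close();

        // Insert one sample row (values are illustrative only)
        HTable table = new HTable(conf, "emp");
        Put put = new Put(Bytes.toBytes("101"));
        put.add(Bytes.toBytes("ename"), Bytes.toBytes("fname"), Bytes.toBytes("john"));
        put.add(Bytes.toBytes("ename"), Bytes.toBytes("lname"), Bytes.toBytes("smith"));
        put.add(Bytes.toBytes("salary"), Bytes.toBytes("gross"), Bytes.toBytes("2000"));
        put.add(Bytes.toBytes("salary"), Bytes.toBytes("da"), Bytes.toBytes("180"));
        put.add(Bytes.toBytes("salary"), Bytes.toBytes("ta"), Bytes.toBytes("120"));
        table.put(put);
        table.close();
    }
}

Here is my first attempt, using FamilyFilter, QualifierFilter and ValueFilter: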
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.FamilyFilter;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.QualifierFilter;
import org.apache.hadoop.hbase.filter.ValueFilter;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class MyQualifierFilterExample {

    public static void main(String[] args) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        HTable table = new HTable(conf, "emp");

        List<Filter> filters = new ArrayList<Filter>();

        // Keep only cells from the "salary" family...
        Filter famFilter = new FamilyFilter(CompareFilter.CompareOp.EQUAL,
                new BinaryComparator(Bytes.toBytes("salary")));
        filters.add(famFilter);

        // ...whose qualifier is "gross"...
        Filter colFilter = new QualifierFilter(CompareFilter.CompareOp.EQUAL,
                new BinaryComparator(Bytes.toBytes("gross")));
        filters.add(colFilter);

        // ...and whose value is >= "1500" (byte-wise comparison)
        Filter valFilter = new ValueFilter(CompareFilter.CompareOp.GREATER_OR_EQUAL,
                new BinaryComparator(Bytes.toBytes("1500")));
        filters.add(valFilter);

        FilterList fl = new FilterList(FilterList.Operator.MUST_PASS_ALL, filters);

        Scan scan = new Scan();
        scan.setFilter(fl);
        ResultScanner scanner = table.getScanner(scan);

        System.out.println("Scanning table... ");
        for (Result result : scanner) {
            for (KeyValue kv : result.raw()) {
                System.out.println("kv:" + kv + ", Key: " + Bytes.toString(kv.getRow())
                        + ", Value: " + Bytes.toString(kv.getValue()));
            }
        }
        scanner.close();
        System.out.println("Completed ");
    }
}
Scanning table...
kv:101/salary:gross/1339876269770/Put/vlen=4, Key: 101, Value: 2000
kv:102/salary:gross/1339876277659/Put/vlen=4, Key: 102, Value: 2400
kv:105/salary:gross/1339876300585/Put/vlen=4, Key: 105, Value: 2300
kv:106/salary:gross/1339876310004/Put/vlen=4, Key: 106, Value: 2900
Completed
Here is my second attempt, using SingleColumnValueFilter to filter the rows and addColumn to select which columns come back:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class MyQualifierFilterExample {

    public static void main(String[] args) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        HTable table = new HTable(conf, "emp");

        List<Filter> filters = new ArrayList<Filter>();

        // Keep rows whose salary:gross is >= "1300" (byte-wise comparison)
        SingleColumnValueFilter colValFilter = new SingleColumnValueFilter(
                Bytes.toBytes("salary"), Bytes.toBytes("gross"),
                CompareFilter.CompareOp.GREATER_OR_EQUAL,
                new BinaryComparator(Bytes.toBytes("1300")));
        colValFilter.setFilterIfMissing(false); // rows without salary:gross still pass
        filters.add(colValFilter);

        // Additionally require salary:da >= "150"
        Filter colValFilter2 = new SingleColumnValueFilter(
                Bytes.toBytes("salary"), Bytes.toBytes("da"),
                CompareFilter.CompareOp.GREATER_OR_EQUAL,
                new BinaryComparator(Bytes.toBytes("150")));
        filters.add(colValFilter2);

        FilterList fl = new FilterList(FilterList.Operator.MUST_PASS_ALL, filters);

        Scan scan = new Scan();
        scan.setFilter(fl);
        // Select only the columns I actually want back
        scan.addColumn(Bytes.toBytes("ename"), Bytes.toBytes("fname"));
        scan.addColumn(Bytes.toBytes("ename"), Bytes.toBytes("lname"));
        scan.addColumn(Bytes.toBytes("salary"), Bytes.toBytes("gross"));
        scan.addColumn(Bytes.toBytes("salary"), Bytes.toBytes("da"));
        ResultScanner scanner = table.getScanner(scan);

        System.out.println("Scanning table... ");
        for (Result result : scanner) {
            boolean keyPrinted = false;
            for (KeyValue kv : result.raw()) {
                // Print the row key once per row, then each cell as family.qualifier=value
                if (!keyPrinted) {
                    System.out.print("Key: " + Bytes.toString(kv.getRow()));
                    keyPrinted = true;
                }
                System.out.print(", " + Bytes.toString(kv.getFamily())
                        + "." + Bytes.toString(kv.getQualifier()));
                System.out.print("=" + Bytes.toString(kv.getValue()));
            }
            System.out.println("");
            System.out.println("-------------------");
        }
        scanner.close();
        System.out.println("Completed ");
    }
}
Scanning table...
Key: 103, ename.fname=peter, ename.lname=parker, salary.da=190, salary.gross=1400
-------------------
Key: 105, ename.fname=harry, ename.lname=potter, salary.da=154, salary.gross=2300
-------------------
Completed
You should use a combination of SingleColumnValueFilter and addFamily (or addColumn).
See below (I cannot test it on my end at this time):
Scan scan = new Scan();

SingleColumnValueFilter filter = new SingleColumnValueFilter(
    Bytes.toBytes("salary"),
    Bytes.toBytes("gross"),
    CompareFilter.CompareOp.GREATER,
    Bytes.toBytes("1500")
);
// To prevent the entire row from being emitted
// if the column is not found on a row
filter.setFilterIfMissing(true);

scan.setFilter(filter);
scan.addFamily(Bytes.toBytes("ename"));
scan.addColumn(Bytes.toBytes("salary"), Bytes.toBytes("da"));
scan.addColumn(Bytes.toBytes("salary"), Bytes.toBytes("gross"));
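One thing to watch out for: the salary values in this table are stored as strings, and BinaryComparator compares raw bytes lexicographically, so for example "900" would compare greater than "1500". If the values can vary in digit count, one option (an untested sketch, not something the code above does) is to store salaries as big-endian longs via Bytes.toBytes(long), whose byte order matches numeric order for non-negative values:

// When writing: store the salary as an 8-byte big-endian long instead of a string
Put put = new Put(Bytes.toBytes("101"));
put.add(Bytes.toBytes("salary"), Bytes.toBytes("gross"), Bytes.toBytes(2000L));
table.put(put);

// When reading: a numeric threshold then compares correctly byte-by-byte
SingleColumnValueFilter numFilter = new SingleColumnValueFilter(
    Bytes.toBytes("salary"),
    Bytes.toBytes("gross"),
    CompareFilter.CompareOp.GREATER,
    new BinaryComparator(Bytes.toBytes(1500L))
);
numFilter.setFilterIfMissing(true);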