python访问hbase需要额外的库,一般用thrift。使用thrift调用hbase,由于篇幅限制在这里不能说的很详细。
请百度Phthon thrift 或 python hbase 自行查阅相关资料。
下面是一个例子仅供参考
# coding:utf-8from thrift import Thrift
from thrift.transport import TSocket
from thrift.transport import TTransport
from thrift.protocol import TBinaryProtocol
from hbase import Hbase
from hbase.ttypes import *
import csv
def client_conn():
transport = TSocket.TSocket('hostname,like:localhost', port)
transport = TTransport.TBufferedTransport(transport)
protocol = TBinaryProtocol.TBinaryProtocol(transport)
client = Hbase.Client(protocol)
transport.open()
return client
if __name__ == "__main__":
client = client_conn()
result = client.getRow("table name","row name")
data_simple =[]
for k, v in result[0].columns.items(): #.keys()
data_simple.append((v.timestamp, v.value))
writer.writerows(data)
csvfile.close()
csvfile_simple = open("data_xy_simple.csv", "wb")
writer_simple = csv.writer(csvfile_simple)
writer_simple.writerow(["timestamp", "value"])
writer_simple.writerows(data_simple)
csvfile_simple.close()
python访问hbase数据
#!/usr/bin/python
import getopt,sys,time
from thrift.transport.TSocket import TSocket
from thrift.transport.TTransport import TBufferedTransport
from thrift.protocol import TBinaryProtocol
from hbase import Hbase
def usage():
print '''Usage :
-h: Show help information
-l: Show all table in hbase
-t {table} Show table descriptors
-t {table} -k {key} : show cell
-t {table} -k {key} -c {coulmn} : Show the coulmn
-t {table} -k {key} -c {coulmn} -v {versions} : Show more version
(write by [email protected])
'''
class geilihbase:
def __init__(self):
self.transport = TBufferedTransport(TSocket("127.0.0.1", "9090"))
self.transport.open()
self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
self.client = Hbase.Client(self.protocol)
def __del__(self):
self.transport.close()
def glisttable(self):
for table in self.client.getTableNames():
print table
def ggetColumnDescriptors(self,table):
rarr=self.client.getColumnDescriptors(table)
if rarr:
for (k,v) in rarr.items():
print "%-20s\t%s" % (k,v)
def gget(self,table,key,coulmn):
rarr=self.client.get(table,key,coulmn)
if rarr:
print "%-15s %-20s\t%s" % (rarr[0].timestamp,time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(rarr[0].timestamp/1000)),rarr[0].value)
def ggetrow(self,table,key):
rarr=self.client.getRow(table, key)
if rarr:
for (k,v) in rarr[0].columns.items():
print "%-20s\t%-15s %-20s\t%s" % (k,v.timestamp,time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(v.timestamp/1000)),v.value)
def ggetver(self, table, key, coulmn, versions):
rarr=self.client.getVer(table,key,coulmn, versions)
if rarr:
for row in rarr:
print "%-15s %-20s\t%s" % (row.timestamp,time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(row.timestamp/1000)),row.value)
def main(argv):
tablename=""
key=""
coulmn=""
versions=""
try:
opts, args = getopt.getopt(argv, "lht:k:c:v:", ["help","list"])
except getopt.GetoptError:
usage()
sys.exit(2)
for opt, arg in opts:
if opt in ("-h", "--help"):
usage()
sys.exit(0)
elif opt in ("-l", "--list"):
ghbase=geilihbase()
ghbase.glisttable()
sys.exit(0)
elif opt == '-t':
tablename = arg
elif opt == '-k':
key = arg
elif opt == '-c':
coulmn = arg
elif opt == '-v':
versions = int(arg)
if ( tablename and key and coulmn and versions ):
ghbase=geilihbase()
ghbase.ggetver(tablename, key, coulmn, versions)
sys.exit(0)
if (tablename and key and coulmn ):
ghbase=geilihbase()
ghbase.gget(tablename, key, coulmn)
sys.exit(0)
if (tablename and key ):
ghbase=geilihbase()
ghbase.ggetrow(tablename, key)
sys.exit(0)
if (tablename ):
ghbase=geilihbase()
ghbase.ggetColumnDescriptors(tablename)
sys.exit(0)
usage()
sys.exit(1)
if __name__ == "__main__":
main(sys.argv[1:])