12. HBase Operations

Parameter Configuration #

CDH HBase Master #

HBASE_OPTS="-Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.port=10101 -javaagent:/opt/cloudera/parcels/CDH-5.16.1-1.cdh5.16.1.p0.3/lib/hbase/lib/jmx_prometheus_javaagent-0.16.1.jar=17000:/opt/cloudera/parcels/CDH-5.16.1-1.cdh5.16.1.p0.3/lib/hbase/conf/hbase_jmx_config.yaml"

CDH HBase RegionServer #

HBASE_OPTS="-javaagent:/opt/cloudera/parcels/CDH-5.16.1-1.cdh5.16.1.p0.3/lib/hbase/lib/jmx_prometheus_javaagent-0.16.1.jar=17002:/opt/cloudera/parcels/CDH-5.16.1-1.cdh5.16.1.p0.3/lib/hbase/conf/hbase_jmx_config.yaml"

RegionServer JVM Parameters #

-Xmx30g -Xms30g
-XX:MaxDirectMemorySize=30g
-XX:+UseG1GC
-XX:+UnlockExperimentalVMOptions
-XX:MaxGCPauseMillis=90
-XX:G1NewSizePercent=1
-XX:InitiatingHeapOccupancyPercent=30
-XX:+ParallelRefProcEnabled
-XX:ConcGCThreads=4
-XX:ParallelGCThreads=16
-XX:MaxTenuringThreshold=15
-XX:G1HeapRegionSize=32m
-XX:G1MixedGCCountTarget=32
-XX:G1OldCSetRegionThresholdPercent=5


-verbose:gc
-XX:+PrintGC
-XX:+PrintGCDetails
-XX:+PrintGCApplicationStoppedTime
-XX:+PrintHeapAtGC
-XX:+PrintGCDateStamps
-XX:+PrintAdaptiveSizePolicy
-XX:+PrintTenuringDistribution
-XX:+PrintSafepointStatistics
-XX:PrintSafepointStatisticsCount=1
-XX:PrintFLSStatistics=1
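
In CDH these flags are typically pasted into the RegionServer Java options (for example through the hbase-env.sh safety valve). A minimal sketch of the wiring follows; the -Xloggc path is an assumption, only a representative subset of the flags is repeated, and note that -XX:PrintFLSStatistics targets CMS and is effectively a no-op under G1:

# Sketch: wire the flag lists above into the RegionServer JVM.
# Append the full tuning and GC-logging flag lists from above;
# the log path is an assumption, not from the original notes.
export HBASE_REGIONSERVER_OPTS="$HBASE_REGIONSERVER_OPTS \
  -Xmx30g -Xms30g -XX:MaxDirectMemorySize=30g \
  -XX:+UseG1GC -XX:MaxGCPauseMillis=90 \
  -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps \
  -Xloggc:/var/log/hbase/gc-regionserver.log"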

HBase JMX #

lowercaseOutputName: true
lowercaseOutputLabelNames: true
rules:
  - pattern: Hadoop<service=HBase, name=RegionServer, sub=Regions><>Namespace_([^\W_]+)_table_([^\W_]+)_region_([^\W_]+)_metric_(\w+)
    name: HBase_metric_$4
    labels:
      namespace: "$1"
      table: "$2"
      region: "$3"
  - pattern: Hadoop<service=(\w+), name=(\w+), sub=(\w+)><>([\w._]+)
    name: hadoop_$1_$4
    labels:
      "name": "$2"
      "sub": "$3"
  - pattern: .+
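
Rules are evaluated top-down: the first pattern explodes per-region metrics into namespace/table/region labels, the second covers general Hadoop MBeans, and the trailing `.+` catch-all keeps any remaining metrics under default names. An illustrative transformation through the first rule (the attribute and values are hypothetical):

# MBean:      Hadoop:service=HBase,name=RegionServer,sub=Regions
# Attribute:  Namespace_default_table_usertable_region_1588230740_metric_getCount
# Exported:   hbase_metric_getcount{namespace="default",table="usertable",region="1588230740"}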

Grafana #

HBase 1.x dashboard

Default login: admin/admin

YCSB #

  1. Download the HBase 1.2 binding: ycsb-hbase12-binding-0.17.0.tar.gz

  2. Create the test table

hbase(main):001:0> n_splits=300 # HBase recommends (100 * number of regionservers)
hbase(main):002:0> create 'usertable','family', {SPLITS=> (1..n_splits).map{|i| "user#{1000+i*(9999-1000)/n_splits}"}}
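
To confirm the pre-split took effect, one option (a sketch; one output line per region) is to count usertable entries in hbase:meta:

echo "scan 'hbase:meta', {COLUMNS => 'info:regioninfo'}" | hbase shell | grep -c 'usertable,'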

  3. Load the data
python bin/ycsb load hbase12 \
-P workloads/workload_load \
-cp $HADOOP_CLASSPATH:hbase-conf \
-p table=usertable \
-p columnfamily=family \
-threads 200 \
-s
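
`workloads/workload_load` is a custom file not included in these notes; a plausible minimal version, assuming the stock CoreWorkload sized to match the runs below (every value here is an assumption):

# Hypothetical workloads/workload_load -- not from the original notes
workload=com.yahoo.ycsb.workloads.CoreWorkload
recordcount=100000000
fieldcount=10
fieldlength=100
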
  4. Run the workload
python bin/ycsb run hbase12 \
-P workloads/workloadb \
-cp $HADOOP_CLASSPATH:hbase-conf \
-p table=usertable \
-p columnfamily=family \
-p measurementtype=timeseries \
-p timeseries.granularity=2000 \
-threads 200 \
-s
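
With `measurementtype=timeseries`, YCSB reports latency as averages over fixed windows rather than a single end-of-run histogram; `timeseries.granularity=2000` sets that window to 2000 ms, which is what makes the throughput/latency curves below plottable over time.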

Test Result 1 #

Records: 14232772

READ/UPDATE: 95/5

Throughput / Latency #

(Figures: throughput/latency curves and a client GC chart.)

Test Result 2 #

hbase.bucketcache.ioengine heap
hfile.block.cache.size 0.4
hbase.regionserver.global.memstore.size 0.4

Records: 100000000

Read/Update        95/5
Records            100000000
Column families    1
Fields per row     10
Regions            300
Throughput         25k ops/s
Cache hits         100k/s

Note: disk throughput hit its bottleneck at about 80 MB/s.

hbase.bucketcache.ioengine offheap
hfile.block.cache.size 0.6
hbase.regionserver.global.memstore.size 0.2

Read/Update        95/5
Records            100000000
Column families    1
Fields per row     10
Regions            300
Throughput         58k ops/s
Cache hits         500k/s
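
For reference, a sketch of how the offheap variant might be expressed in hbase-site.xml; `hbase.bucketcache.size` is an assumption (the original notes do not list it) and would need to fit inside the -XX:MaxDirectMemorySize=30g set earlier:

<property>
   <name>hbase.bucketcache.ioengine</name>
   <value>offheap</value>
</property>

<property>
   <!-- Assumption: bucket cache capacity in MB, not given in the original notes -->
   <name>hbase.bucketcache.size</name>
   <value>20480</value>
</property>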

QA #

No FileSystem for scheme: hdfs

This error usually means no FileSystem implementation is registered for hdfs:// URIs, most often because a fat jar merged the META-INF/services/org.apache.hadoop.fs.FileSystem files from hadoop-common and hadoop-hdfs and lost the HDFS entry. Declaring the implementations explicitly in core-site.xml works around it:
<property>
   <name>fs.file.impl</name>
   <value>org.apache.hadoop.fs.LocalFileSystem</value>
   <description>The FileSystem for file: uris.</description>
</property>
 
<property>
   <name>fs.hdfs.impl</name>
   <value>org.apache.hadoop.hdfs.DistributedFileSystem</value>
   <description>The FileSystem for hdfs: uris.</description>
</property>
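
If the error comes from an uber jar, an alternative to the config workaround is merging the service files at build time. A sketch for maven-shade-plugin (assuming a Maven build, which the original notes do not specify):

<!-- Inside maven-shade-plugin's <configuration>: merge META-INF/services
     entries so DistributedFileSystem stays registered -->
<transformers>
   <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
</transformers>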