HAProxy in front of InfluxDB

What we have: Telegraf, which sends stats to two InfluxDB servers, and a dashboard that we need to point at only one server. We also have Nagios for InfluxDB health monitoring.

Nagios check (used as the main trigger for InfluxDB health)
#cat /srv/check_influx.sh

#!/bin/bash
#
# HTTP health-check responder that lets a proxy (e.g. HAProxy) monitor the
# amount of data arriving in InfluxDB.
# Daemonized via xinetd.
#
# Usage: check_influx.sh check
#
# Optional environment overrides (the defaults are the values that used to be
# hard-coded here, so existing deployments behave exactly as before):
#   INFLUX_USER_PASS  "login:password" pair handed to curl -u
#   INFLUX_HOST       base URL of the InfluxDB HTTP API
# Prefer supplying real credentials through the environment instead of
# editing them into this file.

# Exit codes reported by the script (Nagios-style OK / CRITICAL / UNKNOWN).
readonly STATE_OK=0
readonly STATE_CRITICAL=2
readonly STATE_UNKNOWN=3

USER_PASS="${INFLUX_USER_PASS:-login:pass}"
HOST="${INFLUX_HOST:-https://127.0.0.1:8086}"

# Discard stderr for the rest of the script - presumably so that diagnostics
# from curl/jq never end up mixed into the HTTP response that is written to
# stdout when the script runs under xinetd.
exec 2>/dev/null

#######################################
# Probe the InfluxDB /ping endpoint.
# NOTE: inside this script the function shadows the system `ping` command.
# Globals:   HOST (read), USER_PASS (read)
# Arguments: none
# Outputs:   the HTTP status code reported by curl, followed by a newline
#######################################
ping() {
  local http_code
  # -o /dev/null drops the response body; -w prints only the status code.
  # -k skips TLS certificate verification.
  # HOST and USER_PASS are quoted so credentials containing spaces or glob
  # characters reach curl as a single argument.
  http_code=$(curl -s -o /dev/null -w "%{http_code}" -k "$HOST/ping" -u "$USER_PASS")
  printf '%s\n' "$http_code"
}

#######################################
# Run a query against the InfluxDB /query endpoint.
# Globals:   HOST (read), USER_PASS (read)
# Arguments: $1 - extra URL-encoded parameter, e.g. "db=<database>"
#            $2 - query text, sent as the "q" parameter
# Outputs:   the response body returned by curl (pretty-printed JSON is
#            requested), followed by a newline
#######################################
get() {
  local response
  # The URL contains "?", a glob character, so it must be quoted; the response
  # is printed quoted as well so that its whitespace and any "*" / "[" in the
  # JSON are passed through untouched instead of being split and glob-expanded.
  response=$(curl -s -G -k "$HOST/query?pretty=true" -u "$USER_PASS" \
    --data-urlencode "$1" --data-urlencode "q=$2")
  printf '%s\n' "$response"
}

#######################################
# Run the health check and answer with a minimal HTTP/1.1 response on stdout.
#
# The node is healthy when /ping answers 204 AND the `net` measurement of the
# tcp_telegraf database received at least 10000 records in the last 5 minutes.
# Healthy -> "200 OK", anything else -> "503 ERR".
#
# Globals:   STATE_OK, STATE_CRITICAL (read)
# Arguments: none
# Outputs:   HTTP status line, headers and a one-line text body on stdout;
#            the same one-line summary is sent to syslog via logger
# Returns:   does not return - exits with STATE_OK or STATE_CRITICAL
#######################################
main() {
  local ping_hrsp records status_line body content_length
  local err=0

  ping_hrsp=$(ping)

  if [[ "$ping_hrsp" == "204" ]]; then
    # Last column of every returned row, i.e. the record count.
    records=$(get "db=tcp_telegraf" "select count(*) from net where time > now() - 5m;" \
      | jq ".results | .[0].series | .[0].values | .[] | .[length-1]" 2>/dev/null)
    # Several rows would yield several lines; fold them into one so the value
    # cannot break the HTTP status line.
    records=${records//$'\n'/ }

    # Empty or non-numeric output (no series, jq failure, "null") is treated
    # as unhealthy explicitly rather than via a failing `[ ... -ge ... ]`.
    # 10# forces base 10 so a value with leading zeros is not read as octal.
    if [[ "$records" =~ ^[0-9]+$ ]] && (( 10#$records >= 10000 )); then
      status_line="HTTP/1.1 200 OK $ping_hrsp $records"
      body="OK Ping hrsp: $ping_hrsp ($records)"
    else
      status_line="HTTP/1.1 503 ERR $ping_hrsp $records"
      body="ERR Ping hrsp: $ping_hrsp ($records)"
      err=1
    fi
  else
    status_line="HTTP/1.1 503 ERR $ping_hrsp"
    body="ERR Ping hrsp: $ping_hrsp"
    err=1
  fi

  # Content-Length must be the number of bytes actually sent as the body
  # (text + CRLF). Measuring the un-expanded "\r\n" escape text plus echo's
  # newline, as before, over-reported the length by three bytes.
  content_length=$(printf '%s\r\n' "$body" | wc -c)

  printf '%s\r\n' "$status_line"
  printf 'Content-Type: text/plain\r\n'
  printf 'Connection: close\r\n'
  printf 'Content-Length: %d\r\n' "$content_length"
  printf '\r\n'
  printf '%s\r\n' "$body"
  logger -i -t "check_influx.sh" "$body"
  # Short pause before exiting - presumably to let the response be delivered
  # before the connection is torn down.
  sleep 0.1

  if (( err == 0 )); then
    exit "$STATE_OK"
  else
    exit "$STATE_CRITICAL"
  fi
}

# Entry point: the only supported argument is "check", which runs main
# (main terminates the script itself). Anything else prints the usage line.
# The usage text goes to stdout on purpose: stderr is discarded by this script.
if [ "$1" = "check" ]; then
  main
else
  echo "Usage: $0 check"
fi

# Reached only when no check was run.
exit $STATE_UNKNOWN

The script returns OK if InfluxDB answers the ping and at least 10,000 records were written in the last 5 minutes.
Bad answer
#/srv/check_influx.sh check
HTTP/1.1 503 ERR 204 179
Content-Type: text/plain
Connection: close
Content-Length: 29
ERR Ping hrsp: 204 (179)

Good answer
#curl http://1.1.1.1:9200 --head
HTTP/1.1 200 OK 204 33452
Content-Type: text/plain
Connection: close
Content-Length: 30

Daemonization via xinetd
# cat /etc/xinetd.d/influxchk
# Exposes /srv/check_influx.sh as a TCP service: every connection to port 9200
# runs "check_influx.sh check" and the script's stdout is the HTTP response.
service influxchk
{
        disable         = no
        flags           = REUSE
        socket_type     = stream
        # The service name is not a well-known one, hence UNLISTED together
        # with the explicit port below (see xinetd.conf(5)).
        type            = UNLISTED
        # Must match the health-check port used by HAProxy
        # ("default-server port 9200").
        port            = 9200
        wait            = no
        # The check script runs as the nagios user.
        user            = nagios
        server          = /srv/check_influx.sh
        server_args     = check
        log_on_failure  += USERID
        # NOTE(review): 0.0.0.0/0 accepts connections from any IPv4 address;
        # consider restricting this to the HAProxy host(s).
        only_from       = 0.0.0.0/0
        per_source      = UNLIMITED
}

HAProxy configuration (listen section)
...
# Single entry point for InfluxDB clients: accepts TLS on :8086 and forwards
# to whichever InfluxDB node currently passes the health check.
listen influx_8086
        mode http
        bind *:8086 ssl crt /etc/haproxy/ssl/cert.pem
        # 10800s = 3 hours for both client and server side.
        timeout client 10800s
        timeout server 10800s
        option forwardfor
        # Use HTTP health checks; the status line printed by check_influx.sh
        # (200 / 503) decides whether a node is up.
        option httpchk
        # Checks go to port 9200 (the xinetd-served check script), not to 8086:
        # every 2s (5s while the server is down), 3 failures mark a server
        # down, 2 successes bring it back.
        default-server port 9200 inter 2s downinter 5s fall 3 rise 2
        # Traffic to the nodes uses TLS without certificate verification.
        server aaa 1.1.1.1:8086 ssl verify none check
        server bbb 2.2.2.2:8086 ssl verify none check
...

Comments

Popular posts from this blog

FreeRadius and Google Workspace LDAP

pssh (parallel-ssh) problems on Debian 10 with Python 3.7