python实现presto查询语句监控


Presto 作为 OLAP 引擎,旨在提供快速的查询响应。然而有的开发或分析人员从不考虑语句会消耗多少资源,动辄查询上个月甚至上一年的数据,占满整个 Presto 集群的资源,导致其他本应快速完成的查询无法执行。

鉴于 Presto 的这一定位,需要对所有查询语句加以监控:运行时间超过指定阈值的查询应当被 kill 掉,不能让一次大查询占满集群资源。

#!/usr/bin/python
# -*- coding: UTF-8 -*-

#
#  python -u presto.py > presto.log 2>&1
#


import urllib2
import json
import time
import sys

# Time limit in seconds: a query that has been RUNNING longer than this is killed
timeout = 30
# Kill message sent with the kill request; per the original author it is shown
# on Presto's built-in monitoring page
kill_data = """ query timeout, killed by God! """
# Base URL of the Presto coordinator
coordinator = "http://coordinator:9002"


def checkQuery(dirct):
    """Kill one query if it has been RUNNING for longer than ``timeout``.

    ``dirct`` is a single entry of the JSON list returned by
    ``getExceptionQuerys``.  Entries whose ``state`` is not ``'RUNNING'`` are
    ignored.  For a running entry, ``session.startTime`` is treated as epoch
    milliseconds and compared with the current wall-clock time; once the
    difference exceeds ``timeout`` seconds, the query's details are printed
    and the entry is handed to ``kill``.
    """
    if dirct['state'] != 'RUNNING':
        return

    started_ms = dirct['session']['startTime']
    now_ms = int(round(time.time() * 1000))
    if now_ms - started_ms <= 1000 * timeout:
        # Still within the allowed running time.
        return

    print('killed %s' % dirct['queryId'])

    # Report layout, written piece by piece so that the trailing blanks that
    # are part of the printed text stay visible in the source.
    report = (
        ' \n'
        '\n'
        'queryId: {queryId} \n'
        'pool: {pool} \n'
        'queryType: {type}\n'
        'source: {source}\n'
        'query: {query} \n'
        '\n'
        '            '
    )
    details = report.format(
        queryId=dirct['queryId'],
        pool=dirct['memoryPool'],
        type=dirct['queryType'],
        source=dirct['session']['source'],
        # The SQL text may contain non-ASCII characters; encode it explicitly.
        query=dirct['query'].encode('UTF-8'),
    )
    print(details)

    kill(dirct)



def kill(dirct):
    """Ask the coordinator to kill the query described by ``dirct``.

    Sends an HTTP PUT to ``<dirct['self']>/killed`` with ``kill_data`` as the
    request body.  The response is closed as soon as the request completes so
    the underlying connection is not leaked.

    Raises whatever ``urllib2.urlopen`` raises (``urllib2.URLError`` or its
    subclass ``urllib2.HTTPError``) when the request fails.
    """
    _self = dirct['self']
    request = urllib2.Request(_self.decode('UTF-8') + u'/killed', data=kill_data.decode('UTF-8'))
    request.add_header("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36")
    # urllib2 would send a POST because a body is attached; force PUT instead.
    request.get_method = lambda: 'PUT'
    response = urllib2.urlopen(request)
    # The reply body is not needed; close the response so the socket is
    # released instead of lingering until garbage collection.
    response.close()


def getExceptionQuerys():
    """Fetch the coordinator's query list.

    Issues an HTTP GET to ``<coordinator>/v1/query`` and returns the decoded
    JSON body.  The caller iterates over the result and passes each element
    to ``checkQuery``.

    Raises whatever ``urllib2.urlopen`` raises on a failed request, and
    ``ValueError`` if the body is not valid JSON.
    """
    request = urllib2.Request(coordinator + "/v1/query")
    request.add_header("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36")
    response = urllib2.urlopen(request)
    try:
        queryJsonStr = response.read()
    finally:
        # Always release the connection, even if reading the body fails.
        response.close()
    return json.loads(queryJsonStr)


def printException(dirct):
    """Write a single empty line to standard output; ``dirct`` is not used."""
    print('')


if __name__ == '__main__':
    # Take one snapshot of the coordinator's query list and check every entry.
    querys = getExceptionQuerys()
    failures = 0
    for query in querys:
        try:
            checkQuery(query)
        except urllib2.URLError as err:
            # HTTPError is a subclass of URLError.  A kill request can fail
            # for a single query (presumably e.g. when it finished between the
            # listing and the kill); that must not stop the remaining queries
            # from being checked.
            failures += 1
            sys.stderr.write('failed to kill %s: %s\n' % (query.get('queryId'), err))
    if failures:
        # Keep signalling the problem to the caller through the exit status.
        sys.exit(1)

写文章

提问题

面试题