时间:2020-11-06 23:49:25 | 栏目:Python代码 | 点击:次
supervisor 虽然也能拉起 logstash 进程,但 supervisor 自身有时会挂掉,有时也拉不起进程,而且就算拉起来了也没有邮件告警功能。因此编写了一个 Python 脚本来监控所有服务器。以下代码只列举了一台服务器,如需监控更多服务器,在列表里面添加即可!
(背景:日志采集过程中连续几天数据异常。由于服务器太多,当时不太想逐台登录将近 40 台服务器去查看 logstash 进程,但又一直查不出原因;后来只好每台服务器都查看了 logstash 进程,果然发现有 3 台采集搜索日志的 logstash 进程没了。于是编写了此脚本进行监控,效果很好。)
#!/usr/bin/env python
# coding=utf-8
"""Watchdog for logstash processes running on remote servers.

For every ``[ssh_target, config_name]`` pair in ``ipAndUrlList`` the script
lists the remote processes over ssh. When no process mentions the config file
name, logstash is started again over ssh and an alert e-mail is sent.

Run as a script, the check is repeated forever with a pause of
``CHECK_INTERVAL_SECONDS`` between two rounds.
"""
_author_ = 'liuzc'

import os
import datetime
import smtplib
import subprocess
import time
from email.mime.text import MIMEText
from email.header import Header

# File that receives the matching `ps` lines of the most recent check.
log = "/home/data/liuzc/check_ps/logstash_process.log"

# One ["user@host", logstash config file name] pair per monitored process;
# add further servers to this list.
ipAndUrlList = [
    ["log@10.10.10.10", "aiu-service-agent.conf"]
]

# Pause between two monitoring rounds. Without it the loop hammers ssh and may
# re-check a host before a freshly restarted process is visible in `ps`.
CHECK_INTERVAL_SECONDS = 60
# Upper bound for one remote `ps` call so a hung host cannot block the loop.
SSH_TIMEOUT_SECONDS = 30
# Socket timeout for the SMTP conversation.
MAIL_TIMEOUT_SECONDS = 30


def _findProcessLines(psOutput, configName):
    """Return the lines of *psOutput* that mention *configName*.

    Lines containing "grep" are ignored, mirroring the former
    ``grep <config> | grep -v grep`` pipeline. The config name is matched
    literally, so the "." in a file name is not a wildcard.
    """
    return [
        line for line in psOutput.splitlines()
        if configName in line and "grep" not in line
    ]


def _listRemoteProcesses(ip):
    """Return the ``ps -ef`` output of ssh target *ip*, or None on failure.

    None means the process list could not be obtained (ssh missing, host
    unreachable, timeout, non-zero exit status); it says nothing about
    whether logstash is running.
    """
    try:
        result = subprocess.run(
            ["ssh", "-o", "ConnectTimeout=10", ip, "ps", "-ef"],
            stdin=subprocess.DEVNULL,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            timeout=SSH_TIMEOUT_SECONDS,
        )
    except (OSError, subprocess.SubprocessError) as e:
        print("Error: 无法通过ssh检查 %s :" % ip, e)
        return None
    if result.returncode != 0:
        detail = result.stderr.decode("utf-8", "replace").strip()
        print("Error: ssh %s 返回码为 %s : %s" % (ip, result.returncode, detail))
        return None
    # Decode leniently: process arguments on the remote host are arbitrary bytes.
    return result.stdout.decode("utf-8", "replace")


def _writeLog(processLines):
    """Overwrite the log file with *processLines* (left empty when none matched)."""
    try:
        with open(log, "w", encoding="utf-8") as logFile:
            logFile.writelines(line + "\n" for line in processLines)
    except OSError as e:
        # The restart decision no longer depends on this file, so a failed
        # write is reported but does not abort the check.
        print("Error: 无法写入日志文件 %s :" % log, e)


def _restartLogstash(ip, configName):
    """Start logstash for *configName* on ssh target *ip* without waiting for it.

    The install and config paths are derived from the user part of *ip*
    ("user@host" -> /home/user/elk/logstash/...).

    Raises:
        OSError: the local ssh client could not be started.
    """
    username = ip.split("@")[0]
    command = [
        "ssh", ip,
        "/home/%s/elk/logstash/bin/logstash" % username,
        "-f",
        "/home/%s/elk/logstash/etc/%s" % (username, configName),
    ]
    # Popen returns immediately, like the former `ssh ... &`; the ssh session
    # stays open for as long as the remote logstash runs.
    subprocess.Popen(command, stdin=subprocess.DEVNULL)


def checkPS(servers=None):
    """Check every monitored server once and restart missing logstash processes.

    Args:
        servers: iterable of ``[ssh_target, config_name]`` pairs. Defaults to
            the module-level ``ipAndUrlList``.

    A server whose process list cannot be fetched is reported and skipped; it
    is neither restarted nor alerted on, because an ssh failure does not prove
    that logstash is down.
    """
    if servers is None:
        servers = ipAndUrlList

    beginTime = datetime.datetime.now()
    print("开始检查进程。。。时间为:%s" % beginTime)

    for ipAndUrl in servers:
        ip = ipAndUrl[0]
        configName = ipAndUrl[1]

        psOutput = _listRemoteProcesses(ip)
        if psOutput is None:
            continue

        processLines = _findProcessLines(psOutput, configName)
        _writeLog(processLines)
        if processLines:
            continue

        print("ip为%s,配置文件名称为 %s 不在进程中,正在重启进程......" % (ip, configName))
        try:
            _restartLogstash(ip, configName)
        except OSError as e:
            print("Error: 重启 %s 上的 %s 失败:" % (ip, configName), e)
            continue
        print("重启进程成功,开始发送邮件")
        sendMail(ip, configName)

    endTime = datetime.datetime.now()
    print("检查进程结束。。。时间为:%s" % endTime)
    print("=============================================================================")


def sendMail(ip, configName):
    """E-mail an alert saying logstash for *configName* on *ip* was restarted.

    Failures are printed and never raised, so a broken mail server cannot stop
    the monitoring loop. The SMTP connection is always closed.
    """
    now = datetime.datetime.now()

    # Third-party SMTP service settings.
    # NOTE(review): the credentials are hard-coded in the source; consider
    # loading them from the environment or a protected config file.
    mail_host = "mail.qq.com"
    mail_user = "aa@qq.com"
    mail_pass = "deretrgt4556h"

    sender = 'aa@qq.com'
    receivers = ['bb@qq.com', 'cc@qq.com', 'dd@qq.com']

    message = MIMEText(
        'logstash进程异常,但已自动重启...服务器为 %s , 配置文件为 %s , 时间为 %s ' % (ip, configName, now),
        'plain',
        'utf-8',
    )
    message['From'] = Header("logstash_ps", 'utf-8')
    message['To'] = Header("users", 'utf-8')
    subject = 'logstash进程异常告警!!!'
    message['Subject'] = Header(subject, 'utf-8')

    smtpObj = smtplib.SMTP(timeout=MAIL_TIMEOUT_SECONDS)
    try:
        smtpObj.connect(mail_host, 25)  # 25 is the plain SMTP port
        smtpObj.login(mail_user, mail_pass)
        smtpObj.sendmail(sender, receivers, message.as_string())
        print ("邮件发送成功!!!")
    except (smtplib.SMTPException, OSError) as e:
        # OSError covers refused connections, DNS failures and timeouts.
        print("Error: 邮件发送失败。。。", e)
    finally:
        try:
            smtpObj.quit()
        except (smtplib.SMTPException, OSError):
            # Never connected or already dropped: just release the socket.
            smtpObj.close()


if __name__ == '__main__':
    while True:
        try:
            checkPS()
        except Exception as e:
            # Top-level boundary: report and keep the watchdog alive.
            print('Error:', e)
        time.sleep(CHECK_INTERVAL_SECONDS)