第一步:在我们的服务器上搭建rsyslog服务,开放端口,收集对方产生的日志。
第二步:编写Java服务,批量接收日志信息或文件,解析并储存。
第三步:编写 Python 脚本,监控日志变化,并将增量日志提交到后台。需要处理两类异常情况:服务器访问出错(服务不可达),以及后台服务处理失败。
方案:
1.启动子进程,调用 Linux 工具 tail 监控日志变化,并将增量日志临时保存到队列中;
def __init__(self, server_url, upload_file_action, upload_msg_action, file, encoding, sendinterval, retryinterval):
    ...  # remaining constructor setup is elided in this excerpt
    self.start_monitor_log()


def start_monitor_log(self):
    """Follow ``self.fileurl`` with ``tail -f`` and queue every non-blank line.

    Each line read from the child's stdout is stripped of surrounding
    whitespace and, if anything is left, put on ``self.msgqueue``.  The call
    blocks while the child keeps its stdout open and returns once it reaches
    end-of-file (i.e. the ``tail`` process has gone away).
    """
    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact, and makes ``popen.pid`` the pid of tail itself
    # instead of the pid of an intermediate shell.
    # NOTE(review): stderr is piped but never read in this block.
    popen = subprocess.Popen(['tail', '-f', self.fileurl],
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
    print("tail pid: %s" % popen.pid)
    while True:
        raw = popen.stdout.readline()
        if not raw:
            # readline() yields an empty result only at end-of-file; without
            # this check the loop would spin forever once tail has exited.
            break
        line = raw.strip()
        if line:
            self.msgqueue.put(line)
2.启动消息发送线程,定期从队列中提取保存的日志并发送,服务器不可达或处理失败则备份日志到文件;
def __init__(self, server_url, upload_file_action, upload_msg_action, file, encoding, sendinterval, retryinterval):
    ...  # remaining constructor setup is elided in this excerpt
    self.start_send_msg_task(self.sendinterval)


def start_send_msg_task(self, interval):
    """Start the background thread that runs ``send_msg_task``.

    Args:
        interval: value handed to ``send_msg_task`` as its sleep time
            between two send rounds.

    A failure to start the thread is logged and not re-raised.
    """
    try:
        # Let logging format the value: ``bytes(interval)`` only produces the
        # decimal text on Python 2, where ``bytes`` is ``str``.
        logger.info("start msg sendding task, interval:%s", interval)
        thread.start_new_thread(self.send_msg_task, ("msg_sending_thread", interval))
    except Exception:
        # ``Exception`` rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not swallowed here.
        logger.exception("start msg sending task failed")


def send_msg_task(self, threadName, interval):
    """Thread body: forever drain ``self.msgqueue`` and send what was queued.

    Each round takes as many items as ``qsize()`` reported, base64-encodes
    every item, serialises the batch as a JSON array and passes it to
    ``self.sendmsg`` with ``self.MODE_NEW_MSG``.  An empty batch sends
    nothing.  The thread then sleeps ``interval`` seconds.

    Args:
        threadName: label supplied by the starter; not used in the body.
        interval: seconds to sleep between rounds.
    """
    while True:
        # Only ``pending`` items are taken, so lines queued while this round
        # is running are left for the next one.
        # NOTE(review): get() would block if another consumer emptied the
        # queue in between; presumably this thread is the only consumer.
        pending = self.msgqueue.qsize()
        encoded = [
            # Decode to text so the JSON encoder gets strings on Python 3 as
            # well; the produced JSON is unchanged on Python 2.
            base64.b64encode(self.msgqueue.get()).decode("ascii")
            for _ in range(pending)
        ]
        if encoded:
            self.sendmsg(json.dumps(encoded), self.MODE_NEW_MSG)
        time.sleep(interval)
3.服务器不可达和处理失败这两种情况的备份分别保存在不同的文件夹:服务器不可达时保存到 waitback,处理失败时保存到 backup;
服务不可达
def sendExCallback(self, datas, ex, mode):
    """React to a request that failed with an exception, based on ``mode``.

    The payload is always read from ``datas[0][1][1]``:

    * ``MODE_NEW_MSG``  - payload is a JSON array of base64 strings; they are
      decoded and passed to ``self.savemsg`` together with ``self.waitdir``.
    * ``MODE_NEW_FILE`` - payload is passed to ``self.savefile`` together
      with ``self.waitdir``.
    * ``MODE_EX_FILE``  - nothing is done.
    * ``MODE_ERR_FILE`` - payload is passed to ``self.movefile`` together
      with ``self.backup``.

    Any other mode is ignored.  ``ex`` is not used in the body; the active
    exception is recorded through ``logger.exception``.
    """
    logger.exception("perform http request error, save to file")

    if mode == self.MODE_NEW_MSG:
        encoded_lines = json.loads(datas[0][1][1])
        raw_lines = [base64.b64decode(item) for item in encoded_lines]
        self.savemsg(raw_lines, self.waitdir)
        return

    if mode == self.MODE_NEW_FILE:
        self.savefile(datas[0][1][1], self.waitdir)
        return

    if mode == self.MODE_EX_FILE:
        return

    if mode == self.MODE_ERR_FILE:
        # NOTE(review): the file is moved to self.backup here - confirm this
        # is the intended destination when the request itself failed.
        self.movefile(datas[0][1][1], self.backup)
处理失败
def sendErrCallback(self, datas, httpcode, resp, mode):
    """React to a response that reported an upload error, based on ``mode``.

    The payload is always read from ``datas[0][1][1]``:

    * ``MODE_NEW_MSG``  - payload is a JSON array of base64 strings; they are
      decoded and passed to ``self.savemsg`` together with ``self.backup``.
    * ``MODE_NEW_FILE`` - payload is passed to ``self.savefile`` together
      with ``self.backup``.
    * ``MODE_EX_FILE``  - payload is passed to ``self.movefile`` together
      with ``self.waitdir``.
    * ``MODE_ERR_FILE`` - nothing is done.

    Any other mode is ignored.

    Args:
        datas: request data; only ``datas[0][1][1]`` is read.
        httpcode: not used in the body.
        resp: response content, written to the error log.
        mode: one of the ``MODE_*`` constants of the instance.
    """
    logger.error(resp)
    logger.error("upload error, try backup")
    if mode == self.MODE_NEW_MSG:
        # Decode before saving so the backed-up messages are the raw log
        # lines, the same form sendExCallback writes, instead of their
        # base64 transport encoding.
        encoded_lines = json.loads(datas[0][1][1])
        raw_lines = [base64.b64decode(item) for item in encoded_lines]
        self.savemsg(raw_lines, self.backup)
    elif mode == self.MODE_NEW_FILE:
        self.savefile(datas[0][1][1], self.backup)
    elif mode == self.MODE_EX_FILE:
        # NOTE(review): the file is moved to self.waitdir here - confirm this
        # is the intended destination for an upload the server rejected.
        self.movefile(datas[0][1][1], self.waitdir)
    elif mode == self.MODE_ERR_FILE:
        pass
4.启动线程监控waitback文件夹,定期检查,如果文件夹不为空,则尝试发送文件记录至后台服务
def __init__(self, server_url, upload_file_action, upload_msg_action, file, encoding, sendinterval, retryinterval):
    ...  # remaining constructor setup is elided in this excerpt
    self.start_monitor_server(self.retryinterval)


def start_monitor_server(self, interval):
    """Start the background thread that runs ``check_server_task``.

    Args:
        interval: value handed to ``check_server_task`` as its sleep time
            between two checks.

    A failure to start the thread is logged and not re-raised.
    """
    try:
        # Let logging format the value: ``bytes(interval)`` only produces the
        # decimal text on Python 2, where ``bytes`` is ``str``.
        logger.info("start monitor server thread, interval:%s", interval)
        thread.start_new_thread(self.check_server_task, ("monitor_server_thread", interval))
    except Exception:
        # ``Exception`` rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not swallowed here.
        logger.exception("start monitor_server_thread failed")


def check_server_task(self, threadName, interval):
    """Thread body: forever hand the files found in ``self.waitdir`` to ``redo``.

    Each round lists ``self.waitdir``, keeps the entries that are regular
    files (as full paths) and, if there are any, calls
    ``self.redo(paths, self.MODE_EX_FILE)``.  The thread then sleeps
    ``interval`` seconds.

    Args:
        threadName: label supplied by the starter; not used in the body.
        interval: seconds to sleep between rounds.
    """
    while True:
        try:
            entries = os.listdir(self.waitdir)
        except OSError:
            # A missing or unreadable directory must not end the retry
            # thread for good; report it and try again next round.
            logger.exception("list wait directory failed")
            entries = []
        candidates = (os.path.join(self.waitdir, name) for name in entries)
        pending = [path for path in candidates if os.path.isfile(path)]
        if pending:
            self.redo(pending, self.MODE_EX_FILE)
        time.sleep(interval)
5.脚本启动时尝试发送处理失败的日志记录,通过线程处理
def __init__(self, server_url, upload_file_action, upload_msg_action, file, encoding, sendinterval, retryinterval):
    ...  # remaining constructor setup is elided in this excerpt
    self.start_retry_error_task()


def start_retry_error_task(self):
    """Start a background thread that runs ``check_error`` once.

    A failure to start the thread is logged and not re-raised.
    """
    try:
        logger.info("start error retry task")
        thread.start_new_thread(self.check_error, ("error_task_retry_thread",))
    except Exception:
        # ``Exception`` rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not swallowed here.
        logger.exception("start error_task_retry_thread failed")


def check_error(self, threadName):
    """Hand every regular file found in ``self.backup`` to ``redo``.

    The directory is listed once; entries that are regular files are
    collected as full paths and, if there are any, passed to
    ``self.redo(paths, self.MODE_ERR_FILE)``.  A backup directory that does
    not exist is treated as "nothing to retry".

    Args:
        threadName: label supplied by the starter; not used in the body.
    """
    if not os.path.isdir(self.backup):
        # No backup directory yet: return instead of letting os.listdir
        # raise inside the thread.
        return
    candidates = (os.path.join(self.backup, name) for name in os.listdir(self.backup))
    failed = [path for path in candidates if os.path.isfile(path)]
    if failed:
        self.redo(failed, self.MODE_ERR_FILE)
【注】日志原始编码为 GB2312,但其中部分日志存在乱码,因此暂时先对每条日志做 Base64 编码,再整体做 JSON 编码;发送失败时还需要重新解码,然后再保存到文件,效率较低。