用 Python 写一个服务，监控另一个程序中的方法是否超时

# monitor_service.py

import logging
import os
import signal
import threading
import time
from logging.handlers import TimedRotatingFileHandler

from flask import Flask, jsonify, request
# Rotate the log file at midnight, once per day, keeping 7 rotated backups.
log_handler = TimedRotatingFileHandler(
    'monitor_service.log',
    when='midnight',
    interval=1,
    backupCount=7,
)
log_handler.suffix = "%Y-%m-%d"  # rotated files are suffixed year-month-day
log_handler.setFormatter(
    logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
)

# Attach the handler to the root logger and let INFO and above through.
_root_logger = logging.getLogger()
_root_logger.addHandler(log_handler)
_root_logger.setLevel(logging.INFO)

app = Flask(__name__)

# Monitored processes, stored as {process ID: timestamp of last success}.
monitored_processes = {}

# Timeout in seconds.
TIMEOUT = 3

@app.route('/register', methods=['POST'])
def register_process():
    """Register a process for timeout monitoring.

    Expects a JSON body of the form ``{"process_id": <int>}``. The process is
    stored in ``monitored_processes`` with the current server time as its
    timestamp, which starts (or restarts) its timeout window.

    Returns:
        A JSON success response, or a JSON error response with HTTP 400 when
        the body is not a JSON object or ``process_id`` is not a positive
        integer.
    """
    # silent=True yields None instead of raising on a missing/invalid body.
    data = request.get_json(silent=True)
    process_id = data.get('process_id') if isinstance(data, dict) else None

    # The ID is eventually passed to os.kill() by the monitor. There, zero and
    # negative values address whole process groups, and bool is an int
    # subclass (True == 1), so only genuine positive integers are accepted.
    is_valid_pid = (
        isinstance(process_id, int)
        and not isinstance(process_id, bool)
        and process_id > 0
    )
    if not is_valid_pid:
        return jsonify({
            'status': 'error',
            'message': 'process_id must be a positive integer.',
        }), 400

    monitored_processes[process_id] = time.time()
    return jsonify({
        'status': 'success', 'message': 'Process registered successfully.'})

@app.route('/success', methods=['POST'])
def process_success():
    """Record a success report, refreshing the process's stored timestamp.

    Expects a JSON body of the form
    ``{"process_id": <int>, "timestamp": <number>}``. The client-supplied
    timestamp replaces the stored value for that process.

    Returns:
        A JSON success response, or a JSON error response with HTTP 400 when
        the body is not a JSON object, ``process_id`` is not a positive
        integer, or ``timestamp`` is not a number.
    """
    # silent=True yields None instead of raising on a missing/invalid body.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({
            'status': 'error',
            'message': 'Request body must be a JSON object.',
        }), 400

    process_id = data.get('process_id')
    timestamp = data.get('timestamp')

    # The ID is eventually passed to os.kill() by the monitor. There, zero and
    # negative values address whole process groups, and bool is an int
    # subclass (True == 1), so only genuine positive integers are accepted.
    is_valid_pid = (
        isinstance(process_id, int)
        and not isinstance(process_id, bool)
        and process_id > 0
    )
    if not is_valid_pid:
        return jsonify({
            'status': 'error',
            'message': 'process_id must be a positive integer.',
        }), 400

    # The monitor loop subtracts this value from time.time(); a non-numeric
    # value would raise there and stop the monitoring thread.
    is_valid_timestamp = (
        isinstance(timestamp, (int, float))
        and not isinstance(timestamp, bool)
    )
    if not is_valid_timestamp:
        return jsonify({
            'status': 'error',
            'message': 'timestamp must be a number.',
        }), 400

    monitored_processes[process_id] = timestamp
    return jsonify({
        'status': 'success', 'message': 'Process success reported.'})

def monitor():
    """Run forever, killing monitored processes whose timestamp is stale.

    Once per second, every process in ``monitored_processes`` whose stored
    timestamp is more than ``TIMEOUT`` seconds behind the current time is
    passed to ``kill_process`` and then removed from the dictionary.
    """
    while True:
        now = time.time()
        # Iterate over a snapshot of the keys so entries can be removed
        # (or added by request handlers) while the scan is in progress.
        for pid in tuple(monitored_processes):
            elapsed = now - monitored_processes[pid]
            if elapsed <= TIMEOUT:
                continue
            kill_process(pid)
            monitored_processes.pop(pid, None)  # stop monitoring it
        time.sleep(1)  # interval between scans

def kill_process(process_id):
    """Forcibly terminate a process and log the outcome.

    Args:
        process_id: PID of the process to kill.

    Errors reported by the operating system (for example, no such process or
    insufficient permission) are logged at ERROR level rather than raised.
    """
    # The signal number must be a valid positive value: a negative number such
    # as -9 is rejected by os.kill() with an OSError and nothing is killed.
    # SIGKILL is not defined on every platform (e.g. Windows), so fall back
    # to SIGTERM there.
    kill_signal = getattr(signal, 'SIGKILL', signal.SIGTERM)
    try:
        os.kill(process_id, kill_signal)
    except OSError as e:
        logging.error(f"Error killing process {process_id}: {e}")
    else:
        logging.info(f"Process {process_id} has been killed due to timeout.")

# Start the monitor loop in a background thread. It is a daemon thread so it
# never keeps the interpreter alive once the main thread exits.
monitor_thread = threading.Thread(target=monitor, daemon=True)
monitor_thread.start()

if __name__ == '__main__':
    # With debug=True the reloader is enabled by default; it re-executes this
    # module in a child process, which would start a second monitor thread.
    # Disable the reloader so exactly one monitor runs.
    app.run(debug=True, use_reloader=False)

import requests
import time
import os

def monitored_method():
    """Stand-in for the workload whose execution is being monitored.

    Prints a marker line, blocks for 11 seconds to simulate the method's
    execution time, and then reports success.

    Returns:
        Always ``True``.
    """
    print("执行被监控的方法")
    simulated_seconds = 11  # pretend execution time of the real method
    time.sleep(simulated_seconds)
    return True

def register_process(process_id, timeout=5):
    """Register a process with the monitoring service.

    Posts ``{"process_id": process_id}`` as JSON to
    ``http://127.0.0.1:5000/register``. Any request failure (connection
    error, timeout, 4xx/5xx response) is printed rather than raised.

    Args:
        process_id: PID to register.
        timeout: Seconds to wait for the service before giving up; without
            a timeout the request could block indefinitely.
    """
    try:
        response = requests.post(
            'http://127.0.0.1:5000/register',
            json={'process_id': process_id},
            timeout=timeout,
        )
        response.raise_for_status()  # Raises an HTTPError for bad responses
    except requests.RequestException as e:
        print(f"Failed to register process: {e}")

def report_success(process_id, timeout=5):
    """Report a successful execution to the monitoring service.

    Posts ``{"process_id": process_id, "timestamp": <current time>}`` as JSON
    to ``http://127.0.0.1:5000/success``. Any request failure (connection
    error, timeout, 4xx/5xx response) is printed rather than raised.

    Args:
        process_id: PID the report is for.
        timeout: Seconds to wait for the service before giving up; without
            a timeout the request could block indefinitely.
    """
    try:
        response = requests.post(
            'http://127.0.0.1:5000/success',
            json={'process_id': process_id, 'timestamp': time.time()},
            timeout=timeout,
        )
        response.raise_for_status()  # Raises an HTTPError for bad responses
    except requests.RequestException as e:
        print(f"Failed to report success: {e}")

def report_failure(process_id, error, timeout=5):
    """Report a failed execution to the monitoring service.

    Posts ``{"process_id": ..., "error": str(error), "timestamp": <current
    time>}`` as JSON to ``http://127.0.0.1:5000/failure``. Any request
    failure (connection error, timeout, 4xx/5xx response) is printed rather
    than raised.

    Args:
        process_id: PID the report is for.
        error: Exception or message describing the failure; sent as a string.
        timeout: Seconds to wait for the service before giving up; without
            a timeout the request could block indefinitely.
    """
    # NOTE(review): the monitor service shown in this file only defines the
    # /register and /success routes, so against it this request would get an
    # error status and land in the except branch - confirm whether a
    # /failure endpoint exists elsewhere.
    try:
        response = requests.post(
            'http://127.0.0.1:5000/failure',
            json={
                'process_id': process_id,
                'error': str(error),
                'timestamp': time.time(),
            },
            timeout=timeout,
        )
        response.raise_for_status()  # Raises an HTTPError for bad responses
    except requests.RequestException as e:
        print(f"Failed to report failure: {e}")

if __name__ == '__main__':
    process_id = os.getpid()

    # Run forever: register, execute the monitored method, report the result.
    while True:
        try:
            register_process(process_id)
            if monitored_method():
                report_success(process_id)
            else:
                report_failure(process_id, "monitored_method returned False")
        except Exception as e:
            # Top-level boundary: report the error and keep the loop alive.
            print(f"An error occurred: {e}")
            report_failure(process_id, e)

        # Wait for a specified interval before the next execution
        time.sleep(10)

猜你喜欢

转载自blog.csdn.net/weixin_32759777/article/details/143029350