A Go program for elastic scaling of Volcano Engine cloud servers, driven by RabbitMQ queue length

Author: wencoo
Blog:https://wencoo.blog.csdn.net/
Date: 18/04/2023
Details:

Background of the project

The project did not have enough servers, so elastically scaled servers were needed, and I planned to use Volcano Engine's built-in elastic scaling feature. During configuration, however, I found that its scaling metrics all monitor the load of the machine itself and scale on that basis. My project runs on GPU servers: when the program starts, it requests the GPU resources it needs and immediately reaches the maximum, so the resources stay occupied even when there are no tasks. The tasks come from the RabbitMQ middleware, so in this case on-machine monitoring is not a reasonable scaling signal.

project function

Because RabbitMQ is used, tasks are stored in a queue. The program first obtains the number of tasks in the queue and then, according to the scaling strategy, decides whether to create an elastic scaling server. After a server has been created, the program waits for the backlog to be consumed and then deletes the previously created elastic scaling server.

module implementation

Create a configuration file configMq.json for the MQ-related settings, a configuration file configECS.json for the server-related settings, and a configuration file configDB.json for the data that would otherwise live in a database. The data involved is very simple and a full database is not necessary, so configuration files are used directly instead.

configMq.json

{
	"port": 5672,
	"hostName": "106.15.0.159",
	"passwd": "xxx",
	"userName": "admin",
	"maxCounts": 3000,
	"interval": 3000
}

configECS.json

{
	"ImageId":            "image-ycct00qocb4qwlkufab4",
	"InstanceName":       "instance-test",
	"InstanceTypeId":     "ecs.g1tl.4xlarge",
	"Password":           "xxx",	
	"ZoneId":             "cn-beijing-b"
}

configDB.json

{
	"min":2,
	"max":3,
	"base":[
		"i-yc9mljz3vo2ud9z787ky",
		"i-ycbpj0u2pz8rx7cc8n06"
	],
	"elasticity": [
	],
	"rule":1
}

Read related configuration information

// Read configDB.json and decode it into dbS.
	jsonFile, err := os.Open("configDB.json")
	if err != nil {
		fmt.Println("error opening json file")
		return
	}
	defer jsonFile.Close()

	jsonData, err := ioutil.ReadAll(jsonFile)
	if err != nil {
		fmt.Println("error reading json file")
		return
	}
	var dbS DbConfStruct
	// NOTE(review): the error returned by json.Unmarshal is discarded, so a
	// malformed file leaves dbS partly or wholly at its zero value.
	json.Unmarshal(jsonData, &dbS)
	fmt.Println(dbS.Max)

	// Read configECS.json and decode it into ecsS.
	jsonFile, err = os.Open("configECS.json")
	if err != nil {
		fmt.Println("error opening json file")
		return
	}
	defer jsonFile.Close()

	jsonData, err = ioutil.ReadAll(jsonFile)
	if err != nil {
		fmt.Println("error reading json file")
		return
	}
	var ecsS EcsConfStruct
	// NOTE(review): Unmarshal error discarded here as well.
	json.Unmarshal(jsonData, &ecsS)
	fmt.Println(ecsS.ImageId)

	// Read configMq.json and decode it into mqS.
	jsonFile, err = os.Open("configMq.json")
	if err != nil {
		fmt.Println("error opening json file")
		return
	}
	defer jsonFile.Close()

	jsonData, err = ioutil.ReadAll(jsonFile)
	if err != nil {
		fmt.Println("error reading json file")
		return
	}
	var mqS MqConfStruct
	// NOTE(review): Unmarshal error discarded here as well.
	json.Unmarshal(jsonData, &mqS)
	fmt.Println(mqS.Port)

Get the number of tasks in the mq queue

// Get the number of tasks currently waiting in the MQ queue.
// NOTE(review): the three string arguments are presumably the queue name,
// exchange and routing key — confirm against getQueueMsgCounts.
		msgCounts := getQueueMsgCounts("material.analysis.new", "material.exchange",
			"material.analysis.new", mqS.HostName, mqS.UserName, mqS.Passwd, mqS.Port)

Create rules for auto-scaling machines and create new servers

// Count the servers already recorded and compute the upper edge of the
// backlog window in which a new elastic server is requested.
baseCounts := len(dbS.Base)
		elasticityCounts := len(dbS.Elasticity)
		nextMaxCounts := mqS.MaxCounts + (elasticityCounts+1)*mqS.Interval

		// At the configured server cap nothing more is started; the jump to
		// Sleep also skips everything below it.
		if (baseCounts + elasticityCounts) >= dbS.Max {
			logger.Println("启动服务器数量已达到最大值...不再继续启动新服务器")
			goto Sleep
		}

		if msgCounts >= mqS.MaxCounts && msgCounts < nextMaxCounts {
			// Request a new server.
			serverIdList, err := RunInstances()
			var serverId *string
			if err == nil {
				if len(serverIdList) >= 1 {
					serverId = serverIdList[0]
					// NOTE(review): serverId is a *string, so these two lines
					// print the pointer value rather than the ID text.
					logger.Println("创建伸缩服务器成功,serverId:", serverId)
					fmt.Println("create server id:", serverId)
				}
			} else {
				logger.Println("创建伸缩服务器失败,err:", err.Error())
				goto Sleep
			}
			...

delete server resources

// Release resources: delete the most recently recorded elastic server, if any.
			elasticityCount := len(dbS.Elasticity)
			if 0 == elasticityCount {
				fmt.Println("没有伸缩资源,继续休眠")
				goto Sleep
			} else {
				// The last entry of dbS.Elasticity is the one that gets deleted.
				eServerId := dbS.Elasticity[len(dbS.Elasticity)-1].EServerId
				// NOTE(review): any result returned by DeleteInstance is not inspected here.
				DeleteInstance(eServerId)
				fmt.Println("删除伸缩资源:", eServerId)
			}

full code

package main

import (
	"encoding/json"
	"fmt"
	"io/ioutil"
	"log"
	"os"
	"time"
)

// logger is the process-wide file logger; init points it at ./log.txt.
var logger *log.Logger

// init opens ./log.txt (creating it when absent, emptying it when present)
// and installs a logger on it whose lines carry a microsecond-resolution
// time and the caller's file:line. It panics if the file cannot be opened.
func init() {
	const (
		logPath   = "./log.txt"
		openFlags = os.O_RDWR | os.O_CREATE | os.O_APPEND | os.O_TRUNC
	)

	sink, openErr := os.OpenFile(logPath, openFlags, 0666)
	if openErr != nil {
		panic(openErr)
	}

	logger = log.New(sink, "", log.Lshortfile|log.Lmicroseconds)
}

func main() {
	logger.Println("application start ...")

	//读取configDB.json
	jsonFile, err := os.Open("configDB.json")
	if err != nil {
		fmt.Println("error opening json file")
		return
	}
	defer jsonFile.Close()

	jsonData, err := ioutil.ReadAll(jsonFile)
	if err != nil {
		fmt.Println("error reading json file")
		return
	}
	var dbS DbConfStruct
	json.Unmarshal(jsonData, &dbS)
	fmt.Println(dbS.Max)

	//读取configECS.json
	jsonFile, err = os.Open("configECS.json")
	if err != nil {
		fmt.Println("error opening json file")
		return
	}
	defer jsonFile.Close()

	jsonData, err = ioutil.ReadAll(jsonFile)
	if err != nil {
		fmt.Println("error reading json file")
		return
	}
	var ecsS EcsConfStruct
	json.Unmarshal(jsonData, &ecsS)
	fmt.Println(ecsS.ImageId)

	//读取configMq.json
	jsonFile, err = os.Open("configMq.json")
	if err != nil {
		fmt.Println("error opening json file")
		return
	}
	defer jsonFile.Close()

	jsonData, err = ioutil.ReadAll(jsonFile)
	if err != nil {
		fmt.Println("error reading json file")
		return
	}
	var mqS MqConfStruct
	json.Unmarshal(jsonData, &mqS)
	fmt.Println(mqS.Port)

	for {
		//获取mq队列任务数量
		msgCounts := getQueueMsgCounts("material.analysis.new", "material.exchange",
			"material.analysis.new", mqS.HostName, mqS.UserName, mqS.Passwd, mqS.Port)

		fmt.Println("get queue counts is: ", msgCounts)

		baseCounts := len(dbS.Base)
		elasticityCounts := len(dbS.Elasticity)
		nextMaxCounts := mqS.MaxCounts + (elasticityCounts+1)*mqS.Interval

		if (baseCounts + elasticityCounts) >= dbS.Max {
			logger.Println("启动服务器数量已达到最大值...不再继续启动新服务器")
			goto Sleep
		}

		if msgCounts >= mqS.MaxCounts && msgCounts < nextMaxCounts {
			//资源申请
			serverIdList, err := RunInstances()
			var serverId *string
			if err == nil {
				if len(serverIdList) >= 1 {
					serverId = serverIdList[0]
					logger.Println("创建伸缩服务器成功,serverId:", serverId)
					fmt.Println("create server id:", serverId)
				}
			} else {
				logger.Println("创建伸缩服务器失败,err:", err.Error())
				goto Sleep
			}
			//写入json
			var eesId ElasticityMachine
			eesId.EServerId = *serverId
			dbS.Elasticity = append(dbS.Elasticity, eesId)

			jsonFile, err = os.Open("configDB.json")
			if err != nil {
				fmt.Println("error opening json file")
				return
			}
			defer jsonFile.Close()

			data, err := json.MarshalIndent(dbS, "", "	") // 第二个表示每行的前缀,这里不用,第三个是缩进符号,这里用tab
			if err != nil {
				panic(err)
			}

			err = ioutil.WriteFile("configDB.json", data, 0777)
			if err != nil {
				panic(err)
			}

		} else {
			fmt.Println("队列任务较少,尝试释放资源")
			//查询弹性设备创建时间,不用查,直接删除任意一个,反正值伸缩出来的服务
			// for _, serverId := range dbS.Elasticity {
			// 	sid := serverId.EServerId

			// }
			//资源释放
			elasticityCount := len(dbS.Elasticity)
			if 0 == elasticityCount {
				fmt.Println("没有伸缩资源,继续休眠")
				goto Sleep
			} else {
				eServerId := dbS.Elasticity[len(dbS.Elasticity)-1].EServerId
				DeleteInstance(eServerId)
				fmt.Println("删除伸缩资源:", eServerId)
			}
			//删除数据写入json
			dbS.Elasticity = dbS.Elasticity[:len(dbS.Elasticity)-1]
			jsonFile, err = os.Open("configDB.json")
			if err != nil {
				fmt.Println("error opening json file")
				return
			}
			defer jsonFile.Close()

			data, err := json.MarshalIndent(dbS, "", "   ") // 第二个表示每行的前缀,这里不用,第三个是缩进符号,这里用tab
			if err != nil {
				panic(err)
			}

			err = ioutil.WriteFile("configDB.json", data, 0777)
			if err != nil {
				panic(err)
			}
		}
	Sleep:
		time.Sleep(time.Second * 300)
	}

}

The project code has been uploaded to Gitee; repository: volcano_ecs_manage

reward

If this article was helpful to you, please consider giving it a like; your support is the greatest encouragement for original work.
insert image description here

Guess you like

Origin blog.csdn.net/bootleader/article/details/130221289