Illustration of the communication process between a Spark Master and its Workers
Code Explanation
SparkMaster
Functions: 1. Receive worker registrations and keep a record of each worker;
2. Receive worker heartbeats and record the time of the latest heartbeat;
3. Periodically check worker liveness (remove workers that have not sent a heartbeat within the allotted time).
package cn.sheep.spark
import akka.actor.{Actor, ActorSystem, Props}
import com.typesafe.config.ConfigFactory
import scala.concurrent.duration._
class SparkMaster extends Actor {
  // Registered workers keyed by worker id; each WorkerInfo holds id/core/ram
  // plus the timestamp of the worker's most recent heartbeat.
  val id2WorkerInfo = collection.mutable.HashMap[String, WorkerInfo]()

  override def receive: Receive = {
    // A worker registers itself with its id, CPU core count and RAM size.
    case RegisterWorkerInfo(wkId, core, ram) =>
      if (!id2WorkerInfo.contains(wkId)) {
        val workerInfo = new WorkerInfo(wkId, core, ram)
        // Record the registration moment as the first "heartbeat" so a freshly
        // registered worker is not purged by the timeout check before its
        // first real heartbeat arrives.
        workerInfo.lastHeartBeatTime = System.currentTimeMillis()
        id2WorkerInfo += ((wkId, workerInfo))
        // Acknowledge registration; the worker starts its heartbeat timer on this.
        sender() ! RegisteredWorkerInfo
      }

    case HearBeat(wkId) =>
      // Use .get instead of apply: a heartbeat may arrive from a worker that
      // was already purged (or never registered) — apply would throw
      // NoSuchElementException and crash/restart the actor.
      id2WorkerInfo.get(wkId).foreach { workerInfo =>
        workerInfo.lastHeartBeatTime = System.currentTimeMillis()
      }

    // Sent once at startup: schedule RemoveTimeOutWorker to self every 6 s.
    case CheckTimeOutWorker =>
      import context.dispatcher // implicit ExecutionContext required by the scheduler
      context.system.scheduler.schedule(0 millis, 6000 millis, self, RemoveTimeOutWorker)

    // Periodic tick: purge every worker whose last heartbeat is older than 3 s.
    case RemoveTimeOutWorker =>
      val currentTime = System.currentTimeMillis()
      id2WorkerInfo.values
        .filter(wkInfo => currentTime - wkInfo.lastHeartBeatTime > 3000)
        .foreach(wk => id2WorkerInfo.remove(wk.id))
      // Report how many workers are still alive after each sweep.
      println(s"-----还剩 ${id2WorkerInfo.size} 存活的Worker-----")
  }
}
object SparkMaster {
  /**
   * Entry point: starts the master actor system.
   * Expects exactly three arguments: <host> <port> <masterName>.
   * (Removed the unused private fields `name` and `age` — dead code.)
   */
  def main(args: Array[String]): Unit = {
    if (args.length != 3) {
      println(
        """
          |请输入参数:<host> <port> <masterName>
        """.stripMargin)
      sys.exit() // abort: wrong argument count
    }
    val host = args(0)
    val port = args(1)
    val masterName = args(2)
    // Akka remoting config: enable the remote provider and bind to host:port.
    val config = ConfigFactory.parseString(
      s"""
         |akka.actor.provider="akka.remote.RemoteActorRefProvider"
         |akka.remote.netty.tcp.hostname=$host
         |akka.remote.netty.tcp.port=$port
       """.stripMargin)
    val actorSystem = ActorSystem("sparkMaster", config)
    val masterActorRef = actorSystem.actorOf(Props[SparkMaster], masterName)
    // Message the master once so it starts the periodic timeout-check scheduler.
    masterActorRef ! CheckTimeOutWorker
  }
}
SparkWorker
Features: 1. Register with the master on startup;
2. Periodically send heartbeats to the master.
package cn.sheep.spark
import java.util.UUID
import akka.actor.{Actor, ActorSelection, ActorSystem, Props}
import com.typesafe.config.ConfigFactory
import scala.concurrent.duration._ // 导入时间单位
class SparkWorker(masterUrl: String) extends Actor {
  // Selection handle for the remote master actor (resolved in preStart).
  var masterProxy: ActorSelection = _
  // Unique identity of this worker instance.
  val workId = UUID.randomUUID().toString

  // Resolve the master's address before any message is processed.
  override def preStart(): Unit = {
    masterProxy = context.actorSelection(masterUrl)
  }

  override def receive: Receive = {
    // Sent by main() once this actor is up: register with the master,
    // reporting id, core count and RAM (in MB).
    case "started" =>
      masterProxy ! RegisterWorkerInfo(workId, 4, 32 * 1024)

    // The master acknowledged our registration: start a timer that fires
    // SendHeartBeat to ourselves every 1.5 s.
    case RegisteredWorkerInfo =>
      import context.dispatcher // implicit ExecutionContext for the scheduler
      context.system.scheduler.schedule(0 millis, 1500 millis, self, SendHeartBeat)

    // Timer tick: push a heartbeat to the master.
    case SendHeartBeat =>
      println(s"------- $workId 发送心跳-------")
      masterProxy ! HearBeat(workId)
  }
}
object SparkWorker {
  /**
   * Entry point: starts one worker actor system.
   * Expects exactly four arguments: <host> <port> <workName> <masterURL>.
   */
  def main(args: Array[String]): Unit = {
    if (args.length != 4) {
      println(
        """
          |请输入参数:<host> <port> <workName> <masterURL>
        """.stripMargin)
      sys.exit() // abort: wrong argument count
    }
    // Destructure the validated argument array in one step.
    val Array(host, port, workName, masterURL) = args
    // Akka remoting config binding this worker to host:port.
    val config = ConfigFactory.parseString(
      s"""
         |akka.actor.provider="akka.remote.RemoteActorRefProvider"
         |akka.remote.netty.tcp.hostname=$host
         |akka.remote.netty.tcp.port=$port
       """.stripMargin)
    val actorSystem = ActorSystem("sparkWorker", config)
    // Create the worker actor, handing it the master's URL.
    val workerActorRef = actorSystem.actorOf(Props(new SparkWorker(masterURL)), workName)
    // Tell the worker it is ready so it registers itself with the master.
    workerActorRef ! "started"
  }
}
MessageProtocol
Message specification: case classes and case objects that encapsulate the
message data types used in pattern matching between master and worker.
package cn.sheep.spark
/**
 * worker -> master
 */
// Sent by a worker to register itself (id, CPU cores, RAM) with the master.
case class RegisterWorkerInfo(id: String, core: Int, ram: Int)
// Heartbeat from a worker; only the worker id is needed.
// NOTE(review): name has a typo ("HearBeat" vs "HeartBeat") — kept as-is
// because both actors pattern-match on this exact name.
case class HearBeat(id: String)
/**
 * master -> worker
 */
// Sent by the master to acknowledge a successful registration.
case object RegisteredWorkerInfo
// Worker-to-self message: time to send the next heartbeat to the master.
case object SendHeartBeat
// Master-to-self message: start the scheduler that periodically checks for timed-out workers.
case object CheckTimeOutWorker
// Master-to-self message (scheduled): remove workers whose heartbeat has timed out.
case object RemoveTimeOutWorker
// Registration record for one worker, stored in the master's id -> info map.
class WorkerInfo(val id: String, core: Int, ram: Int) {
  // Millis timestamp of the most recent heartbeat. Initialised to the creation
  // time (instead of the default 0) so that a freshly registered worker is not
  // immediately purged by the master's timeout sweep before its first heartbeat.
  var lastHeartBeatTime: Long = System.currentTimeMillis()
}