import java.io.{File, FileWriter, PrintWriter}
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
import scala.collection.mutable
import scala.util.parsing.json.JSON
/**
* @author shkstart
* @create 2020-08-11 22:34
*/
object demo {
// Entry point. Step 1: fun() appends a skeleton JSON record to data.json for
// every deviceId in next.txt that data.json does not know yet. Step 2: a local
// Spark job parses data.json, merges per-line uid/ts/score data from next.txt
// into each matching device record, and prints the updated records.
def main(args: Array[String]): Unit = {
fun() // perform the append step: add missing deviceId records to data.json
// Driver distributes tasks to executors, then pulls executor results back to the driver.
val conf: SparkConf = new SparkConf().setMaster("local[2]").setAppName("CreateRDD")
val sc = new SparkContext(conf)
// Each line of data.json is expected to be a single JSON object.
val rdd1: RDD[String] = sc.textFile("data.json")
import scala.util.parsing.json.JSON
val rdd2: RDD[Option[Any]] = rdd1.map(JSON.parseFull)
// NOTE(review): non-exhaustive match — a line that fails to parse (None)
// will raise a MatchError when the RDD is eventually evaluated.
var rdd3: RDD[Map[String, Any]] = rdd2.map {
case Some(x) => x.asInstanceOf[Map[String, Any]]
}
import scala.io.Source
// next.txt lines are comma-separated. Observed layout from the code below:
// field 0 = uid, field 1 = deviceId, field 2 = ts; a score string is appended as field 3.
val lines: List[String] = Source.fromFile("next.txt").getLines().toList
// Running per-device score: 100 on first sighting of a deviceId, minus 10 for each repeat.
val score: mutable.HashMap[String, Double] = new mutable.HashMap[String, Double]()
for (line <- lines) {
// Highest score / its uid seen while scanning a device's user list;
// used as the fallback "guid" further down in this iteration's closure.
var max: Double = Double.MinValue
var m: String = null
val words: Array[String] = line.split(",")
if (words(0).length != 0) { // skip lines whose uid field is empty
val wBUffer: mutable.Buffer[String] = words.toBuffer
if (!score.contains(wBUffer(1))) {
score.put(wBUffer(1), 100)
} else {
score.put(wBUffer(1), score.get(wBUffer(1)).get - 10.0)
}
// Append the current score as an extra string field (index 3) for this line.
wBUffer.append(score.get(wBUffer(1)).get + "")
// NOTE(review): rdd3 is re-assigned inside this driver-side loop, stacking one
// lazy map() per input line; nothing executes until collect() at the bottom.
rdd3 = rdd3.map {
// Only the record whose deviceId matches this line's deviceId is rewritten.
case map: Map[String, Any] if map.get("deviceId").get.asInstanceOf[String].equals(wBUffer(1)) => {
// The rebuilt record; LinkedHashMap preserves key insertion order.
val temp: mutable.LinkedHashMap[String, Any] = mutable.LinkedHashMap[String, Any]()
temp.put("deviceId", wBUffer(1))
// The record's existing user list ("lst").
var ll: List[Map[String, Any]] = map.get("lst").get.asInstanceOf[List[Map[String, Any]]]
val tempMap: mutable.Buffer[Map[String, Any]] = ll.toBuffer // the individual lst entries
val curr: mutable.Buffer[Map[String, Any]] = new mutable.ListBuffer[Map[String, Any]]()
if (tempMap.length == 0) {
// Empty list: seed it with this line's uid/ts/score (ts and score as Doubles here).
temp.put("lst", List(Map(("uid", wBUffer(0)), ("ts", wBUffer(2).toDouble), ("score", wBUffer(3).toDouble))))
if (map.get("guid") != None) {
// A blank guid (" ", as written by fun()) is treated as unset and replaced with this uid.
if(map.get("guid").get.asInstanceOf[String].equals(" ")){
temp.put("guid", wBUffer(0))
}else{
temp.put("guid", map.get("guid").get)
}
}
} else {
var flag: Boolean = false // set when an existing entry for this uid is found
for (i <- 0 until tempMap.length) { // for each existing user entry
if (tempMap(i).get("uid").get.equals(wBUffer(0))) {
// Existing entry for this uid: refresh ts and accumulate the score.
val map1: mutable.Map[String, Any] = scala.collection.mutable.Map(tempMap(i).toSeq: _*)
map1.put("ts", wBUffer(2))
var value: Double = 0.0
// The stored score may be a String (appended below) or a Double; normalize it.
if (tempMap(i).get("score").get.isInstanceOf[String]) {
value = tempMap(i).get("score").get.asInstanceOf[String].toDouble
} else {
value = tempMap(i).get("score").get.asInstanceOf[Double]
}
map1.put("score", wBUffer(3).toDouble + value)
curr.append(map1.toMap)
flag = true
} else {
curr.append(tempMap(i))
}
// Track the best-scoring uid as the fallback guid candidate.
// NOTE(review): this cast assumes score is a Double, but JSON.parseFull
// yields Doubles while the not-found branch below stores score as a
// String — a String-valued score here would throw ClassCastException.
if (max < curr(i).get("score").get.asInstanceOf[Double]) {
max = curr(i).get("score").get.asInstanceOf[Double]
m = curr(i).get("uid").get.asInstanceOf[String]
}
}
if (!flag) {
// uid not present yet: append a new entry (ts and score kept as Strings here).
curr.append(Map(("uid", wBUffer(0)), ("ts", wBUffer(2)), ("score", wBUffer(3))))
}
temp.put("lst", curr.toList)
// Keep the existing guid only when it starts with 'd' (i.e. looks like a
// deviceId); otherwise fall back to the highest-scoring uid found above.
if (map.get("guid").get.asInstanceOf[String].charAt(0) == 'd') {
temp.put("guid", map.get("guid").get)
}
if (!temp.contains("guid")) {
temp.put("guid", m)
}
}
temp.toMap
}
// Records for other devices pass through unchanged.
case map: Map[String, Any] => map
}
}
}
rdd3.collect().foreach(println)
}
// Scans data.json for the deviceIds it already contains, then appends a skeleton
// record {"deviceId":..., "lst":[], "guid":" "} for every deviceId (field 1 of
// each next.txt line) not present yet. Runs before main()'s Spark job reads
// data.json, so the new records are visible to it.
def fun(): Unit = {
import scala.io.Source
val strings: Iterator[String] = Source.fromFile("data.json").getLines()
val so: mutable.HashSet[String] = mutable.HashSet[String]() // deviceIds already present in data.json
while (strings.hasNext) {
val option: Option[Any] = JSON.parseFull(strings.next())
// NOTE(review): non-exhaustive match — an unparseable line (None) throws MatchError.
option match {
case Some(label: Map[String, Any]) => {
so.add(label.get("deviceId").get.asInstanceOf[String])
}
}
}
// Open in append mode (second argument true) so existing records are preserved.
val writer: FileWriter = new FileWriter(new File("data.json"), true)
val strings1: Iterator[String] = Source.fromFile("next.txt").getLines()
while (strings1.hasNext) {
val str: Array[String] = strings1.next().split(",")
if (!so.contains(str(1))) { // deviceId (field 1) not seen yet
var s : String = "{\"deviceId\":\"" + str(1) + "\",\"lst\":[], \"guid\":\" \"}"
println(s)
writer.write("\n" + s);
so.add(str(1))
}
}
writer.close()
}
}
Maven pom dependencies (this snippet belongs in the project's pom.xml, not in this Scala source file):
<dependencies>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId>
<version>2.1.1</version>
</dependency>
</dependencies>