20.三个集合的小练习

三个集合的小练习

wordCount简单

/**
 * 对一个List进行wordCount,返回出现次数最多的三个单词,及其出现次数
 */
object WordCountSimple {
    def main(args: Array[String]): Unit = {
        // WordCount
        val list = List("Hello", "world", "Hello", "world", "Hello World", "Hello Hadoop", "Hello spark")
        
        // 将词组转换单词,1=>将一个String转换为一个字符数组,再将数组扁平化,可以直接使用flatMap()

        val list1: List[String] = list.flatMap(_.split(" "))
        // 将单词分组
        val stringToStrings: Map[String, List[String]] = list1.groupBy(s => s)
        // 将Map(hellp->list(hello,hello))转为Map(hello,2)
        val list2: List[(String, Int)] = stringToStrings.map(s => (s._1, s._2.length)).toList
        // 排序取前三
        list2.sortBy(_._2).reverse.take(3)
        
    }
}

wordtoCount复杂

方法一,同上

package cn.lpc
/**
 * 对一个List进行wordCount,返回出现次数最多的三个单词,及其出现次数
 * 不过list之间又是一个新的tuple
 */
object WordCountHard {
    def main(args: Array[String]): Unit = {
        val list = List(("Hello Hadoop Hive Kafka", 4), ("Hello Hadoop Hive", 3), ("Hello Hadoop", 2), ("Hello", 1))
        println(list.map(s => (s._1 + " ") * s._2).flatMap(s => s.split(" ")).groupBy(s => s).toList.map(s=>(s._1,s._2.size)).sortBy(_._2).reverse.take(3))
    }
}

方法二,

在map时也可以用函数做临时变量

object WordCountHard2 {
    def main(args: Array[String]): Unit = {
        val list = List(("Hello Hadoop Hive Kafka", 4), ("Hello Hadoop Hive", 3), ("Hello Hadoop", 2), ("Hello", 1))
        val tuples: List[(String, Int)] = list.flatMap(
            t => {
                val strings: Array[String] = t._1.split(" ")
                strings.map(s => (s, t._2))
            }
        )

        val stringToTuples: Map[String, List[(String, Int)]] = tuples.groupBy(s => s._1)

        val tuples1: List[(String, Int)] = stringToTuples.map(
            t => {
                (t._1, t._2.map(l => l._2))
            }
        ).map(m => (m._1, m._2.sum)).toList.sortBy(s => s._2).reverse.take(3)

        println(tuples1)
    }
}

两个map的合并

将两个map相同的key的value相加,没有的key新增

object MapUnion {
    def main(args: Array[String]): Unit = {
        val map = Map(("a", 1), ("b", 2), ("c", 3))
        val map1 = Map(("a", 4), ("d", 5), ("c", 6))

        val map3: Map[String, Int] = map1.foldLeft(map)((mapTemp, kv) => {
            val map2: Map[String, Int] = mapTemp.updated(kv._1, mapTemp.getOrElse(kv._1, 0) + kv._2)
            map2
        })
        println(map3)
    }
}
发布了118 篇原创文章 · 获赞 5 · 访问量 7169

猜你喜欢

转载自blog.csdn.net/resilienter/article/details/103784218