spark--开窗函数/窗口函数/排序函数/分析函数-★★

开窗函数

  • 1.select row_number() over(partition by 列名 order by 列名) 别名 from 表名 //并列时名次仍依次递增,不出现重复名次,如 1、2、3
  • 2.select rank() over(partition by 列名 order by 列名) 别名 from 表名 //并列时名次相同,之后留空位(跳号),如 1、1、3
  • 3.select dense_rank() over(partition by 列名 order by 列名) 别名 from 表名 //并列时名次相同,之后不留空位(连续),如 1、1、2
package cn.hanjiaxiaozhi.sql

import org.apache.spark.SparkContext
import org.apache.spark.sql.SparkSession

/**
 * Author hanjiaxiaozhi
 * Date 2020/7/25 10:45
 * Desc Demonstrates the Spark SQL ranking window functions
 *      (row_number / rank / dense_rank) on a small in-memory score table.
 */
object FunctionsDemo {

  /** One exam result row: student name, class number and score. */
  case class Score(name: String, clazz: Int, score: Int)

  /**
   * Builds a local SparkSession, registers the sample scores as the temp view
   * `scores`, and prints the result of ranking students inside each class with
   * `row_number()`, `rank()` and `dense_rank()` (ordered by ascending score).
   *
   * The session is always stopped before returning, even if a query fails.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // 0. Prepare the environment and the data
    val spark: SparkSession = SparkSession.builder().appName("sql").master("local[*]").getOrCreate()
    try {
      val sc: SparkContext = spark.sparkContext
      sc.setLogLevel("WARN")
      import spark.implicits._

      // The case-class field `clazz` is exposed as column "class" by the explicit toDF names.
      val scoreDF = sc.makeRDD(Array(
        Score("a1", 1, 80),
        Score("a2", 1, 78),
        Score("a3", 1, 95),
        Score("a4", 2, 74),
        Score("a5", 2, 92),
        Score("a6", 3, 99),
        Score("a7", 3, 99),
        Score("a8", 3, 45),
        Score("a9", 3, 55),
        Score("a10", 3, 78),
        Score("a11", 3, 100))
      ).toDF("name", "class", "score")
      scoreDF.createOrReplaceTempView("scores")
      scoreDF.show()
      /*
       * +----+-----+-----+
       * |name|class|score|
       * +----+-----+-----+
       * |  a1|    1|   80|
       * |  a2|    1|   78|
       * |  a3|    1|   95|
       * |  a4|    2|   74|
       * |  a5|    2|   92|
       * |  a6|    3|   99|
       * |  a7|    3|   99|
       * |  a8|    3|   45|
       * |  a9|    3|   55|
       * | a10|    3|   78|
       * | a11|    3|  100|
       * +----+-----+-----+
       */

      // Requirement: rank every student by score inside his/her own class.

      // ROW_NUMBER: sequential numbering - rows with equal scores still get distinct,
      // consecutive numbers.
      // NOTE(review): a6 and a7 tie on score; which of them gets 4 and which gets 5 is
      // presumably not guaranteed without an extra tie-breaker column - confirm.
      spark.sql("select name,class,score,row_number() over(partition by class order by score) num from scores").show()

      /*
       * Sample output:
       * +----+-----+-----+---+
       * |name|class|score|num|
       * +----+-----+-----+---+
       * |  a2|    1|   78|  1|
       * |  a1|    1|   80|  2|
       * |  a3|    1|   95|  3|
       * |  a8|    3|   45|  1|
       * |  a9|    3|   55|  2|
       * | a10|    3|   78|  3|
       *
       * |  a6|    3|   99|  4|
       * |  a7|    3|   99|  5|
       * | a11|    3|  100|  6|
       *
       * |  a4|    2|   74|  1|
       * |  a5|    2|   92|  2|
       * +----+-----+-----+---+
       */

      // RANK: "jumping" rank - tied rows share a rank and the following rank is skipped
      // (4, 4, 6 below).
      spark.sql("select name,class,score,rank() over(partition by class order by score) num from scores").show()

      /*
       * Sample output:
       * +----+-----+-----+---+
       * |name|class|score|num|
       * +----+-----+-----+---+
       * |  a2|    1|   78|  1|
       * |  a1|    1|   80|  2|
       * |  a3|    1|   95|  3|
       * |  a8|    3|   45|  1|
       * |  a9|    3|   55|  2|
       * | a10|    3|   78|  3|
       *
       * |  a6|    3|   99|  4|
       * |  a7|    3|   99|  4|
       * | a11|    3|  100|  6|
       *
       * |  a4|    2|   74|  1|
       * |  a5|    2|   92|  2|
       * +----+-----+-----+---+
       */

      // DENSE_RANK: "continuous" rank - tied rows share a rank and no rank is skipped
      // (4, 4, 5 below).
      spark.sql("select name,class,score,dense_rank() over(partition by class order by score) num from scores").show()

      /*
       * Sample output:
       * +----+-----+-----+---+
       * |name|class|score|num|
       * +----+-----+-----+---+
       * |  a2|    1|   78|  1|
       * |  a1|    1|   80|  2|
       * |  a3|    1|   95|  3|
       * |  a8|    3|   45|  1|
       * |  a9|    3|   55|  2|
       * | a10|    3|   78|  3|
       *
       * |  a6|    3|   99|  4|
       * |  a7|    3|   99|  4|
       * | a11|    3|  100|  5|
       *
       * |  a4|    2|   74|  1|
       * |  a5|    2|   92|  2|
       * +----+-----+-----+---+
       */
    } finally {
      // Release the local Spark context and its resources even when a query throws.
      spark.stop()
    }
  }
}

猜你喜欢

转载自blog.csdn.net/qq_46893497/article/details/113926632