import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.{DataFrame, SparkSession}/**
* @author 红尘丶世界
* @version v 1.0
* @date 2020.4.17
*/
/**
 * Batch job: loads a student-score CSV, registers it as a temp view,
 * and answers several aggregate questions with Spark SQL.
 *
 * Fixes vs. the original:
 *  - `newStructType()` -> `new StructType()` (compile error).
 *  - Removed the stray trailing `|` left in each SQL string (stripMargin
 *    only strips a *leading* `|`, so the old queries failed to parse).
 *  - Top-3 query now partitions by sex (matching its stated intent) and
 *    filters the window rank in an outer query — `HAVING` without
 *    `GROUP BY` is not valid for window functions.
 *  - "Below 300" query now actually uses `score < 300` and groups by sex.
 */
object Test01 {

  def main(args: Array[String]): Unit = {
    // Create the SparkSession (local mode, all cores).
    val spark: SparkSession = SparkSession
      .builder()
      .master("local[*]")
      .appName("StructStreamingJson")
      .getOrCreate()

    val sqlContext = spark.sqlContext

    // Keep the console quiet — only errors.
    spark.sparkContext.setLogLevel("ERROR")

    // Schema of the input CSV: studentId, name, sex, clazzId, score.
    val structType: StructType = new StructType()
      .add("studentId", "string")
      .add("name", "string")
      .add("sex", "string")
      .add("clazzId", "integer")
      .add("score", "integer")

    // Input path: first program argument if given, otherwise the original
    // hard-coded location (backward compatible).
    val inputPath: String =
      args.headOption.getOrElse("D:\\dev\\大数据\\大数据资料\\spark\\4.16号练习题50道2.0\\student_score")

    // Read the data, dropping rows whose first column (studentId) is null.
    val df: DataFrame = sqlContext.read
      .schema(structType)
      .csv(inputPath)
      .filter(_.get(0) != null)

    df.createOrReplaceTempView("student_scores")

    // Highest score per class.
    spark.sql(
      "select clazzId, max(score) from student_scores group by clazzId order by clazzId"
    ).show()

    // Highest score among boys, per class.
    spark.sql(
      "select clazzId, max(score) from student_scores where sex = '男' group by clazzId order by clazzId"
    ).show()

    // Highest score among girls, per class.
    spark.sql(
      "select clazzId, max(score) from student_scores where sex = '女' group by clazzId order by clazzId"
    ).show()

    // Top 3 scores for boys and for girls respectively.
    // Window rank must be filtered in an outer query (not HAVING).
    spark.sql(
      """
        |select sex, name, score, rnk
        |from (
        |  select sex, name, score,
        |         rank() over (partition by sex order by score desc) as rnk
        |  from student_scores
        |) ranked
        |where rnk <= 3
        |""".stripMargin
    ).show()

    // Number of students scoring above 500.
    spark.sql(
      """
        |select count(score) from student_scores where score > 500
        |""".stripMargin
    ).show()

    // Among students scoring below 300, how many are male vs. female.
    spark.sql(
      """
        |select sex, count(*) as cnt
        |from student_scores
        |where score < 300
        |group by sex
        |""".stripMargin
    ).show()

    // Shut down the session (stops the underlying SparkContext too).
    spark.stop()
  }
}