Apache Spark example: window functions (avg, max, rank partitioned by department) and a user-defined function for concatenating names.
package com.sps.test

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions._
import org.apache.log4j._

/**
 * Demonstrates Spark SQL window functions (average, max, and rank over a
 * per-department partition) and a user-defined function that concatenates
 * first and last names into a full name.
 *
 * Uses `def main` instead of `extends App`: App's delayedInit runs the body
 * in method scope, which is a known cause of Spark encoder/serialization
 * failures for case classes declared inside it.
 */
object WindowFunEx {

  // Declared at object level (not inside main): Spark's encoder derivation
  // via spark.implicits._ needs a statically-known, stable case class.
  case class Employee(depName: String, empNo: Long, salary: Long, fName: String, lName: String)

  def main(args: Array[String]): Unit = {
    // Local-mode session using all available cores.
    val spark = SparkSession
      .builder
      .appName("SaprkSqlTest")
      .master("local[*]")
      .getOrCreate()
    import spark.implicits._

    // Keep console output readable: suppress Spark's INFO/WARN chatter.
    Logger.getLogger("org").setLevel(Level.ERROR)

    val empsalary = Seq(
      Employee("sales", 1, 5000, "Shemeem", "SP"),
      Employee("personnel", 2, 3900, "Steve", "smith"),
      Employee("sales", 3, 4800, "Scahin", "tendulkar"),
      Employee("sales", 4, 4800, "john", "bbbbb"),
      Employee("personnel", 5, 3500, "eeee", "dddd"),
      Employee("develop", 7, 4200, "bbbb", "ddd"),
      Employee("develop", 8, 6000, "cccc", "dddy"),
      Employee("develop", 9, 4500, "uuuu", "yhgg"),
      Employee("develop", 10, 5200, "llll", "thffg"),
      Employee("develop", 11, 5200, "kkk", "tyngjh")).toDS

    // Window covering all rows that share a department.
    val byDepName = Window.partitionBy('depName)

    // Average salary per department, added as a column to every row.
    empsalary.withColumn("avg", avg('salary) over byDepName).show()

    // Maximum salary per department: one distinct row per department.
    empsalary.withColumn("Max Salary In Dept", max('salary) over byDepName)
      .select("depName", "Max Salary In Dept")
      .distinct()
      .show()

    // Rank salaries within each department, highest salary first.
    // `'salary.desc` replaces the deprecated postfix form `'salary desc`.
    val byDepNameSalaryDesc = Window.partitionBy('depName).orderBy('salary.desc)
    val rankByDepName = rank().over(byDepNameSalaryDesc)
    empsalary.select('*, rankByDepName as 'rank).show()

    // UDF concatenating first and last name with a single space.
    val concatName = (fName: String, lName: String) => fName + " " + lName
    val concatN = udf(concatName)
    empsalary.withColumn("fullName", concatN('fName, 'lName)).show()

    // Release local executor threads and the UI port.
    spark.stop()
  }
}
This comment has been removed by the author.
ReplyDeleteThank you.Well it was nice post and very helpful information on
ReplyDeleteBig Data Hadoop Online Course