Friday, 17 November 2017

Apache Spark window functions and user-defined function (UDF) example.

Apache Spark window functions and user-defined function (UDF) example.
Apache Spark window functions and user-defined function (UDF) example:
package com.sps.test
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions._
import org.apache.log4j._
/**
 * Small Spark SQL demo: builds an in-memory Dataset of employees and prints
 *  - the average salary per department (window aggregate),
 *  - the maximum salary per department (window aggregate + distinct),
 *  - each employee's salary rank within their department, and
 *  - a "fullName" column produced by a user-defined function (UDF).
 *
 * Uses an explicit `main` method rather than `scala.App`: the Spark
 * documentation warns that subclasses of `scala.App` may not work correctly.
 */
object WindowFunEx {

  import org.apache.spark.sql.expressions.Window

  /**
   * One employee record. Kept at object level (not inside `main`) so that
   * `spark.implicits._` can derive an encoder for it.
   */
  case class Employee(depName: String, empNo: Long, salary: Long, fName: String, lName: String)

  /** Joins a first name and a last name with a single space. Wrapped as a Spark UDF in `main`. */
  val concatName = (fName: String, lName: String) => {
    fName + " " + lName
  }

  /**
   * Entry point: runs every example against a local Spark session and prints
   * the resulting tables to stdout.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Lower the log level before the session is created so that start-up
    // logging from "org.*" loggers is quieted as well.
    Logger.getLogger("org").setLevel(Level.ERROR)

    val spark = SparkSession
      .builder
      .appName("SaprkSqlTest")
      .master("local[*]")
      .getOrCreate()
    import spark.implicits._

    try {
      val empsalary = Seq(
        Employee("sales", 1, 5000, "Shemeem", "SP"),
        Employee("personnel", 2, 3900, "Steve", "smith"),
        Employee("sales", 3, 4800, "Scahin", "tendulkar"),
        Employee("sales", 4, 4800, "john", "bbbbb"),
        Employee("personnel", 5, 3500, "eeee", "dddd"),
        Employee("develop", 7, 4200, "bbbb", "ddd"),
        Employee("develop", 8, 6000, "cccc", "dddy"),
        Employee("develop", 9, 4500, "uuuu", "yhgg"),
        Employee("develop", 10, 5200, "llll", "thffg"),
        Employee("develop", 11, 5200, "kkk", "tyngjh")).toDS

      // Window that groups rows by department.
      val byDepName = Window.partitionBy($"depName")

      // Average salary per department, attached to every employee row.
      empsalary.withColumn("avg", avg($"salary").over(byDepName)).show()

      // Maximum salary per department, one row per department.
      // The column is referenced by its exact declared name ("depName") so the
      // lookup does not rely on case-insensitive column resolution.
      empsalary
        .withColumn("Max Salary In Dept", max($"salary").over(byDepName))
        .select("depName", "Max Salary In Dept")
        .distinct()
        .show()

      // Salary rank within each department, highest salary first.
      // Equal salaries share a rank, since rank() is computed over the salary ordering.
      val byDepnameSalaryDesc = Window.partitionBy($"depName").orderBy($"salary".desc)
      val rankByDepname = rank().over(byDepnameSalaryDesc)
      empsalary.select($"*", rankByDepname.as("rank")).show()

      // User-defined function that concatenates first and last name.
      val concatN = udf(concatName)
      empsalary.withColumn("fullName", concatN($"fName", $"lName")).show()
    } finally {
      // Always release the session, even if one of the examples throws.
      spark.stop()
    }
  }
}

2 comments:

Python and packages offline Installation in ubuntu machine.

https://www.linkedin.com/pulse/python-packages-offline-installation-ubuntu-machine-sijo-jose/