Problem
How can we check from Spark whether a file exists in HDFS? We will use the FileSystem and Path classes from the org.apache.hadoop.fs package to do this.
Spark 2.0 or higher
package com.bigdataetl

import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.sql.SparkSession

object Test extends App {

  val spark = SparkSession.builder
    // Master is set to local[*] because this runs on a local computer.
    // In production mode, the master will be set from the spark-submit command.
    .master("local[*]")
    .appName("BigDataETL - Check if file exists")
    .getOrCreate()

  // Create a FileSystem object from the Hadoop Configuration
  val fs = FileSystem.get(spark.sparkContext.hadoopConfiguration)

  // This method returns a Boolean (true if the file exists, false if it doesn't)
  val fileExists = fs.exists(new Path("<path_to_file>"))
  if (fileExists) println("File exists!")
  else println("File doesn't exist!")
}
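In practice the check is most useful as a guard before loading the data. Below is a minimal sketch of that pattern inside the same object; the CSV format and the placeholder path are illustrative assumptions, not part of the original example.

  // A minimal sketch (assumptions: CSV input, placeholder path) that uses the
  // existence check to decide whether to load the file with the SparkSession above.
  val inputPath = "<path_to_file>"
  if (fs.exists(new Path(inputPath))) {
    val df = spark.read
      .option("header", "true")
      .csv(inputPath)
    df.show()
  } else {
    println(s"Skipping load - $inputPath does not exist")
  }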
Spark 1.6 to 2.0
package com.bigdataetl

import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.{SparkConf, SparkContext}

object Test extends App {

  val sparkConf = new SparkConf().setAppName("BigDataETL - Check if file exists")
  val sc = new SparkContext(sparkConf)

  // Create a FileSystem object from the Hadoop Configuration
  val fs = FileSystem.get(sc.hadoopConfiguration)

  // This method returns a Boolean (true if the file exists, false if it doesn't)
  val fileExists = fs.exists(new Path("<path_to_file>"))
  if (fileExists) println("File exists!")
  else println("File doesn't exist!")
}
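Because both versions only need a Hadoop Configuration, the check can also be wrapped in a small helper and reused from either API. The object and method names below are assumptions for illustration, not something defined in the post.

package com.bigdataetl

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

// Hypothetical helper object (names are assumptions, not from the original post).
object HdfsUtils {
  // Returns true if the given path (file or directory) exists on the configured file system.
  def pathExists(conf: Configuration, path: String): Boolean = {
    val fs = FileSystem.get(conf)
    fs.exists(new Path(path))
  }
}

// Usage: HdfsUtils.pathExists(sc.hadoopConfiguration, "<path_to_file>")
// or:    HdfsUtils.pathExists(spark.sparkContext.hadoopConfiguration, "<path_to_file>")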
If you enjoyed this post, please add a comment below or share it on Facebook, Twitter, LinkedIn, or another social media page.
Thanks in advance!