From ae1f54aa0ed69f9daa1f32766ca234bda9320452 Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Wed, 23 Dec 2015 13:24:06 -0800 Subject: [PATCH 1/6] [SPARK-12500][CORE] Fix Tachyon deprecations; pull Tachyon dependency into one class Fix Tachyon deprecations; pull Tachyon dependency into `TachyonBlockManager` only CC calvinjia as I probably need a double-check that the usage of the new API is correct. Author: Sean Owen Closes #10449 from srowen/SPARK-12500. --- .../spark/storage/TachyonBlockManager.scala | 135 ++++++++++++++---- .../spark/util/ShutdownHookManager.scala | 42 ------ .../scala/org/apache/spark/util/Utils.scala | 11 -- 3 files changed, 104 insertions(+), 84 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/storage/TachyonBlockManager.scala b/core/src/main/scala/org/apache/spark/storage/TachyonBlockManager.scala index d14fe4613528a..7f88f2fe6d503 100644 --- a/core/src/main/scala/org/apache/spark/storage/TachyonBlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/TachyonBlockManager.scala @@ -26,13 +26,17 @@ import scala.util.control.NonFatal import com.google.common.io.ByteStreams -import tachyon.client.{ReadType, WriteType, TachyonFS, TachyonFile} +import tachyon.{Constants, TachyonURI} +import tachyon.client.ClientContext +import tachyon.client.file.{TachyonFile, TachyonFileSystem} +import tachyon.client.file.TachyonFileSystem.TachyonFileSystemFactory +import tachyon.client.file.options.DeleteOptions import tachyon.conf.TachyonConf -import tachyon.TachyonURI +import tachyon.exception.{FileAlreadyExistsException, FileDoesNotExistException} import org.apache.spark.Logging import org.apache.spark.executor.ExecutorExitCode -import org.apache.spark.util.{ShutdownHookManager, Utils} +import org.apache.spark.util.Utils /** @@ -44,15 +48,15 @@ private[spark] class TachyonBlockManager() extends ExternalBlockManager with Log var rootDirs: String = _ var master: String = _ - var client: tachyon.client.TachyonFS = _ + var client: TachyonFileSystem = _ private var subDirsPerTachyonDir: Int = _ // Create one Tachyon directory for each path mentioned in spark.tachyonStore.folderName; // then, inside this directory, create multiple subdirectories that we will hash files into, // in order to avoid having really large inodes at the top level in Tachyon. private var tachyonDirs: Array[TachyonFile] = _ - private var subDirs: Array[Array[tachyon.client.TachyonFile]] = _ - + private var subDirs: Array[Array[TachyonFile]] = _ + private val shutdownDeleteTachyonPaths = new scala.collection.mutable.HashSet[String]() override def init(blockManager: BlockManager, executorId: String): Unit = { super.init(blockManager, executorId) @@ -62,7 +66,10 @@ private[spark] class TachyonBlockManager() extends ExternalBlockManager with Log rootDirs = s"$storeDir/$appFolderName/$executorId" master = blockManager.conf.get(ExternalBlockStore.MASTER_URL, "tachyon://localhost:19998") client = if (master != null && master != "") { - TachyonFS.get(new TachyonURI(master), new TachyonConf()) + val tachyonConf = new TachyonConf() + tachyonConf.set(Constants.MASTER_ADDRESS, master) + ClientContext.reset(tachyonConf) + TachyonFileSystemFactory.get } else { null } @@ -80,7 +87,7 @@ private[spark] class TachyonBlockManager() extends ExternalBlockManager with Log // in order to avoid having really large inodes at the top level in Tachyon. 
tachyonDirs = createTachyonDirs() subDirs = Array.fill(tachyonDirs.length)(new Array[TachyonFile](subDirsPerTachyonDir)) - tachyonDirs.foreach(tachyonDir => ShutdownHookManager.registerShutdownDeleteDir(tachyonDir)) + tachyonDirs.foreach(registerShutdownDeleteDir) } override def toString: String = {"ExternalBlockStore-Tachyon"} @@ -89,6 +96,7 @@ private[spark] class TachyonBlockManager() extends ExternalBlockManager with Log val file = getFile(blockId) if (fileExists(file)) { removeFile(file) + true } else { false } @@ -101,7 +109,7 @@ private[spark] class TachyonBlockManager() extends ExternalBlockManager with Log override def putBytes(blockId: BlockId, bytes: ByteBuffer): Unit = { val file = getFile(blockId) - val os = file.getOutStream(WriteType.TRY_CACHE) + val os = client.getOutStream(new TachyonURI(client.getInfo(file).getPath)) try { Utils.writeByteBuffer(bytes, os) } catch { @@ -115,7 +123,7 @@ private[spark] class TachyonBlockManager() extends ExternalBlockManager with Log override def putValues(blockId: BlockId, values: Iterator[_]): Unit = { val file = getFile(blockId) - val os = file.getOutStream(WriteType.TRY_CACHE) + val os = client.getOutStream(new TachyonURI(client.getInfo(file).getPath)) try { blockManager.dataSerializeStream(blockId, os, values) } catch { @@ -129,12 +137,17 @@ private[spark] class TachyonBlockManager() extends ExternalBlockManager with Log override def getBytes(blockId: BlockId): Option[ByteBuffer] = { val file = getFile(blockId) - if (file == null || file.getLocationHosts.size == 0) { + if (file == null) { return None } - val is = file.getInStream(ReadType.CACHE) + val is = try { + client.getInStream(file) + } catch { + case _: FileDoesNotExistException => + return None + } try { - val size = file.length + val size = client.getInfo(file).length val bs = new Array[Byte](size.asInstanceOf[Int]) ByteStreams.readFully(is, bs) Some(ByteBuffer.wrap(bs)) @@ -149,25 +162,37 @@ private[spark] class TachyonBlockManager() extends ExternalBlockManager with Log override def getValues(blockId: BlockId): Option[Iterator[_]] = { val file = getFile(blockId) - if (file == null || file.getLocationHosts().size() == 0) { + if (file == null) { return None } - val is = file.getInStream(ReadType.CACHE) - Option(is).map { is => - blockManager.dataDeserializeStream(blockId, is) + val is = try { + client.getInStream(file) + } catch { + case _: FileDoesNotExistException => + return None + } + try { + Some(blockManager.dataDeserializeStream(blockId, is)) + } finally { + is.close() } } override def getSize(blockId: BlockId): Long = { - getFile(blockId.name).length + client.getInfo(getFile(blockId.name)).length } - def removeFile(file: TachyonFile): Boolean = { - client.delete(new TachyonURI(file.getPath()), false) + def removeFile(file: TachyonFile): Unit = { + client.delete(file) } def fileExists(file: TachyonFile): Boolean = { - client.exist(new TachyonURI(file.getPath())) + try { + client.getInfo(file) + true + } catch { + case _: FileDoesNotExistException => false + } } def getFile(filename: String): TachyonFile = { @@ -186,18 +211,18 @@ private[spark] class TachyonBlockManager() extends ExternalBlockManager with Log } else { val path = new TachyonURI(s"${tachyonDirs(dirId)}/${"%02x".format(subDirId)}") client.mkdir(path) - val newDir = client.getFile(path) + val newDir = client.loadMetadata(path) subDirs(dirId)(subDirId) = newDir newDir } } } val filePath = new TachyonURI(s"$subDir/$filename") - if(!client.exist(filePath)) { - client.createFile(filePath) + try { + 
client.create(filePath) + } catch { + case _: FileAlreadyExistsException => client.loadMetadata(filePath) } - val file = client.getFile(filePath) - file } def getFile(blockId: BlockId): TachyonFile = getFile(blockId.name) @@ -217,9 +242,11 @@ private[spark] class TachyonBlockManager() extends ExternalBlockManager with Log try { tachyonDirId = "%s-%04x".format(dateFormat.format(new Date), rand.nextInt(65536)) val path = new TachyonURI(s"$rootDir/spark-tachyon-$tachyonDirId") - if (!client.exist(path)) { + try { foundLocalDir = client.mkdir(path) - tachyonDir = client.getFile(path) + tachyonDir = client.loadMetadata(path) + } catch { + case _: FileAlreadyExistsException => // continue } } catch { case NonFatal(e) => @@ -240,14 +267,60 @@ private[spark] class TachyonBlockManager() extends ExternalBlockManager with Log logDebug("Shutdown hook called") tachyonDirs.foreach { tachyonDir => try { - if (!ShutdownHookManager.hasRootAsShutdownDeleteDir(tachyonDir)) { - Utils.deleteRecursively(tachyonDir, client) + if (!hasRootAsShutdownDeleteDir(tachyonDir)) { + deleteRecursively(tachyonDir, client) } } catch { case NonFatal(e) => logError("Exception while deleting tachyon spark dir: " + tachyonDir, e) } } - client.close() } + + /** + * Delete a file or directory and its contents recursively. + */ + private def deleteRecursively(dir: TachyonFile, client: TachyonFileSystem) { + client.delete(dir, new DeleteOptions.Builder(ClientContext.getConf).setRecursive(true).build()) + } + + // Register the tachyon path to be deleted via shutdown hook + private def registerShutdownDeleteDir(file: TachyonFile) { + val absolutePath = client.getInfo(file).getPath + shutdownDeleteTachyonPaths.synchronized { + shutdownDeleteTachyonPaths += absolutePath + } + } + + // Remove the tachyon path to be deleted via shutdown hook + private def removeShutdownDeleteDir(file: TachyonFile) { + val absolutePath = client.getInfo(file).getPath + shutdownDeleteTachyonPaths.synchronized { + shutdownDeleteTachyonPaths -= absolutePath + } + } + + // Is the path already registered to be deleted via a shutdown hook ? + private def hasShutdownDeleteTachyonDir(file: TachyonFile): Boolean = { + val absolutePath = client.getInfo(file).getPath + shutdownDeleteTachyonPaths.synchronized { + shutdownDeleteTachyonPaths.contains(absolutePath) + } + } + + // Note: if file is child of some registered path, while not equal to it, then return true; + // else false. This is to ensure that two shutdown hooks do not try to delete each others + // paths - resulting in Exception and incomplete cleanup. 
+ private def hasRootAsShutdownDeleteDir(file: TachyonFile): Boolean = { + val absolutePath = client.getInfo(file).getPath + val hasRoot = shutdownDeleteTachyonPaths.synchronized { + shutdownDeleteTachyonPaths.exists( + path => !absolutePath.equals(path) && absolutePath.startsWith(path)) + } + if (hasRoot) { + logInfo(s"path = $absolutePath, already present as root for deletion.") + } + hasRoot + } + } diff --git a/core/src/main/scala/org/apache/spark/util/ShutdownHookManager.scala b/core/src/main/scala/org/apache/spark/util/ShutdownHookManager.scala index 1a0f3b477ba3f..0065b1fc660b0 100644 --- a/core/src/main/scala/org/apache/spark/util/ShutdownHookManager.scala +++ b/core/src/main/scala/org/apache/spark/util/ShutdownHookManager.scala @@ -21,7 +21,6 @@ import java.io.File import java.util.PriorityQueue import scala.util.{Failure, Success, Try} -import tachyon.client.TachyonFile import org.apache.hadoop.fs.FileSystem import org.apache.spark.Logging @@ -52,7 +51,6 @@ private[spark] object ShutdownHookManager extends Logging { } private val shutdownDeletePaths = new scala.collection.mutable.HashSet[String]() - private val shutdownDeleteTachyonPaths = new scala.collection.mutable.HashSet[String]() // Add a shutdown hook to delete the temp dirs when the JVM exits addShutdownHook(TEMP_DIR_SHUTDOWN_PRIORITY) { () => @@ -77,14 +75,6 @@ private[spark] object ShutdownHookManager extends Logging { } } - // Register the tachyon path to be deleted via shutdown hook - def registerShutdownDeleteDir(tachyonfile: TachyonFile) { - val absolutePath = tachyonfile.getPath() - shutdownDeleteTachyonPaths.synchronized { - shutdownDeleteTachyonPaths += absolutePath - } - } - // Remove the path to be deleted via shutdown hook def removeShutdownDeleteDir(file: File) { val absolutePath = file.getAbsolutePath() @@ -93,14 +83,6 @@ private[spark] object ShutdownHookManager extends Logging { } } - // Remove the tachyon path to be deleted via shutdown hook - def removeShutdownDeleteDir(tachyonfile: TachyonFile) { - val absolutePath = tachyonfile.getPath() - shutdownDeleteTachyonPaths.synchronized { - shutdownDeleteTachyonPaths.remove(absolutePath) - } - } - // Is the path already registered to be deleted via a shutdown hook ? def hasShutdownDeleteDir(file: File): Boolean = { val absolutePath = file.getAbsolutePath() @@ -109,14 +91,6 @@ private[spark] object ShutdownHookManager extends Logging { } } - // Is the path already registered to be deleted via a shutdown hook ? - def hasShutdownDeleteTachyonDir(file: TachyonFile): Boolean = { - val absolutePath = file.getPath() - shutdownDeleteTachyonPaths.synchronized { - shutdownDeleteTachyonPaths.contains(absolutePath) - } - } - // Note: if file is child of some registered path, while not equal to it, then return true; // else false. This is to ensure that two shutdown hooks do not try to delete each others // paths - resulting in IOException and incomplete cleanup. @@ -133,22 +107,6 @@ private[spark] object ShutdownHookManager extends Logging { retval } - // Note: if file is child of some registered path, while not equal to it, then return true; - // else false. This is to ensure that two shutdown hooks do not try to delete each others - // paths - resulting in Exception and incomplete cleanup. 
- def hasRootAsShutdownDeleteDir(file: TachyonFile): Boolean = { - val absolutePath = file.getPath() - val retval = shutdownDeleteTachyonPaths.synchronized { - shutdownDeleteTachyonPaths.exists { path => - !absolutePath.equals(path) && absolutePath.startsWith(path) - } - } - if (retval) { - logInfo("path = " + file + ", already present as root for deletion.") - } - retval - } - /** * Detect whether this thread might be executing a shutdown hook. Will always return true if * the current thread is a running a shutdown hook but may spuriously return true otherwise (e.g. diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 1a07f7ca7eaf5..b8ca6b07e4198 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -44,8 +44,6 @@ import org.apache.log4j.PropertyConfigurator import org.eclipse.jetty.util.MultiException import org.json4s._ import org.slf4j.Logger -import tachyon.TachyonURI -import tachyon.client.{TachyonFS, TachyonFile} import org.apache.spark._ import org.apache.spark.deploy.SparkHadoopUtil @@ -946,15 +944,6 @@ private[spark] object Utils extends Logging { } } - /** - * Delete a file or directory and its contents recursively. - */ - def deleteRecursively(dir: TachyonFile, client: TachyonFS) { - if (!client.delete(new TachyonURI(dir.getPath()), true)) { - throw new IOException("Failed to delete the tachyon dir: " + dir) - } - } - /** * Check to see if file is a symbolic link. */ From ead6abf7e7fc14b451214951d4991d497aa65e63 Mon Sep 17 00:00:00 2001 From: Adrian Bridgett Date: Wed, 23 Dec 2015 16:00:03 -0800 Subject: [PATCH 2/6] [SPARK-12499][BUILD] don't force MAVEN_OPTS allow the user to override MAVEN_OPTS (2GB wasn't sufficient for me) Author: Adrian Bridgett Closes #10448 from abridgett/feature/do_not_force_maven_opts. --- make-distribution.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/make-distribution.sh b/make-distribution.sh index 351b9e7d89a32..a38fd8df17206 100755 --- a/make-distribution.sh +++ b/make-distribution.sh @@ -159,7 +159,7 @@ fi # Build uber fat JAR cd "$SPARK_HOME" -export MAVEN_OPTS="-Xmx2g -XX:MaxPermSize=512M -XX:ReservedCodeCacheSize=512m" +export MAVEN_OPTS="${MAVEN_OPTS:--Xmx2g -XX:MaxPermSize=512M -XX:ReservedCodeCacheSize=512m}" # Store the command as an array because $MVN variable might have spaces in it. # Normal quoting tricks don't work. From 9e85bb71ad2d7d3a9da0cb8853f3216d37e6ff47 Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Thu, 24 Dec 2015 21:27:55 +0900 Subject: [PATCH 3/6] [SPARK-12502][BUILD][PYTHON] Script /dev/run-tests fails when IBM Java is used fix an exception with IBM JDK by removing update field from a JavaVersion tuple. This is because IBM JDK does not have information on update '_xx' Author: Kazuaki Ishizaki Closes #10463 from kiszk/SPARK-12502. 
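As an illustration only (the actual fix, shown in the diff below, is in the Python script dev/run-tests.py), here is the same tolerant parsing idea sketched in Scala; the object and method names are hypothetical:

object JavaVersionSketch {
  // Match only major.minor.patch so that version strings from IBM JDKs,
  // which lack the "_update" suffix (e.g. "1.8.0" rather than "1.8.0_25"),
  // still parse successfully.
  private val Version = """(\d+)\.(\d+)\.(\d+)""".r.unanchored

  def parse(rawVersionLine: String): Option[(Int, Int, Int)] = rawVersionLine match {
    case Version(major, minor, patch) => Some((major.toInt, minor.toInt, patch.toInt))
    case _ => None
  }
}

Under this sketch, both an Oracle-style line (java version "1.8.0_25") and an IBM-style line (java version "1.8.0") yield Some((1, 8, 0)).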
--- dev/run-tests.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/dev/run-tests.py b/dev/run-tests.py index 17ceba052b8cd..6129f87cf8503 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -148,7 +148,7 @@ def determine_java_executable(): return java_exe if java_exe else which("java") -JavaVersion = namedtuple('JavaVersion', ['major', 'minor', 'patch', 'update']) +JavaVersion = namedtuple('JavaVersion', ['major', 'minor', 'patch']) def determine_java_version(java_exe): @@ -164,14 +164,13 @@ def determine_java_version(java_exe): # find raw version string, eg 'java version "1.8.0_25"' raw_version_str = next(x for x in raw_output_lines if " version " in x) - match = re.search('(\d+)\.(\d+)\.(\d+)_(\d+)', raw_version_str) + match = re.search('(\d+)\.(\d+)\.(\d+)', raw_version_str) major = int(match.group(1)) minor = int(match.group(2)) patch = int(match.group(3)) - update = int(match.group(4)) - return JavaVersion(major, minor, patch, update) + return JavaVersion(major, minor, patch) # ------------------------------------------------------------------------------------------------- # Functions for running the other build and test scripts From 392046611837a3a740ff97fa8177ca7c12316fb7 Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Thu, 24 Dec 2015 13:37:28 +0000 Subject: [PATCH 4/6] [SPARK-12311][CORE] Restore previous value of "os.arch" property in test suites after forcing to set specific value to "os.arch" property Restore the original value of os.arch property after each test Since some of tests forced to set the specific value to os.arch property, we need to set the original value. Author: Kazuaki Ishizaki Closes #10289 from kiszk/SPARK-12311. --- .../sort/IndexShuffleBlockResolverSuite.scala | 7 +++- .../org/apache/spark/CheckpointSuite.scala | 7 ++-- .../spark/ExternalShuffleServiceSuite.scala | 7 +++- .../org/apache/spark/FileServerSuite.scala | 7 ++-- .../scala/org/apache/spark/FileSuite.scala | 7 ++-- .../org/apache/spark/HashShuffleSuite.scala | 1 + .../apache/spark/HeartbeatReceiverSuite.scala | 1 + .../apache/spark/JobCancellationSuite.scala | 7 ++-- .../org/apache/spark/LocalSparkContext.scala | 9 +++-- .../apache/spark/SecurityManagerSuite.scala | 4 +-- .../org/apache/spark/SharedSparkContext.scala | 11 +++--- .../org/apache/spark/ShuffleNettySuite.scala | 1 + .../org/apache/spark/SortShuffleSuite.scala | 2 ++ .../StandaloneDynamicAllocationSuite.scala | 21 ++++++----- .../spark/deploy/client/AppClientSuite.scala | 21 ++++++----- .../deploy/history/HistoryServerSuite.scala | 3 +- .../rest/StandaloneRestSubmitSuite.scala | 8 +++-- .../WholeTextFileRecordReaderSuite.scala | 7 +++- .../NettyBlockTransferServiceSuite.scala | 18 ++++++---- .../spark/rdd/AsyncRDDActionsSuite.scala | 9 +++-- .../spark/rdd/LocalCheckpointSuite.scala | 1 + .../org/apache/spark/rpc/RpcEnvSuite.scala | 11 ++++-- .../SerializationDebuggerSuite.scala | 1 + .../BypassMergeSortShuffleWriterSuite.scala | 11 ++++-- .../spark/storage/BlockManagerSuite.scala | 35 +++++++++++-------- .../spark/storage/DiskBlockManagerSuite.scala | 16 ++++++--- .../storage/DiskBlockObjectWriterSuite.scala | 7 +++- .../org/apache/spark/ui/UISeleniumSuite.scala | 9 +++-- .../spark/util/ClosureCleanerSuite2.scala | 11 ++++-- .../spark/util/SizeEstimatorSuite.scala | 4 +++ .../source/libsvm/LibSVMRelationSuite.scala | 7 ++-- .../apache/spark/ml/util/TempDirectory.scala | 7 ++-- .../mllib/util/LocalClusterSparkContext.scala | 11 +++--- .../mllib/util/MLlibTestSparkContext.scala | 15 ++++---- 
.../spark/repl/ExecutorClassLoaderSuite.scala | 15 ++++---- .../spark/streaming/CheckpointSuite.scala | 15 +++++--- .../spark/streaming/DStreamClosureSuite.scala | 9 +++-- .../spark/streaming/DStreamScopeSuite.scala | 7 +++- .../spark/streaming/MapWithStateSuite.scala | 9 +++-- .../streaming/ReceiverInputDStreamSuite.scala | 6 +++- .../spark/streaming/UISeleniumSuite.scala | 9 +++-- .../streaming/rdd/MapWithStateRDDSuite.scala | 11 ++++-- .../WriteAheadLogBackedBlockRDDSuite.scala | 16 +++++++-- .../streaming/util/WriteAheadLogSuite.scala | 9 +++-- .../deploy/yarn/BaseYarnClusterSuite.scala | 13 +++++-- .../spark/deploy/yarn/ClientSuite.scala | 18 ++++++++-- .../deploy/yarn/YarnAllocatorSuite.scala | 7 +++- .../yarn/YarnSparkHadoopUtilSuite.scala | 5 +-- .../yarn/YarnShuffleServiceSuite.scala | 27 ++++++++------ 49 files changed, 338 insertions(+), 142 deletions(-) diff --git a/core/src/test/java/org/apache/spark/shuffle/sort/IndexShuffleBlockResolverSuite.scala b/core/src/test/java/org/apache/spark/shuffle/sort/IndexShuffleBlockResolverSuite.scala index 0b19861fc41ee..f200ff36c7dd5 100644 --- a/core/src/test/java/org/apache/spark/shuffle/sort/IndexShuffleBlockResolverSuite.scala +++ b/core/src/test/java/org/apache/spark/shuffle/sort/IndexShuffleBlockResolverSuite.scala @@ -42,6 +42,7 @@ class IndexShuffleBlockResolverSuite extends SparkFunSuite with BeforeAndAfterEa private val conf: SparkConf = new SparkConf(loadDefaults = false) override def beforeEach(): Unit = { + super.beforeEach() tempDir = Utils.createTempDir() MockitoAnnotations.initMocks(this) @@ -55,7 +56,11 @@ class IndexShuffleBlockResolverSuite extends SparkFunSuite with BeforeAndAfterEa } override def afterEach(): Unit = { - Utils.deleteRecursively(tempDir) + try { + Utils.deleteRecursively(tempDir) + } finally { + super.afterEach() + } } test("commit shuffle files multiple times") { diff --git a/core/src/test/scala/org/apache/spark/CheckpointSuite.scala b/core/src/test/scala/org/apache/spark/CheckpointSuite.scala index 553d46285ac03..390764ba242fd 100644 --- a/core/src/test/scala/org/apache/spark/CheckpointSuite.scala +++ b/core/src/test/scala/org/apache/spark/CheckpointSuite.scala @@ -256,8 +256,11 @@ class CheckpointSuite extends SparkFunSuite with RDDCheckpointTester with LocalS } override def afterEach(): Unit = { - super.afterEach() - Utils.deleteRecursively(checkpointDir) + try { + Utils.deleteRecursively(checkpointDir) + } finally { + super.afterEach() + } } override def sparkContext: SparkContext = sc diff --git a/core/src/test/scala/org/apache/spark/ExternalShuffleServiceSuite.scala b/core/src/test/scala/org/apache/spark/ExternalShuffleServiceSuite.scala index 1c775bcb3d9c1..eb3fb99747d12 100644 --- a/core/src/test/scala/org/apache/spark/ExternalShuffleServiceSuite.scala +++ b/core/src/test/scala/org/apache/spark/ExternalShuffleServiceSuite.scala @@ -35,6 +35,7 @@ class ExternalShuffleServiceSuite extends ShuffleSuite with BeforeAndAfterAll { var rpcHandler: ExternalShuffleBlockHandler = _ override def beforeAll() { + super.beforeAll() val transportConf = SparkTransportConf.fromSparkConf(conf, "shuffle", numUsableCores = 2) rpcHandler = new ExternalShuffleBlockHandler(transportConf, null) val transportContext = new TransportContext(transportConf, rpcHandler) @@ -46,7 +47,11 @@ class ExternalShuffleServiceSuite extends ShuffleSuite with BeforeAndAfterAll { } override def afterAll() { - server.close() + try { + server.close() + } finally { + super.afterAll() + } } // This test ensures that the external shuffle 
service is actually in use for the other tests. diff --git a/core/src/test/scala/org/apache/spark/FileServerSuite.scala b/core/src/test/scala/org/apache/spark/FileServerSuite.scala index 1255e71af6c0b..2c32b69715484 100644 --- a/core/src/test/scala/org/apache/spark/FileServerSuite.scala +++ b/core/src/test/scala/org/apache/spark/FileServerSuite.scala @@ -75,8 +75,11 @@ class FileServerSuite extends SparkFunSuite with LocalSparkContext { } override def afterAll() { - super.afterAll() - Utils.deleteRecursively(tmpDir) + try { + Utils.deleteRecursively(tmpDir) + } finally { + super.afterAll() + } } test("Distributing files locally") { diff --git a/core/src/test/scala/org/apache/spark/FileSuite.scala b/core/src/test/scala/org/apache/spark/FileSuite.scala index fdb00aafc4a48..f6a7f4375fac8 100644 --- a/core/src/test/scala/org/apache/spark/FileSuite.scala +++ b/core/src/test/scala/org/apache/spark/FileSuite.scala @@ -44,8 +44,11 @@ class FileSuite extends SparkFunSuite with LocalSparkContext { } override def afterEach() { - super.afterEach() - Utils.deleteRecursively(tempDir) + try { + Utils.deleteRecursively(tempDir) + } finally { + super.afterEach() + } } test("text files") { diff --git a/core/src/test/scala/org/apache/spark/HashShuffleSuite.scala b/core/src/test/scala/org/apache/spark/HashShuffleSuite.scala index 19180e88ebe0a..10794235ed392 100644 --- a/core/src/test/scala/org/apache/spark/HashShuffleSuite.scala +++ b/core/src/test/scala/org/apache/spark/HashShuffleSuite.scala @@ -24,6 +24,7 @@ class HashShuffleSuite extends ShuffleSuite with BeforeAndAfterAll { // This test suite should run all tests in ShuffleSuite with hash-based shuffle. override def beforeAll() { + super.beforeAll() conf.set("spark.shuffle.manager", "hash") } } diff --git a/core/src/test/scala/org/apache/spark/HeartbeatReceiverSuite.scala b/core/src/test/scala/org/apache/spark/HeartbeatReceiverSuite.scala index 3cd80c0f7d171..9b43341576a8a 100644 --- a/core/src/test/scala/org/apache/spark/HeartbeatReceiverSuite.scala +++ b/core/src/test/scala/org/apache/spark/HeartbeatReceiverSuite.scala @@ -66,6 +66,7 @@ class HeartbeatReceiverSuite * that uses a manual clock. 
*/ override def beforeEach(): Unit = { + super.beforeEach() val conf = new SparkConf() .setMaster("local[2]") .setAppName("test") diff --git a/core/src/test/scala/org/apache/spark/JobCancellationSuite.scala b/core/src/test/scala/org/apache/spark/JobCancellationSuite.scala index 1168eb0b802f2..e13a442463e8d 100644 --- a/core/src/test/scala/org/apache/spark/JobCancellationSuite.scala +++ b/core/src/test/scala/org/apache/spark/JobCancellationSuite.scala @@ -38,8 +38,11 @@ class JobCancellationSuite extends SparkFunSuite with Matchers with BeforeAndAft with LocalSparkContext { override def afterEach() { - super.afterEach() - resetSparkContext() + try { + resetSparkContext() + } finally { + super.afterEach() + } } test("local mode, FIFO scheduler") { diff --git a/core/src/test/scala/org/apache/spark/LocalSparkContext.scala b/core/src/test/scala/org/apache/spark/LocalSparkContext.scala index 8bf2e55defd02..214681970acbf 100644 --- a/core/src/test/scala/org/apache/spark/LocalSparkContext.scala +++ b/core/src/test/scala/org/apache/spark/LocalSparkContext.scala @@ -28,13 +28,16 @@ trait LocalSparkContext extends BeforeAndAfterEach with BeforeAndAfterAll { self @transient var sc: SparkContext = _ override def beforeAll() { - InternalLoggerFactory.setDefaultFactory(new Slf4JLoggerFactory()) super.beforeAll() + InternalLoggerFactory.setDefaultFactory(new Slf4JLoggerFactory()) } override def afterEach() { - resetSparkContext() - super.afterEach() + try { + resetSparkContext() + } finally { + super.afterEach() + } } def resetSparkContext(): Unit = { diff --git a/core/src/test/scala/org/apache/spark/SecurityManagerSuite.scala b/core/src/test/scala/org/apache/spark/SecurityManagerSuite.scala index 26b95c06789f7..e0226803bb1cf 100644 --- a/core/src/test/scala/org/apache/spark/SecurityManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/SecurityManagerSuite.scala @@ -19,9 +19,9 @@ package org.apache.spark import java.io.File -import org.apache.spark.util.{SparkConfWithEnv, Utils} +import org.apache.spark.util.{ResetSystemProperties, SparkConfWithEnv, Utils} -class SecurityManagerSuite extends SparkFunSuite { +class SecurityManagerSuite extends SparkFunSuite with ResetSystemProperties { test("set security with conf") { val conf = new SparkConf diff --git a/core/src/test/scala/org/apache/spark/SharedSparkContext.scala b/core/src/test/scala/org/apache/spark/SharedSparkContext.scala index 3d2700b7e6be4..858bc742e07cf 100644 --- a/core/src/test/scala/org/apache/spark/SharedSparkContext.scala +++ b/core/src/test/scala/org/apache/spark/SharedSparkContext.scala @@ -30,13 +30,16 @@ trait SharedSparkContext extends BeforeAndAfterAll { self: Suite => var conf = new SparkConf(false) override def beforeAll() { - _sc = new SparkContext("local[4]", "test", conf) super.beforeAll() + _sc = new SparkContext("local[4]", "test", conf) } override def afterAll() { - LocalSparkContext.stop(_sc) - _sc = null - super.afterAll() + try { + LocalSparkContext.stop(_sc) + _sc = null + } finally { + super.afterAll() + } } } diff --git a/core/src/test/scala/org/apache/spark/ShuffleNettySuite.scala b/core/src/test/scala/org/apache/spark/ShuffleNettySuite.scala index d78c99c2e1e06..73638d9b131ea 100644 --- a/core/src/test/scala/org/apache/spark/ShuffleNettySuite.scala +++ b/core/src/test/scala/org/apache/spark/ShuffleNettySuite.scala @@ -24,6 +24,7 @@ class ShuffleNettySuite extends ShuffleSuite with BeforeAndAfterAll { // This test suite should run all tests in ShuffleSuite with Netty shuffle mode. 
override def beforeAll() { + super.beforeAll() conf.set("spark.shuffle.blockTransferService", "netty") } } diff --git a/core/src/test/scala/org/apache/spark/SortShuffleSuite.scala b/core/src/test/scala/org/apache/spark/SortShuffleSuite.scala index b8ab227517cc4..5354731465a4a 100644 --- a/core/src/test/scala/org/apache/spark/SortShuffleSuite.scala +++ b/core/src/test/scala/org/apache/spark/SortShuffleSuite.scala @@ -37,10 +37,12 @@ class SortShuffleSuite extends ShuffleSuite with BeforeAndAfterAll { private var tempDir: File = _ override def beforeAll() { + super.beforeAll() conf.set("spark.shuffle.manager", "sort") } override def beforeEach(): Unit = { + super.beforeEach() tempDir = Utils.createTempDir() conf.set("spark.local.dir", tempDir.getAbsolutePath) } diff --git a/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala b/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala index 314517d296049..85c1c1bbf3dc1 100644 --- a/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala @@ -71,15 +71,18 @@ class StandaloneDynamicAllocationSuite } override def afterAll(): Unit = { - masterRpcEnv.shutdown() - workerRpcEnvs.foreach(_.shutdown()) - master.stop() - workers.foreach(_.stop()) - masterRpcEnv = null - workerRpcEnvs = null - master = null - workers = null - super.afterAll() + try { + masterRpcEnv.shutdown() + workerRpcEnvs.foreach(_.shutdown()) + master.stop() + workers.foreach(_.stop()) + masterRpcEnv = null + workerRpcEnvs = null + master = null + workers = null + } finally { + super.afterAll() + } } test("dynamic allocation default behavior") { diff --git a/core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala b/core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala index 1e5c05a73f8aa..415e2b37dbbdc 100644 --- a/core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala @@ -63,15 +63,18 @@ class AppClientSuite extends SparkFunSuite with LocalSparkContext with BeforeAnd } override def afterAll(): Unit = { - workerRpcEnvs.foreach(_.shutdown()) - masterRpcEnv.shutdown() - workers.foreach(_.stop()) - master.stop() - workerRpcEnvs = null - masterRpcEnv = null - workers = null - master = null - super.afterAll() + try { + workerRpcEnvs.foreach(_.shutdown()) + masterRpcEnv.shutdown() + workers.foreach(_.stop()) + master.stop() + workerRpcEnvs = null + masterRpcEnv = null + workers = null + master = null + } finally { + super.afterAll() + } } test("interface methods of AppClient using local Master") { diff --git a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala index 4b7fd4f13b692..18659fc0c18de 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala @@ -30,6 +30,7 @@ import org.scalatest.mock.MockitoSugar import org.apache.spark.{JsonTestUtils, SecurityManager, SparkConf, SparkFunSuite} import org.apache.spark.ui.{SparkUI, UIUtils} +import org.apache.spark.util.ResetSystemProperties /** * A collection of tests against the historyserver, including comparing responses from the json @@ -43,7 +44,7 @@ import org.apache.spark.ui.{SparkUI, UIUtils} * are considered part of Spark's 
public api. */ class HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with Matchers with MockitoSugar - with JsonTestUtils { + with JsonTestUtils with ResetSystemProperties { private val logDir = new File("src/test/resources/spark-events") private val expRoot = new File("src/test/resources/HistoryServerExpectations/") diff --git a/core/src/test/scala/org/apache/spark/deploy/rest/StandaloneRestSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/rest/StandaloneRestSubmitSuite.scala index 9693e32bf6af6..fa39aa2cb1311 100644 --- a/core/src/test/scala/org/apache/spark/deploy/rest/StandaloneRestSubmitSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/rest/StandaloneRestSubmitSuite.scala @@ -43,8 +43,12 @@ class StandaloneRestSubmitSuite extends SparkFunSuite with BeforeAndAfterEach { private var server: Option[RestSubmissionServer] = None override def afterEach() { - rpcEnv.foreach(_.shutdown()) - server.foreach(_.stop()) + try { + rpcEnv.foreach(_.shutdown()) + server.foreach(_.stop()) + } finally { + super.afterEach() + } } test("construct submit request") { diff --git a/core/src/test/scala/org/apache/spark/input/WholeTextFileRecordReaderSuite.scala b/core/src/test/scala/org/apache/spark/input/WholeTextFileRecordReaderSuite.scala index 8a199459c1ddf..24184b02cb4c1 100644 --- a/core/src/test/scala/org/apache/spark/input/WholeTextFileRecordReaderSuite.scala +++ b/core/src/test/scala/org/apache/spark/input/WholeTextFileRecordReaderSuite.scala @@ -47,6 +47,7 @@ class WholeTextFileRecordReaderSuite extends SparkFunSuite with BeforeAndAfterAl // hard-to-reproduce test failures, since any suites that were run after this one would inherit // the new value of "fs.local.block.size" (see SPARK-5227 and SPARK-5679). To work around this, // we disable FileSystem caching in this suite. 
+ super.beforeAll() val conf = new SparkConf().set("spark.hadoop.fs.file.impl.disable.cache", "true") sc = new SparkContext("local", "test", conf) @@ -59,7 +60,11 @@ class WholeTextFileRecordReaderSuite extends SparkFunSuite with BeforeAndAfterAl } override def afterAll() { - sc.stop() + try { + sc.stop() + } finally { + super.afterAll() + } } private def createNativeFile(inputDir: File, fileName: String, contents: Array[Byte], diff --git a/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferServiceSuite.scala b/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferServiceSuite.scala index 6f8e8a7ac6033..92daf4e6a2169 100644 --- a/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferServiceSuite.scala +++ b/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferServiceSuite.scala @@ -31,14 +31,18 @@ class NettyBlockTransferServiceSuite private var service1: NettyBlockTransferService = _ override def afterEach() { - if (service0 != null) { - service0.close() - service0 = null - } + try { + if (service0 != null) { + service0.close() + service0 = null + } - if (service1 != null) { - service1.close() - service1 = null + if (service1 != null) { + service1.close() + service1 = null + } + } finally { + super.afterEach() } } diff --git a/core/src/test/scala/org/apache/spark/rdd/AsyncRDDActionsSuite.scala b/core/src/test/scala/org/apache/spark/rdd/AsyncRDDActionsSuite.scala index de015ebd5d237..d18bde790b40a 100644 --- a/core/src/test/scala/org/apache/spark/rdd/AsyncRDDActionsSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/AsyncRDDActionsSuite.scala @@ -34,12 +34,17 @@ class AsyncRDDActionsSuite extends SparkFunSuite with BeforeAndAfterAll with Tim @transient private var sc: SparkContext = _ override def beforeAll() { + super.beforeAll() sc = new SparkContext("local[2]", "test") } override def afterAll() { - LocalSparkContext.stop(sc) - sc = null + try { + LocalSparkContext.stop(sc) + sc = null + } finally { + super.afterAll() + } } lazy val zeroPartRdd = new EmptyRDD[Int](sc) diff --git a/core/src/test/scala/org/apache/spark/rdd/LocalCheckpointSuite.scala b/core/src/test/scala/org/apache/spark/rdd/LocalCheckpointSuite.scala index 5103eb74b2457..3a22a9850a096 100644 --- a/core/src/test/scala/org/apache/spark/rdd/LocalCheckpointSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/LocalCheckpointSuite.scala @@ -29,6 +29,7 @@ import org.apache.spark.storage.{RDDBlockId, StorageLevel} class LocalCheckpointSuite extends SparkFunSuite with LocalSparkContext { override def beforeEach(): Unit = { + super.beforeEach() sc = new SparkContext("local[2]", "test") } diff --git a/core/src/test/scala/org/apache/spark/rpc/RpcEnvSuite.scala b/core/src/test/scala/org/apache/spark/rpc/RpcEnvSuite.scala index 7b3a17c17233a..9c850c0da52a3 100644 --- a/core/src/test/scala/org/apache/spark/rpc/RpcEnvSuite.scala +++ b/core/src/test/scala/org/apache/spark/rpc/RpcEnvSuite.scala @@ -44,6 +44,7 @@ abstract class RpcEnvSuite extends SparkFunSuite with BeforeAndAfterAll { var env: RpcEnv = _ override def beforeAll(): Unit = { + super.beforeAll() val conf = new SparkConf() env = createRpcEnv(conf, "local", 0) @@ -53,10 +54,14 @@ abstract class RpcEnvSuite extends SparkFunSuite with BeforeAndAfterAll { } override def afterAll(): Unit = { - if (env != null) { - env.shutdown() + try { + if (env != null) { + env.shutdown() + } + SparkEnv.set(null) + } finally { + super.afterAll() } - SparkEnv.set(null) } def createRpcEnv(conf: SparkConf, name: String, 
port: Int, clientMode: Boolean = false): RpcEnv diff --git a/core/src/test/scala/org/apache/spark/serializer/SerializationDebuggerSuite.scala b/core/src/test/scala/org/apache/spark/serializer/SerializationDebuggerSuite.scala index 2d5e9d66b2e15..683aaa3aab1ba 100644 --- a/core/src/test/scala/org/apache/spark/serializer/SerializationDebuggerSuite.scala +++ b/core/src/test/scala/org/apache/spark/serializer/SerializationDebuggerSuite.scala @@ -29,6 +29,7 @@ class SerializationDebuggerSuite extends SparkFunSuite with BeforeAndAfterEach { import SerializationDebugger.find override def beforeEach(): Unit = { + super.beforeEach() SerializationDebugger.enableDebugging = true } diff --git a/core/src/test/scala/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriterSuite.scala b/core/src/test/scala/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriterSuite.scala index d3b1b2b620b4d..bb331bb385df3 100644 --- a/core/src/test/scala/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriterSuite.scala +++ b/core/src/test/scala/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriterSuite.scala @@ -55,6 +55,7 @@ class BypassMergeSortShuffleWriterSuite extends SparkFunSuite with BeforeAndAfte private var shuffleHandle: BypassMergeSortShuffleHandle[Int, Int] = _ override def beforeEach(): Unit = { + super.beforeEach() tempDir = Utils.createTempDir() outputFile = File.createTempFile("shuffle", null, tempDir) taskMetrics = new TaskMetrics @@ -119,9 +120,13 @@ class BypassMergeSortShuffleWriterSuite extends SparkFunSuite with BeforeAndAfte } override def afterEach(): Unit = { - Utils.deleteRecursively(tempDir) - blockIdToFileMap.clear() - temporaryFilesCreated.clear() + try { + Utils.deleteRecursively(tempDir) + blockIdToFileMap.clear() + temporaryFilesCreated.clear() + } finally { + super.afterEach() + } } test("write empty iterator") { diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala index bf49be3d4c4fd..2224a444c7b54 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala @@ -79,6 +79,7 @@ class BlockManagerSuite extends SparkFunSuite with Matchers with BeforeAndAfterE } override def beforeEach(): Unit = { + super.beforeEach() rpcEnv = RpcEnv.create("test", "localhost", 0, conf, securityMgr) // Set the arch to 64-bit and compressedOops to true to get a deterministic test-case @@ -97,22 +98,26 @@ class BlockManagerSuite extends SparkFunSuite with Matchers with BeforeAndAfterE } override def afterEach(): Unit = { - if (store != null) { - store.stop() - store = null - } - if (store2 != null) { - store2.stop() - store2 = null - } - if (store3 != null) { - store3.stop() - store3 = null + try { + if (store != null) { + store.stop() + store = null + } + if (store2 != null) { + store2.stop() + store2 = null + } + if (store3 != null) { + store3.stop() + store3 = null + } + rpcEnv.shutdown() + rpcEnv.awaitTermination() + rpcEnv = null + master = null + } finally { + super.afterEach() } - rpcEnv.shutdown() - rpcEnv.awaitTermination() - rpcEnv = null - master = null } test("StorageLevel object caching") { diff --git a/core/src/test/scala/org/apache/spark/storage/DiskBlockManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/DiskBlockManagerSuite.scala index 688f56f4665f3..69e17461df755 100644 --- a/core/src/test/scala/org/apache/spark/storage/DiskBlockManagerSuite.scala +++ 
b/core/src/test/scala/org/apache/spark/storage/DiskBlockManagerSuite.scala @@ -45,19 +45,27 @@ class DiskBlockManagerSuite extends SparkFunSuite with BeforeAndAfterEach with B } override def afterAll() { - super.afterAll() - Utils.deleteRecursively(rootDir0) - Utils.deleteRecursively(rootDir1) + try { + Utils.deleteRecursively(rootDir0) + Utils.deleteRecursively(rootDir1) + } finally { + super.afterAll() + } } override def beforeEach() { + super.beforeEach() val conf = testConf.clone conf.set("spark.local.dir", rootDirs) diskBlockManager = new DiskBlockManager(blockManager, conf) } override def afterEach() { - diskBlockManager.stop() + try { + diskBlockManager.stop() + } finally { + super.afterEach() + } } test("basic block creation") { diff --git a/core/src/test/scala/org/apache/spark/storage/DiskBlockObjectWriterSuite.scala b/core/src/test/scala/org/apache/spark/storage/DiskBlockObjectWriterSuite.scala index 7c19531c18802..5d36617cfc447 100644 --- a/core/src/test/scala/org/apache/spark/storage/DiskBlockObjectWriterSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/DiskBlockObjectWriterSuite.scala @@ -30,11 +30,16 @@ class DiskBlockObjectWriterSuite extends SparkFunSuite with BeforeAndAfterEach { var tempDir: File = _ override def beforeEach(): Unit = { + super.beforeEach() tempDir = Utils.createTempDir() } override def afterEach(): Unit = { - Utils.deleteRecursively(tempDir) + try { + Utils.deleteRecursively(tempDir) + } finally { + super.afterEach() + } } test("verify write metrics") { diff --git a/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala b/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala index ceecfd665bf87..0e36d7fda430d 100644 --- a/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala +++ b/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala @@ -76,14 +76,19 @@ class UISeleniumSuite extends SparkFunSuite with WebBrowser with Matchers with B override def beforeAll(): Unit = { + super.beforeAll() webDriver = new HtmlUnitDriver { getWebClient.setCssErrorHandler(new SparkUICssErrorHandler) } } override def afterAll(): Unit = { - if (webDriver != null) { - webDriver.quit() + try { + if (webDriver != null) { + webDriver.quit() + } + } finally { + super.afterAll() } } diff --git a/core/src/test/scala/org/apache/spark/util/ClosureCleanerSuite2.scala b/core/src/test/scala/org/apache/spark/util/ClosureCleanerSuite2.scala index a829b099025e9..934385fbcad1b 100644 --- a/core/src/test/scala/org/apache/spark/util/ClosureCleanerSuite2.scala +++ b/core/src/test/scala/org/apache/spark/util/ClosureCleanerSuite2.scala @@ -38,14 +38,19 @@ class ClosureCleanerSuite2 extends SparkFunSuite with BeforeAndAfterAll with Pri private var closureSerializer: SerializerInstance = null override def beforeAll(): Unit = { + super.beforeAll() sc = new SparkContext("local", "test") closureSerializer = sc.env.closureSerializer.newInstance() } override def afterAll(): Unit = { - sc.stop() - sc = null - closureSerializer = null + try { + sc.stop() + sc = null + closureSerializer = null + } finally { + super.afterAll() + } } // Some fields and methods to reference in inner closures later diff --git a/core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala b/core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala index 101610e38014e..fbe7b956682d5 100644 --- a/core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala @@ -79,6 +79,10 @@ class 
SizeEstimatorSuite System.setProperty("spark.test.useCompressedOops", "true") } + override def afterEach(): Unit = { + super.afterEach() + } + test("simple classes") { assertResult(16)(SizeEstimator.estimate(new DummyClass1)) assertResult(16)(SizeEstimator.estimate(new DummyClass2)) diff --git a/mllib/src/test/scala/org/apache/spark/ml/source/libsvm/LibSVMRelationSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/source/libsvm/LibSVMRelationSuite.scala index 997f574e51f6a..5f4d5f11bdd68 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/source/libsvm/LibSVMRelationSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/source/libsvm/LibSVMRelationSuite.scala @@ -46,8 +46,11 @@ class LibSVMRelationSuite extends SparkFunSuite with MLlibTestSparkContext { } override def afterAll(): Unit = { - Utils.deleteRecursively(tempDir) - super.afterAll() + try { + Utils.deleteRecursively(tempDir) + } finally { + super.afterAll() + } } test("select as sparse vector") { diff --git a/mllib/src/test/scala/org/apache/spark/ml/util/TempDirectory.scala b/mllib/src/test/scala/org/apache/spark/ml/util/TempDirectory.scala index c8a0bb16247b4..8f11bbc8e47af 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/util/TempDirectory.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/util/TempDirectory.scala @@ -39,7 +39,10 @@ trait TempDirectory extends BeforeAndAfterAll { self: Suite => } override def afterAll(): Unit = { - Utils.deleteRecursively(_tempDir) - super.afterAll() + try { + Utils.deleteRecursively(_tempDir) + } finally { + super.afterAll() + } } } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/util/LocalClusterSparkContext.scala b/mllib/src/test/scala/org/apache/spark/mllib/util/LocalClusterSparkContext.scala index 525ab68c7921a..4f73b0809dca4 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/util/LocalClusterSparkContext.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/util/LocalClusterSparkContext.scala @@ -25,18 +25,21 @@ trait LocalClusterSparkContext extends BeforeAndAfterAll { self: Suite => @transient var sc: SparkContext = _ override def beforeAll() { + super.beforeAll() val conf = new SparkConf() .setMaster("local-cluster[2, 1, 1024]") .setAppName("test-cluster") .set("spark.akka.frameSize", "1") // set to 1MB to detect direct serialization of data sc = new SparkContext(conf) - super.beforeAll() } override def afterAll() { - if (sc != null) { - sc.stop() + try { + if (sc != null) { + sc.stop() + } + } finally { + super.afterAll() } - super.afterAll() } } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/util/MLlibTestSparkContext.scala b/mllib/src/test/scala/org/apache/spark/mllib/util/MLlibTestSparkContext.scala index 378139593b26f..ebcd591465cb5 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/util/MLlibTestSparkContext.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/util/MLlibTestSparkContext.scala @@ -38,12 +38,15 @@ trait MLlibTestSparkContext extends BeforeAndAfterAll { self: Suite => } override def afterAll() { - sqlContext = null - SQLContext.clearActive() - if (sc != null) { - sc.stop() + try { + sqlContext = null + SQLContext.clearActive() + if (sc != null) { + sc.stop() + } + sc = null + } finally { + super.afterAll() } - sc = null - super.afterAll() } } diff --git a/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala b/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala index 1360f09e7fa1f..05bf7a3aaefbf 100644 --- 
a/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala +++ b/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala @@ -72,13 +72,16 @@ class ExecutorClassLoaderSuite } override def afterAll() { - super.afterAll() - if (classServer != null) { - classServer.stop() + try { + if (classServer != null) { + classServer.stop() + } + Utils.deleteRecursively(tempDir1) + Utils.deleteRecursively(tempDir2) + SparkEnv.set(null) + } finally { + super.afterAll() } - Utils.deleteRecursively(tempDir1) - Utils.deleteRecursively(tempDir2) - SparkEnv.set(null) } test("child first") { diff --git a/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala index f5f446f14a0da..4d04138da01f7 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala @@ -37,7 +37,8 @@ import org.apache.spark.{SparkConf, SparkContext, SparkFunSuite, TestUtils} import org.apache.spark.rdd.RDD import org.apache.spark.streaming.dstream._ import org.apache.spark.streaming.scheduler._ -import org.apache.spark.util.{Clock, ManualClock, MutableURLClassLoader, Utils} +import org.apache.spark.util.{Clock, ManualClock, MutableURLClassLoader, ResetSystemProperties, + Utils} /** * A input stream that records the times of restore() invoked @@ -196,7 +197,8 @@ trait DStreamCheckpointTester { self: SparkFunSuite => * the checkpointing of a DStream's RDDs as well as the checkpointing of * the whole DStream graph. */ -class CheckpointSuite extends TestSuiteBase with DStreamCheckpointTester { +class CheckpointSuite extends TestSuiteBase with DStreamCheckpointTester + with ResetSystemProperties { var ssc: StreamingContext = null @@ -208,9 +210,12 @@ class CheckpointSuite extends TestSuiteBase with DStreamCheckpointTester { } override def afterFunction() { - super.afterFunction() - if (ssc != null) { ssc.stop() } - Utils.deleteRecursively(new File(checkpointDir)) + try { + if (ssc != null) { ssc.stop() } + Utils.deleteRecursively(new File(checkpointDir)) + } finally { + super.afterFunction() + } } test("basic rdd checkpoints + dstream graph checkpoint recovery") { diff --git a/streaming/src/test/scala/org/apache/spark/streaming/DStreamClosureSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/DStreamClosureSuite.scala index 9b5e4dc819a2b..e897de3cba6d2 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/DStreamClosureSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/DStreamClosureSuite.scala @@ -33,13 +33,18 @@ class DStreamClosureSuite extends SparkFunSuite with BeforeAndAfterAll { private var ssc: StreamingContext = null override def beforeAll(): Unit = { + super.beforeAll() val sc = new SparkContext("local", "test") ssc = new StreamingContext(sc, Seconds(1)) } override def afterAll(): Unit = { - ssc.stop(stopSparkContext = true) - ssc = null + try { + ssc.stop(stopSparkContext = true) + ssc = null + } finally { + super.afterAll() + } } test("user provided closures are actually cleaned") { diff --git a/streaming/src/test/scala/org/apache/spark/streaming/DStreamScopeSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/DStreamScopeSuite.scala index bc223e648a417..4c12ecc399e41 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/DStreamScopeSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/DStreamScopeSuite.scala @@ -35,13 
+35,18 @@ class DStreamScopeSuite extends SparkFunSuite with BeforeAndAfter with BeforeAnd private val batchDuration: Duration = Seconds(1) override def beforeAll(): Unit = { + super.beforeAll() val conf = new SparkConf().setMaster("local").setAppName("test") conf.set("spark.streaming.clock", classOf[ManualClock].getName()) ssc = new StreamingContext(new SparkContext(conf), batchDuration) } override def afterAll(): Unit = { - ssc.stop(stopSparkContext = true) + try { + ssc.stop(stopSparkContext = true) + } finally { + super.afterAll() + } } before { assertPropertiesNotSet() } diff --git a/streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala index 6b21433f1781b..62d75a9e0e7aa 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala @@ -49,14 +49,19 @@ class MapWithStateSuite extends SparkFunSuite } override def beforeAll(): Unit = { + super.beforeAll() val conf = new SparkConf().setMaster("local").setAppName("MapWithStateSuite") conf.set("spark.streaming.clock", classOf[ManualClock].getName()) sc = new SparkContext(conf) } override def afterAll(): Unit = { - if (sc != null) { - sc.stop() + try { + if (sc != null) { + sc.stop() + } + } finally { + super.afterAll() } } diff --git a/streaming/src/test/scala/org/apache/spark/streaming/ReceiverInputDStreamSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/ReceiverInputDStreamSuite.scala index 6d388d9624d92..e6d8fbd4d7c57 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/ReceiverInputDStreamSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/ReceiverInputDStreamSuite.scala @@ -33,7 +33,11 @@ import org.apache.spark.{SparkConf, SparkEnv} class ReceiverInputDStreamSuite extends TestSuiteBase with BeforeAndAfterAll { override def afterAll(): Unit = { - StreamingContext.getActive().map { _.stop() } + try { + StreamingContext.getActive().map { _.stop() } + } finally { + super.afterAll() + } } testWithoutWAL("createBlockRDD creates empty BlockRDD when no block info") { receiverStream => diff --git a/streaming/src/test/scala/org/apache/spark/streaming/UISeleniumSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/UISeleniumSuite.scala index a5744a9009c1c..c4ecebcacf3c8 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/UISeleniumSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/UISeleniumSuite.scala @@ -38,14 +38,19 @@ class UISeleniumSuite implicit var webDriver: WebDriver = _ override def beforeAll(): Unit = { + super.beforeAll() webDriver = new HtmlUnitDriver { getWebClient.setCssErrorHandler(new SparkUICssErrorHandler) } } override def afterAll(): Unit = { - if (webDriver != null) { - webDriver.quit() + try { + if (webDriver != null) { + webDriver.quit() + } + } finally { + super.afterAll() } } diff --git a/streaming/src/test/scala/org/apache/spark/streaming/rdd/MapWithStateRDDSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/rdd/MapWithStateRDDSuite.scala index aa95bd33dda9f..1640b9e6b7a6c 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/rdd/MapWithStateRDDSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/rdd/MapWithStateRDDSuite.scala @@ -36,6 +36,7 @@ class MapWithStateRDDSuite extends SparkFunSuite with RDDCheckpointTester with B private var checkpointDir: File = _ override def beforeAll(): 
Unit = { + super.beforeAll() sc = new SparkContext( new SparkConf().setMaster("local").setAppName("MapWithStateRDDSuite")) checkpointDir = Utils.createTempDir() @@ -43,10 +44,14 @@ class MapWithStateRDDSuite extends SparkFunSuite with RDDCheckpointTester with B } override def afterAll(): Unit = { - if (sc != null) { - sc.stop() + try { + if (sc != null) { + sc.stop() + } + Utils.deleteRecursively(checkpointDir) + } finally { + super.afterAll() } - Utils.deleteRecursively(checkpointDir) } override def sparkContext: SparkContext = sc diff --git a/streaming/src/test/scala/org/apache/spark/streaming/rdd/WriteAheadLogBackedBlockRDDSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/rdd/WriteAheadLogBackedBlockRDDSuite.scala index cb017b798b2a4..43833c4361473 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/rdd/WriteAheadLogBackedBlockRDDSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/rdd/WriteAheadLogBackedBlockRDDSuite.scala @@ -42,22 +42,32 @@ class WriteAheadLogBackedBlockRDDSuite var dir: File = null override def beforeEach(): Unit = { + super.beforeEach() dir = Utils.createTempDir() } override def afterEach(): Unit = { - Utils.deleteRecursively(dir) + try { + Utils.deleteRecursively(dir) + } finally { + super.afterEach() + } } override def beforeAll(): Unit = { + super.beforeAll() sparkContext = new SparkContext(conf) blockManager = sparkContext.env.blockManager } override def afterAll(): Unit = { // Copied from LocalSparkContext, simpler than to introduced test dependencies to core tests. - sparkContext.stop() - System.clearProperty("spark.driver.port") + try { + sparkContext.stop() + System.clearProperty("spark.driver.port") + } finally { + super.afterAll() + } } test("Read data available in both block manager and write ahead log") { diff --git a/streaming/src/test/scala/org/apache/spark/streaming/util/WriteAheadLogSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/util/WriteAheadLogSuite.scala index ef1e89df31305..beaae34535fd6 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/util/WriteAheadLogSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/util/WriteAheadLogSuite.scala @@ -432,6 +432,7 @@ class BatchedWriteAheadLogSuite extends CommonWriteAheadLogTests( private val queueLength = PrivateMethod[Int]('getQueueLength) override def beforeEach(): Unit = { + super.beforeEach() wal = mock[WriteAheadLog] walHandle = mock[WriteAheadLogRecordHandle] walBatchingThreadPool = ThreadUtils.newDaemonFixedThreadPool(8, "wal-test-thread-pool") @@ -439,8 +440,12 @@ class BatchedWriteAheadLogSuite extends CommonWriteAheadLogTests( } override def afterEach(): Unit = { - if (walBatchingExecutionContext != null) { - walBatchingExecutionContext.shutdownNow() + try { + if (walBatchingExecutionContext != null) { + walBatchingExecutionContext.shutdownNow() + } + } finally { + super.afterEach() } } diff --git a/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala b/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala index 12494b01054ba..cd24c704ece5b 100644 --- a/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala +++ b/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala @@ -27,6 +27,7 @@ import scala.language.postfixOps import com.google.common.base.Charsets.UTF_8 import com.google.common.io.Files +import org.apache.commons.lang3.SerializationUtils import org.apache.hadoop.yarn.conf.YarnConfiguration import 
 import org.apache.hadoop.yarn.server.MiniYARNCluster
 import org.scalatest.{BeforeAndAfterAll, Matchers}
@@ -59,10 +60,13 @@ abstract class BaseYarnClusterSuite
   protected var hadoopConfDir: File = _
   private var logConfDir: File = _
 
+  var oldSystemProperties: Properties = null
+
   def newYarnConfig(): YarnConfiguration
 
   override def beforeAll() {
     super.beforeAll()
+    oldSystemProperties = SerializationUtils.clone(System.getProperties)
 
     tempDir = Utils.createTempDir()
     logConfDir = new File(tempDir, "log4j")
@@ -115,9 +119,12 @@ abstract class BaseYarnClusterSuite
   }
 
   override def afterAll() {
-    yarnCluster.stop()
-    System.clearProperty("SPARK_YARN_MODE")
-    super.afterAll()
+    try {
+      yarnCluster.stop()
+    } finally {
+      System.setProperties(oldSystemProperties)
+      super.afterAll()
+    }
   }
 
   protected def runSpark(
diff --git a/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala b/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala
index e7f2501e7899f..7709c2f6e4f5f 100644
--- a/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala
+++ b/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala
@@ -19,12 +19,14 @@ package org.apache.spark.deploy.yarn
 
 import java.io.File
 import java.net.URI
+import java.util.Properties
 
 import scala.collection.JavaConverters._
 import scala.collection.mutable.{HashMap => MutableHashMap}
 import scala.reflect.ClassTag
 import scala.util.Try
 
+import org.apache.commons.lang3.SerializationUtils
 import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs.Path
 import org.apache.hadoop.mapreduce.MRJobConfig
@@ -39,16 +41,26 @@ import org.mockito.Mockito._
 import org.scalatest.{BeforeAndAfterAll, Matchers}
 
 import org.apache.spark.{SparkConf, SparkFunSuite}
-import org.apache.spark.util.Utils
+import org.apache.spark.util.{Utils, ResetSystemProperties}
 
-class ClientSuite extends SparkFunSuite with Matchers with BeforeAndAfterAll {
+class ClientSuite extends SparkFunSuite with Matchers with BeforeAndAfterAll
+  with ResetSystemProperties {
+
+  var oldSystemProperties: Properties = null
 
   override def beforeAll(): Unit = {
+    super.beforeAll()
+    oldSystemProperties = SerializationUtils.clone(System.getProperties)
     System.setProperty("SPARK_YARN_MODE", "true")
   }
 
   override def afterAll(): Unit = {
-    System.clearProperty("SPARK_YARN_MODE")
+    try {
+      System.setProperties(oldSystemProperties)
+      oldSystemProperties = null
+    } finally {
+      super.afterAll()
+    }
   }
 
   test("default Yarn application classpath") {
diff --git a/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorSuite.scala b/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorSuite.scala
index bd80036c5cfa7..57edbd67253d4 100644
--- a/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorSuite.scala
+++ b/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorSuite.scala
@@ -72,13 +72,18 @@ class YarnAllocatorSuite extends SparkFunSuite with Matchers with BeforeAndAfter
   var containerNum = 0
 
   override def beforeEach() {
+    super.beforeEach()
     rmClient = AMRMClient.createAMRMClient()
     rmClient.init(conf)
     rmClient.start()
   }
 
   override def afterEach() {
-    rmClient.stop()
+    try {
+      rmClient.stop()
+    } finally {
+      super.afterEach()
+    }
   }
 
   class MockSplitInfo(host: String) extends SplitInfo(null, host, null, 1, null) {
diff --git a/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtilSuite.scala b/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtilSuite.scala
index 3fafc91a166aa..c2861c9d7fbc7 100644
--- a/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtilSuite.scala
+++ b/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtilSuite.scala
@@ -34,10 +34,11 @@ import org.apache.hadoop.yarn.api.records.ApplicationAccessType
 
 import org.apache.spark.{Logging, SecurityManager, SparkConf, SparkException, SparkFunSuite}
 import org.apache.spark.deploy.SparkHadoopUtil
-import org.apache.spark.util.Utils
+import org.apache.spark.util.{Utils, ResetSystemProperties}
 
-class YarnSparkHadoopUtilSuite extends SparkFunSuite with Matchers with Logging {
+class YarnSparkHadoopUtilSuite extends SparkFunSuite with Matchers with Logging
+  with ResetSystemProperties {
 
   val hasBash =
     try {
diff --git a/yarn/src/test/scala/org/apache/spark/network/yarn/YarnShuffleServiceSuite.scala b/yarn/src/test/scala/org/apache/spark/network/yarn/YarnShuffleServiceSuite.scala
index 6aa8c814cd4f0..5a426b86d10e0 100644
--- a/yarn/src/test/scala/org/apache/spark/network/yarn/YarnShuffleServiceSuite.scala
+++ b/yarn/src/test/scala/org/apache/spark/network/yarn/YarnShuffleServiceSuite.scala
@@ -34,6 +34,7 @@ class YarnShuffleServiceSuite extends SparkFunSuite with Matchers with BeforeAnd
   private[yarn] var yarnConfig: YarnConfiguration = new YarnConfiguration
 
   override def beforeEach(): Unit = {
+    super.beforeEach()
    yarnConfig.set(YarnConfiguration.NM_AUX_SERVICES, "spark_shuffle")
    yarnConfig.set(YarnConfiguration.NM_AUX_SERVICE_FMT.format("spark_shuffle"),
      classOf[YarnShuffleService].getCanonicalName)
@@ -54,17 +55,21 @@ class YarnShuffleServiceSuite extends SparkFunSuite with Matchers with BeforeAnd
   var s3: YarnShuffleService = null
 
   override def afterEach(): Unit = {
-    if (s1 != null) {
-      s1.stop()
-      s1 = null
-    }
-    if (s2 != null) {
-      s2.stop()
-      s2 = null
-    }
-    if (s3 != null) {
-      s3.stop()
-      s3 = null
+    try {
+      if (s1 != null) {
+        s1.stop()
+        s1 = null
+      }
+      if (s2 != null) {
+        s2.stop()
+        s2 = null
+      }
+      if (s3 != null) {
+        s3.stop()
+        s3 = null
+      }
+    } finally {
+      super.afterEach()
     }
   }

From 502476e45c314a1229b3bce1c61f5cb94a9fc04b Mon Sep 17 00:00:00 2001
From: CK50
Date: Thu, 24 Dec 2015 13:39:11 +0000
Subject: [PATCH 5/6] [SPARK-12010][SQL] Spark JDBC requires support for column-name-free INSERT syntax

In the past, the Spark JDBC writer only worked with technologies that support
the following INSERT statement syntax (JdbcUtils.scala: insertStatement()):

INSERT INTO $table VALUES ( ?, ?, ..., ? )

But some technologies require a list of column names:

INSERT INTO $table ( $colNameList ) VALUES ( ?, ?, ..., ? )

This was blocking the use of e.g. the Progress JDBC Driver for Cassandra.

Another limitation is that the first syntax relies on the dataframe field
ordering matching that of the target table. This works fine as long as the
target table has been created by writer.jdbc(). If the target table contains
more columns (not created by writer.jdbc()), the insert fails due to a
mismatch in the number of columns or their data types.

This PR switches to the recommended second INSERT syntax. Column names are
taken from the dataframe field names.

Author: CK50

Closes #10380 from CK50/master-SPARK-12010-2.
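For illustration only, a minimal sketch of the statement this change builds, assuming a
hypothetical table named "people" and a dataframe schema with fields "id" and "name"
(all names are invented for this example and are not part of the patch):

    import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}

    // Hypothetical schema; the field names exist only for this illustration.
    val rddSchema = StructType(Seq(
      StructField("id", IntegerType),
      StructField("name", StringType)))

    // Same construction as insertStatement(): column list plus one "?" per field.
    val columns = rddSchema.fields.map(_.name).mkString(",")        // "id,name"
    val placeholders = rddSchema.fields.map(_ => "?").mkString(",") // "?,?"
    val sql = s"INSERT INTO people ($columns) VALUES ($placeholders)"
    // sql == "INSERT INTO people (id,name) VALUES (?,?)"

With this syntax the driver binds each parameter to a named column, so the dataframe
field order no longer has to match the physical column order of the target table.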
---
 .../sql/execution/datasources/jdbc/JdbcUtils.scala | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala
index 252f1cfd5d9c5..28cd688ef7d7a 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala
@@ -63,14 +63,10 @@ object JdbcUtils extends Logging {
    * Returns a PreparedStatement that inserts a row into table via conn.
    */
   def insertStatement(conn: Connection, table: String, rddSchema: StructType): PreparedStatement = {
-    val sql = new StringBuilder(s"INSERT INTO $table VALUES (")
-    var fieldsLeft = rddSchema.fields.length
-    while (fieldsLeft > 0) {
-      sql.append("?")
-      if (fieldsLeft > 1) sql.append(", ") else sql.append(")")
-      fieldsLeft = fieldsLeft - 1
-    }
-    conn.prepareStatement(sql.toString())
+    val columns = rddSchema.fields.map(_.name).mkString(",")
+    val placeholders = rddSchema.fields.map(_ => "?").mkString(",")
+    val sql = s"INSERT INTO $table ($columns) VALUES ($placeholders)"
+    conn.prepareStatement(sql)
   }
 
   /**

From ea4aab7e87fbcf9ac90f93af79cc892b56508aa0 Mon Sep 17 00:00:00 2001
From: pierre-borckmans
Date: Thu, 24 Dec 2015 13:48:21 +0000
Subject: [PATCH 6/6] [SPARK-12440][CORE] Avoid setCheckpoint warning when directory is not local

In the `SparkContext` method `setCheckpointDir`, a warning is issued when the
Spark master is not local and the supplied checkpoint directory appears to be
local.

In practice, when relying on the HDFS configuration files and using a relative
path for the checkpoint directory (an incomplete URI without the HDFS
scheme, ...), this warning should not be issued and is confusing. In that case
the checkpoint directory is created successfully and the checkpointing
mechanism works as expected.

This PR uses the `FileSystem` instance created from the given directory and
checks whether that filesystem is local. The rationale is that this same
`FileSystem` instance is used to create the checkpoint directory anyway, so it
can reliably determine whether the directory is local. The warning is now only
issued if the directory is not local, on top of the existing conditions.

Author: pierre-borckmans

Closes #10392 from pierre-borckmans/SPARK-12440_CheckpointDir_Warning_NonLocal.
---
 core/src/main/scala/org/apache/spark/SparkContext.scala | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
index 67230f4207b83..d506782b73c43 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -2073,8 +2073,9 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
     // its own local file system, which is incorrect because the checkpoint files
     // are actually on the executor machines.
     if (!isLocal && Utils.nonLocalPaths(directory).isEmpty) {
-      logWarning("Checkpoint directory must be non-local " +
-        "if Spark is running on a cluster: " + directory)
+      logWarning("Spark is not running in local mode, therefore the checkpoint directory " +
+        s"must not be on the local filesystem. Directory '$directory' " +
+        "appears to be on the local filesystem.")
     }
     checkpointDir = Option(directory).map { dir =>