Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add JobLogger to Spark #573

Closed
wants to merge 15 commits into from
1 change: 1 addition & 0 deletions core/src/main/scala/spark/BlockStoreShuffleFetcher.scala
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ private[spark] class BlockStoreShuffleFetcher extends ShuffleFetcher with Loggin
itr.setDelegate(blockFetcherItr)
CompletionIterator[(K,V), Iterator[(K,V)]](itr, {
val shuffleMetrics = new ShuffleReadMetrics
shuffleMetrics.shuffleFinishTime = System.currentTimeMillis
shuffleMetrics.shuffleReadMillis = itr.getNetMillis
shuffleMetrics.remoteFetchTime = itr.remoteFetchTime
shuffleMetrics.fetchWaitTime = itr.fetchWaitTime
Expand Down
10 changes: 9 additions & 1 deletion core/src/main/scala/spark/RDD.scala
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,14 @@ abstract class RDD[T: ClassManifest](
name = _name
this
}

/**User-defined generator of this RDD*/
var generator = Utils.getCallSiteInfo._4

/**reset generator*/
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see this used anywhere else. What is the purpose of this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just like `setName` above, this is used to set the generator field intentionally.

/**
 * Sets the user-defined generator of this RDD (stored in the `generator` var above).
 * Like `setName`, this lets callers intentionally override the value that was
 * auto-derived from the call site via `Utils.getCallSiteInfo`.
 */
def setGenerator(_generator: String) = {
generator = _generator
}

/**
* Set this RDD's storage level to persist its values across operations after the first time
Expand Down Expand Up @@ -749,7 +757,7 @@ abstract class RDD[T: ClassManifest](
private var storageLevel: StorageLevel = StorageLevel.NONE

/** Record user function generating this RDD. */
private[spark] val origin = Utils.getSparkCallSite
private[spark] val origin = Utils.formatSparkCallSite

private[spark] def elementClassManifest: ClassManifest[T] = classManifest[T]

Expand Down
12 changes: 8 additions & 4 deletions core/src/main/scala/spark/SparkContext.scala
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ import spark.scheduler.mesos.{CoarseMesosSchedulerBackend, MesosSchedulerBackend
import spark.storage.BlockManagerUI
import spark.util.{MetadataCleaner, TimeStampedHashMap}
import spark.storage.{StorageStatus, StorageUtils, RDDInfo}

import scala.util.DynamicVariable
/**
* Main entry point for Spark functionality. A SparkContext represents the connection to a Spark
* cluster, and can be used to create RDDs, accumulators and broadcast variables on that cluster.
Expand All @@ -65,6 +65,9 @@ class SparkContext(
// Ensure logging is initialized before we spawn any threads
initLogging()

// Allows higher layer frameworks to describe the context of a job
val annotation = new DynamicVariable[String]("")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rather than having this be a possibly blank string, how about:

new DynamicVariable[Option[String]](None)


// Set Spark driver host and port system properties
if (System.getProperty("spark.driver.host") == null) {
System.setProperty("spark.driver.host", Utils.localIpAddress)
Expand Down Expand Up @@ -575,10 +578,11 @@ class SparkContext(
partitions: Seq[Int],
allowLocal: Boolean,
resultHandler: (Int, U) => Unit) {
val callSite = Utils.getSparkCallSite
val callSite = Utils.formatSparkCallSite
logInfo("Starting job: " + callSite)
val start = System.nanoTime
val result = dagScheduler.runJob(rdd, func, partitions, callSite, allowLocal, resultHandler)
val result = dagScheduler.runJob(rdd, func, partitions, callSite + "|" +
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rather than piggyback on callsite to add the annotation. Could you instead add another argument to runJob, called annotation, which accepts an Option[String]?

annotation.value.toString, allowLocal, resultHandler)
logInfo("Job finished: " + callSite + ", took " + (System.nanoTime - start) / 1e9 + " s")
rdd.doCheckpoint()
result
Expand Down Expand Up @@ -659,7 +663,7 @@ class SparkContext(
evaluator: ApproximateEvaluator[U, R],
timeout: Long
): PartialResult[R] = {
val callSite = Utils.getSparkCallSite
val callSite = Utils.formatSparkCallSite
logInfo("Starting job: " + callSite)
val start = System.nanoTime
val result = dagScheduler.runApproximateJob(rdd, func, evaluator, callSite, timeout)
Expand Down
10 changes: 8 additions & 2 deletions core/src/main/scala/spark/Utils.scala
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,7 @@ private object Utils extends Logging {
* (outside the spark package) that called into Spark, as well as which Spark method they called.
* This is used, for example, to tell users where in their code each RDD got created.
*/
def getSparkCallSite: String = {
def getCallSiteInfo = {
val trace = Thread.currentThread.getStackTrace().filter( el =>
(!el.getMethodName.contains("getStackTrace")))

Expand All @@ -413,6 +413,7 @@ private object Utils extends Logging {
var firstUserFile = "<unknown>"
var firstUserLine = 0
var finished = false
var firstUserClass = "<unknown>"

for (el <- trace) {
if (!finished) {
Expand All @@ -427,13 +428,18 @@ private object Utils extends Logging {
else {
firstUserLine = el.getLineNumber
firstUserFile = el.getFileName
firstUserClass = el.getClassName
finished = true
}
}
}
"%s at %s:%s".format(lastSparkMethod, firstUserFile, firstUserLine)
(lastSparkMethod, firstUserFile, firstUserLine, firstUserClass)
}

def formatSparkCallSite = {
val callSiteInfo = getCallSiteInfo
"%s_%s_%s_%s".format(callSiteInfo._1, callSiteInfo._2, callSiteInfo._3, callSiteInfo._4)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you make this formatting match what was there before, rather than changing it to this one with underscores? It's fine to leave out the fourth entry; just make sure it matches what we used to have. The reason I proposed this refactoring was so that you could leave the existing log messages unchanged.

}
/**
* Try to find a free port to bind to on the local host. This should ideally never be needed,
* except that, unfortunately, some of the networking libraries we currently rely on (e.g. Spray)
Expand Down
4 changes: 3 additions & 1 deletion core/src/main/scala/spark/executor/TaskMetrics.scala
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,12 @@ object TaskMetrics {


class ShuffleReadMetrics extends Serializable {

var shuffleFinishTime: Long = _
/**
* Total number of blocks fetched in a shuffle (remote or local)
*/
var totalBlocksFetched : Int = _
var totalBlocksFetched: Int = _

/**
* Number of remote blocks fetched in a shuffle
Expand Down
16 changes: 14 additions & 2 deletions core/src/main/scala/spark/scheduler/DAGScheduler.scala
Original file line number Diff line number Diff line change
Expand Up @@ -275,11 +275,16 @@ class DAGScheduler(
private[scheduler] def processEvent(event: DAGSchedulerEvent): Boolean = {
event match {
case JobSubmitted(finalRDD, func, partitions, allowLocal, callSite, listener) =>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be good to add an annotation field to JobSubmitted as well, (of type Option[String]) rather than using string parsing for this.

val callSites = callSite.split("\\|",2)
var jobInfo = ""
if (callSites.size == 2)
jobInfo = callSites(1)
val runId = nextRunId.getAndIncrement()
val finalStage = newStage(finalRDD, None, runId)
val job = new ActiveJob(runId, finalStage, func, partitions, callSite, listener)
val job = new ActiveJob(runId, finalStage, func, partitions, callSites(0), listener)
clearCacheLocs()
logInfo("Got job " + job.runId + " (" + callSite + ") with " + partitions.length +
sparkListeners.foreach{_.onJobStart(job, jobInfo)}
logInfo("Got job " + job.runId + " (" + callSites(0) + ") with " + partitions.length +
" output partitions (allowLocal=" + allowLocal + ")")
logInfo("Final stage: " + finalStage + " (" + finalStage.origin + ")")
logInfo("Parents of final stage: " + finalStage.parents)
Expand All @@ -297,6 +302,7 @@ class DAGScheduler(
handleExecutorLost(execId)

case completion: CompletionEvent =>
sparkListeners.foreach{_.onTaskEnd(completion)}
handleTaskCompletion(completion)

case TaskSetFailed(taskSet, reason) =>
Expand All @@ -307,6 +313,8 @@ class DAGScheduler(
for (job <- activeJobs) {
val error = new SparkException("Job cancelled because SparkContext was shut down")
job.listener.jobFailed(error)
val JobCancelEvent = new SparkListenerJobCancelled("SPARKCONTEXT_SHUTDOWN")
sparkListeners.foreach{_.onJobEnd(job, JobCancelEvent)}
}
return true
}
Expand Down Expand Up @@ -454,6 +462,7 @@ class DAGScheduler(
}
}
if (tasks.size > 0) {
sparkListeners.foreach{_.onStageSubmitted(stage, "TASKS_SIZE=" + tasks.size)}
logInfo("Submitting " + tasks.size + " missing tasks from " + stage + " (" + stage.rdd + ")")
myPending ++= tasks
logDebug("New pending tasks: " + myPending)
Expand Down Expand Up @@ -507,6 +516,7 @@ class DAGScheduler(
activeJobs -= job
resultStageToJob -= stage
markStageAsFinished(stage)
sparkListeners.foreach{_.onJobEnd(job, SparkListenerJobSuccess)}
}
job.listener.taskSucceeded(rt.outputId, event.result)
}
Expand Down Expand Up @@ -642,6 +652,8 @@ class DAGScheduler(
job.listener.jobFailed(new SparkException("Job failed: " + reason))
activeJobs -= job
resultStageToJob -= resultStage
val jobFailedEvent = new SparkListenerJobFailed(failedStage)
sparkListeners.foreach{_.onJobEnd(job, jobFailedEvent)}
}
if (dependentStages.isEmpty) {
logInfo("Ignoring failure of " + failedStage + " because all jobs depending on it are done")
Expand Down
Loading