GoogleChrome · paulirish · Sep 26, 2019 · Jul 31, 2019 · Aug 5, 2019 · Aug 5, 2019
diff --git a/lighthouse-core/lib/tracehouse/main-thread-tasks.js b/lighthouse-core/lib/tracehouse/main-thread-tasks.js
@@ -44,104 +44,285 @@ const {taskGroups, taskNameToGroup} = require('./task-groups.js');
 class MainThreadTasks {
   /**
    * @param {LH.TraceEvent} event
-   * @param {TaskNode} [parent]
+   * @param {Pick<LH.TraceEvent, 'ph'|'ts'>} [endEvent]
    * @return {TaskNode}
    */
-  static _createNewTaskNode(event, parent) {
+  static _createNewTaskNode(event, endEvent) {
+    const isCompleteEvent = event.ph === 'X' && !endEvent;
+    const isStartEndEventPair = event.ph === 'B' && endEvent && endEvent.ph === 'E';
+    if (!isCompleteEvent && !isStartEndEventPair) {
+      throw new Error('Invalid parameters for _createNewTaskNode');
+    }
+
+    const startTime = event.ts;
+    const endTime = endEvent ? endEvent.ts : event.ts + Number(event.dur || 0);
+
     const newTask = {
       event,
-      startTime: event.ts,
-      endTime: event.ph === 'X' ? event.ts + Number(event.dur || 0) : NaN,
-      parent: parent,
-      children: [],
+      startTime,
+      endTime,
+      duration: endTime - startTime,
 
       // These properties will be filled in later
+      parent: undefined,
+      children: [],
       attributableURLs: [],
       group: taskGroups.other,
-      duration: NaN,
       selfTime: NaN,
     };
 
-    if (parent) {
-      parent.children.push(newTask);
-    }
-
     return newTask;
   }
 
   /**
-   * @param {LH.TraceEvent[]} mainThreadEvents
+   *
+   * @param {TaskNode} currentTask
+   * @param {{startTime: number}} nextTask
    * @param {PriorTaskData} priorTaskData
+   * @param {Array<LH.TraceEvent>} reverseEventsQueue
+   */
+  static _assignAllTimerInstallsBetweenTasks(
   it('should compute attributableURLs correctly across timers', () => { 
     const baseTs = 1241250325; 
     const url = s => ({args: {data: {url: s}}}); 
     const stackFrames = f => ({args: {data: {stackTrace: f.map(url => ({url}))}}}); 
     const timerId = id => ({args: {data: {timerId: id}}}); 
     /* 
     An artistic rendering of the below trace: 
     █████████████████████████████TaskA██████████████████████████████████████████████ 
           ████████████████TaskB███████████████████                  █Timer Fire█ 
                ████EvaluateScript██████                               █TaskE█ 
                    | <-- Timer Install 
     */ 
     const traceEvents = [ 
       ...boilerplateTrace, 
       {ph: 'X', name: 'TaskA', pid, tid, ts: baseTs, dur: 100e3, ...url('about:blank')}, 
       {ph: 'B', name: 'TaskB', pid, tid, ts: baseTs + 5e3, ...stackFrames(['urlB.1', 'urlB.2'])}, 
       {ph: 'X', name: 'EvaluateScript', pid, tid, ts: baseTs + 10e3, dur: 30e3, ...url('urlC')}, 
       {ph: 'I', name: 'TimerInstall', pid, tid, ts: baseTs + 15e3, ...timerId(1)}, 
       {ph: 'E', name: 'TaskB', pid, tid, ts: baseTs + 55e3}, 
       {ph: 'X', name: 'TimerFire', pid, tid, ts: baseTs + 75e3, dur: 10e3, ...timerId(1)}, 
       {ph: 'X', name: 'TaskE', pid, tid, ts: baseTs + 80e3, dur: 5e3, ...stackFrames(['urlD'])}, 
     ]; 
     traceEvents.forEach(evt => { 
       evt.cat = 'devtools.timeline'; 
       evt.args = evt.args || args; 
     }); 
     const tasks = run({traceEvents}); 
     const taskA = tasks.find(task => task.event.name === 'TaskA'); 
     const taskB = tasks.find(task => task.event.name === 'TaskB'); 
     const taskC = tasks.find(task => task.event.name === 'EvaluateScript'); 
     const taskD = tasks.find(task => task.event.name === 'TimerFire'); 
     const taskE = tasks.find(task => task.event.name === 'TaskE'); 
     expect(taskA.attributableURLs).toEqual([]); 
     expect(taskB.attributableURLs).toEqual(['urlB.1', 'urlB.2']); 
     expect(taskC.attributableURLs).toEqual(['urlB.1', 'urlB.2', 'urlC']); 
     expect(taskD.attributableURLs).toEqual(['urlB.1', 'urlB.2', 'urlC']); 
     expect(taskE.attributableURLs).toEqual(['urlB.1', 'urlB.2', 'urlC', 'urlD']); 
   }); 
   it('should compute attributableURLs correctly across timers', () => { 
     const baseTs = 1241250325; 
     const url = s => ({args: {data: {url: s}}}); 
     const stackFrames = f => ({args: {data: {stackTrace: f.map(url => ({url}))}}}); 
     const timerId = id => ({args: {data: {timerId: id}}}); 
  
     /* 
     An artistic rendering of the below trace: 
     █████████████████████████████TaskA██████████████████████████████████████████████ 
           ████████████████TaskB███████████████████                  █Timer Fire█ 
                ████EvaluateScript██████                               █TaskE█ 
                    | <-- Timer Install 
     */ 
     const traceEvents = [ 
       ...boilerplateTrace, 
       {ph: 'X', name: 'TaskA', pid, tid, ts: baseTs, dur: 100e3, ...url('about:blank')}, 
       {ph: 'B', name: 'TaskB', pid, tid, ts: baseTs + 5e3, ...stackFrames(['urlB.1', 'urlB.2'])}, 
       {ph: 'X', name: 'EvaluateScript', pid, tid, ts: baseTs + 10e3, dur: 30e3, ...url('urlC')}, 
       {ph: 'I', name: 'TimerInstall', pid, tid, ts: baseTs + 15e3, ...timerId(1)}, 
       {ph: 'E', name: 'TaskB', pid, tid, ts: baseTs + 55e3}, 
       {ph: 'X', name: 'TimerFire', pid, tid, ts: baseTs + 75e3, dur: 10e3, ...timerId(1)}, 
       {ph: 'X', name: 'TaskE', pid, tid, ts: baseTs + 80e3, dur: 5e3, ...stackFrames(['urlD'])}, 
     ]; 
  
     traceEvents.forEach(evt => { 
       evt.cat = 'devtools.timeline'; 
       evt.args = evt.args || args; 
     }); 
  
     const tasks = run({traceEvents}); 
     const taskA = tasks.find(task => task.event.name === 'TaskA'); 
     const taskB = tasks.find(task => task.event.name === 'TaskB'); 
     const taskC = tasks.find(task => task.event.name === 'EvaluateScript'); 
     const taskD = tasks.find(task => task.event.name === 'TimerFire'); 
     const taskE = tasks.find(task => task.event.name === 'TaskE'); 
  
     expect(taskA.attributableURLs).toEqual([]); 
     expect(taskB.attributableURLs).toEqual(['urlB.1', 'urlB.2']); 
     expect(taskC.attributableURLs).toEqual(['urlB.1', 'urlB.2', 'urlC']); 
     expect(taskD.attributableURLs).toEqual(['urlB.1', 'urlB.2', 'urlC']); 
     expect(taskE.attributableURLs).toEqual(['urlB.1', 'urlB.2', 'urlC', 'urlD']); 
   }); 
+      currentTask,
+      nextTask,
+      priorTaskData,
+      reverseEventsQueue
+  ) {
+    while (reverseEventsQueue.length) {
+      const nextTimerInstallEvent = reverseEventsQueue.pop();
+      // We're out of events to look at; we're done.
+      if (!nextTimerInstallEvent) break;
+
+      // Timer event is after our current task; push it back on for next time, and we're done.
+      if (nextTimerInstallEvent.ts > Math.min(nextTask.startTime, currentTask.endTime)) {
+        reverseEventsQueue.push(nextTimerInstallEvent);
+        break;
+      }
+
+      // Timer event is before the current task, just skip it.
+      if (nextTimerInstallEvent.ts < currentTask.startTime) {
+        continue;
+      }
+
+      // We're right where we need to be, point the timerId to our `currentTask`
+      /** @type {string} */
+      // @ts-ignore - timerId exists on `TimerInstall` events.
+      const timerId = nextTimerInstallEvent.args.data.timerId;
+      priorTaskData.timers.set(timerId, currentTask);
+    }
+  }
+
+  /**
+   * This function takes the start and end events from a thread and creates tasks from them.
+   * We do this by iterating through the start and end event arrays simultaneously. For each start
+   * event we attempt to find its end event.
+   *
+   * Because of this matching of start/end events and the need to be mutating our end events queue,
+   * we reverse the array to more efficiently `.pop()` them off rather than `.shift()` them off.
+   * While it's true the worst case runtime here is O(n^2), ~99.999% of the time the reverse loop is O(1)
+   * because the overwhelmingly common case is that end event for a given start event is simply the very next event in our queue.
+   *
+   * @param {LH.TraceEvent[]} taskStartEvents
+   * @param {LH.TraceEvent[]} taskEndEvents
    * @param {number} traceEndTs
    * @return {TaskNode[]}
    */
-  static _createTasksFromEvents(mainThreadEvents, priorTaskData, traceEndTs) {
+  static _createTasksFromStartAndEndEvents(taskStartEvents, taskEndEvents, traceEndTs) {
     /** @type {TaskNode[]} */
     const tasks = [];
-    /** @type {TaskNode|undefined} */
-    let currentTask;
+    // Create a reversed copy of the array to avoid copying the rest of the queue on every mutation.
+    // i.e. pop() is O(1) while shift() is O(n), we take the earliest ts element off the queue *a lot*
+    // so we'll optimize for having the earliest timestamp events at the end of the array.
+    const taskEndEventsReverseQueue = taskEndEvents.slice().reverse();
+
+    for (let i = 0; i < taskStartEvents.length; i++) {
+      const taskStartEvent = taskStartEvents[i];
+      if (taskStartEvent.ph === 'X') {
+        // Task is a complete X event, we have all the information we need already.
+        tasks.push(MainThreadTasks._createNewTaskNode(taskStartEvent));
+        continue;
+      }
 
-    for (const event of mainThreadEvents) {
-      // Save the timer data, TimerInstall events are instant events `ph === 'I'` so process them first.
-      if (event.name === 'TimerInstall' && currentTask) {
-        /** @type {string} */
-        // @ts-ignore - timerId exists when name is TimerInstall
-        const timerId = event.args.data.timerId;
-        priorTaskData.timers.set(timerId, currentTask);
+      // Task is a B/E event pair, we need to find the matching E event.
+      let matchedEventIndex = -1;
+      let matchingNestedEventCount = 0;
+      let matchingNestedEventIndex = i + 1;
+
+      // We loop through the reversed end events queue from back to front because we still want to
+      // see end events in increasing timestamp order.
+      // While worst case we will loop through all events, the overwhelmingly common case is that
+      // the immediate next event is our event of interest which makes this loop typically O(1).
+      for (let j = taskEndEventsReverseQueue.length - 1; j >= 0; j--) {
+        const endEvent = taskEndEventsReverseQueue[j];
+        // We are considering an end event, so we'll count how many nested events we saw along the way.
+        while (matchingNestedEventIndex < taskStartEvents.length &&
+              taskStartEvents[matchingNestedEventIndex].ts < endEvent.ts) {
+          if (taskStartEvents[matchingNestedEventIndex].name === taskStartEvent.name) {
+            matchingNestedEventCount++;
+          }
+
+          matchingNestedEventIndex++;
+        }
+
+        // The event doesn't have a matching name, skip it.
+        if (endEvent.name !== taskStartEvent.name) continue;
+        // The event has a timestamp that is too early, skip it.
+        if (endEvent.ts < taskStartEvent.ts) continue;
+
+        // The event matches our name and happened after start, the last thing to check is if it was for a nested event.
+        if (matchingNestedEventCount > 0) {
+          // If it was for a nested event, decrement our counter and move on.
+          matchingNestedEventCount--;
+          continue;
+        }
+
+        // If it wasn't, we found our matching E event! Mark the index and stop the loop.
+        matchedEventIndex = j;
+        break;
+      }
+
+      /** @type {Pick<LH.TraceEvent, 'ph'|'ts'>} */
+      let taskEndEvent;
+      if (matchedEventIndex === -1) {
+        // If we couldn't find an end event, we'll assume it's the end of the trace.
+        // If this creates invalid parent/child relationships it will be caught in the next step.
+        taskEndEvent = {ph: 'E', ts: traceEndTs};
+      } else if (matchedEventIndex === taskEndEventsReverseQueue.length - 1) {
+        // Use .pop() in the common case where the immediately next event is needed.
+        // It's ~25x faster, https://jsperf.com/pop-vs-splice.
+        taskEndEvent = /** @type {LH.TraceEvent} */ (taskEndEventsReverseQueue.pop());
+      } else {
+        taskEndEvent = taskEndEventsReverseQueue.splice(matchedEventIndex, 1)[0];
       }
 
-      // Only look at X (Complete), B (Begin), and E (End) events as they have most data
-      if (event.ph !== 'X' && event.ph !== 'B' && event.ph !== 'E') continue;
+      tasks.push(MainThreadTasks._createNewTaskNode(taskStartEvent, taskEndEvent));
+    }
+
+    if (taskEndEventsReverseQueue.length) {
+      throw new Error(
+        `Fatal trace logic error - ${taskEndEventsReverseQueue.length} unmatched end events`
+      );
+    }
+
+    return tasks;
+  }
+
+  /**
+   *
+   * @param {TaskNode[]} sortedTasks
+   * @param {LH.TraceEvent[]} timerInstallEvents
+   * @param {PriorTaskData} priorTaskData
+   */
+  static _createTaskRelationships(sortedTasks, timerInstallEvents, priorTaskData) {
+    /** @type {TaskNode|undefined} */
+    let currentTask;
+    // Create a reversed copy of the array to avoid copying the rest of the queue on every mutation.
+    const timerInstallEventsReverseQueue = timerInstallEvents.slice().reverse();
 
-      // Update currentTask based on the elapsed time.
-      // The next event may be after currentTask has ended.
+    for (let i = 0; i < sortedTasks.length; i++) {
+      const nextTask = sortedTasks[i];
+
+      // Update `currentTask` based on the elapsed time.
+      // The `nextTask` may be after currentTask has ended.
       while (
         currentTask &&
         Number.isFinite(currentTask.endTime) &&
-        currentTask.endTime <= event.ts
+        currentTask.endTime <= nextTask.startTime
       ) {
+        MainThreadTasks._assignAllTimerInstallsBetweenTasks(
+          currentTask,
+          nextTask,
+          priorTaskData,
+          timerInstallEventsReverseQueue
+        );
         currentTask = currentTask.parent;
       }
 
-      // If we don't have a current task, start a new one.
-      if (!currentTask) {
-        // We can't start a task with an end event
-        if (event.ph === 'E') {
-          throw new Error('Fatal trace logic error - unexpected end event');
+      if (currentTask) {
+        if (nextTask.endTime > currentTask.endTime) {
+          const timeDelta = nextTask.endTime - currentTask.endTime;
+          // The child task is taking longer than the parent task, which should be impossible.
+          //    If it's less than 1ms, we'll let it slide.
+          //    If it's more, throw an error.
+          if (timeDelta < 1000) {
+            currentTask.endTime = nextTask.endTime;
+            currentTask.duration += timeDelta;
+          } else {
+            throw new Error('Fatal trace logic error - child cannot end after parent');
+          }
         }
 
-        currentTask = MainThreadTasks._createNewTaskNode(event);
-        tasks.push(currentTask);
-
-        continue;
+        // We're currently in the middle of a task, so `nextTask` is a child.
+        nextTask.parent = currentTask;
+        currentTask.children.push(nextTask);
+        MainThreadTasks._assignAllTimerInstallsBetweenTasks(
+          currentTask,
+          nextTask,
+          priorTaskData,
+          timerInstallEventsReverseQueue
+        );
+      } else {
+        // We're not currently in the middle of a task, so `nextTask` is a toplevel task.
+        // Nothing really to do here.
       }
 
-      if (event.ph === 'X' || event.ph === 'B') {
-        // We're starting a nested event, create it as a child and make it the currentTask
-        const newTask = MainThreadTasks._createNewTaskNode(event, currentTask);
-        tasks.push(newTask);
-        currentTask = newTask;
-      } else {
-        if (currentTask.event.ph !== 'B') {
-          throw new Error(
-            `Fatal trace logic error - expected start event, got ${currentTask.event.ph}`);
-        }
+      currentTask = nextTask;
+    }
 
-        // We're ending an event, update the end time and the currentTask to its parent
-        currentTask.endTime = event.ts;
-        currentTask = currentTask.parent;
-      }
+    if (currentTask) {
+      MainThreadTasks._assignAllTimerInstallsBetweenTasks(
+        currentTask,
+        {startTime: Infinity},
+        priorTaskData,
+        timerInstallEventsReverseQueue
+      );
     }
+  }
 
-    // Starting from the last and bottom-most task, we finish any tasks that didn't end yet.
-    while (currentTask && !Number.isFinite(currentTask.endTime)) {
-      // The last event didn't finish before tracing stopped, use traceEnd timestamp instead.
-      currentTask.endTime = traceEndTs;
-      currentTask = currentTask.parent;
+  /**
+   * This function takes the raw trace events sorted in increasing timestamp order and outputs connected task nodes.
+   * To create the task heirarchy we make several passes over the events.
+   *
+   *    1. Create three arrays of X/B events, E events, and TimerInstall events.
+   *    2. Create tasks for each X/B event, throwing if a matching E event cannot be found for a given B.
+   *    3. Sort the tasks by ↑ startTime, ↓ duration.
+   *    4. Match each task to its parent, throwing if there is any invalid overlap between tasks.
+   *
+   * @param {LH.TraceEvent[]} mainThreadEvents
+   * @param {PriorTaskData} priorTaskData
+   * @param {number} traceEndTs
+   * @return {TaskNode[]}
+   */
+  static _createTasksFromEvents(mainThreadEvents, priorTaskData, traceEndTs) {
+    /** @type {Array<LH.TraceEvent>} */
+    const taskStartEvents = [];
+    /** @type {Array<LH.TraceEvent>} */
+    const taskEndEvents = [];
+    /** @type {Array<LH.TraceEvent>} */
+    const timerInstallEvents = [];
+
+    // Phase 1 - Create three arrays of X/B events, E events, and TimerInstall events.
+    for (const event of mainThreadEvents) {
+      if (event.ph === 'X' || event.ph === 'B') taskStartEvents.push(event);
+      if (event.ph === 'E') taskEndEvents.push(event);
+      if (event.name === 'TimerInstall') timerInstallEvents.push(event);
     }
 
-    // At this point we expect all tasks to have a finite startTime and endTime.
-    return tasks;
+    // Phase 2 - Create tasks for each taskStartEvent.
+    const tasks = MainThreadTasks._createTasksFromStartAndEndEvents(
+      taskStartEvents,
+      taskEndEvents,
+      traceEndTs
+    );
+
+    // Phase 3 - Sort the tasks by increasing startTime, decreasing duration.
+    const sortedTasks = tasks.sort(
+      (taskA, taskB) => taskA.startTime - taskB.startTime || taskB.duration - taskA.duration
+    );
+
+    // Phase 4 - Match each task to its parent.
+    MainThreadTasks._createTaskRelationships(sortedTasks, timerInstallEvents, priorTaskData);
+
+    return sortedTasks;
   }
 
   /**
@@ -157,7 +338,6 @@ class MainThreadTasks {
     const childTime = task.children
       .map(child => MainThreadTasks._computeRecursiveSelfTime(child, task))
       .reduce((sum, child) => sum + child, 0);
-    task.duration = task.endTime - task.startTime;
     task.selfTime = task.duration - childTime;
     return task.duration;
   }
@@ -213,7 +393,8 @@ class MainThreadTasks {
 
     task.attributableURLs = attributableURLs;
     task.children.forEach(child =>
-      MainThreadTasks._computeRecursiveAttributableURLs(child, attributableURLs, priorTaskData));
+      MainThreadTasks._computeRecursiveAttributableURLs(child, attributableURLs, priorTaskData)
+    );
   }
 
   /**
@@ -227,14 +408,18 @@ class MainThreadTasks {
   }
 
   /**
-   * @param {LH.TraceEvent[]} traceEvents
+   * @param {LH.TraceEvent[]} mainThreadEvents
    * @param {number} traceEndTs
    * @return {TaskNode[]}
    */
-  static getMainThreadTasks(traceEvents, traceEndTs) {
+  static getMainThreadTasks(mainThreadEvents, traceEndTs) {
     const timers = new Map();
     const priorTaskData = {timers};
-    const tasks = MainThreadTasks._createTasksFromEvents(traceEvents, priorTaskData, traceEndTs);
+    const tasks = MainThreadTasks._createTasksFromEvents(
+      mainThreadEvents,
+      priorTaskData,
+      traceEndTs
+    );
 
     // Compute the recursive properties we couldn't compute earlier, starting at the toplevel tasks
     for (const task of tasks) {