Gitweb @ Texas Instruments - Open Source Git Repositories - git.TI.com/gitweb - android-sdk/arm-ds5-gator.git/blobdiff - driver/gator_main.c
gator-driver: Revert #error about lack of CONFIG_PERF_EVENTS
[android-sdk/arm-ds5-gator.git] / driver / gator_main.c
index 340756e6f5249c556004f7d8aa43d42e92b71db0..5eab03456ce67060b0828dc82247aec3ba91be85 100644 (file)
@@ -1,5 +1,5 @@
 /**
- * Copyright (C) ARM Limited 2010-2011. All rights reserved.
+ * Copyright (C) ARM Limited 2010-2013. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -7,7 +7,8 @@
  *
  */
 
-static unsigned long gator_protocol_version = 5;
+// This version must match the gator daemon version
+static unsigned long gator_protocol_version = 14;
 
 #include <linux/slab.h>
 #include <linux/cpu.h>
@@ -17,15 +18,22 @@ static unsigned long gator_protocol_version = 5;
 #include <linux/hardirq.h>
 #include <linux/highmem.h>
 #include <linux/pagemap.h>
+#include <linux/suspend.h>
+#include <linux/module.h>
+#include <linux/perf_event.h>
+#include <linux/utsname.h>
+#include <asm/stacktrace.h>
 #include <asm/uaccess.h>
 
 #include "gator.h"
 #include "gator_events.h"
 
-#ifndef CONFIG_GENERIC_TRACER
-#ifndef CONFIG_TRACING
-#error gator requires the kernel to have CONFIG_GENERIC_TRACER or CONFIG_TRACING defined
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32)
+#error kernels prior to 2.6.32 are not supported
 #endif
+
+#if !defined(CONFIG_GENERIC_TRACER) && !defined(CONFIG_TRACING)
+#error gator requires the kernel to have CONFIG_GENERIC_TRACER or CONFIG_TRACING defined
 #endif
 
 #ifndef CONFIG_PROFILING
@@ -33,279 +41,482 @@ static unsigned long gator_protocol_version = 5;
 #endif
 
 #ifndef CONFIG_HIGH_RES_TIMERS
-#error gator requires the kernel to have CONFIG_HIGH_RES_TIMERS defined
+#error gator requires the kernel to have CONFIG_HIGH_RES_TIMERS defined to support PC sampling
 #endif
 
-#ifdef CONFIG_SMP
-#ifndef CONFIG_LOCAL_TIMERS
+#if defined(__arm__) && defined(CONFIG_SMP) && !defined(CONFIG_LOCAL_TIMERS)
 #error gator requires the kernel to have CONFIG_LOCAL_TIMERS defined on SMP systems
 #endif
-#endif
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32)
-#error kernels prior to 2.6.32 are not supported
+#if (GATOR_PERF_SUPPORT) && (!(GATOR_PERF_PMU_SUPPORT))
+#ifndef CONFIG_PERF_EVENTS
+#warning gator requires the kernel to have CONFIG_PERF_EVENTS defined to support pmu hardware counters
+#elif !defined CONFIG_HW_PERF_EVENTS
+#warning gator requires the kernel to have CONFIG_HW_PERF_EVENTS defined to support pmu hardware counters
+#endif
 #endif
 
 /******************************************************************************
  * DEFINES
  ******************************************************************************/
-#define BUFFER_SIZE_DEFAULT            (256*1024)
-#define SYNC_FREQ_DEFAULT              1000
-
-#define NO_COOKIE                              0UL
-#define INVALID_COOKIE                 ~0UL
-
-#define PROTOCOL_FRAME                         ~0
-#define PROTOCOL_START_TICK                    1
-#define PROTOCOL_END_TICK                      3
-#define PROTOCOL_START_BACKTRACE       5
-#define PROTOCOL_END_BACKTRACE         7
-#define PROTOCOL_COOKIE                                9
-#define PROTOCOL_SCHEDULER_TRACE       11
-#define PROTOCOL_COUNTERS                      13
-#define PROTOCOL_ANNOTATE                      15
-#define PROTOCOL_CPU_SYNC                      17
+#define SUMMARY_BUFFER_SIZE       (1*1024)
+#define BACKTRACE_BUFFER_SIZE     (128*1024)
+#define NAME_BUFFER_SIZE          (64*1024)
+#define COUNTER_BUFFER_SIZE       (64*1024)    // counters have the core as part of the data and the core value in the frame header may be discarded
+#define BLOCK_COUNTER_BUFFER_SIZE (128*1024)
+#define ANNOTATE_BUFFER_SIZE      (64*1024)    // annotate counters have the core as part of the data and the core value in the frame header may be discarded
+#define SCHED_TRACE_BUFFER_SIZE   (128*1024)
+#define GPU_TRACE_BUFFER_SIZE     (64*1024)    // gpu trace counters have the core as part of the data and the core value in the frame header may be discarded
+#define IDLE_BUFFER_SIZE          (32*1024)    // idle counters have the core as part of the data and the core value in the frame header may be discarded
+
+#define NO_COOKIE      0U
+#define INVALID_COOKIE ~0U
+
+#define FRAME_SUMMARY       1
+#define FRAME_BACKTRACE     2
+#define FRAME_NAME          3
+#define FRAME_COUNTER       4
+#define FRAME_BLOCK_COUNTER 5
+#define FRAME_ANNOTATE      6
+#define FRAME_SCHED_TRACE   7
+#define FRAME_GPU_TRACE     8
+#define FRAME_IDLE          9
+
+#define MESSAGE_END_BACKTRACE 1
+
+#define MESSAGE_COOKIE      1
+#define MESSAGE_THREAD_NAME 2
+#define HRTIMER_CORE_NAME   3
+
+#define MESSAGE_GPU_START 1
+#define MESSAGE_GPU_STOP  2
+
+#define MESSAGE_SCHED_SWITCH 1
+#define MESSAGE_SCHED_EXIT   2
+
+#define MESSAGE_IDLE_ENTER 1
+#define MESSAGE_IDLE_EXIT 2
+
+#define MAXSIZE_PACK32     5
+#define MAXSIZE_PACK64    10
+
+#define FRAME_HEADER_SIZE 3
 
 #if defined(__arm__)
 #define PC_REG regs->ARM_pc
+#elif defined(__aarch64__)
+#define PC_REG regs->pc
 #else
 #define PC_REG regs->ip
 #endif
 
+enum {        // Index of each buffer type; used to index the per-buffer arrays declared below
+       SUMMARY_BUF,    // 0; each value here is one less than the matching FRAME_* id
+       BACKTRACE_BUF,
+       NAME_BUF,
+       COUNTER_BUF,
+       BLOCK_COUNTER_BUF,
+       ANNOTATE_BUF,
+       SCHED_TRACE_BUF,
+       GPU_TRACE_BUF,
+       IDLE_BUF,
+       NUM_GATOR_BUFS  // number of buffer types, not a buffer itself
+};
+
 /******************************************************************************
- * PER CPU
+ * Globals
  ******************************************************************************/
 static unsigned long gator_cpu_cores;
-static unsigned long gator_buffer_size;
+// Size of the largest buffer. Effectively constant, set in gator_op_create_files
+static unsigned long userspace_buffer_size;
 static unsigned long gator_backtrace_depth;
+// How often to commit the buffers for live in nanoseconds
+static u64 gator_live_rate;
 
 static unsigned long gator_started;
+static u64 gator_monotonic_started;
 static unsigned long gator_buffer_opened;
 static unsigned long gator_timer_count;
-static unsigned long gator_streaming;
-static int gator_master_tick;
+static unsigned long gator_response_type;
 static DEFINE_MUTEX(start_mutex);
 static DEFINE_MUTEX(gator_buffer_mutex);
 
-unsigned long gator_net_traffic;
-
-#define COMMIT_SIZE            128
-#define COMMIT_MASK            (COMMIT_SIZE-1)
-static DEFINE_SPINLOCK(gator_commit_lock);
-static int *gator_commit;
-static int gator_commit_read;
-static int gator_commit_write;
+bool event_based_sampling;
 
 static DECLARE_WAIT_QUEUE_HEAD(gator_buffer_wait);
-static DEFINE_PER_CPU(int, gator_cpu_sync);
-static DEFINE_PER_CPU(int, gator_cpu_tick);
-static DEFINE_PER_CPU(int, gator_first_time);
+static DECLARE_WAIT_QUEUE_HEAD(gator_annotate_wait);
+static struct timer_list gator_buffer_wake_up_timer;
+static LIST_HEAD(gator_events);
+
+static DEFINE_PER_CPU(u64, last_timestamp);
+
+static bool printed_monotonic_warning;
+
+static bool sent_core_name[NR_CPUS];
 
 /******************************************************************************
  * Prototypes
  ******************************************************************************/
-static void gator_buffer_write_packed_int(int cpu, unsigned int x);
-static void gator_buffer_write_string(int cpu, char *x);
-static int  gator_write_packed_int(char *buffer, unsigned int x);
-static void gator_add_trace(int cpu, unsigned int address);
-static uint64_t gator_get_time(void);
+static void buffer_check(int cpu, int buftype, u64 time);      // commits once uncommitted data reaches 3/4 of the buffer
+static void gator_commit_buffer(int cpu, int buftype, u64 time);       // fills in the frame length and advances the commit position
+static int buffer_bytes_available(int cpu, int buftype);       // free bytes minus a reserve
+static bool buffer_check_space(int cpu, int buftype, int bytes);       // true if 'bytes' fit; records the answer per buffer
+static int contiguous_space_available(int cpu, int buftype);   // min(available bytes, bytes up to the end of the buffer)
+static void gator_buffer_write_packed_int(int cpu, int buftype, int x);
+static void gator_buffer_write_packed_int64(int cpu, int buftype, long long x);
+static void gator_buffer_write_bytes(int cpu, int buftype, const char *x, int len);    // raw copy with wrap-around
+static void gator_buffer_write_string(int cpu, int buftype, const char *x);    // packed length, then the bytes
+static void gator_add_trace(int cpu, unsigned long address);
+static void gator_add_sample(int cpu, struct pt_regs *const regs);
+static u64 gator_get_time(void);
+
+// Size of the buffer, must be a power of 2. Effectively constant, set in gator_op_setup.
+static uint32_t gator_buffer_size[NUM_GATOR_BUFS];
+// gator_buffer_size - 1, bitwise and with pos to get offset into the array. Effectively constant, set in gator_op_setup.
+static uint32_t gator_buffer_mask[NUM_GATOR_BUFS];
+// Read position in the buffer. Initialized to zero in gator_op_setup and incremented after bytes are read by userspace in userspace_buffer_read
+static DEFINE_PER_CPU(int[NUM_GATOR_BUFS], gator_buffer_read);
+// Write position in the buffer. Initialized to zero in gator_op_setup and incremented after bytes are written to the buffer
+static DEFINE_PER_CPU(int[NUM_GATOR_BUFS], gator_buffer_write);
+// Commit position in the buffer. Initialized to zero in gator_op_setup and incremented after a frame is ready to be read by userspace
+static DEFINE_PER_CPU(int[NUM_GATOR_BUFS], gator_buffer_commit);
+// If set to false, decreases the number of bytes returned by buffer_bytes_available. Set in buffer_check_space if no space is remaining. Initialized to true in gator_op_setup
+// This means that if we run out of space, continue to report that no space is available until bytes are read by userspace
+static DEFINE_PER_CPU(int[NUM_GATOR_BUFS], buffer_space_available);
+// The buffer. Allocated in gator_op_setup
+static DEFINE_PER_CPU(char *[NUM_GATOR_BUFS], gator_buffer);
+
+#if GATOR_LIVE
+// The time after which the buffer should be committed for live display
+static DEFINE_PER_CPU(u64, gator_buffer_commit_time);
+#endif
 
 /******************************************************************************
  * Application Includes
  ******************************************************************************/
+#include "gator_marshaling.c"
+#include "gator_hrtimer_perf.c"
+#include "gator_hrtimer_gator.c"
 #include "gator_cookies.c"
 #include "gator_trace_sched.c"
+#include "gator_trace_power.c"
+#include "gator_trace_gpu.c"
 #include "gator_backtrace.c"
 #include "gator_annotate.c"
 #include "gator_fs.c"
+#include "gator_pack.c"
 
 /******************************************************************************
  * Misc
  ******************************************************************************/
-#if defined(__arm__)
+
+const struct gator_cpu gator_cpus[] = {        // CPU description table searched by gator_find_cpu_by_cpuid / gator_find_cpu_by_pmu_name
+       {       // no .pmu_name here (and in several entries below): it stays zero-initialized, so the pmu-name lookup skips the entry
+               .cpuid = ARM1136,
+               .core_name = "ARM1136",
+               .pmnc_name = "ARM_ARM11",
+               .pmnc_counters = 3,
+       },
+       {
+               .cpuid = ARM1156,
+               .core_name = "ARM1156",
+               .pmnc_name = "ARM_ARM11",
+               .pmnc_counters = 3,
+       },
+       {
+               .cpuid = ARM1176,
+               .core_name = "ARM1176",
+               .pmnc_name = "ARM_ARM11",
+               .pmnc_counters = 3,
+       },
+       {
+               .cpuid = ARM11MPCORE,
+               .core_name = "ARM11MPCore",
+               .pmnc_name = "ARM_ARM11MPCore",
+               .pmnc_counters = 3,
+       },
+       {
+               .cpuid = CORTEX_A5,
+               .core_name = "Cortex-A5",
+               .pmu_name = "ARMv7_Cortex_A5",
+               .pmnc_name = "ARM_Cortex-A5",
+               .pmnc_counters = 2,
+       },
+       {
+               .cpuid = CORTEX_A7,
+               .core_name = "Cortex-A7",
+               .pmu_name = "ARMv7_Cortex_A7",
+               .pmnc_name = "ARM_Cortex-A7",
+               .pmnc_counters = 4,
+       },
+       {
+               .cpuid = CORTEX_A8,
+               .core_name = "Cortex-A8",
+               .pmu_name = "ARMv7_Cortex_A8",
+               .pmnc_name = "ARM_Cortex-A8",
+               .pmnc_counters = 4,
+       },
+       {
+               .cpuid = CORTEX_A9,
+               .core_name = "Cortex-A9",
+               .pmu_name = "ARMv7_Cortex_A9",
+               .pmnc_name = "ARM_Cortex-A9",
+               .pmnc_counters = 6,
+       },
+       {
+               .cpuid = CORTEX_A15,
+               .core_name = "Cortex-A15",
+               .pmu_name = "ARMv7_Cortex_A15",
+               .pmnc_name = "ARM_Cortex-A15",
+               .pmnc_counters = 6,
+       },
+       {
+               .cpuid = SCORPION,
+               .core_name = "Scorpion",
+               .pmnc_name = "Scorpion",
+               .pmnc_counters = 4,
+       },
+       {
+               .cpuid = SCORPIONMP,
+               .core_name = "ScorpionMP",
+               .pmnc_name = "ScorpionMP",
+               .pmnc_counters = 4,
+       },
+       {
+               .cpuid = KRAITSIM,
+               .core_name = "KraitSIM",
+               .pmnc_name = "Krait",
+               .pmnc_counters = 4,
+       },
+       {
+               .cpuid = KRAIT,
+               .core_name = "Krait",
+               .pmnc_name = "Krait",
+               .pmnc_counters = 4,
+       },
+       {
+               .cpuid = KRAIT_S4_PRO,
+               .core_name = "Krait S4 Pro",
+               .pmnc_name = "Krait",
+               .pmnc_counters = 4,
+       },
+       {
+               .cpuid = CORTEX_A53,
+               .core_name = "Cortex-A53",
+               .pmnc_name = "ARM_Cortex-A53",
+               .pmnc_counters = 6,
+       },
+       {
+               .cpuid = CORTEX_A57,
+               .core_name = "Cortex-A57",
+               .pmnc_name = "ARM_Cortex-A57",
+               .pmnc_counters = 6,
+       },
+       {
+               .cpuid = AARCH64,
+               .core_name = "AArch64",
+               .pmnc_name = "ARM_AArch64",
+               .pmnc_counters = 6,
+       },
+       {
+               .cpuid = OTHER, // also the value gator_cpuid() returns on non-ARM builds
+               .core_name = "Other",
+               .pmnc_name = "Other",
+               .pmnc_counters = 6,
+       },
+       {}      // terminator: cpuid == 0 ends the lookup loops, so this entry must stay last
+};
+
+const struct gator_cpu *gator_find_cpu_by_cpuid(const u32 cpuid)
+{
+       const struct gator_cpu *entry;
+
+       // Walk gator_cpus up to its terminating entry (cpuid == 0)
+       for (entry = gator_cpus; entry->cpuid != 0; ++entry) {
+               if (entry->cpuid == cpuid)
+                       return entry;
+       }
+
+       // No table entry carries this cpuid
+       return NULL;
+}
+
+const struct gator_cpu *gator_find_cpu_by_pmu_name(const char *const name)
+{
+       const struct gator_cpu *entry = gator_cpus;
+
+       // Entries without a pmu_name can never match; the cpuid == 0 entry ends the table
+       for (; entry->cpuid != 0; ++entry) {
+               if (entry->pmu_name == NULL)
+                       continue;
+               if (strcmp(entry->pmu_name, name) == 0)
+                       return entry;
+       }
+       return NULL;
+}
+
 u32 gator_cpuid(void)
 {
+#if defined(__arm__) || defined(__aarch64__)   // ARM builds: derive the id from a CPU register
        u32 val;
+#if !defined(__aarch64__)      // 32-bit ARM: coprocessor (p15) register read
        asm volatile("mrc p15, 0, %0, c0, c0, 0" : "=r" (val));
+#else
+       asm volatile("mrs %0, midr_el1" : "=r" (val));  // AArch64: read midr_el1; bits 4..15 are returned below
+#endif
        return (val >> 4) & 0xfff;
-}
+#else
+       return OTHER;   // not an ARM build: report the cpuid of the "Other" entry in gator_cpus
 #endif
-
-/******************************************************************************
- * Commit interface
- ******************************************************************************/
-static int buffer_commit_ready(void)
-{
-       return (gator_commit_read != gator_commit_write);
 }
 
-static void buffer_commit_read(int *cpu, int *readval, int *writeval)
+static void gator_buffer_wake_up(unsigned long data)   // 'data' is unused; presumably the gator_buffer_wake_up_timer callback (setup not visible here)
 {
-       int read = gator_commit_read;
-       *cpu      = gator_commit[read+0];
-       *readval  = gator_commit[read+1];
-       *writeval = gator_commit[read+2];
-       gator_commit_read = (read + 4) & COMMIT_MASK;
+       wake_up(&gator_buffer_wait);    // wake whoever is waiting on gator_buffer_wait
 }
 
-static void buffer_commit_write(int cpu, int readval, int writeval) {
-       int write = gator_commit_write;
-       gator_commit[write+0] = cpu;
-       gator_commit[write+1] = readval;
-       gator_commit[write+2] = writeval;
-       gator_commit_write = (write + 4) & COMMIT_MASK;
+/******************************************************************************
+ * Commit interface
+ ******************************************************************************/
+static bool buffer_commit_ready(int *cpu, int *buftype)
+{
+       int core, buf;  // scan order: present cpus first, then buffer types
+       for_each_present_cpu(core) {
+               for (buf = 0; buf < NUM_GATOR_BUFS; buf++) {
+                       if (per_cpu(gator_buffer_commit, core)[buf] == per_cpu(gator_buffer_read, core)[buf])
+                               continue;       // nothing committed beyond the read position
+                       *cpu = core;
+                       *buftype = buf;
+                       return true;    // first buffer holding committed, unread data
+               }
+       }
+       *cpu = *buftype = -1;   // nothing pending on any cpu
+       return false;
 }
 
 /******************************************************************************
  * Buffer management
  ******************************************************************************/
-static uint32_t use_buffer_size;
-static uint32_t use_buffer_mask;
-static DEFINE_PER_CPU(int, use_buffer_seq);
-static DEFINE_PER_CPU(int, use_buffer_read);
-static DEFINE_PER_CPU(int, use_buffer_write);
-static DEFINE_PER_CPU(char *, use_buffer);
-
-static void gator_buffer_write_packed_int(int cpu, unsigned int x)
+static int buffer_bytes_available(int cpu, int buftype)        // Free bytes in the (cpu, buftype) buffer after subtracting a reserve; can be negative
 {
-       uint32_t write = per_cpu(use_buffer_write, cpu);
-       uint32_t mask = use_buffer_mask;
-       char *buffer = per_cpu(use_buffer, cpu);
-       int write0 = (write + 0) & mask;
-       int write1 = (write + 1) & mask;
-       int write2 = (write + 2) & mask;
-       int write3 = (write + 3) & mask;
-       int write4 = (write + 4) & mask;
-       int write5 = (write + 5) & mask;
-
-       if ((x & 0xffffff80) == 0) {
-               buffer[write0] = x & 0x7f;
-               per_cpu(use_buffer_write, cpu) = write1;
-       } else if ((x & 0xffffc000) == 0) {
-               buffer[write0] = x | 0x80;
-               buffer[write1] = (x>>7) & 0x7f;
-               per_cpu(use_buffer_write, cpu) = write2;
-       } else if ((x & 0xffe00000) == 0) {
-               buffer[write0] = x | 0x80;
-               buffer[write1] = (x>>7) | 0x80;
-               buffer[write2] = (x>>14) & 0x7f;
-               per_cpu(use_buffer_write, cpu) = write3;
-       } else if ((x & 0xf0000000) == 0) {
-               buffer[write0] = x | 0x80;
-               buffer[write1] = (x>>7) | 0x80;
-               buffer[write2] = (x>>14) | 0x80;
-               buffer[write3] = (x>>21) & 0x7f;
-               per_cpu(use_buffer_write, cpu) = write4;
+       int remaining, filled;
+
+       filled = per_cpu(gator_buffer_write, cpu)[buftype] - per_cpu(gator_buffer_read, cpu)[buftype];  // bytes written but not yet read
+       if (filled < 0) {       // the write position has wrapped around behind the read position
+               filled += gator_buffer_size[buftype];
+       }
+
+       remaining = gator_buffer_size[buftype] - filled;
+
+       if (per_cpu(buffer_space_available, cpu)[buftype]) {
+               // Give some extra room; also allows space to insert the overflow error packet
+               remaining -= 200;
        } else {
-               buffer[write0] = x | 0x80;
-               buffer[write1] = (x>>7) | 0x80;
-               buffer[write2] = (x>>14) | 0x80;
-               buffer[write3] = (x>>21) | 0x80;
-               buffer[write4] = (x>>28) & 0x0f;
-               per_cpu(use_buffer_write, cpu) = write5;
+               // Hysteresis, prevents multiple overflow messages
+               remaining -= 2000;
        }
+
+       return remaining;
 }
 
-static int gator_write_packed_int(char *buffer, unsigned int x)
+static int contiguous_space_available(int cpu, int buftype)    // Smaller of the available bytes and the bytes left before the end of the buffer
 {
-       if ((x & 0xffffff80) == 0) {
-               buffer[0] = x & 0x7f;
-               return 1;
-       } else if ((x & 0xffffc000) == 0) {
-               buffer[0] = x | 0x80;
-               buffer[1] = (x>>7) & 0x7f;
-               return 2;
-       } else if ((x & 0xffe00000) == 0) {
-               buffer[0] = x | 0x80;
-               buffer[1] = (x>>7) | 0x80;
-               buffer[2] = (x>>14) & 0x7f;
-               return 3;
-       } else if ((x & 0xf0000000) == 0) {
-               buffer[0] = x | 0x80;
-               buffer[1] = (x>>7) | 0x80;
-               buffer[2] = (x>>14) | 0x80;
-               buffer[3] = (x>>21) & 0x7f;
-               return 4;
+       int remaining = buffer_bytes_available(cpu, buftype);   // already reduced by the reserve, so it may be negative
+       int contiguous = gator_buffer_size[buftype] - per_cpu(gator_buffer_write, cpu)[buftype];        // bytes from the write position to the end of the array
+       if (remaining < contiguous)
+               return remaining;
+       else
+               return contiguous;
+}
+
+static bool buffer_check_space(int cpu, int buftype, int bytes)        // Reports whether 'bytes' fit and stores that answer in buffer_space_available
+{
+       int remaining = buffer_bytes_available(cpu, buftype);
+
+       if (remaining < bytes) {
+               per_cpu(buffer_space_available, cpu)[buftype] = false;  // buffer_bytes_available now applies its larger (2000 byte) reserve
        } else {
-               buffer[0] = x | 0x80;
-               buffer[1] = (x>>7) | 0x80;
-               buffer[2] = (x>>14) | 0x80;
-               buffer[3] = (x>>21) | 0x80;
-               buffer[4] = (x>>28) & 0x0f;
-               return 5;
+               per_cpu(buffer_space_available, cpu)[buftype] = true;   // back to the smaller (200 byte) reserve
        }
+
+       return per_cpu(buffer_space_available, cpu)[buftype];
 }
 
-static void gator_buffer_write_bytes(int cpu, char *x, int len)
+static void gator_buffer_write_bytes(int cpu, int buftype, const char *x, int len)     // Copies len bytes of x into the buffer, wrapping via the mask; free space is not checked here
 {
-       uint32_t write = per_cpu(use_buffer_write, cpu);
-       uint32_t mask = use_buffer_mask;
-       char *buffer = per_cpu(use_buffer, cpu);
        int i;
+       u32 write = per_cpu(gator_buffer_write, cpu)[buftype];
+       u32 mask = gator_buffer_mask[buftype]; // size - 1 for a power-of-two buffer size
+       char *buffer = per_cpu(gator_buffer, cpu)[buftype];
 
        for (i = 0; i < len; i++) {
                buffer[write] = x[i];
                write = (write + 1) & mask;
        }
 
-       per_cpu(use_buffer_write, cpu) = write;
+       per_cpu(gator_buffer_write, cpu)[buftype] = write;      // store the advanced write position
 }
 
-static void gator_buffer_write_string(int cpu, char *x)
+static void gator_buffer_write_string(int cpu, int buftype, const char *x)     // Writes x as a packed length followed by its bytes
 {
        int len = strlen(x);
-       gator_buffer_write_packed_int(cpu, len);
-       gator_buffer_write_bytes(cpu, x, len);
+       gator_buffer_write_packed_int(cpu, buftype, len);
+       gator_buffer_write_bytes(cpu, buftype, x, len); // exactly len bytes: the terminating NUL is not written
 }
 
-static void gator_buffer_header(int cpu)
+static void gator_commit_buffer(int cpu, int buftype, u64 time)        // Finalizes the pending frame of (cpu, buftype): fills in its length and advances the commit position
 {
-       gator_buffer_write_packed_int(cpu, PROTOCOL_FRAME);
-       gator_buffer_write_packed_int(cpu, cpu);
-       gator_buffer_write_packed_int(cpu, per_cpu(use_buffer_seq, cpu));
-       per_cpu(use_buffer_seq, cpu)++;
-}
+       int type_length, commit, length, byte;
 
-static void gator_buffer_commit(int cpu)
-{
-       buffer_commit_write(cpu, per_cpu(use_buffer_read, cpu), per_cpu(use_buffer_write, cpu));
-       per_cpu(use_buffer_read, cpu) = per_cpu(use_buffer_write, cpu);
-       gator_buffer_header(cpu);
-       wake_up(&gator_buffer_wait);
+       if (!per_cpu(gator_buffer, cpu)[buftype])       // no buffer pointer for this cpu/type: nothing to commit
+               return;
+
+       // post-populate the length, which does not include the response type length nor the length itself, i.e. only the length of the payload
+       type_length = gator_response_type ? 1 : 0;
+       commit = per_cpu(gator_buffer_commit, cpu)[buftype];
+       length = per_cpu(gator_buffer_write, cpu)[buftype] - commit;
+       if (length < 0) {       // the write position wrapped around behind the commit position
+               length += gator_buffer_size[buftype];
+       }
+       length = length - type_length - sizeof(s32);
+
+       if (length <= FRAME_HEADER_SIZE) {
+               // Nothing to write, only the frame header is present
+               return;
+       }
+
+       for (byte = 0; byte < sizeof(s32); byte++) {    // store the length least-significant byte first
+               per_cpu(gator_buffer, cpu)[buftype][(commit + type_length + byte) & gator_buffer_mask[buftype]] = (length >> byte * 8) & 0xFF;
+       }
+
+       per_cpu(gator_buffer_commit, cpu)[buftype] = per_cpu(gator_buffer_write, cpu)[buftype]; // buffer_commit_ready compares this position with the read position
+
+#if GATOR_LIVE
+       if (gator_live_rate > 0) {
+               while (time > per_cpu(gator_buffer_commit_time, cpu)) { // step the commit time forward until it is no longer before 'time'
+                       per_cpu(gator_buffer_commit_time, cpu) += gator_live_rate;
+               }
+       }
+#endif
+
+       marshal_frame(cpu, buftype);    // not defined in this file; presumably begins the next frame's header (TODO confirm)
+
+       // had to delay scheduling work as attempting to schedule work during the context switch is illegal in kernel versions 3.5 and greater
+       mod_timer(&gator_buffer_wake_up_timer, jiffies + 1);
 }
 
-static void gator_buffer_check(int cpu, int tick)
+static void buffer_check(int cpu, int buftype, u64 time)       // Commits the buffer once uncommitted data fills at least three quarters of it
 {
-       if (!(tick % gator_timer_count)) {
-               int c, sync;
-               spin_lock(&gator_commit_lock);
-               // synchronize, if all online cpus have the same tick waypoint
-               sync = per_cpu(gator_cpu_sync, cpu) = per_cpu(gator_cpu_tick, cpu);
-               for_each_online_cpu(c) {
-                       if (sync != per_cpu(gator_cpu_sync, c)) {
-                               sync = 0;
-                               break;
-                       }
-               }
-               if (sync) {
-                       gator_buffer_write_packed_int(cpu, PROTOCOL_CPU_SYNC);
-               }
-               gator_buffer_commit(cpu);
-               spin_unlock(&gator_commit_lock);
-       } else {
-               int available = per_cpu(use_buffer_write, cpu) - per_cpu(use_buffer_read, cpu);
-               if (available < 0) {
-                       available += use_buffer_size;
-               }
-               if (available >= ((use_buffer_size * 3) / 4)) {
-                       spin_lock(&gator_commit_lock);
-                       gator_buffer_commit(cpu);
-                       spin_unlock(&gator_commit_lock);
-               }
+       int filled = per_cpu(gator_buffer_write, cpu)[buftype] - per_cpu(gator_buffer_commit, cpu)[buftype];    // bytes written since the last commit
+       if (filled < 0) {       // the write position wrapped around behind the commit position
+               filled += gator_buffer_size[buftype];
+       }
+       if (filled >= ((gator_buffer_size[buftype] * 3) / 4)) {
+               gator_commit_buffer(cpu, buftype, time);
        }
 }
 
-static void gator_add_trace(int cpu, unsigned int address)
+static void gator_add_trace(int cpu, unsigned long address)
 {
        off_t offset = 0;
        unsigned long cookie = get_address_cookie(cpu, current, address & ~1, &offset);
@@ -314,260 +525,349 @@ static void gator_add_trace(int cpu, unsigned int address)
                offset = address;
        }
 
-       gator_buffer_write_packed_int(cpu, offset & ~1);
-       gator_buffer_write_packed_int(cpu, cookie);
+       marshal_backtrace(offset & ~1, cookie);
 }
 
-static void gator_add_sample(int cpu, struct pt_regs * const regs)
+static void gator_add_sample(int cpu, struct pt_regs *const regs)
 {
-       struct module *mod;
-       unsigned int addr, cookie = 0;
-       int inKernel = regs ? !user_mode(regs) : 1;
-       unsigned long exec_cookie = !inKernel ? get_exec_cookie(cpu, current) : NO_COOKIE;
-
-       gator_buffer_write_packed_int(cpu, PROTOCOL_START_BACKTRACE);
-
-       // TGID::PID::inKernel
-       gator_buffer_write_packed_int(cpu, exec_cookie);
-       gator_buffer_write_packed_int(cpu, (unsigned int)current->tgid);
-       gator_buffer_write_packed_int(cpu, (unsigned int)current->pid);
-       gator_buffer_write_packed_int(cpu, inKernel);
-
-       // get_irq_regs() will return NULL outside of IRQ context (e.g. nested IRQ)
-       if (regs) {
-               if (inKernel) {
-                       addr = PC_REG;
-                       mod = __module_address(addr);
-                       if (mod) {
-                               cookie = get_cookie(cpu, current, NULL, mod);
-                               addr = addr - (unsigned long)mod->module_core;
-                       }
-                       gator_buffer_write_packed_int(cpu, addr & ~1);
-                       gator_buffer_write_packed_int(cpu, cookie);
-               } else {
-                       // Cookie+PC
-                       gator_add_trace(cpu, PC_REG);
+       bool inKernel;
+       unsigned long exec_cookie;
 
-                       // Backtrace
-                       if (gator_backtrace_depth)
-                               arm_backtrace_eabi(cpu, regs, gator_backtrace_depth);
-               }
-       }
+       if (!regs)
+               return;
 
-       gator_buffer_write_packed_int(cpu, PROTOCOL_END_BACKTRACE);
-}
+       inKernel = !user_mode(regs);
+       exec_cookie = get_exec_cookie(cpu, current);
 
-static void gator_write_packet(int cpu, int type, int len, int *buffer)
-{
-       int i;
-       gator_buffer_write_packed_int(cpu, type);
-       gator_buffer_write_packed_int(cpu, len);
-       for (i = 0; i < len; i++) {
-               gator_buffer_write_packed_int(cpu, buffer[i]);
+       if (!marshal_backtrace_header(exec_cookie, current->tgid, current->pid, inKernel))
+               return;
+
+       if (inKernel) {
+               kernel_backtrace(cpu, regs);
+       } else {
+               // Cookie+PC
+               gator_add_trace(cpu, PC_REG);
+
+               // Backtrace
+               if (gator_backtrace_depth)
+                       arm_backtrace_eabi(cpu, regs, gator_backtrace_depth);
        }
+
+       marshal_backtrace_footer();
 }
 
 /******************************************************************************
- * Interrupt Processing
+ * hrtimer interrupt processing
  ******************************************************************************/
-static LIST_HEAD(gator_events);
-
 static void gator_timer_interrupt(void)
 {
-       struct pt_regs * const regs = get_irq_regs();
-       int cpu = smp_processor_id();
-       int *buffer, len, tick;
-       struct gator_interface *gi;
+       struct pt_regs *const regs = get_irq_regs();
+       gator_backtrace_handler(regs);
+}
 
-       // check full backtrace has enough space, otherwise may
-       // have breaks between samples in the same callstack
-       if (per_cpu(gator_first_time, cpu)) {
-               per_cpu(gator_first_time, cpu) = 0;
+void gator_backtrace_handler(struct pt_regs *const regs)
+{
+       int cpu = get_physical_cpu();
 
-               list_for_each_entry(gi, &gator_events, list)
-                       if (gi->read)
-                               gi->read(NULL);
+       // Output backtrace
+       gator_add_sample(cpu, regs);
 
-               return;
+       // Collect counters
+       if (!per_cpu(collecting, cpu)) {
+               collect_counters();
        }
+}
 
-       // Header
-       gator_buffer_write_packed_int(cpu, PROTOCOL_START_TICK);                        // Escape
+static int gator_running;
 
-       // Output scheduler
-       len = gator_trace_sched_read(&buffer);
-       if (len > 0) {
-               gator_write_packet(cpu, PROTOCOL_SCHEDULER_TRACE, len, buffer);
-       }
+// This function runs in interrupt context and on the appropriate core
+static void gator_timer_offline(void *migrate)
+{
+       struct gator_interface *gi;
+       int i, len, cpu = get_physical_cpu();
+       int *buffer;
+       u64 time;
 
-       // Output counters
-       list_for_each_entry(gi, &gator_events, list) {
-               if (gi->read) {
-                       len = gi->read(&buffer);
-                       if (len > 0)
-                               gator_write_packet(cpu, PROTOCOL_COUNTERS, len, buffer);
-               }
-       }
+       gator_trace_sched_offline();
+       gator_trace_power_offline();
 
-       // Output backtrace
-       gator_add_sample(cpu, regs);
+       if (!migrate) {
+               gator_hrtimer_offline();
+       }
 
-       // Timer Tick
-       tick = per_cpu(gator_cpu_tick, cpu);
-       if (tick == gator_master_tick) {
-               tick++;
-               per_cpu(gator_cpu_tick, cpu) = gator_master_tick = tick;
-       } else {
-               per_cpu(gator_cpu_tick, cpu) = tick = gator_master_tick;
+       // Offline any events and output counters
+       time = gator_get_time();
+       if (marshal_event_header(time)) {
+               list_for_each_entry(gi, &gator_events, list) {
+                       if (gi->offline) {
+                               len = gi->offline(&buffer, migrate);
+                               marshal_event(len, buffer);
+                       }
+               }
+               // Only check after writing all counters so that time and corresponding counters appear in the same frame
+               buffer_check(cpu, BLOCK_COUNTER_BUF, time);
        }
-       gator_write_packet(cpu, PROTOCOL_END_TICK, 1, &tick);
 
-       // Check and commit; generally, commit is set to occur once per second
-       gator_buffer_check(cpu, tick);
+       // Flush all buffers on this core
+       for (i = 0; i < NUM_GATOR_BUFS; i++)
+               gator_commit_buffer(cpu, i, time);
 }
 
-/******************************************************************************
- * hrtimer
- ******************************************************************************/
-DEFINE_PER_CPU(struct hrtimer, percpu_hrtimer);
-DEFINE_PER_CPU(int, hrtimer_is_active);
-static int hrtimer_running;
-static ktime_t profiling_interval;
-
-static enum hrtimer_restart gator_hrtimer_notify(struct hrtimer *hrtimer)
+// This function runs in interrupt context and may be running on a core other than core 'cpu'
+static void gator_timer_offline_dispatch(int cpu, bool migrate)
 {
-       hrtimer_forward_now(hrtimer, profiling_interval);
-       gator_timer_interrupt();
-       return HRTIMER_RESTART;
-}
+       struct gator_interface *gi;
 
-static int gator_timer_init(void)
-{
-       return 0;
+       list_for_each_entry(gi, &gator_events, list) {
+               if (gi->offline_dispatch) {
+                       gi->offline_dispatch(cpu, migrate);
+               }
+       }
 }
 
-static void __gator_timer_offline(void *unused)
+static void gator_timer_stop(void)
 {
-       int cpu = smp_processor_id();
-       if (per_cpu(hrtimer_is_active, cpu)) {
-               struct gator_interface *gi;
-               struct hrtimer *hrtimer = &per_cpu(percpu_hrtimer, cpu);
-               hrtimer_cancel(hrtimer);
-               per_cpu(hrtimer_is_active, cpu) = 0;
-               gator_buffer_commit(cpu);
-
-               // offline any events
-               list_for_each_entry(gi, &gator_events, list)
-                       if (gi->offline)
-                               gi->offline();
+       int cpu;
+
+       if (gator_running) {
+               on_each_cpu(gator_timer_offline, NULL, 1);
+               for_each_online_cpu(cpu) {
+                       gator_timer_offline_dispatch(lcpu_to_pcpu(cpu), false);
+               }
+
+               gator_running = 0;
+               gator_hrtimer_shutdown();
        }
 }
 
-static void gator_timer_offline(void)
+// This function runs in interrupt context and on the appropriate core
+static void gator_timer_online(void *migrate)
 {
-       if (hrtimer_running) {
-               hrtimer_running = 0;
+       struct gator_interface *gi;
+       int len, cpu = get_physical_cpu();
+       int *buffer;
+       u64 time;
+
+       gator_trace_power_online();
+
+       // online any events and output counters
+       time = gator_get_time();
+       if (marshal_event_header(time)) {
+               list_for_each_entry(gi, &gator_events, list) {
+                       if (gi->online) {
+                               len = gi->online(&buffer, migrate);
+                               marshal_event(len, buffer);
+                       }
+               }
+               // Only check after writing all counters so that time and corresponding counters appear in the same frame
+               buffer_check(cpu, BLOCK_COUNTER_BUF, time);
+       }
 
-               on_each_cpu(__gator_timer_offline, NULL, 1);
+       if (!migrate) {
+               gator_hrtimer_online();
+       }
 
-               // output a final sync point
-               gator_buffer_write_packed_int(0, PROTOCOL_CPU_SYNC);
-               gator_buffer_commit(0);
+#if defined(__arm__) || defined(__aarch64__)
+       if (!sent_core_name[cpu]) {
+               const char *core_name = NULL;
+               const u32 cpuid = gator_cpuid();
+               const struct gator_cpu *const gator_cpu = gator_find_cpu_by_cpuid(cpuid);
+               char core_name_buf[32];
+
+               if (gator_cpu != NULL) {
+                       core_name = gator_cpu->core_name;
+               } else {
+                       snprintf(core_name_buf, sizeof(core_name_buf), "Unknown (0x%.3x)", cpuid);
+                       core_name = core_name_buf;
+               }
+
+               marshal_core_name(cpuid, core_name);
+               sent_core_name[cpu] = true;
        }
+#endif
 }
 
-static void __gator_timer_online(void *unused)
+// This function runs in interrupt context and may be running on a core other than core 'cpu'
+static void gator_timer_online_dispatch(int cpu, bool migrate)
 {
-       int cpu = smp_processor_id();
-       if (!per_cpu(hrtimer_is_active, cpu)) {
-               struct gator_interface *gi;
-               struct hrtimer *hrtimer = &per_cpu(percpu_hrtimer, cpu);
-               hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-               hrtimer->function = gator_hrtimer_notify;
-               hrtimer_start(hrtimer, profiling_interval, HRTIMER_MODE_REL_PINNED);
-               per_cpu(gator_cpu_tick, cpu) = 0;
-               per_cpu(gator_first_time, cpu) = 1;
-               per_cpu(hrtimer_is_active, cpu) = 1;
-
-               // online any events
-               list_for_each_entry(gi, &gator_events, list)
-                       if (gi->online)
-                               gi->online();
+       struct gator_interface *gi;
+
+       list_for_each_entry(gi, &gator_events, list) {
+               if (gi->online_dispatch) {
+                       gi->online_dispatch(cpu, migrate);
+               }
        }
 }
 
-int gator_timer_online(unsigned long setup)
+int gator_timer_start(unsigned long sample_rate)
 {
-       if (!setup) {
-               pr_err("gator: cannot start due to a system tick value of zero");
-               return -1;
-       } else if (hrtimer_running) {
-               pr_notice("gator: high res timer already running");
+       int cpu;
+
+       if (gator_running) {
+               pr_notice("gator: already running\n");
                return 0;
        }
 
-       hrtimer_running = 1;
+       gator_running = 1;
 
-       // calculate profiling interval
-       profiling_interval = ns_to_ktime(1000000000UL / setup);
+       // event based sampling trumps hr timer based sampling
+       if (event_based_sampling) {
+               sample_rate = 0;
+       }
 
-       // timer interrupt
-       gator_master_tick = 0;
-       on_each_cpu(__gator_timer_online, NULL, 1);
+       if (gator_hrtimer_init(sample_rate, gator_timer_interrupt) == -1)
+               return -1;
+
+       for_each_online_cpu(cpu) {
+               gator_timer_online_dispatch(lcpu_to_pcpu(cpu), false);
+       }
+       on_each_cpu(gator_timer_online, NULL, 1);
 
        return 0;
 }
 
-static uint64_t gator_get_time(void)
+static u64 gator_get_time(void)
 {
        struct timespec ts;
-       uint64_t timestamp;
+       u64 timestamp;
+       u64 prev_timestamp;
+       u64 delta;
+       int cpu = smp_processor_id();
 
-       ktime_get_ts(&ts);
+       // Match clock_gettime(CLOCK_MONOTONIC_RAW, &ts) from userspace
+       getrawmonotonic(&ts);
        timestamp = timespec_to_ns(&ts);
 
-       return timestamp;
+       // getrawmonotonic is not monotonic on all systems. Detect and attempt to correct these cases.
+       // up to 0.5ms delta has been seen on some systems, which can skew Streamline data when viewing at high resolution.
+       prev_timestamp = per_cpu(last_timestamp, cpu);
+       if (prev_timestamp <= timestamp) {
+               per_cpu(last_timestamp, cpu) = timestamp;
+       } else {
+               delta = prev_timestamp - timestamp;
+               // Log the error once
+               if (!printed_monotonic_warning && delta > 500000) {
+                       printk(KERN_ERR "%s: getrawmonotonic is not monotonic  cpu: %i  delta: %lli\nSkew in Streamline data may be present at the fine zoom levels\n", __FUNCTION__, cpu, delta);
+                       printed_monotonic_warning = true;
+               }
+               timestamp = prev_timestamp;
+       }
+
+       return timestamp - gator_monotonic_started;
 }
 
 /******************************************************************************
- * cpu online notifier
+ * cpu hotplug and pm notifiers
  ******************************************************************************/
-static int __cpuinit gator_cpu_notify(struct notifier_block *self,
-                                                                                       unsigned long action, void *hcpu)
+#include "gator_iks.c"
+
+static int __cpuinit gator_hotcpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
 {
-       long cpu = (long)hcpu;
+       int cpu = lcpu_to_pcpu((long)hcpu);
 
        switch (action) {
-               case CPU_ONLINE:
-               case CPU_ONLINE_FROZEN:
-                       smp_call_function_single(cpu, __gator_timer_online, NULL, 1);
-                       break;
-               case CPU_DOWN_PREPARE:
-               case CPU_DOWN_PREPARE_FROZEN:
-                       smp_call_function_single(cpu, __gator_timer_offline, NULL, 1);
-                       break;
+       case CPU_DOWN_PREPARE:
+       case CPU_DOWN_PREPARE_FROZEN:
+               smp_call_function_single(cpu, gator_timer_offline, NULL, 1);
+               gator_timer_offline_dispatch(cpu, false);
+               break;
+       case CPU_ONLINE:
+       case CPU_ONLINE_FROZEN:
+               gator_timer_online_dispatch(cpu, false);
+               smp_call_function_single(cpu, gator_timer_online, NULL, 1);
+               break;
        }
 
        return NOTIFY_OK;
 }
 
-static struct notifier_block __refdata gator_cpu_notifier = {
-       .notifier_call = gator_cpu_notify,
+static struct notifier_block __refdata gator_hotcpu_notifier = {
+       .notifier_call = gator_hotcpu_notify,
+};
+
+// n.b. calling "on_each_cpu" only runs on those that are online
+// Registered linux events are not disabled, so their counters will continue to collect
+static int gator_pm_notify(struct notifier_block *nb, unsigned long event, void *dummy)
+{
+       int cpu;
+
+       switch (event) {
+       case PM_HIBERNATION_PREPARE:
+       case PM_SUSPEND_PREPARE:
+               unregister_hotcpu_notifier(&gator_hotcpu_notifier);
+               unregister_scheduler_tracepoints();
+               on_each_cpu(gator_timer_offline, NULL, 1);
+               for_each_online_cpu(cpu) {
+                       gator_timer_offline_dispatch(lcpu_to_pcpu(cpu), false);
+               }
+               break;
+       case PM_POST_HIBERNATION:
+       case PM_POST_SUSPEND:
+               for_each_online_cpu(cpu) {
+                       gator_timer_online_dispatch(lcpu_to_pcpu(cpu), false);
+               }
+               on_each_cpu(gator_timer_online, NULL, 1);
+               register_scheduler_tracepoints();
+               register_hotcpu_notifier(&gator_hotcpu_notifier);
+               break;
+       }
+
+       return NOTIFY_OK;
+}
+
+static struct notifier_block gator_pm_notifier = {
+       .notifier_call = gator_pm_notify,
 };
 
 static int gator_notifier_start(void)
 {
-       return register_hotcpu_notifier(&gator_cpu_notifier);
+       int retval;
+       retval = register_hotcpu_notifier(&gator_hotcpu_notifier);
+       if (retval == 0)
+               retval = register_pm_notifier(&gator_pm_notifier);
+       return retval;
 }
 
 static void gator_notifier_stop(void)
 {
-       unregister_hotcpu_notifier(&gator_cpu_notifier);
+       unregister_pm_notifier(&gator_pm_notifier);
+       unregister_hotcpu_notifier(&gator_hotcpu_notifier);
 }
 
 /******************************************************************************
  * Main
  ******************************************************************************/
+static void gator_summary(void)
+{
+       u64 timestamp, uptime;
+       struct timespec ts;
+       char uname_buf[512];
+       void (*m2b)(struct timespec *ts);
+       unsigned long flags;
+
+       snprintf(uname_buf, sizeof(uname_buf), "%s %s %s %s %s GNU/Linux", utsname()->sysname, utsname()->nodename, utsname()->release, utsname()->version, utsname()->machine);
+
+       getnstimeofday(&ts);
+       timestamp = timespec_to_ns(&ts);
+
+       do_posix_clock_monotonic_gettime(&ts);
+       // monotonic_to_bootbased is not defined for some versions of Android
+       m2b = symbol_get(monotonic_to_bootbased);
+       if (m2b) {
+               m2b(&ts);
+       }
+       uptime = timespec_to_ns(&ts);
+
+       // Disable interrupts as gator_get_time calls smp_processor_id to verify time is monotonic
+       local_irq_save(flags);
+       // Set monotonic_started to zero as gator_get_time is uptime minus monotonic_started
+       gator_monotonic_started = 0;
+       gator_monotonic_started = gator_get_time();
+       local_irq_restore(flags);
+
+       marshal_summary(timestamp, uptime, uname_buf);
+}
+
 int gator_events_install(struct gator_interface *interface)
 {
        list_add_tail(&interface->list, &gator_events);
@@ -577,42 +877,67 @@ int gator_events_install(struct gator_interface *interface)
 
 int gator_events_get_key(void)
 {
-       static int key;
+       // key of zero is reserved as a timestamp
+       static int key = 1;
 
-       return key++;
+       const int ret = key;
+       key += 2;
+       return ret;
 }
 
 static int gator_init(void)
 {
        int i;
 
-       if (gator_timer_init())
-               return -1;
-       if (gator_trace_sched_init())
-               return -1;
-       if (gator_annotate_init())
-               return -1;
+       calc_first_cluster_size();
 
        // events sources (gator_events.h, generated by gator_events.sh)
        for (i = 0; i < ARRAY_SIZE(gator_events_list); i++)
                if (gator_events_list[i])
                        gator_events_list[i]();
 
+       gator_trace_sched_init();
+       gator_trace_power_init();
+
        return 0;
 }
 
+static void gator_exit(void)
+{
+       struct gator_interface *gi;
+
+       list_for_each_entry(gi, &gator_events, list)
+               if (gi->shutdown)
+                       gi->shutdown();
+}
+
 static int gator_start(void)
 {
+       unsigned long cpu, i;
        struct gator_interface *gi;
 
+       if (gator_migrate_start())
+               goto migrate_failure;
+
+       // Initialize the buffer with the frame type and core
+       for_each_present_cpu(cpu) {
+               for (i = 0; i < NUM_GATOR_BUFS; i++) {
+                       marshal_frame(cpu, i);
+               }
+               per_cpu(last_timestamp, cpu) = 0;
+       }
+       printed_monotonic_warning = false;
+
+       // Capture the start time
+       gator_summary();
+
        // start all events
        list_for_each_entry(gi, &gator_events, list) {
                if (gi->start && gi->start() != 0) {
                        struct list_head *ptr = gi->list.prev;
 
                        while (ptr != &gator_events) {
-                               gi = list_entry(ptr, struct gator_interface,
-                                               list);
+                               gi = list_entry(ptr, struct gator_interface, list);
 
                                if (gi->stop)
                                        gi->stop();
@@ -623,11 +948,18 @@ static int gator_start(void)
                }
        }
 
+       // cookies shall be initialized before trace_sched_start() and gator_timer_start()
+       if (cookies_initialize())
+               goto cookies_failure;
        if (gator_annotate_start())
                goto annotate_failure;
        if (gator_trace_sched_start())
                goto sched_failure;
-       if (gator_timer_online(gator_timer_count))
+       if (gator_trace_power_start())
+               goto power_failure;
+       if (gator_trace_gpu_start())
+               goto gpu_failure;
+       if (gator_timer_start(gator_timer_count))
                goto timer_failure;
        if (gator_notifier_start())
                goto notifier_failure;
@@ -635,13 +967,25 @@ static int gator_start(void)
        return 0;
 
 notifier_failure:
-       gator_timer_offline();
+       gator_timer_stop();
 timer_failure:
+       gator_trace_gpu_stop();
+gpu_failure:
+       gator_trace_power_stop();
+power_failure:
        gator_trace_sched_stop();
 sched_failure:
        gator_annotate_stop();
 annotate_failure:
+       cookies_release();
+cookies_failure:
+       // stop all events
+       list_for_each_entry(gi, &gator_events, list)
+               if (gi->stop)
+                       gi->stop();
 events_failure:
+       gator_migrate_stop();
+migrate_failure:
 
        return -1;
 }
@@ -650,22 +994,21 @@ static void gator_stop(void)
 {
        struct gator_interface *gi;
 
-       // stop all events
-       list_for_each_entry(gi, &gator_events, list)
-               if (gi->stop)
-                       gi->stop();
-
        gator_annotate_stop();
        gator_trace_sched_stop();
+       gator_trace_power_stop();
+       gator_trace_gpu_stop();
 
        // stop all interrupt callback reads before tearing down other interfaces
-       gator_timer_offline();
-       gator_notifier_stop();
-}
+       gator_notifier_stop();  // should be called before gator_timer_stop to avoid re-enabling the hrtimer after it has been offlined
+       gator_timer_stop();
 
-static void gator_exit(void)
-{
-       gator_annotate_exit();
+       // stop all events
+       list_for_each_entry(gi, &gator_events, list)
+               if (gi->stop)
+                       gi->stop();
+
+       gator_migrate_stop();
 }
 
 /******************************************************************************
@@ -675,42 +1018,66 @@ static void gator_exit(void)
 static int gator_op_setup(void)
 {
        int err = 0;
-       int cpu;
+       int cpu, i;
 
        mutex_lock(&start_mutex);
 
-       use_buffer_size = gator_buffer_size;
-       use_buffer_mask = use_buffer_size - 1;
+       gator_buffer_size[SUMMARY_BUF] = SUMMARY_BUFFER_SIZE;
+       gator_buffer_mask[SUMMARY_BUF] = SUMMARY_BUFFER_SIZE - 1;
 
-       // must be a power of 2
-       if (use_buffer_size & (use_buffer_size - 1)) {
-               err = -ENOEXEC;
-               goto setup_error;
-       }
+       gator_buffer_size[BACKTRACE_BUF] = BACKTRACE_BUFFER_SIZE;
+       gator_buffer_mask[BACKTRACE_BUF] = BACKTRACE_BUFFER_SIZE - 1;
 
-       gator_net_traffic = 0;
+       gator_buffer_size[NAME_BUF] = NAME_BUFFER_SIZE;
+       gator_buffer_mask[NAME_BUF] = NAME_BUFFER_SIZE - 1;
 
-       gator_commit_read = gator_commit_write = 0;
-       gator_commit = vmalloc(COMMIT_SIZE * sizeof(int));
-       if (!gator_commit) {
-               err = -ENOMEM;
-               goto setup_error;
-       }
+       gator_buffer_size[COUNTER_BUF] = COUNTER_BUFFER_SIZE;
+       gator_buffer_mask[COUNTER_BUF] = COUNTER_BUFFER_SIZE - 1;
 
-       for_each_present_cpu(cpu) {
-               per_cpu(use_buffer, cpu) = vmalloc(use_buffer_size);
-               if (!per_cpu(use_buffer, cpu)) {
-                       err = -ENOMEM;
+       gator_buffer_size[BLOCK_COUNTER_BUF] = BLOCK_COUNTER_BUFFER_SIZE;
+       gator_buffer_mask[BLOCK_COUNTER_BUF] = BLOCK_COUNTER_BUFFER_SIZE - 1;
+
+       gator_buffer_size[ANNOTATE_BUF] = ANNOTATE_BUFFER_SIZE;
+       gator_buffer_mask[ANNOTATE_BUF] = ANNOTATE_BUFFER_SIZE - 1;
+
+       gator_buffer_size[SCHED_TRACE_BUF] = SCHED_TRACE_BUFFER_SIZE;
+       gator_buffer_mask[SCHED_TRACE_BUF] = SCHED_TRACE_BUFFER_SIZE - 1;
+
+       gator_buffer_size[GPU_TRACE_BUF] = GPU_TRACE_BUFFER_SIZE;
+       gator_buffer_mask[GPU_TRACE_BUF] = GPU_TRACE_BUFFER_SIZE - 1;
+
+       gator_buffer_size[IDLE_BUF] = IDLE_BUFFER_SIZE;
+       gator_buffer_mask[IDLE_BUF] = IDLE_BUFFER_SIZE - 1;
+
+       // Initialize percpu per buffer variables
+       for (i = 0; i < NUM_GATOR_BUFS; i++) {
+               // Verify buffers are a power of 2
+               if (gator_buffer_size[i] & (gator_buffer_size[i] - 1)) {
+                       err = -ENOEXEC;
                        goto setup_error;
                }
 
-               per_cpu(gator_cpu_sync, cpu) = 0;
-               per_cpu(gator_cpu_tick, cpu) = 0;
+               for_each_present_cpu(cpu) {
+                       per_cpu(gator_buffer_read, cpu)[i] = 0;
+                       per_cpu(gator_buffer_write, cpu)[i] = 0;
+                       per_cpu(gator_buffer_commit, cpu)[i] = 0;
+                       per_cpu(buffer_space_available, cpu)[i] = true;
+#if GATOR_LIVE
+                       per_cpu(gator_buffer_commit_time, cpu) = gator_live_rate;
+#endif
 
-               per_cpu(use_buffer_seq, cpu) = 0;
-               per_cpu(use_buffer_read, cpu) = 0;
-               per_cpu(use_buffer_write, cpu) = 0;
-               gator_buffer_header(cpu);
+                       // Annotation is a special case that only uses a single buffer
+                       if (cpu > 0 && i == ANNOTATE_BUF) {
+                               per_cpu(gator_buffer, cpu)[i] = NULL;
+                               continue;
+                       }
+
+                       per_cpu(gator_buffer, cpu)[i] = vmalloc(gator_buffer_size[i]);
+                       if (!per_cpu(gator_buffer, cpu)[i]) {
+                               err = -ENOMEM;
+                               goto setup_error;
+                       }
+               }
        }
 
 setup_error:
@@ -725,7 +1092,7 @@ static int gator_op_start(void)
 
        mutex_lock(&start_mutex);
 
-       if (gator_started || gator_start() || cookies_initialize())
+       if (gator_started || gator_start())
                err = -EINVAL;
        else
                gator_started = 1;
@@ -746,6 +1113,7 @@ static void gator_op_stop(void)
                mutex_lock(&gator_buffer_mutex);
 
                gator_started = 0;
+               gator_monotonic_started = 0;
                cookies_release();
                wake_up(&gator_buffer_wait);
 
@@ -757,23 +1125,28 @@ static void gator_op_stop(void)
 
 static void gator_shutdown(void)
 {
-       int cpu;
+       int cpu, i;
 
        mutex_lock(&start_mutex);
 
-       vfree(gator_commit);
-       gator_commit = NULL;
-
        for_each_present_cpu(cpu) {
                mutex_lock(&gator_buffer_mutex);
-               vfree(per_cpu(use_buffer, cpu));
-               per_cpu(use_buffer, cpu) = NULL;
-               per_cpu(use_buffer_seq, cpu) = 0;
-               per_cpu(use_buffer_read, cpu) = 0;
-               per_cpu(use_buffer_write, cpu) = 0;
+               for (i = 0; i < NUM_GATOR_BUFS; i++) {
+                       vfree(per_cpu(gator_buffer, cpu)[i]);
+                       per_cpu(gator_buffer, cpu)[i] = NULL;
+                       per_cpu(gator_buffer_read, cpu)[i] = 0;
+                       per_cpu(gator_buffer_write, cpu)[i] = 0;
+                       per_cpu(gator_buffer_commit, cpu)[i] = 0;
+                       per_cpu(buffer_space_available, cpu)[i] = true;
+#if GATOR_LIVE
+                       per_cpu(gator_buffer_commit_time, cpu) = 0;
+#endif
+               }
                mutex_unlock(&gator_buffer_mutex);
        }
 
+       memset(&sent_core_name, 0, sizeof(sent_core_name));
+
        mutex_unlock(&start_mutex);
 }
 
@@ -821,11 +1194,11 @@ static ssize_t enable_write(struct file *file, char const __user *buf, size_t co
 }
 
 static const struct file_operations enable_fops = {
-       .read           = enable_read,
-       .write          = enable_write,
+       .read = enable_read,
+       .write = enable_write,
 };
 
-static int event_buffer_open(struct inode *inode, struct file *file)
+static int userspace_buffer_open(struct inode *inode, struct file *file)
 {
        int err = -EPERM;
 
@@ -849,7 +1222,7 @@ fail:
        return err;
 }
 
-static int event_buffer_release(struct inode *inode, struct file *file)
+static int userspace_buffer_release(struct inode *inode, struct file *file)
 {
        gator_op_stop();
        gator_shutdown();
@@ -857,97 +1230,92 @@ static int event_buffer_release(struct inode *inode, struct file *file)
        return 0;
 }
 
-static ssize_t event_buffer_read(struct file *file, char __user *buf,
-                                size_t count, loff_t *offset)
+static ssize_t userspace_buffer_read(struct file *file, char __user *buf, size_t count, loff_t *offset)
 {
-       int retval = -EINVAL;
        int commit, length1, length2, read;
-       char *buffer1, *buffer2;
-       char annotate_header[6];
-       int cpu;
+       char *buffer1;
+       char *buffer2;
+       int cpu, buftype;
+       int written = 0;
 
-       /* do not handle partial reads */
-       if (count != use_buffer_size || *offset)
+       // ensure there is enough space for a whole frame
+       if (count < userspace_buffer_size || *offset) {
                return -EINVAL;
+       }
 
        // sleep until the condition is true or a signal is received
        // the condition is checked each time gator_buffer_wait is woken up
-       wait_event_interruptible(gator_buffer_wait, buffer_commit_ready() || gator_annotate_ready() || !gator_started);
+       wait_event_interruptible(gator_buffer_wait, buffer_commit_ready(&cpu, &buftype) || !gator_started);
 
-       if (signal_pending(current))
+       if (signal_pending(current)) {
                return -EINTR;
+       }
 
-       retval = -EFAULT;
+       if (buftype == -1 || cpu == -1) {
+               return 0;
+       }
 
        mutex_lock(&gator_buffer_mutex);
 
-       if (buffer_commit_ready()) {
-               buffer_commit_read(&cpu, &read, &commit);
+       do {
+               read = per_cpu(gator_buffer_read, cpu)[buftype];
+               commit = per_cpu(gator_buffer_commit, cpu)[buftype];
 
-               /* May happen if the buffer is freed during pending reads. */
-               if (!per_cpu(use_buffer, cpu)) {
-                       retval = -EFAULT;
-                       goto out;
+               // May happen if the buffer is freed during pending reads.
+               if (!per_cpu(gator_buffer, cpu)[buftype]) {
+                       break;
                }
 
-               /* determine the size of two halves */
+               // determine the size of two halves
                length1 = commit - read;
                length2 = 0;
-               buffer1 = &(per_cpu(use_buffer, cpu)[read]);
-               buffer2 = &(per_cpu(use_buffer, cpu)[0]);
+               buffer1 = &(per_cpu(gator_buffer, cpu)[buftype][read]);
+               buffer2 = &(per_cpu(gator_buffer, cpu)[buftype][0]);
                if (length1 < 0) {
-                       length1 = use_buffer_size - read;
+                       length1 = gator_buffer_size[buftype] - read;
                        length2 = commit;
                }
-       } else if (gator_annotate_ready()) {
-               length2 = gator_annotate_read(&buffer2);
-               if (!length2)
-                       goto out;
-               annotate_header[0] = PROTOCOL_ANNOTATE;
-               length1 = gator_write_packed_int(&annotate_header[1], length2) + 1;
-               buffer1 = annotate_header;
-       } else {
-               retval = 0;
-               goto out;
-       }
 
-       /* start, middle or end */
-       if (length1 > 0) {
-               if (copy_to_user(&buf[0], buffer1, length1)) {
-                       goto out;
+               if (length1 + length2 > count - written) {
+                       break;
                }
-       }
 
-       /* possible wrap around */
-       if (length2 > 0) {
-               if (copy_to_user(&buf[length1], buffer2, length2)) {
-                       goto out;
+               // start, middle or end
+               if (length1 > 0 && copy_to_user(&buf[written], buffer1, length1)) {
+                       break;
                }
-       }
 
-       retval = length1 + length2;
+               // possible wrap around
+               if (length2 > 0 && copy_to_user(&buf[written + length1], buffer2, length2)) {
+                       break;
+               }
 
-       /* kick just in case we've lost an SMP event */
-       wake_up(&gator_buffer_wait);
+               per_cpu(gator_buffer_read, cpu)[buftype] = commit;
+               written += length1 + length2;
+
+               // Wake up annotate_write if more space is available
+               if (buftype == ANNOTATE_BUF) {
+                       wake_up(&gator_annotate_wait);
+               }
+       }       while (buffer_commit_ready(&cpu, &buftype));
 
-out:
-       // only adjust network stats if in streaming mode
-       if (gator_streaming)
-               gator_net_traffic += retval;
        mutex_unlock(&gator_buffer_mutex);
-       return retval;
+
+       // kick just in case we've lost an SMP event
+       wake_up(&gator_buffer_wait);
+
+       return written > 0 ? written : -EFAULT;
 }
 
 const struct file_operations gator_event_buffer_fops = {
-       .open           = event_buffer_open,
-       .release        = event_buffer_release,
-       .read           = event_buffer_read,
+       .open = userspace_buffer_open,
+       .release = userspace_buffer_release,
+       .read = userspace_buffer_read,
 };
 
 static ssize_t depth_read(struct file *file, char __user *buf, size_t count, loff_t *offset)
 {
-       return gatorfs_ulong_to_user(gator_backtrace_depth, buf, count,
-                                       offset);
+       return gatorfs_ulong_to_user(gator_backtrace_depth, buf, count, offset);
 }
 
 static ssize_t depth_write(struct file *file, char const __user *buf, size_t count, loff_t *offset)
@@ -970,19 +1338,8 @@ static ssize_t depth_write(struct file *file, char const __user *buf, size_t cou
 }
 
 static const struct file_operations depth_fops = {
-       .read           = depth_read,
-       .write          = depth_write
-};
-
-static const char gator_cpu_type[] = "gator";
-
-static ssize_t cpu_type_read(struct file *file, char __user *buf, size_t count, loff_t *offset)
-{
-       return gatorfs_str_to_user(gator_cpu_type, buf, count, offset);
-}
-
-static const struct file_operations cpu_type_fops = {
-       .read           = cpu_type_read,
+       .read = depth_read,
+       .write = depth_write
 };
 
 void gator_op_create_files(struct super_block *sb, struct dentry *root)
@@ -996,18 +1353,20 @@ void gator_op_create_files(struct super_block *sb, struct dentry *root)
        for_each_present_cpu(cpu) {
                gator_cpu_cores++;
        }
-       gator_buffer_size =     BUFFER_SIZE_DEFAULT;
-       gator_streaming = 1;
+       userspace_buffer_size = BACKTRACE_BUFFER_SIZE;
+       gator_response_type = 1;
+       gator_live_rate = 0;
 
        gatorfs_create_file(sb, root, "enable", &enable_fops);
        gatorfs_create_file(sb, root, "buffer", &gator_event_buffer_fops);
        gatorfs_create_file(sb, root, "backtrace_depth", &depth_fops);
-       gatorfs_create_file(sb, root, "cpu_type", &cpu_type_fops);
-       gatorfs_create_ulong(sb, root, "cpu_cores", &gator_cpu_cores);
-       gatorfs_create_ulong(sb, root, "buffer_size", &gator_buffer_size);
+       gatorfs_create_ro_ulong(sb, root, "cpu_cores", &gator_cpu_cores);
+       gatorfs_create_ro_ulong(sb, root, "buffer_size", &userspace_buffer_size);
        gatorfs_create_ulong(sb, root, "tick", &gator_timer_count);
-       gatorfs_create_ulong(sb, root, "streaming", &gator_streaming);
+       gatorfs_create_ulong(sb, root, "response_type", &gator_response_type);
        gatorfs_create_ro_ulong(sb, root, "version", &gator_protocol_version);
+       gatorfs_create_ro_u64(sb, root, "started", &gator_monotonic_started);
+       gatorfs_create_u64(sb, root, "live_rate", &gator_live_rate);
 
        // Annotate interface
        gator_annotate_create_files(sb, root);
@@ -1017,6 +1376,12 @@ void gator_op_create_files(struct super_block *sb, struct dentry *root)
        list_for_each_entry(gi, &gator_events, list)
                if (gi->create_files)
                        gi->create_files(sb, dir);
+
+       // Sched Events
+       sched_trace_create_files(sb, dir);
+
+       // Power interface
+       gator_trace_power_create_files(sb, dir);
 }
 
 /******************************************************************************
@@ -1033,20 +1398,17 @@ static int __init gator_module_init(void)
                return -1;
        }
 
-#ifdef GATOR_DEBUG
-       pr_err("gator_module_init");
-#endif
+       setup_timer(&gator_buffer_wake_up_timer, gator_buffer_wake_up, 0);
+
        return 0;
 }
 
 static void __exit gator_module_exit(void)
 {
-#ifdef GATOR_DEBUG
-       pr_err("gator_module_exit");
-#endif
+       del_timer_sync(&gator_buffer_wake_up_timer);
        tracepoint_synchronize_unregister();
-       gatorfs_unregister();
        gator_exit();
+       gatorfs_unregister();
 }
 
 module_init(gator_module_init);