From 63752c2d34da50ad6a0d38e292f075ee5cdddcd2 Mon Sep 17 00:00:00 2001 From: Adam Greloch Date: Fri, 30 May 2025 09:50:40 +0200 Subject: [PATCH 1/4] proc: extract process name retrieval from proc_threadsList() to a function JIRA: RTOS-1057 --- proc/process.c | 33 +++++++++++++++++++++++++++++++++ proc/process.h | 3 +++ proc/threads.c | 33 ++------------------------------- 3 files changed, 38 insertions(+), 31 deletions(-) diff --git a/proc/process.c b/proc/process.c index ad613beb0..0442090e5 100644 --- a/proc/process.c +++ b/proc/process.c @@ -1821,3 +1821,36 @@ int process_tlsDestroy(hal_tls_t *tls, vm_map_t *map) { return vm_munmap(map, (void *)tls->tls_base, tls->tls_sz); } + + +void process_getName(const process_t *process, char *buf, size_t sz) +{ + int argc; + size_t len = 0, space; + char *sbuf; + + if (process->path != NULL) { + space = sz; + sbuf = buf; + + if (process->argv != NULL) { + for (argc = 0; process->argv[argc] != NULL && space > 0U; ++argc) { + len = min(hal_strlen(process->argv[argc]) + 1, space); + hal_memcpy(sbuf, process->argv[argc], len); + sbuf[len - 1U] = ' '; + sbuf += len; + space -= len; + } + *(sbuf - 1) = '\0'; + } + else { + len = hal_strlen(process->path) + 1U; + hal_memcpy(buf, process->path, min(space, len)); + } + + buf[sz - 1U] = '\0'; + } + else { + buf[0] = '\0'; + } +} diff --git a/proc/process.h b/proc/process.h index 3e1b6698d..fbf2c217c 100644 --- a/proc/process.h +++ b/proc/process.h @@ -75,6 +75,9 @@ static inline int process_getPid(const process_t *process) } +void process_getName(const process_t *process, char *buf, size_t sz); + + process_t *proc_find(int pid); diff --git a/proc/threads.c b/proc/threads.c index 0cae5c968..67cd8c254 100644 --- a/proc/threads.c +++ b/proc/threads.c @@ -2092,13 +2092,11 @@ void proc_threadsDump(u8 priority) int proc_threadsList(int n, threadinfo_t *info) { - int i = 0, argc; - size_t len, space; + int i = 0; thread_t *t; map_entry_t *entry; vm_map_t *map; time_t now; - char *name; 
spinlock_ctx_t sc; (void)proc_lockSet(&threads_common.lock); @@ -2140,34 +2138,7 @@ int proc_threadsList(int n, threadinfo_t *info) if (t->process != NULL) { map = t->process->mapp; - - if (t->process->path != NULL) { - space = sizeof(info[i].name); - name = info[i].name; - - if (t->process->argv != NULL) { - for (argc = 0; t->process->argv[argc] != NULL; ++argc) { - if ((int)space <= 0) { - break; - } - len = min(hal_strlen(t->process->argv[argc]) + 1U, space); - hal_memcpy(name, t->process->argv[argc], len); - name[len - 1U] = ' '; - name += len; - space -= len; - } - *(name - 1) = '\0'; - } - else { - len = hal_strlen(t->process->path) + 1U; - hal_memcpy(info[i].name, t->process->path, min(space, len)); - } - - info[i].name[sizeof(info[i].name) - 1U] = '\0'; - } - else { - info[i].name[0] = '\0'; - } + process_getName(t->process, info[i].name, sizeof(info[i].name)); } else { map = threads_common.kmap; From a3cd7bf6c570fcd87b5bb90882f1b9f091885792 Mon Sep 17 00:00:00 2001 From: Adam Greloch Date: Thu, 3 Jul 2025 11:44:51 +0200 Subject: [PATCH 2/4] proc: add threadinfo iterator and implement proc_threadsList using it Adds a constant memory iterator over running threads that accepts a custom threadinfo callback. It is a generalization of proc_threadsList that makes it possible, e.g., to write the threadinfo struct directly to RTT without allocating memory for an array of threadinfos (as would be the case if using proc_threadsList). 
JIRA: RTOS-1057 --- proc/threads.c | 55 +++++++++++++++++++++++++++++++++----------------- proc/threads.h | 6 ++++++ 2 files changed, 42 insertions(+), 19 deletions(-) diff --git a/proc/threads.c b/proc/threads.c index 67cd8c254..ccaad589b 100644 --- a/proc/threads.c +++ b/proc/threads.c @@ -2090,7 +2090,7 @@ void proc_threadsDump(u8 priority) } -int proc_threadsList(int n, threadinfo_t *info) +int proc_threadsIter(int n, proc_threadsListCb_t cb, void *arg) { int i = 0; thread_t *t; @@ -2098,6 +2098,7 @@ int proc_threadsList(int n, threadinfo_t *info) vm_map_t *map; time_t now; spinlock_ctx_t sc; + threadinfo_t tinfo; (void)proc_lockSet(&threads_common.lock); @@ -2105,54 +2106,55 @@ int proc_threadsList(int n, threadinfo_t *info) while (i < n && t != NULL) { if (t->process != NULL) { - info[i].pid = process_getPid(t->process); - /* TODO: info[i].ppid = t->process->parent != NULL ? t->process->parent->id : 0; */ - info[i].ppid = 0; + tinfo.pid = process_getPid(t->process); + // tinfo.ppid = t->process->parent != NULL ? t->process->parent->id : 0; + /* TODO: tinfo.ppid = t->process->parent != NULL ? 
t->process->parent->id : 0; */ + tinfo.ppid = 0; } else { - info[i].pid = 0; - info[i].ppid = 0; + tinfo.pid = 0; + tinfo.ppid = 0; } hal_spinlockSet(&threads_common.spinlock, &sc); - info[i].tid = (unsigned int)proc_getTid(t); - info[i].priority = (int)t->priorityBase; - info[i].state = (int)t->state; + tinfo.tid = (unsigned int)proc_getTid(t); + tinfo.priority = (int)t->priorityBase; + tinfo.state = (int)t->state; now = _proc_gettimeRaw(); if (now != t->startTime) { - info[i].load = (int)((t->cpuTime * 1000) / (now - t->startTime)); + tinfo.load = (int)((t->cpuTime * 1000) / (now - t->startTime)); } else { - info[i].load = 0; + tinfo.load = 0; } - info[i].cpuTime = t->cpuTime; + tinfo.cpuTime = t->cpuTime; if (t->state == READY && t->maxWait < now - t->readyTime) { - info[i].wait = now - t->readyTime; + tinfo.wait = now - t->readyTime; } else { - info[i].wait = t->maxWait; + tinfo.wait = t->maxWait; } hal_spinlockClear(&threads_common.spinlock, &sc); if (t->process != NULL) { map = t->process->mapp; - process_getName(t->process, info[i].name, sizeof(info[i].name)); + process_getName(t->process, tinfo.name, sizeof(tinfo.name)); } else { map = threads_common.kmap; - hal_memcpy(info[i].name, "[idle]", sizeof("[idle]")); + hal_memcpy(tinfo.name, "[idle]", sizeof("[idle]")); } - info[i].vmem = 0; + tinfo.vmem = 0; #ifdef NOMMU if (t->process != NULL) { entry = t->process->entries; if (entry != NULL) { do { - info[i].vmem += (int)entry->size; + tinfo.vmem += (int)entry->size; entry = entry->next; } while (entry != t->process->entries); } @@ -2164,7 +2166,7 @@ int proc_threadsList(int n, threadinfo_t *info) entry = lib_treeof(map_entry_t, linkage, lib_rbMinimum(map->tree.root)); while (entry != NULL) { - info[i].vmem += (int)entry->size; + tinfo.vmem += (int)entry->size; entry = lib_treeof(map_entry_t, linkage, lib_rbNext(&entry->linkage)); } (void)proc_lockClear(&map->lock); @@ -2173,6 +2175,8 @@ int proc_threadsList(int n, threadinfo_t *info) /* No action required */ 
} + cb(arg, i, &tinfo); + ++i; t = lib_idtreeof(thread_t, idlinkage, lib_idtreeNext(&t->idlinkage.linkage)); } @@ -2197,6 +2201,19 @@ int proc_threadsOther(thread_t *t) } +static void proc_threadsListCb(void *arg, int i, threadinfo_t *tinfo) +{ + threadinfo_t *tinfos = (threadinfo_t *)arg; + hal_memcpy(tinfos + i, tinfo, sizeof(threadinfo_t)); +} + + +int proc_threadsList(int n, threadinfo_t *tinfos) +{ + return proc_threadsIter(n, proc_threadsListCb, tinfos); +} + + int _threads_init(vm_map_t *kmap, vm_object_t *kernel) { unsigned int i; diff --git a/proc/threads.h b/proc/threads.h index 60e89e077..d868c1f98 100644 --- a/proc/threads.h +++ b/proc/threads.h @@ -135,6 +135,12 @@ int proc_join(int tid, time_t timeout); void proc_changeMap(process_t *proc, vm_map_t *map, vm_map_t *imap, pmap_t *pmap); +typedef void (*proc_threadsListCb_t)(void *arg, int i, threadinfo_t *info); + + +int proc_threadsIter(int n, proc_threadsListCb_t cb, void *arg); + + int proc_threadsList(int n, threadinfo_t *info); From c07f6582c462a4c13cb6628af8b2b7cb12f3e980 Mon Sep 17 00:00:00 2001 From: Adam Greloch Date: Fri, 16 May 2025 16:12:37 +0200 Subject: [PATCH 3/4] !perf: introduce performance analysis subsystem Introduces a subsystem intended for various performance analysis mechanisms: tracing, sample-based methods, PMU utilization, etc. Breaking changes: * This commit generalizes the `perf_*` syscalls to support various perf modes * Renames `perf_*` into `sys_perf_*` * Previous perf semantics are preserved in the `perf_mode_threads` mode * The return value of `perf_start` on success changes from 0 to a non-negative number denoting count of channels that should be read by `perf_read`. * `perf_read` accepts additional `chan` argument denoting an ID of a channel to be read (a mode could serve multiple read-only channels) The subsystem currently implements `perf_mode_trace` mode - a mechanism for tracing syscalls, interrupts, locks and scheduler events. 
The events are output in CTF format to a set of cbuffers that can be retrieved from userspace via `perf_read(perf_mode_trace, ..., chan)` and later processed by 3rd party libraries like babeltrace2. This commit also adds a `perf_stop` syscall for stopping the perf action without freeing the internal buffers, to allow reading them post-factum and not disturb the trace with `perf_read` calls while it is being recorded. The trace can be gathered in start-stop fashion or in a rolling-window fashion, if `perf_start` is supplied with `PERF_TRACE_FLAG_ROLLING` flag. In the rolling-window fashion, the trace buffer write ops will discard the oldest events if the buffers are full. NOTE: Interrupts are not yet traced on sparcv8leon-gaisler targets, see TODO. JIRA: RTOS-1057 --- Makefile | 1 + hal/aarch64/interrupts_gicv2.c | 22 ++ hal/aarch64/zynqmp/config.h | 1 + hal/aarch64/zynqmp/timer.c | 2 +- hal/arm/rtt.c | 34 ++- hal/arm/rtt.h | 4 + hal/armv7a/imx6ull/config.h | 2 + hal/armv7a/imx6ull/interrupts.c | 21 ++ hal/armv7a/imx6ull/timer.c | 3 +- hal/armv7a/zynq7000/config.h | 2 + hal/armv7a/zynq7000/interrupts.c | 21 ++ hal/armv7a/zynq7000/timer.c | 2 +- hal/armv7m/imxrt/10xx/console.c | 2 +- hal/armv7m/imxrt/10xx/imxrt10xx.c | 2 + hal/armv7m/imxrt/117x/console.c | 2 +- hal/armv7m/imxrt/117x/imxrt117x.c | 2 + hal/armv7m/imxrt/interrupts.c | 20 ++ hal/armv7m/stm32/interrupts.c | 20 ++ hal/armv7r/zynqmp/config.h | 1 + hal/armv7r/zynqmp/interrupts.c | 21 ++ hal/armv7r/zynqmp/timer.c | 2 +- hal/armv8m/interrupts.c | 21 ++ hal/armv8m/mcx/n94x/config.h | 2 + hal/armv8m/mcx/n94x/timer.c | 4 +- hal/armv8m/nrf/91/config.h | 1 + hal/armv8m/nrf/91/timer.c | 2 +- hal/armv8r/mps3an536/interrupts.c | 20 ++ hal/ia32/interrupts.c | 20 ++ hal/interrupts.h | 4 + hal/riscv64/interrupts.c | 31 +++ hal/sparcv8leon/gaisler/irqamp.c | 6 + hal/sparcv8leon/gaisler/irqmp.c | 6 + include/perf.h | 28 +++ include/syscalls.h | 7 +- include/sysinfo.h | 2 +- main.c | 2 + perf/Makefile | 14 ++ 
perf/buffer-mem.c | 195 +++++++++++++++++ perf/buffer-rtt.c | 134 ++++++++++++ perf/buffer.h | 46 ++++ perf/perf.c | 74 +++++++ perf/perf.h | 38 ++++ perf/trace-events.h | 289 +++++++++++++++++++++++++ perf/trace.c | 348 ++++++++++++++++++++++++++++++ perf/trace.h | 38 ++++ perf/tsdl/metadata | 196 +++++++++++++++++ proc/lock.h | 8 + proc/process.c | 2 + proc/threads.c | 79 +++++-- proc/threads.h | 8 +- syscalls.c | 60 +++++- 51 files changed, 1830 insertions(+), 42 deletions(-) create mode 100644 include/perf.h create mode 100644 perf/Makefile create mode 100644 perf/buffer-mem.c create mode 100644 perf/buffer-rtt.c create mode 100644 perf/buffer.h create mode 100644 perf/perf.c create mode 100644 perf/perf.h create mode 100644 perf/trace-events.h create mode 100644 perf/trace.c create mode 100644 perf/trace.h create mode 100644 perf/tsdl/metadata diff --git a/Makefile b/Makefile index fbc7d08e7..e31846e03 100644 --- a/Makefile +++ b/Makefile @@ -58,6 +58,7 @@ include posix/Makefile include lib/Makefile include test/Makefile include log/Makefile +include perf/Makefile # incremental build quick-fix, WARN: assuming the sources are in c DEPS := $(patsubst %.o, %.c.d, $(OBJS)) diff --git a/hal/aarch64/interrupts_gicv2.c b/hal/aarch64/interrupts_gicv2.c index 51802b97f..faee6ef72 100644 --- a/hal/aarch64/interrupts_gicv2.c +++ b/hal/aarch64/interrupts_gicv2.c @@ -25,6 +25,9 @@ #include "dtb.h" #include "interrupts_gicv2.h" #include "arch/pmap.h" +#include "config.h" + +#include "perf/trace-events.h" #define SPI_FIRST_IRQID 32 @@ -87,6 +90,7 @@ static struct { spinlock_t spinlock[SIZE_INTERRUPTS]; intr_handler_t *handlers[SIZE_INTERRUPTS]; unsigned int counters[SIZE_INTERRUPTS]; + int trace_irqs; } interrupts_common; @@ -101,6 +105,7 @@ int interrupts_dispatch(unsigned int n, cpu_context_t *ctx) intr_handler_t *h; unsigned int reschedule = 0; spinlock_ctx_t sc; + int trace; u32 ciarValue = *(interrupts_common.gicc + gicc_iar); n = ciarValue & 0x3ffU; @@ -109,6 +114,11 
@@ int interrupts_dispatch(unsigned int n, cpu_context_t *ctx) return 0; } + trace = interrupts_common.trace_irqs != 0 && n != TIMER_IRQ_ID; + if (trace != 0) { + trace_eventInterruptEnter(n); + } + hal_spinlockSet(&interrupts_common.spinlock[n], &sc); interrupts_common.counters[n]++; @@ -129,6 +139,10 @@ int interrupts_dispatch(unsigned int n, cpu_context_t *ctx) hal_spinlockClear(&interrupts_common.spinlock[n], &sc); + if (trace != 0) { + trace_eventInterruptExit(n); + } + return (int)reschedule; } @@ -233,12 +247,20 @@ int hal_interruptsDeleteHandler(intr_handler_t *h) } +void _hal_interruptsTrace(int enable) +{ + interrupts_common.trace_irqs = !!enable; +} + + /* Function initializes interrupt handling */ void _hal_interruptsInit(void) { u32 i; addr_t gicc, gicd; + interrupts_common.trace_irqs = 0; + dtb_getGIC(&gicc, &gicd); interrupts_common.gicd = _pmap_halMapDevice(gicd, 0, SIZE_PAGE); interrupts_common.gicc = _pmap_halMapDevice(gicc, 0, SIZE_PAGE); diff --git a/hal/aarch64/zynqmp/config.h b/hal/aarch64/zynqmp/config.h index bd0d22f3a..a3285165a 100644 --- a/hal/aarch64/zynqmp/config.h +++ b/hal/aarch64/zynqmp/config.h @@ -24,6 +24,7 @@ #define ASID_BITS 16 #define NUM_CPUS 4 #define SIZE_INTERRUPTS 188U +#define TIMER_IRQ_ID 68U #ifndef __ASSEMBLY__ diff --git a/hal/aarch64/zynqmp/timer.c b/hal/aarch64/zynqmp/timer.c index daa27eb8e..7b4e66511 100644 --- a/hal/aarch64/zynqmp/timer.c +++ b/hal/aarch64/zynqmp/timer.c @@ -21,11 +21,11 @@ #include "hal/aarch64/arch/pmap.h" #include "zynqmp.h" +#include "config.h" #define TTC0_BASE_ADDR ((addr_t)0x00ff110000U) #define TIMER_SRC_CLK_CPU_1x 99990000U -#define TIMER_IRQ_ID 68U static struct { volatile u32 *ttc; diff --git a/hal/arm/rtt.c b/hal/arm/rtt.c index 7bb048dd6..d0893279e 100644 --- a/hal/arm/rtt.c +++ b/hal/arm/rtt.c @@ -26,6 +26,14 @@ #define RTT_CB_SIZE 256U #endif +#ifndef RTT_ENABLED +#define RTT_ENABLED 0 +#endif + +#ifndef RTT_ENABLED_PLO +#define RTT_ENABLED_PLO 0 +#endif + struct rtt_pipe { const 
char *name; @@ -144,10 +152,33 @@ int _hal_rttReset(unsigned int chan, rtt_dir_t dir) } +int _hal_rttIsReady(void) +{ + return common.rtt != NULL; +} + + int _hal_rttInit(void) { - const syspage_map_t *map = syspage_mapNameResolve(RTT_SYSPAGE_MAP_NAME); + common.rtt = NULL; + return 0; +} + + +int _hal_rttSetup(void) +{ + const syspage_map_t *map; + if (_hal_rttIsReady() != 0) { + /* RTT already set up */ + return 0; + } + + if (RTT_ENABLED == 0 || RTT_ENABLED_PLO == 0) { + return -ENOSYS; + } + + map = syspage_mapNameResolve(RTT_SYSPAGE_MAP_NAME); if (map == NULL) { return -ENOENT; } @@ -158,5 +189,6 @@ int _hal_rttInit(void) /* TODO: Place CB always at the start of the map? */ common.rtt = (void *)(map->end - RTT_CB_SIZE); + return 0; } diff --git a/hal/arm/rtt.h b/hal/arm/rtt.h index 479428264..ead32c761 100644 --- a/hal/arm/rtt.h +++ b/hal/arm/rtt.h @@ -32,6 +32,10 @@ typedef enum { int _hal_rttInit(void); +/* Setup rtt based on syspage map */ +int _hal_rttSetup(void); + + /* Non-blocking write to channel */ int _hal_rttWrite(unsigned int chan, const void *buf, unsigned int count); diff --git a/hal/armv7a/imx6ull/config.h b/hal/armv7a/imx6ull/config.h index eaee79758..225171cfd 100644 --- a/hal/armv7a/imx6ull/config.h +++ b/hal/armv7a/imx6ull/config.h @@ -21,6 +21,8 @@ #define NUM_CPUS 1 +#define TIMER_IRQ_ID 88 + #ifndef __ASSEMBLY__ #define HAL_NAME_PLATFORM "NXP i.MX 6ULL " diff --git a/hal/armv7a/imx6ull/interrupts.c b/hal/armv7a/imx6ull/interrupts.c index e5058c87d..8d221d81b 100644 --- a/hal/armv7a/imx6ull/interrupts.c +++ b/hal/armv7a/imx6ull/interrupts.c @@ -16,8 +16,10 @@ #include "hal/cpu.h" #include "hal/interrupts.h" #include "hal/list.h" +#include "config.h" #include "proc/userintr.h" +#include "perf/trace-events.h" #define SIZE_INTERRUPTS 159U @@ -40,6 +42,7 @@ static struct { spinlock_t spinlock[SIZE_INTERRUPTS]; intr_handler_t *handlers[SIZE_INTERRUPTS]; unsigned int counters[SIZE_INTERRUPTS]; + int trace_irqs; } interrupts; @@ -55,6 +58,7 @@ 
int interrupts_dispatch(unsigned int n, cpu_context_t *ctx) intr_handler_t *h; int reschedule = 0; spinlock_ctx_t sc; + int trace; u32 iarValue = *(interrupts.gic + iar); n = iarValue & 0x3ffU; @@ -63,6 +67,11 @@ int interrupts_dispatch(unsigned int n, cpu_context_t *ctx) return 0; } + trace = interrupts.trace_irqs != 0 && n != TIMER_IRQ_ID; + if (trace != 0) { + trace_eventInterruptEnter(n); + } + hal_spinlockSet(&interrupts.spinlock[n], &sc); interrupts.counters[n]++; @@ -85,6 +94,10 @@ int interrupts_dispatch(unsigned int n, cpu_context_t *ctx) hal_spinlockClear(&interrupts.spinlock[n], &sc); + if (trace != 0) { + trace_eventInterruptExit(n); + } + return reschedule; } @@ -175,10 +188,18 @@ int hal_interruptsDeleteHandler(intr_handler_t *h) } +void _hal_interruptsTrace(int enable) +{ + interrupts.trace_irqs = !!enable; +} + + void _hal_interruptsInit(void) { u32 i, t, priority; + interrupts.trace_irqs = 0; + for (i = 0; i < SIZE_INTERRUPTS; ++i) { interrupts.handlers[i] = NULL; interrupts.counters[i] = 0; diff --git a/hal/armv7a/imx6ull/timer.c b/hal/armv7a/imx6ull/timer.c index ccb5cb6df..0a484edb2 100644 --- a/hal/armv7a/imx6ull/timer.c +++ b/hal/armv7a/imx6ull/timer.c @@ -17,8 +17,7 @@ #include "hal/timer.h" #include "hal/spinlock.h" #include "hal/string.h" - -#define TIMER_IRQ_ID 88 +#include "config.h" static struct { volatile u32 *epit1; diff --git a/hal/armv7a/zynq7000/config.h b/hal/armv7a/zynq7000/config.h index f8fd65631..257815ba6 100644 --- a/hal/armv7a/zynq7000/config.h +++ b/hal/armv7a/zynq7000/config.h @@ -29,6 +29,8 @@ #define NUM_CPUS 2 +#define TIMER_IRQ_ID 42U + #ifndef __ASSEMBLY__ #define HAL_NAME_PLATFORM "Xilinx Zynq-7000 " diff --git a/hal/armv7a/zynq7000/interrupts.c b/hal/armv7a/zynq7000/interrupts.c index 67160a33e..afe6d50d1 100644 --- a/hal/armv7a/zynq7000/interrupts.c +++ b/hal/armv7a/zynq7000/interrupts.c @@ -20,9 +20,11 @@ #include "hal/spinlock.h" #include "hal/interrupts.h" #include "hal/list.h" +#include "config.h" #include 
"proc/userintr.h" +#include "perf/trace-events.h" #define SIZE_INTERRUPTS 95U #define SPI_FIRST_IRQID 32U @@ -58,6 +60,7 @@ static struct { spinlock_t spinlock[SIZE_INTERRUPTS]; intr_handler_t *handlers[SIZE_INTERRUPTS]; unsigned int counters[SIZE_INTERRUPTS]; + int trace_irqs; } interrupts_common; @@ -87,6 +90,7 @@ int interrupts_dispatch(unsigned int n, cpu_context_t *ctx) intr_handler_t *h; unsigned int reschedule = 0U; spinlock_ctx_t sc; + int trace; u32 ciarValue = *(interrupts_common.gic + ciar); n = ciarValue & 0x3ffU; @@ -95,6 +99,11 @@ int interrupts_dispatch(unsigned int n, cpu_context_t *ctx) return 0; } + trace = interrupts_common.trace_irqs != 0 && n != TIMER_IRQ_ID; + if (trace != 0) { + trace_eventInterruptEnter(n); + } + hal_spinlockSet(&interrupts_common.spinlock[n], &sc); interrupts_common.counters[n]++; @@ -115,6 +124,10 @@ int interrupts_dispatch(unsigned int n, cpu_context_t *ctx) hal_spinlockClear(&interrupts_common.spinlock[n], &sc); + if (trace != 0) { + trace_eventInterruptExit(n); + } + return (int)reschedule; } @@ -208,11 +221,19 @@ int hal_interruptsDeleteHandler(intr_handler_t *h) } +void _hal_interruptsTrace(int enable) +{ + interrupts_common.trace_irqs = !!enable; +} + + /* Function initializes interrupt handling */ void _hal_interruptsInit(void) { u32 i; + interrupts_common.trace_irqs = 0; + for (i = 0; i < SIZE_INTERRUPTS; ++i) { interrupts_common.handlers[i] = NULL; interrupts_common.counters[i] = 0; diff --git a/hal/armv7a/zynq7000/timer.c b/hal/armv7a/zynq7000/timer.c index a813a9e2d..80680bae1 100644 --- a/hal/armv7a/zynq7000/timer.c +++ b/hal/armv7a/zynq7000/timer.c @@ -17,11 +17,11 @@ #include "hal/timer.h" #include "hal/spinlock.h" #include "hal/string.h" +#include "config.h" #include "zynq.h" #define TIMER_SRC_CLK_CPU_1x 111111115U /* Hz */ -#define TIMER_IRQ_ID 42U static struct { volatile u32 *ttc; diff --git a/hal/armv7m/imxrt/10xx/console.c b/hal/armv7m/imxrt/10xx/console.c index 5fb6e304c..ef7d4af64 100644 --- 
a/hal/armv7m/imxrt/10xx/console.c +++ b/hal/armv7m/imxrt/10xx/console.c @@ -206,7 +206,7 @@ static void _hal_uartInit(void) void _hal_consoleInit(void) { #if RTT_ENABLED && !ISEMPTY(RTT_CONSOLE_KERNEL) - _hal_rttInit(); + _hal_rttSetup(); #endif #if !ISEMPTY(UART_CONSOLE_KERNEL) diff --git a/hal/armv7m/imxrt/10xx/imxrt10xx.c b/hal/armv7m/imxrt/10xx/imxrt10xx.c index 8e454186d..cc823f317 100644 --- a/hal/armv7m/imxrt/10xx/imxrt10xx.c +++ b/hal/armv7m/imxrt/10xx/imxrt10xx.c @@ -22,6 +22,7 @@ #include "include/arch/armv7m/imxrt/10xx/imxrt10xx.h" #include "hal/arm/scs.h" +#include "hal/arm/rtt.h" #include "imxrt10xx.h" #include "config.h" @@ -1980,6 +1981,7 @@ void _imxrt_init(void) imxrt_common.cpuclk = 528000000U; /* Default system clock */ _hal_scsInit(); + _hal_rttInit(); /* Disable watchdogs */ if ((*(imxrt_common.wdog1 + wdog_wcr) & (1U << 2)) != 0U) { diff --git a/hal/armv7m/imxrt/117x/console.c b/hal/armv7m/imxrt/117x/console.c index d5534d7c9..1e4516697 100644 --- a/hal/armv7m/imxrt/117x/console.c +++ b/hal/armv7m/imxrt/117x/console.c @@ -204,7 +204,7 @@ static void _hal_uartInit(void) void _hal_consoleInit(void) { #if RTT_ENABLED && !ISEMPTY(RTT_CONSOLE_KERNEL) - _hal_rttInit(); + _hal_rttSetup(); #endif #if !ISEMPTY(UART_CONSOLE_KERNEL) diff --git a/hal/armv7m/imxrt/117x/imxrt117x.c b/hal/armv7m/imxrt/117x/imxrt117x.c index 01148e70d..a0e260b73 100644 --- a/hal/armv7m/imxrt/117x/imxrt117x.c +++ b/hal/armv7m/imxrt/117x/imxrt117x.c @@ -25,6 +25,7 @@ #include "hal/arm/barriers.h" #include "hal/arm/scs.h" +#include "hal/arm/rtt.h" #include @@ -815,6 +816,7 @@ void _imxrt_init(void) imxrt_common.cpuclk = 696000000U; _hal_scsInit(); + _hal_rttInit(); /* WDOG1 and WDOG2 can't be disabled once enabled */ diff --git a/hal/armv7m/imxrt/interrupts.c b/hal/armv7m/imxrt/interrupts.c index a775662e7..5ff294aaf 100644 --- a/hal/armv7m/imxrt/interrupts.c +++ b/hal/armv7m/imxrt/interrupts.c @@ -20,11 +20,13 @@ #include "proc/userintr.h" +#include "perf/trace-events.h" static 
struct { spinlock_t spinlock; intr_handler_t *handlers[SIZE_INTERRUPTS]; unsigned int counters[SIZE_INTERRUPTS]; + int trace_irqs; } interrupts; @@ -37,11 +39,17 @@ void interrupts_dispatch(unsigned int n, cpu_context_t *ctx) intr_handler_t *h; int reschedule = 0; spinlock_ctx_t sc; + int trace; if (n >= SIZE_INTERRUPTS) { return; } + trace = interrupts.trace_irqs != 0 && n != GPT_IRQ; + if (trace != 0) { + trace_eventInterruptEnter(n); + } + hal_spinlockSet(&interrupts.spinlock, &sc); interrupts.counters[n]++; @@ -59,6 +67,10 @@ void interrupts_dispatch(unsigned int n, cpu_context_t *ctx) hal_spinlockClear(&interrupts.spinlock, &sc); + if (trace != 0) { + trace_eventInterruptExit(n); + } + if (reschedule != 0) { (void)threads_schedule(n, ctx, NULL); } @@ -118,10 +130,18 @@ char *hal_interruptsFeatures(char *features, size_t len) } +void _hal_interruptsTrace(int enable) +{ + interrupts.trace_irqs = !!enable; +} + + __attribute__((section(".init"))) void _hal_interruptsInit(void) { unsigned int n; + interrupts.trace_irqs = 0; + for (n = 0; n < SIZE_INTERRUPTS; ++n) { interrupts.handlers[n] = NULL; interrupts.counters[n] = 0; diff --git a/hal/armv7m/stm32/interrupts.c b/hal/armv7m/stm32/interrupts.c index 042bd967f..0c23f800b 100644 --- a/hal/armv7m/stm32/interrupts.c +++ b/hal/armv7m/stm32/interrupts.c @@ -19,6 +19,7 @@ #include "hal/cpu.h" #include "proc/userintr.h" +#include "perf/trace-events.h" #ifdef __CPU_STM32L4X6 #define SIZE_INTERRUPTS 97U @@ -29,6 +30,7 @@ static struct { spinlock_t spinlock; intr_handler_t *handlers[SIZE_INTERRUPTS]; unsigned int counters[SIZE_INTERRUPTS]; + int trace_irqs; } interrupts; @@ -41,11 +43,17 @@ void interrupts_dispatch(unsigned int n, cpu_context_t *ctx) intr_handler_t *h; int reschedule = 0; spinlock_ctx_t sc; + int trace; if (n >= SIZE_INTERRUPTS) { return; } + trace = interrupts.trace_irqs != 0 && n != SYSTICK_IRQ; + if (trace != 0) { + trace_eventInterruptEnter(n); + } + hal_spinlockSet(&interrupts.spinlock, &sc); 
interrupts.counters[n]++; @@ -63,6 +71,10 @@ void interrupts_dispatch(unsigned int n, cpu_context_t *ctx) hal_spinlockClear(&interrupts.spinlock, &sc); + if (trace != 0) { + trace_eventInterruptExit(n); + } + if (reschedule != 0) { (void)threads_schedule(n, ctx, NULL); } @@ -122,10 +134,18 @@ char *hal_interruptsFeatures(char *features, size_t len) } +void _hal_interruptsTrace(int enable) +{ + interrupts.trace_irqs = !!enable; +} + + __attribute__((section(".init"))) void _hal_interruptsInit(void) { unsigned int n; + interrupts.trace_irqs = 0; + for (n = 0; n < SIZE_INTERRUPTS; ++n) { interrupts.handlers[n] = NULL; interrupts.counters[n] = 0; diff --git a/hal/armv7r/zynqmp/config.h b/hal/armv7r/zynqmp/config.h index 6bd637a80..d01bf1c6a 100644 --- a/hal/armv7r/zynqmp/config.h +++ b/hal/armv7r/zynqmp/config.h @@ -19,6 +19,7 @@ #define NUM_CPUS 1 #define SIZE_INTERRUPTS 188U +#define TIMER_IRQ_ID 68U #ifndef __ASSEMBLY__ diff --git a/hal/armv7r/zynqmp/interrupts.c b/hal/armv7r/zynqmp/interrupts.c index 5a038f7bb..9c53d017e 100644 --- a/hal/armv7r/zynqmp/interrupts.c +++ b/hal/armv7r/zynqmp/interrupts.c @@ -19,9 +19,12 @@ #include "hal/spinlock.h" #include "hal/interrupts.h" #include "hal/list.h" +#include "config.h" #include "proc/userintr.h" +#include "perf/trace-events.h" + #define SPI_FIRST_IRQID 32U @@ -83,6 +86,7 @@ static struct { spinlock_t spinlock[SIZE_INTERRUPTS]; intr_handler_t *handlers[SIZE_INTERRUPTS]; unsigned int counters[SIZE_INTERRUPTS]; + int trace_irqs; } interrupts_common; @@ -98,6 +102,7 @@ int interrupts_dispatch(unsigned int n, cpu_context_t *ctx) intr_handler_t *h; int reschedule = 0; spinlock_ctx_t sc; + int trace; u32 ciarValue = *(interrupts_common.gicc + gicc_iar); n = ciarValue & 0x3ffU; @@ -106,6 +111,11 @@ int interrupts_dispatch(unsigned int n, cpu_context_t *ctx) return 0; } + trace = interrupts_common.trace_irqs != 0 && n != TIMER_IRQ_ID; + if (trace != 0) { + trace_eventInterruptEnter(n); + } + 
hal_spinlockSet(&interrupts_common.spinlock[n], &sc); interrupts_common.counters[n]++; @@ -129,6 +139,10 @@ int interrupts_dispatch(unsigned int n, cpu_context_t *ctx) hal_spinlockClear(&interrupts_common.spinlock[n], &sc); + if (trace != 0) { + trace_eventInterruptExit(n); + } + return reschedule; } /* parasoft-end-suppress MISRAC2012-RULE_2_2 MISRAC2012-RULE_8_4 */ @@ -250,10 +264,17 @@ static unsigned int _interrupts_gicv2_classify(unsigned int irqn) } +void _hal_interruptsTrace(int enable) +{ + interrupts_common.trace_irqs = !!enable; +} + + /* Function initializes interrupt handling */ void _hal_interruptsInit(void) { u32 i; + interrupts_common.trace_irqs = 0; interrupts_common.gicd = (void *)0xf9000000U; interrupts_common.gicc = (void *)0xf9001000U; diff --git a/hal/armv7r/zynqmp/timer.c b/hal/armv7r/zynqmp/timer.c index eb397ddd6..18ad9f651 100644 --- a/hal/armv7r/zynqmp/timer.c +++ b/hal/armv7r/zynqmp/timer.c @@ -17,13 +17,13 @@ #include "hal/timer.h" #include "hal/spinlock.h" #include "hal/string.h" +#include "config.h" #include "zynqmp.h" #define TTC0_BASE_ADDR ((addr_t)0x00ff110000U) #define TIMER_SRC_CLK_CPU_1x 99990000U -#define TIMER_IRQ_ID 68U static struct { volatile u32 *ttc; diff --git a/hal/armv8m/interrupts.c b/hal/armv8m/interrupts.c index b62a32f2d..a07494c18 100644 --- a/hal/armv8m/interrupts.c +++ b/hal/armv8m/interrupts.c @@ -22,10 +22,13 @@ #include "config.h" +#include "perf/trace-events.h" + static struct { spinlock_t spinlock; intr_handler_t *handlers[SIZE_INTERRUPTS]; unsigned int counters[SIZE_INTERRUPTS]; + int trace_irqs; } interrupts; @@ -38,11 +41,17 @@ void interrupts_dispatch(unsigned int n, cpu_context_t *ctx) intr_handler_t *h; int reschedule = 0; spinlock_ctx_t sc; + int trace; if (n >= SIZE_INTERRUPTS) { return; } + trace = interrupts.trace_irqs != 0 && n != TIMER_IRQ_ID; + if (trace != 0) { + trace_eventInterruptEnter(n); + } + hal_spinlockSet(&interrupts.spinlock, &sc); interrupts.counters[n]++; @@ -60,6 +69,10 @@ void 
interrupts_dispatch(unsigned int n, cpu_context_t *ctx) hal_spinlockClear(&interrupts.spinlock, &sc); + if (trace != 0) { + trace_eventInterruptExit(n); + } + if (reschedule != 0) { (void)threads_schedule(n, ctx, NULL); } @@ -120,10 +133,18 @@ char *hal_interruptsFeatures(char *features, size_t len) } +void _hal_interruptsTrace(int enable) +{ + interrupts.trace_irqs = !!enable; +} + + __attribute__((section(".init"))) void _hal_interruptsInit(void) { unsigned int n; + interrupts.trace_irqs = 0; + for (n = 0; n < SIZE_INTERRUPTS; ++n) { interrupts.handlers[n] = NULL; interrupts.counters[n] = 0; diff --git a/hal/armv8m/mcx/n94x/config.h b/hal/armv8m/mcx/n94x/config.h index 4c45912ba..842be18e8 100644 --- a/hal/armv8m/mcx/n94x/config.h +++ b/hal/armv8m/mcx/n94x/config.h @@ -18,6 +18,8 @@ #define SIZE_INTERRUPTS (171U + 16U) +#define TIMER_IRQ_ID ostimer0_irq + #ifndef __ASSEMBLY__ #include "include/arch/armv8m/mcx/syspage.h" diff --git a/hal/armv8m/mcx/n94x/timer.c b/hal/armv8m/mcx/n94x/timer.c index c825b354c..d17ea8886 100644 --- a/hal/armv8m/mcx/n94x/timer.c +++ b/hal/armv8m/mcx/n94x/timer.c @@ -117,7 +117,7 @@ void hal_timerSetWakeup(u32 waitUs) if ((hal_timerGetCyc() >= val) && (((val >> 32) & 0x400U) == 0U) && ((tmp & 1U) == 0U)) { /* We just missed the timer value and be the interrupt won't * be generated. Trigger the interrupt manually instead. 
*/ - _hal_scsIRQPendingSet((u32)ostimer0_irq - 0x10U); + _hal_scsIRQPendingSet((u32)TIMER_IRQ_ID - 0x10U); } hal_spinlockClear(&timer_common.lock, &sc); @@ -140,7 +140,7 @@ time_t hal_timerGetUs(void) int hal_timerRegister(intrFn_t f, void *data, intr_handler_t *h) { h->f = f; - h->n = ostimer0_irq; + h->n = TIMER_IRQ_ID; h->data = data; return hal_interruptsSetHandler(h); diff --git a/hal/armv8m/nrf/91/config.h b/hal/armv8m/nrf/91/config.h index 3b33c83b9..79f300826 100644 --- a/hal/armv8m/nrf/91/config.h +++ b/hal/armv8m/nrf/91/config.h @@ -16,6 +16,7 @@ #ifndef _PH_HAL_CONFIG_H_ #define _PH_HAL_CONFIG_H_ +#define TIMER_IRQ_ID SYSTICK_IRQ #ifndef __ASSEMBLY__ diff --git a/hal/armv8m/nrf/91/timer.c b/hal/armv8m/nrf/91/timer.c index 82d55e5be..24414de42 100644 --- a/hal/armv8m/nrf/91/timer.c +++ b/hal/armv8m/nrf/91/timer.c @@ -82,7 +82,7 @@ time_t hal_timerGetUs(void) int hal_timerRegister(intrFn_t f, void *data, intr_handler_t *h) { h->f = f; - h->n = SYSTICK_IRQ; + h->n = TIMER_IRQ_ID; h->data = data; return hal_interruptsSetHandler(h); diff --git a/hal/armv8r/mps3an536/interrupts.c b/hal/armv8r/mps3an536/interrupts.c index a8f756650..b08a63c36 100644 --- a/hal/armv8r/mps3an536/interrupts.c +++ b/hal/armv8r/mps3an536/interrupts.c @@ -21,6 +21,9 @@ #include "include/errno.h" +#include "perf/trace-events.h" + +#include #define SIZE_INTERRUPTS 126U #define PPI_FIRST_IRQID 16U @@ -102,6 +105,7 @@ static struct { volatile u32 *gic; spinlock_t lock; intr_handler_t *handlers[SIZE_INTERRUPTS]; + int trace_irqs; } interrupts_common; @@ -143,6 +147,7 @@ int interrupts_dispatch(unsigned int n, cpu_context_t *ctx) unsigned int reschedule = 0; intr_handler_t *h; spinlock_ctx_t sc; + int trace; n = gic_acknowledge(); @@ -151,6 +156,11 @@ int interrupts_dispatch(unsigned int n, cpu_context_t *ctx) return 0; } + trace = interrupts_common.trace_irqs != 0 && n != TIMER_IRQ; + if (trace != 0) { + trace_eventInterruptEnter(n); + } + hal_spinlockSet(&interrupts_common.lock, &sc); h = 
interrupts_common.handlers[n]; @@ -170,6 +180,10 @@ int interrupts_dispatch(unsigned int n, cpu_context_t *ctx) gic_EOI(n); + if (trace != 0) { + trace_eventInterruptExit(n); + } + return (int)reschedule; } @@ -326,6 +340,12 @@ char *hal_interruptsFeatures(char *features, size_t len) } +void _hal_interruptsTrace(int enable) +{ + interrupts_common.trace_irqs = !!enable; +} + + /* parasoft-suppress-next-line MISRAC2012-DIR_4_3 "Assembly is required for low-level operations" */ void _hal_interruptsInit(void) { diff --git a/hal/ia32/interrupts.c b/hal/ia32/interrupts.c index 607f14797..6bfdca648 100644 --- a/hal/ia32/interrupts.c +++ b/hal/ia32/interrupts.c @@ -26,6 +26,8 @@ #include "include/errno.h" #include "init.h" +#include "perf/trace-events.h" + #include @@ -75,6 +77,7 @@ static struct { pic_8259 } pic; u32 systickIRQ; spinlock_t sp_ioapic; + int trace_irqs; } interrupts_common; @@ -190,11 +193,17 @@ int interrupts_dispatchIRQ(unsigned int n, cpu_context_t *ctx) intr_handler_t *h; int reschedule = 0; spinlock_ctx_t sc; + int trace; if (n >= SIZE_INTERRUPTS) { return 0; } + trace = interrupts_common.trace_irqs != 0 && n != SYSTICK_IRQ; + if (trace != 0) { + trace_eventInterruptEnter(n); + } + hal_spinlockSet(&interrupts_common.interrupts[n].spinlock, &sc); interrupts_common.interrupts[n].counter++; @@ -211,6 +220,10 @@ int interrupts_dispatchIRQ(unsigned int n, cpu_context_t *ctx) hal_spinlockClear(&interrupts_common.interrupts[n].spinlock, &sc); + if (trace != 0) { + trace_eventInterruptExit(n); + } + return reschedule; } @@ -451,12 +464,19 @@ static int _hal_ioapicInit(void) } +void _hal_interruptsTrace(int enable) +{ + interrupts_common.trace_irqs = !!enable; +} + + void _hal_interruptsInit(void) { static const u32 flags = IGBITS_PRES | IGBITS_SYSTEM | IGBITS_IRQEXC; unsigned int k; _interrupts_multilock = 1U; + interrupts_common.trace_irqs = 0; interrupts_common.pic = pic_undefined; for (k = 0; k < SIZE_INTERRUPTS; ++k) { diff --git a/hal/interrupts.h 
b/hal/interrupts.h index d89be299b..4b4bd6603 100644 --- a/hal/interrupts.h +++ b/hal/interrupts.h @@ -31,4 +31,8 @@ char *hal_interruptsFeatures(char *features, size_t len); void _hal_interruptsInit(void); +/* controls trace of non-systick interrupts */ +void _hal_interruptsTrace(int enable); + + #endif diff --git a/hal/riscv64/interrupts.c b/hal/riscv64/interrupts.c index 9f7c39731..dba2e032e 100644 --- a/hal/riscv64/interrupts.c +++ b/hal/riscv64/interrupts.c @@ -24,6 +24,8 @@ #include "include/errno.h" +#include "perf/trace-events.h" + #include @@ -50,6 +52,7 @@ static struct { } plic; u32 irqTargetCpu; + int trace_irqs; } interrupts_common; @@ -61,6 +64,7 @@ static int interrupts_dispatchPlic(cpu_context_t *ctx) unsigned int reschedule = 0; intr_handler_t *h; spinlock_ctx_t sc; + int trace; unsigned int irq = plic_claim(PLIC_SCONTEXT(hal_cpuGetID())); RISCV_FENCE(o, i); @@ -69,6 +73,11 @@ static int interrupts_dispatchPlic(cpu_context_t *ctx) return 0; } + trace = interrupts_common.trace_irqs != 0 && irq != SYSTICK_IRQ; + if (trace != 0) { + trace_eventInterruptEnter(irq); + } + hal_spinlockSet(&interrupts_common.plic.spinlocks[irq], &sc); interrupts_common.plic.counters[irq]++; @@ -89,6 +98,10 @@ static int interrupts_dispatchPlic(cpu_context_t *ctx) plic_complete(PLIC_SCONTEXT(hal_cpuGetID()), irq); + if (trace != 0) { + trace_eventInterruptExit(irq); + } + return (int)reschedule; } @@ -98,6 +111,12 @@ static int interrupts_dispatchClint(unsigned int n, cpu_context_t *ctx) intr_handler_t *h; unsigned int reschedule = 0; spinlock_ctx_t sc; + int trace; + + trace = interrupts_common.trace_irqs != 0 && n != SYSTICK_IRQ; + if (trace != 0) { + trace_eventInterruptEnter(n); + } hal_spinlockSet(&interrupts_common.clint.spinlocks[n], &sc); @@ -117,6 +136,10 @@ static int interrupts_dispatchClint(unsigned int n, cpu_context_t *ctx) hal_spinlockClear(&interrupts_common.clint.spinlocks[n], &sc); + if (trace != 0) { + trace_eventInterruptExit(n); + } + return 
(int)reschedule; } @@ -260,10 +283,18 @@ void hal_interruptsInitCore(void) } +void _hal_interruptsTrace(int enable) +{ + interrupts_common.trace_irqs = !!enable; +} + + __attribute__((section(".init"))) void _hal_interruptsInit(void) { unsigned int i; + interrupts_common.trace_irqs = 0; + /* parasoft-suppress-next-line MISRAC2012-RULE_11_1 "Need to assign function address to processor register" */ csr_write(stvec, _interrupts_dispatch); for (i = 0; i < CLINT_IRQ_SIZE; i++) { diff --git a/hal/sparcv8leon/gaisler/irqamp.c b/hal/sparcv8leon/gaisler/irqamp.c index e19d872ce..ef407b6df 100644 --- a/hal/sparcv8leon/gaisler/irqamp.c +++ b/hal/sparcv8leon/gaisler/irqamp.c @@ -232,6 +232,12 @@ char *hal_interruptsFeatures(char *features, size_t len) } +void _hal_interruptsTrace(int enable) +{ + /* TODO */ +} + + void _hal_interruptsInit(void) { unsigned int i; diff --git a/hal/sparcv8leon/gaisler/irqmp.c b/hal/sparcv8leon/gaisler/irqmp.c index 4b8f2d6f2..1f9cf1d73 100644 --- a/hal/sparcv8leon/gaisler/irqmp.c +++ b/hal/sparcv8leon/gaisler/irqmp.c @@ -196,6 +196,12 @@ char *hal_interruptsFeatures(char *features, size_t len) } +void _hal_interruptsTrace(int enable) +{ + /* TODO */ +} + + void _hal_interruptsInit(void) { unsigned int i; diff --git a/include/perf.h b/include/perf.h new file mode 100644 index 000000000..732107d15 --- /dev/null +++ b/include/perf.h @@ -0,0 +1,28 @@ +/* + * Phoenix-RTOS + * + * Operating system kernel + * + * Performance analysis subsystem + * + * Copyright 2025 Phoenix Systems + * Author: Adam Greloch + * + * %LICENSE% + */ + + +#ifndef _PHOENIX_PERF_H_ +#define _PHOENIX_PERF_H_ + + +/* clang-format off */ +typedef enum { perf_mode_threads, perf_mode_trace, perf_mode_count } perf_mode_t; +typedef enum { trace_channel_meta, trace_channel_event, trace_channel_count } trace_channel_t; +/* clang-format on */ + + +#define PERF_TRACE_FLAG_ROLLING (1U << 0) /* treat event channel as rolling window */ + + +#endif diff --git a/include/syscalls.h 
b/include/syscalls.h index 5b45b7556..e6680f5c7 100644 --- a/include/syscalls.h +++ b/include/syscalls.h @@ -59,9 +59,10 @@ ID(wdgreload) \ ID(threadsinfo) \ ID(meminfo) \ - ID(perf_start) \ - ID(perf_read) \ - ID(perf_finish) \ + ID(sys_perf_start) \ + ID(sys_perf_read) \ + ID(sys_perf_finish) \ + ID(sys_perf_stop) \ ID(syspageprog) \ ID(va2pa) \ ID(signalHandle) \ diff --git a/include/sysinfo.h b/include/sysinfo.h index d2b92584e..e0279e936 100644 --- a/include/sysinfo.h +++ b/include/sysinfo.h @@ -37,7 +37,7 @@ typedef struct _threadinfo_t { time_t wait; char name[128]; -} threadinfo_t; +} __attribute__((packed)) threadinfo_t; typedef struct _entryinfo_t { diff --git a/main.c b/main.c index d36fb01f3..84dace1b8 100644 --- a/main.c +++ b/main.c @@ -24,6 +24,7 @@ #include "syscalls.h" #include "syspage.h" #include "test/test.h" +#include "perf/perf.h" static struct { @@ -116,6 +117,7 @@ int main(void) lib_printf("hal: %s\n", hal_timerFeatures(s, sizeof(s))); _vm_init(&main_common.kmap, &main_common.kernel); + (void)_perf_init(&main_common.kmap); (void)_proc_init(&main_common.kmap, &main_common.kernel); _syscalls_init(); diff --git a/perf/Makefile b/perf/Makefile new file mode 100644 index 000000000..d0c886470 --- /dev/null +++ b/perf/Makefile @@ -0,0 +1,14 @@ +# +# Makefile for phoenix-rtos-kernel/trace +# +# Copyright 2025 Phoenix Systems +# Author: Adam Greloch +# + +ifneq (,$(findstring armv7m,$(TARGET_FAMILY))) + BUFFER_OBJ=buffer-rtt.o +else + BUFFER_OBJ=buffer-mem.o +endif + +OBJS += $(addprefix $(PREFIX_O)perf/, perf.o trace.o $(BUFFER_OBJ)) diff --git a/perf/buffer-mem.c b/perf/buffer-mem.c new file mode 100644 index 000000000..17ddd5acc --- /dev/null +++ b/perf/buffer-mem.c @@ -0,0 +1,195 @@ +/* + * Phoenix-RTOS + * + * Operating system kernel + * + * Performance analysis subsystem - event buffer implementation using large in-memory circular buffer + * + * Copyright 2025 Phoenix Systems + * Author: Adam Greloch + * + * %LICENSE% + */ + + +#include 
"include/perf.h" +#include "include/errno.h" +#include "vm/vm.h" + +#include "buffer.h" + +#include + + +#ifndef TRACE_EVENT_CHANNEL_BUFSIZE +#define TRACE_EVENT_CHANNEL_BUFSIZE (4UL << 20) /* 4 MB */ +#endif + +#ifndef TRACE_META_CHANNEL_BUFSIZE +#define TRACE_META_CHANNEL_BUFSIZE (1UL << 20) /* 1 MB */ +#endif + + +typedef struct { + cbuffer_t buffer; + page_t *pages; + size_t bufsize; +} chan_t; + +static struct { + vm_map_t *kmap; + chan_t *chans; + size_t nchans; +} buffer_common; + + +static inline cbuffer_t *getBuffer(u8 chan) +{ + LIB_ASSERT(chan < buffer_common.nchans, "invalid chan id: %d", chan); + return &buffer_common.chans[chan].buffer; +} + + +static void _bufferFree(void *data, page_t **pages) +{ + size_t sz = 0; + page_t *p = *pages; + + while (p != NULL) { + *pages = p->next; + vm_pageFree(p); + sz += SIZE_PAGE; + p = *pages; + } + + (void)vm_munmap(buffer_common.kmap, data, sz); +} + + +static void *_bufferAlloc(page_t **pages, size_t sz) +{ + page_t *p; + void *v, *data; + int err = EOK; + + *pages = NULL; + data = vm_mapFind(buffer_common.kmap, NULL, sz, MAP_NONE, PROT_READ | PROT_WRITE); + + if (data == NULL) { + return NULL; + } + + for (v = data; v < data + sz; v += SIZE_PAGE) { + p = vm_pageAlloc(SIZE_PAGE, PAGE_OWNER_APP); + + if (p == NULL) { + err = -ENOMEM; + break; + } + + p->next = *pages; + *pages = p; + + err = page_map(&buffer_common.kmap->pmap, v, p->addr, PGHD_PRESENT | PGHD_WRITE | PGHD_READ); + if (err < 0) { + break; + } + } + + if (err < 0) { + _bufferFree(data, pages); + return NULL; + } + + return data; +} + + +int _trace_bufferStart(void) +{ + void *data; + size_t i, j; + + for (i = 0; i < buffer_common.nchans; i++) { + data = _bufferAlloc(&buffer_common.chans[i].pages, buffer_common.chans[i].bufsize); + if (data == NULL) { + for (j = 0; j < i; j++) { + _bufferFree(buffer_common.chans[j].buffer.data, &buffer_common.chans[j].pages); + } + return -ENOMEM; + } + + _cbuffer_init(&buffer_common.chans[i].buffer, data, 
buffer_common.chans[i].bufsize); + } + + return 0; +} + + +ssize_t _trace_bufferRead(u8 chan, void *buf, size_t bufsz) +{ + cbuffer_t *cbuf = getBuffer(chan); + return (ssize_t)_cbuffer_read(cbuf, buf, bufsz); +} + + +ssize_t _trace_bufferWrite(u8 chan, const void *data, size_t sz) +{ + cbuffer_t *cbuf = getBuffer(chan); + return (ssize_t)_cbuffer_write(cbuf, data, sz); +} + + +int _trace_bufferWaitUntilAvail(u8 chan, size_t sz) +{ + /* overwrite intentionally to prevent deadlock */ + return 0; +} + + +ssize_t _trace_bufferAvail(u8 chan) +{ + cbuffer_t *cbuf = getBuffer(chan); + return (ssize_t)_cbuffer_free(cbuf); +} + + +ssize_t _trace_bufferDiscard(u8 chan, size_t sz) +{ + cbuffer_t *cbuf = getBuffer(chan); + return (ssize_t)_cbuffer_discard(cbuf, sz); +} + + +int _trace_bufferFinish(void) +{ + size_t i; + for (i = 0; i < buffer_common.nchans; i++) { + _bufferFree(buffer_common.chans[i].buffer.data, &buffer_common.chans[i].pages); + } + return EOK; +} + + +int trace_bufferInit(vm_map_t *kmap) +{ + const size_t nchansPerCpu = (size_t)trace_channel_count; + unsigned int ncpus = hal_cpuGetCount(); + size_t nchans = nchansPerCpu * ncpus; + size_t i; + + buffer_common.kmap = kmap; + + buffer_common.chans = vm_kmalloc(sizeof(chan_t) * nchans); + if (buffer_common.chans == NULL) { + return -ENOMEM; + } + buffer_common.nchans = nchans; + + for (i = 0; i < ncpus; i++) { + buffer_common.chans[(size_t)trace_channel_meta + i * nchansPerCpu].bufsize = TRACE_META_CHANNEL_BUFSIZE; + buffer_common.chans[(size_t)trace_channel_event + i * nchansPerCpu].bufsize = TRACE_EVENT_CHANNEL_BUFSIZE; + } + + return EOK; +} diff --git a/perf/buffer-rtt.c b/perf/buffer-rtt.c new file mode 100644 index 000000000..908064576 --- /dev/null +++ b/perf/buffer-rtt.c @@ -0,0 +1,134 @@ +/* + * Phoenix-RTOS + * + * Operating system kernel + * + * Performance analysis subsystem - event buffer implementation using RTT + * + * Copyright 2025 Phoenix Systems + * Author: Adam Greloch + * + * %LICENSE% + */ 
+ + +#include "include/errno.h" +#include "include/perf.h" +#include "hal/arm/rtt.h" + +#include "buffer.h" + +#include + +#ifndef RTT_TRACE_META_CHANNEL +#define RTT_TRACE_META_CHANNEL 2 +#endif + +#ifndef RTT_TRACE_EVENT_CHANNEL +#define RTT_TRACE_EVENT_CHANNEL 3 +#endif + +#ifndef PERF_RTT_ENABLED +#define PERF_RTT_ENABLED 0 +#endif + +#if PERF_RTT_ENABLED && (!defined(RTT_ENABLED_PLO) || !RTT_ENABLED_PLO || !defined(RTT_ENABLED) || !RTT_ENABLED) +#error "RTT_ENABLED requires RTT_ENABLED_PLO" +#endif + +#ifndef RTT_PERF_BUFFERS +#define RTT_PERF_BUFFERS 0 +#endif + +#if PERF_RTT_ENABLED && !RTT_PERF_BUFFERS +#error "buffer-rtt used but RTT_PERF_BUFFERS is disabled" +#endif + + +static struct { + int initialized; + struct { + u8 rtt; + } chans[trace_channel_count]; +} buffer_common; + + +int _trace_bufferStart(void) +{ + if (buffer_common.initialized == 0) { + return -ENOSYS; + } + + return EOK; +} + + +ssize_t _trace_bufferRead(u8 chan, void *buf, size_t bufsz) +{ + return 0; +} + + +ssize_t _trace_bufferWrite(u8 chan, const void *data, size_t sz) +{ + if (buffer_common.initialized == 0) { + return -EINVAL; + } + + return _hal_rttWrite(buffer_common.chans[chan].rtt, data, sz); +} + + +int _trace_bufferWaitUntilAvail(u8 chan, size_t sz) +{ + int try = 0; + + while (_hal_rttTxAvail(buffer_common.chans[chan].rtt) < sz) { + try++; + }; + + return try; +} + + +ssize_t _trace_bufferAvail(u8 chan) +{ + return _hal_rttTxAvail(buffer_common.chans[chan].rtt); +} + + +ssize_t _trace_bufferDiscard(u8 chan, size_t sz) +{ + return -ENOSYS; +} + + +int _trace_bufferFinish(void) +{ + if (buffer_common.initialized == 0) { + return -ENOSYS; + } + + return EOK; +} + + +int trace_bufferInit(vm_map_t *kmap) +{ + buffer_common.initialized = 0; + +#if PERF_RTT_ENABLED + if (_hal_rttSetup() < 0) { + return -1; + } + + buffer_common.initialized = 1; + + buffer_common.chans[trace_channel_event].rtt = RTT_TRACE_EVENT_CHANNEL; + buffer_common.chans[trace_channel_meta].rtt = 
RTT_TRACE_META_CHANNEL; +#else + return -ENOSYS; +#endif + + return EOK; +} diff --git a/perf/buffer.h b/perf/buffer.h new file mode 100644 index 000000000..7c8722987 --- /dev/null +++ b/perf/buffer.h @@ -0,0 +1,46 @@ +/* + * Phoenix-RTOS + * + * Operating system kernel + * + * Performance analysis subsystem - event buffer + * + * Copyright 2025 Phoenix Systems + * Author: Adam Greloch + * + * %LICENSE% + */ + +#ifndef _TRACE_BUFFER_H_ +#define _TRACE_BUFFER_H_ + + +#include "vm/map.h" + + +int trace_bufferInit(vm_map_t *kmap); + + +int _trace_bufferStart(void); + + +int _trace_bufferFinish(void); + + +ssize_t _trace_bufferRead(u8 chan, void *buf, size_t bufsz); + + +ssize_t _trace_bufferWrite(u8 chan, const void *data, size_t sz); + + +int _trace_bufferWaitUntilAvail(u8 chan, size_t sz); + + +/* returns bytes available to write */ +ssize_t _trace_bufferAvail(u8 chan); + + +ssize_t _trace_bufferDiscard(u8 chan, size_t sz); + + +#endif diff --git a/perf/perf.c b/perf/perf.c new file mode 100644 index 000000000..d832a141f --- /dev/null +++ b/perf/perf.c @@ -0,0 +1,74 @@ +/* + * Phoenix-RTOS + * + * Operating system kernel + * + * Performance analysis subsystem + * + * Copyright 2025 Phoenix Systems + * Author: Adam Greloch + * + * %LICENSE% + */ + +#include "include/errno.h" +#include "proc/threads.h" + +#include "trace.h" +#include "perf.h" + + +int _perf_init(vm_map_t *kmap) +{ + return _trace_init(kmap); +} + + +int perf_start(perf_mode_t mode, unsigned flags, void *arg, size_t sz) +{ + switch (mode) { + case perf_mode_threads: + return perf_threadsStart((unsigned)(ptr_t)arg); + case perf_mode_trace: + return trace_start(flags); + default: + return -ENOSYS; + } +} + + +int perf_read(perf_mode_t mode, void *buf, size_t bufsz, int chan) +{ + switch (mode) { + case perf_mode_threads: + return perf_threadsRead(buf, bufsz); + case perf_mode_trace: + return trace_read((u8)chan, buf, bufsz); + default: + return -ENOSYS; + } +} + + +int perf_stop(perf_mode_t mode) +{ + 
switch (mode) { + case perf_mode_trace: + return trace_stop(); + default: + return -ENOSYS; + } +} + + +int perf_finish(perf_mode_t mode) +{ + switch (mode) { + case perf_mode_threads: + return perf_threadsFinish(); + case perf_mode_trace: + return trace_finish(); + default: + return -ENOSYS; + } +} diff --git a/perf/perf.h b/perf/perf.h new file mode 100644 index 000000000..3c92c0b1a --- /dev/null +++ b/perf/perf.h @@ -0,0 +1,38 @@ +/* + * Phoenix-RTOS + * + * Operating system kernel + * + * Performance analysis subsystem + * + * Copyright 2025 Phoenix Systems + * Author: Adam Greloch + * + * %LICENSE% + */ + +#ifndef _PERF_PERF_H_ +#define _PERF_PERF_H_ + +#include "include/perf.h" +#include "vm/map.h" + + +int _perf_init(vm_map_t *kmap); + + +/* if successful (non-negative rv), returns number of output channels (readable via perf_read) */ +int perf_start(perf_mode_t mode, unsigned flags, void *arg, size_t sz); + + +int perf_read(perf_mode_t mode, void *buf, size_t bufsz, int chan); + + +/* if successful (non-negative rv), returns number of output channels (readable via perf_read) */ +int perf_stop(perf_mode_t mode); + + +int perf_finish(perf_mode_t mode); + + +#endif diff --git a/perf/trace-events.h b/perf/trace-events.h new file mode 100644 index 000000000..35809637e --- /dev/null +++ b/perf/trace-events.h @@ -0,0 +1,289 @@ +/* + * Phoenix-RTOS + * + * Operating system kernel + * + * Performance analysis subsystem - kernel events to Common Trace Format writer + * + * Event stream conforms to the metadata stream located under + * perf/tsdl/metadata. + * + * Copyright 2025 Phoenix Systems + * Author: Adam Greloch + * + * %LICENSE% + */ + +#ifndef _TRACE_EVENTS_H_ +#define _TRACE_EVENTS_H_ + +#include "include/perf.h" +#include "hal/types.h" +#include "proc/proc.h" +#include "trace.h" + + +/* NOTE: must mirror tsdl/metadata event IDs. 
*/ +enum { + TRACE_EVENT_INTERRUPT_ENTER = 0x20, + TRACE_EVENT_INTERRUPT_EXIT = 0x21, + TRACE_EVENT_THREAD_SCHEDULING = 0x22, + TRACE_EVENT_THREAD_PREEMPTED = 0x23, + TRACE_EVENT_THREAD_ENQUEUED = 0x24, + TRACE_EVENT_THREAD_WAKING = 0x25, + TRACE_EVENT_THREAD_CREATE = 0x26, + TRACE_EVENT_THREAD_END = 0x27, + TRACE_EVENT_SYSCALL_ENTER = 0x28, + TRACE_EVENT_SYSCALL_EXIT = 0x29, + TRACE_EVENT_SCHED_ENTER = 0x2a, + TRACE_EVENT_SCHED_EXIT = 0x2b, + TRACE_EVENT_LOCK_NAME = 0x2c, + TRACE_EVENT_LOCK_SET_ENTER = 0x2d, + TRACE_EVENT_LOCK_SET_ACQUIRED = 0x2e, + TRACE_EVENT_LOCK_SET_EXIT = 0x2f, + TRACE_EVENT_LOCK_CLEAR = 0x30, + TRACE_EVENT_THREAD_PRIORITY = 0x31, + TRACE_EVENT_PROCESS_KILL = 0x32, +}; + + +void trace_writeEvent(u8 cpuChan, u8 event, const void *data, size_t sz, u32 *ts); + + +/* + * Updates lock epoch counter. If lock hasn't been used in this trace epoch, + * emits LOCK_NAME event. + */ +void _trace_updateLockEpoch(lock_t *lock); + + +#define TRACE_EVENT_BODY_CHAN(chan, event_id, ev, ts, ...) \ + do { \ + if (trace_isRunning() == 0) { \ + return; \ + } \ + __VA_ARGS__ trace_writeEvent((chan), (event_id), &(ev), sizeof(ev), (ts)); \ + } while (0) + + +/* + * NOTE: The ev structure passed to PERF_{META,EVENT}_BODY must match the + * field struct declared in the tsdl/metadata for a given event_id. + */ +#define TRACE_META_BODY(event_id, ev, ts, ...) TRACE_EVENT_BODY_CHAN((u8)(trace_channel_meta), (event_id), (ev), (ts), __VA_ARGS__) +#define TRACE_EVENT_BODY(event_id, ev, ts, ...) 
TRACE_EVENT_BODY_CHAN((u8)(trace_channel_event), (event_id), (ev), (ts), __VA_ARGS__) + + +/* assumes lock->spinlock is set */ +static inline void _trace_eventLockName(const lock_t *lock) +{ + struct { + u32 lid; + char name[16]; + } __attribute__((packed)) ev; + + TRACE_META_BODY(TRACE_EVENT_LOCK_NAME, ev, NULL, { + ev.lid = (ptr_t)lock; + hal_memcpy(ev.name, lock->name, min(hal_strlen(lock->name) + 1U, sizeof(ev.name))); /* bounded copy: lock names may be longer than the 16-byte event field, longer names are truncated */ + }); +} + + +/* assumes lock->spinlock is set */ +static inline void _trace_eventLockSetEnter(lock_t *lock, u16 tid) +{ + struct { + u16 tid; + u32 lid; + } __attribute__((packed)) ev; + + TRACE_EVENT_BODY(TRACE_EVENT_LOCK_SET_ENTER, ev, NULL, { + _trace_updateLockEpoch(lock); + ev.tid = tid; + ev.lid = (ptr_t)lock; + }); +} + + +/* assumes lock->spinlock is set */ +static inline void _trace_eventLockSetExit(lock_t *lock, u16 tid, int ret) +{ + struct { + u16 tid; + u32 lid; + } __attribute__((packed)) ev; + u32 ts = 0; + + TRACE_EVENT_BODY(TRACE_EVENT_LOCK_SET_EXIT, ev, &ts, { + _trace_updateLockEpoch(lock); + ev.tid = tid; + ev.lid = (ptr_t)lock; + }); + + if (ret == EOK) { + /* reuse lock_set_exit timestamp so that there is no gap between events */ + TRACE_EVENT_BODY(TRACE_EVENT_LOCK_SET_ACQUIRED, ev, &ts, { + /* epoch already updated */ + ev.tid = tid; + ev.lid = (ptr_t)lock; + }); + } +} + + +/* assumes lock->spinlock is set */ +static inline void _trace_eventLockClear(lock_t *lock, u16 tid) +{ + struct { + u16 tid; + u32 lid; + } __attribute__((packed)) ev; + + TRACE_EVENT_BODY(TRACE_EVENT_LOCK_CLEAR, ev, NULL, { + _trace_updateLockEpoch(lock); + ev.tid = tid; + ev.lid = (ptr_t)lock; + }); +} + + +static inline void trace_eventInterruptEnter(u8 n) +{ + TRACE_EVENT_BODY(TRACE_EVENT_INTERRUPT_ENTER, n, NULL); +} + + +static inline void trace_eventInterruptExit(u8 n) +{ + TRACE_EVENT_BODY(TRACE_EVENT_INTERRUPT_EXIT, n, NULL); +} + + +static inline void trace_eventThreadScheduling(u16 tid) +{ + TRACE_EVENT_BODY(TRACE_EVENT_THREAD_SCHEDULING, tid, NULL); +} + + +static inline void
trace_eventThreadPreempted(u16 tid) +{ + TRACE_EVENT_BODY(TRACE_EVENT_THREAD_PREEMPTED, tid, NULL); +} + + +static inline void trace_eventThreadEnqueued(u16 tid) +{ + TRACE_EVENT_BODY(TRACE_EVENT_THREAD_ENQUEUED, tid, NULL); +} + + +static inline void trace_eventThreadWaking(u16 tid) +{ + TRACE_EVENT_BODY(TRACE_EVENT_THREAD_WAKING, tid, NULL); +} + + +static inline void trace_eventThreadCreate(const thread_t *t) +{ + struct { + u16 pid; + u16 tid; + u8 priority; + char name[128]; + } __attribute__((packed)) ev; + + TRACE_META_BODY(TRACE_EVENT_THREAD_CREATE, ev, NULL, { + ev.tid = (u16)proc_getTid(t); + ev.priority = (u8)t->priority; + + if (t->process != NULL) { + ev.pid = (u16)process_getPid(t->process); + process_getName(t->process, ev.name, sizeof(ev.name)); + } + else { + ev.pid = 0; + hal_memcpy(ev.name, "[kernel]", sizeof("[kernel]")); + } + }); +} + + +static inline void trace_eventThreadEnd(const thread_t *t) +{ + struct { + u16 pid; + u16 tid; + } __attribute__((packed)) ev; + + TRACE_EVENT_BODY(TRACE_EVENT_THREAD_END, ev, NULL, { + ev.pid = (t->process != NULL) ? (u16)process_getPid(t->process) : 0U; /* kernel threads have no process: report pid 0, matching thread_create */ + ev.tid = (u16)proc_getTid(t); + }); +} + + +static inline void trace_eventSyscallEnter(u8 n, u16 tid) +{ + struct { + u8 n; + u16 tid; + } __attribute__((packed)) ev; + + TRACE_EVENT_BODY(TRACE_EVENT_SYSCALL_ENTER, ev, NULL, { + ev.n = n; + ev.tid = tid; + }); +} + + +static inline void trace_eventSyscallExit(u8 n, u16 tid) +{ + struct { + u8 n; + u16 tid; + } __attribute__((packed)) ev; + + TRACE_EVENT_BODY(TRACE_EVENT_SYSCALL_EXIT, ev, NULL, { + ev.n = n; + ev.tid = tid; + }); +} + + +static inline void trace_eventSchedEnter(u8 cpuId) +{ + TRACE_EVENT_BODY(TRACE_EVENT_SCHED_ENTER, cpuId, NULL); +} + + +static inline void trace_eventSchedExit(u8 cpuId) +{ + TRACE_EVENT_BODY(TRACE_EVENT_SCHED_EXIT, cpuId, NULL); +} + + +static inline void trace_eventThreadPriority(u16 tid, u8 priority) +{ + struct { + u16 tid; + u8 priority; + } __attribute__((packed)) ev; + +
TRACE_EVENT_BODY(TRACE_EVENT_THREAD_PRIORITY, ev, NULL, { + ev.tid = tid; + ev.priority = priority; + }); +} + + +static inline void trace_eventProcessKill(const process_t *p) +{ + u16 pid; + + TRACE_EVENT_BODY(TRACE_EVENT_PROCESS_KILL, pid, NULL, { + pid = (u16)process_getPid(p); + }); +} + + +#endif diff --git a/perf/trace.c b/perf/trace.c new file mode 100644 index 000000000..d354f181a --- /dev/null +++ b/perf/trace.c @@ -0,0 +1,348 @@ +/* + * Phoenix-RTOS + * + * Operating system kernel + * + * Performance analysis subsystem - CTF backend + * + * Copyright 2025 Phoenix Systems + * Author: Adam Greloch + * + * %LICENSE% + */ + + +#include "include/perf.h" +#include "buffer.h" +#include "trace-events.h" +#include "trace.h" + + +static struct { + /* + * Treat `running` as atomic to reduce overhead on the kernel when the tracing is disabled + * - there is only one writer at a time (perf_trace{Start,Finish}()) and multiple readers + * (trace events doing trace_isRunning()). Due to eventual consistency in the readers + * we may lose some events, but we may lose them anyway as the invocation of + * trace_start() naturally races with kernel events occurring in the meantime. + * + * `running` under spinlock is always consistent. 
+ */ + volatile int running; + spinlock_t spinlock; + + /* guarded by spinlock */ + int stopped; + int startPending; /* trace_start guard flag */ + + int epoch; + u32 prev; + unsigned int flags; + + u8 errorFlags; + u64 eventDelayCount; + u64 eventDelayTimestamp; + u64 startTimestamp; +} trace_common; + + +#define TRACE_NON_MONOTONICITY (1U << 1) +#define TRACE_EVENT_DELAYED (1U << 2) +#define TRACE_BUFFER_WRITE_ERR (1U << 3) + + +static u32 _gettimeRaw(void) +{ + /* Intentional downcast to u32 - traces >1h are not supported */ + u32 now = (u32)hal_timerGetUs(); + + while (now < trace_common.prev) { + trace_common.errorFlags |= TRACE_NON_MONOTONICITY; + now = (u32)hal_timerGetUs(); + } + + trace_common.prev = now; + + return now; +} + + +static void _writeEvent(u8 cpuChan, u8 event, const void *data, size_t sz, u32 *ts) +{ + u32 eventTs; + ssize_t ret, try = 0; + size_t eventSz = sizeof(eventTs) + sizeof(event) + sz; + size_t avail; + u8 chan = cpuChan + (u8)hal_cpuGetID() * (u8)trace_channel_count; + + struct { + u32 ts; + u8 eventId; + } __attribute__((packed)) ev; + + if (ts == NULL || *ts == 0U) { + eventTs = _gettimeRaw(); + if (ts != NULL) { + *ts = eventTs; + } + } + else { + /* use timestamp provided by the caller */ + eventTs = *ts; + } + + ret = _trace_bufferAvail(chan); + if (ret < 0) { + trace_common.errorFlags |= TRACE_BUFFER_WRITE_ERR; + return; + } + + avail = (size_t)ret; + if (avail < eventSz) { + if ((trace_common.flags & PERF_TRACE_FLAG_ROLLING) != 0U) { + (void)_trace_bufferDiscard(chan, eventSz - avail); + } + else { + try = _trace_bufferWaitUntilAvail(chan, eventSz); + } + } + + ev.ts = eventTs; + ev.eventId = event; + ret = _trace_bufferWrite(chan, &ev, sizeof(ev)); + if (ret == (ssize_t)sizeof(ev)) { + ret = _trace_bufferWrite(chan, data, sz); + } + + if (ret < 0) { + trace_common.errorFlags |= TRACE_BUFFER_WRITE_ERR; + } + if (try > 0) { + /* + * Record first occurrence of event delay to caution the user about possible + * loss of timestamp 
precision. This may happen if e.g. the buffer is implemented as RTT + * and the receiver (debug probe) can't keep up with the event generation rate + */ + trace_common.errorFlags |= TRACE_EVENT_DELAYED; + trace_common.eventDelayCount++; + trace_common.eventDelayTimestamp = _gettimeRaw(); + } +} + + +/* WARN: should be callable from interrupt handler */ +void trace_writeEvent(u8 cpuChan, u8 event, const void *data, size_t sz, u32 *ts) +{ + spinlock_ctx_t sc; + + hal_spinlockSet(&trace_common.spinlock, &sc); + if (trace_common.running != 0) { + _writeEvent(cpuChan, event, data, sz, ts); + } + hal_spinlockClear(&trace_common.spinlock, &sc); +} + + +void _trace_updateLockEpoch(lock_t *lock) +{ + int prev = _proc_lockSetTraceEpoch(lock, trace_common.epoch); + + if (prev != trace_common.epoch) { + _trace_eventLockName(lock); + } +} + + +/* WARN: eventually consistent */ +int trace_isRunning(void) +{ + return trace_common.running; +} + + +static void _emitThreadsCb(void *arg, int i, threadinfo_t *tinfo) +{ + struct { + u16 pid; + u16 tid; + u8 priority; + char name[128]; + } __attribute__((packed)) ev; + + ev.tid = (u16)tinfo->tid; + ev.priority = (u8)tinfo->priority; + ev.pid = (u16)tinfo->pid; + + hal_memcpy(ev.name, tinfo->name, sizeof(tinfo->name)); + + _writeEvent((u8)trace_channel_meta, TRACE_EVENT_THREAD_CREATE, &ev, sizeof(ev), NULL); +} + + +static void _emitThreadinfo(void) +{ + (void)proc_threadsIter(0xffff, _emitThreadsCb, NULL); +} + + +static void _enableTracing(int enable) +{ + trace_common.running = enable; + _hal_interruptsTrace(enable); +} + + +static int getChannelCount(void) +{ + return (int)hal_cpuGetCount() * (int)trace_channel_count; +} + + +int trace_start(unsigned int flags) +{ + spinlock_ctx_t sc; + int ret; + + hal_spinlockSet(&trace_common.spinlock, &sc); + if (trace_common.running != 0 || trace_common.startPending != 0 || trace_common.stopped != 0) { /* a stopped session keeps its buffers until trace_finish(); restarting now would allocate over them */ + hal_spinlockClear(&trace_common.spinlock, &sc); + return -EINPROGRESS; + } + trace_common.startPending = 1; +
hal_spinlockClear(&trace_common.spinlock, &sc); + + ret = _trace_bufferStart(); + if (ret < 0) { + hal_spinlockSet(&trace_common.spinlock, &sc); + trace_common.startPending = 0; + hal_spinlockClear(&trace_common.spinlock, &sc); + return ret; + } + + if (_trace_bufferDiscard(0, 0) == -ENOSYS) { + /* If discarding is unsupported by the buffer backend, ignore the flag */ + flags &= ~PERF_TRACE_FLAG_ROLLING; + } + + /* Must be set before _emitThreadinfo as it depends on flags (via _writeEvent) */ + trace_common.flags = flags; + + /* Without spinlock - trace is not enabled yet, so there's no concurrent access */ + _emitThreadinfo(); + + trace_common.errorFlags = 0; + trace_common.eventDelayCount = 0; + + trace_common.epoch++; + + hal_spinlockSet(&trace_common.spinlock, &sc); + trace_common.startPending = 0; + _enableTracing(1); + trace_common.startTimestamp = _gettimeRaw(); + hal_spinlockClear(&trace_common.spinlock, &sc); + + return (int)getChannelCount(); +} + + +int trace_read(u8 chan, void *buf, size_t bufsz) +{ + spinlock_ctx_t sc; + int ret, running; + + hal_spinlockSet(&trace_common.spinlock, &sc); + running = trace_common.running; + if (chan < (u8)getChannelCount() && (running != 0 || trace_common.stopped != 0)) { + ret = _trace_bufferRead(chan, buf, bufsz); + } + else { + ret = -EINVAL; + } + hal_spinlockClear(&trace_common.spinlock, &sc); + + return ret; +} + + +int trace_stop(void) +{ + int ret = EOK, running; + spinlock_ctx_t sc; + + hal_spinlockSet(&trace_common.spinlock, &sc); + running = trace_common.running; + if (trace_common.stopped == 0 && running != 0) { + _enableTracing(0); + trace_common.stopped = 1; + ret = getChannelCount(); + } + else { + ret = -EINVAL; + } + hal_spinlockClear(&trace_common.spinlock, &sc); + + return ret; +} + + +int trace_finish(void) +{ + spinlock_ctx_t sc; + int ret = EOK; + u8 errorFlags = 0; + u64 eventDelayCount = 0; + u64 eventDelayTimestamp = 0; + u64 startTimestamp = 0; + u64 stopTimestamp = 0; + + 
hal_spinlockSet(&trace_common.spinlock, &sc); + if (trace_common.running != 0 || trace_common.stopped != 0) { + _enableTracing(0); + trace_common.stopped = 0; + errorFlags = trace_common.errorFlags; + eventDelayCount = trace_common.eventDelayCount; + trace_common.eventDelayCount = 0; + + startTimestamp = trace_common.startTimestamp; + stopTimestamp = _gettimeRaw(); + eventDelayTimestamp = trace_common.eventDelayTimestamp; + } + else { + ret = -EINVAL; + } + hal_spinlockClear(&trace_common.spinlock, &sc); + + if (ret == EOK) { + if ((errorFlags & TRACE_NON_MONOTONICITY) != 0U) { + lib_printf("kernel (%s:%d): timer non-monotonicity detected during event gathering\n", __func__, __LINE__); + } + + if ((errorFlags & TRACE_EVENT_DELAYED) != 0U) { + lib_printf("kernel (%s:%d): event delay detected %llu times - event receiver couldn't keep up\n", __func__, __LINE__, eventDelayCount); + lib_printf("kernel (%s:%d): start ts=%lld delay ts=%lld stop ts=%lld\n", __func__, __LINE__, startTimestamp, eventDelayTimestamp, stopTimestamp); + } + + if ((errorFlags & TRACE_BUFFER_WRITE_ERR) != 0U) { + lib_printf("kernel (%s:%d): buffer write error detected\n", __func__, __LINE__); + } + + ret = _trace_bufferFinish(); + } + + return ret; +} + + +int _trace_init(vm_map_t *kmap) +{ + trace_common.running = 0; + trace_common.stopped = 0; + trace_common.startPending = 0; + + trace_common.epoch = 0; + trace_common.prev = 0; + + hal_spinlockCreate(&trace_common.spinlock, "trace.spinlock"); + + return trace_bufferInit(kmap); +} diff --git a/perf/trace.h b/perf/trace.h new file mode 100644 index 000000000..904a7210f --- /dev/null +++ b/perf/trace.h @@ -0,0 +1,38 @@ +/* + * Phoenix-RTOS + * + * Operating system kernel + * + * Performance analysis subsystem - event tracing + * + * Copyright 2025 Phoenix Systems + * Author: Adam Greloch + * + * %LICENSE% + */ + +#ifndef _PERF_TRACE_H_ +#define _PERF_TRACE_H_ + +#include "vm/map.h" + + +int _trace_init(vm_map_t *kmap); + + +int trace_start(unsigned 
flags); + + +int trace_read(u8 chan, void *buf, size_t bufsz); + + +int trace_stop(void); + + +int trace_finish(void); + + +int trace_isRunning(void); + + +#endif diff --git a/perf/tsdl/metadata b/perf/tsdl/metadata new file mode 100644 index 000000000..9a5be86b8 --- /dev/null +++ b/perf/tsdl/metadata @@ -0,0 +1,196 @@ +/* CTF 1.8 */ +typealias integer { size = 8; align = 8; signed = true; } := i8; +typealias integer { size = 8; align = 8; signed = false; } := u8; +typealias integer { size = 16; align = 8; signed = false; } := u16; +typealias integer { size = 32; align = 8; signed = false; } := u32; +typealias integer { size = 32; align = 8; signed = true; } := i32; +typealias integer { size = 64; align = 8; signed = false; } := u64; +typealias integer { size = 8; align = 8; signed = false; encoding = ASCII; } := str; + +trace { + major = 1; + minor = 8; + byte_order = le; +}; + +clock { + name = monotonic; + freq = 1000000; +}; + + +stream { + event.header := struct { + u32 timestamp; + u8 id; + }; + packet.context := struct { + u8 cpu; + }; +}; + +event { + name = interrupt_enter; + id = 0x20; + fields := struct { + u8 irq; + }; +}; + +event { + name = interrupt_exit; + id = 0x21; + fields := struct { + u8 irq; + }; +}; + +event { + name = thread_scheduling; + id = 0x22; + fields := struct { + u16 tid; + }; +}; + +event { + name = thread_preempted; + id = 0x23; + fields := struct { + u16 tid; + }; +}; + +event { + name = thread_enqueued; + id = 0x24; + fields := struct { + u16 tid; + }; +}; + +event { + name = thread_waking; + id = 0x25; + fields := struct { + u16 tid; + }; +}; + +event { + name = thread_create; + id = 0x26; + fields := struct { + u16 pid; + u16 tid; + u8 prio; + str name[128]; + }; +}; + +event { + name = thread_end; + id = 0x27; + fields := struct { + u16 pid; + u16 tid; + }; +}; + +event { + name = syscall_enter; + id = 0x28; + fields := struct { + u8 n; + u16 tid; + }; +}; + +event { + name = syscall_exit; + id = 0x29; + fields := struct { + 
u8 n; + u16 tid; + }; +}; + +/* TODO: remove cpu fields from sched events - they are redundant with the cpu field of the stream packet context */ +event { + name = sched_enter; + id = 0x2a; + fields := struct { + u8 cpu; + }; +}; + +event { + name = sched_exit; + id = 0x2b; + fields := struct { + u8 cpu; + }; +}; + +event { + name = lock_name; + id = 0x2c; + fields := struct { + u32 lid; + str name[16]; + }; +}; + +event { + name = lock_set_enter; + id = 0x2d; + fields := struct { + u16 tid; + u32 lid; + }; +}; + + +event { + name = lock_set_acquired; + id = 0x2e; + fields := struct { + u16 tid; + u32 lid; + }; +}; + +event { + name = lock_set_exit; + id = 0x2f; + fields := struct { + u16 tid; + u32 lid; + }; +}; + +event { + name = lock_clear; + id = 0x30; + fields := struct { + u16 tid; + u32 lid; + }; +}; + +event { + name = thread_priority; + id = 0x31; + fields := struct { + u16 tid; + u8 priority; + }; +}; + +event { + name = process_kill; + id = 0x32; + fields := struct { + u16 pid; + }; +}; diff --git a/proc/lock.h b/proc/lock.h index b853ac788..682ba5bdd 100644 --- a/proc/lock.h +++ b/proc/lock.h @@ -22,12 +22,16 @@ typedef struct _lock_t { spinlock_t spinlock; /* Spinlock */ + struct _thread_t *owner; /* Owner thread */ struct _thread_t *queue; /* Waiting threads */ struct _lock_t *prev, *next; /* Doubly linked list */ + const char *name; struct lockAttr attr; unsigned int depth; /* Used with recursive locks */ + + int epoch; /* Current trace epoch - used for tracking lock name emission */ } lock_t; @@ -59,4 +63,8 @@ int proc_lockInit(lock_t *lock, const struct lockAttr *attr, const char *name); int proc_lockDone(lock_t *lock); +/* returns previous epoch */ +int _proc_lockSetTraceEpoch(lock_t *lock, int epoch); + + #endif diff --git a/proc/process.c b/proc/process.c index 0442090e5..07290ae6c 100644 --- a/proc/process.c +++ b/proc/process.c @@ -29,6 +29,7 @@ #include "msg.h" #include "ports.h" #include "userintr.h" +#include "perf/trace-events.h" /* Process states */ #define 
PREFORK 0 @@ -85,6 +86,7 @@ static void process_destroy(process_t *p) vm_map_t *mapp = p->mapp, *imapp = p->imapp; perf_kill(p); + trace_eventProcessKill(p); posix_died(process_getPid(p), p->exit); diff --git a/proc/threads.c b/proc/threads.c index ccaad589b..280fa4f4b 100644 --- a/proc/threads.c +++ b/proc/threads.c @@ -24,6 +24,7 @@ #include "resource.h" #include "msg.h" #include "ports.h" +#include "perf/trace-events.h" const struct lockAttr proc_lockAttrDefault = { .type = PH_LOCK_NORMAL }; @@ -155,24 +156,28 @@ static void _perf_event(thread_t *t, int type) static void _perf_scheduling(thread_t *t) { _perf_event(t, perf_evScheduling); + trace_eventThreadScheduling(proc_getTid(t)); } static void _perf_preempted(thread_t *t) { _perf_event(t, perf_evPreempted); + trace_eventThreadPreempted(proc_getTid(t)); } static void _perf_enqueued(thread_t *t) { _perf_event(t, perf_evEnqueued); + trace_eventThreadEnqueued(proc_getTid(t)); } static void _perf_waking(thread_t *t) { _perf_event(t, perf_evWaking); + trace_eventThreadWaking(proc_getTid(t)); } @@ -350,7 +355,7 @@ static void *perf_bufferAlloc(page_t **pages, size_t sz) } -int perf_start(unsigned int pid) +int perf_threadsStart(unsigned int pid) { void *data; spinlock_ctx_t sc; @@ -378,25 +383,32 @@ int perf_start(unsigned int pid) threads_common.perfLastTimestamp = _proc_gettimeRaw(); hal_spinlockClear(&threads_common.spinlock, &sc); - return EOK; + return 1; } -int perf_read(void *buffer, size_t bufsz) +int perf_threadsRead(void *buffer, size_t bufsz) { spinlock_ctx_t sc; + int ret; hal_spinlockSet(&threads_common.spinlock, &sc); - bufsz = _cbuffer_read(&threads_common.perfBuffer, buffer, bufsz); + if (threads_common.perfGather != 0) { + ret = _cbuffer_read(&threads_common.perfBuffer, buffer, bufsz); + } + else { + ret = -EINVAL; + } hal_spinlockClear(&threads_common.spinlock, &sc); - return (int)bufsz; + return ret; } -int perf_finish(void) +int perf_threadsFinish(void) { spinlock_ctx_t sc; + int ret; 
hal_spinlockSet(&threads_common.spinlock, &sc); if (threads_common.perfGather != 0) { @@ -404,12 +416,14 @@ int perf_finish(void) hal_spinlockClear(&threads_common.spinlock, &sc); perf_bufferFree(threads_common.perfBuffer.data, &threads_common.perfPages); + ret = EOK; } else { hal_spinlockClear(&threads_common.spinlock, &sc); + ret = -EINVAL; } - return EOK; + return ret; } @@ -501,6 +515,7 @@ static void thread_destroy(thread_t *thread) spinlock_ctx_t sc; perf_end(thread); + trace_eventThreadEnd(thread); /* No need to protect thread->locks access with threads_common.spinlock */ /* The destroyed thread is a ghost and no thread (except for the current one) can access it */ @@ -597,13 +612,16 @@ int _threads_schedule(unsigned int n, cpu_context_t *context, void *arg) unsigned int i; process_t *proc; cpu_context_t *signalCtx, *selCtx; + int cpuId = hal_cpuGetID(); (void)arg; (void)n; hal_lockScheduler(); + trace_eventSchedEnter(cpuId); + current = _proc_current(); - threads_common.current[hal_cpuGetID()] = NULL; + threads_common.current[cpuId] = NULL; /* Save current thread context */ if (current != NULL) { @@ -695,6 +713,8 @@ int _threads_schedule(unsigned int n, cpu_context_t *context, void *arg) /* Update CPU usage */ _threads_cpuTimeCalc(current, selected); + trace_eventSchedExit(cpuId); + return EOK; } @@ -704,9 +724,11 @@ int threads_schedule(unsigned int n, cpu_context_t *context, void *arg) { spinlock_ctx_t sc; int ret; + hal_spinlockSet(&threads_common.spinlock, &sc); ret = _threads_schedule(n, context, arg); hal_spinlockClear(&threads_common.spinlock, &sc); + return ret; } @@ -859,6 +881,9 @@ int proc_threadCreate(process_t *process, startFn_t start, int *id, u8 priority, else { hal_spinlockSet(&threads_common.spinlock, &sc); } + + trace_eventThreadCreate(t); + /* Insert thread to scheduler queue */ _perf_begin(t); @@ -941,6 +966,7 @@ static void _proc_threadSetPriority(thread_t *thread, u8 priority) } thread->priority = priority; + 
trace_eventThreadPriority(proc_getTid(thread), thread->priority); } @@ -994,6 +1020,8 @@ int proc_threadPriority(int signedPriority) (void)hal_spinlockClear(&threads_common.spinlock, &sc); } + trace_eventThreadPriority(proc_getTid(current), current->priority); + return ret; } @@ -1728,15 +1756,20 @@ static int _proc_lockSet(lock_t *lock, u8 interruptible, spinlock_ctx_t *scp) { thread_t *current; spinlock_ctx_t sc; - int ret; + int ret = EOK, tid; hal_spinlockSet(&threads_common.spinlock, &sc); current = _proc_current(); + tid = proc_getTid(current); + + _trace_eventLockSetEnter(lock, tid); if ((lock->attr.type == PH_LOCK_ERRORCHECK) && (lock->owner == current)) { hal_spinlockClear(&threads_common.spinlock, &sc); - return -EDEADLK; + ret = -EDEADLK; + _trace_eventLockSetExit(lock, tid, ret); + return ret; } if ((lock->attr.type == PH_LOCK_RECURSIVE) && (lock->owner == current)) { @@ -1749,6 +1782,7 @@ static int _proc_lockSet(lock_t *lock, u8 interruptible, spinlock_ctx_t *scp) } hal_spinlockClear(&threads_common.spinlock, &sc); + _trace_eventLockSetExit(lock, tid, ret); return ret; } @@ -1769,7 +1803,9 @@ static int _proc_lockSet(lock_t *lock, u8 interruptible, spinlock_ctx_t *scp) if (proc_threadWaitEx(&lock->queue, &lock->spinlock, 0, interruptible, scp) == -EINTR) { /* Can happen when thread_destroy is called on lock owner and current */ if (lock->owner == NULL) { - return -EINTR; + ret = -EINTR; + _trace_eventLockSetExit(lock, tid, ret); + return ret; } /* Don't return EINTR if we got lock anyway */ if (lock->owner != current) { @@ -1780,7 +1816,9 @@ static int _proc_lockSet(lock_t *lock, u8 interruptible, spinlock_ctx_t *scp) hal_spinlockClear(&threads_common.spinlock, &sc); - return -EINTR; + ret = -EINTR; + _trace_eventLockSetExit(lock, tid, ret); + return ret; } } } while (lock->owner != current); @@ -1791,7 +1829,8 @@ static int _proc_lockSet(lock_t *lock, u8 interruptible, spinlock_ctx_t *scp) lock->depth = 1; - return EOK; + 
_trace_eventLockSetExit(lock, tid, ret); + return ret; } @@ -1844,6 +1883,8 @@ static int _proc_lockUnlock(lock_t *lock) current = _proc_current(); + _trace_eventLockClear(lock, proc_getTid(current)); + LIB_ASSERT(LIST_BELONGS(&owner->locks, lock) != 0, "lock: %s, owner pid: %d, owner tid: %d, lock is not on the list", lock->name, (owner->process != NULL) ? process_getPid(owner->process) : 0, proc_getTid(owner)); @@ -2022,6 +2063,7 @@ int proc_lockInit(lock_t *lock, const struct lockAttr *attr, const char *name) lock->owner = NULL; lock->queue = NULL; lock->name = name; + lock->epoch = -1; hal_memcpy(&lock->attr, attr, sizeof(struct lockAttr)); @@ -2029,6 +2071,17 @@ int proc_lockInit(lock_t *lock, const struct lockAttr *attr, const char *name) } +int _proc_lockSetTraceEpoch(lock_t *lock, int epoch) +{ + int prev; + + prev = lock->epoch; + lock->epoch = epoch; + + return prev; +} + + /* * Initialization */ diff --git a/proc/threads.h b/proc/threads.h index d868c1f98..8d5b9fd65 100644 --- a/proc/threads.h +++ b/proc/threads.h @@ -84,19 +84,19 @@ typedef struct _thread_t { } thread_t; -static inline int proc_getTid(thread_t *t) +static inline int proc_getTid(const thread_t *t) { return t->idlinkage.id; } -int perf_start(unsigned int pid); +int perf_threadsStart(unsigned int pid); -int perf_read(void *buffer, size_t bufsz); +int perf_threadsRead(void *buffer, size_t bufsz); -int perf_finish(void); +int perf_threadsFinish(void); void perf_fork(process_t *p); diff --git a/syscalls.c b/syscalls.c index 212e4f041..213ff953e 100644 --- a/syscalls.c +++ b/syscalls.c @@ -25,11 +25,14 @@ #include "include/threads.h" #include "include/utsname.h" #include "include/time.h" +#include "include/perf.h" #include "lib/lib.h" #include "proc/proc.h" #include "vm/object.h" #include "posix/posix.h" #include "syspage.h" +#include "perf/perf.h" +#include "perf/trace-events.h" #define SYSCALLS_NAME(name) syscalls_##name, @@ -470,38 +473,68 @@ int syscalls_syspageprog(u8 *ustack) } -int 
syscalls_perf_start(u8 *ustack) +int syscalls_sys_perf_start(u8 *ustack) { - unsigned int pid; + process_t *proc = proc_current()->process; + perf_mode_t mode; + unsigned flags; + void *arg; + size_t sz; + + GETFROMSTACK(ustack, int, mode, 0U); + GETFROMSTACK(ustack, unsigned, flags, 1U); + GETFROMSTACK(ustack, void *, arg, 2U); + GETFROMSTACK(ustack, size_t, sz, 3U); - GETFROMSTACK(ustack, unsigned int, pid, 0U); + if (arg != NULL && vm_mapBelongs(proc, arg, sz) < 0) { + return -EFAULT; + } - return perf_start(pid); + return perf_start(mode, flags, arg, sz); } -int syscalls_perf_read(u8 *ustack) +int syscalls_sys_perf_read(u8 *ustack) { process_t *proc = proc_current()->process; void *buffer; size_t sz; + perf_mode_t mode; + int chan; - GETFROMSTACK(ustack, void *, buffer, 0U); - GETFROMSTACK(ustack, size_t, sz, 1U); + GETFROMSTACK(ustack, perf_mode_t, mode, 0U); + GETFROMSTACK(ustack, void *, buffer, 1U); + GETFROMSTACK(ustack, size_t, sz, 2U); + GETFROMSTACK(ustack, int, chan, 3U); if (vm_mapBelongs(proc, buffer, sz) < 0) { return -EFAULT; } - return perf_read(buffer, sz); + return perf_read(mode, buffer, sz, chan); +} + + +int syscalls_sys_perf_stop(u8 *ustack) +{ + perf_mode_t mode; + + GETFROMSTACK(ustack, perf_mode_t, mode, 0U); + + return perf_stop(mode); } -int syscalls_perf_finish(u8 *ustack) +int syscalls_sys_perf_finish(u8 *ustack) { - return perf_finish(); + perf_mode_t mode; + + GETFROMSTACK(ustack, perf_mode_t, mode, 0U); + + return perf_finish(mode); } + /* * Mutexes */ @@ -1858,14 +1891,21 @@ const void *const syscalls[] = { SYSCALLS(SYSCALLS_NAME) }; void *syscalls_dispatch(int n, u8 *ustack, cpu_context_t *ctx) { void *retval; + int tid; if (n >= (int)(sizeof(syscalls) / sizeof(syscalls[0]))) { return (void *)-EINVAL; } + tid = proc_getTid(proc_current()); + + trace_eventSyscallEnter(n, tid); + /* parasoft-suppress-next-line MISRAC2012-RULE_11_1 MISRAC2012-RULE_11_8 "Related to previous suppression" */ retval = ((void *(*)(u8 
*arg))syscalls[n])(ustack); + trace_eventSyscallExit(n, tid); + if (proc_current()->exit != 0U) { proc_threadEnd(); } From b34be1e1ce021b59ef41eeae7f319ad8b02a5317 Mon Sep 17 00:00:00 2001 From: Adam Greloch Date: Thu, 11 Sep 2025 14:57:25 +0200 Subject: [PATCH 4/4] !proc: remove threads perf mode (Old) threads perf mode is redundant as its functionality is realized by trace mode JIRA: RTOS-1057 --- include/perf.h | 2 +- include/sysinfo.h | 75 ----------- perf/perf.c | 7 -- proc/process.c | 2 - proc/threads.c | 311 +++------------------------------------------- proc/threads.h | 18 --- 6 files changed, 21 insertions(+), 394 deletions(-) diff --git a/include/perf.h b/include/perf.h index 732107d15..29b5294ae 100644 --- a/include/perf.h +++ b/include/perf.h @@ -17,7 +17,7 @@ /* clang-format off */ -typedef enum { perf_mode_threads, perf_mode_trace, perf_mode_count } perf_mode_t; +typedef enum { perf_mode_trace, perf_mode_count } perf_mode_t; typedef enum { trace_channel_meta, trace_channel_event, trace_channel_count } trace_channel_t; /* clang-format on */ diff --git a/include/sysinfo.h b/include/sysinfo.h index e0279e936..1c85108e8 100644 --- a/include/sysinfo.h +++ b/include/sysinfo.h @@ -99,79 +99,4 @@ typedef struct _meminfo_t { } meminfo_t; -enum { perf_evScheduling, - perf_evEnqueued, - perf_evWaking, - perf_evPreempted }; - - -typedef struct { - unsigned int deltaTimestamp : 12; - unsigned int type : 2; - unsigned int tid : 18; -} __attribute__((packed)) perf_event_t; - - -enum { perf_levBegin, - perf_levEnd, - perf_levFork, - perf_levKill, - perf_levExec }; - - -typedef struct { - unsigned int sbz; - - unsigned int deltaTimestamp : 12; - unsigned int type : 3; - - unsigned int prio : 3; - unsigned int tid : 18; - unsigned int pid : 18; -} __attribute__((packed)) perf_levent_begin_t; - - -typedef struct { - unsigned int sbz; - - unsigned int deltaTimestamp : 12; - unsigned int type : 3; - - unsigned int tid : 18; -} __attribute__((packed)) perf_levent_end_t; - 
- -typedef struct { - unsigned int sbz; - - unsigned int deltaTimestamp : 12; - unsigned int type : 3; - - unsigned int tid : 18; - unsigned int ppid : 18; - unsigned int pid : 18; -} __attribute__((packed)) perf_levent_fork_t; - - -typedef struct { - unsigned int sbz; - - unsigned int deltaTimestamp : 12; - unsigned int type : 3; - - unsigned int tid : 18; - unsigned int pid : 18; -} __attribute__((packed)) perf_levent_kill_t; - - -typedef struct { - unsigned int sbz; - - unsigned int deltaTimestamp : 12; - unsigned int type : 3; - - unsigned int tid : 18; - char path[32]; -} __attribute__((packed)) perf_levent_exec_t; - #endif diff --git a/perf/perf.c b/perf/perf.c index d832a141f..78240522c 100644 --- a/perf/perf.c +++ b/perf/perf.c @@ -12,7 +12,6 @@ */ #include "include/errno.h" -#include "proc/threads.h" #include "trace.h" #include "perf.h" @@ -27,8 +26,6 @@ int _perf_init(vm_map_t *kmap) int perf_start(perf_mode_t mode, unsigned flags, void *arg, size_t sz) { switch (mode) { - case perf_mode_threads: - return perf_threadsStart((unsigned)(ptr_t)arg); case perf_mode_trace: return trace_start(flags); default: @@ -40,8 +37,6 @@ int perf_start(perf_mode_t mode, unsigned flags, void *arg, size_t sz) int perf_read(perf_mode_t mode, void *buf, size_t bufsz, int chan) { switch (mode) { - case perf_mode_threads: - return perf_threadsRead(buf, bufsz); case perf_mode_trace: return trace_read((u8)chan, buf, bufsz); default: @@ -64,8 +59,6 @@ int perf_stop(perf_mode_t mode) int perf_finish(perf_mode_t mode) { switch (mode) { - case perf_mode_threads: - return perf_threadsFinish(); case perf_mode_trace: return trace_finish(); default: diff --git a/proc/process.c b/proc/process.c index 07290ae6c..6b7fbb5a1 100644 --- a/proc/process.c +++ b/proc/process.c @@ -85,7 +85,6 @@ static void process_destroy(process_t *p) thread_t *ghost; vm_map_t *mapp = p->mapp, *imapp = p->imapp; - perf_kill(p); trace_eventProcessKill(p); posix_died(process_getPid(p), p->exit); @@ -227,7 +226,6 @@ 
int proc_start(startFn_t start, void *arg, const char *path) /* Initialize resources tree for mutex and cond handles */ _resource_init(process); (void)process_alloc(process); - perf_fork(process); if (proc_threadCreate(process, start, NULL, 4, SIZE_KSTACK, NULL, 0, (void *)arg) < 0) { (void)proc_put(process); diff --git a/proc/threads.c b/proc/threads.c index 280fa4f4b..7a8c4285d 100644 --- a/proc/threads.c +++ b/proc/threads.c @@ -26,6 +26,9 @@ #include "ports.h" #include "perf/trace-events.h" +/* clang-format off */ +enum { event_scheduling, event_enqueued, event_waking, event_preempted }; +/* clang-format on */ const struct lockAttr proc_lockAttrDefault = { .type = PH_LOCK_NORMAL }; @@ -56,11 +59,6 @@ static struct { thread_t *ghosts; thread_t *reaper; - int perfGather; - time_t perfLastTimestamp; - cbuffer_t perfBuffer; - page_t *perfPages; - /* Debug */ unsigned char stackCanary[16]; time_t prev; @@ -111,24 +109,17 @@ static int _proc_threadWakeup(thread_t **queue); static int _proc_threadBroadcast(thread_t **queue); -static unsigned int perf_idpack(int id) -{ - return ((unsigned int)id) >> 8; -} - - /* Note: always called with threads_common.spinlock set */ -static void _perf_event(thread_t *t, int type) +static void _threads_updateWaits(thread_t *t, int type) { - perf_event_t ev; time_t now = 0, wait; now = _proc_gettimeRaw(); - if (type == perf_evWaking || type == perf_evPreempted) { + if (type == event_waking || type == event_preempted) { t->readyTime = now; } - else if (type == perf_evScheduling) { + else if (type == event_scheduling) { wait = now - t->readyTime; if (t->maxWait < wait) { @@ -138,295 +129,37 @@ static void _perf_event(thread_t *t, int type) else { /* No action required */ } - - if (threads_common.perfGather == 0) { - return; - } - - ev.type = (u8)type & 0x03U; - - ev.deltaTimestamp = (u16)(time_t)(now - threads_common.perfLastTimestamp) & 0x0fffU; - threads_common.perfLastTimestamp = now; - ev.tid = perf_idpack(proc_getTid(t)); - - 
(void)_cbuffer_write(&threads_common.perfBuffer, &ev, sizeof(ev)); } -static void _perf_scheduling(thread_t *t) +static void _threads_scheduling(thread_t *t) { - _perf_event(t, perf_evScheduling); + _threads_updateWaits(t, event_scheduling); trace_eventThreadScheduling(proc_getTid(t)); } -static void _perf_preempted(thread_t *t) +static void _threads_preempted(thread_t *t) { - _perf_event(t, perf_evPreempted); + _threads_updateWaits(t, event_preempted); trace_eventThreadPreempted(proc_getTid(t)); } -static void _perf_enqueued(thread_t *t) +static void _threads_enqueued(thread_t *t) { - _perf_event(t, perf_evEnqueued); + _threads_updateWaits(t, event_enqueued); trace_eventThreadEnqueued(proc_getTid(t)); } -static void _perf_waking(thread_t *t) +static void _threads_waking(thread_t *t) { - _perf_event(t, perf_evWaking); + _threads_updateWaits(t, event_waking); trace_eventThreadWaking(proc_getTid(t)); } -static void _perf_begin(thread_t *t) -{ - perf_levent_begin_t ev; - time_t now; - - if (threads_common.perfGather == 0) { - return; - } - - ev.sbz = 0; - ev.type = perf_levBegin; - ev.prio = t->priority; - ev.tid = perf_idpack(proc_getTid(t)); - ev.pid = t->process != NULL ? 
perf_idpack(process_getPid(t->process)) : (unsigned int)-1; - - now = _proc_gettimeRaw(); - ev.deltaTimestamp = (u16)(time_t)(now - threads_common.perfLastTimestamp) & 0x0fffU; - threads_common.perfLastTimestamp = now; - - (void)_cbuffer_write(&threads_common.perfBuffer, &ev, sizeof(ev)); -} - - -static void perf_end(thread_t *t) -{ - perf_levent_end_t ev; - time_t now; - spinlock_ctx_t sc; - - if (threads_common.perfGather == 0) { - return; - } - - hal_spinlockSet(&threads_common.spinlock, &sc); - ev.sbz = 0; - ev.type = perf_levEnd; - ev.tid = perf_idpack(proc_getTid(t)); - - now = _proc_gettimeRaw(); - ev.deltaTimestamp = (u16)(time_t)(now - threads_common.perfLastTimestamp) & 0x0fffU; - threads_common.perfLastTimestamp = now; - - (void)_cbuffer_write(&threads_common.perfBuffer, &ev, sizeof(ev)); - hal_spinlockClear(&threads_common.spinlock, &sc); -} - - -void perf_fork(process_t *p) -{ - perf_levent_fork_t ev; - time_t now; - spinlock_ctx_t sc; - - if (threads_common.perfGather == 0) { - return; - } - - hal_spinlockSet(&threads_common.spinlock, &sc); - ev.sbz = 0; - ev.type = perf_levFork; - ev.pid = perf_idpack(process_getPid(p)); - /* TODO: ev.ppid = p->parent != NULL ? 
perf_idpack(p->parent->id) : -1; */ - ev.tid = perf_idpack(proc_getTid(_proc_current())); - - now = _proc_gettimeRaw(); - ev.deltaTimestamp = (u16)(time_t)(now - threads_common.perfLastTimestamp) & 0x0fffU; - threads_common.perfLastTimestamp = now; - - (void)_cbuffer_write(&threads_common.perfBuffer, &ev, sizeof(ev)); - hal_spinlockClear(&threads_common.spinlock, &sc); -} - - -void perf_kill(process_t *p) -{ - perf_levent_kill_t ev; - time_t now; - spinlock_ctx_t sc; - - if (threads_common.perfGather == 0) { - return; - } - - hal_spinlockSet(&threads_common.spinlock, &sc); - ev.sbz = 0; - ev.type = perf_levKill; - ev.pid = perf_idpack(process_getPid(p)); - ev.tid = perf_idpack(proc_getTid(_proc_current())); - - now = _proc_gettimeRaw(); - ev.deltaTimestamp = (u16)(time_t)(now - threads_common.perfLastTimestamp) & 0x0fffU; - threads_common.perfLastTimestamp = now; - - (void)_cbuffer_write(&threads_common.perfBuffer, &ev, sizeof(ev)); - hal_spinlockClear(&threads_common.spinlock, &sc); -} - - -void perf_exec(process_t *p, char *path) -{ - perf_levent_exec_t ev; - time_t now; - size_t plen; - spinlock_ctx_t sc; - - if (threads_common.perfGather == 0) { - return; - } - - hal_spinlockSet(&threads_common.spinlock, &sc); - ev.sbz = 0; - ev.type = perf_levExec; - ev.tid = perf_idpack(proc_getTid(_proc_current())); - - plen = hal_strlen(path); - plen = min(plen, sizeof(ev.path) - 1U); - hal_memcpy(ev.path, path, plen); - ev.path[plen] = '\0'; - - now = _proc_gettimeRaw(); - ev.deltaTimestamp = (u16)(time_t)(now - threads_common.perfLastTimestamp) & 0x0fffU; - threads_common.perfLastTimestamp = now; - - (void)_cbuffer_write(&threads_common.perfBuffer, &ev, sizeof(ev) - sizeof(ev.path) + plen + 1U); - hal_spinlockClear(&threads_common.spinlock, &sc); -} - - -static void perf_bufferFree(void *data, page_t **pages) -{ - size_t sz = 0; - page_t *p; - - p = *pages; - while (p != NULL) { - *pages = p->next; - vm_pageFree(p); - sz += SIZE_PAGE; - p = *pages; - } - - 
(void)vm_munmap(threads_common.kmap, data, sz); -} - - -static void *perf_bufferAlloc(page_t **pages, size_t sz) -{ - page_t *p; - void *v, *data; - - *pages = NULL; - data = vm_mapFind(threads_common.kmap, NULL, sz, MAP_NONE, PROT_READ | PROT_WRITE); - - if (data == NULL) { - return NULL; - } - /* parasoft-suppress-next-line MISRAC2012-DIR_4_1-k "data will never be -1" */ - for (v = data; (ptr_t)v < (ptr_t)data + sz; v += SIZE_PAGE) { - p = vm_pageAlloc(SIZE_PAGE, PAGE_OWNER_APP); - - if (p == NULL) { - perf_bufferFree(data, pages); - return NULL; - } - - p->next = *pages; - *pages = p; - (void)page_map(&threads_common.kmap->pmap, v, p->addr, PGHD_PRESENT | PGHD_WRITE | PGHD_READ); - } - - return data; -} - - -int perf_threadsStart(unsigned int pid) -{ - void *data; - spinlock_ctx_t sc; - - if (pid == 0U) { - return -EINVAL; - } - - if (threads_common.perfGather == 1) { - return -EINVAL; - } - - /* Allocate 4M for events */ - data = perf_bufferAlloc(&threads_common.perfPages, 0x4UL << 20); - - if (data == NULL) { - return -ENOMEM; - } - - _cbuffer_init(&threads_common.perfBuffer, data, 0x4UL << 20); - - /* Start gathering events */ - hal_spinlockSet(&threads_common.spinlock, &sc); - threads_common.perfGather = 1; - threads_common.perfLastTimestamp = _proc_gettimeRaw(); - hal_spinlockClear(&threads_common.spinlock, &sc); - - return 1; -} - - -int perf_threadsRead(void *buffer, size_t bufsz) -{ - spinlock_ctx_t sc; - int ret; - - hal_spinlockSet(&threads_common.spinlock, &sc); - if (threads_common.perfGather != 0) { - ret = _cbuffer_read(&threads_common.perfBuffer, buffer, bufsz); - } - else { - ret = -EINVAL; - } - hal_spinlockClear(&threads_common.spinlock, &sc); - - return ret; -} - - -int perf_threadsFinish(void) -{ - spinlock_ctx_t sc; - int ret; - - hal_spinlockSet(&threads_common.spinlock, &sc); - if (threads_common.perfGather != 0) { - threads_common.perfGather = 0; - hal_spinlockClear(&threads_common.spinlock, &sc); - - 
perf_bufferFree(threads_common.perfBuffer.data, &threads_common.perfPages); - ret = EOK; - } - else { - hal_spinlockClear(&threads_common.spinlock, &sc); - ret = -EINVAL; - } - - return ret; -} - - /* * Time management */ @@ -514,7 +247,6 @@ static void thread_destroy(thread_t *thread) process_t *process; spinlock_ctx_t sc; - perf_end(thread); trace_eventThreadEnd(thread); /* No need to protect thread->locks access with threads_common.spinlock */ @@ -630,7 +362,7 @@ int _threads_schedule(unsigned int n, cpu_context_t *context, void *arg) /* Move thread to the end of queue */ if (current->state == READY) { LIST_ADD(&threads_common.ready[current->priority], current); - _perf_preempted(current); + _threads_preempted(current); } } @@ -692,7 +424,7 @@ int _threads_schedule(unsigned int n, cpu_context_t *context, void *arg) hal_cpuTlsSet(&selected->tls, selCtx); } - _perf_scheduling(selected); + _threads_scheduling(selected); hal_cpuRestore(context, selCtx); #if defined(STACK_CANARY) || !defined(NDEBUG) @@ -886,8 +618,7 @@ int proc_threadCreate(process_t *process, startFn_t start, int *id, u8 priority, /* Insert thread to scheduler queue */ - _perf_begin(t); - _perf_waking(t); + _threads_waking(t); LIST_ADD(&threads_common.ready[priority], t); hal_spinlockClear(&threads_common.spinlock, &sc); @@ -1134,7 +865,7 @@ static void _proc_threadDequeue(thread_t *t) return; } - _perf_waking(t); + _threads_waking(t); if (t->wait != NULL) { LIST_REMOVE(t->wait, t); @@ -1186,7 +917,7 @@ static void _proc_threadEnqueue(thread_t **queue, time_t timeout, u8 interruptib _threads_updateWakeup(_proc_gettimeRaw(), NULL); } - _perf_enqueued(current); + _threads_enqueued(current); } @@ -1220,7 +951,7 @@ static int _proc_threadSleepAbs(time_t abs, time_t now, spinlock_ctx_t *sc) (void)lib_rbInsert(&threads_common.sleeping, &current->sleeplinkage); - _perf_enqueued(current); + _threads_enqueued(current); _threads_updateWakeup(now, NULL); } @@ -2277,8 +2008,6 @@ int _threads_init(vm_map_t *kmap, 
vm_object_t *kernel) threads_common.idcounter = 0; threads_common.prev = 0; - threads_common.perfGather = 0; - (void)proc_lockInit(&threads_common.lock, &proc_lockAttrDefault, "threads.common"); for (i = 0U; i < sizeof(threads_common.stackCanary); ++i) { diff --git a/proc/threads.h b/proc/threads.h index 8d5b9fd65..a30f682c9 100644 --- a/proc/threads.h +++ b/proc/threads.h @@ -90,24 +90,6 @@ static inline int proc_getTid(const thread_t *t) } -int perf_threadsStart(unsigned int pid); - - -int perf_threadsRead(void *buffer, size_t bufsz); - - -int perf_threadsFinish(void); - - -void perf_fork(process_t *p); - - -void perf_kill(process_t *p); - - -void perf_exec(process_t *p, char *path); - - thread_t *proc_current(void);