[funini.com] -> [kei@sodan] -> Kernel Reading

root/kernel/time/timer_stats.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. reset_entries
  2. alloc_entry
  3. match_entries
  4. tstat_lookup
  5. timer_stats_update_stats
  6. print_name_offset
  7. tstats_show
  8. sync_access
  9. tstats_write
  10. tstats_open
  11. init_timer_stats
  12. init_tstats_procfs

/*
 * kernel/time/timer_stats.c
 *
 * Collect timer usage statistics.
 *
 * Copyright(C) 2006, Red Hat, Inc., Ingo Molnar
 * Copyright(C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
 *
 * timer_stats is based on timer_top, a similar functionality which was part of
 * Con Kolivas dyntick patch set. It was developed by Daniel Petrini at the
 * Instituto Nokia de Tecnologia - INdT - Manaus. timer_top's design was based
 * on dynamic allocation of the statistics entries and linear search based
 * lookup combined with a global lock, rather than the static array, hash
 * and per-CPU locking which is used by timer_stats. It was written for the
 * pre hrtimer kernel code and therefore did not take hrtimers into account.
 * Nevertheless it provided the base for the timer_stats implementation and
 * was a helpful source of inspiration. Kudos to Daniel and the Nokia folks
 * for this effort.
 *
 * timer_top.c is
 *      Copyright (C) 2005 Instituto Nokia de Tecnologia - INdT - Manaus
 *      Written by Daniel Petrini <d.pensator@gmail.com>
 *      timer_top.c was released under the GNU General Public License version 2
 *
 * We export the addresses and counting of timer functions being called,
 * the pid and cmdline from the owner process if applicable.
 *
 * Start/stop data collection:
 * # echo [1|0] >/proc/timer_stats
 *
 * Display the information collected so far:
 * # cat /proc/timer_stats
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/proc_fs.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/kallsyms.h>

#include <asm/uaccess.h>

/*
 * This is our basic unit of interest: a timer expiry event identified
 * by the timer, its start/expire functions and the PID of the task that
 * started the timer. We count the number of times an event happens.
 *
 * Entries live in the static entries[] array and are chained into
 * tstat_hash_table[] through ->next.
 */
struct entry {
        /*
         * Hash list: next entry in the same hash bucket, NULL at the
         * end of the chain.
         */
        struct entry            *next;

        /*
         * Hash keys: these four fields are what __tstat_hashfn() mixes
         * and what match_entries() compares.
         */
        void                    *timer;
        void                    *start_func;
        void                    *expire_func;
        pid_t                   pid;

        /*
         * Number of timeout events:
         */
        unsigned long           count;
        /*
         * Flags supplied by the caller of timer_stats_update_stats().
         * Not part of the hash key; tstats_show() tests
         * TIMER_STATS_FLAG_DEFERRABLE to mark the entry with a 'D'.
         */
        unsigned int            timer_flag;

        /*
         * We save the command-line string to preserve
         * this information past task exit. One byte larger than
         * TASK_COMM_LEN: tstat_lookup() copies TASK_COMM_LEN bytes
         * into it.
         */
        char                    comm[TASK_COMM_LEN + 1];

} ____cacheline_aligned_in_smp;

/*
 * Spinlock protecting the tables - not taken during lookup.
 * tstat_lookup() only takes it on its slowpath, while a new entry is
 * allocated and linked into a hash chain.
 */
static DEFINE_SPINLOCK(table_lock);

/*
 * Per-CPU lookup locks for fast hash lookup. One of them is held, with
 * irqs disabled, around every lookup/update in
 * timer_stats_update_stats(); sync_access() cycles through the locks of
 * all online CPUs.
 */
static DEFINE_PER_CPU(spinlock_t, lookup_lock);

/*
 * Mutex to serialize state changes with show-stats activities.
 * Held by both tstats_show() and tstats_write().
 */
static DEFINE_MUTEX(show_mutex);

/*
 * Collection status, active/inactive. Toggled by tstats_write() and
 * checked by timer_stats_update_stats() before anything is recorded.
 */
static int __read_mostly active;

/*
 * Beginning/end timestamps of measurement. time_start is taken when
 * collection is switched on; time_stop when it is switched off, and it
 * is refreshed by tstats_show() while collection is still active.
 */
static ktime_t time_start, time_stop;

/*
 * tstat entry structs only get allocated while collection is
 * active and never freed during that time - this simplifies
 * things quite a bit.
 *
 * They get freed when a new collection period is started
 * (reset_entries() zeroes the whole array).
 *
 * MAX_ENTRIES is 1 << 10 = 1024 entries.
 */
#define MAX_ENTRIES_BITS        10
#define MAX_ENTRIES             (1UL << MAX_ENTRIES_BITS)

/* Number of slots of entries[] handed out so far by alloc_entry(). */
static unsigned long nr_entries;
static struct entry entries[MAX_ENTRIES];

/*
 * Incremented by timer_stats_update_stats() each time tstat_lookup()
 * could not return an entry; reported by tstats_show() as "Overflow".
 */
static atomic_t overflow_count;

/*
 * The entries are in a hash-table, for fast lookup.
 * The table has half as many buckets as there are entries
 * (1 << 9 = 512); collisions are chained through entry->next.
 */
#define TSTAT_HASH_BITS         (MAX_ENTRIES_BITS - 1)
#define TSTAT_HASH_SIZE         (1UL << TSTAT_HASH_BITS)
#define TSTAT_HASH_MASK         (TSTAT_HASH_SIZE - 1)

/* XOR the four key fields together and mask down to a bucket index. */
#define __tstat_hashfn(entry)                                           \
        (((unsigned long)(entry)->timer       ^                         \
          (unsigned long)(entry)->start_func  ^                         \
          (unsigned long)(entry)->expire_func ^                         \
          (unsigned long)(entry)->pid           ) & TSTAT_HASH_MASK)

/* Address of the bucket head (struct entry **) that @entry hashes to. */
#define tstat_hashentry(entry)  (tstat_hash_table + __tstat_hashfn(entry))

static struct entry *tstat_hash_table[TSTAT_HASH_SIZE] __read_mostly;

/*
 * Throw away everything collected so far: clear the overflow counter,
 * empty every hash bucket, zero the entry storage and mark all slots
 * as free again.
 */
static void reset_entries(void)
{
        atomic_set(&overflow_count, 0);
        memset(tstat_hash_table, 0, sizeof(tstat_hash_table));
        memset(entries, 0, sizeof(entries));
        nr_entries = 0;
}

/*
 * Hand out the next unused slot of entries[], or NULL once all
 * MAX_ENTRIES slots have been given away.
 */
static struct entry *alloc_entry(void)
{
        struct entry *slot = NULL;

        if (nr_entries < MAX_ENTRIES) {
                slot = &entries[nr_entries];
                nr_entries++;
        }

        return slot;
}

/*
 * Compare the hash keys of two entries. Returns 1 when timer,
 * start_func, expire_func and pid are all equal, 0 otherwise.
 */
static int match_entries(struct entry *entry1, struct entry *entry2)
{
        if (entry1->timer != entry2->timer)
                return 0;
        if (entry1->start_func != entry2->start_func)
                return 0;
        if (entry1->expire_func != entry2->expire_func)
                return 0;

        return entry1->pid == entry2->pid;
}

/*
 * Look up whether an entry matching this item is present
 * in the hash already, and create one if it is not.
 *
 * @entry: template carrying the hash keys (and timer_flag) to look for
 * @comm:  command name stored in a newly created entry;
 *         TASK_COMM_LEN bytes are copied from it
 *
 * Must be called with irqs off and the lookup lock held.
 *
 * Returns the existing or newly linked entry, or NULL when
 * alloc_entry() has no free slot left.
 */
static struct entry *tstat_lookup(struct entry *entry, char *comm)
{
        struct entry **head, *curr, *prev;

        head = tstat_hashentry(entry);
        curr = *head;

        /*
         * The fastpath is when the entry is already hashed,
         * we do this with the lookup lock held, but with the
         * table lock not held:
         */
        while (curr) {
                if (match_entries(curr, entry))
                        return curr;

                curr = curr->next;
        }

        /*
         * Slowpath: allocate, set up and link a new hash entry:
         */
        spin_lock(&table_lock);

        /*
         * Sample the bucket head only now that table_lock is held.
         * Reading it before taking the lock could leave us with a stale
         * NULL head: a concurrent insert into the then-empty bucket
         * would be overwritten by the '*head = curr' below, unhooking
         * that entry from the hash.
         */
        prev = NULL;
        curr = *head;

        /*
         * Make sure we have not raced with another CPU:
         */
        while (curr) {
                if (match_entries(curr, entry))
                        goto out_unlock;

                prev = curr;
                curr = curr->next;
        }

        curr = alloc_entry();
        if (curr) {
                *curr = *entry;
                curr->count = 0;
                curr->next = NULL;
                memcpy(curr->comm, comm, TASK_COMM_LEN);
                /*
                 * The struct copy above also copied the template's
                 * comm[], which the caller does not fill in. The memcpy
                 * overwrites only the first TASK_COMM_LEN bytes, so
                 * terminate the string explicitly.
                 */
                curr->comm[TASK_COMM_LEN] = '\0';

                smp_mb(); /* Ensure that curr is initialized before insert */

                if (prev)
                        prev->next = curr;
                else
                        *head = curr;
        }
 out_unlock:
        spin_unlock(&table_lock);

        return curr;
}

/**
 * timer_stats_update_stats - Update the statistics for a timer.
 * @timer:      pointer to either a timer_list or a hrtimer
 * @pid:        the pid of the task which set up the timer
 * @startf:     pointer to the function which did the timer setup
 * @timerf:     pointer to the timer callback function of the timer
 * @comm:       name of the process which set up the timer
 *
 * When the timer is already registered, then the event counter is
 * incremented. Otherwise the timer is registered in a free slot.
 */
void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
                              void *timerf, char *comm,
                              unsigned int timer_flag)
{
        /*
         * It doesnt matter which lock we take:
         */
        spinlock_t *lock;
        struct entry *entry, input;
        unsigned long flags;

        if (likely(!active))
                return;

        lock = &per_cpu(lookup_lock, raw_smp_processor_id());

        input.timer = timer;
        input.start_func = startf;
        input.expire_func = timerf;
        input.pid = pid;
        input.timer_flag = timer_flag;

        spin_lock_irqsave(lock, flags);
        if (!active)
                goto out_unlock;

        entry = tstat_lookup(&input, comm);
        if (likely(entry))
                entry->count++;
        else
                atomic_inc(&overflow_count);

 out_unlock:
        spin_unlock_irqrestore(lock, flags);
}

/*
 * Emit the symbol name for @addr into @m, or the raw address in the
 * form "<%p>" when lookup_symbol_name() reports failure.
 */
static void print_name_offset(struct seq_file *m, unsigned long addr)
{
        char symname[KSYM_NAME_LEN];

        if (lookup_symbol_name(addr, symname) >= 0) {
                seq_printf(m, "%s", symname);
                return;
        }

        seq_printf(m, "<%p>", (void *)addr);
}

/*
 * seq_file show callback: print the version banner, the length of the
 * sample period, the overflow count (if any), one line per collected
 * entry and finally the event total with an events/sec rate.
 *
 * Runs under show_mutex. Always returns 0.
 *
 * NOTE(review): the rate computation multiplies the event total by
 * 1000000 in unsigned long arithmetic; where long is 32 bits wide this
 * can wrap for large totals - worth confirming and fixing with a 64-bit
 * division helper.
 */
static int tstats_show(struct seq_file *m, void *v)
{
        unsigned long ms, per_sec, per_sec_frac;
        struct timespec period;
        struct entry *e;
        ktime_t elapsed;
        long total = 0;
        int idx;

        mutex_lock(&show_mutex);

        /*
         * While collection is still running, measure up to now:
         */
        if (active)
                time_stop = ktime_get();

        elapsed = ktime_sub(time_stop, time_start);
        period = ktime_to_timespec(elapsed);

        /* Sub-second part of the period, in milliseconds. */
        ms = period.tv_nsec / 1000000;

        seq_puts(m, "Timer Stats Version: v0.2\n");
        seq_printf(m, "Sample period: %ld.%03ld s\n", period.tv_sec, ms);
        if (atomic_read(&overflow_count))
                seq_printf(m, "Overflow: %d entries\n",
                        atomic_read(&overflow_count));

        for (idx = 0; idx < nr_entries; idx++) {
                e = &entries[idx];

                /* Deferrable timers get a 'D' appended to the count. */
                if (!(e->timer_flag & TIMER_STATS_FLAG_DEFERRABLE)) {
                        seq_printf(m, " %4lu, %5d %-16s ",
                                e->count, e->pid, e->comm);
                } else {
                        seq_printf(m, "%4luD, %5d %-16s ",
                                e->count, e->pid, e->comm);
                }

                print_name_offset(m, (unsigned long)e->start_func);
                seq_puts(m, " (");
                print_name_offset(m, (unsigned long)e->expire_func);
                seq_puts(m, ")\n");

                total += e->count;
        }

        /* Whole period in milliseconds; never let the divisor be zero. */
        ms += period.tv_sec * 1000;
        if (!ms)
                ms = 1;

        if (!total || !period.tv_sec) {
                seq_printf(m, "%ld total events\n", total);
        } else {
                per_sec = total * 1000 / ms;
                per_sec_frac = (total * 1000000 / ms) % 1000;
                seq_printf(m, "%ld total events, %ld.%03ld events/sec\n",
                           total, per_sec, per_sec_frac);
        }

        mutex_unlock(&show_mutex);

        return 0;
}

/*
 * After a state change, wait for all concurrent lookup/update
 * activities to finish: acquire and immediately release the lookup
 * lock of every online CPU in turn.
 */
static void sync_access(void)
{
        int cpu;

        for_each_online_cpu(cpu) {
                unsigned long flags;

                spin_lock_irqsave(&per_cpu(lookup_lock, cpu), flags);
                /* empty critical section - taking the lock is the point */
                spin_unlock_irqrestore(&per_cpu(lookup_lock, cpu), flags);
        }
}

/*
 * Write handler: start or stop data collection.
 *
 * Accepts exactly two bytes written at offset 0; only the first byte
 * is examined. '1' resets the tables and starts collecting (if not
 * already active), '0' stops collecting (if active).
 *
 * Returns @count on success, -EINVAL for a wrong length, a non-zero
 * offset or an unknown command byte, and -EFAULT when the user buffer
 * cannot be copied.
 */
static ssize_t tstats_write(struct file *file, const char __user *buf,
                            size_t count, loff_t *offs)
{
        ssize_t ret = count;
        char ctl[2];

        if (count != 2 || *offs)
                return -EINVAL;

        if (copy_from_user(ctl, buf, count))
                return -EFAULT;

        mutex_lock(&show_mutex);
        if (ctl[0] == '0') {
                if (active) {
                        /* Stop first, then wait out in-flight updates. */
                        active = 0;
                        time_stop = ktime_get();
                        sync_access();
                }
        } else if (ctl[0] == '1') {
                if (!active) {
                        reset_entries();
                        time_start = ktime_get();
                        /* Order the reset before collection is enabled. */
                        smp_mb();
                        active = 1;
                }
        } else {
                ret = -EINVAL;
        }
        mutex_unlock(&show_mutex);

        return ret;
}

/*
 * Open handler: set up a single-record seq_file whose contents are
 * produced by tstats_show(). Returns whatever single_open() returns.
 */
static int tstats_open(struct inode *inode, struct file *filp)
{
        int ret;

        ret = single_open(filp, tstats_show, NULL);

        return ret;
}

/*
 * File operations of the timer_stats proc entry: reads go through the
 * seq_file helpers, writes through tstats_write().
 */
static struct file_operations tstats_fops = {
        .open           = tstats_open,
        .release        = single_release,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .write          = tstats_write,
};

/*
 * Initialize the per-CPU lookup lock of every possible CPU.
 */
void __init init_timer_stats(void)
{
        int cpu;

        for_each_possible_cpu(cpu) {
                spin_lock_init(&per_cpu(lookup_lock, cpu));
        }
}

/*
 * Create the "timer_stats" proc entry (mode 0644) backed by
 * tstats_fops. Returns 0 on success, -ENOMEM when proc_create() fails.
 */
static int __init init_tstats_procfs(void)
{
        struct proc_dir_entry *pe = proc_create("timer_stats", 0644, NULL,
                                                &tstats_fops);

        return pe ? 0 : -ENOMEM;
}
__initcall(init_tstats_procfs);

/* [<][>][^][v][top][bottom][index][help] */

[funini.com] -> [kei@sodan] -> Kernel Reading