[funini.com] -> [kei@sodan] -> Kernel Reading

root/fs/ubifs/shrinker.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. shrink_tnc
  2. shrink_tnc_trees
  3. kick_a_thread
  4. ubifs_shrinker

/*
 * This file is part of UBIFS.
 *
 * Copyright (C) 2006-2008 Nokia Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 51
 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * Authors: Artem Bityutskiy (Битюцкий Артём)
 *          Adrian Hunter
 */

/*
 * This file implements UBIFS shrinker which evicts clean znodes from the TNC
 * tree when Linux VM needs more RAM.
 *
 * We do not implement any LRU lists to find oldest znodes to free because it
 * would add additional overhead to the file system fast paths. So the shrinker
 * just walks the TNC tree when searching for znodes to free.
 *
 * If the root of a TNC sub-tree is clean and old enough, then the children are
 * also clean and old enough. So the shrinker walks the TNC in level order and
 * dumps entire sub-trees.
 *
 * The age of znodes is just the time-stamp when they were last looked at.
 * The current shrinker first tries to evict old znodes, then young ones.
 *
 * Since the shrinker is global, it has to protect against races with FS
 * un-mounts, which is done by the 'ubifs_infos_lock' and 'c->umount_mutex'.
 */

#include "ubifs.h"

/* List of all UBIFS file-system instances */
LIST_HEAD(ubifs_infos);

/*
 * We number each shrinker run and record the number on the ubifs_info structure
 * so that we can easily work out which ubifs_info structures have already been
 * done by the current run.
 */
static unsigned int shrinker_run_no;

/* Protects 'ubifs_infos' list */
DEFINE_SPINLOCK(ubifs_infos_lock);

/* Global clean znode counter (for all mounted UBIFS instances) */
atomic_long_t ubifs_clean_zn_cnt;

/**
 * shrink_tnc - shrink TNC tree.
 * @c: UBIFS file-system description object
 * @nr: number of znodes to free
 * @age: the age of znodes to free
 * @contention: if any contention, this is set to %1
 *
 * This function traverses TNC tree and frees clean znodes. It does not free
 * clean znodes which younger then @age. Returns number of freed znodes.
 */
static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention)
{
        int total_freed = 0;
        struct ubifs_znode *znode, *zprev;
        int time = get_seconds();

        ubifs_assert(mutex_is_locked(&c->umount_mutex));
        ubifs_assert(mutex_is_locked(&c->tnc_mutex));

        if (!c->zroot.znode || atomic_long_read(&c->clean_zn_cnt) == 0)
                return 0;

        /*
         * Traverse the TNC tree in levelorder manner, so that it is possible
         * to destroy large sub-trees. Indeed, if a znode is old, then all its
         * children are older or of the same age.
         *
         * Note, we are holding 'c->tnc_mutex', so we do not have to lock the
         * 'c->space_lock' when _reading_ 'c->clean_zn_cnt', because it is
         * changed only when the 'c->tnc_mutex' is held.
         */
        zprev = NULL;
        znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL);
        while (znode && total_freed < nr &&
               atomic_long_read(&c->clean_zn_cnt) > 0) {
                int freed;

                /*
                 * If the znode is clean, but it is in the 'c->cnext' list, this
                 * means that this znode has just been written to flash as a
                 * part of commit and was marked clean. They will be removed
                 * from the list at end commit. We cannot change the list,
                 * because it is not protected by any mutex (design decision to
                 * make commit really independent and parallel to main I/O). So
                 * we just skip these znodes.
                 *
                 * Note, the 'clean_zn_cnt' counters are not updated until
                 * after the commit, so the UBIFS shrinker does not report
                 * the znodes which are in the 'c->cnext' list as freeable.
                 *
                 * Also note, if the root of a sub-tree is not in 'c->cnext',
                 * then the whole sub-tree is not in 'c->cnext' as well, so it
                 * is safe to dump whole sub-tree.
                 */

                if (znode->cnext) {
                        /*
                         * Very soon these znodes will be removed from the list
                         * and become freeable.
                         */
                        *contention = 1;
                } else if (!ubifs_zn_dirty(znode) &&
                           abs(time - znode->time) >= age) {
                        if (znode->parent)
                                znode->parent->zbranch[znode->iip].znode = NULL;
                        else
                                c->zroot.znode = NULL;

                        freed = ubifs_destroy_tnc_subtree(znode);
                        atomic_long_sub(freed, &ubifs_clean_zn_cnt);
                        atomic_long_sub(freed, &c->clean_zn_cnt);
                        ubifs_assert(atomic_long_read(&c->clean_zn_cnt) >= 0);
                        total_freed += freed;
                        znode = zprev;
                }

                if (unlikely(!c->zroot.znode))
                        break;

                zprev = znode;
                znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode);
                cond_resched();
        }

        return total_freed;
}

/**
 * shrink_tnc_trees - shrink UBIFS TNC trees.
 * @nr: number of znodes to free
 * @age: the age of znodes to free
 * @contention: if any contention, this is set to %1
 *
 * This function walks the list of mounted UBIFS file-systems and frees clean
 * znodes which are older then @age, until at least @nr znodes are freed.
 * Returns the number of freed znodes.
 */
static int shrink_tnc_trees(int nr, int age, int *contention)
{
        struct ubifs_info *c;
        struct list_head *p;
        unsigned int run_no;
        int freed = 0;

        spin_lock(&ubifs_infos_lock);
        do {
                run_no = ++shrinker_run_no;
        } while (run_no == 0);
        /* Iterate over all mounted UBIFS file-systems and try to shrink them */
        p = ubifs_infos.next;
        while (p != &ubifs_infos) {
                c = list_entry(p, struct ubifs_info, infos_list);
                /*
                 * We move the ones we do to the end of the list, so we stop
                 * when we see one we have already done.
                 */
                if (c->shrinker_run_no == run_no)
                        break;
                if (!mutex_trylock(&c->umount_mutex)) {
                        /* Some un-mount is in progress, try next FS */
                        *contention = 1;
                        p = p->next;
                        continue;
                }
                /*
                 * We're holding 'c->umount_mutex', so the file-system won't go
                 * away.
                 */
                if (!mutex_trylock(&c->tnc_mutex)) {
                        mutex_unlock(&c->umount_mutex);
                        *contention = 1;
                        p = p->next;
                        continue;
                }
                spin_unlock(&ubifs_infos_lock);
                /*
                 * OK, now we have TNC locked, the file-system cannot go away -
                 * it is safe to reap the cache.
                 */
                c->shrinker_run_no = run_no;
                freed += shrink_tnc(c, nr, age, contention);
                mutex_unlock(&c->tnc_mutex);
                spin_lock(&ubifs_infos_lock);
                /* Get the next list element before we move this one */
                p = p->next;
                /*
                 * Move this one to the end of the list to provide some
                 * fairness.
                 */
                list_del(&c->infos_list);
                list_add_tail(&c->infos_list, &ubifs_infos);
                mutex_unlock(&c->umount_mutex);
                if (freed >= nr)
                        break;
        }
        spin_unlock(&ubifs_infos_lock);
        return freed;
}

/**
 * kick_a_thread - kick a background thread to start commit.
 *
 * This function kicks a background thread to start background commit. Returns
 * %-1 if a thread was kicked or there is another reason to assume the memory
 * will soon be freed or become freeable. If there are no dirty znodes, returns
 * %0.
 */
static int kick_a_thread(void)
{
        int i;
        struct ubifs_info *c;

        /*
         * Iterate over all mounted UBIFS file-systems and find out if there is
         * already an ongoing commit operation there. If no, then iterate for
         * the second time and initiate background commit.
         */
        spin_lock(&ubifs_infos_lock);
        for (i = 0; i < 2; i++) {
                list_for_each_entry(c, &ubifs_infos, infos_list) {
                        long dirty_zn_cnt;

                        if (!mutex_trylock(&c->umount_mutex)) {
                                /*
                                 * Some un-mount is in progress, it will
                                 * certainly free memory, so just return.
                                 */
                                spin_unlock(&ubifs_infos_lock);
                                return -1;
                        }

                        dirty_zn_cnt = atomic_long_read(&c->dirty_zn_cnt);

                        if (!dirty_zn_cnt || c->cmt_state == COMMIT_BROKEN ||
                            c->ro_media) {
                                mutex_unlock(&c->umount_mutex);
                                continue;
                        }

                        if (c->cmt_state != COMMIT_RESTING) {
                                spin_unlock(&ubifs_infos_lock);
                                mutex_unlock(&c->umount_mutex);
                                return -1;
                        }

                        if (i == 1) {
                                list_del(&c->infos_list);
                                list_add_tail(&c->infos_list, &ubifs_infos);
                                spin_unlock(&ubifs_infos_lock);

                                ubifs_request_bg_commit(c);
                                mutex_unlock(&c->umount_mutex);
                                return -1;
                        }
                        mutex_unlock(&c->umount_mutex);
                }
        }
        spin_unlock(&ubifs_infos_lock);

        return 0;
}

int ubifs_shrinker(int nr, gfp_t gfp_mask)
{
        int freed, contention = 0;
        long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);

        if (nr == 0)
                return clean_zn_cnt;

        if (!clean_zn_cnt) {
                /*
                 * No clean znodes, nothing to reap. All we can do in this case
                 * is to kick background threads to start commit, which will
                 * probably make clean znodes which, in turn, will be freeable.
                 * And we return -1 which means will make VM call us again
                 * later.
                 */
                dbg_tnc("no clean znodes, kick a thread");
                return kick_a_thread();
        }

        freed = shrink_tnc_trees(nr, OLD_ZNODE_AGE, &contention);
        if (freed >= nr)
                goto out;

        dbg_tnc("not enough old znodes, try to free young ones");
        freed += shrink_tnc_trees(nr - freed, YOUNG_ZNODE_AGE, &contention);
        if (freed >= nr)
                goto out;

        dbg_tnc("not enough young znodes, free all");
        freed += shrink_tnc_trees(nr - freed, 0, &contention);

        if (!freed && contention) {
                dbg_tnc("freed nothing, but contention");
                return -1;
        }

out:
        dbg_tnc("%d znodes were freed, requested %d", freed, nr);
        return freed;
}

/* [<][>][^][v][top][bottom][index][help] */

[funini.com] -> [kei@sodan] -> Kernel Reading