source: rtems/cpukit/libfs/src/jffs2/src/nodemgmt.c @ d60c1665

5
Last change on this file since d60c1665 was d60c1665, checked in by Li Zefan <lizefan@…>, on 02/12/14 at 20:44:56

jffs2: avoid soft-lockup in jffs2_reserve_space_gc()

We triggered soft-lockup under stress test on 2.6.34 kernel.

BUG: soft lockup - CPU#1 stuck for 60009ms! [lockf2.test:14488]
...
[<bf09a4d4>] (jffs2_do_reserve_space+0x420/0x440 [jffs2])
[<bf09a528>] (jffs2_reserve_space_gc+0x34/0x78 [jffs2])
[<bf0a1350>] (jffs2_garbage_collect_dnode.isra.3+0x264/0x478 [jffs2])
[<bf0a2078>] (jffs2_garbage_collect_pass+0x9c0/0xe4c [jffs2])
[<bf09a670>] (jffs2_reserve_space+0x104/0x2a8 [jffs2])
[<bf09dc48>] (jffs2_write_inode_range+0x5c/0x4d4 [jffs2])
[<bf097d8c>] (jffs2_write_end+0x198/0x2c0 [jffs2])
[<c00e00a4>] (generic_file_buffered_write+0x158/0x200)
[<c00e14f4>] (generic_file_aio_write+0x3a4/0x414)
[<c00e15c0>] (generic_file_aio_write+0x5c/0xbc)
[<c012334c>] (do_sync_write+0x98/0xd4)
[<c0123a84>] (vfs_write+0xa8/0x150)
[<c0123d74>] (sys_write+0x3c/0xc0)]

Fix this by adding a cond_resched() in the while loop.

[akpm@…: don't initialize `ret']
Signed-off-by: Li Zefan <lizefan@…>
Cc: David Woodhouse <dwmw2@…>
Cc: Artem Bityutskiy <artem.bityutskiy@…>
Cc: <stable@…>
Signed-off-by: Andrew Morton <akpm@…>
Signed-off-by: Brian Norris <computersforpeace@…>

  • Property mode set to 100644
File size: 28.5 KB
Line 
1#include "rtems-jffs2-config.h"
2
3/*
4 * JFFS2 -- Journalling Flash File System, Version 2.
5 *
6 * Copyright © 2001-2007 Red Hat, Inc.
7 *
8 * Created by David Woodhouse <dwmw2@infradead.org>
9 *
10 * For licensing information, see the file 'LICENCE' in this directory.
11 *
12 */
13
14#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
15
16#include <linux/kernel.h>
17#include <linux/mtd/mtd.h>
18#include <linux/compiler.h>
19#include <linux/sched.h> /* For cond_resched() */
20#include "nodelist.h"
21#include "debug.h"
22
23/*
24 * Check whether the user is allowed to write.
25 */
26static int jffs2_rp_can_write(struct jffs2_sb_info *c)
27{
28        uint32_t avail;
29        struct jffs2_mount_opts *opts = &c->mount_opts;
30
31        avail = c->dirty_size + c->free_size + c->unchecked_size +
32                c->erasing_size - c->resv_blocks_write * c->sector_size
33                - c->nospc_dirty_size;
34
35        if (avail < 2 * opts->rp_size)
36                jffs2_dbg(1, "rpsize %u, dirty_size %u, free_size %u, "
37                          "erasing_size %u, unchecked_size %u, "
38                          "nr_erasing_blocks %u, avail %u, resrv %u\n",
39                          opts->rp_size, c->dirty_size, c->free_size,
40                          c->erasing_size, c->unchecked_size,
41                          c->nr_erasing_blocks, avail, c->nospc_dirty_size);
42
43        if (avail > opts->rp_size)
44                return 1;
45
46        /* Always allow root */
47        if (capable(CAP_SYS_RESOURCE))
48                return 1;
49
50        jffs2_dbg(1, "forbid writing\n");
51        return 0;
52}
53
54/**
55 *      jffs2_reserve_space - request physical space to write nodes to flash
56 *      @c: superblock info
57 *      @minsize: Minimum acceptable size of allocation
58 *      @len: Returned value of allocation length
59 *      @prio: Allocation type - ALLOC_{NORMAL,DELETION}
60 *
61 *      Requests a block of physical space on the flash. Returns zero for success
62 *      and puts 'len' into the appropriate place, or returns -ENOSPC or other
63 *      error if appropriate. Doesn't return len since that's
64 *
65 *      If it returns zero, jffs2_reserve_space() also downs the per-filesystem
66 *      allocation semaphore, to prevent more than one allocation from being
67 *      active at any time. The semaphore is later released by jffs2_commit_allocation()
68 *
69 *      jffs2_reserve_space() may trigger garbage collection in order to make room
70 *      for the requested allocation.
71 */
72
73static int jffs2_do_reserve_space(struct jffs2_sb_info *c,  uint32_t minsize,
74                                  uint32_t *len, uint32_t sumsize);
75
76int jffs2_reserve_space(struct jffs2_sb_info *c, uint32_t minsize,
77                        uint32_t *len, int prio, uint32_t sumsize)
78{
79        int ret = -EAGAIN;
80        int blocksneeded = c->resv_blocks_write;
81        /* align it */
82        minsize = PAD(minsize);
83
84        jffs2_dbg(1, "%s(): Requested 0x%x bytes\n", __func__, minsize);
85        mutex_lock(&c->alloc_sem);
86
87        jffs2_dbg(1, "%s(): alloc sem got\n", __func__);
88
89        spin_lock(&c->erase_completion_lock);
90
91        /*
92         * Check if the free space is greater then size of the reserved pool.
93         * If not, only allow root to proceed with writing.
94         */
95        if (prio != ALLOC_DELETION && !jffs2_rp_can_write(c)) {
96                ret = -ENOSPC;
97                goto out;
98        }
99
100        /* this needs a little more thought (true <tglx> :)) */
101        while(ret == -EAGAIN) {
102                while(c->nr_free_blocks + c->nr_erasing_blocks < blocksneeded) {
103                        uint32_t dirty, avail;
104
105                        /* calculate real dirty size
106                         * dirty_size contains blocks on erase_pending_list
107                         * those blocks are counted in c->nr_erasing_blocks.
108                         * If one block is actually erased, it is not longer counted as dirty_space
109                         * but it is counted in c->nr_erasing_blocks, so we add it and subtract it
110                         * with c->nr_erasing_blocks * c->sector_size again.
111                         * Blocks on erasable_list are counted as dirty_size, but not in c->nr_erasing_blocks
112                         * This helps us to force gc and pick eventually a clean block to spread the load.
113                         * We add unchecked_size here, as we hopefully will find some space to use.
114                         * This will affect the sum only once, as gc first finishes checking
115                         * of nodes.
116                         */
117                        dirty = c->dirty_size + c->erasing_size - c->nr_erasing_blocks * c->sector_size + c->unchecked_size;
118                        if (dirty < c->nospc_dirty_size) {
119                                if (prio == ALLOC_DELETION && c->nr_free_blocks + c->nr_erasing_blocks >= c->resv_blocks_deletion) {
120                                        jffs2_dbg(1, "%s(): Low on dirty space to GC, but it's a deletion. Allowing...\n",
121                                                  __func__);
122                                        break;
123                                }
124                                jffs2_dbg(1, "dirty size 0x%08x + unchecked_size 0x%08x < nospc_dirty_size 0x%08x, returning -ENOSPC\n",
125                                          dirty, c->unchecked_size,
126                                          c->sector_size);
127
128                                spin_unlock(&c->erase_completion_lock);
129                                mutex_unlock(&c->alloc_sem);
130                                return -ENOSPC;
131                        }
132
133                        /* Calc possibly available space. Possibly available means that we
134                         * don't know, if unchecked size contains obsoleted nodes, which could give us some
135                         * more usable space. This will affect the sum only once, as gc first finishes checking
136                         * of nodes.
137                         + Return -ENOSPC, if the maximum possibly available space is less or equal than
138                         * blocksneeded * sector_size.
139                         * This blocks endless gc looping on a filesystem, which is nearly full, even if
140                         * the check above passes.
141                         */
142                        avail = c->free_size + c->dirty_size + c->erasing_size + c->unchecked_size;
143                        if ( (avail / c->sector_size) <= blocksneeded) {
144                                if (prio == ALLOC_DELETION && c->nr_free_blocks + c->nr_erasing_blocks >= c->resv_blocks_deletion) {
145                                        jffs2_dbg(1, "%s(): Low on possibly available space, but it's a deletion. Allowing...\n",
146                                                  __func__);
147                                        break;
148                                }
149
150                                jffs2_dbg(1, "max. available size 0x%08x  < blocksneeded * sector_size 0x%08x, returning -ENOSPC\n",
151                                          avail, blocksneeded * c->sector_size);
152                                spin_unlock(&c->erase_completion_lock);
153                                mutex_unlock(&c->alloc_sem);
154                                return -ENOSPC;
155                        }
156
157                        mutex_unlock(&c->alloc_sem);
158
159                        jffs2_dbg(1, "Triggering GC pass. nr_free_blocks %d, nr_erasing_blocks %d, free_size 0x%08x, dirty_size 0x%08x, wasted_size 0x%08x, used_size 0x%08x, erasing_size 0x%08x, bad_size 0x%08x (total 0x%08x of 0x%08x)\n",
160                                  c->nr_free_blocks, c->nr_erasing_blocks,
161                                  c->free_size, c->dirty_size, c->wasted_size,
162                                  c->used_size, c->erasing_size, c->bad_size,
163                                  c->free_size + c->dirty_size +
164                                  c->wasted_size + c->used_size +
165                                  c->erasing_size + c->bad_size,
166                                  c->flash_size);
167                        spin_unlock(&c->erase_completion_lock);
168
169                        ret = jffs2_garbage_collect_pass(c);
170
171                        if (ret == -EAGAIN) {
172                                spin_lock(&c->erase_completion_lock);
173                                if (c->nr_erasing_blocks &&
174                                    list_empty(&c->erase_pending_list) &&
175                                    list_empty(&c->erase_complete_list)) {
176                                        DECLARE_WAITQUEUE(wait, current);
177                                        set_current_state(TASK_UNINTERRUPTIBLE);
178                                        add_wait_queue(&c->erase_wait, &wait);
179                                        jffs2_dbg(1, "%s waiting for erase to complete\n",
180                                                  __func__);
181                                        spin_unlock(&c->erase_completion_lock);
182
183                                        schedule();
184                                        remove_wait_queue(&c->erase_wait, &wait);
185                                } else
186                                        spin_unlock(&c->erase_completion_lock);
187                        } else if (ret)
188                                return ret;
189
190                        cond_resched();
191
192                        if (signal_pending(current))
193                                return -EINTR;
194
195                        mutex_lock(&c->alloc_sem);
196                        spin_lock(&c->erase_completion_lock);
197                }
198
199                ret = jffs2_do_reserve_space(c, minsize, len, sumsize);
200                if (ret) {
201                        jffs2_dbg(1, "%s(): ret is %d\n", __func__, ret);
202                }
203        }
204
205out:
206        spin_unlock(&c->erase_completion_lock);
207        if (!ret)
208                ret = jffs2_prealloc_raw_node_refs(c, c->nextblock, 1);
209        if (ret)
210                mutex_unlock(&c->alloc_sem);
211        return ret;
212}
213
214int jffs2_reserve_space_gc(struct jffs2_sb_info *c, uint32_t minsize,
215                           uint32_t *len, uint32_t sumsize)
216{
217        int ret;
218        minsize = PAD(minsize);
219
220        jffs2_dbg(1, "%s(): Requested 0x%x bytes\n", __func__, minsize);
221
222        while (true) {
223                spin_lock(&c->erase_completion_lock);
224                ret = jffs2_do_reserve_space(c, minsize, len, sumsize);
225                if (ret) {
226                        jffs2_dbg(1, "%s(): looping, ret is %d\n",
227                                  __func__, ret);
228                }
229                spin_unlock(&c->erase_completion_lock);
230
231                if (ret == -EAGAIN)
232                        cond_resched();
233                else
234                        break;
235        }
236        if (!ret)
237                ret = jffs2_prealloc_raw_node_refs(c, c->nextblock, 1);
238
239        return ret;
240}
241
242
243/* Classify nextblock (clean, dirty of verydirty) and force to select an other one */
244
245static void jffs2_close_nextblock(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb)
246{
247
248        if (c->nextblock == NULL) {
249                jffs2_dbg(1, "%s(): Erase block at 0x%08x has already been placed in a list\n",
250                          __func__, jeb->offset);
251                return;
252        }
253        /* Check, if we have a dirty block now, or if it was dirty already */
254        if (ISDIRTY (jeb->wasted_size + jeb->dirty_size)) {
255                c->dirty_size += jeb->wasted_size;
256                c->wasted_size -= jeb->wasted_size;
257                jeb->dirty_size += jeb->wasted_size;
258                jeb->wasted_size = 0;
259                if (VERYDIRTY(c, jeb->dirty_size)) {
260                        jffs2_dbg(1, "Adding full erase block at 0x%08x to very_dirty_list (free 0x%08x, dirty 0x%08x, used 0x%08x\n",
261                                  jeb->offset, jeb->free_size, jeb->dirty_size,
262                                  jeb->used_size);
263                        list_add_tail(&jeb->list, &c->very_dirty_list);
264                } else {
265                        jffs2_dbg(1, "Adding full erase block at 0x%08x to dirty_list (free 0x%08x, dirty 0x%08x, used 0x%08x\n",
266                                  jeb->offset, jeb->free_size, jeb->dirty_size,
267                                  jeb->used_size);
268                        list_add_tail(&jeb->list, &c->dirty_list);
269                }
270        } else {
271                jffs2_dbg(1, "Adding full erase block at 0x%08x to clean_list (free 0x%08x, dirty 0x%08x, used 0x%08x\n",
272                          jeb->offset, jeb->free_size, jeb->dirty_size,
273                          jeb->used_size);
274                list_add_tail(&jeb->list, &c->clean_list);
275        }
276        c->nextblock = NULL;
277
278}
279
280/* Select a new jeb for nextblock */
281
282static int jffs2_find_nextblock(struct jffs2_sb_info *c)
283{
284        struct list_head *next;
285
286        /* Take the next block off the 'free' list */
287
288        if (list_empty(&c->free_list)) {
289
290                if (!c->nr_erasing_blocks &&
291                        !list_empty(&c->erasable_list)) {
292                        struct jffs2_eraseblock *ejeb;
293
294                        ejeb = list_entry(c->erasable_list.next, struct jffs2_eraseblock, list);
295                        list_move_tail(&ejeb->list, &c->erase_pending_list);
296                        c->nr_erasing_blocks++;
297                        jffs2_garbage_collect_trigger(c);
298                        jffs2_dbg(1, "%s(): Triggering erase of erasable block at 0x%08x\n",
299                                  __func__, ejeb->offset);
300                }
301
302                if (!c->nr_erasing_blocks &&
303                        !list_empty(&c->erasable_pending_wbuf_list)) {
304                        jffs2_dbg(1, "%s(): Flushing write buffer\n",
305                                  __func__);
306                        /* c->nextblock is NULL, no update to c->nextblock allowed */
307                        spin_unlock(&c->erase_completion_lock);
308                        jffs2_flush_wbuf_pad(c);
309                        spin_lock(&c->erase_completion_lock);
310                        /* Have another go. It'll be on the erasable_list now */
311                        return -EAGAIN;
312                }
313
314                if (!c->nr_erasing_blocks) {
315                        /* Ouch. We're in GC, or we wouldn't have got here.
316                           And there's no space left. At all. */
317                        pr_crit("Argh. No free space left for GC. nr_erasing_blocks is %d. nr_free_blocks is %d. (erasableempty: %s, erasingempty: %s, erasependingempty: %s)\n",
318                                c->nr_erasing_blocks, c->nr_free_blocks,
319                                list_empty(&c->erasable_list) ? "yes" : "no",
320                                list_empty(&c->erasing_list) ? "yes" : "no",
321                                list_empty(&c->erase_pending_list) ? "yes" : "no");
322                        return -ENOSPC;
323                }
324
325                spin_unlock(&c->erase_completion_lock);
326                /* Don't wait for it; just erase one right now */
327                jffs2_erase_pending_blocks(c, 1);
328                spin_lock(&c->erase_completion_lock);
329
330                /* An erase may have failed, decreasing the
331                   amount of free space available. So we must
332                   restart from the beginning */
333                return -EAGAIN;
334        }
335
336        next = c->free_list.next;
337        list_del(next);
338        c->nextblock = list_entry(next, struct jffs2_eraseblock, list);
339        c->nr_free_blocks--;
340
341        jffs2_sum_reset_collected(c->summary); /* reset collected summary */
342
343#ifdef CONFIG_JFFS2_FS_WRITEBUFFER
344        /* adjust write buffer offset, else we get a non contiguous write bug */
345        if (!(c->wbuf_ofs % c->sector_size) && !c->wbuf_len)
346                c->wbuf_ofs = 0xffffffff;
347#endif
348
349        jffs2_dbg(1, "%s(): new nextblock = 0x%08x\n",
350                  __func__, c->nextblock->offset);
351
352        return 0;
353}
354
355/* Called with alloc sem _and_ erase_completion_lock */
356static int jffs2_do_reserve_space(struct jffs2_sb_info *c, uint32_t minsize,
357                                  uint32_t *len, uint32_t sumsize)
358{
359        struct jffs2_eraseblock *jeb = c->nextblock;
360        uint32_t reserved_size;                         /* for summary information at the end of the jeb */
361        int ret;
362
363 restart:
364        reserved_size = 0;
365
366        if (jffs2_sum_active() && (sumsize != JFFS2_SUMMARY_NOSUM_SIZE)) {
367                                                        /* NOSUM_SIZE means not to generate summary */
368
369                if (jeb) {
370                        reserved_size = PAD(sumsize + c->summary->sum_size + JFFS2_SUMMARY_FRAME_SIZE);
371                        dbg_summary("minsize=%d , jeb->free=%d ,"
372                                                "summary->size=%d , sumsize=%d\n",
373                                                minsize, jeb->free_size,
374                                                c->summary->sum_size, sumsize);
375                }
376
377                /* Is there enough space for writing out the current node, or we have to
378                   write out summary information now, close this jeb and select new nextblock? */
379                if (jeb && (PAD(minsize) + PAD(c->summary->sum_size + sumsize +
380                                        JFFS2_SUMMARY_FRAME_SIZE) > jeb->free_size)) {
381
382                        /* Has summary been disabled for this jeb? */
383                        if (jffs2_sum_is_disabled(c->summary)) {
384                                sumsize = JFFS2_SUMMARY_NOSUM_SIZE;
385                                goto restart;
386                        }
387
388                        /* Writing out the collected summary information */
389                        dbg_summary("generating summary for 0x%08x.\n", jeb->offset);
390                        ret = jffs2_sum_write_sumnode(c);
391
392                        if (ret)
393                                return ret;
394
395                        if (jffs2_sum_is_disabled(c->summary)) {
396                                /* jffs2_write_sumnode() couldn't write out the summary information
397                                   diabling summary for this jeb and free the collected information
398                                 */
399                                sumsize = JFFS2_SUMMARY_NOSUM_SIZE;
400                                goto restart;
401                        }
402
403                        jffs2_close_nextblock(c, jeb);
404                        jeb = NULL;
405                        /* keep always valid value in reserved_size */
406                        reserved_size = PAD(sumsize + c->summary->sum_size + JFFS2_SUMMARY_FRAME_SIZE);
407                }
408        } else {
409                if (jeb && minsize > jeb->free_size) {
410                        uint32_t waste;
411
412                        /* Skip the end of this block and file it as having some dirty space */
413                        /* If there's a pending write to it, flush now */
414
415                        if (jffs2_wbuf_dirty(c)) {
416                                spin_unlock(&c->erase_completion_lock);
417                                jffs2_dbg(1, "%s(): Flushing write buffer\n",
418                                          __func__);
419                                jffs2_flush_wbuf_pad(c);
420                                spin_lock(&c->erase_completion_lock);
421                                jeb = c->nextblock;
422                                goto restart;
423                        }
424
425                        spin_unlock(&c->erase_completion_lock);
426
427                        ret = jffs2_prealloc_raw_node_refs(c, jeb, 1);
428
429                        /* Just lock it again and continue. Nothing much can change because
430                           we hold c->alloc_sem anyway. In fact, it's not entirely clear why
431                           we hold c->erase_completion_lock in the majority of this function...
432                           but that's a question for another (more caffeine-rich) day. */
433                        spin_lock(&c->erase_completion_lock);
434
435                        if (ret)
436                                return ret;
437
438                        waste = jeb->free_size;
439                        jffs2_link_node_ref(c, jeb,
440                                            (jeb->offset + c->sector_size - waste) | REF_OBSOLETE,
441                                            waste, NULL);
442                        /* FIXME: that made it count as dirty. Convert to wasted */
443                        jeb->dirty_size -= waste;
444                        c->dirty_size -= waste;
445                        jeb->wasted_size += waste;
446                        c->wasted_size += waste;
447
448                        jffs2_close_nextblock(c, jeb);
449                        jeb = NULL;
450                }
451        }
452
453        if (!jeb) {
454
455                ret = jffs2_find_nextblock(c);
456                if (ret)
457                        return ret;
458
459                jeb = c->nextblock;
460
461                if (jeb->free_size != c->sector_size - c->cleanmarker_size) {
462                        pr_warn("Eep. Block 0x%08x taken from free_list had free_size of 0x%08x!!\n",
463                                jeb->offset, jeb->free_size);
464                        goto restart;
465                }
466        }
467        /* OK, jeb (==c->nextblock) is now pointing at a block which definitely has
468           enough space */
469        *len = jeb->free_size - reserved_size;
470
471        if (c->cleanmarker_size && jeb->used_size == c->cleanmarker_size &&
472            !jeb->first_node->next_in_ino) {
473                /* Only node in it beforehand was a CLEANMARKER node (we think).
474                   So mark it obsolete now that there's going to be another node
475                   in the block. This will reduce used_size to zero but We've
476                   already set c->nextblock so that jffs2_mark_node_obsolete()
477                   won't try to refile it to the dirty_list.
478                */
479                spin_unlock(&c->erase_completion_lock);
480                jffs2_mark_node_obsolete(c, jeb->first_node);
481                spin_lock(&c->erase_completion_lock);
482        }
483
484        jffs2_dbg(1, "%s(): Giving 0x%x bytes at 0x%x\n",
485                  __func__,
486                  *len, jeb->offset + (c->sector_size - jeb->free_size));
487        return 0;
488}
489
490/**
491 *      jffs2_add_physical_node_ref - add a physical node reference to the list
492 *      @c: superblock info
493 *      @new: new node reference to add
494 *      @len: length of this physical node
495 *
496 *      Should only be used to report nodes for which space has been allocated
497 *      by jffs2_reserve_space.
498 *
499 *      Must be called with the alloc_sem held.
500 */
501
502struct jffs2_raw_node_ref *jffs2_add_physical_node_ref(struct jffs2_sb_info *c,
503                                                       uint32_t ofs, uint32_t len,
504                                                       struct jffs2_inode_cache *ic)
505{
506        struct jffs2_eraseblock *jeb;
507        struct jffs2_raw_node_ref *new;
508
509        jeb = &c->blocks[ofs / c->sector_size];
510
511        jffs2_dbg(1, "%s(): Node at 0x%x(%d), size 0x%x\n",
512                  __func__, ofs & ~3, ofs & 3, len);
513#if 1
514        /* Allow non-obsolete nodes only to be added at the end of c->nextblock,
515           if c->nextblock is set. Note that wbuf.c will file obsolete nodes
516           even after refiling c->nextblock */
517        if ((c->nextblock || ((ofs & 3) != REF_OBSOLETE))
518            && (jeb != c->nextblock || (ofs & ~3) != jeb->offset + (c->sector_size - jeb->free_size))) {
519                pr_warn("argh. node added in wrong place at 0x%08x(%d)\n",
520                        ofs & ~3, ofs & 3);
521                if (c->nextblock)
522                        pr_warn("nextblock 0x%08x", c->nextblock->offset);
523                else
524                        pr_warn("No nextblock");
525                pr_cont(", expected at %08x\n",
526                        jeb->offset + (c->sector_size - jeb->free_size));
527                return ERR_PTR(-EINVAL);
528        }
529#endif
530        spin_lock(&c->erase_completion_lock);
531
532        new = jffs2_link_node_ref(c, jeb, ofs, len, ic);
533
534        if (!jeb->free_size && !jeb->dirty_size && !ISDIRTY(jeb->wasted_size)) {
535                /* If it lives on the dirty_list, jffs2_reserve_space will put it there */
536                jffs2_dbg(1, "Adding full erase block at 0x%08x to clean_list (free 0x%08x, dirty 0x%08x, used 0x%08x\n",
537                          jeb->offset, jeb->free_size, jeb->dirty_size,
538                          jeb->used_size);
539                if (jffs2_wbuf_dirty(c)) {
540                        /* Flush the last write in the block if it's outstanding */
541                        spin_unlock(&c->erase_completion_lock);
542                        jffs2_flush_wbuf_pad(c);
543                        spin_lock(&c->erase_completion_lock);
544                }
545
546                list_add_tail(&jeb->list, &c->clean_list);
547                c->nextblock = NULL;
548        }
549        jffs2_dbg_acct_sanity_check_nolock(c,jeb);
550        jffs2_dbg_acct_paranoia_check_nolock(c, jeb);
551
552        spin_unlock(&c->erase_completion_lock);
553
554        return new;
555}
556
557
558void jffs2_complete_reservation(struct jffs2_sb_info *c)
559{
560        jffs2_dbg(1, "jffs2_complete_reservation()\n");
561        spin_lock(&c->erase_completion_lock);
562        jffs2_garbage_collect_trigger(c);
563        spin_unlock(&c->erase_completion_lock);
564        mutex_unlock(&c->alloc_sem);
565}
566
567static inline int on_list(struct list_head *obj, struct list_head *head)
568{
569        struct list_head *this;
570
571        list_for_each(this, head) {
572                if (this == obj) {
573                        jffs2_dbg(1, "%p is on list at %p\n", obj, head);
574                        return 1;
575
576                }
577        }
578        return 0;
579}
580
581void jffs2_mark_node_obsolete(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref)
582{
583        struct jffs2_eraseblock *jeb;
584        int blocknr;
585        struct jffs2_unknown_node n;
586        int ret, addedsize;
587        size_t retlen;
588        uint32_t freed_len;
589
590        if(unlikely(!ref)) {
591                pr_notice("EEEEEK. jffs2_mark_node_obsolete called with NULL node\n");
592                return;
593        }
594        if (ref_obsolete(ref)) {
595                jffs2_dbg(1, "%s(): called with already obsolete node at 0x%08x\n",
596                          __func__, ref_offset(ref));
597                return;
598        }
599        blocknr = ref->flash_offset / c->sector_size;
600        if (blocknr >= c->nr_blocks) {
601                pr_notice("raw node at 0x%08x is off the end of device!\n",
602                          ref->flash_offset);
603                BUG();
604        }
605        jeb = &c->blocks[blocknr];
606
607        if (jffs2_can_mark_obsolete(c) && !jffs2_is_readonly(c) &&
608            !(c->flags & (JFFS2_SB_FLAG_SCANNING | JFFS2_SB_FLAG_BUILDING))) {
609                /* Hm. This may confuse static lock analysis. If any of the above
610                   three conditions is false, we're going to return from this
611                   function without actually obliterating any nodes or freeing
612                   any jffs2_raw_node_refs. So we don't need to stop erases from
613                   happening, or protect against people holding an obsolete
614                   jffs2_raw_node_ref without the erase_completion_lock. */
615                mutex_lock(&c->erase_free_sem);
616        }
617
618        spin_lock(&c->erase_completion_lock);
619
620        freed_len = ref_totlen(c, jeb, ref);
621
622        if (ref_flags(ref) == REF_UNCHECKED) {
623                D1(if (unlikely(jeb->unchecked_size < freed_len)) {
624                                pr_notice("raw unchecked node of size 0x%08x freed from erase block %d at 0x%08x, but unchecked_size was already 0x%08x\n",
625                                          freed_len, blocknr,
626                                          ref->flash_offset, jeb->used_size);
627                        BUG();
628                })
629                        jffs2_dbg(1, "Obsoleting previously unchecked node at 0x%08x of len %x\n",
630                                  ref_offset(ref), freed_len);
631                jeb->unchecked_size -= freed_len;
632                c->unchecked_size -= freed_len;
633        } else {
634                D1(if (unlikely(jeb->used_size < freed_len)) {
635                                pr_notice("raw node of size 0x%08x freed from erase block %d at 0x%08x, but used_size was already 0x%08x\n",
636                                          freed_len, blocknr,
637                                          ref->flash_offset, jeb->used_size);
638                        BUG();
639                })
640                        jffs2_dbg(1, "Obsoleting node at 0x%08x of len %#x: ",
641                                  ref_offset(ref), freed_len);
642                jeb->used_size -= freed_len;
643                c->used_size -= freed_len;
644        }
645
646        // Take care, that wasted size is taken into concern
647        if ((jeb->dirty_size || ISDIRTY(jeb->wasted_size + freed_len)) && jeb != c->nextblock) {
648                jffs2_dbg(1, "Dirtying\n");
649                addedsize = freed_len;
650                jeb->dirty_size += freed_len;
651                c->dirty_size += freed_len;
652
653                /* Convert wasted space to dirty, if not a bad block */
654                if (jeb->wasted_size) {
655                        if (on_list(&jeb->list, &c->bad_used_list)) {
656                                jffs2_dbg(1, "Leaving block at %08x on the bad_used_list\n",
657                                          jeb->offset);
658                                addedsize = 0; /* To fool the refiling code later */
659                        } else {
660                                jffs2_dbg(1, "Converting %d bytes of wasted space to dirty in block at %08x\n",
661                                          jeb->wasted_size, jeb->offset);
662                                addedsize += jeb->wasted_size;
663                                jeb->dirty_size += jeb->wasted_size;
664                                c->dirty_size += jeb->wasted_size;
665                                c->wasted_size -= jeb->wasted_size;
666                                jeb->wasted_size = 0;
667                        }
668                }
669        } else {
670                jffs2_dbg(1, "Wasting\n");
671                addedsize = 0;
672                jeb->wasted_size += freed_len;
673                c->wasted_size += freed_len;
674        }
675        ref->flash_offset = ref_offset(ref) | REF_OBSOLETE;
676
677        jffs2_dbg_acct_sanity_check_nolock(c, jeb);
678        jffs2_dbg_acct_paranoia_check_nolock(c, jeb);
679
680        if (c->flags & JFFS2_SB_FLAG_SCANNING) {
681                /* Flash scanning is in progress. Don't muck about with the block
682                   lists because they're not ready yet, and don't actually
683                   obliterate nodes that look obsolete. If they weren't
684                   marked obsolete on the flash at the time they _became_
685                   obsolete, there was probably a reason for that. */
686                spin_unlock(&c->erase_completion_lock);
687                /* We didn't lock the erase_free_sem */
688                return;
689        }
690
691        if (jeb == c->nextblock) {
692                jffs2_dbg(2, "Not moving nextblock 0x%08x to dirty/erase_pending list\n",
693                          jeb->offset);
694        } else if (!jeb->used_size && !jeb->unchecked_size) {
695                if (jeb == c->gcblock) {
696                        jffs2_dbg(1, "gcblock at 0x%08x completely dirtied. Clearing gcblock...\n",
697                                  jeb->offset);
698                        c->gcblock = NULL;
699                } else {
700                        jffs2_dbg(1, "Eraseblock at 0x%08x completely dirtied. Removing from (dirty?) list...\n",
701                                  jeb->offset);
702                        list_del(&jeb->list);
703                }
704                if (jffs2_wbuf_dirty(c)) {
705                        jffs2_dbg(1, "...and adding to erasable_pending_wbuf_list\n");
706                        list_add_tail(&jeb->list, &c->erasable_pending_wbuf_list);
707                } else {
708                        if (jiffies & 127) {
709                                /* Most of the time, we just erase it immediately. Otherwise we
710                                   spend ages scanning it on mount, etc. */
711                                jffs2_dbg(1, "...and adding to erase_pending_list\n");
712                                list_add_tail(&jeb->list, &c->erase_pending_list);
713                                c->nr_erasing_blocks++;
714                                jffs2_garbage_collect_trigger(c);
715                        } else {
716                                /* Sometimes, however, we leave it elsewhere so it doesn't get
717                                   immediately reused, and we spread the load a bit. */
718                                jffs2_dbg(1, "...and adding to erasable_list\n");
719                                list_add_tail(&jeb->list, &c->erasable_list);
720                        }
721                }
722                jffs2_dbg(1, "Done OK\n");
723        } else if (jeb == c->gcblock) {
724                jffs2_dbg(2, "Not moving gcblock 0x%08x to dirty_list\n",
725                          jeb->offset);
726        } else if (ISDIRTY(jeb->dirty_size) && !ISDIRTY(jeb->dirty_size - addedsize)) {
727                jffs2_dbg(1, "Eraseblock at 0x%08x is freshly dirtied. Removing from clean list...\n",
728                          jeb->offset);
729                list_del(&jeb->list);
730                jffs2_dbg(1, "...and adding to dirty_list\n");
731                list_add_tail(&jeb->list, &c->dirty_list);
732        } else if (VERYDIRTY(c, jeb->dirty_size) &&
733                   !VERYDIRTY(c, jeb->dirty_size - addedsize)) {
734                jffs2_dbg(1, "Eraseblock at 0x%08x is now very dirty. Removing from dirty list...\n",
735                          jeb->offset);
736                list_del(&jeb->list);
737                jffs2_dbg(1, "...and adding to very_dirty_list\n");
738                list_add_tail(&jeb->list, &c->very_dirty_list);
739        } else {
740                jffs2_dbg(1, "Eraseblock at 0x%08x not moved anywhere. (free 0x%08x, dirty 0x%08x, used 0x%08x)\n",
741                          jeb->offset, jeb->free_size, jeb->dirty_size,
742                          jeb->used_size);
743        }
744
745        spin_unlock(&c->erase_completion_lock);
746
747        if (!jffs2_can_mark_obsolete(c) || jffs2_is_readonly(c) ||
748                (c->flags & JFFS2_SB_FLAG_BUILDING)) {
749                /* We didn't lock the erase_free_sem */
750                return;
751        }
752
753        /* The erase_free_sem is locked, and has been since before we marked the node obsolete
754           and potentially put its eraseblock onto the erase_pending_list. Thus, we know that
755           the block hasn't _already_ been erased, and that 'ref' itself hasn't been freed yet
756           by jffs2_free_jeb_node_refs() in erase.c. Which is nice. */
757
758        jffs2_dbg(1, "obliterating obsoleted node at 0x%08x\n",
759                  ref_offset(ref));
760        ret = jffs2_flash_read(c, ref_offset(ref), sizeof(n), &retlen, (char *)&n);
761        if (ret) {
762                pr_warn("Read error reading from obsoleted node at 0x%08x: %d\n",
763                        ref_offset(ref), ret);
764                goto out_erase_sem;
765        }
766        if (retlen != sizeof(n)) {
767                pr_warn("Short read from obsoleted node at 0x%08x: %zd\n",
768                        ref_offset(ref), retlen);
769                goto out_erase_sem;
770        }
771        if (PAD(je32_to_cpu(n.totlen)) != PAD(freed_len)) {
772                pr_warn("Node totlen on flash (0x%08x) != totlen from node ref (0x%08x)\n",
773                        je32_to_cpu(n.totlen), freed_len);
774                goto out_erase_sem;
775        }
776        if (!(je16_to_cpu(n.nodetype) & JFFS2_NODE_ACCURATE)) {
777                jffs2_dbg(1, "Node at 0x%08x was already marked obsolete (nodetype 0x%04x)\n",
778                          ref_offset(ref), je16_to_cpu(n.nodetype));
779                goto out_erase_sem;
780        }
781        /* XXX FIXME: This is ugly now */
782        n.nodetype = cpu_to_je16(je16_to_cpu(n.nodetype) & ~JFFS2_NODE_ACCURATE);
783        ret = jffs2_flash_write(c, ref_offset(ref), sizeof(n), &retlen, (char *)&n);
784        if (ret) {
785                pr_warn("Write error in obliterating obsoleted node at 0x%08x: %d\n",
786                        ref_offset(ref), ret);
787                goto out_erase_sem;
788        }
789        if (retlen != sizeof(n)) {
790                pr_warn("Short write in obliterating obsoleted node at 0x%08x: %zd\n",
791                        ref_offset(ref), retlen);
792                goto out_erase_sem;
793        }
794
795        /* Nodes which have been marked obsolete no longer need to be
796           associated with any inode. Remove them from the per-inode list.
797
798           Note we can't do this for NAND at the moment because we need
799           obsolete dirent nodes to stay on the lists, because of the
800           horridness in jffs2_garbage_collect_deletion_dirent(). Also
801           because we delete the inocache, and on NAND we need that to
802           stay around until all the nodes are actually erased, in order
803           to stop us from giving the same inode number to another newly
804           created inode. */
805        if (ref->next_in_ino) {
806                struct jffs2_inode_cache *ic;
807                struct jffs2_raw_node_ref **p;
808
809                spin_lock(&c->erase_completion_lock);
810
811                ic = jffs2_raw_ref_to_ic(ref);
812                for (p = &ic->nodes; (*p) != ref; p = &((*p)->next_in_ino))
813                        ;
814
815                *p = ref->next_in_ino;
816                ref->next_in_ino = NULL;
817
818                switch (ic->class) {
819#ifdef CONFIG_JFFS2_FS_XATTR
820                        case RAWNODE_CLASS_XATTR_DATUM:
821                                jffs2_release_xattr_datum(c, (struct jffs2_xattr_datum *)ic);
822                                break;
823                        case RAWNODE_CLASS_XATTR_REF:
824                                jffs2_release_xattr_ref(c, (struct jffs2_xattr_ref *)ic);
825                                break;
826#endif
827                        default:
828                                if (ic->nodes == (void *)ic && ic->pino_nlink == 0)
829                                        jffs2_del_ino_cache(c, ic);
830                                break;
831                }
832                spin_unlock(&c->erase_completion_lock);
833        }
834
835 out_erase_sem:
836        mutex_unlock(&c->erase_free_sem);
837}
838
839int jffs2_thread_should_wake(struct jffs2_sb_info *c)
840{
841        int ret = 0;
842        uint32_t dirty;
843        int nr_very_dirty = 0;
844        struct jffs2_eraseblock *jeb;
845
846        if (!list_empty(&c->erase_complete_list) ||
847            !list_empty(&c->erase_pending_list))
848                return 1;
849
850        if (c->unchecked_size) {
851                jffs2_dbg(1, "jffs2_thread_should_wake(): unchecked_size %d, checked_ino #%d\n",
852                          c->unchecked_size, c->checked_ino);
853                return 1;
854        }
855
856        /* dirty_size contains blocks on erase_pending_list
857         * those blocks are counted in c->nr_erasing_blocks.
858         * If one block is actually erased, it is not longer counted as dirty_space
859         * but it is counted in c->nr_erasing_blocks, so we add it and subtract it
860         * with c->nr_erasing_blocks * c->sector_size again.
861         * Blocks on erasable_list are counted as dirty_size, but not in c->nr_erasing_blocks
862         * This helps us to force gc and pick eventually a clean block to spread the load.
863         */
864        dirty = c->dirty_size + c->erasing_size - c->nr_erasing_blocks * c->sector_size;
865
866        if (c->nr_free_blocks + c->nr_erasing_blocks < c->resv_blocks_gctrigger &&
867                        (dirty > c->nospc_dirty_size))
868                ret = 1;
869
870        list_for_each_entry(jeb, &c->very_dirty_list, list) {
871                nr_very_dirty++;
872                if (nr_very_dirty == c->vdirty_blocks_gctrigger) {
873                        ret = 1;
874                        /* In debug mode, actually go through and count them all */
875                        D1(continue);
876                        break;
877                }
878        }
879
880        jffs2_dbg(1, "%s(): nr_free_blocks %d, nr_erasing_blocks %d, dirty_size 0x%x, vdirty_blocks %d: %s\n",
881                  __func__, c->nr_free_blocks, c->nr_erasing_blocks,
882                  c->dirty_size, nr_very_dirty, ret ? "yes" : "no");
883
884        return ret;
885}
Note: See TracBrowser for help on using the repository browser.