source: rtems/cpukit/libblock/src/bdbuf.c @ 4c5e2fc

4.10
Last change on this file since 4c5e2fc was 4c5e2fc, checked in by Chris Johns <chrisj@…>, on 11/03/11 at 04:23:07

2011-11-03 Chris Johns <chrisj@…>

PR 1948/filesystem

  • libfs/src/rfs/rtems-rfs-file-system.c, libfs/src/rfs/rtems-rfs-file-system.h, libfs/src/rfs/rtems-rfs-format.c, libfs/src/rfs/rtems-rfs-rtems.c: Add support for mount passing an ASCIIZ string containing configuration options.
  • libblock/src/bdbuf.c: Fix state labels in trace output.
  • Property mode set to 100644
File size: 81.1 KB
Line 
1/**
2 * @file
3 *
4 * @ingroup rtems_bdbuf
5 *
6 * Block device buffer management.
7 */
8
9/*
10 * Disk I/O buffering
11 * Buffer management
12 *
13 * Copyright (C) 2001 OKTET Ltd., St.-Peterburg, Russia
14 * Author: Andrey G. Ivanov <Andrey.Ivanov@oktet.ru>
15 *         Victor V. Vengerov <vvv@oktet.ru>
16 *         Alexander Kukuta <kam@oktet.ru>
17 *
18 * Copyright (C) 2008,2009 Chris Johns <chrisj@rtems.org>
19 *    Rewritten to remove score mutex access. Fixes many performance
20 *    issues.
21 *
22 * Copyright (c) 2009 embedded brains GmbH.
23 *
24 * @(#) bdbuf.c,v 1.14 2004/04/17 08:15:17 ralf Exp
25 */
26
/**
 * Debug tracing switch. Set to 1 to enable debug tracing.
 *
 * The default is 0. The guard allows the value to be supplied by the build
 * (for example -DRTEMS_BDBUF_TRACE=1) without editing this file and without
 * causing a macro redefinition.
 */
#ifndef RTEMS_BDBUF_TRACE
#define RTEMS_BDBUF_TRACE 0
#endif
31
32#if HAVE_CONFIG_H
33#include "config.h"
34#endif
35#include <limits.h>
36#include <errno.h>
37#include <assert.h>
38#include <stdio.h>
39#include <string.h>
40#include <inttypes.h>
41
42#include <rtems.h>
43#include <rtems/error.h>
44#include <rtems/malloc.h>
45
46#include "rtems/bdbuf.h"
47
/*
 * Device number with all bits set. The cache's sync_device field uses it to
 * mean "not a device sync".
 */
#define BDBUF_INVALID_DEV ((dev_t) -1)

/*
 * Shorter name for rtems_bdbuf_configuration, used throughout this file.
 */
#define bdbuf_config rtems_bdbuf_configuration
54
55/**
56 * A swapout transfer transaction data. This data is passed to a worked thread
57 * to handle the write phase of the transfer.
58 */
59typedef struct rtems_bdbuf_swapout_transfer
60{
61  rtems_chain_control   bds;         /**< The transfer list of BDs. */
62  dev_t                 dev;         /**< The device the transfer is for. */
63  bool                  syncing;     /**< The data is a sync'ing. */
64  rtems_blkdev_request* write_req;   /**< The write request array. */
65  uint32_t              bufs_per_bd; /**< Number of buffers per bd. */
66} rtems_bdbuf_swapout_transfer;
67
68/**
69 * Swapout worker thread. These are available to take processing from the
70 * main swapout thread and handle the I/O operation.
71 */
72typedef struct rtems_bdbuf_swapout_worker
73{
74  rtems_chain_node             link;     /**< The threads sit on a chain when
75                                          * idle. */
76  rtems_id                     id;       /**< The id of the task so we can wake
77                                          * it. */
78  volatile bool                enabled;  /**< The worker is enabled. */
79  rtems_bdbuf_swapout_transfer transfer; /**< The transfer data for this
80                                          * thread. */
81} rtems_bdbuf_swapout_worker;
82
83/**
84 * Buffer waiters synchronization.
85 */
86typedef struct rtems_bdbuf_waiters {
87  volatile unsigned count;
88  rtems_id sema;
89} rtems_bdbuf_waiters;
90
91/**
92 * The BD buffer cache.
93 */
94typedef struct rtems_bdbuf_cache
95{
96  rtems_id            swapout;           /**< Swapout task ID */
97  volatile bool       swapout_enabled;   /**< Swapout is only running if
98                                          * enabled. Set to false to kill the
99                                          * swap out task. It deletes itself. */
100  rtems_chain_control swapout_workers;   /**< The work threads for the swapout
101                                          * task. */
102
103  rtems_bdbuf_buffer* bds;               /**< Pointer to table of buffer
104                                          * descriptors. */
105  void*               buffers;           /**< The buffer's memory. */
106  size_t              buffer_min_count;  /**< Number of minimum size buffers
107                                          * that fit the buffer memory. */
108  size_t              max_bds_per_group; /**< The number of BDs of minimum
109                                          * buffer size that fit in a group. */
110  uint32_t            flags;             /**< Configuration flags. */
111
112  rtems_id            lock;              /**< The cache lock. It locks all
113                                          * cache data, BD and lists. */
114  rtems_id            sync_lock;         /**< Sync calls block writes. */
115  volatile bool       sync_active;       /**< True if a sync is active. */
116  volatile rtems_id   sync_requester;    /**< The sync requester. */
117  volatile dev_t      sync_device;       /**< The device to sync and
118                                          * BDBUF_INVALID_DEV not a device
119                                          * sync. */
120
121  rtems_bdbuf_buffer* tree;              /**< Buffer descriptor lookup AVL tree
122                                          * root. There is only one. */
123  rtems_chain_control lru;               /**< Least recently used list */
124  rtems_chain_control modified;          /**< Modified buffers list */
125  rtems_chain_control sync;              /**< Buffers to sync list */
126
127  rtems_bdbuf_waiters access_waiters;    /**< Wait for a buffer in
128                                          * ACCESS_CACHED, ACCESS_MODIFIED or
129                                          * ACCESS_EMPTY
130                                          * state. */
131  rtems_bdbuf_waiters transfer_waiters;  /**< Wait for a buffer in TRANSFER
132                                          * state. */
133  rtems_bdbuf_waiters buffer_waiters;    /**< Wait for a buffer and no one is
134                                          * available. */
135
136  size_t              group_count;       /**< The number of groups. */
137  rtems_bdbuf_group*  groups;            /**< The groups. */
138
139  bool                initialised;       /**< Initialised state. */
140} rtems_bdbuf_cache;
141
/**
 * Fatal error codes.
 *
 * RTEMS_BLKDEV_FATAL_ERROR() places the character 'B' in the top 8 bits of the
 * code and keeps the low 24 bits of the error number @a n.
 */
#define RTEMS_BLKDEV_FATAL_ERROR(n) \
  ((((uint32_t) 'B') << 24) | (((uint32_t) (n)) & ((uint32_t) 0x00FFFFFF)))

/* Buffer state errors and tree removal. */
#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_11      RTEMS_BLKDEV_FATAL_ERROR (1)
#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_4       RTEMS_BLKDEV_FATAL_ERROR (2)
#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_5       RTEMS_BLKDEV_FATAL_ERROR (3)
#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_6       RTEMS_BLKDEV_FATAL_ERROR (4)
#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_7       RTEMS_BLKDEV_FATAL_ERROR (5)
#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_8       RTEMS_BLKDEV_FATAL_ERROR (6)
#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_9       RTEMS_BLKDEV_FATAL_ERROR (7)
#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_10      RTEMS_BLKDEV_FATAL_ERROR (8)
#define RTEMS_BLKDEV_FATAL_BDBUF_TREE_RM       RTEMS_BLKDEV_FATAL_ERROR (9)
#define RTEMS_BLKDEV_FATAL_BDBUF_SWAPOUT       RTEMS_BLKDEV_FATAL_ERROR (10)

/* Locking, preemption control and waiter handling. */
#define RTEMS_BLKDEV_FATAL_BDBUF_SYNC_LOCK     RTEMS_BLKDEV_FATAL_ERROR (11)
#define RTEMS_BLKDEV_FATAL_BDBUF_SYNC_UNLOCK   RTEMS_BLKDEV_FATAL_ERROR (12)
#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_LOCK    RTEMS_BLKDEV_FATAL_ERROR (13)
#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_UNLOCK  RTEMS_BLKDEV_FATAL_ERROR (14)
#define RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_DIS   RTEMS_BLKDEV_FATAL_ERROR (15)
#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_2  RTEMS_BLKDEV_FATAL_ERROR (16)
#define RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_RST   RTEMS_BLKDEV_FATAL_ERROR (17)
#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_TO RTEMS_BLKDEV_FATAL_ERROR (18)
#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAKE    RTEMS_BLKDEV_FATAL_ERROR (19)

/* Swapout task and workers. */
#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE       RTEMS_BLKDEV_FATAL_ERROR (20)
#define RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM      RTEMS_BLKDEV_FATAL_ERROR (21)
#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_CREATE  RTEMS_BLKDEV_FATAL_ERROR (22)
#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_START   RTEMS_BLKDEV_FATAL_ERROR (23)
#define BLKDEV_FATAL_BDBUF_SWAPOUT_RE          RTEMS_BLKDEV_FATAL_ERROR (24)
#define BLKDEV_FATAL_BDBUF_SWAPOUT_TS          RTEMS_BLKDEV_FATAL_ERROR (25)

/* Remaining codes. */
#define RTEMS_BLKDEV_FATAL_BDBUF_WAIT_EVNT     RTEMS_BLKDEV_FATAL_ERROR (26)
#define RTEMS_BLKDEV_FATAL_BDBUF_RECYCLE       RTEMS_BLKDEV_FATAL_ERROR (27)
#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_0       RTEMS_BLKDEV_FATAL_ERROR (28)
#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_1       RTEMS_BLKDEV_FATAL_ERROR (29)
#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_2       RTEMS_BLKDEV_FATAL_ERROR (30)
#define RTEMS_BLKDEV_FATAL_BDBUF_DISK_REL      RTEMS_BLKDEV_FATAL_ERROR (31)
179
/**
 * Events used by this code. These should be system events rather than
 * application events.
 */
#define RTEMS_BDBUF_TRANSFER_SYNC  RTEMS_EVENT_1
#define RTEMS_BDBUF_SWAPOUT_SYNC   RTEMS_EVENT_2

/**
 * Stack size of the swap out task. Should be more than enough for most
 * drivers, even with tracing turned on.
 */
#define SWAPOUT_TASK_STACK_SIZE (8 * 1024)
192
/**
 * Attributes of the lock semaphores. These are used for locking type mutexes.
 *
 * @warning Priority inheritance is on.
 */
#define RTEMS_BDBUF_CACHE_LOCK_ATTRIBS \
  (RTEMS_PRIORITY | RTEMS_BINARY_SEMAPHORE | \
   RTEMS_INHERIT_PRIORITY | RTEMS_NO_PRIORITY_CEILING | RTEMS_LOCAL)

/**
 * Attributes of the waiter semaphores.
 *
 * @warning Do not configure as inherit priority. If a driver is in the driver
 *          initialisation table this locked semaphore will have the IDLE task
 *          as the holder and a blocking task will raise the priority of the
 *          IDLE task which can cause unusual side effects.
 */
#define RTEMS_BDBUF_CACHE_WAITER_ATTRIBS \
  (RTEMS_PRIORITY | RTEMS_SIMPLE_BINARY_SEMAPHORE | \
   RTEMS_NO_INHERIT_PRIORITY | RTEMS_NO_PRIORITY_CEILING | RTEMS_LOCAL)
213
/**
 * Waiter timeout passed to rtems_semaphore_obtain() when a task blocks in
 * rtems_bdbuf_anonymous_wait().
 *
 * The default, RTEMS_NO_TIMEOUT, waits for ever. To find some info on a waiter
 * that is waiting too long, define this macro to a non-zero tick count before
 * this point (for example from the build), e.g.
 * (TOD_MICROSECONDS_TO_TICKS (20000000)); a wait that times out then raises
 * the RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_TO fatal error.
 *
 * The previous form defined the macro unconditionally and then tested
 * !defined(), so the alternative value could never be selected.
 */
#ifndef RTEMS_BDBUF_WAIT_TIMEOUT
#define RTEMS_BDBUF_WAIT_TIMEOUT RTEMS_NO_TIMEOUT
#endif
223
224/*
225 * The swap out task.
226 */
227static rtems_task rtems_bdbuf_swapout_task(rtems_task_argument arg);
228
229/**
230 * The Buffer Descriptor cache.
231 */
232static rtems_bdbuf_cache bdbuf_cache;
233
234#if RTEMS_BDBUF_TRACE
/**
 * Run-time trace switch: trace messages are output only while this is true.
 */
bool rtems_bdbuf_tracer;
239
240/**
241 * Return the number of items on the list.
242 *
243 * @param list The chain control.
244 * @return uint32_t The number of items on the list.
245 */
246uint32_t
247rtems_bdbuf_list_count (rtems_chain_control* list)
248{
249  rtems_chain_node* node = rtems_chain_first (list);
250  uint32_t          count = 0;
251  while (!rtems_chain_is_tail (list, node))
252  {
253    count++;
254    node = rtems_chain_next (node);
255  }
256  return count;
257}
258
259/**
260 * Show the usage for the bdbuf cache.
261 */
262void
263rtems_bdbuf_show_usage (void)
264{
265  uint32_t group;
266  uint32_t total = 0;
267  uint32_t val;
268
269  for (group = 0; group < bdbuf_cache.group_count; group++)
270    total += bdbuf_cache.groups[group].users;
271  printf ("bdbuf:group users=%lu", total);
272  val = rtems_bdbuf_list_count (&bdbuf_cache.lru);
273  printf (", lru=%lu", val);
274  total = val;
275  val = rtems_bdbuf_list_count (&bdbuf_cache.modified);
276  printf (", mod=%lu", val);
277  total += val;
278  val = rtems_bdbuf_list_count (&bdbuf_cache.sync);
279  printf (", sync=%lu", val);
280  total += val;
281  printf (", total=%lu\n", total);
282}
283
284/**
285 * Show the users for a group of a bd.
286 *
287 * @param where A label to show the context of output.
288 * @param bd The bd to show the users of.
289 */
290void
291rtems_bdbuf_show_users (const char* where, rtems_bdbuf_buffer* bd)
292{
293  const char* states[] =
294    { "FR", "EM", "CH", "AC", "AM", "MD", "SY", "TR" };
295
296  printf ("bdbuf:users: %15s: [%" PRIu32 " (%s)] %td:%td = %" PRIu32 " %s\n",
297          where,
298          bd->block, states[bd->state],
299          bd->group - bdbuf_cache.groups,
300          bd - bdbuf_cache.bds,
301          bd->group->users,
302          bd->group->users > 8 ? "<<<<<<<" : "");
303}
304#else
/*
 * Tracing is compiled out: the tracer flag is a constant 0 and the show
 * helpers expand to expressions with no effect.
 */
#define rtems_bdbuf_tracer (0)
#define rtems_bdbuf_show_usage() ((void) 0)
#define rtems_bdbuf_show_users(_w, _b) ((void) 0)
308#endif
309
/**
 * Maximum height of the AVL tree; it sizes the node stacks used by the insert
 * and remove operations.
 *
 * The default of 32 allows for AVL trees having between 5,704,880 and
 * 4,294,967,295 nodes, depending on order of insertion. This compile-time
 * constant may be changed as you wish.
 */
#if !defined (RTEMS_BDBUF_AVL_MAX_HEIGHT)
#define RTEMS_BDBUF_AVL_MAX_HEIGHT (32)
#endif
318
319static void
320rtems_bdbuf_fatal (rtems_bdbuf_buf_state state, uint32_t error)
321{
322  rtems_fatal_error_occurred ((((uint32_t) state) << 16) | error);
323}
324
325/**
326 * Searches for the node with specified dev/block.
327 *
328 * @param root pointer to the root node of the AVL-Tree
329 * @param dev device search key
330 * @param block block search key
331 * @retval NULL node with the specified dev/block is not found
332 * @return pointer to the node with specified dev/block
333 */
334static rtems_bdbuf_buffer *
335rtems_bdbuf_avl_search (rtems_bdbuf_buffer** root,
336                        dev_t                dev,
337                        rtems_blkdev_bnum    block)
338{
339  rtems_bdbuf_buffer* p = *root;
340
341  while ((p != NULL) && ((p->dev != dev) || (p->block != block)))
342  {
343    if ((p->dev < dev) || ((p->dev == dev) && (p->block < block)))
344    {
345      p = p->avl.right;
346    }
347    else
348    {
349      p = p->avl.left;
350    }
351  }
352
353  return p;
354}
355
356/**
357 * Inserts the specified node to the AVl-Tree.
358 *
359 * @param root pointer to the root node of the AVL-Tree
360 * @param node Pointer to the node to add.
361 * @retval 0 The node added successfully
362 * @retval -1 An error occured
363 */
364static int
365rtems_bdbuf_avl_insert(rtems_bdbuf_buffer** root,
366                       rtems_bdbuf_buffer*  node)
367{
368  dev_t             dev = node->dev;
369  rtems_blkdev_bnum block = node->block;
370
371  rtems_bdbuf_buffer*  p = *root;
372  rtems_bdbuf_buffer*  q;
373  rtems_bdbuf_buffer*  p1;
374  rtems_bdbuf_buffer*  p2;
375  rtems_bdbuf_buffer*  buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT];
376  rtems_bdbuf_buffer** buf_prev = buf_stack;
377
378  bool modified = false;
379
380  if (p == NULL)
381  {
382    *root = node;
383    node->avl.left = NULL;
384    node->avl.right = NULL;
385    node->avl.bal = 0;
386    return 0;
387  }
388
389  while (p != NULL)
390  {
391    *buf_prev++ = p;
392
393    if ((p->dev < dev) || ((p->dev == dev) && (p->block < block)))
394    {
395      p->avl.cache = 1;
396      q = p->avl.right;
397      if (q == NULL)
398      {
399        q = node;
400        p->avl.right = q = node;
401        break;
402      }
403    }
404    else if ((p->dev != dev) || (p->block != block))
405    {
406      p->avl.cache = -1;
407      q = p->avl.left;
408      if (q == NULL)
409      {
410        q = node;
411        p->avl.left = q;
412        break;
413      }
414    }
415    else
416    {
417      return -1;
418    }
419
420    p = q;
421  }
422
423  q->avl.left = q->avl.right = NULL;
424  q->avl.bal = 0;
425  modified = true;
426  buf_prev--;
427
428  while (modified)
429  {
430    if (p->avl.cache == -1)
431    {
432      switch (p->avl.bal)
433      {
434        case 1:
435          p->avl.bal = 0;
436          modified = false;
437          break;
438
439        case 0:
440          p->avl.bal = -1;
441          break;
442
443        case -1:
444          p1 = p->avl.left;
445          if (p1->avl.bal == -1) /* simple LL-turn */
446          {
447            p->avl.left = p1->avl.right;
448            p1->avl.right = p;
449            p->avl.bal = 0;
450            p = p1;
451          }
452          else /* double LR-turn */
453          {
454            p2 = p1->avl.right;
455            p1->avl.right = p2->avl.left;
456            p2->avl.left = p1;
457            p->avl.left = p2->avl.right;
458            p2->avl.right = p;
459            if (p2->avl.bal == -1) p->avl.bal = +1; else p->avl.bal = 0;
460            if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0;
461            p = p2;
462          }
463          p->avl.bal = 0;
464          modified = false;
465          break;
466
467        default:
468          break;
469      }
470    }
471    else
472    {
473      switch (p->avl.bal)
474      {
475        case -1:
476          p->avl.bal = 0;
477          modified = false;
478          break;
479
480        case 0:
481          p->avl.bal = 1;
482          break;
483
484        case 1:
485          p1 = p->avl.right;
486          if (p1->avl.bal == 1) /* simple RR-turn */
487          {
488            p->avl.right = p1->avl.left;
489            p1->avl.left = p;
490            p->avl.bal = 0;
491            p = p1;
492          }
493          else /* double RL-turn */
494          {
495            p2 = p1->avl.left;
496            p1->avl.left = p2->avl.right;
497            p2->avl.right = p1;
498            p->avl.right = p2->avl.left;
499            p2->avl.left = p;
500            if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0;
501            if (p2->avl.bal == -1) p1->avl.bal = +1; else p1->avl.bal = 0;
502            p = p2;
503          }
504          p->avl.bal = 0;
505          modified = false;
506          break;
507
508        default:
509          break;
510      }
511    }
512    q = p;
513    if (buf_prev > buf_stack)
514    {
515      p = *--buf_prev;
516
517      if (p->avl.cache == -1)
518      {
519        p->avl.left = q;
520      }
521      else
522      {
523        p->avl.right = q;
524      }
525    }
526    else
527    {
528      *root = p;
529      break;
530    }
531  };
532
533  return 0;
534}
535
536
537/**
538 * Removes the node from the tree.
539 *
540 * @param root Pointer to pointer to the root node
541 * @param node Pointer to the node to remove
542 * @retval 0 Item removed
543 * @retval -1 No such item found
544 */
545static int
546rtems_bdbuf_avl_remove(rtems_bdbuf_buffer**      root,
547                       const rtems_bdbuf_buffer* node)
548{
549  dev_t             dev = node->dev;
550  rtems_blkdev_bnum block = node->block;
551
552  rtems_bdbuf_buffer*  p = *root;
553  rtems_bdbuf_buffer*  q;
554  rtems_bdbuf_buffer*  r;
555  rtems_bdbuf_buffer*  s;
556  rtems_bdbuf_buffer*  p1;
557  rtems_bdbuf_buffer*  p2;
558  rtems_bdbuf_buffer*  buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT];
559  rtems_bdbuf_buffer** buf_prev = buf_stack;
560
561  bool modified = false;
562
563  memset (buf_stack, 0, sizeof(buf_stack));
564
565  while (p != NULL)
566  {
567    *buf_prev++ = p;
568
569    if ((p->dev < dev) || ((p->dev == dev) && (p->block < block)))
570    {
571      p->avl.cache = 1;
572      p = p->avl.right;
573    }
574    else if ((p->dev != dev) || (p->block != block))
575    {
576      p->avl.cache = -1;
577      p = p->avl.left;
578    }
579    else
580    {
581      /* node found */
582      break;
583    }
584  }
585
586  if (p == NULL)
587  {
588    /* there is no such node */
589    return -1;
590  }
591
592  q = p;
593
594  buf_prev--;
595  if (buf_prev > buf_stack)
596  {
597    p = *(buf_prev - 1);
598  }
599  else
600  {
601    p = NULL;
602  }
603
604  /* at this moment q - is a node to delete, p is q's parent */
605  if (q->avl.right == NULL)
606  {
607    r = q->avl.left;
608    if (r != NULL)
609    {
610      r->avl.bal = 0;
611    }
612    q = r;
613  }
614  else
615  {
616    rtems_bdbuf_buffer **t;
617
618    r = q->avl.right;
619
620    if (r->avl.left == NULL)
621    {
622      r->avl.left = q->avl.left;
623      r->avl.bal = q->avl.bal;
624      r->avl.cache = 1;
625      *buf_prev++ = q = r;
626    }
627    else
628    {
629      t = buf_prev++;
630      s = r;
631
632      while (s->avl.left != NULL)
633      {
634        *buf_prev++ = r = s;
635        s = r->avl.left;
636        r->avl.cache = -1;
637      }
638
639      s->avl.left = q->avl.left;
640      r->avl.left = s->avl.right;
641      s->avl.right = q->avl.right;
642      s->avl.bal = q->avl.bal;
643      s->avl.cache = 1;
644
645      *t = q = s;
646    }
647  }
648
649  if (p != NULL)
650  {
651    if (p->avl.cache == -1)
652    {
653      p->avl.left = q;
654    }
655    else
656    {
657      p->avl.right = q;
658    }
659  }
660  else
661  {
662    *root = q;
663  }
664
665  modified = true;
666
667  while (modified)
668  {
669    if (buf_prev > buf_stack)
670    {
671      p = *--buf_prev;
672    }
673    else
674    {
675      break;
676    }
677
678    if (p->avl.cache == -1)
679    {
680      /* rebalance left branch */
681      switch (p->avl.bal)
682      {
683        case -1:
684          p->avl.bal = 0;
685          break;
686        case  0:
687          p->avl.bal = 1;
688          modified = false;
689          break;
690
691        case +1:
692          p1 = p->avl.right;
693
694          if (p1->avl.bal >= 0) /* simple RR-turn */
695          {
696            p->avl.right = p1->avl.left;
697            p1->avl.left = p;
698
699            if (p1->avl.bal == 0)
700            {
701              p1->avl.bal = -1;
702              modified = false;
703            }
704            else
705            {
706              p->avl.bal = 0;
707              p1->avl.bal = 0;
708            }
709            p = p1;
710          }
711          else /* double RL-turn */
712          {
713            p2 = p1->avl.left;
714
715            p1->avl.left = p2->avl.right;
716            p2->avl.right = p1;
717            p->avl.right = p2->avl.left;
718            p2->avl.left = p;
719
720            if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0;
721            if (p2->avl.bal == -1) p1->avl.bal = 1; else p1->avl.bal = 0;
722
723            p = p2;
724            p2->avl.bal = 0;
725          }
726          break;
727
728        default:
729          break;
730      }
731    }
732    else
733    {
734      /* rebalance right branch */
735      switch (p->avl.bal)
736      {
737        case +1:
738          p->avl.bal = 0;
739          break;
740
741        case  0:
742          p->avl.bal = -1;
743          modified = false;
744          break;
745
746        case -1:
747          p1 = p->avl.left;
748
749          if (p1->avl.bal <= 0) /* simple LL-turn */
750          {
751            p->avl.left = p1->avl.right;
752            p1->avl.right = p;
753            if (p1->avl.bal == 0)
754            {
755              p1->avl.bal = 1;
756              modified = false;
757            }
758            else
759            {
760              p->avl.bal = 0;
761              p1->avl.bal = 0;
762            }
763            p = p1;
764          }
765          else /* double LR-turn */
766          {
767            p2 = p1->avl.right;
768
769            p1->avl.right = p2->avl.left;
770            p2->avl.left = p1;
771            p->avl.left = p2->avl.right;
772            p2->avl.right = p;
773
774            if (p2->avl.bal == -1) p->avl.bal = 1; else p->avl.bal = 0;
775            if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0;
776
777            p = p2;
778            p2->avl.bal = 0;
779          }
780          break;
781
782        default:
783          break;
784      }
785    }
786
787    if (buf_prev > buf_stack)
788    {
789      q = *(buf_prev - 1);
790
791      if (q->avl.cache == -1)
792      {
793        q->avl.left = p;
794      }
795      else
796      {
797        q->avl.right = p;
798      }
799    }
800    else
801    {
802      *root = p;
803      break;
804    }
805
806  }
807
808  return 0;
809}
810
811static void
812rtems_bdbuf_set_state (rtems_bdbuf_buffer *bd, rtems_bdbuf_buf_state state)
813{
814  bd->state = state;
815}
816
817/**
818 * Change the block number for the block size to the block number for the media
819 * block size. We have to use 64bit maths. There is no short cut here.
820 *
821 * @param block The logical block number in the block size terms.
822 * @param block_size The block size.
823 * @param media_block_size The block size of the media.
824 * @return rtems_blkdev_bnum The media block number.
825 */
826static rtems_blkdev_bnum
827rtems_bdbuf_media_block (rtems_blkdev_bnum block,
828                         size_t            block_size,
829                         size_t            media_block_size)
830{
831  return (rtems_blkdev_bnum)
832    ((((uint64_t) block) * block_size) / media_block_size);
833}
834
835/**
836 * Lock the mutex. A single task can nest calls.
837 *
838 * @param lock The mutex to lock.
839 * @param fatal_error_code The error code if the call fails.
840 */
841static void
842rtems_bdbuf_lock (rtems_id lock, uint32_t fatal_error_code)
843{
844  rtems_status_code sc = rtems_semaphore_obtain (lock,
845                                                 RTEMS_WAIT,
846                                                 RTEMS_NO_TIMEOUT);
847  if (sc != RTEMS_SUCCESSFUL)
848    rtems_fatal_error_occurred (fatal_error_code);
849}
850
851/**
852 * Unlock the mutex.
853 *
854 * @param lock The mutex to unlock.
855 * @param fatal_error_code The error code if the call fails.
856 */
857static void
858rtems_bdbuf_unlock (rtems_id lock, uint32_t fatal_error_code)
859{
860  rtems_status_code sc = rtems_semaphore_release (lock);
861  if (sc != RTEMS_SUCCESSFUL)
862    rtems_fatal_error_occurred (fatal_error_code);
863}
864
865/**
866 * Lock the cache. A single task can nest calls.
867 */
868static void
869rtems_bdbuf_lock_cache (void)
870{
871  rtems_bdbuf_lock (bdbuf_cache.lock, RTEMS_BLKDEV_FATAL_BDBUF_CACHE_LOCK);
872}
873
874/**
875 * Unlock the cache.
876 */
877static void
878rtems_bdbuf_unlock_cache (void)
879{
880  rtems_bdbuf_unlock (bdbuf_cache.lock, RTEMS_BLKDEV_FATAL_BDBUF_CACHE_UNLOCK);
881}
882
883/**
884 * Lock the cache's sync. A single task can nest calls.
885 */
886static void
887rtems_bdbuf_lock_sync (void)
888{
889  rtems_bdbuf_lock (bdbuf_cache.sync_lock, RTEMS_BLKDEV_FATAL_BDBUF_SYNC_LOCK);
890}
891
892/**
893 * Unlock the cache's sync lock. Any blocked writers are woken.
894 */
895static void
896rtems_bdbuf_unlock_sync (void)
897{
898  rtems_bdbuf_unlock (bdbuf_cache.sync_lock,
899                      RTEMS_BLKDEV_FATAL_BDBUF_SYNC_UNLOCK);
900}
901
902static void
903rtems_bdbuf_group_obtain (rtems_bdbuf_buffer *bd)
904{
905  ++bd->group->users;
906}
907
908static void
909rtems_bdbuf_group_release (rtems_bdbuf_buffer *bd)
910{
911  --bd->group->users;
912}
913
914static rtems_mode
915rtems_bdbuf_disable_preemption (void)
916{
917  rtems_status_code sc = RTEMS_SUCCESSFUL;
918  rtems_mode prev_mode = 0;
919
920  sc = rtems_task_mode (RTEMS_NO_PREEMPT, RTEMS_PREEMPT_MASK, &prev_mode);
921  if (sc != RTEMS_SUCCESSFUL)
922    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_DIS);
923
924  return prev_mode;
925}
926
927static void
928rtems_bdbuf_restore_preemption (rtems_mode prev_mode)
929{
930  rtems_status_code sc = RTEMS_SUCCESSFUL;
931
932  sc = rtems_task_mode (prev_mode, RTEMS_ALL_MODE_MASKS, &prev_mode);
933  if (sc != RTEMS_SUCCESSFUL)
934    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_RST);
935}
936
937/**
938 * Wait until woken. Semaphores are used so a number of tasks can wait and can
939 * be woken at once. Task events would require we maintain a list of tasks to
940 * be woken and this would require storage and we do not know the number of
941 * tasks that could be waiting.
942 *
943 * While we have the cache locked we can try and claim the semaphore and
944 * therefore know when we release the lock to the cache we will block until the
945 * semaphore is released. This may even happen before we get to block.
946 *
947 * A counter is used to save the release call when no one is waiting.
948 *
949 * The function assumes the cache is locked on entry and it will be locked on
950 * exit.
951 */
952static void
953rtems_bdbuf_anonymous_wait (rtems_bdbuf_waiters *waiters)
954{
955  rtems_status_code sc;
956  rtems_mode        prev_mode;
957
958  /*
959   * Indicate we are waiting.
960   */
961  ++waiters->count;
962
963  /*
964   * Disable preemption then unlock the cache and block.  There is no POSIX
965   * condition variable in the core API so this is a work around.
966   *
967   * The issue is a task could preempt after the cache is unlocked because it is
968   * blocking or just hits that window, and before this task has blocked on the
969   * semaphore. If the preempting task flushes the queue this task will not see
970   * the flush and may block for ever or until another transaction flushes this
971   * semaphore.
972   */
973  prev_mode = rtems_bdbuf_disable_preemption ();
974
975  /*
976   * Unlock the cache, wait, and lock the cache when we return.
977   */
978  rtems_bdbuf_unlock_cache ();
979
980  sc = rtems_semaphore_obtain (waiters->sema, RTEMS_WAIT, RTEMS_BDBUF_WAIT_TIMEOUT);
981
982  if (sc == RTEMS_TIMEOUT)
983    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_TO);
984
985  if (sc != RTEMS_UNSATISFIED)
986    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_2);
987
988  rtems_bdbuf_lock_cache ();
989
990  rtems_bdbuf_restore_preemption (prev_mode);
991
992  --waiters->count;
993}
994
995static void
996rtems_bdbuf_wait (rtems_bdbuf_buffer *bd, rtems_bdbuf_waiters *waiters)
997{
998  rtems_bdbuf_group_obtain (bd);
999  ++bd->waiters;
1000  rtems_bdbuf_anonymous_wait (waiters);
1001  --bd->waiters;
1002  rtems_bdbuf_group_release (bd);
1003}
1004
1005/**
1006 * Wake a blocked resource. The resource has a counter that lets us know if
1007 * there are any waiters.
1008 */
1009static void
1010rtems_bdbuf_wake (const rtems_bdbuf_waiters *waiters)
1011{
1012  rtems_status_code sc = RTEMS_SUCCESSFUL;
1013
1014  if (waiters->count > 0)
1015  {
1016    sc = rtems_semaphore_flush (waiters->sema);
1017    if (sc != RTEMS_SUCCESSFUL)
1018      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAKE);
1019  }
1020}
1021
1022static void
1023rtems_bdbuf_wake_swapper (void)
1024{
1025  rtems_status_code sc = rtems_event_send (bdbuf_cache.swapout,
1026                                           RTEMS_BDBUF_SWAPOUT_SYNC);
1027  if (sc != RTEMS_SUCCESSFUL)
1028    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE);
1029}
1030
1031static bool
1032rtems_bdbuf_has_buffer_waiters (void)
1033{
1034  return bdbuf_cache.buffer_waiters.count;
1035}
1036
1037static void
1038rtems_bdbuf_remove_from_tree (rtems_bdbuf_buffer *bd)
1039{
1040  if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0)
1041    rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_TREE_RM);
1042}
1043
1044static void
1045rtems_bdbuf_remove_from_tree_and_lru_list (rtems_bdbuf_buffer *bd)
1046{
1047  switch (bd->state)
1048  {
1049    case RTEMS_BDBUF_STATE_FREE:
1050      break;
1051    case RTEMS_BDBUF_STATE_CACHED:
1052      rtems_bdbuf_remove_from_tree (bd);
1053      break;
1054    default:
1055      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_10);
1056  }
1057
1058  rtems_chain_extract (&bd->link);
1059}
1060
1061static void
1062rtems_bdbuf_make_free_and_add_to_lru_list (rtems_bdbuf_buffer *bd)
1063{
1064  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_FREE);
1065  rtems_chain_prepend (&bdbuf_cache.lru, &bd->link);
1066}
1067
1068static void
1069rtems_bdbuf_make_empty (rtems_bdbuf_buffer *bd)
1070{
1071  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_EMPTY);
1072}
1073
1074static void
1075rtems_bdbuf_make_cached_and_add_to_lru_list (rtems_bdbuf_buffer *bd)
1076{
1077  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_CACHED);
1078  rtems_chain_append (&bdbuf_cache.lru, &bd->link);
1079}
1080
1081static void
1082rtems_bdbuf_discard_buffer (rtems_bdbuf_buffer *bd)
1083{
1084  rtems_bdbuf_make_empty (bd);
1085
1086  if (bd->waiters == 0)
1087  {
1088    rtems_bdbuf_remove_from_tree (bd);
1089    rtems_bdbuf_make_free_and_add_to_lru_list (bd);
1090  }
1091}
1092
1093static void
1094rtems_bdbuf_add_to_modified_list_after_access (rtems_bdbuf_buffer *bd)
1095{
1096  if (bdbuf_cache.sync_active && bdbuf_cache.sync_device == bd->dev)
1097  {
1098    rtems_bdbuf_unlock_cache ();
1099
1100    /*
1101     * Wait for the sync lock.
1102     */
1103    rtems_bdbuf_lock_sync ();
1104
1105    rtems_bdbuf_unlock_sync ();
1106    rtems_bdbuf_lock_cache ();
1107  }
1108
1109  /*
1110   * Only the first modified release sets the timer and any further user
1111   * accesses do not change the timer value which should move down. This
1112   * assumes the user's hold of the buffer is much less than the time on the
1113   * modified list. Resetting the timer on each access which could result in a
1114   * buffer never getting to 0 and never being forced onto disk. This raises a
1115   * difficult question. Is a snapshot of a block that is changing better than
1116   * nothing being written? We have tended to think we should hold changes for
1117   * only a specific period of time even if still changing and get onto disk
1118   * and letting the file system try and recover this position if it can.
1119   */
1120  if (bd->state == RTEMS_BDBUF_STATE_ACCESS_CACHED
1121        || bd->state == RTEMS_BDBUF_STATE_ACCESS_EMPTY)
1122    bd->hold_timer = bdbuf_config.swap_block_hold;
1123
1124  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_MODIFIED);
1125  rtems_chain_append (&bdbuf_cache.modified, &bd->link);
1126
1127  if (bd->waiters)
1128    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
1129  else if (rtems_bdbuf_has_buffer_waiters ())
1130    rtems_bdbuf_wake_swapper ();
1131}
1132
1133static void
1134rtems_bdbuf_add_to_lru_list_after_access (rtems_bdbuf_buffer *bd)
1135{
1136  rtems_bdbuf_group_release (bd);
1137  rtems_bdbuf_make_cached_and_add_to_lru_list (bd);
1138
1139  if (bd->waiters)
1140    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
1141  else
1142    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1143}
1144
1145/**
1146 * Compute the number of BDs per group for a given buffer size.
1147 *
1148 * @param size The buffer size. It can be any size and we scale up.
1149 */
1150static size_t
1151rtems_bdbuf_bds_per_group (size_t size)
1152{
1153  size_t bufs_per_size;
1154  size_t bds_per_size;
1155
1156  if (size > bdbuf_config.buffer_max)
1157    return 0;
1158
1159  bufs_per_size = ((size - 1) / bdbuf_config.buffer_min) + 1;
1160
1161  for (bds_per_size = 1;
1162       bds_per_size < bufs_per_size;
1163       bds_per_size <<= 1)
1164    ;
1165
1166  return bdbuf_cache.max_bds_per_group / bds_per_size;
1167}
1168
1169static void
1170rtems_bdbuf_discard_buffer_after_access (rtems_bdbuf_buffer *bd)
1171{
1172  rtems_bdbuf_group_release (bd);
1173  rtems_bdbuf_discard_buffer (bd);
1174
1175  if (bd->waiters)
1176    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
1177  else
1178    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1179}
1180
1181/**
1182 * Reallocate a group. The BDs currently allocated in the group are removed
1183 * from the ALV tree and any lists then the new BD's are prepended to the ready
1184 * list of the cache.
1185 *
1186 * @param group The group to reallocate.
1187 * @param new_bds_per_group The new count of BDs per group.
1188 * @return A buffer of this group.
1189 */
1190static rtems_bdbuf_buffer *
1191rtems_bdbuf_group_realloc (rtems_bdbuf_group* group, size_t new_bds_per_group)
1192{
1193  rtems_bdbuf_buffer* bd;
1194  size_t              b;
1195  size_t              bufs_per_bd;
1196
1197  if (rtems_bdbuf_tracer)
1198    printf ("bdbuf:realloc: %tu: %zd -> %zd\n",
1199            group - bdbuf_cache.groups, group->bds_per_group,
1200            new_bds_per_group);
1201
1202  bufs_per_bd = bdbuf_cache.max_bds_per_group / group->bds_per_group;
1203
1204  for (b = 0, bd = group->bdbuf;
1205       b < group->bds_per_group;
1206       b++, bd += bufs_per_bd)
1207    rtems_bdbuf_remove_from_tree_and_lru_list (bd);
1208
1209  group->bds_per_group = new_bds_per_group;
1210  bufs_per_bd = bdbuf_cache.max_bds_per_group / new_bds_per_group;
1211
1212  for (b = 1, bd = group->bdbuf + bufs_per_bd;
1213       b < group->bds_per_group;
1214       b++, bd += bufs_per_bd)
1215    rtems_bdbuf_make_free_and_add_to_lru_list (bd);
1216
1217  if (b > 1)
1218    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1219
1220  return group->bdbuf;
1221}
1222
1223static void
1224rtems_bdbuf_setup_empty_buffer (rtems_bdbuf_buffer *bd,
1225                                dev_t               dev,
1226                                rtems_blkdev_bnum   block)
1227{
1228  bd->dev       = dev;
1229  bd->block     = block;
1230  bd->avl.left  = NULL;
1231  bd->avl.right = NULL;
1232  bd->waiters   = 0;
1233
1234  if (rtems_bdbuf_avl_insert (&bdbuf_cache.tree, bd) != 0)
1235    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_RECYCLE);
1236
1237  rtems_bdbuf_make_empty (bd);
1238}
1239
1240static rtems_bdbuf_buffer *
1241rtems_bdbuf_get_buffer_from_lru_list (dev_t             dev,
1242                                      rtems_blkdev_bnum block,
1243                                      size_t            bds_per_group)
1244{
1245  rtems_chain_node *node = rtems_chain_first (&bdbuf_cache.lru);
1246
1247  while (!rtems_chain_is_tail (&bdbuf_cache.lru, node))
1248  {
1249    rtems_bdbuf_buffer *bd = (rtems_bdbuf_buffer *) node;
1250    rtems_bdbuf_buffer *empty_bd = NULL;
1251
1252    if (rtems_bdbuf_tracer)
1253      printf ("bdbuf:next-bd: %tu (%td:%" PRId32 ") %zd -> %zd\n",
1254              bd - bdbuf_cache.bds,
1255              bd->group - bdbuf_cache.groups, bd->group->users,
1256              bd->group->bds_per_group, bds_per_group);
1257
1258    /*
1259     * If nobody waits for this BD, we may recycle it.
1260     */
1261    if (bd->waiters == 0)
1262    {
1263      if (bd->group->bds_per_group == bds_per_group)
1264      {
1265        rtems_bdbuf_remove_from_tree_and_lru_list (bd);
1266
1267        empty_bd = bd;
1268      }
1269      else if (bd->group->users == 0)
1270        empty_bd = rtems_bdbuf_group_realloc (bd->group, bds_per_group);
1271    }
1272
1273    if (empty_bd != NULL)
1274    {
1275      rtems_bdbuf_setup_empty_buffer (empty_bd, dev, block);
1276
1277      return empty_bd;
1278    }
1279
1280    node = rtems_chain_next (node);
1281  }
1282
1283  return NULL;
1284}
1285
1286/**
1287 * Initialise the cache.
1288 *
1289 * @return rtems_status_code The initialisation status.
1290 */
1291rtems_status_code
1292rtems_bdbuf_init (void)
1293{
1294  rtems_bdbuf_group*  group;
1295  rtems_bdbuf_buffer* bd;
1296  uint8_t*            buffer;
1297  size_t              b;
1298  size_t              cache_aligment;
1299  rtems_status_code   sc;
1300  rtems_mode          prev_mode;
1301
1302  if (rtems_bdbuf_tracer)
1303    printf ("bdbuf:init\n");
1304
1305  if (rtems_interrupt_is_in_progress())
1306    return RTEMS_CALLED_FROM_ISR;
1307
1308  /*
1309   * Check the configuration table values.
1310   */
1311  if ((bdbuf_config.buffer_max % bdbuf_config.buffer_min) != 0)
1312    return RTEMS_INVALID_NUMBER;
1313
1314  /*
1315   * We use a special variable to manage the initialisation incase we have
1316   * completing threads doing this. You may get errors if the another thread
1317   * makes a call and we have not finished initialisation.
1318   */
1319  prev_mode = rtems_bdbuf_disable_preemption ();
1320  if (bdbuf_cache.initialised)
1321  {
1322    rtems_bdbuf_restore_preemption (prev_mode);
1323    return RTEMS_RESOURCE_IN_USE;
1324  }
1325
1326  memset(&bdbuf_cache, 0, sizeof(bdbuf_cache));
1327  bdbuf_cache.initialised = true;
1328  rtems_bdbuf_restore_preemption (prev_mode);
1329
1330  /*
1331   * For unspecified cache alignments we use the CPU alignment.
1332   */
1333  cache_aligment = 32; /* FIXME rtems_cache_get_data_line_size() */
1334  if (cache_aligment <= 0)
1335    cache_aligment = CPU_ALIGNMENT;
1336
1337  bdbuf_cache.sync_device = BDBUF_INVALID_DEV;
1338
1339  rtems_chain_initialize_empty (&bdbuf_cache.swapout_workers);
1340  rtems_chain_initialize_empty (&bdbuf_cache.lru);
1341  rtems_chain_initialize_empty (&bdbuf_cache.modified);
1342  rtems_chain_initialize_empty (&bdbuf_cache.sync);
1343
1344  /*
1345   * Create the locks for the cache.
1346   */
1347  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'l'),
1348                               1, RTEMS_BDBUF_CACHE_LOCK_ATTRIBS, 0,
1349                               &bdbuf_cache.lock);
1350  if (sc != RTEMS_SUCCESSFUL)
1351    goto error;
1352
1353  rtems_bdbuf_lock_cache ();
1354
1355  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 's'),
1356                               1, RTEMS_BDBUF_CACHE_LOCK_ATTRIBS, 0,
1357                               &bdbuf_cache.sync_lock);
1358  if (sc != RTEMS_SUCCESSFUL)
1359    goto error;
1360
1361  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'a'),
1362                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
1363                               &bdbuf_cache.access_waiters.sema);
1364  if (sc != RTEMS_SUCCESSFUL)
1365    goto error;
1366
1367  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 't'),
1368                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
1369                               &bdbuf_cache.transfer_waiters.sema);
1370  if (sc != RTEMS_SUCCESSFUL)
1371    goto error;
1372
1373  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'b'),
1374                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
1375                               &bdbuf_cache.buffer_waiters.sema);
1376  if (sc != RTEMS_SUCCESSFUL)
1377    goto error;
1378
1379  /*
1380   * Compute the various number of elements in the cache.
1381   */
1382  bdbuf_cache.buffer_min_count =
1383    bdbuf_config.size / bdbuf_config.buffer_min;
1384  bdbuf_cache.max_bds_per_group =
1385    bdbuf_config.buffer_max / bdbuf_config.buffer_min;
1386  bdbuf_cache.group_count =
1387    bdbuf_cache.buffer_min_count / bdbuf_cache.max_bds_per_group;
1388
1389  /*
1390   * Allocate the memory for the buffer descriptors.
1391   */
1392  bdbuf_cache.bds = calloc (sizeof (rtems_bdbuf_buffer),
1393                            bdbuf_cache.buffer_min_count);
1394  if (!bdbuf_cache.bds)
1395    goto error;
1396
1397  /*
1398   * Allocate the memory for the buffer descriptors.
1399   */
1400  bdbuf_cache.groups = calloc (sizeof (rtems_bdbuf_group),
1401                               bdbuf_cache.group_count);
1402  if (!bdbuf_cache.groups)
1403    goto error;
1404
1405  /*
1406   * Allocate memory for buffer memory. The buffer memory will be cache
1407   * aligned. It is possible to free the memory allocated by rtems_memalign()
1408   * with free(). Return 0 if allocated.
1409   *
1410   * The memory allocate allows a
1411   */
1412  if (rtems_memalign ((void **) &bdbuf_cache.buffers,
1413                      cache_aligment,
1414                      bdbuf_cache.buffer_min_count * bdbuf_config.buffer_min) != 0)
1415    goto error;
1416
1417  /*
1418   * The cache is empty after opening so we need to add all the buffers to it
1419   * and initialise the groups.
1420   */
1421  for (b = 0, group = bdbuf_cache.groups,
1422         bd = bdbuf_cache.bds, buffer = bdbuf_cache.buffers;
1423       b < bdbuf_cache.buffer_min_count;
1424       b++, bd++, buffer += bdbuf_config.buffer_min)
1425  {
1426    bd->dev    = BDBUF_INVALID_DEV;
1427    bd->group  = group;
1428    bd->buffer = buffer;
1429
1430    rtems_chain_append (&bdbuf_cache.lru, &bd->link);
1431
1432    if ((b % bdbuf_cache.max_bds_per_group) ==
1433        (bdbuf_cache.max_bds_per_group - 1))
1434      group++;
1435  }
1436
1437  for (b = 0,
1438         group = bdbuf_cache.groups,
1439         bd = bdbuf_cache.bds;
1440       b < bdbuf_cache.group_count;
1441       b++,
1442         group++,
1443         bd += bdbuf_cache.max_bds_per_group)
1444  {
1445    group->bds_per_group = bdbuf_cache.max_bds_per_group;
1446    group->bdbuf = bd;
1447  }
1448
1449  /*
1450   * Create and start swapout task. This task will create and manage the worker
1451   * threads.
1452   */
1453  bdbuf_cache.swapout_enabled = true;
1454
1455  sc = rtems_task_create (rtems_build_name('B', 'S', 'W', 'P'),
1456                          bdbuf_config.swapout_priority ?
1457                            bdbuf_config.swapout_priority :
1458                            RTEMS_BDBUF_SWAPOUT_TASK_PRIORITY_DEFAULT,
1459                          SWAPOUT_TASK_STACK_SIZE,
1460                          RTEMS_PREEMPT | RTEMS_NO_TIMESLICE | RTEMS_NO_ASR,
1461                          RTEMS_LOCAL | RTEMS_NO_FLOATING_POINT,
1462                          &bdbuf_cache.swapout);
1463  if (sc != RTEMS_SUCCESSFUL)
1464    goto error;
1465
1466  sc = rtems_task_start (bdbuf_cache.swapout,
1467                         rtems_bdbuf_swapout_task,
1468                         (rtems_task_argument) &bdbuf_cache);
1469  if (sc != RTEMS_SUCCESSFUL)
1470    goto error;
1471
1472  rtems_bdbuf_unlock_cache ();
1473
1474  return RTEMS_SUCCESSFUL;
1475
1476error:
1477
1478  if (bdbuf_cache.swapout != 0)
1479    rtems_task_delete (bdbuf_cache.swapout);
1480
1481  free (bdbuf_cache.buffers);
1482  free (bdbuf_cache.groups);
1483  free (bdbuf_cache.bds);
1484
1485  rtems_semaphore_delete (bdbuf_cache.buffer_waiters.sema);
1486  rtems_semaphore_delete (bdbuf_cache.access_waiters.sema);
1487  rtems_semaphore_delete (bdbuf_cache.transfer_waiters.sema);
1488  rtems_semaphore_delete (bdbuf_cache.sync_lock);
1489
1490  if (bdbuf_cache.lock != 0)
1491  {
1492    rtems_bdbuf_unlock_cache ();
1493    rtems_semaphore_delete (bdbuf_cache.lock);
1494  }
1495
1496  bdbuf_cache.initialised = false;
1497
1498  return RTEMS_UNSATISFIED;
1499}
1500
1501static void
1502rtems_bdbuf_wait_for_event (rtems_event_set event)
1503{
1504  rtems_status_code sc = RTEMS_SUCCESSFUL;
1505  rtems_event_set   out = 0;
1506
1507  sc = rtems_event_receive (event,
1508                            RTEMS_EVENT_ALL | RTEMS_WAIT,
1509                            RTEMS_NO_TIMEOUT,
1510                            &out);
1511
1512  if (sc != RTEMS_SUCCESSFUL || out != event)
1513    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_WAIT_EVNT);
1514}
1515
1516static void
1517rtems_bdbuf_wait_for_access (rtems_bdbuf_buffer *bd)
1518{
1519  while (true)
1520  {
1521    switch (bd->state)
1522    {
1523      case RTEMS_BDBUF_STATE_MODIFIED:
1524        rtems_bdbuf_group_release (bd);
1525        /* Fall through */
1526      case RTEMS_BDBUF_STATE_CACHED:
1527        rtems_chain_extract (&bd->link);
1528        /* Fall through */
1529      case RTEMS_BDBUF_STATE_EMPTY:
1530        return;
1531      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
1532      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
1533      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
1534      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
1535        rtems_bdbuf_wait (bd, &bdbuf_cache.access_waiters);
1536        break;
1537      case RTEMS_BDBUF_STATE_SYNC:
1538      case RTEMS_BDBUF_STATE_TRANSFER:
1539      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
1540        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
1541        break;
1542      default:
1543        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_7);
1544    }
1545  }
1546}
1547
1548static void
1549rtems_bdbuf_request_sync_for_modified_buffer (rtems_bdbuf_buffer *bd)
1550{
1551  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_SYNC);
1552  rtems_chain_extract (&bd->link);
1553  rtems_chain_append (&bdbuf_cache.sync, &bd->link);
1554  rtems_bdbuf_wake_swapper ();
1555}
1556
1557/**
1558 * @brief Waits until the buffer is ready for recycling.
1559 *
1560 * @retval @c true Buffer is valid and may be recycled.
1561 * @retval @c false Buffer is invalid and has to searched again.
1562 */
1563static bool
1564rtems_bdbuf_wait_for_recycle (rtems_bdbuf_buffer *bd)
1565{
1566  while (true)
1567  {
1568    switch (bd->state)
1569    {
1570      case RTEMS_BDBUF_STATE_FREE:
1571        return true;
1572      case RTEMS_BDBUF_STATE_MODIFIED:
1573        rtems_bdbuf_request_sync_for_modified_buffer (bd);
1574        break;
1575      case RTEMS_BDBUF_STATE_CACHED:
1576      case RTEMS_BDBUF_STATE_EMPTY:
1577        if (bd->waiters == 0)
1578          return true;
1579        else
1580        {
1581          /*
1582           * It is essential that we wait here without a special wait count and
1583           * without the group in use.  Otherwise we could trigger a wait ping
1584           * pong with another recycle waiter.  The state of the buffer is
1585           * arbitrary afterwards.
1586           */
1587          rtems_bdbuf_anonymous_wait (&bdbuf_cache.buffer_waiters);
1588          return false;
1589        }
1590      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
1591      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
1592      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
1593      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
1594        rtems_bdbuf_wait (bd, &bdbuf_cache.access_waiters);
1595        break;
1596      case RTEMS_BDBUF_STATE_SYNC:
1597      case RTEMS_BDBUF_STATE_TRANSFER:
1598      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
1599        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
1600        break;
1601      default:
1602        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_8);
1603    }
1604  }
1605}
1606
1607static void
1608rtems_bdbuf_wait_for_sync_done (rtems_bdbuf_buffer *bd)
1609{
1610  while (true)
1611  {
1612    switch (bd->state)
1613    {
1614      case RTEMS_BDBUF_STATE_CACHED:
1615      case RTEMS_BDBUF_STATE_EMPTY:
1616      case RTEMS_BDBUF_STATE_MODIFIED:
1617      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
1618      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
1619      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
1620      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
1621        return;
1622      case RTEMS_BDBUF_STATE_SYNC:
1623      case RTEMS_BDBUF_STATE_TRANSFER:
1624      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
1625        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
1626        break;
1627      default:
1628        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_9);
1629    }
1630  }
1631}
1632
1633static void
1634rtems_bdbuf_wait_for_buffer (void)
1635{
1636  if (!rtems_chain_is_empty (&bdbuf_cache.modified))
1637    rtems_bdbuf_wake_swapper ();
1638
1639  rtems_bdbuf_anonymous_wait (&bdbuf_cache.buffer_waiters);
1640}
1641
1642static void
1643rtems_bdbuf_sync_after_access (rtems_bdbuf_buffer *bd)
1644{
1645  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_SYNC);
1646
1647  rtems_chain_append (&bdbuf_cache.sync, &bd->link);
1648
1649  if (bd->waiters)
1650    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
1651
1652  rtems_bdbuf_wake_swapper ();
1653  rtems_bdbuf_wait_for_sync_done (bd);
1654
1655  /*
1656   * We may have created a cached or empty buffer which may be recycled.
1657   */
1658  if (bd->waiters == 0
1659        && (bd->state == RTEMS_BDBUF_STATE_CACHED
1660          || bd->state == RTEMS_BDBUF_STATE_EMPTY))
1661  {
1662    if (bd->state == RTEMS_BDBUF_STATE_EMPTY)
1663    {
1664      rtems_bdbuf_remove_from_tree (bd);
1665      rtems_bdbuf_make_free_and_add_to_lru_list (bd);
1666    }
1667    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1668  }
1669}
1670
1671static rtems_bdbuf_buffer *
1672rtems_bdbuf_get_buffer_for_read_ahead (dev_t             dev,
1673                                       rtems_blkdev_bnum block,
1674                                       size_t            bds_per_group)
1675{
1676  rtems_bdbuf_buffer *bd = NULL;
1677
1678  bd = rtems_bdbuf_avl_search (&bdbuf_cache.tree, dev, block);
1679
1680  if (bd == NULL)
1681  {
1682    bd = rtems_bdbuf_get_buffer_from_lru_list (dev, block, bds_per_group);
1683
1684    if (bd != NULL)
1685      rtems_bdbuf_group_obtain (bd);
1686  }
1687  else
1688    /*
1689     * The buffer is in the cache.  So it is already available or in use, and
1690     * thus no need for a read ahead.
1691     */
1692    bd = NULL;
1693
1694  return bd;
1695}
1696
1697static rtems_bdbuf_buffer *
1698rtems_bdbuf_get_buffer_for_access (dev_t             dev,
1699                                   rtems_blkdev_bnum block,
1700                                   size_t            bds_per_group)
1701{
1702  rtems_bdbuf_buffer *bd = NULL;
1703
1704  do
1705  {
1706    bd = rtems_bdbuf_avl_search (&bdbuf_cache.tree, dev, block);
1707
1708    if (bd != NULL)
1709    {
1710      if (bd->group->bds_per_group != bds_per_group)
1711      {
1712        if (rtems_bdbuf_wait_for_recycle (bd))
1713        {
1714          rtems_bdbuf_remove_from_tree_and_lru_list (bd);
1715          rtems_bdbuf_make_free_and_add_to_lru_list (bd);
1716          rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1717        }
1718        bd = NULL;
1719      }
1720    }
1721    else
1722    {
1723      bd = rtems_bdbuf_get_buffer_from_lru_list (dev, block, bds_per_group);
1724
1725      if (bd == NULL)
1726        rtems_bdbuf_wait_for_buffer ();
1727    }
1728  }
1729  while (bd == NULL);
1730
1731  rtems_bdbuf_wait_for_access (bd);
1732  rtems_bdbuf_group_obtain (bd);
1733
1734  return bd;
1735}
1736
1737static rtems_status_code
1738rtems_bdbuf_obtain_disk (dev_t               dev,
1739                         rtems_blkdev_bnum   block,
1740                         rtems_disk_device **dd_ptr,
1741                         rtems_blkdev_bnum  *media_block_ptr,
1742                         size_t             *bds_per_group_ptr)
1743{
1744  rtems_disk_device *dd = NULL;
1745
1746  if (!bdbuf_cache.initialised)
1747    return RTEMS_NOT_CONFIGURED;
1748
1749  /*
1750   * Do not hold the cache lock when obtaining the disk table.
1751   */
1752  dd = rtems_disk_obtain (dev);
1753  if (dd == NULL)
1754    return RTEMS_INVALID_ID;
1755
1756  *dd_ptr = dd;
1757
1758  if (media_block_ptr != NULL)
1759  {
1760    /*
1761     * Compute the media block number. Drivers work with media block number not
1762     * the block number a BD may have as this depends on the block size set by
1763     * the user.
1764     */
1765    rtems_blkdev_bnum mb = rtems_bdbuf_media_block (block,
1766                                                    dd->block_size,
1767                                                    dd->media_block_size);
1768    if (mb >= dd->size)
1769    {
1770      rtems_disk_release(dd);
1771      return RTEMS_INVALID_NUMBER;
1772    }
1773
1774    *media_block_ptr = mb + dd->start;
1775  }
1776
1777  if (bds_per_group_ptr != NULL)
1778  {
1779    size_t bds_per_group = rtems_bdbuf_bds_per_group (dd->block_size);
1780
1781    if (bds_per_group == 0)
1782    {
1783      rtems_disk_release (dd);
1784      return RTEMS_INVALID_NUMBER;
1785    }
1786
1787    *bds_per_group_ptr = bds_per_group;
1788  }
1789
1790  return RTEMS_SUCCESSFUL;
1791}
1792
1793static void
1794rtems_bdbuf_release_disk (rtems_disk_device *dd)
1795{
1796  rtems_status_code sc = RTEMS_SUCCESSFUL;
1797
1798  sc = rtems_disk_release (dd);
1799  if (sc != RTEMS_SUCCESSFUL)
1800    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_DISK_REL);
1801}
1802
1803rtems_status_code
1804rtems_bdbuf_get (dev_t                dev,
1805                 rtems_blkdev_bnum    block,
1806                 rtems_bdbuf_buffer **bd_ptr)
1807{
1808  rtems_status_code   sc = RTEMS_SUCCESSFUL;
1809  rtems_disk_device  *dd = NULL;
1810  rtems_bdbuf_buffer *bd = NULL;
1811  rtems_blkdev_bnum   media_block = 0;
1812  size_t              bds_per_group = 0;
1813
1814  sc = rtems_bdbuf_obtain_disk (dev, block, &dd, &media_block, &bds_per_group);
1815  if (sc != RTEMS_SUCCESSFUL)
1816    return sc;
1817
1818  rtems_bdbuf_lock_cache ();
1819
1820  /*
1821   * Print the block index relative to the physical disk.
1822   */
1823  if (rtems_bdbuf_tracer)
1824    printf ("bdbuf:get: %" PRIu32 " (%" PRIu32 ") (dev = %08x)\n",
1825            media_block, block, (unsigned) dev);
1826
1827  bd = rtems_bdbuf_get_buffer_for_access (dev, media_block, bds_per_group);
1828
1829  switch (bd->state)
1830  {
1831    case RTEMS_BDBUF_STATE_CACHED:
1832      rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED);
1833      break;
1834    case RTEMS_BDBUF_STATE_EMPTY:
1835      rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_EMPTY);
1836      break;
1837    case RTEMS_BDBUF_STATE_MODIFIED:
1838      /*
1839       * To get a modified buffer could be considered a bug in the caller
1840       * because you should not be getting an already modified buffer but user
1841       * may have modified a byte in a block then decided to seek the start and
1842       * write the whole block and the file system will have no record of this
1843       * so just gets the block to fill.
1844       */
1845      rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_MODIFIED);
1846      break;
1847    default:
1848      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_2);
1849      break;
1850  }
1851
1852  if (rtems_bdbuf_tracer)
1853  {
1854    rtems_bdbuf_show_users ("get", bd);
1855    rtems_bdbuf_show_usage ();
1856  }
1857
1858  rtems_bdbuf_unlock_cache ();
1859
1860  rtems_bdbuf_release_disk (dd);
1861
1862  *bd_ptr = bd;
1863
1864  return RTEMS_SUCCESSFUL;
1865}
1866
1867/**
1868 * Call back handler called by the low level driver when the transfer has
1869 * completed. This function may be invoked from interrupt handler.
1870 *
1871 * @param arg Arbitrary argument specified in block device request
1872 *            structure (in this case - pointer to the appropriate
1873 *            block device request structure).
1874 * @param status I/O completion status
1875 */
1876static void
1877rtems_bdbuf_transfer_done (void* arg, rtems_status_code status)
1878{
1879  rtems_blkdev_request* req = (rtems_blkdev_request*) arg;
1880
1881  req->status = status;
1882
1883  rtems_event_send (req->io_task, RTEMS_BDBUF_TRANSFER_SYNC);
1884}
1885
1886static void
1887rtems_bdbuf_create_read_request (const rtems_disk_device *dd,
1888                                 rtems_blkdev_bnum        media_block,
1889                                 size_t                   bds_per_group,
1890                                 rtems_blkdev_request    *req,
1891                                 rtems_bdbuf_buffer     **bd_ptr)
1892{
1893  rtems_bdbuf_buffer *bd = NULL;
1894  rtems_blkdev_bnum   media_block_end = dd->start + dd->size;
1895  rtems_blkdev_bnum   media_block_count = dd->block_size / dd->media_block_size;
1896  dev_t               dev = dd->dev;
1897  uint32_t            block_size = dd->block_size;
1898  uint32_t            transfer_index = 1;
1899  uint32_t            transfer_count = bdbuf_config.max_read_ahead_blocks + 1;
1900
1901  if (media_block_end - media_block < transfer_count)
1902    transfer_count = media_block_end - media_block;
1903
1904  req->req = RTEMS_BLKDEV_REQ_READ;
1905  req->req_done = rtems_bdbuf_transfer_done;
1906  req->done_arg = req;
1907  req->io_task = rtems_task_self ();
1908  req->status = RTEMS_RESOURCE_IN_USE;
1909  req->bufnum = 0;
1910
1911  bd = rtems_bdbuf_get_buffer_for_access (dev, media_block, bds_per_group);
1912
1913  *bd_ptr = bd;
1914
1915  req->bufs [0].user   = bd;
1916  req->bufs [0].block  = media_block;
1917  req->bufs [0].length = block_size;
1918  req->bufs [0].buffer = bd->buffer;
1919
1920  if (rtems_bdbuf_tracer)
1921    rtems_bdbuf_show_users ("read", bd);
1922
1923  switch (bd->state)
1924  {
1925    case RTEMS_BDBUF_STATE_CACHED:
1926    case RTEMS_BDBUF_STATE_MODIFIED:
1927      return;
1928    case RTEMS_BDBUF_STATE_EMPTY:
1929      rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);
1930      break;
1931    default:
1932      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_1);
1933      break;
1934  }
1935
1936  while (transfer_index < transfer_count)
1937  {
1938    media_block += media_block_count;
1939
1940    bd = rtems_bdbuf_get_buffer_for_read_ahead (dev, media_block,
1941                                                bds_per_group);
1942
1943    if (bd == NULL)
1944      break;
1945
1946    rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);
1947
1948    req->bufs [transfer_index].user   = bd;
1949    req->bufs [transfer_index].block  = media_block;
1950    req->bufs [transfer_index].length = block_size;
1951    req->bufs [transfer_index].buffer = bd->buffer;
1952
1953    if (rtems_bdbuf_tracer)
1954      rtems_bdbuf_show_users ("read-ahead", bd);
1955
1956    ++transfer_index;
1957  }
1958
1959  req->bufnum = transfer_index;
1960}
1961
1962static rtems_status_code
1963rtems_bdbuf_execute_transfer_request (const rtems_disk_device *dd,
1964                                      rtems_blkdev_request    *req,
1965                                      bool                     cache_locked)
1966{
1967  rtems_status_code sc = RTEMS_SUCCESSFUL;
1968  int result = 0;
1969  uint32_t transfer_index = 0;
1970  bool wake_transfer_waiters = false;
1971  bool wake_buffer_waiters = false;
1972
1973  if (cache_locked)
1974    rtems_bdbuf_unlock_cache ();
1975
1976  result = dd->ioctl (dd->phys_dev, RTEMS_BLKIO_REQUEST, req);
1977
1978  if (result == 0)
1979  {
1980    rtems_bdbuf_wait_for_event (RTEMS_BDBUF_TRANSFER_SYNC);
1981    sc = req->status;
1982  }
1983  else
1984    sc = RTEMS_IO_ERROR;
1985
1986  rtems_bdbuf_lock_cache ();
1987
1988  for (transfer_index = 0; transfer_index < req->bufnum; ++transfer_index)
1989  {
1990    rtems_bdbuf_buffer *bd = req->bufs [transfer_index].user;
1991    bool waiters = bd->waiters;
1992
1993    if (waiters)
1994      wake_transfer_waiters = true;
1995    else
1996      wake_buffer_waiters = true;
1997
1998    rtems_bdbuf_group_release (bd);
1999
2000    if (sc == RTEMS_SUCCESSFUL && bd->state == RTEMS_BDBUF_STATE_TRANSFER)
2001      rtems_bdbuf_make_cached_and_add_to_lru_list (bd);
2002    else
2003      rtems_bdbuf_discard_buffer (bd);
2004
2005    if (rtems_bdbuf_tracer)
2006      rtems_bdbuf_show_users ("transfer", bd);
2007  }
2008
2009  if (wake_transfer_waiters)
2010    rtems_bdbuf_wake (&bdbuf_cache.transfer_waiters);
2011
2012  if (wake_buffer_waiters)
2013    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
2014
2015  if (!cache_locked)
2016    rtems_bdbuf_unlock_cache ();
2017
2018  if (sc == RTEMS_SUCCESSFUL || sc == RTEMS_UNSATISFIED)
2019    return sc;
2020  else
2021    return RTEMS_IO_ERROR;
2022}
2023
2024rtems_status_code
2025rtems_bdbuf_read (dev_t                dev,
2026                  rtems_blkdev_bnum    block,
2027                  rtems_bdbuf_buffer **bd_ptr)
2028{
2029  rtems_status_code     sc = RTEMS_SUCCESSFUL;
2030  rtems_disk_device    *dd = NULL;
2031  rtems_blkdev_request *req = NULL;
2032  rtems_bdbuf_buffer   *bd = NULL;
2033  rtems_blkdev_bnum     media_block = 0;
2034  size_t                bds_per_group = 0;
2035
2036  sc = rtems_bdbuf_obtain_disk (dev, block, &dd, &media_block, &bds_per_group);
2037  if (sc != RTEMS_SUCCESSFUL)
2038    return sc;
2039
2040  /*
2041   * TODO: This type of request structure is wrong and should be removed.
2042   */
2043#define bdbuf_alloc(size) __builtin_alloca (size)
2044
2045  req = bdbuf_alloc (sizeof (rtems_blkdev_request) +
2046                     sizeof (rtems_blkdev_sg_buffer) *
2047                      (bdbuf_config.max_read_ahead_blocks + 1));
2048
2049  if (rtems_bdbuf_tracer)
2050    printf ("bdbuf:read: %" PRIu32 " (%" PRIu32 ") (dev = %08x)\n",
2051            media_block + dd->start, block, (unsigned) dev);
2052
2053  rtems_bdbuf_lock_cache ();
2054  rtems_bdbuf_create_read_request (dd, media_block, bds_per_group, req, &bd);
2055
2056  if (req->bufnum > 0)
2057  {
2058    sc = rtems_bdbuf_execute_transfer_request (dd, req, true);
2059    if (sc == RTEMS_SUCCESSFUL)
2060    {
2061      rtems_chain_extract (&bd->link);
2062      rtems_bdbuf_group_obtain (bd);
2063    }
2064  }
2065
2066  if (sc == RTEMS_SUCCESSFUL)
2067  {
2068    switch (bd->state)
2069    {
2070      case RTEMS_BDBUF_STATE_CACHED:
2071        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED);
2072        break;
2073      case RTEMS_BDBUF_STATE_MODIFIED:
2074        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_MODIFIED);
2075        break;
2076      default:
2077        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_4);
2078        break;
2079    }
2080
2081    if (rtems_bdbuf_tracer)
2082    {
2083      rtems_bdbuf_show_users ("read", bd);
2084      rtems_bdbuf_show_usage ();
2085    }
2086
2087    *bd_ptr = bd;
2088  }
2089  else
2090    *bd_ptr = NULL;
2091
2092  rtems_bdbuf_unlock_cache ();
2093  rtems_bdbuf_release_disk (dd);
2094
2095  return sc;
2096}
2097
2098static rtems_status_code
2099rtems_bdbuf_check_bd_and_lock_cache (rtems_bdbuf_buffer *bd, const char *kind)
2100{
2101  if (!bdbuf_cache.initialised)
2102    return RTEMS_NOT_CONFIGURED;
2103  if (bd == NULL)
2104    return RTEMS_INVALID_ADDRESS;
2105  if (rtems_bdbuf_tracer)
2106  {
2107    printf ("bdbuf:%s: %" PRIu32 "\n", kind, bd->block);
2108    rtems_bdbuf_show_users (kind, bd);
2109  }
2110  rtems_bdbuf_lock_cache();
2111
2112  return RTEMS_SUCCESSFUL;
2113}
2114
2115rtems_status_code
2116rtems_bdbuf_release (rtems_bdbuf_buffer *bd)
2117{
2118  rtems_status_code sc = RTEMS_SUCCESSFUL;
2119
2120  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "release");
2121  if (sc != RTEMS_SUCCESSFUL)
2122    return sc;
2123
2124  switch (bd->state)
2125  {
2126    case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2127      rtems_bdbuf_add_to_lru_list_after_access (bd);
2128      break;
2129    case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
2130    case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2131      rtems_bdbuf_discard_buffer_after_access (bd);
2132      break;
2133    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2134      rtems_bdbuf_add_to_modified_list_after_access (bd);
2135      break;
2136    default:
2137      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_0);
2138      break;
2139  }
2140
2141  if (rtems_bdbuf_tracer)
2142    rtems_bdbuf_show_usage ();
2143
2144  rtems_bdbuf_unlock_cache ();
2145
2146  return RTEMS_SUCCESSFUL;
2147}
2148
2149rtems_status_code
2150rtems_bdbuf_release_modified (rtems_bdbuf_buffer *bd)
2151{
2152  rtems_status_code sc = RTEMS_SUCCESSFUL;
2153
2154  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "release modified");
2155  if (sc != RTEMS_SUCCESSFUL)
2156    return sc;
2157
2158  switch (bd->state)
2159  {
2160    case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2161    case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
2162    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2163      rtems_bdbuf_add_to_modified_list_after_access (bd);
2164      break;
2165    case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2166      rtems_bdbuf_discard_buffer_after_access (bd);
2167      break;
2168    default:
2169      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_6);
2170      break;
2171  }
2172
2173  if (rtems_bdbuf_tracer)
2174    rtems_bdbuf_show_usage ();
2175
2176  rtems_bdbuf_unlock_cache ();
2177
2178  return RTEMS_SUCCESSFUL;
2179}
2180
2181rtems_status_code
2182rtems_bdbuf_sync (rtems_bdbuf_buffer *bd)
2183{
2184  rtems_status_code sc = RTEMS_SUCCESSFUL;
2185
2186  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "sync");
2187  if (sc != RTEMS_SUCCESSFUL)
2188    return sc;
2189
2190  switch (bd->state)
2191  {
2192    case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2193    case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
2194    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2195      rtems_bdbuf_sync_after_access (bd);
2196      break;
2197    case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2198      rtems_bdbuf_discard_buffer_after_access (bd);
2199      break;
2200    default:
2201      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_5);
2202      break;
2203  }
2204
2205  if (rtems_bdbuf_tracer)
2206    rtems_bdbuf_show_usage ();
2207
2208  rtems_bdbuf_unlock_cache ();
2209
2210  return RTEMS_SUCCESSFUL;
2211}
2212
2213rtems_status_code
2214rtems_bdbuf_syncdev (dev_t dev)
2215{
2216  rtems_status_code  sc = RTEMS_SUCCESSFUL;
2217  rtems_disk_device *dd = NULL;
2218
2219  if (rtems_bdbuf_tracer)
2220    printf ("bdbuf:syncdev: %08x\n", (unsigned) dev);
2221
2222  sc = rtems_bdbuf_obtain_disk (dev, 0, &dd, NULL, NULL);
2223  if (sc != RTEMS_SUCCESSFUL)
2224    return sc;
2225
2226  /*
2227   * Take the sync lock before locking the cache. Once we have the sync lock we
2228   * can lock the cache. If another thread has the sync lock it will cause this
2229   * thread to block until it owns the sync lock then it can own the cache. The
2230   * sync lock can only be obtained with the cache unlocked.
2231   */
2232  rtems_bdbuf_lock_sync ();
2233  rtems_bdbuf_lock_cache ();
2234
2235  /*
2236   * Set the cache to have a sync active for a specific device and let the swap
2237   * out task know the id of the requester to wake when done.
2238   *
2239   * The swap out task will negate the sync active flag when no more buffers
2240   * for the device are held on the "modified for sync" queues.
2241   */
2242  bdbuf_cache.sync_active    = true;
2243  bdbuf_cache.sync_requester = rtems_task_self ();
2244  bdbuf_cache.sync_device    = dev;
2245
2246  rtems_bdbuf_wake_swapper ();
2247  rtems_bdbuf_unlock_cache ();
2248  rtems_bdbuf_wait_for_event (RTEMS_BDBUF_TRANSFER_SYNC);
2249  rtems_bdbuf_unlock_sync ();
2250  rtems_bdbuf_release_disk (dd);
2251
2252  return RTEMS_SUCCESSFUL;
2253}
2254
2255static int
2256rtems_bdbuf_null_disk_ioctl (rtems_disk_device *dd, uint32_t req, void *arg)
2257{
2258  return -1;
2259}
2260
2261/**
2262 * Swapout transfer to the driver. The driver will break this I/O into groups
2263 * of consecutive write requests is multiple consecutive buffers are required
2264 * by the driver. The cache is not locked.
2265 *
2266 * @param transfer The transfer transaction.
2267 */
2268static void
2269rtems_bdbuf_swapout_write (rtems_bdbuf_swapout_transfer* transfer)
2270{
2271  rtems_chain_node *node;
2272  static rtems_disk_device null_disk = {
2273    .phys_dev = &null_disk,
2274    .capabilities = 0,
2275    .ioctl = rtems_bdbuf_null_disk_ioctl
2276  };
2277
2278  if (rtems_bdbuf_tracer)
2279    printf ("bdbuf:swapout transfer: %08x\n", (unsigned) transfer->dev);
2280
2281  /*
2282   * If there are buffers to transfer to the media transfer them.
2283   */
2284  if (!rtems_chain_is_empty (&transfer->bds))
2285  {
2286    /*
2287     * The last block number used when the driver only supports
2288     * continuous blocks in a single request.
2289     */
2290    uint32_t last_block = 0;
2291
2292    /*
2293     * Number of buffers per bd. This is used to detect the next
2294     * block.
2295     */
2296    uint32_t bufs_per_bd = 0;
2297
2298    /*
2299     * Obtain the disk device. The cache's mutex has been released to avoid a
2300     * dead lock.
2301     */
2302    rtems_disk_device *dd = rtems_disk_obtain (transfer->dev);
2303
2304    if (dd == NULL)
2305      dd = &null_disk;
2306
2307    bufs_per_bd = dd->block_size / bdbuf_config.buffer_min;
2308
2309    /*
2310     * Take as many buffers as configured and pass to the driver. Note, the
2311     * API to the drivers has an array of buffers and if a chain was passed
2312     * we could have just passed the list. If the driver API is updated it
2313     * should be possible to make this change with little effect in this
2314     * code. The array that is passed is broken in design and should be
2315     * removed. Merging members of a struct into the first member is
2316     * trouble waiting to happen.
2317     */
2318    transfer->write_req->status = RTEMS_RESOURCE_IN_USE;
2319    transfer->write_req->bufnum = 0;
2320
2321    while ((node = rtems_chain_get(&transfer->bds)) != NULL)
2322    {
2323      rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;
2324      bool                write = false;
2325
2326      /*
2327       * If the device only accepts sequential buffers and this is not the
2328       * first buffer (the first is always sequential, and the buffer is not
2329       * sequential then put the buffer back on the transfer chain and write
2330       * the committed buffers.
2331       */
2332
2333      if (rtems_bdbuf_tracer)
2334        printf ("bdbuf:swapout write: bd:%" PRIu32 ", bufnum:%" PRIu32 " mode:%s\n",
2335                bd->block, transfer->write_req->bufnum,
2336                dd->phys_dev->capabilities &
2337                RTEMS_BLKDEV_CAP_MULTISECTOR_CONT ? "MULIT" : "SCAT");
2338
2339      if ((dd->phys_dev->capabilities & RTEMS_BLKDEV_CAP_MULTISECTOR_CONT) &&
2340          transfer->write_req->bufnum &&
2341          (bd->block != (last_block + bufs_per_bd)))
2342      {
2343        rtems_chain_prepend (&transfer->bds, &bd->link);
2344        write = true;
2345      }
2346      else
2347      {
2348        rtems_blkdev_sg_buffer* buf;
2349        buf = &transfer->write_req->bufs[transfer->write_req->bufnum];
2350        transfer->write_req->bufnum++;
2351        buf->user   = bd;
2352        buf->block  = bd->block;
2353        buf->length = dd->block_size;
2354        buf->buffer = bd->buffer;
2355        last_block  = bd->block;
2356      }
2357
2358      /*
2359       * Perform the transfer if there are no more buffers, or the transfer
2360       * size has reached the configured max. value.
2361       */
2362
2363      if (rtems_chain_is_empty (&transfer->bds) ||
2364          (transfer->write_req->bufnum >= bdbuf_config.max_write_blocks))
2365        write = true;
2366
2367      if (write)
2368      {
2369        rtems_bdbuf_execute_transfer_request (dd, transfer->write_req, false);
2370
2371        transfer->write_req->status = RTEMS_RESOURCE_IN_USE;
2372        transfer->write_req->bufnum = 0;
2373      }
2374    }
2375
2376    if (dd != &null_disk)
2377    {
2378      /*
2379       * If sync'ing and the deivce is capability of handling a sync IO control
2380       * call perform the call.
2381       */
2382      if (transfer->syncing &&
2383          (dd->phys_dev->capabilities & RTEMS_BLKDEV_CAP_SYNC))
2384      {
2385        /* int result = */ dd->ioctl (dd->phys_dev, RTEMS_BLKDEV_REQ_SYNC, NULL);
2386        /* How should the error be handled ? */
2387      }
2388     
2389      rtems_disk_release (dd);
2390    }
2391  }
2392}
2393
2394/**
2395 * Process the modified list of buffers. There is a sync or modified list that
2396 * needs to be handled so we have a common function to do the work.
2397 *
2398 * @param dev The device to handle. If BDBUF_INVALID_DEV no device is selected
2399 * so select the device of the first buffer to be written to disk.
2400 * @param chain The modified chain to process.
2401 * @param transfer The chain to append buffers to be written too.
2402 * @param sync_active If true this is a sync operation so expire all timers.
2403 * @param update_timers If true update the timers.
2404 * @param timer_delta It update_timers is true update the timers by this
2405 *                    amount.
2406 */
2407static void
2408rtems_bdbuf_swapout_modified_processing (dev_t*               dev,
2409                                         rtems_chain_control* chain,
2410                                         rtems_chain_control* transfer,
2411                                         bool                 sync_active,
2412                                         bool                 update_timers,
2413                                         uint32_t             timer_delta)
2414{
2415  if (!rtems_chain_is_empty (chain))
2416  {
2417    rtems_chain_node* node = rtems_chain_head (chain);
2418    bool              sync_all;
2419   
2420    node = node->next;
2421
2422    /*
2423     * A sync active with no valid dev means sync all.
2424     */
2425    if (sync_active && (*dev == BDBUF_INVALID_DEV))
2426      sync_all = true;
2427    else
2428      sync_all = false;
2429   
2430    while (!rtems_chain_is_tail (chain, node))
2431    {
2432      rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;
2433
2434      /*
2435       * Check if the buffer's hold timer has reached 0. If a sync is active
2436       * or someone waits for a buffer written force all the timers to 0.
2437       *
2438       * @note Lots of sync requests will skew this timer. It should be based
2439       *       on TOD to be accurate. Does it matter ?
2440       */
2441      if (sync_all || (sync_active && (*dev == bd->dev))
2442          || rtems_bdbuf_has_buffer_waiters ())
2443        bd->hold_timer = 0;
2444
2445      if (bd->hold_timer)
2446      {
2447        if (update_timers)
2448        {
2449          if (bd->hold_timer > timer_delta)
2450            bd->hold_timer -= timer_delta;
2451          else
2452            bd->hold_timer = 0;
2453        }
2454
2455        if (bd->hold_timer)
2456        {
2457          node = node->next;
2458          continue;
2459        }
2460      }
2461
2462      /*
2463       * This assumes we can set dev_t to BDBUF_INVALID_DEV which is just an
2464       * assumption. Cannot use the transfer list being empty the sync dev
2465       * calls sets the dev to use.
2466       */
2467      if (*dev == BDBUF_INVALID_DEV)
2468        *dev = bd->dev;
2469
2470      if (bd->dev == *dev)
2471      {
2472        rtems_chain_node* next_node = node->next;
2473        rtems_chain_node* tnode = rtems_chain_tail (transfer);
2474
2475        /*
2476         * The blocks on the transfer list are sorted in block order. This
2477         * means multi-block transfers for drivers that require consecutive
2478         * blocks perform better with sorted blocks and for real disks it may
2479         * help lower head movement.
2480         */
2481
2482        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);
2483
2484        rtems_chain_extract (node);
2485
2486        tnode = tnode->previous;
2487
2488        while (node && !rtems_chain_is_head (transfer, tnode))
2489        {
2490          rtems_bdbuf_buffer* tbd = (rtems_bdbuf_buffer*) tnode;
2491
2492          if (bd->block > tbd->block)
2493          {
2494            rtems_chain_insert (tnode, node);
2495            node = NULL;
2496          }
2497          else
2498            tnode = tnode->previous;
2499        }
2500
2501        if (node)
2502          rtems_chain_prepend (transfer, node);
2503
2504        node = next_node;
2505      }
2506      else
2507      {
2508        node = node->next;
2509      }
2510    }
2511  }
2512}
2513
2514/**
2515 * Process the cache's modified buffers. Check the sync list first then the
2516 * modified list extracting the buffers suitable to be written to disk. We have
2517 * a device at a time. The task level loop will repeat this operation while
2518 * there are buffers to be written. If the transfer fails place the buffers
2519 * back on the modified list and try again later. The cache is unlocked while
2520 * the buffers are being written to disk.
2521 *
2522 * @param timer_delta It update_timers is true update the timers by this
2523 *                    amount.
2524 * @param update_timers If true update the timers.
2525 * @param transfer The transfer transaction data.
2526 *
2527 * @retval true Buffers where written to disk so scan again.
2528 * @retval false No buffers where written to disk.
2529 */
2530static bool
2531rtems_bdbuf_swapout_processing (unsigned long                 timer_delta,
2532                                bool                          update_timers,
2533                                rtems_bdbuf_swapout_transfer* transfer)
2534{
2535  rtems_bdbuf_swapout_worker* worker;
2536  bool                        transfered_buffers = false;
2537
2538  rtems_bdbuf_lock_cache ();
2539
2540  /*
2541   * If a sync is active do not use a worker because the current code does not
2542   * cleaning up after. We need to know the buffers have been written when
2543   * syncing to release sync lock and currently worker threads do not return to
2544   * here. We do not know the worker is the last in a sequence of sync writes
2545   * until after we have it running so we do not know to tell it to release the
2546   * lock. The simplest solution is to get the main swap out task perform all
2547   * sync operations.
2548   */
2549  if (bdbuf_cache.sync_active)
2550    worker = NULL;
2551  else
2552  {
2553    worker = (rtems_bdbuf_swapout_worker*)
2554      rtems_chain_get (&bdbuf_cache.swapout_workers);
2555    if (worker)
2556      transfer = &worker->transfer;
2557  }
2558
2559  rtems_chain_initialize_empty (&transfer->bds);
2560  transfer->dev = BDBUF_INVALID_DEV;
2561  transfer->syncing = bdbuf_cache.sync_active;
2562 
2563  /*
2564   * When the sync is for a device limit the sync to that device. If the sync
2565   * is for a buffer handle process the devices in the order on the sync
2566   * list. This means the dev is BDBUF_INVALID_DEV.
2567   */
2568  if (bdbuf_cache.sync_active)
2569    transfer->dev = bdbuf_cache.sync_device;
2570   
2571  /*
2572   * If we have any buffers in the sync queue move them to the modified
2573   * list. The first sync buffer will select the device we use.
2574   */
2575  rtems_bdbuf_swapout_modified_processing (&transfer->dev,
2576                                           &bdbuf_cache.sync,
2577                                           &transfer->bds,
2578                                           true, false,
2579                                           timer_delta);
2580
2581  /*
2582   * Process the cache's modified list.
2583   */
2584  rtems_bdbuf_swapout_modified_processing (&transfer->dev,
2585                                           &bdbuf_cache.modified,
2586                                           &transfer->bds,
2587                                           bdbuf_cache.sync_active,
2588                                           update_timers,
2589                                           timer_delta);
2590
2591  /*
2592   * We have all the buffers that have been modified for this device so the
2593   * cache can be unlocked because the state of each buffer has been set to
2594   * TRANSFER.
2595   */
2596  rtems_bdbuf_unlock_cache ();
2597
2598  /*
2599   * If there are buffers to transfer to the media transfer them.
2600   */
2601  if (!rtems_chain_is_empty (&transfer->bds))
2602  {
2603    if (worker)
2604    {
2605      rtems_status_code sc = rtems_event_send (worker->id,
2606                                               RTEMS_BDBUF_SWAPOUT_SYNC);
2607      if (sc != RTEMS_SUCCESSFUL)
2608        rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE);
2609    }
2610    else
2611    {
2612      rtems_bdbuf_swapout_write (transfer);
2613    }
2614
2615    transfered_buffers = true;
2616  }
2617
2618  if (bdbuf_cache.sync_active && !transfered_buffers)
2619  {
2620    rtems_id sync_requester;
2621    rtems_bdbuf_lock_cache ();
2622    sync_requester = bdbuf_cache.sync_requester;
2623    bdbuf_cache.sync_active = false;
2624    bdbuf_cache.sync_requester = 0;
2625    rtems_bdbuf_unlock_cache ();
2626    if (sync_requester)
2627      rtems_event_send (sync_requester, RTEMS_BDBUF_TRANSFER_SYNC);
2628  }
2629
2630  return transfered_buffers;
2631}
2632
2633/**
2634 * Allocate the write request and initialise it for good measure.
2635 *
2636 * @return rtems_blkdev_request* The write reference memory.
2637 */
2638static rtems_blkdev_request*
2639rtems_bdbuf_swapout_writereq_alloc (void)
2640{
2641  /*
2642   * @note chrisj The rtems_blkdev_request and the array at the end is a hack.
2643   * I am disappointment at finding code like this in RTEMS. The request should
2644   * have been a rtems_chain_control. Simple, fast and less storage as the node
2645   * is already part of the buffer structure.
2646   */
2647  rtems_blkdev_request* write_req =
2648    malloc (sizeof (rtems_blkdev_request) +
2649            (bdbuf_config.max_write_blocks * sizeof (rtems_blkdev_sg_buffer)));
2650
2651  if (!write_req)
2652    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM);
2653
2654  write_req->req = RTEMS_BLKDEV_REQ_WRITE;
2655  write_req->req_done = rtems_bdbuf_transfer_done;
2656  write_req->done_arg = write_req;
2657  write_req->io_task = rtems_task_self ();
2658
2659  return write_req;
2660}
2661
2662/**
2663 * The swapout worker thread body.
2664 *
2665 * @param arg A pointer to the worker thread's private data.
2666 * @return rtems_task Not used.
2667 */
2668static rtems_task
2669rtems_bdbuf_swapout_worker_task (rtems_task_argument arg)
2670{
2671  rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) arg;
2672
2673  while (worker->enabled)
2674  {
2675    rtems_bdbuf_wait_for_event (RTEMS_BDBUF_SWAPOUT_SYNC);
2676
2677    rtems_bdbuf_swapout_write (&worker->transfer);
2678
2679    rtems_bdbuf_lock_cache ();
2680
2681    rtems_chain_initialize_empty (&worker->transfer.bds);
2682    worker->transfer.dev = BDBUF_INVALID_DEV;
2683
2684    rtems_chain_append (&bdbuf_cache.swapout_workers, &worker->link);
2685
2686    rtems_bdbuf_unlock_cache ();
2687  }
2688
2689  free (worker->transfer.write_req);
2690  free (worker);
2691
2692  rtems_task_delete (RTEMS_SELF);
2693}
2694
2695/**
2696 * Open the swapout worker threads.
2697 */
2698static void
2699rtems_bdbuf_swapout_workers_open (void)
2700{
2701  rtems_status_code sc;
2702  size_t            w;
2703
2704  rtems_bdbuf_lock_cache ();
2705
2706  for (w = 0; w < bdbuf_config.swapout_workers; w++)
2707  {
2708    rtems_bdbuf_swapout_worker* worker;
2709
2710    worker = malloc (sizeof (rtems_bdbuf_swapout_worker));
2711    if (!worker)
2712      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM);
2713
2714    rtems_chain_append (&bdbuf_cache.swapout_workers, &worker->link);
2715    worker->enabled = true;
2716    worker->transfer.write_req = rtems_bdbuf_swapout_writereq_alloc ();
2717
2718    rtems_chain_initialize_empty (&worker->transfer.bds);
2719    worker->transfer.dev = BDBUF_INVALID_DEV;
2720
2721    sc = rtems_task_create (rtems_build_name('B', 'D', 'o', 'a' + w),
2722                            (bdbuf_config.swapout_priority ?
2723                             bdbuf_config.swapout_priority :
2724                             RTEMS_BDBUF_SWAPOUT_TASK_PRIORITY_DEFAULT),
2725                            SWAPOUT_TASK_STACK_SIZE,
2726                            RTEMS_PREEMPT | RTEMS_NO_TIMESLICE | RTEMS_NO_ASR,
2727                            RTEMS_LOCAL | RTEMS_NO_FLOATING_POINT,
2728                            &worker->id);
2729    if (sc != RTEMS_SUCCESSFUL)
2730      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_CREATE);
2731
2732    sc = rtems_task_start (worker->id,
2733                           rtems_bdbuf_swapout_worker_task,
2734                           (rtems_task_argument) worker);
2735    if (sc != RTEMS_SUCCESSFUL)
2736      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_START);
2737  }
2738
2739  rtems_bdbuf_unlock_cache ();
2740}
2741
2742/**
2743 * Close the swapout worker threads.
2744 */
2745static void
2746rtems_bdbuf_swapout_workers_close (void)
2747{
2748  rtems_chain_node* node;
2749
2750  rtems_bdbuf_lock_cache ();
2751
2752  node = rtems_chain_first (&bdbuf_cache.swapout_workers);
2753  while (!rtems_chain_is_tail (&bdbuf_cache.swapout_workers, node))
2754  {
2755    rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) node;
2756    worker->enabled = false;
2757    rtems_event_send (worker->id, RTEMS_BDBUF_SWAPOUT_SYNC);
2758    node = rtems_chain_next (node);
2759  }
2760
2761  rtems_bdbuf_unlock_cache ();
2762}
2763
2764/**
2765 * Body of task which takes care on flushing modified buffers to the disk.
2766 *
2767 * @param arg A pointer to the global cache data. Use the global variable and
2768 *            not this.
2769 * @return rtems_task Not used.
2770 */
2771static rtems_task
2772rtems_bdbuf_swapout_task (rtems_task_argument arg)
2773{
2774  rtems_bdbuf_swapout_transfer transfer;
2775  uint32_t                     period_in_ticks;
2776  const uint32_t               period_in_msecs = bdbuf_config.swapout_period;;
2777  uint32_t                     timer_delta;
2778
2779  transfer.write_req = rtems_bdbuf_swapout_writereq_alloc ();
2780  rtems_chain_initialize_empty (&transfer.bds);
2781  transfer.dev = BDBUF_INVALID_DEV;
2782  transfer.syncing = false;
2783
2784  /*
2785   * Localise the period.
2786   */
2787  period_in_ticks = RTEMS_MICROSECONDS_TO_TICKS (period_in_msecs * 1000);
2788
2789  /*
2790   * This is temporary. Needs to be changed to use the real time clock.
2791   */
2792  timer_delta = period_in_msecs;
2793
2794  /*
2795   * Create the worker threads.
2796   */
2797  rtems_bdbuf_swapout_workers_open ();
2798
2799  while (bdbuf_cache.swapout_enabled)
2800  {
2801    rtems_event_set   out;
2802    rtems_status_code sc;
2803
2804    /*
2805     * Only update the timers once in the processing cycle.
2806     */
2807    bool update_timers = true;
2808
2809    /*
2810     * If we write buffers to any disk perform a check again. We only write a
2811     * single device at a time and the cache may have more than one device's
2812     * buffers modified waiting to be written.
2813     */
2814    bool transfered_buffers;
2815
2816    do
2817    {
2818      transfered_buffers = false;
2819
2820      /*
2821       * Extact all the buffers we find for a specific device. The device is
2822       * the first one we find on a modified list. Process the sync queue of
2823       * buffers first.
2824       */
2825      if (rtems_bdbuf_swapout_processing (timer_delta,
2826                                          update_timers,
2827                                          &transfer))
2828      {
2829        transfered_buffers = true;
2830      }
2831
2832      /*
2833       * Only update the timers once.
2834       */
2835      update_timers = false;
2836    }
2837    while (transfered_buffers);
2838
2839    sc = rtems_event_receive (RTEMS_BDBUF_SWAPOUT_SYNC,
2840                              RTEMS_EVENT_ALL | RTEMS_WAIT,
2841                              period_in_ticks,
2842                              &out);
2843
2844    if ((sc != RTEMS_SUCCESSFUL) && (sc != RTEMS_TIMEOUT))
2845      rtems_fatal_error_occurred (BLKDEV_FATAL_BDBUF_SWAPOUT_RE);
2846  }
2847
2848  rtems_bdbuf_swapout_workers_close ();
2849
2850  free (transfer.write_req);
2851
2852  rtems_task_delete (RTEMS_SELF);
2853}
2854
2855static void
2856rtems_bdbuf_purge_list (rtems_chain_control *purge_list)
2857{
2858  bool wake_buffer_waiters = false;
2859  rtems_chain_node *node = NULL;
2860
2861  while ((node = rtems_chain_get (purge_list)) != NULL)
2862  {
2863    rtems_bdbuf_buffer *bd = (rtems_bdbuf_buffer *) node;
2864
2865    if (bd->waiters == 0)
2866      wake_buffer_waiters = true;
2867
2868    rtems_bdbuf_discard_buffer (bd);
2869  }
2870
2871  if (wake_buffer_waiters)
2872    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
2873}
2874
2875typedef bool (*rtems_bdbuf_purge_compare)(dev_t a, dev_t b);
2876
2877static void
2878rtems_bdbuf_gather_for_purge (rtems_chain_control *purge_list,
2879                              rtems_bdbuf_purge_compare compare,
2880                              dev_t dev)
2881{
2882  rtems_bdbuf_buffer *stack [RTEMS_BDBUF_AVL_MAX_HEIGHT];
2883  rtems_bdbuf_buffer **prev = stack;
2884  rtems_bdbuf_buffer *cur = bdbuf_cache.tree;
2885
2886  *prev = NULL;
2887
2888  while (cur != NULL)
2889  {
2890    if ((*compare) (cur->dev, dev))
2891    {
2892      switch (cur->state)
2893      {
2894        case RTEMS_BDBUF_STATE_FREE:
2895        case RTEMS_BDBUF_STATE_EMPTY:
2896        case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2897        case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
2898          break;
2899        case RTEMS_BDBUF_STATE_SYNC:
2900          rtems_bdbuf_wake (&bdbuf_cache.transfer_waiters);
2901          /* Fall through */
2902        case RTEMS_BDBUF_STATE_MODIFIED:
2903          rtems_bdbuf_group_release (cur);
2904          /* Fall through */
2905        case RTEMS_BDBUF_STATE_CACHED:
2906          rtems_chain_extract (&cur->link);
2907          rtems_chain_append (purge_list, &cur->link);
2908          break;
2909        case RTEMS_BDBUF_STATE_TRANSFER:
2910          rtems_bdbuf_set_state (cur, RTEMS_BDBUF_STATE_TRANSFER_PURGED);
2911          break;
2912        case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2913        case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
2914        case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2915          rtems_bdbuf_set_state (cur, RTEMS_BDBUF_STATE_ACCESS_PURGED);
2916          break;
2917        default:
2918          rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_STATE_11);
2919      }
2920    }
2921
2922    if (cur->avl.left != NULL)
2923    {
2924      /* Left */
2925      ++prev;
2926      *prev = cur;
2927      cur = cur->avl.left;
2928    }
2929    else if (cur->avl.right != NULL)
2930    {
2931      /* Right */
2932      ++prev;
2933      *prev = cur;
2934      cur = cur->avl.right;
2935    }
2936    else
2937    {
2938      while (*prev != NULL && cur == (*prev)->avl.right)
2939      {
2940        /* Up */
2941        cur = *prev;
2942        --prev;
2943      }
2944      if (*prev != NULL)
2945        /* Right */
2946        cur = (*prev)->avl.right;
2947      else
2948        /* Finished */
2949        cur = NULL;
2950    }
2951  }
2952}
2953
2954static void
2955rtems_bdbuf_purge (rtems_bdbuf_purge_compare compare, dev_t dev)
2956{
2957  rtems_chain_control purge_list;
2958
2959  rtems_chain_initialize_empty (&purge_list);
2960  rtems_bdbuf_lock_cache ();
2961  rtems_bdbuf_gather_for_purge (&purge_list, compare, dev);
2962  rtems_bdbuf_purge_list (&purge_list);
2963  rtems_bdbuf_unlock_cache ();
2964}
2965
/**
 * Purge predicate matching a single device.
 *
 * @param a The device of the buffer.
 * @param b The device to purge.
 *
 * @return true if both device identifiers are equal.
 */
static bool
rtems_bdbuf_purge_compare_dev (dev_t a, dev_t b)
{
  const bool same_device = (a == b);

  return same_device;
}
2971
2972void
2973rtems_bdbuf_purge_dev (dev_t dev)
2974{
2975  rtems_bdbuf_purge (rtems_bdbuf_purge_compare_dev, dev);
2976}
2977
/**
 * Purge predicate matching every device with the same major number.
 *
 * @param a The device of the buffer.
 * @param b A device carrying the major number to purge.
 *
 * @return true if the major numbers of both devices are equal.
 */
static bool
rtems_bdbuf_purge_compare_major (dev_t a, dev_t b)
{
  if (rtems_filesystem_dev_major_t (a) == rtems_filesystem_dev_major_t (b))
    return true;

  return false;
}
2983
2984void
2985rtems_bdbuf_purge_major (rtems_device_major_number major)
2986{
2987  dev_t dev = rtems_filesystem_make_dev_t (major, 0);
2988
2989  rtems_bdbuf_purge (rtems_bdbuf_purge_compare_major, dev);
2990}
Note: See TracBrowser for help on using the repository browser.