source: rtems/cpukit/libblock/src/bdbuf.c @ 796967c

4.115
Last change on this file since 796967c was 796967c, checked in by Sebastian Huber <sebastian.huber@…>, on 02/28/12 at 16:19:49

libblock: Change bdbuf API

The functions

o rtems_bdbuf_get(),
o rtems_bdbuf_read(),
o rtems_bdbuf_syncdev(), and
o rtems_bdbuf_purge_dev(),

use now the disk device instead of the device identifier. This makes
bdbuf independent of rtems_disk_obtain() and rtems_disk_release(). It
is the responsibility of the file system to obtain the disk device. This
also reduces the overhead to get a buffer.

The key for the AVL tree uses now the disk device instead of the device
identifier. The pointer is interpreted as an unsigned integer. This
reduces the memory overhead and makes the comparison operation a bit
faster.

Removed function rtems_bdbuf_purge_major(). This function was too
destructive and could have unpredictable side effects.

  • Property mode set to 100644
File size: 79.2 KB
Line 
1/**
2 * @file
3 *
4 * @ingroup rtems_bdbuf
5 *
6 * Block device buffer management.
7 */
8
9/*
10 * Disk I/O buffering
 * Buffer management
12 *
13 * Copyright (C) 2001 OKTET Ltd., St.-Peterburg, Russia
14 * Author: Andrey G. Ivanov <Andrey.Ivanov@oktet.ru>
15 *         Victor V. Vengerov <vvv@oktet.ru>
16 *         Alexander Kukuta <kam@oktet.ru>
17 *
18 * Copyright (C) 2008,2009 Chris Johns <chrisj@rtems.org>
19 *    Rewritten to remove score mutex access. Fixes many performance
20 *    issues.
21 *
22 * Copyright (c) 2009-2012 embedded brains GmbH.
23 *
24 * @(#) bdbuf.c,v 1.14 2004/04/17 08:15:17 ralf Exp
25 */
26
27/**
28 * Set to 1 to enable debug tracing.
29 */
30#define RTEMS_BDBUF_TRACE 0
31
32#if HAVE_CONFIG_H
33#include "config.h"
34#endif
35#include <limits.h>
36#include <errno.h>
37#include <stdio.h>
38#include <string.h>
39#include <inttypes.h>
40
41#include <rtems.h>
42#include <rtems/error.h>
43#include <rtems/malloc.h>
44
45#include "rtems/bdbuf.h"
46
47#define BDBUF_INVALID_DEV NULL
48
49/*
50 * Simpler label for this file.
51 */
52#define bdbuf_config rtems_bdbuf_configuration
53
/**
 * A swapout transfer transaction data. This data is passed to a worker thread
 * to handle the write phase of the transfer.
 */
typedef struct rtems_bdbuf_swapout_transfer
{
  rtems_chain_control   bds;         /**< The transfer list of BDs. */
  const rtems_disk_device *dd;       /**< The device the transfer is for. */
  bool                  syncing;     /**< True if this transfer is part of a
                                      * sync operation. */
  rtems_blkdev_request* write_req;   /**< The write request array. */
  uint32_t              bufs_per_bd; /**< Number of buffers per bd. */
} rtems_bdbuf_swapout_transfer;
66
/**
 * Swapout worker thread. These are available to take processing from the
 * main swapout thread and handle the I/O operation.
 */
typedef struct rtems_bdbuf_swapout_worker
{
  rtems_chain_node             link;     /**< The threads sit on a chain when
                                          * idle. */
  rtems_id                     id;       /**< The id of the task so we can wake
                                          * it. */
  bool                         enabled;  /**< The worker is enabled; set to
                                          * false to ask the worker to exit. */
  rtems_bdbuf_swapout_transfer transfer; /**< The transfer data for this
                                          * thread. */
} rtems_bdbuf_swapout_worker;
81
/**
 * Buffer waiters synchronization.
 */
typedef struct rtems_bdbuf_waiters {
  unsigned count; /**< Number of tasks currently blocked on this waiter; a
                   * non-zero count means a wake must flush the semaphore. */
  rtems_id sema;  /**< Simple binary semaphore the waiters block on; it is
                   * only ever flushed, never released. */
} rtems_bdbuf_waiters;
89
/**
 * The BD buffer cache.
 *
 * The chains, the AVL tree and the buffer descriptors below are all guarded
 * by the cache lock; the sync lock serializes sync requests against writers.
 */
typedef struct rtems_bdbuf_cache
{
  rtems_id            swapout;           /**< Swapout task ID */
  bool                swapout_enabled;   /**< Swapout is only running if
                                          * enabled. Set to false to kill the
                                          * swap out task. It deletes itself. */
  rtems_chain_control swapout_workers;   /**< The work threads for the swapout
                                          * task. */

  rtems_bdbuf_buffer* bds;               /**< Pointer to table of buffer
                                          * descriptors. */
  void*               buffers;           /**< The buffer's memory. */
  size_t              buffer_min_count;  /**< Number of minimum size buffers
                                          * that fit the buffer memory. */
  size_t              max_bds_per_group; /**< The number of BDs of minimum
                                          * buffer size that fit in a group. */
  uint32_t            flags;             /**< Configuration flags. */

  rtems_id            lock;              /**< The cache lock. It locks all
                                          * cache data, BD and lists. */
  rtems_id            sync_lock;         /**< Sync calls block writes. */
  bool                sync_active;       /**< True if a sync is active. */
  rtems_id            sync_requester;    /**< The sync requester. */
  const rtems_disk_device *sync_device;  /**< The device to sync and
                                          * BDBUF_INVALID_DEV not a device
                                          * sync. */

  rtems_bdbuf_buffer* tree;              /**< Buffer descriptor lookup AVL tree
                                          * root. There is only one. */
  rtems_chain_control lru;               /**< Least recently used list */
  rtems_chain_control modified;          /**< Modified buffers list */
  rtems_chain_control sync;              /**< Buffers to sync list */

  rtems_bdbuf_waiters access_waiters;    /**< Wait for a buffer in
                                          * ACCESS_CACHED, ACCESS_MODIFIED or
                                          * ACCESS_EMPTY
                                          * state. */
  rtems_bdbuf_waiters transfer_waiters;  /**< Wait for a buffer in TRANSFER
                                          * state. */
  rtems_bdbuf_waiters buffer_waiters;    /**< Wait for a buffer and no one is
                                          * available. */

  size_t              group_count;       /**< The number of groups. */
  rtems_bdbuf_group*  groups;            /**< The groups. */

  bool                initialised;       /**< Initialised state. */
} rtems_bdbuf_cache;
140
141/**
142 * Fatal errors
143 */
144#define RTEMS_BLKDEV_FATAL_ERROR(n) \
145  (((uint32_t)'B' << 24) | ((uint32_t)(n) & (uint32_t)0x00FFFFFF))
146
147#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_11      RTEMS_BLKDEV_FATAL_ERROR(1)
148#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_4       RTEMS_BLKDEV_FATAL_ERROR(2)
149#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_5       RTEMS_BLKDEV_FATAL_ERROR(3)
150#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_6       RTEMS_BLKDEV_FATAL_ERROR(4)
151#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_7       RTEMS_BLKDEV_FATAL_ERROR(5)
152#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_8       RTEMS_BLKDEV_FATAL_ERROR(6)
153#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_9       RTEMS_BLKDEV_FATAL_ERROR(7)
154#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_10      RTEMS_BLKDEV_FATAL_ERROR(8)
155#define RTEMS_BLKDEV_FATAL_BDBUF_TREE_RM       RTEMS_BLKDEV_FATAL_ERROR(9)
156#define RTEMS_BLKDEV_FATAL_BDBUF_SWAPOUT       RTEMS_BLKDEV_FATAL_ERROR(10)
157#define RTEMS_BLKDEV_FATAL_BDBUF_SYNC_LOCK     RTEMS_BLKDEV_FATAL_ERROR(11)
158#define RTEMS_BLKDEV_FATAL_BDBUF_SYNC_UNLOCK   RTEMS_BLKDEV_FATAL_ERROR(12)
159#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_LOCK    RTEMS_BLKDEV_FATAL_ERROR(13)
160#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_UNLOCK  RTEMS_BLKDEV_FATAL_ERROR(14)
161#define RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_DIS   RTEMS_BLKDEV_FATAL_ERROR(15)
162#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_2  RTEMS_BLKDEV_FATAL_ERROR(16)
163#define RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_RST   RTEMS_BLKDEV_FATAL_ERROR(17)
164#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_TO RTEMS_BLKDEV_FATAL_ERROR(18)
165#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAKE    RTEMS_BLKDEV_FATAL_ERROR(19)
166#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE       RTEMS_BLKDEV_FATAL_ERROR(20)
167#define RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM      RTEMS_BLKDEV_FATAL_ERROR(21)
168#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_CREATE  RTEMS_BLKDEV_FATAL_ERROR(22)
169#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_START   RTEMS_BLKDEV_FATAL_ERROR(23)
170#define BLKDEV_FATAL_BDBUF_SWAPOUT_RE          RTEMS_BLKDEV_FATAL_ERROR(24)
171#define BLKDEV_FATAL_BDBUF_SWAPOUT_TS          RTEMS_BLKDEV_FATAL_ERROR(25)
172#define RTEMS_BLKDEV_FATAL_BDBUF_WAIT_EVNT     RTEMS_BLKDEV_FATAL_ERROR(26)
173#define RTEMS_BLKDEV_FATAL_BDBUF_RECYCLE       RTEMS_BLKDEV_FATAL_ERROR(27)
174#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_0       RTEMS_BLKDEV_FATAL_ERROR(28)
175#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_1       RTEMS_BLKDEV_FATAL_ERROR(29)
176#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_2       RTEMS_BLKDEV_FATAL_ERROR(30)
177
178/**
179 * The events used in this code. These should be system events rather than
180 * application events.
181 */
182#define RTEMS_BDBUF_TRANSFER_SYNC  RTEMS_EVENT_1
183#define RTEMS_BDBUF_SWAPOUT_SYNC   RTEMS_EVENT_2
184
185/**
186 * The swap out task size. Should be more than enough for most drivers with
187 * tracing turned on.
188 */
189#define SWAPOUT_TASK_STACK_SIZE (8 * 1024)
190
191/**
192 * Lock semaphore attributes. This is used for locking type mutexes.
193 *
194 * @warning Priority inheritance is on.
195 */
196#define RTEMS_BDBUF_CACHE_LOCK_ATTRIBS \
197  (RTEMS_PRIORITY | RTEMS_BINARY_SEMAPHORE | \
198   RTEMS_INHERIT_PRIORITY | RTEMS_NO_PRIORITY_CEILING | RTEMS_LOCAL)
199
200/**
201 * Waiter semaphore attributes.
202 *
203 * @warning Do not configure as inherit priority. If a driver is in the driver
204 *          initialisation table this locked semaphore will have the IDLE task
205 *          as the holder and a blocking task will raise the priority of the
 *          IDLE task which can cause unusual side effects.
207 */
208#define RTEMS_BDBUF_CACHE_WAITER_ATTRIBS \
209  (RTEMS_PRIORITY | RTEMS_SIMPLE_BINARY_SEMAPHORE | \
210   RTEMS_NO_INHERIT_PRIORITY | RTEMS_NO_PRIORITY_CEILING | RTEMS_LOCAL)
211
212/**
213 * Waiter timeout. Set to non-zero to find some info on a waiter that is
214 * waiting too long.
215 */
216#define RTEMS_BDBUF_WAIT_TIMEOUT RTEMS_NO_TIMEOUT
217#if !defined (RTEMS_BDBUF_WAIT_TIMEOUT)
218#define RTEMS_BDBUF_WAIT_TIMEOUT \
219  (TOD_MICROSECONDS_TO_TICKS (20000000))
220#endif
221
222/*
223 * The swap out task.
224 */
225static rtems_task rtems_bdbuf_swapout_task(rtems_task_argument arg);
226
227/**
228 * The Buffer Descriptor cache.
229 */
230static rtems_bdbuf_cache bdbuf_cache;
231
232#if RTEMS_BDBUF_TRACE
233/**
234 * If true output the trace message.
235 */
236bool rtems_bdbuf_tracer;
237
238/**
239 * Return the number of items on the list.
240 *
241 * @param list The chain control.
242 * @return uint32_t The number of items on the list.
243 */
244uint32_t
245rtems_bdbuf_list_count (rtems_chain_control* list)
246{
247  rtems_chain_node* node = rtems_chain_first (list);
248  uint32_t          count = 0;
249  while (!rtems_chain_is_tail (list, node))
250  {
251    count++;
252    node = rtems_chain_next (node);
253  }
254  return count;
255}
256
257/**
258 * Show the usage for the bdbuf cache.
259 */
260void
261rtems_bdbuf_show_usage (void)
262{
263  uint32_t group;
264  uint32_t total = 0;
265  uint32_t val;
266
267  for (group = 0; group < bdbuf_cache.group_count; group++)
268    total += bdbuf_cache.groups[group].users;
269  printf ("bdbuf:group users=%lu", total);
270  val = rtems_bdbuf_list_count (&bdbuf_cache.lru);
271  printf (", lru=%lu", val);
272  total = val;
273  val = rtems_bdbuf_list_count (&bdbuf_cache.modified);
274  printf (", mod=%lu", val);
275  total += val;
276  val = rtems_bdbuf_list_count (&bdbuf_cache.sync);
277  printf (", sync=%lu", val);
278  total += val;
279  printf (", total=%lu\n", total);
280}
281
/**
 * Show the users for a group of a bd.
 *
 * @param where A label to show the context of output.
 * @param bd The bd to show the users of.
 */
void
rtems_bdbuf_show_users (const char* where, rtems_bdbuf_buffer* bd)
{
  /*
   * NOTE(review): bd->state indexes this table directly, so the labels must
   * stay in the same order as the buffer state enumeration -- confirm
   * against rtems/bdbuf.h whenever the enum changes.
   */
  const char* states[] =
    { "FR", "EM", "CH", "AC", "AM", "MD", "SY", "TR" };

  printf ("bdbuf:users: %15s: [%" PRIu32 " (%s)] %td:%td = %" PRIu32 " %s\n",
          where,
          bd->block, states[bd->state],
          bd->group - bdbuf_cache.groups,
          bd - bdbuf_cache.bds,
          bd->group->users,
          bd->group->users > 8 ? "<<<<<<<" : "");
}
302#else
303#define rtems_bdbuf_tracer (0)
304#define rtems_bdbuf_show_usage() ((void) 0)
305#define rtems_bdbuf_show_users(_w, _b) ((void) 0)
306#endif
307
308/**
309 * The default maximum height of 32 allows for AVL trees having between
310 * 5,704,880 and 4,294,967,295 nodes, depending on order of insertion.  You may
311 * change this compile-time constant as you wish.
312 */
313#ifndef RTEMS_BDBUF_AVL_MAX_HEIGHT
314#define RTEMS_BDBUF_AVL_MAX_HEIGHT (32)
315#endif
316
317static void
318rtems_bdbuf_fatal (rtems_bdbuf_buf_state state, uint32_t error)
319{
320  rtems_fatal_error_occurred ((((uint32_t) state) << 16) | error);
321}
322
323/**
324 * Searches for the node with specified dd/block.
325 *
326 * @param root pointer to the root node of the AVL-Tree
327 * @param dd disk device search key
328 * @param block block search key
329 * @retval NULL node with the specified dd/block is not found
330 * @return pointer to the node with specified dd/block
331 */
332static rtems_bdbuf_buffer *
333rtems_bdbuf_avl_search (rtems_bdbuf_buffer** root,
334                        const rtems_disk_device *dd,
335                        rtems_blkdev_bnum    block)
336{
337  rtems_bdbuf_buffer* p = *root;
338
339  while ((p != NULL) && ((p->dd != dd) || (p->block != block)))
340  {
341    if (((uintptr_t) p->dd < (uintptr_t) dd)
342        || ((p->dd == dd) && (p->block < block)))
343    {
344      p = p->avl.right;
345    }
346    else
347    {
348      p = p->avl.left;
349    }
350  }
351
352  return p;
353}
354
/**
 * Inserts the specified node to the AVL-Tree.
 *
 * The tree is keyed by (dd, block): the disk device pointer value is the
 * major key and the block number the minor key. While descending, the
 * traversed nodes are recorded in buf_stack and the direction taken at each
 * node in its avl.cache field (+1 right, -1 left), so the rebalancing pass
 * can walk back up without parent pointers.
 *
 * @param root pointer to the root node of the AVL-Tree
 * @param node Pointer to the node to add.
 * @retval 0 The node added successfully
 * @retval -1 An error occurred (duplicate key)
 */
static int
rtems_bdbuf_avl_insert(rtems_bdbuf_buffer** root,
                       rtems_bdbuf_buffer*  node)
{
  const rtems_disk_device *dd = node->dd;
  rtems_blkdev_bnum block = node->block;

  rtems_bdbuf_buffer*  p = *root;
  rtems_bdbuf_buffer*  q;
  rtems_bdbuf_buffer*  p1;
  rtems_bdbuf_buffer*  p2;
  rtems_bdbuf_buffer*  buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT];
  rtems_bdbuf_buffer** buf_prev = buf_stack;

  bool modified = false;

  /* Empty tree: the node simply becomes the root. */
  if (p == NULL)
  {
    *root = node;
    node->avl.left = NULL;
    node->avl.right = NULL;
    node->avl.bal = 0;
    return 0;
  }

  /* Descend to the insertion point, recording the path taken. */
  while (p != NULL)
  {
    *buf_prev++ = p;

    if (((uintptr_t) p->dd < (uintptr_t) dd)
        || ((p->dd == dd) && (p->block < block)))
    {
      p->avl.cache = 1;
      q = p->avl.right;
      if (q == NULL)
      {
        /* NOTE(review): this first assignment is redundant; the combined
         * assignment on the next line sets q to node as well. */
        q = node;
        p->avl.right = q = node;
        break;
      }
    }
    else if ((p->dd != dd) || (p->block != block))
    {
      p->avl.cache = -1;
      q = p->avl.left;
      if (q == NULL)
      {
        q = node;
        p->avl.left = q;
        break;
      }
    }
    else
    {
      /* Duplicate (dd, block) key: refuse the insert. */
      return -1;
    }

    p = q;
  }

  /* Initialise the freshly attached leaf. */
  q->avl.left = q->avl.right = NULL;
  q->avl.bal = 0;
  modified = true;
  buf_prev--;

  /*
   * Walk the recorded path back towards the root, updating balance factors
   * and rotating where needed, until a subtree keeps its previous height
   * (modified becomes false) or the root is reinstalled.
   */
  while (modified)
  {
    if (p->avl.cache == -1)
    {
      switch (p->avl.bal)
      {
        case 1:
          p->avl.bal = 0;
          modified = false;
          break;

        case 0:
          p->avl.bal = -1;
          break;

        case -1:
          p1 = p->avl.left;
          if (p1->avl.bal == -1) /* simple LL-turn */
          {
            p->avl.left = p1->avl.right;
            p1->avl.right = p;
            p->avl.bal = 0;
            p = p1;
          }
          else /* double LR-turn */
          {
            p2 = p1->avl.right;
            p1->avl.right = p2->avl.left;
            p2->avl.left = p1;
            p->avl.left = p2->avl.right;
            p2->avl.right = p;
            if (p2->avl.bal == -1) p->avl.bal = +1; else p->avl.bal = 0;
            if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0;
            p = p2;
          }
          p->avl.bal = 0;
          modified = false;
          break;

        default:
          break;
      }
    }
    else
    {
      switch (p->avl.bal)
      {
        case -1:
          p->avl.bal = 0;
          modified = false;
          break;

        case 0:
          p->avl.bal = 1;
          break;

        case 1:
          p1 = p->avl.right;
          if (p1->avl.bal == 1) /* simple RR-turn */
          {
            p->avl.right = p1->avl.left;
            p1->avl.left = p;
            p->avl.bal = 0;
            p = p1;
          }
          else /* double RL-turn */
          {
            p2 = p1->avl.left;
            p1->avl.left = p2->avl.right;
            p2->avl.right = p1;
            p->avl.right = p2->avl.left;
            p2->avl.left = p;
            if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0;
            if (p2->avl.bal == -1) p1->avl.bal = +1; else p1->avl.bal = 0;
            p = p2;
          }
          p->avl.bal = 0;
          modified = false;
          break;

        default:
          break;
      }
    }
    /* Reattach the (possibly rotated) subtree to its parent, or install it
     * as the new root once the path stack is exhausted. */
    q = p;
    if (buf_prev > buf_stack)
    {
      p = *--buf_prev;

      if (p->avl.cache == -1)
      {
        p->avl.left = q;
      }
      else
      {
        p->avl.right = q;
      }
    }
    else
    {
      *root = p;
      break;
    }
  };

  return 0;
}
535
536
/**
 * Removes the node from the tree.
 *
 * The tree is keyed by (dd, block) as in the search and insert routines.
 * The descent records the path in buf_stack and the direction taken in each
 * node's avl.cache field (+1 right, -1 left); the rebalancing pass then
 * walks this stack back towards the root.
 *
 * @param root Pointer to pointer to the root node
 * @param node Pointer to the node to remove
 * @retval 0 Item removed
 * @retval -1 No such item found
 */
static int
rtems_bdbuf_avl_remove(rtems_bdbuf_buffer**      root,
                       const rtems_bdbuf_buffer* node)
{
  const rtems_disk_device *dd = node->dd;
  rtems_blkdev_bnum block = node->block;

  rtems_bdbuf_buffer*  p = *root;
  rtems_bdbuf_buffer*  q;
  rtems_bdbuf_buffer*  r;
  rtems_bdbuf_buffer*  s;
  rtems_bdbuf_buffer*  p1;
  rtems_bdbuf_buffer*  p2;
  rtems_bdbuf_buffer*  buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT];
  rtems_bdbuf_buffer** buf_prev = buf_stack;

  bool modified = false;

  memset (buf_stack, 0, sizeof(buf_stack));

  /* Descend to the node to delete, recording the path taken. */
  while (p != NULL)
  {
    *buf_prev++ = p;

    if (((uintptr_t) p->dd < (uintptr_t) dd)
        || ((p->dd == dd) && (p->block < block)))
    {
      p->avl.cache = 1;
      p = p->avl.right;
    }
    else if ((p->dd != dd) || (p->block != block))
    {
      p->avl.cache = -1;
      p = p->avl.left;
    }
    else
    {
      /* node found */
      break;
    }
  }

  if (p == NULL)
  {
    /* there is no such node */
    return -1;
  }

  q = p;

  buf_prev--;
  if (buf_prev > buf_stack)
  {
    p = *(buf_prev - 1);
  }
  else
  {
    p = NULL;
  }

  /* at this moment q - is a node to delete, p is q's parent */
  if (q->avl.right == NULL)
  {
    /* No right subtree: splice the left child (if any) into q's place. */
    r = q->avl.left;
    if (r != NULL)
    {
      r->avl.bal = 0;
    }
    q = r;
  }
  else
  {
    rtems_bdbuf_buffer **t;

    r = q->avl.right;

    if (r->avl.left == NULL)
    {
      /* The right child has no left subtree: it replaces q directly. */
      r->avl.left = q->avl.left;
      r->avl.bal = q->avl.bal;
      r->avl.cache = 1;
      *buf_prev++ = q = r;
    }
    else
    {
      /* General case: find the in-order successor s (leftmost node of the
       * right subtree) and move it into q's position, extending the path
       * stack so the rebalance below covers the detour. */
      t = buf_prev++;
      s = r;

      while (s->avl.left != NULL)
      {
        *buf_prev++ = r = s;
        s = r->avl.left;
        r->avl.cache = -1;
      }

      s->avl.left = q->avl.left;
      r->avl.left = s->avl.right;
      s->avl.right = q->avl.right;
      s->avl.bal = q->avl.bal;
      s->avl.cache = 1;

      *t = q = s;
    }
  }

  /* Reattach the replacement subtree to q's parent (or the root). */
  if (p != NULL)
  {
    if (p->avl.cache == -1)
    {
      p->avl.left = q;
    }
    else
    {
      p->avl.right = q;
    }
  }
  else
  {
    *root = q;
  }

  modified = true;

  /*
   * Walk the recorded path back towards the root, rebalancing each subtree
   * whose height shrank, until a subtree keeps its height (modified becomes
   * false) or the root is reinstalled.
   */
  while (modified)
  {
    if (buf_prev > buf_stack)
    {
      p = *--buf_prev;
    }
    else
    {
      break;
    }

    if (p->avl.cache == -1)
    {
      /* rebalance left branch */
      switch (p->avl.bal)
      {
        case -1:
          p->avl.bal = 0;
          break;
        case  0:
          p->avl.bal = 1;
          modified = false;
          break;

        case +1:
          p1 = p->avl.right;

          if (p1->avl.bal >= 0) /* simple RR-turn */
          {
            p->avl.right = p1->avl.left;
            p1->avl.left = p;

            if (p1->avl.bal == 0)
            {
              p1->avl.bal = -1;
              modified = false;
            }
            else
            {
              p->avl.bal = 0;
              p1->avl.bal = 0;
            }
            p = p1;
          }
          else /* double RL-turn */
          {
            p2 = p1->avl.left;

            p1->avl.left = p2->avl.right;
            p2->avl.right = p1;
            p->avl.right = p2->avl.left;
            p2->avl.left = p;

            if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0;
            if (p2->avl.bal == -1) p1->avl.bal = 1; else p1->avl.bal = 0;

            p = p2;
            p2->avl.bal = 0;
          }
          break;

        default:
          break;
      }
    }
    else
    {
      /* rebalance right branch */
      switch (p->avl.bal)
      {
        case +1:
          p->avl.bal = 0;
          break;

        case  0:
          p->avl.bal = -1;
          modified = false;
          break;

        case -1:
          p1 = p->avl.left;

          if (p1->avl.bal <= 0) /* simple LL-turn */
          {
            p->avl.left = p1->avl.right;
            p1->avl.right = p;
            if (p1->avl.bal == 0)
            {
              p1->avl.bal = 1;
              modified = false;
            }
            else
            {
              p->avl.bal = 0;
              p1->avl.bal = 0;
            }
            p = p1;
          }
          else /* double LR-turn */
          {
            p2 = p1->avl.right;

            p1->avl.right = p2->avl.left;
            p2->avl.left = p1;
            p->avl.left = p2->avl.right;
            p2->avl.right = p;

            if (p2->avl.bal == -1) p->avl.bal = 1; else p->avl.bal = 0;
            if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0;

            p = p2;
            p2->avl.bal = 0;
          }
          break;

        default:
          break;
      }
    }

    /* Reattach the (possibly rotated) subtree to its parent, or install it
     * as the new root once the path stack is exhausted. */
    if (buf_prev > buf_stack)
    {
      q = *(buf_prev - 1);

      if (q->avl.cache == -1)
      {
        q->avl.left = p;
      }
      else
      {
        q->avl.right = p;
      }
    }
    else
    {
      *root = p;
      break;
    }

  }

  return 0;
}
811
/**
 * Set the state of a buffer descriptor. No transition checking is done
 * here; callers are responsible for only requesting valid transitions.
 * Presumably called with the cache locked (the state field is cache
 * data) -- confirm at each call site.
 */
static void
rtems_bdbuf_set_state (rtems_bdbuf_buffer *bd, rtems_bdbuf_buf_state state)
{
  bd->state = state;
}
817
818/**
819 * Change the block number for the block size to the block number for the media
820 * block size. We have to use 64bit maths. There is no short cut here.
821 *
822 * @param block The logical block number in the block size terms.
823 * @param block_size The block size.
824 * @param media_block_size The block size of the media.
825 * @return rtems_blkdev_bnum The media block number.
826 */
827static rtems_blkdev_bnum
828rtems_bdbuf_media_block (rtems_blkdev_bnum block,
829                         size_t            block_size,
830                         size_t            media_block_size)
831{
832  return (rtems_blkdev_bnum)
833    ((((uint64_t) block) * block_size) / media_block_size);
834}
835
836/**
837 * Lock the mutex. A single task can nest calls.
838 *
839 * @param lock The mutex to lock.
840 * @param fatal_error_code The error code if the call fails.
841 */
842static void
843rtems_bdbuf_lock (rtems_id lock, uint32_t fatal_error_code)
844{
845  rtems_status_code sc = rtems_semaphore_obtain (lock,
846                                                 RTEMS_WAIT,
847                                                 RTEMS_NO_TIMEOUT);
848  if (sc != RTEMS_SUCCESSFUL)
849    rtems_fatal_error_occurred (fatal_error_code);
850}
851
852/**
853 * Unlock the mutex.
854 *
855 * @param lock The mutex to unlock.
856 * @param fatal_error_code The error code if the call fails.
857 */
858static void
859rtems_bdbuf_unlock (rtems_id lock, uint32_t fatal_error_code)
860{
861  rtems_status_code sc = rtems_semaphore_release (lock);
862  if (sc != RTEMS_SUCCESSFUL)
863    rtems_fatal_error_occurred (fatal_error_code);
864}
865
/**
 * Lock the cache. A single task can nest calls. Protects all cache data,
 * the BDs and the lists (see rtems_bdbuf_cache.lock).
 */
static void
rtems_bdbuf_lock_cache (void)
{
  rtems_bdbuf_lock (bdbuf_cache.lock, RTEMS_BLKDEV_FATAL_BDBUF_CACHE_LOCK);
}
874
/**
 * Unlock the cache.
 */
static void
rtems_bdbuf_unlock_cache (void)
{
  rtems_bdbuf_unlock (bdbuf_cache.lock, RTEMS_BLKDEV_FATAL_BDBUF_CACHE_UNLOCK);
}
883
/**
 * Lock the cache's sync. A single task can nest calls. Holding this lock
 * blocks writers while a sync is in progress.
 */
static void
rtems_bdbuf_lock_sync (void)
{
  rtems_bdbuf_lock (bdbuf_cache.sync_lock, RTEMS_BLKDEV_FATAL_BDBUF_SYNC_LOCK);
}
892
/**
 * Unlock the cache's sync lock. Any blocked writers are woken.
 */
static void
rtems_bdbuf_unlock_sync (void)
{
  rtems_bdbuf_unlock (bdbuf_cache.sync_lock,
                      RTEMS_BLKDEV_FATAL_BDBUF_SYNC_UNLOCK);
}
902
/**
 * Increment the user count of the group this BD belongs to.
 */
static void
rtems_bdbuf_group_obtain (rtems_bdbuf_buffer *bd)
{
  ++bd->group->users;
}
908
/**
 * Decrement the user count of the group this BD belongs to. Must balance a
 * previous rtems_bdbuf_group_obtain() call.
 */
static void
rtems_bdbuf_group_release (rtems_bdbuf_buffer *bd)
{
  --bd->group->users;
}
914
915static rtems_mode
916rtems_bdbuf_disable_preemption (void)
917{
918  rtems_status_code sc = RTEMS_SUCCESSFUL;
919  rtems_mode prev_mode = 0;
920
921  sc = rtems_task_mode (RTEMS_NO_PREEMPT, RTEMS_PREEMPT_MASK, &prev_mode);
922  if (sc != RTEMS_SUCCESSFUL)
923    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_DIS);
924
925  return prev_mode;
926}
927
928static void
929rtems_bdbuf_restore_preemption (rtems_mode prev_mode)
930{
931  rtems_status_code sc = RTEMS_SUCCESSFUL;
932
933  sc = rtems_task_mode (prev_mode, RTEMS_ALL_MODE_MASKS, &prev_mode);
934  if (sc != RTEMS_SUCCESSFUL)
935    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_RST);
936}
937
938/**
939 * Wait until woken. Semaphores are used so a number of tasks can wait and can
940 * be woken at once. Task events would require we maintain a list of tasks to
941 * be woken and this would require storage and we do not know the number of
942 * tasks that could be waiting.
943 *
944 * While we have the cache locked we can try and claim the semaphore and
945 * therefore know when we release the lock to the cache we will block until the
946 * semaphore is released. This may even happen before we get to block.
947 *
948 * A counter is used to save the release call when no one is waiting.
949 *
950 * The function assumes the cache is locked on entry and it will be locked on
951 * exit.
952 */
static void
rtems_bdbuf_anonymous_wait (rtems_bdbuf_waiters *waiters)
{
  rtems_status_code sc;
  rtems_mode        prev_mode;

  /*
   * Indicate we are waiting.
   */
  ++waiters->count;

  /*
   * Disable preemption then unlock the cache and block.  There is no POSIX
   * condition variable in the core API so this is a work around.
   *
   * The issue is a task could preempt after the cache is unlocked because it is
   * blocking or just hits that window, and before this task has blocked on the
   * semaphore. If the preempting task flushes the queue this task will not see
   * the flush and may block for ever or until another transaction flushes this
   * semaphore.
   */
  prev_mode = rtems_bdbuf_disable_preemption ();

  /*
   * Unlock the cache, wait, and lock the cache when we return.
   */
  rtems_bdbuf_unlock_cache ();

  sc = rtems_semaphore_obtain (waiters->sema, RTEMS_WAIT, RTEMS_BDBUF_WAIT_TIMEOUT);

  if (sc == RTEMS_TIMEOUT)
    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_TO);

  /*
   * The waiter semaphore is only ever flushed (see rtems_bdbuf_wake), and a
   * flush wakes blocked tasks with RTEMS_UNSATISFIED. Any other status --
   * including RTEMS_SUCCESSFUL, which would mean the semaphore was released
   * rather than flushed -- indicates a logic error and is fatal.
   */
  if (sc != RTEMS_UNSATISFIED)
    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_2);

  rtems_bdbuf_lock_cache ();

  rtems_bdbuf_restore_preemption (prev_mode);

  --waiters->count;
}
995
/**
 * Wait on the given waiter set for this buffer. The BD waiter count and the
 * group user count are incremented around the anonymous wait so the code can
 * see the buffer and its group are in use while this task is blocked. The
 * cache must be locked on entry and is locked again on exit (see
 * rtems_bdbuf_anonymous_wait).
 */
static void
rtems_bdbuf_wait (rtems_bdbuf_buffer *bd, rtems_bdbuf_waiters *waiters)
{
  rtems_bdbuf_group_obtain (bd);
  ++bd->waiters;
  rtems_bdbuf_anonymous_wait (waiters);
  --bd->waiters;
  rtems_bdbuf_group_release (bd);
}
1005
1006/**
1007 * Wake a blocked resource. The resource has a counter that lets us know if
1008 * there are any waiters.
1009 */
1010static void
1011rtems_bdbuf_wake (const rtems_bdbuf_waiters *waiters)
1012{
1013  rtems_status_code sc = RTEMS_SUCCESSFUL;
1014
1015  if (waiters->count > 0)
1016  {
1017    sc = rtems_semaphore_flush (waiters->sema);
1018    if (sc != RTEMS_SUCCESSFUL)
1019      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAKE);
1020  }
1021}
1022
1023static void
1024rtems_bdbuf_wake_swapper (void)
1025{
1026  rtems_status_code sc = rtems_event_send (bdbuf_cache.swapout,
1027                                           RTEMS_BDBUF_SWAPOUT_SYNC);
1028  if (sc != RTEMS_SUCCESSFUL)
1029    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE);
1030}
1031
1032static bool
1033rtems_bdbuf_has_buffer_waiters (void)
1034{
1035  return bdbuf_cache.buffer_waiters.count;
1036}
1037
/**
 * Remove the BD from the AVL tree. A failed removal means the tree and the
 * BD disagree, i.e. the cache state is corrupt, which is fatal.
 */
static void
rtems_bdbuf_remove_from_tree (rtems_bdbuf_buffer *bd)
{
  if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0)
    rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_TREE_RM);
}
1044
/**
 * Remove a BD from the LRU list and, if it is cached, from the AVL tree as
 * well. Only FREE (not in the tree) or CACHED (in the tree) buffers may be
 * on the LRU list; any other state here is a fatal error.
 */
static void
rtems_bdbuf_remove_from_tree_and_lru_list (rtems_bdbuf_buffer *bd)
{
  switch (bd->state)
  {
    case RTEMS_BDBUF_STATE_FREE:
      break;
    case RTEMS_BDBUF_STATE_CACHED:
      rtems_bdbuf_remove_from_tree (bd);
      break;
    default:
      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_10);
  }

  rtems_chain_extract (&bd->link);
}
1061
/**
 * Mark the BD free and prepend it to the LRU list, so free buffers are
 * reused before cached ones.
 */
static void
rtems_bdbuf_make_free_and_add_to_lru_list (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_FREE);
  rtems_chain_prepend (&bdbuf_cache.lru, &bd->link);
}
1068
/**
 * Mark the BD empty (allocated to a device/block but holding no valid
 * data).
 */
static void
rtems_bdbuf_make_empty (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_EMPTY);
}
1074
1075static void
1076rtems_bdbuf_make_cached_and_add_to_lru_list (rtems_bdbuf_buffer *bd)
1077{
1078  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_CACHED);
1079  rtems_chain_append (&bdbuf_cache.lru, &bd->link);
1080}
1081
/**
 * Invalidate the buffer contents.  If nobody waits for this buffer it is
 * also removed from the AVL tree and returned to the LRU list as free.  With
 * waiters present it must stay in the tree so the waiters can find it again.
 */
static void
rtems_bdbuf_discard_buffer (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_make_empty (bd);

  if (bd->waiters == 0)
  {
    rtems_bdbuf_remove_from_tree (bd);
    rtems_bdbuf_make_free_and_add_to_lru_list (bd);
  }
}
1093
/**
 * Put a buffer released after modifying access onto the modified list.  If a
 * sync is active for this buffer's device the release first waits for the
 * sync to complete (by cycling through the sync lock) so the sync covers a
 * consistent set of buffers.
 */
static void
rtems_bdbuf_add_to_modified_list_after_access (rtems_bdbuf_buffer *bd)
{
  if (bdbuf_cache.sync_active && bdbuf_cache.sync_device == bd->dd)
  {
    /* The cache lock must be dropped before blocking on the sync lock to
     * avoid deadlock with the sync in progress. */
    rtems_bdbuf_unlock_cache ();

    /*
     * Wait for the sync lock.
     */
    rtems_bdbuf_lock_sync ();

    rtems_bdbuf_unlock_sync ();
    rtems_bdbuf_lock_cache ();
  }

  /*
   * Only the first modified release sets the timer and any further user
   * accesses do not change the timer value which should move down. This
   * assumes the user's hold of the buffer is much less than the time on the
   * modified list. Resetting the timer on each access which could result in a
   * buffer never getting to 0 and never being forced onto disk. This raises a
   * difficult question. Is a snapshot of a block that is changing better than
   * nothing being written? We have tended to think we should hold changes for
   * only a specific period of time even if still changing and get onto disk
   * and letting the file system try and recover this position if it can.
   */
  if (bd->state == RTEMS_BDBUF_STATE_ACCESS_CACHED
        || bd->state == RTEMS_BDBUF_STATE_ACCESS_EMPTY)
    bd->hold_timer = bdbuf_config.swap_block_hold;

  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_MODIFIED);
  rtems_chain_append (&bdbuf_cache.modified, &bd->link);

  /* Tasks waiting for this particular buffer take precedence; otherwise kick
   * the swapper so modified buffers can be recycled for buffer waiters. */
  if (bd->waiters)
    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
  else if (rtems_bdbuf_has_buffer_waiters ())
    rtems_bdbuf_wake_swapper ();
}
1133
/**
 * Return a buffer released after read-only access to the cache: drop the
 * group reference, mark it cached on the LRU list, and wake whoever may be
 * waiting for it.
 */
static void
rtems_bdbuf_add_to_lru_list_after_access (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_group_release (bd);
  rtems_bdbuf_make_cached_and_add_to_lru_list (bd);

  /* Waiters for this specific buffer have priority over tasks waiting for
   * any buffer to become available. */
  if (bd->waiters)
    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
  else
    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
}
1145
1146/**
1147 * Compute the number of BDs per group for a given buffer size.
1148 *
1149 * @param size The buffer size. It can be any size and we scale up.
1150 */
1151static size_t
1152rtems_bdbuf_bds_per_group (size_t size)
1153{
1154  size_t bufs_per_size;
1155  size_t bds_per_size;
1156
1157  if (size > bdbuf_config.buffer_max)
1158    return 0;
1159
1160  bufs_per_size = ((size - 1) / bdbuf_config.buffer_min) + 1;
1161
1162  for (bds_per_size = 1;
1163       bds_per_size < bufs_per_size;
1164       bds_per_size <<= 1)
1165    ;
1166
1167  return bdbuf_cache.max_bds_per_group / bds_per_size;
1168}
1169
/**
 * Discard a buffer released after access: drop the group reference,
 * invalidate the contents, and wake whoever may be waiting for it.
 */
static void
rtems_bdbuf_discard_buffer_after_access (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_group_release (bd);
  rtems_bdbuf_discard_buffer (bd);

  /* Waiters for this specific buffer have priority over tasks waiting for
   * any buffer to become available. */
  if (bd->waiters)
    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
  else
    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
}
1181
1182/**
1183 * Reallocate a group. The BDs currently allocated in the group are removed
1184 * from the ALV tree and any lists then the new BD's are prepended to the ready
1185 * list of the cache.
1186 *
1187 * @param group The group to reallocate.
1188 * @param new_bds_per_group The new count of BDs per group.
1189 * @return A buffer of this group.
1190 */
1191static rtems_bdbuf_buffer *
1192rtems_bdbuf_group_realloc (rtems_bdbuf_group* group, size_t new_bds_per_group)
1193{
1194  rtems_bdbuf_buffer* bd;
1195  size_t              b;
1196  size_t              bufs_per_bd;
1197
1198  if (rtems_bdbuf_tracer)
1199    printf ("bdbuf:realloc: %tu: %zd -> %zd\n",
1200            group - bdbuf_cache.groups, group->bds_per_group,
1201            new_bds_per_group);
1202
1203  bufs_per_bd = bdbuf_cache.max_bds_per_group / group->bds_per_group;
1204
1205  for (b = 0, bd = group->bdbuf;
1206       b < group->bds_per_group;
1207       b++, bd += bufs_per_bd)
1208    rtems_bdbuf_remove_from_tree_and_lru_list (bd);
1209
1210  group->bds_per_group = new_bds_per_group;
1211  bufs_per_bd = bdbuf_cache.max_bds_per_group / new_bds_per_group;
1212
1213  for (b = 1, bd = group->bdbuf + bufs_per_bd;
1214       b < group->bds_per_group;
1215       b++, bd += bufs_per_bd)
1216    rtems_bdbuf_make_free_and_add_to_lru_list (bd);
1217
1218  if (b > 1)
1219    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1220
1221  return group->bdbuf;
1222}
1223
1224static void
1225rtems_bdbuf_setup_empty_buffer (rtems_bdbuf_buffer *bd,
1226                                const rtems_disk_device *dd,
1227                                rtems_blkdev_bnum   block)
1228{
1229  bd->dd        = dd ;
1230  bd->block     = block;
1231  bd->avl.left  = NULL;
1232  bd->avl.right = NULL;
1233  bd->waiters   = 0;
1234
1235  if (rtems_bdbuf_avl_insert (&bdbuf_cache.tree, bd) != 0)
1236    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_RECYCLE);
1237
1238  rtems_bdbuf_make_empty (bd);
1239}
1240
/**
 * Scan the LRU list for a buffer that can be recycled for the given device
 * and block.  A buffer with no waiters qualifies directly if its group
 * already has the requested BDs-per-group count, or indirectly if its whole
 * group is unused and can be reallocated to that count.
 *
 * @return The recycled buffer, set up for (dd, block), or NULL if no
 *         suitable buffer was found.
 */
static rtems_bdbuf_buffer *
rtems_bdbuf_get_buffer_from_lru_list (const rtems_disk_device *dd,
                                      rtems_blkdev_bnum block,
                                      size_t            bds_per_group)
{
  rtems_chain_node *node = rtems_chain_first (&bdbuf_cache.lru);

  while (!rtems_chain_is_tail (&bdbuf_cache.lru, node))
  {
    rtems_bdbuf_buffer *bd = (rtems_bdbuf_buffer *) node;
    rtems_bdbuf_buffer *empty_bd = NULL;

    if (rtems_bdbuf_tracer)
      printf ("bdbuf:next-bd: %tu (%td:%" PRId32 ") %zd -> %zd\n",
              bd - bdbuf_cache.bds,
              bd->group - bdbuf_cache.groups, bd->group->users,
              bd->group->bds_per_group, bds_per_group);

    /*
     * If nobody waits for this BD, we may recycle it.
     */
    if (bd->waiters == 0)
    {
      if (bd->group->bds_per_group == bds_per_group)
      {
        rtems_bdbuf_remove_from_tree_and_lru_list (bd);

        empty_bd = bd;
      }
      else if (bd->group->users == 0)
        /* The group has the wrong block size but is idle: re-carve it. */
        empty_bd = rtems_bdbuf_group_realloc (bd->group, bds_per_group);
    }

    if (empty_bd != NULL)
    {
      rtems_bdbuf_setup_empty_buffer (empty_bd, dd, block);

      return empty_bd;
    }

    node = rtems_chain_next (node);
  }

  return NULL;
}
1286
1287/**
1288 * Initialise the cache.
1289 *
1290 * @return rtems_status_code The initialisation status.
1291 */
1292rtems_status_code
1293rtems_bdbuf_init (void)
1294{
1295  rtems_bdbuf_group*  group;
1296  rtems_bdbuf_buffer* bd;
1297  uint8_t*            buffer;
1298  size_t              b;
1299  size_t              cache_aligment;
1300  rtems_status_code   sc;
1301  rtems_mode          prev_mode;
1302
1303  if (rtems_bdbuf_tracer)
1304    printf ("bdbuf:init\n");
1305
1306  if (rtems_interrupt_is_in_progress())
1307    return RTEMS_CALLED_FROM_ISR;
1308
1309  /*
1310   * Check the configuration table values.
1311   */
1312  if ((bdbuf_config.buffer_max % bdbuf_config.buffer_min) != 0)
1313    return RTEMS_INVALID_NUMBER;
1314
1315  /*
1316   * We use a special variable to manage the initialisation incase we have
1317   * completing threads doing this. You may get errors if the another thread
1318   * makes a call and we have not finished initialisation.
1319   */
1320  prev_mode = rtems_bdbuf_disable_preemption ();
1321  if (bdbuf_cache.initialised)
1322  {
1323    rtems_bdbuf_restore_preemption (prev_mode);
1324    return RTEMS_RESOURCE_IN_USE;
1325  }
1326
1327  memset(&bdbuf_cache, 0, sizeof(bdbuf_cache));
1328  bdbuf_cache.initialised = true;
1329  rtems_bdbuf_restore_preemption (prev_mode);
1330
1331  /*
1332   * For unspecified cache alignments we use the CPU alignment.
1333   */
1334  cache_aligment = 32; /* FIXME rtems_cache_get_data_line_size() */
1335  if (cache_aligment <= 0)
1336    cache_aligment = CPU_ALIGNMENT;
1337
1338  bdbuf_cache.sync_device = BDBUF_INVALID_DEV;
1339
1340  rtems_chain_initialize_empty (&bdbuf_cache.swapout_workers);
1341  rtems_chain_initialize_empty (&bdbuf_cache.lru);
1342  rtems_chain_initialize_empty (&bdbuf_cache.modified);
1343  rtems_chain_initialize_empty (&bdbuf_cache.sync);
1344
1345  /*
1346   * Create the locks for the cache.
1347   */
1348  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'l'),
1349                               1, RTEMS_BDBUF_CACHE_LOCK_ATTRIBS, 0,
1350                               &bdbuf_cache.lock);
1351  if (sc != RTEMS_SUCCESSFUL)
1352    goto error;
1353
1354  rtems_bdbuf_lock_cache ();
1355
1356  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 's'),
1357                               1, RTEMS_BDBUF_CACHE_LOCK_ATTRIBS, 0,
1358                               &bdbuf_cache.sync_lock);
1359  if (sc != RTEMS_SUCCESSFUL)
1360    goto error;
1361
1362  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'a'),
1363                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
1364                               &bdbuf_cache.access_waiters.sema);
1365  if (sc != RTEMS_SUCCESSFUL)
1366    goto error;
1367
1368  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 't'),
1369                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
1370                               &bdbuf_cache.transfer_waiters.sema);
1371  if (sc != RTEMS_SUCCESSFUL)
1372    goto error;
1373
1374  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'b'),
1375                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
1376                               &bdbuf_cache.buffer_waiters.sema);
1377  if (sc != RTEMS_SUCCESSFUL)
1378    goto error;
1379
1380  /*
1381   * Compute the various number of elements in the cache.
1382   */
1383  bdbuf_cache.buffer_min_count =
1384    bdbuf_config.size / bdbuf_config.buffer_min;
1385  bdbuf_cache.max_bds_per_group =
1386    bdbuf_config.buffer_max / bdbuf_config.buffer_min;
1387  bdbuf_cache.group_count =
1388    bdbuf_cache.buffer_min_count / bdbuf_cache.max_bds_per_group;
1389
1390  /*
1391   * Allocate the memory for the buffer descriptors.
1392   */
1393  bdbuf_cache.bds = calloc (sizeof (rtems_bdbuf_buffer),
1394                            bdbuf_cache.buffer_min_count);
1395  if (!bdbuf_cache.bds)
1396    goto error;
1397
1398  /*
1399   * Allocate the memory for the buffer descriptors.
1400   */
1401  bdbuf_cache.groups = calloc (sizeof (rtems_bdbuf_group),
1402                               bdbuf_cache.group_count);
1403  if (!bdbuf_cache.groups)
1404    goto error;
1405
1406  /*
1407   * Allocate memory for buffer memory. The buffer memory will be cache
1408   * aligned. It is possible to free the memory allocated by rtems_memalign()
1409   * with free(). Return 0 if allocated.
1410   *
1411   * The memory allocate allows a
1412   */
1413  if (rtems_memalign ((void **) &bdbuf_cache.buffers,
1414                      cache_aligment,
1415                      bdbuf_cache.buffer_min_count * bdbuf_config.buffer_min) != 0)
1416    goto error;
1417
1418  /*
1419   * The cache is empty after opening so we need to add all the buffers to it
1420   * and initialise the groups.
1421   */
1422  for (b = 0, group = bdbuf_cache.groups,
1423         bd = bdbuf_cache.bds, buffer = bdbuf_cache.buffers;
1424       b < bdbuf_cache.buffer_min_count;
1425       b++, bd++, buffer += bdbuf_config.buffer_min)
1426  {
1427    bd->dd    = BDBUF_INVALID_DEV;
1428    bd->group  = group;
1429    bd->buffer = buffer;
1430
1431    rtems_chain_append (&bdbuf_cache.lru, &bd->link);
1432
1433    if ((b % bdbuf_cache.max_bds_per_group) ==
1434        (bdbuf_cache.max_bds_per_group - 1))
1435      group++;
1436  }
1437
1438  for (b = 0,
1439         group = bdbuf_cache.groups,
1440         bd = bdbuf_cache.bds;
1441       b < bdbuf_cache.group_count;
1442       b++,
1443         group++,
1444         bd += bdbuf_cache.max_bds_per_group)
1445  {
1446    group->bds_per_group = bdbuf_cache.max_bds_per_group;
1447    group->bdbuf = bd;
1448  }
1449
1450  /*
1451   * Create and start swapout task. This task will create and manage the worker
1452   * threads.
1453   */
1454  bdbuf_cache.swapout_enabled = true;
1455
1456  sc = rtems_task_create (rtems_build_name('B', 'S', 'W', 'P'),
1457                          bdbuf_config.swapout_priority ?
1458                            bdbuf_config.swapout_priority :
1459                            RTEMS_BDBUF_SWAPOUT_TASK_PRIORITY_DEFAULT,
1460                          SWAPOUT_TASK_STACK_SIZE,
1461                          RTEMS_PREEMPT | RTEMS_NO_TIMESLICE | RTEMS_NO_ASR,
1462                          RTEMS_LOCAL | RTEMS_NO_FLOATING_POINT,
1463                          &bdbuf_cache.swapout);
1464  if (sc != RTEMS_SUCCESSFUL)
1465    goto error;
1466
1467  sc = rtems_task_start (bdbuf_cache.swapout,
1468                         rtems_bdbuf_swapout_task,
1469                         (rtems_task_argument) &bdbuf_cache);
1470  if (sc != RTEMS_SUCCESSFUL)
1471    goto error;
1472
1473  rtems_bdbuf_unlock_cache ();
1474
1475  return RTEMS_SUCCESSFUL;
1476
1477error:
1478
1479  if (bdbuf_cache.swapout != 0)
1480    rtems_task_delete (bdbuf_cache.swapout);
1481
1482  free (bdbuf_cache.buffers);
1483  free (bdbuf_cache.groups);
1484  free (bdbuf_cache.bds);
1485
1486  rtems_semaphore_delete (bdbuf_cache.buffer_waiters.sema);
1487  rtems_semaphore_delete (bdbuf_cache.access_waiters.sema);
1488  rtems_semaphore_delete (bdbuf_cache.transfer_waiters.sema);
1489  rtems_semaphore_delete (bdbuf_cache.sync_lock);
1490
1491  if (bdbuf_cache.lock != 0)
1492  {
1493    rtems_bdbuf_unlock_cache ();
1494    rtems_semaphore_delete (bdbuf_cache.lock);
1495  }
1496
1497  bdbuf_cache.initialised = false;
1498
1499  return RTEMS_UNSATISFIED;
1500}
1501
1502static void
1503rtems_bdbuf_wait_for_event (rtems_event_set event)
1504{
1505  rtems_status_code sc = RTEMS_SUCCESSFUL;
1506  rtems_event_set   out = 0;
1507
1508  sc = rtems_event_receive (event,
1509                            RTEMS_EVENT_ALL | RTEMS_WAIT,
1510                            RTEMS_NO_TIMEOUT,
1511                            &out);
1512
1513  if (sc != RTEMS_SUCCESSFUL || out != event)
1514    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_WAIT_EVNT);
1515}
1516
/**
 * Wait until the buffer is available for exclusive access.  On return the
 * buffer is in the EMPTY, CACHED or MODIFIED state; CACHED and MODIFIED
 * buffers have been extracted from their chain, and a MODIFIED buffer's
 * group reference has been released.
 */
static void
rtems_bdbuf_wait_for_access (rtems_bdbuf_buffer *bd)
{
  while (true)
  {
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_MODIFIED:
        rtems_bdbuf_group_release (bd);
        /* Fall through */
      case RTEMS_BDBUF_STATE_CACHED:
        rtems_chain_extract (&bd->link);
        /* Fall through */
      case RTEMS_BDBUF_STATE_EMPTY:
        return;
      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
        /* Another task currently accesses the buffer. */
        rtems_bdbuf_wait (bd, &bdbuf_cache.access_waiters);
        break;
      case RTEMS_BDBUF_STATE_SYNC:
      case RTEMS_BDBUF_STATE_TRANSFER:
      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
        /* The buffer takes part in a device transfer. */
        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
        break;
      default:
        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_7);
    }
  }
}
1548
/**
 * Move a modified buffer onto the sync list and wake the swapout task so the
 * buffer is written to disk promptly (e.g. to free it for recycling).
 */
static void
rtems_bdbuf_request_sync_for_modified_buffer (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_SYNC);
  rtems_chain_extract (&bd->link);
  rtems_chain_append (&bdbuf_cache.sync, &bd->link);
  rtems_bdbuf_wake_swapper ();
}
1557
1558/**
1559 * @brief Waits until the buffer is ready for recycling.
1560 *
1561 * @retval @c true Buffer is valid and may be recycled.
1562 * @retval @c false Buffer is invalid and has to searched again.
1563 */
1564static bool
1565rtems_bdbuf_wait_for_recycle (rtems_bdbuf_buffer *bd)
1566{
1567  while (true)
1568  {
1569    switch (bd->state)
1570    {
1571      case RTEMS_BDBUF_STATE_FREE:
1572        return true;
1573      case RTEMS_BDBUF_STATE_MODIFIED:
1574        rtems_bdbuf_request_sync_for_modified_buffer (bd);
1575        break;
1576      case RTEMS_BDBUF_STATE_CACHED:
1577      case RTEMS_BDBUF_STATE_EMPTY:
1578        if (bd->waiters == 0)
1579          return true;
1580        else
1581        {
1582          /*
1583           * It is essential that we wait here without a special wait count and
1584           * without the group in use.  Otherwise we could trigger a wait ping
1585           * pong with another recycle waiter.  The state of the buffer is
1586           * arbitrary afterwards.
1587           */
1588          rtems_bdbuf_anonymous_wait (&bdbuf_cache.buffer_waiters);
1589          return false;
1590        }
1591      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
1592      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
1593      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
1594      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
1595        rtems_bdbuf_wait (bd, &bdbuf_cache.access_waiters);
1596        break;
1597      case RTEMS_BDBUF_STATE_SYNC:
1598      case RTEMS_BDBUF_STATE_TRANSFER:
1599      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
1600        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
1601        break;
1602      default:
1603        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_8);
1604    }
1605  }
1606}
1607
/**
 * Wait until any sync or transfer involving this buffer has completed.  The
 * buffer state on return is one of the non-transfer states.
 */
static void
rtems_bdbuf_wait_for_sync_done (rtems_bdbuf_buffer *bd)
{
  while (true)
  {
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_CACHED:
      case RTEMS_BDBUF_STATE_EMPTY:
      case RTEMS_BDBUF_STATE_MODIFIED:
      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
        return;
      case RTEMS_BDBUF_STATE_SYNC:
      case RTEMS_BDBUF_STATE_TRANSFER:
      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
        /* Still being written or read; block on the transfer waiters. */
        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
        break;
      default:
        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_9);
    }
  }
}
1633
1634static void
1635rtems_bdbuf_wait_for_buffer (void)
1636{
1637  if (!rtems_chain_is_empty (&bdbuf_cache.modified))
1638    rtems_bdbuf_wake_swapper ();
1639
1640  rtems_bdbuf_anonymous_wait (&bdbuf_cache.buffer_waiters);
1641}
1642
/**
 * Queue a buffer released after access for a synchronous write and wait for
 * the write to finish.  Afterwards a waiter-free cached or empty buffer is
 * made available for recycling.
 */
static void
rtems_bdbuf_sync_after_access (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_SYNC);

  rtems_chain_append (&bdbuf_cache.sync, &bd->link);

  if (bd->waiters)
    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);

  rtems_bdbuf_wake_swapper ();
  rtems_bdbuf_wait_for_sync_done (bd);

  /*
   * We may have created a cached or empty buffer which may be recycled.
   */
  if (bd->waiters == 0
        && (bd->state == RTEMS_BDBUF_STATE_CACHED
          || bd->state == RTEMS_BDBUF_STATE_EMPTY))
  {
    if (bd->state == RTEMS_BDBUF_STATE_EMPTY)
    {
      /* Empty buffers carry no data: take them out of the tree and free. */
      rtems_bdbuf_remove_from_tree (bd);
      rtems_bdbuf_make_free_and_add_to_lru_list (bd);
    }
    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
  }
}
1671
1672static rtems_bdbuf_buffer *
1673rtems_bdbuf_get_buffer_for_read_ahead (const rtems_disk_device *dd,
1674                                       rtems_blkdev_bnum block,
1675                                       size_t            bds_per_group)
1676{
1677  rtems_bdbuf_buffer *bd = NULL;
1678
1679  bd = rtems_bdbuf_avl_search (&bdbuf_cache.tree, dd, block);
1680
1681  if (bd == NULL)
1682  {
1683    bd = rtems_bdbuf_get_buffer_from_lru_list (dd, block, bds_per_group);
1684
1685    if (bd != NULL)
1686      rtems_bdbuf_group_obtain (bd);
1687  }
1688  else
1689    /*
1690     * The buffer is in the cache.  So it is already available or in use, and
1691     * thus no need for a read ahead.
1692     */
1693    bd = NULL;
1694
1695  return bd;
1696}
1697
/**
 * Obtain a buffer for (dd, block) for exclusive access, blocking as needed.
 * An existing buffer whose group has the wrong BDs-per-group count is
 * recycled and the lookup retried.  On return the buffer is accessible and a
 * group reference is held.
 */
static rtems_bdbuf_buffer *
rtems_bdbuf_get_buffer_for_access (const rtems_disk_device *dd,
                                   rtems_blkdev_bnum block,
                                   size_t            bds_per_group)
{
  rtems_bdbuf_buffer *bd = NULL;

  do
  {
    bd = rtems_bdbuf_avl_search (&bdbuf_cache.tree, dd, block);

    if (bd != NULL)
    {
      if (bd->group->bds_per_group != bds_per_group)
      {
        /* Wrong group geometry: free the buffer and search again. */
        if (rtems_bdbuf_wait_for_recycle (bd))
        {
          rtems_bdbuf_remove_from_tree_and_lru_list (bd);
          rtems_bdbuf_make_free_and_add_to_lru_list (bd);
          rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
        }
        bd = NULL;
      }
    }
    else
    {
      bd = rtems_bdbuf_get_buffer_from_lru_list (dd, block, bds_per_group);

      if (bd == NULL)
        /* Nothing recyclable right now: wait for a buffer to free up. */
        rtems_bdbuf_wait_for_buffer ();
    }
  }
  while (bd == NULL);

  rtems_bdbuf_wait_for_access (bd);
  rtems_bdbuf_group_obtain (bd);

  return bd;
}
1737
1738static rtems_status_code
1739rtems_bdbuf_obtain_disk (const rtems_disk_device *dd,
1740                         rtems_blkdev_bnum   block,
1741                         rtems_blkdev_bnum  *media_block_ptr,
1742                         size_t             *bds_per_group_ptr)
1743{
1744  if (!bdbuf_cache.initialised)
1745    return RTEMS_NOT_CONFIGURED;
1746
1747  if (media_block_ptr != NULL)
1748  {
1749    /*
1750     * Compute the media block number. Drivers work with media block number not
1751     * the block number a BD may have as this depends on the block size set by
1752     * the user.
1753     */
1754    rtems_blkdev_bnum mb = rtems_bdbuf_media_block (block,
1755                                                    dd->block_size,
1756                                                    dd->media_block_size);
1757    if (mb >= dd->size)
1758    {
1759      return RTEMS_INVALID_NUMBER;
1760    }
1761
1762    *media_block_ptr = mb + dd->start;
1763  }
1764
1765  if (bds_per_group_ptr != NULL)
1766  {
1767    size_t bds_per_group = rtems_bdbuf_bds_per_group (dd->block_size);
1768
1769    if (bds_per_group == 0)
1770    {
1771      return RTEMS_INVALID_NUMBER;
1772    }
1773
1774    *bds_per_group_ptr = bds_per_group;
1775  }
1776
1777  return RTEMS_SUCCESSFUL;
1778}
1779
/**
 * Get a buffer for the given block without reading it from disk.  The buffer
 * is returned in an access state; the caller must eventually release it with
 * rtems_bdbuf_release() or rtems_bdbuf_release_modified().
 */
rtems_status_code
rtems_bdbuf_get (const rtems_disk_device *dd,
                 rtems_blkdev_bnum    block,
                 rtems_bdbuf_buffer **bd_ptr)
{
  rtems_status_code   sc = RTEMS_SUCCESSFUL;
  rtems_bdbuf_buffer *bd = NULL;
  rtems_blkdev_bnum   media_block = 0;
  size_t              bds_per_group = 0;

  sc = rtems_bdbuf_obtain_disk (dd, block, &media_block, &bds_per_group);
  if (sc != RTEMS_SUCCESSFUL)
    return sc;

  rtems_bdbuf_lock_cache ();

  /*
   * Print the block index relative to the physical disk.
   */
  if (rtems_bdbuf_tracer)
    printf ("bdbuf:get: %" PRIu32 " (%" PRIu32 ") (dev = %08x)\n",
            media_block, block, (unsigned) dd->dev);

  bd = rtems_bdbuf_get_buffer_for_access (dd, media_block, bds_per_group);

  /* Move the buffer into the matching ACCESS_* state. */
  switch (bd->state)
  {
    case RTEMS_BDBUF_STATE_CACHED:
      rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED);
      break;
    case RTEMS_BDBUF_STATE_EMPTY:
      rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_EMPTY);
      break;
    case RTEMS_BDBUF_STATE_MODIFIED:
      /*
       * To get a modified buffer could be considered a bug in the caller
       * because you should not be getting an already modified buffer but user
       * may have modified a byte in a block then decided to seek the start and
       * write the whole block and the file system will have no record of this
       * so just gets the block to fill.
       */
      rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_MODIFIED);
      break;
    default:
      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_2);
      break;
  }

  if (rtems_bdbuf_tracer)
  {
    rtems_bdbuf_show_users ("get", bd);
    rtems_bdbuf_show_usage ();
  }

  rtems_bdbuf_unlock_cache ();

  *bd_ptr = bd;

  return RTEMS_SUCCESSFUL;
}
1840
1841/**
1842 * Call back handler called by the low level driver when the transfer has
1843 * completed. This function may be invoked from interrupt handler.
1844 *
1845 * @param arg Arbitrary argument specified in block device request
1846 *            structure (in this case - pointer to the appropriate
1847 *            block device request structure).
1848 * @param status I/O completion status
1849 */
1850static void
1851rtems_bdbuf_transfer_done (void* arg, rtems_status_code status)
1852{
1853  rtems_blkdev_request* req = (rtems_blkdev_request*) arg;
1854
1855  req->status = status;
1856
1857  rtems_event_send (req->io_task, RTEMS_BDBUF_TRANSFER_SYNC);
1858}
1859
/**
 * Fill in a read request for the given media block plus as many read-ahead
 * blocks as can be obtained without blocking, up to the configured maximum
 * and the end of the disk.  The first buffer is always obtained (blocking if
 * necessary) and returned via @a bd_ptr; if it is already CACHED or MODIFIED
 * no transfer is set up (req->bufnum stays 0).
 */
static void
rtems_bdbuf_create_read_request (const rtems_disk_device *dd,
                                 rtems_blkdev_bnum        media_block,
                                 size_t                   bds_per_group,
                                 rtems_blkdev_request    *req,
                                 rtems_bdbuf_buffer     **bd_ptr)
{
  rtems_bdbuf_buffer *bd = NULL;
  rtems_blkdev_bnum   media_block_end = dd->start + dd->size;
  rtems_blkdev_bnum   media_block_count = dd->block_size / dd->media_block_size;
  uint32_t            block_size = dd->block_size;
  uint32_t            transfer_index = 1;
  uint32_t            transfer_count = bdbuf_config.max_read_ahead_blocks + 1;

  /* Clamp the read-ahead window at the end of the disk. */
  if (media_block_end - media_block < transfer_count)
    transfer_count = media_block_end - media_block;

  req->req = RTEMS_BLKDEV_REQ_READ;
  req->req_done = rtems_bdbuf_transfer_done;
  req->done_arg = req;
  req->io_task = rtems_task_self ();
  req->status = RTEMS_RESOURCE_IN_USE;
  req->bufnum = 0;

  bd = rtems_bdbuf_get_buffer_for_access (dd, media_block, bds_per_group);

  *bd_ptr = bd;

  req->bufs [0].user   = bd;
  req->bufs [0].block  = media_block;
  req->bufs [0].length = block_size;
  req->bufs [0].buffer = bd->buffer;

  if (rtems_bdbuf_tracer)
    rtems_bdbuf_show_users ("read", bd);

  switch (bd->state)
  {
    case RTEMS_BDBUF_STATE_CACHED:
    case RTEMS_BDBUF_STATE_MODIFIED:
      /* Data already valid: no device transfer needed (bufnum stays 0). */
      return;
    case RTEMS_BDBUF_STATE_EMPTY:
      rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);
      break;
    default:
      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_1);
      break;
  }

  /* Append read-ahead buffers until the window is full or one cannot be
   * obtained without blocking. */
  while (transfer_index < transfer_count)
  {
    media_block += media_block_count;

    bd = rtems_bdbuf_get_buffer_for_read_ahead (dd, media_block,
                                                bds_per_group);

    if (bd == NULL)
      break;

    rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);

    req->bufs [transfer_index].user   = bd;
    req->bufs [transfer_index].block  = media_block;
    req->bufs [transfer_index].length = block_size;
    req->bufs [transfer_index].buffer = bd->buffer;

    if (rtems_bdbuf_tracer)
      rtems_bdbuf_show_users ("read-ahead", bd);

    ++transfer_index;
  }

  req->bufnum = transfer_index;
}
1934
/**
 * Hand a prepared request to the driver, wait for completion, and update the
 * state of every involved buffer.  Successful transfers become cached;
 * failed ones are discarded.
 *
 * @param cache_locked Whether the caller holds the cache lock; on return the
 *        lock state is the same as on entry.
 */
static rtems_status_code
rtems_bdbuf_execute_transfer_request (const rtems_disk_device *dd,
                                      rtems_blkdev_request    *req,
                                      bool                     cache_locked)
{
  rtems_status_code sc = RTEMS_SUCCESSFUL;
  int result = 0;
  uint32_t transfer_index = 0;
  bool wake_transfer_waiters = false;
  bool wake_buffer_waiters = false;

  /* The driver call may block; never hold the cache lock across it. */
  if (cache_locked)
    rtems_bdbuf_unlock_cache ();

  result = dd->ioctl (dd->phys_dev, RTEMS_BLKIO_REQUEST, req);

  if (result == 0)
  {
    /* Wait for rtems_bdbuf_transfer_done() to signal completion. */
    rtems_bdbuf_wait_for_event (RTEMS_BDBUF_TRANSFER_SYNC);
    sc = req->status;
  }
  else
    sc = RTEMS_IO_ERROR;

  rtems_bdbuf_lock_cache ();

  for (transfer_index = 0; transfer_index < req->bufnum; ++transfer_index)
  {
    rtems_bdbuf_buffer *bd = req->bufs [transfer_index].user;
    bool waiters = bd->waiters;

    if (waiters)
      wake_transfer_waiters = true;
    else
      wake_buffer_waiters = true;

    rtems_bdbuf_group_release (bd);

    /* A PURGED state or an I/O error invalidates the buffer contents. */
    if (sc == RTEMS_SUCCESSFUL && bd->state == RTEMS_BDBUF_STATE_TRANSFER)
      rtems_bdbuf_make_cached_and_add_to_lru_list (bd);
    else
      rtems_bdbuf_discard_buffer (bd);

    if (rtems_bdbuf_tracer)
      rtems_bdbuf_show_users ("transfer", bd);
  }

  if (wake_transfer_waiters)
    rtems_bdbuf_wake (&bdbuf_cache.transfer_waiters);

  if (wake_buffer_waiters)
    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);

  if (!cache_locked)
    rtems_bdbuf_unlock_cache ();

  if (sc == RTEMS_SUCCESSFUL || sc == RTEMS_UNSATISFIED)
    return sc;
  else
    return RTEMS_IO_ERROR;
}
1996
/**
 * Get a buffer for the given block with valid data, reading it (plus
 * read-ahead blocks) from the device if it is not cached.  On success the
 * buffer is returned in an access state and must be released by the caller.
 */
rtems_status_code
rtems_bdbuf_read (const rtems_disk_device *dd,
                  rtems_blkdev_bnum    block,
                  rtems_bdbuf_buffer **bd_ptr)
{
  rtems_status_code     sc = RTEMS_SUCCESSFUL;
  rtems_blkdev_request *req = NULL;
  rtems_bdbuf_buffer   *bd = NULL;
  rtems_blkdev_bnum     media_block = 0;
  size_t                bds_per_group = 0;

  sc = rtems_bdbuf_obtain_disk (dd, block, &media_block, &bds_per_group);
  if (sc != RTEMS_SUCCESSFUL)
    return sc;

  /*
   * TODO: This type of request structure is wrong and should be removed.
   */
#define bdbuf_alloc(size) __builtin_alloca (size)

  /* Stack-allocate a request big enough for the maximum read-ahead. */
  req = bdbuf_alloc (sizeof (rtems_blkdev_request) +
                     sizeof (rtems_blkdev_sg_buffer) *
                      (bdbuf_config.max_read_ahead_blocks + 1));

  if (rtems_bdbuf_tracer)
    printf ("bdbuf:read: %" PRIu32 " (%" PRIu32 ") (dev = %08x)\n",
            media_block + dd->start, block, (unsigned) dd->dev);

  rtems_bdbuf_lock_cache ();
  rtems_bdbuf_create_read_request (dd, media_block, bds_per_group, req, &bd);

  /* bufnum == 0 means the block was already cached or modified. */
  if (req->bufnum > 0)
  {
    sc = rtems_bdbuf_execute_transfer_request (dd, req, true);
    if (sc == RTEMS_SUCCESSFUL)
    {
      /* The transfer put the buffer on the LRU list; reclaim it. */
      rtems_chain_extract (&bd->link);
      rtems_bdbuf_group_obtain (bd);
    }
  }

  if (sc == RTEMS_SUCCESSFUL)
  {
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_CACHED:
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED);
        break;
      case RTEMS_BDBUF_STATE_MODIFIED:
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_MODIFIED);
        break;
      default:
        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_4);
        break;
    }

    if (rtems_bdbuf_tracer)
    {
      rtems_bdbuf_show_users ("read", bd);
      rtems_bdbuf_show_usage ();
    }

    *bd_ptr = bd;
  }
  else
    *bd_ptr = NULL;

  rtems_bdbuf_unlock_cache ();

  return sc;
}
2068
2069static rtems_status_code
2070rtems_bdbuf_check_bd_and_lock_cache (rtems_bdbuf_buffer *bd, const char *kind)
2071{
2072  if (!bdbuf_cache.initialised)
2073    return RTEMS_NOT_CONFIGURED;
2074  if (bd == NULL)
2075    return RTEMS_INVALID_ADDRESS;
2076  if (rtems_bdbuf_tracer)
2077  {
2078    printf ("bdbuf:%s: %" PRIu32 "\n", kind, bd->block);
2079    rtems_bdbuf_show_users (kind, bd);
2080  }
2081  rtems_bdbuf_lock_cache();
2082
2083  return RTEMS_SUCCESSFUL;
2084}
2085
2086rtems_status_code
2087rtems_bdbuf_release (rtems_bdbuf_buffer *bd)
2088{
2089  rtems_status_code sc = RTEMS_SUCCESSFUL;
2090
2091  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "release");
2092  if (sc != RTEMS_SUCCESSFUL)
2093    return sc;
2094
2095  switch (bd->state)
2096  {
2097    case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2098      rtems_bdbuf_add_to_lru_list_after_access (bd);
2099      break;
2100    case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
2101    case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2102      rtems_bdbuf_discard_buffer_after_access (bd);
2103      break;
2104    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2105      rtems_bdbuf_add_to_modified_list_after_access (bd);
2106      break;
2107    default:
2108      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_0);
2109      break;
2110  }
2111
2112  if (rtems_bdbuf_tracer)
2113    rtems_bdbuf_show_usage ();
2114
2115  rtems_bdbuf_unlock_cache ();
2116
2117  return RTEMS_SUCCESSFUL;
2118}
2119
2120rtems_status_code
2121rtems_bdbuf_release_modified (rtems_bdbuf_buffer *bd)
2122{
2123  rtems_status_code sc = RTEMS_SUCCESSFUL;
2124
2125  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "release modified");
2126  if (sc != RTEMS_SUCCESSFUL)
2127    return sc;
2128
2129  switch (bd->state)
2130  {
2131    case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2132    case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
2133    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2134      rtems_bdbuf_add_to_modified_list_after_access (bd);
2135      break;
2136    case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2137      rtems_bdbuf_discard_buffer_after_access (bd);
2138      break;
2139    default:
2140      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_6);
2141      break;
2142  }
2143
2144  if (rtems_bdbuf_tracer)
2145    rtems_bdbuf_show_usage ();
2146
2147  rtems_bdbuf_unlock_cache ();
2148
2149  return RTEMS_SUCCESSFUL;
2150}
2151
2152rtems_status_code
2153rtems_bdbuf_sync (rtems_bdbuf_buffer *bd)
2154{
2155  rtems_status_code sc = RTEMS_SUCCESSFUL;
2156
2157  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "sync");
2158  if (sc != RTEMS_SUCCESSFUL)
2159    return sc;
2160
2161  switch (bd->state)
2162  {
2163    case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2164    case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
2165    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2166      rtems_bdbuf_sync_after_access (bd);
2167      break;
2168    case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2169      rtems_bdbuf_discard_buffer_after_access (bd);
2170      break;
2171    default:
2172      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_5);
2173      break;
2174  }
2175
2176  if (rtems_bdbuf_tracer)
2177    rtems_bdbuf_show_usage ();
2178
2179  rtems_bdbuf_unlock_cache ();
2180
2181  return RTEMS_SUCCESSFUL;
2182}
2183
/**
 * Synchronise all modified buffers of the disk device @a dd to the media.
 * The caller blocks until the swapout task signals the
 * RTEMS_BDBUF_TRANSFER_SYNC event when no more buffers for the device remain
 * on the "modified for sync" queues.
 *
 * @param dd The disk device to synchronise.
 *
 * @retval RTEMS_SUCCESSFUL All modified buffers were written.
 * @retval codes propagated from the disk lookup on failure.
 */
rtems_status_code
rtems_bdbuf_syncdev (const rtems_disk_device *dd)
{
  rtems_status_code  sc = RTEMS_SUCCESSFUL;

  if (rtems_bdbuf_tracer)
    printf ("bdbuf:syncdev: %08x\n", (unsigned) dd->dev);

  /* Validate the device; block 0 is used as no specific block matters. */
  sc = rtems_bdbuf_obtain_disk (dd, 0, NULL, NULL);
  if (sc != RTEMS_SUCCESSFUL)
    return sc;

  /*
   * Take the sync lock before locking the cache. Once we have the sync lock we
   * can lock the cache. If another thread has the sync lock it will cause this
   * thread to block until it owns the sync lock then it can own the cache. The
   * sync lock can only be obtained with the cache unlocked.
   */
  rtems_bdbuf_lock_sync ();
  rtems_bdbuf_lock_cache ();

  /*
   * Set the cache to have a sync active for a specific device and let the swap
   * out task know the id of the requester to wake when done.
   *
   * The swap out task will negate the sync active flag when no more buffers
   * for the device are held on the "modified for sync" queues.
   */
  bdbuf_cache.sync_active    = true;
  bdbuf_cache.sync_requester = rtems_task_self ();
  bdbuf_cache.sync_device    = dd;

  /* Kick the swapout task, then wait for its completion event with only the
     sync lock held. */
  rtems_bdbuf_wake_swapper ();
  rtems_bdbuf_unlock_cache ();
  rtems_bdbuf_wait_for_event (RTEMS_BDBUF_TRANSFER_SYNC);
  rtems_bdbuf_unlock_sync ();

  return RTEMS_SUCCESSFUL;
}
2223
2224/**
2225 * Swapout transfer to the driver. The driver will break this I/O into groups
2226 * of consecutive write requests is multiple consecutive buffers are required
2227 * by the driver. The cache is not locked.
2228 *
2229 * @param transfer The transfer transaction.
2230 */
2231static void
2232rtems_bdbuf_swapout_write (rtems_bdbuf_swapout_transfer* transfer)
2233{
2234  rtems_chain_node *node;
2235
2236  if (rtems_bdbuf_tracer)
2237    printf ("bdbuf:swapout transfer: %08x\n", (unsigned) transfer->dd->dev);
2238
2239  /*
2240   * If there are buffers to transfer to the media transfer them.
2241   */
2242  if (!rtems_chain_is_empty (&transfer->bds))
2243  {
2244    /*
2245     * The last block number used when the driver only supports
2246     * continuous blocks in a single request.
2247     */
2248    uint32_t last_block = 0;
2249
2250    /*
2251     * Number of buffers per bd. This is used to detect the next
2252     * block.
2253     */
2254    uint32_t bufs_per_bd = 0;
2255
2256    const rtems_disk_device *dd = transfer->dd;
2257
2258    bufs_per_bd = dd->block_size / bdbuf_config.buffer_min;
2259
2260    /*
2261     * Take as many buffers as configured and pass to the driver. Note, the
2262     * API to the drivers has an array of buffers and if a chain was passed
2263     * we could have just passed the list. If the driver API is updated it
2264     * should be possible to make this change with little effect in this
2265     * code. The array that is passed is broken in design and should be
2266     * removed. Merging members of a struct into the first member is
2267     * trouble waiting to happen.
2268     */
2269    transfer->write_req->status = RTEMS_RESOURCE_IN_USE;
2270    transfer->write_req->bufnum = 0;
2271
2272    while ((node = rtems_chain_get(&transfer->bds)) != NULL)
2273    {
2274      rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;
2275      bool                write = false;
2276
2277      /*
2278       * If the device only accepts sequential buffers and this is not the
2279       * first buffer (the first is always sequential, and the buffer is not
2280       * sequential then put the buffer back on the transfer chain and write
2281       * the committed buffers.
2282       */
2283
2284      if (rtems_bdbuf_tracer)
2285        printf ("bdbuf:swapout write: bd:%" PRIu32 ", bufnum:%" PRIu32 " mode:%s\n",
2286                bd->block, transfer->write_req->bufnum,
2287                dd->phys_dev->capabilities &
2288                RTEMS_BLKDEV_CAP_MULTISECTOR_CONT ? "MULIT" : "SCAT");
2289
2290      if ((dd->phys_dev->capabilities & RTEMS_BLKDEV_CAP_MULTISECTOR_CONT) &&
2291          transfer->write_req->bufnum &&
2292          (bd->block != (last_block + bufs_per_bd)))
2293      {
2294        rtems_chain_prepend (&transfer->bds, &bd->link);
2295        write = true;
2296      }
2297      else
2298      {
2299        rtems_blkdev_sg_buffer* buf;
2300        buf = &transfer->write_req->bufs[transfer->write_req->bufnum];
2301        transfer->write_req->bufnum++;
2302        buf->user   = bd;
2303        buf->block  = bd->block;
2304        buf->length = dd->block_size;
2305        buf->buffer = bd->buffer;
2306        last_block  = bd->block;
2307      }
2308
2309      /*
2310       * Perform the transfer if there are no more buffers, or the transfer
2311       * size has reached the configured max. value.
2312       */
2313
2314      if (rtems_chain_is_empty (&transfer->bds) ||
2315          (transfer->write_req->bufnum >= bdbuf_config.max_write_blocks))
2316        write = true;
2317
2318      if (write)
2319      {
2320        rtems_bdbuf_execute_transfer_request (dd, transfer->write_req, false);
2321
2322        transfer->write_req->status = RTEMS_RESOURCE_IN_USE;
2323        transfer->write_req->bufnum = 0;
2324      }
2325    }
2326
2327    /*
2328     * If sync'ing and the deivce is capability of handling a sync IO control
2329     * call perform the call.
2330     */
2331    if (transfer->syncing &&
2332        (dd->phys_dev->capabilities & RTEMS_BLKDEV_CAP_SYNC))
2333    {
2334      /* int result = */ dd->ioctl (dd->phys_dev, RTEMS_BLKDEV_REQ_SYNC, NULL);
2335      /* How should the error be handled ? */
2336    }
2337  }
2338}
2339
/**
 * Process the modified list of buffers. There is a sync or modified list that
 * needs to be handled so we have a common function to do the work.
 *
 * @param dd_ptr Pointer to the device to handle. If BDBUF_INVALID_DEV no
 * device is selected so select the device of the first buffer to be written to
 * disk.
 * @param chain The modified chain to process.
 * @param transfer The chain to append buffers to be written to.
 * @param sync_active If true this is a sync operation so expire all timers.
 * @param update_timers If true update the timers.
 * @param timer_delta If update_timers is true update the timers by this
 *                    amount.
 */
static void
rtems_bdbuf_swapout_modified_processing (const rtems_disk_device **dd_ptr,
                                         rtems_chain_control* chain,
                                         rtems_chain_control* transfer,
                                         bool                 sync_active,
                                         bool                 update_timers,
                                         uint32_t             timer_delta)
{
  if (!rtems_chain_is_empty (chain))
  {
    rtems_chain_node* node = rtems_chain_head (chain);
    bool              sync_all;

    /* Skip the chain head to the first real node. */
    node = node->next;

    /*
     * A sync active with no valid dev means sync all.
     */
    if (sync_active && (*dd_ptr == BDBUF_INVALID_DEV))
      sync_all = true;
    else
      sync_all = false;

    while (!rtems_chain_is_tail (chain, node))
    {
      rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;

      /*
       * Check if the buffer's hold timer has reached 0. If a sync is active
       * or someone waits for a buffer written force all the timers to 0.
       *
       * @note Lots of sync requests will skew this timer. It should be based
       *       on TOD to be accurate. Does it matter ?
       */
      if (sync_all || (sync_active && (*dd_ptr == bd->dd))
          || rtems_bdbuf_has_buffer_waiters ())
        bd->hold_timer = 0;

      if (bd->hold_timer)
      {
        if (update_timers)
        {
          if (bd->hold_timer > timer_delta)
            bd->hold_timer -= timer_delta;
          else
            bd->hold_timer = 0;
        }

        /* Timer still running: leave the buffer on the modified list. */
        if (bd->hold_timer)
        {
          node = node->next;
          continue;
        }
      }

      /*
       * This assumes we can set it to BDBUF_INVALID_DEV which is just an
       * assumption. Cannot use the transfer list being empty the sync dev
       * calls sets the dev to use.
       */
      if (*dd_ptr == BDBUF_INVALID_DEV)
        *dd_ptr = bd->dd;

      /* Only buffers of the selected device are moved; others stay put. */
      if (bd->dd == *dd_ptr)
      {
        rtems_chain_node* next_node = node->next;
        rtems_chain_node* tnode = rtems_chain_tail (transfer);

        /*
         * The blocks on the transfer list are sorted in block order. This
         * means multi-block transfers for drivers that require consecutive
         * blocks perform better with sorted blocks and for real disks it may
         * help lower head movement.
         */

        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);

        rtems_chain_extract (node);

        tnode = tnode->previous;

        /* Scan the transfer list backwards from the tail for the insertion
           point that keeps it in ascending block order. */
        while (node && !rtems_chain_is_head (transfer, tnode))
        {
          rtems_bdbuf_buffer* tbd = (rtems_bdbuf_buffer*) tnode;

          if (bd->block > tbd->block)
          {
            rtems_chain_insert (tnode, node);
            node = NULL;
          }
          else
            tnode = tnode->previous;
        }

        /* Smaller than everything on the list: it becomes the new head. */
        if (node)
          rtems_chain_prepend (transfer, node);

        node = next_node;
      }
      else
      {
        node = node->next;
      }
    }
  }
}
2460
/**
 * Process the cache's modified buffers. Check the sync list first then the
 * modified list extracting the buffers suitable to be written to disk. We have
 * a device at a time. The task level loop will repeat this operation while
 * there are buffers to be written. If the transfer fails place the buffers
 * back on the modified list and try again later. The cache is unlocked while
 * the buffers are being written to disk.
 *
 * @param timer_delta If update_timers is true update the timers by this
 *                    amount.
 * @param update_timers If true update the timers.
 * @param transfer The transfer transaction data.
 *
 * @retval true Buffers were written to disk so scan again.
 * @retval false No buffers were written to disk.
 */
static bool
rtems_bdbuf_swapout_processing (unsigned long                 timer_delta,
                                bool                          update_timers,
                                rtems_bdbuf_swapout_transfer* transfer)
{
  rtems_bdbuf_swapout_worker* worker;
  bool                        transfered_buffers = false;

  rtems_bdbuf_lock_cache ();

  /*
   * If a sync is active do not use a worker because the current code does not
   * cleaning up after. We need to know the buffers have been written when
   * syncing to release sync lock and currently worker threads do not return to
   * here. We do not know the worker is the last in a sequence of sync writes
   * until after we have it running so we do not know to tell it to release the
   * lock. The simplest solution is to get the main swap out task perform all
   * sync operations.
   */
  if (bdbuf_cache.sync_active)
    worker = NULL;
  else
  {
    /* Pop an idle worker; if one exists its private transfer is used. */
    worker = (rtems_bdbuf_swapout_worker*)
      rtems_chain_get (&bdbuf_cache.swapout_workers);
    if (worker)
      transfer = &worker->transfer;
  }

  rtems_chain_initialize_empty (&transfer->bds);
  transfer->dd = BDBUF_INVALID_DEV;
  transfer->syncing = bdbuf_cache.sync_active;

  /*
   * When the sync is for a device limit the sync to that device. If the sync
   * is for a buffer handle process the devices in the order on the sync
   * list. This means the dev is BDBUF_INVALID_DEV.
   */
  if (bdbuf_cache.sync_active)
    transfer->dd = bdbuf_cache.sync_device;

  /*
   * If we have any buffers in the sync queue move them to the modified
   * list. The first sync buffer will select the device we use.
   */
  rtems_bdbuf_swapout_modified_processing (&transfer->dd,
                                           &bdbuf_cache.sync,
                                           &transfer->bds,
                                           true, false,
                                           timer_delta);

  /*
   * Process the cache's modified list.
   */
  rtems_bdbuf_swapout_modified_processing (&transfer->dd,
                                           &bdbuf_cache.modified,
                                           &transfer->bds,
                                           bdbuf_cache.sync_active,
                                           update_timers,
                                           timer_delta);

  /*
   * We have all the buffers that have been modified for this device so the
   * cache can be unlocked because the state of each buffer has been set to
   * TRANSFER.
   */
  rtems_bdbuf_unlock_cache ();

  /*
   * If there are buffers to transfer to the media transfer them.
   */
  if (!rtems_chain_is_empty (&transfer->bds))
  {
    if (worker)
    {
      /* Hand the transfer over to the worker task. */
      rtems_status_code sc = rtems_event_send (worker->id,
                                               RTEMS_BDBUF_SWAPOUT_SYNC);
      if (sc != RTEMS_SUCCESSFUL)
        rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE);
    }
    else
    {
      /* No worker (or sync active): write from this task. */
      rtems_bdbuf_swapout_write (transfer);
    }

    transfered_buffers = true;
  }

  /* Sync requested but nothing left to write: the sync is complete, so wake
     the requester waiting in rtems_bdbuf_syncdev(). */
  if (bdbuf_cache.sync_active && !transfered_buffers)
  {
    rtems_id sync_requester;
    rtems_bdbuf_lock_cache ();
    sync_requester = bdbuf_cache.sync_requester;
    bdbuf_cache.sync_active = false;
    bdbuf_cache.sync_requester = 0;
    rtems_bdbuf_unlock_cache ();
    if (sync_requester)
      rtems_event_send (sync_requester, RTEMS_BDBUF_TRANSFER_SYNC);
  }

  return transfered_buffers;
}
2579
2580/**
2581 * Allocate the write request and initialise it for good measure.
2582 *
2583 * @return rtems_blkdev_request* The write reference memory.
2584 */
2585static rtems_blkdev_request*
2586rtems_bdbuf_swapout_writereq_alloc (void)
2587{
2588  /*
2589   * @note chrisj The rtems_blkdev_request and the array at the end is a hack.
2590   * I am disappointment at finding code like this in RTEMS. The request should
2591   * have been a rtems_chain_control. Simple, fast and less storage as the node
2592   * is already part of the buffer structure.
2593   */
2594  rtems_blkdev_request* write_req =
2595    malloc (sizeof (rtems_blkdev_request) +
2596            (bdbuf_config.max_write_blocks * sizeof (rtems_blkdev_sg_buffer)));
2597
2598  if (!write_req)
2599    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM);
2600
2601  write_req->req = RTEMS_BLKDEV_REQ_WRITE;
2602  write_req->req_done = rtems_bdbuf_transfer_done;
2603  write_req->done_arg = write_req;
2604  write_req->io_task = rtems_task_self ();
2605
2606  return write_req;
2607}
2608
2609/**
2610 * The swapout worker thread body.
2611 *
2612 * @param arg A pointer to the worker thread's private data.
2613 * @return rtems_task Not used.
2614 */
2615static rtems_task
2616rtems_bdbuf_swapout_worker_task (rtems_task_argument arg)
2617{
2618  rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) arg;
2619
2620  while (worker->enabled)
2621  {
2622    rtems_bdbuf_wait_for_event (RTEMS_BDBUF_SWAPOUT_SYNC);
2623
2624    rtems_bdbuf_swapout_write (&worker->transfer);
2625
2626    rtems_bdbuf_lock_cache ();
2627
2628    rtems_chain_initialize_empty (&worker->transfer.bds);
2629    worker->transfer.dd = BDBUF_INVALID_DEV;
2630
2631    rtems_chain_append (&bdbuf_cache.swapout_workers, &worker->link);
2632
2633    rtems_bdbuf_unlock_cache ();
2634  }
2635
2636  free (worker->transfer.write_req);
2637  free (worker);
2638
2639  rtems_task_delete (RTEMS_SELF);
2640}
2641
2642/**
2643 * Open the swapout worker threads.
2644 */
2645static void
2646rtems_bdbuf_swapout_workers_open (void)
2647{
2648  rtems_status_code sc;
2649  size_t            w;
2650
2651  rtems_bdbuf_lock_cache ();
2652
2653  for (w = 0; w < bdbuf_config.swapout_workers; w++)
2654  {
2655    rtems_bdbuf_swapout_worker* worker;
2656
2657    worker = malloc (sizeof (rtems_bdbuf_swapout_worker));
2658    if (!worker)
2659      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM);
2660
2661    rtems_chain_append (&bdbuf_cache.swapout_workers, &worker->link);
2662    worker->enabled = true;
2663    worker->transfer.write_req = rtems_bdbuf_swapout_writereq_alloc ();
2664
2665    rtems_chain_initialize_empty (&worker->transfer.bds);
2666    worker->transfer.dd = BDBUF_INVALID_DEV;
2667
2668    sc = rtems_task_create (rtems_build_name('B', 'D', 'o', 'a' + w),
2669                            (bdbuf_config.swapout_priority ?
2670                             bdbuf_config.swapout_priority :
2671                             RTEMS_BDBUF_SWAPOUT_TASK_PRIORITY_DEFAULT),
2672                            SWAPOUT_TASK_STACK_SIZE,
2673                            RTEMS_PREEMPT | RTEMS_NO_TIMESLICE | RTEMS_NO_ASR,
2674                            RTEMS_LOCAL | RTEMS_NO_FLOATING_POINT,
2675                            &worker->id);
2676    if (sc != RTEMS_SUCCESSFUL)
2677      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_CREATE);
2678
2679    sc = rtems_task_start (worker->id,
2680                           rtems_bdbuf_swapout_worker_task,
2681                           (rtems_task_argument) worker);
2682    if (sc != RTEMS_SUCCESSFUL)
2683      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_START);
2684  }
2685
2686  rtems_bdbuf_unlock_cache ();
2687}
2688
2689/**
2690 * Close the swapout worker threads.
2691 */
2692static void
2693rtems_bdbuf_swapout_workers_close (void)
2694{
2695  rtems_chain_node* node;
2696
2697  rtems_bdbuf_lock_cache ();
2698
2699  node = rtems_chain_first (&bdbuf_cache.swapout_workers);
2700  while (!rtems_chain_is_tail (&bdbuf_cache.swapout_workers, node))
2701  {
2702    rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) node;
2703    worker->enabled = false;
2704    rtems_event_send (worker->id, RTEMS_BDBUF_SWAPOUT_SYNC);
2705    node = rtems_chain_next (node);
2706  }
2707
2708  rtems_bdbuf_unlock_cache ();
2709}
2710
2711/**
2712 * Body of task which takes care on flushing modified buffers to the disk.
2713 *
2714 * @param arg A pointer to the global cache data. Use the global variable and
2715 *            not this.
2716 * @return rtems_task Not used.
2717 */
2718static rtems_task
2719rtems_bdbuf_swapout_task (rtems_task_argument arg)
2720{
2721  rtems_bdbuf_swapout_transfer transfer;
2722  uint32_t                     period_in_ticks;
2723  const uint32_t               period_in_msecs = bdbuf_config.swapout_period;;
2724  uint32_t                     timer_delta;
2725
2726  transfer.write_req = rtems_bdbuf_swapout_writereq_alloc ();
2727  rtems_chain_initialize_empty (&transfer.bds);
2728  transfer.dd = BDBUF_INVALID_DEV;
2729  transfer.syncing = false;
2730
2731  /*
2732   * Localise the period.
2733   */
2734  period_in_ticks = RTEMS_MICROSECONDS_TO_TICKS (period_in_msecs * 1000);
2735
2736  /*
2737   * This is temporary. Needs to be changed to use the real time clock.
2738   */
2739  timer_delta = period_in_msecs;
2740
2741  /*
2742   * Create the worker threads.
2743   */
2744  rtems_bdbuf_swapout_workers_open ();
2745
2746  while (bdbuf_cache.swapout_enabled)
2747  {
2748    rtems_event_set   out;
2749    rtems_status_code sc;
2750
2751    /*
2752     * Only update the timers once in the processing cycle.
2753     */
2754    bool update_timers = true;
2755
2756    /*
2757     * If we write buffers to any disk perform a check again. We only write a
2758     * single device at a time and the cache may have more than one device's
2759     * buffers modified waiting to be written.
2760     */
2761    bool transfered_buffers;
2762
2763    do
2764    {
2765      transfered_buffers = false;
2766
2767      /*
2768       * Extact all the buffers we find for a specific device. The device is
2769       * the first one we find on a modified list. Process the sync queue of
2770       * buffers first.
2771       */
2772      if (rtems_bdbuf_swapout_processing (timer_delta,
2773                                          update_timers,
2774                                          &transfer))
2775      {
2776        transfered_buffers = true;
2777      }
2778
2779      /*
2780       * Only update the timers once.
2781       */
2782      update_timers = false;
2783    }
2784    while (transfered_buffers);
2785
2786    sc = rtems_event_receive (RTEMS_BDBUF_SWAPOUT_SYNC,
2787                              RTEMS_EVENT_ALL | RTEMS_WAIT,
2788                              period_in_ticks,
2789                              &out);
2790
2791    if ((sc != RTEMS_SUCCESSFUL) && (sc != RTEMS_TIMEOUT))
2792      rtems_fatal_error_occurred (BLKDEV_FATAL_BDBUF_SWAPOUT_RE);
2793  }
2794
2795  rtems_bdbuf_swapout_workers_close ();
2796
2797  free (transfer.write_req);
2798
2799  rtems_task_delete (RTEMS_SELF);
2800}
2801
2802static void
2803rtems_bdbuf_purge_list (rtems_chain_control *purge_list)
2804{
2805  bool wake_buffer_waiters = false;
2806  rtems_chain_node *node = NULL;
2807
2808  while ((node = rtems_chain_get (purge_list)) != NULL)
2809  {
2810    rtems_bdbuf_buffer *bd = (rtems_bdbuf_buffer *) node;
2811
2812    if (bd->waiters == 0)
2813      wake_buffer_waiters = true;
2814
2815    rtems_bdbuf_discard_buffer (bd);
2816  }
2817
2818  if (wake_buffer_waiters)
2819    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
2820}
2821
/**
 * Walk the whole AVL tree and gather every buffer belonging to the disk
 * device @a dd onto @a purge_list. Buffers in free/empty/already-purged
 * states are left alone. Buffers currently accessed or in transfer cannot be
 * extracted here; they are instead moved to the matching *_PURGED state so
 * the accessor or the transfer completion discards them. The caller must
 * hold the cache lock (the tree and chains are manipulated unlocked here).
 *
 * The traversal is iterative pre-order using an explicit parent stack
 * bounded by RTEMS_BDBUF_AVL_MAX_HEIGHT.
 *
 * @param purge_list Receives the extracted buffers.
 * @param dd The disk device whose buffers are purged.
 */
static void
rtems_bdbuf_gather_for_purge (rtems_chain_control *purge_list,
                              const rtems_disk_device *dd)
{
  rtems_bdbuf_buffer *stack [RTEMS_BDBUF_AVL_MAX_HEIGHT];
  rtems_bdbuf_buffer **prev = stack;
  rtems_bdbuf_buffer *cur = bdbuf_cache.tree;

  /* Sentinel marking the bottom of the parent stack. */
  *prev = NULL;

  while (cur != NULL)
  {
    if (cur->dd == dd)
    {
      switch (cur->state)
      {
        case RTEMS_BDBUF_STATE_FREE:
        case RTEMS_BDBUF_STATE_EMPTY:
        case RTEMS_BDBUF_STATE_ACCESS_PURGED:
        case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
          /* Nothing to do: no data, or purge already pending. */
          break;
        case RTEMS_BDBUF_STATE_SYNC:
          /* A task may be waiting for this sync to finish. */
          rtems_bdbuf_wake (&bdbuf_cache.transfer_waiters);
          /* Fall through */
        case RTEMS_BDBUF_STATE_MODIFIED:
          rtems_bdbuf_group_release (cur);
          /* Fall through */
        case RTEMS_BDBUF_STATE_CACHED:
          /* Unqueued buffer: move it straight onto the purge list. */
          rtems_chain_extract (&cur->link);
          rtems_chain_append (purge_list, &cur->link);
          break;
        case RTEMS_BDBUF_STATE_TRANSFER:
          /* In flight: let the transfer completion discard it. */
          rtems_bdbuf_set_state (cur, RTEMS_BDBUF_STATE_TRANSFER_PURGED);
          break;
        case RTEMS_BDBUF_STATE_ACCESS_CACHED:
        case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
        case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
          /* Held by an accessor: it will discard on release/sync. */
          rtems_bdbuf_set_state (cur, RTEMS_BDBUF_STATE_ACCESS_PURGED);
          break;
        default:
          rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_STATE_11);
      }
    }

    if (cur->avl.left != NULL)
    {
      /* Left */
      ++prev;
      *prev = cur;
      cur = cur->avl.left;
    }
    else if (cur->avl.right != NULL)
    {
      /* Right */
      ++prev;
      *prev = cur;
      cur = cur->avl.right;
    }
    else
    {
      /* Leaf reached: climb until we arrive from a left child, then take
         the parent's right subtree, or finish at the sentinel. */
      while (*prev != NULL && cur == (*prev)->avl.right)
      {
        /* Up */
        cur = *prev;
        --prev;
      }
      if (*prev != NULL)
        /* Right */
        cur = (*prev)->avl.right;
      else
        /* Finished */
        cur = NULL;
    }
  }
}
2897
2898void
2899rtems_bdbuf_purge_dev (const rtems_disk_device *dd)
2900{
2901  rtems_chain_control purge_list;
2902
2903  rtems_chain_initialize_empty (&purge_list);
2904  rtems_bdbuf_lock_cache ();
2905  rtems_bdbuf_gather_for_purge (&purge_list, dd);
2906  rtems_bdbuf_purge_list (&purge_list);
2907  rtems_bdbuf_unlock_cache ();
2908}
Note: See TracBrowser for help on using the repository browser.