source: rtems/cpukit/libblock/src/bdbuf.c @ 50041a1

Last change on this file since 50041a1 was e4f88fc, checked in by Sebastian Huber <sebastian.huber@…>, on 03/26/12 at 12:58:35

libblock: Add rtems_bdbuf_set_block_size()

The new function rtems_bdbuf_set_block_size() must be used to set the
block size of a disk device. It will check if the block size is valid
and set the new fields block_to_media_block_shift and bds_per_group of
the rtems_disk_device structure. This helps to avoid complex arithmetic
operations in the block device buffer get and read path.
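
As a rough sketch of the intended use (the exact signature is assumed from the description above and is not shown in this change), a block device driver or the media layer would call the new function instead of writing dd->block_size directly, so that block_to_media_block_shift and bds_per_group stay consistent:

  rtems_status_code sc = rtems_bdbuf_set_block_size (dd, 1024);
  if (sc != RTEMS_SUCCESSFUL)
    return sc;   /* e.g. 1024 is not a valid multiple of the media block size */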

1/**
2 * @file
3 *
4 * @ingroup rtems_bdbuf
5 *
6 * Block device buffer management.
7 */
8
9/*
10 * Disk I/O buffering
11 * Buffer management
12 *
13 * Copyright (C) 2001 OKTET Ltd., St.-Peterburg, Russia
14 * Author: Andrey G. Ivanov <Andrey.Ivanov@oktet.ru>
15 *         Victor V. Vengerov <vvv@oktet.ru>
16 *         Alexander Kukuta <kam@oktet.ru>
17 *
18 * Copyright (C) 2008,2009 Chris Johns <chrisj@rtems.org>
19 *    Rewritten to remove score mutex access. Fixes many performance
20 *    issues.
21 *
22 * Copyright (c) 2009-2012 embedded brains GmbH.
23 *
24 * @(#) bdbuf.c,v 1.14 2004/04/17 08:15:17 ralf Exp
25 */
26
27/**
28 * Set to 1 to enable debug tracing.
29 */
30#define RTEMS_BDBUF_TRACE 0
31
32#if HAVE_CONFIG_H
33#include "config.h"
34#endif
35#include <limits.h>
36#include <errno.h>
37#include <stdio.h>
38#include <string.h>
39#include <inttypes.h>
40
41#include <rtems.h>
42#include <rtems/error.h>
43#include <rtems/malloc.h>
44
45#include "rtems/bdbuf.h"
46
47#define BDBUF_INVALID_DEV NULL
48
49/*
50 * Simpler label for this file.
51 */
52#define bdbuf_config rtems_bdbuf_configuration
53
54/**
55 * Swapout transfer transaction data. This data is passed to a worker thread
56 * to handle the write phase of the transfer.
57 */
58typedef struct rtems_bdbuf_swapout_transfer
59{
60  rtems_chain_control   bds;         /**< The transfer list of BDs. */
61  const rtems_disk_device *dd;       /**< The device the transfer is for. */
62  bool                  syncing;     /**< The transfer is part of a sync. */
63  rtems_blkdev_request* write_req;   /**< The write request array. */
64  uint32_t              bufs_per_bd; /**< Number of buffers per bd. */
65} rtems_bdbuf_swapout_transfer;
66
67/**
68 * Swapout worker thread. These are available to take processing from the
69 * main swapout thread and handle the I/O operation.
70 */
71typedef struct rtems_bdbuf_swapout_worker
72{
73  rtems_chain_node             link;     /**< The threads sit on a chain when
74                                          * idle. */
75  rtems_id                     id;       /**< The id of the task so we can wake
76                                          * it. */
77  bool                         enabled;  /**< The worker is enabled. */
78  rtems_bdbuf_swapout_transfer transfer; /**< The transfer data for this
79                                          * thread. */
80} rtems_bdbuf_swapout_worker;
81
82/**
83 * Buffer waiters synchronization.
84 */
85typedef struct rtems_bdbuf_waiters {
86  unsigned count;
87  rtems_id sema;
88} rtems_bdbuf_waiters;
89
90/**
91 * The BD buffer cache.
92 */
93typedef struct rtems_bdbuf_cache
94{
95  rtems_id            swapout;           /**< Swapout task ID */
96  bool                swapout_enabled;   /**< Swapout is only running if
97                                          * enabled. Set to false to kill the
98                                          * swap out task. It deletes itself. */
99  rtems_chain_control swapout_workers;   /**< The work threads for the swapout
100                                          * task. */
101
102  rtems_bdbuf_buffer* bds;               /**< Pointer to table of buffer
103                                          * descriptors. */
104  void*               buffers;           /**< The buffer's memory. */
105  size_t              buffer_min_count;  /**< Number of minimum size buffers
106                                          * that fit the buffer memory. */
107  size_t              max_bds_per_group; /**< The number of BDs of minimum
108                                          * buffer size that fit in a group. */
109  uint32_t            flags;             /**< Configuration flags. */
110
111  rtems_id            lock;              /**< The cache lock. It locks all
112                                          * cache data, BD and lists. */
113  rtems_id            sync_lock;         /**< Sync calls block writes. */
114  bool                sync_active;       /**< True if a sync is active. */
115  rtems_id            sync_requester;    /**< The sync requester. */
116  const rtems_disk_device *sync_device;  /**< The device to sync, or
117                                          * BDBUF_INVALID_DEV if not a device
118                                          * sync. */
119
120  rtems_bdbuf_buffer* tree;              /**< Buffer descriptor lookup AVL tree
121                                          * root. There is only one. */
122  rtems_chain_control lru;               /**< Least recently used list */
123  rtems_chain_control modified;          /**< Modified buffers list */
124  rtems_chain_control sync;              /**< Buffers to sync list */
125
126  rtems_bdbuf_waiters access_waiters;    /**< Wait for a buffer in
127                                          * ACCESS_CACHED, ACCESS_MODIFIED or
128                                          * ACCESS_EMPTY
129                                          * state. */
130  rtems_bdbuf_waiters transfer_waiters;  /**< Wait for a buffer in TRANSFER
131                                          * state. */
132  rtems_bdbuf_waiters buffer_waiters;    /**< Wait for a buffer when none is
133                                          * available. */
134
135  size_t              group_count;       /**< The number of groups. */
136  rtems_bdbuf_group*  groups;            /**< The groups. */
137
138  bool                initialised;       /**< Initialised state. */
139} rtems_bdbuf_cache;
140
141/**
142 * Fatal errors
143 */
144#define RTEMS_BLKDEV_FATAL_ERROR(n) \
145  (((uint32_t)'B' << 24) | ((uint32_t)(n) & (uint32_t)0x00FFFFFF))
146
147#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_11      RTEMS_BLKDEV_FATAL_ERROR(1)
148#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_4       RTEMS_BLKDEV_FATAL_ERROR(2)
149#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_5       RTEMS_BLKDEV_FATAL_ERROR(3)
150#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_6       RTEMS_BLKDEV_FATAL_ERROR(4)
151#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_7       RTEMS_BLKDEV_FATAL_ERROR(5)
152#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_8       RTEMS_BLKDEV_FATAL_ERROR(6)
153#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_9       RTEMS_BLKDEV_FATAL_ERROR(7)
154#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_10      RTEMS_BLKDEV_FATAL_ERROR(8)
155#define RTEMS_BLKDEV_FATAL_BDBUF_TREE_RM       RTEMS_BLKDEV_FATAL_ERROR(9)
156#define RTEMS_BLKDEV_FATAL_BDBUF_SWAPOUT       RTEMS_BLKDEV_FATAL_ERROR(10)
157#define RTEMS_BLKDEV_FATAL_BDBUF_SYNC_LOCK     RTEMS_BLKDEV_FATAL_ERROR(11)
158#define RTEMS_BLKDEV_FATAL_BDBUF_SYNC_UNLOCK   RTEMS_BLKDEV_FATAL_ERROR(12)
159#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_LOCK    RTEMS_BLKDEV_FATAL_ERROR(13)
160#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_UNLOCK  RTEMS_BLKDEV_FATAL_ERROR(14)
161#define RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_DIS   RTEMS_BLKDEV_FATAL_ERROR(15)
162#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_2  RTEMS_BLKDEV_FATAL_ERROR(16)
163#define RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_RST   RTEMS_BLKDEV_FATAL_ERROR(17)
164#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_TO RTEMS_BLKDEV_FATAL_ERROR(18)
165#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAKE    RTEMS_BLKDEV_FATAL_ERROR(19)
166#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE       RTEMS_BLKDEV_FATAL_ERROR(20)
167#define RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM      RTEMS_BLKDEV_FATAL_ERROR(21)
168#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_CREATE  RTEMS_BLKDEV_FATAL_ERROR(22)
169#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_START   RTEMS_BLKDEV_FATAL_ERROR(23)
170#define BLKDEV_FATAL_BDBUF_SWAPOUT_RE          RTEMS_BLKDEV_FATAL_ERROR(24)
171#define BLKDEV_FATAL_BDBUF_SWAPOUT_TS          RTEMS_BLKDEV_FATAL_ERROR(25)
172#define RTEMS_BLKDEV_FATAL_BDBUF_WAIT_EVNT     RTEMS_BLKDEV_FATAL_ERROR(26)
173#define RTEMS_BLKDEV_FATAL_BDBUF_RECYCLE       RTEMS_BLKDEV_FATAL_ERROR(27)
174#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_0       RTEMS_BLKDEV_FATAL_ERROR(28)
175#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_1       RTEMS_BLKDEV_FATAL_ERROR(29)
176#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_2       RTEMS_BLKDEV_FATAL_ERROR(30)
177
178/**
179 * The events used in this code. These should be system events rather than
180 * application events.
181 */
182#define RTEMS_BDBUF_TRANSFER_SYNC  RTEMS_EVENT_1
183#define RTEMS_BDBUF_SWAPOUT_SYNC   RTEMS_EVENT_2
184
185/**
186 * The swap out task stack size. Should be more than enough for most drivers with
187 * tracing turned on.
188 */
189#define SWAPOUT_TASK_STACK_SIZE (8 * 1024)
190
191/**
192 * Lock semaphore attributes. This is used for locking type mutexes.
193 *
194 * @warning Priority inheritance is on.
195 */
196#define RTEMS_BDBUF_CACHE_LOCK_ATTRIBS \
197  (RTEMS_PRIORITY | RTEMS_BINARY_SEMAPHORE | \
198   RTEMS_INHERIT_PRIORITY | RTEMS_NO_PRIORITY_CEILING | RTEMS_LOCAL)
199
200/**
201 * Waiter semaphore attributes.
202 *
203 * @warning Do not configure as inherit priority. If a driver is in the driver
204 *          initialisation table this locked semaphore will have the IDLE task
205 *          as the holder and a blocking task will raise the priority of the
206 *          IDLE task which can cause unusual side effects.
207 */
208#define RTEMS_BDBUF_CACHE_WAITER_ATTRIBS \
209  (RTEMS_PRIORITY | RTEMS_SIMPLE_BINARY_SEMAPHORE | \
210   RTEMS_NO_INHERIT_PRIORITY | RTEMS_NO_PRIORITY_CEILING | RTEMS_LOCAL)
211
212/**
213 * Waiter timeout. Set to non-zero to find some info on a waiter that is
214 * waiting too long.
215 */
216#define RTEMS_BDBUF_WAIT_TIMEOUT RTEMS_NO_TIMEOUT
217#if !defined (RTEMS_BDBUF_WAIT_TIMEOUT)
218#define RTEMS_BDBUF_WAIT_TIMEOUT \
219  (TOD_MICROSECONDS_TO_TICKS (20000000))
220#endif
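/*
 * With RTEMS_BDBUF_WAIT_TIMEOUT defined to RTEMS_NO_TIMEOUT above, the
 * fallback definition below is never used.  To find a waiter that is stuck,
 * comment out the define above so the 20 second timeout takes effect and the
 * waiter fails with RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_TO.
 */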
221
222/*
223 * The swap out task.
224 */
225static rtems_task rtems_bdbuf_swapout_task(rtems_task_argument arg);
226
227/**
228 * The Buffer Descriptor cache.
229 */
230static rtems_bdbuf_cache bdbuf_cache;
231
232#if RTEMS_BDBUF_TRACE
233/**
234 * If true, output the trace message.
235 */
236bool rtems_bdbuf_tracer;
237
238/**
239 * Return the number of items on the list.
240 *
241 * @param list The chain control.
242 * @return uint32_t The number of items on the list.
243 */
244uint32_t
245rtems_bdbuf_list_count (rtems_chain_control* list)
246{
247  rtems_chain_node* node = rtems_chain_first (list);
248  uint32_t          count = 0;
249  while (!rtems_chain_is_tail (list, node))
250  {
251    count++;
252    node = rtems_chain_next (node);
253  }
254  return count;
255}
256
257/**
258 * Show the usage for the bdbuf cache.
259 */
260void
261rtems_bdbuf_show_usage (void)
262{
263  uint32_t group;
264  uint32_t total = 0;
265  uint32_t val;
266
267  for (group = 0; group < bdbuf_cache.group_count; group++)
268    total += bdbuf_cache.groups[group].users;
269  printf ("bdbuf:group users=%" PRIu32, total);
270  val = rtems_bdbuf_list_count (&bdbuf_cache.lru);
271  printf (", lru=%" PRIu32, val);
272  total = val;
273  val = rtems_bdbuf_list_count (&bdbuf_cache.modified);
274  printf (", mod=%" PRIu32, val);
275  total += val;
276  val = rtems_bdbuf_list_count (&bdbuf_cache.sync);
277  printf (", sync=%" PRIu32, val);
278  total += val;
279  printf (", total=%" PRIu32 "\n", total);
280}
281
282/**
283 * Show the users for a group of a bd.
284 *
285 * @param where A label to show the context of output.
286 * @param bd The bd to show the users of.
287 */
288void
289rtems_bdbuf_show_users (const char* where, rtems_bdbuf_buffer* bd)
290{
291  const char* states[] =
292    { "FR", "EM", "CH", "AC", "AM", "MD", "SY", "TR" };
293
294  printf ("bdbuf:users: %15s: [%" PRIu32 " (%s)] %td:%td = %" PRIu32 " %s\n",
295          where,
296          bd->block, states[bd->state],
297          bd->group - bdbuf_cache.groups,
298          bd - bdbuf_cache.bds,
299          bd->group->users,
300          bd->group->users > 8 ? "<<<<<<<" : "");
301}
302#else
303#define rtems_bdbuf_tracer (0)
304#define rtems_bdbuf_show_usage() ((void) 0)
305#define rtems_bdbuf_show_users(_w, _b) ((void) 0)
306#endif
307
308/**
309 * The default maximum height of 32 allows for AVL trees having between
310 * 5,704,880 and 4,294,967,295 nodes, depending on order of insertion.  You may
311 * change this compile-time constant as you wish.
312 */
313#ifndef RTEMS_BDBUF_AVL_MAX_HEIGHT
314#define RTEMS_BDBUF_AVL_MAX_HEIGHT (32)
315#endif
316
317static void
318rtems_bdbuf_fatal (rtems_bdbuf_buf_state state, uint32_t error)
319{
320  rtems_fatal_error_occurred ((((uint32_t) state) << 16) | error);
321}
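/*
 * Worked example (illustrative): rtems_bdbuf_fatal (state,
 * RTEMS_BLKDEV_FATAL_BDBUF_STATE_9) yields 'B' (0x42) in the top byte from
 * RTEMS_BLKDEV_FATAL_ERROR(), the buffer state in bits 16..23 and the error
 * number 7 in the low 16 bits, so a fatal code can be decoded back into the
 * failing buffer state and call site.
 */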
322
323/**
324 * Searches for the node with specified dd/block.
325 *
326 * @param root pointer to the root node of the AVL-Tree
327 * @param dd disk device search key
328 * @param block block search key
329 * @retval NULL node with the specified dd/block is not found
330 * @return pointer to the node with specified dd/block
331 */
332static rtems_bdbuf_buffer *
333rtems_bdbuf_avl_search (rtems_bdbuf_buffer** root,
334                        const rtems_disk_device *dd,
335                        rtems_blkdev_bnum    block)
336{
337  rtems_bdbuf_buffer* p = *root;
338
339  while ((p != NULL) && ((p->dd != dd) || (p->block != block)))
340  {
341    if (((uintptr_t) p->dd < (uintptr_t) dd)
342        || ((p->dd == dd) && (p->block < block)))
343    {
344      p = p->avl.right;
345    }
346    else
347    {
348      p = p->avl.left;
349    }
350  }
351
352  return p;
353}
354
355/**
356 * Inserts the specified node into the AVL-Tree.
357 *
358 * @param root pointer to the root node of the AVL-Tree
359 * @param node Pointer to the node to add.
360 * @retval 0 The node added successfully
361 * @retval -1 An error occurred
362 */
363static int
364rtems_bdbuf_avl_insert(rtems_bdbuf_buffer** root,
365                       rtems_bdbuf_buffer*  node)
366{
367  const rtems_disk_device *dd = node->dd;
368  rtems_blkdev_bnum block = node->block;
369
370  rtems_bdbuf_buffer*  p = *root;
371  rtems_bdbuf_buffer*  q;
372  rtems_bdbuf_buffer*  p1;
373  rtems_bdbuf_buffer*  p2;
374  rtems_bdbuf_buffer*  buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT];
375  rtems_bdbuf_buffer** buf_prev = buf_stack;
376
377  bool modified = false;
378
379  if (p == NULL)
380  {
381    *root = node;
382    node->avl.left = NULL;
383    node->avl.right = NULL;
384    node->avl.bal = 0;
385    return 0;
386  }
387
388  while (p != NULL)
389  {
390    *buf_prev++ = p;
391
392    if (((uintptr_t) p->dd < (uintptr_t) dd)
393        || ((p->dd == dd) && (p->block < block)))
394    {
395      p->avl.cache = 1;
396      q = p->avl.right;
397      if (q == NULL)
398      {
399        q = node;
400        p->avl.right = q = node;
401        break;
402      }
403    }
404    else if ((p->dd != dd) || (p->block != block))
405    {
406      p->avl.cache = -1;
407      q = p->avl.left;
408      if (q == NULL)
409      {
410        q = node;
411        p->avl.left = q;
412        break;
413      }
414    }
415    else
416    {
417      return -1;
418    }
419
420    p = q;
421  }
422
423  q->avl.left = q->avl.right = NULL;
424  q->avl.bal = 0;
425  modified = true;
426  buf_prev--;
427
428  while (modified)
429  {
430    if (p->avl.cache == -1)
431    {
432      switch (p->avl.bal)
433      {
434        case 1:
435          p->avl.bal = 0;
436          modified = false;
437          break;
438
439        case 0:
440          p->avl.bal = -1;
441          break;
442
443        case -1:
444          p1 = p->avl.left;
445          if (p1->avl.bal == -1) /* simple LL-turn */
446          {
447            p->avl.left = p1->avl.right;
448            p1->avl.right = p;
449            p->avl.bal = 0;
450            p = p1;
451          }
452          else /* double LR-turn */
453          {
454            p2 = p1->avl.right;
455            p1->avl.right = p2->avl.left;
456            p2->avl.left = p1;
457            p->avl.left = p2->avl.right;
458            p2->avl.right = p;
459            if (p2->avl.bal == -1) p->avl.bal = +1; else p->avl.bal = 0;
460            if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0;
461            p = p2;
462          }
463          p->avl.bal = 0;
464          modified = false;
465          break;
466
467        default:
468          break;
469      }
470    }
471    else
472    {
473      switch (p->avl.bal)
474      {
475        case -1:
476          p->avl.bal = 0;
477          modified = false;
478          break;
479
480        case 0:
481          p->avl.bal = 1;
482          break;
483
484        case 1:
485          p1 = p->avl.right;
486          if (p1->avl.bal == 1) /* simple RR-turn */
487          {
488            p->avl.right = p1->avl.left;
489            p1->avl.left = p;
490            p->avl.bal = 0;
491            p = p1;
492          }
493          else /* double RL-turn */
494          {
495            p2 = p1->avl.left;
496            p1->avl.left = p2->avl.right;
497            p2->avl.right = p1;
498            p->avl.right = p2->avl.left;
499            p2->avl.left = p;
500            if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0;
501            if (p2->avl.bal == -1) p1->avl.bal = +1; else p1->avl.bal = 0;
502            p = p2;
503          }
504          p->avl.bal = 0;
505          modified = false;
506          break;
507
508        default:
509          break;
510      }
511    }
512    q = p;
513    if (buf_prev > buf_stack)
514    {
515      p = *--buf_prev;
516
517      if (p->avl.cache == -1)
518      {
519        p->avl.left = q;
520      }
521      else
522      {
523        p->avl.right = q;
524      }
525    }
526    else
527    {
528      *root = p;
529      break;
530    }
531  };
532
533  return 0;
534}
535
536
537/**
538 * Removes the node from the tree.
539 *
540 * @param root Pointer to pointer to the root node
541 * @param node Pointer to the node to remove
542 * @retval 0 Item removed
543 * @retval -1 No such item found
544 */
545static int
546rtems_bdbuf_avl_remove(rtems_bdbuf_buffer**      root,
547                       const rtems_bdbuf_buffer* node)
548{
549  const rtems_disk_device *dd = node->dd;
550  rtems_blkdev_bnum block = node->block;
551
552  rtems_bdbuf_buffer*  p = *root;
553  rtems_bdbuf_buffer*  q;
554  rtems_bdbuf_buffer*  r;
555  rtems_bdbuf_buffer*  s;
556  rtems_bdbuf_buffer*  p1;
557  rtems_bdbuf_buffer*  p2;
558  rtems_bdbuf_buffer*  buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT];
559  rtems_bdbuf_buffer** buf_prev = buf_stack;
560
561  bool modified = false;
562
563  memset (buf_stack, 0, sizeof(buf_stack));
564
565  while (p != NULL)
566  {
567    *buf_prev++ = p;
568
569    if (((uintptr_t) p->dd < (uintptr_t) dd)
570        || ((p->dd == dd) && (p->block < block)))
571    {
572      p->avl.cache = 1;
573      p = p->avl.right;
574    }
575    else if ((p->dd != dd) || (p->block != block))
576    {
577      p->avl.cache = -1;
578      p = p->avl.left;
579    }
580    else
581    {
582      /* node found */
583      break;
584    }
585  }
586
587  if (p == NULL)
588  {
589    /* there is no such node */
590    return -1;
591  }
592
593  q = p;
594
595  buf_prev--;
596  if (buf_prev > buf_stack)
597  {
598    p = *(buf_prev - 1);
599  }
600  else
601  {
602    p = NULL;
603  }
604
605  /* at this moment q - is a node to delete, p is q's parent */
606  if (q->avl.right == NULL)
607  {
608    r = q->avl.left;
609    if (r != NULL)
610    {
611      r->avl.bal = 0;
612    }
613    q = r;
614  }
615  else
616  {
617    rtems_bdbuf_buffer **t;
618
619    r = q->avl.right;
620
621    if (r->avl.left == NULL)
622    {
623      r->avl.left = q->avl.left;
624      r->avl.bal = q->avl.bal;
625      r->avl.cache = 1;
626      *buf_prev++ = q = r;
627    }
628    else
629    {
630      t = buf_prev++;
631      s = r;
632
633      while (s->avl.left != NULL)
634      {
635        *buf_prev++ = r = s;
636        s = r->avl.left;
637        r->avl.cache = -1;
638      }
639
640      s->avl.left = q->avl.left;
641      r->avl.left = s->avl.right;
642      s->avl.right = q->avl.right;
643      s->avl.bal = q->avl.bal;
644      s->avl.cache = 1;
645
646      *t = q = s;
647    }
648  }
649
650  if (p != NULL)
651  {
652    if (p->avl.cache == -1)
653    {
654      p->avl.left = q;
655    }
656    else
657    {
658      p->avl.right = q;
659    }
660  }
661  else
662  {
663    *root = q;
664  }
665
666  modified = true;
667
668  while (modified)
669  {
670    if (buf_prev > buf_stack)
671    {
672      p = *--buf_prev;
673    }
674    else
675    {
676      break;
677    }
678
679    if (p->avl.cache == -1)
680    {
681      /* rebalance left branch */
682      switch (p->avl.bal)
683      {
684        case -1:
685          p->avl.bal = 0;
686          break;
687        case  0:
688          p->avl.bal = 1;
689          modified = false;
690          break;
691
692        case +1:
693          p1 = p->avl.right;
694
695          if (p1->avl.bal >= 0) /* simple RR-turn */
696          {
697            p->avl.right = p1->avl.left;
698            p1->avl.left = p;
699
700            if (p1->avl.bal == 0)
701            {
702              p1->avl.bal = -1;
703              modified = false;
704            }
705            else
706            {
707              p->avl.bal = 0;
708              p1->avl.bal = 0;
709            }
710            p = p1;
711          }
712          else /* double RL-turn */
713          {
714            p2 = p1->avl.left;
715
716            p1->avl.left = p2->avl.right;
717            p2->avl.right = p1;
718            p->avl.right = p2->avl.left;
719            p2->avl.left = p;
720
721            if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0;
722            if (p2->avl.bal == -1) p1->avl.bal = 1; else p1->avl.bal = 0;
723
724            p = p2;
725            p2->avl.bal = 0;
726          }
727          break;
728
729        default:
730          break;
731      }
732    }
733    else
734    {
735      /* rebalance right branch */
736      switch (p->avl.bal)
737      {
738        case +1:
739          p->avl.bal = 0;
740          break;
741
742        case  0:
743          p->avl.bal = -1;
744          modified = false;
745          break;
746
747        case -1:
748          p1 = p->avl.left;
749
750          if (p1->avl.bal <= 0) /* simple LL-turn */
751          {
752            p->avl.left = p1->avl.right;
753            p1->avl.right = p;
754            if (p1->avl.bal == 0)
755            {
756              p1->avl.bal = 1;
757              modified = false;
758            }
759            else
760            {
761              p->avl.bal = 0;
762              p1->avl.bal = 0;
763            }
764            p = p1;
765          }
766          else /* double LR-turn */
767          {
768            p2 = p1->avl.right;
769
770            p1->avl.right = p2->avl.left;
771            p2->avl.left = p1;
772            p->avl.left = p2->avl.right;
773            p2->avl.right = p;
774
775            if (p2->avl.bal == -1) p->avl.bal = 1; else p->avl.bal = 0;
776            if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0;
777
778            p = p2;
779            p2->avl.bal = 0;
780          }
781          break;
782
783        default:
784          break;
785      }
786    }
787
788    if (buf_prev > buf_stack)
789    {
790      q = *(buf_prev - 1);
791
792      if (q->avl.cache == -1)
793      {
794        q->avl.left = p;
795      }
796      else
797      {
798        q->avl.right = p;
799      }
800    }
801    else
802    {
803      *root = p;
804      break;
805    }
806
807  }
808
809  return 0;
810}
811
812static void
813rtems_bdbuf_set_state (rtems_bdbuf_buffer *bd, rtems_bdbuf_buf_state state)
814{
815  bd->state = state;
816}
817
818static rtems_blkdev_bnum
819rtems_bdbuf_media_block (const rtems_disk_device *dd, rtems_blkdev_bnum block)
820{
821  if (dd->block_to_media_block_shift >= 0)
822    return block << dd->block_to_media_block_shift;
823  else
824    /*
825     * Convert the block number in device block units to the block number in
826     * media block units. We have to use 64-bit maths. There is no short cut here.
827     */
828    return (rtems_blkdev_bnum)
829      ((((uint64_t) block) * dd->block_size) / dd->media_block_size);
830}
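/*
 * Worked example (illustrative, assumed sizes): with dd->block_size == 1024
 * and dd->media_block_size == 512 the shift is 1, so block 3 maps to media
 * block 3 << 1 == 6.  If the block sizes are not related by a power of two
 * the shift is negative and the 64-bit divide path above is used instead.
 */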
831
832/**
833 * Lock the mutex. A single task can nest calls.
834 *
835 * @param lock The mutex to lock.
836 * @param fatal_error_code The error code if the call fails.
837 */
838static void
839rtems_bdbuf_lock (rtems_id lock, uint32_t fatal_error_code)
840{
841  rtems_status_code sc = rtems_semaphore_obtain (lock,
842                                                 RTEMS_WAIT,
843                                                 RTEMS_NO_TIMEOUT);
844  if (sc != RTEMS_SUCCESSFUL)
845    rtems_fatal_error_occurred (fatal_error_code);
846}
847
848/**
849 * Unlock the mutex.
850 *
851 * @param lock The mutex to unlock.
852 * @param fatal_error_code The error code if the call fails.
853 */
854static void
855rtems_bdbuf_unlock (rtems_id lock, uint32_t fatal_error_code)
856{
857  rtems_status_code sc = rtems_semaphore_release (lock);
858  if (sc != RTEMS_SUCCESSFUL)
859    rtems_fatal_error_occurred (fatal_error_code);
860}
861
862/**
863 * Lock the cache. A single task can nest calls.
864 */
865static void
866rtems_bdbuf_lock_cache (void)
867{
868  rtems_bdbuf_lock (bdbuf_cache.lock, RTEMS_BLKDEV_FATAL_BDBUF_CACHE_LOCK);
869}
870
871/**
872 * Unlock the cache.
873 */
874static void
875rtems_bdbuf_unlock_cache (void)
876{
877  rtems_bdbuf_unlock (bdbuf_cache.lock, RTEMS_BLKDEV_FATAL_BDBUF_CACHE_UNLOCK);
878}
879
880/**
881 * Lock the cache's sync. A single task can nest calls.
882 */
883static void
884rtems_bdbuf_lock_sync (void)
885{
886  rtems_bdbuf_lock (bdbuf_cache.sync_lock, RTEMS_BLKDEV_FATAL_BDBUF_SYNC_LOCK);
887}
888
889/**
890 * Unlock the cache's sync lock. Any blocked writers are woken.
891 */
892static void
893rtems_bdbuf_unlock_sync (void)
894{
895  rtems_bdbuf_unlock (bdbuf_cache.sync_lock,
896                      RTEMS_BLKDEV_FATAL_BDBUF_SYNC_UNLOCK);
897}
898
899static void
900rtems_bdbuf_group_obtain (rtems_bdbuf_buffer *bd)
901{
902  ++bd->group->users;
903}
904
905static void
906rtems_bdbuf_group_release (rtems_bdbuf_buffer *bd)
907{
908  --bd->group->users;
909}
910
911static rtems_mode
912rtems_bdbuf_disable_preemption (void)
913{
914  rtems_status_code sc = RTEMS_SUCCESSFUL;
915  rtems_mode prev_mode = 0;
916
917  sc = rtems_task_mode (RTEMS_NO_PREEMPT, RTEMS_PREEMPT_MASK, &prev_mode);
918  if (sc != RTEMS_SUCCESSFUL)
919    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_DIS);
920
921  return prev_mode;
922}
923
924static void
925rtems_bdbuf_restore_preemption (rtems_mode prev_mode)
926{
927  rtems_status_code sc = RTEMS_SUCCESSFUL;
928
929  sc = rtems_task_mode (prev_mode, RTEMS_ALL_MODE_MASKS, &prev_mode);
930  if (sc != RTEMS_SUCCESSFUL)
931    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_RST);
932}
933
934/**
935 * Wait until woken. Semaphores are used so a number of tasks can wait and can
936 * be woken at once. Task events would require we maintain a list of tasks to
937 * be woken and this would require storage and we do not know the number of
938 * tasks that could be waiting.
939 *
940 * While we have the cache locked we can try to claim the semaphore, and
941 * therefore we know that when we release the cache lock we will block until
942 * the semaphore is released. This may even happen before we get to block.
943 *
944 * A counter is used to save the release call when no one is waiting.
945 *
946 * The function assumes the cache is locked on entry and it will be locked on
947 * exit.
948 */
949static void
950rtems_bdbuf_anonymous_wait (rtems_bdbuf_waiters *waiters)
951{
952  rtems_status_code sc;
953  rtems_mode        prev_mode;
954
955  /*
956   * Indicate we are waiting.
957   */
958  ++waiters->count;
959
960  /*
961   * Disable preemption then unlock the cache and block.  There is no POSIX
962   * condition variable in the core API so this is a work around.
963   *
964   * The issue is that a task could preempt after the cache is unlocked, because
965   * it is blocking or just hits that window, and before this task has blocked
966   * on the semaphore. If the preempting task flushes the queue this task will
967   * not see the flush and may block forever, or until another transaction
968   * flushes this semaphore.
969   */
970  prev_mode = rtems_bdbuf_disable_preemption ();
971
972  /*
973   * Unlock the cache, wait, and lock the cache when we return.
974   */
975  rtems_bdbuf_unlock_cache ();
976
977  sc = rtems_semaphore_obtain (waiters->sema, RTEMS_WAIT, RTEMS_BDBUF_WAIT_TIMEOUT);
978
979  if (sc == RTEMS_TIMEOUT)
980    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_TO);
981
982  if (sc != RTEMS_UNSATISFIED)
983    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_2);
984
985  rtems_bdbuf_lock_cache ();
986
987  rtems_bdbuf_restore_preemption (prev_mode);
988
989  --waiters->count;
990}
991
992static void
993rtems_bdbuf_wait (rtems_bdbuf_buffer *bd, rtems_bdbuf_waiters *waiters)
994{
995  rtems_bdbuf_group_obtain (bd);
996  ++bd->waiters;
997  rtems_bdbuf_anonymous_wait (waiters);
998  --bd->waiters;
999  rtems_bdbuf_group_release (bd);
1000}
1001
1002/**
1003 * Wake a blocked resource. The resource has a counter that lets us know if
1004 * there are any waiters.
1005 */
1006static void
1007rtems_bdbuf_wake (const rtems_bdbuf_waiters *waiters)
1008{
1009  rtems_status_code sc = RTEMS_SUCCESSFUL;
1010
1011  if (waiters->count > 0)
1012  {
1013    sc = rtems_semaphore_flush (waiters->sema);
1014    if (sc != RTEMS_SUCCESSFUL)
1015      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAKE);
1016  }
1017}
1018
1019static void
1020rtems_bdbuf_wake_swapper (void)
1021{
1022  rtems_status_code sc = rtems_event_send (bdbuf_cache.swapout,
1023                                           RTEMS_BDBUF_SWAPOUT_SYNC);
1024  if (sc != RTEMS_SUCCESSFUL)
1025    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE);
1026}
1027
1028static bool
1029rtems_bdbuf_has_buffer_waiters (void)
1030{
1031  return bdbuf_cache.buffer_waiters.count;
1032}
1033
1034static void
1035rtems_bdbuf_remove_from_tree (rtems_bdbuf_buffer *bd)
1036{
1037  if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0)
1038    rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_TREE_RM);
1039}
1040
1041static void
1042rtems_bdbuf_remove_from_tree_and_lru_list (rtems_bdbuf_buffer *bd)
1043{
1044  switch (bd->state)
1045  {
1046    case RTEMS_BDBUF_STATE_FREE:
1047      break;
1048    case RTEMS_BDBUF_STATE_CACHED:
1049      rtems_bdbuf_remove_from_tree (bd);
1050      break;
1051    default:
1052      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_10);
1053  }
1054
1055  rtems_chain_extract_unprotected (&bd->link);
1056}
1057
1058static void
1059rtems_bdbuf_make_free_and_add_to_lru_list (rtems_bdbuf_buffer *bd)
1060{
1061  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_FREE);
1062  rtems_chain_prepend_unprotected (&bdbuf_cache.lru, &bd->link);
1063}
1064
1065static void
1066rtems_bdbuf_make_empty (rtems_bdbuf_buffer *bd)
1067{
1068  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_EMPTY);
1069}
1070
1071static void
1072rtems_bdbuf_make_cached_and_add_to_lru_list (rtems_bdbuf_buffer *bd)
1073{
1074  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_CACHED);
1075  rtems_chain_append_unprotected (&bdbuf_cache.lru, &bd->link);
1076}
1077
1078static void
1079rtems_bdbuf_discard_buffer (rtems_bdbuf_buffer *bd)
1080{
1081  rtems_bdbuf_make_empty (bd);
1082
1083  if (bd->waiters == 0)
1084  {
1085    rtems_bdbuf_remove_from_tree (bd);
1086    rtems_bdbuf_make_free_and_add_to_lru_list (bd);
1087  }
1088}
1089
1090static void
1091rtems_bdbuf_add_to_modified_list_after_access (rtems_bdbuf_buffer *bd)
1092{
1093  if (bdbuf_cache.sync_active && bdbuf_cache.sync_device == bd->dd)
1094  {
1095    rtems_bdbuf_unlock_cache ();
1096
1097    /*
1098     * Wait for the sync lock.
1099     */
1100    rtems_bdbuf_lock_sync ();
1101
1102    rtems_bdbuf_unlock_sync ();
1103    rtems_bdbuf_lock_cache ();
1104  }
1105
1106  /*
1107   * Only the first modified release sets the timer; any further user accesses
1108   * do not change the timer value, which should count down. This assumes the
1109   * user's hold of the buffer is much shorter than the time on the modified
1110   * list. Resetting the timer on each access could result in a buffer never
1111   * reaching 0 and never being forced onto disk. This raises a difficult
1112   * question: is a snapshot of a block that is changing better than nothing
1113   * being written? We have tended to think changes should be held for only a
1114   * specific period of time, even if still changing, and then written to disk,
1115   * letting the file system try to recover this position if it can.
1116   */
1117  if (bd->state == RTEMS_BDBUF_STATE_ACCESS_CACHED
1118        || bd->state == RTEMS_BDBUF_STATE_ACCESS_EMPTY)
1119    bd->hold_timer = bdbuf_config.swap_block_hold;
1120
1121  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_MODIFIED);
1122  rtems_chain_append_unprotected (&bdbuf_cache.modified, &bd->link);
1123
1124  if (bd->waiters)
1125    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
1126  else if (rtems_bdbuf_has_buffer_waiters ())
1127    rtems_bdbuf_wake_swapper ();
1128}
1129
1130static void
1131rtems_bdbuf_add_to_lru_list_after_access (rtems_bdbuf_buffer *bd)
1132{
1133  rtems_bdbuf_group_release (bd);
1134  rtems_bdbuf_make_cached_and_add_to_lru_list (bd);
1135
1136  if (bd->waiters)
1137    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
1138  else
1139    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1140}
1141
1142/**
1143 * Compute the number of BDs per group for a given buffer size.
1144 *
1145 * @param size The buffer size. It can be any size and we scale up.
1146 */
1147static size_t
1148rtems_bdbuf_bds_per_group (size_t size)
1149{
1150  size_t bufs_per_size;
1151  size_t bds_per_size;
1152
1153  if (size > bdbuf_config.buffer_max)
1154    return 0;
1155
1156  bufs_per_size = ((size - 1) / bdbuf_config.buffer_min) + 1;
1157
1158  for (bds_per_size = 1;
1159       bds_per_size < bufs_per_size;
1160       bds_per_size <<= 1)
1161    ;
1162
1163  return bdbuf_cache.max_bds_per_group / bds_per_size;
1164}
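/*
 * Illustrative example (assumed configuration): with buffer_min == 512 and
 * buffer_max == 4096 the cache has max_bds_per_group == 8.  A request for a
 * 1 KiB buffer gives bufs_per_size == 2, bds_per_size rounds up to the next
 * power of two (2), and 8 / 2 == 4 BDs per group are returned.
 */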
1165
1166static void
1167rtems_bdbuf_discard_buffer_after_access (rtems_bdbuf_buffer *bd)
1168{
1169  rtems_bdbuf_group_release (bd);
1170  rtems_bdbuf_discard_buffer (bd);
1171
1172  if (bd->waiters)
1173    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
1174  else
1175    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1176}
1177
1178/**
1179 * Reallocate a group. The BDs currently allocated in the group are removed
1180 * from the AVL tree and any lists, then the new BDs are prepended to the LRU
1181 * list of the cache.
1182 *
1183 * @param group The group to reallocate.
1184 * @param new_bds_per_group The new count of BDs per group.
1185 * @return A buffer of this group.
1186 */
1187static rtems_bdbuf_buffer *
1188rtems_bdbuf_group_realloc (rtems_bdbuf_group* group, size_t new_bds_per_group)
1189{
1190  rtems_bdbuf_buffer* bd;
1191  size_t              b;
1192  size_t              bufs_per_bd;
1193
1194  if (rtems_bdbuf_tracer)
1195    printf ("bdbuf:realloc: %tu: %zd -> %zd\n",
1196            group - bdbuf_cache.groups, group->bds_per_group,
1197            new_bds_per_group);
1198
1199  bufs_per_bd = bdbuf_cache.max_bds_per_group / group->bds_per_group;
1200
1201  for (b = 0, bd = group->bdbuf;
1202       b < group->bds_per_group;
1203       b++, bd += bufs_per_bd)
1204    rtems_bdbuf_remove_from_tree_and_lru_list (bd);
1205
1206  group->bds_per_group = new_bds_per_group;
1207  bufs_per_bd = bdbuf_cache.max_bds_per_group / new_bds_per_group;
1208
1209  for (b = 1, bd = group->bdbuf + bufs_per_bd;
1210       b < group->bds_per_group;
1211       b++, bd += bufs_per_bd)
1212    rtems_bdbuf_make_free_and_add_to_lru_list (bd);
1213
1214  if (b > 1)
1215    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1216
1217  return group->bdbuf;
1218}
1219
1220static void
1221rtems_bdbuf_setup_empty_buffer (rtems_bdbuf_buffer *bd,
1222                                const rtems_disk_device *dd,
1223                                rtems_blkdev_bnum   block)
1224{
1225  bd->dd        = dd ;
1226  bd->block     = block;
1227  bd->avl.left  = NULL;
1228  bd->avl.right = NULL;
1229  bd->waiters   = 0;
1230
1231  if (rtems_bdbuf_avl_insert (&bdbuf_cache.tree, bd) != 0)
1232    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_RECYCLE);
1233
1234  rtems_bdbuf_make_empty (bd);
1235}
1236
1237static rtems_bdbuf_buffer *
1238rtems_bdbuf_get_buffer_from_lru_list (const rtems_disk_device *dd,
1239                                      rtems_blkdev_bnum block,
1240                                      size_t            bds_per_group)
1241{
1242  rtems_chain_node *node = rtems_chain_first (&bdbuf_cache.lru);
1243
1244  while (!rtems_chain_is_tail (&bdbuf_cache.lru, node))
1245  {
1246    rtems_bdbuf_buffer *bd = (rtems_bdbuf_buffer *) node;
1247    rtems_bdbuf_buffer *empty_bd = NULL;
1248
1249    if (rtems_bdbuf_tracer)
1250      printf ("bdbuf:next-bd: %tu (%td:%" PRId32 ") %zd -> %zd\n",
1251              bd - bdbuf_cache.bds,
1252              bd->group - bdbuf_cache.groups, bd->group->users,
1253              bd->group->bds_per_group, bds_per_group);
1254
1255    /*
1256     * If nobody waits for this BD, we may recycle it.
1257     */
1258    if (bd->waiters == 0)
1259    {
1260      if (bd->group->bds_per_group == bds_per_group)
1261      {
1262        rtems_bdbuf_remove_from_tree_and_lru_list (bd);
1263
1264        empty_bd = bd;
1265      }
1266      else if (bd->group->users == 0)
1267        empty_bd = rtems_bdbuf_group_realloc (bd->group, bds_per_group);
1268    }
1269
1270    if (empty_bd != NULL)
1271    {
1272      rtems_bdbuf_setup_empty_buffer (empty_bd, dd, block);
1273
1274      return empty_bd;
1275    }
1276
1277    node = rtems_chain_next (node);
1278  }
1279
1280  return NULL;
1281}
1282
1283/**
1284 * Initialise the cache.
1285 *
1286 * @return rtems_status_code The initialisation status.
1287 */
1288rtems_status_code
1289rtems_bdbuf_init (void)
1290{
1291  rtems_bdbuf_group*  group;
1292  rtems_bdbuf_buffer* bd;
1293  uint8_t*            buffer;
1294  size_t              b;
1295  size_t              cache_aligment;
1296  rtems_status_code   sc;
1297  rtems_mode          prev_mode;
1298
1299  if (rtems_bdbuf_tracer)
1300    printf ("bdbuf:init\n");
1301
1302  if (rtems_interrupt_is_in_progress())
1303    return RTEMS_CALLED_FROM_ISR;
1304
1305  /*
1306   * Check the configuration table values.
1307   */
1308  if ((bdbuf_config.buffer_max % bdbuf_config.buffer_min) != 0)
1309    return RTEMS_INVALID_NUMBER;
1310
1311  /*
1312   * We use a special variable to manage the initialisation in case we have
1313   * competing threads doing this. You may get errors if another thread makes
1314   * a call and we have not finished initialisation.
1315   */
1316  prev_mode = rtems_bdbuf_disable_preemption ();
1317  if (bdbuf_cache.initialised)
1318  {
1319    rtems_bdbuf_restore_preemption (prev_mode);
1320    return RTEMS_RESOURCE_IN_USE;
1321  }
1322
1323  memset(&bdbuf_cache, 0, sizeof(bdbuf_cache));
1324  bdbuf_cache.initialised = true;
1325  rtems_bdbuf_restore_preemption (prev_mode);
1326
1327  /*
1328   * For unspecified cache alignments we use the CPU alignment.
1329   */
1330  cache_aligment = 32; /* FIXME rtems_cache_get_data_line_size() */
1331  if (cache_aligment <= 0)
1332    cache_aligment = CPU_ALIGNMENT;
1333
1334  bdbuf_cache.sync_device = BDBUF_INVALID_DEV;
1335
1336  rtems_chain_initialize_empty (&bdbuf_cache.swapout_workers);
1337  rtems_chain_initialize_empty (&bdbuf_cache.lru);
1338  rtems_chain_initialize_empty (&bdbuf_cache.modified);
1339  rtems_chain_initialize_empty (&bdbuf_cache.sync);
1340
1341  /*
1342   * Create the locks for the cache.
1343   */
1344  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'l'),
1345                               1, RTEMS_BDBUF_CACHE_LOCK_ATTRIBS, 0,
1346                               &bdbuf_cache.lock);
1347  if (sc != RTEMS_SUCCESSFUL)
1348    goto error;
1349
1350  rtems_bdbuf_lock_cache ();
1351
1352  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 's'),
1353                               1, RTEMS_BDBUF_CACHE_LOCK_ATTRIBS, 0,
1354                               &bdbuf_cache.sync_lock);
1355  if (sc != RTEMS_SUCCESSFUL)
1356    goto error;
1357
1358  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'a'),
1359                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
1360                               &bdbuf_cache.access_waiters.sema);
1361  if (sc != RTEMS_SUCCESSFUL)
1362    goto error;
1363
1364  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 't'),
1365                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
1366                               &bdbuf_cache.transfer_waiters.sema);
1367  if (sc != RTEMS_SUCCESSFUL)
1368    goto error;
1369
1370  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'b'),
1371                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
1372                               &bdbuf_cache.buffer_waiters.sema);
1373  if (sc != RTEMS_SUCCESSFUL)
1374    goto error;
1375
1376  /*
1377   * Compute the various number of elements in the cache.
1378   */
1379  bdbuf_cache.buffer_min_count =
1380    bdbuf_config.size / bdbuf_config.buffer_min;
1381  bdbuf_cache.max_bds_per_group =
1382    bdbuf_config.buffer_max / bdbuf_config.buffer_min;
1383  bdbuf_cache.group_count =
1384    bdbuf_cache.buffer_min_count / bdbuf_cache.max_bds_per_group;
1385
1386  /*
1387   * Allocate the memory for the buffer descriptors.
1388   */
1389  bdbuf_cache.bds = calloc (sizeof (rtems_bdbuf_buffer),
1390                            bdbuf_cache.buffer_min_count);
1391  if (!bdbuf_cache.bds)
1392    goto error;
1393
1394  /*
1395   * Allocate the memory for the groups.
1396   */
1397  bdbuf_cache.groups = calloc (sizeof (rtems_bdbuf_group),
1398                               bdbuf_cache.group_count);
1399  if (!bdbuf_cache.groups)
1400    goto error;
1401
1402  /*
1403   * Allocate memory for buffer memory. The buffer memory will be cache
1404   * aligned. It is possible to free the memory allocated by rtems_memalign()
1405   * with free(). rtems_memalign() returns 0 if the allocation succeeded.
1408   */
1409  if (rtems_memalign ((void **) &bdbuf_cache.buffers,
1410                      cache_aligment,
1411                      bdbuf_cache.buffer_min_count * bdbuf_config.buffer_min) != 0)
1412    goto error;
1413
1414  /*
1415   * The cache is empty after opening so we need to add all the buffers to it
1416   * and initialise the groups.
1417   */
1418  for (b = 0, group = bdbuf_cache.groups,
1419         bd = bdbuf_cache.bds, buffer = bdbuf_cache.buffers;
1420       b < bdbuf_cache.buffer_min_count;
1421       b++, bd++, buffer += bdbuf_config.buffer_min)
1422  {
1423    bd->dd    = BDBUF_INVALID_DEV;
1424    bd->group  = group;
1425    bd->buffer = buffer;
1426
1427    rtems_chain_append_unprotected (&bdbuf_cache.lru, &bd->link);
1428
1429    if ((b % bdbuf_cache.max_bds_per_group) ==
1430        (bdbuf_cache.max_bds_per_group - 1))
1431      group++;
1432  }
1433
1434  for (b = 0,
1435         group = bdbuf_cache.groups,
1436         bd = bdbuf_cache.bds;
1437       b < bdbuf_cache.group_count;
1438       b++,
1439         group++,
1440         bd += bdbuf_cache.max_bds_per_group)
1441  {
1442    group->bds_per_group = bdbuf_cache.max_bds_per_group;
1443    group->bdbuf = bd;
1444  }
1445
1446  /*
1447   * Create and start swapout task. This task will create and manage the worker
1448   * threads.
1449   */
1450  bdbuf_cache.swapout_enabled = true;
1451
1452  sc = rtems_task_create (rtems_build_name('B', 'S', 'W', 'P'),
1453                          bdbuf_config.swapout_priority ?
1454                            bdbuf_config.swapout_priority :
1455                            RTEMS_BDBUF_SWAPOUT_TASK_PRIORITY_DEFAULT,
1456                          SWAPOUT_TASK_STACK_SIZE,
1457                          RTEMS_PREEMPT | RTEMS_NO_TIMESLICE | RTEMS_NO_ASR,
1458                          RTEMS_LOCAL | RTEMS_NO_FLOATING_POINT,
1459                          &bdbuf_cache.swapout);
1460  if (sc != RTEMS_SUCCESSFUL)
1461    goto error;
1462
1463  sc = rtems_task_start (bdbuf_cache.swapout,
1464                         rtems_bdbuf_swapout_task,
1465                         (rtems_task_argument) &bdbuf_cache);
1466  if (sc != RTEMS_SUCCESSFUL)
1467    goto error;
1468
1469  rtems_bdbuf_unlock_cache ();
1470
1471  return RTEMS_SUCCESSFUL;
1472
1473error:
1474
1475  if (bdbuf_cache.swapout != 0)
1476    rtems_task_delete (bdbuf_cache.swapout);
1477
1478  free (bdbuf_cache.buffers);
1479  free (bdbuf_cache.groups);
1480  free (bdbuf_cache.bds);
1481
1482  rtems_semaphore_delete (bdbuf_cache.buffer_waiters.sema);
1483  rtems_semaphore_delete (bdbuf_cache.access_waiters.sema);
1484  rtems_semaphore_delete (bdbuf_cache.transfer_waiters.sema);
1485  rtems_semaphore_delete (bdbuf_cache.sync_lock);
1486
1487  if (bdbuf_cache.lock != 0)
1488  {
1489    rtems_bdbuf_unlock_cache ();
1490    rtems_semaphore_delete (bdbuf_cache.lock);
1491  }
1492
1493  bdbuf_cache.initialised = false;
1494
1495  return RTEMS_UNSATISFIED;
1496}
1497
1498static void
1499rtems_bdbuf_wait_for_event (rtems_event_set event)
1500{
1501  rtems_status_code sc = RTEMS_SUCCESSFUL;
1502  rtems_event_set   out = 0;
1503
1504  sc = rtems_event_receive (event,
1505                            RTEMS_EVENT_ALL | RTEMS_WAIT,
1506                            RTEMS_NO_TIMEOUT,
1507                            &out);
1508
1509  if (sc != RTEMS_SUCCESSFUL || out != event)
1510    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_WAIT_EVNT);
1511}
1512
1513static void
1514rtems_bdbuf_wait_for_access (rtems_bdbuf_buffer *bd)
1515{
1516  while (true)
1517  {
1518    switch (bd->state)
1519    {
1520      case RTEMS_BDBUF_STATE_MODIFIED:
1521        rtems_bdbuf_group_release (bd);
1522        /* Fall through */
1523      case RTEMS_BDBUF_STATE_CACHED:
1524        rtems_chain_extract_unprotected (&bd->link);
1525        /* Fall through */
1526      case RTEMS_BDBUF_STATE_EMPTY:
1527        return;
1528      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
1529      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
1530      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
1531      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
1532        rtems_bdbuf_wait (bd, &bdbuf_cache.access_waiters);
1533        break;
1534      case RTEMS_BDBUF_STATE_SYNC:
1535      case RTEMS_BDBUF_STATE_TRANSFER:
1536      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
1537        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
1538        break;
1539      default:
1540        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_7);
1541    }
1542  }
1543}
1544
1545static void
1546rtems_bdbuf_request_sync_for_modified_buffer (rtems_bdbuf_buffer *bd)
1547{
1548  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_SYNC);
1549  rtems_chain_extract_unprotected (&bd->link);
1550  rtems_chain_append_unprotected (&bdbuf_cache.sync, &bd->link);
1551  rtems_bdbuf_wake_swapper ();
1552}
1553
1554/**
1555 * @brief Waits until the buffer is ready for recycling.
1556 *
1557 * @retval @c true Buffer is valid and may be recycled.
1558 * @retval @c false Buffer is invalid and has to be searched for again.
1559 */
1560static bool
1561rtems_bdbuf_wait_for_recycle (rtems_bdbuf_buffer *bd)
1562{
1563  while (true)
1564  {
1565    switch (bd->state)
1566    {
1567      case RTEMS_BDBUF_STATE_FREE:
1568        return true;
1569      case RTEMS_BDBUF_STATE_MODIFIED:
1570        rtems_bdbuf_request_sync_for_modified_buffer (bd);
1571        break;
1572      case RTEMS_BDBUF_STATE_CACHED:
1573      case RTEMS_BDBUF_STATE_EMPTY:
1574        if (bd->waiters == 0)
1575          return true;
1576        else
1577        {
1578          /*
1579           * It is essential that we wait here without a special wait count and
1580           * without the group in use.  Otherwise we could trigger a wait ping
1581           * pong with another recycle waiter.  The state of the buffer is
1582           * arbitrary afterwards.
1583           */
1584          rtems_bdbuf_anonymous_wait (&bdbuf_cache.buffer_waiters);
1585          return false;
1586        }
1587      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
1588      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
1589      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
1590      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
1591        rtems_bdbuf_wait (bd, &bdbuf_cache.access_waiters);
1592        break;
1593      case RTEMS_BDBUF_STATE_SYNC:
1594      case RTEMS_BDBUF_STATE_TRANSFER:
1595      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
1596        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
1597        break;
1598      default:
1599        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_8);
1600    }
1601  }
1602}
1603
1604static void
1605rtems_bdbuf_wait_for_sync_done (rtems_bdbuf_buffer *bd)
1606{
1607  while (true)
1608  {
1609    switch (bd->state)
1610    {
1611      case RTEMS_BDBUF_STATE_CACHED:
1612      case RTEMS_BDBUF_STATE_EMPTY:
1613      case RTEMS_BDBUF_STATE_MODIFIED:
1614      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
1615      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
1616      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
1617      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
1618        return;
1619      case RTEMS_BDBUF_STATE_SYNC:
1620      case RTEMS_BDBUF_STATE_TRANSFER:
1621      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
1622        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
1623        break;
1624      default:
1625        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_9);
1626    }
1627  }
1628}
1629
1630static void
1631rtems_bdbuf_wait_for_buffer (void)
1632{
1633  if (!rtems_chain_is_empty (&bdbuf_cache.modified))
1634    rtems_bdbuf_wake_swapper ();
1635
1636  rtems_bdbuf_anonymous_wait (&bdbuf_cache.buffer_waiters);
1637}
1638
1639static void
1640rtems_bdbuf_sync_after_access (rtems_bdbuf_buffer *bd)
1641{
1642  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_SYNC);
1643
1644  rtems_chain_append_unprotected (&bdbuf_cache.sync, &bd->link);
1645
1646  if (bd->waiters)
1647    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
1648
1649  rtems_bdbuf_wake_swapper ();
1650  rtems_bdbuf_wait_for_sync_done (bd);
1651
1652  /*
1653   * We may have created a cached or empty buffer which may be recycled.
1654   */
1655  if (bd->waiters == 0
1656        && (bd->state == RTEMS_BDBUF_STATE_CACHED
1657          || bd->state == RTEMS_BDBUF_STATE_EMPTY))
1658  {
1659    if (bd->state == RTEMS_BDBUF_STATE_EMPTY)
1660    {
1661      rtems_bdbuf_remove_from_tree (bd);
1662      rtems_bdbuf_make_free_and_add_to_lru_list (bd);
1663    }
1664    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1665  }
1666}
1667
1668static rtems_bdbuf_buffer *
1669rtems_bdbuf_get_buffer_for_read_ahead (const rtems_disk_device *dd,
1670                                       rtems_blkdev_bnum block,
1671                                       size_t            bds_per_group)
1672{
1673  rtems_bdbuf_buffer *bd = NULL;
1674
1675  bd = rtems_bdbuf_avl_search (&bdbuf_cache.tree, dd, block);
1676
1677  if (bd == NULL)
1678  {
1679    bd = rtems_bdbuf_get_buffer_from_lru_list (dd, block, bds_per_group);
1680
1681    if (bd != NULL)
1682      rtems_bdbuf_group_obtain (bd);
1683  }
1684  else
1685    /*
1686     * The buffer is in the cache.  So it is already available or in use, and
1687     * thus no need for a read ahead.
1688     */
1689    bd = NULL;
1690
1691  return bd;
1692}
1693
1694static rtems_bdbuf_buffer *
1695rtems_bdbuf_get_buffer_for_access (const rtems_disk_device *dd,
1696                                   rtems_blkdev_bnum block,
1697                                   size_t            bds_per_group)
1698{
1699  rtems_bdbuf_buffer *bd = NULL;
1700
1701  do
1702  {
1703    bd = rtems_bdbuf_avl_search (&bdbuf_cache.tree, dd, block);
1704
1705    if (bd != NULL)
1706    {
1707      if (bd->group->bds_per_group != bds_per_group)
1708      {
1709        if (rtems_bdbuf_wait_for_recycle (bd))
1710        {
1711          rtems_bdbuf_remove_from_tree_and_lru_list (bd);
1712          rtems_bdbuf_make_free_and_add_to_lru_list (bd);
1713          rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1714        }
1715        bd = NULL;
1716      }
1717    }
1718    else
1719    {
1720      bd = rtems_bdbuf_get_buffer_from_lru_list (dd, block, bds_per_group);
1721
1722      if (bd == NULL)
1723        rtems_bdbuf_wait_for_buffer ();
1724    }
1725  }
1726  while (bd == NULL);
1727
1728  rtems_bdbuf_wait_for_access (bd);
1729  rtems_bdbuf_group_obtain (bd);
1730
1731  return bd;
1732}
1733
1734static rtems_status_code
1735rtems_bdbuf_get_media_block (const rtems_disk_device *dd,
1736                             rtems_blkdev_bnum        block,
1737                             rtems_blkdev_bnum       *media_block_ptr)
1738{
1739  /*
1740   * Compute the media block number. Drivers work with the media block number,
1741   * not the block number a BD may have, as the latter depends on the block
1742   * size set by the user.
1743   */
1744  rtems_blkdev_bnum mb = rtems_bdbuf_media_block (dd, block);
1745  if (mb >= dd->size)
1746  {
1747    return RTEMS_INVALID_NUMBER;
1748  }
1749
1750  *media_block_ptr = mb + dd->start;
1751
1752  return RTEMS_SUCCESSFUL;
1753}
1754
1755rtems_status_code
1756rtems_bdbuf_get (const rtems_disk_device *dd,
1757                 rtems_blkdev_bnum    block,
1758                 rtems_bdbuf_buffer **bd_ptr)
1759{
1760  rtems_status_code   sc = RTEMS_SUCCESSFUL;
1761  rtems_bdbuf_buffer *bd = NULL;
1762  rtems_blkdev_bnum   media_block = 0;
1763
1764  sc = rtems_bdbuf_get_media_block (dd, block, &media_block);
1765  if (sc != RTEMS_SUCCESSFUL)
1766    return sc;
1767
1768  rtems_bdbuf_lock_cache ();
1769
1770  /*
1771   * Print the block index relative to the physical disk.
1772   */
1773  if (rtems_bdbuf_tracer)
1774    printf ("bdbuf:get: %" PRIu32 " (%" PRIu32 ") (dev = %08x)\n",
1775            media_block, block, (unsigned) dd->dev);
1776
1777  bd = rtems_bdbuf_get_buffer_for_access (dd, media_block, dd->bds_per_group);
1778
1779  switch (bd->state)
1780  {
1781    case RTEMS_BDBUF_STATE_CACHED:
1782      rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED);
1783      break;
1784    case RTEMS_BDBUF_STATE_EMPTY:
1785      rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_EMPTY);
1786      break;
1787    case RTEMS_BDBUF_STATE_MODIFIED:
1788      /*
1789       * Getting a modified buffer could be considered a bug in the caller,
1790       * because you should not normally get an already modified buffer. However
1791       * the user may have modified a byte in a block, then decided to seek to
1792       * the start and write the whole block; the file system has no record of
1793       * this and simply gets the block to fill.
1794       */
1795      rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_MODIFIED);
1796      break;
1797    default:
1798      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_2);
1799      break;
1800  }
1801
1802  if (rtems_bdbuf_tracer)
1803  {
1804    rtems_bdbuf_show_users ("get", bd);
1805    rtems_bdbuf_show_usage ();
1806  }
1807
1808  rtems_bdbuf_unlock_cache ();
1809
1810  *bd_ptr = bd;
1811
1812  return RTEMS_SUCCESSFUL;
1813}
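/*
 * Illustrative caller sketch (not part of this file): a file system typically
 * obtains a buffer with rtems_bdbuf_get() when it intends to overwrite the
 * whole block, fills it, and hands it back with rtems_bdbuf_release_modified()
 * so the swapout task writes it to disk later.
 *
 *   rtems_bdbuf_buffer *bd;
 *   rtems_status_code   sc = rtems_bdbuf_get (dd, block, &bd);
 *   if (sc == RTEMS_SUCCESSFUL)
 *   {
 *     memset (bd->buffer, 0, dd->block_size);
 *     sc = rtems_bdbuf_release_modified (bd);
 *   }
 */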
1814
1815/**
1816 * Call back handler called by the low level driver when the transfer has
1817 * completed. This function may be invoked from an interrupt handler.
1818 *
1819 * @param arg Arbitrary argument specified in block device request
1820 *            structure (in this case - pointer to the appropriate
1821 *            block device request structure).
1822 * @param status I/O completion status
1823 */
1824static void
1825rtems_bdbuf_transfer_done (void* arg, rtems_status_code status)
1826{
1827  rtems_blkdev_request* req = (rtems_blkdev_request*) arg;
1828
1829  req->status = status;
1830
1831  rtems_event_send (req->io_task, RTEMS_BDBUF_TRANSFER_SYNC);
1832}
1833
1834static void
1835rtems_bdbuf_create_read_request (const rtems_disk_device *dd,
1836                                 rtems_blkdev_bnum        media_block,
1837                                 size_t                   bds_per_group,
1838                                 rtems_blkdev_request    *req,
1839                                 rtems_bdbuf_buffer     **bd_ptr)
1840{
1841  rtems_bdbuf_buffer *bd = NULL;
1842  rtems_blkdev_bnum   media_block_end = dd->start + dd->size;
1843  rtems_blkdev_bnum   media_block_count = dd->block_to_media_block_shift >= 0 ?
1844    dd->block_size >> dd->block_to_media_block_shift
1845      : dd->block_size / dd->media_block_size;
1846  uint32_t            block_size = dd->block_size;
1847  uint32_t            transfer_index = 1;
1848  uint32_t            transfer_count = bdbuf_config.max_read_ahead_blocks + 1;
1849
1850  if (media_block_end - media_block < transfer_count)
1851    transfer_count = media_block_end - media_block;
1852
1853  req->req = RTEMS_BLKDEV_REQ_READ;
1854  req->req_done = rtems_bdbuf_transfer_done;
1855  req->done_arg = req;
1856  req->io_task = rtems_task_self ();
1857  req->status = RTEMS_RESOURCE_IN_USE;
1858  req->bufnum = 0;
1859
1860  bd = rtems_bdbuf_get_buffer_for_access (dd, media_block, bds_per_group);
1861
1862  *bd_ptr = bd;
1863
1864  req->bufs [0].user   = bd;
1865  req->bufs [0].block  = media_block;
1866  req->bufs [0].length = block_size;
1867  req->bufs [0].buffer = bd->buffer;
1868
1869  if (rtems_bdbuf_tracer)
1870    rtems_bdbuf_show_users ("read", bd);
1871
1872  switch (bd->state)
1873  {
1874    case RTEMS_BDBUF_STATE_CACHED:
1875    case RTEMS_BDBUF_STATE_MODIFIED:
1876      return;
1877    case RTEMS_BDBUF_STATE_EMPTY:
1878      rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);
1879      break;
1880    default:
1881      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_1);
1882      break;
1883  }
1884
1885  while (transfer_index < transfer_count)
1886  {
1887    media_block += media_block_count;
1888
1889    bd = rtems_bdbuf_get_buffer_for_read_ahead (dd, media_block,
1890                                                bds_per_group);
1891
1892    if (bd == NULL)
1893      break;
1894
1895    rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);
1896
1897    req->bufs [transfer_index].user   = bd;
1898    req->bufs [transfer_index].block  = media_block;
1899    req->bufs [transfer_index].length = block_size;
1900    req->bufs [transfer_index].buffer = bd->buffer;
1901
1902    if (rtems_bdbuf_tracer)
1903      rtems_bdbuf_show_users ("read-ahead", bd);
1904
1905    ++transfer_index;
1906  }
1907
1908  req->bufnum = transfer_index;
1909}
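
/*
 * Worked example (the numbers are assumptions): with max_read_ahead_blocks
 * set to 7 a read request may carry up to 8 buffers. Assuming the block size
 * equals the media block size and the requested media block lies only 3
 * blocks before the end of the device (dd->start + dd->size), the transfer
 * count is clamped to 3. Read-ahead also stops early whenever
 * rtems_bdbuf_get_buffer_for_read_ahead() cannot supply a buffer.
 */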
1910
1911static rtems_status_code
1912rtems_bdbuf_execute_transfer_request (const rtems_disk_device *dd,
1913                                      rtems_blkdev_request    *req,
1914                                      bool                     cache_locked)
1915{
1916  rtems_status_code sc = RTEMS_SUCCESSFUL;
1917  int result = 0;
1918  uint32_t transfer_index = 0;
1919  bool wake_transfer_waiters = false;
1920  bool wake_buffer_waiters = false;
1921
1922  if (cache_locked)
1923    rtems_bdbuf_unlock_cache ();
1924
1925  result = dd->ioctl (dd->phys_dev, RTEMS_BLKIO_REQUEST, req);
1926
1927  if (result == 0)
1928  {
1929    rtems_bdbuf_wait_for_event (RTEMS_BDBUF_TRANSFER_SYNC);
1930    sc = req->status;
1931  }
1932  else
1933    sc = RTEMS_IO_ERROR;
1934
1935  rtems_bdbuf_lock_cache ();
1936
1937  for (transfer_index = 0; transfer_index < req->bufnum; ++transfer_index)
1938  {
1939    rtems_bdbuf_buffer *bd = req->bufs [transfer_index].user;
1940    bool waiters = bd->waiters;
1941
1942    if (waiters)
1943      wake_transfer_waiters = true;
1944    else
1945      wake_buffer_waiters = true;
1946
1947    rtems_bdbuf_group_release (bd);
1948
1949    if (sc == RTEMS_SUCCESSFUL && bd->state == RTEMS_BDBUF_STATE_TRANSFER)
1950      rtems_bdbuf_make_cached_and_add_to_lru_list (bd);
1951    else
1952      rtems_bdbuf_discard_buffer (bd);
1953
1954    if (rtems_bdbuf_tracer)
1955      rtems_bdbuf_show_users ("transfer", bd);
1956  }
1957
1958  if (wake_transfer_waiters)
1959    rtems_bdbuf_wake (&bdbuf_cache.transfer_waiters);
1960
1961  if (wake_buffer_waiters)
1962    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1963
1964  if (!cache_locked)
1965    rtems_bdbuf_unlock_cache ();
1966
1967  if (sc == RTEMS_SUCCESSFUL || sc == RTEMS_UNSATISFIED)
1968    return sc;
1969  else
1970    return RTEMS_IO_ERROR;
1971}
1972
1973rtems_status_code
1974rtems_bdbuf_read (const rtems_disk_device *dd,
1975                  rtems_blkdev_bnum    block,
1976                  rtems_bdbuf_buffer **bd_ptr)
1977{
1978  rtems_status_code     sc = RTEMS_SUCCESSFUL;
1979  rtems_blkdev_request *req = NULL;
1980  rtems_bdbuf_buffer   *bd = NULL;
1981  rtems_blkdev_bnum     media_block = 0;
1982
1983  sc = rtems_bdbuf_get_media_block (dd, block, &media_block);
1984  if (sc != RTEMS_SUCCESSFUL)
1985    return sc;
1986
1987  /*
1988   * TODO: This type of request structure is wrong and should be removed.
1989   */
1990#define bdbuf_alloc(size) __builtin_alloca (size)
1991
1992  req = bdbuf_alloc (sizeof (rtems_blkdev_request) +
1993                     sizeof (rtems_blkdev_sg_buffer) *
1994                      (bdbuf_config.max_read_ahead_blocks + 1));
1995
1996  if (rtems_bdbuf_tracer)
1997    printf ("bdbuf:read: %" PRIu32 " (%" PRIu32 ") (dev = %08x)\n",
1998            media_block + dd->start, block, (unsigned) dd->dev);
1999
2000  rtems_bdbuf_lock_cache ();
2001  rtems_bdbuf_create_read_request (dd, media_block, dd->bds_per_group, req, &bd);
2002
2003  if (req->bufnum > 0)
2004  {
2005    sc = rtems_bdbuf_execute_transfer_request (dd, req, true);
2006    if (sc == RTEMS_SUCCESSFUL)
2007    {
2008      rtems_chain_extract_unprotected (&bd->link);
2009      rtems_bdbuf_group_obtain (bd);
2010    }
2011  }
2012
2013  if (sc == RTEMS_SUCCESSFUL)
2014  {
2015    switch (bd->state)
2016    {
2017      case RTEMS_BDBUF_STATE_CACHED:
2018        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED);
2019        break;
2020      case RTEMS_BDBUF_STATE_MODIFIED:
2021        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_MODIFIED);
2022        break;
2023      default:
2024        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_4);
2025        break;
2026    }
2027
2028    if (rtems_bdbuf_tracer)
2029    {
2030      rtems_bdbuf_show_users ("read", bd);
2031      rtems_bdbuf_show_usage ();
2032    }
2033
2034    *bd_ptr = bd;
2035  }
2036  else
2037    *bd_ptr = NULL;
2038
2039  rtems_bdbuf_unlock_cache ();
2040
2041  return sc;
2042}
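
/*
 * Usage sketch (illustrative only): read a block, copy its contents and hand
 * the buffer back unchanged. The device pointer "dd", the block number
 * "block" and the destination area are assumptions made for the example.
 *
 *   rtems_bdbuf_buffer *bd;
 *   rtems_status_code   sc = rtems_bdbuf_read (dd, block, &bd);
 *   if (sc == RTEMS_SUCCESSFUL)
 *   {
 *     memcpy (destination, bd->buffer, dd->block_size);
 *     sc = rtems_bdbuf_release (bd);
 *   }
 */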
2043
2044static rtems_status_code
2045rtems_bdbuf_check_bd_and_lock_cache (rtems_bdbuf_buffer *bd, const char *kind)
2046{
2047  if (bd == NULL)
2048    return RTEMS_INVALID_ADDRESS;
2049  if (rtems_bdbuf_tracer)
2050  {
2051    printf ("bdbuf:%s: %" PRIu32 "\n", kind, bd->block);
2052    rtems_bdbuf_show_users (kind, bd);
2053  }
2054  rtems_bdbuf_lock_cache();
2055
2056  return RTEMS_SUCCESSFUL;
2057}
2058
2059rtems_status_code
2060rtems_bdbuf_release (rtems_bdbuf_buffer *bd)
2061{
2062  rtems_status_code sc = RTEMS_SUCCESSFUL;
2063
2064  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "release");
2065  if (sc != RTEMS_SUCCESSFUL)
2066    return sc;
2067
2068  switch (bd->state)
2069  {
2070    case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2071      rtems_bdbuf_add_to_lru_list_after_access (bd);
2072      break;
2073    case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
2074    case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2075      rtems_bdbuf_discard_buffer_after_access (bd);
2076      break;
2077    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2078      rtems_bdbuf_add_to_modified_list_after_access (bd);
2079      break;
2080    default:
2081      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_0);
2082      break;
2083  }
2084
2085  if (rtems_bdbuf_tracer)
2086    rtems_bdbuf_show_usage ();
2087
2088  rtems_bdbuf_unlock_cache ();
2089
2090  return RTEMS_SUCCESSFUL;
2091}
2092
2093rtems_status_code
2094rtems_bdbuf_release_modified (rtems_bdbuf_buffer *bd)
2095{
2096  rtems_status_code sc = RTEMS_SUCCESSFUL;
2097
2098  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "release modified");
2099  if (sc != RTEMS_SUCCESSFUL)
2100    return sc;
2101
2102  switch (bd->state)
2103  {
2104    case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2105    case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
2106    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2107      rtems_bdbuf_add_to_modified_list_after_access (bd);
2108      break;
2109    case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2110      rtems_bdbuf_discard_buffer_after_access (bd);
2111      break;
2112    default:
2113      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_6);
2114      break;
2115  }
2116
2117  if (rtems_bdbuf_tracer)
2118    rtems_bdbuf_show_usage ();
2119
2120  rtems_bdbuf_unlock_cache ();
2121
2122  return RTEMS_SUCCESSFUL;
2123}
2124
2125rtems_status_code
2126rtems_bdbuf_sync (rtems_bdbuf_buffer *bd)
2127{
2128  rtems_status_code sc = RTEMS_SUCCESSFUL;
2129
2130  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "sync");
2131  if (sc != RTEMS_SUCCESSFUL)
2132    return sc;
2133
2134  switch (bd->state)
2135  {
2136    case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2137    case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
2138    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2139      rtems_bdbuf_sync_after_access (bd);
2140      break;
2141    case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2142      rtems_bdbuf_discard_buffer_after_access (bd);
2143      break;
2144    default:
2145      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_5);
2146      break;
2147  }
2148
2149  if (rtems_bdbuf_tracer)
2150    rtems_bdbuf_show_usage ();
2151
2152  rtems_bdbuf_unlock_cache ();
2153
2154  return RTEMS_SUCCESSFUL;
2155}
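
/*
 * Usage sketch (illustrative only): a write-through update of a single
 * block. Unlike rtems_bdbuf_release_modified(), rtems_bdbuf_sync() does not
 * return until the buffer has been transferred to the media. The helper
 * update_block_contents() is hypothetical.
 *
 *   rtems_bdbuf_buffer *bd;
 *   rtems_status_code   sc = rtems_bdbuf_read (dd, block, &bd);
 *   if (sc == RTEMS_SUCCESSFUL)
 *   {
 *     update_block_contents (bd->buffer);
 *     sc = rtems_bdbuf_sync (bd);
 *   }
 */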
2156
2157rtems_status_code
2158rtems_bdbuf_syncdev (const rtems_disk_device *dd)
2159{
2160  if (rtems_bdbuf_tracer)
2161    printf ("bdbuf:syncdev: %08x\n", (unsigned) dd->dev);
2162
2163  /*
2164   * Take the sync lock before locking the cache. Once we have the sync lock
2165   * we can lock the cache. If another thread holds the sync lock this thread
2166   * blocks until it owns the sync lock and can then lock the cache. The sync
2167   * lock can only be obtained with the cache unlocked.
2168   */
2169  rtems_bdbuf_lock_sync ();
2170  rtems_bdbuf_lock_cache ();
2171
2172  /*
2173   * Set the cache to have a sync active for a specific device and let the swap
2174   * out task know the id of the requester to wake when done.
2175   *
2176   * The swap out task will negate the sync active flag when no more buffers
2177   * for the device are held on the "modified for sync" queues.
2178   */
2179  bdbuf_cache.sync_active    = true;
2180  bdbuf_cache.sync_requester = rtems_task_self ();
2181  bdbuf_cache.sync_device    = dd;
2182
2183  rtems_bdbuf_wake_swapper ();
2184  rtems_bdbuf_unlock_cache ();
2185  rtems_bdbuf_wait_for_event (RTEMS_BDBUF_TRANSFER_SYNC);
2186  rtems_bdbuf_unlock_sync ();
2187
2188  return RTEMS_SUCCESSFUL;
2189}
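
/*
 * Usage sketch (illustrative only): a file system would typically call this
 * when it is unmounted or when the application requests a flush, so that all
 * modified buffers of the device reach the media before it continues.
 *
 *   (void) rtems_bdbuf_syncdev (dd);
 */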
2190
2191/**
2192 * Swapout transfer to the driver. The driver will break this I/O into groups
2193 * of consecutive write requests if multiple consecutive buffers are required
2194 * by the driver. The cache is not locked.
2195 *
2196 * @param transfer The transfer transaction.
2197 */
2198static void
2199rtems_bdbuf_swapout_write (rtems_bdbuf_swapout_transfer* transfer)
2200{
2201  rtems_chain_node *node;
2202
2203  if (rtems_bdbuf_tracer)
2204    printf ("bdbuf:swapout transfer: %08x\n", (unsigned) transfer->dd->dev);
2205
2206  /*
2207   * If there are buffers to transfer to the media, transfer them.
2208   */
2209  if (!rtems_chain_is_empty (&transfer->bds))
2210  {
2211    /*
2212     * The last block number used when the driver only supports
2213     * continuous blocks in a single request.
2214     */
2215    uint32_t last_block = 0;
2216
2217    /*
2218     * Number of buffers per bd. This is used to detect the next
2219     * block.
2220     */
2221    uint32_t bufs_per_bd = 0;
2222
2223    const rtems_disk_device *dd = transfer->dd;
2224
2225    bufs_per_bd = dd->block_size / bdbuf_config.buffer_min;
2226
2227    /*
2228     * Take as many buffers as configured and pass them to the driver. Note,
2229     * the API to the drivers has an array of buffers and if a chain was passed
2230     * we could have just passed the list. If the driver API is updated it
2231     * should be possible to make this change with little effect in this
2232     * code. The array that is passed is broken in design and should be
2233     * removed. Merging members of a struct into the first member is
2234     * trouble waiting to happen.
2235     */
2236    transfer->write_req->status = RTEMS_RESOURCE_IN_USE;
2237    transfer->write_req->bufnum = 0;
2238
2239    while ((node = rtems_chain_get_unprotected(&transfer->bds)) != NULL)
2240    {
2241      rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;
2242      bool                write = false;
2243
2244      /*
2245       * If the device only accepts sequential buffers, this is not the
2246       * first buffer (the first is always sequential), and the buffer is
2247       * not sequential, then put the buffer back on the transfer chain and
2248       * write the committed buffers.
2249       */
2250
2251      if (rtems_bdbuf_tracer)
2252        printf ("bdbuf:swapout write: bd:%" PRIu32 ", bufnum:%" PRIu32 " mode:%s\n",
2253                bd->block, transfer->write_req->bufnum,
2254                dd->phys_dev->capabilities &
2255                RTEMS_BLKDEV_CAP_MULTISECTOR_CONT ? "MULTI" : "SCAT");
2256
2257      if ((dd->phys_dev->capabilities & RTEMS_BLKDEV_CAP_MULTISECTOR_CONT) &&
2258          transfer->write_req->bufnum &&
2259          (bd->block != (last_block + bufs_per_bd)))
2260      {
2261        rtems_chain_prepend_unprotected (&transfer->bds, &bd->link);
2262        write = true;
2263      }
2264      else
2265      {
2266        rtems_blkdev_sg_buffer* buf;
2267        buf = &transfer->write_req->bufs[transfer->write_req->bufnum];
2268        transfer->write_req->bufnum++;
2269        buf->user   = bd;
2270        buf->block  = bd->block;
2271        buf->length = dd->block_size;
2272        buf->buffer = bd->buffer;
2273        last_block  = bd->block;
2274      }
2275
2276      /*
2277       * Perform the transfer if there are no more buffers, or the transfer
2278       * size has reached the configured max. value.
2279       */
2280
2281      if (rtems_chain_is_empty (&transfer->bds) ||
2282          (transfer->write_req->bufnum >= bdbuf_config.max_write_blocks))
2283        write = true;
2284
2285      if (write)
2286      {
2287        rtems_bdbuf_execute_transfer_request (dd, transfer->write_req, false);
2288
2289        transfer->write_req->status = RTEMS_RESOURCE_IN_USE;
2290        transfer->write_req->bufnum = 0;
2291      }
2292    }
2293
2294    /*
2295     * If syncing and the device is capable of handling a sync IO control
2296     * call, perform the call.
2297     */
2298    if (transfer->syncing &&
2299        (dd->phys_dev->capabilities & RTEMS_BLKDEV_CAP_SYNC))
2300    {
2301      /* int result = */ dd->ioctl (dd->phys_dev, RTEMS_BLKDEV_REQ_SYNC, NULL);
2302      /* How should the error be handled ? */
2303    }
2304  }
2305}
2306
2307/**
2308 * Process the modified list of buffers. There is a sync or modified list that
2309 * needs to be handled so we have a common function to do the work.
2310 *
2311 * @param dd_ptr Pointer to the device to handle. If BDBUF_INVALID_DEV no
2312 * device is selected yet, so select the device of the first buffer to be
2313 * written to disk.
2314 * @param chain The modified chain to process.
2315 * @param transfer The chain to append the buffers to be written to.
2316 * @param sync_active If true this is a sync operation so expire all timers.
2317 * @param update_timers If true update the timers.
2318 * @param timer_delta If update_timers is true update the timers by this
2319 *                    amount.
2320 */
2321static void
2322rtems_bdbuf_swapout_modified_processing (const rtems_disk_device **dd_ptr,
2323                                         rtems_chain_control* chain,
2324                                         rtems_chain_control* transfer,
2325                                         bool                 sync_active,
2326                                         bool                 update_timers,
2327                                         uint32_t             timer_delta)
2328{
2329  if (!rtems_chain_is_empty (chain))
2330  {
2331    rtems_chain_node* node = rtems_chain_head (chain);
2332    bool              sync_all;
2333   
2334    node = node->next;
2335
2336    /*
2337     * A sync active with no valid dev means sync all.
2338     */
2339    if (sync_active && (*dd_ptr == BDBUF_INVALID_DEV))
2340      sync_all = true;
2341    else
2342      sync_all = false;
2343   
2344    while (!rtems_chain_is_tail (chain, node))
2345    {
2346      rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;
2347
2348      /*
2349       * Check if the buffer's hold timer has reached 0. If a sync is active,
2350       * or someone waits for a buffer to be written, force all timers to 0.
2351       *
2352       * @note Lots of sync requests will skew this timer. It should be based
2353       *       on TOD to be accurate. Does it matter ?
2354       */
2355      if (sync_all || (sync_active && (*dd_ptr == bd->dd))
2356          || rtems_bdbuf_has_buffer_waiters ())
2357        bd->hold_timer = 0;
2358
2359      if (bd->hold_timer)
2360      {
2361        if (update_timers)
2362        {
2363          if (bd->hold_timer > timer_delta)
2364            bd->hold_timer -= timer_delta;
2365          else
2366            bd->hold_timer = 0;
2367        }
2368
2369        if (bd->hold_timer)
2370        {
2371          node = node->next;
2372          continue;
2373        }
2374      }
2375
2376      /*
2377       * This assumes we can use BDBUF_INVALID_DEV as a "no device" marker,
2378       * which is just an assumption. We cannot test for an empty transfer
2379       * list because the sync device calls already set the device to use.
2380       */
2381      if (*dd_ptr == BDBUF_INVALID_DEV)
2382        *dd_ptr = bd->dd;
2383
2384      if (bd->dd == *dd_ptr)
2385      {
2386        rtems_chain_node* next_node = node->next;
2387        rtems_chain_node* tnode = rtems_chain_tail (transfer);
2388
2389        /*
2390         * The blocks on the transfer list are sorted in block order. This
2391         * means multi-block transfers for drivers that require consecutive
2392         * blocks perform better with sorted blocks and for real disks it may
2393         * help lower head movement.
2394         */
2395
2396        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);
2397
2398        rtems_chain_extract_unprotected (node);
2399
2400        tnode = tnode->previous;
2401
2402        while (node && !rtems_chain_is_head (transfer, tnode))
2403        {
2404          rtems_bdbuf_buffer* tbd = (rtems_bdbuf_buffer*) tnode;
2405
2406          if (bd->block > tbd->block)
2407          {
2408            rtems_chain_insert_unprotected (tnode, node);
2409            node = NULL;
2410          }
2411          else
2412            tnode = tnode->previous;
2413        }
2414
2415        if (node)
2416          rtems_chain_prepend_unprotected (transfer, node);
2417
2418        node = next_node;
2419      }
2420      else
2421      {
2422        node = node->next;
2423      }
2424    }
2425  }
2426}
2427
2428/**
2429 * Process the cache's modified buffers. Check the sync list first then the
2430 * modified list, extracting the buffers suitable to be written to disk. We
2431 * handle one device at a time. The task level loop will repeat this operation
2432 * while there are buffers to be written. If the transfer fails place the
2433 * buffers back on the modified list and try again later. The cache is
2434 * unlocked while the buffers are being written to disk.
2435 *
2436 * @param timer_delta If update_timers is true update the timers by this
2437 *                    amount.
2438 * @param update_timers If true update the timers.
2439 * @param transfer The transfer transaction data.
2440 *
2441 * @retval true Buffers were written to disk so scan again.
2442 * @retval false No buffers were written to disk.
2443 */
2444static bool
2445rtems_bdbuf_swapout_processing (unsigned long                 timer_delta,
2446                                bool                          update_timers,
2447                                rtems_bdbuf_swapout_transfer* transfer)
2448{
2449  rtems_bdbuf_swapout_worker* worker;
2450  bool                        transfered_buffers = false;
2451
2452  rtems_bdbuf_lock_cache ();
2453
2454  /*
2455   * If a sync is active do not use a worker because the current code does not
2456   * clean up after it. We need to know the buffers have been written when
2457   * syncing to release the sync lock and currently worker threads do not
2458   * return to here. We do not know the worker is the last in a sequence of
2459   * sync writes until after we have it running so we do not know to tell it to
2460   * release the lock. The simplest solution is to have the main swap out task
2461   * perform all sync operations.
2462   */
2463  if (bdbuf_cache.sync_active)
2464    worker = NULL;
2465  else
2466  {
2467    worker = (rtems_bdbuf_swapout_worker*)
2468      rtems_chain_get_unprotected (&bdbuf_cache.swapout_workers);
2469    if (worker)
2470      transfer = &worker->transfer;
2471  }
2472
2473  rtems_chain_initialize_empty (&transfer->bds);
2474  transfer->dd = BDBUF_INVALID_DEV;
2475  transfer->syncing = bdbuf_cache.sync_active;
2476 
2477  /*
2478   * When the sync is for a device limit the sync to that device. If the sync
2479   * is for a buffer handle process the devices in the order they appear on
2480   * the sync list. In that case the dev is BDBUF_INVALID_DEV.
2481   */
2482  if (bdbuf_cache.sync_active)
2483    transfer->dd = bdbuf_cache.sync_device;
2484   
2485  /*
2486   * If we have any buffers in the sync queue move them to the transfer
2487   * list. The first sync buffer will select the device we use.
2488   */
2489  rtems_bdbuf_swapout_modified_processing (&transfer->dd,
2490                                           &bdbuf_cache.sync,
2491                                           &transfer->bds,
2492                                           true, false,
2493                                           timer_delta);
2494
2495  /*
2496   * Process the cache's modified list.
2497   */
2498  rtems_bdbuf_swapout_modified_processing (&transfer->dd,
2499                                           &bdbuf_cache.modified,
2500                                           &transfer->bds,
2501                                           bdbuf_cache.sync_active,
2502                                           update_timers,
2503                                           timer_delta);
2504
2505  /*
2506   * We have all the buffers that have been modified for this device so the
2507   * cache can be unlocked because the state of each buffer has been set to
2508   * TRANSFER.
2509   */
2510  rtems_bdbuf_unlock_cache ();
2511
2512  /*
2513   * If there are buffers to transfer to the media, transfer them.
2514   */
2515  if (!rtems_chain_is_empty (&transfer->bds))
2516  {
2517    if (worker)
2518    {
2519      rtems_status_code sc = rtems_event_send (worker->id,
2520                                               RTEMS_BDBUF_SWAPOUT_SYNC);
2521      if (sc != RTEMS_SUCCESSFUL)
2522        rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE);
2523    }
2524    else
2525    {
2526      rtems_bdbuf_swapout_write (transfer);
2527    }
2528
2529    transfered_buffers = true;
2530  }
2531
2532  if (bdbuf_cache.sync_active && !transfered_buffers)
2533  {
2534    rtems_id sync_requester;
2535    rtems_bdbuf_lock_cache ();
2536    sync_requester = bdbuf_cache.sync_requester;
2537    bdbuf_cache.sync_active = false;
2538    bdbuf_cache.sync_requester = 0;
2539    rtems_bdbuf_unlock_cache ();
2540    if (sync_requester)
2541      rtems_event_send (sync_requester, RTEMS_BDBUF_TRANSFER_SYNC);
2542  }
2543
2544  return transfered_buffers;
2545}
2546
2547/**
2548 * Allocate the write request and initialise it for good measure.
2549 *
2550 * @return rtems_blkdev_request* The write request memory.
2551 */
2552static rtems_blkdev_request*
2553rtems_bdbuf_swapout_writereq_alloc (void)
2554{
2555  /*
2556   * @note chrisj The rtems_blkdev_request and the array at the end is a hack.
2557   * I am disappointed at finding code like this in RTEMS. The request should
2558   * have been a rtems_chain_control. Simpler, faster and less storage as the
2559   * node is already part of the buffer structure.
2560   */
2561  rtems_blkdev_request* write_req =
2562    malloc (sizeof (rtems_blkdev_request) +
2563            (bdbuf_config.max_write_blocks * sizeof (rtems_blkdev_sg_buffer)));
2564
2565  if (!write_req)
2566    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM);
2567
2568  write_req->req = RTEMS_BLKDEV_REQ_WRITE;
2569  write_req->req_done = rtems_bdbuf_transfer_done;
2570  write_req->done_arg = write_req;
2571  write_req->io_task = rtems_task_self ();
2572
2573  return write_req;
2574}
2575
2576/**
2577 * The swapout worker thread body.
2578 *
2579 * @param arg A pointer to the worker thread's private data.
2580 * @return rtems_task Not used.
2581 */
2582static rtems_task
2583rtems_bdbuf_swapout_worker_task (rtems_task_argument arg)
2584{
2585  rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) arg;
2586
2587  while (worker->enabled)
2588  {
2589    rtems_bdbuf_wait_for_event (RTEMS_BDBUF_SWAPOUT_SYNC);
2590
2591    rtems_bdbuf_swapout_write (&worker->transfer);
2592
2593    rtems_bdbuf_lock_cache ();
2594
2595    rtems_chain_initialize_empty (&worker->transfer.bds);
2596    worker->transfer.dd = BDBUF_INVALID_DEV;
2597
2598    rtems_chain_append_unprotected (&bdbuf_cache.swapout_workers, &worker->link);
2599
2600    rtems_bdbuf_unlock_cache ();
2601  }
2602
2603  free (worker->transfer.write_req);
2604  free (worker);
2605
2606  rtems_task_delete (RTEMS_SELF);
2607}
2608
2609/**
2610 * Open the swapout worker threads.
2611 */
2612static void
2613rtems_bdbuf_swapout_workers_open (void)
2614{
2615  rtems_status_code sc;
2616  size_t            w;
2617
2618  rtems_bdbuf_lock_cache ();
2619
2620  for (w = 0; w < bdbuf_config.swapout_workers; w++)
2621  {
2622    rtems_bdbuf_swapout_worker* worker;
2623
2624    worker = malloc (sizeof (rtems_bdbuf_swapout_worker));
2625    if (!worker)
2626      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM);
2627
2628    rtems_chain_append_unprotected (&bdbuf_cache.swapout_workers, &worker->link);
2629    worker->enabled = true;
2630    worker->transfer.write_req = rtems_bdbuf_swapout_writereq_alloc ();
2631
2632    rtems_chain_initialize_empty (&worker->transfer.bds);
2633    worker->transfer.dd = BDBUF_INVALID_DEV;
2634
2635    sc = rtems_task_create (rtems_build_name('B', 'D', 'o', 'a' + w),
2636                            (bdbuf_config.swapout_priority ?
2637                             bdbuf_config.swapout_priority :
2638                             RTEMS_BDBUF_SWAPOUT_TASK_PRIORITY_DEFAULT),
2639                            SWAPOUT_TASK_STACK_SIZE,
2640                            RTEMS_PREEMPT | RTEMS_NO_TIMESLICE | RTEMS_NO_ASR,
2641                            RTEMS_LOCAL | RTEMS_NO_FLOATING_POINT,
2642                            &worker->id);
2643    if (sc != RTEMS_SUCCESSFUL)
2644      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_CREATE);
2645
2646    sc = rtems_task_start (worker->id,
2647                           rtems_bdbuf_swapout_worker_task,
2648                           (rtems_task_argument) worker);
2649    if (sc != RTEMS_SUCCESSFUL)
2650      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_START);
2651  }
2652
2653  rtems_bdbuf_unlock_cache ();
2654}
2655
2656/**
2657 * Close the swapout worker threads.
2658 */
2659static void
2660rtems_bdbuf_swapout_workers_close (void)
2661{
2662  rtems_chain_node* node;
2663
2664  rtems_bdbuf_lock_cache ();
2665
2666  node = rtems_chain_first (&bdbuf_cache.swapout_workers);
2667  while (!rtems_chain_is_tail (&bdbuf_cache.swapout_workers, node))
2668  {
2669    rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) node;
2670    worker->enabled = false;
2671    rtems_event_send (worker->id, RTEMS_BDBUF_SWAPOUT_SYNC);
2672    node = rtems_chain_next (node);
2673  }
2674
2675  rtems_bdbuf_unlock_cache ();
2676}
2677
2678/**
2679 * Body of task which takes care of flushing modified buffers to the disk.
2680 *
2681 * @param arg A pointer to the global cache data. Use the global variable and
2682 *            not this.
2683 * @return rtems_task Not used.
2684 */
2685static rtems_task
2686rtems_bdbuf_swapout_task (rtems_task_argument arg)
2687{
2688  rtems_bdbuf_swapout_transfer transfer;
2689  uint32_t                     period_in_ticks;
2690  const uint32_t               period_in_msecs = bdbuf_config.swapout_period;
2691  uint32_t                     timer_delta;
2692
2693  transfer.write_req = rtems_bdbuf_swapout_writereq_alloc ();
2694  rtems_chain_initialize_empty (&transfer.bds);
2695  transfer.dd = BDBUF_INVALID_DEV;
2696  transfer.syncing = false;
2697
2698  /*
2699   * Localise the period.
2700   */
2701  period_in_ticks = RTEMS_MICROSECONDS_TO_TICKS (period_in_msecs * 1000);
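  /*
   * Worked example (the configuration values are assumptions): with a
   * swapout period of 250 milliseconds and a 10 millisecond clock tick,
   * RTEMS_MICROSECONDS_TO_TICKS (250 * 1000) yields 25 ticks.
   */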
2702
2703  /*
2704   * This is temporary. Needs to be changed to use the real time clock.
2705   */
2706  timer_delta = period_in_msecs;
2707
2708  /*
2709   * Create the worker threads.
2710   */
2711  rtems_bdbuf_swapout_workers_open ();
2712
2713  while (bdbuf_cache.swapout_enabled)
2714  {
2715    rtems_event_set   out;
2716    rtems_status_code sc;
2717
2718    /*
2719     * Only update the timers once in the processing cycle.
2720     */
2721    bool update_timers = true;
2722
2723    /*
2724     * If we write buffers to any disk perform a check again. We only write a
2725     * single device at a time and the cache may have more than one device's
2726     * buffers modified waiting to be written.
2727     */
2728    bool transfered_buffers;
2729
2730    do
2731    {
2732      transfered_buffers = false;
2733
2734      /*
2735       * Extract all the buffers we find for a specific device. The device is
2736       * the first one we find on a modified list. Process the sync queue of
2737       * buffers first.
2738       */
2739      if (rtems_bdbuf_swapout_processing (timer_delta,
2740                                          update_timers,
2741                                          &transfer))
2742      {
2743        transfered_buffers = true;
2744      }
2745
2746      /*
2747       * Only update the timers once.
2748       */
2749      update_timers = false;
2750    }
2751    while (transfered_buffers);
2752
2753    sc = rtems_event_receive (RTEMS_BDBUF_SWAPOUT_SYNC,
2754                              RTEMS_EVENT_ALL | RTEMS_WAIT,
2755                              period_in_ticks,
2756                              &out);
2757
2758    if ((sc != RTEMS_SUCCESSFUL) && (sc != RTEMS_TIMEOUT))
2759      rtems_fatal_error_occurred (BLKDEV_FATAL_BDBUF_SWAPOUT_RE);
2760  }
2761
2762  rtems_bdbuf_swapout_workers_close ();
2763
2764  free (transfer.write_req);
2765
2766  rtems_task_delete (RTEMS_SELF);
2767}
2768
2769static void
2770rtems_bdbuf_purge_list (rtems_chain_control *purge_list)
2771{
2772  bool wake_buffer_waiters = false;
2773  rtems_chain_node *node = NULL;
2774
2775  while ((node = rtems_chain_get_unprotected (purge_list)) != NULL)
2776  {
2777    rtems_bdbuf_buffer *bd = (rtems_bdbuf_buffer *) node;
2778
2779    if (bd->waiters == 0)
2780      wake_buffer_waiters = true;
2781
2782    rtems_bdbuf_discard_buffer (bd);
2783  }
2784
2785  if (wake_buffer_waiters)
2786    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
2787}
2788
2789static void
2790rtems_bdbuf_gather_for_purge (rtems_chain_control *purge_list,
2791                              const rtems_disk_device *dd)
2792{
2793  rtems_bdbuf_buffer *stack [RTEMS_BDBUF_AVL_MAX_HEIGHT];
2794  rtems_bdbuf_buffer **prev = stack;
2795  rtems_bdbuf_buffer *cur = bdbuf_cache.tree;
2796
2797  *prev = NULL;
2798
2799  while (cur != NULL)
2800  {
2801    if (cur->dd == dd)
2802    {
2803      switch (cur->state)
2804      {
2805        case RTEMS_BDBUF_STATE_FREE:
2806        case RTEMS_BDBUF_STATE_EMPTY:
2807        case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2808        case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
2809          break;
2810        case RTEMS_BDBUF_STATE_SYNC:
2811          rtems_bdbuf_wake (&bdbuf_cache.transfer_waiters);
2812          /* Fall through */
2813        case RTEMS_BDBUF_STATE_MODIFIED:
2814          rtems_bdbuf_group_release (cur);
2815          /* Fall through */
2816        case RTEMS_BDBUF_STATE_CACHED:
2817          rtems_chain_extract_unprotected (&cur->link);
2818          rtems_chain_append_unprotected (purge_list, &cur->link);
2819          break;
2820        case RTEMS_BDBUF_STATE_TRANSFER:
2821          rtems_bdbuf_set_state (cur, RTEMS_BDBUF_STATE_TRANSFER_PURGED);
2822          break;
2823        case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2824        case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
2825        case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2826          rtems_bdbuf_set_state (cur, RTEMS_BDBUF_STATE_ACCESS_PURGED);
2827          break;
2828        default:
2829          rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_STATE_11);
2830      }
2831    }
2832
2833    if (cur->avl.left != NULL)
2834    {
2835      /* Left */
2836      ++prev;
2837      *prev = cur;
2838      cur = cur->avl.left;
2839    }
2840    else if (cur->avl.right != NULL)
2841    {
2842      /* Right */
2843      ++prev;
2844      *prev = cur;
2845      cur = cur->avl.right;
2846    }
2847    else
2848    {
2849      while (*prev != NULL && cur == (*prev)->avl.right)
2850      {
2851        /* Up */
2852        cur = *prev;
2853        --prev;
2854      }
2855      if (*prev != NULL)
2856        /* Right */
2857        cur = (*prev)->avl.right;
2858      else
2859        /* Finished */
2860        cur = NULL;
2861    }
2862  }
2863}
2864
2865void
2866rtems_bdbuf_purge_dev (const rtems_disk_device *dd)
2867{
2868  rtems_chain_control purge_list;
2869
2870  rtems_chain_initialize_empty (&purge_list);
2871  rtems_bdbuf_lock_cache ();
2872  rtems_bdbuf_gather_for_purge (&purge_list, dd);
2873  rtems_bdbuf_purge_list (&purge_list);
2874  rtems_bdbuf_unlock_cache ();
2875}
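
/*
 * Usage sketch (illustrative only): purging is typically done when the media
 * behind a device changes or the device is about to be removed. Modified
 * buffers are discarded, so anything that should survive must be synced
 * first.
 *
 *   (void) rtems_bdbuf_syncdev (dd);
 *   rtems_bdbuf_purge_dev (dd);
 */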
2876
2877rtems_status_code
2878rtems_bdbuf_set_block_size (rtems_disk_device *dd, uint32_t block_size)
2879{
2880  rtems_status_code sc = RTEMS_SUCCESSFUL;
2881
2882  rtems_bdbuf_lock_cache ();
2883
2884  if (block_size > 0)
2885  {
2886    size_t bds_per_group = rtems_bdbuf_bds_per_group (block_size);
2887
2888    if (bds_per_group != 0)
2889    {
2890      int block_to_media_block_shift = 0;
2891      uint32_t media_blocks_per_block = block_size / dd->media_block_size;
2892      uint32_t one = 1;
2893
2894      while ((one << block_to_media_block_shift) < media_blocks_per_block)
2895      {
2896        ++block_to_media_block_shift;
2897      }
2898
2899      if ((dd->media_block_size << block_to_media_block_shift) != block_size)
2900        block_to_media_block_shift = -1;
2901
2902      dd->block_size = block_size;
2903      dd->block_to_media_block_shift = block_to_media_block_shift;
2904      dd->bds_per_group = bds_per_group;
2905    }
2906    else
2907    {
2908      sc = RTEMS_INVALID_NUMBER;
2909    }
2910  }
2911  else
2912  {
2913    sc = RTEMS_INVALID_NUMBER;
2914  }
2915
2916  rtems_bdbuf_unlock_cache ();
2917
2918  return sc;
2919}
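
/*
 * Worked example (the values are assumptions): with a media block size of
 * 512 bytes a block size of 4096 bytes gives block_to_media_block_shift = 3,
 * so the get and read paths can map user blocks to media blocks with a shift
 * instead of a division. A block size of 1536 bytes is not a power-of-two
 * multiple of 512, so the shift becomes -1 and the division path is used. A
 * block size for which rtems_bdbuf_bds_per_group() finds no buffer group
 * layout is rejected with RTEMS_INVALID_NUMBER.
 *
 *   rtems_status_code sc = rtems_bdbuf_set_block_size (dd, 4096);
 */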