source: rtems/cpukit/libblock/src/bdbuf.c @ b1d4e42

4.104.11
Last change on this file since b1d4e42 was b1d4e42, checked in by Sebastian Huber <sebastian.huber@…>, on Apr 29, 2010 at 11:34:59 AM

2010-04-29 Sebastian Huber <sebastian.huber@…>

PR1507

  • libblock/src/bdbuf.c: Avoid NULL pointer access.
  • libblock/src/diskdevs.c: Fixed bounds check in rtems_disk_next().
  • Property mode set to 100644
File size: 80.2 KB
Line 
1/**
2 * @file
3 *
4 * @ingroup rtems_bdbuf
5 *
6 * Block device buffer management.
7 */
8
9/*
10 * Disk I/O buffering
 * Buffer management
12 *
13 * Copyright (C) 2001 OKTET Ltd., St.-Peterburg, Russia
14 * Author: Andrey G. Ivanov <Andrey.Ivanov@oktet.ru>
15 *         Victor V. Vengerov <vvv@oktet.ru>
16 *         Alexander Kukuta <kam@oktet.ru>
17 *
18 * Copyright (C) 2008,2009 Chris Johns <chrisj@rtems.org>
19 *    Rewritten to remove score mutex access. Fixes many performance
20 *    issues.
21 *
22 * Copyright (c) 2009 embedded brains GmbH.
23 *
24 * @(#) bdbuf.c,v 1.14 2004/04/17 08:15:17 ralf Exp
25 */
26
27/**
28 * Set to 1 to enable debug tracing.
29 */
30#define RTEMS_BDBUF_TRACE 0
31
32#if HAVE_CONFIG_H
33#include "config.h"
34#endif
35#include <limits.h>
36#include <errno.h>
37#include <assert.h>
38#include <stdio.h>
39#include <string.h>
40#include <inttypes.h>
41
42#include <rtems.h>
43#include <rtems/error.h>
44#include <rtems/malloc.h>
45
46#include "rtems/bdbuf.h"
47
48#define BDBUF_INVALID_DEV ((dev_t) -1)
49
50/*
51 * Simpler label for this file.
52 */
53#define bdbuf_config rtems_bdbuf_configuration
54
55/**
56 * A swapout transfer transaction data. This data is passed to a worked thread
57 * to handle the write phase of the transfer.
58 */
59typedef struct rtems_bdbuf_swapout_transfer
60{
61  rtems_chain_control   bds;         /**< The transfer list of BDs. */
62  dev_t                 dev;         /**< The device the transfer is for. */
63  rtems_blkdev_request* write_req;   /**< The write request array. */
64  uint32_t              bufs_per_bd; /**< Number of buffers per bd. */
65} rtems_bdbuf_swapout_transfer;
66
/**
 * Swapout worker thread. These are available to take processing from the
 * main swapout thread and handle the I/O operation.
 */
typedef struct rtems_bdbuf_swapout_worker
{
  rtems_chain_node             link;     /**< The threads sit on a chain when
                                          * idle. */
  rtems_id                     id;       /**< The id of the task so we can wake
                                          * it. */
  volatile bool                enabled;  /**< The worker is enabled. */
  rtems_bdbuf_swapout_transfer transfer; /**< The transfer data for this
                                          * thread. */
} rtems_bdbuf_swapout_worker;
81
/**
 * Buffer waiters synchronization.  The counter lets wakers skip the semaphore
 * flush when no task is waiting.
 */
typedef struct rtems_bdbuf_waiters {
  volatile unsigned count; /**< Number of tasks currently blocked. */
  rtems_id sema;           /**< Semaphore the waiters block on. */
} rtems_bdbuf_waiters;
89
/**
 * The BD buffer cache.  A single instance (bdbuf_cache) holds all buffer
 * descriptors, the lookup tree and the buffer state lists.  All fields are
 * protected by the cache lock unless noted otherwise.
 */
typedef struct rtems_bdbuf_cache
{
  rtems_id            swapout;           /**< Swapout task ID */
  volatile bool       swapout_enabled;   /**< Swapout is only running if
                                          * enabled. Set to false to kill the
                                          * swap out task. It deletes itself. */
  rtems_chain_control swapout_workers;   /**< The work threads for the swapout
                                          * task. */

  rtems_bdbuf_buffer* bds;               /**< Pointer to table of buffer
                                          * descriptors. */
  void*               buffers;           /**< The buffer's memory. */
  size_t              buffer_min_count;  /**< Number of minimum size buffers
                                          * that fit the buffer memory. */
  size_t              max_bds_per_group; /**< The number of BDs of minimum
                                          * buffer size that fit in a group. */
  uint32_t            flags;             /**< Configuration flags. */

  rtems_id            lock;              /**< The cache lock. It locks all
                                          * cache data, BD and lists. */
  rtems_id            sync_lock;         /**< Sync calls block writes. */
  volatile bool       sync_active;       /**< True if a sync is active. */
  volatile rtems_id   sync_requester;    /**< The sync requester. */
  volatile dev_t      sync_device;       /**< The device to sync and
                                          * BDBUF_INVALID_DEV not a device
                                          * sync. */

  rtems_bdbuf_buffer* tree;              /**< Buffer descriptor lookup AVL tree
                                          * root. There is only one. */
  rtems_chain_control lru;               /**< Least recently used list */
  rtems_chain_control modified;          /**< Modified buffers list */
  rtems_chain_control sync;              /**< Buffers to sync list */

  rtems_bdbuf_waiters access_waiters;    /**< Wait for a buffer in
                                          * ACCESS_CACHED, ACCESS_MODIFIED or
                                          * ACCESS_EMPTY
                                          * state. */
  rtems_bdbuf_waiters transfer_waiters;  /**< Wait for a buffer in TRANSFER
                                          * state. */
  rtems_bdbuf_waiters buffer_waiters;    /**< Wait for a buffer and no one is
                                          * available. */

  size_t              group_count;       /**< The number of groups. */
  rtems_bdbuf_group*  groups;            /**< The groups. */

  bool                initialised;       /**< Initialised state. */
} rtems_bdbuf_cache;
140
141/**
142 * Fatal errors
143 */
144#define RTEMS_BLKDEV_FATAL_ERROR(n) \
145  (((uint32_t)'B' << 24) | ((uint32_t)(n) & (uint32_t)0x00FFFFFF))
146
147#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_11      RTEMS_BLKDEV_FATAL_ERROR(1)
148#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_4       RTEMS_BLKDEV_FATAL_ERROR(2)
149#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_5       RTEMS_BLKDEV_FATAL_ERROR(3)
150#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_6       RTEMS_BLKDEV_FATAL_ERROR(4)
151#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_7       RTEMS_BLKDEV_FATAL_ERROR(5)
152#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_8       RTEMS_BLKDEV_FATAL_ERROR(6)
153#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_9       RTEMS_BLKDEV_FATAL_ERROR(7)
154#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_10      RTEMS_BLKDEV_FATAL_ERROR(8)
155#define RTEMS_BLKDEV_FATAL_BDBUF_TREE_RM       RTEMS_BLKDEV_FATAL_ERROR(9)
156#define RTEMS_BLKDEV_FATAL_BDBUF_SWAPOUT       RTEMS_BLKDEV_FATAL_ERROR(10)
157#define RTEMS_BLKDEV_FATAL_BDBUF_SYNC_LOCK     RTEMS_BLKDEV_FATAL_ERROR(11)
158#define RTEMS_BLKDEV_FATAL_BDBUF_SYNC_UNLOCK   RTEMS_BLKDEV_FATAL_ERROR(12)
159#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_LOCK    RTEMS_BLKDEV_FATAL_ERROR(13)
160#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_UNLOCK  RTEMS_BLKDEV_FATAL_ERROR(14)
161#define RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_DIS   RTEMS_BLKDEV_FATAL_ERROR(15)
162#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_2  RTEMS_BLKDEV_FATAL_ERROR(16)
163#define RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_RST   RTEMS_BLKDEV_FATAL_ERROR(17)
164#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_TO RTEMS_BLKDEV_FATAL_ERROR(18)
165#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAKE    RTEMS_BLKDEV_FATAL_ERROR(19)
166#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE       RTEMS_BLKDEV_FATAL_ERROR(20)
167#define RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM      RTEMS_BLKDEV_FATAL_ERROR(21)
168#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_CREATE  RTEMS_BLKDEV_FATAL_ERROR(22)
169#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_START   RTEMS_BLKDEV_FATAL_ERROR(23)
170#define BLKDEV_FATAL_BDBUF_SWAPOUT_RE          RTEMS_BLKDEV_FATAL_ERROR(24)
171#define BLKDEV_FATAL_BDBUF_SWAPOUT_TS          RTEMS_BLKDEV_FATAL_ERROR(25)
172#define RTEMS_BLKDEV_FATAL_BDBUF_WAIT_EVNT     RTEMS_BLKDEV_FATAL_ERROR(26)
173#define RTEMS_BLKDEV_FATAL_BDBUF_RECYCLE       RTEMS_BLKDEV_FATAL_ERROR(27)
174#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_0       RTEMS_BLKDEV_FATAL_ERROR(28)
175#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_1       RTEMS_BLKDEV_FATAL_ERROR(29)
176#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_2       RTEMS_BLKDEV_FATAL_ERROR(30)
177#define RTEMS_BLKDEV_FATAL_BDBUF_DISK_REL      RTEMS_BLKDEV_FATAL_ERROR(31)
178
179/**
180 * The events used in this code. These should be system events rather than
181 * application events.
182 */
183#define RTEMS_BDBUF_TRANSFER_SYNC  RTEMS_EVENT_1
184#define RTEMS_BDBUF_SWAPOUT_SYNC   RTEMS_EVENT_2
185
186/**
187 * The swap out task size. Should be more than enough for most drivers with
188 * tracing turned on.
189 */
190#define SWAPOUT_TASK_STACK_SIZE (8 * 1024)
191
192/**
193 * Lock semaphore attributes. This is used for locking type mutexes.
194 *
195 * @warning Priority inheritance is on.
196 */
197#define RTEMS_BDBUF_CACHE_LOCK_ATTRIBS \
198  (RTEMS_PRIORITY | RTEMS_BINARY_SEMAPHORE | \
199   RTEMS_INHERIT_PRIORITY | RTEMS_NO_PRIORITY_CEILING | RTEMS_LOCAL)
200
201/**
202 * Waiter semaphore attributes.
203 *
204 * @warning Do not configure as inherit priority. If a driver is in the driver
205 *          initialisation table this locked semaphore will have the IDLE task
206 *          as the holder and a blocking task will raise the priority of the
 *          IDLE task which can cause unusual side effects.
208 */
209#define RTEMS_BDBUF_CACHE_WAITER_ATTRIBS \
210  (RTEMS_PRIORITY | RTEMS_SIMPLE_BINARY_SEMAPHORE | \
211   RTEMS_NO_INHERIT_PRIORITY | RTEMS_NO_PRIORITY_CEILING | RTEMS_LOCAL)
212
213/**
214 * Waiter timeout. Set to non-zero to find some info on a waiter that is
215 * waiting too long.
216 */
217#define RTEMS_BDBUF_WAIT_TIMEOUT RTEMS_NO_TIMEOUT
218#if !defined (RTEMS_BDBUF_WAIT_TIMEOUT)
219#define RTEMS_BDBUF_WAIT_TIMEOUT \
220  (TOD_MICROSECONDS_TO_TICKS (20000000))
221#endif
222
223/*
224 * The swap out task.
225 */
226static rtems_task rtems_bdbuf_swapout_task(rtems_task_argument arg);
227
228/**
229 * The Buffer Descriptor cache.
230 */
231static rtems_bdbuf_cache bdbuf_cache;
232
233#if RTEMS_BDBUF_TRACE
234/**
235 * If true output the trace message.
236 */
237bool rtems_bdbuf_tracer;
238
239/**
240 * Return the number of items on the list.
241 *
242 * @param list The chain control.
243 * @return uint32_t The number of items on the list.
244 */
245uint32_t
246rtems_bdbuf_list_count (rtems_chain_control* list)
247{
248  rtems_chain_node* node = rtems_chain_first (list);
249  uint32_t          count = 0;
250  while (!rtems_chain_is_tail (list, node))
251  {
252    count++;
253    node = rtems_chain_next (node);
254  }
255  return count;
256}
257
258/**
259 * Show the usage for the bdbuf cache.
260 */
261void
262rtems_bdbuf_show_usage (void)
263{
264  uint32_t group;
265  uint32_t total = 0;
266  uint32_t val;
267
268  for (group = 0; group < bdbuf_cache.group_count; group++)
269    total += bdbuf_cache.groups[group].users;
270  printf ("bdbuf:group users=%lu", total);
271  val = rtems_bdbuf_list_count (&bdbuf_cache.lru);
272  printf (", lru=%lu", val);
273  total = val;
274  val = rtems_bdbuf_list_count (&bdbuf_cache.modified);
275  printf (", mod=%lu", val);
276  total += val;
277  val = rtems_bdbuf_list_count (&bdbuf_cache.sync);
278  printf (", sync=%lu", val);
279  total += val;
280  printf (", total=%lu\n", total);
281}
282
283/**
284 * Show the users for a group of a bd.
285 *
286 * @param where A label to show the context of output.
287 * @param bd The bd to show the users of.
288 */
289void
290rtems_bdbuf_show_users (const char* where, rtems_bdbuf_buffer* bd)
291{
292  const char* states[] =
293    { "EM", "FR", "CH", "AC", "AM", "MD", "SY", "TR" };
294
295  printf ("bdbuf:users: %15s: [%" PRIu32 " (%s)] %td:%td = %" PRIu32 " %s\n",
296          where,
297          bd->block, states[bd->state],
298          bd->group - bdbuf_cache.groups,
299          bd - bdbuf_cache.bds,
300          bd->group->users,
301          bd->group->users > 8 ? "<<<<<<<" : "");
302}
303#else
304#define rtems_bdbuf_tracer (0)
305#define rtems_bdbuf_show_usage() ((void) 0)
306#define rtems_bdbuf_show_users(_w, _b) ((void) 0)
307#endif
308
309/**
310 * The default maximum height of 32 allows for AVL trees having between
311 * 5,704,880 and 4,294,967,295 nodes, depending on order of insertion.  You may
312 * change this compile-time constant as you wish.
313 */
314#ifndef RTEMS_BDBUF_AVL_MAX_HEIGHT
315#define RTEMS_BDBUF_AVL_MAX_HEIGHT (32)
316#endif
317
318static void
319rtems_bdbuf_fatal (rtems_bdbuf_buf_state state, uint32_t error)
320{
321  rtems_fatal_error_occurred ((((uint32_t) state) << 16) | error);
322}
323
324/**
325 * Searches for the node with specified dev/block.
326 *
327 * @param root pointer to the root node of the AVL-Tree
328 * @param dev device search key
329 * @param block block search key
330 * @retval NULL node with the specified dev/block is not found
331 * @return pointer to the node with specified dev/block
332 */
333static rtems_bdbuf_buffer *
334rtems_bdbuf_avl_search (rtems_bdbuf_buffer** root,
335                        dev_t                dev,
336                        rtems_blkdev_bnum    block)
337{
338  rtems_bdbuf_buffer* p = *root;
339
340  while ((p != NULL) && ((p->dev != dev) || (p->block != block)))
341  {
342    if ((p->dev < dev) || ((p->dev == dev) && (p->block < block)))
343    {
344      p = p->avl.right;
345    }
346    else
347    {
348      p = p->avl.left;
349    }
350  }
351
352  return p;
353}
354
/**
 * Inserts the specified node to the AVL-Tree.
 *
 * The path from the root to the insertion point is recorded on a fixed-size
 * stack (bounded by RTEMS_BDBUF_AVL_MAX_HEIGHT) and replayed bottom-up to
 * restore the AVL balance invariant.  Each visited node's avl.cache field
 * remembers the direction taken (+1 right, -1 left).
 *
 * @param root pointer to the root node of the AVL-Tree
 * @param node Pointer to the node to add.
 * @retval 0 The node added successfully
 * @retval -1 An error occurred (a node with the same dev/block already
 *            exists)
 */
static int
rtems_bdbuf_avl_insert(rtems_bdbuf_buffer** root,
                       rtems_bdbuf_buffer*  node)
{
  dev_t             dev = node->dev;
  rtems_blkdev_bnum block = node->block;

  rtems_bdbuf_buffer*  p = *root;
  rtems_bdbuf_buffer*  q;
  rtems_bdbuf_buffer*  p1;
  rtems_bdbuf_buffer*  p2;
  rtems_bdbuf_buffer*  buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT];
  rtems_bdbuf_buffer** buf_prev = buf_stack;

  bool modified = false;

  /* Empty tree: the new node becomes the root. */
  if (p == NULL)
  {
    *root = node;
    node->avl.left = NULL;
    node->avl.right = NULL;
    node->avl.bal = 0;
    return 0;
  }

  /* Walk down to the insertion point, recording the path and direction. */
  while (p != NULL)
  {
    *buf_prev++ = p;

    if ((p->dev < dev) || ((p->dev == dev) && (p->block < block)))
    {
      p->avl.cache = 1;
      q = p->avl.right;
      if (q == NULL)
      {
        /* NOTE(review): the first assignment is redundant; the following
         * line assigns q = node again while linking the child. */
        q = node;
        p->avl.right = q = node;
        break;
      }
    }
    else if ((p->dev != dev) || (p->block != block))
    {
      p->avl.cache = -1;
      q = p->avl.left;
      if (q == NULL)
      {
        q = node;
        p->avl.left = q;
        break;
      }
    }
    else
    {
      /* Duplicate key: a node with this dev/block is already present. */
      return -1;
    }

    p = q;
  }

  /* Initialise the freshly linked leaf. */
  q->avl.left = q->avl.right = NULL;
  q->avl.bal = 0;
  modified = true;
  buf_prev--;

  /* Replay the recorded path bottom-up, updating balance factors and
   * rotating where a subtree has become too tall. */
  while (modified)
  {
    if (p->avl.cache == -1)
    {
      switch (p->avl.bal)
      {
        case 1:
          p->avl.bal = 0;
          modified = false;
          break;

        case 0:
          p->avl.bal = -1;
          break;

        case -1:
          p1 = p->avl.left;
          if (p1->avl.bal == -1) /* simple LL-turn */
          {
            p->avl.left = p1->avl.right;
            p1->avl.right = p;
            p->avl.bal = 0;
            p = p1;
          }
          else /* double LR-turn */
          {
            p2 = p1->avl.right;
            p1->avl.right = p2->avl.left;
            p2->avl.left = p1;
            p->avl.left = p2->avl.right;
            p2->avl.right = p;
            if (p2->avl.bal == -1) p->avl.bal = +1; else p->avl.bal = 0;
            if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0;
            p = p2;
          }
          p->avl.bal = 0;
          modified = false;
          break;

        default:
          break;
      }
    }
    else
    {
      switch (p->avl.bal)
      {
        case -1:
          p->avl.bal = 0;
          modified = false;
          break;

        case 0:
          p->avl.bal = 1;
          break;

        case 1:
          p1 = p->avl.right;
          if (p1->avl.bal == 1) /* simple RR-turn */
          {
            p->avl.right = p1->avl.left;
            p1->avl.left = p;
            p->avl.bal = 0;
            p = p1;
          }
          else /* double RL-turn */
          {
            p2 = p1->avl.left;
            p1->avl.left = p2->avl.right;
            p2->avl.right = p1;
            p->avl.right = p2->avl.left;
            p2->avl.left = p;
            if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0;
            if (p2->avl.bal == -1) p1->avl.bal = +1; else p1->avl.bal = 0;
            p = p2;
          }
          p->avl.bal = 0;
          modified = false;
          break;

        default:
          break;
      }
    }
    /* Re-attach the (possibly rotated) subtree to its parent, or make it
     * the new root when the stack is exhausted. */
    q = p;
    if (buf_prev > buf_stack)
    {
      p = *--buf_prev;

      if (p->avl.cache == -1)
      {
        p->avl.left = q;
      }
      else
      {
        p->avl.right = q;
      }
    }
    else
    {
      *root = p;
      break;
    }
  };

  return 0;
}
534
535
/**
 * Removes the node from the tree.
 *
 * The search path is recorded on a fixed-size stack (bounded by
 * RTEMS_BDBUF_AVL_MAX_HEIGHT) with the direction taken stored in each node's
 * avl.cache field (+1 right, -1 left).  After unlinking the node the path is
 * replayed bottom-up to restore the AVL balance invariant.
 *
 * @param root Pointer to pointer to the root node
 * @param node Pointer to the node to remove
 * @retval 0 Item removed
 * @retval -1 No such item found
 */
static int
rtems_bdbuf_avl_remove(rtems_bdbuf_buffer**      root,
                       const rtems_bdbuf_buffer* node)
{
  dev_t             dev = node->dev;
  rtems_blkdev_bnum block = node->block;

  rtems_bdbuf_buffer*  p = *root;
  rtems_bdbuf_buffer*  q;
  rtems_bdbuf_buffer*  r;
  rtems_bdbuf_buffer*  s;
  rtems_bdbuf_buffer*  p1;
  rtems_bdbuf_buffer*  p2;
  rtems_bdbuf_buffer*  buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT];
  rtems_bdbuf_buffer** buf_prev = buf_stack;

  bool modified = false;

  memset (buf_stack, 0, sizeof(buf_stack));

  /* Search for the node, recording the path and direction taken. */
  while (p != NULL)
  {
    *buf_prev++ = p;

    if ((p->dev < dev) || ((p->dev == dev) && (p->block < block)))
    {
      p->avl.cache = 1;
      p = p->avl.right;
    }
    else if ((p->dev != dev) || (p->block != block))
    {
      p->avl.cache = -1;
      p = p->avl.left;
    }
    else
    {
      /* node found */
      break;
    }
  }

  if (p == NULL)
  {
    /* there is no such node */
    return -1;
  }

  q = p;

  buf_prev--;
  if (buf_prev > buf_stack)
  {
    p = *(buf_prev - 1);
  }
  else
  {
    p = NULL;
  }

  /* at this moment q - is a node to delete, p is q's parent */
  if (q->avl.right == NULL)
  {
    /* No right child: splice in the left subtree directly. */
    r = q->avl.left;
    if (r != NULL)
    {
      r->avl.bal = 0;
    }
    q = r;
  }
  else
  {
    rtems_bdbuf_buffer **t;

    r = q->avl.right;

    if (r->avl.left == NULL)
    {
      /* Right child has no left subtree: it replaces q in place. */
      r->avl.left = q->avl.left;
      r->avl.bal = q->avl.bal;
      r->avl.cache = 1;
      *buf_prev++ = q = r;
    }
    else
    {
      /* General case: find the in-order successor (leftmost node of the
       * right subtree) and substitute it for q; t remembers the stack slot
       * where the successor must be recorded. */
      t = buf_prev++;
      s = r;

      while (s->avl.left != NULL)
      {
        *buf_prev++ = r = s;
        s = r->avl.left;
        r->avl.cache = -1;
      }

      s->avl.left = q->avl.left;
      r->avl.left = s->avl.right;
      s->avl.right = q->avl.right;
      s->avl.bal = q->avl.bal;
      s->avl.cache = 1;

      *t = q = s;
    }
  }

  /* Link the replacement into q's former parent (or the root). */
  if (p != NULL)
  {
    if (p->avl.cache == -1)
    {
      p->avl.left = q;
    }
    else
    {
      p->avl.right = q;
    }
  }
  else
  {
    *root = q;
  }

  modified = true;

  /* Replay the recorded path bottom-up, rebalancing shrunken subtrees
   * until heights are restored or the root is reached. */
  while (modified)
  {
    if (buf_prev > buf_stack)
    {
      p = *--buf_prev;
    }
    else
    {
      break;
    }

    if (p->avl.cache == -1)
    {
      /* rebalance left branch */
      switch (p->avl.bal)
      {
        case -1:
          p->avl.bal = 0;
          break;
        case  0:
          p->avl.bal = 1;
          modified = false;
          break;

        case +1:
          p1 = p->avl.right;

          if (p1->avl.bal >= 0) /* simple RR-turn */
          {
            p->avl.right = p1->avl.left;
            p1->avl.left = p;

            if (p1->avl.bal == 0)
            {
              p1->avl.bal = -1;
              modified = false;
            }
            else
            {
              p->avl.bal = 0;
              p1->avl.bal = 0;
            }
            p = p1;
          }
          else /* double RL-turn */
          {
            p2 = p1->avl.left;

            p1->avl.left = p2->avl.right;
            p2->avl.right = p1;
            p->avl.right = p2->avl.left;
            p2->avl.left = p;

            if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0;
            if (p2->avl.bal == -1) p1->avl.bal = 1; else p1->avl.bal = 0;

            p = p2;
            p2->avl.bal = 0;
          }
          break;

        default:
          break;
      }
    }
    else
    {
      /* rebalance right branch */
      switch (p->avl.bal)
      {
        case +1:
          p->avl.bal = 0;
          break;

        case  0:
          p->avl.bal = -1;
          modified = false;
          break;

        case -1:
          p1 = p->avl.left;

          if (p1->avl.bal <= 0) /* simple LL-turn */
          {
            p->avl.left = p1->avl.right;
            p1->avl.right = p;
            if (p1->avl.bal == 0)
            {
              p1->avl.bal = 1;
              modified = false;
            }
            else
            {
              p->avl.bal = 0;
              p1->avl.bal = 0;
            }
            p = p1;
          }
          else /* double LR-turn */
          {
            p2 = p1->avl.right;

            p1->avl.right = p2->avl.left;
            p2->avl.left = p1;
            p->avl.left = p2->avl.right;
            p2->avl.right = p;

            if (p2->avl.bal == -1) p->avl.bal = 1; else p->avl.bal = 0;
            if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0;

            p = p2;
            p2->avl.bal = 0;
          }
          break;

        default:
          break;
      }
    }

    /* Re-attach the (possibly rotated) subtree to its parent, or make it
     * the new root when the stack is exhausted. */
    if (buf_prev > buf_stack)
    {
      q = *(buf_prev - 1);

      if (q->avl.cache == -1)
      {
        q->avl.left = p;
      }
      else
      {
        q->avl.right = p;
      }
    }
    else
    {
      *root = p;
      break;
    }

  }

  return 0;
}
809
810static void
811rtems_bdbuf_set_state (rtems_bdbuf_buffer *bd, rtems_bdbuf_buf_state state)
812{
813  bd->state = state;
814}
815
816/**
817 * Change the block number for the block size to the block number for the media
818 * block size. We have to use 64bit maths. There is no short cut here.
819 *
820 * @param block The logical block number in the block size terms.
821 * @param block_size The block size.
822 * @param media_block_size The block size of the media.
823 * @return rtems_blkdev_bnum The media block number.
824 */
825static rtems_blkdev_bnum
826rtems_bdbuf_media_block (rtems_blkdev_bnum block,
827                         size_t            block_size,
828                         size_t            media_block_size)
829{
830  return (rtems_blkdev_bnum)
831    ((((uint64_t) block) * block_size) / media_block_size);
832}
833
834/**
835 * Lock the mutex. A single task can nest calls.
836 *
837 * @param lock The mutex to lock.
838 * @param fatal_error_code The error code if the call fails.
839 */
840static void
841rtems_bdbuf_lock (rtems_id lock, uint32_t fatal_error_code)
842{
843  rtems_status_code sc = rtems_semaphore_obtain (lock,
844                                                 RTEMS_WAIT,
845                                                 RTEMS_NO_TIMEOUT);
846  if (sc != RTEMS_SUCCESSFUL)
847    rtems_fatal_error_occurred (fatal_error_code);
848}
849
850/**
851 * Unlock the mutex.
852 *
853 * @param lock The mutex to unlock.
854 * @param fatal_error_code The error code if the call fails.
855 */
856static void
857rtems_bdbuf_unlock (rtems_id lock, uint32_t fatal_error_code)
858{
859  rtems_status_code sc = rtems_semaphore_release (lock);
860  if (sc != RTEMS_SUCCESSFUL)
861    rtems_fatal_error_occurred (fatal_error_code);
862}
863
864/**
865 * Lock the cache. A single task can nest calls.
866 */
867static void
868rtems_bdbuf_lock_cache (void)
869{
870  rtems_bdbuf_lock (bdbuf_cache.lock, RTEMS_BLKDEV_FATAL_BDBUF_CACHE_LOCK);
871}
872
873/**
874 * Unlock the cache.
875 */
876static void
877rtems_bdbuf_unlock_cache (void)
878{
879  rtems_bdbuf_unlock (bdbuf_cache.lock, RTEMS_BLKDEV_FATAL_BDBUF_CACHE_UNLOCK);
880}
881
882/**
883 * Lock the cache's sync. A single task can nest calls.
884 */
885static void
886rtems_bdbuf_lock_sync (void)
887{
888  rtems_bdbuf_lock (bdbuf_cache.sync_lock, RTEMS_BLKDEV_FATAL_BDBUF_SYNC_LOCK);
889}
890
891/**
892 * Unlock the cache's sync lock. Any blocked writers are woken.
893 */
894static void
895rtems_bdbuf_unlock_sync (void)
896{
897  rtems_bdbuf_unlock (bdbuf_cache.sync_lock,
898                      RTEMS_BLKDEV_FATAL_BDBUF_SYNC_UNLOCK);
899}
900
901static void
902rtems_bdbuf_group_obtain (rtems_bdbuf_buffer *bd)
903{
904  ++bd->group->users;
905}
906
907static void
908rtems_bdbuf_group_release (rtems_bdbuf_buffer *bd)
909{
910  --bd->group->users;
911}
912
913static rtems_mode
914rtems_bdbuf_disable_preemption (void)
915{
916  rtems_status_code sc = RTEMS_SUCCESSFUL;
917  rtems_mode prev_mode = 0;
918
919  sc = rtems_task_mode (RTEMS_NO_PREEMPT, RTEMS_PREEMPT_MASK, &prev_mode);
920  if (sc != RTEMS_SUCCESSFUL)
921    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_DIS);
922
923  return prev_mode;
924}
925
926static void
927rtems_bdbuf_restore_preemption (rtems_mode prev_mode)
928{
929  rtems_status_code sc = RTEMS_SUCCESSFUL;
930
931  sc = rtems_task_mode (prev_mode, RTEMS_ALL_MODE_MASKS, &prev_mode);
932  if (sc != RTEMS_SUCCESSFUL)
933    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_RST);
934}
935
/**
 * Wait until woken. Semaphores are used so a number of tasks can wait and can
 * be woken at once. Task events would require we maintain a list of tasks to
 * be woken and this would require storage and we do not know the number of
 * tasks that could be waiting.
 *
 * While we have the cache locked we can try and claim the semaphore and
 * therefore know when we release the lock to the cache we will block until the
 * semaphore is released. This may even happen before we get to block.
 *
 * A counter is used to save the release call when no one is waiting.
 *
 * The function assumes the cache is locked on entry and it will be locked on
 * exit.
 */
static void
rtems_bdbuf_anonymous_wait (rtems_bdbuf_waiters *waiters)
{
  rtems_status_code sc;
  rtems_mode        prev_mode;

  /*
   * Indicate we are waiting.
   */
  ++waiters->count;

  /*
   * Disable preemption then unlock the cache and block.  There is no POSIX
   * condition variable in the core API so this is a work around.
   *
   * The issue is a task could preempt after the cache is unlocked because it is
   * blocking or just hits that window, and before this task has blocked on the
   * semaphore. If the preempting task flushes the queue this task will not see
   * the flush and may block for ever or until another transaction flushes this
   * semaphore.
   */
  prev_mode = rtems_bdbuf_disable_preemption ();

  /*
   * Unlock the cache, wait, and lock the cache when we return.
   */
  rtems_bdbuf_unlock_cache ();

  sc = rtems_semaphore_obtain (waiters->sema, RTEMS_WAIT, RTEMS_BDBUF_WAIT_TIMEOUT);

  if (sc == RTEMS_TIMEOUT)
    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_TO);

  /*
   * Waiters are only ever woken by rtems_bdbuf_wake() flushing the
   * semaphore, which completes each blocked obtain with RTEMS_UNSATISFIED.
   * Any other status (including RTEMS_SUCCESSFUL, which would mean the
   * semaphore was released rather than flushed) indicates a broken wake
   * protocol and is fatal.
   */
  if (sc != RTEMS_UNSATISFIED)
    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_2);

  rtems_bdbuf_lock_cache ();

  rtems_bdbuf_restore_preemption (prev_mode);

  --waiters->count;
}
993
/**
 * Wait for an event on a specific buffer.  The bd's waiter count and its
 * group's user count are raised for the duration of the wait; other code
 * checks these counts before recycling a buffer (see
 * rtems_bdbuf_discard_buffer), so the bd stays valid while we block.
 *
 * The cache must be locked on entry and is locked on exit (see
 * rtems_bdbuf_anonymous_wait).
 */
static void
rtems_bdbuf_wait (rtems_bdbuf_buffer *bd, rtems_bdbuf_waiters *waiters)
{
  rtems_bdbuf_group_obtain (bd);
  ++bd->waiters;
  rtems_bdbuf_anonymous_wait (waiters);
  --bd->waiters;
  rtems_bdbuf_group_release (bd);
}
1003
1004/**
1005 * Wake a blocked resource. The resource has a counter that lets us know if
1006 * there are any waiters.
1007 */
1008static void
1009rtems_bdbuf_wake (const rtems_bdbuf_waiters *waiters)
1010{
1011  rtems_status_code sc = RTEMS_SUCCESSFUL;
1012
1013  if (waiters->count > 0)
1014  {
1015    sc = rtems_semaphore_flush (waiters->sema);
1016    if (sc != RTEMS_SUCCESSFUL)
1017      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAKE);
1018  }
1019}
1020
1021static void
1022rtems_bdbuf_wake_swapper (void)
1023{
1024  rtems_status_code sc = rtems_event_send (bdbuf_cache.swapout,
1025                                           RTEMS_BDBUF_SWAPOUT_SYNC);
1026  if (sc != RTEMS_SUCCESSFUL)
1027    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE);
1028}
1029
1030static bool
1031rtems_bdbuf_has_buffer_waiters (void)
1032{
1033  return bdbuf_cache.buffer_waiters.count;
1034}
1035
1036static void
1037rtems_bdbuf_remove_from_tree (rtems_bdbuf_buffer *bd)
1038{
1039  if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0)
1040    rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_TREE_RM);
1041}
1042
1043static void
1044rtems_bdbuf_remove_from_tree_and_lru_list (rtems_bdbuf_buffer *bd)
1045{
1046  switch (bd->state)
1047  {
1048    case RTEMS_BDBUF_STATE_FREE:
1049      break;
1050    case RTEMS_BDBUF_STATE_CACHED:
1051      rtems_bdbuf_remove_from_tree (bd);
1052      break;
1053    default:
1054      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_10);
1055  }
1056
1057  rtems_chain_extract (&bd->link);
1058}
1059
1060static void
1061rtems_bdbuf_make_free_and_add_to_lru_list (rtems_bdbuf_buffer *bd)
1062{
1063  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_FREE);
1064  rtems_chain_prepend (&bdbuf_cache.lru, &bd->link);
1065}
1066
1067static void
1068rtems_bdbuf_make_empty (rtems_bdbuf_buffer *bd)
1069{
1070  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_EMPTY);
1071}
1072
1073static void
1074rtems_bdbuf_make_cached_and_add_to_lru_list (rtems_bdbuf_buffer *bd)
1075{
1076  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_CACHED);
1077  rtems_chain_append (&bdbuf_cache.lru, &bd->link);
1078}
1079
1080static void
1081rtems_bdbuf_discard_buffer (rtems_bdbuf_buffer *bd)
1082{
1083  rtems_bdbuf_make_empty (bd);
1084
1085  if (bd->waiters == 0)
1086  {
1087    rtems_bdbuf_remove_from_tree (bd);
1088    rtems_bdbuf_make_free_and_add_to_lru_list (bd);
1089  }
1090}
1091
/*
 * Queue a buffer on the modified list after user access.  If a sync is active
 * for this buffer's device, block on the sync lock first (dropping and
 * re-taking the cache lock around the wait) so the sync sees a stable
 * modified list before this buffer joins it.
 */
static void
rtems_bdbuf_add_to_modified_list_after_access (rtems_bdbuf_buffer *bd)
{
  if (bdbuf_cache.sync_active && bdbuf_cache.sync_device == bd->dev)
  {
    rtems_bdbuf_unlock_cache ();

    /*
     * Wait for the sync lock.
     */
    rtems_bdbuf_lock_sync ();

    rtems_bdbuf_unlock_sync ();
    rtems_bdbuf_lock_cache ();
  }

  /*
   * Only the first modified release sets the timer and any further user
   * accesses do not change the timer value which should move down. This
   * assumes the user's hold of the buffer is much less than the time on the
   * modified list. Resetting the timer on each access which could result in a
   * buffer never getting to 0 and never being forced onto disk. This raises a
   * difficult question. Is a snapshot of a block that is changing better than
   * nothing being written? We have tended to think we should hold changes for
   * only a specific period of time even if still changing and get onto disk
   * and letting the file system try and recover this position if it can.
   */
  if (bd->state == RTEMS_BDBUF_STATE_ACCESS_CACHED
        || bd->state == RTEMS_BDBUF_STATE_ACCESS_EMPTY)
    bd->hold_timer = bdbuf_config.swap_block_hold;

  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_MODIFIED);
  rtems_chain_append (&bdbuf_cache.modified, &bd->link);

  /*
   * Wake access waiters on this buffer first; otherwise, if buffers are
   * scarce, poke the swapout task so modified buffers drain and free up.
   */
  if (bd->waiters)
    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
  else if (rtems_bdbuf_has_buffer_waiters ())
    rtems_bdbuf_wake_swapper ();
}
1131
1132static void
1133rtems_bdbuf_add_to_lru_list_after_access (rtems_bdbuf_buffer *bd)
1134{
1135  rtems_bdbuf_group_release (bd);
1136  rtems_bdbuf_make_cached_and_add_to_lru_list (bd);
1137
1138  if (bd->waiters)
1139    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
1140  else
1141    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1142}
1143
1144/**
1145 * Compute the number of BDs per group for a given buffer size.
1146 *
1147 * @param size The buffer size. It can be any size and we scale up.
1148 */
1149static size_t
1150rtems_bdbuf_bds_per_group (size_t size)
1151{
1152  size_t bufs_per_size;
1153  size_t bds_per_size;
1154
1155  if (size > bdbuf_config.buffer_max)
1156    return 0;
1157
1158  bufs_per_size = ((size - 1) / bdbuf_config.buffer_min) + 1;
1159
1160  for (bds_per_size = 1;
1161       bds_per_size < bufs_per_size;
1162       bds_per_size <<= 1)
1163    ;
1164
1165  return bdbuf_cache.max_bds_per_group / bds_per_size;
1166}
1167
1168static void
1169rtems_bdbuf_discard_buffer_after_access (rtems_bdbuf_buffer *bd)
1170{
1171  rtems_bdbuf_group_release (bd);
1172  rtems_bdbuf_discard_buffer (bd);
1173
1174  if (bd->waiters)
1175    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
1176  else
1177    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1178}
1179
1180/**
1181 * Reallocate a group. The BDs currently allocated in the group are removed
1182 * from the ALV tree and any lists then the new BD's are prepended to the ready
1183 * list of the cache.
1184 *
1185 * @param group The group to reallocate.
1186 * @param new_bds_per_group The new count of BDs per group.
1187 * @return A buffer of this group.
1188 */
1189static rtems_bdbuf_buffer *
1190rtems_bdbuf_group_realloc (rtems_bdbuf_group* group, size_t new_bds_per_group)
1191{
1192  rtems_bdbuf_buffer* bd;
1193  size_t              b;
1194  size_t              bufs_per_bd;
1195
1196  if (rtems_bdbuf_tracer)
1197    printf ("bdbuf:realloc: %tu: %zd -> %zd\n",
1198            group - bdbuf_cache.groups, group->bds_per_group,
1199            new_bds_per_group);
1200
1201  bufs_per_bd = bdbuf_cache.max_bds_per_group / group->bds_per_group;
1202
1203  for (b = 0, bd = group->bdbuf;
1204       b < group->bds_per_group;
1205       b++, bd += bufs_per_bd)
1206    rtems_bdbuf_remove_from_tree_and_lru_list (bd);
1207
1208  group->bds_per_group = new_bds_per_group;
1209  bufs_per_bd = bdbuf_cache.max_bds_per_group / new_bds_per_group;
1210
1211  for (b = 1, bd = group->bdbuf + bufs_per_bd;
1212       b < group->bds_per_group;
1213       b++, bd += bufs_per_bd)
1214    rtems_bdbuf_make_free_and_add_to_lru_list (bd);
1215
1216  if (b > 1)
1217    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1218
1219  return group->bdbuf;
1220}
1221
1222static void
1223rtems_bdbuf_setup_empty_buffer (rtems_bdbuf_buffer *bd,
1224                                dev_t               dev,
1225                                rtems_blkdev_bnum   block)
1226{
1227  bd->dev       = dev;
1228  bd->block     = block;
1229  bd->avl.left  = NULL;
1230  bd->avl.right = NULL;
1231  bd->waiters   = 0;
1232
1233  if (rtems_bdbuf_avl_insert (&bdbuf_cache.tree, bd) != 0)
1234    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_RECYCLE);
1235
1236  rtems_bdbuf_make_empty (bd);
1237}
1238
1239static rtems_bdbuf_buffer *
1240rtems_bdbuf_get_buffer_from_lru_list (dev_t             dev,
1241                                      rtems_blkdev_bnum block,
1242                                      size_t            bds_per_group)
1243{
1244  rtems_chain_node *node = rtems_chain_first (&bdbuf_cache.lru);
1245
1246  while (!rtems_chain_is_tail (&bdbuf_cache.lru, node))
1247  {
1248    rtems_bdbuf_buffer *bd = (rtems_bdbuf_buffer *) node;
1249    rtems_bdbuf_buffer *empty_bd = NULL;
1250
1251    if (rtems_bdbuf_tracer)
1252      printf ("bdbuf:next-bd: %tu (%td:%" PRId32 ") %zd -> %zd\n",
1253              bd - bdbuf_cache.bds,
1254              bd->group - bdbuf_cache.groups, bd->group->users,
1255              bd->group->bds_per_group, bds_per_group);
1256
1257    /*
1258     * If nobody waits for this BD, we may recycle it.
1259     */
1260    if (bd->waiters == 0)
1261    {
1262      if (bd->group->bds_per_group == bds_per_group)
1263      {
1264        rtems_bdbuf_remove_from_tree_and_lru_list (bd);
1265
1266        empty_bd = bd;
1267      }
1268      else if (bd->group->users == 0)
1269        empty_bd = rtems_bdbuf_group_realloc (bd->group, bds_per_group);
1270    }
1271
1272    if (empty_bd != NULL)
1273    {
1274      rtems_bdbuf_setup_empty_buffer (empty_bd, dev, block);
1275
1276      return empty_bd;
1277    }
1278
1279    node = rtems_chain_next (node);
1280  }
1281
1282  return NULL;
1283}
1284
1285/**
1286 * Initialise the cache.
1287 *
1288 * @return rtems_status_code The initialisation status.
1289 */
1290rtems_status_code
1291rtems_bdbuf_init (void)
1292{
1293  rtems_bdbuf_group*  group;
1294  rtems_bdbuf_buffer* bd;
1295  uint8_t*            buffer;
1296  size_t              b;
1297  size_t              cache_aligment;
1298  rtems_status_code   sc;
1299  rtems_mode          prev_mode;
1300
1301  if (rtems_bdbuf_tracer)
1302    printf ("bdbuf:init\n");
1303
1304  if (rtems_interrupt_is_in_progress())
1305    return RTEMS_CALLED_FROM_ISR;
1306
1307  /*
1308   * Check the configuration table values.
1309   */
1310  if ((bdbuf_config.buffer_max % bdbuf_config.buffer_min) != 0)
1311    return RTEMS_INVALID_NUMBER;
1312
1313  /*
1314   * We use a special variable to manage the initialisation incase we have
1315   * completing threads doing this. You may get errors if the another thread
1316   * makes a call and we have not finished initialisation.
1317   */
1318  prev_mode = rtems_bdbuf_disable_preemption ();
1319  if (bdbuf_cache.initialised)
1320  {
1321    rtems_bdbuf_restore_preemption (prev_mode);
1322
1323    return RTEMS_RESOURCE_IN_USE;
1324  }
1325  memset(&bdbuf_cache, 0, sizeof(bdbuf_cache));
1326  bdbuf_cache.initialised = true;
1327  rtems_bdbuf_restore_preemption (prev_mode);
1328
1329  /*
1330   * For unspecified cache alignments we use the CPU alignment.
1331   */
1332  cache_aligment = 32; /* FIXME rtems_cache_get_data_line_size() */
1333  if (cache_aligment <= 0)
1334    cache_aligment = CPU_ALIGNMENT;
1335
1336  bdbuf_cache.sync_device = BDBUF_INVALID_DEV;
1337
1338  rtems_chain_initialize_empty (&bdbuf_cache.swapout_workers);
1339  rtems_chain_initialize_empty (&bdbuf_cache.lru);
1340  rtems_chain_initialize_empty (&bdbuf_cache.modified);
1341  rtems_chain_initialize_empty (&bdbuf_cache.sync);
1342
1343  /*
1344   * Create the locks for the cache.
1345   */
1346  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'l'),
1347                               1, RTEMS_BDBUF_CACHE_LOCK_ATTRIBS, 0,
1348                               &bdbuf_cache.lock);
1349  if (sc != RTEMS_SUCCESSFUL)
1350    goto error;
1351
1352  rtems_bdbuf_lock_cache ();
1353
1354  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 's'),
1355                               1, RTEMS_BDBUF_CACHE_LOCK_ATTRIBS, 0,
1356                               &bdbuf_cache.sync_lock);
1357  if (sc != RTEMS_SUCCESSFUL)
1358    goto error;
1359
1360  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'a'),
1361                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
1362                               &bdbuf_cache.access_waiters.sema);
1363  if (sc != RTEMS_SUCCESSFUL)
1364    goto error;
1365
1366  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 't'),
1367                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
1368                               &bdbuf_cache.transfer_waiters.sema);
1369  if (sc != RTEMS_SUCCESSFUL)
1370    goto error;
1371
1372  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'b'),
1373                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
1374                               &bdbuf_cache.buffer_waiters.sema);
1375  if (sc != RTEMS_SUCCESSFUL)
1376    goto error;
1377
1378  /*
1379   * Compute the various number of elements in the cache.
1380   */
1381  bdbuf_cache.buffer_min_count =
1382    bdbuf_config.size / bdbuf_config.buffer_min;
1383  bdbuf_cache.max_bds_per_group =
1384    bdbuf_config.buffer_max / bdbuf_config.buffer_min;
1385  bdbuf_cache.group_count =
1386    bdbuf_cache.buffer_min_count / bdbuf_cache.max_bds_per_group;
1387
1388  /*
1389   * Allocate the memory for the buffer descriptors.
1390   */
1391  bdbuf_cache.bds = calloc (sizeof (rtems_bdbuf_buffer),
1392                            bdbuf_cache.buffer_min_count);
1393  if (!bdbuf_cache.bds)
1394    goto error;
1395
1396  /*
1397   * Allocate the memory for the buffer descriptors.
1398   */
1399  bdbuf_cache.groups = calloc (sizeof (rtems_bdbuf_group),
1400                               bdbuf_cache.group_count);
1401  if (!bdbuf_cache.groups)
1402    goto error;
1403
1404  /*
1405   * Allocate memory for buffer memory. The buffer memory will be cache
1406   * aligned. It is possible to free the memory allocated by rtems_memalign()
1407   * with free(). Return 0 if allocated.
1408   *
1409   * The memory allocate allows a
1410   */
1411  if (rtems_memalign ((void **) &bdbuf_cache.buffers,
1412                      cache_aligment,
1413                      bdbuf_cache.buffer_min_count * bdbuf_config.buffer_min) != 0)
1414    goto error;
1415
1416  /*
1417   * The cache is empty after opening so we need to add all the buffers to it
1418   * and initialise the groups.
1419   */
1420  for (b = 0, group = bdbuf_cache.groups,
1421         bd = bdbuf_cache.bds, buffer = bdbuf_cache.buffers;
1422       b < bdbuf_cache.buffer_min_count;
1423       b++, bd++, buffer += bdbuf_config.buffer_min)
1424  {
1425    bd->dev    = BDBUF_INVALID_DEV;
1426    bd->group  = group;
1427    bd->buffer = buffer;
1428
1429    rtems_chain_append (&bdbuf_cache.lru, &bd->link);
1430
1431    if ((b % bdbuf_cache.max_bds_per_group) ==
1432        (bdbuf_cache.max_bds_per_group - 1))
1433      group++;
1434  }
1435
1436  for (b = 0,
1437         group = bdbuf_cache.groups,
1438         bd = bdbuf_cache.bds;
1439       b < bdbuf_cache.group_count;
1440       b++,
1441         group++,
1442         bd += bdbuf_cache.max_bds_per_group)
1443  {
1444    group->bds_per_group = bdbuf_cache.max_bds_per_group;
1445    group->bdbuf = bd;
1446  }
1447
1448  /*
1449   * Create and start swapout task. This task will create and manage the worker
1450   * threads.
1451   */
1452  bdbuf_cache.swapout_enabled = true;
1453
1454  sc = rtems_task_create (rtems_build_name('B', 'S', 'W', 'P'),
1455                          bdbuf_config.swapout_priority ?
1456                            bdbuf_config.swapout_priority :
1457                            RTEMS_BDBUF_SWAPOUT_TASK_PRIORITY_DEFAULT,
1458                          SWAPOUT_TASK_STACK_SIZE,
1459                          RTEMS_PREEMPT | RTEMS_NO_TIMESLICE | RTEMS_NO_ASR,
1460                          RTEMS_LOCAL | RTEMS_NO_FLOATING_POINT,
1461                          &bdbuf_cache.swapout);
1462  if (sc != RTEMS_SUCCESSFUL)
1463    goto error;
1464
1465  sc = rtems_task_start (bdbuf_cache.swapout,
1466                         rtems_bdbuf_swapout_task,
1467                         (rtems_task_argument) &bdbuf_cache);
1468  if (sc != RTEMS_SUCCESSFUL)
1469    goto error;
1470
1471  rtems_bdbuf_unlock_cache ();
1472
1473  return RTEMS_SUCCESSFUL;
1474
1475error:
1476
1477  if (bdbuf_cache.swapout != 0)
1478    rtems_task_delete (bdbuf_cache.swapout);
1479
1480  free (bdbuf_cache.buffers);
1481  free (bdbuf_cache.groups);
1482  free (bdbuf_cache.bds);
1483
1484  rtems_semaphore_delete (bdbuf_cache.buffer_waiters.sema);
1485  rtems_semaphore_delete (bdbuf_cache.access_waiters.sema);
1486  rtems_semaphore_delete (bdbuf_cache.transfer_waiters.sema);
1487  rtems_semaphore_delete (bdbuf_cache.sync_lock);
1488
1489  if (bdbuf_cache.lock != 0)
1490  {
1491    rtems_bdbuf_unlock_cache ();
1492    rtems_semaphore_delete (bdbuf_cache.lock);
1493  }
1494
1495  bdbuf_cache.initialised = false;
1496
1497  return RTEMS_UNSATISFIED;
1498}
1499
1500static void
1501rtems_bdbuf_wait_for_event (rtems_event_set event)
1502{
1503  rtems_status_code sc = RTEMS_SUCCESSFUL;
1504  rtems_event_set   out = 0;
1505
1506  sc = rtems_event_receive (event,
1507                            RTEMS_EVENT_ALL | RTEMS_WAIT,
1508                            RTEMS_NO_TIMEOUT,
1509                            &out);
1510
1511  if (sc != RTEMS_SUCCESSFUL || out != event)
1512    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_WAIT_EVNT);
1513}
1514
/*
 * Block until the buffer may be accessed by the caller.  MODIFIED buffers
 * release their group reference, CACHED/MODIFIED buffers are taken off their
 * list, and EMPTY buffers are ready as-is; the case fallthroughs below are
 * intentional.  Buffers in ACCESS_* states wait on the access waiters and
 * buffers in transit wait on the transfer waiters, re-checking the state
 * after each wakeup since it may have changed arbitrarily.
 */
static void
rtems_bdbuf_wait_for_access (rtems_bdbuf_buffer *bd)
{
  while (true)
  {
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_MODIFIED:
        rtems_bdbuf_group_release (bd);
        /* Fall through */
      case RTEMS_BDBUF_STATE_CACHED:
        rtems_chain_extract (&bd->link);
        /* Fall through */
      case RTEMS_BDBUF_STATE_EMPTY:
        return;
      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
        rtems_bdbuf_wait (bd, &bdbuf_cache.access_waiters);
        break;
      case RTEMS_BDBUF_STATE_SYNC:
      case RTEMS_BDBUF_STATE_TRANSFER:
      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
        break;
      default:
        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_7);
    }
  }
}
1546
1547static void
1548rtems_bdbuf_request_sync_for_modified_buffer (rtems_bdbuf_buffer *bd)
1549{
1550  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_SYNC);
1551  rtems_chain_extract (&bd->link);
1552  rtems_chain_append (&bdbuf_cache.sync, &bd->link);
1553  rtems_bdbuf_wake_swapper ();
1554}
1555
1556/**
1557 * @brief Waits until the buffer is ready for recycling.
1558 *
1559 * @retval @c true Buffer is valid and may be recycled.
1560 * @retval @c false Buffer is invalid and has to searched again.
1561 */
1562static bool
1563rtems_bdbuf_wait_for_recycle (rtems_bdbuf_buffer *bd)
1564{
1565  while (true)
1566  {
1567    switch (bd->state)
1568    {
1569      case RTEMS_BDBUF_STATE_FREE:
1570        return true;
1571      case RTEMS_BDBUF_STATE_MODIFIED:
1572        rtems_bdbuf_request_sync_for_modified_buffer (bd);
1573        break;
1574      case RTEMS_BDBUF_STATE_CACHED:
1575      case RTEMS_BDBUF_STATE_EMPTY:
1576        if (bd->waiters == 0)
1577          return true;
1578        else
1579        {
1580          /*
1581           * It is essential that we wait here without a special wait count and
1582           * without the group in use.  Otherwise we could trigger a wait ping
1583           * pong with another recycle waiter.  The state of the buffer is
1584           * arbitrary afterwards.
1585           */
1586          rtems_bdbuf_anonymous_wait (&bdbuf_cache.buffer_waiters);
1587          return false;
1588        }
1589      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
1590      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
1591      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
1592      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
1593        rtems_bdbuf_wait (bd, &bdbuf_cache.access_waiters);
1594        break;
1595      case RTEMS_BDBUF_STATE_SYNC:
1596      case RTEMS_BDBUF_STATE_TRANSFER:
1597      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
1598        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
1599        break;
1600      default:
1601        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_8);
1602    }
1603  }
1604}
1605
/*
 * Block until an in-progress sync/transfer of this buffer completes.  Any
 * settled state (CACHED, EMPTY, MODIFIED or one of the ACCESS_* states)
 * means the sync is done; the in-transit states wait on the transfer waiter
 * semaphore and re-check.  Unknown states are fatal.
 */
static void
rtems_bdbuf_wait_for_sync_done (rtems_bdbuf_buffer *bd)
{
  while (true)
  {
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_CACHED:
      case RTEMS_BDBUF_STATE_EMPTY:
      case RTEMS_BDBUF_STATE_MODIFIED:
      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
        return;
      case RTEMS_BDBUF_STATE_SYNC:
      case RTEMS_BDBUF_STATE_TRANSFER:
      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
        break;
      default:
        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_9);
    }
  }
}
1631
1632static void
1633rtems_bdbuf_wait_for_buffer (void)
1634{
1635  if (!rtems_chain_is_empty (&bdbuf_cache.modified))
1636    rtems_bdbuf_wake_swapper ();
1637
1638  rtems_bdbuf_anonymous_wait (&bdbuf_cache.buffer_waiters);
1639}
1640
/*
 * Synchronously write out a buffer after user access: queue it on the sync
 * list, wake any access waiters and the swapout task, then block until the
 * transfer finishes.  If the sync left a waiter-free CACHED or EMPTY buffer
 * behind, it may be recycled; an EMPTY one is returned to the free list and
 * any buffer waiters are woken.
 */
static void
rtems_bdbuf_sync_after_access (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_SYNC);

  rtems_chain_append (&bdbuf_cache.sync, &bd->link);

  if (bd->waiters)
    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);

  rtems_bdbuf_wake_swapper ();
  rtems_bdbuf_wait_for_sync_done (bd);

  /*
   * We may have created a cached or empty buffer which may be recycled.
   */
  if (bd->waiters == 0
        && (bd->state == RTEMS_BDBUF_STATE_CACHED
          || bd->state == RTEMS_BDBUF_STATE_EMPTY))
  {
    if (bd->state == RTEMS_BDBUF_STATE_EMPTY)
    {
      rtems_bdbuf_remove_from_tree (bd);
      rtems_bdbuf_make_free_and_add_to_lru_list (bd);
    }
    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
  }
}
1669
1670static rtems_bdbuf_buffer *
1671rtems_bdbuf_get_buffer_for_read_ahead (dev_t             dev,
1672                                       rtems_blkdev_bnum block,
1673                                       size_t            bds_per_group)
1674{
1675  rtems_bdbuf_buffer *bd = NULL;
1676
1677  bd = rtems_bdbuf_avl_search (&bdbuf_cache.tree, dev, block);
1678
1679  if (bd == NULL)
1680  {
1681    bd = rtems_bdbuf_get_buffer_from_lru_list (dev, block, bds_per_group);
1682
1683    if (bd != NULL)
1684      rtems_bdbuf_group_obtain (bd);
1685  }
1686  else
1687    /*
1688     * The buffer is in the cache.  So it is already available or in use, and
1689     * thus no need for a read ahead.
1690     */
1691    bd = NULL;
1692
1693  return bd;
1694}
1695
/*
 * Obtain a buffer for user access, retrying until one is available.  A cache
 * hit with the wrong group geometry is recycled (after waiting for it to
 * become recyclable) and the lookup restarts; a miss tries the LRU list and
 * blocks for a free buffer if none qualifies.  The returned buffer has been
 * waited into an accessible state and carries a group reference.
 */
static rtems_bdbuf_buffer *
rtems_bdbuf_get_buffer_for_access (dev_t             dev,
                                   rtems_blkdev_bnum block,
                                   size_t            bds_per_group)
{
  rtems_bdbuf_buffer *bd = NULL;

  do
  {
    bd = rtems_bdbuf_avl_search (&bdbuf_cache.tree, dev, block);

    if (bd != NULL)
    {
      if (bd->group->bds_per_group != bds_per_group)
      {
        /* Wrong geometry: free the buffer if possible, then search again. */
        if (rtems_bdbuf_wait_for_recycle (bd))
        {
          rtems_bdbuf_remove_from_tree_and_lru_list (bd);
          rtems_bdbuf_make_free_and_add_to_lru_list (bd);
          rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
        }
        bd = NULL;
      }
    }
    else
    {
      bd = rtems_bdbuf_get_buffer_from_lru_list (dev, block, bds_per_group);

      if (bd == NULL)
        rtems_bdbuf_wait_for_buffer ();
    }
  }
  while (bd == NULL);

  rtems_bdbuf_wait_for_access (bd);
  rtems_bdbuf_group_obtain (bd);

  return bd;
}
1735
/*
 * Obtain the disk device for dev and optionally translate the logical block
 * to a media block and compute the group geometry for the disk's block size.
 * On success the caller owns a disk reference and must release it with
 * rtems_bdbuf_release_disk(); every error path after the obtain releases the
 * reference itself.
 *
 * @param dev The device number.
 * @param block Logical block number to validate/translate (used only if
 *              media_block_ptr is non-NULL).
 * @param dd_ptr Receives the obtained disk device (set before validation).
 * @param media_block_ptr If non-NULL, receives the media block including the
 *                        partition start offset.
 * @param bds_per_group_ptr If non-NULL, receives the BDs-per-group count.
 */
static rtems_status_code
rtems_bdbuf_obtain_disk (dev_t               dev,
                         rtems_blkdev_bnum   block,
                         rtems_disk_device **dd_ptr,
                         rtems_blkdev_bnum  *media_block_ptr,
                         size_t             *bds_per_group_ptr)
{
  rtems_disk_device *dd = NULL;

  if (!bdbuf_cache.initialised)
    return RTEMS_NOT_CONFIGURED;

  /*
   * Do not hold the cache lock when obtaining the disk table.
   */
  dd = rtems_disk_obtain (dev);
  if (dd == NULL)
    return RTEMS_INVALID_ID;

  *dd_ptr = dd;

  if (media_block_ptr != NULL)
  {
    /*
     * Compute the media block number. Drivers work with media block number not
     * the block number a BD may have as this depends on the block size set by
     * the user.
     */
    rtems_blkdev_bnum mb = rtems_bdbuf_media_block (block,
                                                    dd->block_size,
                                                    dd->media_block_size);
    if (mb >= dd->size)
    {
      rtems_disk_release(dd);
      return RTEMS_INVALID_NUMBER;
    }

    *media_block_ptr = mb + dd->start;
  }

  if (bds_per_group_ptr != NULL)
  {
    size_t bds_per_group = rtems_bdbuf_bds_per_group (dd->block_size);

    /* A zero result means the block size exceeds the configured maximum. */
    if (bds_per_group == 0)
    {
      rtems_disk_release (dd);
      return RTEMS_INVALID_NUMBER;
    }

    *bds_per_group_ptr = bds_per_group;
  }

  return RTEMS_SUCCESSFUL;
}
1791
1792static void
1793rtems_bdbuf_release_disk (rtems_disk_device *dd)
1794{
1795  rtems_status_code sc = RTEMS_SUCCESSFUL;
1796
1797  sc = rtems_disk_release (dd);
1798  if (sc != RTEMS_SUCCESSFUL)
1799    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_DISK_REL);
1800}
1801
/*
 * Get a buffer for the given device and block without reading it from disk.
 * The returned buffer is in one of the ACCESS_* states and must be released
 * with rtems_bdbuf_release() or a related call.  The block number is
 * validated and translated to a media block by rtems_bdbuf_obtain_disk().
 */
rtems_status_code
rtems_bdbuf_get (dev_t                dev,
                 rtems_blkdev_bnum    block,
                 rtems_bdbuf_buffer **bd_ptr)
{
  rtems_status_code   sc = RTEMS_SUCCESSFUL;
  rtems_disk_device  *dd = NULL;
  rtems_bdbuf_buffer *bd = NULL;
  rtems_blkdev_bnum   media_block = 0;
  size_t              bds_per_group = 0;

  sc = rtems_bdbuf_obtain_disk (dev, block, &dd, &media_block, &bds_per_group);
  if (sc != RTEMS_SUCCESSFUL)
    return sc;

  rtems_bdbuf_lock_cache ();

  /*
   * Print the block index relative to the physical disk.
   */
  if (rtems_bdbuf_tracer)
    printf ("bdbuf:get: %" PRIu32 " (%" PRIu32 ") (dev = %08x)\n",
            media_block, block, (unsigned) dev);

  bd = rtems_bdbuf_get_buffer_for_access (dev, media_block, bds_per_group);

  /* Transition the settled state to the matching ACCESS_* state. */
  switch (bd->state)
  {
    case RTEMS_BDBUF_STATE_CACHED:
      rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED);
      break;
    case RTEMS_BDBUF_STATE_EMPTY:
      rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_EMPTY);
      break;
    case RTEMS_BDBUF_STATE_MODIFIED:
      /*
       * To get a modified buffer could be considered a bug in the caller
       * because you should not be getting an already modified buffer but user
       * may have modified a byte in a block then decided to seek the start and
       * write the whole block and the file system will have no record of this
       * so just gets the block to fill.
       */
      rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_MODIFIED);
      break;
    default:
      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_2);
      break;
  }

  if (rtems_bdbuf_tracer)
  {
    rtems_bdbuf_show_users ("get", bd);
    rtems_bdbuf_show_usage ();
  }

  rtems_bdbuf_unlock_cache ();

  rtems_bdbuf_release_disk (dd);

  *bd_ptr = bd;

  return RTEMS_SUCCESSFUL;
}
1865
1866/**
1867 * Call back handler called by the low level driver when the transfer has
1868 * completed. This function may be invoked from interrupt handler.
1869 *
1870 * @param arg Arbitrary argument specified in block device request
1871 *            structure (in this case - pointer to the appropriate
1872 *            block device request structure).
1873 * @param status I/O completion status
1874 */
1875static void
1876rtems_bdbuf_transfer_done (void* arg, rtems_status_code status)
1877{
1878  rtems_blkdev_request* req = (rtems_blkdev_request*) arg;
1879
1880  req->status = status;
1881
1882  rtems_event_send (req->io_task, RTEMS_BDBUF_TRANSFER_SYNC);
1883}
1884
/*
 * Build a read request for the target block plus up to
 * bdbuf_config.max_read_ahead_blocks read-ahead blocks, clamped to the end
 * of the disk.  The target buffer is obtained for access and returned via
 * bd_ptr; read-ahead buffers are obtained opportunistically and the loop
 * stops at the first block that cannot be prefetched.  On return req->bufnum
 * holds the number of blocks to transfer; it is 0 when the target buffer is
 * already CACHED or MODIFIED and no read is needed.
 */
static void
rtems_bdbuf_create_read_request (const rtems_disk_device *dd,
                                 rtems_blkdev_bnum        media_block,
                                 size_t                   bds_per_group,
                                 rtems_blkdev_request    *req,
                                 rtems_bdbuf_buffer     **bd_ptr)
{
  rtems_bdbuf_buffer *bd = NULL;
  rtems_blkdev_bnum   media_block_end = dd->start + dd->size;
  rtems_blkdev_bnum   media_block_count = dd->block_size / dd->media_block_size;
  dev_t               dev = dd->dev;
  uint32_t            block_size = dd->block_size;
  uint32_t            transfer_index = 1;
  uint32_t            transfer_count = bdbuf_config.max_read_ahead_blocks + 1;

  /* Clamp the read ahead to the end of the disk. */
  if (media_block_end - media_block < transfer_count)
    transfer_count = media_block_end - media_block;

  req->req = RTEMS_BLKDEV_REQ_READ;
  req->req_done = rtems_bdbuf_transfer_done;
  req->done_arg = req;
  req->io_task = rtems_task_self ();
  req->status = RTEMS_RESOURCE_IN_USE;
  req->bufnum = 0;

  bd = rtems_bdbuf_get_buffer_for_access (dev, media_block, bds_per_group);

  *bd_ptr = bd;

  req->bufs [0].user   = bd;
  req->bufs [0].block  = media_block;
  req->bufs [0].length = block_size;
  req->bufs [0].buffer = bd->buffer;

  if (rtems_bdbuf_tracer)
    rtems_bdbuf_show_users ("read", bd);

  switch (bd->state)
  {
    case RTEMS_BDBUF_STATE_CACHED:
    case RTEMS_BDBUF_STATE_MODIFIED:
      /* Valid data already present: leave req->bufnum at 0, no read. */
      return;
    case RTEMS_BDBUF_STATE_EMPTY:
      rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);
      break;
    default:
      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_1);
      break;
  }

  /* Collect read-ahead buffers; stop at the first unavailable block. */
  while (transfer_index < transfer_count)
  {
    media_block += media_block_count;

    bd = rtems_bdbuf_get_buffer_for_read_ahead (dev, media_block,
                                                bds_per_group);

    if (bd == NULL)
      break;

    rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);

    req->bufs [transfer_index].user   = bd;
    req->bufs [transfer_index].block  = media_block;
    req->bufs [transfer_index].length = block_size;
    req->bufs [transfer_index].buffer = bd->buffer;

    if (rtems_bdbuf_tracer)
      rtems_bdbuf_show_users ("read-ahead", bd);

    ++transfer_index;
  }

  req->bufnum = transfer_index;
}
1960
/*
 * Hand a prepared request to the driver and wait for completion, then settle
 * every buffer in the request: successful TRANSFER buffers become CACHED,
 * everything else is discarded.  The cache lock must not be held across the
 * driver call; on entry the lock state is described by cache_locked and on
 * return the lock is held iff cache_locked was true.
 *
 * @return RTEMS_SUCCESSFUL or RTEMS_UNSATISFIED as reported by the driver,
 *         any other driver status is mapped to RTEMS_IO_ERROR.
 */
static rtems_status_code
rtems_bdbuf_execute_transfer_request (const rtems_disk_device *dd,
                                      rtems_blkdev_request    *req,
                                      bool                     cache_locked)
{
  rtems_status_code sc = RTEMS_SUCCESSFUL;
  int result = 0;
  uint32_t transfer_index = 0;
  bool wake_transfer_waiters = false;
  bool wake_buffer_waiters = false;

  if (cache_locked)
    rtems_bdbuf_unlock_cache ();

  result = dd->ioctl (dd->phys_dev, RTEMS_BLKIO_REQUEST, req);

  if (result == 0)
  {
    /* The driver accepted the request; wait for the completion callback. */
    rtems_bdbuf_wait_for_event (RTEMS_BDBUF_TRANSFER_SYNC);
    sc = req->status;
  }
  else
    sc = RTEMS_IO_ERROR;

  rtems_bdbuf_lock_cache ();

  for (transfer_index = 0; transfer_index < req->bufnum; ++transfer_index)
  {
    rtems_bdbuf_buffer *bd = req->bufs [transfer_index].user;
    bool waiters = bd->waiters;

    if (waiters)
      wake_transfer_waiters = true;
    else
      wake_buffer_waiters = true;

    rtems_bdbuf_group_release (bd);

    if (sc == RTEMS_SUCCESSFUL && bd->state == RTEMS_BDBUF_STATE_TRANSFER)
      rtems_bdbuf_make_cached_and_add_to_lru_list (bd);
    else
      rtems_bdbuf_discard_buffer (bd);

    if (rtems_bdbuf_tracer)
      rtems_bdbuf_show_users ("transfer", bd);
  }

  if (wake_transfer_waiters)
    rtems_bdbuf_wake (&bdbuf_cache.transfer_waiters);

  if (wake_buffer_waiters)
    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);

  if (!cache_locked)
    rtems_bdbuf_unlock_cache ();

  if (sc == RTEMS_SUCCESSFUL || sc == RTEMS_UNSATISFIED)
    return sc;
  else
    return RTEMS_IO_ERROR;
}
2022
/*
 * Get a buffer for the given device and block with valid data, reading it
 * from disk (with read ahead) if it is not already cached.  On success the
 * buffer is in ACCESS_CACHED or ACCESS_MODIFIED state and must be released
 * by the caller; on failure *bd_ptr is NULL.
 */
rtems_status_code
rtems_bdbuf_read (dev_t                dev,
                  rtems_blkdev_bnum    block,
                  rtems_bdbuf_buffer **bd_ptr)
{
  rtems_status_code     sc = RTEMS_SUCCESSFUL;
  rtems_disk_device    *dd = NULL;
  rtems_blkdev_request *req = NULL;
  rtems_bdbuf_buffer   *bd = NULL;
  rtems_blkdev_bnum     media_block = 0;
  size_t                bds_per_group = 0;

  sc = rtems_bdbuf_obtain_disk (dev, block, &dd, &media_block, &bds_per_group);
  if (sc != RTEMS_SUCCESSFUL)
    return sc;

  /*
   * TODO: This type of request structure is wrong and should be removed.
   */
#define bdbuf_alloc(size) __builtin_alloca (size)

  /* Stack-allocated request sized for the worst-case read ahead. */
  req = bdbuf_alloc (sizeof (rtems_blkdev_request) +
                     sizeof ( rtems_blkdev_sg_buffer) *
                      (bdbuf_config.max_read_ahead_blocks + 1));

  if (rtems_bdbuf_tracer)
    printf ("bdbuf:read: %" PRIu32 " (%" PRIu32 ") (dev = %08x)\n",
            media_block + dd->start, block, (unsigned) dev);

  rtems_bdbuf_lock_cache ();
  rtems_bdbuf_create_read_request (dd, media_block, bds_per_group, req, &bd);

  /* bufnum == 0 means the target block was already cached — no transfer. */
  if (req->bufnum > 0)
  {
    sc = rtems_bdbuf_execute_transfer_request (dd, req, true);
    if (sc == RTEMS_SUCCESSFUL)
    {
      /* Re-claim the target buffer settled by the transfer completion. */
      rtems_chain_extract (&bd->link);
      rtems_bdbuf_group_obtain (bd);
    }
  }

  if (sc == RTEMS_SUCCESSFUL)
  {
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_CACHED:
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED);
        break;
      case RTEMS_BDBUF_STATE_MODIFIED:
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_MODIFIED);
        break;
      default:
        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_4);
        break;
    }

    if (rtems_bdbuf_tracer)
    {
      rtems_bdbuf_show_users ("read", bd);
      rtems_bdbuf_show_usage ();
    }

    *bd_ptr = bd;
  }
  else
    *bd_ptr = NULL;

  rtems_bdbuf_unlock_cache ();
  rtems_bdbuf_release_disk (dd);

  return sc;
}
2096
2097static rtems_status_code
2098rtems_bdbuf_check_bd_and_lock_cache (rtems_bdbuf_buffer *bd, const char *kind)
2099{
2100  if (!bdbuf_cache.initialised)
2101    return RTEMS_NOT_CONFIGURED;
2102  if (bd == NULL)
2103    return RTEMS_INVALID_ADDRESS;
2104  if (rtems_bdbuf_tracer)
2105  {
2106    printf ("bdbuf:%s: %" PRIu32 "\n", kind, bd->block);
2107    rtems_bdbuf_show_users (kind, bd);
2108  }
2109  rtems_bdbuf_lock_cache();
2110
2111  return RTEMS_SUCCESSFUL;
2112}
2113
2114rtems_status_code
2115rtems_bdbuf_release (rtems_bdbuf_buffer *bd)
2116{
2117  rtems_status_code sc = RTEMS_SUCCESSFUL;
2118
2119  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "release");
2120  if (sc != RTEMS_SUCCESSFUL)
2121    return sc;
2122
2123  switch (bd->state)
2124  {
2125    case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2126      rtems_bdbuf_add_to_lru_list_after_access (bd);
2127      break;
2128    case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
2129    case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2130      rtems_bdbuf_discard_buffer_after_access (bd);
2131      break;
2132    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2133      rtems_bdbuf_add_to_modified_list_after_access (bd);
2134      break;
2135    default:
2136      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_0);
2137      break;
2138  }
2139
2140  if (rtems_bdbuf_tracer)
2141    rtems_bdbuf_show_usage ();
2142
2143  rtems_bdbuf_unlock_cache ();
2144
2145  return RTEMS_SUCCESSFUL;
2146}
2147
2148rtems_status_code
2149rtems_bdbuf_release_modified (rtems_bdbuf_buffer *bd)
2150{
2151  rtems_status_code sc = RTEMS_SUCCESSFUL;
2152
2153  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "release modified");
2154  if (sc != RTEMS_SUCCESSFUL)
2155    return sc;
2156
2157  switch (bd->state)
2158  {
2159    case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2160    case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
2161    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2162      rtems_bdbuf_add_to_modified_list_after_access (bd);
2163      break;
2164    case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2165      rtems_bdbuf_discard_buffer_after_access (bd);
2166      break;
2167    default:
2168      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_6);
2169      break;
2170  }
2171
2172  if (rtems_bdbuf_tracer)
2173    rtems_bdbuf_show_usage ();
2174
2175  rtems_bdbuf_unlock_cache ();
2176
2177  return RTEMS_SUCCESSFUL;
2178}
2179
2180rtems_status_code
2181rtems_bdbuf_sync (rtems_bdbuf_buffer *bd)
2182{
2183  rtems_status_code sc = RTEMS_SUCCESSFUL;
2184
2185  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "sync");
2186  if (sc != RTEMS_SUCCESSFUL)
2187    return sc;
2188
2189  switch (bd->state)
2190  {
2191    case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2192    case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
2193    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2194      rtems_bdbuf_sync_after_access (bd);
2195      break;
2196    case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2197      rtems_bdbuf_discard_buffer_after_access (bd);
2198      break;
2199    default:
2200      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_5);
2201      break;
2202  }
2203
2204  if (rtems_bdbuf_tracer)
2205    rtems_bdbuf_show_usage ();
2206
2207  rtems_bdbuf_unlock_cache ();
2208
2209  return RTEMS_SUCCESSFUL;
2210}
2211
/**
 * Synchronise all modified buffers of a device to the disk via the swapout
 * task and block until the swapout task signals completion.
 */
rtems_status_code
rtems_bdbuf_syncdev (dev_t dev)
{
  rtems_status_code  sc = RTEMS_SUCCESSFUL;
  rtems_disk_device *dd = NULL;

  if (rtems_bdbuf_tracer)
    printf ("bdbuf:syncdev: %08x\n", (unsigned) dev);

  /* Validates the device and holds a reference for the whole sync. */
  sc = rtems_bdbuf_obtain_disk (dev, 0, &dd, NULL, NULL);
  if (sc != RTEMS_SUCCESSFUL)
    return sc;

  /*
   * Take the sync lock before locking the cache. Once we have the sync lock we
   * can lock the cache. If another thread has the sync lock it will cause this
   * thread to block until it owns the sync lock then it can own the cache. The
   * sync lock can only be obtained with the cache unlocked.
   */
  rtems_bdbuf_lock_sync ();
  rtems_bdbuf_lock_cache ();

  /*
   * Set the cache to have a sync active for a specific device and let the swap
   * out task know the id of the requester to wake when done.
   *
   * The swap out task will negate the sync active flag when no more buffers
   * for the device are held on the "modified for sync" queues.
   */
  bdbuf_cache.sync_active    = true;
  bdbuf_cache.sync_requester = rtems_task_self ();
  bdbuf_cache.sync_device    = dev;

  /*
   * Wake the swapout task, then wait for it to send the transfer-sync event
   * once all of this device's buffers have been written.
   */
  rtems_bdbuf_wake_swapper ();
  rtems_bdbuf_unlock_cache ();
  rtems_bdbuf_wait_for_event (RTEMS_BDBUF_TRANSFER_SYNC);
  rtems_bdbuf_unlock_sync ();
  rtems_bdbuf_release_disk (dd);

  return RTEMS_SUCCESSFUL;
}
2253
2254static int
2255rtems_bdbuf_null_disk_ioctl (rtems_disk_device *dd, uint32_t req, void *arg)
2256{
2257  return -1;
2258}
2259
2260/**
2261 * Swapout transfer to the driver. The driver will break this I/O into groups
2262 * of consecutive write requests is multiple consecutive buffers are required
2263 * by the driver.
2264 *
2265 * @param transfer The transfer transaction.
2266 */
2267static void
2268rtems_bdbuf_swapout_write (rtems_bdbuf_swapout_transfer* transfer)
2269{
2270  rtems_chain_node *node;
2271  static rtems_disk_device null_disk = {
2272    .phys_dev = &null_disk,
2273    .capabilities = 0,
2274    .ioctl = rtems_bdbuf_null_disk_ioctl
2275  };
2276
2277  if (rtems_bdbuf_tracer)
2278    printf ("bdbuf:swapout transfer: %08x\n", (unsigned) transfer->dev);
2279
2280  /*
2281   * If there are buffers to transfer to the media transfer them.
2282   */
2283  if (!rtems_chain_is_empty (&transfer->bds))
2284  {
2285    /*
2286     * The last block number used when the driver only supports
2287     * continuous blocks in a single request.
2288     */
2289    uint32_t last_block = 0;
2290
2291    /*
2292     * Number of buffers per bd. This is used to detect the next
2293     * block.
2294     */
2295    uint32_t bufs_per_bd = 0;
2296
2297    /*
2298     * Obtain the disk device. The cache's mutex has been released to avoid a
2299     * dead lock.
2300     */
2301    rtems_disk_device *dd = rtems_disk_obtain (transfer->dev);
2302
2303    if (dd == NULL)
2304      dd = &null_disk;
2305
2306    bufs_per_bd = dd->block_size / bdbuf_config.buffer_min;
2307
2308    /*
2309     * Take as many buffers as configured and pass to the driver. Note, the
2310     * API to the drivers has an array of buffers and if a chain was passed
2311     * we could have just passed the list. If the driver API is updated it
2312     * should be possible to make this change with little effect in this
2313     * code. The array that is passed is broken in design and should be
2314     * removed. Merging members of a struct into the first member is
2315     * trouble waiting to happen.
2316     */
2317    transfer->write_req->status = RTEMS_RESOURCE_IN_USE;
2318    transfer->write_req->bufnum = 0;
2319
2320    while ((node = rtems_chain_get(&transfer->bds)) != NULL)
2321    {
2322      rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;
2323      bool                write = false;
2324
2325      /*
2326       * If the device only accepts sequential buffers and this is not the
2327       * first buffer (the first is always sequential, and the buffer is not
2328       * sequential then put the buffer back on the transfer chain and write
2329       * the committed buffers.
2330       */
2331
2332      if (rtems_bdbuf_tracer)
2333        printf ("bdbuf:swapout write: bd:%" PRIu32 ", bufnum:%" PRIu32 " mode:%s\n",
2334                bd->block, transfer->write_req->bufnum,
2335                dd->phys_dev->capabilities &
2336                RTEMS_BLKDEV_CAP_MULTISECTOR_CONT ? "MULIT" : "SCAT");
2337
2338      if ((dd->phys_dev->capabilities & RTEMS_BLKDEV_CAP_MULTISECTOR_CONT) &&
2339          transfer->write_req->bufnum &&
2340          (bd->block != (last_block + bufs_per_bd)))
2341      {
2342        rtems_chain_prepend (&transfer->bds, &bd->link);
2343        write = true;
2344      }
2345      else
2346      {
2347        rtems_blkdev_sg_buffer* buf;
2348        buf = &transfer->write_req->bufs[transfer->write_req->bufnum];
2349        transfer->write_req->bufnum++;
2350        buf->user   = bd;
2351        buf->block  = bd->block;
2352        buf->length = dd->block_size;
2353        buf->buffer = bd->buffer;
2354        last_block  = bd->block;
2355      }
2356
2357      /*
2358       * Perform the transfer if there are no more buffers, or the transfer
2359       * size has reached the configured max. value.
2360       */
2361
2362      if (rtems_chain_is_empty (&transfer->bds) ||
2363          (transfer->write_req->bufnum >= bdbuf_config.max_write_blocks))
2364        write = true;
2365
2366      if (write)
2367      {
2368        rtems_bdbuf_execute_transfer_request (dd, transfer->write_req, false);
2369
2370        transfer->write_req->status = RTEMS_RESOURCE_IN_USE;
2371        transfer->write_req->bufnum = 0;
2372      }
2373    }
2374
2375    if (dd != &null_disk)
2376      rtems_disk_release (dd);
2377  }
2378}
2379
2380/**
2381 * Process the modified list of buffers. There is a sync or modified list that
2382 * needs to be handled so we have a common function to do the work.
2383 *
2384 * @param dev The device to handle. If BDBUF_INVALID_DEV no device is selected
2385 * so select the device of the first buffer to be written to disk.
2386 * @param chain The modified chain to process.
2387 * @param transfer The chain to append buffers to be written too.
2388 * @param sync_active If true this is a sync operation so expire all timers.
2389 * @param update_timers If true update the timers.
2390 * @param timer_delta It update_timers is true update the timers by this
2391 *                    amount.
2392 */
2393static void
2394rtems_bdbuf_swapout_modified_processing (dev_t*               dev,
2395                                         rtems_chain_control* chain,
2396                                         rtems_chain_control* transfer,
2397                                         bool                 sync_active,
2398                                         bool                 update_timers,
2399                                         uint32_t             timer_delta)
2400{
2401  if (!rtems_chain_is_empty (chain))
2402  {
2403    rtems_chain_node* node = rtems_chain_head (chain);
2404    node = node->next;
2405
2406    while (!rtems_chain_is_tail (chain, node))
2407    {
2408      rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;
2409
2410      /*
2411       * Check if the buffer's hold timer has reached 0. If a sync is active
2412       * or someone waits for a buffer force all the timers to 0.
2413       *
2414       * @note Lots of sync requests will skew this timer. It should be based
2415       *       on TOD to be accurate. Does it matter ?
2416       */
2417      if (sync_active || rtems_bdbuf_has_buffer_waiters ())
2418        bd->hold_timer = 0;
2419
2420      if (bd->hold_timer)
2421      {
2422        if (update_timers)
2423        {
2424          if (bd->hold_timer > timer_delta)
2425            bd->hold_timer -= timer_delta;
2426          else
2427            bd->hold_timer = 0;
2428        }
2429
2430        if (bd->hold_timer)
2431        {
2432          node = node->next;
2433          continue;
2434        }
2435      }
2436
2437      /*
2438       * This assumes we can set dev_t to BDBUF_INVALID_DEV which is just an
2439       * assumption. Cannot use the transfer list being empty the sync dev
2440       * calls sets the dev to use.
2441       */
2442      if (*dev == BDBUF_INVALID_DEV)
2443        *dev = bd->dev;
2444
2445      if (bd->dev == *dev)
2446      {
2447        rtems_chain_node* next_node = node->next;
2448        rtems_chain_node* tnode = rtems_chain_tail (transfer);
2449
2450        /*
2451         * The blocks on the transfer list are sorted in block order. This
2452         * means multi-block transfers for drivers that require consecutive
2453         * blocks perform better with sorted blocks and for real disks it may
2454         * help lower head movement.
2455         */
2456
2457        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);
2458
2459        rtems_chain_extract (node);
2460
2461        tnode = tnode->previous;
2462
2463        while (node && !rtems_chain_is_head (transfer, tnode))
2464        {
2465          rtems_bdbuf_buffer* tbd = (rtems_bdbuf_buffer*) tnode;
2466
2467          if (bd->block > tbd->block)
2468          {
2469            rtems_chain_insert (tnode, node);
2470            node = NULL;
2471          }
2472          else
2473            tnode = tnode->previous;
2474        }
2475
2476        if (node)
2477          rtems_chain_prepend (transfer, node);
2478
2479        node = next_node;
2480      }
2481      else
2482      {
2483        node = node->next;
2484      }
2485    }
2486  }
2487}
2488
2489/**
2490 * Process the cache's modified buffers. Check the sync list first then the
2491 * modified list extracting the buffers suitable to be written to disk. We have
2492 * a device at a time. The task level loop will repeat this operation while
2493 * there are buffers to be written. If the transfer fails place the buffers
2494 * back on the modified list and try again later. The cache is unlocked while
2495 * the buffers are being written to disk.
2496 *
2497 * @param timer_delta It update_timers is true update the timers by this
2498 *                    amount.
2499 * @param update_timers If true update the timers.
2500 * @param transfer The transfer transaction data.
2501 *
2502 * @retval true Buffers where written to disk so scan again.
2503 * @retval false No buffers where written to disk.
2504 */
2505static bool
2506rtems_bdbuf_swapout_processing (unsigned long                 timer_delta,
2507                                bool                          update_timers,
2508                                rtems_bdbuf_swapout_transfer* transfer)
2509{
2510  rtems_bdbuf_swapout_worker* worker;
2511  bool                        transfered_buffers = false;
2512
2513  rtems_bdbuf_lock_cache ();
2514
2515  /*
2516   * If a sync is active do not use a worker because the current code does not
2517   * cleaning up after. We need to know the buffers have been written when
2518   * syncing to the release sync lock and currently worker threads do not
2519   * return to here. We do not know the worker is the last in a sequence of
2520   * sync writes until after we have it running so we do not know to tell it to
2521   * release the lock. The simplest solution is to get the main swap out task
2522   * perform all sync operations.
2523   */
2524  if (bdbuf_cache.sync_active)
2525    worker = NULL;
2526  else
2527  {
2528    worker = (rtems_bdbuf_swapout_worker*)
2529      rtems_chain_get (&bdbuf_cache.swapout_workers);
2530    if (worker)
2531      transfer = &worker->transfer;
2532  }
2533
2534  rtems_chain_initialize_empty (&transfer->bds);
2535  transfer->dev = BDBUF_INVALID_DEV;
2536
2537  /*
2538   * When the sync is for a device limit the sync to that device. If the sync
2539   * is for a buffer handle process the devices in the order on the sync
2540   * list. This means the dev is BDBUF_INVALID_DEV.
2541   */
2542  if (bdbuf_cache.sync_active)
2543    transfer->dev = bdbuf_cache.sync_device;
2544
2545  /*
2546   * If we have any buffers in the sync queue move them to the modified
2547   * list. The first sync buffer will select the device we use.
2548   */
2549  rtems_bdbuf_swapout_modified_processing (&transfer->dev,
2550                                           &bdbuf_cache.sync,
2551                                           &transfer->bds,
2552                                           true, false,
2553                                           timer_delta);
2554
2555  /*
2556   * Process the cache's modified list.
2557   */
2558  rtems_bdbuf_swapout_modified_processing (&transfer->dev,
2559                                           &bdbuf_cache.modified,
2560                                           &transfer->bds,
2561                                           bdbuf_cache.sync_active,
2562                                           update_timers,
2563                                           timer_delta);
2564
2565  /*
2566   * We have all the buffers that have been modified for this device so the
2567   * cache can be unlocked because the state of each buffer has been set to
2568   * TRANSFER.
2569   */
2570  rtems_bdbuf_unlock_cache ();
2571
2572  /*
2573   * If there are buffers to transfer to the media transfer them.
2574   */
2575  if (!rtems_chain_is_empty (&transfer->bds))
2576  {
2577    if (worker)
2578    {
2579      rtems_status_code sc = rtems_event_send (worker->id,
2580                                               RTEMS_BDBUF_SWAPOUT_SYNC);
2581      if (sc != RTEMS_SUCCESSFUL)
2582        rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE);
2583    }
2584    else
2585    {
2586      rtems_bdbuf_swapout_write (transfer);
2587    }
2588
2589    transfered_buffers = true;
2590  }
2591
2592  if (bdbuf_cache.sync_active && !transfered_buffers)
2593  {
2594    rtems_id sync_requester;
2595    rtems_bdbuf_lock_cache ();
2596    sync_requester = bdbuf_cache.sync_requester;
2597    bdbuf_cache.sync_active = false;
2598    bdbuf_cache.sync_requester = 0;
2599    rtems_bdbuf_unlock_cache ();
2600    if (sync_requester)
2601      rtems_event_send (sync_requester, RTEMS_BDBUF_TRANSFER_SYNC);
2602  }
2603
2604  return transfered_buffers;
2605}
2606
2607/**
2608 * Allocate the write request and initialise it for good measure.
2609 *
2610 * @return rtems_blkdev_request* The write reference memory.
2611 */
2612static rtems_blkdev_request*
2613rtems_bdbuf_swapout_writereq_alloc (void)
2614{
2615  /*
2616   * @note chrisj The rtems_blkdev_request and the array at the end is a hack.
2617   * I am disappointment at finding code like this in RTEMS. The request should
2618   * have been a rtems_chain_control. Simple, fast and less storage as the node
2619   * is already part of the buffer structure.
2620   */
2621  rtems_blkdev_request* write_req =
2622    malloc (sizeof (rtems_blkdev_request) +
2623            (bdbuf_config.max_write_blocks * sizeof (rtems_blkdev_sg_buffer)));
2624
2625  if (!write_req)
2626    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM);
2627
2628  write_req->req = RTEMS_BLKDEV_REQ_WRITE;
2629  write_req->req_done = rtems_bdbuf_transfer_done;
2630  write_req->done_arg = write_req;
2631  write_req->io_task = rtems_task_self ();
2632
2633  return write_req;
2634}
2635
2636/**
2637 * The swapout worker thread body.
2638 *
2639 * @param arg A pointer to the worker thread's private data.
2640 * @return rtems_task Not used.
2641 */
2642static rtems_task
2643rtems_bdbuf_swapout_worker_task (rtems_task_argument arg)
2644{
2645  rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) arg;
2646
2647  while (worker->enabled)
2648  {
2649    rtems_bdbuf_wait_for_event (RTEMS_BDBUF_SWAPOUT_SYNC);
2650
2651    rtems_bdbuf_swapout_write (&worker->transfer);
2652
2653    rtems_bdbuf_lock_cache ();
2654
2655    rtems_chain_initialize_empty (&worker->transfer.bds);
2656    worker->transfer.dev = BDBUF_INVALID_DEV;
2657
2658    rtems_chain_append (&bdbuf_cache.swapout_workers, &worker->link);
2659
2660    rtems_bdbuf_unlock_cache ();
2661  }
2662
2663  free (worker->transfer.write_req);
2664  free (worker);
2665
2666  rtems_task_delete (RTEMS_SELF);
2667}
2668
2669/**
2670 * Open the swapout worker threads.
2671 */
2672static void
2673rtems_bdbuf_swapout_workers_open (void)
2674{
2675  rtems_status_code sc;
2676  size_t            w;
2677
2678  rtems_bdbuf_lock_cache ();
2679
2680  for (w = 0; w < bdbuf_config.swapout_workers; w++)
2681  {
2682    rtems_bdbuf_swapout_worker* worker;
2683
2684    worker = malloc (sizeof (rtems_bdbuf_swapout_worker));
2685    if (!worker)
2686      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM);
2687
2688    rtems_chain_append (&bdbuf_cache.swapout_workers, &worker->link);
2689    worker->enabled = true;
2690    worker->transfer.write_req = rtems_bdbuf_swapout_writereq_alloc ();
2691
2692    rtems_chain_initialize_empty (&worker->transfer.bds);
2693    worker->transfer.dev = BDBUF_INVALID_DEV;
2694
2695    sc = rtems_task_create (rtems_build_name('B', 'D', 'o', 'a' + w),
2696                            (bdbuf_config.swapout_priority ?
2697                             bdbuf_config.swapout_priority :
2698                             RTEMS_BDBUF_SWAPOUT_TASK_PRIORITY_DEFAULT),
2699                            SWAPOUT_TASK_STACK_SIZE,
2700                            RTEMS_PREEMPT | RTEMS_NO_TIMESLICE | RTEMS_NO_ASR,
2701                            RTEMS_LOCAL | RTEMS_NO_FLOATING_POINT,
2702                            &worker->id);
2703    if (sc != RTEMS_SUCCESSFUL)
2704      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_CREATE);
2705
2706    sc = rtems_task_start (worker->id,
2707                           rtems_bdbuf_swapout_worker_task,
2708                           (rtems_task_argument) worker);
2709    if (sc != RTEMS_SUCCESSFUL)
2710      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_START);
2711  }
2712
2713  rtems_bdbuf_unlock_cache ();
2714}
2715
2716/**
2717 * Close the swapout worker threads.
2718 */
2719static void
2720rtems_bdbuf_swapout_workers_close (void)
2721{
2722  rtems_chain_node* node;
2723
2724  rtems_bdbuf_lock_cache ();
2725
2726  node = rtems_chain_first (&bdbuf_cache.swapout_workers);
2727  while (!rtems_chain_is_tail (&bdbuf_cache.swapout_workers, node))
2728  {
2729    rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) node;
2730    worker->enabled = false;
2731    rtems_event_send (worker->id, RTEMS_BDBUF_SWAPOUT_SYNC);
2732    node = rtems_chain_next (node);
2733  }
2734
2735  rtems_bdbuf_unlock_cache ();
2736}
2737
2738/**
2739 * Body of task which takes care on flushing modified buffers to the disk.
2740 *
2741 * @param arg A pointer to the global cache data. Use the global variable and
2742 *            not this.
2743 * @return rtems_task Not used.
2744 */
2745static rtems_task
2746rtems_bdbuf_swapout_task (rtems_task_argument arg)
2747{
2748  rtems_bdbuf_swapout_transfer transfer;
2749  uint32_t                     period_in_ticks;
2750  const uint32_t               period_in_msecs = bdbuf_config.swapout_period;;
2751  uint32_t                     timer_delta;
2752
2753  transfer.write_req = rtems_bdbuf_swapout_writereq_alloc ();
2754  rtems_chain_initialize_empty (&transfer.bds);
2755  transfer.dev = BDBUF_INVALID_DEV;
2756
2757  /*
2758   * Localise the period.
2759   */
2760  period_in_ticks = RTEMS_MICROSECONDS_TO_TICKS (period_in_msecs * 1000);
2761
2762  /*
2763   * This is temporary. Needs to be changed to use the real time clock.
2764   */
2765  timer_delta = period_in_msecs;
2766
2767  /*
2768   * Create the worker threads.
2769   */
2770  rtems_bdbuf_swapout_workers_open ();
2771
2772  while (bdbuf_cache.swapout_enabled)
2773  {
2774    rtems_event_set   out;
2775    rtems_status_code sc;
2776
2777    /*
2778     * Only update the timers once in the processing cycle.
2779     */
2780    bool update_timers = true;
2781
2782    /*
2783     * If we write buffers to any disk perform a check again. We only write a
2784     * single device at a time and the cache may have more than one device's
2785     * buffers modified waiting to be written.
2786     */
2787    bool transfered_buffers;
2788
2789    do
2790    {
2791      transfered_buffers = false;
2792
2793      /*
2794       * Extact all the buffers we find for a specific device. The device is
2795       * the first one we find on a modified list. Process the sync queue of
2796       * buffers first.
2797       */
2798      if (rtems_bdbuf_swapout_processing (timer_delta,
2799                                          update_timers,
2800                                          &transfer))
2801      {
2802        transfered_buffers = true;
2803      }
2804
2805      /*
2806       * Only update the timers once.
2807       */
2808      update_timers = false;
2809    }
2810    while (transfered_buffers);
2811
2812    sc = rtems_event_receive (RTEMS_BDBUF_SWAPOUT_SYNC,
2813                              RTEMS_EVENT_ALL | RTEMS_WAIT,
2814                              period_in_ticks,
2815                              &out);
2816
2817    if ((sc != RTEMS_SUCCESSFUL) && (sc != RTEMS_TIMEOUT))
2818      rtems_fatal_error_occurred (BLKDEV_FATAL_BDBUF_SWAPOUT_RE);
2819  }
2820
2821  rtems_bdbuf_swapout_workers_close ();
2822
2823  free (transfer.write_req);
2824
2825  rtems_task_delete (RTEMS_SELF);
2826}
2827
2828static void
2829rtems_bdbuf_purge_list (rtems_chain_control *purge_list)
2830{
2831  bool wake_buffer_waiters = false;
2832  rtems_chain_node *node = NULL;
2833
2834  while ((node = rtems_chain_get (purge_list)) != NULL)
2835  {
2836    rtems_bdbuf_buffer *bd = (rtems_bdbuf_buffer *) node;
2837
2838    if (bd->waiters == 0)
2839      wake_buffer_waiters = true;
2840
2841    rtems_bdbuf_discard_buffer (bd);
2842  }
2843
2844  if (wake_buffer_waiters)
2845    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
2846}
2847
/* Compare handler deciding whether a buffer's device matches the device (or
 * device class) being purged. */
typedef bool (*rtems_bdbuf_purge_compare)(dev_t a, dev_t b);
2849
/**
 * Gather every buffer whose device matches (per the compare handler) onto
 * the purge list, adjusting buffer state as required. The cache's AVL tree
 * is walked iteratively with an explicit stack of parent nodes; the stack
 * depth is bounded by RTEMS_BDBUF_AVL_MAX_HEIGHT. The caller must hold the
 * cache lock.
 *
 * @param purge_list Chain that receives the buffers which can be purged now.
 * @param compare Handler deciding whether a buffer's device matches.
 * @param dev The device (or device class) to purge.
 */
static void
rtems_bdbuf_gather_for_purge (rtems_chain_control *purge_list,
                              rtems_bdbuf_purge_compare compare,
                              dev_t dev)
{
  rtems_bdbuf_buffer *stack [RTEMS_BDBUF_AVL_MAX_HEIGHT];
  rtems_bdbuf_buffer **prev = stack;
  rtems_bdbuf_buffer *cur = bdbuf_cache.tree;

  *prev = NULL;

  while (cur != NULL)
  {
    if ((*compare) (cur->dev, dev))
    {
      switch (cur->state)
      {
        /* Nothing to do: free/empty or already marked for purge. */
        case RTEMS_BDBUF_STATE_FREE:
        case RTEMS_BDBUF_STATE_EMPTY:
        case RTEMS_BDBUF_STATE_ACCESS_PURGED:
        case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
          break;
        case RTEMS_BDBUF_STATE_SYNC:
          rtems_bdbuf_wake (&bdbuf_cache.transfer_waiters);
          /* Fall through */
        case RTEMS_BDBUF_STATE_MODIFIED:
          rtems_bdbuf_group_release (cur);
          /* Fall through */
        case RTEMS_BDBUF_STATE_CACHED:
          /* Idle buffer: move it straight onto the purge list. */
          rtems_chain_extract (&cur->link);
          rtems_chain_append (purge_list, &cur->link);
          break;
        case RTEMS_BDBUF_STATE_TRANSFER:
          /* In flight: mark purged so it is discarded when the transfer
           * completes rather than now. */
          rtems_bdbuf_set_state (cur, RTEMS_BDBUF_STATE_TRANSFER_PURGED);
          break;
        case RTEMS_BDBUF_STATE_ACCESS_CACHED:
        case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
        case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
          /* Held by a user: mark purged so the release/sync path discards
           * it instead of requeueing it. */
          rtems_bdbuf_set_state (cur, RTEMS_BDBUF_STATE_ACCESS_PURGED);
          break;
        default:
          rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_STATE_11);
      }
    }

    if (cur->avl.left != NULL)
    {
      /* Left */
      ++prev;
      *prev = cur;
      cur = cur->avl.left;
    }
    else if (cur->avl.right != NULL)
    {
      /* Right */
      ++prev;
      *prev = cur;
      cur = cur->avl.right;
    }
    else
    {
      while (*prev != NULL && cur == (*prev)->avl.right)
      {
        /* Up */
        cur = *prev;
        --prev;
      }
      if (*prev != NULL)
        /* Right */
        cur = (*prev)->avl.right;
      else
        /* Finished */
        cur = NULL;
    }
  }
}
2926
2927static void
2928rtems_bdbuf_purge (rtems_bdbuf_purge_compare compare, dev_t dev)
2929{
2930  rtems_chain_control purge_list;
2931
2932  rtems_chain_initialize_empty (&purge_list);
2933  rtems_bdbuf_lock_cache ();
2934  rtems_bdbuf_gather_for_purge (&purge_list, compare, dev);
2935  rtems_bdbuf_purge_list (&purge_list);
2936  rtems_bdbuf_unlock_cache ();
2937}
2938
/* Purge filter: a buffer matches only if it belongs to exactly this
 * device. */
static bool
rtems_bdbuf_purge_compare_dev (dev_t a, dev_t b)
{
  return !(a != b);
}
2944
2945void
2946rtems_bdbuf_purge_dev (dev_t dev)
2947{
2948  rtems_bdbuf_purge (rtems_bdbuf_purge_compare_dev, dev);
2949}
2950
2951static bool
2952rtems_bdbuf_purge_compare_major (dev_t a, dev_t b)
2953{
2954  return rtems_filesystem_dev_major_t (a) == rtems_filesystem_dev_major_t (b);
2955}
2956
2957void
2958rtems_bdbuf_purge_major (rtems_device_major_number major)
2959{
2960  dev_t dev = rtems_filesystem_make_dev_t (major, 0);
2961
2962  rtems_bdbuf_purge (rtems_bdbuf_purge_compare_major, dev);
2963}
Note: See TracBrowser for help on using the repository browser.