source: rtems/cpukit/libblock/src/bdbuf.c @ b467782b

4.11
Last change on this file since b467782b was b467782b, checked in by Sebastian Huber <sebastian.huber@…>, on Mar 26, 2012 at 12:58:35 PM

libblock: Add rtems_bdbuf_set_block_size()

The new function rtems_bdbuf_set_block_size() must be used to set the
block size of a disk device. It will check if the block size is valid
and set the new fields block_to_media_block_shift and bds_per_group of
the rtems_disk_device structure. This helps to avoid complex arithmetic
operations in the block device buffer get and read path.

  • Property mode set to 100644
File size: 79.7 KB
Line 
1/**
2 * @file
3 *
4 * @ingroup rtems_bdbuf
5 *
6 * Block device buffer management.
7 */
8
9/*
10 * Disk I/O buffering
 * Buffer management
12 *
13 * Copyright (C) 2001 OKTET Ltd., St.-Peterburg, Russia
14 * Author: Andrey G. Ivanov <Andrey.Ivanov@oktet.ru>
15 *         Victor V. Vengerov <vvv@oktet.ru>
16 *         Alexander Kukuta <kam@oktet.ru>
17 *
18 * Copyright (C) 2008,2009 Chris Johns <chrisj@rtems.org>
19 *    Rewritten to remove score mutex access. Fixes many performance
20 *    issues.
21 *
22 * Copyright (c) 2009-2012 embedded brains GmbH.
23 *
24 * @(#) bdbuf.c,v 1.14 2004/04/17 08:15:17 ralf Exp
25 */
26
27/**
28 * Set to 1 to enable debug tracing.
29 */
30#define RTEMS_BDBUF_TRACE 0
31
32#if HAVE_CONFIG_H
33#include "config.h"
34#endif
35#include <limits.h>
36#include <errno.h>
37#include <stdio.h>
38#include <string.h>
39#include <inttypes.h>
40
41#include <rtems.h>
42#include <rtems/error.h>
43#include <rtems/malloc.h>
44
45#include "rtems/bdbuf.h"
46
47#define BDBUF_INVALID_DEV NULL
48
49/*
50 * Simpler label for this file.
51 */
52#define bdbuf_config rtems_bdbuf_configuration
53
/**
 * A swapout transfer transaction data. This data is passed to a worker thread
 * to handle the write phase of the transfer.
 */
typedef struct rtems_bdbuf_swapout_transfer
{
  rtems_chain_control   bds;         /**< The transfer list of BDs. */
  const rtems_disk_device *dd;       /**< The device the transfer is for. */
  bool                  syncing;     /**< The data is a sync'ing. */
  rtems_blkdev_request* write_req;   /**< The write request array. */
  uint32_t              bufs_per_bd; /**< Number of buffers per bd. */
} rtems_bdbuf_swapout_transfer;

/**
 * Swapout worker thread. These are available to take processing from the
 * main swapout thread and handle the I/O operation.
 */
typedef struct rtems_bdbuf_swapout_worker
{
  rtems_chain_node             link;     /**< The threads sit on a chain when
                                          * idle. */
  rtems_id                     id;       /**< The id of the task so we can wake
                                          * it. */
  bool                         enabled;  /**< The worker is enabled. */
  rtems_bdbuf_swapout_transfer transfer; /**< The transfer data for this
                                          * thread. */
} rtems_bdbuf_swapout_worker;

/**
 * Buffer waiters synchronization.
 */
typedef struct rtems_bdbuf_waiters {
  unsigned count; /**< Number of waiting tasks; lets the waker skip the
                   * semaphore flush when no one waits. */
  rtems_id sema;  /**< Semaphore the waiting tasks block on. */
} rtems_bdbuf_waiters;

/**
 * The BD buffer cache.
 */
typedef struct rtems_bdbuf_cache
{
  rtems_id            swapout;           /**< Swapout task ID */
  bool                swapout_enabled;   /**< Swapout is only running if
                                          * enabled. Set to false to kill the
                                          * swap out task. It deletes itself. */
  rtems_chain_control swapout_workers;   /**< The work threads for the swapout
                                          * task. */

  rtems_bdbuf_buffer* bds;               /**< Pointer to table of buffer
                                          * descriptors. */
  void*               buffers;           /**< The buffer's memory. */
  size_t              buffer_min_count;  /**< Number of minimum size buffers
                                          * that fit the buffer memory. */
  size_t              max_bds_per_group; /**< The number of BDs of minimum
                                          * buffer size that fit in a group. */
  uint32_t            flags;             /**< Configuration flags. */

  rtems_id            lock;              /**< The cache lock. It locks all
                                          * cache data, BD and lists. */
  rtems_id            sync_lock;         /**< Sync calls block writes. */
  bool                sync_active;       /**< True if a sync is active. */
  rtems_id            sync_requester;    /**< The sync requester. */
  const rtems_disk_device *sync_device;  /**< The device to sync and
                                          * BDBUF_INVALID_DEV not a device
                                          * sync. */

  rtems_bdbuf_buffer* tree;              /**< Buffer descriptor lookup AVL tree
                                          * root. There is only one. */
  rtems_chain_control lru;               /**< Least recently used list */
  rtems_chain_control modified;          /**< Modified buffers list */
  rtems_chain_control sync;              /**< Buffers to sync list */

  rtems_bdbuf_waiters access_waiters;    /**< Wait for a buffer in
                                          * ACCESS_CACHED, ACCESS_MODIFIED or
                                          * ACCESS_EMPTY
                                          * state. */
  rtems_bdbuf_waiters transfer_waiters;  /**< Wait for a buffer in TRANSFER
                                          * state. */
  rtems_bdbuf_waiters buffer_waiters;    /**< Wait for a buffer and no one is
                                          * available. */

  size_t              group_count;       /**< The number of groups. */
  rtems_bdbuf_group*  groups;            /**< The groups. */

  bool                initialised;       /**< Initialised state. */
} rtems_bdbuf_cache;
140
/**
 * Fatal errors
 *
 * The code is tagged with an ASCII 'B' in the top byte so bdbuf fatal errors
 * are recognisable in a fatal error report.
 */
#define RTEMS_BLKDEV_FATAL_ERROR(n) \
  (((uint32_t)'B' << 24) | ((uint32_t)(n) & (uint32_t)0x00FFFFFF))

#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_11      RTEMS_BLKDEV_FATAL_ERROR(1)
#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_4       RTEMS_BLKDEV_FATAL_ERROR(2)
#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_5       RTEMS_BLKDEV_FATAL_ERROR(3)
#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_6       RTEMS_BLKDEV_FATAL_ERROR(4)
#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_7       RTEMS_BLKDEV_FATAL_ERROR(5)
#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_8       RTEMS_BLKDEV_FATAL_ERROR(6)
#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_9       RTEMS_BLKDEV_FATAL_ERROR(7)
#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_10      RTEMS_BLKDEV_FATAL_ERROR(8)
#define RTEMS_BLKDEV_FATAL_BDBUF_TREE_RM       RTEMS_BLKDEV_FATAL_ERROR(9)
#define RTEMS_BLKDEV_FATAL_BDBUF_SWAPOUT       RTEMS_BLKDEV_FATAL_ERROR(10)

/*
 * The lock/unlock fatal errors occur in case the bdbuf is not initialized with
 * rtems_bdbuf_init().  General system corruption like stack overflow etc. may
 * also trigger these fatal errors.
 */
#define RTEMS_BLKDEV_FATAL_BDBUF_SYNC_LOCK     RTEMS_BLKDEV_FATAL_ERROR(11)
#define RTEMS_BLKDEV_FATAL_BDBUF_SYNC_UNLOCK   RTEMS_BLKDEV_FATAL_ERROR(12)
#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_LOCK    RTEMS_BLKDEV_FATAL_ERROR(13)
#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_UNLOCK  RTEMS_BLKDEV_FATAL_ERROR(14)

#define RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_DIS   RTEMS_BLKDEV_FATAL_ERROR(15)
#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_2  RTEMS_BLKDEV_FATAL_ERROR(16)
#define RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_RST   RTEMS_BLKDEV_FATAL_ERROR(17)
#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_TO RTEMS_BLKDEV_FATAL_ERROR(18)
#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAKE    RTEMS_BLKDEV_FATAL_ERROR(19)
#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE       RTEMS_BLKDEV_FATAL_ERROR(20)
#define RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM      RTEMS_BLKDEV_FATAL_ERROR(21)
#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_CREATE  RTEMS_BLKDEV_FATAL_ERROR(22)
#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_START   RTEMS_BLKDEV_FATAL_ERROR(23)
#define BLKDEV_FATAL_BDBUF_SWAPOUT_RE          RTEMS_BLKDEV_FATAL_ERROR(24)
#define BLKDEV_FATAL_BDBUF_SWAPOUT_TS          RTEMS_BLKDEV_FATAL_ERROR(25)
#define RTEMS_BLKDEV_FATAL_BDBUF_WAIT_EVNT     RTEMS_BLKDEV_FATAL_ERROR(26)
#define RTEMS_BLKDEV_FATAL_BDBUF_RECYCLE       RTEMS_BLKDEV_FATAL_ERROR(27)
#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_0       RTEMS_BLKDEV_FATAL_ERROR(28)
#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_1       RTEMS_BLKDEV_FATAL_ERROR(29)
#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_2       RTEMS_BLKDEV_FATAL_ERROR(30)

/**
 * The events used in this code. These should be system events rather than
 * application events.
 */
#define RTEMS_BDBUF_TRANSFER_SYNC  RTEMS_EVENT_1
#define RTEMS_BDBUF_SWAPOUT_SYNC   RTEMS_EVENT_2

/**
 * The swap out task size. Should be more than enough for most drivers with
 * tracing turned on.
 */
#define SWAPOUT_TASK_STACK_SIZE (8 * 1024)

/**
 * Lock semaphore attributes. This is used for locking type mutexes.
 *
 * @warning Priority inheritance is on.
 */
#define RTEMS_BDBUF_CACHE_LOCK_ATTRIBS \
  (RTEMS_PRIORITY | RTEMS_BINARY_SEMAPHORE | \
   RTEMS_INHERIT_PRIORITY | RTEMS_NO_PRIORITY_CEILING | RTEMS_LOCAL)

/**
 * Waiter semaphore attributes.
 *
 * @warning Do not configure as inherit priority. If a driver is in the driver
 *          initialisation table this locked semaphore will have the IDLE task
 *          as the holder and a blocking task will raise the priority of the
 *          IDLE task which can cause unusual side effects.
 */
#define RTEMS_BDBUF_CACHE_WAITER_ATTRIBS \
  (RTEMS_PRIORITY | RTEMS_SIMPLE_BINARY_SEMAPHORE | \
   RTEMS_NO_INHERIT_PRIORITY | RTEMS_NO_PRIORITY_CEILING | RTEMS_LOCAL)

/**
 * Waiter timeout. Set to non-zero to find some info on a waiter that is
 * waiting too long.
 */
#define RTEMS_BDBUF_WAIT_TIMEOUT RTEMS_NO_TIMEOUT
#if !defined (RTEMS_BDBUF_WAIT_TIMEOUT)
#define RTEMS_BDBUF_WAIT_TIMEOUT \
  (TOD_MICROSECONDS_TO_TICKS (20000000))
#endif

/*
 * The swap out task.
 */
static rtems_task rtems_bdbuf_swapout_task(rtems_task_argument arg);

/**
 * The Buffer Descriptor cache.
 */
static rtems_bdbuf_cache bdbuf_cache;

#if RTEMS_BDBUF_TRACE
/**
 * If true output the trace message.
 */
bool rtems_bdbuf_tracer;
244
245/**
246 * Return the number of items on the list.
247 *
248 * @param list The chain control.
249 * @return uint32_t The number of items on the list.
250 */
251uint32_t
252rtems_bdbuf_list_count (rtems_chain_control* list)
253{
254  rtems_chain_node* node = rtems_chain_first (list);
255  uint32_t          count = 0;
256  while (!rtems_chain_is_tail (list, node))
257  {
258    count++;
259    node = rtems_chain_next (node);
260  }
261  return count;
262}
263
264/**
265 * Show the usage for the bdbuf cache.
266 */
267void
268rtems_bdbuf_show_usage (void)
269{
270  uint32_t group;
271  uint32_t total = 0;
272  uint32_t val;
273
274  for (group = 0; group < bdbuf_cache.group_count; group++)
275    total += bdbuf_cache.groups[group].users;
276  printf ("bdbuf:group users=%lu", total);
277  val = rtems_bdbuf_list_count (&bdbuf_cache.lru);
278  printf (", lru=%lu", val);
279  total = val;
280  val = rtems_bdbuf_list_count (&bdbuf_cache.modified);
281  printf (", mod=%lu", val);
282  total += val;
283  val = rtems_bdbuf_list_count (&bdbuf_cache.sync);
284  printf (", sync=%lu", val);
285  total += val;
286  printf (", total=%lu\n", total);
287}
288
/**
 * Show the users for a group of a bd.
 *
 * @param where A label to show the context of output.
 * @param bd The bd to show the users of.
 */
void
rtems_bdbuf_show_users (const char* where, rtems_bdbuf_buffer* bd)
{
  /*
   * NOTE(review): the short labels are indexed by bd->state, so this array
   * must track the rtems_bdbuf_buf_state enumeration order -- verify against
   * rtems/bdbuf.h when states change.
   */
  const char* states[] =
    { "FR", "EM", "CH", "AC", "AM", "MD", "SY", "TR" };

  printf ("bdbuf:users: %15s: [%" PRIu32 " (%s)] %td:%td = %" PRIu32 " %s\n",
          where,
          bd->block, states[bd->state],
          bd->group - bdbuf_cache.groups,
          bd - bdbuf_cache.bds,
          bd->group->users,
          bd->group->users > 8 ? "<<<<<<<" : "");
}
#else
#define rtems_bdbuf_tracer (0)
#define rtems_bdbuf_show_usage() ((void) 0)
#define rtems_bdbuf_show_users(_w, _b) ((void) 0)
#endif
314
315/**
316 * The default maximum height of 32 allows for AVL trees having between
317 * 5,704,880 and 4,294,967,295 nodes, depending on order of insertion.  You may
318 * change this compile-time constant as you wish.
319 */
320#ifndef RTEMS_BDBUF_AVL_MAX_HEIGHT
321#define RTEMS_BDBUF_AVL_MAX_HEIGHT (32)
322#endif
323
/**
 * Raise a fatal error combining the buffer state and an error code.
 *
 * @param state The buffer descriptor state at the error site.
 * @param error One of the RTEMS_BLKDEV_FATAL_* codes.
 */
static void
rtems_bdbuf_fatal (rtems_bdbuf_buf_state state, uint32_t error)
{
  /* The state goes in the upper 16 bits so the report shows both pieces. */
  rtems_fatal_error_occurred ((((uint32_t) state) << 16) | error);
}
329
330/**
331 * Searches for the node with specified dd/block.
332 *
333 * @param root pointer to the root node of the AVL-Tree
334 * @param dd disk device search key
335 * @param block block search key
336 * @retval NULL node with the specified dd/block is not found
337 * @return pointer to the node with specified dd/block
338 */
339static rtems_bdbuf_buffer *
340rtems_bdbuf_avl_search (rtems_bdbuf_buffer** root,
341                        const rtems_disk_device *dd,
342                        rtems_blkdev_bnum    block)
343{
344  rtems_bdbuf_buffer* p = *root;
345
346  while ((p != NULL) && ((p->dd != dd) || (p->block != block)))
347  {
348    if (((uintptr_t) p->dd < (uintptr_t) dd)
349        || ((p->dd == dd) && (p->block < block)))
350    {
351      p = p->avl.right;
352    }
353    else
354    {
355      p = p->avl.left;
356    }
357  }
358
359  return p;
360}
361
/**
 * Inserts the specified node to the AVl-Tree.
 *
 * The path from the root to the insertion point is recorded on a fixed-size
 * stack (RTEMS_BDBUF_AVL_MAX_HEIGHT) and the direction taken at each node is
 * stored in avl.cache (+1 right, -1 left) for the rebalancing pass.
 *
 * @param root pointer to the root node of the AVL-Tree
 * @param node Pointer to the node to add.
 * @retval 0 The node added successfully
 * @retval -1 An error occured
 */
static int
rtems_bdbuf_avl_insert(rtems_bdbuf_buffer** root,
                       rtems_bdbuf_buffer*  node)
{
  const rtems_disk_device *dd = node->dd;
  rtems_blkdev_bnum block = node->block;

  rtems_bdbuf_buffer*  p = *root;
  rtems_bdbuf_buffer*  q;
  rtems_bdbuf_buffer*  p1;
  rtems_bdbuf_buffer*  p2;
  rtems_bdbuf_buffer*  buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT];
  rtems_bdbuf_buffer** buf_prev = buf_stack;

  bool modified = false;

  /* Empty tree: the new node becomes the root. */
  if (p == NULL)
  {
    *root = node;
    node->avl.left = NULL;
    node->avl.right = NULL;
    node->avl.bal = 0;
    return 0;
  }

  /*
   * Walk down the tree recording the path; attach the new node at the first
   * NULL child in key order (device pointer first, then block).
   */
  while (p != NULL)
  {
    *buf_prev++ = p;

    if (((uintptr_t) p->dd < (uintptr_t) dd)
        || ((p->dd == dd) && (p->block < block)))
    {
      p->avl.cache = 1;
      q = p->avl.right;
      if (q == NULL)
      {
        /* NOTE(review): q is already node here; the double assignment on the
         * next line is redundant but harmless. */
        q = node;
        p->avl.right = q = node;
        break;
      }
    }
    else if ((p->dd != dd) || (p->block != block))
    {
      p->avl.cache = -1;
      q = p->avl.left;
      if (q == NULL)
      {
        q = node;
        p->avl.left = q;
        break;
      }
    }
    else
    {
      /* Duplicate dd/block key: refuse the insert. */
      return -1;
    }

    p = q;
  }

  q->avl.left = q->avl.right = NULL;
  q->avl.bal = 0;
  modified = true;
  buf_prev--;

  /*
   * Retrace the recorded path towards the root and rebalance.  The loop
   * stops as soon as a subtree's height is unchanged (modified == false) or
   * the root has been processed.
   */
  while (modified)
  {
    if (p->avl.cache == -1)
    {
      switch (p->avl.bal)
      {
        case 1:
          p->avl.bal = 0;
          modified = false;
          break;

        case 0:
          p->avl.bal = -1;
          break;

        case -1:
          p1 = p->avl.left;
          if (p1->avl.bal == -1) /* simple LL-turn */
          {
            p->avl.left = p1->avl.right;
            p1->avl.right = p;
            p->avl.bal = 0;
            p = p1;
          }
          else /* double LR-turn */
          {
            p2 = p1->avl.right;
            p1->avl.right = p2->avl.left;
            p2->avl.left = p1;
            p->avl.left = p2->avl.right;
            p2->avl.right = p;
            if (p2->avl.bal == -1) p->avl.bal = +1; else p->avl.bal = 0;
            if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0;
            p = p2;
          }
          p->avl.bal = 0;
          modified = false;
          break;

        default:
          break;
      }
    }
    else
    {
      switch (p->avl.bal)
      {
        case -1:
          p->avl.bal = 0;
          modified = false;
          break;

        case 0:
          p->avl.bal = 1;
          break;

        case 1:
          p1 = p->avl.right;
          if (p1->avl.bal == 1) /* simple RR-turn */
          {
            p->avl.right = p1->avl.left;
            p1->avl.left = p;
            p->avl.bal = 0;
            p = p1;
          }
          else /* double RL-turn */
          {
            p2 = p1->avl.left;
            p1->avl.left = p2->avl.right;
            p2->avl.right = p1;
            p->avl.right = p2->avl.left;
            p2->avl.left = p;
            if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0;
            if (p2->avl.bal == -1) p1->avl.bal = +1; else p1->avl.bal = 0;
            p = p2;
          }
          p->avl.bal = 0;
          modified = false;
          break;

        default:
          break;
      }
    }
    /* Reattach the (possibly rotated) subtree to its parent, or update the
     * root when the path stack is exhausted. */
    q = p;
    if (buf_prev > buf_stack)
    {
      p = *--buf_prev;

      if (p->avl.cache == -1)
      {
        p->avl.left = q;
      }
      else
      {
        p->avl.right = q;
      }
    }
    else
    {
      *root = p;
      break;
    }
  };

  return 0;
}
542
543
/**
 * Removes the node from the tree.
 *
 * Works in three phases: search for the node recording the path and the
 * direction taken in avl.cache, unlink it (splicing in the in-order
 * successor when both children exist), then retrace the recorded path and
 * rebalance.
 *
 * @param root Pointer to pointer to the root node
 * @param node Pointer to the node to remove
 * @retval 0 Item removed
 * @retval -1 No such item found
 */
static int
rtems_bdbuf_avl_remove(rtems_bdbuf_buffer**      root,
                       const rtems_bdbuf_buffer* node)
{
  const rtems_disk_device *dd = node->dd;
  rtems_blkdev_bnum block = node->block;

  rtems_bdbuf_buffer*  p = *root;
  rtems_bdbuf_buffer*  q;
  rtems_bdbuf_buffer*  r;
  rtems_bdbuf_buffer*  s;
  rtems_bdbuf_buffer*  p1;
  rtems_bdbuf_buffer*  p2;
  rtems_bdbuf_buffer*  buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT];
  rtems_bdbuf_buffer** buf_prev = buf_stack;

  bool modified = false;

  memset (buf_stack, 0, sizeof(buf_stack));

  /* Phase 1: search by (dd, block) key, recording the path. */
  while (p != NULL)
  {
    *buf_prev++ = p;

    if (((uintptr_t) p->dd < (uintptr_t) dd)
        || ((p->dd == dd) && (p->block < block)))
    {
      p->avl.cache = 1;
      p = p->avl.right;
    }
    else if ((p->dd != dd) || (p->block != block))
    {
      p->avl.cache = -1;
      p = p->avl.left;
    }
    else
    {
      /* node found */
      break;
    }
  }

  if (p == NULL)
  {
    /* there is no such node */
    return -1;
  }

  q = p;

  buf_prev--;
  if (buf_prev > buf_stack)
  {
    p = *(buf_prev - 1);
  }
  else
  {
    p = NULL;
  }

  /* at this moment q - is a node to delete, p is q's parent */
  if (q->avl.right == NULL)
  {
    /* No right child: the left subtree (possibly empty) replaces q. */
    r = q->avl.left;
    if (r != NULL)
    {
      r->avl.bal = 0;
    }
    q = r;
  }
  else
  {
    rtems_bdbuf_buffer **t;

    r = q->avl.right;

    if (r->avl.left == NULL)
    {
      /* Right child has no left subtree: it directly replaces q. */
      r->avl.left = q->avl.left;
      r->avl.bal = q->avl.bal;
      r->avl.cache = 1;
      *buf_prev++ = q = r;
    }
    else
    {
      /* General case: splice in the in-order successor s (the leftmost node
       * of the right subtree), keeping the path stack consistent so the
       * rebalance pass walks the correct nodes. */
      t = buf_prev++;
      s = r;

      while (s->avl.left != NULL)
      {
        *buf_prev++ = r = s;
        s = r->avl.left;
        r->avl.cache = -1;
      }

      s->avl.left = q->avl.left;
      r->avl.left = s->avl.right;
      s->avl.right = q->avl.right;
      s->avl.bal = q->avl.bal;
      s->avl.cache = 1;

      *t = q = s;
    }
  }

  /* Reattach the replacement to the parent, or make it the new root. */
  if (p != NULL)
  {
    if (p->avl.cache == -1)
    {
      p->avl.left = q;
    }
    else
    {
      p->avl.right = q;
    }
  }
  else
  {
    *root = q;
  }

  modified = true;

  /* Phase 3: retrace the path and rebalance until a subtree keeps its
   * height (modified == false) or the root is reached. */
  while (modified)
  {
    if (buf_prev > buf_stack)
    {
      p = *--buf_prev;
    }
    else
    {
      break;
    }

    if (p->avl.cache == -1)
    {
      /* rebalance left branch */
      switch (p->avl.bal)
      {
        case -1:
          p->avl.bal = 0;
          break;
        case  0:
          p->avl.bal = 1;
          modified = false;
          break;

        case +1:
          p1 = p->avl.right;

          if (p1->avl.bal >= 0) /* simple RR-turn */
          {
            p->avl.right = p1->avl.left;
            p1->avl.left = p;

            if (p1->avl.bal == 0)
            {
              p1->avl.bal = -1;
              modified = false;
            }
            else
            {
              p->avl.bal = 0;
              p1->avl.bal = 0;
            }
            p = p1;
          }
          else /* double RL-turn */
          {
            p2 = p1->avl.left;

            p1->avl.left = p2->avl.right;
            p2->avl.right = p1;
            p->avl.right = p2->avl.left;
            p2->avl.left = p;

            if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0;
            if (p2->avl.bal == -1) p1->avl.bal = 1; else p1->avl.bal = 0;

            p = p2;
            p2->avl.bal = 0;
          }
          break;

        default:
          break;
      }
    }
    else
    {
      /* rebalance right branch */
      switch (p->avl.bal)
      {
        case +1:
          p->avl.bal = 0;
          break;

        case  0:
          p->avl.bal = -1;
          modified = false;
          break;

        case -1:
          p1 = p->avl.left;

          if (p1->avl.bal <= 0) /* simple LL-turn */
          {
            p->avl.left = p1->avl.right;
            p1->avl.right = p;
            if (p1->avl.bal == 0)
            {
              p1->avl.bal = 1;
              modified = false;
            }
            else
            {
              p->avl.bal = 0;
              p1->avl.bal = 0;
            }
            p = p1;
          }
          else /* double LR-turn */
          {
            p2 = p1->avl.right;

            p1->avl.right = p2->avl.left;
            p2->avl.left = p1;
            p->avl.left = p2->avl.right;
            p2->avl.right = p;

            if (p2->avl.bal == -1) p->avl.bal = 1; else p->avl.bal = 0;
            if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0;

            p = p2;
            p2->avl.bal = 0;
          }
          break;

        default:
          break;
      }
    }

    /* Hook the rebalanced subtree back into its parent, or the root. */
    if (buf_prev > buf_stack)
    {
      q = *(buf_prev - 1);

      if (q->avl.cache == -1)
      {
        q->avl.left = p;
      }
      else
      {
        q->avl.right = p;
      }
    }
    else
    {
      *root = p;
      break;
    }

  }

  return 0;
}
818
/**
 * Set the state of the buffer descriptor.
 *
 * @param bd The buffer descriptor to update.
 * @param state The new state.
 */
static void
rtems_bdbuf_set_state (rtems_bdbuf_buffer *bd, rtems_bdbuf_buf_state state)
{
  bd->state = state;
}
824
825static rtems_blkdev_bnum
826rtems_bdbuf_media_block (const rtems_disk_device *dd, rtems_blkdev_bnum block)
827{
828  if (dd->block_to_media_block_shift >= 0)
829    return block << dd->block_to_media_block_shift;
830  else
831    /*
832     * Change the block number for the block size to the block number for the media
833     * block size. We have to use 64bit maths. There is no short cut here.
834     */
835    return (rtems_blkdev_bnum)
836      ((((uint64_t) block) * dd->block_size) / dd->media_block_size);
837}
838
839/**
840 * Lock the mutex. A single task can nest calls.
841 *
842 * @param lock The mutex to lock.
843 * @param fatal_error_code The error code if the call fails.
844 */
845static void
846rtems_bdbuf_lock (rtems_id lock, uint32_t fatal_error_code)
847{
848  rtems_status_code sc = rtems_semaphore_obtain (lock,
849                                                 RTEMS_WAIT,
850                                                 RTEMS_NO_TIMEOUT);
851  if (sc != RTEMS_SUCCESSFUL)
852    rtems_fatal_error_occurred (fatal_error_code);
853}
854
855/**
856 * Unlock the mutex.
857 *
858 * @param lock The mutex to unlock.
859 * @param fatal_error_code The error code if the call fails.
860 */
861static void
862rtems_bdbuf_unlock (rtems_id lock, uint32_t fatal_error_code)
863{
864  rtems_status_code sc = rtems_semaphore_release (lock);
865  if (sc != RTEMS_SUCCESSFUL)
866    rtems_fatal_error_occurred (fatal_error_code);
867}
868
/**
 * Lock the cache. A single task can nest calls.  The cache lock protects all
 * cache data, BDs and lists.
 */
static void
rtems_bdbuf_lock_cache (void)
{
  rtems_bdbuf_lock (bdbuf_cache.lock, RTEMS_BLKDEV_FATAL_BDBUF_CACHE_LOCK);
}

/**
 * Unlock the cache.
 */
static void
rtems_bdbuf_unlock_cache (void)
{
  rtems_bdbuf_unlock (bdbuf_cache.lock, RTEMS_BLKDEV_FATAL_BDBUF_CACHE_UNLOCK);
}

/**
 * Lock the cache's sync. A single task can nest calls.  Holding the sync lock
 * blocks writers (see rtems_bdbuf_add_to_modified_list_after_access).
 */
static void
rtems_bdbuf_lock_sync (void)
{
  rtems_bdbuf_lock (bdbuf_cache.sync_lock, RTEMS_BLKDEV_FATAL_BDBUF_SYNC_LOCK);
}

/**
 * Unlock the cache's sync lock. Any blocked writers are woken.
 */
static void
rtems_bdbuf_unlock_sync (void)
{
  rtems_bdbuf_unlock (bdbuf_cache.sync_lock,
                      RTEMS_BLKDEV_FATAL_BDBUF_SYNC_UNLOCK);
}
905
/**
 * Increment the user count of the group owning the buffer.
 */
static void
rtems_bdbuf_group_obtain (rtems_bdbuf_buffer *bd)
{
  ++bd->group->users;
}

/**
 * Decrement the user count of the group owning the buffer.
 */
static void
rtems_bdbuf_group_release (rtems_bdbuf_buffer *bd)
{
  --bd->group->users;
}
917
918static rtems_mode
919rtems_bdbuf_disable_preemption (void)
920{
921  rtems_status_code sc = RTEMS_SUCCESSFUL;
922  rtems_mode prev_mode = 0;
923
924  sc = rtems_task_mode (RTEMS_NO_PREEMPT, RTEMS_PREEMPT_MASK, &prev_mode);
925  if (sc != RTEMS_SUCCESSFUL)
926    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_DIS);
927
928  return prev_mode;
929}
930
931static void
932rtems_bdbuf_restore_preemption (rtems_mode prev_mode)
933{
934  rtems_status_code sc = RTEMS_SUCCESSFUL;
935
936  sc = rtems_task_mode (prev_mode, RTEMS_ALL_MODE_MASKS, &prev_mode);
937  if (sc != RTEMS_SUCCESSFUL)
938    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_RST);
939}
940
/**
 * Wait until woken. Semaphores are used so a number of tasks can wait and can
 * be woken at once. Task events would require we maintain a list of tasks to
 * be woken and this would require storage and we do not know the number of
 * tasks that could be waiting.
 *
 * While we have the cache locked we can try and claim the semaphore and
 * therefore know when we release the lock to the cache we will block until the
 * semaphore is released. This may even happen before we get to block.
 *
 * A counter is used to save the release call when no one is waiting.
 *
 * The function assumes the cache is locked on entry and it will be locked on
 * exit.
 */
static void
rtems_bdbuf_anonymous_wait (rtems_bdbuf_waiters *waiters)
{
  rtems_status_code sc;
  rtems_mode        prev_mode;

  /*
   * Indicate we are waiting.
   */
  ++waiters->count;

  /*
   * Disable preemption then unlock the cache and block.  There is no POSIX
   * condition variable in the core API so this is a work around.
   *
   * The issue is a task could preempt after the cache is unlocked because it is
   * blocking or just hits that window, and before this task has blocked on the
   * semaphore. If the preempting task flushes the queue this task will not see
   * the flush and may block for ever or until another transaction flushes this
   * semaphore.
   */
  prev_mode = rtems_bdbuf_disable_preemption ();

  /*
   * Unlock the cache, wait, and lock the cache when we return.
   */
  rtems_bdbuf_unlock_cache ();

  sc = rtems_semaphore_obtain (waiters->sema, RTEMS_WAIT, RTEMS_BDBUF_WAIT_TIMEOUT);

  /*
   * Waiters are only ever woken by the semaphore flush in rtems_bdbuf_wake(),
   * which unblocks each waiting task with RTEMS_UNSATISFIED.  A timeout, or
   * any status other than RTEMS_UNSATISFIED, therefore indicates corruption
   * and is fatal.
   */
  if (sc == RTEMS_TIMEOUT)
    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_TO);

  if (sc != RTEMS_UNSATISFIED)
    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_2);

  rtems_bdbuf_lock_cache ();

  rtems_bdbuf_restore_preemption (prev_mode);

  --waiters->count;
}
998
/**
 * Wait on the given waiter queue for the buffer.  The buffer's group user
 * count and the BD waiter count are raised for the duration of the wait.
 *
 * The cache must be locked on entry and is locked again on exit (see
 * rtems_bdbuf_anonymous_wait()).
 */
static void
rtems_bdbuf_wait (rtems_bdbuf_buffer *bd, rtems_bdbuf_waiters *waiters)
{
  rtems_bdbuf_group_obtain (bd);
  ++bd->waiters;
  rtems_bdbuf_anonymous_wait (waiters);
  --bd->waiters;
  rtems_bdbuf_group_release (bd);
}
1008
1009/**
1010 * Wake a blocked resource. The resource has a counter that lets us know if
1011 * there are any waiters.
1012 */
1013static void
1014rtems_bdbuf_wake (const rtems_bdbuf_waiters *waiters)
1015{
1016  rtems_status_code sc = RTEMS_SUCCESSFUL;
1017
1018  if (waiters->count > 0)
1019  {
1020    sc = rtems_semaphore_flush (waiters->sema);
1021    if (sc != RTEMS_SUCCESSFUL)
1022      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAKE);
1023  }
1024}
1025
1026static void
1027rtems_bdbuf_wake_swapper (void)
1028{
1029  rtems_status_code sc = rtems_event_send (bdbuf_cache.swapout,
1030                                           RTEMS_BDBUF_SWAPOUT_SYNC);
1031  if (sc != RTEMS_SUCCESSFUL)
1032    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE);
1033}
1034
1035static bool
1036rtems_bdbuf_has_buffer_waiters (void)
1037{
1038  return bdbuf_cache.buffer_waiters.count;
1039}
1040
/**
 * Remove the buffer from the AVL tree.  A remove failure means the cache
 * bookkeeping is corrupt and is fatal.
 */
static void
rtems_bdbuf_remove_from_tree (rtems_bdbuf_buffer *bd)
{
  if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0)
    rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_TREE_RM);
}
1047
/**
 * Extract the buffer from the LRU list and, when it is cached, also remove it
 * from the AVL tree.  Only FREE or CACHED buffers may be processed here; any
 * other state is a fatal error.
 */
static void
rtems_bdbuf_remove_from_tree_and_lru_list (rtems_bdbuf_buffer *bd)
{
  switch (bd->state)
  {
    case RTEMS_BDBUF_STATE_FREE:
      /* Free buffers are on the LRU list only, not in the tree. */
      break;
    case RTEMS_BDBUF_STATE_CACHED:
      rtems_bdbuf_remove_from_tree (bd);
      break;
    default:
      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_10);
  }

  rtems_chain_extract_unprotected (&bd->link);
}
1064
/**
 * Set the buffer FREE and prepend it to the LRU list so it is reused first.
 */
static void
rtems_bdbuf_make_free_and_add_to_lru_list (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_FREE);
  rtems_chain_prepend_unprotected (&bdbuf_cache.lru, &bd->link);
}

/**
 * Set the buffer EMPTY.
 */
static void
rtems_bdbuf_make_empty (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_EMPTY);
}

/**
 * Set the buffer CACHED and append it to the LRU list so it is reused last.
 */
static void
rtems_bdbuf_make_cached_and_add_to_lru_list (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_CACHED);
  rtems_chain_append_unprotected (&bdbuf_cache.lru, &bd->link);
}
1084
1085static void
1086rtems_bdbuf_discard_buffer (rtems_bdbuf_buffer *bd)
1087{
1088  rtems_bdbuf_make_empty (bd);
1089
1090  if (bd->waiters == 0)
1091  {
1092    rtems_bdbuf_remove_from_tree (bd);
1093    rtems_bdbuf_make_free_and_add_to_lru_list (bd);
1094  }
1095}
1096
/*
 * Put a buffer that was held for access onto the modified list and arm its
 * hold timer.  If a sync is active for this buffer's device, first block on
 * the sync lock (with the cache unlocked) so the release is serialised
 * against the sync in progress.
 */
static void
rtems_bdbuf_add_to_modified_list_after_access (rtems_bdbuf_buffer *bd)
{
  if (bdbuf_cache.sync_active && bdbuf_cache.sync_device == bd->dd)
  {
    rtems_bdbuf_unlock_cache ();

    /*
     * Wait for the sync lock.
     */
    rtems_bdbuf_lock_sync ();

    rtems_bdbuf_unlock_sync ();
    rtems_bdbuf_lock_cache ();
  }

  /*
   * Only the first modified release sets the timer and any further user
   * accesses do not change the timer value which should move down. This
   * assumes the user's hold of the buffer is much less than the time on the
   * modified list. Resetting the timer on each access which could result in a
   * buffer never getting to 0 and never being forced onto disk. This raises a
   * difficult question. Is a snapshot of a block that is changing better than
   * nothing being written? We have tended to think we should hold changes for
   * only a specific period of time even if still changing and get onto disk
   * and letting the file system try and recover this position if it can.
   */
  if (bd->state == RTEMS_BDBUF_STATE_ACCESS_CACHED
        || bd->state == RTEMS_BDBUF_STATE_ACCESS_EMPTY)
    bd->hold_timer = bdbuf_config.swap_block_hold;

  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_MODIFIED);
  rtems_chain_append_unprotected (&bdbuf_cache.modified, &bd->link);

  /*
   * Wake tasks waiting for access to this buffer; otherwise, if tasks are
   * starving for buffers, kick the swapout task since this buffer can now
   * be written back and freed.
   */
  if (bd->waiters)
    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
  else if (rtems_bdbuf_has_buffer_waiters ())
    rtems_bdbuf_wake_swapper ();
}
1136
1137static void
1138rtems_bdbuf_add_to_lru_list_after_access (rtems_bdbuf_buffer *bd)
1139{
1140  rtems_bdbuf_group_release (bd);
1141  rtems_bdbuf_make_cached_and_add_to_lru_list (bd);
1142
1143  if (bd->waiters)
1144    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
1145  else
1146    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1147}
1148
1149/**
1150 * Compute the number of BDs per group for a given buffer size.
1151 *
1152 * @param size The buffer size. It can be any size and we scale up.
1153 */
1154static size_t
1155rtems_bdbuf_bds_per_group (size_t size)
1156{
1157  size_t bufs_per_size;
1158  size_t bds_per_size;
1159
1160  if (size > bdbuf_config.buffer_max)
1161    return 0;
1162
1163  bufs_per_size = ((size - 1) / bdbuf_config.buffer_min) + 1;
1164
1165  for (bds_per_size = 1;
1166       bds_per_size < bufs_per_size;
1167       bds_per_size <<= 1)
1168    ;
1169
1170  return bdbuf_cache.max_bds_per_group / bds_per_size;
1171}
1172
1173static void
1174rtems_bdbuf_discard_buffer_after_access (rtems_bdbuf_buffer *bd)
1175{
1176  rtems_bdbuf_group_release (bd);
1177  rtems_bdbuf_discard_buffer (bd);
1178
1179  if (bd->waiters)
1180    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
1181  else
1182    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1183}
1184
1185/**
1186 * Reallocate a group. The BDs currently allocated in the group are removed
1187 * from the ALV tree and any lists then the new BD's are prepended to the ready
1188 * list of the cache.
1189 *
1190 * @param group The group to reallocate.
1191 * @param new_bds_per_group The new count of BDs per group.
1192 * @return A buffer of this group.
1193 */
1194static rtems_bdbuf_buffer *
1195rtems_bdbuf_group_realloc (rtems_bdbuf_group* group, size_t new_bds_per_group)
1196{
1197  rtems_bdbuf_buffer* bd;
1198  size_t              b;
1199  size_t              bufs_per_bd;
1200
1201  if (rtems_bdbuf_tracer)
1202    printf ("bdbuf:realloc: %tu: %zd -> %zd\n",
1203            group - bdbuf_cache.groups, group->bds_per_group,
1204            new_bds_per_group);
1205
1206  bufs_per_bd = bdbuf_cache.max_bds_per_group / group->bds_per_group;
1207
1208  for (b = 0, bd = group->bdbuf;
1209       b < group->bds_per_group;
1210       b++, bd += bufs_per_bd)
1211    rtems_bdbuf_remove_from_tree_and_lru_list (bd);
1212
1213  group->bds_per_group = new_bds_per_group;
1214  bufs_per_bd = bdbuf_cache.max_bds_per_group / new_bds_per_group;
1215
1216  for (b = 1, bd = group->bdbuf + bufs_per_bd;
1217       b < group->bds_per_group;
1218       b++, bd += bufs_per_bd)
1219    rtems_bdbuf_make_free_and_add_to_lru_list (bd);
1220
1221  if (b > 1)
1222    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1223
1224  return group->bdbuf;
1225}
1226
1227static void
1228rtems_bdbuf_setup_empty_buffer (rtems_bdbuf_buffer *bd,
1229                                const rtems_disk_device *dd,
1230                                rtems_blkdev_bnum   block)
1231{
1232  bd->dd        = dd ;
1233  bd->block     = block;
1234  bd->avl.left  = NULL;
1235  bd->avl.right = NULL;
1236  bd->waiters   = 0;
1237
1238  if (rtems_bdbuf_avl_insert (&bdbuf_cache.tree, bd) != 0)
1239    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_RECYCLE);
1240
1241  rtems_bdbuf_make_empty (bd);
1242}
1243
1244static rtems_bdbuf_buffer *
1245rtems_bdbuf_get_buffer_from_lru_list (const rtems_disk_device *dd,
1246                                      rtems_blkdev_bnum block,
1247                                      size_t            bds_per_group)
1248{
1249  rtems_chain_node *node = rtems_chain_first (&bdbuf_cache.lru);
1250
1251  while (!rtems_chain_is_tail (&bdbuf_cache.lru, node))
1252  {
1253    rtems_bdbuf_buffer *bd = (rtems_bdbuf_buffer *) node;
1254    rtems_bdbuf_buffer *empty_bd = NULL;
1255
1256    if (rtems_bdbuf_tracer)
1257      printf ("bdbuf:next-bd: %tu (%td:%" PRId32 ") %zd -> %zd\n",
1258              bd - bdbuf_cache.bds,
1259              bd->group - bdbuf_cache.groups, bd->group->users,
1260              bd->group->bds_per_group, bds_per_group);
1261
1262    /*
1263     * If nobody waits for this BD, we may recycle it.
1264     */
1265    if (bd->waiters == 0)
1266    {
1267      if (bd->group->bds_per_group == bds_per_group)
1268      {
1269        rtems_bdbuf_remove_from_tree_and_lru_list (bd);
1270
1271        empty_bd = bd;
1272      }
1273      else if (bd->group->users == 0)
1274        empty_bd = rtems_bdbuf_group_realloc (bd->group, bds_per_group);
1275    }
1276
1277    if (empty_bd != NULL)
1278    {
1279      rtems_bdbuf_setup_empty_buffer (empty_bd, dd, block);
1280
1281      return empty_bd;
1282    }
1283
1284    node = rtems_chain_next (node);
1285  }
1286
1287  return NULL;
1288}
1289
1290/**
1291 * Initialise the cache.
1292 *
1293 * @return rtems_status_code The initialisation status.
1294 */
1295rtems_status_code
1296rtems_bdbuf_init (void)
1297{
1298  rtems_bdbuf_group*  group;
1299  rtems_bdbuf_buffer* bd;
1300  uint8_t*            buffer;
1301  size_t              b;
1302  size_t              cache_aligment;
1303  rtems_status_code   sc;
1304  rtems_mode          prev_mode;
1305
1306  if (rtems_bdbuf_tracer)
1307    printf ("bdbuf:init\n");
1308
1309  if (rtems_interrupt_is_in_progress())
1310    return RTEMS_CALLED_FROM_ISR;
1311
1312  /*
1313   * Check the configuration table values.
1314   */
1315  if ((bdbuf_config.buffer_max % bdbuf_config.buffer_min) != 0)
1316    return RTEMS_INVALID_NUMBER;
1317
1318  /*
1319   * We use a special variable to manage the initialisation incase we have
1320   * completing threads doing this. You may get errors if the another thread
1321   * makes a call and we have not finished initialisation.
1322   */
1323  prev_mode = rtems_bdbuf_disable_preemption ();
1324  if (bdbuf_cache.initialised)
1325  {
1326    rtems_bdbuf_restore_preemption (prev_mode);
1327    return RTEMS_RESOURCE_IN_USE;
1328  }
1329
1330  memset(&bdbuf_cache, 0, sizeof(bdbuf_cache));
1331  bdbuf_cache.initialised = true;
1332  rtems_bdbuf_restore_preemption (prev_mode);
1333
1334  /*
1335   * For unspecified cache alignments we use the CPU alignment.
1336   */
1337  cache_aligment = 32; /* FIXME rtems_cache_get_data_line_size() */
1338  if (cache_aligment <= 0)
1339    cache_aligment = CPU_ALIGNMENT;
1340
1341  bdbuf_cache.sync_device = BDBUF_INVALID_DEV;
1342
1343  rtems_chain_initialize_empty (&bdbuf_cache.swapout_workers);
1344  rtems_chain_initialize_empty (&bdbuf_cache.lru);
1345  rtems_chain_initialize_empty (&bdbuf_cache.modified);
1346  rtems_chain_initialize_empty (&bdbuf_cache.sync);
1347
1348  /*
1349   * Create the locks for the cache.
1350   */
1351  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'l'),
1352                               1, RTEMS_BDBUF_CACHE_LOCK_ATTRIBS, 0,
1353                               &bdbuf_cache.lock);
1354  if (sc != RTEMS_SUCCESSFUL)
1355    goto error;
1356
1357  rtems_bdbuf_lock_cache ();
1358
1359  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 's'),
1360                               1, RTEMS_BDBUF_CACHE_LOCK_ATTRIBS, 0,
1361                               &bdbuf_cache.sync_lock);
1362  if (sc != RTEMS_SUCCESSFUL)
1363    goto error;
1364
1365  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'a'),
1366                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
1367                               &bdbuf_cache.access_waiters.sema);
1368  if (sc != RTEMS_SUCCESSFUL)
1369    goto error;
1370
1371  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 't'),
1372                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
1373                               &bdbuf_cache.transfer_waiters.sema);
1374  if (sc != RTEMS_SUCCESSFUL)
1375    goto error;
1376
1377  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'b'),
1378                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
1379                               &bdbuf_cache.buffer_waiters.sema);
1380  if (sc != RTEMS_SUCCESSFUL)
1381    goto error;
1382
1383  /*
1384   * Compute the various number of elements in the cache.
1385   */
1386  bdbuf_cache.buffer_min_count =
1387    bdbuf_config.size / bdbuf_config.buffer_min;
1388  bdbuf_cache.max_bds_per_group =
1389    bdbuf_config.buffer_max / bdbuf_config.buffer_min;
1390  bdbuf_cache.group_count =
1391    bdbuf_cache.buffer_min_count / bdbuf_cache.max_bds_per_group;
1392
1393  /*
1394   * Allocate the memory for the buffer descriptors.
1395   */
1396  bdbuf_cache.bds = calloc (sizeof (rtems_bdbuf_buffer),
1397                            bdbuf_cache.buffer_min_count);
1398  if (!bdbuf_cache.bds)
1399    goto error;
1400
1401  /*
1402   * Allocate the memory for the buffer descriptors.
1403   */
1404  bdbuf_cache.groups = calloc (sizeof (rtems_bdbuf_group),
1405                               bdbuf_cache.group_count);
1406  if (!bdbuf_cache.groups)
1407    goto error;
1408
1409  /*
1410   * Allocate memory for buffer memory. The buffer memory will be cache
1411   * aligned. It is possible to free the memory allocated by rtems_memalign()
1412   * with free(). Return 0 if allocated.
1413   *
1414   * The memory allocate allows a
1415   */
1416  if (rtems_memalign ((void **) &bdbuf_cache.buffers,
1417                      cache_aligment,
1418                      bdbuf_cache.buffer_min_count * bdbuf_config.buffer_min) != 0)
1419    goto error;
1420
1421  /*
1422   * The cache is empty after opening so we need to add all the buffers to it
1423   * and initialise the groups.
1424   */
1425  for (b = 0, group = bdbuf_cache.groups,
1426         bd = bdbuf_cache.bds, buffer = bdbuf_cache.buffers;
1427       b < bdbuf_cache.buffer_min_count;
1428       b++, bd++, buffer += bdbuf_config.buffer_min)
1429  {
1430    bd->dd    = BDBUF_INVALID_DEV;
1431    bd->group  = group;
1432    bd->buffer = buffer;
1433
1434    rtems_chain_append_unprotected (&bdbuf_cache.lru, &bd->link);
1435
1436    if ((b % bdbuf_cache.max_bds_per_group) ==
1437        (bdbuf_cache.max_bds_per_group - 1))
1438      group++;
1439  }
1440
1441  for (b = 0,
1442         group = bdbuf_cache.groups,
1443         bd = bdbuf_cache.bds;
1444       b < bdbuf_cache.group_count;
1445       b++,
1446         group++,
1447         bd += bdbuf_cache.max_bds_per_group)
1448  {
1449    group->bds_per_group = bdbuf_cache.max_bds_per_group;
1450    group->bdbuf = bd;
1451  }
1452
1453  /*
1454   * Create and start swapout task. This task will create and manage the worker
1455   * threads.
1456   */
1457  bdbuf_cache.swapout_enabled = true;
1458
1459  sc = rtems_task_create (rtems_build_name('B', 'S', 'W', 'P'),
1460                          bdbuf_config.swapout_priority ?
1461                            bdbuf_config.swapout_priority :
1462                            RTEMS_BDBUF_SWAPOUT_TASK_PRIORITY_DEFAULT,
1463                          SWAPOUT_TASK_STACK_SIZE,
1464                          RTEMS_PREEMPT | RTEMS_NO_TIMESLICE | RTEMS_NO_ASR,
1465                          RTEMS_LOCAL | RTEMS_NO_FLOATING_POINT,
1466                          &bdbuf_cache.swapout);
1467  if (sc != RTEMS_SUCCESSFUL)
1468    goto error;
1469
1470  sc = rtems_task_start (bdbuf_cache.swapout,
1471                         rtems_bdbuf_swapout_task,
1472                         (rtems_task_argument) &bdbuf_cache);
1473  if (sc != RTEMS_SUCCESSFUL)
1474    goto error;
1475
1476  rtems_bdbuf_unlock_cache ();
1477
1478  return RTEMS_SUCCESSFUL;
1479
1480error:
1481
1482  if (bdbuf_cache.swapout != 0)
1483    rtems_task_delete (bdbuf_cache.swapout);
1484
1485  free (bdbuf_cache.buffers);
1486  free (bdbuf_cache.groups);
1487  free (bdbuf_cache.bds);
1488
1489  rtems_semaphore_delete (bdbuf_cache.buffer_waiters.sema);
1490  rtems_semaphore_delete (bdbuf_cache.access_waiters.sema);
1491  rtems_semaphore_delete (bdbuf_cache.transfer_waiters.sema);
1492  rtems_semaphore_delete (bdbuf_cache.sync_lock);
1493
1494  if (bdbuf_cache.lock != 0)
1495  {
1496    rtems_bdbuf_unlock_cache ();
1497    rtems_semaphore_delete (bdbuf_cache.lock);
1498  }
1499
1500  bdbuf_cache.initialised = false;
1501
1502  return RTEMS_UNSATISFIED;
1503}
1504
1505static void
1506rtems_bdbuf_wait_for_event (rtems_event_set event)
1507{
1508  rtems_status_code sc = RTEMS_SUCCESSFUL;
1509  rtems_event_set   out = 0;
1510
1511  sc = rtems_event_receive (event,
1512                            RTEMS_EVENT_ALL | RTEMS_WAIT,
1513                            RTEMS_NO_TIMEOUT,
1514                            &out);
1515
1516  if (sc != RTEMS_SUCCESSFUL || out != event)
1517    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_WAIT_EVNT);
1518}
1519
/*
 * Block until the buffer may be accessed by the caller.  On return the
 * buffer is empty, or has been extracted from its chain (cached/modified);
 * a modified buffer additionally has its group reference dropped.  Called
 * with the cache locked (see rtems_bdbuf_get_buffer_for_access()).
 */
static void
rtems_bdbuf_wait_for_access (rtems_bdbuf_buffer *bd)
{
  while (true)
  {
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_MODIFIED:
        rtems_bdbuf_group_release (bd);
        /* Fall through */
      case RTEMS_BDBUF_STATE_CACHED:
        rtems_chain_extract_unprotected (&bd->link);
        /* Fall through */
      case RTEMS_BDBUF_STATE_EMPTY:
        return;
      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
        /* Another user holds the buffer; wait for its release. */
        rtems_bdbuf_wait (bd, &bdbuf_cache.access_waiters);
        break;
      case RTEMS_BDBUF_STATE_SYNC:
      case RTEMS_BDBUF_STATE_TRANSFER:
      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
        /* Device I/O in progress; wait for the transfer to complete. */
        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
        break;
      default:
        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_7);
    }
  }
}
1551
1552static void
1553rtems_bdbuf_request_sync_for_modified_buffer (rtems_bdbuf_buffer *bd)
1554{
1555  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_SYNC);
1556  rtems_chain_extract_unprotected (&bd->link);
1557  rtems_chain_append_unprotected (&bdbuf_cache.sync, &bd->link);
1558  rtems_bdbuf_wake_swapper ();
1559}
1560
1561/**
1562 * @brief Waits until the buffer is ready for recycling.
1563 *
1564 * @retval @c true Buffer is valid and may be recycled.
1565 * @retval @c false Buffer is invalid and has to searched again.
1566 */
1567static bool
1568rtems_bdbuf_wait_for_recycle (rtems_bdbuf_buffer *bd)
1569{
1570  while (true)
1571  {
1572    switch (bd->state)
1573    {
1574      case RTEMS_BDBUF_STATE_FREE:
1575        return true;
1576      case RTEMS_BDBUF_STATE_MODIFIED:
1577        rtems_bdbuf_request_sync_for_modified_buffer (bd);
1578        break;
1579      case RTEMS_BDBUF_STATE_CACHED:
1580      case RTEMS_BDBUF_STATE_EMPTY:
1581        if (bd->waiters == 0)
1582          return true;
1583        else
1584        {
1585          /*
1586           * It is essential that we wait here without a special wait count and
1587           * without the group in use.  Otherwise we could trigger a wait ping
1588           * pong with another recycle waiter.  The state of the buffer is
1589           * arbitrary afterwards.
1590           */
1591          rtems_bdbuf_anonymous_wait (&bdbuf_cache.buffer_waiters);
1592          return false;
1593        }
1594      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
1595      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
1596      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
1597      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
1598        rtems_bdbuf_wait (bd, &bdbuf_cache.access_waiters);
1599        break;
1600      case RTEMS_BDBUF_STATE_SYNC:
1601      case RTEMS_BDBUF_STATE_TRANSFER:
1602      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
1603        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
1604        break;
1605      default:
1606        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_8);
1607    }
1608  }
1609}
1610
/*
 * Block until any sync or transfer of this buffer has completed.  Any state
 * other than SYNC/TRANSFER/TRANSFER_PURGED means no device I/O is
 * outstanding and the wait is over.
 */
static void
rtems_bdbuf_wait_for_sync_done (rtems_bdbuf_buffer *bd)
{
  while (true)
  {
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_CACHED:
      case RTEMS_BDBUF_STATE_EMPTY:
      case RTEMS_BDBUF_STATE_MODIFIED:
      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
        return;
      case RTEMS_BDBUF_STATE_SYNC:
      case RTEMS_BDBUF_STATE_TRANSFER:
      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
        break;
      default:
        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_9);
    }
  }
}
1636
1637static void
1638rtems_bdbuf_wait_for_buffer (void)
1639{
1640  if (!rtems_chain_is_empty (&bdbuf_cache.modified))
1641    rtems_bdbuf_wake_swapper ();
1642
1643  rtems_bdbuf_anonymous_wait (&bdbuf_cache.buffer_waiters);
1644}
1645
/*
 * Queue the buffer on the sync list, wake the swapout task to write it out
 * and wait until the write has completed.  Afterwards, a buffer that ended
 * up cached or empty with no waiters is made available for recycling.
 */
static void
rtems_bdbuf_sync_after_access (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_SYNC);

  rtems_chain_append_unprotected (&bdbuf_cache.sync, &bd->link);

  if (bd->waiters)
    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);

  rtems_bdbuf_wake_swapper ();
  rtems_bdbuf_wait_for_sync_done (bd);

  /*
   * We may have created a cached or empty buffer which may be recycled.
   */
  if (bd->waiters == 0
        && (bd->state == RTEMS_BDBUF_STATE_CACHED
          || bd->state == RTEMS_BDBUF_STATE_EMPTY))
  {
    if (bd->state == RTEMS_BDBUF_STATE_EMPTY)
    {
      rtems_bdbuf_remove_from_tree (bd);
      rtems_bdbuf_make_free_and_add_to_lru_list (bd);
    }
    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
  }
}
1674
1675static rtems_bdbuf_buffer *
1676rtems_bdbuf_get_buffer_for_read_ahead (const rtems_disk_device *dd,
1677                                       rtems_blkdev_bnum block,
1678                                       size_t            bds_per_group)
1679{
1680  rtems_bdbuf_buffer *bd = NULL;
1681
1682  bd = rtems_bdbuf_avl_search (&bdbuf_cache.tree, dd, block);
1683
1684  if (bd == NULL)
1685  {
1686    bd = rtems_bdbuf_get_buffer_from_lru_list (dd, block, bds_per_group);
1687
1688    if (bd != NULL)
1689      rtems_bdbuf_group_obtain (bd);
1690  }
1691  else
1692    /*
1693     * The buffer is in the cache.  So it is already available or in use, and
1694     * thus no need for a read ahead.
1695     */
1696    bd = NULL;
1697
1698  return bd;
1699}
1700
/*
 * Find or create a buffer for the given device and media block.  A cached
 * buffer whose group geometry does not match bds_per_group is recycled and
 * the search restarted.  If no buffer can be obtained the caller sleeps
 * until one is freed.  The returned buffer is ready for access and holds a
 * group reference.  Called with the cache locked.
 */
static rtems_bdbuf_buffer *
rtems_bdbuf_get_buffer_for_access (const rtems_disk_device *dd,
                                   rtems_blkdev_bnum block,
                                   size_t            bds_per_group)
{
  rtems_bdbuf_buffer *bd = NULL;

  do
  {
    bd = rtems_bdbuf_avl_search (&bdbuf_cache.tree, dd, block);

    if (bd != NULL)
    {
      if (bd->group->bds_per_group != bds_per_group)
      {
        /* Wrong group geometry: recycle the buffer and search again. */
        if (rtems_bdbuf_wait_for_recycle (bd))
        {
          rtems_bdbuf_remove_from_tree_and_lru_list (bd);
          rtems_bdbuf_make_free_and_add_to_lru_list (bd);
          rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
        }
        bd = NULL;
      }
    }
    else
    {
      bd = rtems_bdbuf_get_buffer_from_lru_list (dd, block, bds_per_group);

      if (bd == NULL)
        rtems_bdbuf_wait_for_buffer ();
    }
  }
  while (bd == NULL);

  rtems_bdbuf_wait_for_access (bd);
  rtems_bdbuf_group_obtain (bd);

  return bd;
}
1740
1741static rtems_status_code
1742rtems_bdbuf_get_media_block (const rtems_disk_device *dd,
1743                             rtems_blkdev_bnum        block,
1744                             rtems_blkdev_bnum       *media_block_ptr)
1745{
1746  /*
1747   * Compute the media block number. Drivers work with media block number not
1748   * the block number a BD may have as this depends on the block size set by
1749   * the user.
1750   */
1751  rtems_blkdev_bnum mb = rtems_bdbuf_media_block (dd, block);
1752  if (mb >= dd->size)
1753  {
1754    return RTEMS_INVALID_ID;
1755  }
1756
1757  *media_block_ptr = mb + dd->start;
1758
1759  return RTEMS_SUCCESSFUL;
1760}
1761
/*
 * Get a buffer for the block without transferring data from the media.  The
 * buffer is returned in an access state (cached, empty or modified mapped
 * to the corresponding ACCESS_* state) and must be released by the caller.
 */
rtems_status_code
rtems_bdbuf_get (const rtems_disk_device *dd,
                 rtems_blkdev_bnum    block,
                 rtems_bdbuf_buffer **bd_ptr)
{
  rtems_status_code   sc = RTEMS_SUCCESSFUL;
  rtems_bdbuf_buffer *bd = NULL;
  rtems_blkdev_bnum   media_block = 0;

  sc = rtems_bdbuf_get_media_block (dd, block, &media_block);
  if (sc != RTEMS_SUCCESSFUL)
    return sc;

  rtems_bdbuf_lock_cache ();

  /*
   * Print the block index relative to the physical disk.
   */
  if (rtems_bdbuf_tracer)
    printf ("bdbuf:get: %" PRIu32 " (%" PRIu32 ") (dev = %08x)\n",
            media_block, block, (unsigned) dd->dev);

  bd = rtems_bdbuf_get_buffer_for_access (dd, media_block, dd->bds_per_group);

  /* Map the resting state to the matching access state. */
  switch (bd->state)
  {
    case RTEMS_BDBUF_STATE_CACHED:
      rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED);
      break;
    case RTEMS_BDBUF_STATE_EMPTY:
      rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_EMPTY);
      break;
    case RTEMS_BDBUF_STATE_MODIFIED:
      /*
       * To get a modified buffer could be considered a bug in the caller
       * because you should not be getting an already modified buffer but user
       * may have modified a byte in a block then decided to seek the start and
       * write the whole block and the file system will have no record of this
       * so just gets the block to fill.
       */
      rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_MODIFIED);
      break;
    default:
      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_2);
      break;
  }

  if (rtems_bdbuf_tracer)
  {
    rtems_bdbuf_show_users ("get", bd);
    rtems_bdbuf_show_usage ();
  }

  rtems_bdbuf_unlock_cache ();

  *bd_ptr = bd;

  return RTEMS_SUCCESSFUL;
}
1821
1822/**
1823 * Call back handler called by the low level driver when the transfer has
1824 * completed. This function may be invoked from interrupt handler.
1825 *
1826 * @param arg Arbitrary argument specified in block device request
1827 *            structure (in this case - pointer to the appropriate
1828 *            block device request structure).
1829 * @param status I/O completion status
1830 */
1831static void
1832rtems_bdbuf_transfer_done (void* arg, rtems_status_code status)
1833{
1834  rtems_blkdev_request* req = (rtems_blkdev_request*) arg;
1835
1836  req->status = status;
1837
1838  rtems_event_send (req->io_task, RTEMS_BDBUF_TRANSFER_SYNC);
1839}
1840
/*
 * Build a read request for the media block plus up to
 * bdbuf_config.max_read_ahead_blocks read-ahead blocks (clamped to the end
 * of the device).  The first buffer is obtained for access and returned via
 * bd_ptr.  If it is already cached or modified the request is left with
 * bufnum == 0 and no transfer is needed.  Read-ahead buffers are added only
 * while recyclable buffers are available and the blocks are not cached.
 */
static void
rtems_bdbuf_create_read_request (const rtems_disk_device *dd,
                                 rtems_blkdev_bnum        media_block,
                                 size_t                   bds_per_group,
                                 rtems_blkdev_request    *req,
                                 rtems_bdbuf_buffer     **bd_ptr)
{
  rtems_bdbuf_buffer *bd = NULL;
  rtems_blkdev_bnum   media_block_end = dd->start + dd->size;
  /* Media blocks per logical block; use the shift when one is available. */
  rtems_blkdev_bnum   media_block_count = dd->block_to_media_block_shift >= 0 ?
    dd->block_size >> dd->block_to_media_block_shift
      : dd->block_size / dd->media_block_size;
  uint32_t            block_size = dd->block_size;
  uint32_t            transfer_index = 1;
  uint32_t            transfer_count = bdbuf_config.max_read_ahead_blocks + 1;

  /* Do not read past the end of the device. */
  if (media_block_end - media_block < transfer_count)
    transfer_count = media_block_end - media_block;

  req->req = RTEMS_BLKDEV_REQ_READ;
  req->req_done = rtems_bdbuf_transfer_done;
  req->done_arg = req;
  req->io_task = rtems_task_self ();
  req->status = RTEMS_RESOURCE_IN_USE;
  req->bufnum = 0;

  bd = rtems_bdbuf_get_buffer_for_access (dd, media_block, bds_per_group);

  *bd_ptr = bd;

  req->bufs [0].user   = bd;
  req->bufs [0].block  = media_block;
  req->bufs [0].length = block_size;
  req->bufs [0].buffer = bd->buffer;

  if (rtems_bdbuf_tracer)
    rtems_bdbuf_show_users ("read", bd);

  switch (bd->state)
  {
    case RTEMS_BDBUF_STATE_CACHED:
    case RTEMS_BDBUF_STATE_MODIFIED:
      /* Data already present; leave bufnum == 0 so no transfer happens. */
      return;
    case RTEMS_BDBUF_STATE_EMPTY:
      rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);
      break;
    default:
      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_1);
      break;
  }

  while (transfer_index < transfer_count)
  {
    media_block += media_block_count;

    bd = rtems_bdbuf_get_buffer_for_read_ahead (dd, media_block,
                                                bds_per_group);

    /* Stop the read ahead as soon as no fresh buffer is available. */
    if (bd == NULL)
      break;

    rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);

    req->bufs [transfer_index].user   = bd;
    req->bufs [transfer_index].block  = media_block;
    req->bufs [transfer_index].length = block_size;
    req->bufs [transfer_index].buffer = bd->buffer;

    if (rtems_bdbuf_tracer)
      rtems_bdbuf_show_users ("read-ahead", bd);

    ++transfer_index;
  }

  req->bufnum = transfer_index;
}
1917
/*
 * Hand the request to the disk driver and wait for completion.  The cache
 * must be unlocked during the transfer; cache_locked tells this function
 * the caller's lock state, which is preserved on return.  Afterwards each
 * buffer of the request is released from its group and either made cached
 * (on success) or discarded, and the appropriate waiters are woken.
 *
 * @retval RTEMS_SUCCESSFUL or RTEMS_UNSATISFIED Driver status, passed
 *         through.  Any other driver status is mapped to RTEMS_IO_ERROR.
 */
static rtems_status_code
rtems_bdbuf_execute_transfer_request (const rtems_disk_device *dd,
                                      rtems_blkdev_request    *req,
                                      bool                     cache_locked)
{
  rtems_status_code sc = RTEMS_SUCCESSFUL;
  int result = 0;
  uint32_t transfer_index = 0;
  bool wake_transfer_waiters = false;
  bool wake_buffer_waiters = false;

  if (cache_locked)
    rtems_bdbuf_unlock_cache ();

  result = dd->ioctl (dd->phys_dev, RTEMS_BLKIO_REQUEST, req);

  if (result == 0)
  {
    /* The driver accepted the request; wait for the completion callback. */
    rtems_bdbuf_wait_for_event (RTEMS_BDBUF_TRANSFER_SYNC);
    sc = req->status;
  }
  else
    sc = RTEMS_IO_ERROR;

  rtems_bdbuf_lock_cache ();

  for (transfer_index = 0; transfer_index < req->bufnum; ++transfer_index)
  {
    rtems_bdbuf_buffer *bd = req->bufs [transfer_index].user;
    bool waiters = bd->waiters;

    if (waiters)
      wake_transfer_waiters = true;
    else
      wake_buffer_waiters = true;

    rtems_bdbuf_group_release (bd);

    if (sc == RTEMS_SUCCESSFUL && bd->state == RTEMS_BDBUF_STATE_TRANSFER)
      rtems_bdbuf_make_cached_and_add_to_lru_list (bd);
    else
      rtems_bdbuf_discard_buffer (bd);

    if (rtems_bdbuf_tracer)
      rtems_bdbuf_show_users ("transfer", bd);
  }

  if (wake_transfer_waiters)
    rtems_bdbuf_wake (&bdbuf_cache.transfer_waiters);

  if (wake_buffer_waiters)
    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);

  if (!cache_locked)
    rtems_bdbuf_unlock_cache ();

  if (sc == RTEMS_SUCCESSFUL || sc == RTEMS_UNSATISFIED)
    return sc;
  else
    return RTEMS_IO_ERROR;
}
1979
/*
 * Get a buffer for the block and, unless it is already cached or modified,
 * read it (plus read-ahead blocks) from the media.  On success the buffer
 * is returned in an access state and must be released by the caller.
 */
rtems_status_code
rtems_bdbuf_read (const rtems_disk_device *dd,
                  rtems_blkdev_bnum    block,
                  rtems_bdbuf_buffer **bd_ptr)
{
  rtems_status_code     sc = RTEMS_SUCCESSFUL;
  rtems_blkdev_request *req = NULL;
  rtems_bdbuf_buffer   *bd = NULL;
  rtems_blkdev_bnum     media_block = 0;

  sc = rtems_bdbuf_get_media_block (dd, block, &media_block);
  if (sc != RTEMS_SUCCESSFUL)
    return sc;

  /*
   * TODO: This type of request structure is wrong and should be removed.
   */
#define bdbuf_alloc(size) __builtin_alloca (size)

  /* Stack allocation sized for the block plus all read-ahead blocks. */
  req = bdbuf_alloc (sizeof (rtems_blkdev_request) +
                     sizeof (rtems_blkdev_sg_buffer) *
                      (bdbuf_config.max_read_ahead_blocks + 1));

  if (rtems_bdbuf_tracer)
    printf ("bdbuf:read: %" PRIu32 " (%" PRIu32 ") (dev = %08x)\n",
            media_block + dd->start, block, (unsigned) dd->dev);

  rtems_bdbuf_lock_cache ();
  rtems_bdbuf_create_read_request (dd, media_block, dd->bds_per_group, req, &bd);

  /* bufnum == 0 means the buffer was already cached or modified. */
  if (req->bufnum > 0)
  {
    sc = rtems_bdbuf_execute_transfer_request (dd, req, true);
    if (sc == RTEMS_SUCCESSFUL)
    {
      /* Take the buffer off the LRU list again and re-obtain its group. */
      rtems_chain_extract_unprotected (&bd->link);
      rtems_bdbuf_group_obtain (bd);
    }
  }

  if (sc == RTEMS_SUCCESSFUL)
  {
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_CACHED:
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED);
        break;
      case RTEMS_BDBUF_STATE_MODIFIED:
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_MODIFIED);
        break;
      default:
        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_4);
        break;
    }

    if (rtems_bdbuf_tracer)
    {
      rtems_bdbuf_show_users ("read", bd);
      rtems_bdbuf_show_usage ();
    }

    *bd_ptr = bd;
  }
  else
    *bd_ptr = NULL;

  rtems_bdbuf_unlock_cache ();

  return sc;
}
2050
2051static rtems_status_code
2052rtems_bdbuf_check_bd_and_lock_cache (rtems_bdbuf_buffer *bd, const char *kind)
2053{
2054  if (bd == NULL)
2055    return RTEMS_INVALID_ADDRESS;
2056  if (rtems_bdbuf_tracer)
2057  {
2058    printf ("bdbuf:%s: %" PRIu32 "\n", kind, bd->block);
2059    rtems_bdbuf_show_users (kind, bd);
2060  }
2061  rtems_bdbuf_lock_cache();
2062
2063  return RTEMS_SUCCESSFUL;
2064}
2065
2066rtems_status_code
2067rtems_bdbuf_release (rtems_bdbuf_buffer *bd)
2068{
2069  rtems_status_code sc = RTEMS_SUCCESSFUL;
2070
2071  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "release");
2072  if (sc != RTEMS_SUCCESSFUL)
2073    return sc;
2074
2075  switch (bd->state)
2076  {
2077    case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2078      rtems_bdbuf_add_to_lru_list_after_access (bd);
2079      break;
2080    case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
2081    case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2082      rtems_bdbuf_discard_buffer_after_access (bd);
2083      break;
2084    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2085      rtems_bdbuf_add_to_modified_list_after_access (bd);
2086      break;
2087    default:
2088      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_0);
2089      break;
2090  }
2091
2092  if (rtems_bdbuf_tracer)
2093    rtems_bdbuf_show_usage ();
2094
2095  rtems_bdbuf_unlock_cache ();
2096
2097  return RTEMS_SUCCESSFUL;
2098}
2099
2100rtems_status_code
2101rtems_bdbuf_release_modified (rtems_bdbuf_buffer *bd)
2102{
2103  rtems_status_code sc = RTEMS_SUCCESSFUL;
2104
2105  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "release modified");
2106  if (sc != RTEMS_SUCCESSFUL)
2107    return sc;
2108
2109  switch (bd->state)
2110  {
2111    case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2112    case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
2113    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2114      rtems_bdbuf_add_to_modified_list_after_access (bd);
2115      break;
2116    case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2117      rtems_bdbuf_discard_buffer_after_access (bd);
2118      break;
2119    default:
2120      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_6);
2121      break;
2122  }
2123
2124  if (rtems_bdbuf_tracer)
2125    rtems_bdbuf_show_usage ();
2126
2127  rtems_bdbuf_unlock_cache ();
2128
2129  return RTEMS_SUCCESSFUL;
2130}
2131
2132rtems_status_code
2133rtems_bdbuf_sync (rtems_bdbuf_buffer *bd)
2134{
2135  rtems_status_code sc = RTEMS_SUCCESSFUL;
2136
2137  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "sync");
2138  if (sc != RTEMS_SUCCESSFUL)
2139    return sc;
2140
2141  switch (bd->state)
2142  {
2143    case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2144    case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
2145    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2146      rtems_bdbuf_sync_after_access (bd);
2147      break;
2148    case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2149      rtems_bdbuf_discard_buffer_after_access (bd);
2150      break;
2151    default:
2152      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_5);
2153      break;
2154  }
2155
2156  if (rtems_bdbuf_tracer)
2157    rtems_bdbuf_show_usage ();
2158
2159  rtems_bdbuf_unlock_cache ();
2160
2161  return RTEMS_SUCCESSFUL;
2162}
2163
/**
 * Synchronize all modified buffers of one device to disk. Blocks the caller
 * until the swapout task reports the sync transfer has completed.
 *
 * @param dd The disk device whose modified buffers are to be written.
 * @retval RTEMS_SUCCESSFUL Always; errors from the driver are not reported
 *                          through this return value.
 */
rtems_status_code
rtems_bdbuf_syncdev (const rtems_disk_device *dd)
{
  if (rtems_bdbuf_tracer)
    printf ("bdbuf:syncdev: %08x\n", (unsigned) dd->dev);

  /*
   * Take the sync lock before locking the cache. Once we have the sync lock we
   * can lock the cache. If another thread has the sync lock it will cause this
   * thread to block until it owns the sync lock then it can own the cache. The
   * sync lock can only be obtained with the cache unlocked.
   */
  rtems_bdbuf_lock_sync ();
  rtems_bdbuf_lock_cache ();

  /*
   * Set the cache to have a sync active for a specific device and let the swap
   * out task know the id of the requester to wake when done.
   *
   * The swap out task will negate the sync active flag when no more buffers
   * for the device are held on the "modified for sync" queues.
   */
  bdbuf_cache.sync_active    = true;
  bdbuf_cache.sync_requester = rtems_task_self ();
  bdbuf_cache.sync_device    = dd;

  /* Kick the swapout task, then wait (cache unlocked) for its wake event. */
  rtems_bdbuf_wake_swapper ();
  rtems_bdbuf_unlock_cache ();
  rtems_bdbuf_wait_for_event (RTEMS_BDBUF_TRANSFER_SYNC);
  rtems_bdbuf_unlock_sync ();

  return RTEMS_SUCCESSFUL;
}
2197
2198/**
2199 * Swapout transfer to the driver. The driver will break this I/O into groups
2200 * of consecutive write requests is multiple consecutive buffers are required
2201 * by the driver. The cache is not locked.
2202 *
2203 * @param transfer The transfer transaction.
2204 */
2205static void
2206rtems_bdbuf_swapout_write (rtems_bdbuf_swapout_transfer* transfer)
2207{
2208  rtems_chain_node *node;
2209
2210  if (rtems_bdbuf_tracer)
2211    printf ("bdbuf:swapout transfer: %08x\n", (unsigned) transfer->dd->dev);
2212
2213  /*
2214   * If there are buffers to transfer to the media transfer them.
2215   */
2216  if (!rtems_chain_is_empty (&transfer->bds))
2217  {
2218    /*
2219     * The last block number used when the driver only supports
2220     * continuous blocks in a single request.
2221     */
2222    uint32_t last_block = 0;
2223
2224    /*
2225     * Number of buffers per bd. This is used to detect the next
2226     * block.
2227     */
2228    uint32_t bufs_per_bd = 0;
2229
2230    const rtems_disk_device *dd = transfer->dd;
2231
2232    bufs_per_bd = dd->block_size / bdbuf_config.buffer_min;
2233
2234    /*
2235     * Take as many buffers as configured and pass to the driver. Note, the
2236     * API to the drivers has an array of buffers and if a chain was passed
2237     * we could have just passed the list. If the driver API is updated it
2238     * should be possible to make this change with little effect in this
2239     * code. The array that is passed is broken in design and should be
2240     * removed. Merging members of a struct into the first member is
2241     * trouble waiting to happen.
2242     */
2243    transfer->write_req->status = RTEMS_RESOURCE_IN_USE;
2244    transfer->write_req->bufnum = 0;
2245
2246    while ((node = rtems_chain_get_unprotected(&transfer->bds)) != NULL)
2247    {
2248      rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;
2249      bool                write = false;
2250
2251      /*
2252       * If the device only accepts sequential buffers and this is not the
2253       * first buffer (the first is always sequential, and the buffer is not
2254       * sequential then put the buffer back on the transfer chain and write
2255       * the committed buffers.
2256       */
2257
2258      if (rtems_bdbuf_tracer)
2259        printf ("bdbuf:swapout write: bd:%" PRIu32 ", bufnum:%" PRIu32 " mode:%s\n",
2260                bd->block, transfer->write_req->bufnum,
2261                dd->phys_dev->capabilities &
2262                RTEMS_BLKDEV_CAP_MULTISECTOR_CONT ? "MULIT" : "SCAT");
2263
2264      if ((dd->phys_dev->capabilities & RTEMS_BLKDEV_CAP_MULTISECTOR_CONT) &&
2265          transfer->write_req->bufnum &&
2266          (bd->block != (last_block + bufs_per_bd)))
2267      {
2268        rtems_chain_prepend_unprotected (&transfer->bds, &bd->link);
2269        write = true;
2270      }
2271      else
2272      {
2273        rtems_blkdev_sg_buffer* buf;
2274        buf = &transfer->write_req->bufs[transfer->write_req->bufnum];
2275        transfer->write_req->bufnum++;
2276        buf->user   = bd;
2277        buf->block  = bd->block;
2278        buf->length = dd->block_size;
2279        buf->buffer = bd->buffer;
2280        last_block  = bd->block;
2281      }
2282
2283      /*
2284       * Perform the transfer if there are no more buffers, or the transfer
2285       * size has reached the configured max. value.
2286       */
2287
2288      if (rtems_chain_is_empty (&transfer->bds) ||
2289          (transfer->write_req->bufnum >= bdbuf_config.max_write_blocks))
2290        write = true;
2291
2292      if (write)
2293      {
2294        rtems_bdbuf_execute_transfer_request (dd, transfer->write_req, false);
2295
2296        transfer->write_req->status = RTEMS_RESOURCE_IN_USE;
2297        transfer->write_req->bufnum = 0;
2298      }
2299    }
2300
2301    /*
2302     * If sync'ing and the deivce is capability of handling a sync IO control
2303     * call perform the call.
2304     */
2305    if (transfer->syncing &&
2306        (dd->phys_dev->capabilities & RTEMS_BLKDEV_CAP_SYNC))
2307    {
2308      /* int result = */ dd->ioctl (dd->phys_dev, RTEMS_BLKDEV_REQ_SYNC, NULL);
2309      /* How should the error be handled ? */
2310    }
2311  }
2312}
2313
2314/**
2315 * Process the modified list of buffers. There is a sync or modified list that
2316 * needs to be handled so we have a common function to do the work.
2317 *
2318 * @param dd_ptr Pointer to the device to handle. If BDBUF_INVALID_DEV no
2319 * device is selected so select the device of the first buffer to be written to
2320 * disk.
2321 * @param chain The modified chain to process.
2322 * @param transfer The chain to append buffers to be written too.
2323 * @param sync_active If true this is a sync operation so expire all timers.
2324 * @param update_timers If true update the timers.
2325 * @param timer_delta It update_timers is true update the timers by this
2326 *                    amount.
2327 */
2328static void
2329rtems_bdbuf_swapout_modified_processing (const rtems_disk_device **dd_ptr,
2330                                         rtems_chain_control* chain,
2331                                         rtems_chain_control* transfer,
2332                                         bool                 sync_active,
2333                                         bool                 update_timers,
2334                                         uint32_t             timer_delta)
2335{
2336  if (!rtems_chain_is_empty (chain))
2337  {
2338    rtems_chain_node* node = rtems_chain_head (chain);
2339    bool              sync_all;
2340   
2341    node = node->next;
2342
2343    /*
2344     * A sync active with no valid dev means sync all.
2345     */
2346    if (sync_active && (*dd_ptr == BDBUF_INVALID_DEV))
2347      sync_all = true;
2348    else
2349      sync_all = false;
2350   
2351    while (!rtems_chain_is_tail (chain, node))
2352    {
2353      rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;
2354
2355      /*
2356       * Check if the buffer's hold timer has reached 0. If a sync is active
2357       * or someone waits for a buffer written force all the timers to 0.
2358       *
2359       * @note Lots of sync requests will skew this timer. It should be based
2360       *       on TOD to be accurate. Does it matter ?
2361       */
2362      if (sync_all || (sync_active && (*dd_ptr == bd->dd))
2363          || rtems_bdbuf_has_buffer_waiters ())
2364        bd->hold_timer = 0;
2365
2366      if (bd->hold_timer)
2367      {
2368        if (update_timers)
2369        {
2370          if (bd->hold_timer > timer_delta)
2371            bd->hold_timer -= timer_delta;
2372          else
2373            bd->hold_timer = 0;
2374        }
2375
2376        if (bd->hold_timer)
2377        {
2378          node = node->next;
2379          continue;
2380        }
2381      }
2382
2383      /*
2384       * This assumes we can set it to BDBUF_INVALID_DEV which is just an
2385       * assumption. Cannot use the transfer list being empty the sync dev
2386       * calls sets the dev to use.
2387       */
2388      if (*dd_ptr == BDBUF_INVALID_DEV)
2389        *dd_ptr = bd->dd;
2390
2391      if (bd->dd == *dd_ptr)
2392      {
2393        rtems_chain_node* next_node = node->next;
2394        rtems_chain_node* tnode = rtems_chain_tail (transfer);
2395
2396        /*
2397         * The blocks on the transfer list are sorted in block order. This
2398         * means multi-block transfers for drivers that require consecutive
2399         * blocks perform better with sorted blocks and for real disks it may
2400         * help lower head movement.
2401         */
2402
2403        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);
2404
2405        rtems_chain_extract_unprotected (node);
2406
2407        tnode = tnode->previous;
2408
2409        while (node && !rtems_chain_is_head (transfer, tnode))
2410        {
2411          rtems_bdbuf_buffer* tbd = (rtems_bdbuf_buffer*) tnode;
2412
2413          if (bd->block > tbd->block)
2414          {
2415            rtems_chain_insert_unprotected (tnode, node);
2416            node = NULL;
2417          }
2418          else
2419            tnode = tnode->previous;
2420        }
2421
2422        if (node)
2423          rtems_chain_prepend_unprotected (transfer, node);
2424
2425        node = next_node;
2426      }
2427      else
2428      {
2429        node = node->next;
2430      }
2431    }
2432  }
2433}
2434
2435/**
2436 * Process the cache's modified buffers. Check the sync list first then the
2437 * modified list extracting the buffers suitable to be written to disk. We have
2438 * a device at a time. The task level loop will repeat this operation while
2439 * there are buffers to be written. If the transfer fails place the buffers
2440 * back on the modified list and try again later. The cache is unlocked while
2441 * the buffers are being written to disk.
2442 *
2443 * @param timer_delta It update_timers is true update the timers by this
2444 *                    amount.
2445 * @param update_timers If true update the timers.
2446 * @param transfer The transfer transaction data.
2447 *
2448 * @retval true Buffers where written to disk so scan again.
2449 * @retval false No buffers where written to disk.
2450 */
2451static bool
2452rtems_bdbuf_swapout_processing (unsigned long                 timer_delta,
2453                                bool                          update_timers,
2454                                rtems_bdbuf_swapout_transfer* transfer)
2455{
2456  rtems_bdbuf_swapout_worker* worker;
2457  bool                        transfered_buffers = false;
2458
2459  rtems_bdbuf_lock_cache ();
2460
2461  /*
2462   * If a sync is active do not use a worker because the current code does not
2463   * cleaning up after. We need to know the buffers have been written when
2464   * syncing to release sync lock and currently worker threads do not return to
2465   * here. We do not know the worker is the last in a sequence of sync writes
2466   * until after we have it running so we do not know to tell it to release the
2467   * lock. The simplest solution is to get the main swap out task perform all
2468   * sync operations.
2469   */
2470  if (bdbuf_cache.sync_active)
2471    worker = NULL;
2472  else
2473  {
2474    worker = (rtems_bdbuf_swapout_worker*)
2475      rtems_chain_get_unprotected (&bdbuf_cache.swapout_workers);
2476    if (worker)
2477      transfer = &worker->transfer;
2478  }
2479
2480  rtems_chain_initialize_empty (&transfer->bds);
2481  transfer->dd = BDBUF_INVALID_DEV;
2482  transfer->syncing = bdbuf_cache.sync_active;
2483 
2484  /*
2485   * When the sync is for a device limit the sync to that device. If the sync
2486   * is for a buffer handle process the devices in the order on the sync
2487   * list. This means the dev is BDBUF_INVALID_DEV.
2488   */
2489  if (bdbuf_cache.sync_active)
2490    transfer->dd = bdbuf_cache.sync_device;
2491   
2492  /*
2493   * If we have any buffers in the sync queue move them to the modified
2494   * list. The first sync buffer will select the device we use.
2495   */
2496  rtems_bdbuf_swapout_modified_processing (&transfer->dd,
2497                                           &bdbuf_cache.sync,
2498                                           &transfer->bds,
2499                                           true, false,
2500                                           timer_delta);
2501
2502  /*
2503   * Process the cache's modified list.
2504   */
2505  rtems_bdbuf_swapout_modified_processing (&transfer->dd,
2506                                           &bdbuf_cache.modified,
2507                                           &transfer->bds,
2508                                           bdbuf_cache.sync_active,
2509                                           update_timers,
2510                                           timer_delta);
2511
2512  /*
2513   * We have all the buffers that have been modified for this device so the
2514   * cache can be unlocked because the state of each buffer has been set to
2515   * TRANSFER.
2516   */
2517  rtems_bdbuf_unlock_cache ();
2518
2519  /*
2520   * If there are buffers to transfer to the media transfer them.
2521   */
2522  if (!rtems_chain_is_empty (&transfer->bds))
2523  {
2524    if (worker)
2525    {
2526      rtems_status_code sc = rtems_event_send (worker->id,
2527                                               RTEMS_BDBUF_SWAPOUT_SYNC);
2528      if (sc != RTEMS_SUCCESSFUL)
2529        rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE);
2530    }
2531    else
2532    {
2533      rtems_bdbuf_swapout_write (transfer);
2534    }
2535
2536    transfered_buffers = true;
2537  }
2538
2539  if (bdbuf_cache.sync_active && !transfered_buffers)
2540  {
2541    rtems_id sync_requester;
2542    rtems_bdbuf_lock_cache ();
2543    sync_requester = bdbuf_cache.sync_requester;
2544    bdbuf_cache.sync_active = false;
2545    bdbuf_cache.sync_requester = 0;
2546    rtems_bdbuf_unlock_cache ();
2547    if (sync_requester)
2548      rtems_event_send (sync_requester, RTEMS_BDBUF_TRANSFER_SYNC);
2549  }
2550
2551  return transfered_buffers;
2552}
2553
2554/**
2555 * Allocate the write request and initialise it for good measure.
2556 *
2557 * @return rtems_blkdev_request* The write reference memory.
2558 */
2559static rtems_blkdev_request*
2560rtems_bdbuf_swapout_writereq_alloc (void)
2561{
2562  /*
2563   * @note chrisj The rtems_blkdev_request and the array at the end is a hack.
2564   * I am disappointment at finding code like this in RTEMS. The request should
2565   * have been a rtems_chain_control. Simple, fast and less storage as the node
2566   * is already part of the buffer structure.
2567   */
2568  rtems_blkdev_request* write_req =
2569    malloc (sizeof (rtems_blkdev_request) +
2570            (bdbuf_config.max_write_blocks * sizeof (rtems_blkdev_sg_buffer)));
2571
2572  if (!write_req)
2573    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM);
2574
2575  write_req->req = RTEMS_BLKDEV_REQ_WRITE;
2576  write_req->req_done = rtems_bdbuf_transfer_done;
2577  write_req->done_arg = write_req;
2578  write_req->io_task = rtems_task_self ();
2579
2580  return write_req;
2581}
2582
2583/**
2584 * The swapout worker thread body.
2585 *
2586 * @param arg A pointer to the worker thread's private data.
2587 * @return rtems_task Not used.
2588 */
2589static rtems_task
2590rtems_bdbuf_swapout_worker_task (rtems_task_argument arg)
2591{
2592  rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) arg;
2593
2594  while (worker->enabled)
2595  {
2596    rtems_bdbuf_wait_for_event (RTEMS_BDBUF_SWAPOUT_SYNC);
2597
2598    rtems_bdbuf_swapout_write (&worker->transfer);
2599
2600    rtems_bdbuf_lock_cache ();
2601
2602    rtems_chain_initialize_empty (&worker->transfer.bds);
2603    worker->transfer.dd = BDBUF_INVALID_DEV;
2604
2605    rtems_chain_append_unprotected (&bdbuf_cache.swapout_workers, &worker->link);
2606
2607    rtems_bdbuf_unlock_cache ();
2608  }
2609
2610  free (worker->transfer.write_req);
2611  free (worker);
2612
2613  rtems_task_delete (RTEMS_SELF);
2614}
2615
2616/**
2617 * Open the swapout worker threads.
2618 */
2619static void
2620rtems_bdbuf_swapout_workers_open (void)
2621{
2622  rtems_status_code sc;
2623  size_t            w;
2624
2625  rtems_bdbuf_lock_cache ();
2626
2627  for (w = 0; w < bdbuf_config.swapout_workers; w++)
2628  {
2629    rtems_bdbuf_swapout_worker* worker;
2630
2631    worker = malloc (sizeof (rtems_bdbuf_swapout_worker));
2632    if (!worker)
2633      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM);
2634
2635    rtems_chain_append_unprotected (&bdbuf_cache.swapout_workers, &worker->link);
2636    worker->enabled = true;
2637    worker->transfer.write_req = rtems_bdbuf_swapout_writereq_alloc ();
2638
2639    rtems_chain_initialize_empty (&worker->transfer.bds);
2640    worker->transfer.dd = BDBUF_INVALID_DEV;
2641
2642    sc = rtems_task_create (rtems_build_name('B', 'D', 'o', 'a' + w),
2643                            (bdbuf_config.swapout_priority ?
2644                             bdbuf_config.swapout_priority :
2645                             RTEMS_BDBUF_SWAPOUT_TASK_PRIORITY_DEFAULT),
2646                            SWAPOUT_TASK_STACK_SIZE,
2647                            RTEMS_PREEMPT | RTEMS_NO_TIMESLICE | RTEMS_NO_ASR,
2648                            RTEMS_LOCAL | RTEMS_NO_FLOATING_POINT,
2649                            &worker->id);
2650    if (sc != RTEMS_SUCCESSFUL)
2651      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_CREATE);
2652
2653    sc = rtems_task_start (worker->id,
2654                           rtems_bdbuf_swapout_worker_task,
2655                           (rtems_task_argument) worker);
2656    if (sc != RTEMS_SUCCESSFUL)
2657      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_START);
2658  }
2659
2660  rtems_bdbuf_unlock_cache ();
2661}
2662
2663/**
2664 * Close the swapout worker threads.
2665 */
2666static void
2667rtems_bdbuf_swapout_workers_close (void)
2668{
2669  rtems_chain_node* node;
2670
2671  rtems_bdbuf_lock_cache ();
2672
2673  node = rtems_chain_first (&bdbuf_cache.swapout_workers);
2674  while (!rtems_chain_is_tail (&bdbuf_cache.swapout_workers, node))
2675  {
2676    rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) node;
2677    worker->enabled = false;
2678    rtems_event_send (worker->id, RTEMS_BDBUF_SWAPOUT_SYNC);
2679    node = rtems_chain_next (node);
2680  }
2681
2682  rtems_bdbuf_unlock_cache ();
2683}
2684
2685/**
2686 * Body of task which takes care on flushing modified buffers to the disk.
2687 *
2688 * @param arg A pointer to the global cache data. Use the global variable and
2689 *            not this.
2690 * @return rtems_task Not used.
2691 */
2692static rtems_task
2693rtems_bdbuf_swapout_task (rtems_task_argument arg)
2694{
2695  rtems_bdbuf_swapout_transfer transfer;
2696  uint32_t                     period_in_ticks;
2697  const uint32_t               period_in_msecs = bdbuf_config.swapout_period;;
2698  uint32_t                     timer_delta;
2699
2700  transfer.write_req = rtems_bdbuf_swapout_writereq_alloc ();
2701  rtems_chain_initialize_empty (&transfer.bds);
2702  transfer.dd = BDBUF_INVALID_DEV;
2703  transfer.syncing = false;
2704
2705  /*
2706   * Localise the period.
2707   */
2708  period_in_ticks = RTEMS_MICROSECONDS_TO_TICKS (period_in_msecs * 1000);
2709
2710  /*
2711   * This is temporary. Needs to be changed to use the real time clock.
2712   */
2713  timer_delta = period_in_msecs;
2714
2715  /*
2716   * Create the worker threads.
2717   */
2718  rtems_bdbuf_swapout_workers_open ();
2719
2720  while (bdbuf_cache.swapout_enabled)
2721  {
2722    rtems_event_set   out;
2723    rtems_status_code sc;
2724
2725    /*
2726     * Only update the timers once in the processing cycle.
2727     */
2728    bool update_timers = true;
2729
2730    /*
2731     * If we write buffers to any disk perform a check again. We only write a
2732     * single device at a time and the cache may have more than one device's
2733     * buffers modified waiting to be written.
2734     */
2735    bool transfered_buffers;
2736
2737    do
2738    {
2739      transfered_buffers = false;
2740
2741      /*
2742       * Extact all the buffers we find for a specific device. The device is
2743       * the first one we find on a modified list. Process the sync queue of
2744       * buffers first.
2745       */
2746      if (rtems_bdbuf_swapout_processing (timer_delta,
2747                                          update_timers,
2748                                          &transfer))
2749      {
2750        transfered_buffers = true;
2751      }
2752
2753      /*
2754       * Only update the timers once.
2755       */
2756      update_timers = false;
2757    }
2758    while (transfered_buffers);
2759
2760    sc = rtems_event_receive (RTEMS_BDBUF_SWAPOUT_SYNC,
2761                              RTEMS_EVENT_ALL | RTEMS_WAIT,
2762                              period_in_ticks,
2763                              &out);
2764
2765    if ((sc != RTEMS_SUCCESSFUL) && (sc != RTEMS_TIMEOUT))
2766      rtems_fatal_error_occurred (BLKDEV_FATAL_BDBUF_SWAPOUT_RE);
2767  }
2768
2769  rtems_bdbuf_swapout_workers_close ();
2770
2771  free (transfer.write_req);
2772
2773  rtems_task_delete (RTEMS_SELF);
2774}
2775
2776static void
2777rtems_bdbuf_purge_list (rtems_chain_control *purge_list)
2778{
2779  bool wake_buffer_waiters = false;
2780  rtems_chain_node *node = NULL;
2781
2782  while ((node = rtems_chain_get_unprotected (purge_list)) != NULL)
2783  {
2784    rtems_bdbuf_buffer *bd = (rtems_bdbuf_buffer *) node;
2785
2786    if (bd->waiters == 0)
2787      wake_buffer_waiters = true;
2788
2789    rtems_bdbuf_discard_buffer (bd);
2790  }
2791
2792  if (wake_buffer_waiters)
2793    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
2794}
2795
/*
 * Walk the cache's AVL tree and collect every buffer belonging to the given
 * device onto the purge list. Buffers that are currently in use (in transfer
 * or being accessed) cannot be removed immediately; they are marked with a
 * *_PURGED state so they are discarded when their current use ends. The
 * caller must hold the cache lock.
 */
static void
rtems_bdbuf_gather_for_purge (rtems_chain_control *purge_list,
                              const rtems_disk_device *dd)
{
  /* Explicit parent stack for a non-recursive pre-order tree traversal. */
  rtems_bdbuf_buffer *stack [RTEMS_BDBUF_AVL_MAX_HEIGHT];
  rtems_bdbuf_buffer **prev = stack;
  rtems_bdbuf_buffer *cur = bdbuf_cache.tree;

  *prev = NULL;

  while (cur != NULL)
  {
    if (cur->dd == dd)
    {
      switch (cur->state)
      {
        case RTEMS_BDBUF_STATE_FREE:
        case RTEMS_BDBUF_STATE_EMPTY:
        case RTEMS_BDBUF_STATE_ACCESS_PURGED:
        case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
          /* Already free or already marked for purge: nothing to do. */
          break;
        case RTEMS_BDBUF_STATE_SYNC:
          rtems_bdbuf_wake (&bdbuf_cache.transfer_waiters);
          /* Fall through */
        case RTEMS_BDBUF_STATE_MODIFIED:
          rtems_bdbuf_group_release (cur);
          /* Fall through */
        case RTEMS_BDBUF_STATE_CACHED:
          /* Idle buffer: move it from its current list to the purge list. */
          rtems_chain_extract_unprotected (&cur->link);
          rtems_chain_append_unprotected (purge_list, &cur->link);
          break;
        case RTEMS_BDBUF_STATE_TRANSFER:
          /* In flight: mark so it is discarded when the transfer ends. */
          rtems_bdbuf_set_state (cur, RTEMS_BDBUF_STATE_TRANSFER_PURGED);
          break;
        case RTEMS_BDBUF_STATE_ACCESS_CACHED:
        case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
        case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
          /* Held by a user: mark so it is discarded on release. */
          rtems_bdbuf_set_state (cur, RTEMS_BDBUF_STATE_ACCESS_PURGED);
          break;
        default:
          rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_STATE_11);
      }
    }

    if (cur->avl.left != NULL)
    {
      /* Left */
      ++prev;
      *prev = cur;
      cur = cur->avl.left;
    }
    else if (cur->avl.right != NULL)
    {
      /* Right */
      ++prev;
      *prev = cur;
      cur = cur->avl.right;
    }
    else
    {
      while (*prev != NULL && cur == (*prev)->avl.right)
      {
        /* Up */
        cur = *prev;
        --prev;
      }
      if (*prev != NULL)
        /* Right */
        cur = (*prev)->avl.right;
      else
        /* Finished */
        cur = NULL;
    }
  }
}
2871
2872void
2873rtems_bdbuf_purge_dev (const rtems_disk_device *dd)
2874{
2875  rtems_chain_control purge_list;
2876
2877  rtems_chain_initialize_empty (&purge_list);
2878  rtems_bdbuf_lock_cache ();
2879  rtems_bdbuf_gather_for_purge (&purge_list, dd);
2880  rtems_bdbuf_purge_list (&purge_list);
2881  rtems_bdbuf_unlock_cache ();
2882}
2883
2884rtems_status_code
2885rtems_bdbuf_set_block_size (rtems_disk_device *dd, uint32_t block_size)
2886{
2887  rtems_status_code sc = RTEMS_SUCCESSFUL;
2888
2889  rtems_bdbuf_lock_cache ();
2890
2891  if (block_size > 0)
2892  {
2893    size_t bds_per_group = rtems_bdbuf_bds_per_group (block_size);
2894
2895    if (bds_per_group != 0)
2896    {
2897      int block_to_media_block_shift = 0;
2898      uint32_t media_blocks_per_block = block_size / dd->media_block_size;
2899      uint32_t one = 1;
2900
2901      while ((one << block_to_media_block_shift) < media_blocks_per_block)
2902      {
2903        ++block_to_media_block_shift;
2904      }
2905
2906      if ((dd->media_block_size << block_to_media_block_shift) != block_size)
2907        block_to_media_block_shift = -1;
2908
2909      dd->block_size = block_size;
2910      dd->block_to_media_block_shift = block_to_media_block_shift;
2911      dd->bds_per_group = bds_per_group;
2912    }
2913    else
2914    {
2915      sc = RTEMS_INVALID_NUMBER;
2916    }
2917  }
2918  else
2919  {
2920    sc = RTEMS_INVALID_NUMBER;
2921  }
2922
2923  rtems_bdbuf_unlock_cache ();
2924
2925  return sc;
2926}
Note: See TracBrowser for help on using the repository browser.