source: rtems/cpukit/libblock/src/bdbuf.c @ 1e0a551

4.104.11
Last change on this file since 1e0a551 was 1e0a551, checked in by Thomas Doerfler <Thomas.Doerfler@…>, on Dec 18, 2009 at 3:59:30 PM

Update for block device API change

  • Property mode set to 100644
File size: 78.0 KB
Line 
1/**
2 * @file
3 *
4 * @ingroup rtems_bdbuf
5 *
6 * Block device buffer management.
7 */
8
9/*
10 * Disk I/O buffering
11 * Buffer management
12 *
13 * Copyright (C) 2001 OKTET Ltd., St.-Peterburg, Russia
14 * Author: Andrey G. Ivanov <Andrey.Ivanov@oktet.ru>
15 *         Victor V. Vengerov <vvv@oktet.ru>
16 *         Alexander Kukuta <kam@oktet.ru>
17 *
18 * Copyright (C) 2008,2009 Chris Johns <chrisj@rtems.org>
19 *    Rewritten to remove score mutex access. Fixes many performance
20 *    issues.
21 *
22 * Copyright (c) 2009 embedded brains GmbH.
23 *
24 * @(#) bdbuf.c,v 1.14 2004/04/17 08:15:17 ralf Exp
25 */
26
27/**
28 * Set to 1 to enable debug tracing.
29 */
30#define RTEMS_BDBUF_TRACE 0
31
32#if HAVE_CONFIG_H
33#include "config.h"
34#endif
35#include <limits.h>
36#include <errno.h>
37#include <assert.h>
38#include <stdio.h>
39#include <string.h>
40#include <inttypes.h>
41
42#include <rtems.h>
43#include <rtems/error.h>
44#include <rtems/malloc.h>
45
46#include "rtems/bdbuf.h"
47
48#define BDBUF_INVALID_DEV ((dev_t) -1)
49
50/*
51 * Simpler label for this file.
52 */
53#define bdbuf_config rtems_bdbuf_configuration
54
/**
 * A swapout transfer transaction data. This data is passed to a worker thread
 * to handle the write phase of the transfer.
 */
typedef struct rtems_bdbuf_swapout_transfer
{
  rtems_chain_control   bds;         /**< The transfer list of BDs. */
  dev_t                 dev;         /**< The device the transfer is for. */
  rtems_blkdev_request* write_req;   /**< The write request array. */
  uint32_t              bufs_per_bd; /**< Number of buffers per bd. */
} rtems_bdbuf_swapout_transfer;
66
/**
 * Swapout worker thread. These are available to take processing from the
 * main swapout thread and handle the I/O operation.
 */
typedef struct rtems_bdbuf_swapout_worker
{
  rtems_chain_node             link;     /**< The threads sit on a chain when
                                          * idle. */
  rtems_id                     id;       /**< The id of the task so we can wake
                                          * it. */
  volatile bool                enabled;  /**< The worker is enabled. */
  rtems_bdbuf_swapout_transfer transfer; /**< The transfer data for this
                                          * thread. */
} rtems_bdbuf_swapout_worker;
81
/**
 * Buffer waiters synchronization. The counter lets a waker skip the semaphore
 * flush when no task is waiting (see rtems_bdbuf_wake).
 */
typedef struct rtems_bdbuf_waiters {
  volatile unsigned count; /**< Number of tasks currently blocked. */
  rtems_id sema;           /**< Semaphore the waiters block on. */
} rtems_bdbuf_waiters;
89
/**
 * The BD buffer cache. There is a single instance of this structure
 * (bdbuf_cache) holding all cache state: the buffer descriptors, the AVL
 * lookup tree, the LRU/modified/sync lists, the locks and the waiter groups.
 */
typedef struct rtems_bdbuf_cache
{
  rtems_id            swapout;           /**< Swapout task ID */
  volatile bool       swapout_enabled;   /**< Swapout is only running if
                                          * enabled. Set to false to kill the
                                          * swap out task. It deletes itself. */
  rtems_chain_control swapout_workers;   /**< The work threads for the swapout
                                          * task. */

  rtems_bdbuf_buffer* bds;               /**< Pointer to table of buffer
                                          * descriptors. */
  void*               buffers;           /**< The buffer's memory. */
  size_t              buffer_min_count;  /**< Number of minimum size buffers
                                          * that fit the buffer memory. */
  size_t              max_bds_per_group; /**< The number of BDs of minimum
                                          * buffer size that fit in a group. */
  uint32_t            flags;             /**< Configuration flags. */

  rtems_id            lock;              /**< The cache lock. It locks all
                                          * cache data, BD and lists. */
  rtems_id            sync_lock;         /**< Sync calls block writes. */
  volatile bool       sync_active;       /**< True if a sync is active. */
  volatile rtems_id   sync_requester;    /**< The sync requester. */
  volatile dev_t      sync_device;       /**< The device to sync and
                                          * BDBUF_INVALID_DEV not a device
                                          * sync. */

  rtems_bdbuf_buffer* tree;              /**< Buffer descriptor lookup AVL tree
                                          * root. There is only one. */
  rtems_chain_control lru;               /**< Least recently used list */
  rtems_chain_control modified;          /**< Modified buffers list */
  rtems_chain_control sync;              /**< Buffers to sync list */

  rtems_bdbuf_waiters access_waiters;    /**< Wait for a buffer in ACCESS
                                          * state. */
  rtems_bdbuf_waiters transfer_waiters;  /**< Wait for a buffer in TRANSFER
                                          * state. */
  rtems_bdbuf_waiters buffer_waiters;    /**< Wait for a buffer and no one is
                                          * available. */

  size_t              group_count;       /**< The number of groups. */
  rtems_bdbuf_group*  groups;            /**< The groups. */

  bool                initialised;       /**< Initialised state. */
} rtems_bdbuf_cache;
138
139/**
140 * Fatal errors
141 */
142#define RTEMS_BLKDEV_FATAL_ERROR(n) \
143  (((uint32_t)'B' << 24) | ((uint32_t)(n) & (uint32_t)0x00FFFFFF))
144
145#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_3       RTEMS_BLKDEV_FATAL_ERROR(1)
146#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_4       RTEMS_BLKDEV_FATAL_ERROR(2)
147#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_5       RTEMS_BLKDEV_FATAL_ERROR(3)
148#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_6       RTEMS_BLKDEV_FATAL_ERROR(4)
149#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_7       RTEMS_BLKDEV_FATAL_ERROR(5)
150#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_8       RTEMS_BLKDEV_FATAL_ERROR(6)
151#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_9       RTEMS_BLKDEV_FATAL_ERROR(7)
152#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_10      RTEMS_BLKDEV_FATAL_ERROR(8)
153#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_RM      RTEMS_BLKDEV_FATAL_ERROR(9)
154#define RTEMS_BLKDEV_FATAL_BDBUF_SWAPOUT       RTEMS_BLKDEV_FATAL_ERROR(10)
155#define RTEMS_BLKDEV_FATAL_BDBUF_SYNC_LOCK     RTEMS_BLKDEV_FATAL_ERROR(11)
156#define RTEMS_BLKDEV_FATAL_BDBUF_SYNC_UNLOCK   RTEMS_BLKDEV_FATAL_ERROR(12)
157#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_LOCK    RTEMS_BLKDEV_FATAL_ERROR(13)
158#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_UNLOCK  RTEMS_BLKDEV_FATAL_ERROR(14)
159#define RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_DIS   RTEMS_BLKDEV_FATAL_ERROR(15)
160#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_2  RTEMS_BLKDEV_FATAL_ERROR(16)
161#define RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_RST   RTEMS_BLKDEV_FATAL_ERROR(17)
162#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_TO RTEMS_BLKDEV_FATAL_ERROR(18)
163#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAKE    RTEMS_BLKDEV_FATAL_ERROR(19)
164#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE       RTEMS_BLKDEV_FATAL_ERROR(20)
165#define RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM      RTEMS_BLKDEV_FATAL_ERROR(21)
166#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_CREATE  RTEMS_BLKDEV_FATAL_ERROR(22)
167#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_START   RTEMS_BLKDEV_FATAL_ERROR(23)
168#define BLKDEV_FATAL_BDBUF_SWAPOUT_RE          RTEMS_BLKDEV_FATAL_ERROR(24)
169#define BLKDEV_FATAL_BDBUF_SWAPOUT_TS          RTEMS_BLKDEV_FATAL_ERROR(25)
170#define RTEMS_BLKDEV_FATAL_BDBUF_WAIT_EVNT     RTEMS_BLKDEV_FATAL_ERROR(26)
171#define RTEMS_BLKDEV_FATAL_BDBUF_RECYCLE       RTEMS_BLKDEV_FATAL_ERROR(27)
172#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_0       RTEMS_BLKDEV_FATAL_ERROR(28)
173#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_1       RTEMS_BLKDEV_FATAL_ERROR(29)
174#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_2       RTEMS_BLKDEV_FATAL_ERROR(30)
175#define RTEMS_BLKDEV_FATAL_BDBUF_DISK_REL      RTEMS_BLKDEV_FATAL_ERROR(31)
176
177/**
178 * The events used in this code. These should be system events rather than
179 * application events.
180 */
181#define RTEMS_BDBUF_TRANSFER_SYNC  RTEMS_EVENT_1
182#define RTEMS_BDBUF_SWAPOUT_SYNC   RTEMS_EVENT_2
183
184/**
185 * The swap out task size. Should be more than enough for most drivers with
186 * tracing turned on.
187 */
188#define SWAPOUT_TASK_STACK_SIZE (8 * 1024)
189
190/**
191 * Lock semaphore attributes. This is used for locking type mutexes.
192 *
193 * @warning Priority inheritance is on.
194 */
195#define RTEMS_BDBUF_CACHE_LOCK_ATTRIBS \
196  (RTEMS_PRIORITY | RTEMS_BINARY_SEMAPHORE | \
197   RTEMS_INHERIT_PRIORITY | RTEMS_NO_PRIORITY_CEILING | RTEMS_LOCAL)
198
199/**
200 * Waiter semaphore attributes.
201 *
202 * @warning Do not configure as inherit priority. If a driver is in the driver
203 *          initialisation table this locked semaphore will have the IDLE task
204 *          as the holder and a blocking task will raise the priority of the
205 *          IDLE task which can cause unusual side effects.
206 */
207#define RTEMS_BDBUF_CACHE_WAITER_ATTRIBS \
208  (RTEMS_PRIORITY | RTEMS_SIMPLE_BINARY_SEMAPHORE | \
209   RTEMS_NO_INHERIT_PRIORITY | RTEMS_NO_PRIORITY_CEILING | RTEMS_LOCAL)
210
211/**
212 * Waiter timeout. Set to non-zero to find some info on a waiter that is
213 * waiting too long.
214 */
215#define RTEMS_BDBUF_WAIT_TIMEOUT RTEMS_NO_TIMEOUT
216#if !defined (RTEMS_BDBUF_WAIT_TIMEOUT)
217#define RTEMS_BDBUF_WAIT_TIMEOUT \
218  (TOD_MICROSECONDS_TO_TICKS (20000000))
219#endif
220
221/*
222 * The swap out task.
223 */
224static rtems_task rtems_bdbuf_swapout_task(rtems_task_argument arg);
225
226/**
227 * The Buffer Descriptor cache.
228 */
229static rtems_bdbuf_cache bdbuf_cache;
230
231#if RTEMS_BDBUF_TRACE
232/**
233 * If true output the trace message.
234 */
235bool rtems_bdbuf_tracer;
236
237/**
238 * Return the number of items on the list.
239 *
240 * @param list The chain control.
241 * @return uint32_t The number of items on the list.
242 */
243uint32_t
244rtems_bdbuf_list_count (rtems_chain_control* list)
245{
246  rtems_chain_node* node = rtems_chain_first (list);
247  uint32_t          count = 0;
248  while (!rtems_chain_is_tail (list, node))
249  {
250    count++;
251    node = rtems_chain_next (node);
252  }
253  return count;
254}
255
256/**
257 * Show the usage for the bdbuf cache.
258 */
259void
260rtems_bdbuf_show_usage (void)
261{
262  uint32_t group;
263  uint32_t total = 0;
264  uint32_t val;
265
266  for (group = 0; group < bdbuf_cache.group_count; group++)
267    total += bdbuf_cache.groups[group].users;
268  printf ("bdbuf:group users=%lu", total);
269  val = rtems_bdbuf_list_count (&bdbuf_cache.lru);
270  printf (", lru=%lu", val);
271  total = val;
272  val = rtems_bdbuf_list_count (&bdbuf_cache.modified);
273  printf (", mod=%lu", val);
274  total += val;
275  val = rtems_bdbuf_list_count (&bdbuf_cache.sync);
276  printf (", sync=%lu", val);
277  total += val;
278  printf (", total=%lu\n", total);
279}
280
281/**
282 * Show the users for a group of a bd.
283 *
284 * @param where A label to show the context of output.
285 * @param bd The bd to show the users of.
286 */
287void
288rtems_bdbuf_show_users (const char* where, rtems_bdbuf_buffer* bd)
289{
290  const char* states[] =
291    { "EM", "FR", "CH", "AC", "AM", "MD", "SY", "TR" };
292
293  printf ("bdbuf:users: %15s: [%" PRIu32 " (%s)] %td:%td = %" PRIu32 " %s\n",
294          where,
295          bd->block, states[bd->state],
296          bd->group - bdbuf_cache.groups,
297          bd - bdbuf_cache.bds,
298          bd->group->users,
299          bd->group->users > 8 ? "<<<<<<<" : "");
300}
301#else
302#define rtems_bdbuf_tracer (0)
303#define rtems_bdbuf_show_usage() ((void) 0)
304#define rtems_bdbuf_show_users(_w, _b) ((void) 0)
305#endif
306
307/**
308 * The default maximum height of 32 allows for AVL trees having between
309 * 5,704,880 and 4,294,967,295 nodes, depending on order of insertion.  You may
310 * change this compile-time constant as you wish.
311 */
312#ifndef RTEMS_BDBUF_AVL_MAX_HEIGHT
313#define RTEMS_BDBUF_AVL_MAX_HEIGHT (32)
314#endif
315
316static void
317rtems_bdbuf_fatal (rtems_bdbuf_buf_state state, uint32_t error)
318{
319  rtems_fatal_error_occurred ((((uint32_t) state) << 16) | error);
320}
321
322/**
323 * Searches for the node with specified dev/block.
324 *
325 * @param root pointer to the root node of the AVL-Tree
326 * @param dev device search key
327 * @param block block search key
328 * @retval NULL node with the specified dev/block is not found
329 * @return pointer to the node with specified dev/block
330 */
331static rtems_bdbuf_buffer *
332rtems_bdbuf_avl_search (rtems_bdbuf_buffer** root,
333                        dev_t                dev,
334                        rtems_blkdev_bnum    block)
335{
336  rtems_bdbuf_buffer* p = *root;
337
338  while ((p != NULL) && ((p->dev != dev) || (p->block != block)))
339  {
340    if ((p->dev < dev) || ((p->dev == dev) && (p->block < block)))
341    {
342      p = p->avl.right;
343    }
344    else
345    {
346      p = p->avl.left;
347    }
348  }
349
350  return p;
351}
352
/**
 * Inserts the specified node to the AVL-Tree.
 *
 * The tree is keyed by (dev, block). The descent path is recorded on a fixed
 * stack (RTEMS_BDBUF_AVL_MAX_HEIGHT entries) so the rebalance pass can walk
 * back towards the root without parent pointers. avl.cache records which
 * child was taken at each node: +1 right, -1 left.
 *
 * @param root pointer to the root node of the AVL-Tree
 * @param node Pointer to the node to add.
 * @retval 0 The node added successfully
 * @retval -1 An error occurred (a node with this dev/block already exists)
 */
static int
rtems_bdbuf_avl_insert(rtems_bdbuf_buffer** root,
                       rtems_bdbuf_buffer*  node)
{
  dev_t             dev = node->dev;
  rtems_blkdev_bnum block = node->block;

  rtems_bdbuf_buffer*  p = *root;
  rtems_bdbuf_buffer*  q;
  rtems_bdbuf_buffer*  p1;
  rtems_bdbuf_buffer*  p2;
  rtems_bdbuf_buffer*  buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT];
  rtems_bdbuf_buffer** buf_prev = buf_stack;

  bool modified = false;

  /* Empty tree: the new node becomes the root. */
  if (p == NULL)
  {
    *root = node;
    node->avl.left = NULL;
    node->avl.right = NULL;
    node->avl.bal = 0;
    return 0;
  }

  /* Descend to the insertion point, recording the path taken. */
  while (p != NULL)
  {
    *buf_prev++ = p;

    if ((p->dev < dev) || ((p->dev == dev) && (p->block < block)))
    {
      p->avl.cache = 1;
      q = p->avl.right;
      if (q == NULL)
      {
        /* NOTE(review): this assignment is redundant; the next line assigns
         * q = node again while linking the child. Left as-is. */
        q = node;
        p->avl.right = q = node;
        break;
      }
    }
    else if ((p->dev != dev) || (p->block != block))
    {
      p->avl.cache = -1;
      q = p->avl.left;
      if (q == NULL)
      {
        q = node;
        p->avl.left = q;
        break;
      }
    }
    else
    {
      /* Duplicate (dev, block) key: refuse the insert. */
      return -1;
    }

    p = q;
  }

  /* Initialise the freshly linked leaf. */
  q->avl.left = q->avl.right = NULL;
  q->avl.bal = 0;
  modified = true;
  buf_prev--;

  /*
   * Walk back up the recorded path updating balance factors and applying
   * LL/LR/RR/RL rotations where a subtree grew too deep. The walk stops once
   * a subtree's height is unchanged (modified == false) or the root has been
   * re-linked.
   */
  while (modified)
  {
    if (p->avl.cache == -1)
    {
      /* The new node went into p's left subtree. */
      switch (p->avl.bal)
      {
        case 1:
          p->avl.bal = 0;
          modified = false;
          break;

        case 0:
          p->avl.bal = -1;
          break;

        case -1:
          p1 = p->avl.left;
          if (p1->avl.bal == -1) /* simple LL-turn */
          {
            p->avl.left = p1->avl.right;
            p1->avl.right = p;
            p->avl.bal = 0;
            p = p1;
          }
          else /* double LR-turn */
          {
            p2 = p1->avl.right;
            p1->avl.right = p2->avl.left;
            p2->avl.left = p1;
            p->avl.left = p2->avl.right;
            p2->avl.right = p;
            if (p2->avl.bal == -1) p->avl.bal = +1; else p->avl.bal = 0;
            if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0;
            p = p2;
          }
          p->avl.bal = 0;
          modified = false;
          break;

        default:
          break;
      }
    }
    else
    {
      /* The new node went into p's right subtree. */
      switch (p->avl.bal)
      {
        case -1:
          p->avl.bal = 0;
          modified = false;
          break;

        case 0:
          p->avl.bal = 1;
          break;

        case 1:
          p1 = p->avl.right;
          if (p1->avl.bal == 1) /* simple RR-turn */
          {
            p->avl.right = p1->avl.left;
            p1->avl.left = p;
            p->avl.bal = 0;
            p = p1;
          }
          else /* double RL-turn */
          {
            p2 = p1->avl.left;
            p1->avl.left = p2->avl.right;
            p2->avl.right = p1;
            p->avl.right = p2->avl.left;
            p2->avl.left = p;
            if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0;
            if (p2->avl.bal == -1) p1->avl.bal = +1; else p1->avl.bal = 0;
            p = p2;
          }
          p->avl.bal = 0;
          modified = false;
          break;

        default:
          break;
      }
    }
    /* Re-link the (possibly rotated) subtree root into the parent. */
    q = p;
    if (buf_prev > buf_stack)
    {
      p = *--buf_prev;

      if (p->avl.cache == -1)
      {
        p->avl.left = q;
      }
      else
      {
        p->avl.right = q;
      }
    }
    else
    {
      *root = p;
      break;
    }
  };

  return 0;
}
532
533
/**
 * Removes the node from the tree.
 *
 * The descent path is recorded on a fixed stack so the rebalance pass can
 * walk back towards the root; avl.cache records the direction taken at each
 * node (+1 right, -1 left). When the victim has two children it is replaced
 * by its in-order successor (leftmost node of the right subtree).
 *
 * @param root Pointer to pointer to the root node
 * @param node Pointer to the node to remove
 * @retval 0 Item removed
 * @retval -1 No such item found
 */
static int
rtems_bdbuf_avl_remove(rtems_bdbuf_buffer**      root,
                       const rtems_bdbuf_buffer* node)
{
  dev_t             dev = node->dev;
  rtems_blkdev_bnum block = node->block;

  rtems_bdbuf_buffer*  p = *root;
  rtems_bdbuf_buffer*  q;
  rtems_bdbuf_buffer*  r;
  rtems_bdbuf_buffer*  s;
  rtems_bdbuf_buffer*  p1;
  rtems_bdbuf_buffer*  p2;
  rtems_bdbuf_buffer*  buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT];
  rtems_bdbuf_buffer** buf_prev = buf_stack;

  bool modified = false;

  memset (buf_stack, 0, sizeof(buf_stack));

  /* Descend to the node with the matching (dev, block) key. */
  while (p != NULL)
  {
    *buf_prev++ = p;

    if ((p->dev < dev) || ((p->dev == dev) && (p->block < block)))
    {
      p->avl.cache = 1;
      p = p->avl.right;
    }
    else if ((p->dev != dev) || (p->block != block))
    {
      p->avl.cache = -1;
      p = p->avl.left;
    }
    else
    {
      /* node found */
      break;
    }
  }

  if (p == NULL)
  {
    /* there is no such node */
    return -1;
  }

  q = p;

  buf_prev--;
  if (buf_prev > buf_stack)
  {
    p = *(buf_prev - 1);
  }
  else
  {
    p = NULL;
  }

  /* at this moment q - is a node to delete, p is q's parent */
  if (q->avl.right == NULL)
  {
    /* No right child: splice the left subtree into q's place. */
    r = q->avl.left;
    if (r != NULL)
    {
      r->avl.bal = 0;
    }
    q = r;
  }
  else
  {
    rtems_bdbuf_buffer **t;

    r = q->avl.right;

    if (r->avl.left == NULL)
    {
      /* Right child has no left subtree: it replaces q directly. */
      r->avl.left = q->avl.left;
      r->avl.bal = q->avl.bal;
      r->avl.cache = 1;
      *buf_prev++ = q = r;
    }
    else
    {
      /*
       * General case: find q's in-order successor s (leftmost in the right
       * subtree), splice it out and let it take q's place. t remembers the
       * stack slot where the replacement must be recorded.
       */
      t = buf_prev++;
      s = r;

      while (s->avl.left != NULL)
      {
        *buf_prev++ = r = s;
        s = r->avl.left;
        r->avl.cache = -1;
      }

      s->avl.left = q->avl.left;
      r->avl.left = s->avl.right;
      s->avl.right = q->avl.right;
      s->avl.bal = q->avl.bal;
      s->avl.cache = 1;

      *t = q = s;
    }
  }

  /* Hook the replacement subtree into q's former parent (or the root). */
  if (p != NULL)
  {
    if (p->avl.cache == -1)
    {
      p->avl.left = q;
    }
    else
    {
      p->avl.right = q;
    }
  }
  else
  {
    *root = q;
  }

  modified = true;

  /*
   * Walk back up the recorded path; a deletion can shorten a subtree, so
   * rebalance until a subtree's height is unchanged or the root is reached.
   */
  while (modified)
  {
    if (buf_prev > buf_stack)
    {
      p = *--buf_prev;
    }
    else
    {
      break;
    }

    if (p->avl.cache == -1)
    {
      /* rebalance left branch */
      switch (p->avl.bal)
      {
        case -1:
          p->avl.bal = 0;
          break;
        case  0:
          p->avl.bal = 1;
          modified = false;
          break;

        case +1:
          p1 = p->avl.right;

          if (p1->avl.bal >= 0) /* simple RR-turn */
          {
            p->avl.right = p1->avl.left;
            p1->avl.left = p;

            if (p1->avl.bal == 0)
            {
              p1->avl.bal = -1;
              modified = false;
            }
            else
            {
              p->avl.bal = 0;
              p1->avl.bal = 0;
            }
            p = p1;
          }
          else /* double RL-turn */
          {
            p2 = p1->avl.left;

            p1->avl.left = p2->avl.right;
            p2->avl.right = p1;
            p->avl.right = p2->avl.left;
            p2->avl.left = p;

            if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0;
            if (p2->avl.bal == -1) p1->avl.bal = 1; else p1->avl.bal = 0;

            p = p2;
            p2->avl.bal = 0;
          }
          break;

        default:
          break;
      }
    }
    else
    {
      /* rebalance right branch */
      switch (p->avl.bal)
      {
        case +1:
          p->avl.bal = 0;
          break;

        case  0:
          p->avl.bal = -1;
          modified = false;
          break;

        case -1:
          p1 = p->avl.left;

          if (p1->avl.bal <= 0) /* simple LL-turn */
          {
            p->avl.left = p1->avl.right;
            p1->avl.right = p;
            if (p1->avl.bal == 0)
            {
              p1->avl.bal = 1;
              modified = false;
            }
            else
            {
              p->avl.bal = 0;
              p1->avl.bal = 0;
            }
            p = p1;
          }
          else /* double LR-turn */
          {
            p2 = p1->avl.right;

            p1->avl.right = p2->avl.left;
            p2->avl.left = p1;
            p->avl.left = p2->avl.right;
            p2->avl.right = p;

            if (p2->avl.bal == -1) p->avl.bal = 1; else p->avl.bal = 0;
            if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0;

            p = p2;
            p2->avl.bal = 0;
          }
          break;

        default:
          break;
      }
    }

    /* Re-link the (possibly rotated) subtree root into its parent. */
    if (buf_prev > buf_stack)
    {
      q = *(buf_prev - 1);

      if (q->avl.cache == -1)
      {
        q->avl.left = p;
      }
      else
      {
        q->avl.right = p;
      }
    }
    else
    {
      *root = p;
      break;
    }

  }

  return 0;
}
807
/**
 * Set the state of a buffer descriptor. Centralised so all state transitions
 * go through a single point.
 */
static void
rtems_bdbuf_set_state (rtems_bdbuf_buffer *bd, rtems_bdbuf_buf_state state)
{
  bd->state = state;
}
813
814/**
815 * Change the block number for the block size to the block number for the media
816 * block size. We have to use 64bit maths. There is no short cut here.
817 *
818 * @param block The logical block number in the block size terms.
819 * @param block_size The block size.
820 * @param media_block_size The block size of the media.
821 * @return rtems_blkdev_bnum The media block number.
822 */
823static rtems_blkdev_bnum
824rtems_bdbuf_media_block (rtems_blkdev_bnum block,
825                         size_t            block_size,
826                         size_t            media_block_size)
827{
828  return (rtems_blkdev_bnum)
829    ((((uint64_t) block) * block_size) / media_block_size);
830}
831
832/**
833 * Lock the mutex. A single task can nest calls.
834 *
835 * @param lock The mutex to lock.
836 * @param fatal_error_code The error code if the call fails.
837 */
838static void
839rtems_bdbuf_lock (rtems_id lock, uint32_t fatal_error_code)
840{
841  rtems_status_code sc = rtems_semaphore_obtain (lock,
842                                                 RTEMS_WAIT,
843                                                 RTEMS_NO_TIMEOUT);
844  if (sc != RTEMS_SUCCESSFUL)
845    rtems_fatal_error_occurred (fatal_error_code);
846}
847
848/**
849 * Unlock the mutex.
850 *
851 * @param lock The mutex to unlock.
852 * @param fatal_error_code The error code if the call fails.
853 */
854static void
855rtems_bdbuf_unlock (rtems_id lock, uint32_t fatal_error_code)
856{
857  rtems_status_code sc = rtems_semaphore_release (lock);
858  if (sc != RTEMS_SUCCESSFUL)
859    rtems_fatal_error_occurred (fatal_error_code);
860}
861
/**
 * Lock the cache. A single task can nest calls. Protects all cache data, BDs
 * and lists (see rtems_bdbuf_cache.lock).
 */
static void
rtems_bdbuf_lock_cache (void)
{
  rtems_bdbuf_lock (bdbuf_cache.lock, RTEMS_BLKDEV_FATAL_BDBUF_CACHE_LOCK);
}
870
/**
 * Unlock the cache.
 */
static void
rtems_bdbuf_unlock_cache (void)
{
  rtems_bdbuf_unlock (bdbuf_cache.lock, RTEMS_BLKDEV_FATAL_BDBUF_CACHE_UNLOCK);
}
879
/**
 * Lock the cache's sync lock. A single task can nest calls. Sync holds this
 * lock to block writers while a sync is in progress.
 */
static void
rtems_bdbuf_lock_sync (void)
{
  rtems_bdbuf_lock (bdbuf_cache.sync_lock, RTEMS_BLKDEV_FATAL_BDBUF_SYNC_LOCK);
}
888
/**
 * Unlock the cache's sync lock. Any blocked writers are woken.
 */
static void
rtems_bdbuf_unlock_sync (void)
{
  rtems_bdbuf_unlock (bdbuf_cache.sync_lock,
                      RTEMS_BLKDEV_FATAL_BDBUF_SYNC_UNLOCK);
}
898
/**
 * Take a user reference on the buffer's group. Caller must hold the cache
 * lock (the counter is not atomic).
 */
static void
rtems_bdbuf_group_obtain (rtems_bdbuf_buffer *bd)
{
  ++bd->group->users;
}
904
/**
 * Drop a user reference on the buffer's group. Caller must hold the cache
 * lock (the counter is not atomic).
 */
static void
rtems_bdbuf_group_release (rtems_bdbuf_buffer *bd)
{
  --bd->group->users;
}
910
911static rtems_mode
912rtems_bdbuf_disable_preemption (void)
913{
914  rtems_status_code sc = RTEMS_SUCCESSFUL;
915  rtems_mode prev_mode = 0;
916
917  sc = rtems_task_mode (RTEMS_NO_PREEMPT, RTEMS_PREEMPT_MASK, &prev_mode);
918  if (sc != RTEMS_SUCCESSFUL)
919    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_DIS);
920
921  return prev_mode;
922}
923
924static void
925rtems_bdbuf_restore_preemption (rtems_mode prev_mode)
926{
927  rtems_status_code sc = RTEMS_SUCCESSFUL;
928
929  sc = rtems_task_mode (prev_mode, RTEMS_ALL_MODE_MASKS, &prev_mode);
930  if (sc != RTEMS_SUCCESSFUL)
931    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_RST);
932}
933
/**
 * Wait until woken. Semaphores are used so a number of tasks can wait and can
 * be woken at once. Task events would require we maintain a list of tasks to
 * be woken and this would require storage and we do not know the number of
 * tasks that could be waiting.
 *
 * While we have the cache locked we can try and claim the semaphore and
 * therefore know when we release the lock to the cache we will block until the
 * semaphore is released. This may even happen before we get to block.
 *
 * A counter is used to save the release call when no one is waiting.
 *
 * The function assumes the cache is locked on entry and it will be locked on
 * exit.
 */
static void
rtems_bdbuf_anonymous_wait (rtems_bdbuf_waiters *waiters)
{
  rtems_status_code sc;
  rtems_mode        prev_mode;

  /*
   * Indicate we are waiting.
   */
  ++waiters->count;

  /*
   * Disable preemption then unlock the cache and block.  There is no POSIX
   * condition variable in the core API so this is a work around.
   *
   * The issue is a task could preempt after the cache is unlocked because it is
   * blocking or just hits that window, and before this task has blocked on the
   * semaphore. If the preempting task flushes the queue this task will not see
   * the flush and may block for ever or until another transaction flushes this
   * semaphore.
   */
  prev_mode = rtems_bdbuf_disable_preemption ();

  /*
   * Unlock the cache, wait, and lock the cache when we return.
   */
  rtems_bdbuf_unlock_cache ();

  sc = rtems_semaphore_obtain (waiters->sema, RTEMS_WAIT, RTEMS_BDBUF_WAIT_TIMEOUT);

  /*
   * The waiter semaphore is only ever flushed (see rtems_bdbuf_wake), never
   * released, so a woken waiter sees RTEMS_UNSATISFIED. Any other status is
   * fatal.
   */
  if (sc == RTEMS_TIMEOUT)
    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_TO);

  if (sc != RTEMS_UNSATISFIED)
    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_2);

  rtems_bdbuf_lock_cache ();

  rtems_bdbuf_restore_preemption (prev_mode);

  --waiters->count;
}
991
/**
 * Wait on the given waiter group for the buffer. The buffer's waiter count
 * and its group's user count are held across the wait so the buffer and its
 * group stay referenced while this task sleeps.
 */
static void
rtems_bdbuf_wait (rtems_bdbuf_buffer *bd, rtems_bdbuf_waiters *waiters)
{
  rtems_bdbuf_group_obtain (bd);
  ++bd->waiters;
  rtems_bdbuf_anonymous_wait (waiters);
  --bd->waiters;
  rtems_bdbuf_group_release (bd);
}
1001
1002/**
1003 * Wake a blocked resource. The resource has a counter that lets us know if
1004 * there are any waiters.
1005 */
1006static void
1007rtems_bdbuf_wake (const rtems_bdbuf_waiters *waiters)
1008{
1009  rtems_status_code sc = RTEMS_SUCCESSFUL;
1010
1011  if (waiters->count > 0)
1012  {
1013    sc = rtems_semaphore_flush (waiters->sema);
1014    if (sc != RTEMS_SUCCESSFUL)
1015      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAKE);
1016  }
1017}
1018
1019static void
1020rtems_bdbuf_wake_swapper (void)
1021{
1022  rtems_status_code sc = rtems_event_send (bdbuf_cache.swapout,
1023                                           RTEMS_BDBUF_SWAPOUT_SYNC);
1024  if (sc != RTEMS_SUCCESSFUL)
1025    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE);
1026}
1027
1028static bool
1029rtems_bdbuf_has_buffer_waiters (void)
1030{
1031  return bdbuf_cache.buffer_waiters.count;
1032}
1033
/**
 * Move a buffer just released from an access state onto the modified list.
 * If a sync covering this buffer's device is active, first yield to the sync
 * by taking and dropping the sync lock (releasing the cache lock around it)
 * so the buffer is not added behind an in-progress sync.
 */
static void
rtems_bdbuf_add_to_modified_list_after_access (rtems_bdbuf_buffer *bd)
{
  if (bdbuf_cache.sync_active && bdbuf_cache.sync_device == bd->dev)
  {
    rtems_bdbuf_unlock_cache ();

    /*
     * Wait for the sync lock.
     */
    rtems_bdbuf_lock_sync ();

    rtems_bdbuf_unlock_sync ();
    rtems_bdbuf_lock_cache ();
  }

  /*
   * Only the first modified release sets the timer and any further user
   * accesses do not change the timer value which should move down. This
   * assumes the user's hold of the buffer is much less than the time on the
   * modified list. Resetting the timer on each access which could result in a
   * buffer never getting to 0 and never being forced onto disk. This raises a
   * difficult question. Is a snapshot of a block that is changing better than
   * nothing being written ? We have tended to think we should hold changes for
   * only a specific period of time even if still changing and get onto disk
   * and letting the file system try and recover this position if it can.
   */
  if (bd->state == RTEMS_BDBUF_STATE_ACCESS)
    bd->hold_timer = bdbuf_config.swap_block_hold;

  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_MODIFIED);

  rtems_chain_append (&bdbuf_cache.modified, &bd->link);

  /* Wake tasks waiting on this buffer; otherwise, if buffers are scarce,
   * kick the swapper so this modified buffer can be recycled sooner. */
  if (bd->waiters)
    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
  else if (rtems_bdbuf_has_buffer_waiters ())
    rtems_bdbuf_wake_swapper ();
}
1073
/*
 * Return an accessed but unmodified buffer to the LRU list as CACHED and
 * release its group reference.  Wakes threads waiting on this buffer if any,
 * otherwise wakes threads waiting for any free buffer.
 *
 * Called with the cache locked.
 */
static void
rtems_bdbuf_add_to_lru_list_after_access (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_CACHED);

  rtems_chain_append (&bdbuf_cache.lru, &bd->link);

  rtems_bdbuf_group_release (bd);

  if (bd->waiters)
    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
  else
    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
}
1088
/*
 * Queue a buffer released after access onto the sync list so it is written
 * out by the swapout task.  Wakes any threads waiting on this buffer.
 *
 * Called with the cache locked.
 */
static void
rtems_bdbuf_add_to_sync_list_after_access (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_SYNC);

  rtems_chain_append (&bdbuf_cache.sync, &bd->link);

  if (bd->waiters)
    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
}
1099
1100/**
1101 * Compute the number of BDs per group for a given buffer size.
1102 *
1103 * @param size The buffer size. It can be any size and we scale up.
1104 */
1105static size_t
1106rtems_bdbuf_bds_per_group (size_t size)
1107{
1108  size_t bufs_per_size;
1109  size_t bds_per_size;
1110
1111  if (size > bdbuf_config.buffer_max)
1112    return 0;
1113
1114  bufs_per_size = ((size - 1) / bdbuf_config.buffer_min) + 1;
1115
1116  for (bds_per_size = 1;
1117       bds_per_size < bufs_per_size;
1118       bds_per_size <<= 1)
1119    ;
1120
1121  return bdbuf_cache.max_bds_per_group / bds_per_size;
1122}
1123
/*
 * Detach a buffer from the cache bookkeeping: CACHED buffers are removed
 * from the AVL tree, EMPTY buffers are not in the tree.  Any other state is
 * a fatal error.  Finally the buffer is unlinked from whatever chain holds
 * it (expected to be the LRU list, per the function name).
 */
static void
rtems_bdbuf_remove_from_cache_and_lru_list (rtems_bdbuf_buffer *bd)
{
  switch (bd->state)
  {
    case RTEMS_BDBUF_STATE_EMPTY:
      break;
    case RTEMS_BDBUF_STATE_CACHED:
      /* A failed AVL removal means the tree and the BD state disagree. */
      if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0)
        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_3);
      break;
    default:
      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_10);
  }

  rtems_chain_extract (&bd->link);
}
1141
/*
 * Mark a buffer EMPTY and prepend it to the LRU list, making it the first
 * candidate for reuse.
 */
static void
rtems_bdbuf_make_empty_and_add_to_lru_list (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_EMPTY);
  rtems_chain_prepend (&bdbuf_cache.lru, &bd->link);
}
1148
1149/**
1150 * Reallocate a group. The BDs currently allocated in the group are removed
1151 * from the ALV tree and any lists then the new BD's are prepended to the ready
1152 * list of the cache.
1153 *
1154 * @param group The group to reallocate.
1155 * @param new_bds_per_group The new count of BDs per group.
1156 * @return A buffer of this group.
1157 */
1158static rtems_bdbuf_buffer *
1159rtems_bdbuf_group_realloc (rtems_bdbuf_group* group, size_t new_bds_per_group)
1160{
1161  rtems_bdbuf_buffer* bd;
1162  size_t              b;
1163  size_t              bufs_per_bd;
1164
1165  if (rtems_bdbuf_tracer)
1166    printf ("bdbuf:realloc: %tu: %zd -> %zd\n",
1167            group - bdbuf_cache.groups, group->bds_per_group,
1168            new_bds_per_group);
1169
1170  bufs_per_bd = bdbuf_cache.max_bds_per_group / group->bds_per_group;
1171
1172  for (b = 0, bd = group->bdbuf;
1173       b < group->bds_per_group;
1174       b++, bd += bufs_per_bd)
1175    rtems_bdbuf_remove_from_cache_and_lru_list (bd);
1176
1177  group->bds_per_group = new_bds_per_group;
1178  bufs_per_bd = bdbuf_cache.max_bds_per_group / new_bds_per_group;
1179
1180  for (b = 1, bd = group->bdbuf + bufs_per_bd;
1181       b < group->bds_per_group;
1182       b++, bd += bufs_per_bd)
1183    rtems_bdbuf_make_empty_and_add_to_lru_list (bd);
1184
1185  if (b > 1)
1186    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1187
1188  return group->bdbuf;
1189}
1190
/*
 * Bind a reclaimed buffer to a new (device, block) pair: mark it FRESH,
 * reset its per-use fields and insert it into the AVL tree.  A failed
 * insert means a buffer for this (device, block) already exists, which is
 * a fatal inconsistency.
 *
 * Called with the cache locked.
 */
static void
rtems_bdbuf_recycle_buffer (rtems_bdbuf_buffer *bd,
                            dev_t               dev,
                            rtems_blkdev_bnum   block)
{
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_FRESH);

  bd->dev       = dev;
  bd->block     = block;
  bd->avl.left  = NULL;
  bd->avl.right = NULL;
  bd->error     = 0;
  bd->waiters   = 0;

  if (rtems_bdbuf_avl_insert (&bdbuf_cache.tree, bd) != 0)
    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_RECYCLE);
}
1208
1209static rtems_bdbuf_buffer *
1210rtems_bdbuf_get_buffer_from_lru_list (dev_t             dev,
1211                                      rtems_blkdev_bnum block,
1212                                      size_t            bds_per_group)
1213{
1214  rtems_chain_node *node = rtems_chain_first (&bdbuf_cache.lru);
1215
1216  while (!rtems_chain_is_tail (&bdbuf_cache.lru, node))
1217  {
1218    rtems_bdbuf_buffer *bd = (rtems_bdbuf_buffer *) node;
1219    rtems_bdbuf_buffer *recycle_bd = NULL;
1220
1221    if (rtems_bdbuf_tracer)
1222      printf ("bdbuf:next-bd: %tu (%td:%" PRId32 ") %zd -> %zd\n",
1223              bd - bdbuf_cache.bds,
1224              bd->group - bdbuf_cache.groups, bd->group->users,
1225              bd->group->bds_per_group, bds_per_group);
1226
1227    /*
1228     * If nobody waits for this BD, we may recycle it.
1229     */
1230    if (bd->waiters == 0)
1231    {
1232      if (bd->group->bds_per_group == bds_per_group)
1233      {
1234        rtems_bdbuf_remove_from_cache_and_lru_list (bd);
1235
1236        recycle_bd = bd;
1237      }
1238      else if (bd->group->users == 0)
1239        recycle_bd = rtems_bdbuf_group_realloc (bd->group, bds_per_group);
1240    }
1241
1242    if (recycle_bd != NULL)
1243    {
1244      rtems_bdbuf_recycle_buffer (recycle_bd, dev, block);
1245
1246      return recycle_bd;
1247    }
1248
1249    node = rtems_chain_next (node);
1250  }
1251
1252  return NULL;
1253}
1254
1255/**
1256 * Initialise the cache.
1257 *
1258 * @return rtems_status_code The initialisation status.
1259 */
1260rtems_status_code
1261rtems_bdbuf_init (void)
1262{
1263  rtems_bdbuf_group*  group;
1264  rtems_bdbuf_buffer* bd;
1265  uint8_t*            buffer;
1266  size_t              b;
1267  size_t              cache_aligment;
1268  rtems_status_code   sc;
1269  rtems_mode          prev_mode;
1270
1271  if (rtems_bdbuf_tracer)
1272    printf ("bdbuf:init\n");
1273
1274  if (rtems_interrupt_is_in_progress())
1275    return RTEMS_CALLED_FROM_ISR;
1276
1277  /*
1278   * Check the configuration table values.
1279   */
1280  if ((bdbuf_config.buffer_max % bdbuf_config.buffer_min) != 0)
1281    return RTEMS_INVALID_NUMBER;
1282
1283  /*
1284   * We use a special variable to manage the initialisation incase we have
1285   * completing threads doing this. You may get errors if the another thread
1286   * makes a call and we have not finished initialisation.
1287   */
1288  prev_mode = rtems_bdbuf_disable_preemption ();
1289  if (bdbuf_cache.initialised)
1290  {
1291    rtems_bdbuf_restore_preemption (prev_mode);
1292
1293    return RTEMS_RESOURCE_IN_USE;
1294  }
1295  memset(&bdbuf_cache, 0, sizeof(bdbuf_cache));
1296  bdbuf_cache.initialised = true;
1297  rtems_bdbuf_restore_preemption (prev_mode);
1298
1299  /*
1300   * For unspecified cache alignments we use the CPU alignment.
1301   */
1302  cache_aligment = 32; /* FIXME rtems_cache_get_data_line_size() */
1303  if (cache_aligment <= 0)
1304    cache_aligment = CPU_ALIGNMENT;
1305
1306  bdbuf_cache.sync_device = BDBUF_INVALID_DEV;
1307
1308  rtems_chain_initialize_empty (&bdbuf_cache.swapout_workers);
1309  rtems_chain_initialize_empty (&bdbuf_cache.lru);
1310  rtems_chain_initialize_empty (&bdbuf_cache.modified);
1311  rtems_chain_initialize_empty (&bdbuf_cache.sync);
1312
1313  /*
1314   * Create the locks for the cache.
1315   */
1316  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'l'),
1317                               1, RTEMS_BDBUF_CACHE_LOCK_ATTRIBS, 0,
1318                               &bdbuf_cache.lock);
1319  if (sc != RTEMS_SUCCESSFUL)
1320    goto error;
1321
1322  rtems_bdbuf_lock_cache ();
1323
1324  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 's'),
1325                               1, RTEMS_BDBUF_CACHE_LOCK_ATTRIBS, 0,
1326                               &bdbuf_cache.sync_lock);
1327  if (sc != RTEMS_SUCCESSFUL)
1328    goto error;
1329
1330  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'a'),
1331                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
1332                               &bdbuf_cache.access_waiters.sema);
1333  if (sc != RTEMS_SUCCESSFUL)
1334    goto error;
1335
1336  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 't'),
1337                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
1338                               &bdbuf_cache.transfer_waiters.sema);
1339  if (sc != RTEMS_SUCCESSFUL)
1340    goto error;
1341
1342  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'b'),
1343                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
1344                               &bdbuf_cache.buffer_waiters.sema);
1345  if (sc != RTEMS_SUCCESSFUL)
1346    goto error;
1347
1348  /*
1349   * Compute the various number of elements in the cache.
1350   */
1351  bdbuf_cache.buffer_min_count =
1352    bdbuf_config.size / bdbuf_config.buffer_min;
1353  bdbuf_cache.max_bds_per_group =
1354    bdbuf_config.buffer_max / bdbuf_config.buffer_min;
1355  bdbuf_cache.group_count =
1356    bdbuf_cache.buffer_min_count / bdbuf_cache.max_bds_per_group;
1357
1358  /*
1359   * Allocate the memory for the buffer descriptors.
1360   */
1361  bdbuf_cache.bds = calloc (sizeof (rtems_bdbuf_buffer),
1362                            bdbuf_cache.buffer_min_count);
1363  if (!bdbuf_cache.bds)
1364    goto error;
1365
1366  /*
1367   * Allocate the memory for the buffer descriptors.
1368   */
1369  bdbuf_cache.groups = calloc (sizeof (rtems_bdbuf_group),
1370                               bdbuf_cache.group_count);
1371  if (!bdbuf_cache.groups)
1372    goto error;
1373
1374  /*
1375   * Allocate memory for buffer memory. The buffer memory will be cache
1376   * aligned. It is possible to free the memory allocated by rtems_memalign()
1377   * with free(). Return 0 if allocated.
1378   *
1379   * The memory allocate allows a
1380   */
1381  if (rtems_memalign ((void **) &bdbuf_cache.buffers,
1382                      cache_aligment,
1383                      bdbuf_cache.buffer_min_count * bdbuf_config.buffer_min) != 0)
1384    goto error;
1385
1386  /*
1387   * The cache is empty after opening so we need to add all the buffers to it
1388   * and initialise the groups.
1389   */
1390  for (b = 0, group = bdbuf_cache.groups,
1391         bd = bdbuf_cache.bds, buffer = bdbuf_cache.buffers;
1392       b < bdbuf_cache.buffer_min_count;
1393       b++, bd++, buffer += bdbuf_config.buffer_min)
1394  {
1395    bd->dev    = BDBUF_INVALID_DEV;
1396    bd->group  = group;
1397    bd->buffer = buffer;
1398
1399    rtems_chain_append (&bdbuf_cache.lru, &bd->link);
1400
1401    if ((b % bdbuf_cache.max_bds_per_group) ==
1402        (bdbuf_cache.max_bds_per_group - 1))
1403      group++;
1404  }
1405
1406  for (b = 0,
1407         group = bdbuf_cache.groups,
1408         bd = bdbuf_cache.bds;
1409       b < bdbuf_cache.group_count;
1410       b++,
1411         group++,
1412         bd += bdbuf_cache.max_bds_per_group)
1413  {
1414    group->bds_per_group = bdbuf_cache.max_bds_per_group;
1415    group->bdbuf = bd;
1416  }
1417
1418  /*
1419   * Create and start swapout task. This task will create and manage the worker
1420   * threads.
1421   */
1422  bdbuf_cache.swapout_enabled = true;
1423
1424  sc = rtems_task_create (rtems_build_name('B', 'S', 'W', 'P'),
1425                          bdbuf_config.swapout_priority ?
1426                            bdbuf_config.swapout_priority :
1427                            RTEMS_BDBUF_SWAPOUT_TASK_PRIORITY_DEFAULT,
1428                          SWAPOUT_TASK_STACK_SIZE,
1429                          RTEMS_PREEMPT | RTEMS_NO_TIMESLICE | RTEMS_NO_ASR,
1430                          RTEMS_LOCAL | RTEMS_NO_FLOATING_POINT,
1431                          &bdbuf_cache.swapout);
1432  if (sc != RTEMS_SUCCESSFUL)
1433    goto error;
1434
1435  sc = rtems_task_start (bdbuf_cache.swapout,
1436                         rtems_bdbuf_swapout_task,
1437                         (rtems_task_argument) &bdbuf_cache);
1438  if (sc != RTEMS_SUCCESSFUL)
1439    goto error;
1440
1441  rtems_bdbuf_unlock_cache ();
1442
1443  return RTEMS_SUCCESSFUL;
1444
1445error:
1446
1447  if (bdbuf_cache.swapout != 0)
1448    rtems_task_delete (bdbuf_cache.swapout);
1449
1450  free (bdbuf_cache.buffers);
1451  free (bdbuf_cache.groups);
1452  free (bdbuf_cache.bds);
1453
1454  rtems_semaphore_delete (bdbuf_cache.buffer_waiters.sema);
1455  rtems_semaphore_delete (bdbuf_cache.access_waiters.sema);
1456  rtems_semaphore_delete (bdbuf_cache.transfer_waiters.sema);
1457  rtems_semaphore_delete (bdbuf_cache.sync_lock);
1458
1459  if (bdbuf_cache.lock != 0)
1460  {
1461    rtems_bdbuf_unlock_cache ();
1462    rtems_semaphore_delete (bdbuf_cache.lock);
1463  }
1464
1465  bdbuf_cache.initialised = false;
1466
1467  return RTEMS_UNSATISFIED;
1468}
1469
1470static void
1471rtems_bdbuf_wait_for_event (rtems_event_set event)
1472{
1473  rtems_status_code sc = RTEMS_SUCCESSFUL;
1474  rtems_event_set   out = 0;
1475
1476  sc = rtems_event_receive (event,
1477                            RTEMS_EVENT_ALL | RTEMS_WAIT,
1478                            RTEMS_NO_TIMEOUT,
1479                            &out);
1480
1481  if (sc != RTEMS_SUCCESSFUL || out != event)
1482    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_WAIT_EVNT);
1483}
1484
/*
 * Block until the buffer may be accessed by the caller.  FRESH buffers are
 * immediately usable; MODIFIED and CACHED buffers are taken off their
 * current list (a MODIFIED one also drops its group reference first);
 * buffers currently accessed or in transfer cause a wait on the matching
 * waiter group, after which the state is re-evaluated.
 *
 * Called with the cache locked; the waits release and re-acquire it.
 */
static void
rtems_bdbuf_wait_for_access (rtems_bdbuf_buffer *bd)
{
  while (true)
  {
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_FRESH:
        return;
      case RTEMS_BDBUF_STATE_MODIFIED:
        rtems_bdbuf_group_release (bd);
        /* Fall through */
      case RTEMS_BDBUF_STATE_CACHED:
        rtems_chain_extract (&bd->link);
        return;
      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
      case RTEMS_BDBUF_STATE_ACCESS:
        rtems_bdbuf_wait (bd, &bdbuf_cache.access_waiters);
        break;
      case RTEMS_BDBUF_STATE_TRANSFER:
      case RTEMS_BDBUF_STATE_SYNC:
        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
        break;
      default:
        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_7);
    }
  }
}
1513
/*
 * Move a modified buffer onto the sync list and wake the swapout task so
 * the buffer is written to disk.
 *
 * Called with the cache locked.
 */
static void
rtems_bdbuf_request_sync_for_modified_buffer (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_SYNC);
  rtems_chain_extract (&bd->link);
  rtems_chain_append (&bdbuf_cache.sync, &bd->link);
  rtems_bdbuf_wake_swapper ();
}
1522
1523/**
1524 * @brief Waits until the buffer is ready for recycling.
1525 *
1526 * @retval @c true Buffer is valid and may be recycled.
1527 * @retval @c false Buffer is invalid and has to searched again.
1528 */
1529static bool
1530rtems_bdbuf_wait_for_recycle (rtems_bdbuf_buffer *bd)
1531{
1532  while (true)
1533  {
1534    switch (bd->state)
1535    {
1536      case RTEMS_BDBUF_STATE_EMPTY:
1537        return true;
1538      case RTEMS_BDBUF_STATE_MODIFIED:
1539        rtems_bdbuf_request_sync_for_modified_buffer (bd);
1540        break;
1541      case RTEMS_BDBUF_STATE_CACHED:
1542        if (bd->waiters == 0)
1543          return true;
1544        else
1545        {
1546          /*
1547           * It is essential that we wait here without a special wait count and
1548           * without the group in use.  Otherwise we could trigger a wait ping
1549           * pong with another recycle waiter.  The state of the buffer is
1550           * arbitrary afterwards.
1551           */
1552          rtems_bdbuf_anonymous_wait (&bdbuf_cache.buffer_waiters);
1553          return false;
1554        }
1555      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
1556      case RTEMS_BDBUF_STATE_ACCESS:
1557        rtems_bdbuf_wait (bd, &bdbuf_cache.access_waiters);
1558        break;
1559      case RTEMS_BDBUF_STATE_TRANSFER:
1560      case RTEMS_BDBUF_STATE_SYNC:
1561        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
1562        break;
1563      default:
1564        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_8);
1565    }
1566  }
1567
1568  return true;
1569}
1570
/*
 * Block until a buffer queued for sync has left the SYNC/TRANSFER states.
 * Any state other than the six handled here is fatal.
 *
 * Called with the cache locked; the waits release and re-acquire it.
 */
static void
rtems_bdbuf_wait_for_sync_done (rtems_bdbuf_buffer *bd)
{
  while (true)
  {
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_CACHED:
      case RTEMS_BDBUF_STATE_MODIFIED:
      case RTEMS_BDBUF_STATE_ACCESS:
      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
        return;
      case RTEMS_BDBUF_STATE_SYNC:
      case RTEMS_BDBUF_STATE_TRANSFER:
        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
        break;
      default:
        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_9);
    }
  }
}
1592
/*
 * Wait for a buffer to become available.  If modified buffers exist the
 * swapper is woken first so it can free some up while we wait.
 *
 * Called with the cache locked; the wait releases and re-acquires it.
 */
static void
rtems_bdbuf_wait_for_buffer (void)
{
  if (!rtems_chain_is_empty (&bdbuf_cache.modified))
    rtems_bdbuf_wake_swapper ();

  rtems_bdbuf_anonymous_wait (&bdbuf_cache.buffer_waiters);
}
1601
1602static rtems_bdbuf_buffer *
1603rtems_bdbuf_get_buffer_for_read_ahead (dev_t             dev,
1604                                       rtems_blkdev_bnum block,
1605                                       size_t            bds_per_group)
1606{
1607  rtems_bdbuf_buffer *bd = NULL;
1608
1609  bd = rtems_bdbuf_avl_search (&bdbuf_cache.tree, dev, block);
1610
1611  if (bd == NULL)
1612  {
1613    bd = rtems_bdbuf_get_buffer_from_lru_list (dev, block, bds_per_group);
1614
1615    if (bd != NULL)
1616      rtems_bdbuf_group_obtain (bd);
1617  }
1618  else
1619    /*
1620     * The buffer is in the cache.  So it is already available or in use, and
1621     * thus no need for a read ahead.
1622     */
1623    bd = NULL;
1624
1625  return bd;
1626}
1627
/*
 * Obtain the buffer for the given (device, block) pair, blocking as long
 * as necessary.  If the cached buffer belongs to a group with the wrong
 * geometry it is drained (waiting for recycle) and the search restarts.
 * If no buffer is cached, one is recycled from the LRU list, waiting for
 * a buffer to become available when the list yields none.
 *
 * On return the buffer is exclusively accessible and holds a group
 * reference.  Called with the cache locked; the waits release and
 * re-acquire it.
 */
static rtems_bdbuf_buffer *
rtems_bdbuf_get_buffer_for_access (dev_t             dev,
                                   rtems_blkdev_bnum block,
                                   size_t            bds_per_group)
{
  rtems_bdbuf_buffer *bd = NULL;

  do
  {
    bd = rtems_bdbuf_avl_search (&bdbuf_cache.tree, dev, block);

    if (bd != NULL)
    {
      if (bd->group->bds_per_group != bds_per_group)
      {
        /* Wrong geometry: empty the buffer and retry the lookup. */
        if (rtems_bdbuf_wait_for_recycle (bd))
        {
          rtems_bdbuf_remove_from_cache_and_lru_list (bd);
          rtems_bdbuf_make_empty_and_add_to_lru_list (bd);
          rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
        }
        bd = NULL;
      }
    }
    else
    {
      bd = rtems_bdbuf_get_buffer_from_lru_list (dev, block, bds_per_group);

      if (bd == NULL)
        rtems_bdbuf_wait_for_buffer ();
    }
  }
  while (bd == NULL);

  rtems_bdbuf_wait_for_access (bd);
  rtems_bdbuf_group_obtain (bd);

  return bd;
}
1667
/*
 * Obtain the disk device for dev and optionally translate the caller's
 * block number into a media block number and compute the BDs-per-group
 * value for the disk's block size.  On any failure after the disk has
 * been obtained it is released again.
 *
 * @param dev The device number.
 * @param block Block number in disk block-size units (only validated when
 *              media_block_ptr is non-NULL).
 * @param dd_ptr Set to the obtained disk device on success.
 * @param media_block_ptr If non-NULL, set to the media block number
 *                        including the disk's start offset.
 * @param bds_per_group_ptr If non-NULL, set to the BDs per group for the
 *                          disk's block size.
 * @return RTEMS_SUCCESSFUL, RTEMS_NOT_CONFIGURED if the cache is not
 *         initialised, RTEMS_INVALID_ID for an unknown device or
 *         RTEMS_INVALID_NUMBER for a bad block number or block size.
 */
static rtems_status_code
rtems_bdbuf_obtain_disk (dev_t               dev,
                         rtems_blkdev_bnum   block,
                         rtems_disk_device **dd_ptr,
                         rtems_blkdev_bnum  *media_block_ptr,
                         size_t             *bds_per_group_ptr)
{
  rtems_disk_device *dd = NULL;

  if (!bdbuf_cache.initialised)
    return RTEMS_NOT_CONFIGURED;

  /*
   * Do not hold the cache lock when obtaining the disk table.
   */
  dd = rtems_disk_obtain (dev);
  if (dd == NULL)
    return RTEMS_INVALID_ID;

  *dd_ptr = dd;

  if (media_block_ptr != NULL)
  {
    /*
     * Compute the media block number. Drivers work with media block number not
     * the block number a BD may have as this depends on the block size set by
     * the user.
     */
    rtems_blkdev_bnum mb = rtems_bdbuf_media_block (block,
                                                    dd->block_size,
                                                    dd->media_block_size);
    if (mb >= dd->size)
    {
      rtems_disk_release(dd);
      return RTEMS_INVALID_NUMBER;
    }

    *media_block_ptr = mb + dd->start;
  }

  if (bds_per_group_ptr != NULL)
  {
    size_t bds_per_group = rtems_bdbuf_bds_per_group (dd->block_size);

    /* Zero means the disk's block size exceeds the configured maximum. */
    if (bds_per_group == 0)
    {
      rtems_disk_release (dd);
      return RTEMS_INVALID_NUMBER;
    }

    *bds_per_group_ptr = bds_per_group;
  }

  return RTEMS_SUCCESSFUL;
}
1723
1724static void
1725rtems_bdbuf_release_disk (rtems_disk_device *dd)
1726{
1727  rtems_status_code sc = RTEMS_SUCCESSFUL;
1728
1729  sc = rtems_disk_release (dd);
1730  if (sc != RTEMS_SUCCESSFUL)
1731    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_DISK_REL);
1732}
1733
/*
 * Get a buffer for the given block without reading it from disk.  The
 * returned buffer is in the ACCESS or ACCESS_MODIFIED state and must be
 * released with rtems_bdbuf_release, rtems_bdbuf_release_modified or
 * rtems_bdbuf_sync.
 *
 * @param dev The device number.
 * @param block Block number in disk block-size units.
 * @param bd_ptr Set to the obtained buffer on success.
 * @return RTEMS_SUCCESSFUL or the error from rtems_bdbuf_obtain_disk.
 */
rtems_status_code
rtems_bdbuf_get (dev_t                dev,
                 rtems_blkdev_bnum    block,
                 rtems_bdbuf_buffer **bd_ptr)
{
  rtems_status_code   sc = RTEMS_SUCCESSFUL;
  rtems_disk_device  *dd = NULL;
  rtems_bdbuf_buffer *bd = NULL;
  rtems_blkdev_bnum   media_block = 0;
  size_t              bds_per_group = 0;

  sc = rtems_bdbuf_obtain_disk (dev, block, &dd, &media_block, &bds_per_group);
  if (sc != RTEMS_SUCCESSFUL)
    return sc;

  rtems_bdbuf_lock_cache ();

  /*
   * Print the block index relative to the physical disk.
   */
  if (rtems_bdbuf_tracer)
    printf ("bdbuf:get: %" PRIu32 " (%" PRIu32 ") (dev = %08x)\n",
            media_block, block, (unsigned) dev);

  bd = rtems_bdbuf_get_buffer_for_access (dev, media_block, bds_per_group);

  switch (bd->state)
  {
    case RTEMS_BDBUF_STATE_CACHED:
    case RTEMS_BDBUF_STATE_FRESH:
      rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS);
      break;
    case RTEMS_BDBUF_STATE_MODIFIED:
      /*
       * To get a modified buffer could be considered a bug in the caller
       * because you should not be getting an already modified buffer but user
       * may have modified a byte in a block then decided to seek the start and
       * write the whole block and the file system will have no record of this
       * so just gets the block to fill.
       */
      rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_MODIFIED);
      break;
    default:
      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_2);
      break;
  }

  if (rtems_bdbuf_tracer)
  {
    rtems_bdbuf_show_users ("get", bd);
    rtems_bdbuf_show_usage ();
  }

  rtems_bdbuf_unlock_cache ();

  rtems_bdbuf_release_disk (dd);

  *bd_ptr = bd;

  return RTEMS_SUCCESSFUL;
}
1795
1796/**
1797 * Call back handler called by the low level driver when the transfer has
1798 * completed. This function may be invoked from interrupt handler.
1799 *
1800 * @param arg Arbitrary argument specified in block device request
1801 *            structure (in this case - pointer to the appropriate
1802 *            block device request structure).
1803 * @param status I/O completion status
1804 * @param error errno error code if status != RTEMS_SUCCESSFUL
1805 */
1806static void
1807rtems_bdbuf_read_done (void* arg, rtems_status_code status, int error)
1808{
1809  rtems_blkdev_request* req = (rtems_blkdev_request*) arg;
1810
1811  req->error = error;
1812  req->status = status;
1813
1814  rtems_event_send (req->io_task, RTEMS_BDBUF_TRANSFER_SYNC);
1815}
1816
/*
 * Fill a read request for the given media block plus up to
 * bdbuf_config.max_read_ahead_blocks read-ahead blocks.  The first buffer
 * is obtained with blocking; read-ahead buffers are only taken when
 * immediately available.  If the first buffer is already CACHED or
 * MODIFIED no transfer is needed and req->bufnum is left at 0.
 *
 * Called with the cache locked.
 *
 * @param req The request to fill; bufs[] must have room for
 *            max_read_ahead_blocks + 1 entries.
 * @param dd The disk device.
 * @param media_block The first media block to read.
 * @param bds_per_group BDs per group for the disk's block size.
 */
static void
rtems_bdbuf_create_read_request (rtems_blkdev_request *req,
                                 rtems_disk_device    *dd,
                                 rtems_blkdev_bnum     media_block,
                                 size_t                bds_per_group)
{
  rtems_bdbuf_buffer *bd = NULL;
  rtems_blkdev_bnum   media_block_end = dd->start + dd->size;
  rtems_blkdev_bnum   media_block_count = dd->block_size / dd->media_block_size;
  dev_t               dev = dd->dev;
  uint32_t            block_size = dd->block_size;
  uint32_t            transfer_index = 1;
  uint32_t            transfer_count = bdbuf_config.max_read_ahead_blocks + 1;

  /* Clamp the read ahead to the end of the disk. */
  if (media_block_end - media_block < transfer_count)
    transfer_count = media_block_end - media_block;

  req->bufnum = 0;

  bd = rtems_bdbuf_get_buffer_for_access (dev, media_block, bds_per_group);

  req->bufs [0].user   = bd;
  req->bufs [0].block  = media_block;
  req->bufs [0].length = block_size;
  req->bufs [0].buffer = bd->buffer;

  if (rtems_bdbuf_tracer)
    rtems_bdbuf_show_users ("read", bd);

  switch (bd->state)
  {
    case RTEMS_BDBUF_STATE_CACHED:
    case RTEMS_BDBUF_STATE_MODIFIED:
      /* Data already present: no transfer needed, bufnum stays 0. */
      return;
    case RTEMS_BDBUF_STATE_FRESH:
      rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);
      break;
    default:
      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_1);
      break;
  }

  /* Collect as many consecutive read-ahead buffers as are free. */
  while (transfer_index < transfer_count)
  {
    media_block += media_block_count;

    bd = rtems_bdbuf_get_buffer_for_read_ahead (dev, media_block,
                                                bds_per_group);

    if (bd == NULL)
      break;

    rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);

    req->bufs [transfer_index].user   = bd;
    req->bufs [transfer_index].block  = media_block;
    req->bufs [transfer_index].length = block_size;
    req->bufs [transfer_index].buffer = bd->buffer;

    if (rtems_bdbuf_tracer)
      rtems_bdbuf_show_users ("read-ahead", bd);

    ++transfer_index;
  }

  req->bufnum = transfer_index;
}
1884
/*
 * Execute a read request prepared by rtems_bdbuf_create_read_request.
 * When req->bufnum is 0 nothing is transferred.  Otherwise the cache is
 * unlocked, the driver is asked to perform the read, the task waits for
 * the completion event, and the cache is re-locked to post-process every
 * buffer: all become CACHED with the transfer error recorded; read-ahead
 * buffers (index > 0) additionally drop their group reference and, on a
 * failed transfer without waiters, are emptied and removed from the tree.
 *
 * Called with the cache locked; returns with it locked.
 *
 * @return The buffer for the originally requested block (bufs[0]).
 */
static rtems_bdbuf_buffer *
rtems_bdbuf_execute_read_request (rtems_blkdev_request *req,
                                  rtems_disk_device    *dd)
{
  if (req->bufnum)
  {
    /*
     * Unlock the cache. We have the buffer for the block and it will be in the
     * access or transfer state. We may also have a number of read ahead blocks
     * if we need to transfer data. At this point any other threads can gain
     * access to the cache and if they are after any of the buffers we have
     * they will block and be woken when the buffer is returned to the cache.
     *
     * If a transfer is needed the I/O operation will occur with pre-emption
     * enabled and the cache unlocked. This is a change to the previous version
     * of the bdbuf code.
     */
    int      result = 0;
    int      error = 0;
    uint32_t transfer_index = 0;
    bool     wake_transfer = false;
    bool     wake_buffer = false;

    rtems_bdbuf_unlock_cache ();

    req->req = RTEMS_BLKDEV_REQ_READ;
    req->req_done = rtems_bdbuf_read_done;
    req->done_arg = req;
    req->io_task = rtems_task_self ();
    req->status = RTEMS_RESOURCE_IN_USE;
    req->error = 0;

    result = dd->ioctl (dd->phys_dev, RTEMS_BLKIO_REQUEST, req);

    if (result == 0)
    {
      /* The driver accepted the request: wait for the completion event. */
      rtems_bdbuf_wait_for_event (RTEMS_BDBUF_TRANSFER_SYNC);
      error = req->error;
    }
    else
      error = errno;

    rtems_bdbuf_lock_cache ();

    for (transfer_index = 0; transfer_index < req->bufnum; ++transfer_index)
    {
      rtems_bdbuf_buffer *bd = req->bufs [transfer_index].user;
      bool waiters = bd->waiters;

      rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_CACHED);

      if (waiters)
        wake_transfer = true;

      bd->error = error;

      if (rtems_bdbuf_tracer)
        rtems_bdbuf_show_users ("read-ahead", bd);

      if (transfer_index > 0)
      {
        /*
         * This is a read ahead buffer.
         */

        rtems_bdbuf_group_release (bd);

        if (!waiters)
          wake_buffer = true;

        if (error == 0 || waiters)
          rtems_chain_append (&bdbuf_cache.lru, &bd->link);
        else
        {
          /* Failed read ahead with no interest: discard the buffer. */
          rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_EMPTY);
          rtems_chain_prepend (&bdbuf_cache.lru, &bd->link);
          if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0)
            rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_RM);
        }
      }
    }

    if (wake_transfer)
      rtems_bdbuf_wake (&bdbuf_cache.transfer_waiters);

    if (wake_buffer)
      rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
  }

  return req->bufs [0].user;
}
1976
/*
 * Get a buffer for the given block, reading it (plus read-ahead blocks)
 * from disk if it is not cached.  The returned buffer is in the ACCESS or
 * ACCESS_MODIFIED state and must be released with rtems_bdbuf_release,
 * rtems_bdbuf_release_modified or rtems_bdbuf_sync.
 *
 * @param dev The device number.
 * @param block Block number in disk block-size units.
 * @param bd_ptr Set to the obtained buffer on success.
 * @return RTEMS_SUCCESSFUL or the error from rtems_bdbuf_obtain_disk.
 */
rtems_status_code
rtems_bdbuf_read (dev_t                dev,
                  rtems_blkdev_bnum    block,
                  rtems_bdbuf_buffer **bd_ptr)
{
  rtems_status_code     sc = RTEMS_SUCCESSFUL;
  rtems_disk_device    *dd = NULL;
  rtems_bdbuf_buffer   *bd = NULL;
  rtems_blkdev_request *req = NULL;
  rtems_blkdev_bnum     media_block = 0;
  size_t                bds_per_group = 0;

  sc = rtems_bdbuf_obtain_disk (dev, block, &dd, &media_block, &bds_per_group);
  if (sc != RTEMS_SUCCESSFUL)
    return sc;

  /*
   * TODO: This type of request structure is wrong and should be removed.
   * The request lives on this task's stack for the duration of the call.
   */
#define bdbuf_alloc(size) __builtin_alloca (size)

  req = bdbuf_alloc (sizeof (rtems_blkdev_request) +
                     sizeof ( rtems_blkdev_sg_buffer) *
                      (bdbuf_config.max_read_ahead_blocks + 1));

  if (rtems_bdbuf_tracer)
    printf ("bdbuf:read: %" PRIu32 " (%" PRIu32 ") (dev = %08x)\n",
            media_block + dd->start, block, (unsigned) dev);

  rtems_bdbuf_lock_cache ();
  rtems_bdbuf_create_read_request (req, dd, media_block, bds_per_group);

  bd = rtems_bdbuf_execute_read_request (req, dd);

  switch (bd->state)
  {
    case RTEMS_BDBUF_STATE_CACHED:
      rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS);
      break;
    case RTEMS_BDBUF_STATE_MODIFIED:
      rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_MODIFIED);
      break;
    default:
      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_4);
      break;
  }

  if (rtems_bdbuf_tracer)
  {
    rtems_bdbuf_show_users ("read", bd);
    rtems_bdbuf_show_usage ();
  }

  rtems_bdbuf_unlock_cache ();
  rtems_bdbuf_release_disk (dd);

  *bd_ptr = bd;

  return RTEMS_SUCCESSFUL;
}
2037
/*
 * Common entry-point validation for the release/sync family: reject calls
 * before initialisation or with a NULL buffer, trace if enabled, then lock
 * the cache.  On success the caller owns the cache lock.
 *
 * @param bd The buffer to check.
 * @param kind Label used in trace output ("release", "sync", ...).
 * @return RTEMS_SUCCESSFUL (cache locked), RTEMS_NOT_CONFIGURED or
 *         RTEMS_INVALID_ADDRESS.
 */
static rtems_status_code
rtems_bdbuf_check_bd_and_lock_cache (rtems_bdbuf_buffer *bd, const char *kind)
{
  if (!bdbuf_cache.initialised)
    return RTEMS_NOT_CONFIGURED;
  if (bd == NULL)
    return RTEMS_INVALID_ADDRESS;
  if (rtems_bdbuf_tracer)
  {
    printf ("bdbuf:%s: %" PRIu32 "\n", kind, bd->block);
    rtems_bdbuf_show_users (kind, bd);
  }
  rtems_bdbuf_lock_cache();

  return RTEMS_SUCCESSFUL;
}
2054
/*
 * Release a buffer obtained with rtems_bdbuf_get or rtems_bdbuf_read
 * without marking it modified.  ACCESS buffers go back to the LRU list;
 * ACCESS_MODIFIED buffers go to the modified list.  Any other state is
 * fatal.
 *
 * @param bd The buffer to release.
 * @return RTEMS_SUCCESSFUL or the error from the common checks.
 */
rtems_status_code
rtems_bdbuf_release (rtems_bdbuf_buffer *bd)
{
  rtems_status_code sc = RTEMS_SUCCESSFUL;

  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "release");
  if (sc != RTEMS_SUCCESSFUL)
    return sc;

  switch (bd->state)
  {
    case RTEMS_BDBUF_STATE_ACCESS:
      rtems_bdbuf_add_to_lru_list_after_access (bd);
      break;
    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
      rtems_bdbuf_add_to_modified_list_after_access (bd);
      break;
    default:
      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_0);
      break;
  }

  if (rtems_bdbuf_tracer)
    rtems_bdbuf_show_usage ();

  rtems_bdbuf_unlock_cache ();

  return RTEMS_SUCCESSFUL;
}
2084
2085rtems_status_code
2086rtems_bdbuf_release_modified (rtems_bdbuf_buffer *bd)
2087{
2088  rtems_status_code sc = RTEMS_SUCCESSFUL;
2089
2090  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "release modified");
2091  if (sc != RTEMS_SUCCESSFUL)
2092    return sc;
2093
2094  switch (bd->state)
2095  {
2096    case RTEMS_BDBUF_STATE_ACCESS:
2097    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2098      rtems_bdbuf_add_to_modified_list_after_access (bd);
2099      break;
2100    default:
2101      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_6);
2102      break;
2103  }
2104
2105  if (rtems_bdbuf_tracer)
2106    rtems_bdbuf_show_usage ();
2107
2108  rtems_bdbuf_unlock_cache ();
2109
2110  return RTEMS_SUCCESSFUL;
2111}
2112
2113rtems_status_code
2114rtems_bdbuf_sync (rtems_bdbuf_buffer *bd)
2115{
2116  rtems_status_code sc = RTEMS_SUCCESSFUL;
2117
2118  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "sync");
2119  if (sc != RTEMS_SUCCESSFUL)
2120    return sc;
2121
2122  switch (bd->state)
2123  {
2124    case RTEMS_BDBUF_STATE_ACCESS:
2125    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2126      rtems_bdbuf_add_to_sync_list_after_access (bd);
2127      break;
2128    default:
2129      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_5);
2130      break;
2131  }
2132
2133  if (rtems_bdbuf_tracer)
2134    rtems_bdbuf_show_usage ();
2135
2136  rtems_bdbuf_wake_swapper ();
2137  rtems_bdbuf_wait_for_sync_done (bd);
2138
2139  /*
2140   * If no one intercepts the sync, we created a cached buffer which may be
2141   * recycled.
2142   */
2143  if (bd->state == RTEMS_BDBUF_STATE_CACHED && bd->waiters == 0)
2144    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
2145
2146  rtems_bdbuf_unlock_cache ();
2147
2148  return RTEMS_SUCCESSFUL;
2149}
2150
2151rtems_status_code
2152rtems_bdbuf_syncdev (dev_t dev)
2153{
2154  rtems_status_code  sc = RTEMS_SUCCESSFUL;
2155  rtems_disk_device *dd = NULL;
2156
2157  if (rtems_bdbuf_tracer)
2158    printf ("bdbuf:syncdev: %08x\n", (unsigned) dev);
2159
2160  sc = rtems_bdbuf_obtain_disk (dev, 0, &dd, NULL, NULL);
2161  if (sc != RTEMS_SUCCESSFUL)
2162    return sc;
2163
2164  /*
2165   * Take the sync lock before locking the cache. Once we have the sync lock we
2166   * can lock the cache. If another thread has the sync lock it will cause this
2167   * thread to block until it owns the sync lock then it can own the cache. The
2168   * sync lock can only be obtained with the cache unlocked.
2169   */
2170  rtems_bdbuf_lock_sync ();
2171  rtems_bdbuf_lock_cache ();
2172
2173  /*
2174   * Set the cache to have a sync active for a specific device and let the swap
2175   * out task know the id of the requester to wake when done.
2176   *
2177   * The swap out task will negate the sync active flag when no more buffers
2178   * for the device are held on the "modified for sync" queues.
2179   */
2180  bdbuf_cache.sync_active    = true;
2181  bdbuf_cache.sync_requester = rtems_task_self ();
2182  bdbuf_cache.sync_device    = dev;
2183
2184  rtems_bdbuf_wake_swapper ();
2185  rtems_bdbuf_unlock_cache ();
2186  rtems_bdbuf_wait_for_event (RTEMS_BDBUF_TRANSFER_SYNC);
2187  rtems_bdbuf_unlock_sync ();
2188  rtems_bdbuf_release_disk (dd);
2189
2190  return RTEMS_SUCCESSFUL;
2191}
2192
2193/**
2194 * Call back handler called by the low level driver when the transfer has
2195 * completed. This function may be invoked from interrupt handlers.
2196 *
2197 * @param arg Arbitrary argument specified in block device request
2198 *            structure (in this case - pointer to the appropriate
2199 *            block device request structure).
2200 * @param status I/O completion status
2201 * @param error errno error code if status != RTEMS_SUCCESSFUL
2202 */
2203static void
2204rtems_bdbuf_write_done(void *arg, rtems_status_code status, int error)
2205{
2206  rtems_blkdev_request* req = (rtems_blkdev_request*) arg;
2207
2208  req->error = error;
2209  req->status = status;
2210
2211  rtems_event_send (req->io_task, RTEMS_BDBUF_TRANSFER_SYNC);
2212}
2213
2214/**
2215 * Swapout transfer to the driver. The driver will break this I/O into groups
2216 * of consecutive write requests is multiple consecutive buffers are required
2217 * by the driver.
2218 *
2219 * @param transfer The transfer transaction.
2220 */
2221static void
2222rtems_bdbuf_swapout_write (rtems_bdbuf_swapout_transfer* transfer)
2223{
2224  rtems_disk_device* dd;
2225
2226  if (rtems_bdbuf_tracer)
2227    printf ("bdbuf:swapout transfer: %08x\n", (unsigned) transfer->dev);
2228
2229  /*
2230   * If there are buffers to transfer to the media transfer them.
2231   */
2232  if (!rtems_chain_is_empty (&transfer->bds))
2233  {
2234    /*
2235     * Obtain the disk device. The cache's mutex has been released to avoid a
2236     * dead lock.
2237     */
2238    dd = rtems_disk_obtain (transfer->dev);
2239    if (dd)
2240    {
2241      /*
2242       * The last block number used when the driver only supports
2243       * continuous blocks in a single request.
2244       */
2245      uint32_t last_block = 0;
2246
2247      /*
2248       * Number of buffers per bd. This is used to detect the next
2249       * block.
2250       */
2251      uint32_t bufs_per_bd = dd->block_size / bdbuf_config.buffer_min;
2252
2253      /*
2254       * Take as many buffers as configured and pass to the driver. Note, the
2255       * API to the drivers has an array of buffers and if a chain was passed
2256       * we could have just passed the list. If the driver API is updated it
2257       * should be possible to make this change with little effect in this
2258       * code. The array that is passed is broken in design and should be
2259       * removed. Merging members of a struct into the first member is
2260       * trouble waiting to happen.
2261       */
2262      transfer->write_req->status = RTEMS_RESOURCE_IN_USE;
2263      transfer->write_req->error = 0;
2264      transfer->write_req->bufnum = 0;
2265
2266      while (!rtems_chain_is_empty (&transfer->bds))
2267      {
2268        rtems_bdbuf_buffer* bd =
2269          (rtems_bdbuf_buffer*) rtems_chain_get (&transfer->bds);
2270
2271        bool write = false;
2272
2273        /*
2274         * If the device only accepts sequential buffers and this is not the
2275         * first buffer (the first is always sequential, and the buffer is not
2276         * sequential then put the buffer back on the transfer chain and write
2277         * the committed buffers.
2278         */
2279
2280        if (rtems_bdbuf_tracer)
2281          printf ("bdbuf:swapout write: bd:%" PRIu32 ", bufnum:%" PRIu32 " mode:%s\n",
2282                  bd->block, transfer->write_req->bufnum,
2283                  dd->phys_dev->capabilities &
2284                  RTEMS_BLKDEV_CAP_MULTISECTOR_CONT ? "MULIT" : "SCAT");
2285
2286        if ((dd->phys_dev->capabilities & RTEMS_BLKDEV_CAP_MULTISECTOR_CONT) &&
2287            transfer->write_req->bufnum &&
2288            (bd->block != (last_block + bufs_per_bd)))
2289        {
2290          rtems_chain_prepend (&transfer->bds, &bd->link);
2291          write = true;
2292        }
2293        else
2294        {
2295          rtems_blkdev_sg_buffer* buf;
2296          buf = &transfer->write_req->bufs[transfer->write_req->bufnum];
2297          transfer->write_req->bufnum++;
2298          buf->user   = bd;
2299          buf->block  = bd->block;
2300          buf->length = dd->block_size;
2301          buf->buffer = bd->buffer;
2302          last_block  = bd->block;
2303        }
2304
2305        /*
2306         * Perform the transfer if there are no more buffers, or the transfer
2307         * size has reached the configured max. value.
2308         */
2309
2310        if (rtems_chain_is_empty (&transfer->bds) ||
2311            (transfer->write_req->bufnum >= bdbuf_config.max_write_blocks))
2312          write = true;
2313
2314        if (write)
2315        {
2316          int result;
2317          uint32_t b;
2318
2319          if (rtems_bdbuf_tracer)
2320            printf ("bdbuf:swapout write: writing bufnum:%" PRIu32 "\n",
2321                    transfer->write_req->bufnum);
2322
2323          /*
2324           * Perform the transfer. No cache locks, no preemption, only the disk
2325           * device is being held.
2326           */
2327          result = dd->ioctl (dd->phys_dev, RTEMS_BLKIO_REQUEST,
2328                              transfer->write_req);
2329          if (result < 0)
2330          {
2331            rtems_bdbuf_lock_cache ();
2332
2333            for (b = 0; b < transfer->write_req->bufnum; b++)
2334            {
2335              bd = transfer->write_req->bufs[b].user;
2336              rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_MODIFIED);
2337              bd->error = errno;
2338
2339              /*
2340               * Place back on the cache's modified queue and try again.
2341               *
2342               * @warning Not sure this is the best option but I do not know
2343               *          what else can be done.
2344               */
2345              rtems_chain_append (&bdbuf_cache.modified, &bd->link);
2346            }
2347          }
2348          else
2349          {
2350            rtems_bdbuf_wait_for_event (RTEMS_BDBUF_TRANSFER_SYNC);
2351
2352            rtems_bdbuf_lock_cache ();
2353
2354            for (b = 0; b < transfer->write_req->bufnum; b++)
2355            {
2356              bd = transfer->write_req->bufs[b].user;
2357              rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_CACHED);
2358              bd->error = 0;
2359
2360              rtems_bdbuf_group_release (bd);
2361
2362              if (rtems_bdbuf_tracer)
2363                rtems_bdbuf_show_users ("write", bd);
2364
2365              rtems_chain_append (&bdbuf_cache.lru, &bd->link);
2366
2367              if (bd->waiters)
2368                rtems_bdbuf_wake (&bdbuf_cache.transfer_waiters);
2369              else
2370                rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
2371            }
2372          }
2373
2374          if (rtems_bdbuf_tracer)
2375            rtems_bdbuf_show_usage ();
2376
2377          rtems_bdbuf_unlock_cache ();
2378
2379          transfer->write_req->status = RTEMS_RESOURCE_IN_USE;
2380          transfer->write_req->error = 0;
2381          transfer->write_req->bufnum = 0;
2382        }
2383      }
2384
2385      rtems_disk_release (dd);
2386    }
2387    else
2388    {
2389      /*
2390       * We have buffers but no device. Put the BDs back onto the
2391       * ready queue and exit.
2392       */
2393      /* @todo fixme */
2394    }
2395  }
2396}
2397
2398/**
2399 * Process the modified list of buffers. There is a sync or modified list that
2400 * needs to be handled so we have a common function to do the work.
2401 *
2402 * @param dev The device to handle. If BDBUF_INVALID_DEV no device is selected
2403 * so select the device of the first buffer to be written to disk.
2404 * @param chain The modified chain to process.
2405 * @param transfer The chain to append buffers to be written too.
2406 * @param sync_active If true this is a sync operation so expire all timers.
2407 * @param update_timers If true update the timers.
2408 * @param timer_delta It update_timers is true update the timers by this
2409 *                    amount.
2410 */
2411static void
2412rtems_bdbuf_swapout_modified_processing (dev_t*               dev,
2413                                         rtems_chain_control* chain,
2414                                         rtems_chain_control* transfer,
2415                                         bool                 sync_active,
2416                                         bool                 update_timers,
2417                                         uint32_t             timer_delta)
2418{
2419  if (!rtems_chain_is_empty (chain))
2420  {
2421    rtems_chain_node* node = rtems_chain_head (chain);
2422    node = node->next;
2423
2424    while (!rtems_chain_is_tail (chain, node))
2425    {
2426      rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;
2427
2428      /*
2429       * Check if the buffer's hold timer has reached 0. If a sync is active
2430       * or someone waits for a buffer force all the timers to 0.
2431       *
2432       * @note Lots of sync requests will skew this timer. It should be based
2433       *       on TOD to be accurate. Does it matter ?
2434       */
2435      if (sync_active || rtems_bdbuf_has_buffer_waiters ())
2436        bd->hold_timer = 0;
2437
2438      if (bd->hold_timer)
2439      {
2440        if (update_timers)
2441        {
2442          if (bd->hold_timer > timer_delta)
2443            bd->hold_timer -= timer_delta;
2444          else
2445            bd->hold_timer = 0;
2446        }
2447
2448        if (bd->hold_timer)
2449        {
2450          node = node->next;
2451          continue;
2452        }
2453      }
2454
2455      /*
2456       * This assumes we can set dev_t to BDBUF_INVALID_DEV which is just an
2457       * assumption. Cannot use the transfer list being empty the sync dev
2458       * calls sets the dev to use.
2459       */
2460      if (*dev == BDBUF_INVALID_DEV)
2461        *dev = bd->dev;
2462
2463      if (bd->dev == *dev)
2464      {
2465        rtems_chain_node* next_node = node->next;
2466        rtems_chain_node* tnode = rtems_chain_tail (transfer);
2467
2468        /*
2469         * The blocks on the transfer list are sorted in block order. This
2470         * means multi-block transfers for drivers that require consecutive
2471         * blocks perform better with sorted blocks and for real disks it may
2472         * help lower head movement.
2473         */
2474
2475        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);
2476
2477        rtems_chain_extract (node);
2478
2479        tnode = tnode->previous;
2480
2481        while (node && !rtems_chain_is_head (transfer, tnode))
2482        {
2483          rtems_bdbuf_buffer* tbd = (rtems_bdbuf_buffer*) tnode;
2484
2485          if (bd->block > tbd->block)
2486          {
2487            rtems_chain_insert (tnode, node);
2488            node = NULL;
2489          }
2490          else
2491            tnode = tnode->previous;
2492        }
2493
2494        if (node)
2495          rtems_chain_prepend (transfer, node);
2496
2497        node = next_node;
2498      }
2499      else
2500      {
2501        node = node->next;
2502      }
2503    }
2504  }
2505}
2506
2507/**
2508 * Process the cache's modified buffers. Check the sync list first then the
2509 * modified list extracting the buffers suitable to be written to disk. We have
2510 * a device at a time. The task level loop will repeat this operation while
2511 * there are buffers to be written. If the transfer fails place the buffers
2512 * back on the modified list and try again later. The cache is unlocked while
2513 * the buffers are being written to disk.
2514 *
2515 * @param timer_delta It update_timers is true update the timers by this
2516 *                    amount.
2517 * @param update_timers If true update the timers.
2518 * @param transfer The transfer transaction data.
2519 *
2520 * @retval true Buffers where written to disk so scan again.
2521 * @retval false No buffers where written to disk.
2522 */
2523static bool
2524rtems_bdbuf_swapout_processing (unsigned long                 timer_delta,
2525                                bool                          update_timers,
2526                                rtems_bdbuf_swapout_transfer* transfer)
2527{
2528  rtems_bdbuf_swapout_worker* worker;
2529  bool                        transfered_buffers = false;
2530
2531  rtems_bdbuf_lock_cache ();
2532
2533  /*
2534   * If a sync is active do not use a worker because the current code does not
2535   * cleaning up after. We need to know the buffers have been written when
2536   * syncing to the release sync lock and currently worker threads do not
2537   * return to here. We do not know the worker is the last in a sequence of
2538   * sync writes until after we have it running so we do not know to tell it to
2539   * release the lock. The simplest solution is to get the main swap out task
2540   * perform all sync operations.
2541   */
2542  if (bdbuf_cache.sync_active)
2543    worker = NULL;
2544  else
2545  {
2546    worker = (rtems_bdbuf_swapout_worker*)
2547      rtems_chain_get (&bdbuf_cache.swapout_workers);
2548    if (worker)
2549      transfer = &worker->transfer;
2550  }
2551
2552  rtems_chain_initialize_empty (&transfer->bds);
2553  transfer->dev = BDBUF_INVALID_DEV;
2554
2555  /*
2556   * When the sync is for a device limit the sync to that device. If the sync
2557   * is for a buffer handle process the devices in the order on the sync
2558   * list. This means the dev is BDBUF_INVALID_DEV.
2559   */
2560  if (bdbuf_cache.sync_active)
2561    transfer->dev = bdbuf_cache.sync_device;
2562
2563  /*
2564   * If we have any buffers in the sync queue move them to the modified
2565   * list. The first sync buffer will select the device we use.
2566   */
2567  rtems_bdbuf_swapout_modified_processing (&transfer->dev,
2568                                           &bdbuf_cache.sync,
2569                                           &transfer->bds,
2570                                           true, false,
2571                                           timer_delta);
2572
2573  /*
2574   * Process the cache's modified list.
2575   */
2576  rtems_bdbuf_swapout_modified_processing (&transfer->dev,
2577                                           &bdbuf_cache.modified,
2578                                           &transfer->bds,
2579                                           bdbuf_cache.sync_active,
2580                                           update_timers,
2581                                           timer_delta);
2582
2583  /*
2584   * We have all the buffers that have been modified for this device so the
2585   * cache can be unlocked because the state of each buffer has been set to
2586   * TRANSFER.
2587   */
2588  rtems_bdbuf_unlock_cache ();
2589
2590  /*
2591   * If there are buffers to transfer to the media transfer them.
2592   */
2593  if (!rtems_chain_is_empty (&transfer->bds))
2594  {
2595    if (worker)
2596    {
2597      rtems_status_code sc = rtems_event_send (worker->id,
2598                                               RTEMS_BDBUF_SWAPOUT_SYNC);
2599      if (sc != RTEMS_SUCCESSFUL)
2600        rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE);
2601    }
2602    else
2603    {
2604      rtems_bdbuf_swapout_write (transfer);
2605    }
2606
2607    transfered_buffers = true;
2608  }
2609
2610  if (bdbuf_cache.sync_active && !transfered_buffers)
2611  {
2612    rtems_id sync_requester;
2613    rtems_bdbuf_lock_cache ();
2614    sync_requester = bdbuf_cache.sync_requester;
2615    bdbuf_cache.sync_active = false;
2616    bdbuf_cache.sync_requester = 0;
2617    rtems_bdbuf_unlock_cache ();
2618    if (sync_requester)
2619      rtems_event_send (sync_requester, RTEMS_BDBUF_TRANSFER_SYNC);
2620  }
2621
2622  return transfered_buffers;
2623}
2624
2625/**
2626 * Allocate the write request and initialise it for good measure.
2627 *
2628 * @return rtems_blkdev_request* The write reference memory.
2629 */
2630static rtems_blkdev_request*
2631rtems_bdbuf_swapout_writereq_alloc (void)
2632{
2633  /*
2634   * @note chrisj The rtems_blkdev_request and the array at the end is a hack.
2635   * I am disappointment at finding code like this in RTEMS. The request should
2636   * have been a rtems_chain_control. Simple, fast and less storage as the node
2637   * is already part of the buffer structure.
2638   */
2639  rtems_blkdev_request* write_req =
2640    malloc (sizeof (rtems_blkdev_request) +
2641            (bdbuf_config.max_write_blocks * sizeof (rtems_blkdev_sg_buffer)));
2642
2643  if (!write_req)
2644    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM);
2645
2646  write_req->req = RTEMS_BLKDEV_REQ_WRITE;
2647  write_req->req_done = rtems_bdbuf_write_done;
2648  write_req->done_arg = write_req;
2649  write_req->io_task = rtems_task_self ();
2650
2651  return write_req;
2652}
2653
2654/**
2655 * The swapout worker thread body.
2656 *
2657 * @param arg A pointer to the worker thread's private data.
2658 * @return rtems_task Not used.
2659 */
2660static rtems_task
2661rtems_bdbuf_swapout_worker_task (rtems_task_argument arg)
2662{
2663  rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) arg;
2664
2665  while (worker->enabled)
2666  {
2667    rtems_bdbuf_wait_for_event (RTEMS_BDBUF_SWAPOUT_SYNC);
2668
2669    rtems_bdbuf_swapout_write (&worker->transfer);
2670
2671    rtems_bdbuf_lock_cache ();
2672
2673    rtems_chain_initialize_empty (&worker->transfer.bds);
2674    worker->transfer.dev = BDBUF_INVALID_DEV;
2675
2676    rtems_chain_append (&bdbuf_cache.swapout_workers, &worker->link);
2677
2678    rtems_bdbuf_unlock_cache ();
2679  }
2680
2681  free (worker->transfer.write_req);
2682  free (worker);
2683
2684  rtems_task_delete (RTEMS_SELF);
2685}
2686
2687/**
2688 * Open the swapout worker threads.
2689 */
2690static void
2691rtems_bdbuf_swapout_workers_open (void)
2692{
2693  rtems_status_code sc;
2694  size_t            w;
2695
2696  rtems_bdbuf_lock_cache ();
2697
2698  for (w = 0; w < bdbuf_config.swapout_workers; w++)
2699  {
2700    rtems_bdbuf_swapout_worker* worker;
2701
2702    worker = malloc (sizeof (rtems_bdbuf_swapout_worker));
2703    if (!worker)
2704      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM);
2705
2706    rtems_chain_append (&bdbuf_cache.swapout_workers, &worker->link);
2707    worker->enabled = true;
2708    worker->transfer.write_req = rtems_bdbuf_swapout_writereq_alloc ();
2709
2710    rtems_chain_initialize_empty (&worker->transfer.bds);
2711    worker->transfer.dev = BDBUF_INVALID_DEV;
2712
2713    sc = rtems_task_create (rtems_build_name('B', 'D', 'o', 'a' + w),
2714                            (bdbuf_config.swapout_priority ?
2715                             bdbuf_config.swapout_priority :
2716                             RTEMS_BDBUF_SWAPOUT_TASK_PRIORITY_DEFAULT),
2717                            SWAPOUT_TASK_STACK_SIZE,
2718                            RTEMS_PREEMPT | RTEMS_NO_TIMESLICE | RTEMS_NO_ASR,
2719                            RTEMS_LOCAL | RTEMS_NO_FLOATING_POINT,
2720                            &worker->id);
2721    if (sc != RTEMS_SUCCESSFUL)
2722      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_CREATE);
2723
2724    sc = rtems_task_start (worker->id,
2725                           rtems_bdbuf_swapout_worker_task,
2726                           (rtems_task_argument) worker);
2727    if (sc != RTEMS_SUCCESSFUL)
2728      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_START);
2729  }
2730
2731  rtems_bdbuf_unlock_cache ();
2732}
2733
2734/**
2735 * Close the swapout worker threads.
2736 */
2737static void
2738rtems_bdbuf_swapout_workers_close (void)
2739{
2740  rtems_chain_node* node;
2741
2742  rtems_bdbuf_lock_cache ();
2743
2744  node = rtems_chain_first (&bdbuf_cache.swapout_workers);
2745  while (!rtems_chain_is_tail (&bdbuf_cache.swapout_workers, node))
2746  {
2747    rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) node;
2748    worker->enabled = false;
2749    rtems_event_send (worker->id, RTEMS_BDBUF_SWAPOUT_SYNC);
2750    node = rtems_chain_next (node);
2751  }
2752
2753  rtems_bdbuf_unlock_cache ();
2754}
2755
2756/**
2757 * Body of task which takes care on flushing modified buffers to the disk.
2758 *
2759 * @param arg A pointer to the global cache data. Use the global variable and
2760 *            not this.
2761 * @return rtems_task Not used.
2762 */
2763static rtems_task
2764rtems_bdbuf_swapout_task (rtems_task_argument arg)
2765{
2766  rtems_bdbuf_swapout_transfer transfer;
2767  uint32_t                     period_in_ticks;
2768  const uint32_t               period_in_msecs = bdbuf_config.swapout_period;;
2769  uint32_t                     timer_delta;
2770
2771  transfer.write_req = rtems_bdbuf_swapout_writereq_alloc ();
2772  rtems_chain_initialize_empty (&transfer.bds);
2773  transfer.dev = BDBUF_INVALID_DEV;
2774
2775  /*
2776   * Localise the period.
2777   */
2778  period_in_ticks = RTEMS_MICROSECONDS_TO_TICKS (period_in_msecs * 1000);
2779
2780  /*
2781   * This is temporary. Needs to be changed to use the real time clock.
2782   */
2783  timer_delta = period_in_msecs;
2784
2785  /*
2786   * Create the worker threads.
2787   */
2788  rtems_bdbuf_swapout_workers_open ();
2789
2790  while (bdbuf_cache.swapout_enabled)
2791  {
2792    rtems_event_set   out;
2793    rtems_status_code sc;
2794
2795    /*
2796     * Only update the timers once in the processing cycle.
2797     */
2798    bool update_timers = true;
2799
2800    /*
2801     * If we write buffers to any disk perform a check again. We only write a
2802     * single device at a time and the cache may have more than one device's
2803     * buffers modified waiting to be written.
2804     */
2805    bool transfered_buffers;
2806
2807    do
2808    {
2809      transfered_buffers = false;
2810
2811      /*
2812       * Extact all the buffers we find for a specific device. The device is
2813       * the first one we find on a modified list. Process the sync queue of
2814       * buffers first.
2815       */
2816      if (rtems_bdbuf_swapout_processing (timer_delta,
2817                                          update_timers,
2818                                          &transfer))
2819      {
2820        transfered_buffers = true;
2821      }
2822
2823      /*
2824       * Only update the timers once.
2825       */
2826      update_timers = false;
2827    }
2828    while (transfered_buffers);
2829
2830    sc = rtems_event_receive (RTEMS_BDBUF_SWAPOUT_SYNC,
2831                              RTEMS_EVENT_ALL | RTEMS_WAIT,
2832                              period_in_ticks,
2833                              &out);
2834
2835    if ((sc != RTEMS_SUCCESSFUL) && (sc != RTEMS_TIMEOUT))
2836      rtems_fatal_error_occurred (BLKDEV_FATAL_BDBUF_SWAPOUT_RE);
2837  }
2838
2839  rtems_bdbuf_swapout_workers_close ();
2840
2841  free (transfer.write_req);
2842
2843  rtems_task_delete (RTEMS_SELF);
2844}
Note: See TracBrowser for help on using the repository browser.