source: rtems/cpukit/libblock/src/bdbuf.c @ 253c3a1d

4.104.11
Last change on this file since 253c3a1d was 253c3a1d, checked in by Ralf Corsepius <ralf.corsepius@…>, on Oct 13, 2009 at 3:29:45 PM

2009-10-13 Ralf Corsépius <ralf.corsepius@…>

  • libblock/src/bdbuf.c: Fix broken printf formats. Use size_t for sizes.
  • Property mode set to 100644
File size: 85.0 KB
Line 
1/**
2 * @file
3 *
4 * @ingroup rtems_bdbuf
5 *
6 * Block device buffer management.
7 */
8
9/*
10 * Disk I/O buffering
11 * Buffer management
12 *
13 * Copyright (C) 2001 OKTET Ltd., St.-Peterburg, Russia
14 * Author: Andrey G. Ivanov <Andrey.Ivanov@oktet.ru>
15 *         Victor V. Vengerov <vvv@oktet.ru>
16 *         Alexander Kukuta <kam@oktet.ru>
17 *
18 * Copyright (C) 2008,2009 Chris Johns <chrisj@rtems.org>
19 *    Rewritten to remove score mutex access. Fixes many performance
20 *    issues.
21 *
22 * @(#) bdbuf.c,v 1.14 2004/04/17 08:15:17 ralf Exp
23 */
24
/**
 * Compile-time switch for the debug tracing code in this file. Set to 1 to
 * build the trace helpers, 0 to compile them out.
 */
#define RTEMS_BDBUF_TRACE 0
29
30#if HAVE_CONFIG_H
31#include "config.h"
32#endif
33
34#include <inttypes.h>
35
36#include <rtems.h>
37#include <rtems/error.h>
38#include <rtems/malloc.h>
39#include <limits.h>
40#include <errno.h>
41#include <assert.h>
42#include <stdio.h>
43
44#include "rtems/bdbuf.h"
45
/*
 * Shorter name used in this file for the bdbuf configuration table.
 */
#define bdbuf_config rtems_bdbuf_configuration
50
51/**
52 * A swapout transfer transaction data. This data is passed to a worked thread
53 * to handle the write phase of the transfer.
54 */
55typedef struct rtems_bdbuf_swapout_transfer
56{
57  rtems_chain_control   bds;         /**< The transfer list of BDs. */
58  dev_t                 dev;         /**< The device the transfer is for. */
59  rtems_blkdev_request* write_req;   /**< The write request array. */
60  uint32_t              bufs_per_bd; /**< Number of buffers per bd. */
61} rtems_bdbuf_swapout_transfer;
62
63/**
64 * Swapout worker thread. These are available to take processing from the
65 * main swapout thread and handle the I/O operation.
66 */
67typedef struct rtems_bdbuf_swapout_worker
68{
69  rtems_chain_node             link;     /**< The threads sit on a chain when
70                                          * idle. */
71  rtems_id                     id;       /**< The id of the task so we can wake
72                                          * it. */
73  volatile bool                enabled;  /**< The worked is enabled. */
74  rtems_bdbuf_swapout_transfer transfer; /**< The transfer data for this
75                                          * thread. */
76} rtems_bdbuf_swapout_worker;
77
78/**
79 * The BD buffer cache.
80 */
81typedef struct rtems_bdbuf_cache
82{
83  rtems_id            swapout;           /**< Swapout task ID */
84  volatile bool       swapout_enabled;   /**< Swapout is only running if
85                                          * enabled. Set to false to kill the
86                                          * swap out task. It deletes itself. */
87  rtems_chain_control swapout_workers;   /**< The work threads for the swapout
88                                          * task. */
89 
90  rtems_bdbuf_buffer* bds;               /**< Pointer to table of buffer
91                                          * descriptors. */
92  void*               buffers;           /**< The buffer's memory. */
93  size_t              buffer_min_count;  /**< Number of minimum size buffers
94                                          * that fit the buffer memory. */
95  size_t              max_bds_per_group; /**< The number of BDs of minimum
96                                          * buffer size that fit in a group. */
97  uint32_t            flags;             /**< Configuration flags. */
98
99  rtems_id            lock;              /**< The cache lock. It locks all
100                                          * cache data, BD and lists. */
101  rtems_id            sync_lock;         /**< Sync calls block writes. */
102  volatile bool       sync_active;       /**< True if a sync is active. */
103  volatile rtems_id   sync_requester;    /**< The sync requester. */
104  volatile dev_t      sync_device;       /**< The device to sync and -1 not a
105                                          * device sync. */
106
107  rtems_bdbuf_buffer* tree;              /**< Buffer descriptor lookup AVL tree
108                                          * root. There is only one. */
109  rtems_chain_control ready;             /**< Free buffers list, read-ahead, or
110                                          * resized group buffers. */
111  rtems_chain_control lru;               /**< Least recently used list */
112  rtems_chain_control modified;          /**< Modified buffers list */
113  rtems_chain_control sync;              /**< Buffers to sync list */
114
115  rtems_id            access;            /**< Obtain if waiting for a buffer in
116                                          * the ACCESS state. */
117  volatile uint32_t   access_waiters;    /**< Count of access blockers. */
118  rtems_id            transfer;          /**< Obtain if waiting for a buffer in
119                                          * the TRANSFER state. */
120  volatile uint32_t   transfer_waiters;  /**< Count of transfer blockers. */
121  rtems_id            waiting;           /**< Obtain if waiting for a buffer
122                                          * and the none are available. */
123  volatile uint32_t   wait_waiters;      /**< Count of waiting blockers. */
124
125  size_t              group_count;       /**< The number of groups. */
126  rtems_bdbuf_group*  groups;            /**< The groups. */
127 
128  bool                initialised;       /**< Initialised state. */
129} rtems_bdbuf_cache;
130
/**
 * Fatal error codes raised by this file.
 *
 * A code carries the character 'B' in its top 8 bits and the error number in
 * the low 24 bits.
 */
#define RTEMS_BLKDEV_FATAL_ERROR(n) \
  (((uint32_t)'B' << 24) | ((uint32_t)(n) & (uint32_t)0x00FFFFFF))

/* Cache consistency failures. */
#define RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_1 RTEMS_BLKDEV_FATAL_ERROR(1)
#define RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_2 RTEMS_BLKDEV_FATAL_ERROR(2)
#define RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_3 RTEMS_BLKDEV_FATAL_ERROR(3)
#define RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_4 RTEMS_BLKDEV_FATAL_ERROR(4)
#define RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_5 RTEMS_BLKDEV_FATAL_ERROR(5)
#define RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_6 RTEMS_BLKDEV_FATAL_ERROR(6)
#define RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_7 RTEMS_BLKDEV_FATAL_ERROR(7)
#define RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_8 RTEMS_BLKDEV_FATAL_ERROR(8)
#define RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_9 RTEMS_BLKDEV_FATAL_ERROR(9)

/* Swapout, locking, waiting and waking failures. */
#define RTEMS_BLKDEV_FATAL_BDBUF_SWAPOUT       RTEMS_BLKDEV_FATAL_ERROR(10)
#define RTEMS_BLKDEV_FATAL_BDBUF_SYNC_LOCK     RTEMS_BLKDEV_FATAL_ERROR(11)
#define RTEMS_BLKDEV_FATAL_BDBUF_SYNC_UNLOCK   RTEMS_BLKDEV_FATAL_ERROR(12)
#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_LOCK    RTEMS_BLKDEV_FATAL_ERROR(13)
#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_UNLOCK  RTEMS_BLKDEV_FATAL_ERROR(14)
#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_1  RTEMS_BLKDEV_FATAL_ERROR(15)
#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_2  RTEMS_BLKDEV_FATAL_ERROR(16)
#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_3  RTEMS_BLKDEV_FATAL_ERROR(17)
#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_TO RTEMS_BLKDEV_FATAL_ERROR(18)
#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAKE    RTEMS_BLKDEV_FATAL_ERROR(19)
#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE       RTEMS_BLKDEV_FATAL_ERROR(20)
#define RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM      RTEMS_BLKDEV_FATAL_ERROR(21)
#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_CREATE  RTEMS_BLKDEV_FATAL_ERROR(22)
#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_START   RTEMS_BLKDEV_FATAL_ERROR(23)
#define BLKDEV_FATAL_BDBUF_SWAPOUT_RE          RTEMS_BLKDEV_FATAL_ERROR(24)
#define BLKDEV_FATAL_BDBUF_SWAPOUT_TS          RTEMS_BLKDEV_FATAL_ERROR(25)
162
/**
 * Events used by this code. They should be system events rather than
 * application events.
 */
#define RTEMS_BDBUF_TRANSFER_SYNC  RTEMS_EVENT_1
#define RTEMS_BDBUF_SWAPOUT_SYNC   RTEMS_EVENT_2
169
/**
 * Stack size of the swap out task, in bytes. Should be more than enough for
 * most drivers, even with tracing turned on.
 */
#define SWAPOUT_TASK_STACK_SIZE (8 * 1024)
175
/**
 * Semaphore attributes for the locks that are used as mutexes.
 *
 * @warning Priority inheritance is on.
 */
#define RTEMS_BDBUF_CACHE_LOCK_ATTRIBS \
  (RTEMS_PRIORITY | RTEMS_BINARY_SEMAPHORE | \
   RTEMS_INHERIT_PRIORITY | RTEMS_NO_PRIORITY_CEILING | RTEMS_LOCAL)
184
/**
 * Semaphore attributes for the waiter semaphores.
 *
 * @warning Do not configure as inherit priority. If a driver is in the driver
 *          initialisation table this locked semaphore will have the IDLE task
 *          as the holder, and a blocking task will raise the priority of the
 *          IDLE task, which can cause unusual side effects.
 */
#define RTEMS_BDBUF_CACHE_WAITER_ATTRIBS \
  (RTEMS_PRIORITY | RTEMS_SIMPLE_BINARY_SEMAPHORE | \
   RTEMS_NO_INHERIT_PRIORITY | RTEMS_NO_PRIORITY_CEILING | RTEMS_LOCAL)
196
/**
 * Waiter timeout. Set to non-zero to find some info on a waiter that is
 * waiting too long.
 *
 * As written the first definition always applies, so waiters never time out.
 * The 20000000 microsecond alternative below is only compiled if the first
 * definition is removed.
 */
#define RTEMS_BDBUF_WAIT_TIMEOUT RTEMS_NO_TIMEOUT
#ifndef RTEMS_BDBUF_WAIT_TIMEOUT
#define RTEMS_BDBUF_WAIT_TIMEOUT \
  (TOD_MICROSECONDS_TO_TICKS (20000000))
#endif
206
207/*
208 * The swap out task.
209 */
210static rtems_task rtems_bdbuf_swapout_task(rtems_task_argument arg);
211
212/**
213 * The Buffer Descriptor cache.
214 */
215static rtems_bdbuf_cache bdbuf_cache;
216
#if RTEMS_BDBUF_TRACE
/**
 * If true output the trace message.
 */
bool rtems_bdbuf_tracer;

/**
 * Return the number of items on the list.
 *
 * @param list The chain control.
 * @return uint32_t The number of items on the list.
 */
uint32_t
rtems_bdbuf_list_count (rtems_chain_control* list)
{
  rtems_chain_node* node;
  uint32_t          count = 0;

  for (node = rtems_chain_first (list);
       !rtems_chain_is_tail (list, node);
       node = rtems_chain_next (node))
  {
    count++;
  }

  return count;
}

/**
 * Show the usage for the bdbuf cache: the summed user count of all groups,
 * the length of the ready, lru, modified and sync lists, and the total of
 * those four list lengths.
 */
void
rtems_bdbuf_show_usage (void)
{
  size_t   group;
  uint32_t total = 0;
  uint32_t val;

  for (group = 0; group < bdbuf_cache.group_count; group++)
    total += bdbuf_cache.groups[group].users;

  /*
   * The counters are uint32_t so they are printed with the <inttypes.h>
   * macros rather than with %lu, which expects an unsigned long.
   */
  printf ("bdbuf:group users=%" PRIu32, total);
  val = rtems_bdbuf_list_count (&bdbuf_cache.ready);
  printf (", ready=%" PRIu32, val);
  total = val;
  val = rtems_bdbuf_list_count (&bdbuf_cache.lru);
  printf (", lru=%" PRIu32, val);
  total += val;
  val = rtems_bdbuf_list_count (&bdbuf_cache.modified);
  printf (", mod=%" PRIu32, val);
  total += val;
  val = rtems_bdbuf_list_count (&bdbuf_cache.sync);
  printf (", sync=%" PRIu32, val);
  total += val;
  printf (", total=%" PRIu32 "\n", total);
}

/**
 * Show the users for a group of a bd.
 *
 * @param where A label to show the context of output.
 * @param bd The bd to show the users of.
 */
void
rtems_bdbuf_show_users (const char* where, rtems_bdbuf_buffer* bd)
{
  /* Two letter labels, indexed by the bd state value. */
  static const char* const states[] =
    { "EM", "RA", "CH", "AC", "MD", "AM", "SY", "TR" };

  /*
   * The block number and the user count are cast so the arguments match
   * their conversions whatever the underlying integer types are. The group
   * and bd indexes are pointer differences and are printed with %td.
   */
  printf ("bdbuf:users: %15s: [%lu (%s)] %td:%td = %lu %s\n",
          where,
          (unsigned long) bd->block, states[bd->state],
          bd->group - bdbuf_cache.groups,
          bd - bdbuf_cache.bds,
          (unsigned long) bd->group->users,
          bd->group->users > 8 ? "<<<<<<<" : "");
}
#else
/* Tracing is compiled out: the tracer is constant false, the helpers vanish. */
#define rtems_bdbuf_tracer (0)
#define rtems_bdbuf_show_usage()
#define rtems_bdbuf_show_users(_w, _b)
#endif
293
/**
 * Maximum AVL tree height supported by the insert and remove path stacks.
 *
 * The default maximum height of 32 allows for AVL trees having between
 * 5,704,880 and 4,294,967,295 nodes, depending on order of insertion. You may
 * change this compile-time constant as you wish.
 */
#if !defined (RTEMS_BDBUF_AVL_MAX_HEIGHT)
#define RTEMS_BDBUF_AVL_MAX_HEIGHT (32)
#endif
302
303/**
304 * Searches for the node with specified dev/block.
305 *
306 * @param root pointer to the root node of the AVL-Tree
307 * @param dev device search key
308 * @param block block search key
309 * @retval NULL node with the specified dev/block is not found
310 * @return pointer to the node with specified dev/block
311 */
312static rtems_bdbuf_buffer *
313rtems_bdbuf_avl_search (rtems_bdbuf_buffer** root,
314                        dev_t                dev,
315                        rtems_blkdev_bnum    block)
316{
317  rtems_bdbuf_buffer* p = *root;
318
319  while ((p != NULL) && ((p->dev != dev) || (p->block != block)))
320  {
321    if ((p->dev < dev) || ((p->dev == dev) && (p->block < block)))
322    {
323      p = p->avl.right;
324    }
325    else
326    {
327      p = p->avl.left;
328    }
329  }
330
331  return p;
332}
333
334/**
335 * Inserts the specified node to the AVl-Tree.
336 *
337 * @param root pointer to the root node of the AVL-Tree
338 * @param node Pointer to the node to add.
339 * @retval 0 The node added successfully
340 * @retval -1 An error occured
341 */
342static int
343rtems_bdbuf_avl_insert(rtems_bdbuf_buffer** root,
344                       rtems_bdbuf_buffer*  node)
345{
346  dev_t             dev = node->dev;
347  rtems_blkdev_bnum block = node->block;
348
349  rtems_bdbuf_buffer*  p = *root;
350  rtems_bdbuf_buffer*  q;
351  rtems_bdbuf_buffer*  p1;
352  rtems_bdbuf_buffer*  p2;
353  rtems_bdbuf_buffer*  buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT];
354  rtems_bdbuf_buffer** buf_prev = buf_stack;
355
356  bool modified = false;
357
358  if (p == NULL)
359  {
360    *root = node;
361    node->avl.left = NULL;
362    node->avl.right = NULL;
363    node->avl.bal = 0;
364    return 0;
365  }
366
367  while (p != NULL)
368  {
369    *buf_prev++ = p;
370
371    if ((p->dev < dev) || ((p->dev == dev) && (p->block < block)))
372    {
373      p->avl.cache = 1;
374      q = p->avl.right;
375      if (q == NULL)
376      {
377        q = node;
378        p->avl.right = q = node;
379        break;
380      }
381    }
382    else if ((p->dev != dev) || (p->block != block))
383    {
384      p->avl.cache = -1;
385      q = p->avl.left;
386      if (q == NULL)
387      {
388        q = node;
389        p->avl.left = q;
390        break;
391      }
392    }
393    else
394    {
395      return -1;
396    }
397
398    p = q;
399  }
400 
401  q->avl.left = q->avl.right = NULL;
402  q->avl.bal = 0;
403  modified = true;
404  buf_prev--;
405
406  while (modified)
407  {
408    if (p->avl.cache == -1)
409    {
410      switch (p->avl.bal)
411      {
412        case 1:
413          p->avl.bal = 0;
414          modified = false;
415          break;
416
417        case 0:
418          p->avl.bal = -1;
419          break;
420
421        case -1:
422          p1 = p->avl.left;
423          if (p1->avl.bal == -1) /* simple LL-turn */
424          {
425            p->avl.left = p1->avl.right;
426            p1->avl.right = p;
427            p->avl.bal = 0;
428            p = p1;
429          }
430          else /* double LR-turn */
431          {
432            p2 = p1->avl.right;
433            p1->avl.right = p2->avl.left;
434            p2->avl.left = p1;
435            p->avl.left = p2->avl.right;
436            p2->avl.right = p;
437            if (p2->avl.bal == -1) p->avl.bal = +1; else p->avl.bal = 0;
438            if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0;
439            p = p2;
440          }
441          p->avl.bal = 0;
442          modified = false;
443          break;
444
445        default:
446          break;
447      }
448    }
449    else
450    {
451      switch (p->avl.bal)
452      {
453        case -1:
454          p->avl.bal = 0;
455          modified = false;
456          break;
457
458        case 0:
459          p->avl.bal = 1;
460          break;
461
462        case 1:
463          p1 = p->avl.right;
464          if (p1->avl.bal == 1) /* simple RR-turn */
465          {
466            p->avl.right = p1->avl.left;
467            p1->avl.left = p;
468            p->avl.bal = 0;
469            p = p1;
470          }
471          else /* double RL-turn */
472          {
473            p2 = p1->avl.left;
474            p1->avl.left = p2->avl.right;
475            p2->avl.right = p1;
476            p->avl.right = p2->avl.left;
477            p2->avl.left = p;
478            if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0;
479            if (p2->avl.bal == -1) p1->avl.bal = +1; else p1->avl.bal = 0;
480            p = p2;
481          }
482          p->avl.bal = 0;
483          modified = false;
484          break;
485
486        default:
487          break;
488      }
489    }
490    q = p;
491    if (buf_prev > buf_stack)
492    {
493      p = *--buf_prev;
494
495      if (p->avl.cache == -1)
496      {
497        p->avl.left = q;
498      }
499      else
500      {
501        p->avl.right = q;
502      }
503    }
504    else
505    {
506      *root = p;
507      break;
508    }
509  };
510
511  return 0;
512}
513
514
515/**
516 * Removes the node from the tree.
517 *
518 * @param root Pointer to pointer to the root node
519 * @param node Pointer to the node to remove
520 * @retval 0 Item removed
521 * @retval -1 No such item found
522 */
523static int
524rtems_bdbuf_avl_remove(rtems_bdbuf_buffer**      root,
525                       const rtems_bdbuf_buffer* node)
526{
527  dev_t             dev = node->dev;
528  rtems_blkdev_bnum block = node->block;
529
530  rtems_bdbuf_buffer*  p = *root;
531  rtems_bdbuf_buffer*  q;
532  rtems_bdbuf_buffer*  r;
533  rtems_bdbuf_buffer*  s;
534  rtems_bdbuf_buffer*  p1;
535  rtems_bdbuf_buffer*  p2;
536  rtems_bdbuf_buffer*  buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT];
537  rtems_bdbuf_buffer** buf_prev = buf_stack;
538
539  bool modified = false;
540
541  memset (buf_stack, 0, sizeof(buf_stack));
542
543  while (p != NULL)
544  {
545    *buf_prev++ = p;
546
547    if ((p->dev < dev) || ((p->dev == dev) && (p->block < block)))
548    {
549      p->avl.cache = 1;
550      p = p->avl.right;
551    }
552    else if ((p->dev != dev) || (p->block != block))
553    {
554      p->avl.cache = -1;
555      p = p->avl.left;
556    }
557    else
558    {
559      /* node found */
560      break;
561    }
562  }
563
564  if (p == NULL)
565  {
566    /* there is no such node */
567    return -1;
568  }
569
570  q = p;
571
572  buf_prev--;
573  if (buf_prev > buf_stack)
574  {
575    p = *(buf_prev - 1);
576  }
577  else
578  {
579    p = NULL;
580  }
581
582  /* at this moment q - is a node to delete, p is q's parent */
583  if (q->avl.right == NULL)
584  {
585    r = q->avl.left;
586    if (r != NULL)
587    {
588      r->avl.bal = 0;
589    }
590    q = r;
591  }
592  else
593  {
594    rtems_bdbuf_buffer **t;
595
596    r = q->avl.right;
597
598    if (r->avl.left == NULL)
599    {
600      r->avl.left = q->avl.left;
601      r->avl.bal = q->avl.bal;
602      r->avl.cache = 1;
603      *buf_prev++ = q = r;
604    }
605    else
606    {
607      t = buf_prev++;
608      s = r;
609
610      while (s->avl.left != NULL)
611      {
612        *buf_prev++ = r = s;
613        s = r->avl.left;
614        r->avl.cache = -1;
615      }
616
617      s->avl.left = q->avl.left;
618      r->avl.left = s->avl.right;
619      s->avl.right = q->avl.right;
620      s->avl.bal = q->avl.bal;
621      s->avl.cache = 1;
622
623      *t = q = s;
624    }
625  }
626
627  if (p != NULL)
628  {
629    if (p->avl.cache == -1)
630    {
631      p->avl.left = q;
632    }
633    else
634    {
635      p->avl.right = q;
636    }
637  }
638  else
639  {
640    *root = q;
641  }
642
643  modified = true;
644
645  while (modified)
646  {
647    if (buf_prev > buf_stack)
648    {
649      p = *--buf_prev;
650    }
651    else
652    {
653      break;
654    }
655
656    if (p->avl.cache == -1)
657    {
658      /* rebalance left branch */
659      switch (p->avl.bal)
660      {
661        case -1:
662          p->avl.bal = 0;
663          break;
664        case  0:
665          p->avl.bal = 1;
666          modified = false;
667          break;
668
669        case +1:
670          p1 = p->avl.right;
671
672          if (p1->avl.bal >= 0) /* simple RR-turn */
673          {
674            p->avl.right = p1->avl.left;
675            p1->avl.left = p;
676
677            if (p1->avl.bal == 0)
678            {
679              p1->avl.bal = -1;
680              modified = false;
681            }
682            else
683            {
684              p->avl.bal = 0;
685              p1->avl.bal = 0;
686            }
687            p = p1;
688          }
689          else /* double RL-turn */
690          {
691            p2 = p1->avl.left;
692
693            p1->avl.left = p2->avl.right;
694            p2->avl.right = p1;
695            p->avl.right = p2->avl.left;
696            p2->avl.left = p;
697
698            if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0;
699            if (p2->avl.bal == -1) p1->avl.bal = 1; else p1->avl.bal = 0;
700
701            p = p2;
702            p2->avl.bal = 0;
703          }
704          break;
705
706        default:
707          break;
708      }
709    }
710    else
711    {
712      /* rebalance right branch */
713      switch (p->avl.bal)
714      {
715        case +1:
716          p->avl.bal = 0;
717          break;
718
719        case  0:
720          p->avl.bal = -1;
721          modified = false;
722          break;
723
724        case -1:
725          p1 = p->avl.left;
726
727          if (p1->avl.bal <= 0) /* simple LL-turn */
728          {
729            p->avl.left = p1->avl.right;
730            p1->avl.right = p;
731            if (p1->avl.bal == 0)
732            {
733              p1->avl.bal = 1;
734              modified = false;
735            }
736            else
737            {
738              p->avl.bal = 0;
739              p1->avl.bal = 0;
740            }
741            p = p1;
742          }
743          else /* double LR-turn */
744          {
745            p2 = p1->avl.right;
746
747            p1->avl.right = p2->avl.left;
748            p2->avl.left = p1;
749            p->avl.left = p2->avl.right;
750            p2->avl.right = p;
751
752            if (p2->avl.bal == -1) p->avl.bal = 1; else p->avl.bal = 0;
753            if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0;
754
755            p = p2;
756            p2->avl.bal = 0;
757          }
758          break;
759
760        default:
761          break;
762      }
763    }
764
765    if (buf_prev > buf_stack)
766    {
767      q = *(buf_prev - 1);
768
769      if (q->avl.cache == -1)
770      {
771        q->avl.left = p;
772      }
773      else
774      {
775        q->avl.right = p;
776      }
777    }
778    else
779    {
780      *root = p;
781      break;
782    }
783
784  }
785
786  return 0;
787}
788
789/**
790 * Change the block number for the block size to the block number for the media
791 * block size. We have to use 64bit maths. There is no short cut here.
792 *
793 * @param block The logical block number in the block size terms.
794 * @param block_size The block size.
795 * @param media_block_size The block size of the media.
796 * @return rtems_blkdev_bnum The media block number.
797 */
798static rtems_blkdev_bnum
799rtems_bdbuf_media_block (rtems_blkdev_bnum block,
800                         size_t            block_size,
801                         size_t            media_block_size)
802{
803  return (((uint64_t) block) * block_size) / media_block_size;
804}
805
806/**
807 * Lock the mutex. A single task can nest calls.
808 *
809 * @param lock The mutex to lock.
810 * @param fatal_error_code The error code if the call fails.
811 */
812static void
813rtems_bdbuf_lock (rtems_id lock, uint32_t fatal_error_code)
814{
815  rtems_status_code sc = rtems_semaphore_obtain (lock,
816                                                 RTEMS_WAIT,
817                                                 RTEMS_NO_TIMEOUT);
818  if (sc != RTEMS_SUCCESSFUL)
819    rtems_fatal_error_occurred (fatal_error_code);
820}
821
822/**
823 * Unlock the mutex.
824 *
825 * @param lock The mutex to unlock.
826 * @param fatal_error_code The error code if the call fails.
827 */
828static void
829rtems_bdbuf_unlock (rtems_id lock, uint32_t fatal_error_code)
830{
831  rtems_status_code sc = rtems_semaphore_release (lock);
832  if (sc != RTEMS_SUCCESSFUL)
833    rtems_fatal_error_occurred (fatal_error_code);
834}
835
836/**
837 * Lock the cache. A single task can nest calls.
838 */
839static void
840rtems_bdbuf_lock_cache (void)
841{
842  rtems_bdbuf_lock (bdbuf_cache.lock, RTEMS_BLKDEV_FATAL_BDBUF_CACHE_LOCK);
843}
844
845/**
846 * Unlock the cache.
847 */
848static void
849rtems_bdbuf_unlock_cache (void)
850{
851  rtems_bdbuf_unlock (bdbuf_cache.lock, RTEMS_BLKDEV_FATAL_BDBUF_CACHE_UNLOCK);
852}
853
854/**
855 * Lock the cache's sync. A single task can nest calls.
856 */
857static void
858rtems_bdbuf_lock_sync (void)
859{
860  rtems_bdbuf_lock (bdbuf_cache.sync_lock, RTEMS_BLKDEV_FATAL_BDBUF_SYNC_LOCK);
861}
862
863/**
864 * Unlock the cache's sync lock. Any blocked writers are woken.
865 */
866static void
867rtems_bdbuf_unlock_sync (void)
868{
869  rtems_bdbuf_unlock (bdbuf_cache.sync_lock,
870                      RTEMS_BLKDEV_FATAL_BDBUF_SYNC_UNLOCK);
871}
872
873/**
874 * Wait until woken. Semaphores are used so a number of tasks can wait and can
875 * be woken at once. Task events would require we maintain a list of tasks to
876 * be woken and this would require storgage and we do not know the number of
877 * tasks that could be waiting.
878 *
879 * While we have the cache locked we can try and claim the semaphore and
880 * therefore know when we release the lock to the cache we will block until the
881 * semaphore is released. This may even happen before we get to block.
882 *
883 * A counter is used to save the release call when no one is waiting.
884 *
885 * The function assumes the cache is locked on entry and it will be locked on
886 * exit.
887 *
888 * @param sema The semaphore to block on and wait.
889 * @param waiters The wait counter for this semaphore.
890 */
891static void
892rtems_bdbuf_wait (rtems_id* sema, volatile uint32_t* waiters)
893{
894  rtems_status_code sc;
895  rtems_mode        prev_mode;
896 
897  /*
898   * Indicate we are waiting.
899   */
900  *waiters += 1;
901
902  /*
903   * Disable preemption then unlock the cache and block.  There is no POSIX
904   * condition variable in the core API so this is a work around.
905   *
906   * The issue is a task could preempt after the cache is unlocked because it is
907   * blocking or just hits that window, and before this task has blocked on the
908   * semaphore. If the preempting task flushes the queue this task will not see
909   * the flush and may block for ever or until another transaction flushes this
910   * semaphore.
911   */
912  sc = rtems_task_mode (RTEMS_NO_PREEMPT, RTEMS_PREEMPT_MASK, &prev_mode);
913
914  if (sc != RTEMS_SUCCESSFUL)
915    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_1);
916 
917  /*
918   * Unlock the cache, wait, and lock the cache when we return.
919   */
920  rtems_bdbuf_unlock_cache ();
921
922  sc = rtems_semaphore_obtain (*sema, RTEMS_WAIT, RTEMS_BDBUF_WAIT_TIMEOUT);
923
924  if (sc == RTEMS_TIMEOUT)
925    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_TO);
926 
927  if (sc != RTEMS_UNSATISFIED)
928    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_2);
929 
930  rtems_bdbuf_lock_cache ();
931
932  sc = rtems_task_mode (prev_mode, RTEMS_ALL_MODE_MASKS, &prev_mode);
933
934  if (sc != RTEMS_SUCCESSFUL)
935    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_3);
936 
937  *waiters -= 1;
938}
939
940/**
941 * Wake a blocked resource. The resource has a counter that lets us know if
942 * there are any waiters.
943 *
944 * @param sema The semaphore to release.
945 * @param waiters The wait counter for this semaphore.
946 */
947static void
948rtems_bdbuf_wake (rtems_id sema, volatile uint32_t* waiters)
949{
950  if (*waiters)
951  {
952    rtems_status_code sc;
953
954    sc = rtems_semaphore_flush (sema);
955 
956    if (sc != RTEMS_SUCCESSFUL)
957      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAKE);
958  }
959}
960
961/**
962 * Add a buffer descriptor to the modified list. This modified list is treated
963 * a litte differently to the other lists. To access it you must have the cache
964 * locked and this is assumed to be the case on entry to this call.
965 *
966 * If the cache has a device being sync'ed and the bd is for that device the
967 * call must block and wait until the sync is over before adding the bd to the
968 * modified list. Once a sync happens for a device no bd's can be added the
969 * modified list. The disk image is forced to be snapshot at that moment in
970 * time.
971 *
972 * @note Do not lower the group user count as the modified list is a user of
973 * the buffer.
974 *
975 * @param bd The bd to queue to the cache's modified list.
976 */
977static void
978rtems_bdbuf_append_modified (rtems_bdbuf_buffer* bd)
979{
980  /*
981   * If the cache has a device being sync'ed check if this bd is for that
982   * device. If it is unlock the cache and block on the sync lock. Once we have
983   * the sync lock release it.
984   */
985  if (bdbuf_cache.sync_active && (bdbuf_cache.sync_device == bd->dev))
986  {
987    rtems_bdbuf_unlock_cache ();
988    /* Wait for the sync lock */
989    rtems_bdbuf_lock_sync ();
990    rtems_bdbuf_unlock_sync ();
991    rtems_bdbuf_lock_cache ();
992  }
993     
994  bd->state = RTEMS_BDBUF_STATE_MODIFIED;
995
996  rtems_chain_append (&bdbuf_cache.modified, &bd->link);
997}
998
999/**
1000 * Wait the swapper task.
1001 */
1002static void
1003rtems_bdbuf_wake_swapper (void)
1004{
1005  rtems_status_code sc = rtems_event_send (bdbuf_cache.swapout,
1006                                           RTEMS_BDBUF_SWAPOUT_SYNC);
1007  if (sc != RTEMS_SUCCESSFUL)
1008    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE);
1009}
1010
1011/**
1012 * Compute the number of BDs per group for a given buffer size.
1013 *
1014 * @param size The buffer size. It can be any size and we scale up.
1015 */
1016static size_t
1017rtems_bdbuf_bds_per_group (size_t size)
1018{
1019  size_t bufs_per_size;
1020  size_t bds_per_size;
1021 
1022  if (size > rtems_bdbuf_configuration.buffer_max)
1023    return 0;
1024 
1025  bufs_per_size = ((size - 1) / bdbuf_config.buffer_min) + 1;
1026 
1027  for (bds_per_size = 1;
1028       bds_per_size < bufs_per_size;
1029       bds_per_size <<= 1)
1030    ;
1031
1032  return bdbuf_cache.max_bds_per_group / bds_per_size;
1033}
1034
1035/**
1036 * Reallocate a group. The BDs currently allocated in the group are removed
1037 * from the ALV tree and any lists then the new BD's are prepended to the ready
1038 * list of the cache.
1039 *
1040 * @param group The group to reallocate.
1041 * @param new_bds_per_group The new count of BDs per group.
1042 */
1043static void
1044rtems_bdbuf_group_realloc (rtems_bdbuf_group* group, size_t new_bds_per_group)
1045{
1046  rtems_bdbuf_buffer* bd;
1047  size_t              b;
1048  size_t              bufs_per_bd;
1049
1050  if (rtems_bdbuf_tracer)
1051    printf ("bdbuf:realloc: %tu: %zd -> %zd\n",
1052            group - bdbuf_cache.groups, group->bds_per_group,
1053            new_bds_per_group);
1054 
1055  bufs_per_bd = bdbuf_cache.max_bds_per_group / group->bds_per_group;
1056 
1057  for (b = 0, bd = group->bdbuf;
1058       b < group->bds_per_group;
1059       b++, bd += bufs_per_bd)
1060  {
1061    switch (bd->state)
1062    {
1063      case RTEMS_BDBUF_STATE_EMPTY:
1064        break;
1065      case RTEMS_BDBUF_STATE_CACHED:
1066      case RTEMS_BDBUF_STATE_READ_AHEAD:
1067        if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0)
1068          rtems_fatal_error_occurred ((bd->state << 16) |
1069                                      RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_1);
1070        break;
1071      default:
1072        rtems_fatal_error_occurred ((bd->state << 16) |
1073                                    RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_8);
1074    }
1075   
1076    rtems_chain_extract (&bd->link);
1077  }
1078 
1079  group->bds_per_group = new_bds_per_group;
1080  bufs_per_bd = bdbuf_cache.max_bds_per_group / new_bds_per_group;
1081 
1082  for (b = 0, bd = group->bdbuf;
1083       b < group->bds_per_group;
1084       b++, bd += bufs_per_bd)
1085  {
1086    bd->state = RTEMS_BDBUF_STATE_EMPTY;
1087    rtems_chain_prepend (&bdbuf_cache.ready, &bd->link);
1088  }
1089}
1090
1091/**
1092 * Get the next BD from the list. This call assumes the cache is locked.
1093 *
1094 * @param bds_per_group The number of BDs per block we are need.
1095 * @param list The list to find the BD on.
1096 * @return The next BD if found or NULL is none are available.
1097 */
1098static rtems_bdbuf_buffer*
1099rtems_bdbuf_get_next_bd (size_t               bds_per_group,
1100                         rtems_chain_control* list)
1101{
1102  rtems_chain_node* node = rtems_chain_first (list);
1103  while (!rtems_chain_is_tail (list, node))
1104  {
1105    rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;
1106
1107    if (rtems_bdbuf_tracer)
1108      printf ("bdbuf:next-bd: %tu (%td:%" PRId32 ") %zd -> %zd\n",
1109              bd - bdbuf_cache.bds,
1110              bd->group - bdbuf_cache.groups, bd->group->users,
1111              bd->group->bds_per_group, bds_per_group);
1112
1113    /*
1114     * If this bd is already part of a group that supports the same number of
1115     * BDs per group return it. If the bd is part of another group check the
1116     * number of users and if 0 we can take this group and resize it.
1117     */
1118    if (bd->group->bds_per_group == bds_per_group)
1119    {
1120      rtems_chain_extract (node);
1121      return bd;
1122    }
1123
1124    if (bd->group->users == 0)
1125    {
1126      /*
1127       * We use the group to locate the start of the BDs for this group.
1128       */
1129      rtems_bdbuf_group_realloc (bd->group, bds_per_group);
1130      bd = (rtems_bdbuf_buffer*) rtems_chain_get (&bdbuf_cache.ready);
1131      return bd;
1132    }
1133
1134    node = rtems_chain_next (node);
1135  }
1136 
1137  return NULL;
1138}
1139
1140/**
1141 * Initialise the cache.
1142 *
1143 * @return rtems_status_code The initialisation status.
1144 */
1145rtems_status_code
1146rtems_bdbuf_init (void)
1147{
1148  rtems_bdbuf_group*  group;
1149  rtems_bdbuf_buffer* bd;
1150  uint8_t*            buffer;
1151  int                 b;
1152  int                 cache_aligment;
1153  rtems_status_code   sc;
1154
1155  if (rtems_bdbuf_tracer)
1156    printf ("bdbuf:init\n");
1157
1158  /*
1159   * Check the configuration table values.
1160   */
1161  if ((bdbuf_config.buffer_max % bdbuf_config.buffer_min) != 0)
1162    return RTEMS_INVALID_NUMBER;
1163 
1164  /*
1165   * We use a special variable to manage the initialisation incase we have
1166   * completing threads doing this. You may get errors if the another thread
1167   * makes a call and we have not finished initialisation.
1168   */
1169  if (bdbuf_cache.initialised)
1170    return RTEMS_RESOURCE_IN_USE;
1171
1172  bdbuf_cache.initialised = true;
1173 
1174  /*
1175   * For unspecified cache alignments we use the CPU alignment.
1176   */
1177  cache_aligment = 32; /* FIXME rtems_cache_get_data_line_size() */
1178  if (cache_aligment <= 0)
1179    cache_aligment = CPU_ALIGNMENT;
1180
1181  bdbuf_cache.sync_active    = false;
1182  bdbuf_cache.sync_device    = -1;
1183  bdbuf_cache.sync_requester = 0;
1184  bdbuf_cache.tree           = NULL;
1185
1186  rtems_chain_initialize_empty (&bdbuf_cache.swapout_workers);
1187  rtems_chain_initialize_empty (&bdbuf_cache.ready);
1188  rtems_chain_initialize_empty (&bdbuf_cache.lru);
1189  rtems_chain_initialize_empty (&bdbuf_cache.modified);
1190  rtems_chain_initialize_empty (&bdbuf_cache.sync);
1191
1192  bdbuf_cache.access           = 0;
1193  bdbuf_cache.access_waiters   = 0;
1194  bdbuf_cache.transfer         = 0;
1195  bdbuf_cache.transfer_waiters = 0;
1196  bdbuf_cache.waiting          = 0;
1197  bdbuf_cache.wait_waiters     = 0;
1198
1199  /*
1200   * Create the locks for the cache.
1201   */
1202  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'l'),
1203                               1, RTEMS_BDBUF_CACHE_LOCK_ATTRIBS, 0,
1204                               &bdbuf_cache.lock);
1205  if (sc != RTEMS_SUCCESSFUL)
1206  {
1207    bdbuf_cache.initialised = false;
1208    return sc;
1209  }
1210
1211  rtems_bdbuf_lock_cache ();
1212 
1213  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 's'),
1214                               1, RTEMS_BDBUF_CACHE_LOCK_ATTRIBS, 0,
1215                               &bdbuf_cache.sync_lock);
1216  if (sc != RTEMS_SUCCESSFUL)
1217  {
1218    rtems_bdbuf_unlock_cache ();
1219    rtems_semaphore_delete (bdbuf_cache.lock);
1220    bdbuf_cache.initialised = false;
1221    return sc;
1222  }
1223 
1224  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'a'),
1225                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
1226                               &bdbuf_cache.access);
1227  if (sc != RTEMS_SUCCESSFUL)
1228  {
1229    rtems_semaphore_delete (bdbuf_cache.sync_lock);
1230    rtems_bdbuf_unlock_cache ();
1231    rtems_semaphore_delete (bdbuf_cache.lock);
1232    bdbuf_cache.initialised = false;
1233    return sc;
1234  }
1235
1236  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 't'),
1237                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
1238                               &bdbuf_cache.transfer);
1239  if (sc != RTEMS_SUCCESSFUL)
1240  {
1241    rtems_semaphore_delete (bdbuf_cache.access);
1242    rtems_semaphore_delete (bdbuf_cache.sync_lock);
1243    rtems_bdbuf_unlock_cache ();
1244    rtems_semaphore_delete (bdbuf_cache.lock);
1245    bdbuf_cache.initialised = false;
1246    return sc;
1247  }
1248
1249  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'w'),
1250                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
1251                               &bdbuf_cache.waiting);
1252  if (sc != RTEMS_SUCCESSFUL)
1253  {
1254    rtems_semaphore_delete (bdbuf_cache.transfer);
1255    rtems_semaphore_delete (bdbuf_cache.access);
1256    rtems_semaphore_delete (bdbuf_cache.sync_lock);
1257    rtems_bdbuf_unlock_cache ();
1258    rtems_semaphore_delete (bdbuf_cache.lock);
1259    bdbuf_cache.initialised = false;
1260    return sc;
1261  }
1262 
1263  /*
1264   * Compute the various number of elements in the cache.
1265   */
1266  bdbuf_cache.buffer_min_count =
1267    bdbuf_config.size / bdbuf_config.buffer_min;
1268  bdbuf_cache.max_bds_per_group =
1269    bdbuf_config.buffer_max / bdbuf_config.buffer_min;
1270  bdbuf_cache.group_count =
1271    bdbuf_cache.buffer_min_count / bdbuf_cache.max_bds_per_group;
1272
1273  /*
1274   * Allocate the memory for the buffer descriptors.
1275   */
1276  bdbuf_cache.bds = calloc (sizeof (rtems_bdbuf_buffer),
1277                            bdbuf_cache.buffer_min_count);
1278  if (!bdbuf_cache.bds)
1279  {
1280    rtems_semaphore_delete (bdbuf_cache.transfer);
1281    rtems_semaphore_delete (bdbuf_cache.access);
1282    rtems_semaphore_delete (bdbuf_cache.sync_lock);
1283    rtems_bdbuf_unlock_cache ();
1284    rtems_semaphore_delete (bdbuf_cache.lock);
1285    bdbuf_cache.initialised = false;
1286    return RTEMS_NO_MEMORY;
1287  }
1288
1289  /*
1290   * Allocate the memory for the buffer descriptors.
1291   */
1292  bdbuf_cache.groups = calloc (sizeof (rtems_bdbuf_group),
1293                               bdbuf_cache.group_count);
1294  if (!bdbuf_cache.groups)
1295  {
1296    free (bdbuf_cache.bds);
1297    rtems_semaphore_delete (bdbuf_cache.transfer);
1298    rtems_semaphore_delete (bdbuf_cache.access);
1299    rtems_semaphore_delete (bdbuf_cache.sync_lock);
1300    rtems_bdbuf_unlock_cache ();
1301    rtems_semaphore_delete (bdbuf_cache.lock);
1302    bdbuf_cache.initialised = false;
1303    return RTEMS_NO_MEMORY;
1304  }
1305 
1306  /*
1307   * Allocate memory for buffer memory. The buffer memory will be cache
1308   * aligned. It is possible to free the memory allocated by rtems_memalign()
1309   * with free(). Return 0 if allocated.
1310   *
1311   * The memory allocate allows a
1312   */
1313  if (rtems_memalign ((void **) &bdbuf_cache.buffers,
1314                      cache_aligment,
1315                      bdbuf_cache.buffer_min_count * bdbuf_config.buffer_min) != 0)
1316  {
1317    free (bdbuf_cache.groups);
1318    free (bdbuf_cache.bds);
1319    rtems_semaphore_delete (bdbuf_cache.transfer);
1320    rtems_semaphore_delete (bdbuf_cache.access);
1321    rtems_semaphore_delete (bdbuf_cache.sync_lock);
1322    rtems_bdbuf_unlock_cache ();
1323    rtems_semaphore_delete (bdbuf_cache.lock);
1324    bdbuf_cache.initialised = false;
1325    return RTEMS_NO_MEMORY;
1326  }
1327
1328  /*
1329   * The cache is empty after opening so we need to add all the buffers to it
1330   * and initialise the groups.
1331   */
1332  for (b = 0, group = bdbuf_cache.groups,
1333         bd = bdbuf_cache.bds, buffer = bdbuf_cache.buffers;
1334       b < bdbuf_cache.buffer_min_count;
1335       b++, bd++, buffer += bdbuf_config.buffer_min)
1336  {
1337    bd->dev        = -1;
1338    bd->group      = group;
1339    bd->buffer     = buffer;
1340    bd->avl.left   = NULL;
1341    bd->avl.right  = NULL;
1342    bd->state      = RTEMS_BDBUF_STATE_EMPTY;
1343    bd->error      = 0;
1344    bd->waiters    = 0;
1345    bd->hold_timer = 0;
1346    bd->references = 0;
1347    bd->user       = NULL;
1348   
1349    rtems_chain_append (&bdbuf_cache.ready, &bd->link);
1350
1351    if ((b % bdbuf_cache.max_bds_per_group) ==
1352        (bdbuf_cache.max_bds_per_group - 1))
1353      group++;
1354  }
1355
1356  for (b = 0,
1357         group = bdbuf_cache.groups,
1358         bd = bdbuf_cache.bds;
1359       b < bdbuf_cache.group_count;
1360       b++,
1361         group++,
1362         bd += bdbuf_cache.max_bds_per_group)
1363  {
1364    group->bds_per_group = bdbuf_cache.max_bds_per_group;
1365    group->users = 0;
1366    group->bdbuf = bd;
1367  }
1368         
1369  /*
1370   * Create and start swapout task. This task will create and manage the worker
1371   * threads.
1372   */
1373  bdbuf_cache.swapout_enabled = true;
1374 
1375  sc = rtems_task_create (rtems_build_name('B', 'S', 'W', 'P'),
1376                          (bdbuf_config.swapout_priority ?
1377                           bdbuf_config.swapout_priority :
1378                           RTEMS_BDBUF_SWAPOUT_TASK_PRIORITY_DEFAULT),
1379                          SWAPOUT_TASK_STACK_SIZE,
1380                          RTEMS_PREEMPT | RTEMS_NO_TIMESLICE | RTEMS_NO_ASR,
1381                          RTEMS_LOCAL | RTEMS_NO_FLOATING_POINT,
1382                          &bdbuf_cache.swapout);
1383  if (sc != RTEMS_SUCCESSFUL)
1384  {
1385    free (bdbuf_cache.buffers);
1386    free (bdbuf_cache.groups);
1387    free (bdbuf_cache.bds);
1388    rtems_semaphore_delete (bdbuf_cache.transfer);
1389    rtems_semaphore_delete (bdbuf_cache.access);
1390    rtems_semaphore_delete (bdbuf_cache.sync_lock);
1391    rtems_bdbuf_unlock_cache ();
1392    rtems_semaphore_delete (bdbuf_cache.lock);
1393    bdbuf_cache.initialised = false;
1394    return sc;
1395  }
1396
1397  sc = rtems_task_start (bdbuf_cache.swapout,
1398                         rtems_bdbuf_swapout_task,
1399                         (rtems_task_argument) &bdbuf_cache);
1400  if (sc != RTEMS_SUCCESSFUL)
1401  {
1402    rtems_task_delete (bdbuf_cache.swapout);
1403    free (bdbuf_cache.buffers);
1404    free (bdbuf_cache.groups);
1405    free (bdbuf_cache.bds);
1406    rtems_semaphore_delete (bdbuf_cache.transfer);
1407    rtems_semaphore_delete (bdbuf_cache.access);
1408    rtems_semaphore_delete (bdbuf_cache.sync_lock);
1409    rtems_bdbuf_unlock_cache ();
1410    rtems_semaphore_delete (bdbuf_cache.lock);
1411    bdbuf_cache.initialised = false;
1412    return sc;
1413  }
1414
1415  rtems_bdbuf_unlock_cache ();
1416 
1417  return RTEMS_SUCCESSFUL;
1418}
1419
1420/**
1421 * Get a buffer for this device and block. This function returns a buffer once
1422 * placed into the AVL tree. If no buffer is available and it is not a read
1423 * ahead request and no buffers are waiting to the written to disk wait until a
1424 * buffer is available. If buffers are waiting to be written to disk and none
1425 * are available expire the hold timer's of the queued buffers and wake the
1426 * swap out task. If the buffer is for a read ahead transfer return NULL if
1427 * there are no buffers available or the buffer is already in the cache.
1428 *
1429 * The AVL tree of buffers for the cache is searched and if not found obtain a
1430 * buffer and insert it into the AVL tree. Buffers are first obtained from the
1431 * ready list until all empty/ready buffers are used. Once all buffers are in
1432 * use the LRU list is searched for a buffer of the same group size or a group
1433 * that has no active buffers in use. A buffer taken from the LRU list is
1434 * removed from the AVL tree and assigned the new block number. The ready or
1435 * LRU list buffer is initialised to this device and block. If no buffers are
1436 * available due to the ready and LRU lists being empty a check is made of the
1437 * modified list. Buffers may be queued waiting for the hold timer to
1438 * expire. These buffers should be written to disk and returned to the LRU list
1439 * where they can be used. If buffers are on the modified list the max. write
1440 * block size of buffers have their hold timer's expired and the swap out task
1441 * woken. The caller then blocks on the waiting semaphore and counter. When
1442 * buffers return from the upper layers (access) or lower driver (transfer) the
1443 * blocked caller task is woken and this procedure is repeated. The repeat
1444 * handles a case of a another thread pre-empting getting a buffer first and
1445 * adding it to the AVL tree.
1446 *
1447 * A buffer located in the AVL tree means it is already in the cache and maybe
1448 * in use somewhere. The buffer can be either:
1449 *
1450 * # Cached. Not being accessed or part of a media transfer.
1451 * # Access or modifed access. Is with an upper layer being accessed.
1452 * # Transfer. Is with the driver and part of a media transfer.
1453 *
1454 * If cached we assign the new state, extract it from any list it maybe part of
1455 * and return to the user.
1456 *
1457 * This function assumes the cache the buffer is being taken from is locked and
1458 * it will make sure the cache is locked when it returns. The cache will be
1459 * unlocked if the call could block.
1460 *
1461 * Variable sized buffer is handled by groups. A group is the size of the
1462 * maximum buffer that can be allocated. The group can size in multiples of the
1463 * minimum buffer size where the mulitples are 1,2,4,8, etc. If the buffer is
1464 * found in the AVL tree the number of BDs in the group is check and if
1465 * different the buffer size for the block has changed. The buffer needs to be
1466 * invalidated.
1467 *
1468 * @param dd The disk device. Has the configured block size.
1469 * @param bds_per_group The number of BDs in a group for this block.
1470 * @param block Absolute media block number for the device
1471 * @param read_ahead The get is for a read ahead buffer if true
1472 * @return RTEMS status code (if operation completed successfully or error
1473 *         code if error is occured)
1474 */
1475static rtems_bdbuf_buffer*
1476rtems_bdbuf_get_buffer (rtems_disk_device* dd,
1477                        size_t             bds_per_group,
1478                        rtems_blkdev_bnum  block,
1479                        bool               read_ahead)
1480{
1481  dev_t               device = dd->dev;
1482  rtems_bdbuf_buffer* bd;
1483  bool                available;
1484 
1485  /*
1486   * Loop until we get a buffer. Under load we could find no buffers are
1487   * available requiring this task to wait until some become available before
1488   * proceeding. There is no timeout. If this call is to block and the buffer
1489   * is for a read ahead buffer return NULL. The read ahead is nice but not
1490   * that important.
1491   *
1492   * The search procedure is repeated as another thread could have pre-empted
1493   * us while we waited for a buffer, obtained an empty buffer and loaded the
1494   * AVL tree with the one we are after. In this case we move down and wait for
1495   * the buffer to return to the cache.
1496   */
1497  do
1498  {
1499    /*
1500     * Search for buffer descriptor for this dev/block key.
1501     */
1502    bd = rtems_bdbuf_avl_search (&bdbuf_cache.tree, device, block);
1503
1504    /*
1505     * No buffer in the cache for this block. We need to obtain a buffer and
1506     * this means take a buffer that is ready to use. If all buffers are in use
1507     * take the least recently used buffer. If there are none then the cache is
1508     * empty. All the buffers are either queued to be written to disk or with
1509     * the user. We cannot do much with the buffers with the user how-ever with
1510     * the modified buffers waiting to be written to disk flush the maximum
1511     * number transfered in a block to disk. After this all that can be done is
1512     * to wait for a buffer to return to the cache.
1513     */
1514    if (!bd)
1515    {
1516      /*
1517       * Assign new buffer descriptor from the ready list if one is present. If
1518       * the ready queue is empty get the oldest buffer from LRU list. If the
1519       * LRU list is empty there are no available buffers check the modified
1520       * list.
1521       */
1522      bd = rtems_bdbuf_get_next_bd (bds_per_group, &bdbuf_cache.ready);
1523
1524      if (!bd)
1525      {
1526        /*
1527         * No unused or read-ahead buffers.
1528         *
1529         * If this is a read ahead buffer just return. No need to place further
1530         * pressure on the cache by reading something that may be needed when
1531         * we have data in the cache that was needed and may still be in the
1532         * future.
1533         */
1534        if (read_ahead)
1535          return NULL;
1536
1537        /*
1538         * Check the LRU list.
1539         */
1540        bd = rtems_bdbuf_get_next_bd (bds_per_group, &bdbuf_cache.lru);
1541       
1542        if (bd)
1543        {
1544          /*
1545           * Remove the buffer from the AVL tree if the state says it is in the
1546           * cache or a read ahead buffer. The buffer could be in the empty
1547           * state as a result of reallocations.
1548           */
1549          switch (bd->state)
1550          {
1551            case RTEMS_BDBUF_STATE_CACHED:
1552            case RTEMS_BDBUF_STATE_READ_AHEAD:
1553              if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0)
1554                rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_2);
1555              break;
1556            default:
1557              break;
1558          }
1559        }
1560        else
1561        {
1562          /*
1563           * If there are buffers on the modified list expire the hold timer
1564           * and wake the swap out task then wait else just go and wait.
1565           *
1566           * The check for an empty list is made so the swapper is only woken
1567           * when if timers are changed.
1568           */
1569          if (!rtems_chain_is_empty (&bdbuf_cache.modified))
1570          {
1571            rtems_chain_node* node = rtems_chain_first (&bdbuf_cache.modified);
1572            uint32_t          write_blocks = 0;
1573           
1574            while ((write_blocks < bdbuf_config.max_write_blocks) &&
1575                   !rtems_chain_is_tail (&bdbuf_cache.modified, node))
1576            {
1577              rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;
1578              bd->hold_timer = 0;
1579              write_blocks++;
1580              node = rtems_chain_next (node);
1581            }
1582
1583            rtems_bdbuf_wake_swapper ();
1584          }
1585         
1586          /*
1587           * Wait for a buffer to be returned to the cache. The buffer will be
1588           * placed on the LRU list.
1589           */
1590          rtems_bdbuf_wait (&bdbuf_cache.waiting, &bdbuf_cache.wait_waiters);
1591        }
1592      }
1593      else
1594      {
1595        /*
1596         * We have a new buffer for this block.
1597         */
1598        if ((bd->state != RTEMS_BDBUF_STATE_EMPTY) &&
1599            (bd->state != RTEMS_BDBUF_STATE_READ_AHEAD))
1600          rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_3);
1601
1602        if (bd->state == RTEMS_BDBUF_STATE_READ_AHEAD)
1603        {
1604          if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0)
1605            rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_4);
1606        }
1607      }
1608
1609      if (bd)
1610      {
1611        bd->dev       = device;
1612        bd->block     = block;
1613        bd->avl.left  = NULL;
1614        bd->avl.right = NULL;
1615        bd->state     = RTEMS_BDBUF_STATE_EMPTY;
1616        bd->error     = 0;
1617        bd->waiters   = 0;
1618
1619        if (rtems_bdbuf_avl_insert (&bdbuf_cache.tree, bd) != 0)
1620          rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_5);
1621
1622        return bd;
1623      }
1624    }
1625    else
1626    {
1627      /*
1628       * We have the buffer for the block from the cache. Check if the buffer
1629       * in the cache is the same size and the requested size we are after.
1630       */
1631      if (bd->group->bds_per_group != bds_per_group)
1632      {
1633        /*
1634         * Remove the buffer from the AVL tree.
1635         */
1636        if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0)
1637          rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_2);
1638        bd->state = RTEMS_BDBUF_STATE_EMPTY;
1639        rtems_chain_extract (&bd->link);
1640        rtems_chain_prepend (&bdbuf_cache.ready, &bd->link);
1641        bd = NULL;
1642      }
1643    }
1644  }
1645  while (!bd);
1646
1647  /*
1648   * If the buffer is for read ahead and it exists in the AVL cache or is being
1649   * accessed or being transfered then return NULL stopping further read ahead
1650   * requests.
1651   */
1652  if (read_ahead)
1653    return NULL;
1654
1655  /*
1656   * Loop waiting for the buffer to enter the cached state. If the buffer is in
1657   * the access or transfer state then wait until it is not.
1658   */
1659  available = false;
1660  while (!available)
1661  {
1662    switch (bd->state)
1663    {
1664      case RTEMS_BDBUF_STATE_CACHED:
1665      case RTEMS_BDBUF_STATE_MODIFIED:
1666      case RTEMS_BDBUF_STATE_READ_AHEAD:
1667        available = true;
1668        break;
1669
1670      case RTEMS_BDBUF_STATE_ACCESS:
1671      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
1672        bd->waiters++;
1673        rtems_bdbuf_wait (&bdbuf_cache.access, &bdbuf_cache.access_waiters);
1674        bd->waiters--;
1675        break;
1676
1677      case RTEMS_BDBUF_STATE_SYNC:
1678      case RTEMS_BDBUF_STATE_TRANSFER:
1679        bd->waiters++;
1680        rtems_bdbuf_wait (&bdbuf_cache.transfer, &bdbuf_cache.transfer_waiters);
1681        bd->waiters--;
1682        break;
1683
1684      default:
1685        rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_6);
1686    }
1687  }
1688
1689  /*
1690   * Buffer is linked to the LRU, modifed, or sync lists. Remove it from there.
1691   */
1692  rtems_chain_extract (&bd->link);
1693
1694  return bd;
1695}
1696
1697rtems_status_code
1698rtems_bdbuf_get (dev_t                device,
1699                 rtems_blkdev_bnum    block,
1700                 rtems_bdbuf_buffer** bdp)
1701{
1702  rtems_disk_device*  dd;
1703  rtems_bdbuf_buffer* bd;
1704  rtems_blkdev_bnum   media_block;
1705  size_t              bds_per_group;
1706
1707  if (!bdbuf_cache.initialised)
1708    return RTEMS_NOT_CONFIGURED;
1709
1710  /*
1711   * Do not hold the cache lock when obtaining the disk table.
1712   */
1713  dd = rtems_disk_obtain (device);
1714  if (!dd)
1715    return RTEMS_INVALID_ID;
1716
1717  /*
1718   * Compute the media block number. Drivers work with media block number not
1719   * the block number a BD may have as this depends on the block size set by
1720   * the user.
1721   */
1722  media_block = rtems_bdbuf_media_block (block,
1723                                         dd->block_size,
1724                                         dd->media_block_size);
1725  if (media_block >= dd->size)
1726  {
1727    rtems_disk_release(dd);
1728    return RTEMS_INVALID_NUMBER;
1729  }
1730
1731  bds_per_group = rtems_bdbuf_bds_per_group (dd->block_size);
1732  if (!bds_per_group)
1733  {
1734    rtems_disk_release (dd);
1735    return RTEMS_INVALID_NUMBER;
1736  }
1737
1738  media_block += dd->start;
1739
1740  rtems_bdbuf_lock_cache ();
1741
1742  /*
1743   * Print the block index relative to the physical disk.
1744   */
1745  if (rtems_bdbuf_tracer)
1746    printf ("bdbuf:get: %lu (%lu) (dev = %08x)\n",
1747            media_block, block, (unsigned int) device);
1748
1749  bd = rtems_bdbuf_get_buffer (dd, bds_per_group, media_block, false);
1750
1751  /*
1752   * This could be considered a bug in the caller because you should not be
1753   * getting an already modified buffer but user may have modified a byte in a
1754   * block then decided to seek the start and write the whole block and the
1755   * file system will have no record of this so just gets the block to fill.
1756   */
1757  if (bd->state == RTEMS_BDBUF_STATE_MODIFIED)
1758    bd->state = RTEMS_BDBUF_STATE_ACCESS_MODIFIED;
1759  else
1760  {
1761    bd->state = RTEMS_BDBUF_STATE_ACCESS;
1762    /*
1763     * Indicate a buffer in this group is being used.
1764     */
1765    bd->group->users++;
1766  }
1767 
1768  if (rtems_bdbuf_tracer)
1769  {
1770    rtems_bdbuf_show_users ("get", bd);
1771    rtems_bdbuf_show_usage ();
1772  }
1773
1774  rtems_bdbuf_unlock_cache ();
1775
1776  rtems_disk_release(dd);
1777
1778  *bdp = bd;
1779
1780  return RTEMS_SUCCESSFUL;
1781}
1782
1783/**
1784 * Call back handler called by the low level driver when the transfer has
1785 * completed. This function may be invoked from interrupt handler.
1786 *
1787 * @param arg Arbitrary argument specified in block device request
1788 *            structure (in this case - pointer to the appropriate
1789 *            block device request structure).
1790 * @param status I/O completion status
1791 * @param error errno error code if status != RTEMS_SUCCESSFUL
1792 */
1793static void
1794rtems_bdbuf_read_done (void* arg, rtems_status_code status, int error)
1795{
1796  rtems_blkdev_request* req = (rtems_blkdev_request*) arg;
1797
1798  req->error = error;
1799  req->status = status;
1800
1801  rtems_event_send (req->io_task, RTEMS_BDBUF_TRANSFER_SYNC);
1802}
1803
1804rtems_status_code
1805rtems_bdbuf_read (dev_t                device,
1806                  rtems_blkdev_bnum    block,
1807                  rtems_bdbuf_buffer** bdp)
1808{
1809  rtems_disk_device*    dd;
1810  rtems_bdbuf_buffer*   bd = NULL;
1811  uint32_t              read_ahead_count;
1812  rtems_blkdev_request* req;
1813  size_t                bds_per_group;
1814  rtems_blkdev_bnum     media_block;
1815  rtems_blkdev_bnum     media_block_count;
1816 
1817  if (!bdbuf_cache.initialised)
1818    return RTEMS_NOT_CONFIGURED;
1819
1820  /*
1821   * @todo This type of request structure is wrong and should be removed.
1822   */
1823#define bdbuf_alloc(size) __builtin_alloca (size)
1824
1825  req = bdbuf_alloc (sizeof (rtems_blkdev_request) +
1826                     (sizeof ( rtems_blkdev_sg_buffer) *
1827                      rtems_bdbuf_configuration.max_read_ahead_blocks));
1828
1829  /*
1830   * Do not hold the cache lock when obtaining the disk table.
1831   */
1832  dd = rtems_disk_obtain (device);
1833  if (!dd)
1834    return RTEMS_INVALID_ID;
1835 
1836  /*
1837   * Compute the media block number. Drivers work with media block number not
1838   * the block number a BD may have as this depends on the block size set by
1839   * the user.
1840   */
1841  media_block = rtems_bdbuf_media_block (block,
1842                                         dd->block_size,
1843                                         dd->media_block_size);
1844  if (media_block >= dd->size)
1845  {
1846    rtems_disk_release(dd);
1847    return RTEMS_INVALID_NUMBER;
1848  }
1849 
1850  bds_per_group = rtems_bdbuf_bds_per_group (dd->block_size);
1851  if (!bds_per_group)
1852  {
1853    rtems_disk_release (dd);
1854    return RTEMS_INVALID_NUMBER;
1855  }
1856 
1857  /*
1858   * Print the block index relative to the physical disk and the user block
1859   * number
1860   */
1861  if (rtems_bdbuf_tracer)
1862    printf ("bdbuf:read: %lu (%lu) (dev = %08x)\n",
1863            media_block + dd->start, block, (unsigned int) device);
1864
1865  /*
1866   * Read the block plus the required number of blocks ahead. The number of
1867   * blocks to read ahead is configured by the user and limited by the size of
1868   * the disk or reaching a read ahead block that is also cached.
1869   *
1870   * Limit the blocks read by the size of the disk.
1871   */
1872  if ((rtems_bdbuf_configuration.max_read_ahead_blocks + media_block) < dd->size)
1873    read_ahead_count = rtems_bdbuf_configuration.max_read_ahead_blocks;
1874  else
1875    read_ahead_count = dd->size - media_block;
1876
1877  media_block_count = dd->block_size / dd->media_block_size;
1878 
1879  req->bufnum = 0;
1880
1881  rtems_bdbuf_lock_cache ();
1882
1883  while (req->bufnum < read_ahead_count)
1884  {
1885    /*
1886     * Get the buffer for the requested block. If the block is cached then
1887     * return it. If it is not cached transfer the block from the disk media
1888     * into memory.
1889     *
1890     * We need to clean up any buffers allocated and not passed back to the
1891     * caller.
1892     */
1893    bd = rtems_bdbuf_get_buffer (dd, bds_per_group, media_block + dd->start,
1894                                 req->bufnum == 0 ? false : true);
1895
1896    /*
1897     * Read ahead buffer is in the cache or none available. Read what we
1898     * can.
1899     */
1900    if (!bd)
1901      break;
1902
1903    /*
1904     * Is the block we are interested in the cache ?
1905     */
1906    if ((bd->state == RTEMS_BDBUF_STATE_CACHED) ||
1907        (bd->state == RTEMS_BDBUF_STATE_MODIFIED))
1908      break;
1909
1910    bd->state = RTEMS_BDBUF_STATE_TRANSFER;
1911    bd->error = 0;
1912
1913    /*
1914     * The buffer will be passed to the driver so this buffer has a user.
1915     */
1916    bd->group->users++;
1917
1918    if (rtems_bdbuf_tracer)
1919      rtems_bdbuf_show_users ("reading", bd);
1920   
1921    /*
1922     * @todo The use of these req blocks is not a great design. The req is a
1923     *       struct with a single 'bufs' declared in the req struct and the
1924     *       others are added in the outer level struct. This relies on the
1925     *       structs joining as a single array and that assumes the compiler
1926     *       packs the structs. Why not just place on a list ? The BD has a
1927     *       node that can be used.
1928     */
1929    req->bufs[req->bufnum].user   = bd;
1930    req->bufs[req->bufnum].block  = media_block + dd->start;
1931    req->bufs[req->bufnum].length = dd->block_size;
1932    req->bufs[req->bufnum].buffer = bd->buffer;
1933    req->bufnum++;
1934
1935    /*
1936     * Move the media block count by the number of media blocks in the
1937     * disk device's set block size.
1938     */
1939    media_block += media_block_count;
1940  }
1941
1942  /*
1943   * Transfer any requested buffers. If the request count is 0 we have found
1944   * the block in the cache so return it.
1945   */
1946  if (req->bufnum)
1947  {
1948    /*
1949     * Unlock the cache. We have the buffer for the block and it will be in the
1950     * access or transfer state. We may also have a number of read ahead blocks
1951     * if we need to transfer data. At this point any other threads can gain
1952     * access to the cache and if they are after any of the buffers we have
1953     * they will block and be woken when the buffer is returned to the cache.
1954     *
1955     * If a transfer is needed the I/O operation will occur with pre-emption
1956     * enabled and the cache unlocked. This is a change to the previous version
1957     * of the bdbuf code.
1958     */
1959    rtems_event_set out;
1960    int             result;
1961    uint32_t        b;
1962    bool            wake_transfer;
1963
1964    /*
1965     * Flush any events.
1966     */
1967    rtems_event_receive (RTEMS_BDBUF_TRANSFER_SYNC,
1968                         RTEMS_EVENT_ALL | RTEMS_NO_WAIT,
1969                         0, &out);
1970                         
1971    rtems_bdbuf_unlock_cache ();
1972
1973    req->req = RTEMS_BLKDEV_REQ_READ;
1974    req->req_done = rtems_bdbuf_read_done;
1975    req->done_arg = req;
1976    req->io_task = rtems_task_self ();
1977    req->status = RTEMS_RESOURCE_IN_USE;
1978    req->error = 0;
1979 
1980    result = dd->ioctl (dd, RTEMS_BLKIO_REQUEST, req);
1981
1982    /*
1983     * Inspection of the DOS FS code shows the result from this function is
1984     * handled and a buffer must be returned.
1985     */
1986    if (result < 0)
1987    {
1988      req->error = errno;
1989      req->status = RTEMS_IO_ERROR;
1990    }
1991    else
1992    {
1993      rtems_status_code sc;
1994     
1995      sc = rtems_event_receive (RTEMS_BDBUF_TRANSFER_SYNC,
1996                                RTEMS_EVENT_ALL | RTEMS_WAIT,
1997                                0, &out);
1998
1999      if (sc != RTEMS_SUCCESSFUL)
2000        rtems_fatal_error_occurred (BLKDEV_FATAL_BDBUF_SWAPOUT_RE);
2001    }
2002
2003    wake_transfer = false;
2004   
2005    rtems_bdbuf_lock_cache ();
2006
2007    for (b = 1; b < req->bufnum; b++)
2008    {
2009      bd = req->bufs[b].user;
2010      if (!bd->error)
2011        bd->error = req->error;
2012      bd->state = RTEMS_BDBUF_STATE_READ_AHEAD;
2013      bd->group->users--;
2014
2015      if (rtems_bdbuf_tracer)
2016        rtems_bdbuf_show_users ("read-ahead", bd);
2017
2018      rtems_chain_prepend (&bdbuf_cache.ready, &bd->link);
2019
2020      /*
2021       * If there is an error remove the BD from the AVL tree as it is invalid,
2022       * then wake any threads that may be waiting. A thread may have been
2023       * waiting for this block and assumed it was in the tree.
2024       */
2025      if (bd->error)
2026      {
2027        bd->state = RTEMS_BDBUF_STATE_EMPTY;
2028        if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0)
2029          rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_9);
2030      }
2031
2032      if (bd->waiters)
2033        wake_transfer = true;
2034    }
2035
2036    if (wake_transfer)
2037      rtems_bdbuf_wake (bdbuf_cache.transfer, &bdbuf_cache.transfer_waiters);
2038    else
2039      rtems_bdbuf_wake (bdbuf_cache.waiting, &bdbuf_cache.wait_waiters);
2040   
2041    bd = req->bufs[0].user;
2042
2043    /*
2044     * One less user for the BD we return. The loop above is only for the read
2045     * head buffers. We do this here then increment again so the case of the
2046     * buffer in the cache or modified and no read leaves the user counts at
2047     * the correct level.
2048     */
2049    bd->group->users--;
2050
2051    if (rtems_bdbuf_tracer)
2052      rtems_bdbuf_show_users ("read-done", bd);
2053  }
2054
2055  /*
2056   * The data for this block is cached in the buffer.
2057   */
2058  if (bd->state == RTEMS_BDBUF_STATE_MODIFIED)
2059    bd->state = RTEMS_BDBUF_STATE_ACCESS_MODIFIED;
2060  else
2061  {
2062    /*
2063     * The file system is a user of the buffer.
2064     */
2065    bd->group->users++;
2066    bd->state = RTEMS_BDBUF_STATE_ACCESS;
2067  }
2068
2069  if (rtems_bdbuf_tracer)
2070  {
2071    rtems_bdbuf_show_users ("read", bd);
2072    rtems_bdbuf_show_usage ();
2073  }
2074 
2075  rtems_bdbuf_unlock_cache ();
2076  rtems_disk_release (dd);
2077
2078  *bdp = bd;
2079
2080  return RTEMS_SUCCESSFUL;
2081}
2082
2083rtems_status_code
2084rtems_bdbuf_release (rtems_bdbuf_buffer* bd)
2085{
2086  if (!bdbuf_cache.initialised)
2087    return RTEMS_NOT_CONFIGURED;
2088
2089  if (bd == NULL)
2090    return RTEMS_INVALID_ADDRESS;
2091
2092  rtems_bdbuf_lock_cache ();
2093
2094  if (rtems_bdbuf_tracer)
2095    printf ("bdbuf:release: %lu\n", bd->block);
2096 
2097  if (bd->state == RTEMS_BDBUF_STATE_ACCESS_MODIFIED)
2098  {
2099    rtems_bdbuf_append_modified (bd);
2100  }
2101  else
2102  {
2103    bd->state = RTEMS_BDBUF_STATE_CACHED;
2104    rtems_chain_append (&bdbuf_cache.lru, &bd->link);
2105
2106    /*
2107     * One less user for the group of bds.
2108     */
2109    bd->group->users--;
2110  }
2111 
2112  if (rtems_bdbuf_tracer)
2113    rtems_bdbuf_show_users ("release", bd);
2114 
2115  /*
2116   * If there are threads waiting to access the buffer wake them. Wake any
2117   * waiters if this buffer is placed back onto the LRU queue.
2118   */
2119  if (bd->waiters)
2120    rtems_bdbuf_wake (bdbuf_cache.access, &bdbuf_cache.access_waiters);
2121  else
2122    rtems_bdbuf_wake (bdbuf_cache.waiting, &bdbuf_cache.wait_waiters);
2123 
2124  if (rtems_bdbuf_tracer)
2125    rtems_bdbuf_show_usage ();
2126 
2127  rtems_bdbuf_unlock_cache ();
2128
2129  return RTEMS_SUCCESSFUL;
2130}
2131
2132rtems_status_code
2133rtems_bdbuf_release_modified (rtems_bdbuf_buffer* bd)
2134{
2135  if (!bdbuf_cache.initialised)
2136    return RTEMS_NOT_CONFIGURED;
2137
2138  if (!bd)
2139    return RTEMS_INVALID_ADDRESS;
2140
2141  rtems_bdbuf_lock_cache ();
2142
2143  if (rtems_bdbuf_tracer)
2144    printf ("bdbuf:release modified: %lu\n", bd->block);
2145
2146  bd->hold_timer = rtems_bdbuf_configuration.swap_block_hold;
2147 
2148  if (rtems_bdbuf_tracer)
2149    rtems_bdbuf_show_users ("release-modified", bd);
2150 
2151  rtems_bdbuf_append_modified (bd);
2152
2153  if (bd->waiters)
2154    rtems_bdbuf_wake (bdbuf_cache.access, &bdbuf_cache.access_waiters);
2155 
2156  if (rtems_bdbuf_tracer)
2157    rtems_bdbuf_show_usage ();
2158 
2159  rtems_bdbuf_unlock_cache ();
2160
2161  return RTEMS_SUCCESSFUL;
2162}
2163
2164rtems_status_code
2165rtems_bdbuf_sync (rtems_bdbuf_buffer* bd)
2166{
2167  bool available;
2168
2169  if (rtems_bdbuf_tracer)
2170    printf ("bdbuf:sync: %lu\n", bd->block);
2171 
2172  if (!bdbuf_cache.initialised)
2173    return RTEMS_NOT_CONFIGURED;
2174
2175  if (!bd)
2176    return RTEMS_INVALID_ADDRESS;
2177
2178  rtems_bdbuf_lock_cache ();
2179
2180  bd->state = RTEMS_BDBUF_STATE_SYNC;
2181
2182  rtems_chain_append (&bdbuf_cache.sync, &bd->link);
2183
2184  rtems_bdbuf_wake_swapper ();
2185
2186  available = false;
2187  while (!available)
2188  {
2189    switch (bd->state)
2190    {
2191      case RTEMS_BDBUF_STATE_CACHED:
2192      case RTEMS_BDBUF_STATE_READ_AHEAD:
2193      case RTEMS_BDBUF_STATE_MODIFIED:
2194      case RTEMS_BDBUF_STATE_ACCESS:
2195      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2196        available = true;
2197        break;
2198
2199      case RTEMS_BDBUF_STATE_SYNC:
2200      case RTEMS_BDBUF_STATE_TRANSFER:
2201        bd->waiters++;
2202        rtems_bdbuf_wait (&bdbuf_cache.transfer, &bdbuf_cache.transfer_waiters);
2203        bd->waiters--;
2204        break;
2205
2206      default:
2207        rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_7);
2208    }
2209  }
2210
2211  rtems_bdbuf_unlock_cache ();
2212 
2213  return RTEMS_SUCCESSFUL;
2214}
2215
2216rtems_status_code
2217rtems_bdbuf_syncdev (dev_t dev)
2218{
2219  rtems_disk_device*  dd;
2220  rtems_status_code   sc;
2221  rtems_event_set     out;
2222
2223  if (rtems_bdbuf_tracer)
2224    printf ("bdbuf:syncdev: %08x\n", (unsigned int) dev);
2225
2226  if (!bdbuf_cache.initialised)
2227    return RTEMS_NOT_CONFIGURED;
2228
2229  /*
2230   * Do not hold the cache lock when obtaining the disk table.
2231   */
2232  dd = rtems_disk_obtain (dev);
2233  if (!dd)
2234    return RTEMS_INVALID_ID;
2235
2236  /*
2237   * Take the sync lock before locking the cache. Once we have the sync lock we
2238   * can lock the cache. If another thread has the sync lock it will cause this
2239   * thread to block until it owns the sync lock then it can own the cache. The
2240   * sync lock can only be obtained with the cache unlocked.
2241   */
2242 
2243  rtems_bdbuf_lock_sync ();
2244  rtems_bdbuf_lock_cache (); 
2245
2246  /*
2247   * Set the cache to have a sync active for a specific device and let the swap
2248   * out task know the id of the requester to wake when done.
2249   *
2250   * The swap out task will negate the sync active flag when no more buffers
2251   * for the device are held on the "modified for sync" queues.
2252   */
2253  bdbuf_cache.sync_active    = true;
2254  bdbuf_cache.sync_requester = rtems_task_self ();
2255  bdbuf_cache.sync_device    = dev;
2256 
2257  rtems_bdbuf_wake_swapper ();
2258  rtems_bdbuf_unlock_cache ();
2259 
2260  sc = rtems_event_receive (RTEMS_BDBUF_TRANSFER_SYNC,
2261                            RTEMS_EVENT_ALL | RTEMS_WAIT,
2262                            0, &out);
2263
2264  if (sc != RTEMS_SUCCESSFUL)
2265    rtems_fatal_error_occurred (BLKDEV_FATAL_BDBUF_SWAPOUT_RE);
2266     
2267  rtems_bdbuf_unlock_sync ();
2268 
2269  return rtems_disk_release (dd);
2270}
2271
2272/**
2273 * Call back handler called by the low level driver when the transfer has
2274 * completed. This function may be invoked from interrupt handlers.
2275 *
2276 * @param arg Arbitrary argument specified in block device request
2277 *            structure (in this case - pointer to the appropriate
2278 *            block device request structure).
2279 * @param status I/O completion status
2280 * @param error errno error code if status != RTEMS_SUCCESSFUL
2281 */
2282static void
2283rtems_bdbuf_write_done(void *arg, rtems_status_code status, int error)
2284{
2285  rtems_blkdev_request* req = (rtems_blkdev_request*) arg;
2286
2287  req->error = error;
2288  req->status = status;
2289
2290  rtems_event_send (req->io_task, RTEMS_BDBUF_TRANSFER_SYNC);
2291}
2292
2293/**
2294 * Swapout transfer to the driver. The driver will break this I/O into groups
2295 * of consecutive write requests is multiple consecutive buffers are required
2296 * by the driver.
2297 *
2298 * @param transfer The transfer transaction.
2299 */
2300static void
2301rtems_bdbuf_swapout_write (rtems_bdbuf_swapout_transfer* transfer)
2302{
2303  rtems_disk_device* dd;
2304 
2305  if (rtems_bdbuf_tracer)
2306    printf ("bdbuf:swapout transfer: %08x\n", (unsigned int) transfer->dev);
2307
2308  /*
2309   * If there are buffers to transfer to the media transfer them.
2310   */
2311  if (!rtems_chain_is_empty (&transfer->bds))
2312  {
2313    /*
2314     * Obtain the disk device. The cache's mutex has been released to avoid a
2315     * dead lock.
2316     */
2317    dd = rtems_disk_obtain (transfer->dev);
2318    if (dd)
2319    {
2320      /*
2321       * The last block number used when the driver only supports
2322       * continuous blocks in a single request.
2323       */
2324      uint32_t last_block = 0;
2325
2326      /*
2327       * Number of buffers per bd. This is used to detect the next
2328       * block.
2329       */
2330      uint32_t bufs_per_bd = dd->block_size / bdbuf_config.buffer_min;
2331     
2332      /*
2333       * Take as many buffers as configured and pass to the driver. Note, the
2334       * API to the drivers has an array of buffers and if a chain was passed
2335       * we could have just passed the list. If the driver API is updated it
2336       * should be possible to make this change with little effect in this
2337       * code. The array that is passed is broken in design and should be
2338       * removed. Merging members of a struct into the first member is
2339       * trouble waiting to happen.
2340       */
2341      transfer->write_req->status = RTEMS_RESOURCE_IN_USE;
2342      transfer->write_req->error = 0;
2343      transfer->write_req->bufnum = 0;
2344
2345      while (!rtems_chain_is_empty (&transfer->bds))
2346      {
2347        rtems_bdbuf_buffer* bd =
2348          (rtems_bdbuf_buffer*) rtems_chain_get (&transfer->bds);
2349
2350        bool write = false;
2351       
2352        /*
2353         * If the device only accepts sequential buffers and this is not the
2354         * first buffer (the first is always sequential, and the buffer is not
2355         * sequential then put the buffer back on the transfer chain and write
2356         * the committed buffers.
2357         */
2358       
2359        if (rtems_bdbuf_tracer)
2360          printf ("bdbuf:swapout write: bd:%lu, bufnum:%lu mode:%s\n",
2361                  bd->block, transfer->write_req->bufnum,
2362                  dd->phys_dev->capabilities &
2363                  RTEMS_BLKDEV_CAP_MULTISECTOR_CONT ? "MULIT" : "SCAT");
2364       
2365        if ((dd->phys_dev->capabilities & RTEMS_BLKDEV_CAP_MULTISECTOR_CONT) &&
2366            transfer->write_req->bufnum &&
2367            (bd->block != (last_block + bufs_per_bd)))
2368        {
2369          rtems_chain_prepend (&transfer->bds, &bd->link);
2370          write = true;
2371        }
2372        else
2373        {
2374          rtems_blkdev_sg_buffer* buf;
2375          buf = &transfer->write_req->bufs[transfer->write_req->bufnum];
2376          transfer->write_req->bufnum++;
2377          buf->user   = bd;
2378          buf->block  = bd->block;
2379          buf->length = dd->block_size;
2380          buf->buffer = bd->buffer;
2381          last_block  = bd->block;
2382        }
2383
2384        /*
2385         * Perform the transfer if there are no more buffers, or the transfer
2386         * size has reached the configured max. value.
2387         */
2388
2389        if (rtems_chain_is_empty (&transfer->bds) ||
2390            (transfer->write_req->bufnum >= rtems_bdbuf_configuration.max_write_blocks))
2391          write = true;
2392
2393        if (write)
2394        {
2395          int result;
2396          uint32_t b;
2397
2398          if (rtems_bdbuf_tracer)
2399            printf ("bdbuf:swapout write: writing bufnum:%lu\n",
2400                    transfer->write_req->bufnum);
2401
2402          /*
2403           * Perform the transfer. No cache locks, no preemption, only the disk
2404           * device is being held.
2405           */
2406          result = dd->ioctl (dd, RTEMS_BLKIO_REQUEST, transfer->write_req); 
2407          if (result < 0)
2408          {
2409            rtems_bdbuf_lock_cache ();
2410             
2411            for (b = 0; b < transfer->write_req->bufnum; b++)
2412            {
2413              bd = transfer->write_req->bufs[b].user;
2414              bd->state  = RTEMS_BDBUF_STATE_MODIFIED;
2415              bd->error = errno;
2416
2417              /*
2418               * Place back on the cache's modified queue and try again.
2419               *
2420               * @warning Not sure this is the best option but I do not know
2421               *          what else can be done.
2422               */
2423              rtems_chain_append (&bdbuf_cache.modified, &bd->link);
2424            }
2425          }
2426          else
2427          {
2428            rtems_status_code sc = 0;
2429            rtems_event_set   out;
2430
2431            sc = rtems_event_receive (RTEMS_BDBUF_TRANSFER_SYNC,
2432                                      RTEMS_EVENT_ALL | RTEMS_WAIT,
2433                                      0, &out);
2434
2435            if (sc != RTEMS_SUCCESSFUL)
2436              rtems_fatal_error_occurred (BLKDEV_FATAL_BDBUF_SWAPOUT_RE);
2437
2438            rtems_bdbuf_lock_cache ();
2439
2440            for (b = 0; b < transfer->write_req->bufnum; b++)
2441            {
2442              bd = transfer->write_req->bufs[b].user;
2443              bd->state = RTEMS_BDBUF_STATE_CACHED;
2444              bd->error = 0;
2445
2446              /*
2447               * The buffer is now not modified so lower the user count for the group.
2448               */
2449              bd->group->users--;
2450
2451              if (rtems_bdbuf_tracer)
2452                rtems_bdbuf_show_users ("write", bd);
2453
2454              rtems_chain_append (&bdbuf_cache.lru, &bd->link);
2455             
2456              if (bd->waiters)
2457                rtems_bdbuf_wake (bdbuf_cache.transfer, &bdbuf_cache.transfer_waiters);
2458              else
2459                rtems_bdbuf_wake (bdbuf_cache.waiting, &bdbuf_cache.wait_waiters);
2460            }
2461          }
2462
2463          if (rtems_bdbuf_tracer)
2464            rtems_bdbuf_show_usage ();
2465
2466          rtems_bdbuf_unlock_cache ();
2467
2468          transfer->write_req->status = RTEMS_RESOURCE_IN_USE;
2469          transfer->write_req->error = 0;
2470          transfer->write_req->bufnum = 0;
2471        }
2472      }
2473         
2474      rtems_disk_release (dd);
2475    }
2476    else
2477    {
2478      /*
2479       * We have buffers but no device. Put the BDs back onto the
2480       * ready queue and exit.
2481       */
2482      /* @todo fixme */
2483    }
2484  }
2485}
2486
2487/**
2488 * Process the modified list of buffers. There is a sync or modified list that
2489 * needs to be handled so we have a common function to do the work.
2490 *
2491 * @param dev The device to handle. If -1 no device is selected so select the
2492 *            device of the first buffer to be written to disk.
2493 * @param chain The modified chain to process.
2494 * @param transfer The chain to append buffers to be written too.
2495 * @param sync_active If true this is a sync operation so expire all timers.
2496 * @param update_timers If true update the timers.
2497 * @param timer_delta It update_timers is true update the timers by this
2498 *                    amount.
2499 */
2500static void
2501rtems_bdbuf_swapout_modified_processing (dev_t*               dev,
2502                                         rtems_chain_control* chain,
2503                                         rtems_chain_control* transfer,
2504                                         bool                 sync_active,
2505                                         bool                 update_timers,
2506                                         uint32_t             timer_delta)
2507{
2508  if (!rtems_chain_is_empty (chain))
2509  {
2510    rtems_chain_node* node = rtems_chain_head (chain);
2511    node = node->next;
2512
2513    while (!rtems_chain_is_tail (chain, node))
2514    {
2515      rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;
2516   
2517      /*
2518       * Check if the buffer's hold timer has reached 0. If a sync is active
2519       * force all the timers to 0.
2520       *
2521       * @note Lots of sync requests will skew this timer. It should be based
2522       *       on TOD to be accurate. Does it matter ?
2523       */
2524      if (sync_active)
2525        bd->hold_timer = 0;
2526 
2527      if (bd->hold_timer)
2528      {
2529        if (update_timers)
2530        {
2531          if (bd->hold_timer > timer_delta)
2532            bd->hold_timer -= timer_delta;
2533          else
2534            bd->hold_timer = 0;
2535        }
2536
2537        if (bd->hold_timer)
2538        {
2539          node = node->next;
2540          continue;
2541        }
2542      }
2543
2544      /*
2545       * This assumes we can set dev_t to -1 which is just an
2546       * assumption. Cannot use the transfer list being empty the sync dev
2547       * calls sets the dev to use.
2548       */
2549      if (*dev == (dev_t)-1)
2550        *dev = bd->dev;
2551
2552      if (bd->dev == *dev)
2553      {
2554        rtems_chain_node* next_node = node->next;
2555        rtems_chain_node* tnode = rtems_chain_tail (transfer);
2556   
2557        /*
2558         * The blocks on the transfer list are sorted in block order. This
2559         * means multi-block transfers for drivers that require consecutive
2560         * blocks perform better with sorted blocks and for real disks it may
2561         * help lower head movement.
2562         */
2563
2564        bd->state = RTEMS_BDBUF_STATE_TRANSFER;
2565
2566        rtems_chain_extract (node);
2567
2568        tnode = tnode->previous;
2569         
2570        while (node && !rtems_chain_is_head (transfer, tnode))
2571        {
2572          rtems_bdbuf_buffer* tbd = (rtems_bdbuf_buffer*) tnode;
2573
2574          if (bd->block > tbd->block)
2575          {
2576            rtems_chain_insert (tnode, node);
2577            node = NULL;
2578          }
2579          else
2580            tnode = tnode->previous;
2581        }
2582       
2583        if (node)
2584          rtems_chain_prepend (transfer, node);
2585         
2586        node = next_node;
2587      }
2588      else
2589      {
2590        node = node->next;
2591      }
2592    }
2593  }
2594}
2595
2596/**
2597 * Process the cache's modified buffers. Check the sync list first then the
2598 * modified list extracting the buffers suitable to be written to disk. We have
2599 * a device at a time. The task level loop will repeat this operation while
2600 * there are buffers to be written. If the transfer fails place the buffers
2601 * back on the modified list and try again later. The cache is unlocked while
2602 * the buffers are being written to disk.
2603 *
2604 * @param timer_delta It update_timers is true update the timers by this
2605 *                    amount.
2606 * @param update_timers If true update the timers.
2607 * @param transfer The transfer transaction data.
2608 *
2609 * @retval true Buffers where written to disk so scan again.
2610 * @retval false No buffers where written to disk.
2611 */
2612static bool
2613rtems_bdbuf_swapout_processing (unsigned long                 timer_delta,
2614                                bool                          update_timers,
2615                                rtems_bdbuf_swapout_transfer* transfer)
2616{
2617  rtems_bdbuf_swapout_worker* worker;
2618  bool                        transfered_buffers = false;
2619
2620  rtems_bdbuf_lock_cache ();
2621
2622  /*
2623   * If a sync is active do not use a worker because the current code does not
2624   * cleaning up after. We need to know the buffers have been written when
2625   * syncing to the release sync lock and currently worker threads do not
2626   * return to here. We do not know the worker is the last in a sequence of
2627   * sync writes until after we have it running so we do not know to tell it to
2628   * release the lock. The simplest solution is to get the main swap out task
2629   * perform all sync operations.
2630   */
2631  if (bdbuf_cache.sync_active)
2632    worker = NULL;
2633  else
2634  {
2635    worker = (rtems_bdbuf_swapout_worker*)
2636      rtems_chain_get (&bdbuf_cache.swapout_workers);
2637    if (worker)
2638      transfer = &worker->transfer;
2639  }
2640 
2641  rtems_chain_initialize_empty (&transfer->bds);
2642  transfer->dev = -1;
2643 
2644  /*
2645   * When the sync is for a device limit the sync to that device. If the sync
2646   * is for a buffer handle process the devices in the order on the sync
2647   * list. This means the dev is -1.
2648   */
2649  if (bdbuf_cache.sync_active)
2650    transfer->dev = bdbuf_cache.sync_device;
2651 
2652  /*
2653   * If we have any buffers in the sync queue move them to the modified
2654   * list. The first sync buffer will select the device we use.
2655   */
2656  rtems_bdbuf_swapout_modified_processing (&transfer->dev,
2657                                           &bdbuf_cache.sync,
2658                                           &transfer->bds,
2659                                           true, false,
2660                                           timer_delta);
2661
2662  /*
2663   * Process the cache's modified list.
2664   */
2665  rtems_bdbuf_swapout_modified_processing (&transfer->dev,
2666                                           &bdbuf_cache.modified,
2667                                           &transfer->bds,
2668                                           bdbuf_cache.sync_active,
2669                                           update_timers,
2670                                           timer_delta);
2671
2672  /*
2673   * We have all the buffers that have been modified for this device so the
2674   * cache can be unlocked because the state of each buffer has been set to
2675   * TRANSFER.
2676   */
2677  rtems_bdbuf_unlock_cache ();
2678
2679  /*
2680   * If there are buffers to transfer to the media transfer them.
2681   */
2682  if (!rtems_chain_is_empty (&transfer->bds))
2683  {
2684    if (worker)
2685    {
2686      rtems_status_code sc = rtems_event_send (worker->id,
2687                                               RTEMS_BDBUF_SWAPOUT_SYNC);
2688      if (sc != RTEMS_SUCCESSFUL)
2689        rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE);
2690    }
2691    else
2692    {
2693      rtems_bdbuf_swapout_write (transfer);
2694    }
2695   
2696    transfered_buffers = true;
2697  }
2698   
2699  if (bdbuf_cache.sync_active && !transfered_buffers)
2700  {
2701    rtems_id sync_requester;
2702    rtems_bdbuf_lock_cache ();
2703    sync_requester = bdbuf_cache.sync_requester;
2704    bdbuf_cache.sync_active = false;
2705    bdbuf_cache.sync_requester = 0;
2706    rtems_bdbuf_unlock_cache ();
2707    if (sync_requester)
2708      rtems_event_send (sync_requester, RTEMS_BDBUF_TRANSFER_SYNC);
2709  }
2710 
2711  return transfered_buffers;
2712}
2713
2714/**
2715 * Allocate the write request and initialise it for good measure.
2716 *
2717 * @return rtems_blkdev_request* The write reference memory.
2718 */
2719static rtems_blkdev_request*
2720rtems_bdbuf_swapout_writereq_alloc (void)
2721{
2722  /*
2723   * @note chrisj The rtems_blkdev_request and the array at the end is a hack.
2724   * I am disappointment at finding code like this in RTEMS. The request should
2725   * have been a rtems_chain_control. Simple, fast and less storage as the node
2726   * is already part of the buffer structure.
2727   */
2728  rtems_blkdev_request* write_req =
2729    malloc (sizeof (rtems_blkdev_request) +
2730            (rtems_bdbuf_configuration.max_write_blocks *
2731             sizeof (rtems_blkdev_sg_buffer)));
2732
2733  if (!write_req)
2734    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM);
2735
2736  write_req->req = RTEMS_BLKDEV_REQ_WRITE;
2737  write_req->req_done = rtems_bdbuf_write_done;
2738  write_req->done_arg = write_req;
2739  write_req->io_task = rtems_task_self ();
2740
2741  return write_req;
2742}
2743
2744/**
2745 * The swapout worker thread body.
2746 *
2747 * @param arg A pointer to the worker thread's private data.
2748 * @return rtems_task Not used.
2749 */
2750static rtems_task
2751rtems_bdbuf_swapout_worker_task (rtems_task_argument arg)
2752{
2753  rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) arg;
2754
2755  while (worker->enabled)
2756  {
2757    rtems_event_set   out;
2758    rtems_status_code sc;
2759   
2760    sc = rtems_event_receive (RTEMS_BDBUF_SWAPOUT_SYNC,
2761                              RTEMS_EVENT_ALL | RTEMS_WAIT,
2762                              RTEMS_NO_TIMEOUT,
2763                              &out);
2764
2765    if (sc != RTEMS_SUCCESSFUL)
2766      rtems_fatal_error_occurred (BLKDEV_FATAL_BDBUF_SWAPOUT_RE);
2767
2768    rtems_bdbuf_swapout_write (&worker->transfer);
2769
2770    rtems_bdbuf_lock_cache ();
2771
2772    rtems_chain_initialize_empty (&worker->transfer.bds);
2773    worker->transfer.dev = -1;
2774
2775    rtems_chain_append (&bdbuf_cache.swapout_workers, &worker->link);
2776   
2777    rtems_bdbuf_unlock_cache ();
2778  }
2779
2780  free (worker->transfer.write_req);
2781  free (worker);
2782
2783  rtems_task_delete (RTEMS_SELF);
2784}
2785
2786/**
2787 * Open the swapout worker threads.
2788 */
2789static void
2790rtems_bdbuf_swapout_workers_open (void)
2791{
2792  rtems_status_code sc;
2793  int               w;
2794 
2795  rtems_bdbuf_lock_cache ();
2796 
2797  for (w = 0; w < rtems_bdbuf_configuration.swapout_workers; w++)
2798  {
2799    rtems_bdbuf_swapout_worker* worker;
2800
2801    worker = malloc (sizeof (rtems_bdbuf_swapout_worker));
2802    if (!worker)
2803      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM);
2804
2805    rtems_chain_append (&bdbuf_cache.swapout_workers, &worker->link);
2806    worker->enabled = true;
2807    worker->transfer.write_req = rtems_bdbuf_swapout_writereq_alloc ();
2808   
2809    rtems_chain_initialize_empty (&worker->transfer.bds);
2810    worker->transfer.dev = -1;
2811
2812    sc = rtems_task_create (rtems_build_name('B', 'D', 'o', 'a' + w),
2813                            (rtems_bdbuf_configuration.swapout_priority ?
2814                             rtems_bdbuf_configuration.swapout_priority :
2815                             RTEMS_BDBUF_SWAPOUT_TASK_PRIORITY_DEFAULT),
2816                            SWAPOUT_TASK_STACK_SIZE,
2817                            RTEMS_PREEMPT | RTEMS_NO_TIMESLICE | RTEMS_NO_ASR,
2818                            RTEMS_LOCAL | RTEMS_NO_FLOATING_POINT,
2819                            &worker->id);
2820    if (sc != RTEMS_SUCCESSFUL)
2821      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_CREATE);
2822
2823    sc = rtems_task_start (worker->id,
2824                           rtems_bdbuf_swapout_worker_task,
2825                           (rtems_task_argument) worker);
2826    if (sc != RTEMS_SUCCESSFUL)
2827      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_START);
2828  }
2829 
2830  rtems_bdbuf_unlock_cache ();
2831}
2832
2833/**
2834 * Close the swapout worker threads.
2835 */
2836static void
2837rtems_bdbuf_swapout_workers_close (void)
2838{
2839  rtems_chain_node* node;
2840 
2841  rtems_bdbuf_lock_cache ();
2842 
2843  node = rtems_chain_first (&bdbuf_cache.swapout_workers);
2844  while (!rtems_chain_is_tail (&bdbuf_cache.swapout_workers, node))
2845  {
2846    rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) node;
2847    worker->enabled = false;
2848    rtems_event_send (worker->id, RTEMS_BDBUF_SWAPOUT_SYNC);
2849    node = rtems_chain_next (node);
2850  }
2851 
2852  rtems_bdbuf_unlock_cache ();
2853}
2854
2855/**
2856 * Body of task which takes care on flushing modified buffers to the disk.
2857 *
2858 * @param arg A pointer to the global cache data. Use the global variable and
2859 *            not this.
2860 * @return rtems_task Not used.
2861 */
2862static rtems_task
2863rtems_bdbuf_swapout_task (rtems_task_argument arg)
2864{
2865  rtems_bdbuf_swapout_transfer transfer;
2866  uint32_t                     period_in_ticks;
2867  const uint32_t               period_in_msecs = bdbuf_config.swapout_period;;
2868  uint32_t                     timer_delta;
2869
2870  transfer.write_req = rtems_bdbuf_swapout_writereq_alloc ();
2871  rtems_chain_initialize_empty (&transfer.bds);
2872  transfer.dev = -1;
2873
2874  /*
2875   * Localise the period.
2876   */
2877  period_in_ticks = RTEMS_MICROSECONDS_TO_TICKS (period_in_msecs * 1000);
2878
2879  /*
2880   * This is temporary. Needs to be changed to use the real time clock.
2881   */
2882  timer_delta = period_in_msecs;
2883
2884  /*
2885   * Create the worker threads.
2886   */
2887  rtems_bdbuf_swapout_workers_open ();
2888 
2889  while (bdbuf_cache.swapout_enabled)
2890  {
2891    rtems_event_set   out;
2892    rtems_status_code sc;
2893
2894    /*
2895     * Only update the timers once in the processing cycle.
2896     */
2897    bool update_timers = true;
2898   
2899    /*
2900     * If we write buffers to any disk perform a check again. We only write a
2901     * single device at a time and the cache may have more than one device's
2902     * buffers modified waiting to be written.
2903     */
2904    bool transfered_buffers;
2905
2906    do
2907    {
2908      transfered_buffers = false;
2909
2910      /*
2911       * Extact all the buffers we find for a specific device. The device is
2912       * the first one we find on a modified list. Process the sync queue of
2913       * buffers first.
2914       */
2915      if (rtems_bdbuf_swapout_processing (timer_delta,
2916                                          update_timers,
2917                                          &transfer))
2918      {
2919        transfered_buffers = true;
2920      }
2921     
2922      /*
2923       * Only update the timers once.
2924       */
2925      update_timers = false;
2926    }
2927    while (transfered_buffers);
2928
2929    sc = rtems_event_receive (RTEMS_BDBUF_SWAPOUT_SYNC,
2930                              RTEMS_EVENT_ALL | RTEMS_WAIT,
2931                              period_in_ticks,
2932                              &out);
2933
2934    if ((sc != RTEMS_SUCCESSFUL) && (sc != RTEMS_TIMEOUT))
2935      rtems_fatal_error_occurred (BLKDEV_FATAL_BDBUF_SWAPOUT_RE);
2936  }
2937
2938  rtems_bdbuf_swapout_workers_close ();
2939 
2940  free (transfer.write_req);
2941
2942  rtems_task_delete (RTEMS_SELF);
2943}
2944
Note: See TracBrowser for help on using the repository browser.