source: rtems/cpukit/libblock/src/bdbuf.c @ 3d5515b

4.104.11
Last change on this file since 3d5515b was 3d5515b, checked in by Thomas Doerfler <Thomas.Doerfler@…>, on Oct 30, 2009 at 10:57:39 AM

Fixed group usage counting. See test "libtests/block02".

  • Property mode set to 100644
File size: 83.3 KB
Line 
1/**
2 * @file
3 *
4 * @ingroup rtems_bdbuf
5 *
6 * Block device buffer management.
7 */
8
9/*
10 * Disk I/O buffering
11 * Buffer management
12 *
13 * Copyright (C) 2001 OKTET Ltd., St.-Peterburg, Russia
14 * Author: Andrey G. Ivanov <Andrey.Ivanov@oktet.ru>
15 *         Victor V. Vengerov <vvv@oktet.ru>
16 *         Alexander Kukuta <kam@oktet.ru>
17 *
18 * Copyright (C) 2008,2009 Chris Johns <chrisj@rtems.org>
19 *    Rewritten to remove score mutex access. Fixes many performance
20 *    issues.
21 *
22 * @(#) bdbuf.c,v 1.14 2004/04/17 08:15:17 ralf Exp
23 */
24
25/**
26 * Set to 1 to enable debug tracing.
27 */
28#define RTEMS_BDBUF_TRACE 0
29
30#if HAVE_CONFIG_H
31#include "config.h"
32#endif
33#include <limits.h>
34#include <errno.h>
35#include <assert.h>
36#include <stdio.h>
37#include <string.h>
38#include <inttypes.h>
39
40#include <rtems.h>
41#include <rtems/error.h>
42#include <rtems/malloc.h>
43
44#include "rtems/bdbuf.h"
45
46#define BDBUF_INVALID_DEV ((dev_t) -1)
47
48/*
49 * Simpler label for this file.
50 */
51#define bdbuf_config rtems_bdbuf_configuration
52
53/**
54 * A swapout transfer transaction data. This data is passed to a worked thread
55 * to handle the write phase of the transfer.
56 */
57typedef struct rtems_bdbuf_swapout_transfer
58{
59  rtems_chain_control   bds;         /**< The transfer list of BDs. */
60  dev_t                 dev;         /**< The device the transfer is for. */
61  rtems_blkdev_request* write_req;   /**< The write request array. */
62  uint32_t              bufs_per_bd; /**< Number of buffers per bd. */
63} rtems_bdbuf_swapout_transfer;
64
65/**
66 * Swapout worker thread. These are available to take processing from the
67 * main swapout thread and handle the I/O operation.
68 */
69typedef struct rtems_bdbuf_swapout_worker
70{
71  rtems_chain_node             link;     /**< The threads sit on a chain when
72                                          * idle. */
73  rtems_id                     id;       /**< The id of the task so we can wake
74                                          * it. */
75  volatile bool                enabled;  /**< The worked is enabled. */
76  rtems_bdbuf_swapout_transfer transfer; /**< The transfer data for this
77                                          * thread. */
78} rtems_bdbuf_swapout_worker;
79
80/**
81 * Buffer waiters synchronization.
82 */
83typedef struct rtems_bdbuf_waiters {
84  volatile unsigned count;
85  rtems_id sema;
86} rtems_bdbuf_waiters;
87
88/**
89 * The BD buffer cache.
90 */
91typedef struct rtems_bdbuf_cache
92{
93  rtems_id            swapout;           /**< Swapout task ID */
94  volatile bool       swapout_enabled;   /**< Swapout is only running if
95                                          * enabled. Set to false to kill the
96                                          * swap out task. It deletes itself. */
97  rtems_chain_control swapout_workers;   /**< The work threads for the swapout
98                                          * task. */
99 
100  rtems_bdbuf_buffer* bds;               /**< Pointer to table of buffer
101                                          * descriptors. */
102  void*               buffers;           /**< The buffer's memory. */
103  size_t              buffer_min_count;  /**< Number of minimum size buffers
104                                          * that fit the buffer memory. */
105  size_t              max_bds_per_group; /**< The number of BDs of minimum
106                                          * buffer size that fit in a group. */
107  uint32_t            flags;             /**< Configuration flags. */
108
109  rtems_id            lock;              /**< The cache lock. It locks all
110                                          * cache data, BD and lists. */
111  rtems_id            sync_lock;         /**< Sync calls block writes. */
112  volatile bool       sync_active;       /**< True if a sync is active. */
113  volatile rtems_id   sync_requester;    /**< The sync requester. */
114  volatile dev_t      sync_device;       /**< The device to sync and
115                                          * BDBUF_INVALID_DEV not a device
116                                          * sync. */
117
118  rtems_bdbuf_buffer* tree;              /**< Buffer descriptor lookup AVL tree
119                                          * root. There is only one. */
120  rtems_chain_control ready;             /**< Free buffers list, read-ahead, or
121                                          * resized group buffers. */
122  rtems_chain_control lru;               /**< Least recently used list */
123  rtems_chain_control modified;          /**< Modified buffers list */
124  rtems_chain_control sync;              /**< Buffers to sync list */
125
126  rtems_bdbuf_waiters access_waiters;    /**< Wait for a buffer in ACCESS
127                                          * state. */
128  rtems_bdbuf_waiters transfer_waiters;  /**< Wait for a buffer in TRANSFER
129                                          * state. */
130  rtems_bdbuf_waiters buffer_waiters;    /**< Wait for a buffer and no one is
131                                          * available. */
132
133  size_t              group_count;       /**< The number of groups. */
134  rtems_bdbuf_group*  groups;            /**< The groups. */
135 
136  bool                initialised;       /**< Initialised state. */
137} rtems_bdbuf_cache;
138
/**
 * Fatal error codes. Each code carries the character 'B' in the top byte and
 * the error number in the low 24 bits.
 */
#define RTEMS_BLKDEV_FATAL_ERROR(n) \
  (((uint32_t)(n) & (uint32_t)0x00FFFFFF) | ((uint32_t)'B' << 24))

/* Cache consistency failures. */
#define RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_1 RTEMS_BLKDEV_FATAL_ERROR(1)
#define RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_2 RTEMS_BLKDEV_FATAL_ERROR(2)
#define RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_3 RTEMS_BLKDEV_FATAL_ERROR(3)
#define RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_4 RTEMS_BLKDEV_FATAL_ERROR(4)
#define RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_5 RTEMS_BLKDEV_FATAL_ERROR(5)
#define RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_6 RTEMS_BLKDEV_FATAL_ERROR(6)
#define RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_7 RTEMS_BLKDEV_FATAL_ERROR(7)
#define RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_8 RTEMS_BLKDEV_FATAL_ERROR(8)
#define RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_9 RTEMS_BLKDEV_FATAL_ERROR(9)

/* Locking, waiting, preemption and swapout failures. */
#define RTEMS_BLKDEV_FATAL_BDBUF_SWAPOUT       RTEMS_BLKDEV_FATAL_ERROR(10)
#define RTEMS_BLKDEV_FATAL_BDBUF_SYNC_LOCK     RTEMS_BLKDEV_FATAL_ERROR(11)
#define RTEMS_BLKDEV_FATAL_BDBUF_SYNC_UNLOCK   RTEMS_BLKDEV_FATAL_ERROR(12)
#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_LOCK    RTEMS_BLKDEV_FATAL_ERROR(13)
#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_UNLOCK  RTEMS_BLKDEV_FATAL_ERROR(14)
#define RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_DIS   RTEMS_BLKDEV_FATAL_ERROR(15)
#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_2  RTEMS_BLKDEV_FATAL_ERROR(16)
#define RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_RST   RTEMS_BLKDEV_FATAL_ERROR(17)
#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_TO RTEMS_BLKDEV_FATAL_ERROR(18)
#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAKE    RTEMS_BLKDEV_FATAL_ERROR(19)
#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE       RTEMS_BLKDEV_FATAL_ERROR(20)
#define RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM      RTEMS_BLKDEV_FATAL_ERROR(21)
#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_CREATE  RTEMS_BLKDEV_FATAL_ERROR(22)
#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_START   RTEMS_BLKDEV_FATAL_ERROR(23)
#define BLKDEV_FATAL_BDBUF_SWAPOUT_RE          RTEMS_BLKDEV_FATAL_ERROR(24)
#define BLKDEV_FATAL_BDBUF_SWAPOUT_TS          RTEMS_BLKDEV_FATAL_ERROR(25)
170
/**
 * The events used in this code. These should be system events rather than
 * application events.
 */
#define RTEMS_BDBUF_TRANSFER_SYNC  RTEMS_EVENT_1
#define RTEMS_BDBUF_SWAPOUT_SYNC   RTEMS_EVENT_2

/**
 * The swap out task stack size in bytes. Should be more than enough for most
 * drivers with tracing turned on.
 */
#define SWAPOUT_TASK_STACK_SIZE (8 * 1024)
183
/**
 * Lock semaphore attributes. This is used for locking type mutexes.
 *
 * @warning Priority inheritance is on.
 */
#define RTEMS_BDBUF_CACHE_LOCK_ATTRIBS \
  (RTEMS_BINARY_SEMAPHORE | RTEMS_PRIORITY | RTEMS_INHERIT_PRIORITY | \
   RTEMS_NO_PRIORITY_CEILING | RTEMS_LOCAL)

/**
 * Waiter semaphore attributes.
 *
 * @warning Do not configure as inherit priority. If a driver is in the driver
 *          initialisation table this locked semaphore will have the IDLE task
 *          as the holder and a blocking task will raise the priority of the
 *          IDLE task which can cause unusual side effects.
 */
#define RTEMS_BDBUF_CACHE_WAITER_ATTRIBS \
  (RTEMS_SIMPLE_BINARY_SEMAPHORE | RTEMS_PRIORITY | \
   RTEMS_NO_INHERIT_PRIORITY | RTEMS_NO_PRIORITY_CEILING | RTEMS_LOCAL)
204
/**
 * Waiter timeout. Set to non-zero to find some info on a waiter that is
 * waiting too long.
 *
 * NOTE(review): the unconditional define comes first, so the fallback below
 * is never selected as written; it only takes effect if the first define is
 * removed.
 */
#define RTEMS_BDBUF_WAIT_TIMEOUT RTEMS_NO_TIMEOUT
#ifndef RTEMS_BDBUF_WAIT_TIMEOUT
#define RTEMS_BDBUF_WAIT_TIMEOUT (TOD_MICROSECONDS_TO_TICKS (20000000))
#endif
214
215/*
216 * The swap out task.
217 */
218static rtems_task rtems_bdbuf_swapout_task(rtems_task_argument arg);
219
220/**
221 * The Buffer Descriptor cache.
222 */
223static rtems_bdbuf_cache bdbuf_cache;
224
225#if RTEMS_BDBUF_TRACE
/**
 * Trace switch: trace messages are output while this is true.
 */
bool rtems_bdbuf_tracer;
230
231/**
232 * Return the number of items on the list.
233 *
234 * @param list The chain control.
235 * @return uint32_t The number of items on the list.
236 */
237uint32_t
238rtems_bdbuf_list_count (rtems_chain_control* list)
239{
240  rtems_chain_node* node = rtems_chain_first (list);
241  uint32_t          count = 0;
242  while (!rtems_chain_is_tail (list, node))
243  {
244    count++;
245    node = rtems_chain_next (node);
246  }
247  return count;
248}
249
250/**
251 * Show the usage for the bdbuf cache.
252 */
253void
254rtems_bdbuf_show_usage (void)
255{
256  uint32_t group;
257  uint32_t total = 0;
258  uint32_t val;
259
260  for (group = 0; group < bdbuf_cache.group_count; group++)
261    total += bdbuf_cache.groups[group].users;
262  printf ("bdbuf:group users=%lu", total);
263  val = rtems_bdbuf_list_count (&bdbuf_cache.ready);
264  printf (", ready=%lu", val);
265  total = val;
266  val = rtems_bdbuf_list_count (&bdbuf_cache.lru);
267  printf (", lru=%lu", val);
268  total += val;
269  val = rtems_bdbuf_list_count (&bdbuf_cache.modified);
270  printf (", mod=%lu", val);
271  total += val;
272  val = rtems_bdbuf_list_count (&bdbuf_cache.sync);
273  printf (", sync=%lu", val);
274  total += val;
275  printf (", total=%lu\n", total);
276}
277
278/**
279 * Show the users for a group of a bd.
280 *
281 * @param where A label to show the context of output.
282 * @param bd The bd to show the users of.
283 */
284void
285rtems_bdbuf_show_users (const char* where, rtems_bdbuf_buffer* bd)
286{
287  const char* states[] =
288    { "EM", "RA", "CH", "AC", "MD", "AM", "SY", "TR" };
289
290  printf ("bdbuf:users: %15s: [%" PRIu32 " (%s)] %td:%td = %" PRIu32 " %s\n",
291          where,
292          bd->block, states[bd->state],
293          bd->group - bdbuf_cache.groups,
294          bd - bdbuf_cache.bds,
295          bd->group->users,
296          bd->group->users > 8 ? "<<<<<<<" : "");
297}
298#else
/* Tracing is compiled out: the trace flag is constant 0 and the show
 * helpers expand to nothing. */
#define rtems_bdbuf_tracer (0)
#define rtems_bdbuf_show_usage()
#define rtems_bdbuf_show_users(_w, _b)
302#endif
303
/**
 * Maximum AVL tree height; it sizes the path stacks used by insert and
 * remove. The default of 32 allows for AVL trees having between 5,704,880 and
 * 4,294,967,295 nodes, depending on order of insertion. This compile-time
 * constant may be overridden.
 */
#if !defined (RTEMS_BDBUF_AVL_MAX_HEIGHT)
#define RTEMS_BDBUF_AVL_MAX_HEIGHT (32)
#endif
312
313/**
314 * Searches for the node with specified dev/block.
315 *
316 * @param root pointer to the root node of the AVL-Tree
317 * @param dev device search key
318 * @param block block search key
319 * @retval NULL node with the specified dev/block is not found
320 * @return pointer to the node with specified dev/block
321 */
322static rtems_bdbuf_buffer *
323rtems_bdbuf_avl_search (rtems_bdbuf_buffer** root,
324                        dev_t                dev,
325                        rtems_blkdev_bnum    block)
326{
327  rtems_bdbuf_buffer* p = *root;
328
329  while ((p != NULL) && ((p->dev != dev) || (p->block != block)))
330  {
331    if ((p->dev < dev) || ((p->dev == dev) && (p->block < block)))
332    {
333      p = p->avl.right;
334    }
335    else
336    {
337      p = p->avl.left;
338    }
339  }
340
341  return p;
342}
343
344/**
345 * Inserts the specified node into the AVL tree, keyed by (dev, block).
346 * The descent path is kept on a local stack and retraced to rebalance.
347 * @param root pointer to the root node of the AVL-Tree
348 * @param node Pointer to the node to add.
349 * @retval 0 The node added successfully
350 * @retval -1 A node with the same dev/block is already in the tree
351 */
352static int
353rtems_bdbuf_avl_insert(rtems_bdbuf_buffer** root,
354                       rtems_bdbuf_buffer*  node)
355{
356  dev_t             dev = node->dev;
357  rtems_blkdev_bnum block = node->block;
358
359  rtems_bdbuf_buffer*  p = *root;
360  rtems_bdbuf_buffer*  q;
361  rtems_bdbuf_buffer*  p1;
362  rtems_bdbuf_buffer*  p2;
363  rtems_bdbuf_buffer*  buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT]; /* path from the root; NOTE(review): not bounds checked */
364  rtems_bdbuf_buffer** buf_prev = buf_stack;
365
366  bool modified = false;
367
368  if (p == NULL) /* empty tree: the node becomes the root */
369  {
370    *root = node;
371    node->avl.left = NULL;
372    node->avl.right = NULL;
373    node->avl.bal = 0;
374    return 0;
375  }
376
377  while (p != NULL)
378  {
379    *buf_prev++ = p; /* record the path so it can be retraced */
380
381    if ((p->dev < dev) || ((p->dev == dev) && (p->block < block)))
382    {
383      p->avl.cache = 1; /* remember the descent went right */
384      q = p->avl.right;
385      if (q == NULL)
386      {
387        q = node; /* redundant: q is assigned again on the next line */
388        p->avl.right = q = node;
389        break;
390      }
391    }
392    else if ((p->dev != dev) || (p->block != block))
393    {
394      p->avl.cache = -1; /* remember the descent went left */
395      q = p->avl.left;
396      if (q == NULL)
397      {
398        q = node;
399        p->avl.left = q;
400        break;
401      }
402    }
403    else
404    {
405      return -1; /* same dev and block: duplicate key */
406    }
407
408    p = q;
409  }
410
411  q->avl.left = q->avl.right = NULL; /* q is the newly linked leaf */
412  q->avl.bal = 0;
413  modified = true;
414  buf_prev--; /* now addresses the stack slot of p, the new leaf's parent */
415
416  while (modified) /* retrace towards the root while the subtree height grew */
417  {
418    if (p->avl.cache == -1) /* the left subtree of p grew */
419    {
420      switch (p->avl.bal)
421      {
422        case 1:
423          p->avl.bal = 0;
424          modified = false;
425          break;
426
427        case 0:
428          p->avl.bal = -1;
429          break;
430
431        case -1:
432          p1 = p->avl.left;
433          if (p1->avl.bal == -1) /* simple LL-turn */
434          {
435            p->avl.left = p1->avl.right;
436            p1->avl.right = p;
437            p->avl.bal = 0;
438            p = p1;
439          }
440          else /* double LR-turn */
441          {
442            p2 = p1->avl.right;
443            p1->avl.right = p2->avl.left;
444            p2->avl.left = p1;
445            p->avl.left = p2->avl.right;
446            p2->avl.right = p;
447            if (p2->avl.bal == -1) p->avl.bal = +1; else p->avl.bal = 0;
448            if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0;
449            p = p2;
450          }
451          p->avl.bal = 0; /* p is now the root of the rotated subtree */
452          modified = false;
453          break;
454
455        default:
456          break;
457      }
458    }
459    else /* the right subtree of p grew */
460    {
461      switch (p->avl.bal)
462      {
463        case -1:
464          p->avl.bal = 0;
465          modified = false;
466          break;
467
468        case 0:
469          p->avl.bal = 1;
470          break;
471
472        case 1:
473          p1 = p->avl.right;
474          if (p1->avl.bal == 1) /* simple RR-turn */
475          {
476            p->avl.right = p1->avl.left;
477            p1->avl.left = p;
478            p->avl.bal = 0;
479            p = p1;
480          }
481          else /* double RL-turn */
482          {
483            p2 = p1->avl.left;
484            p1->avl.left = p2->avl.right;
485            p2->avl.right = p1;
486            p->avl.right = p2->avl.left;
487            p2->avl.left = p;
488            if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0;
489            if (p2->avl.bal == -1) p1->avl.bal = +1; else p1->avl.bal = 0;
490            p = p2;
491          }
492          p->avl.bal = 0; /* p is now the root of the rotated subtree */
493          modified = false;
494          break;
495
496        default:
497          break;
498      }
499    }
500    q = p; /* q is the (possibly new) root of the subtree just handled */
501    if (buf_prev > buf_stack)
502    {
503      p = *--buf_prev; /* step up to the parent and relink the subtree */
504
505      if (p->avl.cache == -1)
506      {
507        p->avl.left = q;
508      }
509      else
510      {
511        p->avl.right = q;
512      }
513    }
514    else
515    {
516      *root = p; /* the path is exhausted: p is the tree root */
517      break;
518    }
519  };
520
521  return 0;
522}
523
524
525/**
526 * Removes the node with the same dev/block as 'node' from the tree and
527 * rebalances along the recorded path.
528 * @param root Pointer to pointer to the root node
529 * @param node Pointer to the node to remove
530 * @retval 0 Item removed
531 * @retval -1 No such item found
532 */
533static int
534rtems_bdbuf_avl_remove(rtems_bdbuf_buffer**      root,
535                       const rtems_bdbuf_buffer* node)
536{
537  dev_t             dev = node->dev;
538  rtems_blkdev_bnum block = node->block;
539
540  rtems_bdbuf_buffer*  p = *root;
541  rtems_bdbuf_buffer*  q;
542  rtems_bdbuf_buffer*  r;
543  rtems_bdbuf_buffer*  s;
544  rtems_bdbuf_buffer*  p1;
545  rtems_bdbuf_buffer*  p2;
546  rtems_bdbuf_buffer*  buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT]; /* path from the root; NOTE(review): not bounds checked */
547  rtems_bdbuf_buffer** buf_prev = buf_stack;
548
549  bool modified = false;
550
551  memset (buf_stack, 0, sizeof(buf_stack));
552
553  while (p != NULL)
554  {
555    *buf_prev++ = p; /* record the path, including the node itself */
556
557    if ((p->dev < dev) || ((p->dev == dev) && (p->block < block)))
558    {
559      p->avl.cache = 1; /* remember the descent went right */
560      p = p->avl.right;
561    }
562    else if ((p->dev != dev) || (p->block != block))
563    {
564      p->avl.cache = -1; /* remember the descent went left */
565      p = p->avl.left;
566    }
567    else
568    {
569      /* node found */
570      break;
571    }
572  }
573
574  if (p == NULL)
575  {
576    /* there is no such node */
577    return -1;
578  }
579
580  q = p;
581
582  buf_prev--; /* now addresses q's own stack slot */
583  if (buf_prev > buf_stack)
584  {
585    p = *(buf_prev - 1);
586  }
587  else
588  {
589    p = NULL; /* q is the root */
590  }
591
592  /* at this moment q - is a node to delete, p is q's parent */
593  if (q->avl.right == NULL) /* no right child: the left child replaces q */
594  {
595    r = q->avl.left;
596    if (r != NULL)
597    {
598      r->avl.bal = 0;
599    }
600    q = r;
601  }
602  else
603  {
604    rtems_bdbuf_buffer **t;
605
606    r = q->avl.right;
607
608    if (r->avl.left == NULL) /* the right child itself replaces q */
609    {
610      r->avl.left = q->avl.left;
611      r->avl.bal = q->avl.bal;
612      r->avl.cache = 1;
613      *buf_prev++ = q = r; /* r takes over q's slot on the path */
614    }
615    else
616    {
617      t = buf_prev++; /* keep q's slot for the replacement node */
618      s = r;
619
620      while (s->avl.left != NULL) /* walk to the leftmost node of the right subtree */
621      {
622        *buf_prev++ = r = s;
623        s = r->avl.left;
624        r->avl.cache = -1;
625      }
626
627      s->avl.left = q->avl.left; /* s takes q's place; r is s's old parent */
628      r->avl.left = s->avl.right;
629      s->avl.right = q->avl.right;
630      s->avl.bal = q->avl.bal;
631      s->avl.cache = 1;
632
633      *t = q = s;
634    }
635  }
636
637  if (p != NULL) /* link the replacement (possibly NULL) into the parent */
638  {
639    if (p->avl.cache == -1)
640    {
641      p->avl.left = q;
642    }
643    else
644    {
645      p->avl.right = q;
646    }
647  }
648  else
649  {
650    *root = q;
651  }
652
653  modified = true;
654
655  while (modified) /* retrace the path while the subtree height shrank */
656  {
657    if (buf_prev > buf_stack)
658    {
659      p = *--buf_prev;
660    }
661    else
662    {
663      break;
664    }
665
666    if (p->avl.cache == -1)
667    {
668      /* rebalance left branch */
669      switch (p->avl.bal)
670      {
671        case -1:
672          p->avl.bal = 0;
673          break;
674        case  0:
675          p->avl.bal = 1;
676          modified = false;
677          break;
678
679        case +1:
680          p1 = p->avl.right;
681
682          if (p1->avl.bal >= 0) /* simple RR-turn */
683          {
684            p->avl.right = p1->avl.left;
685            p1->avl.left = p;
686
687            if (p1->avl.bal == 0)
688            {
689              p1->avl.bal = -1;
690              modified = false;
691            }
692            else
693            {
694              p->avl.bal = 0;
695              p1->avl.bal = 0;
696            }
697            p = p1;
698          }
699          else /* double RL-turn */
700          {
701            p2 = p1->avl.left;
702
703            p1->avl.left = p2->avl.right;
704            p2->avl.right = p1;
705            p->avl.right = p2->avl.left;
706            p2->avl.left = p;
707
708            if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0;
709            if (p2->avl.bal == -1) p1->avl.bal = 1; else p1->avl.bal = 0;
710
711            p = p2;
712            p2->avl.bal = 0;
713          }
714          break;
715
716        default:
717          break;
718      }
719    }
720    else
721    {
722      /* rebalance right branch */
723      switch (p->avl.bal)
724      {
725        case +1:
726          p->avl.bal = 0;
727          break;
728
729        case  0:
730          p->avl.bal = -1;
731          modified = false;
732          break;
733
734        case -1:
735          p1 = p->avl.left;
736
737          if (p1->avl.bal <= 0) /* simple LL-turn */
738          {
739            p->avl.left = p1->avl.right;
740            p1->avl.right = p;
741            if (p1->avl.bal == 0)
742            {
743              p1->avl.bal = 1;
744              modified = false;
745            }
746            else
747            {
748              p->avl.bal = 0;
749              p1->avl.bal = 0;
750            }
751            p = p1;
752          }
753          else /* double LR-turn */
754          {
755            p2 = p1->avl.right;
756
757            p1->avl.right = p2->avl.left;
758            p2->avl.left = p1;
759            p->avl.left = p2->avl.right;
760            p2->avl.right = p;
761
762            if (p2->avl.bal == -1) p->avl.bal = 1; else p->avl.bal = 0;
763            if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0;
764
765            p = p2;
766            p2->avl.bal = 0;
767          }
768          break;
769
770        default:
771          break;
772      }
773    }
774
775    if (buf_prev > buf_stack) /* relink the subtree root into its parent */
776    {
777      q = *(buf_prev - 1);
778
779      if (q->avl.cache == -1)
780      {
781        q->avl.left = p;
782      }
783      else
784      {
785        q->avl.right = p;
786      }
787    }
788    else
789    {
790      *root = p; /* the path is exhausted: p is the tree root */
791      break;
792    }
793
794  }
795
796  return 0;
797}
798
799/**
800 * Change the block number for the block size to the block number for the media
801 * block size. We have to use 64bit maths. There is no short cut here.
802 *
803 * @param block The logical block number in the block size terms.
804 * @param block_size The block size.
805 * @param media_block_size The block size of the media.
806 * @return rtems_blkdev_bnum The media block number.
807 */
808static rtems_blkdev_bnum
809rtems_bdbuf_media_block (rtems_blkdev_bnum block,
810                         size_t            block_size,
811                         size_t            media_block_size)
812{
813  return (((uint64_t) block) * block_size) / media_block_size;
814}
815
816/**
817 * Lock the mutex. A single task can nest calls.
818 *
819 * @param lock The mutex to lock.
820 * @param fatal_error_code The error code if the call fails.
821 */
822static void
823rtems_bdbuf_lock (rtems_id lock, uint32_t fatal_error_code)
824{
825  rtems_status_code sc = rtems_semaphore_obtain (lock,
826                                                 RTEMS_WAIT,
827                                                 RTEMS_NO_TIMEOUT);
828  if (sc != RTEMS_SUCCESSFUL)
829    rtems_fatal_error_occurred (fatal_error_code);
830}
831
832/**
833 * Unlock the mutex.
834 *
835 * @param lock The mutex to unlock.
836 * @param fatal_error_code The error code if the call fails.
837 */
838static void
839rtems_bdbuf_unlock (rtems_id lock, uint32_t fatal_error_code)
840{
841  rtems_status_code sc = rtems_semaphore_release (lock);
842  if (sc != RTEMS_SUCCESSFUL)
843    rtems_fatal_error_occurred (fatal_error_code);
844}
845
846/**
847 * Lock the cache. A single task can nest calls.
848 */
849static void
850rtems_bdbuf_lock_cache (void)
851{
852  rtems_bdbuf_lock (bdbuf_cache.lock, RTEMS_BLKDEV_FATAL_BDBUF_CACHE_LOCK);
853}
854
855/**
856 * Unlock the cache.
857 */
858static void
859rtems_bdbuf_unlock_cache (void)
860{
861  rtems_bdbuf_unlock (bdbuf_cache.lock, RTEMS_BLKDEV_FATAL_BDBUF_CACHE_UNLOCK);
862}
863
864/**
865 * Lock the cache's sync. A single task can nest calls.
866 */
867static void
868rtems_bdbuf_lock_sync (void)
869{
870  rtems_bdbuf_lock (bdbuf_cache.sync_lock, RTEMS_BLKDEV_FATAL_BDBUF_SYNC_LOCK);
871}
872
873/**
874 * Unlock the cache's sync lock. Any blocked writers are woken.
875 */
876static void
877rtems_bdbuf_unlock_sync (void)
878{
879  rtems_bdbuf_unlock (bdbuf_cache.sync_lock,
880                      RTEMS_BLKDEV_FATAL_BDBUF_SYNC_UNLOCK);
881}
882
883static rtems_mode
884rtems_bdbuf_disable_preemption (void)
885{
886  rtems_status_code sc = RTEMS_SUCCESSFUL;
887  rtems_mode prev_mode = 0;
888
889  sc = rtems_task_mode (RTEMS_NO_PREEMPT, RTEMS_PREEMPT_MASK, &prev_mode);
890  if (sc != RTEMS_SUCCESSFUL)
891    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_DIS);
892
893  return prev_mode;
894}
895
896static void
897rtems_bdbuf_restore_preemption (rtems_mode prev_mode)
898{
899  rtems_status_code sc = RTEMS_SUCCESSFUL;
900
901  sc = rtems_task_mode (prev_mode, RTEMS_ALL_MODE_MASKS, &prev_mode);
902  if (sc != RTEMS_SUCCESSFUL)
903    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_RST);
904}
905
906/**
907 * Wait until woken. Semaphores are used so a number of tasks can wait and can
908 * be woken at once. Task events would require we maintain a list of tasks to
909 * be woken and this would require storgage and we do not know the number of
910 * tasks that could be waiting.
911 *
912 * While we have the cache locked we can try and claim the semaphore and
913 * therefore know when we release the lock to the cache we will block until the
914 * semaphore is released. This may even happen before we get to block.
915 *
916 * A counter is used to save the release call when no one is waiting.
917 *
918 * The function assumes the cache is locked on entry and it will be locked on
919 * exit.
920 *
921 * @param sema The semaphore to block on and wait.
922 * @param waiters The wait counter for this semaphore.
923 */
924static void
925rtems_bdbuf_wait (rtems_bdbuf_waiters* waiters)
926{
927  rtems_status_code sc;
928  rtems_mode        prev_mode;
929 
930  /*
931   * Indicate we are waiting.
932   */
933  ++waiters->count;
934
935  /*
936   * Disable preemption then unlock the cache and block.  There is no POSIX
937   * condition variable in the core API so this is a work around.
938   *
939   * The issue is a task could preempt after the cache is unlocked because it is
940   * blocking or just hits that window, and before this task has blocked on the
941   * semaphore. If the preempting task flushes the queue this task will not see
942   * the flush and may block for ever or until another transaction flushes this
943   * semaphore.
944   */
945  prev_mode = rtems_bdbuf_disable_preemption ();
946 
947  /*
948   * Unlock the cache, wait, and lock the cache when we return.
949   */
950  rtems_bdbuf_unlock_cache ();
951
952  sc = rtems_semaphore_obtain (waiters->sema, RTEMS_WAIT, RTEMS_BDBUF_WAIT_TIMEOUT);
953
954  if (sc == RTEMS_TIMEOUT)
955    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_TO);
956 
957  if (sc != RTEMS_UNSATISFIED)
958    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_2);
959 
960  rtems_bdbuf_lock_cache ();
961
962  rtems_bdbuf_restore_preemption (prev_mode);
963 
964  --waiters->count;
965}
966
967/**
968 * Wake a blocked resource. The resource has a counter that lets us know if
969 * there are any waiters.
970 */
971static void
972rtems_bdbuf_wake (const rtems_bdbuf_waiters *waiters)
973{
974  rtems_status_code sc = RTEMS_SUCCESSFUL;
975
976  if (waiters->count > 0)
977  {
978    sc = rtems_semaphore_flush (waiters->sema);
979    if (sc != RTEMS_SUCCESSFUL)
980      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAKE);
981  }
982}
983
984/**
985 * Add a buffer descriptor to the modified list. This modified list is treated
986 * a litte differently to the other lists. To access it you must have the cache
987 * locked and this is assumed to be the case on entry to this call.
988 *
989 * If the cache has a device being sync'ed and the bd is for that device the
990 * call must block and wait until the sync is over before adding the bd to the
991 * modified list. Once a sync happens for a device no bd's can be added the
992 * modified list. The disk image is forced to be snapshot at that moment in
993 * time.
994 *
995 * @note Do not lower the group user count as the modified list is a user of
996 * the buffer.
997 *
998 * @param bd The bd to queue to the cache's modified list.
999 */
1000static void
1001rtems_bdbuf_append_modified (rtems_bdbuf_buffer* bd)
1002{
1003  /*
1004   * If the cache has a device being sync'ed check if this bd is for that
1005   * device. If it is unlock the cache and block on the sync lock. Once we have
1006   * the sync lock release it.
1007   */
1008  if (bdbuf_cache.sync_active && (bdbuf_cache.sync_device == bd->dev))
1009  {
1010    rtems_bdbuf_unlock_cache ();
1011    /* Wait for the sync lock */
1012    rtems_bdbuf_lock_sync ();
1013    rtems_bdbuf_unlock_sync ();
1014    rtems_bdbuf_lock_cache ();
1015  }
1016     
1017  bd->state = RTEMS_BDBUF_STATE_MODIFIED;
1018
1019  rtems_chain_append (&bdbuf_cache.modified, &bd->link);
1020}
1021
1022/**
1023 * Wait the swapper task.
1024 */
1025static void
1026rtems_bdbuf_wake_swapper (void)
1027{
1028  rtems_status_code sc = rtems_event_send (bdbuf_cache.swapout,
1029                                           RTEMS_BDBUF_SWAPOUT_SYNC);
1030  if (sc != RTEMS_SUCCESSFUL)
1031    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE);
1032}
1033
1034/**
1035 * Compute the number of BDs per group for a given buffer size.
1036 *
1037 * @param size The buffer size. It can be any size and we scale up.
1038 */
1039static size_t
1040rtems_bdbuf_bds_per_group (size_t size)
1041{
1042  size_t bufs_per_size;
1043  size_t bds_per_size;
1044 
1045  if (size > rtems_bdbuf_configuration.buffer_max)
1046    return 0;
1047 
1048  bufs_per_size = ((size - 1) / bdbuf_config.buffer_min) + 1;
1049 
1050  for (bds_per_size = 1;
1051       bds_per_size < bufs_per_size;
1052       bds_per_size <<= 1)
1053    ;
1054
1055  return bdbuf_cache.max_bds_per_group / bds_per_size;
1056}
1057
1058/**
1059 * Reallocate a group. The BDs currently allocated in the group are removed
1060 * from the ALV tree and any lists then the new BD's are prepended to the ready
1061 * list of the cache.
1062 *
1063 * @param group The group to reallocate.
1064 * @param new_bds_per_group The new count of BDs per group.
1065 */
1066static void
1067rtems_bdbuf_group_realloc (rtems_bdbuf_group* group, size_t new_bds_per_group)
1068{
1069  rtems_bdbuf_buffer* bd;
1070  size_t              b;
1071  size_t              bufs_per_bd;
1072
1073  if (rtems_bdbuf_tracer)
1074    printf ("bdbuf:realloc: %tu: %zd -> %zd\n",
1075            group - bdbuf_cache.groups, group->bds_per_group,
1076            new_bds_per_group);
1077 
1078  bufs_per_bd = bdbuf_cache.max_bds_per_group / group->bds_per_group;
1079 
1080  for (b = 0, bd = group->bdbuf;
1081       b < group->bds_per_group;
1082       b++, bd += bufs_per_bd)
1083  {
1084    switch (bd->state)
1085    {
1086      case RTEMS_BDBUF_STATE_EMPTY:
1087        break;
1088      case RTEMS_BDBUF_STATE_CACHED:
1089      case RTEMS_BDBUF_STATE_READ_AHEAD:
1090        if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0)
1091          rtems_fatal_error_occurred ((bd->state << 16) |
1092                                      RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_1);
1093        break;
1094      default:
1095        rtems_fatal_error_occurred ((bd->state << 16) |
1096                                    RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_8);
1097    }
1098   
1099    rtems_chain_extract (&bd->link);
1100  }
1101 
1102  group->bds_per_group = new_bds_per_group;
1103  bufs_per_bd = bdbuf_cache.max_bds_per_group / new_bds_per_group;
1104 
1105  for (b = 0, bd = group->bdbuf;
1106       b < group->bds_per_group;
1107       b++, bd += bufs_per_bd)
1108  {
1109    bd->state = RTEMS_BDBUF_STATE_EMPTY;
1110    rtems_chain_prepend (&bdbuf_cache.ready, &bd->link);
1111  }
1112}
1113
1114/**
1115 * Get the next BD from the list. This call assumes the cache is locked.
1116 *
1117 * @param bds_per_group The number of BDs per block we are need.
1118 * @param list The list to find the BD on.
1119 * @return The next BD if found or NULL is none are available.
1120 */
1121static rtems_bdbuf_buffer*
1122rtems_bdbuf_get_next_bd (size_t               bds_per_group,
1123                         rtems_chain_control* list)
1124{
1125  rtems_chain_node* node = rtems_chain_first (list);
1126  while (!rtems_chain_is_tail (list, node))
1127  {
1128    rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;
1129
1130    if (rtems_bdbuf_tracer)
1131      printf ("bdbuf:next-bd: %tu (%td:%" PRId32 ") %zd -> %zd\n",
1132              bd - bdbuf_cache.bds,
1133              bd->group - bdbuf_cache.groups, bd->group->users,
1134              bd->group->bds_per_group, bds_per_group);
1135
1136    /*
1137     * If this bd is already part of a group that supports the same number of
1138     * BDs per group return it. If the bd is part of another group check the
1139     * number of users and if 0 we can take this group and resize it.
1140     */
1141    if (bd->group->bds_per_group == bds_per_group)
1142    {
1143      rtems_chain_extract (node);
1144      return bd;
1145    }
1146
1147    if (bd->group->users == 0)
1148    {
1149      /*
1150       * We use the group to locate the start of the BDs for this group.
1151       */
1152      rtems_bdbuf_group_realloc (bd->group, bds_per_group);
1153      bd = (rtems_bdbuf_buffer*) rtems_chain_get (&bdbuf_cache.ready);
1154      return bd;
1155    }
1156
1157    node = rtems_chain_next (node);
1158  }
1159 
1160  return NULL;
1161}
1162
1163/**
1164 * Initialise the cache.
1165 *
1166 * @return rtems_status_code The initialisation status.
1167 */
1168rtems_status_code
1169rtems_bdbuf_init (void)
1170{
1171  rtems_bdbuf_group*  group;
1172  rtems_bdbuf_buffer* bd;
1173  uint8_t*            buffer;
1174  size_t              b;
1175  size_t              cache_aligment;
1176  rtems_status_code   sc;
1177  rtems_mode          prev_mode;
1178
1179  if (rtems_bdbuf_tracer)
1180    printf ("bdbuf:init\n");
1181
1182  if (rtems_interrupt_is_in_progress())
1183    return RTEMS_CALLED_FROM_ISR;
1184
1185  /*
1186   * Check the configuration table values.
1187   */
1188  if ((bdbuf_config.buffer_max % bdbuf_config.buffer_min) != 0)
1189    return RTEMS_INVALID_NUMBER;
1190 
1191  /*
1192   * We use a special variable to manage the initialisation incase we have
1193   * completing threads doing this. You may get errors if the another thread
1194   * makes a call and we have not finished initialisation.
1195   */
1196  prev_mode = rtems_bdbuf_disable_preemption ();
1197  if (bdbuf_cache.initialised)
1198  {
1199    rtems_bdbuf_restore_preemption (prev_mode);
1200
1201    return RTEMS_RESOURCE_IN_USE;
1202  }
1203  memset(&bdbuf_cache, 0, sizeof(bdbuf_cache));
1204  bdbuf_cache.initialised = true;
1205  rtems_bdbuf_restore_preemption (prev_mode);
1206 
1207  /*
1208   * For unspecified cache alignments we use the CPU alignment.
1209   */
1210  cache_aligment = 32; /* FIXME rtems_cache_get_data_line_size() */
1211  if (cache_aligment <= 0)
1212    cache_aligment = CPU_ALIGNMENT;
1213
1214  bdbuf_cache.sync_device = BDBUF_INVALID_DEV;
1215
1216  rtems_chain_initialize_empty (&bdbuf_cache.swapout_workers);
1217  rtems_chain_initialize_empty (&bdbuf_cache.ready);
1218  rtems_chain_initialize_empty (&bdbuf_cache.lru);
1219  rtems_chain_initialize_empty (&bdbuf_cache.modified);
1220  rtems_chain_initialize_empty (&bdbuf_cache.sync);
1221
1222  /*
1223   * Create the locks for the cache.
1224   */
1225  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'l'),
1226                               1, RTEMS_BDBUF_CACHE_LOCK_ATTRIBS, 0,
1227                               &bdbuf_cache.lock);
1228  if (sc != RTEMS_SUCCESSFUL)
1229    goto error;
1230
1231  rtems_bdbuf_lock_cache ();
1232 
1233  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 's'),
1234                               1, RTEMS_BDBUF_CACHE_LOCK_ATTRIBS, 0,
1235                               &bdbuf_cache.sync_lock);
1236  if (sc != RTEMS_SUCCESSFUL)
1237    goto error;
1238 
1239  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'a'),
1240                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
1241                               &bdbuf_cache.access_waiters.sema);
1242  if (sc != RTEMS_SUCCESSFUL)
1243    goto error;
1244
1245  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 't'),
1246                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
1247                               &bdbuf_cache.transfer_waiters.sema);
1248  if (sc != RTEMS_SUCCESSFUL)
1249    goto error;
1250
1251  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'w'),
1252                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
1253                               &bdbuf_cache.buffer_waiters.sema);
1254  if (sc != RTEMS_SUCCESSFUL)
1255    goto error;
1256 
1257  /*
1258   * Compute the various number of elements in the cache.
1259   */
1260  bdbuf_cache.buffer_min_count =
1261    bdbuf_config.size / bdbuf_config.buffer_min;
1262  bdbuf_cache.max_bds_per_group =
1263    bdbuf_config.buffer_max / bdbuf_config.buffer_min;
1264  bdbuf_cache.group_count =
1265    bdbuf_cache.buffer_min_count / bdbuf_cache.max_bds_per_group;
1266
1267  /*
1268   * Allocate the memory for the buffer descriptors.
1269   */
1270  bdbuf_cache.bds = calloc (sizeof (rtems_bdbuf_buffer),
1271                            bdbuf_cache.buffer_min_count);
1272  if (!bdbuf_cache.bds)
1273    goto error;
1274
1275  /*
1276   * Allocate the memory for the buffer descriptors.
1277   */
1278  bdbuf_cache.groups = calloc (sizeof (rtems_bdbuf_group),
1279                               bdbuf_cache.group_count);
1280  if (!bdbuf_cache.groups)
1281    goto error;
1282 
1283  /*
1284   * Allocate memory for buffer memory. The buffer memory will be cache
1285   * aligned. It is possible to free the memory allocated by rtems_memalign()
1286   * with free(). Return 0 if allocated.
1287   *
1288   * The memory allocate allows a
1289   */
1290  if (rtems_memalign ((void **) &bdbuf_cache.buffers,
1291                      cache_aligment,
1292                      bdbuf_cache.buffer_min_count * bdbuf_config.buffer_min) != 0)
1293    goto error;
1294
1295  /*
1296   * The cache is empty after opening so we need to add all the buffers to it
1297   * and initialise the groups.
1298   */
1299  for (b = 0, group = bdbuf_cache.groups,
1300         bd = bdbuf_cache.bds, buffer = bdbuf_cache.buffers;
1301       b < bdbuf_cache.buffer_min_count;
1302       b++, bd++, buffer += bdbuf_config.buffer_min)
1303  {
1304    bd->dev        = BDBUF_INVALID_DEV;
1305    bd->group      = group;
1306    bd->buffer     = buffer;
1307    bd->avl.left   = NULL;
1308    bd->avl.right  = NULL;
1309    bd->state      = RTEMS_BDBUF_STATE_EMPTY;
1310    bd->error      = 0;
1311    bd->waiters    = 0;
1312    bd->hold_timer = 0;
1313    bd->references = 0;
1314    bd->user       = NULL;
1315   
1316    rtems_chain_append (&bdbuf_cache.ready, &bd->link);
1317
1318    if ((b % bdbuf_cache.max_bds_per_group) ==
1319        (bdbuf_cache.max_bds_per_group - 1))
1320      group++;
1321  }
1322
1323  for (b = 0,
1324         group = bdbuf_cache.groups,
1325         bd = bdbuf_cache.bds;
1326       b < bdbuf_cache.group_count;
1327       b++,
1328         group++,
1329         bd += bdbuf_cache.max_bds_per_group)
1330  {
1331    group->bds_per_group = bdbuf_cache.max_bds_per_group;
1332    group->users = 0;
1333    group->bdbuf = bd;
1334  }
1335         
1336  /*
1337   * Create and start swapout task. This task will create and manage the worker
1338   * threads.
1339   */
1340  bdbuf_cache.swapout_enabled = true;
1341 
1342  sc = rtems_task_create (rtems_build_name('B', 'S', 'W', 'P'),
1343                          (bdbuf_config.swapout_priority ?
1344                           bdbuf_config.swapout_priority :
1345                           RTEMS_BDBUF_SWAPOUT_TASK_PRIORITY_DEFAULT),
1346                          SWAPOUT_TASK_STACK_SIZE,
1347                          RTEMS_PREEMPT | RTEMS_NO_TIMESLICE | RTEMS_NO_ASR,
1348                          RTEMS_LOCAL | RTEMS_NO_FLOATING_POINT,
1349                          &bdbuf_cache.swapout);
1350  if (sc != RTEMS_SUCCESSFUL)
1351    goto error;
1352
1353  sc = rtems_task_start (bdbuf_cache.swapout,
1354                         rtems_bdbuf_swapout_task,
1355                         (rtems_task_argument) &bdbuf_cache);
1356  if (sc != RTEMS_SUCCESSFUL)
1357    goto error;
1358
1359  rtems_bdbuf_unlock_cache ();
1360
1361  return RTEMS_SUCCESSFUL;
1362
1363error:
1364
1365  if (bdbuf_cache.swapout != 0)
1366    rtems_task_delete (bdbuf_cache.swapout);
1367
1368  free (bdbuf_cache.buffers);
1369  free (bdbuf_cache.groups);
1370  free (bdbuf_cache.bds);
1371
1372  rtems_semaphore_delete (bdbuf_cache.buffer_waiters.sema);
1373  rtems_semaphore_delete (bdbuf_cache.access_waiters.sema);
1374  rtems_semaphore_delete (bdbuf_cache.transfer_waiters.sema);
1375  rtems_semaphore_delete (bdbuf_cache.sync_lock);
1376
1377  if (bdbuf_cache.lock != 0)
1378  {
1379    rtems_bdbuf_unlock_cache ();
1380    rtems_semaphore_delete (bdbuf_cache.lock);
1381  }
1382
1383  bdbuf_cache.initialised = false;
1384
1385  return RTEMS_UNSATISFIED;
1386}
1387
1388static inline void
1389rtems_bdbuf_group_obtain (rtems_bdbuf_buffer *bd)
1390{
1391  ++bd->group->users;
1392}
1393
1394static inline void
1395rtems_bdbuf_group_release (rtems_bdbuf_buffer *bd)
1396{
1397  --bd->group->users;
1398}
1399
1400/**
1401 * Get a buffer for this device and block. This function returns a buffer once
1402 * placed into the AVL tree. If no buffer is available and it is not a read
1403 * ahead request and no buffers are waiting to the written to disk wait until a
1404 * buffer is available. If buffers are waiting to be written to disk and none
1405 * are available expire the hold timer's of the queued buffers and wake the
1406 * swap out task. If the buffer is for a read ahead transfer return NULL if
1407 * there are no buffers available or the buffer is already in the cache.
1408 *
1409 * The AVL tree of buffers for the cache is searched and if not found obtain a
1410 * buffer and insert it into the AVL tree. Buffers are first obtained from the
1411 * ready list until all empty/ready buffers are used. Once all buffers are in
1412 * use the LRU list is searched for a buffer of the same group size or a group
1413 * that has no active buffers in use. A buffer taken from the LRU list is
1414 * removed from the AVL tree and assigned the new block number. The ready or
1415 * LRU list buffer is initialised to this device and block. If no buffers are
1416 * available due to the ready and LRU lists being empty a check is made of the
1417 * modified list. Buffers may be queued waiting for the hold timer to
1418 * expire. These buffers should be written to disk and returned to the LRU list
1419 * where they can be used. If buffers are on the modified list the max. write
1420 * block size of buffers have their hold timer's expired and the swap out task
1421 * woken. The caller then blocks on the waiting semaphore and counter. When
1422 * buffers return from the upper layers (access) or lower driver (transfer) the
1423 * blocked caller task is woken and this procedure is repeated. The repeat
1424 * handles a case of a another thread pre-empting getting a buffer first and
1425 * adding it to the AVL tree.
1426 *
1427 * A buffer located in the AVL tree means it is already in the cache and maybe
1428 * in use somewhere. The buffer can be either:
1429 *
1430 * # Cached. Not being accessed or part of a media transfer.
1431 * # Access or modifed access. Is with an upper layer being accessed.
1432 * # Transfer. Is with the driver and part of a media transfer.
1433 *
1434 * If cached we assign the new state, extract it from any list it maybe part of
1435 * and return to the user.
1436 *
1437 * This function assumes the cache the buffer is being taken from is locked and
1438 * it will make sure the cache is locked when it returns. The cache will be
1439 * unlocked if the call could block.
1440 *
1441 * Variable sized buffer is handled by groups. A group is the size of the
1442 * maximum buffer that can be allocated. The group can size in multiples of the
1443 * minimum buffer size where the mulitples are 1,2,4,8, etc. If the buffer is
1444 * found in the AVL tree the number of BDs in the group is check and if
1445 * different the buffer size for the block has changed. The buffer needs to be
1446 * invalidated.
1447 *
1448 * The returned buffer will be a user its group.
1449 *
1450 * @param dd The disk device. Has the configured block size.
1451 * @param bds_per_group The number of BDs in a group for this block.
1452 * @param block Absolute media block number for the device
1453 * @param read_ahead The get is for a read ahead buffer if true
1454 * @return RTEMS status code (if operation completed successfully or error
1455 *         code if error is occured)
1456 */
1457static rtems_bdbuf_buffer*
1458rtems_bdbuf_get_buffer (rtems_disk_device* dd,
1459                        size_t             bds_per_group,
1460                        rtems_blkdev_bnum  block,
1461                        bool               read_ahead)
1462{
1463  dev_t               device = dd->dev;
1464  rtems_bdbuf_buffer* bd;
1465  bool                available;
1466 
1467  /*
1468   * Loop until we get a buffer. Under load we could find no buffers are
1469   * available requiring this task to wait until some become available before
1470   * proceeding. There is no timeout. If this call is to block and the buffer
1471   * is for a read ahead buffer return NULL. The read ahead is nice but not
1472   * that important.
1473   *
1474   * The search procedure is repeated as another thread could have pre-empted
1475   * us while we waited for a buffer, obtained an empty buffer and loaded the
1476   * AVL tree with the one we are after. In this case we move down and wait for
1477   * the buffer to return to the cache.
1478   */
1479  do
1480  {
1481    /*
1482     * Search for buffer descriptor for this dev/block key.
1483     */
1484    bd = rtems_bdbuf_avl_search (&bdbuf_cache.tree, device, block);
1485
1486    /*
1487     * No buffer in the cache for this block. We need to obtain a buffer and
1488     * this means take a buffer that is ready to use. If all buffers are in use
1489     * take the least recently used buffer. If there are none then the cache is
1490     * empty. All the buffers are either queued to be written to disk or with
1491     * the user. We cannot do much with the buffers with the user how-ever with
1492     * the modified buffers waiting to be written to disk flush the maximum
1493     * number transfered in a block to disk. After this all that can be done is
1494     * to wait for a buffer to return to the cache.
1495     */
1496    if (!bd)
1497    {
1498      /*
1499       * Assign new buffer descriptor from the ready list if one is present. If
1500       * the ready queue is empty get the oldest buffer from LRU list. If the
1501       * LRU list is empty there are no available buffers check the modified
1502       * list.
1503       */
1504      bd = rtems_bdbuf_get_next_bd (bds_per_group, &bdbuf_cache.ready);
1505
1506      if (!bd)
1507      {
1508        /*
1509         * No unused or read-ahead buffers.
1510         *
1511         * If this is a read ahead buffer just return. No need to place further
1512         * pressure on the cache by reading something that may be needed when
1513         * we have data in the cache that was needed and may still be in the
1514         * future.
1515         */
1516        if (read_ahead)
1517          return NULL;
1518
1519        /*
1520         * Check the LRU list.
1521         */
1522        bd = rtems_bdbuf_get_next_bd (bds_per_group, &bdbuf_cache.lru);
1523       
1524        if (bd)
1525        {
1526          /*
1527           * Remove the buffer from the AVL tree if the state says it is in the
1528           * cache or a read ahead buffer. The buffer could be in the empty
1529           * state as a result of reallocations.
1530           */
1531          switch (bd->state)
1532          {
1533            case RTEMS_BDBUF_STATE_CACHED:
1534            case RTEMS_BDBUF_STATE_READ_AHEAD:
1535              if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0)
1536                rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_2);
1537              break;
1538            default:
1539              break;
1540          }
1541        }
1542        else
1543        {
1544          /*
1545           * If there are buffers on the modified list expire the hold timer
1546           * and wake the swap out task then wait else just go and wait.
1547           *
1548           * The check for an empty list is made so the swapper is only woken
1549           * when if timers are changed.
1550           */
1551          if (!rtems_chain_is_empty (&bdbuf_cache.modified))
1552          {
1553            rtems_chain_node* node = rtems_chain_first (&bdbuf_cache.modified);
1554            uint32_t          write_blocks = 0;
1555           
1556            while ((write_blocks < bdbuf_config.max_write_blocks) &&
1557                   !rtems_chain_is_tail (&bdbuf_cache.modified, node))
1558            {
1559              rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;
1560              bd->hold_timer = 0;
1561              write_blocks++;
1562              node = rtems_chain_next (node);
1563            }
1564
1565            rtems_bdbuf_wake_swapper ();
1566          }
1567         
1568          /*
1569           * Wait for a buffer to be returned to the cache. The buffer will be
1570           * placed on the LRU list.
1571           */
1572          rtems_bdbuf_wait (&bdbuf_cache.buffer_waiters);
1573        }
1574      }
1575      else
1576      {
1577        /*
1578         * We have a new buffer for this block.
1579         */
1580        if ((bd->state != RTEMS_BDBUF_STATE_EMPTY) &&
1581            (bd->state != RTEMS_BDBUF_STATE_READ_AHEAD))
1582          rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_3);
1583
1584        if (bd->state == RTEMS_BDBUF_STATE_READ_AHEAD)
1585        {
1586          if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0)
1587            rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_4);
1588        }
1589      }
1590
1591      if (bd)
1592      {
1593        bd->dev       = device;
1594        bd->block     = block;
1595        bd->avl.left  = NULL;
1596        bd->avl.right = NULL;
1597        bd->state     = RTEMS_BDBUF_STATE_EMPTY;
1598        bd->error     = 0;
1599        bd->waiters   = 0;
1600
1601        if (rtems_bdbuf_avl_insert (&bdbuf_cache.tree, bd) != 0)
1602          rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_5);
1603
1604        rtems_bdbuf_group_obtain (bd);
1605
1606        return bd;
1607      }
1608    }
1609    else
1610    {
1611      /*
1612       * We have the buffer for the block from the cache. Check if the buffer
1613       * in the cache is the same size and the requested size we are after.
1614       */
1615      if (bd->group->bds_per_group != bds_per_group)
1616      {
1617        /*
1618         * Remove the buffer from the AVL tree.
1619         */
1620        if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0)
1621          rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_2);
1622        bd->state = RTEMS_BDBUF_STATE_EMPTY;
1623        rtems_chain_extract (&bd->link);
1624        rtems_chain_prepend (&bdbuf_cache.ready, &bd->link);
1625        bd = NULL;
1626      }
1627    }
1628  }
1629  while (!bd);
1630
1631  /*
1632   * If the buffer is for read ahead and it exists in the AVL cache or is being
1633   * accessed or being transfered then return NULL stopping further read ahead
1634   * requests.
1635   */
1636  if (read_ahead)
1637    return NULL;
1638
1639  /*
1640   * Before we wait for the buffer, we have to obtain its group.  This prevents
1641   * a reallocation.
1642   */
1643  rtems_bdbuf_group_obtain (bd);
1644
1645  /*
1646   * Loop waiting for the buffer to enter the cached state. If the buffer is in
1647   * the access or transfer state then wait until it is not.
1648   */
1649  available = false;
1650  while (!available)
1651  {
1652    switch (bd->state)
1653    {
1654      case RTEMS_BDBUF_STATE_CACHED:
1655      case RTEMS_BDBUF_STATE_MODIFIED:
1656      case RTEMS_BDBUF_STATE_READ_AHEAD:
1657        available = true;
1658        break;
1659
1660      case RTEMS_BDBUF_STATE_ACCESS:
1661      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
1662        bd->waiters++;
1663        rtems_bdbuf_wait (&bdbuf_cache.access_waiters);
1664        bd->waiters--;
1665        break;
1666
1667      case RTEMS_BDBUF_STATE_SYNC:
1668      case RTEMS_BDBUF_STATE_TRANSFER:
1669        bd->waiters++;
1670        rtems_bdbuf_wait (&bdbuf_cache.transfer_waiters);
1671        bd->waiters--;
1672        break;
1673
1674      default:
1675        rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_6);
1676    }
1677  }
1678
1679  /*
1680   * Buffer is linked to the LRU, modifed, or sync lists. Remove it from there.
1681   */
1682  rtems_chain_extract (&bd->link);
1683
1684  /*
1685   * The modified list is no longer a user of the buffers group.
1686   */
1687  if (bd->state == RTEMS_BDBUF_STATE_MODIFIED)
1688    rtems_bdbuf_group_release (bd);
1689
1690  return bd;
1691}
1692
1693rtems_status_code
1694rtems_bdbuf_get (dev_t                device,
1695                 rtems_blkdev_bnum    block,
1696                 rtems_bdbuf_buffer** bdp)
1697{
1698  rtems_disk_device*  dd;
1699  rtems_bdbuf_buffer* bd;
1700  rtems_blkdev_bnum   media_block;
1701  size_t              bds_per_group;
1702
1703  if (!bdbuf_cache.initialised)
1704    return RTEMS_NOT_CONFIGURED;
1705
1706  /*
1707   * Do not hold the cache lock when obtaining the disk table.
1708   */
1709  dd = rtems_disk_obtain (device);
1710  if (!dd)
1711    return RTEMS_INVALID_ID;
1712
1713  /*
1714   * Compute the media block number. Drivers work with media block number not
1715   * the block number a BD may have as this depends on the block size set by
1716   * the user.
1717   */
1718  media_block = rtems_bdbuf_media_block (block,
1719                                         dd->block_size,
1720                                         dd->media_block_size);
1721  if (media_block >= dd->size)
1722  {
1723    rtems_disk_release(dd);
1724    return RTEMS_INVALID_NUMBER;
1725  }
1726
1727  bds_per_group = rtems_bdbuf_bds_per_group (dd->block_size);
1728  if (!bds_per_group)
1729  {
1730    rtems_disk_release (dd);
1731    return RTEMS_INVALID_NUMBER;
1732  }
1733
1734  media_block += dd->start;
1735
1736  rtems_bdbuf_lock_cache ();
1737
1738  /*
1739   * Print the block index relative to the physical disk.
1740   */
1741  if (rtems_bdbuf_tracer)
1742    printf ("bdbuf:get: %" PRIu32 " (%" PRIu32 ") (dev = %08x)\n",
1743            media_block, block, (unsigned) device);
1744
1745  bd = rtems_bdbuf_get_buffer (dd, bds_per_group, media_block, false);
1746
1747  /*
1748   * To get a modified buffer could be considered a bug in the caller because
1749   * you should not be getting an already modified buffer but user may have
1750   * modified a byte in a block then decided to seek the start and write the
1751   * whole block and the file system will have no record of this so just gets
1752   * the block to fill.
1753   */
1754  if (bd->state != RTEMS_BDBUF_STATE_MODIFIED)
1755    bd->state = RTEMS_BDBUF_STATE_ACCESS;
1756  else
1757    bd->state = RTEMS_BDBUF_STATE_ACCESS_MODIFIED;
1758 
1759  if (rtems_bdbuf_tracer)
1760  {
1761    rtems_bdbuf_show_users ("get", bd);
1762    rtems_bdbuf_show_usage ();
1763  }
1764
1765  rtems_bdbuf_unlock_cache ();
1766
1767  rtems_disk_release(dd);
1768
1769  *bdp = bd;
1770
1771  return RTEMS_SUCCESSFUL;
1772}
1773
1774/**
1775 * Call back handler called by the low level driver when the transfer has
1776 * completed. This function may be invoked from interrupt handler.
1777 *
1778 * @param arg Arbitrary argument specified in block device request
1779 *            structure (in this case - pointer to the appropriate
1780 *            block device request structure).
1781 * @param status I/O completion status
1782 * @param error errno error code if status != RTEMS_SUCCESSFUL
1783 */
1784static void
1785rtems_bdbuf_read_done (void* arg, rtems_status_code status, int error)
1786{
1787  rtems_blkdev_request* req = (rtems_blkdev_request*) arg;
1788
1789  req->error = error;
1790  req->status = status;
1791
1792  rtems_event_send (req->io_task, RTEMS_BDBUF_TRANSFER_SYNC);
1793}
1794
1795rtems_status_code
1796rtems_bdbuf_read (dev_t                device,
1797                  rtems_blkdev_bnum    block,
1798                  rtems_bdbuf_buffer** bdp)
1799{
1800  rtems_disk_device*    dd;
1801  rtems_bdbuf_buffer*   bd = NULL;
1802  uint32_t              read_ahead_count;
1803  rtems_blkdev_request* req;
1804  size_t                bds_per_group;
1805  rtems_blkdev_bnum     media_block;
1806  rtems_blkdev_bnum     media_block_count;
1807  bool                  read_ahead = false;
1808 
1809  if (!bdbuf_cache.initialised)
1810    return RTEMS_NOT_CONFIGURED;
1811
1812  /*
1813   * @todo This type of request structure is wrong and should be removed.
1814   */
1815#define bdbuf_alloc(size) __builtin_alloca (size)
1816
1817  req = bdbuf_alloc (sizeof (rtems_blkdev_request) +
1818                     (sizeof ( rtems_blkdev_sg_buffer) *
1819                      rtems_bdbuf_configuration.max_read_ahead_blocks));
1820
1821  /*
1822   * Do not hold the cache lock when obtaining the disk table.
1823   */
1824  dd = rtems_disk_obtain (device);
1825  if (!dd)
1826    return RTEMS_INVALID_ID;
1827 
1828  /*
1829   * Compute the media block number. Drivers work with media block number not
1830   * the block number a BD may have as this depends on the block size set by
1831   * the user.
1832   */
1833  media_block = rtems_bdbuf_media_block (block,
1834                                         dd->block_size,
1835                                         dd->media_block_size);
1836  if (media_block >= dd->size)
1837  {
1838    rtems_disk_release(dd);
1839    return RTEMS_INVALID_NUMBER;
1840  }
1841 
1842  bds_per_group = rtems_bdbuf_bds_per_group (dd->block_size);
1843  if (!bds_per_group)
1844  {
1845    rtems_disk_release (dd);
1846    return RTEMS_INVALID_NUMBER;
1847  }
1848 
1849  /*
1850   * Print the block index relative to the physical disk and the user block
1851   * number
1852   */
1853  if (rtems_bdbuf_tracer)
1854    printf ("bdbuf:read: %" PRIu32 " (%" PRIu32 ") (dev = %08x)\n",
1855            media_block + dd->start, block, (unsigned) device);
1856
1857  /*
1858   * Read the block plus the required number of blocks ahead. The number of
1859   * blocks to read ahead is configured by the user and limited by the size of
1860   * the disk or reaching a read ahead block that is also cached.
1861   *
1862   * Limit the blocks read by the size of the disk.
1863   */
1864  if ((rtems_bdbuf_configuration.max_read_ahead_blocks + media_block) < dd->size)
1865    read_ahead_count = rtems_bdbuf_configuration.max_read_ahead_blocks;
1866  else
1867    read_ahead_count = dd->size - media_block;
1868
1869  media_block_count = dd->block_size / dd->media_block_size;
1870 
1871  req->bufnum = 0;
1872
1873  rtems_bdbuf_lock_cache ();
1874
1875  while (req->bufnum < read_ahead_count)
1876  {
1877    /*
1878     * Get the buffer for the requested block. If the block is cached then
1879     * return it. If it is not cached transfer the block from the disk media
1880     * into memory.
1881     *
1882     * We need to clean up any buffers allocated and not passed back to the
1883     * caller.
1884     */
1885    bd = rtems_bdbuf_get_buffer (dd, bds_per_group, media_block + dd->start,
1886                                 read_ahead);
1887
1888    /*
1889     * Read ahead buffer is in the cache or none available. Read what we
1890     * can.
1891     */
1892    if (!bd)
1893      break;
1894
1895    /*
1896     * Is the block we are interested in the cache ?
1897     */
1898    if ((bd->state == RTEMS_BDBUF_STATE_CACHED) ||
1899        (bd->state == RTEMS_BDBUF_STATE_MODIFIED))
1900    {
1901      if (read_ahead)
1902        rtems_bdbuf_group_release (bd);
1903
1904      break;
1905    }
1906
1907    bd->state = RTEMS_BDBUF_STATE_TRANSFER;
1908    bd->error = 0;
1909
1910    if (rtems_bdbuf_tracer)
1911      rtems_bdbuf_show_users ("reading", bd);
1912   
1913    /*
1914     * @todo The use of these req blocks is not a great design. The req is a
1915     *       struct with a single 'bufs' declared in the req struct and the
1916     *       others are added in the outer level struct. This relies on the
1917     *       structs joining as a single array and that assumes the compiler
1918     *       packs the structs. Why not just place on a list ? The BD has a
1919     *       node that can be used.
1920     */
1921    req->bufs[req->bufnum].user   = bd;
1922    req->bufs[req->bufnum].block  = media_block + dd->start;
1923    req->bufs[req->bufnum].length = dd->block_size;
1924    req->bufs[req->bufnum].buffer = bd->buffer;
1925    req->bufnum++;
1926
1927    /*
1928     * Move the media block count by the number of media blocks in the
1929     * disk device's set block size.
1930     */
1931    media_block += media_block_count;
1932
1933    /*
1934     * After the first buffer we have read ahead buffers.
1935     */
1936    read_ahead = true;
1937  }
1938
1939  /*
1940   * Transfer any requested buffers. If the request count is 0 we have found
1941   * the block in the cache so return it.
1942   */
1943  if (req->bufnum)
1944  {
1945    /*
1946     * Unlock the cache. We have the buffer for the block and it will be in the
1947     * access or transfer state. We may also have a number of read ahead blocks
1948     * if we need to transfer data. At this point any other threads can gain
1949     * access to the cache and if they are after any of the buffers we have
1950     * they will block and be woken when the buffer is returned to the cache.
1951     *
1952     * If a transfer is needed the I/O operation will occur with pre-emption
1953     * enabled and the cache unlocked. This is a change to the previous version
1954     * of the bdbuf code.
1955     */
1956    rtems_event_set out;
1957    int             result;
1958    uint32_t        b;
1959    bool            wake_transfer;
1960
1961    /*
1962     * Flush any events.
1963     */
1964    rtems_event_receive (RTEMS_BDBUF_TRANSFER_SYNC,
1965                         RTEMS_EVENT_ALL | RTEMS_NO_WAIT,
1966                         0, &out);
1967                         
1968    rtems_bdbuf_unlock_cache ();
1969
1970    req->req = RTEMS_BLKDEV_REQ_READ;
1971    req->req_done = rtems_bdbuf_read_done;
1972    req->done_arg = req;
1973    req->io_task = rtems_task_self ();
1974    req->status = RTEMS_RESOURCE_IN_USE;
1975    req->error = 0;
1976 
1977    result = dd->ioctl (dd, RTEMS_BLKIO_REQUEST, req);
1978
1979    /*
1980     * Inspection of the DOS FS code shows the result from this function is
1981     * handled and a buffer must be returned.
1982     */
1983    if (result < 0)
1984    {
1985      req->error = errno;
1986      req->status = RTEMS_IO_ERROR;
1987    }
1988    else
1989    {
1990      rtems_status_code sc;
1991     
1992      sc = rtems_event_receive (RTEMS_BDBUF_TRANSFER_SYNC,
1993                                RTEMS_EVENT_ALL | RTEMS_WAIT,
1994                                0, &out);
1995
1996      if (sc != RTEMS_SUCCESSFUL)
1997        rtems_fatal_error_occurred (BLKDEV_FATAL_BDBUF_SWAPOUT_RE);
1998    }
1999
2000    wake_transfer = false;
2001   
2002    rtems_bdbuf_lock_cache ();
2003
2004    for (b = 1; b < req->bufnum; b++)
2005    {
2006      bd = req->bufs[b].user;
2007      if (!bd->error)
2008        bd->error = req->error;
2009      bd->state = RTEMS_BDBUF_STATE_READ_AHEAD;
2010
2011      rtems_bdbuf_group_release (bd);
2012
2013      if (rtems_bdbuf_tracer)
2014        rtems_bdbuf_show_users ("read-ahead", bd);
2015
2016      rtems_chain_prepend (&bdbuf_cache.ready, &bd->link);
2017
2018      /*
2019       * If there is an error remove the BD from the AVL tree as it is invalid,
2020       * then wake any threads that may be waiting. A thread may have been
2021       * waiting for this block and assumed it was in the tree.
2022       */
2023      if (bd->error)
2024      {
2025        bd->state = RTEMS_BDBUF_STATE_EMPTY;
2026        if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0)
2027          rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_9);
2028      }
2029
2030      if (bd->waiters)
2031        wake_transfer = true;
2032    }
2033
2034    if (wake_transfer)
2035      rtems_bdbuf_wake (&bdbuf_cache.transfer_waiters);
2036    else
2037      rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
2038   
2039    bd = req->bufs[0].user;
2040
2041    if (rtems_bdbuf_tracer)
2042      rtems_bdbuf_show_users ("read-done", bd);
2043  }
2044
2045  if (bd->state != RTEMS_BDBUF_STATE_MODIFIED)
2046    bd->state = RTEMS_BDBUF_STATE_ACCESS;
2047  else
2048    bd->state = RTEMS_BDBUF_STATE_ACCESS_MODIFIED;
2049
2050  if (rtems_bdbuf_tracer)
2051  {
2052    rtems_bdbuf_show_users ("read", bd);
2053    rtems_bdbuf_show_usage ();
2054  }
2055 
2056  rtems_bdbuf_unlock_cache ();
2057  rtems_disk_release (dd);
2058
2059  *bdp = bd;
2060
2061  return RTEMS_SUCCESSFUL;
2062}
2063
2064rtems_status_code
2065rtems_bdbuf_release (rtems_bdbuf_buffer* bd)
2066{
2067  if (!bdbuf_cache.initialised)
2068    return RTEMS_NOT_CONFIGURED;
2069
2070  if (bd == NULL)
2071    return RTEMS_INVALID_ADDRESS;
2072
2073  rtems_bdbuf_lock_cache ();
2074
2075  if (rtems_bdbuf_tracer)
2076    printf ("bdbuf:release: %" PRIu32 "\n", bd->block);
2077 
2078  if (bd->state == RTEMS_BDBUF_STATE_ACCESS_MODIFIED)
2079  {
2080    rtems_bdbuf_append_modified (bd);
2081  }
2082  else
2083  {
2084    bd->state = RTEMS_BDBUF_STATE_CACHED;
2085    rtems_chain_append (&bdbuf_cache.lru, &bd->link);
2086
2087    rtems_bdbuf_group_release (bd);
2088  }
2089 
2090  if (rtems_bdbuf_tracer)
2091    rtems_bdbuf_show_users ("release", bd);
2092 
2093  /*
2094   * If there are threads waiting to access the buffer wake them. Wake any
2095   * waiters if this buffer is placed back onto the LRU queue.
2096   */
2097  if (bd->waiters)
2098    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
2099  else
2100    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
2101 
2102  if (rtems_bdbuf_tracer)
2103    rtems_bdbuf_show_usage ();
2104 
2105  rtems_bdbuf_unlock_cache ();
2106
2107  return RTEMS_SUCCESSFUL;
2108}
2109
2110rtems_status_code
2111rtems_bdbuf_release_modified (rtems_bdbuf_buffer* bd)
2112{
2113  if (!bdbuf_cache.initialised)
2114    return RTEMS_NOT_CONFIGURED;
2115
2116  if (!bd)
2117    return RTEMS_INVALID_ADDRESS;
2118
2119  rtems_bdbuf_lock_cache ();
2120
2121  if (rtems_bdbuf_tracer)
2122    printf ("bdbuf:release modified: %" PRIu32 "\n", bd->block);
2123
2124  bd->hold_timer = rtems_bdbuf_configuration.swap_block_hold;
2125 
2126  if (rtems_bdbuf_tracer)
2127    rtems_bdbuf_show_users ("release-modified", bd);
2128 
2129  rtems_bdbuf_append_modified (bd);
2130
2131  if (bd->waiters)
2132    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
2133 
2134  if (rtems_bdbuf_tracer)
2135    rtems_bdbuf_show_usage ();
2136 
2137  rtems_bdbuf_unlock_cache ();
2138
2139  return RTEMS_SUCCESSFUL;
2140}
2141
2142rtems_status_code
2143rtems_bdbuf_sync (rtems_bdbuf_buffer* bd)
2144{
2145  bool available;
2146
2147  if (rtems_bdbuf_tracer)
2148    printf ("bdbuf:sync: %" PRIu32 "\n", bd->block);
2149 
2150  if (!bdbuf_cache.initialised)
2151    return RTEMS_NOT_CONFIGURED;
2152
2153  if (!bd)
2154    return RTEMS_INVALID_ADDRESS;
2155
2156  rtems_bdbuf_lock_cache ();
2157
2158  bd->state = RTEMS_BDBUF_STATE_SYNC;
2159
2160  rtems_chain_append (&bdbuf_cache.sync, &bd->link);
2161
2162  rtems_bdbuf_wake_swapper ();
2163
2164  available = false;
2165  while (!available)
2166  {
2167    switch (bd->state)
2168    {
2169      case RTEMS_BDBUF_STATE_CACHED:
2170      case RTEMS_BDBUF_STATE_READ_AHEAD:
2171      case RTEMS_BDBUF_STATE_MODIFIED:
2172      case RTEMS_BDBUF_STATE_ACCESS:
2173      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2174        available = true;
2175        break;
2176
2177      case RTEMS_BDBUF_STATE_SYNC:
2178      case RTEMS_BDBUF_STATE_TRANSFER:
2179        bd->waiters++;
2180        rtems_bdbuf_wait (&bdbuf_cache.transfer_waiters);
2181        bd->waiters--;
2182        break;
2183
2184      default:
2185        rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_7);
2186    }
2187  }
2188
2189  rtems_bdbuf_unlock_cache ();
2190 
2191  return RTEMS_SUCCESSFUL;
2192}
2193
2194rtems_status_code
2195rtems_bdbuf_syncdev (dev_t dev)
2196{
2197  rtems_disk_device*  dd;
2198  rtems_status_code   sc;
2199  rtems_event_set     out;
2200
2201  if (rtems_bdbuf_tracer)
2202    printf ("bdbuf:syncdev: %08x\n", (unsigned) dev);
2203
2204  if (!bdbuf_cache.initialised)
2205    return RTEMS_NOT_CONFIGURED;
2206
2207  /*
2208   * Do not hold the cache lock when obtaining the disk table.
2209   */
2210  dd = rtems_disk_obtain (dev);
2211  if (!dd)
2212    return RTEMS_INVALID_ID;
2213
2214  /*
2215   * Take the sync lock before locking the cache. Once we have the sync lock we
2216   * can lock the cache. If another thread has the sync lock it will cause this
2217   * thread to block until it owns the sync lock then it can own the cache. The
2218   * sync lock can only be obtained with the cache unlocked.
2219   */
2220 
2221  rtems_bdbuf_lock_sync ();
2222  rtems_bdbuf_lock_cache (); 
2223
2224  /*
2225   * Set the cache to have a sync active for a specific device and let the swap
2226   * out task know the id of the requester to wake when done.
2227   *
2228   * The swap out task will negate the sync active flag when no more buffers
2229   * for the device are held on the "modified for sync" queues.
2230   */
2231  bdbuf_cache.sync_active    = true;
2232  bdbuf_cache.sync_requester = rtems_task_self ();
2233  bdbuf_cache.sync_device    = dev;
2234 
2235  rtems_bdbuf_wake_swapper ();
2236  rtems_bdbuf_unlock_cache ();
2237 
2238  sc = rtems_event_receive (RTEMS_BDBUF_TRANSFER_SYNC,
2239                            RTEMS_EVENT_ALL | RTEMS_WAIT,
2240                            0, &out);
2241
2242  if (sc != RTEMS_SUCCESSFUL)
2243    rtems_fatal_error_occurred (BLKDEV_FATAL_BDBUF_SWAPOUT_RE);
2244     
2245  rtems_bdbuf_unlock_sync ();
2246 
2247  return rtems_disk_release (dd);
2248}
2249
2250/**
2251 * Call back handler called by the low level driver when the transfer has
2252 * completed. This function may be invoked from interrupt handlers.
2253 *
2254 * @param arg Arbitrary argument specified in block device request
2255 *            structure (in this case - pointer to the appropriate
2256 *            block device request structure).
2257 * @param status I/O completion status
2258 * @param error errno error code if status != RTEMS_SUCCESSFUL
2259 */
2260static void
2261rtems_bdbuf_write_done(void *arg, rtems_status_code status, int error)
2262{
2263  rtems_blkdev_request* req = (rtems_blkdev_request*) arg;
2264
2265  req->error = error;
2266  req->status = status;
2267
2268  rtems_event_send (req->io_task, RTEMS_BDBUF_TRANSFER_SYNC);
2269}
2270
2271/**
2272 * Swapout transfer to the driver. The driver will break this I/O into groups
2273 * of consecutive write requests is multiple consecutive buffers are required
2274 * by the driver.
2275 *
2276 * @param transfer The transfer transaction.
2277 */
2278static void
2279rtems_bdbuf_swapout_write (rtems_bdbuf_swapout_transfer* transfer)
2280{
2281  rtems_disk_device* dd;
2282 
2283  if (rtems_bdbuf_tracer)
2284    printf ("bdbuf:swapout transfer: %08x\n", (unsigned) transfer->dev);
2285
2286  /*
2287   * If there are buffers to transfer to the media transfer them.
2288   */
2289  if (!rtems_chain_is_empty (&transfer->bds))
2290  {
2291    /*
2292     * Obtain the disk device. The cache's mutex has been released to avoid a
2293     * dead lock.
2294     */
2295    dd = rtems_disk_obtain (transfer->dev);
2296    if (dd)
2297    {
2298      /*
2299       * The last block number used when the driver only supports
2300       * continuous blocks in a single request.
2301       */
2302      uint32_t last_block = 0;
2303
2304      /*
2305       * Number of buffers per bd. This is used to detect the next
2306       * block.
2307       */
2308      uint32_t bufs_per_bd = dd->block_size / bdbuf_config.buffer_min;
2309     
2310      /*
2311       * Take as many buffers as configured and pass to the driver. Note, the
2312       * API to the drivers has an array of buffers and if a chain was passed
2313       * we could have just passed the list. If the driver API is updated it
2314       * should be possible to make this change with little effect in this
2315       * code. The array that is passed is broken in design and should be
2316       * removed. Merging members of a struct into the first member is
2317       * trouble waiting to happen.
2318       */
2319      transfer->write_req->status = RTEMS_RESOURCE_IN_USE;
2320      transfer->write_req->error = 0;
2321      transfer->write_req->bufnum = 0;
2322
2323      while (!rtems_chain_is_empty (&transfer->bds))
2324      {
2325        rtems_bdbuf_buffer* bd =
2326          (rtems_bdbuf_buffer*) rtems_chain_get (&transfer->bds);
2327
2328        bool write = false;
2329       
2330        /*
2331         * If the device only accepts sequential buffers and this is not the
2332         * first buffer (the first is always sequential, and the buffer is not
2333         * sequential then put the buffer back on the transfer chain and write
2334         * the committed buffers.
2335         */
2336       
2337        if (rtems_bdbuf_tracer)
2338          printf ("bdbuf:swapout write: bd:%" PRIu32 ", bufnum:%" PRIu32 " mode:%s\n",
2339                  bd->block, transfer->write_req->bufnum,
2340                  dd->phys_dev->capabilities &
2341                  RTEMS_BLKDEV_CAP_MULTISECTOR_CONT ? "MULIT" : "SCAT");
2342       
2343        if ((dd->phys_dev->capabilities & RTEMS_BLKDEV_CAP_MULTISECTOR_CONT) &&
2344            transfer->write_req->bufnum &&
2345            (bd->block != (last_block + bufs_per_bd)))
2346        {
2347          rtems_chain_prepend (&transfer->bds, &bd->link);
2348          write = true;
2349        }
2350        else
2351        {
2352          rtems_blkdev_sg_buffer* buf;
2353          buf = &transfer->write_req->bufs[transfer->write_req->bufnum];
2354          transfer->write_req->bufnum++;
2355          buf->user   = bd;
2356          buf->block  = bd->block;
2357          buf->length = dd->block_size;
2358          buf->buffer = bd->buffer;
2359          last_block  = bd->block;
2360        }
2361
2362        /*
2363         * Perform the transfer if there are no more buffers, or the transfer
2364         * size has reached the configured max. value.
2365         */
2366
2367        if (rtems_chain_is_empty (&transfer->bds) ||
2368            (transfer->write_req->bufnum >= rtems_bdbuf_configuration.max_write_blocks))
2369          write = true;
2370
2371        if (write)
2372        {
2373          int result;
2374          uint32_t b;
2375
2376          if (rtems_bdbuf_tracer)
2377            printf ("bdbuf:swapout write: writing bufnum:%" PRIu32 "\n",
2378                    transfer->write_req->bufnum);
2379
2380          /*
2381           * Perform the transfer. No cache locks, no preemption, only the disk
2382           * device is being held.
2383           */
2384          result = dd->ioctl (dd, RTEMS_BLKIO_REQUEST, transfer->write_req); 
2385          if (result < 0)
2386          {
2387            rtems_bdbuf_lock_cache ();
2388             
2389            for (b = 0; b < transfer->write_req->bufnum; b++)
2390            {
2391              bd = transfer->write_req->bufs[b].user;
2392              bd->state  = RTEMS_BDBUF_STATE_MODIFIED;
2393              bd->error = errno;
2394
2395              /*
2396               * Place back on the cache's modified queue and try again.
2397               *
2398               * @warning Not sure this is the best option but I do not know
2399               *          what else can be done.
2400               */
2401              rtems_chain_append (&bdbuf_cache.modified, &bd->link);
2402            }
2403          }
2404          else
2405          {
2406            rtems_status_code sc = 0;
2407            rtems_event_set   out;
2408
2409            sc = rtems_event_receive (RTEMS_BDBUF_TRANSFER_SYNC,
2410                                      RTEMS_EVENT_ALL | RTEMS_WAIT,
2411                                      0, &out);
2412
2413            if (sc != RTEMS_SUCCESSFUL)
2414              rtems_fatal_error_occurred (BLKDEV_FATAL_BDBUF_SWAPOUT_RE);
2415
2416            rtems_bdbuf_lock_cache ();
2417
2418            for (b = 0; b < transfer->write_req->bufnum; b++)
2419            {
2420              bd = transfer->write_req->bufs[b].user;
2421              bd->state = RTEMS_BDBUF_STATE_CACHED;
2422              bd->error = 0;
2423
2424              rtems_bdbuf_group_release (bd);
2425
2426              if (rtems_bdbuf_tracer)
2427                rtems_bdbuf_show_users ("write", bd);
2428
2429              rtems_chain_append (&bdbuf_cache.lru, &bd->link);
2430             
2431              if (bd->waiters)
2432                rtems_bdbuf_wake (&bdbuf_cache.transfer_waiters);
2433              else
2434                rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
2435            }
2436          }
2437
2438          if (rtems_bdbuf_tracer)
2439            rtems_bdbuf_show_usage ();
2440
2441          rtems_bdbuf_unlock_cache ();
2442
2443          transfer->write_req->status = RTEMS_RESOURCE_IN_USE;
2444          transfer->write_req->error = 0;
2445          transfer->write_req->bufnum = 0;
2446        }
2447      }
2448         
2449      rtems_disk_release (dd);
2450    }
2451    else
2452    {
2453      /*
2454       * We have buffers but no device. Put the BDs back onto the
2455       * ready queue and exit.
2456       */
2457      /* @todo fixme */
2458    }
2459  }
2460}
2461
2462/**
2463 * Process the modified list of buffers. There is a sync or modified list that
2464 * needs to be handled so we have a common function to do the work.
2465 *
2466 * @param dev The device to handle. If BDBUF_INVALID_DEV no device is selected
2467 * so select the device of the first buffer to be written to disk.
2468 * @param chain The modified chain to process.
2469 * @param transfer The chain to append buffers to be written too.
2470 * @param sync_active If true this is a sync operation so expire all timers.
2471 * @param update_timers If true update the timers.
2472 * @param timer_delta It update_timers is true update the timers by this
2473 *                    amount.
2474 */
2475static void
2476rtems_bdbuf_swapout_modified_processing (dev_t*               dev,
2477                                         rtems_chain_control* chain,
2478                                         rtems_chain_control* transfer,
2479                                         bool                 sync_active,
2480                                         bool                 update_timers,
2481                                         uint32_t             timer_delta)
2482{
2483  if (!rtems_chain_is_empty (chain))
2484  {
2485    rtems_chain_node* node = rtems_chain_head (chain);
2486    node = node->next;
2487
2488    while (!rtems_chain_is_tail (chain, node))
2489    {
2490      rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;
2491   
2492      /*
2493       * Check if the buffer's hold timer has reached 0. If a sync is active
2494       * force all the timers to 0.
2495       *
2496       * @note Lots of sync requests will skew this timer. It should be based
2497       *       on TOD to be accurate. Does it matter ?
2498       */
2499      if (sync_active)
2500        bd->hold_timer = 0;
2501 
2502      if (bd->hold_timer)
2503      {
2504        if (update_timers)
2505        {
2506          if (bd->hold_timer > timer_delta)
2507            bd->hold_timer -= timer_delta;
2508          else
2509            bd->hold_timer = 0;
2510        }
2511
2512        if (bd->hold_timer)
2513        {
2514          node = node->next;
2515          continue;
2516        }
2517      }
2518
2519      /*
2520       * This assumes we can set dev_t to BDBUF_INVALID_DEV which is just an
2521       * assumption. Cannot use the transfer list being empty the sync dev
2522       * calls sets the dev to use.
2523       */
2524      if (*dev == BDBUF_INVALID_DEV)
2525        *dev = bd->dev;
2526
2527      if (bd->dev == *dev)
2528      {
2529        rtems_chain_node* next_node = node->next;
2530        rtems_chain_node* tnode = rtems_chain_tail (transfer);
2531   
2532        /*
2533         * The blocks on the transfer list are sorted in block order. This
2534         * means multi-block transfers for drivers that require consecutive
2535         * blocks perform better with sorted blocks and for real disks it may
2536         * help lower head movement.
2537         */
2538
2539        bd->state = RTEMS_BDBUF_STATE_TRANSFER;
2540
2541        rtems_chain_extract (node);
2542
2543        tnode = tnode->previous;
2544         
2545        while (node && !rtems_chain_is_head (transfer, tnode))
2546        {
2547          rtems_bdbuf_buffer* tbd = (rtems_bdbuf_buffer*) tnode;
2548
2549          if (bd->block > tbd->block)
2550          {
2551            rtems_chain_insert (tnode, node);
2552            node = NULL;
2553          }
2554          else
2555            tnode = tnode->previous;
2556        }
2557       
2558        if (node)
2559          rtems_chain_prepend (transfer, node);
2560         
2561        node = next_node;
2562      }
2563      else
2564      {
2565        node = node->next;
2566      }
2567    }
2568  }
2569}
2570
2571/**
2572 * Process the cache's modified buffers. Check the sync list first then the
2573 * modified list extracting the buffers suitable to be written to disk. We have
2574 * a device at a time. The task level loop will repeat this operation while
2575 * there are buffers to be written. If the transfer fails place the buffers
2576 * back on the modified list and try again later. The cache is unlocked while
2577 * the buffers are being written to disk.
2578 *
2579 * @param timer_delta It update_timers is true update the timers by this
2580 *                    amount.
2581 * @param update_timers If true update the timers.
2582 * @param transfer The transfer transaction data.
2583 *
2584 * @retval true Buffers where written to disk so scan again.
2585 * @retval false No buffers where written to disk.
2586 */
2587static bool
2588rtems_bdbuf_swapout_processing (unsigned long                 timer_delta,
2589                                bool                          update_timers,
2590                                rtems_bdbuf_swapout_transfer* transfer)
2591{
2592  rtems_bdbuf_swapout_worker* worker;
2593  bool                        transfered_buffers = false;
2594
2595  rtems_bdbuf_lock_cache ();
2596
2597  /*
2598   * If a sync is active do not use a worker because the current code does not
2599   * cleaning up after. We need to know the buffers have been written when
2600   * syncing to the release sync lock and currently worker threads do not
2601   * return to here. We do not know the worker is the last in a sequence of
2602   * sync writes until after we have it running so we do not know to tell it to
2603   * release the lock. The simplest solution is to get the main swap out task
2604   * perform all sync operations.
2605   */
2606  if (bdbuf_cache.sync_active)
2607    worker = NULL;
2608  else
2609  {
2610    worker = (rtems_bdbuf_swapout_worker*)
2611      rtems_chain_get (&bdbuf_cache.swapout_workers);
2612    if (worker)
2613      transfer = &worker->transfer;
2614  }
2615 
2616  rtems_chain_initialize_empty (&transfer->bds);
2617  transfer->dev = BDBUF_INVALID_DEV;
2618 
2619  /*
2620   * When the sync is for a device limit the sync to that device. If the sync
2621   * is for a buffer handle process the devices in the order on the sync
2622   * list. This means the dev is BDBUF_INVALID_DEV.
2623   */
2624  if (bdbuf_cache.sync_active)
2625    transfer->dev = bdbuf_cache.sync_device;
2626 
2627  /*
2628   * If we have any buffers in the sync queue move them to the modified
2629   * list. The first sync buffer will select the device we use.
2630   */
2631  rtems_bdbuf_swapout_modified_processing (&transfer->dev,
2632                                           &bdbuf_cache.sync,
2633                                           &transfer->bds,
2634                                           true, false,
2635                                           timer_delta);
2636
2637  /*
2638   * Process the cache's modified list.
2639   */
2640  rtems_bdbuf_swapout_modified_processing (&transfer->dev,
2641                                           &bdbuf_cache.modified,
2642                                           &transfer->bds,
2643                                           bdbuf_cache.sync_active,
2644                                           update_timers,
2645                                           timer_delta);
2646
2647  /*
2648   * We have all the buffers that have been modified for this device so the
2649   * cache can be unlocked because the state of each buffer has been set to
2650   * TRANSFER.
2651   */
2652  rtems_bdbuf_unlock_cache ();
2653
2654  /*
2655   * If there are buffers to transfer to the media transfer them.
2656   */
2657  if (!rtems_chain_is_empty (&transfer->bds))
2658  {
2659    if (worker)
2660    {
2661      rtems_status_code sc = rtems_event_send (worker->id,
2662                                               RTEMS_BDBUF_SWAPOUT_SYNC);
2663      if (sc != RTEMS_SUCCESSFUL)
2664        rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE);
2665    }
2666    else
2667    {
2668      rtems_bdbuf_swapout_write (transfer);
2669    }
2670   
2671    transfered_buffers = true;
2672  }
2673   
2674  if (bdbuf_cache.sync_active && !transfered_buffers)
2675  {
2676    rtems_id sync_requester;
2677    rtems_bdbuf_lock_cache ();
2678    sync_requester = bdbuf_cache.sync_requester;
2679    bdbuf_cache.sync_active = false;
2680    bdbuf_cache.sync_requester = 0;
2681    rtems_bdbuf_unlock_cache ();
2682    if (sync_requester)
2683      rtems_event_send (sync_requester, RTEMS_BDBUF_TRANSFER_SYNC);
2684  }
2685 
2686  return transfered_buffers;
2687}
2688
2689/**
2690 * Allocate the write request and initialise it for good measure.
2691 *
2692 * @return rtems_blkdev_request* The write reference memory.
2693 */
2694static rtems_blkdev_request*
2695rtems_bdbuf_swapout_writereq_alloc (void)
2696{
2697  /*
2698   * @note chrisj The rtems_blkdev_request and the array at the end is a hack.
2699   * I am disappointment at finding code like this in RTEMS. The request should
2700   * have been a rtems_chain_control. Simple, fast and less storage as the node
2701   * is already part of the buffer structure.
2702   */
2703  rtems_blkdev_request* write_req =
2704    malloc (sizeof (rtems_blkdev_request) +
2705            (rtems_bdbuf_configuration.max_write_blocks *
2706             sizeof (rtems_blkdev_sg_buffer)));
2707
2708  if (!write_req)
2709    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM);
2710
2711  write_req->req = RTEMS_BLKDEV_REQ_WRITE;
2712  write_req->req_done = rtems_bdbuf_write_done;
2713  write_req->done_arg = write_req;
2714  write_req->io_task = rtems_task_self ();
2715
2716  return write_req;
2717}
2718
2719/**
2720 * The swapout worker thread body.
2721 *
2722 * @param arg A pointer to the worker thread's private data.
2723 * @return rtems_task Not used.
2724 */
2725static rtems_task
2726rtems_bdbuf_swapout_worker_task (rtems_task_argument arg)
2727{
2728  rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) arg;
2729
2730  while (worker->enabled)
2731  {
2732    rtems_event_set   out;
2733    rtems_status_code sc;
2734   
2735    sc = rtems_event_receive (RTEMS_BDBUF_SWAPOUT_SYNC,
2736                              RTEMS_EVENT_ALL | RTEMS_WAIT,
2737                              RTEMS_NO_TIMEOUT,
2738                              &out);
2739
2740    if (sc != RTEMS_SUCCESSFUL)
2741      rtems_fatal_error_occurred (BLKDEV_FATAL_BDBUF_SWAPOUT_RE);
2742
2743    rtems_bdbuf_swapout_write (&worker->transfer);
2744
2745    rtems_bdbuf_lock_cache ();
2746
2747    rtems_chain_initialize_empty (&worker->transfer.bds);
2748    worker->transfer.dev = BDBUF_INVALID_DEV;
2749
2750    rtems_chain_append (&bdbuf_cache.swapout_workers, &worker->link);
2751   
2752    rtems_bdbuf_unlock_cache ();
2753  }
2754
2755  free (worker->transfer.write_req);
2756  free (worker);
2757
2758  rtems_task_delete (RTEMS_SELF);
2759}
2760
2761/**
2762 * Open the swapout worker threads.
2763 */
2764static void
2765rtems_bdbuf_swapout_workers_open (void)
2766{
2767  rtems_status_code sc;
2768  size_t            w;
2769 
2770  rtems_bdbuf_lock_cache ();
2771 
2772  for (w = 0; w < rtems_bdbuf_configuration.swapout_workers; w++)
2773  {
2774    rtems_bdbuf_swapout_worker* worker;
2775
2776    worker = malloc (sizeof (rtems_bdbuf_swapout_worker));
2777    if (!worker)
2778      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM);
2779
2780    rtems_chain_append (&bdbuf_cache.swapout_workers, &worker->link);
2781    worker->enabled = true;
2782    worker->transfer.write_req = rtems_bdbuf_swapout_writereq_alloc ();
2783   
2784    rtems_chain_initialize_empty (&worker->transfer.bds);
2785    worker->transfer.dev = BDBUF_INVALID_DEV;
2786
2787    sc = rtems_task_create (rtems_build_name('B', 'D', 'o', 'a' + w),
2788                            (rtems_bdbuf_configuration.swapout_priority ?
2789                             rtems_bdbuf_configuration.swapout_priority :
2790                             RTEMS_BDBUF_SWAPOUT_TASK_PRIORITY_DEFAULT),
2791                            SWAPOUT_TASK_STACK_SIZE,
2792                            RTEMS_PREEMPT | RTEMS_NO_TIMESLICE | RTEMS_NO_ASR,
2793                            RTEMS_LOCAL | RTEMS_NO_FLOATING_POINT,
2794                            &worker->id);
2795    if (sc != RTEMS_SUCCESSFUL)
2796      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_CREATE);
2797
2798    sc = rtems_task_start (worker->id,
2799                           rtems_bdbuf_swapout_worker_task,
2800                           (rtems_task_argument) worker);
2801    if (sc != RTEMS_SUCCESSFUL)
2802      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_START);
2803  }
2804 
2805  rtems_bdbuf_unlock_cache ();
2806}
2807
2808/**
2809 * Close the swapout worker threads.
2810 */
2811static void
2812rtems_bdbuf_swapout_workers_close (void)
2813{
2814  rtems_chain_node* node;
2815 
2816  rtems_bdbuf_lock_cache ();
2817 
2818  node = rtems_chain_first (&bdbuf_cache.swapout_workers);
2819  while (!rtems_chain_is_tail (&bdbuf_cache.swapout_workers, node))
2820  {
2821    rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) node;
2822    worker->enabled = false;
2823    rtems_event_send (worker->id, RTEMS_BDBUF_SWAPOUT_SYNC);
2824    node = rtems_chain_next (node);
2825  }
2826 
2827  rtems_bdbuf_unlock_cache ();
2828}
2829
2830/**
2831 * Body of task which takes care on flushing modified buffers to the disk.
2832 *
2833 * @param arg A pointer to the global cache data. Use the global variable and
2834 *            not this.
2835 * @return rtems_task Not used.
2836 */
2837static rtems_task
2838rtems_bdbuf_swapout_task (rtems_task_argument arg)
2839{
2840  rtems_bdbuf_swapout_transfer transfer;
2841  uint32_t                     period_in_ticks;
2842  const uint32_t               period_in_msecs = bdbuf_config.swapout_period;;
2843  uint32_t                     timer_delta;
2844
2845  transfer.write_req = rtems_bdbuf_swapout_writereq_alloc ();
2846  rtems_chain_initialize_empty (&transfer.bds);
2847  transfer.dev = BDBUF_INVALID_DEV;
2848
2849  /*
2850   * Localise the period.
2851   */
2852  period_in_ticks = RTEMS_MICROSECONDS_TO_TICKS (period_in_msecs * 1000);
2853
2854  /*
2855   * This is temporary. Needs to be changed to use the real time clock.
2856   */
2857  timer_delta = period_in_msecs;
2858
2859  /*
2860   * Create the worker threads.
2861   */
2862  rtems_bdbuf_swapout_workers_open ();
2863 
2864  while (bdbuf_cache.swapout_enabled)
2865  {
2866    rtems_event_set   out;
2867    rtems_status_code sc;
2868
2869    /*
2870     * Only update the timers once in the processing cycle.
2871     */
2872    bool update_timers = true;
2873   
2874    /*
2875     * If we write buffers to any disk perform a check again. We only write a
2876     * single device at a time and the cache may have more than one device's
2877     * buffers modified waiting to be written.
2878     */
2879    bool transfered_buffers;
2880
2881    do
2882    {
2883      transfered_buffers = false;
2884
2885      /*
2886       * Extact all the buffers we find for a specific device. The device is
2887       * the first one we find on a modified list. Process the sync queue of
2888       * buffers first.
2889       */
2890      if (rtems_bdbuf_swapout_processing (timer_delta,
2891                                          update_timers,
2892                                          &transfer))
2893      {
2894        transfered_buffers = true;
2895      }
2896     
2897      /*
2898       * Only update the timers once.
2899       */
2900      update_timers = false;
2901    }
2902    while (transfered_buffers);
2903
2904    sc = rtems_event_receive (RTEMS_BDBUF_SWAPOUT_SYNC,
2905                              RTEMS_EVENT_ALL | RTEMS_WAIT,
2906                              period_in_ticks,
2907                              &out);
2908
2909    if ((sc != RTEMS_SUCCESSFUL) && (sc != RTEMS_TIMEOUT))
2910      rtems_fatal_error_occurred (BLKDEV_FATAL_BDBUF_SWAPOUT_RE);
2911  }
2912
2913  rtems_bdbuf_swapout_workers_close ();
2914 
2915  free (transfer.write_req);
2916
2917  rtems_task_delete (RTEMS_SELF);
2918}
Note: See TracBrowser for help on using the repository browser.