source: rtems/cpukit/libblock/src/bdbuf.c @ 94d7bd7

Last change on this file since 94d7bd7 was b96e09c, checked in by Thomas Doerfler <Thomas.Doerfler@…>, on 10/13/09 at 07:58:33
  • libblock/include/rtems/diskdevs.h: Added driver data pointer to IO control function. The IO control handler takes now the disk device as first parameter instead of the physical device number.
  • cpukit/libblock/include/rtems/blkdev.h, libblock/src/bdbuf.c, libblock/src/blkdev.c, libblock/src/diskdevs.c, libblock/src/nvdisk.c, libblock/src/flashdisk.c, libblock/src/ramdisk.c: Update for block device API change.
/**
 * @file
 *
 * @ingroup rtems_bdbuf
 *
 * Block device buffer management.
 */

/*
 * Disk I/O buffering
 * Buffer management
 *
 * Copyright (C) 2001 OKTET Ltd., St.-Peterburg, Russia
 * Author: Andrey G. Ivanov <Andrey.Ivanov@oktet.ru>
 *         Victor V. Vengerov <vvv@oktet.ru>
 *         Alexander Kukuta <kam@oktet.ru>
 *
 * Copyright (C) 2008,2009 Chris Johns <chrisj@rtems.org>
 *    Rewritten to remove score mutex access. Fixes many performance
 *    issues.
 *
 * @(#) bdbuf.c,v 1.14 2004/04/17 08:15:17 ralf Exp
 */

/**
 * Set to 1 to enable debug tracing.
 */
#define RTEMS_BDBUF_TRACE 0

#if HAVE_CONFIG_H
#include "config.h"
#endif

#include <rtems.h>
#include <rtems/error.h>
#include <rtems/malloc.h>
#include <limits.h>
#include <errno.h>
#include <assert.h>
#include <stdio.h>

#include "rtems/bdbuf.h"

/*
 * Simpler label for this file.
 */
#define bdbuf_config rtems_bdbuf_configuration

/**
 * Swapout transfer transaction data. This data is passed to a worker thread
 * to handle the write phase of the transfer.
 */
typedef struct rtems_bdbuf_swapout_transfer
{
  rtems_chain_control   bds;         /**< The transfer list of BDs. */
  dev_t                 dev;         /**< The device the transfer is for. */
  rtems_blkdev_request* write_req;   /**< The write request array. */
  uint32_t              bufs_per_bd; /**< Number of buffers per bd. */
} rtems_bdbuf_swapout_transfer;

/**
 * Swapout worker thread. These are available to take processing from the
 * main swapout thread and handle the I/O operation.
 */
typedef struct rtems_bdbuf_swapout_worker
{
  rtems_chain_node             link;     /**< The threads sit on a chain when
                                          * idle. */
  rtems_id                     id;       /**< The id of the task so we can wake
                                          * it. */
  volatile bool                enabled;  /**< The worker is enabled. */
  rtems_bdbuf_swapout_transfer transfer; /**< The transfer data for this
                                          * thread. */
} rtems_bdbuf_swapout_worker;

/**
 * The BD buffer cache.
 */
typedef struct rtems_bdbuf_cache
{
  rtems_id            swapout;           /**< Swapout task ID */
  volatile bool       swapout_enabled;   /**< Swapout is only running if
                                          * enabled. Set to false to kill the
                                          * swap out task. It deletes itself. */
  rtems_chain_control swapout_workers;   /**< The work threads for the swapout
                                          * task. */

  rtems_bdbuf_buffer* bds;               /**< Pointer to table of buffer
                                          * descriptors. */
  void*               buffers;           /**< The buffer's memory. */
  size_t              buffer_min_count;  /**< Number of minimum size buffers
                                          * that fit the buffer memory. */
  size_t              max_bds_per_group; /**< The number of BDs of minimum
                                          * buffer size that fit in a group. */
  uint32_t            flags;             /**< Configuration flags. */

  rtems_id            lock;              /**< The cache lock. It locks all
                                          * cache data, BD and lists. */
  rtems_id            sync_lock;         /**< Sync calls block writes. */
  volatile bool       sync_active;       /**< True if a sync is active. */
  volatile rtems_id   sync_requester;    /**< The sync requester. */
  volatile dev_t      sync_device;       /**< The device to sync, or -1 if not
                                          * a device sync. */

  rtems_bdbuf_buffer* tree;              /**< Buffer descriptor lookup AVL tree
                                          * root. There is only one. */
  rtems_chain_control ready;             /**< Free buffers list, read-ahead, or
                                          * resized group buffers. */
  rtems_chain_control lru;               /**< Least recently used list */
  rtems_chain_control modified;          /**< Modified buffers list */
  rtems_chain_control sync;              /**< Buffers to sync list */

  rtems_id            access;            /**< Obtain if waiting for a buffer in
                                          * the ACCESS state. */
  volatile uint32_t   access_waiters;    /**< Count of access blockers. */
  rtems_id            transfer;          /**< Obtain if waiting for a buffer in
                                          * the TRANSFER state. */
  volatile uint32_t   transfer_waiters;  /**< Count of transfer blockers. */
  rtems_id            waiting;           /**< Obtain if waiting for a buffer
                                          * and none are available. */
  volatile uint32_t   wait_waiters;      /**< Count of waiting blockers. */

  size_t              group_count;       /**< The number of groups. */
  rtems_bdbuf_group*  groups;            /**< The groups. */

  bool                initialised;       /**< Initialised state. */
} rtems_bdbuf_cache;

/**
 * Fatal errors
 */
#define RTEMS_BLKDEV_FATAL_ERROR(n) \
  (((uint32_t)'B' << 24) | ((uint32_t)(n) & (uint32_t)0x00FFFFFF))

#define RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_1 RTEMS_BLKDEV_FATAL_ERROR(1)
#define RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_2 RTEMS_BLKDEV_FATAL_ERROR(2)
#define RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_3 RTEMS_BLKDEV_FATAL_ERROR(3)
#define RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_4 RTEMS_BLKDEV_FATAL_ERROR(4)
#define RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_5 RTEMS_BLKDEV_FATAL_ERROR(5)
#define RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_6 RTEMS_BLKDEV_FATAL_ERROR(6)
#define RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_7 RTEMS_BLKDEV_FATAL_ERROR(7)
#define RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_8 RTEMS_BLKDEV_FATAL_ERROR(8)
#define RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_9 RTEMS_BLKDEV_FATAL_ERROR(9)
#define RTEMS_BLKDEV_FATAL_BDBUF_SWAPOUT       RTEMS_BLKDEV_FATAL_ERROR(10)
#define RTEMS_BLKDEV_FATAL_BDBUF_SYNC_LOCK     RTEMS_BLKDEV_FATAL_ERROR(11)
#define RTEMS_BLKDEV_FATAL_BDBUF_SYNC_UNLOCK   RTEMS_BLKDEV_FATAL_ERROR(12)
#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_LOCK    RTEMS_BLKDEV_FATAL_ERROR(13)
#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_UNLOCK  RTEMS_BLKDEV_FATAL_ERROR(14)
#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_1  RTEMS_BLKDEV_FATAL_ERROR(15)
#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_2  RTEMS_BLKDEV_FATAL_ERROR(16)
#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_3  RTEMS_BLKDEV_FATAL_ERROR(17)
#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_TO RTEMS_BLKDEV_FATAL_ERROR(18)
#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAKE    RTEMS_BLKDEV_FATAL_ERROR(19)
#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE       RTEMS_BLKDEV_FATAL_ERROR(20)
#define RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM      RTEMS_BLKDEV_FATAL_ERROR(21)
#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_CREATE  RTEMS_BLKDEV_FATAL_ERROR(22)
#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_START   RTEMS_BLKDEV_FATAL_ERROR(23)
#define BLKDEV_FATAL_BDBUF_SWAPOUT_RE          RTEMS_BLKDEV_FATAL_ERROR(24)
#define BLKDEV_FATAL_BDBUF_SWAPOUT_TS          RTEMS_BLKDEV_FATAL_ERROR(25)

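/*
 * A worked example (editor's note, not part of the original file): the macro
 * above packs an ASCII 'B' marker into the top byte and the error number
 * into the low 24 bits, so bdbuf codes can be recognised in a fatal error
 * handler. For instance:
 *
 *   RTEMS_BLKDEV_FATAL_BDBUF_CACHE_LOCK
 *     == ((uint32_t)'B' << 24) | 13
 *     == 0x4200000D
 */
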
/**
 * The events used in this code. These should be system events rather than
 * application events.
 */
#define RTEMS_BDBUF_TRANSFER_SYNC  RTEMS_EVENT_1
#define RTEMS_BDBUF_SWAPOUT_SYNC   RTEMS_EVENT_2

/**
 * The swap out task stack size. Should be more than enough for most drivers
 * with tracing turned on.
 */
#define SWAPOUT_TASK_STACK_SIZE (8 * 1024)

/**
 * Lock semaphore attributes. This is used for locking type mutexes.
 *
 * @warning Priority inheritance is on.
 */
#define RTEMS_BDBUF_CACHE_LOCK_ATTRIBS \
  (RTEMS_PRIORITY | RTEMS_BINARY_SEMAPHORE | \
   RTEMS_INHERIT_PRIORITY | RTEMS_NO_PRIORITY_CEILING | RTEMS_LOCAL)

/**
 * Waiter semaphore attributes.
 *
 * @warning Do not configure as inherit priority. If a driver is in the driver
 *          initialisation table this locked semaphore will have the IDLE task
 *          as the holder and a blocking task will raise the priority of the
 *          IDLE task which can cause unusual side effects.
 */
#define RTEMS_BDBUF_CACHE_WAITER_ATTRIBS \
  (RTEMS_PRIORITY | RTEMS_SIMPLE_BINARY_SEMAPHORE | \
   RTEMS_NO_INHERIT_PRIORITY | RTEMS_NO_PRIORITY_CEILING | RTEMS_LOCAL)

/**
 * Waiter timeout. Set to non-zero to find some info on a waiter that is
 * waiting too long.
 */
#define RTEMS_BDBUF_WAIT_TIMEOUT RTEMS_NO_TIMEOUT
#if !defined (RTEMS_BDBUF_WAIT_TIMEOUT)
#define RTEMS_BDBUF_WAIT_TIMEOUT \
  (TOD_MICROSECONDS_TO_TICKS (20000000))
#endif

/*
 * The swap out task.
 */
static rtems_task rtems_bdbuf_swapout_task(rtems_task_argument arg);

/**
 * The Buffer Descriptor cache.
 */
static rtems_bdbuf_cache bdbuf_cache;

#if RTEMS_BDBUF_TRACE
/**
 * If true output the trace message.
 */
bool rtems_bdbuf_tracer;

/**
 * Return the number of items on the list.
 *
 * @param list The chain control.
 * @return uint32_t The number of items on the list.
 */
uint32_t
rtems_bdbuf_list_count (rtems_chain_control* list)
{
  rtems_chain_node* node = rtems_chain_first (list);
  uint32_t          count = 0;
  while (!rtems_chain_is_tail (list, node))
  {
    count++;
    node = rtems_chain_next (node);
  }
  return count;
}

/**
 * Show the usage for the bdbuf cache.
 */
void
rtems_bdbuf_show_usage (void)
{
  uint32_t group;
  uint32_t total = 0;
  uint32_t val;
  for (group = 0; group < bdbuf_cache.group_count; group++)
    total += bdbuf_cache.groups[group].users;
  printf ("bdbuf:group users=%lu", total);
  val = rtems_bdbuf_list_count (&bdbuf_cache.ready);
  printf (", ready=%lu", val);
  total = val;
  val = rtems_bdbuf_list_count (&bdbuf_cache.lru);
  printf (", lru=%lu", val);
  total += val;
  val = rtems_bdbuf_list_count (&bdbuf_cache.modified);
  printf (", mod=%lu", val);
  total += val;
  val = rtems_bdbuf_list_count (&bdbuf_cache.sync);
  printf (", sync=%lu", val);
  total += val;
  printf (", total=%lu\n", total);
}

/**
 * Show the users for a group of a bd.
 *
 * @param where A label to show the context of output.
 * @param bd The bd to show the users of.
 */
void
rtems_bdbuf_show_users (const char* where, rtems_bdbuf_buffer* bd)
{
  const char* states[] =
    { "EM", "RA", "CH", "AC", "MD", "AM", "SY", "TR" };
  printf ("bdbuf:users: %15s: [%ld (%s)] %ld:%ld = %lu %s\n",
          where,
          bd->block, states[bd->state],
          bd->group - bdbuf_cache.groups,
          bd - bdbuf_cache.bds,
          bd->group->users,
          bd->group->users > 8 ? "<<<<<<<" : "");
}
#else
#define rtems_bdbuf_tracer (0)
#define rtems_bdbuf_show_usage()
#define rtems_bdbuf_show_users(_w, _b)
#endif

/**
 * The default maximum height of 32 allows for AVL trees having between
 * 5,704,880 and 4,294,967,295 nodes, depending on order of insertion.  You may
 * change this compile-time constant as you wish.
 */
#ifndef RTEMS_BDBUF_AVL_MAX_HEIGHT
#define RTEMS_BDBUF_AVL_MAX_HEIGHT (32)
#endif

/**
 * Searches for the node with specified dev/block.
 *
 * @param root pointer to the root node of the AVL-Tree
 * @param dev device search key
 * @param block block search key
 * @retval NULL node with the specified dev/block is not found
 * @return pointer to the node with specified dev/block
 */
static rtems_bdbuf_buffer *
rtems_bdbuf_avl_search (rtems_bdbuf_buffer** root,
                        dev_t                dev,
                        rtems_blkdev_bnum    block)
{
  rtems_bdbuf_buffer* p = *root;

  while ((p != NULL) && ((p->dev != dev) || (p->block != block)))
  {
    if ((p->dev < dev) || ((p->dev == dev) && (p->block < block)))
    {
      p = p->avl.right;
    }
    else
    {
      p = p->avl.left;
    }
  }

  return p;
}

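/*
 * A minimal usage sketch (editor's illustration, not part of the original
 * file): the AVL tree maps a dev/block pair to a buffer descriptor, and all
 * three AVL calls below are made with the cache lock held. A typical lookup
 * and insert, with dev and block as hypothetical locals, would be:
 *
 *   rtems_bdbuf_buffer* bd =
 *     rtems_bdbuf_avl_search (&bdbuf_cache.tree, dev, block);
 *   if (bd == NULL)
 *   {
 *     bd = some_free_bd;                          // hypothetical free BD
 *     bd->dev = dev;
 *     bd->block = block;
 *     rtems_bdbuf_avl_insert (&bdbuf_cache.tree, bd);
 *   }
 */
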
/**
 * Inserts the specified node into the AVL tree.
 *
 * @param root pointer to the root node of the AVL-Tree
 * @param node Pointer to the node to add.
 * @retval 0 The node was added successfully
 * @retval -1 An error occurred
 */
static int
rtems_bdbuf_avl_insert(rtems_bdbuf_buffer** root,
                       rtems_bdbuf_buffer*  node)
{
  dev_t             dev = node->dev;
  rtems_blkdev_bnum block = node->block;

  rtems_bdbuf_buffer*  p = *root;
  rtems_bdbuf_buffer*  q;
  rtems_bdbuf_buffer*  p1;
  rtems_bdbuf_buffer*  p2;
  rtems_bdbuf_buffer*  buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT];
  rtems_bdbuf_buffer** buf_prev = buf_stack;

  bool modified = false;

  if (p == NULL)
  {
    *root = node;
    node->avl.left = NULL;
    node->avl.right = NULL;
    node->avl.bal = 0;
    return 0;
  }

  while (p != NULL)
  {
    *buf_prev++ = p;

    if ((p->dev < dev) || ((p->dev == dev) && (p->block < block)))
    {
      p->avl.cache = 1;
      q = p->avl.right;
      if (q == NULL)
      {
        q = node;
        p->avl.right = q;
        break;
      }
    }
    else if ((p->dev != dev) || (p->block != block))
    {
      p->avl.cache = -1;
      q = p->avl.left;
      if (q == NULL)
      {
        q = node;
        p->avl.left = q;
        break;
      }
    }
    else
    {
      return -1;
    }

    p = q;
  }

  q->avl.left = q->avl.right = NULL;
  q->avl.bal = 0;
  modified = true;
  buf_prev--;

  while (modified)
  {
    if (p->avl.cache == -1)
    {
      switch (p->avl.bal)
      {
        case 1:
          p->avl.bal = 0;
          modified = false;
          break;

        case 0:
          p->avl.bal = -1;
          break;

        case -1:
          p1 = p->avl.left;
          if (p1->avl.bal == -1) /* simple LL-turn */
          {
            p->avl.left = p1->avl.right;
            p1->avl.right = p;
            p->avl.bal = 0;
            p = p1;
          }
          else /* double LR-turn */
          {
            p2 = p1->avl.right;
            p1->avl.right = p2->avl.left;
            p2->avl.left = p1;
            p->avl.left = p2->avl.right;
            p2->avl.right = p;
            if (p2->avl.bal == -1) p->avl.bal = +1; else p->avl.bal = 0;
            if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0;
            p = p2;
          }
          p->avl.bal = 0;
          modified = false;
          break;

        default:
          break;
      }
    }
    else
    {
      switch (p->avl.bal)
      {
        case -1:
          p->avl.bal = 0;
          modified = false;
          break;

        case 0:
          p->avl.bal = 1;
          break;

        case 1:
          p1 = p->avl.right;
          if (p1->avl.bal == 1) /* simple RR-turn */
          {
            p->avl.right = p1->avl.left;
            p1->avl.left = p;
            p->avl.bal = 0;
            p = p1;
          }
          else /* double RL-turn */
          {
            p2 = p1->avl.left;
            p1->avl.left = p2->avl.right;
            p2->avl.right = p1;
            p->avl.right = p2->avl.left;
            p2->avl.left = p;
            if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0;
            if (p2->avl.bal == -1) p1->avl.bal = +1; else p1->avl.bal = 0;
            p = p2;
          }
          p->avl.bal = 0;
          modified = false;
          break;

        default:
          break;
      }
    }
    q = p;
    if (buf_prev > buf_stack)
    {
      p = *--buf_prev;

      if (p->avl.cache == -1)
      {
        p->avl.left = q;
      }
      else
      {
        p->avl.right = q;
      }
    }
    else
    {
      *root = p;
      break;
    }
  }

  return 0;
}

/**
 * Removes the node from the tree.
 *
 * @param root Pointer to pointer to the root node
 * @param node Pointer to the node to remove
 * @retval 0 Item removed
 * @retval -1 No such item found
 */
static int
rtems_bdbuf_avl_remove(rtems_bdbuf_buffer**      root,
                       const rtems_bdbuf_buffer* node)
{
  dev_t             dev = node->dev;
  rtems_blkdev_bnum block = node->block;

  rtems_bdbuf_buffer*  p = *root;
  rtems_bdbuf_buffer*  q;
  rtems_bdbuf_buffer*  r;
  rtems_bdbuf_buffer*  s;
  rtems_bdbuf_buffer*  p1;
  rtems_bdbuf_buffer*  p2;
  rtems_bdbuf_buffer*  buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT];
  rtems_bdbuf_buffer** buf_prev = buf_stack;

  bool modified = false;

  memset (buf_stack, 0, sizeof(buf_stack));

  while (p != NULL)
  {
    *buf_prev++ = p;

    if ((p->dev < dev) || ((p->dev == dev) && (p->block < block)))
    {
      p->avl.cache = 1;
      p = p->avl.right;
    }
    else if ((p->dev != dev) || (p->block != block))
    {
      p->avl.cache = -1;
      p = p->avl.left;
    }
    else
    {
      /* node found */
      break;
    }
  }

  if (p == NULL)
  {
    /* there is no such node */
    return -1;
  }

  q = p;

  buf_prev--;
  if (buf_prev > buf_stack)
  {
    p = *(buf_prev - 1);
  }
  else
  {
    p = NULL;
  }

  /* at this moment q is the node to delete and p is q's parent */
  if (q->avl.right == NULL)
  {
    r = q->avl.left;
    if (r != NULL)
    {
      r->avl.bal = 0;
    }
    q = r;
  }
  else
  {
    rtems_bdbuf_buffer **t;

    r = q->avl.right;

    if (r->avl.left == NULL)
    {
      r->avl.left = q->avl.left;
      r->avl.bal = q->avl.bal;
      r->avl.cache = 1;
      *buf_prev++ = q = r;
    }
    else
    {
      t = buf_prev++;
      s = r;

      while (s->avl.left != NULL)
      {
        *buf_prev++ = r = s;
        s = r->avl.left;
        r->avl.cache = -1;
      }

      s->avl.left = q->avl.left;
      r->avl.left = s->avl.right;
      s->avl.right = q->avl.right;
      s->avl.bal = q->avl.bal;
      s->avl.cache = 1;

      *t = q = s;
    }
  }

  if (p != NULL)
  {
    if (p->avl.cache == -1)
    {
      p->avl.left = q;
    }
    else
    {
      p->avl.right = q;
    }
  }
  else
  {
    *root = q;
  }

  modified = true;

  while (modified)
  {
    if (buf_prev > buf_stack)
    {
      p = *--buf_prev;
    }
    else
    {
      break;
    }

    if (p->avl.cache == -1)
    {
      /* rebalance left branch */
      switch (p->avl.bal)
      {
        case -1:
          p->avl.bal = 0;
          break;
        case  0:
          p->avl.bal = 1;
          modified = false;
          break;

        case +1:
          p1 = p->avl.right;

          if (p1->avl.bal >= 0) /* simple RR-turn */
          {
            p->avl.right = p1->avl.left;
            p1->avl.left = p;

            if (p1->avl.bal == 0)
            {
              p1->avl.bal = -1;
              modified = false;
            }
            else
            {
              p->avl.bal = 0;
              p1->avl.bal = 0;
            }
            p = p1;
          }
          else /* double RL-turn */
          {
            p2 = p1->avl.left;

            p1->avl.left = p2->avl.right;
            p2->avl.right = p1;
            p->avl.right = p2->avl.left;
            p2->avl.left = p;

            if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0;
            if (p2->avl.bal == -1) p1->avl.bal = 1; else p1->avl.bal = 0;

            p = p2;
            p2->avl.bal = 0;
          }
          break;

        default:
          break;
      }
    }
    else
    {
      /* rebalance right branch */
      switch (p->avl.bal)
      {
        case +1:
          p->avl.bal = 0;
          break;

        case  0:
          p->avl.bal = -1;
          modified = false;
          break;

        case -1:
          p1 = p->avl.left;

          if (p1->avl.bal <= 0) /* simple LL-turn */
          {
            p->avl.left = p1->avl.right;
            p1->avl.right = p;
            if (p1->avl.bal == 0)
            {
              p1->avl.bal = 1;
              modified = false;
            }
            else
            {
              p->avl.bal = 0;
              p1->avl.bal = 0;
            }
            p = p1;
          }
          else /* double LR-turn */
          {
            p2 = p1->avl.right;

            p1->avl.right = p2->avl.left;
            p2->avl.left = p1;
            p->avl.left = p2->avl.right;
            p2->avl.right = p;

            if (p2->avl.bal == -1) p->avl.bal = 1; else p->avl.bal = 0;
            if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0;

            p = p2;
            p2->avl.bal = 0;
          }
          break;

        default:
          break;
      }
    }

    if (buf_prev > buf_stack)
    {
      q = *(buf_prev - 1);

      if (q->avl.cache == -1)
      {
        q->avl.left = p;
      }
      else
      {
        q->avl.right = p;
      }
    }
    else
    {
      *root = p;
      break;
    }

  }

  return 0;
}

/**
 * Change the block number for the block size to the block number for the media
 * block size. We have to use 64-bit maths. There is no shortcut here.
 *
 * @param block The logical block number in the block size terms.
 * @param block_size The block size.
 * @param media_block_size The block size of the media.
 * @return rtems_blkdev_bnum The media block number.
 */
static rtems_blkdev_bnum
rtems_bdbuf_media_block (rtems_blkdev_bnum block,
                         size_t            block_size,
                         size_t            media_block_size)
{
  return (((uint64_t) block) * block_size) / media_block_size;
}

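/*
 * A worked example (editor's note, not part of the original file): with a
 * logical block size of 1024 bytes on a disk with 512 byte media blocks,
 * logical block 3 starts at byte offset 3072, which is media block 6:
 *
 *   rtems_blkdev_bnum media = rtems_bdbuf_media_block (3, 1024, 512);
 *   // media == (3 * 1024) / 512 == 6
 *
 * The 64-bit intermediate keeps block * block_size from overflowing a
 * 32-bit value on large disks.
 */
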
/**
 * Lock the mutex. A single task can nest calls.
 *
 * @param lock The mutex to lock.
 * @param fatal_error_code The error code if the call fails.
 */
static void
rtems_bdbuf_lock (rtems_id lock, uint32_t fatal_error_code)
{
  rtems_status_code sc = rtems_semaphore_obtain (lock,
                                                 RTEMS_WAIT,
                                                 RTEMS_NO_TIMEOUT);
  if (sc != RTEMS_SUCCESSFUL)
    rtems_fatal_error_occurred (fatal_error_code);
}

/**
 * Unlock the mutex.
 *
 * @param lock The mutex to unlock.
 * @param fatal_error_code The error code if the call fails.
 */
static void
rtems_bdbuf_unlock (rtems_id lock, uint32_t fatal_error_code)
{
  rtems_status_code sc = rtems_semaphore_release (lock);
  if (sc != RTEMS_SUCCESSFUL)
    rtems_fatal_error_occurred (fatal_error_code);
}

/**
 * Lock the cache. A single task can nest calls.
 */
static void
rtems_bdbuf_lock_cache (void)
{
  rtems_bdbuf_lock (bdbuf_cache.lock, RTEMS_BLKDEV_FATAL_BDBUF_CACHE_LOCK);
}

/**
 * Unlock the cache.
 */
static void
rtems_bdbuf_unlock_cache (void)
{
  rtems_bdbuf_unlock (bdbuf_cache.lock, RTEMS_BLKDEV_FATAL_BDBUF_CACHE_UNLOCK);
}

/**
 * Lock the cache's sync. A single task can nest calls.
 */
static void
rtems_bdbuf_lock_sync (void)
{
  rtems_bdbuf_lock (bdbuf_cache.sync_lock, RTEMS_BLKDEV_FATAL_BDBUF_SYNC_LOCK);
}

/**
 * Unlock the cache's sync lock. Any blocked writers are woken.
 */
static void
rtems_bdbuf_unlock_sync (void)
{
  rtems_bdbuf_unlock (bdbuf_cache.sync_lock,
                      RTEMS_BLKDEV_FATAL_BDBUF_SYNC_UNLOCK);
}

/**
 * Wait until woken. Semaphores are used so a number of tasks can wait and can
 * be woken at once. Task events would require we maintain a list of tasks to
 * be woken and this would require storage and we do not know the number of
 * tasks that could be waiting.
 *
 * While we have the cache locked we can try and claim the semaphore and
 * therefore know when we release the lock to the cache we will block until the
 * semaphore is released. This may even happen before we get to block.
 *
 * A counter is used to save the release call when no one is waiting.
 *
 * The function assumes the cache is locked on entry and it will be locked on
 * exit.
 *
 * @param sema The semaphore to block on and wait.
 * @param waiters The wait counter for this semaphore.
 */
static void
rtems_bdbuf_wait (rtems_id* sema, volatile uint32_t* waiters)
{
  rtems_status_code sc;
  rtems_mode        prev_mode;

  /*
   * Indicate we are waiting.
   */
  *waiters += 1;

  /*
   * Disable preemption then unlock the cache and block.  There is no POSIX
   * condition variable in the core API so this is a workaround.
   *
   * The issue is a task could preempt after the cache is unlocked because it is
   * blocking or just hits that window, and before this task has blocked on the
   * semaphore. If the preempting task flushes the queue this task will not see
   * the flush and may block forever or until another transaction flushes this
   * semaphore.
   */
  sc = rtems_task_mode (RTEMS_NO_PREEMPT, RTEMS_PREEMPT_MASK, &prev_mode);

  if (sc != RTEMS_SUCCESSFUL)
    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_1);

  /*
   * Unlock the cache, wait, and lock the cache when we return.
   */
  rtems_bdbuf_unlock_cache ();

  sc = rtems_semaphore_obtain (*sema, RTEMS_WAIT, RTEMS_BDBUF_WAIT_TIMEOUT);

  if (sc == RTEMS_TIMEOUT)
    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_TO);

  if (sc != RTEMS_UNSATISFIED)
    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_2);

  rtems_bdbuf_lock_cache ();

  sc = rtems_task_mode (prev_mode, RTEMS_ALL_MODE_MASKS, &prev_mode);

  if (sc != RTEMS_SUCCESSFUL)
    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_3);

  *waiters -= 1;
}

/**
 * Wake a blocked resource. The resource has a counter that lets us know if
 * there are any waiters.
 *
 * @param sema The semaphore to release.
 * @param waiters The wait counter for this semaphore.
 */
static void
rtems_bdbuf_wake (rtems_id sema, volatile uint32_t* waiters)
{
  if (*waiters)
  {
    rtems_status_code sc;

    sc = rtems_semaphore_flush (sema);

    if (sc != RTEMS_SUCCESSFUL)
      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAKE);
  }
}

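/*
 * A sketch of the wait/wake pairing above (editor's illustration, not part
 * of the original file): a caller holding the cache lock loops on a
 * condition and blocks on one of the waiter semaphores; the waker uses
 * rtems_semaphore_flush, so the blocked obtain in rtems_bdbuf_wait is
 * expected to return RTEMS_UNSATISFIED, which is why any other status is
 * treated as fatal there.
 *
 *   while (no_buffer_available)                  // hypothetical condition
 *     rtems_bdbuf_wait (&bdbuf_cache.waiting, &bdbuf_cache.wait_waiters);
 *
 *   // ... and the task that frees a buffer wakes all waiters:
 *   rtems_bdbuf_wake (bdbuf_cache.waiting, &bdbuf_cache.wait_waiters);
 */
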
/**
 * Add a buffer descriptor to the modified list. This modified list is treated
 * a little differently to the other lists. To access it you must have the
 * cache locked and this is assumed to be the case on entry to this call.
 *
 * If the cache has a device being sync'ed and the bd is for that device the
 * call must block and wait until the sync is over before adding the bd to the
 * modified list. Once a sync happens for a device no BDs can be added to the
 * modified list. The disk image is forced to be a snapshot at that moment in
 * time.
 *
 * @note Do not lower the group user count as the modified list is a user of
 * the buffer.
 *
 * @param bd The bd to queue to the cache's modified list.
 */
static void
rtems_bdbuf_append_modified (rtems_bdbuf_buffer* bd)
{
  /*
   * If the cache has a device being sync'ed check if this bd is for that
   * device. If it is unlock the cache and block on the sync lock. Once we have
   * the sync lock release it.
   */
  if (bdbuf_cache.sync_active && (bdbuf_cache.sync_device == bd->dev))
  {
    rtems_bdbuf_unlock_cache ();
    /* Wait for the sync lock */
    rtems_bdbuf_lock_sync ();
    rtems_bdbuf_unlock_sync ();
    rtems_bdbuf_lock_cache ();
  }

  bd->state = RTEMS_BDBUF_STATE_MODIFIED;

  rtems_chain_append (&bdbuf_cache.modified, &bd->link);
}

/**
 * Wake the swapout task.
 */
static void
rtems_bdbuf_wake_swapper (void)
{
  rtems_status_code sc = rtems_event_send (bdbuf_cache.swapout,
                                           RTEMS_BDBUF_SWAPOUT_SYNC);
  if (sc != RTEMS_SUCCESSFUL)
    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE);
}

/**
 * Compute the number of BDs per group for a given buffer size.
 *
 * @param size The buffer size. It can be any size and we scale up.
 */
static size_t
rtems_bdbuf_bds_per_group (size_t size)
{
  size_t bufs_per_size;
  size_t bds_per_size;

  if (size > rtems_bdbuf_configuration.buffer_max)
    return 0;

  bufs_per_size = ((size - 1) / bdbuf_config.buffer_min) + 1;

  for (bds_per_size = 1;
       bds_per_size < bufs_per_size;
       bds_per_size <<= 1)
    ;

  return bdbuf_cache.max_bds_per_group / bds_per_size;
}

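/*
 * A worked example (editor's note, not part of the original file): assume a
 * configuration with buffer_min of 512 bytes and buffer_max of 4096 bytes,
 * so max_bds_per_group is 8. For a 2048 byte block size:
 *
 *   bufs_per_size = ((2048 - 1) / 512) + 1      == 4
 *   bds_per_size  = first power of two >= 4     == 4
 *   result        = 8 / 4                       == 2 BDs per group
 *
 * A block size above buffer_max returns 0, which callers treat as an
 * invalid block size.
 */
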
/**
 * Reallocate a group. The BDs currently allocated in the group are removed
 * from the AVL tree and any lists, then the new BDs are prepended to the ready
 * list of the cache.
 *
 * @param group The group to reallocate.
 * @param new_bds_per_group The new count of BDs per group.
 */
static void
rtems_bdbuf_group_realloc (rtems_bdbuf_group* group, size_t new_bds_per_group)
{
  rtems_bdbuf_buffer* bd;
  int                 b;
  size_t              bufs_per_bd;

  if (rtems_bdbuf_tracer)
    printf ("bdbuf:realloc: %lu: %ld -> %ld\n",
            group - bdbuf_cache.groups, group->bds_per_group,
            new_bds_per_group);

  bufs_per_bd = bdbuf_cache.max_bds_per_group / group->bds_per_group;

  for (b = 0, bd = group->bdbuf;
       b < group->bds_per_group;
       b++, bd += bufs_per_bd)
  {
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_EMPTY:
        break;
      case RTEMS_BDBUF_STATE_CACHED:
      case RTEMS_BDBUF_STATE_READ_AHEAD:
        if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0)
          rtems_fatal_error_occurred ((bd->state << 16) |
                                      RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_1);
        break;
      default:
        rtems_fatal_error_occurred ((bd->state << 16) |
                                    RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_8);
    }

    rtems_chain_extract (&bd->link);
  }

  group->bds_per_group = new_bds_per_group;
  bufs_per_bd = bdbuf_cache.max_bds_per_group / new_bds_per_group;

  for (b = 0, bd = group->bdbuf;
       b < group->bds_per_group;
       b++, bd += bufs_per_bd)
  {
    bd->state = RTEMS_BDBUF_STATE_EMPTY;
    rtems_chain_prepend (&bdbuf_cache.ready, &bd->link);
  }
}

/**
 * Get the next BD from the list. This call assumes the cache is locked.
 *
 * @param bds_per_group The number of BDs per group we need.
 * @param list The list to find the BD on.
 * @return The next BD if found or NULL if none are available.
 */
static rtems_bdbuf_buffer*
rtems_bdbuf_get_next_bd (size_t               bds_per_group,
                         rtems_chain_control* list)
{
  rtems_chain_node* node = rtems_chain_first (list);
  while (!rtems_chain_is_tail (list, node))
  {
    rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;

    if (rtems_bdbuf_tracer)
      printf ("bdbuf:next-bd: %lu (%ld:%ld) %ld -> %ld\n",
              bd - bdbuf_cache.bds,
              bd->group - bdbuf_cache.groups, bd->group->users,
              bd->group->bds_per_group, bds_per_group);

    /*
     * If this bd is already part of a group that supports the same number of
     * BDs per group return it. If the bd is part of another group check the
     * number of users and if 0 we can take this group and resize it.
     */
    if (bd->group->bds_per_group == bds_per_group)
    {
      rtems_chain_extract (node);
      return bd;
    }

    if (bd->group->users == 0)
    {
      /*
       * We use the group to locate the start of the BDs for this group.
       */
      rtems_bdbuf_group_realloc (bd->group, bds_per_group);
      bd = (rtems_bdbuf_buffer*) rtems_chain_get (&bdbuf_cache.ready);
      return bd;
    }

    node = rtems_chain_next (node);
  }

  return NULL;
}

/**
 * Initialise the cache.
 *
 * @return rtems_status_code The initialisation status.
 */
rtems_status_code
rtems_bdbuf_init (void)
{
  rtems_bdbuf_group*  group;
  rtems_bdbuf_buffer* bd;
  uint8_t*            buffer;
  int                 b;
  int                 cache_aligment;
  rtems_status_code   sc;

  if (rtems_bdbuf_tracer)
    printf ("bdbuf:init\n");

  /*
   * Check the configuration table values.
   */
  if ((bdbuf_config.buffer_max % bdbuf_config.buffer_min) != 0)
    return RTEMS_INVALID_NUMBER;

  /*
   * We use a special variable to manage the initialisation in case we have
   * competing threads doing this. You may get errors if another thread makes
   * a call and we have not finished initialisation.
   */
  if (bdbuf_cache.initialised)
    return RTEMS_RESOURCE_IN_USE;

  bdbuf_cache.initialised = true;

  /*
   * For unspecified cache alignments we use the CPU alignment.
   */
  cache_aligment = 32; /* FIXME rtems_cache_get_data_line_size() */
  if (cache_aligment <= 0)
    cache_aligment = CPU_ALIGNMENT;

  bdbuf_cache.sync_active    = false;
  bdbuf_cache.sync_device    = -1;
  bdbuf_cache.sync_requester = 0;
  bdbuf_cache.tree           = NULL;

  rtems_chain_initialize_empty (&bdbuf_cache.swapout_workers);
  rtems_chain_initialize_empty (&bdbuf_cache.ready);
  rtems_chain_initialize_empty (&bdbuf_cache.lru);
  rtems_chain_initialize_empty (&bdbuf_cache.modified);
  rtems_chain_initialize_empty (&bdbuf_cache.sync);

  bdbuf_cache.access           = 0;
  bdbuf_cache.access_waiters   = 0;
  bdbuf_cache.transfer         = 0;
  bdbuf_cache.transfer_waiters = 0;
  bdbuf_cache.waiting          = 0;
  bdbuf_cache.wait_waiters     = 0;

  /*
   * Create the locks for the cache.
   */
  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'l'),
                               1, RTEMS_BDBUF_CACHE_LOCK_ATTRIBS, 0,
                               &bdbuf_cache.lock);
  if (sc != RTEMS_SUCCESSFUL)
  {
    bdbuf_cache.initialised = false;
    return sc;
  }

  rtems_bdbuf_lock_cache ();

  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 's'),
                               1, RTEMS_BDBUF_CACHE_LOCK_ATTRIBS, 0,
                               &bdbuf_cache.sync_lock);
  if (sc != RTEMS_SUCCESSFUL)
  {
    rtems_bdbuf_unlock_cache ();
    rtems_semaphore_delete (bdbuf_cache.lock);
    bdbuf_cache.initialised = false;
    return sc;
  }

  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'a'),
                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
                               &bdbuf_cache.access);
  if (sc != RTEMS_SUCCESSFUL)
  {
    rtems_semaphore_delete (bdbuf_cache.sync_lock);
    rtems_bdbuf_unlock_cache ();
    rtems_semaphore_delete (bdbuf_cache.lock);
    bdbuf_cache.initialised = false;
    return sc;
  }

  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 't'),
                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
                               &bdbuf_cache.transfer);
  if (sc != RTEMS_SUCCESSFUL)
  {
    rtems_semaphore_delete (bdbuf_cache.access);
    rtems_semaphore_delete (bdbuf_cache.sync_lock);
    rtems_bdbuf_unlock_cache ();
    rtems_semaphore_delete (bdbuf_cache.lock);
    bdbuf_cache.initialised = false;
    return sc;
  }

  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'w'),
                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
                               &bdbuf_cache.waiting);
  if (sc != RTEMS_SUCCESSFUL)
  {
    rtems_semaphore_delete (bdbuf_cache.transfer);
    rtems_semaphore_delete (bdbuf_cache.access);
    rtems_semaphore_delete (bdbuf_cache.sync_lock);
    rtems_bdbuf_unlock_cache ();
    rtems_semaphore_delete (bdbuf_cache.lock);
    bdbuf_cache.initialised = false;
    return sc;
  }

  /*
   * Compute the various numbers of elements in the cache.
   */
  bdbuf_cache.buffer_min_count =
    bdbuf_config.size / bdbuf_config.buffer_min;
  bdbuf_cache.max_bds_per_group =
    bdbuf_config.buffer_max / bdbuf_config.buffer_min;
  bdbuf_cache.group_count =
    bdbuf_cache.buffer_min_count / bdbuf_cache.max_bds_per_group;

  /*
   * Allocate the memory for the buffer descriptors.
   */
  bdbuf_cache.bds = calloc (sizeof (rtems_bdbuf_buffer),
                            bdbuf_cache.buffer_min_count);
  if (!bdbuf_cache.bds)
  {
    rtems_semaphore_delete (bdbuf_cache.transfer);
    rtems_semaphore_delete (bdbuf_cache.access);
    rtems_semaphore_delete (bdbuf_cache.sync_lock);
    rtems_bdbuf_unlock_cache ();
    rtems_semaphore_delete (bdbuf_cache.lock);
    bdbuf_cache.initialised = false;
    return RTEMS_NO_MEMORY;
  }

  /*
   * Allocate the memory for the groups.
   */
  bdbuf_cache.groups = calloc (sizeof (rtems_bdbuf_group),
                               bdbuf_cache.group_count);
  if (!bdbuf_cache.groups)
  {
    free (bdbuf_cache.bds);
    rtems_semaphore_delete (bdbuf_cache.transfer);
    rtems_semaphore_delete (bdbuf_cache.access);
    rtems_semaphore_delete (bdbuf_cache.sync_lock);
    rtems_bdbuf_unlock_cache ();
    rtems_semaphore_delete (bdbuf_cache.lock);
    bdbuf_cache.initialised = false;
    return RTEMS_NO_MEMORY;
  }

  /*
   * Allocate memory for buffer memory. The buffer memory will be cache
   * aligned. It is possible to free the memory allocated by rtems_memalign()
   * with free(). rtems_memalign() returns 0 on success.
   */
  if (rtems_memalign ((void **) &bdbuf_cache.buffers,
                      cache_aligment,
                      bdbuf_cache.buffer_min_count * bdbuf_config.buffer_min) != 0)
  {
    free (bdbuf_cache.groups);
    free (bdbuf_cache.bds);
    rtems_semaphore_delete (bdbuf_cache.transfer);
    rtems_semaphore_delete (bdbuf_cache.access);
    rtems_semaphore_delete (bdbuf_cache.sync_lock);
    rtems_bdbuf_unlock_cache ();
    rtems_semaphore_delete (bdbuf_cache.lock);
    bdbuf_cache.initialised = false;
    return RTEMS_NO_MEMORY;
  }

  /*
   * The cache is empty after opening so we need to add all the buffers to it
   * and initialise the groups.
   */
  for (b = 0, group = bdbuf_cache.groups,
         bd = bdbuf_cache.bds, buffer = bdbuf_cache.buffers;
       b < bdbuf_cache.buffer_min_count;
       b++, bd++, buffer += bdbuf_config.buffer_min)
  {
    bd->dev        = -1;
    bd->group      = group;
    bd->buffer     = buffer;
    bd->avl.left   = NULL;
    bd->avl.right  = NULL;
    bd->state      = RTEMS_BDBUF_STATE_EMPTY;
    bd->error      = 0;
    bd->waiters    = 0;
    bd->hold_timer = 0;
    bd->references = 0;
    bd->user       = NULL;

    rtems_chain_append (&bdbuf_cache.ready, &bd->link);

    if ((b % bdbuf_cache.max_bds_per_group) ==
        (bdbuf_cache.max_bds_per_group - 1))
      group++;
  }

  for (b = 0,
         group = bdbuf_cache.groups,
         bd = bdbuf_cache.bds;
       b < bdbuf_cache.group_count;
       b++,
         group++,
         bd += bdbuf_cache.max_bds_per_group)
  {
    group->bds_per_group = bdbuf_cache.max_bds_per_group;
    group->users = 0;
    group->bdbuf = bd;
  }

  /*
   * Create and start the swapout task. This task will create and manage the
   * worker threads.
   */
  bdbuf_cache.swapout_enabled = true;

  sc = rtems_task_create (rtems_build_name('B', 'S', 'W', 'P'),
                          (bdbuf_config.swapout_priority ?
                           bdbuf_config.swapout_priority :
                           RTEMS_BDBUF_SWAPOUT_TASK_PRIORITY_DEFAULT),
                          SWAPOUT_TASK_STACK_SIZE,
                          RTEMS_PREEMPT | RTEMS_NO_TIMESLICE | RTEMS_NO_ASR,
                          RTEMS_LOCAL | RTEMS_NO_FLOATING_POINT,
                          &bdbuf_cache.swapout);
  if (sc != RTEMS_SUCCESSFUL)
  {
    free (bdbuf_cache.buffers);
    free (bdbuf_cache.groups);
    free (bdbuf_cache.bds);
    rtems_semaphore_delete (bdbuf_cache.transfer);
    rtems_semaphore_delete (bdbuf_cache.access);
    rtems_semaphore_delete (bdbuf_cache.sync_lock);
    rtems_bdbuf_unlock_cache ();
    rtems_semaphore_delete (bdbuf_cache.lock);
    bdbuf_cache.initialised = false;
    return sc;
  }

  sc = rtems_task_start (bdbuf_cache.swapout,
                         rtems_bdbuf_swapout_task,
                         (rtems_task_argument) &bdbuf_cache);
  if (sc != RTEMS_SUCCESSFUL)
  {
    rtems_task_delete (bdbuf_cache.swapout);
    free (bdbuf_cache.buffers);
    free (bdbuf_cache.groups);
    free (bdbuf_cache.bds);
    rtems_semaphore_delete (bdbuf_cache.transfer);
    rtems_semaphore_delete (bdbuf_cache.access);
    rtems_semaphore_delete (bdbuf_cache.sync_lock);
    rtems_bdbuf_unlock_cache ();
    rtems_semaphore_delete (bdbuf_cache.lock);
    bdbuf_cache.initialised = false;
    return sc;
  }

  rtems_bdbuf_unlock_cache ();

  return RTEMS_SUCCESSFUL;
}

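/*
 * A hedged usage sketch (editor's illustration, not part of the original
 * file): the cache is initialised once, before any buffers are requested;
 * the disk I/O layer is assumed to do this during start up, but a direct
 * call looks like:
 *
 *   rtems_status_code sc = rtems_bdbuf_init ();
 *   if (sc != RTEMS_SUCCESSFUL)
 *   {
 *     // RTEMS_RESOURCE_IN_USE means the cache is already initialised;
 *     // RTEMS_INVALID_NUMBER means buffer_max is not a multiple of
 *     // buffer_min; other codes indicate a resource problem.
 *   }
 */
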
/**
 * Get a buffer for this device and block. This function returns a buffer once
 * placed into the AVL tree. If no buffer is available and it is not a read
 * ahead request and no buffers are waiting to be written to disk, wait until a
 * buffer is available. If buffers are waiting to be written to disk and none
 * are available, expire the hold timers of the queued buffers and wake the
 * swap out task. If the buffer is for a read ahead transfer return NULL if
 * there are no buffers available or the buffer is already in the cache.
 *
 * The AVL tree of buffers for the cache is searched and if not found obtain a
 * buffer and insert it into the AVL tree. Buffers are first obtained from the
 * ready list until all empty/ready buffers are used. Once all buffers are in
 * use the LRU list is searched for a buffer of the same group size or a group
 * that has no active buffers in use. A buffer taken from the LRU list is
 * removed from the AVL tree and assigned the new block number. The ready or
 * LRU list buffer is initialised to this device and block. If no buffers are
 * available due to the ready and LRU lists being empty a check is made of the
 * modified list. Buffers may be queued waiting for the hold timer to
 * expire. These buffers should be written to disk and returned to the LRU list
 * where they can be used. If buffers are on the modified list, up to the
 * maximum write block size of buffers have their hold timers expired and the
 * swap out task is woken. The caller then blocks on the waiting semaphore and
 * counter. When buffers return from the upper layers (access) or lower driver
 * (transfer) the blocked caller task is woken and this procedure is
 * repeated. The repeat handles the case of another thread pre-empting us,
 * getting a buffer first and adding it to the AVL tree.
 *
 * A buffer located in the AVL tree means it is already in the cache and may be
 * in use somewhere. The buffer can be either:
 *
 * # Cached. Not being accessed or part of a media transfer.
 * # Access or modified access. Is with an upper layer being accessed.
 * # Transfer. Is with the driver and part of a media transfer.
 *
 * If cached we assign the new state, extract it from any list it may be part
 * of and return it to the user.
 *
 * This function assumes the cache the buffer is being taken from is locked and
 * it will make sure the cache is locked when it returns. The cache will be
 * unlocked if the call could block.
 *
 * Variable sized buffers are handled by groups. A group is the size of the
 * maximum buffer that can be allocated. The group can be sized in multiples of
 * the minimum buffer size where the multiples are 1, 2, 4, 8, etc. If the
 * buffer is found in the AVL tree the number of BDs in the group is checked
 * and if different the buffer size for the block has changed and the buffer
 * needs to be invalidated.
 *
 * @param dd The disk device. Has the configured block size.
 * @param bds_per_group The number of BDs in a group for this block.
 * @param block Absolute media block number for the device
 * @param read_ahead The get is for a read ahead buffer if true
 * @return RTEMS status code (if operation completed successfully or error
 *         code if an error occurred)
 */
static rtems_bdbuf_buffer*
rtems_bdbuf_get_buffer (rtems_disk_device* dd,
                        size_t             bds_per_group,
                        rtems_blkdev_bnum  block,
                        bool               read_ahead)
{
  dev_t               device = dd->dev;
  rtems_bdbuf_buffer* bd;
  bool                available;

  /*
   * Loop until we get a buffer. Under load we could find no buffers are
   * available requiring this task to wait until some become available before
   * proceeding. There is no timeout. If this call is to block and the buffer
   * is for a read ahead buffer return NULL. The read ahead is nice but not
   * that important.
   *
   * The search procedure is repeated as another thread could have pre-empted
   * us while we waited for a buffer, obtained an empty buffer and loaded the
   * AVL tree with the one we are after. In this case we move down and wait for
   * the buffer to return to the cache.
   */
  do
  {
    /*
     * Search for the buffer descriptor for this dev/block key.
     */
    bd = rtems_bdbuf_avl_search (&bdbuf_cache.tree, device, block);

    /*
     * No buffer in the cache for this block. We need to obtain a buffer and
     * this means take a buffer that is ready to use. If all buffers are in use
     * take the least recently used buffer. If there are none then the cache is
     * empty. All the buffers are either queued to be written to disk or with
     * the user. We cannot do much with the buffers with the user; however, for
     * the modified buffers waiting to be written to disk, flush the maximum
     * number transferred in a block to disk. After this all that can be done
     * is to wait for a buffer to return to the cache.
     */
    if (!bd)
    {
      /*
       * Assign a new buffer descriptor from the ready list if one is present.
       * If the ready queue is empty get the oldest buffer from the LRU list.
       * If the LRU list is empty there are no available buffers, so check the
       * modified list.
       */
      bd = rtems_bdbuf_get_next_bd (bds_per_group, &bdbuf_cache.ready);

      if (!bd)
      {
        /*
         * No unused or read-ahead buffers.
         *
         * If this is a read ahead buffer just return. No need to place further
         * pressure on the cache by reading something that may be needed when
         * we have data in the cache that was needed and may still be in the
         * future.
         */
        if (read_ahead)
          return NULL;

        /*
         * Check the LRU list.
         */
        bd = rtems_bdbuf_get_next_bd (bds_per_group, &bdbuf_cache.lru);

        if (bd)
        {
          /*
           * Remove the buffer from the AVL tree if the state says it is in the
           * cache or a read ahead buffer. The buffer could be in the empty
           * state as a result of reallocations.
           */
          switch (bd->state)
          {
            case RTEMS_BDBUF_STATE_CACHED:
            case RTEMS_BDBUF_STATE_READ_AHEAD:
              if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0)
                rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_2);
              break;
            default:
              break;
          }
        }
        else
        {
          /*
           * If there are buffers on the modified list expire the hold timer
           * and wake the swap out task then wait, else just go and wait.
           *
           * The check for an empty list is made so the swapper is only woken
           * when the timers are changed.
           */
          if (!rtems_chain_is_empty (&bdbuf_cache.modified))
          {
            rtems_chain_node* node = rtems_chain_first (&bdbuf_cache.modified);
            uint32_t          write_blocks = 0;

            while ((write_blocks < bdbuf_config.max_write_blocks) &&
                   !rtems_chain_is_tail (&bdbuf_cache.modified, node))
            {
              rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;
              bd->hold_timer = 0;
              write_blocks++;
              node = rtems_chain_next (node);
            }

            rtems_bdbuf_wake_swapper ();
          }

          /*
           * Wait for a buffer to be returned to the cache. The buffer will be
           * placed on the LRU list.
           */
          rtems_bdbuf_wait (&bdbuf_cache.waiting, &bdbuf_cache.wait_waiters);
        }
      }
      else
      {
        /*
         * We have a new buffer for this block.
         */
        if ((bd->state != RTEMS_BDBUF_STATE_EMPTY) &&
            (bd->state != RTEMS_BDBUF_STATE_READ_AHEAD))
          rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_3);

        if (bd->state == RTEMS_BDBUF_STATE_READ_AHEAD)
        {
          if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0)
            rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_4);
        }
      }

      if (bd)
      {
        bd->dev       = device;
        bd->block     = block;
        bd->avl.left  = NULL;
        bd->avl.right = NULL;
        bd->state     = RTEMS_BDBUF_STATE_EMPTY;
        bd->error     = 0;
        bd->waiters   = 0;

        if (rtems_bdbuf_avl_insert (&bdbuf_cache.tree, bd) != 0)
          rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_5);

        return bd;
      }
    }
    else
    {
      /*
       * We have the buffer for the block from the cache. Check if the buffer
       * in the cache is the same size as the requested size we are after.
       */
      if (bd->group->bds_per_group != bds_per_group)
      {
        /*
         * Remove the buffer from the AVL tree.
         */
        if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0)
          rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_2);
        bd->state = RTEMS_BDBUF_STATE_EMPTY;
        rtems_chain_extract (&bd->link);
        rtems_chain_prepend (&bdbuf_cache.ready, &bd->link);
        bd = NULL;
      }
    }
  }
  while (!bd);

  /*
   * If the buffer is for read ahead and it exists in the AVL cache or is being
   * accessed or being transferred then return NULL stopping further read ahead
   * requests.
   */
  if (read_ahead)
    return NULL;

  /*
   * Loop waiting for the buffer to enter the cached state. If the buffer is in
   * the access or transfer state then wait until it is not.
   */
  available = false;
  while (!available)
  {
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_CACHED:
      case RTEMS_BDBUF_STATE_MODIFIED:
      case RTEMS_BDBUF_STATE_READ_AHEAD:
        available = true;
        break;

      case RTEMS_BDBUF_STATE_ACCESS:
      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
        bd->waiters++;
        rtems_bdbuf_wait (&bdbuf_cache.access, &bdbuf_cache.access_waiters);
        bd->waiters--;
        break;

      case RTEMS_BDBUF_STATE_SYNC:
      case RTEMS_BDBUF_STATE_TRANSFER:
        bd->waiters++;
        rtems_bdbuf_wait (&bdbuf_cache.transfer, &bdbuf_cache.transfer_waiters);
        bd->waiters--;
        break;

      default:
        rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_6);
    }
  }

  /*
   * The buffer is linked to the LRU, modified, or sync lists. Remove it from
   * there.
   */
  rtems_chain_extract (&bd->link);

  return bd;
}

1695rtems_status_code
1696rtems_bdbuf_get (dev_t                device,
1697                 rtems_blkdev_bnum    block,
1698                 rtems_bdbuf_buffer** bdp)
1699{
1700  rtems_disk_device*  dd;
1701  rtems_bdbuf_buffer* bd;
1702  rtems_blkdev_bnum   media_block;
1703  size_t              bds_per_group;
1704
1705  if (!bdbuf_cache.initialised)
1706    return RTEMS_NOT_CONFIGURED;
1707
1708  /*
1709   * Do not hold the cache lock when obtaining the disk table.
1710   */
1711  dd = rtems_disk_obtain (device);
1712  if (!dd)
1713    return RTEMS_INVALID_ID;
1714
1715  /*
1716   * Compute the media block number. Drivers work with media block number not
1717   * the block number a BD may have as this depends on the block size set by
1718   * the user.
1719   */
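  /*
   * A worked example (figures are illustrative): with a user block size of
   * 1024 bytes on a disk with 512 byte media blocks each user block covers
   * two media blocks, so user block 7 maps to media block (7 * 1024) / 512,
   * that is media block 14.
   */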
1720  media_block = rtems_bdbuf_media_block (block,
1721                                         dd->block_size,
1722                                         dd->media_block_size);
1723  if (media_block >= dd->size)
1724  {
1725    rtems_disk_release(dd);
1726    return RTEMS_INVALID_NUMBER;
1727  }
1728
1729  bds_per_group = rtems_bdbuf_bds_per_group (dd->block_size);
1730  if (!bds_per_group)
1731  {
1732    rtems_disk_release (dd);
1733    return RTEMS_INVALID_NUMBER;
1734  }
1735
1736  media_block += dd->start;
1737
1738  rtems_bdbuf_lock_cache ();
1739
1740  /*
1741   * Print the block index relative to the physical disk.
1742   */
1743  if (rtems_bdbuf_tracer)
1744    printf ("bdbuf:get: %lu (%lu) (dev = %08x)\n",
1745            media_block, block, (unsigned int) device);
1746
1747  bd = rtems_bdbuf_get_buffer (dd, bds_per_group, media_block, false);
1748
1749  /*
1750   * This could be considered a bug in the caller because you should not be
1751   * getting an already modified buffer. However, the user may have modified
1752   * a byte then decided to seek to the start and rewrite the whole block;
1753   * the file system has no record of this so it just gets the block to fill.
1754   */
1755  if (bd->state == RTEMS_BDBUF_STATE_MODIFIED)
1756    bd->state = RTEMS_BDBUF_STATE_ACCESS_MODIFIED;
1757  else
1758  {
1759    bd->state = RTEMS_BDBUF_STATE_ACCESS;
1760    /*
1761     * Indicate a buffer in this group is being used.
1762     */
1763    bd->group->users++;
1764  }
1765 
1766  if (rtems_bdbuf_tracer)
1767  {
1768    rtems_bdbuf_show_users ("get", bd);
1769    rtems_bdbuf_show_usage ();
1770  }
1771
1772  rtems_bdbuf_unlock_cache ();
1773
1774  rtems_disk_release(dd);
1775
1776  *bdp = bd;
1777
1778  return RTEMS_SUCCESSFUL;
1779}
1780
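/*
 * A minimal usage sketch (illustrative only, not part of this file): obtain
 * a buffer with rtems_bdbuf_get when the whole block will be overwritten,
 * fill it, then queue it for the swapout task. The names dev, block and
 * block_size are assumptions of the example.
 *
 *   rtems_bdbuf_buffer* bd;
 *   if (rtems_bdbuf_get (dev, block, &bd) == RTEMS_SUCCESSFUL)
 *   {
 *     memset (bd->buffer, 0, block_size);
 *     rtems_bdbuf_release_modified (bd);
 *   }
 */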
1781/**
1782 * Call back handler called by the low level driver when the transfer has
1783 * completed. This function may be invoked from an interrupt handler.
1784 *
1785 * @param arg Arbitrary argument specified in block device request
1786 *            structure (in this case - pointer to the appropriate
1787 *            block device request structure).
1788 * @param status I/O completion status
1789 * @param error errno error code if status != RTEMS_SUCCESSFUL
1790 */
1791static void
1792rtems_bdbuf_read_done (void* arg, rtems_status_code status, int error)
1793{
1794  rtems_blkdev_request* req = (rtems_blkdev_request*) arg;
1795
1796  req->error = error;
1797  req->status = status;
1798
1799  rtems_event_send (req->io_task, RTEMS_BDBUF_TRANSFER_SYNC);
1800}
1801
1802rtems_status_code
1803rtems_bdbuf_read (dev_t                device,
1804                  rtems_blkdev_bnum    block,
1805                  rtems_bdbuf_buffer** bdp)
1806{
1807  rtems_disk_device*    dd;
1808  rtems_bdbuf_buffer*   bd = NULL;
1809  uint32_t              read_ahead_count;
1810  rtems_blkdev_request* req;
1811  size_t                bds_per_group;
1812  rtems_blkdev_bnum     media_block;
1813  rtems_blkdev_bnum     media_block_count;
1814 
1815  if (!bdbuf_cache.initialised)
1816    return RTEMS_NOT_CONFIGURED;
1817
1818  /*
1819   * @todo This type of request structure is wrong and should be removed.
1820   */
1821#define bdbuf_alloc(size) __builtin_alloca (size)
1822
1823  req = bdbuf_alloc (sizeof (rtems_blkdev_request) +
1824                     (sizeof ( rtems_blkdev_sg_buffer) *
1825                      rtems_bdbuf_configuration.max_read_ahead_blocks));
1826
1827  /*
1828   * Do not hold the cache lock when obtaining the disk table.
1829   */
1830  dd = rtems_disk_obtain (device);
1831  if (!dd)
1832    return RTEMS_INVALID_ID;
1833 
1834  /*
1835   * Compute the media block number. Drivers work with media block number not
1836   * the block number a BD may have as this depends on the block size set by
1837   * the user.
1838   */
1839  media_block = rtems_bdbuf_media_block (block,
1840                                         dd->block_size,
1841                                         dd->media_block_size);
1842  if (media_block >= dd->size)
1843  {
1844    rtems_disk_release(dd);
1845    return RTEMS_INVALID_NUMBER;
1846  }
1847 
1848  bds_per_group = rtems_bdbuf_bds_per_group (dd->block_size);
1849  if (!bds_per_group)
1850  {
1851    rtems_disk_release (dd);
1852    return RTEMS_INVALID_NUMBER;
1853  }
1854 
1855  /*
1856   * Print the block index relative to the physical disk and the user block
1857   * number
1858   */
1859  if (rtems_bdbuf_tracer)
1860    printf ("bdbuf:read: %lu (%lu) (dev = %08x)\n",
1861            media_block + dd->start, block, (unsigned int) device);
1862
1863  /*
1864   * Read the block plus the required number of blocks ahead. The number of
1865   * blocks to read ahead is configured by the user and limited by the size of
1866   * the disk or reaching a read ahead block that is also cached.
1867   *
1868   * Limit the blocks read by the size of the disk.
1869   */
1870  if ((rtems_bdbuf_configuration.max_read_ahead_blocks + media_block) < dd->size)
1871    read_ahead_count = rtems_bdbuf_configuration.max_read_ahead_blocks;
1872  else
1873    read_ahead_count = dd->size - media_block;
1874
1875  media_block_count = dd->block_size / dd->media_block_size;
1876 
1877  req->bufnum = 0;
1878
1879  rtems_bdbuf_lock_cache ();
1880
1881  while (req->bufnum < read_ahead_count)
1882  {
1883    /*
1884     * Get the buffer for the requested block. If the block is cached then
1885     * return it. If it is not cached transfer the block from the disk media
1886     * into memory.
1887     *
1888     * We need to clean up any buffers allocated and not passed back to the
1889     * caller.
1890     */
1891    bd = rtems_bdbuf_get_buffer (dd, bds_per_group, media_block + dd->start,
1892                                 req->bufnum == 0 ? false : true);
1893
1894    /*
1895     * The read ahead buffer is already in the cache or none are available.
1896     * Read what we can.
1897     */
1898    if (!bd)
1899      break;
1900
1901    /*
1902     * Is the block we are interested in already in the cache ?
1903     */
1904    if ((bd->state == RTEMS_BDBUF_STATE_CACHED) ||
1905        (bd->state == RTEMS_BDBUF_STATE_MODIFIED))
1906      break;
1907
1908    bd->state = RTEMS_BDBUF_STATE_TRANSFER;
1909    bd->error = 0;
1910
1911    /*
1912     * The buffer will be passed to the driver so this buffer has a user.
1913     */
1914    bd->group->users++;
1915
1916    if (rtems_bdbuf_tracer)
1917      rtems_bdbuf_show_users ("reading", bd);
1918   
1919    /*
1920     * @todo The use of these req blocks is not a great design. The req is a
1921     *       struct with a single 'bufs' declared in the req struct and the
1922     *       others are added in the outer level struct. This relies on the
1923     *       structs joining as a single array and that assumes the compiler
1924     *       packs the structs. Why not just place them on a list ? The BD has a
1925     *       node that can be used.
1926     */
1927    req->bufs[req->bufnum].user   = bd;
1928    req->bufs[req->bufnum].block  = media_block + dd->start;
1929    req->bufs[req->bufnum].length = dd->block_size;
1930    req->bufs[req->bufnum].buffer = bd->buffer;
1931    req->bufnum++;
1932
1933    /*
1934     * Move the media block count by the number of media blocks in the
1935     * disk device's set block size.
1936     */
1937    media_block += media_block_count;
1938  }
1939
1940  /*
1941   * Transfer any requested buffers. If the request count is 0 we have found
1942   * the block in the cache so return it.
1943   */
1944  if (req->bufnum)
1945  {
1946    /*
1947     * Unlock the cache. We have the buffer for the block and it will be in the
1948     * access or transfer state. We may also have a number of read ahead blocks
1949     * if we need to transfer data. At this point any other threads can gain
1950     * access to the cache and if they are after any of the buffers we have
1951     * they will block and be woken when the buffer is returned to the cache.
1952     *
1953     * If a transfer is needed the I/O operation will occur with pre-emption
1954     * enabled and the cache unlocked. This is a change to the previous version
1955     * of the bdbuf code.
1956     */
1957    rtems_event_set out;
1958    int             result;
1959    uint32_t        b;
1960    bool            wake_transfer;
1961
1962    /*
1963     * Flush any events.
1964     */
1965    rtems_event_receive (RTEMS_BDBUF_TRANSFER_SYNC,
1966                         RTEMS_EVENT_ALL | RTEMS_NO_WAIT,
1967                         0, &out);
1968                         
1969    rtems_bdbuf_unlock_cache ();
1970
1971    req->req = RTEMS_BLKDEV_REQ_READ;
1972    req->req_done = rtems_bdbuf_read_done;
1973    req->done_arg = req;
1974    req->io_task = rtems_task_self ();
1975    req->status = RTEMS_RESOURCE_IN_USE;
1976    req->error = 0;
1977 
1978    result = dd->ioctl (dd, RTEMS_BLKIO_REQUEST, req);
1979
1980    /*
1981     * Inspection of the DOS FS code shows the result from this function is
1982     * handled and a buffer must be returned.
1983     */
1984    if (result < 0)
1985    {
1986      req->error = errno;
1987      req->status = RTEMS_IO_ERROR;
1988    }
1989    else
1990    {
1991      rtems_status_code sc;
1992     
1993      sc = rtems_event_receive (RTEMS_BDBUF_TRANSFER_SYNC,
1994                                RTEMS_EVENT_ALL | RTEMS_WAIT,
1995                                0, &out);
1996
1997      if (sc != RTEMS_SUCCESSFUL)
1998        rtems_fatal_error_occurred (BLKDEV_FATAL_BDBUF_SWAPOUT_RE);
1999    }
2000
2001    wake_transfer = false;
2002   
2003    rtems_bdbuf_lock_cache ();
2004
2005    for (b = 1; b < req->bufnum; b++)
2006    {
2007      bd = req->bufs[b].user;
2008      if (!bd->error)
2009        bd->error = req->error;
2010      bd->state = RTEMS_BDBUF_STATE_READ_AHEAD;
2011      bd->group->users--;
2012
2013      if (rtems_bdbuf_tracer)
2014        rtems_bdbuf_show_users ("read-ahead", bd);
2015
2016      rtems_chain_prepend (&bdbuf_cache.ready, &bd->link);
2017
2018      /*
2019       * If there is an error remove the BD from the AVL tree as it is invalid,
2020       * then wake any threads that may be waiting. A thread may have been
2021       * waiting for this block and assumed it was in the tree.
2022       */
2023      if (bd->error)
2024      {
2025        bd->state = RTEMS_BDBUF_STATE_EMPTY;
2026        if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0)
2027          rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_9);
2028      }
2029
2030      if (bd->waiters)
2031        wake_transfer = true;
2032    }
2033
2034    if (wake_transfer)
2035      rtems_bdbuf_wake (bdbuf_cache.transfer, &bdbuf_cache.transfer_waiters);
2036    else
2037      rtems_bdbuf_wake (bdbuf_cache.waiting, &bdbuf_cache.wait_waiters);
2038   
2039    bd = req->bufs[0].user;
2040
2041    /*
2042     * One less user for the BD we return. The loop above is only for the
2043     * read ahead buffers. We do this here then increment again below so the
2044     * cases of the buffer being cached or modified with no read leave the
2045     * user counts at the correct level.
2046     */
2047    bd->group->users--;
2048
2049    if (rtems_bdbuf_tracer)
2050      rtems_bdbuf_show_users ("read-done", bd);
2051  }
2052
2053  /*
2054   * The data for this block is cached in the buffer.
2055   */
2056  if (bd->state == RTEMS_BDBUF_STATE_MODIFIED)
2057    bd->state = RTEMS_BDBUF_STATE_ACCESS_MODIFIED;
2058  else
2059  {
2060    /*
2061     * The file system is a user of the buffer.
2062     */
2063    bd->group->users++;
2064    bd->state = RTEMS_BDBUF_STATE_ACCESS;
2065  }
2066
2067  if (rtems_bdbuf_tracer)
2068  {
2069    rtems_bdbuf_show_users ("read", bd);
2070    rtems_bdbuf_show_usage ();
2071  }
2072 
2073  rtems_bdbuf_unlock_cache ();
2074  rtems_disk_release (dd);
2075
2076  *bdp = bd;
2077
2078  return RTEMS_SUCCESSFUL;
2079}
2080
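/*
 * A minimal read sketch (illustrative only, not part of this file): read a
 * block, check the buffer's error status, use the data and release the
 * buffer unchanged. The names dev, block and use_data are assumptions of the
 * example.
 *
 *   rtems_bdbuf_buffer* bd;
 *   if (rtems_bdbuf_read (dev, block, &bd) == RTEMS_SUCCESSFUL)
 *   {
 *     if (bd->error == 0)
 *       use_data (bd->buffer);
 *     rtems_bdbuf_release (bd);
 *   }
 */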
2081rtems_status_code
2082rtems_bdbuf_release (rtems_bdbuf_buffer* bd)
2083{
2084  if (!bdbuf_cache.initialised)
2085    return RTEMS_NOT_CONFIGURED;
2086
2087  if (bd == NULL)
2088    return RTEMS_INVALID_ADDRESS;
2089
2090  rtems_bdbuf_lock_cache ();
2091
2092  if (rtems_bdbuf_tracer)
2093    printf ("bdbuf:release: %lu\n", bd->block);
2094 
2095  if (bd->state == RTEMS_BDBUF_STATE_ACCESS_MODIFIED)
2096  {
2097    rtems_bdbuf_append_modified (bd);
2098  }
2099  else
2100  {
2101    bd->state = RTEMS_BDBUF_STATE_CACHED;
2102    rtems_chain_append (&bdbuf_cache.lru, &bd->link);
2103
2104    /*
2105     * One less user for the group of bds.
2106     */
2107    bd->group->users--;
2108  }
2109 
2110  if (rtems_bdbuf_tracer)
2111    rtems_bdbuf_show_users ("release", bd);
2112 
2113  /*
2114   * If there are threads waiting to access the buffer wake them. Otherwise
2115   * wake any threads waiting for a buffer now one returns to the LRU queue.
2116   */
2117  if (bd->waiters)
2118    rtems_bdbuf_wake (bdbuf_cache.access, &bdbuf_cache.access_waiters);
2119  else
2120    rtems_bdbuf_wake (bdbuf_cache.waiting, &bdbuf_cache.wait_waiters);
2121 
2122  if (rtems_bdbuf_tracer)
2123    rtems_bdbuf_show_usage ();
2124 
2125  rtems_bdbuf_unlock_cache ();
2126
2127  return RTEMS_SUCCESSFUL;
2128}
2129
2130rtems_status_code
2131rtems_bdbuf_release_modified (rtems_bdbuf_buffer* bd)
2132{
2133  if (!bdbuf_cache.initialised)
2134    return RTEMS_NOT_CONFIGURED;
2135
2136  if (!bd)
2137    return RTEMS_INVALID_ADDRESS;
2138
2139  rtems_bdbuf_lock_cache ();
2140
2141  if (rtems_bdbuf_tracer)
2142    printf ("bdbuf:release modified: %lu\n", bd->block);
2143
2144  bd->hold_timer = rtems_bdbuf_configuration.swap_block_hold;
2145 
2146  if (rtems_bdbuf_tracer)
2147    rtems_bdbuf_show_users ("release-modified", bd);
2148 
2149  rtems_bdbuf_append_modified (bd);
2150
2151  if (bd->waiters)
2152    rtems_bdbuf_wake (bdbuf_cache.access, &bdbuf_cache.access_waiters);
2153 
2154  if (rtems_bdbuf_tracer)
2155    rtems_bdbuf_show_usage ();
2156 
2157  rtems_bdbuf_unlock_cache ();
2158
2159  return RTEMS_SUCCESSFUL;
2160}
2161
2162rtems_status_code
2163rtems_bdbuf_sync (rtems_bdbuf_buffer* bd)
2164{
2165  bool available;
2166
2167  if (rtems_bdbuf_tracer)
2168    printf ("bdbuf:sync: %lu\n", bd->block);
2169 
2170  if (!bdbuf_cache.initialised)
2171    return RTEMS_NOT_CONFIGURED;
2172
2173  if (!bd)
2174    return RTEMS_INVALID_ADDRESS;
2175
2176  rtems_bdbuf_lock_cache ();
2177
2178  bd->state = RTEMS_BDBUF_STATE_SYNC;
2179
2180  rtems_chain_append (&bdbuf_cache.sync, &bd->link);
2181
2182  rtems_bdbuf_wake_swapper ();
2183
2184  available = false;
2185  while (!available)
2186  {
2187    switch (bd->state)
2188    {
2189      case RTEMS_BDBUF_STATE_CACHED:
2190      case RTEMS_BDBUF_STATE_READ_AHEAD:
2191      case RTEMS_BDBUF_STATE_MODIFIED:
2192      case RTEMS_BDBUF_STATE_ACCESS:
2193      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2194        available = true;
2195        break;
2196
2197      case RTEMS_BDBUF_STATE_SYNC:
2198      case RTEMS_BDBUF_STATE_TRANSFER:
2199        bd->waiters++;
2200        rtems_bdbuf_wait (&bdbuf_cache.transfer, &bdbuf_cache.transfer_waiters);
2201        bd->waiters--;
2202        break;
2203
2204      default:
2205        rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_7);
2206    }
2207  }
2208
2209  rtems_bdbuf_unlock_cache ();
2210 
2211  return RTEMS_SUCCESSFUL;
2212}
2213
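/*
 * A minimal sync sketch (illustrative only, not part of this file): a held
 * buffer can be forced to the media before the call returns, unlike
 * rtems_bdbuf_release_modified which only queues it for the swapout task.
 * The names dev, block and block_size are assumptions of the example.
 *
 *   rtems_bdbuf_buffer* bd;
 *   if (rtems_bdbuf_get (dev, block, &bd) == RTEMS_SUCCESSFUL)
 *   {
 *     memset (bd->buffer, 0, block_size);
 *     rtems_bdbuf_sync (bd);
 *   }
 */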
2214rtems_status_code
2215rtems_bdbuf_syncdev (dev_t dev)
2216{
2217  rtems_disk_device*  dd;
2218  rtems_status_code   sc;
2219  rtems_event_set     out;
2220
2221  if (rtems_bdbuf_tracer)
2222    printf ("bdbuf:syncdev: %08x\n", (unsigned int) dev);
2223
2224  if (!bdbuf_cache.initialised)
2225    return RTEMS_NOT_CONFIGURED;
2226
2227  /*
2228   * Do not hold the cache lock when obtaining the disk table.
2229   */
2230  dd = rtems_disk_obtain (dev);
2231  if (!dd)
2232    return RTEMS_INVALID_ID;
2233
2234  /*
2235   * Take the sync lock before locking the cache. Once we have the sync lock we
2236   * can lock the cache. If another thread has the sync lock this thread
2237   * blocks until it owns the sync lock, and then it can lock the cache. The
2238   * sync lock can only be obtained with the cache unlocked.
2239   */
2240 
2241  rtems_bdbuf_lock_sync ();
2242  rtems_bdbuf_lock_cache (); 
2243
2244  /*
2245   * Set the cache to have a sync active for a specific device and let the swap
2246   * out task know the id of the requester to wake when done.
2247   *
2248   * The swap out task will negate the sync active flag when no more buffers
2249   * for the device are held on the "modified for sync" queues.
2250   */
2251  bdbuf_cache.sync_active    = true;
2252  bdbuf_cache.sync_requester = rtems_task_self ();
2253  bdbuf_cache.sync_device    = dev;
2254 
2255  rtems_bdbuf_wake_swapper ();
2256  rtems_bdbuf_unlock_cache ();
2257 
2258  sc = rtems_event_receive (RTEMS_BDBUF_TRANSFER_SYNC,
2259                            RTEMS_EVENT_ALL | RTEMS_WAIT,
2260                            0, &out);
2261
2262  if (sc != RTEMS_SUCCESSFUL)
2263    rtems_fatal_error_occurred (BLKDEV_FATAL_BDBUF_SWAPOUT_RE);
2264     
2265  rtems_bdbuf_unlock_sync ();
2266 
2267  return rtems_disk_release (dd);
2268}
2269
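/*
 * A minimal sketch (illustrative only, not part of this file) flushing all
 * modified buffers held for one device; dev is an assumption of the example.
 *
 *   rtems_status_code sc = rtems_bdbuf_syncdev (dev);
 *   if (sc != RTEMS_SUCCESSFUL)
 *     printf ("sync failed: %d\n", (int) sc);
 */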
2270/**
2271 * Call back handler called by the low level driver when the transfer has
2272 * completed. This function may be invoked from interrupt handlers.
2273 *
2274 * @param arg Arbitrary argument specified in block device request
2275 *            structure (in this case - pointer to the appropriate
2276 *            block device request structure).
2277 * @param status I/O completion status
2278 * @param error errno error code if status != RTEMS_SUCCESSFUL
2279 */
2280static void
2281rtems_bdbuf_write_done(void *arg, rtems_status_code status, int error)
2282{
2283  rtems_blkdev_request* req = (rtems_blkdev_request*) arg;
2284
2285  req->error = error;
2286  req->status = status;
2287
2288  rtems_event_send (req->io_task, RTEMS_BDBUF_TRANSFER_SYNC);
2289}
2290
2291/**
2292 * Swapout transfer to the driver. The driver will break this I/O into groups
2293 * of consecutive write requests if multiple consecutive buffers are required
2294 * by the driver.
2295 *
2296 * @param transfer The transfer transaction.
2297 */
2298static void
2299rtems_bdbuf_swapout_write (rtems_bdbuf_swapout_transfer* transfer)
2300{
2301  rtems_disk_device* dd;
2302 
2303  if (rtems_bdbuf_tracer)
2304    printf ("bdbuf:swapout transfer: %08x\n", (unsigned int) transfer->dev);
2305
2306  /*
2307   * If there are buffers to transfer to the media transfer them.
2308   */
2309  if (!rtems_chain_is_empty (&transfer->bds))
2310  {
2311    /*
2312     * Obtain the disk device. The cache's mutex has been released to avoid a
2313     * dead lock.
2314     */
2315    dd = rtems_disk_obtain (transfer->dev);
2316    if (dd)
2317    {
2318      /*
2319       * The last block number used when the driver only supports
2320       * continuous blocks in a single request.
2321       */
2322      uint32_t last_block = 0;
2323
2324      /*
2325       * Number of buffers per bd. This is used to detect the next
2326       * block.
2327       */
2328      uint32_t bufs_per_bd = dd->block_size / bdbuf_config.buffer_min;
2329     
2330      /*
2331       * Take as many buffers as configured and pass to the driver. Note, the
2332       * API to the drivers has an array of buffers and if a chain was passed
2333       * we could have just passed the list. If the driver API is updated it
2334       * should be possible to make this change with little effect in this
2335       * code. The array that is passed is broken in design and should be
2336       * removed. Merging members of a struct into the first member is
2337       * trouble waiting to happen.
2338       */
2339      transfer->write_req->status = RTEMS_RESOURCE_IN_USE;
2340      transfer->write_req->error = 0;
2341      transfer->write_req->bufnum = 0;
2342
2343      while (!rtems_chain_is_empty (&transfer->bds))
2344      {
2345        rtems_bdbuf_buffer* bd =
2346          (rtems_bdbuf_buffer*) rtems_chain_get (&transfer->bds);
2347
2348        bool write = false;
2349       
2350        /*
2351         * If the device only accepts sequential buffers, this is not the
2352         * first buffer (the first is always sequential), and the buffer is
2353         * not sequential, then put the buffer back on the transfer chain
2354         * and write the committed buffers.
2355         */
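        /*
         * An illustrative pass over this check (figures assumed): with a
         * 1024 byte block size and 512 byte buffers bufs_per_bd is 2, so
         * after last_block == 4 only block 6 continues the sequential run;
         * block 8 forces the committed buffers to be written first.
         */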
2356       
2357        if (rtems_bdbuf_tracer)
2358          printf ("bdbuf:swapout write: bd:%lu, bufnum:%lu mode:%s\n",
2359                  bd->block, transfer->write_req->bufnum,
2360                  dd->phys_dev->capabilities &
2361                  RTEMS_BLKDEV_CAP_MULTISECTOR_CONT ? "MULTI" : "SCAT");
2362       
2363        if ((dd->phys_dev->capabilities & RTEMS_BLKDEV_CAP_MULTISECTOR_CONT) &&
2364            transfer->write_req->bufnum &&
2365            (bd->block != (last_block + bufs_per_bd)))
2366        {
2367          rtems_chain_prepend (&transfer->bds, &bd->link);
2368          write = true;
2369        }
2370        else
2371        {
2372          rtems_blkdev_sg_buffer* buf;
2373          buf = &transfer->write_req->bufs[transfer->write_req->bufnum];
2374          transfer->write_req->bufnum++;
2375          buf->user   = bd;
2376          buf->block  = bd->block;
2377          buf->length = dd->block_size;
2378          buf->buffer = bd->buffer;
2379          last_block  = bd->block;
2380        }
2381
2382        /*
2383         * Perform the transfer if there are no more buffers, or the transfer
2384         * size has reached the configured max. value.
2385         */
2386
2387        if (rtems_chain_is_empty (&transfer->bds) ||
2388            (transfer->write_req->bufnum >= rtems_bdbuf_configuration.max_write_blocks))
2389          write = true;
2390
2391        if (write)
2392        {
2393          int result;
2394          uint32_t b;
2395
2396          if (rtems_bdbuf_tracer)
2397            printf ("bdbuf:swapout write: writing bufnum:%lu\n",
2398                    transfer->write_req->bufnum);
2399
2400          /*
2401           * Perform the transfer. No cache locks, no preemption, only the disk
2402           * device is being held.
2403           */
2404          result = dd->ioctl (dd, RTEMS_BLKIO_REQUEST, transfer->write_req);
2405          if (result < 0)
2406          {
2407            rtems_bdbuf_lock_cache ();
2408             
2409            for (b = 0; b < transfer->write_req->bufnum; b++)
2410            {
2411              bd = transfer->write_req->bufs[b].user;
2412              bd->state  = RTEMS_BDBUF_STATE_MODIFIED;
2413              bd->error = errno;
2414
2415              /*
2416               * Place back on the cache's modified queue and try again.
2417               *
2418               * @warning Not sure this is the best option but I do not know
2419               *          what else can be done.
2420               */
2421              rtems_chain_append (&bdbuf_cache.modified, &bd->link);
2422            }
2423          }
2424          else
2425          {
2426            rtems_status_code sc = 0;
2427            rtems_event_set   out;
2428
2429            sc = rtems_event_receive (RTEMS_BDBUF_TRANSFER_SYNC,
2430                                      RTEMS_EVENT_ALL | RTEMS_WAIT,
2431                                      0, &out);
2432
2433            if (sc != RTEMS_SUCCESSFUL)
2434              rtems_fatal_error_occurred (BLKDEV_FATAL_BDBUF_SWAPOUT_RE);
2435
2436            rtems_bdbuf_lock_cache ();
2437
2438            for (b = 0; b < transfer->write_req->bufnum; b++)
2439            {
2440              bd = transfer->write_req->bufs[b].user;
2441              bd->state = RTEMS_BDBUF_STATE_CACHED;
2442              bd->error = 0;
2443
2444              /*
2445               * The buffer is now not modified so lower the user count for the group.
2446               */
2447              bd->group->users--;
2448
2449              if (rtems_bdbuf_tracer)
2450                rtems_bdbuf_show_users ("write", bd);
2451
2452              rtems_chain_append (&bdbuf_cache.lru, &bd->link);
2453             
2454              if (bd->waiters)
2455                rtems_bdbuf_wake (bdbuf_cache.transfer, &bdbuf_cache.transfer_waiters);
2456              else
2457                rtems_bdbuf_wake (bdbuf_cache.waiting, &bdbuf_cache.wait_waiters);
2458            }
2459          }
2460
2461          if (rtems_bdbuf_tracer)
2462            rtems_bdbuf_show_usage ();
2463
2464          rtems_bdbuf_unlock_cache ();
2465
2466          transfer->write_req->status = RTEMS_RESOURCE_IN_USE;
2467          transfer->write_req->error = 0;
2468          transfer->write_req->bufnum = 0;
2469        }
2470      }
2471         
2472      rtems_disk_release (dd);
2473    }
2474    else
2475    {
2476      /*
2477       * We have buffers but no device. Put the BDs back onto the
2478       * ready queue and exit.
2479       */
2480      /* @todo fixme */
2481    }
2482  }
2483}
2484
2485/**
2486 * Process the modified list of buffers. There is a sync or modified list that
2487 * needs to be handled so we have a common function to do the work.
2488 *
2489 * @param dev The device to handle. If -1 no device is selected so select the
2490 *            device of the first buffer to be written to disk.
2491 * @param chain The modified chain to process.
2492 * @param transfer The chain to append buffers to be written to.
2493 * @param sync_active If true this is a sync operation so expire all timers.
2494 * @param update_timers If true update the timers.
2495 * @param timer_delta If update_timers is true update the timers by this
2496 *                    amount.
2497 */
2498static void
2499rtems_bdbuf_swapout_modified_processing (dev_t*               dev,
2500                                         rtems_chain_control* chain,
2501                                         rtems_chain_control* transfer,
2502                                         bool                 sync_active,
2503                                         bool                 update_timers,
2504                                         uint32_t             timer_delta)
2505{
2506  if (!rtems_chain_is_empty (chain))
2507  {
2508    rtems_chain_node* node = rtems_chain_head (chain);
2509    node = node->next;
2510
2511    while (!rtems_chain_is_tail (chain, node))
2512    {
2513      rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;
2514   
2515      /*
2516       * Check if the buffer's hold timer has reached 0. If a sync is active
2517       * force all the timers to 0.
2518       *
2519       * @note Lots of sync requests will skew this timer. It should be based
2520       *       on TOD to be accurate. Does it matter ?
2521       */
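      /*
       * An illustrative timing (figures assumed): with a swap_block_hold of
       * 100 msecs and a swapout period of 250 msecs the hold timer is less
       * than the timer_delta on the first pass so it drops to 0 and the
       * buffer is written.
       */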
2522      if (sync_active)
2523        bd->hold_timer = 0;
2524 
2525      if (bd->hold_timer)
2526      {
2527        if (update_timers)
2528        {
2529          if (bd->hold_timer > timer_delta)
2530            bd->hold_timer -= timer_delta;
2531          else
2532            bd->hold_timer = 0;
2533        }
2534
2535        if (bd->hold_timer)
2536        {
2537          node = node->next;
2538          continue;
2539        }
2540      }
2541
2542      /*
2543       * This assumes a dev_t can be set to -1 which is just an
2544       * assumption. We cannot test for the transfer list being empty
2545       * because a sync device call sets the device to use.
2546       */
2547      if (*dev == (dev_t)-1)
2548        *dev = bd->dev;
2549
2550      if (bd->dev == *dev)
2551      {
2552        rtems_chain_node* next_node = node->next;
2553        rtems_chain_node* tnode = rtems_chain_tail (transfer);
2554   
2555        /*
2556         * The blocks on the transfer list are sorted in block order. This
2557         * means multi-block transfers for drivers that require consecutive
2558         * blocks perform better with sorted blocks and for real disks it may
2559         * help lower head movement.
2560         */
2561
2562        bd->state = RTEMS_BDBUF_STATE_TRANSFER;
2563
2564        rtems_chain_extract (node);
2565
2566        tnode = tnode->previous;
2567         
2568        while (node && !rtems_chain_is_head (transfer, tnode))
2569        {
2570          rtems_bdbuf_buffer* tbd = (rtems_bdbuf_buffer*) tnode;
2571
2572          if (bd->block > tbd->block)
2573          {
2574            rtems_chain_insert (tnode, node);
2575            node = NULL;
2576          }
2577          else
2578            tnode = tnode->previous;
2579        }
2580       
2581        if (node)
2582          rtems_chain_prepend (transfer, node);
2583         
2584        node = next_node;
2585      }
2586      else
2587      {
2588        node = node->next;
2589      }
2590    }
2591  }
2592}
2593
2594/**
2595 * Process the cache's modified buffers. Check the sync list first then the
2596 * modified list extracting the buffers suitable to be written to disk. We
2597 * handle one device at a time. The task level loop will repeat this operation
2598 * while there are buffers to be written. If the transfer fails place the
2599 * buffers back on the modified list and try again later. The cache is
2600 * unlocked while the buffers are being written to disk.
2601 *
2602 * @param timer_delta If update_timers is true update the timers by this
2603 *                    amount.
2604 * @param update_timers If true update the timers.
2605 * @param transfer The transfer transaction data.
2606 *
2607 * @retval true Buffers were written to disk so scan again.
2608 * @retval false No buffers were written to disk.
2609 */
2610static bool
2611rtems_bdbuf_swapout_processing (unsigned long                 timer_delta,
2612                                bool                          update_timers,
2613                                rtems_bdbuf_swapout_transfer* transfer)
2614{
2615  rtems_bdbuf_swapout_worker* worker;
2616  bool                        transfered_buffers = false;
2617
2618  rtems_bdbuf_lock_cache ();
2619
2620  /*
2621   * If a sync is active do not use a worker because the current code does
2622   * not clean up after it. We need to know the buffers have been written
2623   * when syncing so we can release the sync lock, and currently worker
2624   * threads do not return to here. We do not know a worker is the last in
2625   * a sequence of sync writes until after we have it running so we cannot
2626   * tell it to release the lock. The simplest solution is to have the main
2627   * swap out task perform all sync operations.
2628   */
2629  if (bdbuf_cache.sync_active)
2630    worker = NULL;
2631  else
2632  {
2633    worker = (rtems_bdbuf_swapout_worker*)
2634      rtems_chain_get (&bdbuf_cache.swapout_workers);
2635    if (worker)
2636      transfer = &worker->transfer;
2637  }
2638 
2639  rtems_chain_initialize_empty (&transfer->bds);
2640  transfer->dev = -1;
2641 
2642  /*
2643   * When the sync is for a device limit the sync to that device. If the sync
2644   * is for a buffer handle process the devices in the order on the sync
2645   * list. This means the dev is -1.
2646   */
2647  if (bdbuf_cache.sync_active)
2648    transfer->dev = bdbuf_cache.sync_device;
2649 
2650  /*
2651   * If we have any buffers in the sync queue move them to the modified
2652   * list. The first sync buffer will select the device we use.
2653   */
2654  rtems_bdbuf_swapout_modified_processing (&transfer->dev,
2655                                           &bdbuf_cache.sync,
2656                                           &transfer->bds,
2657                                           true, false,
2658                                           timer_delta);
2659
2660  /*
2661   * Process the cache's modified list.
2662   */
2663  rtems_bdbuf_swapout_modified_processing (&transfer->dev,
2664                                           &bdbuf_cache.modified,
2665                                           &transfer->bds,
2666                                           bdbuf_cache.sync_active,
2667                                           update_timers,
2668                                           timer_delta);
2669
2670  /*
2671   * We have all the buffers that have been modified for this device so the
2672   * cache can be unlocked because the state of each buffer has been set to
2673   * TRANSFER.
2674   */
2675  rtems_bdbuf_unlock_cache ();
2676
2677  /*
2678   * If there are buffers to transfer to the media transfer them.
2679   */
2680  if (!rtems_chain_is_empty (&transfer->bds))
2681  {
2682    if (worker)
2683    {
2684      rtems_status_code sc = rtems_event_send (worker->id,
2685                                               RTEMS_BDBUF_SWAPOUT_SYNC);
2686      if (sc != RTEMS_SUCCESSFUL)
2687        rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE);
2688    }
2689    else
2690    {
2691      rtems_bdbuf_swapout_write (transfer);
2692    }
2693   
2694    transfered_buffers = true;
2695  }
2696   
2697  if (bdbuf_cache.sync_active && !transfered_buffers)
2698  {
2699    rtems_id sync_requester;
2700    rtems_bdbuf_lock_cache ();
2701    sync_requester = bdbuf_cache.sync_requester;
2702    bdbuf_cache.sync_active = false;
2703    bdbuf_cache.sync_requester = 0;
2704    rtems_bdbuf_unlock_cache ();
2705    if (sync_requester)
2706      rtems_event_send (sync_requester, RTEMS_BDBUF_TRANSFER_SYNC);
2707  }
2708 
2709  return transfered_buffers;
2710}
2711
2712/**
2713 * Allocate the write request and initialise it for good measure.
2714 *
2715 * @return rtems_blkdev_request* The write request memory.
2716 */
2717static rtems_blkdev_request*
2718rtems_bdbuf_swapout_writereq_alloc (void)
2719{
2720  /*
2721   * @note chrisj The rtems_blkdev_request and the array at the end is a hack.
2722   * I am disappointed at finding code like this in RTEMS. The request should
2723   * have been a rtems_chain_control. Simple, fast and less storage as the node
2724   * is already part of the buffer structure.
2725   */
2726  rtems_blkdev_request* write_req =
2727    malloc (sizeof (rtems_blkdev_request) +
2728            (rtems_bdbuf_configuration.max_write_blocks *
2729             sizeof (rtems_blkdev_sg_buffer)));
2730
2731  if (!write_req)
2732    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM);
2733
2734  write_req->req = RTEMS_BLKDEV_REQ_WRITE;
2735  write_req->req_done = rtems_bdbuf_write_done;
2736  write_req->done_arg = write_req;
2737  write_req->io_task = rtems_task_self ();
2738
2739  return write_req;
2740}
2741
2742/**
2743 * The swapout worker thread body.
2744 *
2745 * @param arg A pointer to the worker thread's private data.
2746 * @return rtems_task Not used.
2747 */
2748static rtems_task
2749rtems_bdbuf_swapout_worker_task (rtems_task_argument arg)
2750{
2751  rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) arg;
2752
2753  while (worker->enabled)
2754  {
2755    rtems_event_set   out;
2756    rtems_status_code sc;
2757   
2758    sc = rtems_event_receive (RTEMS_BDBUF_SWAPOUT_SYNC,
2759                              RTEMS_EVENT_ALL | RTEMS_WAIT,
2760                              RTEMS_NO_TIMEOUT,
2761                              &out);
2762
2763    if (sc != RTEMS_SUCCESSFUL)
2764      rtems_fatal_error_occurred (BLKDEV_FATAL_BDBUF_SWAPOUT_RE);
2765
2766    rtems_bdbuf_swapout_write (&worker->transfer);
2767
2768    rtems_bdbuf_lock_cache ();
2769
2770    rtems_chain_initialize_empty (&worker->transfer.bds);
2771    worker->transfer.dev = -1;
2772
2773    rtems_chain_append (&bdbuf_cache.swapout_workers, &worker->link);
2774   
2775    rtems_bdbuf_unlock_cache ();
2776  }
2777
2778  free (worker->transfer.write_req);
2779  free (worker);
2780
2781  rtems_task_delete (RTEMS_SELF);
2782}
2783
2784/**
2785 * Open the swapout worker threads.
2786 */
2787static void
2788rtems_bdbuf_swapout_workers_open (void)
2789{
2790  rtems_status_code sc;
2791  int               w;
2792 
2793  rtems_bdbuf_lock_cache ();
2794 
2795  for (w = 0; w < rtems_bdbuf_configuration.swapout_workers; w++)
2796  {
2797    rtems_bdbuf_swapout_worker* worker;
2798
2799    worker = malloc (sizeof (rtems_bdbuf_swapout_worker));
2800    if (!worker)
2801      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM);
2802
2803    rtems_chain_append (&bdbuf_cache.swapout_workers, &worker->link);
2804    worker->enabled = true;
2805    worker->transfer.write_req = rtems_bdbuf_swapout_writereq_alloc ();
2806   
2807    rtems_chain_initialize_empty (&worker->transfer.bds);
2808    worker->transfer.dev = -1;
2809
2810    sc = rtems_task_create (rtems_build_name('B', 'D', 'o', 'a' + w),
2811                            (rtems_bdbuf_configuration.swapout_priority ?
2812                             rtems_bdbuf_configuration.swapout_priority :
2813                             RTEMS_BDBUF_SWAPOUT_TASK_PRIORITY_DEFAULT),
2814                            SWAPOUT_TASK_STACK_SIZE,
2815                            RTEMS_PREEMPT | RTEMS_NO_TIMESLICE | RTEMS_NO_ASR,
2816                            RTEMS_LOCAL | RTEMS_NO_FLOATING_POINT,
2817                            &worker->id);
2818    if (sc != RTEMS_SUCCESSFUL)
2819      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_CREATE);
2820
2821    sc = rtems_task_start (worker->id,
2822                           rtems_bdbuf_swapout_worker_task,
2823                           (rtems_task_argument) worker);
2824    if (sc != RTEMS_SUCCESSFUL)
2825      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_START);
2826  }
2827 
2828  rtems_bdbuf_unlock_cache ();
2829}
2830
2831/**
2832 * Close the swapout worker threads.
2833 */
2834static void
2835rtems_bdbuf_swapout_workers_close (void)
2836{
2837  rtems_chain_node* node;
2838 
2839  rtems_bdbuf_lock_cache ();
2840 
2841  node = rtems_chain_first (&bdbuf_cache.swapout_workers);
2842  while (!rtems_chain_is_tail (&bdbuf_cache.swapout_workers, node))
2843  {
2844    rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) node;
2845    worker->enabled = false;
2846    rtems_event_send (worker->id, RTEMS_BDBUF_SWAPOUT_SYNC);
2847    node = rtems_chain_next (node);
2848  }
2849 
2850  rtems_bdbuf_unlock_cache ();
2851}
2852
2853/**
2854 * Body of task which takes care of flushing modified buffers to the disk.
2855 *
2856 * @param arg A pointer to the global cache data. Use the global variable and
2857 *            not this.
2858 * @return rtems_task Not used.
2859 */
2860static rtems_task
2861rtems_bdbuf_swapout_task (rtems_task_argument arg)
2862{
2863  rtems_bdbuf_swapout_transfer transfer;
2864  uint32_t                     period_in_ticks;
2865  const uint32_t               period_in_msecs = bdbuf_config.swapout_period;
2866  uint32_t                     timer_delta;
2867
2868  transfer.write_req = rtems_bdbuf_swapout_writereq_alloc ();
2869  rtems_chain_initialize_empty (&transfer.bds);
2870  transfer.dev = -1;
2871
2872  /*
2873   * Localise the period.
2874   */
2875  period_in_ticks = RTEMS_MICROSECONDS_TO_TICKS (period_in_msecs * 1000);
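  /*
   * For example (illustrative figures): a swapout period of 250 msecs is
   * 250000 usecs, which with a 10 msec clock tick gives 25 ticks.
   */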
2876
2877  /*
2878   * This is temporary. Needs to be changed to use the real time clock.
2879   */
2880  timer_delta = period_in_msecs;
2881
2882  /*
2883   * Create the worker threads.
2884   */
2885  rtems_bdbuf_swapout_workers_open ();
2886 
2887  while (bdbuf_cache.swapout_enabled)
2888  {
2889    rtems_event_set   out;
2890    rtems_status_code sc;
2891
2892    /*
2893     * Only update the timers once in the processing cycle.
2894     */
2895    bool update_timers = true;
2896   
2897    /*
2898     * If we write buffers to any disk perform a check again. We only write a
2899     * single device at a time and the cache may have more than one device's
2900     * buffers modified waiting to be written.
2901     */
2902    bool transfered_buffers;
2903
2904    do
2905    {
2906      transfered_buffers = false;
2907
2908      /*
2909       * Extract all the buffers we find for a specific device. The device is
2910       * the first one we find on a modified list. Process the sync queue of
2911       * buffers first.
2912       */
2913      if (rtems_bdbuf_swapout_processing (timer_delta,
2914                                          update_timers,
2915                                          &transfer))
2916      {
2917        transfered_buffers = true;
2918      }
2919     
2920      /*
2921       * Only update the timers once.
2922       */
2923      update_timers = false;
2924    }
2925    while (transfered_buffers);
2926
2927    sc = rtems_event_receive (RTEMS_BDBUF_SWAPOUT_SYNC,
2928                              RTEMS_EVENT_ALL | RTEMS_WAIT,
2929                              period_in_ticks,
2930                              &out);
2931
2932    if ((sc != RTEMS_SUCCESSFUL) && (sc != RTEMS_TIMEOUT))
2933      rtems_fatal_error_occurred (BLKDEV_FATAL_BDBUF_SWAPOUT_RE);
2934  }
2935
2936  rtems_bdbuf_swapout_workers_close ();
2937 
2938  free (transfer.write_req);
2939
2940  rtems_task_delete (RTEMS_SELF);
2941}
2942